xgboost
categorical.h
Go to the documentation of this file.
1 
5 #ifndef XGBOOST_COMMON_CATEGORICAL_H_
6 #define XGBOOST_COMMON_CATEGORICAL_H_
7 
8 #include <limits>
9 
10 #include "bitfield.h"
11 #include "xgboost/base.h"
12 #include "xgboost/data.h"
13 #include "xgboost/parameter.h"
14 #include "xgboost/span.h"
15 
16 namespace xgboost {
17 namespace common {
18 
21 
22 // Cast the categorical type.
23 template <typename T>
25  return static_cast<bst_cat_t>(v);
26 }
27 
28 /* \brief Whether fidx is a categorical feature.
29  *
30  * \param ft Feature type for all features.
31  * \param fidx Feature index.
32  * \return Whether feature pointed by fidx is categorical feature.
33  */
35  return !ft.empty() && ft[fidx] == FeatureType::kCategorical;
36 }
37 
38 constexpr inline bst_cat_t OutOfRangeCat() {
39  // See the round trip assert in `InvalidCat`.
40  return static_cast<bst_cat_t>(16777217) - static_cast<bst_cat_t>(1);
41 }
42 
43 inline XGBOOST_DEVICE bool InvalidCat(float cat) {
44  constexpr auto kMaxCat = OutOfRangeCat();
45  static_assert(static_cast<bst_cat_t>(static_cast<float>(kMaxCat)) == kMaxCat, "");
46  static_assert(static_cast<bst_cat_t>(static_cast<float>(kMaxCat + 1)) != kMaxCat + 1, "");
47  static_assert(static_cast<float>(kMaxCat + 1) == kMaxCat, "");
48  return cat < 0 || cat >= kMaxCat;
49 }
50 
51 /* \brief Whether should it traverse to left branch of a tree.
52  *
53  * For one hot split, go to left if it's NOT the matching category.
54  */
55 template <bool validate = true>
56 inline XGBOOST_DEVICE bool Decision(common::Span<uint32_t const> cats, float cat, bool dft_left) {
57  CLBitField32 const s_cats(cats);
58  // FIXME: Size() is not accurate since it represents the size of bit set instead of
59  // actual number of categories.
60  if (XGBOOST_EXPECT(validate && (InvalidCat(cat) || cat >= s_cats.Size()), false)) {
61  return dft_left;
62  }
63 
64  auto pos = KCatBitField::ToBitPos(cat);
65  if (pos.int_pos >= cats.size()) {
66  return true;
67  }
68  return !s_cats.Check(AsCat(cat));
69 }
70 
71 inline void InvalidCategory() {
72  // OutOfRangeCat() can be accurately represented, but everything after it will be
73  // rounded toward it, so we use >= for comparison check. As a result, we require input
74  // values to be less than this last representable value.
75  auto str = std::to_string(OutOfRangeCat());
76  LOG(FATAL) << "Invalid categorical value detected. Categorical value should be non-negative, "
77  "less than total number of categories in training data and less than " +
78  str;
79 }
80 
81 inline void CheckMaxCat(float max_cat, size_t n_categories) {
82  CHECK_GE(max_cat + 1, n_categories)
83  << "Maximum cateogry should not be lesser than the total number of categories.";
84 }
85 
89 XGBOOST_DEVICE inline bool UseOneHot(uint32_t n_cats, uint32_t max_cat_to_onehot) {
90  bool use_one_hot = n_cats < max_cat_to_onehot;
91  return use_one_hot;
92 }
93 
94 struct IsCatOp {
96 };
97 } // namespace common
98 } // namespace xgboost
99 
100 #endif // XGBOOST_COMMON_CATEGORICAL_H_
xgboost::BitFieldContainer::ToBitPos
static XGBOOST_DEVICE Pos ToBitPos(index_type pos)
Definition: bitfield.h:77
bitfield.h
XGBOOST_EXPECT
#define XGBOOST_EXPECT(cond, ret)
Definition: base.h:75
xgboost::BitFieldContainer
A non-owning type with auxiliary methods defined for manipulating bits.
Definition: bitfield.h:59
parameter.h
macro for using C++11 enum class as DMLC parameter
xgboost::common::CheckMaxCat
void CheckMaxCat(float max_cat, size_t n_categories)
Definition: categorical.h:81
xgboost::common::UseOneHot
XGBOOST_DEVICE bool UseOneHot(uint32_t n_cats, uint32_t max_cat_to_onehot)
Whether should we use onehot encoding for categorical data.
Definition: categorical.h:89
base.h
defines configuration macros of xgboost.
xgboost::common::IsCatOp
Definition: categorical.h:94
xgboost::bst_feature_t
uint32_t bst_feature_t
Type for data column (feature) index.
Definition: base.h:123
xgboost::common::Span::empty
constexpr XGBOOST_DEVICE bool empty() const __span_noexcept
Definition: span.h:560
xgboost::common::InvalidCategory
void InvalidCategory()
Definition: categorical.h:71
span.h
xgboost::common::InvalidCat
XGBOOST_DEVICE bool InvalidCat(float cat)
Definition: categorical.h:43
xgboost::BitFieldContainer::Check
XGBOOST_DEVICE bool Check(Pos pos_v) const
Definition: bitfield.h:171
xgboost::bst_cat_t
int32_t bst_cat_t
Categorical value type.
Definition: base.h:121
xgboost::CLBitField32
BitFieldContainer< uint32_t, LBitsPolicy< uint32_t, true >, true > CLBitField32
Definition: bitfield.h:231
xgboost::FeatureType
FeatureType
Definition: data.h:41
xgboost::common::Span
span class implementation, based on ISO++20 span<T>. The interface should be the same.
Definition: span.h:148
data.h
The input data structure of xgboost.
xgboost::common::Span::size
constexpr XGBOOST_DEVICE index_type size() const __span_noexcept
Definition: span.h:553
xgboost::LBitField32
BitFieldContainer< uint32_t, LBitsPolicy< uint32_t > > LBitField32
Definition: bitfield.h:230
xgboost::common::Decision
XGBOOST_DEVICE bool Decision(common::Span< uint32_t const > cats, float cat, bool dft_left)
Definition: categorical.h:56
xgboost::BitFieldContainer::Size
XGBOOST_DEVICE size_t Size() const
Definition: bitfield.h:184
xgboost::common::IsCat
XGBOOST_DEVICE bool IsCat(Span< FeatureType const > ft, bst_feature_t fidx)
Definition: categorical.h:34
xgboost::common::IsCatOp::operator()
XGBOOST_DEVICE bool operator()(FeatureType ft)
Definition: categorical.h:95
xgboost::FeatureType::kCategorical
@ kCategorical
XGBOOST_DEVICE
#define XGBOOST_DEVICE
Tag function as usable by device.
Definition: base.h:84
xgboost::common::AsCat
XGBOOST_DEVICE bst_cat_t AsCat(T const &v)
Definition: categorical.h:24
xgboost
namespace of xgboost
Definition: base.h:110
xgboost::common::OutOfRangeCat
constexpr bst_cat_t OutOfRangeCat()
Definition: categorical.h:38