xgboost
random.h
Go to the documentation of this file.
1 
7 #ifndef XGBOOST_COMMON_RANDOM_H_
8 #define XGBOOST_COMMON_RANDOM_H_
9 
10 #include <rabit/rabit.h>
11 #include <xgboost/logging.h>
12 #include <algorithm>
13 #include <vector>
14 #include <limits>
15 #include <map>
16 #include <memory>
17 #include <numeric>
18 #include <random>
19 
21 
22 namespace xgboost {
23 namespace common {
27 using RandomEngine = std::mt19937;
28 
29 #if XGBOOST_CUSTOMIZE_GLOBAL_PRNG
30 
/*!
 * \brief User-supplied replacement for the global random engine.
 *
 *  Only declared when XGBOOST_CUSTOMIZE_GLOBAL_PRNG is set. seed() and
 *  operator() are declared here without a body, so their definitions must be
 *  provided elsewhere. The interface (result_type, min(), max(), operator())
 *  is the shape standard algorithms such as std::shuffle expect from a
 *  random bit generator.
 */
class CustomGlobalRandomEngine {
 public:
  /*! \brief Type of the values produced by operator(). */
  using result_type = uint32_t;

  /*! \brief Smallest value the engine reports it can produce (always 0). */
  static constexpr result_type min() {
    return 0;
  }

  /*! \brief Largest value the engine reports it can produce (max of result_type). */
  static constexpr result_type max() {
    return std::numeric_limits<result_type>::max();
  }

  /*!
   * \brief Seed the engine.
   * \param val Seed value.
   */
  void seed(result_type val);

  /*!
   * \brief Produce a value of result_type.
   * \note Presumably the next pseudo-random number in [min(), max()];
   *       the definition is not part of this header - confirm there.
   */
  result_type operator()();
};

/*! \brief Global random engine type when the custom PRNG is enabled. */
using GlobalRandomEngine = CustomGlobalRandomEngine;
63 
64 #else
65 
68 using GlobalRandomEngine = RandomEngine;
69 #endif // XGBOOST_CUSTOMIZE_GLOBAL_PRNG
70 
76 GlobalRandomEngine& GlobalRandom(); // NOLINT(*)
77 
87  std::shared_ptr<HostDeviceVector<bst_feature_t>> feature_set_tree_;
88  std::map<int, std::shared_ptr<HostDeviceVector<bst_feature_t>>> feature_set_level_;
89  float colsample_bylevel_{1.0f};
90  float colsample_bytree_{1.0f};
91  float colsample_bynode_{1.0f};
92  GlobalRandomEngine rng_;
93 
94  std::shared_ptr<HostDeviceVector<bst_feature_t>> ColSample(
95  std::shared_ptr<HostDeviceVector<bst_feature_t>> p_features, float colsample) {
96  if (colsample == 1.0f) return p_features;
97  const auto& features = p_features->HostVector();
98  CHECK_GT(features.size(), 0);
99  int n = std::max(1, static_cast<int>(colsample * features.size()));
100  auto p_new_features = std::make_shared<HostDeviceVector<bst_feature_t>>();
101  auto& new_features = *p_new_features;
102  new_features.Resize(features.size());
103  std::copy(features.begin(), features.end(),
104  new_features.HostVector().begin());
105  std::shuffle(new_features.HostVector().begin(),
106  new_features.HostVector().end(), rng_);
107  new_features.Resize(n);
108  std::sort(new_features.HostVector().begin(),
109  new_features.HostVector().end());
110 
111  return p_new_features;
112  }
113 
114  public:
119  explicit ColumnSampler(uint32_t seed) {
120  rng_.seed(seed);
121  }
122 
128  uint32_t seed = common::GlobalRandom()();
129  rabit::Broadcast(&seed, sizeof(seed), 0, "seed");
130  rng_.seed(seed);
131  }
132 
142  void Init(int64_t num_col, float colsample_bynode, float colsample_bylevel,
143  float colsample_bytree, bool skip_index_0 = false) {
144  colsample_bylevel_ = colsample_bylevel;
145  colsample_bytree_ = colsample_bytree;
146  colsample_bynode_ = colsample_bynode;
147 
148  if (feature_set_tree_ == nullptr) {
149  feature_set_tree_ = std::make_shared<HostDeviceVector<bst_feature_t>>();
150  }
151  Reset();
152 
153  int begin_idx = skip_index_0 ? 1 : 0;
154  feature_set_tree_->Resize(num_col - begin_idx);
155  std::iota(feature_set_tree_->HostVector().begin(),
156  feature_set_tree_->HostVector().end(), begin_idx);
157 
158  feature_set_tree_ = ColSample(feature_set_tree_, colsample_bytree_);
159  }
160 
164  void Reset() {
165  feature_set_tree_->Resize(0);
166  feature_set_level_.clear();
167  }
168 
180  std::shared_ptr<HostDeviceVector<bst_feature_t>> GetFeatureSet(int depth) {
181  if (colsample_bylevel_ == 1.0f && colsample_bynode_ == 1.0f) {
182  return feature_set_tree_;
183  }
184 
185  if (feature_set_level_.count(depth) == 0) {
186  // Level sampling, level does not yet exist so generate it
187  feature_set_level_[depth] = ColSample(feature_set_tree_, colsample_bylevel_);
188  }
189  if (colsample_bynode_ == 1.0f) {
190  // Level sampling
191  return feature_set_level_[depth];
192  }
193  // Need to sample for the node individually
194  return ColSample(feature_set_level_[depth], colsample_bynode_);
195  }
196 };
197 
198 } // namespace common
199 } // namespace xgboost
200 #endif // XGBOOST_COMMON_RANDOM_H_
Definition: host_device_vector.h:86
void Reset()
Resets this object.
Definition: random.h:164
std::shared_ptr< HostDeviceVector< bst_feature_t > > GetFeatureSet(int depth)
Samples a feature set.
Definition: random.h:180
A device-and-host vector abstraction layer.
ColumnSampler()
Column sampler constructor.
Definition: random.h:127
ColumnSampler(uint32_t seed)
Column sampler constructor.
Definition: random.h:119
GlobalRandomEngine & GlobalRandom()
Global singleton of a random engine. This random engine is thread-local and only visible to the current thread.
RandomEngine GlobalRandomEngine
global random engine
Definition: random.h:68
namespace of xgboost
Definition: base.h:102
void Init(int64_t num_col, float colsample_bynode, float colsample_bylevel, float colsample_bytree, bool skip_index_0=false)
Initialise this object before use.
Definition: random.h:142
Handles selection of columns due to the colsample_bytree, colsample_bylevel and colsample_bynode parameters.
Definition: random.h:86
std::mt19937 RandomEngine
Define mt19937 as default type Random Engine.
Definition: random.h:27