xgboost
random.h
Go to the documentation of this file.
1 
7 #ifndef XGBOOST_COMMON_RANDOM_H_
8 #define XGBOOST_COMMON_RANDOM_H_
9 
10 #include <rabit/rabit.h>
11 #include <xgboost/logging.h>
12 #include <algorithm>
13 #include <vector>
14 #include <limits>
15 #include <map>
16 #include <memory>
17 #include <numeric>
18 #include <random>
19 
20 #include "io.h"
21 #include "host_device_vector.h"
22 
23 namespace xgboost {
24 namespace common {
/*! \brief Define mt19937 as default type Random Engine. */
28 using RandomEngine = std::mt19937;
29 
30 #if XGBOOST_CUSTOMIZE_GLOBAL_PRNG
31 
/*!
 * \brief Random engine type used for the global PRNG when
 *        XGBOOST_CUSTOMIZE_GLOBAL_PRNG is enabled.
 *
 * The class exposes the members that standard algorithms such as
 * std::shuffle require from a uniform random bit generator: result_type,
 * min(), max() and operator(). seed() and operator() are only declared here;
 * no definition is visible in this header.
 */
class CustomGlobalRandomEngine {
 public:
  /*! \brief Type of the values produced by the engine. */
  using result_type = uint32_t;

  /*! \brief Smallest value operator() may return. */
  static constexpr result_type min() {
    return 0;
  }

  /*! \brief Largest value operator() may return. */
  static constexpr result_type max() {
    return std::numeric_limits<result_type>::max();
  }

  /*!
   * \brief Seed the engine.
   * \param val Seed value.
   */
  void seed(result_type val);

  /*! \brief Produce the next random value, in the range [min(), max()]. */
  result_type operator()();
};

/*! \brief Global random engine type (custom engine variant). */
using GlobalRandomEngine = CustomGlobalRandomEngine;
64 
65 #else
66 
/*! \brief Global random engine type; defaults to RandomEngine when no custom PRNG is configured. */
69 using GlobalRandomEngine = RandomEngine;
70 #endif // XGBOOST_CUSTOMIZE_GLOBAL_PRNG
71 
/*!
 * \brief Global singleton of a random engine.
 *  This random engine is thread-local and only visible to the current thread.
 * \return Reference to the engine (declared here; defined outside this header).
 */
77 GlobalRandomEngine& GlobalRandom(); // NOLINT(*)
78 
88  std::shared_ptr<HostDeviceVector<int>> feature_set_tree_;
89  std::map<int, std::shared_ptr<HostDeviceVector<int>>> feature_set_level_;
90  float colsample_bylevel_{1.0f};
91  float colsample_bytree_{1.0f};
92  float colsample_bynode_{1.0f};
93  GlobalRandomEngine rng_;
94 
95  std::shared_ptr<HostDeviceVector<int>> ColSample(
96  std::shared_ptr<HostDeviceVector<int>> p_features, float colsample) {
97  if (colsample == 1.0f) return p_features;
98  const auto& features = p_features->HostVector();
99  CHECK_GT(features.size(), 0);
100  int n = std::max(1, static_cast<int>(colsample * features.size()));
101  auto p_new_features = std::make_shared<HostDeviceVector<int>>();
102  auto& new_features = *p_new_features;
103  new_features.Resize(features.size());
104  std::copy(features.begin(), features.end(),
105  new_features.HostVector().begin());
106  std::shuffle(new_features.HostVector().begin(),
107  new_features.HostVector().end(), rng_);
108  new_features.Resize(n);
109  std::sort(new_features.HostVector().begin(),
110  new_features.HostVector().end());
111 
112  return p_new_features;
113  }
114 
115  public:
120  explicit ColumnSampler(uint32_t seed) {
121  rng_.seed(seed);
122  }
123 
129  uint32_t seed = common::GlobalRandom()();
130  rabit::Broadcast(&seed, sizeof(seed), 0);
131  rng_.seed(seed);
132  }
133 
143  void Init(int64_t num_col, float colsample_bynode, float colsample_bylevel,
144  float colsample_bytree, bool skip_index_0 = false) {
145  colsample_bylevel_ = colsample_bylevel;
146  colsample_bytree_ = colsample_bytree;
147  colsample_bynode_ = colsample_bynode;
148 
149  if (feature_set_tree_ == nullptr) {
150  feature_set_tree_ = std::make_shared<HostDeviceVector<int>>();
151  }
152  Reset();
153 
154  int begin_idx = skip_index_0 ? 1 : 0;
155  feature_set_tree_->Resize(num_col - begin_idx);
156  std::iota(feature_set_tree_->HostVector().begin(),
157  feature_set_tree_->HostVector().end(), begin_idx);
158 
159  feature_set_tree_ = ColSample(feature_set_tree_, colsample_bytree_);
160  }
161 
165  void Reset() {
166  feature_set_tree_->Resize(0);
167  feature_set_level_.clear();
168  }
169 
181  std::shared_ptr<HostDeviceVector<int>> GetFeatureSet(int depth) {
182  if (colsample_bylevel_ == 1.0f && colsample_bynode_ == 1.0f) {
183  return feature_set_tree_;
184  }
185 
186  if (feature_set_level_.count(depth) == 0) {
187  // Level sampling, level does not yet exist so generate it
188  feature_set_level_[depth] = ColSample(feature_set_tree_, colsample_bylevel_);
189  }
190  if (colsample_bynode_ == 1.0f) {
191  // Level sampling
192  return feature_set_level_[depth];
193  }
194  // Need to sample for the node individually
195  return ColSample(feature_set_level_[depth], colsample_bynode_);
196  }
197 };
198 
199 } // namespace common
200 } // namespace xgboost
201 #endif // XGBOOST_COMMON_RANDOM_H_
Definition: host_device_vector.h:200
void Reset()
Resets this object.
Definition: random.h:165
general stream interface for serialization, I/O
A device-and-host vector abstraction layer.
ColumnSampler()
Column sampler constructor.
Definition: random.h:128
ColumnSampler(uint32_t seed)
Column sampler constructor.
Definition: random.h:120
GlobalRandomEngine & GlobalRandom()
Global singleton of a random engine. This random engine is thread-local and only visible to the current thread.
RandomEngine GlobalRandomEngine
global random engine
Definition: random.h:69
namespace of xgboost
Definition: base.h:79
std::shared_ptr< HostDeviceVector< int > > GetFeatureSet(int depth)
Samples a feature set.
Definition: random.h:181
void Init(int64_t num_col, float colsample_bynode, float colsample_bylevel, float colsample_bytree, bool skip_index_0=false)
Initialise this object before use.
Definition: random.h:143
Handles selection of columns due to the colsample_bytree, colsample_bylevel and colsample_bynode parameters.
Definition: random.h:87
std::mt19937 RandomEngine
Define mt19937 as default type Random Engine.
Definition: random.h:28