xgboost
json_io.h
Go to the documentation of this file.
1 
4 #ifndef XGBOOST_JSON_IO_H_
5 #define XGBOOST_JSON_IO_H_
6 #include <dmlc/endian.h>
7 #include <xgboost/base.h>
8 #include <xgboost/json.h>
9 
10 #include <cinttypes>
11 #include <limits>
12 #include <map>
13 #include <memory>
14 #include <sstream>
15 #include <string>
16 #include <utility>
17 #include <vector>
18 
19 namespace xgboost {
23 class JsonReader {
24  public:
25  using Char = std::int8_t;
26 
27  protected:
28  size_t constexpr static kMaxNumLength = std::numeric_limits<double>::max_digits10 + 1;
29 
30  struct SourceLocation {
31  private:
32  std::size_t pos_{0}; // current position in raw_str_
33 
34  public:
35  SourceLocation() = default;
36  size_t Pos() const { return pos_; }
37 
38  void Forward() { pos_++; }
39  void Forward(uint32_t n) { pos_ += n; }
41 
43 
44  protected:
45  void SkipSpaces();
46 
48  if (XGBOOST_EXPECT((cursor_.Pos() == raw_str_.size()), false)) {
49  return -1;
50  }
51  char ch = raw_str_[cursor_.Pos()];
52  cursor_.Forward();
53  return ch;
54  }
55 
57  if (cursor_.Pos() == raw_str_.size()) {
58  return -1;
59  }
60  Char ch = raw_str_[cursor_.Pos()];
61  return ch;
62  }
63 
64  /* \brief Skip spaces and consume next character. */
66  SkipSpaces();
67  return GetNextChar();
68  }
69  /* \brief Consume next character without first skipping empty space, throw when the next
70  * character is not the expected one.
71  */
72  Char GetConsecutiveChar(char expected_char) {
73  Char result = GetNextChar();
74  if (XGBOOST_EXPECT(result != expected_char, false)) { Expect(expected_char, result); }
75  return result;
76  }
77 
78  void Error(std::string msg) const;
79 
80  // Report expected character
81  void Expect(Char c, Char got) {
82  std::string msg = "Expecting: \"";
83  msg += c;
84  msg += "\", got: \"";
85  if (got == EOF) {
86  msg += "EOF\"";
87  } else if (got == 0) {
88  msg += "\\0\"";
89  } else {
90  msg += std::to_string(got) + " \"";
91  }
92  Error(msg);
93  }
94 
95  virtual Json ParseString();
96  virtual Json ParseObject();
97  virtual Json ParseArray();
98  virtual Json ParseNumber();
99  virtual Json ParseBoolean();
100  virtual Json ParseNull();
101 
103 
104  public:
105  explicit JsonReader(StringView str) :
106  raw_str_{str} {}
107 
108  virtual ~JsonReader() = default;
109 
110  virtual Json Load();
111 };
112 
113 class JsonWriter {
114  template <typename T, std::enable_if_t<!std::is_same<Json, T>::value>* = nullptr>
115  void Save(T const& v) {
116  this->Save(Json{v});
117  }
118  template <typename Array, typename Fn>
119  void WriteArray(Array const* arr, Fn&& fn) {
120  stream_->emplace_back('[');
121  auto const& vec = arr->GetArray();
122  size_t size = vec.size();
123  for (size_t i = 0; i < size; ++i) {
124  auto const& value = vec[i];
125  this->Save(fn(value));
126  if (i != size - 1) {
127  stream_->emplace_back(',');
128  }
129  }
130  stream_->emplace_back(']');
131  }
132 
133  protected:
134  std::vector<char>* stream_;
135 
136  public:
137  explicit JsonWriter(std::vector<char>* stream) : stream_{stream} {}
138 
139  virtual ~JsonWriter() = default;
140 
141  virtual void Save(Json json);
142 
143  virtual void Visit(JsonArray const* arr);
144  virtual void Visit(F32Array const* arr);
145  virtual void Visit(F64Array const*) { LOG(FATAL) << "Only UBJSON format can handle f64 array."; }
146  virtual void Visit(U8Array const* arr);
147  virtual void Visit(I32Array const* arr);
148  virtual void Visit(I64Array const* arr);
149  virtual void Visit(JsonObject const* obj);
150  virtual void Visit(JsonNumber const* num);
151  virtual void Visit(JsonInteger const* num);
152  virtual void Visit(JsonNull const* null);
153  virtual void Visit(JsonString const* str);
154  virtual void Visit(JsonBoolean const* boolean);
155 };
156 
#if defined(__GLIBC__)
/**
 * \brief Reverse the byte order of `v`.
 *
 * On this branch the primary template is only declared; definitions exist for the
 * 16-, 32- and 64-bit unsigned integers and map onto the compiler byte-swap builtins.
 */
template <typename T>
T BuiltinBSwap(T v);

template <>
inline uint16_t BuiltinBSwap<uint16_t>(uint16_t v) {
  return __builtin_bswap16(v);
}

template <>
inline uint32_t BuiltinBSwap<uint32_t>(uint32_t v) {
  return __builtin_bswap32(v);
}

template <>
inline uint64_t BuiltinBSwap<uint64_t>(uint64_t v) {
  return __builtin_bswap64(v);
}
#else
/**
 * \brief Reverse the byte order of `v` by delegating to dmlc::ByteSwap, treating `v`
 *        as a single element of `sizeof(v)` bytes.
 */
template <typename T>
T BuiltinBSwap(T v) {
  dmlc::ByteSwap(&v, sizeof(v), 1);
  return v;
}
#endif  // defined(__GLIBC__)
182 
/**
 * \brief Single-byte overload: a one-byte value has no byte order, so it is returned
 *        unchanged.
 */
template <typename T, std::enable_if_t<sizeof(T) == 1>* = nullptr>
inline T ToBigEndian(T v) {
  return v;
}

/**
 * \brief Multi-byte overload: when DMLC_LITTLE_ENDIAN is set the bytes of `v` are
 *        reversed, otherwise `v` is returned as is.
 *
 * The swap is its own inverse, so the same helper converts in both directions.
 *
 * \tparam T A POD type that is exactly 2, 4 or 8 bytes wide.
 */
template <typename T, std::enable_if_t<sizeof(T) != 1>* = nullptr>
inline T ToBigEndian(T v) {
  // Same requirement as std::is_pod, spelled without the trait deprecated in C++20.
  static_assert(std::is_trivial<T>::value && std::is_standard_layout<T>::value,
                "Only pod is supported.");
  // The swap goes through an unsigned integer of identical width.  Any other size
  // would make the memcpy calls below read or write the wrong number of bytes.
  static_assert(sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8,
                "Only 2, 4 and 8 byte types are supported.");
#if DMLC_LITTLE_ENDIAN
  auto constexpr kS = sizeof(T);
  using Bits =
      std::conditional_t<kS == 2, uint16_t, std::conditional_t<kS == 4, uint32_t, uint64_t>>;
  Bits u;
  // memcpy rather than a cast: avoids aliasing issues for float/double.
  std::memcpy(&u, &v, sizeof(u));
  u = BuiltinBSwap(u);
  std::memcpy(&v, &u, sizeof(u));
#endif  // DMLC_LITTLE_ENDIAN
  return v;
}
200 
204 class UBJReader : public JsonReader {
205  Json Parse();
206 
207  template <typename T>
208  T ReadStream() {
209  auto ptr = this->raw_str_.c_str() + cursor_.Pos();
210  T v{0};
211  std::memcpy(&v, ptr, sizeof(v));
212  cursor_.Forward(sizeof(v));
213  return v;
214  }
215 
216  template <typename T>
217  T ReadPrimitive() {
218  auto v = ReadStream<T>();
219  v = ToBigEndian(v);
220  return v;
221  }
222 
223  template <typename TypedArray>
224  auto ParseTypedArray(int64_t n) {
225  TypedArray results{static_cast<size_t>(n)};
226  for (int64_t i = 0; i < n; ++i) {
227  auto v = this->ReadPrimitive<typename TypedArray::Type>();
228  results.Set(i, v);
229  }
230  return Json{std::move(results)};
231  }
232 
233  std::string DecodeStr();
234 
235  Json ParseArray() override;
236  Json ParseObject() override;
237 
238  public:
240  Json Load() override;
241 };
242 
246 class UBJWriter : public JsonWriter {
247  void Visit(JsonArray const* arr) override;
248  void Visit(F32Array const* arr) override;
249  void Visit(F64Array const* arr) override;
250  void Visit(U8Array const* arr) override;
251  void Visit(I32Array const* arr) override;
252  void Visit(I64Array const* arr) override;
253  void Visit(JsonObject const* obj) override;
254  void Visit(JsonNumber const* num) override;
255  void Visit(JsonInteger const* num) override;
256  void Visit(JsonNull const* null) override;
257  void Visit(JsonString const* str) override;
258  void Visit(JsonBoolean const* boolean) override;
259 
260  public:
262  void Save(Json json) override;
263 };
264 } // namespace xgboost
265 
266 #endif // XGBOOST_JSON_IO_H_
Defines configuration macros and basic types for xgboost.
#define XGBOOST_EXPECT(cond, ret)
Definition: base.h:53
Definition: json.h:113
std::vector< Json > const & GetArray() &&
Definition: json.h:131
Describes both true and false.
Definition: json.h:322
Definition: json.h:261
Definition: json.h:305
Definition: json.h:226
Definition: json.h:196
A json reader, currently error checking and utf-8 is not fully supported.
Definition: json_io.h:23
virtual Json ParseString()
virtual Json Load()
virtual Json ParseNumber()
virtual Json ParseArray()
StringView raw_str_
Definition: json_io.h:42
Char PeekNextChar()
Definition: json_io.h:56
struct xgboost::JsonReader::SourceLocation cursor_
virtual Json ParseBoolean()
Char GetNextChar()
Definition: json_io.h:47
void Expect(Char c, Char got)
Definition: json_io.h:81
virtual Json ParseObject()
Char GetNextNonSpaceChar()
Definition: json_io.h:65
Char GetConsecutiveChar(char expected_char)
Definition: json_io.h:72
std::int8_t Char
Definition: json_io.h:25
void Error(std::string msg) const
virtual ~JsonReader()=default
constexpr static size_t kMaxNumLength
Definition: json_io.h:28
JsonReader(StringView str)
Definition: json_io.h:105
virtual Json ParseNull()
Definition: json.h:86
Typed array for Universal Binary JSON.
Definition: json.h:150
Definition: json_io.h:113
std::vector< char > * stream_
Definition: json_io.h:134
virtual void Save(Json json)
virtual void Visit(F64Array const *)
Definition: json_io.h:145
JsonWriter(std::vector< char > *stream)
Definition: json_io.h:137
virtual void Visit(JsonInteger const *num)
virtual void Visit(JsonNull const *null)
virtual void Visit(U8Array const *arr)
virtual void Visit(JsonArray const *arr)
virtual void Visit(F32Array const *arr)
virtual void Visit(JsonNumber const *num)
virtual ~JsonWriter()=default
virtual void Visit(I64Array const *arr)
virtual void Visit(JsonBoolean const *boolean)
virtual void Visit(I32Array const *arr)
virtual void Visit(JsonObject const *obj)
virtual void Visit(JsonString const *str)
Data structure representing JSON format.
Definition: json.h:368
Reader for UBJSON https://ubjson.org/.
Definition: json_io.h:204
Json Load() override
Writer for UBJSON https://ubjson.org/.
Definition: json_io.h:246
void Save(Json json) override
Core data structure for multi-target trees.
Definition: base.h:87
T BuiltinBSwap(T v)
Definition: json_io.h:177
T ToBigEndian(T v)
Definition: json_io.h:184
Definition: json_io.h:30
size_t Pos() const
Definition: json_io.h:36
void Forward()
Definition: json_io.h:38
void Forward(uint32_t n)
Definition: json_io.h:39
Definition: string_view.h:16
value_type const * c_str() const
Definition: string_view.h:50
constexpr std::size_t size() const
Definition: string_view.h:43