xgboost
config.h
Go to the documentation of this file.
1 
7 #ifndef XGBOOST_COMMON_CONFIG_H_
8 #define XGBOOST_COMMON_CONFIG_H_
9 
10 #include <xgboost/logging.h>
11 #include <cstdio>
12 #include <string>
13 #include <fstream>
14 #include <istream>
15 #include <sstream>
16 #include <vector>
17 #include <regex>
18 #include <iterator>
19 #include <utility>
20 
21 namespace xgboost {
22 namespace common {
26 class ConfigParser {
27  public:
32  explicit ConfigParser(const std::string path)
33  : path_(std::move(path)),
34  line_comment_regex_("^#"),
35  key_regex_(R"rx(^([^#"'=\r\n\t ]+)[\t ]*=)rx"),
36  key_regex_escaped_(R"rx(^(["'])([^"'=\r\n]+)\1[\t ]*=)rx"),
37  value_regex_(R"rx(^([^#"'\r\n\t ]+)[\t ]*(?:#.*){0,1}$)rx"),
38  value_regex_escaped_(R"rx(^(["'])([^"'\r\n]+)\1[\t ]*(?:#.*){0,1}$)rx")
39  {}
40 
41  std::string LoadConfigFile(const std::string& path) {
42  std::ifstream fin(path, std::ios_base::in | std::ios_base::binary);
43  CHECK(fin) << "Failed to open: " << path;
44  std::string content{std::istreambuf_iterator<char>(fin),
45  std::istreambuf_iterator<char>()};
46  return content;
47  }
48 
58  std::string NormalizeConfigEOL(std::string const& config_str) {
59  std::string result;
60  std::stringstream ss(config_str);
61  for (auto c : config_str) {
62  if (c == '\r') {
63  result.push_back('\n');
64  continue;
65  }
66  result.push_back(c);
67  }
68  return result;
69  }
70 
76  std::vector<std::pair<std::string, std::string>> Parse() {
77  std::string content { LoadConfigFile(path_) };
78  content = NormalizeConfigEOL(content);
79  std::stringstream ss { content };
80  std::vector<std::pair<std::string, std::string>> results;
81  std::string line;
82  std::string key, value;
83  // Loop over every line of the configuration file
84  while (std::getline(ss, line)) {
85  if (ParseKeyValuePair(line, &key, &value)) {
86  results.emplace_back(key, value);
87  }
88  }
89  return results;
90  }
91 
92  private:
93  std::string path_;
94  const std::regex line_comment_regex_, key_regex_, key_regex_escaped_,
95  value_regex_, value_regex_escaped_;
96 
97  public:
103  static std::string TrimWhitespace(const std::string& str) {
104  const auto first_char = str.find_first_not_of(" \t\n\r");
105  const auto last_char = str.find_last_not_of(" \t\n\r");
106  if (first_char == std::string::npos) {
107  // Every character in str is a whitespace
108  return std::string();
109  }
110  CHECK_NE(last_char, std::string::npos);
111  const auto substr_len = last_char + 1 - first_char;
112  return str.substr(first_char, substr_len);
113  }
114 
122  bool ParseKeyValuePair(const std::string& str, std::string* key,
123  std::string* value) {
124  std::string buf = TrimWhitespace(str);
125  if (buf.empty()) {
126  return false;
127  }
128 
129  /* Match key */
130  std::smatch m;
131  if (std::regex_search(buf, m, line_comment_regex_)) {
132  // This line is a comment
133  return false;
134  } else if (std::regex_search(buf, m, key_regex_)) {
135  // Key doesn't have whitespace or #
136  CHECK_EQ(m.size(), 2);
137  *key = m[1].str();
138  } else if (std::regex_search(buf, m, key_regex_escaped_)) {
139  // Key has a whitespace and/or #; it has to be wrapped around a pair of
140  // single or double quotes. Example: "foo bar" 'foo#bar'
141  CHECK_EQ(m.size(), 3);
142  *key = m[2].str();
143  } else {
144  LOG(FATAL) << "This line is not a valid key-value pair: " << str;
145  }
146 
147  /* Match value */
148  buf = m.suffix().str();
149  buf = TrimWhitespace(buf);
150  if (std::regex_search(buf, m, value_regex_)) {
151  // Value doesn't have whitespace or #
152  CHECK_EQ(m.size(), 2);
153  *value = m[1].str();
154  } else if (std::regex_search(buf, m, value_regex_escaped_)) {
155  // Value has a whitespace and/or #; it has to be wrapped around a pair of
156  // single or double quotes. Example: "foo bar" 'foo#bar'
157  CHECK_EQ(m.size(), 3);
158  *value = m[2].str();
159  } else {
160  LOG(FATAL) << "This line is not a valid key-value pair: " << str;
161  }
162  return true;
163  }
164 };
165 
166 } // namespace common
167 } // namespace xgboost
168 #endif // XGBOOST_COMMON_CONFIG_H_
Implementation of config reader.
Definition: config.h:26
namespace of xgboost
Definition: base.h:102