Note
Go to the end to download the full example code
Demo for prediction using a limited number of trees
import os
import numpy as np
import xgboost as xgb
from sklearn.datasets import load_svmlight_file
# Directory that holds this script; the data files are addressed relative to it.
CURRENT_DIR = os.path.dirname(__file__)
# Paths of the training and test files used by both demos below.
train, test = (
    os.path.join(CURRENT_DIR, relative_path)
    for relative_path in ("../data/agaricus.txt.train", "../data/agaricus.txt.test")
)
def native_interface():
    """Compare first-tree and all-tree predictions using the native API.

    Trains a 3-round booster on the ``train`` file, evaluates on the ``test``
    file, and prints the classification error (fraction of test rows whose
    prediction thresholded at 0.5 differs from the label) for predictions
    made with only the first tree and with all trees.
    """
    # Load data and do training. The text format is named explicitly in the
    # URI because recent XGBoost releases no longer auto-detect it.
    dtrain = xgb.DMatrix(train + "?format=libsvm")
    dtest = xgb.DMatrix(test + "?format=libsvm")
    param = {"max_depth": 2, "eta": 1, "objective": "binary:logistic"}
    watchlist = [(dtest, "eval"), (dtrain, "train")]
    num_round = 3
    # ``evals`` is passed by keyword: passing it positionally is deprecated.
    bst = xgb.train(param, dtrain, num_round, evals=watchlist)

    print("start testing prediction from first n trees")
    label = dtest.get_label()
    # Predict using only the first tree.
    ypred1 = bst.predict(dtest, iteration_range=(0, 1))
    # By default, we predict using all the trees.
    ypred2 = bst.predict(dtest)
    print("error of ypred1=%f" % (np.sum((ypred1 > 0.5) != label) / float(len(label))))
    print("error of ypred2=%f" % (np.sum((ypred2 > 0.5) != label) / float(len(label))))
def sklearn_interface():
    """Compare first-tree and all-tree predictions using the sklearn wrapper.

    Fits an ``XGBClassifier`` with 3 estimators on the ``train`` file, using
    the ``test`` file as the evaluation set, then prints the fraction of test
    rows misclassified when predicting with only the first tree and with all
    trees.
    """
    train_features, train_labels = load_svmlight_file(train)
    test_features, test_labels = load_svmlight_file(test)
    n_test_rows = float(len(test_labels))

    model = xgb.XGBClassifier(n_estimators=3, max_depth=2, eta=1)
    model.fit(train_features, train_labels, eval_set=[(test_features, test_labels)])
    assert model.n_classes_ == 2

    print("start testing prediction from first n trees")
    # Restrict prediction to the first boosting round only.
    first_tree_pred = model.predict(test_features, iteration_range=(0, 1))
    # Without iteration_range, every tree contributes to the prediction.
    all_trees_pred = model.predict(test_features)

    first_tree_error = np.sum((first_tree_pred > 0.5) != test_labels) / n_test_rows
    print("error of ypred1=%f" % first_tree_error)
    all_trees_error = np.sum((all_trees_pred > 0.5) != test_labels) / n_test_rows
    print("error of ypred2=%f" % all_trees_error)
if __name__ == "__main__":
    # Run both demos in order: the native API first, then the sklearn wrapper.
    for run_demo in (native_interface, sklearn_interface):
        run_demo()
Total running time of the script: (0 minutes 0.000 seconds)