XGBoostModel

Instance Constructors

new XGBoostModel(_booster: Booster)

Abstract Value Members

abstract def predict(features: Vector): Double

Attributes
protected
Definition Classes
PredictionModel
abstract val uid: String

Definition Classes
Identifiable

Concrete Value Members

final def !=(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def $[T](param: Param[T]): T

Attributes
protected
Definition Classes
Params
final def ==(arg0: Any): Boolean

Definition Classes
AnyRef → Any
var _booster: Booster

Attributes
protected
val alpha: DoubleParam

L1 regularization term on weights; increasing this value will make the model more conservative.
L1 regularization term on weights; increasing this value will make the model more conservative. [default=0]

Definition Classes
BoosterParams
final def asInstanceOf[T0]: T0

Definition Classes
Any
def booster: Booster
val boosterType: Param[String]

Booster to use, options: {'gbtree', 'gblinear', 'dart'}
Booster to use, options: {'gbtree', 'gblinear', 'dart'}

Definition Classes
BoosterParams
final def clear(param: Param[_]): XGBoostModel.this.type

Definition Classes
Params
def clone(): AnyRef

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( ... )
val colSampleByLevel: DoubleParam

subsample ratio of columns for each split, in each level.
subsample ratio of columns for each split, in each level. [default=1] range: (0,1]

Definition Classes
BoosterParams
val colSampleByTree: DoubleParam

subsample ratio of columns when constructing each tree.
subsample ratio of columns when constructing each tree. [default=1] range: (0,1]

Definition Classes
BoosterParams
def copy(extra: ParamMap): XGBoostModel

Definition Classes
XGBoostModel → Model → Transformer → PipelineStage → Params
def copyValues[T <: Params](to: T, extra: ParamMap): T

Attributes
protected
Definition Classes
Params
final def defaultCopy[T <: Params](extra: ParamMap): T

Attributes
protected
Definition Classes
Params
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def equals(arg0: Any): Boolean

Definition Classes
AnyRef → Any
val eta: DoubleParam

step size shrinkage used in update to prevent overfitting.
step size shrinkage used in update to prevent overfitting. After each boosting step, we can directly get the weights of new features and eta actually shrinks the feature weights to make the boosting process more conservative. [default=0.3] range: [0,1]

Definition Classes
BoosterParams
def eval(evalDataset: RDD[org.apache.spark.ml.feature.LabeledPoint], evalName: String, evalFunc: EvalTrait = null, iter: Int = 1, useExternalCache: Boolean = false, groupData: Seq[Seq[Int]] = null): String

evaluate XGBoostModel with an RDD-wrapped dataset
evaluate XGBoostModel with an RDD-wrapped dataset
NOTE: you have to specify a value for either evalFunc or iter; when you specify both, this method adopts the default eval metric of the model
evalDataset
the dataset used for evaluation
evalName
the name of evaluation
evalFunc
the customized evaluation function, null by default to use the default metric of model
iter
the current iteration; set to -1 (i.e. treated as unset) to use the customized evaluation function
groupData
group data specifies each group size for ranking tasks. The top level corresponds to the partition id; the second level is the group sizes.
returns
the average metric over all partitions
def explainParam(param: Param[_]): String

Definition Classes
Params
def explainParams(): String

Explains all params of this instance.
Explains all params of this instance. See explainParam().

Definition Classes
BoosterParams → Params
final def extractParamMap(): ParamMap

Definition Classes
Params
final def extractParamMap(extra: ParamMap): ParamMap

Definition Classes
Params
final val featuresCol: Param[String]

Definition Classes
HasFeaturesCol
def featuresDataType: DataType

Attributes
protected
Definition Classes
PredictionModel
def finalize(): Unit

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
val gamma: DoubleParam

minimum loss reduction required to make a further partition on a leaf node of the tree.
minimum loss reduction required to make a further partition on a leaf node of the tree. the larger, the more conservative the algorithm will be. [default=0] range: [0, Double.MaxValue]

Definition Classes
BoosterParams
final def get[T](param: Param[T]): Option[T]

Definition Classes
Params
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
final def getDefault[T](param: Param[T]): Option[T]

Definition Classes
Params
final def getFeaturesCol: String

Definition Classes
HasFeaturesCol
final def getLabelCol: String

Definition Classes
HasLabelCol
final def getOrDefault[T](param: Param[T]): T

Definition Classes
Params
def getParam(paramName: String): Param[Any]

Definition Classes
Params
final def getPredictionCol: String

Definition Classes
HasPredictionCol
val growthPolicty: Param[String]

growth policy for fast histogram algorithm
growth policy for fast histogram algorithm

Definition Classes
BoosterParams
final def hasDefault[T](param: Param[T]): Boolean

Definition Classes
Params
def hasParam(paramName: String): Boolean

Definition Classes
Params
def hasParent: Boolean

Definition Classes
Model
def hashCode(): Int

Definition Classes
AnyRef → Any
def initializeLogIfNecessary(isInterpreter: Boolean, silent: Boolean): Boolean

Attributes
protected
Definition Classes
Logging
def initializeLogIfNecessary(isInterpreter: Boolean): Unit

Attributes
protected
Definition Classes
Logging
final def isDefined(param: Param[_]): Boolean

Definition Classes
Params
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
final def isSet(param: Param[_]): Boolean

Definition Classes
Params
def isTraceEnabled(): Boolean

Attributes
protected
Definition Classes
Logging
final val labelCol: Param[String]

Definition Classes
HasLabelCol
val lambda: DoubleParam

L2 regularization term on weights; increasing this value will make the model more conservative.
L2 regularization term on weights; increasing this value will make the model more conservative. [default=1]

Definition Classes
BoosterParams
val lambdaBias: DoubleParam

Parameter of linear booster. L2 regularization term on bias, default 0 (no L1 reg on bias because it is not important)
Parameter of linear booster. L2 regularization term on bias, default 0 (no L1 reg on bias because it is not important)

Definition Classes
BoosterParams
def log: Logger

Attributes
protected
Definition Classes
Logging
def logDebug(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logDebug(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logError(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logError(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logInfo(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logInfo(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logName: String

Attributes
protected
Definition Classes
Logging
def logTrace(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logTrace(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logWarning(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logWarning(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
val maxBins: IntParam

maximum number of bins in histogram
maximum number of bins in histogram

Definition Classes
BoosterParams
val maxDeltaStep: DoubleParam

Maximum delta step we allow each tree's weight estimation to be.
Maximum delta step we allow each tree's weight estimation to be. If the value is set to 0, it means there is no constraint. If it is set to a positive value, it can help make the update step more conservative. Usually this parameter is not needed, but it might help in logistic regression when the classes are extremely imbalanced. Setting it to a value of 1-10 might help control the update. [default=0] range: [0, Double.MaxValue]

Definition Classes
BoosterParams
val maxDepth: IntParam

maximum depth of a tree; increasing this value will make the model more complex / more likely to overfit.
maximum depth of a tree; increasing this value will make the model more complex / more likely to overfit. [default=6] range: [1, Int.MaxValue]

Definition Classes
BoosterParams
val minChildWeight: DoubleParam

minimum sum of instance weight(hessian) needed in a child.
minimum sum of instance weight(hessian) needed in a child. If the tree partition step results in a leaf node with the sum of instance weight less than min_child_weight, then the building process will give up further partitioning. In linear regression mode, this simply corresponds to minimum number of instances needed to be in each node. The larger, the more conservative the algorithm will be. [default=1] range: [0, Double.MaxValue]

Definition Classes
BoosterParams
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
val normalizeType: Param[String]

Parameter of Dart booster.
Parameter of Dart booster. type of normalization algorithm, options: {'tree', 'forest'}. [default="tree"]

Definition Classes
BoosterParams
final def notify(): Unit

Definition Classes
AnyRef
final def notifyAll(): Unit

Definition Classes
AnyRef
def numFeatures: Int

Definition Classes
PredictionModel
Annotations
@Since( "1.6.0" )
lazy val params: Array[Param[_]]

Definition Classes
Params
var parent: Estimator[XGBoostModel]

Definition Classes
Model
def predict(testSet: RDD[Vector], useExternalCache: Boolean = false, outputMargin: Boolean = false): RDD[Array[Float]]

Predict result with the given test set (represented as RDD)
Predict result with the given test set (represented as RDD)
testSet
test set represented as RDD
useExternalCache
whether to use external cache for the test set
outputMargin
whether to output raw untransformed margin value
def predict(testSet: RDD[DenseVector], missingValue: Float): RDD[Array[Float]]

Predict result with the given test set (represented as RDD)
Predict result with the given test set (represented as RDD)
testSet
test set represented as RDD
missingValue
the specified value to represent the missing value
def predictLeaves(testSet: RDD[Vector]): RDD[Array[Float]]

Predict leaf instances with the given test set (represented as RDD)
Predict leaf instances with the given test set (represented as RDD)
testSet
test set represented as RDD
final val predictionCol: Param[String]

Definition Classes
HasPredictionCol
def produceRowRDD(testSet: Dataset[_], outputMargin: Boolean = false, predLeaf: Boolean = false): RDD[Row]

Attributes
protected
val rateDrop: DoubleParam

Parameter of Dart booster.
Parameter of Dart booster. dropout rate. [default=0.0] range: [0.0, 1.0]

Definition Classes
BoosterParams
val sampleType: Param[String]

Parameter for Dart booster.
Parameter for Dart booster. Type of sampling algorithm. "uniform": dropped trees are selected uniformly. "weighted": dropped trees are selected in proportion to weight. [default="uniform"]

Definition Classes
BoosterParams
def save(path: String): Unit

Definition Classes
MLWritable
Annotations
@Since( "1.6.0" ) @throws( ... )
def saveModelAsHadoopFile(modelPath: String)(implicit sc: SparkContext): Unit

Save the model to an HDFS-compatible file system.
Save the model to an HDFS-compatible file system.
modelPath
The model path as in Hadoop path.
val scalePosWeight: DoubleParam

Control the balance of positive and negative weights, useful for unbalanced classes.
Control the balance of positive and negative weights, useful for unbalanced classes. A typical value to consider: sum(negative cases) / sum(positive cases). [default=1]

Definition Classes
BoosterParams
final def set(paramPair: ParamPair[_]): XGBoostModel.this.type

Attributes
protected
Definition Classes
Params
final def set(param: String, value: Any): XGBoostModel.this.type

Attributes
protected
Definition Classes
Params
final def set[T](param: Param[T], value: T): XGBoostModel.this.type

Definition Classes
Params
final def setDefault(paramPairs: ParamPair[_]*): XGBoostModel.this.type

Attributes
protected
Definition Classes
Params
final def setDefault[T](param: Param[T], value: T): XGBoostModel.this.type

Attributes
protected
Definition Classes
Params
def setExternalMemory(value: Boolean): XGBoostModel
def setFeaturesCol(value: String): XGBoostModel

Definition Classes
PredictionModel
def setLabelCol(name: String): XGBoostModel
def setParent(parent: Estimator[XGBoostModel]): XGBoostModel

Definition Classes
Model
def setPredictionCol(value: String): XGBoostModel

Definition Classes
PredictionModel
val sketchEps: DoubleParam

This is only used for approximate greedy algorithm.
This is only used for approximate greedy algorithm. This roughly translates into O(1 / sketch_eps) number of bins. Compared to directly selecting the number of bins, this comes with a theoretical guarantee on sketch accuracy. [default=0.03] range: (0, 1)

Definition Classes
BoosterParams
val skipDrop: DoubleParam

Parameter of Dart booster.
Parameter of Dart booster. probability of skip dropout. If a dropout is skipped, new trees are added in the same manner as gbtree. [default=0.0] range: [0.0, 1.0]

Definition Classes
BoosterParams
val subSample: DoubleParam

subsample ratio of the training instance.
subsample ratio of the training instance. Setting it to 0.5 means that XGBoost randomly collects half of the data instances to grow trees and this will prevent overfitting. [default=1] range: (0,1]

Definition Classes
BoosterParams
def summary: XGBoostTrainingSummary

Returns summary (e.g. train/test objective history) of model on the training set.
Returns summary (e.g. train/test objective history) of model on the training set. An exception is thrown if no summary is available.
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
def toString(): String

Definition Classes
Identifiable → AnyRef → Any
def transform(testSet: Dataset[_]): DataFrame

produces the prediction results and appends them as an additional column in the original dataset. NOTE: the prediction results are kept in the original format of xgboost
produces the prediction results and appends them as an additional column in the original dataset. NOTE: the prediction results are kept in the original format of xgboost
returns
the original dataframe with an additional column containing prediction results

Definition Classes
XGBoostModel → PredictionModel → Transformer
def transform(dataset: Dataset[_], paramMap: ParamMap): DataFrame

Definition Classes
Transformer
Annotations
@Since( "2.0.0" )
def transform(dataset: Dataset[_], firstParamPair: ParamPair[_], otherParamPairs: ParamPair[_]*): DataFrame

Definition Classes
Transformer
Annotations
@Since( "2.0.0" ) @varargs()
def transformImpl(dataset: Dataset[_]): DataFrame

Attributes
protected
Definition Classes
PredictionModel
def transformLeaf(testSet: Dataset[_]): DataFrame

append leaf index of each row as an additional column in the original dataset
append leaf index of each row as an additional column in the original dataset
returns
the original dataframe with an additional column containing prediction results
def transformSchema(schema: StructType): StructType

Definition Classes
PredictionModel → PipelineStage
def transformSchema(schema: StructType, logging: Boolean): StructType

Attributes
protected
Definition Classes
PipelineStage
Annotations
@DeveloperApi()
val treeMethod: Param[String]

The tree construction algorithm used in XGBoost.
The tree construction algorithm used in XGBoost. options: {'auto', 'exact', 'approx'} [default='auto']

Definition Classes
BoosterParams
final val useExternalMemory: BooleanParam
def validateAndTransformSchema(schema: StructType, fitting: Boolean, featuresDataType: DataType): StructType

Attributes
protected
Definition Classes
PredictorParams
def version: Int
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
def write: MLWriter

Definition Classes
XGBoostModel → MLWritable

Related Docs: object XGBoostModel | package spark

abstract class XGBoostModel extends PredictionModel[Vector, XGBoostModel] with BoosterParams with Serializable with Params with MLWritable

Instance Constructors

new XGBoostModel(_booster: Booster)

Abstract Value Members

abstract def predict(features: Vector): Double

abstract val uid: String

Concrete Value Members

final def !=(arg0: Any): Boolean

final def ##(): Int

final def $[T](param: Param[T]): T

final def ==(arg0: Any): Boolean

var _booster: Booster

val alpha: DoubleParam

final def asInstanceOf[T0]: T0

def booster: Booster

val boosterType: Param[String]

final def clear(param: Param[_]): XGBoostModel.this.type

def clone(): AnyRef

val colSampleByLevel: DoubleParam

val colSampleByTree: DoubleParam

def copy(extra: ParamMap): XGBoostModel

def copyValues[T <: Params](to: T, extra: ParamMap): T

final def defaultCopy[T <: Params](extra: ParamMap): T

final def eq(arg0: AnyRef): Boolean

def equals(arg0: Any): Boolean

val eta: DoubleParam

def eval(evalDataset: RDD[org.apache.spark.ml.feature.LabeledPoint], evalName: String, evalFunc: EvalTrait = null, iter: Int = 1, useExternalCache: Boolean = false, groupData: Seq[Seq[Int]] = null): String

def explainParam(param: Param[_]): String

def explainParams(): String

final def extractParamMap(): ParamMap

final def extractParamMap(extra: ParamMap): ParamMap

final val featuresCol: Param[String]

def featuresDataType: DataType

def finalize(): Unit

val gamma: DoubleParam

final def get[T](param: Param[T]): Option[T]

final def getClass(): Class[_]

final def getDefault[T](param: Param[T]): Option[T]

final def getFeaturesCol: String

final def getLabelCol: String

final def getOrDefault[T](param: Param[T]): T

def getParam(paramName: String): Param[Any]

final def getPredictionCol: String

val growthPolicty: Param[String]

final def hasDefault[T](param: Param[T]): Boolean

def hasParam(paramName: String): Boolean

def hasParent: Boolean

def hashCode(): Int

def initializeLogIfNecessary(isInterpreter: Boolean, silent: Boolean): Boolean

def initializeLogIfNecessary(isInterpreter: Boolean): Unit

final def isDefined(param: Param[_]): Boolean

final def isInstanceOf[T0]: Boolean

final def isSet(param: Param[_]): Boolean

def isTraceEnabled(): Boolean

final val labelCol: Param[String]

val lambda: DoubleParam

val lambdaBias: DoubleParam

def log: Logger

def logDebug(msg: ⇒ String, throwable: Throwable): Unit

def logDebug(msg: ⇒ String): Unit

def logError(msg: ⇒ String, throwable: Throwable): Unit

def logError(msg: ⇒ String): Unit

def logInfo(msg: ⇒ String, throwable: Throwable): Unit

def logInfo(msg: ⇒ String): Unit

def logName: String

def logTrace(msg: ⇒ String, throwable: Throwable): Unit

def logTrace(msg: ⇒ String): Unit

def logWarning(msg: ⇒ String, throwable: Throwable): Unit

def logWarning(msg: ⇒ String): Unit

val maxBins: IntParam

val maxDeltaStep: DoubleParam

val maxDepth: IntParam

val minChildWeight: DoubleParam

final def ne(arg0: AnyRef): Boolean

val normalizeType: Param[String]

final def notify(): Unit

final def notifyAll(): Unit

def numFeatures: Int