105 lines
3.1 KiB
Python
105 lines
3.1 KiB
Python
"""Xgboost pyspark integration submodule for params."""
|
|
|
|
from typing import Dict, List

from pyspark.ml.param import TypeConverters
from pyspark.ml.param.shared import Param, Params
|
|
|
|
|
|
class HasArbitraryParamsDict(Params):
    """
    Params mixin, extended by _SparkXGBParams, that carries the ``**kwargs``
    portion of the XGBoost input as a single param.
    """

    # No typeConverter is supplied, so the stored value is not converted here.
    arbitrary_params_dict: "Param[Dict]" = Param(
        parent=Params._dummy(),
        name="arbitrary_params_dict",
        doc=(
            "arbitrary_params_dict This parameter holds all of the additional "
            "parameters which are not exposed as the XGBoost Spark estimator params "
            "but can be recognized by underlying XGBoost library. "
            "It is stored as a dictionary."
        ),
    )
|
|
|
|
|
|
class HasBaseMarginCol(Params):
    """
    Mixin for param base_margin_col: name of the base margin column.

    This is a Params based class that is extended by _SparkXGBParams
    and holds the variable to store the base margin column part of XGBoost.
    """

    # Annotated like the sibling params (``arbitrary_params_dict``,
    # ``pred_contrib_col``) so type checkers see a string-valued param.
    base_margin_col: "Param[str]" = Param(
        Params._dummy(),
        "base_margin_col",
        "This stores the name for the column of the base margin",
        typeConverter=TypeConverters.toString,
    )
|
|
|
|
|
|
class HasFeaturesCols(Params):
    """
    Mixin for param features_cols: a list of feature column names.

    This parameter takes effect only when GPU is enabled.
    """

    # Annotated like the sibling params; values pass through
    # ``TypeConverters.toListString`` when set.
    features_cols: "Param[List[str]]" = Param(
        Params._dummy(),
        "features_cols",
        "feature column names.",
        typeConverter=TypeConverters.toListString,
    )

    def __init__(self) -> None:
        """Initialise the mixin and default ``features_cols`` to an empty list."""
        super().__init__()
        # A fresh list is created per instance, so the default is never shared.
        self._setDefault(features_cols=[])
|
|
|
|
|
|
class HasEnableSparseDataOptim(Params):
    """
    Mixin for param enable_sparse_data_optim: toggle for sparse data optimization.

    This is a Params based class that is extended by _SparkXGBParams
    and holds the variable to store the boolean config of enabling sparse
    data optimization.
    """

    # Annotated like the sibling params; values pass through
    # ``TypeConverters.toBoolean`` when set.
    enable_sparse_data_optim: "Param[bool]" = Param(
        Params._dummy(),
        "enable_sparse_data_optim",
        "This stores the boolean config of enabling sparse data optimization, if enabled, "
        "Xgboost DMatrix object will be constructed from sparse matrix instead of "
        "dense matrix. This config is disabled by default. If most of examples in your "
        "training dataset contains sparse features, we suggest to enable this config.",
        typeConverter=TypeConverters.toBoolean,
    )

    def __init__(self) -> None:
        """Initialise the mixin and default ``enable_sparse_data_optim`` to False."""
        super().__init__()
        self._setDefault(enable_sparse_data_optim=False)
|
|
|
|
|
|
class HasQueryIdCol(Params):
    """
    Mixin for param qid_col: query id column name.

    No default is registered here; the param is unset until a value or
    default is supplied elsewhere.
    """

    # Annotated like the sibling params (``arbitrary_params_dict``,
    # ``pred_contrib_col``) so type checkers see a string-valued param.
    qid_col: "Param[str]" = Param(
        Params._dummy(),
        "qid_col",
        "query id column name",
        typeConverter=TypeConverters.toString,
    )
|
|
|
|
|
|
class HasContribPredictionCol(Params):
    """
    Mixin for param pred_contrib_col: contribution prediction column name.

    For the classification case the output is a 3-dim array shaped
    (rows, groups, columns + 1); for the regression case it can be
    2-dimensional.
    """

    # Values pass through ``TypeConverters.toString`` when set.
    pred_contrib_col: "Param[str]" = Param(
        parent=Params._dummy(),
        name="pred_contrib_col",
        doc="feature contributions to individual predictions.",
        typeConverter=TypeConverters.toString,
    )
|