"""Xgboost pyspark integration submodule for params."""
from typing import Dict
from pyspark.ml.param import TypeConverters
from pyspark.ml.param.shared import Param, Params


class HasArbitraryParamsDict(Params):
    """
    This is a Params-based class that is extended by _SparkXGBParams
    and holds the variable storing the **kwargs part of the XGBoost
    input.
    """

    arbitrary_params_dict: "Param[Dict]" = Param(
        Params._dummy(),
        "arbitrary_params_dict",
        "This parameter holds all of the additional parameters which are "
        "not exposed as XGBoost Spark estimator params but are recognized by the "
        "underlying XGBoost library. It is stored as a dictionary.",
    )
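
# Hedged usage sketch (illustrative, not part of the upstream file): keyword
# arguments that the Spark estimators do not expose as dedicated params are
# collected into ``arbitrary_params_dict`` and forwarded to native XGBoost.
# The estimator name is real; the specific kwargs below are only examples:
#
#   from xgboost.spark import SparkXGBClassifier
#   clf = SparkXGBClassifier(max_depth=5, colsample_bynode=0.8)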


class HasBaseMarginCol(Params):
    """
    This is a Params-based class that is extended by _SparkXGBParams
    and holds the variable storing the name of the base margin column.
    """

    base_margin_col = Param(
        Params._dummy(),
        "base_margin_col",
        "This stores the name for the column of the base margin",
        typeConverter=TypeConverters.toString,
    )
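
# Hedged usage sketch (illustrative, not part of the upstream file): the
# column name "margin" below is hypothetical:
#
#   clf = SparkXGBClassifier(base_margin_col="margin")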


class HasFeaturesCols(Params):
    """
    Mixin for param features_cols: a list of feature column names.
    This parameter takes effect only when GPU training is enabled.
    """

    features_cols = Param(
        Params._dummy(),
        "features_cols",
        "feature column names.",
        typeConverter=TypeConverters.toListString,
    )

    def __init__(self) -> None:
        super().__init__()
        self._setDefault(features_cols=[])
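
# Hedged usage sketch (illustrative, not part of the upstream file): with GPU
# training, individual numeric columns can be passed directly instead of a
# single assembled vector column. The column names are hypothetical, and the
# GPU flag spelling (``device`` vs. the older ``use_gpu``) depends on the
# xgboost version:
#
#   clf = SparkXGBClassifier(
#       features_cols=["f0", "f1", "f2"], label_col="label", device="cuda"
#   )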


class HasEnableSparseDataOptim(Params):
    """
    This is a Params-based class that is extended by _SparkXGBParams
    and holds the boolean config for enabling sparse data optimization.
    """

    enable_sparse_data_optim = Param(
        Params._dummy(),
        "enable_sparse_data_optim",
        "This stores the boolean config of enabling sparse data optimization. If enabled, "
        "the XGBoost DMatrix object is constructed from a sparse matrix instead of a "
        "dense matrix. This config is disabled by default. If most examples in your "
        "training dataset contain sparse features, we suggest enabling this config.",
        typeConverter=TypeConverters.toBoolean,
    )

    def __init__(self) -> None:
        super().__init__()
        self._setDefault(enable_sparse_data_optim=False)
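
# Hedged note (illustrative, not part of the upstream file): this flag is
# most useful when the features column holds pyspark.ml.linalg.SparseVector
# values, so the DMatrix can be built without densifying. A minimal sketch;
# some versions also require ``missing=0.0`` with this flag (an assumption
# worth verifying against your installed version):
#
#   clf = SparkXGBClassifier(enable_sparse_data_optim=True, missing=0.0)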


class HasQueryIdCol(Params):
    """
    Mixin for param qid_col: query id column name.
    """

    qid_col = Param(
        Params._dummy(),
        "qid_col",
        "query id column name",
        typeConverter=TypeConverters.toString,
    )
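
# Hedged usage sketch (illustrative, not part of the upstream file): qid_col
# is consumed by the ranking estimator; the column names are hypothetical:
#
#   from xgboost.spark import SparkXGBRanker
#   ranker = SparkXGBRanker(qid_col="qid", label_col="relevance")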


class HasContribPredictionCol(Params):
    """
    Mixin for param pred_contrib_col: contribution prediction column name.
    For classification, the output is a 3-dimensional array of shape
    (rows, groups, columns + 1); for regression it is 2-dimensional.
    """

    pred_contrib_col: "Param[str]" = Param(
        Params._dummy(),
        "pred_contrib_col",
        "feature contributions to individual predictions.",
        typeConverter=TypeConverters.toString,
    )
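
# Hedged composition sketch (not part of the upstream file): these mixins are
# plain pyspark ``Params`` classes, so the standard get/set machinery applies.
# The class name ``_DemoParams`` is hypothetical:
#
#   class _DemoParams(HasFeaturesCols, HasQueryIdCol):
#       pass
#
#   p = _DemoParams()
#   p.set(p.qid_col, "qid")
#   assert p.getOrDefault(p.qid_col) == "qid"
#   assert p.getOrDefault(p.features_cols) == []  # default set in __init__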