# MLPproject/.venv/lib/python3.12/site-packages/xgboost/spark/params.py

"""Xgboost pyspark integration submodule for params."""

from typing import Dict

from pyspark.ml.param import TypeConverters
from pyspark.ml.param.shared import Param, Params


class HasArbitraryParamsDict(Params):
    """
    Mixin for param arbitrary_params_dict: extra XGBoost keyword arguments.

    This is a Params based class that is extended by _SparkXGBParams
    and holds the variable to store the **kwargs parts of the XGBoost
    input.
    """

    # No typeConverter is supplied here, so pyspark's default conversion applies;
    # the explicit annotation records that a dictionary is expected.
    arbitrary_params_dict: "Param[Dict]" = Param(
        Params._dummy(),
        "arbitrary_params_dict",
        # The param name is deliberately not repeated at the start of the doc text:
        # pyspark's explainParam() already prints it ahead of the doc.
        "This parameter holds all of the additional parameters which are "
        "not exposed as the XGBoost Spark estimator params but can be recognized by "
        "underlying XGBoost library. It is stored as a dictionary.",
    )


class HasBaseMarginCol(Params):
    """
    Mixin for param base_margin_col: base margin column name.

    A Params based class, extended by _SparkXGBParams, that carries the
    name of the column holding the XGBoost base margin.
    """

    base_margin_col = Param(
        parent=Params._dummy(),
        name="base_margin_col",
        doc="This stores the name for the column of the base margin",
        typeConverter=TypeConverters.toString,
    )


class HasFeaturesCols(Params):
    """
    Mixin for param features_cols: a list of feature column names.

    This parameter takes effect only when GPU is enabled. Its default,
    registered in ``__init__``, is an empty list.
    """

    features_cols = Param(
        parent=Params._dummy(),
        name="features_cols",
        doc="feature column names.",
        typeConverter=TypeConverters.toListString,
    )

    def __init__(self) -> None:
        """Run the parent initialiser, then register the empty-list default."""
        super().__init__()
        # A fresh list is built on every call, so instances never share one object.
        no_columns: list = []
        self._setDefault(features_cols=no_columns)


class HasEnableSparseDataOptim(Params):
    """
    Mixin for param enable_sparse_data_optim: sparse data optimization switch.

    A Params based class, extended by _SparkXGBParams, that carries the
    boolean config of enabling sparse data optimization. The default,
    registered in ``__init__``, is ``False``.
    """

    enable_sparse_data_optim = Param(
        parent=Params._dummy(),
        name="enable_sparse_data_optim",
        doc=(
            "This stores the boolean config of enabling sparse data optimization, if enabled, "
            "Xgboost DMatrix object will be constructed from sparse matrix instead of "
            "dense matrix. This config is disabled by default. If most of examples in your "
            "training dataset contains sparse features, we suggest to enable this config."
        ),
        typeConverter=TypeConverters.toBoolean,
    )

    def __init__(self) -> None:
        """Run the parent initialiser, then register the disabled-by-default value."""
        super().__init__()
        disabled = False
        self._setDefault(enable_sparse_data_optim=disabled)


class HasQueryIdCol(Params):
    """
    Mixin for param qid_col: the name of the query id column.

    The value is converted with ``TypeConverters.toString``.
    """

    qid_col = Param(
        parent=Params._dummy(),
        name="qid_col",
        doc="query id column name",
        typeConverter=TypeConverters.toString,
    )


class HasContribPredictionCol(Params):
    """
    Mixin for param pred_contrib_col: contribution prediction column name.

    For the classification case the output is a 3-dim array shaped
    (rows, groups, columns + 1); for the regression case it can be
    2-dimensional instead.
    """

    pred_contrib_col: "Param[str]" = Param(
        parent=Params._dummy(),
        name="pred_contrib_col",
        doc="feature contributions to individual predictions.",
        typeConverter=TypeConverters.toString,
    )