# xgboost/testing/metrics.py
"""Tests for evaluation metrics."""
from typing import Dict, List
import numpy as np
import pytest
from ..compat import concat
from ..core import DMatrix, QuantileDMatrix, _parse_eval_str
from ..sklearn import XGBClassifier, XGBRanker
from ..training import train
from .utils import Device


def check_precision_score(  # pylint: disable=too-many-locals
    tree_method: str, device: Device
) -> None:
    """Test for precision with ranking and classification."""
    datasets = pytest.importorskip("sklearn.datasets")

    X, y = datasets.make_classification(
        n_samples=1024, n_features=4, n_classes=2, random_state=2023
    )
    qid = np.zeros(shape=y.shape)  # all samples in the same query group
    ltr = XGBRanker(n_estimators=2, tree_method=tree_method, device=device)
    ltr.fit(X, y, qid=qid)

    # Regenerate the data so the metric doesn't trivially evaluate to 1.0.
    X, y = datasets.make_classification(
        n_samples=512, n_features=4, n_classes=2, random_state=1994
    )
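    # `pre@32` is precision evaluated over the top 32 ranked documents:
    # pre@k = (number of relevant documents in the top k) / k.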
    ltr.set_params(eval_metric="pre@32")
    result = _parse_eval_str(ltr.get_booster().eval_set(evals=[(DMatrix(X, y), "Xy")]))
    score_0 = result[1][1]
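
    # Precision is averaged over query groups, so splitting identical copies of the
    # data into several groups should reproduce the single-group score exactly.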
    X_list = []
    y_list = []
    n_query_groups = 3
    q_list: List[np.ndarray] = []
    for i in range(n_query_groups):
        # same for all groups
        X, y = datasets.make_classification(
            n_samples=512, n_features=4, n_classes=2, random_state=1994
        )
        X_list.append(X)
        y_list.append(y)
        q = np.full(shape=y.shape, fill_value=i, dtype=np.uint64)
        q_list.append(q)

    qid = concat(q_list)
    X = concat(X_list)
    y = concat(y_list)
    result = _parse_eval_str(
        ltr.get_booster().eval_set(evals=[(DMatrix(X, y, qid=qid), "Xy")])
    )
    assert result[1][0].endswith("pre@32")
    score_1 = result[1][1]
    assert score_1 == score_0


def check_quantile_error(tree_method: str, device: Device) -> None:
    """Test for the `quantile` loss."""
    from sklearn.datasets import make_regression
    from sklearn.metrics import mean_pinball_loss

    rng = np.random.RandomState(19)
    # pylint: disable=unbalanced-tuple-unpacking
    X, y = make_regression(128, 3, random_state=rng)
    Xy = QuantileDMatrix(X, y)

    evals_result: Dict[str, Dict] = {}
    booster = train(
        {
            "tree_method": tree_method,
            "eval_metric": "quantile",
            "quantile_alpha": 0.3,
            "device": device,
        },
        Xy,
        evals=[(Xy, "Train")],
        evals_result=evals_result,
    )
    predt = booster.inplace_predict(X)
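    # Reference value: sklearn's pinball loss,
    #   L_alpha(y, q) = mean(alpha * max(y - q, 0) + (1 - alpha) * max(q - y, 0)),
    # which should match XGBoost's `quantile` metric for the same alpha.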
    loss = mean_pinball_loss(y, predt, alpha=0.3)
    np.testing.assert_allclose(evals_result["Train"]["quantile"][-1], loss)
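
    # With a list of quantiles, each prediction column corresponds to one alpha and
    # the reported `quantile` metric is the mean pinball loss across them.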
    alpha = [0.25, 0.5, 0.75]
    booster = train(
        {
            "tree_method": tree_method,
            "eval_metric": "quantile",
            "quantile_alpha": alpha,
            "objective": "reg:quantileerror",
            "device": device,
        },
        Xy,
        evals=[(Xy, "Train")],
        evals_result=evals_result,
    )
    predt = booster.inplace_predict(X)
    loss = np.mean(
        [mean_pinball_loss(y, predt[:, i], alpha=alpha[i]) for i in range(3)]
    )
    np.testing.assert_allclose(evals_result["Train"]["quantile"][-1], loss)


def run_roc_auc_binary(tree_method: str, n_samples: int, device: Device) -> None:
    """Test for ROC AUC metric on a binary classification problem."""
    from sklearn.datasets import make_classification
    from sklearn.metrics import roc_auc_score

    rng = np.random.RandomState(1994)
    n_features = 10
    X, y = make_classification(
        n_samples,
        n_features,
        n_informative=n_features,
        n_redundant=0,
        random_state=rng,
    )
    Xy = DMatrix(X, y)
    booster = train(
        {
            "tree_method": tree_method,
            "device": device,
            "eval_metric": "auc",
            "objective": "binary:logistic",
        },
        Xy,
        num_boost_round=1,
    )
    score = booster.predict(Xy)
    skl_auc = roc_auc_score(y, score)
    auc = float(booster.eval(Xy).split(":")[1])
    np.testing.assert_allclose(skl_auc, auc, rtol=1e-6)
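
    # Replace the features with random noise so predictions are no longer tied to
    # the training data, then check the metric against sklearn again.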
    X = rng.randn(*X.shape)
    score = booster.predict(DMatrix(X))
    skl_auc = roc_auc_score(y, score)
    auc = float(booster.eval(DMatrix(X, y)).split(":")[1])
    np.testing.assert_allclose(skl_auc, auc, rtol=1e-6)


def run_pr_auc_multi(tree_method: str, device: Device) -> None:
    """Test for PR AUC metric on a multi-class classification problem."""
    from sklearn.datasets import make_classification

    X, y = make_classification(
        64, 16, n_informative=8, n_classes=3, random_state=1994
    )
    clf = XGBClassifier(
        tree_method=tree_method, n_estimators=1, eval_metric="aucpr", device=device
    )
    clf.fit(X, y, eval_set=[(X, y)])
    # No reference implementation is available for comparison; the single-tree fit
    # only checks that the metric can be computed, while the longer run below checks
    # that XGBoost converges to 1.0.
    evals_result = clf.evals_result()["validation_0"]["aucpr"][-1]

    clf = XGBClassifier(
        tree_method=tree_method, n_estimators=10, eval_metric="aucpr", device=device
    )
    clf.fit(X, y, eval_set=[(X, y)])
    evals_result = clf.evals_result()["validation_0"]["aucpr"][-1]
    np.testing.assert_allclose(1.0, evals_result, rtol=1e-2)


def run_roc_auc_multi(  # pylint: disable=too-many-locals
    tree_method: str, n_samples: int, weighted: bool, device: Device
) -> None:
    """Test for ROC AUC metric on a multi-class classification problem."""
    from sklearn.datasets import make_classification
    from sklearn.metrics import roc_auc_score

    rng = np.random.RandomState(1994)
    n_features = 10
    n_classes = 4
    X, y = make_classification(
        n_samples,
        n_features,
        n_informative=n_features,
        n_redundant=0,
        n_classes=n_classes,
        random_state=rng,
    )
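    # Optionally attach per-sample weights, rescaled into [0, 1].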
    if weighted:
        weights = rng.randn(n_samples)
        weights -= weights.min()
        weights /= weights.max()
    else:
        weights = None

    Xy = DMatrix(X, y, weight=weights)
    booster = train(
        {
            "tree_method": tree_method,
            "eval_metric": "auc",
            "objective": "multi:softprob",
            "num_class": n_classes,
            "device": device,
        },
        Xy,
        num_boost_round=1,
    )
    score = booster.predict(Xy)
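    # XGBoost's multi-class AUC corresponds to sklearn's prevalence-weighted
    # one-vs-rest average, which is what the comparison below asserts.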
    skl_auc = roc_auc_score(
        y, score, average="weighted", sample_weight=weights, multi_class="ovr"
    )
    auc = float(booster.eval(Xy).split(":")[1])
    np.testing.assert_allclose(skl_auc, auc, rtol=1e-6)
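
    # As in the binary case, re-check against sklearn on random features.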
    X = rng.randn(*X.shape)
    score = booster.predict(DMatrix(X, weight=weights))
    skl_auc = roc_auc_score(
        y, score, average="weighted", sample_weight=weights, multi_class="ovr"
    )
    auc = float(booster.eval(DMatrix(X, y, weight=weights)).split(":")[1])
    np.testing.assert_allclose(skl_auc, auc, rtol=1e-5)


def run_pr_auc_ltr(tree_method: str, device: Device) -> None:
    """Test for PR AUC metric on a ranking problem."""
    from sklearn.datasets import make_classification

    X, y = make_classification(128, 4, n_classes=2, random_state=1994)
    ltr = XGBRanker(
        tree_method=tree_method,
        n_estimators=16,
        objective="rank:pairwise",
        eval_metric="aucpr",
        device=device,
    )
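    # Three query groups of sizes 32, 32, and 64, covering all 128 samples.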
    groups = np.array([32, 32, 64])
    ltr.fit(
        X,
        y,
        group=groups,
        eval_set=[(X, y)],
        eval_group=[groups],
    )
    results = ltr.evals_result()["validation_0"]["aucpr"]
    assert results[-1] >= 0.99


def run_pr_auc_binary(tree_method: str, device: Device) -> None:
    """Test for PR AUC metric on a binary classification problem."""
    from sklearn.datasets import make_classification
    from sklearn.metrics import auc, precision_recall_curve

    X, y = make_classification(128, 4, n_classes=2, random_state=1994)
    clf = XGBClassifier(
        tree_method=tree_method, n_estimators=1, eval_metric="aucpr", device=device
    )
    clf.fit(X, y, eval_set=[(X, y)])
    evals_result = clf.evals_result()["validation_0"]["aucpr"][-1]
    y_score = clf.predict_proba(X)[:, 1]  # get the positive column
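    # Reference: area under sklearn's precision-recall curve, integrated with the
    # trapezoidal rule by `sklearn.metrics.auc`.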
    precision, recall, _ = precision_recall_curve(y, y_score)
    prauc = auc(recall, precision)
    # Interpolation results differ slightly from sklearn's, but overall the two
    # scores should be similar.
    np.testing.assert_allclose(prauc, evals_result, rtol=1e-2)

    clf = XGBClassifier(
        tree_method=tree_method, n_estimators=10, eval_metric="aucpr", device=device
    )
    clf.fit(X, y, eval_set=[(X, y)])
    evals_result = clf.evals_result()["validation_0"]["aucpr"][-1]
    np.testing.assert_allclose(0.99, evals_result, rtol=1e-2)