Files
MLPproject/.venv/lib/python3.12/site-packages/xgboost/testing/predict.py
2025-10-23 15:44:32 +02:00

97 lines
2.8 KiB
Python

"""Tests for inference."""
from typing import Type
import numpy as np
from scipy.special import logit # pylint: disable=no-name-in-module
from ..core import DMatrix
from ..training import train
from .shared import validate_leaf_output
from .updater import get_basescore
from .utils import Device
# pylint: disable=invalid-name,too-many-locals
def run_predict_leaf(device: Device, DMatrixT: Type[DMatrix]) -> np.ndarray:
    """Check the output shapes of leaf index prediction.

    A multi-class model with several parallel trees per round is trained with
    the ``hist`` tree method on random data, after which ``device`` is applied
    to the booster through ``set_param``. The following are then asserted:

    - predicting on a matrix with zero rows yields zero rows;
    - with ``strict_shape=True`` the leaf output has the axes
      ``(rows, boosting rounds, classes, parallel trees)``;
    - limiting prediction with ``iteration_range`` shrinks the per-sample
      output accordingly;
    - a model holding a single tree returns a 1-dim vector.

    Parameters
    ----------
    device :
        Value assigned to the booster's ``device`` parameter before predicting.
    DMatrixT :
        The DMatrix class used to build every input matrix in this test.

    Returns
    -------
    The strict-shape leaf prediction of the multi-class model on the training
    data.
    """
    n_samples, n_features = 100, 4
    n_classes = 5
    n_forest = 4
    n_rounds = 10

    # The draw order (features first, labels second) fixes the generated data.
    rng = np.random.RandomState(1994)
    X = rng.randn(n_samples, n_features)
    y = rng.randint(low=0, high=n_classes, size=n_samples)
    Xy = DMatrixT(X, y)

    params = {
        "num_parallel_tree": n_forest,
        "num_class": n_classes,
        "tree_method": "hist",
    }
    booster = train(params, Xy, num_boost_round=n_rounds)
    booster.set_param({"device": device})

    # An input without rows must produce an output without rows.
    no_rows = DMatrixT(np.ones(shape=(0, n_features)))
    no_rows_leaf = booster.predict(no_rows, pred_leaf=True)
    assert no_rows_leaf.shape[0] == 0

    # Full prediction: check the four axes one by one.
    leaf = booster.predict(Xy, pred_leaf=True, strict_shape=True)
    expected_axes = (n_samples, n_rounds, n_classes, n_forest)
    for axis, extent in enumerate(expected_axes):
        assert leaf.shape[axis] == extent
    validate_leaf_output(leaf, n_forest)

    # The upper bound is a NumPy integer, presumably to exercise non-builtin
    # integer types in ``iteration_range``.
    n_iters = np.int32(2)
    head = booster.predict(
        Xy,
        pred_leaf=True,
        iteration_range=(0, n_iters),
        strict_shape=True,
    )
    per_sample = head[0, ...]
    assert np.prod(per_sample.shape) == n_classes * n_forest * n_iters

    # With exactly one tree in the model the output collapses to a vector.
    booster = train({"tree_method": "hist"}, num_boost_round=1, dtrain=Xy)
    booster.set_param({"device": device})
    single_tree_leaf = booster.predict(Xy, pred_leaf=True)
    assert single_tree_leaf.shape == (n_samples,)

    return leaf
def run_base_margin_vs_base_score(device: Device) -> None:
    """Check that ``base_score`` and an equivalent ``base_margin`` agree.

    Two single-round ``binary:logistic`` models are trained on the same data:

    1. one with ``base_score`` set to the intercept;
    2. one whose training matrix carries a constant ``base_margin`` equal to
       ``logit(intercept)``, together with an unrelated ``base_score``.

    Each model must report the ``base_score`` it was configured with, and
    both must produce the same predictions.

    Parameters
    ----------
    device :
        Value passed as the ``device`` training parameter of both models.
    """
    # Imported here rather than at module level; sklearn is only needed by
    # this test.
    from sklearn.datasets import make_classification

    intercept = 0.5
    dummy_score = 0.2  # placeholder; the margin is what carries the intercept
    shared = {"objective": "binary:logistic", "device": device}

    X, y = make_classification(random_state=2025)

    # Model 1: the intercept is given in probability space as base_score.
    score_booster = train(
        {"base_score": intercept, **shared},
        dtrain=DMatrix(X, y),
        num_boost_round=1,
    )
    np.testing.assert_allclose(get_basescore(score_booster), intercept)
    predt_score = score_booster.predict(DMatrix(X, y))

    # Model 2: the same intercept is given in margin space for every sample.
    margin = np.full(y.shape, logit(intercept), dtype=np.float32)
    Xy_margin = DMatrix(X, y, base_margin=margin)
    margin_booster = train(
        {"base_score": dummy_score, **shared},
        dtrain=Xy_margin,
        num_boost_round=1,
    )
    np.testing.assert_allclose(get_basescore(margin_booster), dummy_score)
    predt_margin = margin_booster.predict(Xy_margin)

    np.testing.assert_allclose(predt_score, predt_margin)