89 lines
2.9 KiB
Python
89 lines
2.9 KiB
Python
"""Tests for interaction constraints."""
|
|
|
|
from typing import Optional, Sequence, Union
|
|
|
|
import numpy as np
|
|
|
|
from .._typing import FeatureNames
|
|
from ..core import DMatrix
|
|
from ..training import train
|
|
from .utils import Device
|
|
|
|
|
|
def run_interaction_constraints(  # pylint: disable=too-many-locals
    tree_method: str,
    device: Device,
    feature_names: Optional[FeatureNames] = None,
    interaction_constraints: Union[str, Sequence] = "[[0, 1]]",
) -> None:
    """Tests interaction constraints on a synthetic dataset.

    A booster is trained on three synthetic features (two normally distributed,
    one drawn from ``{1, 2, 3}``) whose target contains an ``x1 * x2 * x3``
    term.  The third feature is then set to the same value for every row and
    stepped through 1, 2, 3; the change in prediction between consecutive steps
    must be identical for all rows, which is what is expected when x3 is not
    allowed to interact with x1 or x2.

    Parameters
    ----------
    tree_method :
        Passed through as the ``tree_method`` training parameter.
    device :
        Passed through as the ``device`` training parameter.
    feature_names :
        Optional feature names used for every ``DMatrix`` built here.
    interaction_constraints :
        Passed through as the ``interaction_constraints`` training parameter.
        The check below assumes the constraint keeps x3 (column 2) separate
        from x1 and x2, as the default ``"[[0, 1]]"`` does.

    Raises
    ------
    AssertionError
        If stepping x3 changes the predictions by a row-dependent amount.

    """
    n_samples = 1000
    # A local, seeded generator makes the synthetic data reproducible from run
    # to run and leaves the global NumPy random state untouched.
    rng = np.random.default_rng(1994)

    x1 = rng.normal(loc=1.0, scale=1.0, size=n_samples)
    x2 = rng.normal(loc=1.0, scale=1.0, size=n_samples)
    x3 = rng.choice([1, 2, 3], size=n_samples, replace=True)
    y = (
        x1
        + x2
        + x3
        + x1 * x2 * x3
        + rng.normal(loc=0.001, scale=1.0, size=n_samples)
        + 3 * np.sin(x1)
    )
    X = np.column_stack((x1, x2, x3))
    dtrain = DMatrix(X, label=y, feature_names=feature_names)

    params = {
        "max_depth": 3,
        "eta": 0.1,
        "nthread": 2,
        "interaction_constraints": interaction_constraints,
        "tree_method": tree_method,
        "device": device,
    }
    num_boost_round = 12
    # With the default constraints, only x1 and x2 are allowed to interact.
    bst = train(params, dtrain, num_boost_round, evals=[(dtrain, "train")])

    # Set all observations to have the same x3 value, keeping x1 and x2 as
    # trained, and predict on the result.
    def f(x: int) -> np.ndarray:
        tmat = DMatrix(
            np.column_stack((x1, x2, np.repeat(x, n_samples))),
            feature_names=feature_names,
        )
        return bst.predict(tmat)

    preds = [f(x) for x in [1, 2, 3]]

    # Check incrementing x3 has the same effect on all observations
    # since x3 is constrained to be independent of x1 and x2
    # and all observations start off from the same x3 value
    for lower, upper in zip(preds[:-1], preds[1:]):
        diff = upper - lower
        assert np.all(
            np.abs(diff - diff[0]) < 1e-4
        ), "Changing x3 shifted predictions by a row-dependent amount."
|
|
|
|
|
|
def training_accuracy(tree_method: str, dpath: str, device: Device) -> None:
    """Test accuracy, reused by GPU tests.

    Trains on the agaricus training file found under ``dpath`` with a fixed
    interaction constraint, once per grow policy (``lossguide`` first, then
    ``depthwise``), and checks the result against the agaricus test file.

    Parameters
    ----------
    tree_method :
        Passed through as the ``tree_method`` training parameter.
    dpath :
        Prefix that is concatenated directly with the data file names, so it
        has to end with a path separator.
    device :
        Passed through as the ``device`` training parameter.

    Raises
    ------
    AssertionError
        If, for either grow policy, the thresholded predictions agree with
        the test labels in 10% or more of the rows.

    """
    from sklearn.metrics import accuracy_score

    train_uri = dpath + "agaricus.txt.train?indexing_mode=1&format=libsvm"
    test_uri = dpath + "agaricus.txt.test?indexing_mode=1&format=libsvm"
    dtrain = DMatrix(train_uri)
    dtest = DMatrix(test_uri)

    params = {
        "eta": 1,
        "max_depth": 6,
        "objective": "binary:logistic",
        "tree_method": tree_method,
        "device": device,
        "interaction_constraints": "[[1,2], [2,3,4]]",
    }
    num_boost_round = 5

    for grow_policy in ("lossguide", "depthwise"):
        params["grow_policy"] = grow_policy
        booster = train(params, dtrain, num_boost_round)
        # ``< 0.5`` flags the rows scored below 0.5, so the value compared
        # with 0.1 is how often that flag matches the label (presumably the
        # error rate of the usual ``>= 0.5`` decision rule; confirm against
        # the label encoding).
        below_half = booster.predict(dtest) < 0.5
        assert accuracy_score(dtest.get_label(), below_half) < 0.1
|