"""Utilities for defining Python tests. The module is private and subject to frequent
|
|
change without notice.
|
|
|
|
"""
|
|
|
|
# pylint: disable=invalid-name,missing-function-docstring
|
|
import importlib.util
|
|
import os
|
|
import platform
|
|
import queue
|
|
import socket
|
|
import sys
|
|
import threading
|
|
from contextlib import contextmanager
|
|
from io import StringIO
|
|
from platform import system
|
|
from typing import (
|
|
Any,
|
|
Callable,
|
|
Dict,
|
|
Generator,
|
|
List,
|
|
Optional,
|
|
Sequence,
|
|
Set,
|
|
Tuple,
|
|
TypedDict,
|
|
TypeVar,
|
|
Union,
|
|
)
|
|
|
|
import numpy as np
|
|
import pytest
|
|
from scipy import sparse
|
|
|
|
import xgboost as xgb
|
|
from xgboost import RabitTracker
|
|
from xgboost.core import ArrayLike
|
|
from xgboost.sklearn import SklObjective
|
|
|
|
from .._typing import PathLike
|
|
from .data import (
|
|
IteratorForTest,
|
|
get_california_housing,
|
|
get_cancer,
|
|
get_digits,
|
|
get_sparse,
|
|
make_batches,
|
|
make_categorical,
|
|
make_sparse_regression,
|
|
)
|
|
|
|
hypothesis = pytest.importorskip("hypothesis")
|
|
|
|
# pylint:disable=wrong-import-position,wrong-import-order
|
|
from hypothesis import strategies
|
|
from hypothesis.extra.numpy import arrays
|
|
|
|
datasets = pytest.importorskip("sklearn.datasets")
|
|
|
|
PytestSkip = TypedDict("PytestSkip", {"condition": bool, "reason": str})
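
# Illustrative note: a ``PytestSkip`` dict unpacks directly into the keyword
# arguments of ``pytest.mark.skipif``.  A minimal sketch (the test name is
# hypothetical):
#
#   @pytest.mark.skipif(**no_ipv6())
#   def test_tracker_ipv6() -> None:
#       ...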


def has_ipv6() -> bool:
    """Check whether IPv6 is enabled on this host."""
    # Connection errors on macOS; this check still needs some fixes there.
    if system() not in ("Linux", "Windows"):
        return False

    if socket.has_ipv6:
        try:
            with socket.socket(
                socket.AF_INET6, socket.SOCK_STREAM
            ) as server, socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as client:
                server.bind(("::1", 0))
                port = server.getsockname()[1]
                server.listen()

                client.connect(("::1", port))
                conn, _ = server.accept()

                client.sendall("abc".encode())
                msg = conn.recv(3).decode()
                # If the code executes to this point, the message should be
                # correct.
                assert msg == "abc"
                return True
        except OSError:
            pass
    return False


def no_mod(name: str) -> PytestSkip:
    spec = importlib.util.find_spec(name)
    return {"condition": spec is None, "reason": f"{name} is not installed."}


def no_ipv6() -> PytestSkip:
    """PyTest skip mark for IPv6."""
    return {"condition": not has_ipv6(), "reason": "IPv6 is required to be enabled."}


def not_linux() -> PytestSkip:
    return {"condition": system() != "Linux", "reason": "Linux is required."}


def no_ubjson() -> PytestSkip:
    return no_mod("ubjson")


def no_sklearn() -> PytestSkip:
    return no_mod("sklearn")


def no_dask() -> PytestSkip:
    return no_mod("dask")


def no_loky() -> PytestSkip:
    return no_mod("loky")


def no_dask_ml() -> PytestSkip:
    if sys.platform.startswith("win"):
        return {"reason": "Unsupported platform.", "condition": True}
    return no_mod("dask_ml")


def no_spark() -> PytestSkip:
    if sys.platform.startswith("win") or sys.platform.startswith("darwin"):
        return {"reason": "Unsupported platform.", "condition": True}
    return no_mod("pyspark")


def no_pandas() -> PytestSkip:
    return no_mod("pandas")


def no_arrow() -> PytestSkip:
    return no_mod("pyarrow")


def no_polars() -> PytestSkip:
    return no_mod("polars")


def no_modin() -> PytestSkip:
    try:
        import modin.pandas as md

        md.DataFrame([[1, 2.0, True], [2, 3.0, False]], columns=["a", "b", "c"])
    except ImportError:
        return {"reason": "Failed to import modin.", "condition": True}
    # Import and basic construction succeeded, so there is no reason to skip.
    return {"reason": "Failed to import modin.", "condition": False}


def no_matplotlib() -> PytestSkip:
    reason = "Matplotlib is not installed."
    try:
        import matplotlib.pyplot as _  # noqa

        return {"condition": False, "reason": reason}
    except ImportError:
        return {"condition": True, "reason": reason}


def no_dask_cuda() -> PytestSkip:
    return no_mod("dask_cuda")


def no_cudf() -> PytestSkip:
    return no_mod("cudf")


def no_cupy() -> PytestSkip:
    skip_cupy = no_mod("cupy")
    if not skip_cupy["condition"] and system() == "Windows":
        import cupy as cp

        # CuPy might run into issues on Windows due to a missing compiler.
        try:
            cp.array([1, 2, 3]).sum()
        except Exception:  # pylint: disable=broad-except
            skip_cupy["condition"] = True
    return skip_cupy


def no_dask_cudf() -> PytestSkip:
    return no_mod("dask_cudf")


def no_json_schema() -> PytestSkip:
    return no_mod("jsonschema")


def no_graphviz() -> PytestSkip:
    return no_mod("graphviz")


def no_rmm() -> PytestSkip:
    return no_mod("rmm")


def no_multiple(*args: Any) -> PytestSkip:
    condition = False
    reason = ""
    for arg in args:
        condition = condition or arg["condition"]
        if arg["condition"]:
            reason = arg["reason"]
            break
    return {"condition": condition, "reason": reason}


def skip_win() -> PytestSkip:
    return {"reason": "Unsupported platform.", "condition": is_windows()}


def make_regression(
    n_samples: int, n_features: int, use_cupy: bool
) -> Tuple[ArrayLike, ArrayLike, ArrayLike]:
    """Make a simple regression dataset."""
    X, y, w = make_batches(n_samples, n_features, 1, use_cupy)
    return X[0], y[0], w[0]


def make_batches_sparse(
    n_samples_per_batch: int, n_features: int, n_batches: int, sparsity: float
) -> Tuple[List[sparse.csr_matrix], List[np.ndarray], List[np.ndarray]]:
    X = []
    y = []
    w = []
    rng = np.random.RandomState(1994)
    for _ in range(n_batches):
        _X = sparse.random(
            n_samples_per_batch,
            n_features,
            1.0 - sparsity,
            format="csr",
            dtype=np.float32,
            random_state=rng,
        )
        _y = rng.randn(n_samples_per_batch)
        _w = rng.uniform(low=0, high=1, size=n_samples_per_batch)
        X.append(_X)
        y.append(_y)
        w.append(_w)
    return X, y, w


class TestDataset:
    """Contains a dataset in numpy format as well as the relevant objective and metric."""

    def __init__(
        self, name: str, get_dataset: Callable, objective: str, metric: str
    ) -> None:
        self.name = name
        self.objective = objective
        self.metric = metric
        self.X, self.y = get_dataset()
        self.w: Optional[np.ndarray] = None
        self.margin: Optional[np.ndarray] = None

    def set_params(self, params_in: Dict[str, Any]) -> Dict[str, Any]:
        params_in["objective"] = self.objective
        params_in["eval_metric"] = self.metric
        if self.objective == "multi:softmax":
            params_in["num_class"] = int(np.max(self.y) + 1)
        return params_in

    def get_dmat(self) -> xgb.DMatrix:
        return xgb.DMatrix(
            self.X,
            self.y,
            weight=self.w,
            base_margin=self.margin,
            enable_categorical=True,
        )

    def get_device_dmat(self, max_bin: Optional[int]) -> xgb.QuantileDMatrix:
        import cupy as cp

        w = None if self.w is None else cp.array(self.w)
        X = cp.array(self.X, dtype=np.float32)
        y = cp.array(self.y, dtype=np.float32)
        return xgb.QuantileDMatrix(
            X, y, weight=w, base_margin=self.margin, max_bin=max_bin
        )

    def get_external_dmat(self) -> xgb.DMatrix:
        n_samples = self.X.shape[0]
        n_batches = 10
        per_batch = n_samples // n_batches + 1

        predictor = []
        response = []
        weight = []
        for i in range(n_batches):
            beg = i * per_batch
            end = min((i + 1) * per_batch, n_samples)
            assert end != beg
            X = self.X[beg:end, ...]
            y = self.y[beg:end]
            w = self.w[beg:end] if self.w is not None else None
            predictor.append(X)
            response.append(y)
            if w is not None:
                weight.append(w)

        it = IteratorForTest(
            predictor,
            response,
            weight if weight else None,
            cache="cache",
            on_host=False,
        )
        return xgb.DMatrix(it)

    def __repr__(self) -> str:
        return self.name
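
# Illustrative sketch of typical ``TestDataset`` usage: inject the dataset's
# objective and metric into the parameters, then train on the materialized
# DMatrix.  The parameter values are arbitrary.
#
#   ds = TestDataset("cancer", get_cancer, "binary:logistic", "logloss")
#   params = ds.set_params({"tree_method": "hist", "max_depth": 2})
#   booster = xgb.train(params, ds.get_dmat(), num_boost_round=4)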


def make_ltr(
    n_samples: int,
    n_features: int,
    n_query_groups: int,
    max_rel: int,
    sort_qid: bool = True,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Make a dataset for testing LTR."""
    rng = np.random.default_rng(1994)
    X = rng.normal(0, 1.0, size=n_samples * n_features).reshape(n_samples, n_features)
    y = np.sum(X, axis=1)
    y -= y.min()
    y = np.round(y / y.max() * max_rel).astype(np.int32)

    qid = rng.integers(0, n_query_groups, size=n_samples, dtype=np.int32)
    w = rng.normal(0, 1.0, size=n_query_groups)
    w -= np.min(w)
    w /= np.max(w)
    if sort_qid:
        qid = np.sort(qid)
    return X, y, qid, w
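
# Illustrative sketch, assuming arbitrary sizes: ``qid`` feeds the ranking
# interface directly, while ``w`` holds one weight per query group.
#
#   X, y, qid, w = make_ltr(512, 4, n_query_groups=3, max_rel=3)
#   Xy = xgb.DMatrix(X, y, qid=qid)
#   xgb.train({"objective": "rank:ndcg"}, Xy, num_boost_round=4)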


def _cat_sampled_from() -> strategies.SearchStrategy:
    @strategies.composite
    def _make_cat(draw: Callable) -> Tuple[int, int, int, float]:
        n_samples = draw(strategies.integers(2, 512))
        n_features = draw(strategies.integers(1, 4))
        n_cats = draw(strategies.integers(1, 128))
        sparsity = draw(
            strategies.floats(
                min_value=0,
                max_value=1,
                allow_nan=False,
                allow_infinity=False,
                allow_subnormal=False,
            )
        )
        return n_samples, n_features, n_cats, sparsity

    def _build(args: Tuple[int, int, int, float]) -> TestDataset:
        n_samples, n_features, n_cats, sparsity = args
        return TestDataset(
            f"{n_samples}x{n_features}-{n_cats}-{sparsity}",
            lambda: make_categorical(
                n_samples, n_features, n_cats, onehot=False, sparsity=sparsity
            ),
            "reg:squarederror",
            "rmse",
        )

    return _make_cat().map(_build)  # pylint: disable=no-member


categorical_dataset_strategy: strategies.SearchStrategy = _cat_sampled_from()

sparse_datasets_strategy = strategies.sampled_from(
    [
        TestDataset(
            "1e5x8-0.95-csr",
            lambda: make_sparse_regression(int(1e5), 8, 0.95, False),
            "reg:squarederror",
            "rmse",
        ),
        TestDataset(
            "1e5x8-0.5-csr",
            lambda: make_sparse_regression(int(1e5), 8, 0.5, False),
            "reg:squarederror",
            "rmse",
        ),
        TestDataset(
            "1e5x8-0.5-dense",
            lambda: make_sparse_regression(int(1e5), 8, 0.5, True),
            "reg:squarederror",
            "rmse",
        ),
        TestDataset(
            "1e5x8-0.05-csr",
            lambda: make_sparse_regression(int(1e5), 8, 0.05, False),
            "reg:squarederror",
            "rmse",
        ),
        TestDataset(
            "1e5x8-0.05-dense",
            lambda: make_sparse_regression(int(1e5), 8, 0.05, True),
            "reg:squarederror",
            "rmse",
        ),
    ]
)


def make_datasets_with_margin(
    unweighted_strategy: strategies.SearchStrategy,
) -> Callable[[], strategies.SearchStrategy[TestDataset]]:
    """Factory function for creating strategies that generate datasets with weight and
    base margin.

    """

    @strategies.composite
    def weight_margin(draw: Callable) -> TestDataset:
        data: TestDataset = draw(unweighted_strategy)
        if draw(strategies.booleans()):
            data.w = draw(
                arrays(np.float64, (len(data.y)), elements=strategies.floats(0.1, 2.0))
            )
        if draw(strategies.booleans()):
            num_class = 1
            if data.objective == "multi:softmax":
                num_class = int(np.max(data.y) + 1)
            elif data.name.startswith("mtreg"):
                num_class = data.y.shape[1]

            data.margin = draw(
                arrays(
                    np.float64,
                    (data.y.shape[0] * num_class),
                    elements=strategies.floats(0.5, 1.0),
                )
            )
            assert data.margin is not None
            if num_class != 1:
                data.margin = data.margin.reshape(data.y.shape[0], num_class)

        return data

    return weight_margin


# A strategy for drawing from a set of example datasets. May add random weights to the
# dataset.
def make_dataset_strategy() -> strategies.SearchStrategy[TestDataset]:
    _unweighted_datasets_strategy = strategies.sampled_from(
        [
            TestDataset(
                "calif_housing", get_california_housing, "reg:squarederror", "rmse"
            ),
            TestDataset(
                "calif_housing-l1", get_california_housing, "reg:absoluteerror", "mae"
            ),
            TestDataset("cancer", get_cancer, "binary:logistic", "logloss"),
            TestDataset("sparse", get_sparse, "reg:squarederror", "rmse"),
            TestDataset("sparse-l1", get_sparse, "reg:absoluteerror", "mae"),
            TestDataset(
                "empty",
                lambda: (np.empty((0, 100)), np.empty(0)),
                "reg:squarederror",
                "rmse",
            ),
        ]
    )
    return make_datasets_with_margin(_unweighted_datasets_strategy)()
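
# Illustrative sketch: the strategy is intended for hypothesis-driven tests.
# The test body is hypothetical.
#
#   @hypothesis.given(dataset=make_dataset_strategy())
#   def test_hist_updater(dataset: TestDataset) -> None:
#       params = dataset.set_params({"tree_method": "hist"})
#       xgb.train(params, dataset.get_dmat(), num_boost_round=2)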


_unweighted_multi_datasets_strategy = strategies.sampled_from(
    [
        TestDataset("digits", get_digits, "multi:softmax", "mlogloss"),
        TestDataset(
            "mtreg",
            lambda: datasets.make_regression(n_samples=128, n_features=2, n_targets=3),
            "reg:squarederror",
            "rmse",
        ),
        TestDataset(
            "mtreg-l1",
            lambda: datasets.make_regression(n_samples=128, n_features=2, n_targets=3),
            "reg:absoluteerror",
            "mae",
        ),
    ]
)

# A strategy for drawing from a set of multi-target/multi-class datasets.
multi_dataset_strategy = make_datasets_with_margin(
    _unweighted_multi_datasets_strategy
)()


def non_increasing(L: Sequence[float], tolerance: float = 1e-4) -> bool:
    return all((y - x) < tolerance for x, y in zip(L, L[1:]))


def non_decreasing(L: Sequence[float], tolerance: float = 1e-4) -> bool:
    return all((y - x) >= -tolerance for x, y in zip(L, L[1:]))


M = TypeVar("M", xgb.Booster, xgb.XGBModel)


def logregobj(preds: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[np.ndarray, np.ndarray]:
    """Custom objective for binary logistic regression."""
    labels = dtrain.get_label()
    preds = 1.0 / (1.0 + np.exp(-preds))
    grad = preds - labels
    hess = preds * (1.0 - preds)
    return grad, hess
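
# Illustrative sketch: a custom objective is passed to ``xgb.train`` through
# the ``obj`` argument; predictions then stay on the margin (logit) scale.
# ``Xy`` is a hypothetical binary-classification DMatrix.
#
#   booster = xgb.train({"tree_method": "hist"}, Xy, num_boost_round=4, obj=logregobj)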


def eval_error_metric(
    predt: np.ndarray, dtrain: xgb.DMatrix, rev_link: bool
) -> Tuple[str, np.float64]:
    """Evaluation metric for xgb.train.

    Parameters
    ----------
    rev_link :
        Whether the metric needs to apply the reverse link function (activation).

    """
    label = dtrain.get_label()
    if rev_link:
        predt = 1.0 / (1.0 + np.exp(-predt))
    assert (0.0 <= predt).all() and (predt <= 1.0).all()
    r = np.zeros(predt.shape)
    gt = predt > 0.5
    if predt.size == 0:
        return "CustomErr", np.float64(0.0)
    r[gt] = 1 - label[gt]
    le = predt <= 0.5
    r[le] = label[le]
    return "CustomErr", np.sum(r)


def eval_error_metric_skl(
    y_true: np.ndarray, y_score: np.ndarray, rev_link: bool = False
) -> np.float64:
    """Evaluation metric that looks like metrics provided by sklearn."""

    if rev_link:
        y_score = 1.0 / (1.0 + np.exp(-y_score))
    assert (0.0 <= y_score).all() and (y_score <= 1.0).all()

    r = np.zeros(y_score.shape)
    gt = y_score > 0.5
    r[gt] = 1 - y_true[gt]
    le = y_score <= 0.5
    r[le] = y_true[le]
    return np.sum(r)


def root_mean_square(y_true: np.ndarray, y_score: np.ndarray) -> float:
    err = y_score - y_true
    rmse = np.sqrt(np.dot(err, err) / y_score.size)
    return rmse


def softmax(x: np.ndarray) -> np.ndarray:
    e = np.exp(x)
    return e / np.sum(e)


def softprob_obj(
    classes: int, use_cupy: bool = False, order: str = "C", gdtype: str = "float32"
) -> SklObjective:
    """Custom softprob objective for testing.

    Parameters
    ----------
    use_cupy :
        Whether the objective should return cupy arrays.
    order :
        The order of gradient matrices. "C" or "F".
    gdtype :
        DType for the gradient. The Hessian is not set. This is for testing
        asymmetric types.

    """
    if use_cupy:
        import cupy as backend
    else:
        backend = np

    def objective(
        labels: backend.ndarray, predt: backend.ndarray
    ) -> Tuple[backend.ndarray, backend.ndarray]:
        rows = labels.shape[0]
        grad = backend.zeros((rows, classes), dtype=np.float32)
        hess = backend.zeros((rows, classes), dtype=np.float32)
        eps = 1e-6
        for r in range(predt.shape[0]):
            target = labels[r]
            p = softmax(predt[r, :])
            for c in range(predt.shape[1]):
                assert 0 <= target < classes
                g = p[c] - 1.0 if c == target else p[c]
                h = max((2.0 * p[c] * (1.0 - p[c])).item(), eps)
                grad[r, c] = g
                hess[r, c] = h

        grad = grad.reshape((rows, classes))
        hess = hess.reshape((rows, classes))
        grad = backend.require(grad, requirements=order, dtype=gdtype)
        hess = backend.require(hess, requirements=order)
        return grad, hess

    return objective
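
# Illustrative sketch: the returned closure matches the objective signature of
# the sklearn interface, so it can be handed to an estimator directly.  The
# dataset choice here is arbitrary.
#
#   X, y = get_digits()
#   clf = xgb.XGBClassifier(objective=softprob_obj(10), n_estimators=2)
#   clf.fit(X, y)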


def ls_obj(
    y_true: np.ndarray, y_pred: np.ndarray, sample_weight: Optional[np.ndarray] = None
) -> Tuple[np.ndarray, np.ndarray]:
    """Least squared error."""
    grad = y_pred - y_true
    hess = np.ones(len(y_true))
    if sample_weight is not None:
        grad *= sample_weight
        hess *= sample_weight
    return grad, hess


class DirectoryExcursion:
    """Change the working directory, then change back on exit, optionally cleaning up
    files created inside the directory.

    """

    def __init__(self, path: PathLike, cleanup: bool = False):
        self.path = path
        self.curdir = os.path.normpath(os.path.abspath(os.path.curdir))
        self.cleanup = cleanup
        self.files: Set[str] = set()

    def __enter__(self) -> None:
        os.chdir(self.path)
        if self.cleanup:
            self.files = {
                os.path.join(root, f)
                for root, subdir, files in os.walk(os.path.expanduser(self.path))
                for f in files
            }

    def __exit__(self, *args: Any) -> None:
        os.chdir(self.curdir)
        if self.cleanup:
            files = {
                os.path.join(root, f)
                for root, subdir, files in os.walk(os.path.expanduser(self.path))
                for f in files
            }
            diff = files.difference(self.files)
            for f in diff:
                os.remove(f)
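
# Illustrative sketch: run a block inside another working directory and remove
# any files created there on exit.  ``tmpdir`` and ``booster`` are
# hypothetical.
#
#   with DirectoryExcursion(tmpdir, cleanup=True):
#       booster.save_model("model.json")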


@contextmanager
def captured_output() -> Generator[Tuple[StringIO, StringIO], None, None]:
    """Reassign stdout temporarily in order to test printed statements.

    Taken from:
    https://stackoverflow.com/questions/4219717/how-to-assert-output-with-nosetest-unittest-in-python

    Also works for pytest.

    """
    new_out, new_err = StringIO(), StringIO()
    old_out, old_err = sys.stdout, sys.stderr
    try:
        sys.stdout, sys.stderr = new_out, new_err
        yield sys.stdout, sys.stderr
    finally:
        sys.stdout, sys.stderr = old_out, old_err
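
# Illustrative sketch:
#
#   with captured_output() as (out, err):
#       print("quiet")
#   assert out.getvalue().strip() == "quiet"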


def timeout(sec: int, *args: Any, enable: bool = True, **kwargs: Any) -> Any:
    """Make a pytest mark for the `pytest-timeout` package.

    Parameters
    ----------
    sec :
        Timeout seconds.
    enable :
        Control whether the timeout should be applied, used for debugging.

    Returns
    -------
    pytest.mark.timeout
    """

    if enable:
        return pytest.mark.timeout(sec, *args, **kwargs)
    return pytest.mark.timeout(None, *args, **kwargs)
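
# Illustrative sketch: applied as a decorator to a (hypothetical) slow test.
#
#   @timeout(30)
#   def test_slow_path() -> None:
#       ...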


def setup_rmm_pool(_: Any, pytestconfig: pytest.Config) -> None:
    if pytestconfig.getoption("--use-rmm-pool"):
        if no_rmm()["condition"]:
            raise ImportError("The --use-rmm-pool option requires the RMM package")
        if no_dask_cuda()["condition"]:
            raise ImportError(
                "The --use-rmm-pool option requires the dask_cuda package"
            )
        import rmm
        from dask_cuda.utils import get_n_gpus

        rmm.reinitialize(
            pool_allocator=True,
            initial_pool_size=1024 * 1024 * 1024,
            devices=list(range(get_n_gpus())),
        )


def demo_dir(path: str) -> str:
    """Look for the demo directory based on the test file name."""
    path = normpath(os.path.dirname(path))
    while True:
        subdirs = [f.path for f in os.scandir(path) if f.is_dir()]
        subdirs = [os.path.basename(d) for d in subdirs]
        if "demo" in subdirs:
            return os.path.join(path, "demo")
        new_path = normpath(os.path.join(path, os.path.pardir))
        assert new_path != path
        path = new_path


def normpath(path: str) -> str:
    return os.path.normpath(os.path.abspath(path))


def data_dir(path: str) -> str:
    return os.path.join(demo_dir(path), "data")


def load_agaricus(path: str) -> Tuple[xgb.DMatrix, xgb.DMatrix]:
    dpath = data_dir(path)
    dtrain = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.train?format=libsvm"))
    dtest = xgb.DMatrix(os.path.join(dpath, "agaricus.txt.test?format=libsvm"))
    return dtrain, dtest


def project_root(path: str) -> str:
    return normpath(os.path.join(demo_dir(path), os.path.pardir))


def run_with_rabit(
    world_size: int, test_fn: Callable[..., Any], *args: Any, **kwargs: Any
) -> None:
    exception_queue: queue.Queue = queue.Queue()

    def run_worker(rabit_env: Dict[str, Union[str, int]]) -> None:
        try:
            with xgb.collective.CommunicatorContext(**rabit_env):
                test_fn(*args, **kwargs)
        except Exception as e:  # pylint: disable=broad-except
            exception_queue.put(e)

    tracker = RabitTracker(host_ip="127.0.0.1", n_workers=world_size)
    tracker.start()

    workers = []
    for _ in range(world_size):
        worker = threading.Thread(target=run_worker, args=(tracker.worker_args(),))
        workers.append(worker)
        worker.start()
    for worker in workers:
        worker.join()
        assert exception_queue.empty(), f"Worker failed: {exception_queue.get()}"

    tracker.wait_for()
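
# Illustrative sketch: run a check under a two-worker communicator.  The check
# below is hypothetical and runs once in every worker thread.
#
#   def check_world_size() -> None:
#       assert xgb.collective.get_world_size() == 2
#
#   run_with_rabit(2, check_world_size)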


def column_split_feature_names(
    feature_names: List[Union[str, int]], world_size: int
) -> List[str]:
    """Get the global list of feature names from the local feature names."""
    return [
        f"{rank}.{feature}" for rank in range(world_size) for feature in feature_names
    ]


def is_windows() -> bool:
    """Check if the current platform is Windows."""
    return platform.system() == "Windows"