from __future__ import print_function

import os
from copy import copy
from enum import Enum

from .. import CatBoostError, CatBoost
from .evaluation_result import EvaluationResults, MetricEvaluationResult
from ._fold_models_handler import FoldModelsHandler
from ._readers import _SimpleStreamingFileReader
from ._splitter import _Splitter
from .execution_case import ExecutionCase
from .factor_utils import LabelMode, FactorUtils


class EvalType(Enum):
    """
    Type of feature evaluation:
        All: all evaluated factors are present at once
        SeqRem: remove each evaluated factor in turn while the others stay present
        SeqAdd: add each evaluated factor in turn while the others stay removed
        SeqAddAndAll: SeqAdd plus the All case
    """
    All = 'All'
    SeqRem = 'SeqRem'
    SeqAdd = 'SeqAdd'
    SeqAddAndAll = 'SeqAddAndAll'
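
# Illustrative sketch (not library code): for evaluated features {1, 2, 3} the
# test cases built below use the following ignored_features sets, while the
# baseline case always ignores all evaluated features, i.e. {1, 2, 3}:
#
#   EvalType.All    -> [set()]                    # every evaluated feature present
#   EvalType.SeqRem -> [{1}, {2}, {3}]            # remove one evaluated feature at a time
#   EvalType.SeqAdd -> [{2, 3}, {1, 3}, {1, 2}]   # add one evaluated feature at a time
#
# SeqAddAndAll is documented as SeqAdd plus the All case.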
class CatboostEvaluation(object):

    def __init__(self,
                 path_to_dataset,
                 fold_size,
                 fold_count,
                 column_description,
                 fold_offset=0,
                 group_column=None,
                 working_dir=None,
                 remove_models=True,
                 delimiter='\t',
                 has_header=False,
                 partition_random_seed=0,
                 min_fold_count=1):
        """
        Args:
            :param path_to_dataset: (str) Path to the dataset used for evaluation.
            :param fold_size: (int) Size of the folds in cross-validation.
            :param fold_count: (int) Number of folds to learn and evaluate as if there were no offset.
                If the offset is non-zero, the real number of evaluated folds will be smaller.
            :param column_description: (str) Path to the column description file.
            :param fold_offset: (int) Number of folds to skip before cross-validation starts.
            :param group_column: (int) GroupId column index in the dataset file.
                'None' means the dataset has no grouping information (the default).
            :param working_dir: (str) Working directory for temporary files.
            :param remove_models: (bool) Set to True if you want models to be removed after applying them.
            :param delimiter: (str) Field delimiter used in dataset files.
            :param has_header: (bool) Set to True if you want to skip the first line of the dataset files.
            :param partition_random_seed: (int) Seed for the random generator used to permute folds for
                cross-validation.
            :param min_fold_count: (int) Minimum number of folds the dataset can be cut into.
        """
        import os.path

        self._current_dir = os.getcwd()
        self._path_to_dataset = os.path.join(self._current_dir, path_to_dataset)
        self._column_description = os.path.join(self._current_dir,
                                                column_description) if column_description is not None else None
        self._fold_offset = fold_offset
        self._fold_count = fold_count
        self._fold_size = fold_size
        self._delimiter = delimiter
        self._has_header = has_header
        self._seed = partition_random_seed
        self._min_fold_count = int(min_fold_count)
        self._remove_models = remove_models

        if group_column is not None:
            self._group_feature_num = int(group_column)
        else:
            self._group_feature_num = group_column

        if working_dir is None:
            import tempfile
            self._working_dir = tempfile.mkdtemp()
        else:
            self._working_dir = working_dir
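
    # A minimal construction sketch (the file names are hypothetical; the import
    # path follows from this module living in the catboost.eval package):
    #
    #   from catboost.eval.catboost_evaluation import CatboostEvaluation, EvalType
    #
    #   evaluation = CatboostEvaluation('train.tsv',
    #                                   fold_size=5000,
    #                                   fold_count=10,
    #                                   column_description='train.cd',
    #                                   partition_random_seed=0)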

    def __go_to_working_dir(self):
        current = os.getcwd()
        os.chdir(self._working_dir)
        return current

    @staticmethod
    def _create_eval_feature_cases(params, features_to_eval, eval_type, label_mode):
        if len(features_to_eval) == 0:
            raise CatBoostError("Provide at least one feature to evaluate")

        # baseline
        test_cases = list()
        baseline_case = ExecutionCase(params,
                                      ignored_features=list(features_to_eval),
                                      label=FactorUtils.create_label(features_to_eval,
                                                                     features_to_eval,
                                                                     label_mode=label_mode))
        # test
        if eval_type == EvalType.All or eval_type == EvalType.SeqAddAndAll or len(features_to_eval) == 1:
            test_cases.append(ExecutionCase(params,
                                            ignored_features=[],
                                            label=FactorUtils.create_label(features_to_eval,
                                                                           [],
                                                                           label_mode=label_mode)))
        elif eval_type == EvalType.SeqRem:
            for feature_num in features_to_eval:
                test_cases.append(ExecutionCase(params,
                                                ignored_features=[feature_num],
                                                label=FactorUtils.create_label(features_to_eval,
                                                                               [feature_num],
                                                                               label_mode=label_mode)))
        elif eval_type == EvalType.SeqAdd or eval_type == EvalType.SeqAddAndAll:
            for feature_num in features_to_eval:
                cur_features = copy(features_to_eval)
                cur_features.remove(feature_num)
                test_cases.append(ExecutionCase(params,
                                                label=FactorUtils.create_label(features_to_eval,
                                                                               cur_features,
                                                                               label_mode=label_mode),
                                                ignored_features=list(cur_features)))
        elif eval_type != EvalType.All:
            raise AttributeError("Don't support {} mode.".format(eval_type.value))
        return baseline_case, test_cases

    @staticmethod
    def _create_evaluation_results(by_case_results):
        group_by_metric = dict()

        for (case, case_result) in by_case_results.items():
            for (metric, evaluation_result) in case_result.items():
                if metric not in group_by_metric:
                    group_by_metric[metric] = list()
                group_by_metric[metric].append(evaluation_result)

        results = list()
        for (metric, metric_results) in group_by_metric.items():
            results.append(MetricEvaluationResult(metric_results))
        return EvaluationResults(results)
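
    # Illustrative sketch of the regrouping above (the names are hypothetical):
    # by_case_results = {case_a: {'Logloss': res_a, 'AUC': auc_a},
    #                    case_b: {'Logloss': res_b, 'AUC': auc_b}}
    # is turned into one MetricEvaluationResult per metric, e.g.
    # [MetricEvaluationResult([res_a, res_b]), MetricEvaluationResult([auc_a, auc_b])],
    # and wrapped into a single EvaluationResults object.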

    def get_working_dir(self):
        return self._working_dir

    def _calculate_result_metrics(self, cases, metrics, thread_count=-1, evaluation_step=1):
        """
        Calculate metrics for the given cases and return them.

        Args:
            :param cases: List of the ExecutionCases you want to evaluate
            :param metrics: List of the metrics to be computed
            :param thread_count: Number of threads to use
            :param evaluation_step: Step (in iterations) used to evaluate metrics
            :return: Instance of EvaluationResults
        """
        cases_set = set(cases)
        if len(cases_set) != len(cases):
            raise CatBoostError("Found duplicate cases in {}".format(cases))
        current_wd = self.__go_to_working_dir()
        try:
            if self._fold_count <= self._fold_offset:
                error_msg = 'Count of folds (fold_count - fold_offset) needs to be at least one: offset {}, fold_count {}.'
                raise AttributeError(error_msg.format(self._fold_offset,
                                                      self._fold_count))

            handler = FoldModelsHandler(cases=cases,
                                        metrics=metrics,
                                        eval_step=evaluation_step,
                                        thread_count=thread_count,
                                        remove_models=self._remove_models)

            reader = _SimpleStreamingFileReader(self._path_to_dataset,
                                                sep=self._delimiter,
                                                has_header=self._has_header,
                                                group_feature_num=self._group_feature_num)
            splitter = _Splitter(reader,
                                 self._column_description,
                                 seed=self._seed,
                                 min_folds_count=self._min_fold_count)

            result = handler.proceed(splitter=splitter,
                                     fold_size=self._fold_size,
                                     folds_count=self._fold_count,
                                     fold_offset=self._fold_offset)

            return self._create_evaluation_results(result)
        finally:
            os.chdir(current_wd)

    def eval_features(self,
                      learn_config,
                      features_to_eval,
                      loss_function=None,
                      eval_type=EvalType.SeqAdd,
                      eval_metrics=None,
                      thread_count=-1,
                      eval_step=None,
                      label_mode=LabelMode.AddFeature):
        """Evaluate features.

        Args:
            learn_config: dict with params or instance of CatBoost. In the latter case the instance params are used.
            features_to_eval: list of feature indices to evaluate
            loss_function: one of the CatBoost loss functions; taken from learn_config if not specified
            eval_type: type of feature evaluation (All, SeqAdd, SeqRem, SeqAddAndAll)
            eval_metrics: additional metrics to calculate
            thread_count: number of threads to use. If not None, it overrides the value in learn_config
            eval_step: step (in iterations) used to evaluate metrics; defaults to 1
            label_mode: LabelMode option that controls how case labels are generated

        Returns
        -------
        result : Instance of EvaluationResults class
        """
        features_to_eval = set(features_to_eval)
        if eval_metrics is None:
            eval_metrics = []
        eval_metrics = eval_metrics if isinstance(eval_metrics, list) else [eval_metrics]
        if isinstance(learn_config, CatBoost):
            params = learn_config.get_params()
        else:
            params = dict(learn_config)

        if loss_function is not None:
            if "loss_function" in params and params["loss_function"] != loss_function:
                raise CatBoostError("Loss function in params {} should be equal to feature evaluation objective "
                                    "function {}".format(params["loss_function"], loss_function))
        else:
            if "loss_function" not in params:
                raise CatBoostError("Provide a loss function in params or as an option to the eval_features method")

        if thread_count is not None and thread_count != -1:
            params["thread_count"] = thread_count

        if eval_step is None:
            eval_step = 1

        if loss_function is not None:
            params["loss_function"] = loss_function
        else:
            loss_function = params["loss_function"]

        if params["loss_function"] == "PairLogit":
            raise CatBoostError("Pair classification is not supported")

        baseline_case, test_cases = self._create_eval_feature_cases(params,
                                                                    features_to_eval,
                                                                    eval_type=eval_type,
                                                                    label_mode=label_mode)
        if loss_function not in eval_metrics:
            eval_metrics.append(loss_function)

        return self.eval_cases(baseline_case=baseline_case,
                               compare_cases=test_cases,
                               eval_metrics=eval_metrics,
                               thread_count=thread_count,
                               eval_step=eval_step)
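
    # A minimal sketch of a typical call (hypothetical feature indices; the
    # parameter dict is only an assumption), using the 'evaluation' instance
    # constructed in the sketch after __init__ above:
    #
    #   result = evaluation.eval_features(learn_config={'iterations': 200,
    #                                                   'loss_function': 'Logloss'},
    #                                     features_to_eval=[10, 11, 12],
    #                                     eval_type=EvalType.SeqAdd,
    #                                     eval_metrics=['AUC'])
    #
    # 'result' is an EvaluationResults instance grouping per-metric comparisons
    # against the baseline case (the model trained without the evaluated features).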

    def eval_cases(self,
                   baseline_case,
                   compare_cases,
                   eval_metrics,
                   thread_count=-1,
                   eval_step=1):
        """More flexible evaluation of arbitrary cases.

        Args:
            baseline_case: ExecutionCase used as the baseline
            compare_cases: list of cases to compare against the baseline
            eval_metrics: metrics to calculate
            thread_count: number of threads to use; overrides the value set in the cases
            eval_step: step (in iterations) used to evaluate metrics

        Returns
        -------
        result : Instance of EvaluationResults class
        """
        if not isinstance(compare_cases, list):
            compare_cases = [compare_cases]

        cases = [baseline_case]
        cases += compare_cases

        for case in cases:
            case._set_thread_count(thread_count)

        metric_result = self._calculate_result_metrics(cases,
                                                       eval_metrics,
                                                       thread_count=thread_count,
                                                       evaluation_step=eval_step)
        metric_result.set_baseline_case(baseline_case)
        return metric_result
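
# A minimal sketch of eval_cases with two hand-built cases (the parameter dicts
# and labels are hypothetical; ExecutionCase is imported at the top of this file
# and 'evaluation' is the instance from the earlier construction sketch):
#
#   base = ExecutionCase({'iterations': 200, 'loss_function': 'Logloss'},
#                        ignored_features=[], label='baseline')
#   test = ExecutionCase({'iterations': 200, 'loss_function': 'Logloss', 'depth': 8},
#                        ignored_features=[], label='depth-8')
#   result = evaluation.eval_cases(baseline_case=base,
#                                  compare_cases=[test],
#                                  eval_metrics=['Logloss'])
#
# The returned EvaluationResults has its baseline set to 'base', so every other
# case is compared against it.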