# MLPproject/.venv/lib/python3.12/site-packages/catboost/monoforest.py

import math

from . import _catboost
from .core import CatBoost, CatBoostError

from .utils import _import_matplotlib

# Expose the FeatureExplanation type from the `_catboost` module under this
# module's namespace; the plotting helpers below use it for isinstance checks.
FeatureExplanation = _catboost.FeatureExplanation


def _check_model(model):
    """Raise ``CatBoostError`` unless ``model`` is a ``CatBoost`` instance."""
    if isinstance(model, CatBoost):
        return
    raise CatBoostError("Model should be CatBoost")


def to_polynom(model):
    """Validate ``model`` and return ``_catboost.to_polynom`` of its ``_object``.

    Raises ``CatBoostError`` (via ``_check_model``) when ``model`` is not a
    ``CatBoost`` instance.
    """
    _check_model(model)
    wrapped_object = model._object
    return _catboost.to_polynom(wrapped_object)


def to_polynom_string(model):
    """Validate ``model`` and return ``_catboost.to_polynom_string`` of its ``_object``.

    Raises ``CatBoostError`` (via ``_check_model``) when ``model`` is not a
    ``CatBoost`` instance.
    """
    _check_model(model)
    wrapped_object = model._object
    return _catboost.to_polynom_string(wrapped_object)


def explain_features(model):
    """Validate ``model`` and return ``_catboost.explain_features`` of its ``_object``.

    Raises ``CatBoostError`` (via ``_check_model``) when ``model`` is not a
    ``CatBoost`` instance.
    """
    _check_model(model)
    wrapped_object = model._object
    return _catboost.explain_features(wrapped_object)


def calc_features_strength(model):
    """Return ``calc_strength()`` for every explanation of ``model``.

    The result is a list with one entry per item returned by
    ``explain_features(model)``, in the same order.
    """
    return [explanation.calc_strength() for explanation in explain_features(model)]


def _pdp_step_curve(borders, values, bias):
    """Build the x/y coordinates of the step line drawn for a "Float" feature.

    The curve starts at ``bias`` left of the first border and, at every
    border, jumps vertically from the previous level to the paired value.
    With no borders a short flat segment at ``bias`` is returned.
    """
    if len(borders) == 0:
        return [-0.1, 0.1], [bias, bias]

    # NOTE(review): the margin is derived from the midpoint of the outer
    # borders, not from their span -- confirm this is intended.
    offset = max(0.1, (borders[0] + borders[-1]) / 2)
    xs = [borders[0] - offset]
    ys = [bias]
    for border, value in zip(borders, values):
        # Two points on the same x produce the vertical jump of the step.
        xs.extend((border, border))
        ys.extend((ys[-1], value))
    xs.append(borders[-1] + offset)
    ys.append(ys[-1])
    return xs, ys


def plot_pdp(arg, size_per_plot=(5, 5), plots_per_row=None):
    """Plot the ``calc_pdp`` output of feature explanations, one figure each.

    Parameters
    ----------
    arg : CatBoost, FeatureExplanation or list of FeatureExplanation
        A model (its explanations are obtained with ``explain_features``),
        a single explanation, or a non-empty list of explanations.
    size_per_plot : sequence of two numbers
        Width and height, in inches, of a single subplot.
    plots_per_row : int, optional
        Number of subplots per row. When falsy, ``min(5, dimension)`` of the
        explanation being drawn is used.

    Returns
    -------
    list
        The created figures, one per feature explanation, in input order.
    """
    with _import_matplotlib() as _plt:
        plt = _plt
    if isinstance(arg, CatBoost):
        arg = explain_features(arg)
    if isinstance(arg, _catboost.FeatureExplanation):
        arg = [arg]
    assert len(arg) > 0
    assert isinstance(arg, list)
    for element in arg:
        assert isinstance(element, _catboost.FeatureExplanation)

    figs = []
    for feature_explanation in arg:
        dimension = feature_explanation.dimension()
        # Resolve the layout per feature without overwriting the argument.
        per_row = plots_per_row or min(5, dimension)
        rows = int(math.ceil(dimension / per_row))
        # squeeze=False always yields a 2-D grid of axes, so no special
        # handling of single-row / single-column layouts is needed.
        fig, axes = plt.subplots(rows, per_row, squeeze=False)
        fig.suptitle("Feature #{}".format(feature_explanation.feature))
        fig.set_size_inches(size_per_plot[0] * per_row, size_per_plot[1] * rows)

        for dim in range(dimension):
            ax = axes[dim // per_row][dim % per_row]
            ax.set_title("Dimension={}".format(dim))
            ax.set_xlabel("feature value")
            ax.set_ylabel("model value")

            borders, values = feature_explanation.calc_pdp(dim)
            bias = feature_explanation.expected_bias[dim]
            if feature_explanation.type == "Float":
                xs, ys = _pdp_step_curve(borders, values, bias)
                ax.plot(xs, ys)
            else:
                # One bar per border value preceded by a 'bias' bar. The bias
                # must be prepended as its own element so that ys lines up
                # with xs; adding the scalar to the sequence would either
                # raise or leave ys one element short.
                xs = ['bias'] + list(map(str, borders))
                ys = [bias] + list(values)
                ax.bar(xs, ys)
        figs.append(fig)

    return figs


def plot_features_strength(model, height_per_feature=0.5, width_per_plot=5, plots_per_row=None):
    """Draw horizontal bar charts of feature strengths, one subplot per dimension.

    Parameters
    ----------
    model : CatBoost
        Model passed to ``calc_features_strength``.
    height_per_feature : number
        Figure height, in inches, allotted to each feature in each row.
    width_per_plot : number
        Width, in inches, of a single subplot.
    plots_per_row : int, optional
        Number of subplots per row. When falsy, ``min(5, dimension)`` is used,
        where ``dimension`` is the length of the first feature's strength.

    Returns
    -------
    The created matplotlib figure.
    """
    with _import_matplotlib() as _plt:
        plt = _plt
    strengths = calc_features_strength(model)
    dimension = len(strengths[0])
    features = len(strengths)
    if not plots_per_row:
        plots_per_row = min(5, dimension)
    rows = int(math.ceil(dimension / plots_per_row))
    # squeeze=False always yields a 2-D grid of axes, so no special handling
    # of single-row / single-column layouts is needed.
    fig, axes = plt.subplots(rows, plots_per_row, squeeze=False)
    fig.suptitle("Features Strength")
    fig.set_size_inches(width_per_plot * plots_per_row, height_per_feature * features * rows)

    for dim in range(dimension):
        # Keep the per-dimension data in its own names: `strengths` must stay
        # the per-feature list so that later dimensions can still index it.
        ranked = sorted((s[dim], i) for i, s in enumerate(strengths))
        labels = ["Feature #{}".format(f) for _, f in ranked]
        bar_values = [s for s, _ in ranked]
        positions = range(len(bar_values))

        ax = axes[dim // plots_per_row][dim % plots_per_row]
        # Red for strictly positive strength, blue otherwise.
        colors = [(1, 0, 0) if s > 0 else (0, 0, 1) for s in bar_values]
        ax.set_title("Dimension={}".format(dim))
        ax.barh(positions, bar_values, align='center', color=colors)
        ax.set_yticks(positions)
        ax.set_yticklabels(labels)
        ax.set_xlabel('Prediction value change')

    return fig