fuzzy-rules-generator/distress.ipynb


In [15]:
import pandas as pd
import numpy as np
import seaborn as sns

random_state = 9

# Load the Financial Distress dataset and drop the identifier columns.
df = pd.read_csv("data-distress/FinancialDistress.csv").drop(["Company", "Time"], axis=1)
corr = df.corr()
display(df)
sns.heatmap(corr)
Financial Distress x1 x2 x3 x4 x5 x6 x7 x8 x9 ... x74 x75 x76 x77 x78 x79 x80 x81 x82 x83
0 0.010636 1.2810 0.022934 0.87454 1.21640 0.060940 0.188270 0.52510 0.018854 0.182790 ... 85.437 27.07 26.102 16.000 16.0 0.2 22 0.060390 30 49
1 -0.455970 1.2700 0.006454 0.82067 1.00490 -0.014080 0.181040 0.62288 0.006423 0.035991 ... 107.090 31.31 30.194 17.000 16.0 0.4 22 0.010636 31 50
2 -0.325390 1.0529 -0.059379 0.92242 0.72926 0.020476 0.044865 0.43292 -0.081423 -0.765400 ... 120.870 36.07 35.273 17.000 15.0 -0.2 22 -0.455970 32 51
3 -0.566570 1.1131 -0.015229 0.85888 0.80974 0.076037 0.091033 0.67546 -0.018807 -0.107910 ... 54.806 39.80 38.377 17.167 16.0 5.6 22 -0.325390 33 52
4 1.357300 1.0623 0.107020 0.81460 0.83593 0.199960 0.047800 0.74200 0.128030 0.577250 ... 85.437 27.07 26.102 16.000 16.0 0.2 29 1.251000 7 27
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
3667 0.438020 2.2605 0.202890 0.16037 0.18588 0.175970 0.198400 2.22360 1.091500 0.241640 ... 100.000 100.00 100.000 17.125 14.5 -7.0 37 0.436380 4 41
3668 0.482410 1.9615 0.216440 0.20095 0.21642 0.203590 0.189870 1.93820 1.000100 0.270870 ... 91.500 130.50 132.400 20.000 14.5 -16.0 37 0.438020 5 42
3669 0.500770 1.7099 0.207970 0.26136 0.21399 0.193670 0.183890 1.68980 0.971860 0.281560 ... 87.100 175.90 178.100 20.000 14.5 -20.2 37 0.482410 6 43
3670 0.611030 1.5590 0.185450 0.30728 0.19307 0.172140 0.170680 1.53890 0.960570 0.267720 ... 92.900 203.20 204.500 22.000 22.0 6.4 37 0.500770 7 44
3671 0.518650 1.6148 0.176760 0.36369 0.18442 0.169550 0.197860 1.58420 0.958450 0.277780 ... 91.700 227.50 214.500 21.000 20.5 8.6 37 0.611030 8 45

3672 rows × 84 columns

Out[15]:
<Axes: >
[seaborn heatmap of the pairwise feature correlations]
In [16]:
# Greedy filter on the correlation matrix: for every pair of features with
# |corr| >= 0.9, keep the first one and drop the second.
columns = np.full((corr.shape[0],), True, dtype=bool)
for i in range(corr.shape[0]):
    for j in range(i + 1, corr.shape[0]):
        if abs(corr.iloc[i, j]) >= 0.9:
            columns[j] = False

selected_columns = df.columns[columns]
selected_columns.shape
Out[16]:
(68,)
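Aside: the nested loop above can be written without explicit indexing. A minimal equivalent sketch over the upper triangle of the correlation matrix (same 0.9 threshold, same keep-the-first-of-each-pair behavior; the names upper and to_drop are illustrative):

# Mask everything except the strict upper triangle, then drop any column
# that correlates at |r| >= 0.9 with an earlier column.
upper = corr.where(np.triu(np.ones(corr.shape, dtype=bool), k=1))
to_drop = [c for c in upper.columns if (upper[c].abs() >= 0.9).any()]
# df.columns.drop(to_drop) matches selected_columns: 68 columns remain.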
In [17]:
df = df[selected_columns]
df
Out[17]:
Financial Distress x1 x2 x3 x4 x5 x6 x8 x9 x10 ... x69 x70 x71 x72 x73 x74 x78 x80 x82 x83
0 0.010636 1.2810 0.022934 0.87454 1.21640 0.060940 0.188270 0.018854 0.182790 0.006449 ... 364.9500 15.8 61.476 4.0 36.0 85.437 16.0 22 30 49
1 -0.455970 1.2700 0.006454 0.82067 1.00490 -0.014080 0.181040 0.006423 0.035991 0.001795 ... 0.1896 15.6 24.579 0.0 36.0 107.090 16.0 22 31 50
2 -0.325390 1.0529 -0.059379 0.92242 0.72926 0.020476 0.044865 -0.081423 -0.765400 -0.054324 ... 11.9460 15.2 20.700 0.0 35.0 120.870 15.0 22 32 51
3 -0.566570 1.1131 -0.015229 0.85888 0.80974 0.076037 0.091033 -0.018807 -0.107910 -0.065316 ... -18.7480 10.4 47.429 4.0 33.0 54.806 16.0 22 33 52
4 1.357300 1.0623 0.107020 0.81460 0.83593 0.199960 0.047800 0.128030 0.577250 0.094075 ... 364.9500 15.8 61.476 4.0 36.0 85.437 16.0 29 7 27
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
3667 0.438020 2.2605 0.202890 0.16037 0.18588 0.175970 0.198400 1.091500 0.241640 0.226860 ... 14.5290 21.5 33.768 2.0 22.0 100.000 14.5 37 4 41
3668 0.482410 1.9615 0.216440 0.20095 0.21642 0.203590 0.189870 1.000100 0.270870 0.213610 ... 3.8523 30.5 -10.665 0.0 28.0 91.500 14.5 37 5 42
3669 0.500770 1.7099 0.207970 0.26136 0.21399 0.193670 0.183890 0.971860 0.281560 0.210970 ... -25.8410 34.7 36.030 2.0 32.0 87.100 14.5 37 6 43
3670 0.611030 1.5590 0.185450 0.30728 0.19307 0.172140 0.170680 0.960570 0.267720 0.203190 ... -58.1220 15.6 22.571 2.0 30.0 92.900 22.0 37 7 44
3671 0.518650 1.6148 0.176760 0.36369 0.18442 0.169550 0.197860 0.958450 0.277780 0.213850 ... -32.2090 11.9 13.871 1.0 29.0 91.700 20.5 37 8 45

3672 rows × 68 columns

In [18]:
import statsmodels.api as sm

def backwardElimination(x, Y, sl, columns):
    """Repeatedly drop the regressor with the largest p-value until all p-values are <= sl."""
    for _ in range(x.shape[1]):
        regressor_OLS = sm.OLS(Y, x).fit()
        # argmax avoids the fragile float-equality scan over p-values.
        max_idx = int(np.argmax(regressor_OLS.pvalues))
        if regressor_OLS.pvalues[max_idx] <= sl:
            break  # every remaining feature is significant
        x = np.delete(x, max_idx, axis=1)
        columns = np.delete(columns, max_idx)
    return x, columns

selected_columns = selected_columns.drop(["Financial Distress"], errors='ignore')
selected_columns
Out[18]:
Index(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x8', 'x9', 'x10', 'x11', 'x12',
       'x14', 'x15', 'x16', 'x17', 'x18', 'x19', 'x20', 'x21', 'x22', 'x23',
       'x24', 'x25', 'x26', 'x27', 'x28', 'x29', 'x30', 'x31', 'x32', 'x35',
       'x36', 'x37', 'x39', 'x40', 'x41', 'x42', 'x43', 'x44', 'x45', 'x46',
       'x47', 'x51', 'x54', 'x55', 'x56', 'x57', 'x58', 'x59', 'x60', 'x61',
       'x63', 'x64', 'x65', 'x66', 'x67', 'x68', 'x69', 'x70', 'x71', 'x72',
       'x73', 'x74', 'x78', 'x80', 'x82', 'x83'],
      dtype='object')
In [19]:
SL = 0.05  # significance level for backward elimination
new_data, new_columns = backwardElimination(
    df.iloc[:, 1:].values, df.iloc[:, 0].values, SL, selected_columns
)
data = pd.DataFrame(data=new_data, columns=new_columns)
data["Distress"] = df["Financial Distress"]
data
Out[19]:
x3 x4 x5 x10 x14 x18 x23 x24 x25 x29 x37 x41 x46 x54 x63 x70 x73 x80 Distress
0 0.87454 1.21640 0.060940 0.006449 6.97060 0.018265 0.148720 0.66995 214.760 0.204590 1.630700 9.69510 0.026224 209.87 3.27020 15.8 36.0 22.0 0.010636
1 0.82067 1.00490 -0.014080 0.001795 4.57640 0.027558 0.056026 0.67048 38.242 0.150190 0.837540 5.60350 0.007864 250.14 14.32100 15.6 36.0 22.0 -0.455970
2 0.92242 0.72926 0.020476 -0.054324 11.89000 0.012595 0.065220 0.84827 -498.390 0.074149 0.955790 9.40030 -0.064373 280.55 1.15380 15.2 35.0 22.0 -0.325390
3 0.85888 0.80974 0.076037 -0.065316 6.08620 0.011601 0.125160 0.80478 -75.867 0.054098 0.383350 5.73790 -0.017731 413.74 2.04080 10.4 33.0 22.0 -0.566570
4 0.81460 0.83593 0.199960 0.094075 4.39380 0.006814 0.266020 0.76770 1423.100 0.046907 0.253010 4.50880 0.131380 315.34 3.27020 15.8 36.0 29.0 1.357300
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
3667 0.16037 0.18588 0.175970 0.226860 0.19101 0.014077 0.994340 0.15740 390.260 0.002976 0.003544 0.22138 1.265100 16961.00 -0.53449 21.5 22.0 37.0 0.438020
3668 0.20095 0.21642 0.203590 0.213610 0.25149 0.018249 0.992440 0.19747 443.840 0.003484 0.004359 0.27085 1.077100 20689.00 -25.73600 30.5 28.0 37.0 0.482410
3669 0.26136 0.21399 0.193670 0.210970 0.35384 0.007451 0.982420 0.25902 475.560 0.002343 0.003172 0.28971 0.795720 34012.00 -3.06590 34.7 32.0 37.0 0.500770
3670 0.30728 0.19307 0.172140 0.203190 0.44358 0.021239 0.985230 0.30533 457.060 0.001942 0.002803 0.27871 0.603540 35901.00 7.15620 15.6 30.0 37.0 0.611030
3671 0.36369 0.18442 0.169550 0.213850 0.57156 0.013783 0.994000 0.32184 505.040 0.041852 0.065773 0.28982 0.486010 28173.00 12.14500 11.9 29.0 37.0 0.518650

3672 rows × 19 columns

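Aside: backwardElimination above regresses without an intercept (sm.OLS adds none by default), so the p-values are for a fit through the origin. A hedged sketch of a variant that keeps an intercept via sm.add_constant; this is not what was run above, and backward_elimination_with_const is a hypothetical name:

def backward_elimination_with_const(x, y, sl):
    x = sm.add_constant(x)                 # prepend an intercept column of ones
    keep = np.arange(x.shape[1])           # surviving column indices (0 = const)
    while x.shape[1] > 1:
        pvalues = sm.OLS(y, x).fit().pvalues
        worst = int(np.argmax(pvalues[1:])) + 1  # never drop the intercept
        if pvalues[worst] <= sl:
            break
        x = np.delete(x, worst, axis=1)
        keep = np.delete(keep, worst)
    return x, keep[1:] - 1                 # 0-based positions of kept features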
In [20]:
from src.utils import split_stratified_into_train_val_test


X_train, X_test, y_train, y_test = split_stratified_into_train_val_test(
    data,
    stratify_colname="Distress",
    frac_train=0.8,
    frac_val=0,
    frac_test=0.2,
    random_state=random_state,
)

display(X_train.head(3))
display(y_train.head(3))
display(X_test.head(3))
display(y_test.head(3))
x3 x4 x5 x10 x14 x18 x23 x24 x25 x29 x37 x41 x46 x54 x63 x70 x73 x80
1156 0.71056 0.93446 0.14445 0.14572 2.45500 0.045089 0.19754 0.66553 625.41 0.045031 0.155580 3.22850 0.11500 874.69 -3.0266 25.4 28.0 9.0
1993 0.21104 0.59523 0.30998 0.48288 0.26750 0.001754 0.56306 0.19858 1600.20 0.012465 0.015800 0.75445 2.10980 47173.00 -3.0659 34.7 32.0 4.0
1924 0.46072 0.90327 0.28563 0.45008 0.85431 0.024656 0.43336 0.45475 4659.80 0.005962 0.011055 1.67490 0.81567 12851.00 7.1562 15.6 30.0 25.0
Distress
1156 0.6382
1993 0.4402
1924 3.2629
x3 x4 x5 x10 x14 x18 x23 x24 x25 x29 x37 x41 x46 x54 x63 x70 x73 x80
3379 0.62266 0.74377 0.13716 0.008050 1.65010 0.034872 0.20639 0.42211 734.24 0.20055 0.53147 1.9711 0.207370 620.53 7.7373 15.400 35.5 25.0
156 0.79108 0.68615 0.10943 0.011391 3.78650 0.002455 0.19456 0.56425 653.83 0.22683 1.08570 3.2842 0.061802 225.64 1.1538 15.200 35.0 12.0
2215 0.46538 0.54146 0.25140 0.187750 0.87049 0.027462 0.46916 0.22192 601.83 0.24346 0.45540 1.0128 0.431220 473.60 9.7164 15.683 36.0 15.0
Distress
3379 0.121330
156 0.080083
2215 1.164000
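split_stratified_into_train_val_test comes from src.utils, which is not shown in this notebook. A hypothetical sketch of what such a helper might look like, assuming it wraps sklearn's train_test_split and bins a continuous stratify column first (raw continuous values cannot be passed to stratify=); split_sketch and the nunique() > 20 heuristic are assumptions, not the project's actual code:

from sklearn.model_selection import train_test_split

def split_sketch(df, stratify_colname, frac_test, random_state):
    # With frac_val=0 the split degenerates to train/test (frac_train = 1 - frac_test).
    y = df[[stratify_colname]]
    X = df.drop(columns=[stratify_colname])
    strata = y[stratify_colname]
    if strata.nunique() > 20:  # assumed heuristic for a continuous target
        strata = pd.qcut(strata, q=10, labels=False, duplicates="drop")
    return train_test_split(
        X, y, test_size=frac_test, stratify=strata, random_state=random_state
    )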
In [21]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn import linear_model, tree, neighbors, ensemble

models = {
    "linear": {"model": linear_model.LinearRegression(n_jobs=-1)},
    "linear_poly": {
        "model": make_pipeline(
            PolynomialFeatures(degree=2),
            linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),
        )
    },
    "linear_interact": {
        "model": make_pipeline(
            PolynomialFeatures(interaction_only=True),
            linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),
        )
    },
    "ridge": {"model": linear_model.RidgeCV()},
    "decision_tree": {
        "model": tree.DecisionTreeRegressor(max_depth=7, random_state=random_state)
    },
    "knn": {"model": neighbors.KNeighborsRegressor(n_neighbors=7, n_jobs=-1)},
    "random_forest": {
        "model": ensemble.RandomForestRegressor(
            max_depth=7, random_state=random_state, n_jobs=-1
        )
    },
}
In [22]:
from src.utils import run_regression

for model_name in models.keys():
    print(f"Model: {model_name}")
    fitted_model = models[model_name]["model"].fit(
        X_train.values, y_train.values.ravel()
    )
    models[model_name] = run_regression(
        fitted_model, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test
    )
Model: linear
Model: linear_poly
Model: linear_interact
Model: ridge
Model: decision_tree
Model: knn
Model: random_forest
In [23]:
reg_metrics = pd.DataFrame.from_dict(models, "index")[
    ["RMSE_train", "RMSE_test", "RMAE_test", "R2_test"]
]
reg_metrics.sort_values(by="RMSE_test").style.background_gradient(
    cmap="viridis", low=1, high=0.3, subset=["RMSE_train", "RMSE_test"]
).background_gradient(cmap="plasma", low=0.3, high=1, subset=["RMAE_test", "R2_test"])
Out[23]:
  RMSE_train RMSE_test RMAE_test R2_test
random_forest 1.394198 1.042729 0.778401 0.456952
ridge 2.488097 1.198888 0.865585 0.282120
linear 2.474171 1.228277 0.885807 0.246493
linear_poly 0.981309 1.267218 0.833594 0.197957
linear_interact 1.025112 1.466789 0.850850 -0.074560
knn 2.376262 1.541027 0.879611 -0.186083
decision_tree 0.872007 1.566888 0.850226 -0.226227
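run_regression is also a src.utils helper not shown here. The table's columns look like standard sklearn metrics; a sketch of a plausible computation, assuming RMAE means the root of the mean absolute error:

from sklearn import metrics

def regression_metrics(model, X_train, y_train, X_test, y_test):
    pred_train = model.predict(X_train.values)
    pred_test = model.predict(X_test.values)
    return {
        "RMSE_train": np.sqrt(metrics.mean_squared_error(y_train, pred_train)),
        "RMSE_test": np.sqrt(metrics.mean_squared_error(y_test, pred_test)),
        "RMAE_test": np.sqrt(metrics.mean_absolute_error(y_test, pred_test)),
        "R2_test": metrics.r2_score(y_test, pred_test),
    }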
In [24]:
def get_class(row):
    # Dataset convention: a company counts as distressed (1) when
    # "Financial Distress" <= -0.50, healthy (0) otherwise.
    return 0 if row["Distress"] > -0.5 else 1

datac = data.copy()
datac["Distress"] = datac.apply(get_class, axis=1)
datac
Out[24]:
x3 x4 x5 x10 x14 x18 x23 x24 x25 x29 x37 x41 x46 x54 x63 x70 x73 x80 Distress
0 0.87454 1.21640 0.060940 0.006449 6.97060 0.018265 0.148720 0.66995 214.760 0.204590 1.630700 9.69510 0.026224 209.87 3.27020 15.8 36.0 22.0 0
1 0.82067 1.00490 -0.014080 0.001795 4.57640 0.027558 0.056026 0.67048 38.242 0.150190 0.837540 5.60350 0.007864 250.14 14.32100 15.6 36.0 22.0 0
2 0.92242 0.72926 0.020476 -0.054324 11.89000 0.012595 0.065220 0.84827 -498.390 0.074149 0.955790 9.40030 -0.064373 280.55 1.15380 15.2 35.0 22.0 0
3 0.85888 0.80974 0.076037 -0.065316 6.08620 0.011601 0.125160 0.80478 -75.867 0.054098 0.383350 5.73790 -0.017731 413.74 2.04080 10.4 33.0 22.0 1
4 0.81460 0.83593 0.199960 0.094075 4.39380 0.006814 0.266020 0.76770 1423.100 0.046907 0.253010 4.50880 0.131380 315.34 3.27020 15.8 36.0 29.0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
3667 0.16037 0.18588 0.175970 0.226860 0.19101 0.014077 0.994340 0.15740 390.260 0.002976 0.003544 0.22138 1.265100 16961.00 -0.53449 21.5 22.0 37.0 0
3668 0.20095 0.21642 0.203590 0.213610 0.25149 0.018249 0.992440 0.19747 443.840 0.003484 0.004359 0.27085 1.077100 20689.00 -25.73600 30.5 28.0 37.0 0
3669 0.26136 0.21399 0.193670 0.210970 0.35384 0.007451 0.982420 0.25902 475.560 0.002343 0.003172 0.28971 0.795720 34012.00 -3.06590 34.7 32.0 37.0 0
3670 0.30728 0.19307 0.172140 0.203190 0.44358 0.021239 0.985230 0.30533 457.060 0.001942 0.002803 0.27871 0.603540 35901.00 7.15620 15.6 30.0 37.0 0
3671 0.36369 0.18442 0.169550 0.213850 0.57156 0.013783 0.994000 0.32184 505.040 0.041852 0.065773 0.28982 0.486010 28173.00 12.14500 11.9 29.0 37.0 0

3672 rows × 19 columns

In [25]:
from imblearn.over_sampling import ADASYN

Xc_train, Xc_test, yc_train, yc_test = split_stratified_into_train_val_test(
    datac,
    stratify_colname="Distress",
    frac_train=0.8,
    frac_val=0,
    frac_test=0.2,
    random_state=random_state,
)

# Oversample the minority (distressed) class on the training split only.
ada = ADASYN()

Xc_train, yc_train = ada.fit_resample(Xc_train, yc_train)

display(Xc_train.head(3))
display(yc_train.head(3))
display(Xc_test.head(3))
display(yc_test.head(3))
x3 x4 x5 x10 x14 x18 x23 x24 x25 x29 x37 x41 x46 x54 x63 x70 x73 x80
0 0.71056 0.93446 0.14445 0.14572 2.45500 0.045089 0.19754 0.66553 625.41 0.045031 0.155580 3.22850 0.11500 874.69 -3.0266 25.4 28.0 9.0
1 0.21104 0.59523 0.30998 0.48288 0.26750 0.001754 0.56306 0.19858 1600.20 0.012465 0.015800 0.75445 2.10980 47173.00 -3.0659 34.7 32.0 4.0
2 0.46072 0.90327 0.28563 0.45008 0.85431 0.024656 0.43336 0.45475 4659.80 0.005962 0.011055 1.67490 0.81567 12851.00 7.1562 15.6 30.0 25.0
Distress
0 0
1 0
2 0
x3 x4 x5 x10 x14 x18 x23 x24 x25 x29 x37 x41 x46 x54 x63 x70 x73 x80
3379 0.62266 0.74377 0.13716 0.008050 1.65010 0.034872 0.20639 0.42211 734.24 0.20055 0.53147 1.9711 0.207370 620.53 7.7373 15.400 35.5 25.0
156 0.79108 0.68615 0.10943 0.011391 3.78650 0.002455 0.19456 0.56425 653.83 0.22683 1.08570 3.2842 0.061802 225.64 1.1538 15.200 35.0 12.0
2215 0.46538 0.54146 0.25140 0.187750 0.87049 0.027462 0.46916 0.22192 601.83 0.24346 0.45540 1.0128 0.431220 473.60 9.7164 15.683 36.0 15.0
Distress
3379 0
156 0
2215 0
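A quick sanity check on the resampling (not part of the original run): the test distribution is untouched, while the training counts should come out roughly balanced after ADASYN:

print(yc_test["Distress"].value_counts())   # untouched test distribution
print(yc_train["Distress"].value_counts())  # roughly balanced after ADASYN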
In [26]:
from src.utils import run_classification
from sklearn import tree


fitted_model = tree.DecisionTreeClassifier(max_depth=7, random_state=random_state).fit(
    Xc_train.values, yc_train.values.ravel()
)
result = run_classification(
    fitted_model, X_train=Xc_train, X_test=Xc_test, y_train=yc_train, y_test=yc_test
)
result
c:\Users\user\Projects\python\fuzzy\.venv\Lib\site-packages\sklearn\base.py:486: UserWarning: X has feature names, but DecisionTreeClassifier was fitted without feature names
  warnings.warn(
c:\Users\user\Projects\python\fuzzy\.venv\Lib\site-packages\sklearn\base.py:486: UserWarning: X has feature names, but DecisionTreeClassifier was fitted without feature names
  warnings.warn(
Out[26]:
{'pipeline': DecisionTreeClassifier(max_depth=7, random_state=9),
 'probs': array([1.        , 0.17698154, 1.        , 0.04407713, 1.        ,
        1.        , 1.        , 1.        , 1.        , 1.        ,
        ...,
        0.04407713, 1.        , 1.        , 1.        , 1.        ]),
 'preds': array([1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1,
        ...,
        0, 1, 1, 0, 0, 1, 1, 1, 1]),
 'Precision_train': np.float64(0.9157792836398838),
 'Precision_test': np.float64(0.011345218800648298),
 'Recall_train': np.float64(0.9978902953586498),
 'Recall_test': np.float64(0.3181818181818182),
 'Accuracy_train': 0.9528851244044468,
 'Accuracy_test': 0.14965986394557823,
 'ROC_AUC_test': np.float64(0.21318373071528754),
 'F1_train': np.float64(0.9550731953558809),
 'F1_test': np.float64(0.02190923317683881),
 'MCC_test': np.float64(-0.2494229220759723),
 'Cohen_kappa_test': np.float64(-0.03809571157718228),
 'Confusion_matrix': array([[103, 610],
        [ 15,   7]])}
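The test-side collapse is visible directly in the confusion matrix [[103, 610], [15, 7]]; the reported metrics follow from it:

tn, fp, fn, tp = 103, 610, 15, 7
print(tp / (tp + fp))                   # Precision_test = 7/617 ≈ 0.0113
print(tp / (tp + fn))                   # Recall_test = 7/22 ≈ 0.3182
print((tp + tn) / (tn + fp + fn + tp))  # Accuracy_test = 110/735 ≈ 0.1497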
In [27]:
rules = tree.export_text(
    fitted_model,
    feature_names=Xc_train.columns.values.tolist(),  # columns the classifier was trained on
)
print(rules)
|--- x46 <= 0.07
|   |--- x10 <= -0.00
|   |   |--- x14 <= 1.65
|   |   |   |--- x23 <= 0.16
|   |   |   |   |--- x54 <= 148.12
|   |   |   |   |   |--- x70 <= 15.70
|   |   |   |   |   |   |--- x54 <= 40.29
|   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |--- x54 >  40.29
|   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |--- x70 >  15.70
|   |   |   |   |   |   |--- class: 0
|   |   |   |   |--- x54 >  148.12
|   |   |   |   |   |--- class: 0
|   |   |   |--- x23 >  0.16
|   |   |   |   |--- x24 <= 0.31
|   |   |   |   |   |--- class: 0
|   |   |   |   |--- x24 >  0.31
|   |   |   |   |   |--- class: 1
|   |   |--- x14 >  1.65
|   |   |   |--- x41 <= 0.27
|   |   |   |   |--- class: 0
|   |   |   |--- x41 >  0.27
|   |   |   |   |--- x70 <= 10.43
|   |   |   |   |   |--- x46 <= -0.02
|   |   |   |   |   |   |--- x37 <= 0.39
|   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |--- x37 >  0.39
|   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |--- x46 >  -0.02
|   |   |   |   |   |   |--- class: 0
|   |   |   |   |--- x70 >  10.43
|   |   |   |   |   |--- x41 <= 24.10
|   |   |   |   |   |   |--- x73 <= 22.02
|   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |--- x73 >  22.02
|   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |--- x41 >  24.10
|   |   |   |   |   |   |--- class: 0
|   |--- x10 >  -0.00
|   |   |--- x41 <= 2.26
|   |   |   |--- x37 <= 0.37
|   |   |   |   |--- class: 0
|   |   |   |--- x37 >  0.37
|   |   |   |   |--- x5 <= 0.03
|   |   |   |   |   |--- x73 <= 29.22
|   |   |   |   |   |   |--- x14 <= 6.58
|   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |--- x14 >  6.58
|   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |--- x73 >  29.22
|   |   |   |   |   |   |--- x4 <= 0.20
|   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |--- x4 >  0.20
|   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |--- x5 >  0.03
|   |   |   |   |   |--- x29 <= 0.09
|   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |--- x29 >  0.09
|   |   |   |   |   |   |--- x41 <= 2.15
|   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |--- x41 >  2.15
|   |   |   |   |   |   |   |--- class: 0
|   |   |--- x41 >  2.26
|   |   |   |--- x37 <= 0.17
|   |   |   |   |--- x25 <= 112.70
|   |   |   |   |   |--- x41 <= 3.17
|   |   |   |   |   |   |--- x23 <= 0.26
|   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |--- x23 >  0.26
|   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |--- x41 >  3.17
|   |   |   |   |   |   |--- x4 <= 1.30
|   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |--- x4 >  1.30
|   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |--- x25 >  112.70
|   |   |   |   |   |--- x3 <= 0.69
|   |   |   |   |   |   |--- x23 <= 0.16
|   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |--- x23 >  0.16
|   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |--- x3 >  0.69
|   |   |   |   |   |   |--- x46 <= 0.07
|   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |--- x46 >  0.07
|   |   |   |   |   |   |   |--- class: 1
|   |   |   |--- x37 >  0.17
|   |   |   |   |--- x73 <= 27.06
|   |   |   |   |   |--- x29 <= 0.03
|   |   |   |   |   |   |--- x80 <= 19.00
|   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |--- x80 >  19.00
|   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |--- x29 >  0.03
|   |   |   |   |   |   |--- class: 0
|   |   |   |   |--- x73 >  27.06
|   |   |   |   |   |--- x70 <= 30.33
|   |   |   |   |   |   |--- x80 <= 27.95
|   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |--- x80 >  27.95
|   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |--- x70 >  30.33
|   |   |   |   |   |   |--- x37 <= 0.19
|   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |--- x37 >  0.19
|   |   |   |   |   |   |   |--- class: 0
|--- x46 >  0.07
|   |--- x14 <= 3.44
|   |   |--- x25 <= 152.01
|   |   |   |--- x37 <= 0.25
|   |   |   |   |--- class: 0
|   |   |   |--- x37 >  0.25
|   |   |   |   |--- x41 <= 1.66
|   |   |   |   |   |--- class: 0
|   |   |   |   |--- x41 >  1.66
|   |   |   |   |   |--- class: 1
|   |   |--- x25 >  152.01
|   |   |   |--- x46 <= 0.10
|   |   |   |   |--- x41 <= 3.13
|   |   |   |   |   |--- x25 <= 1069.03
|   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |--- x25 >  1069.03
|   |   |   |   |   |   |--- x23 <= 0.24
|   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |   |--- x23 >  0.24
|   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |--- x41 >  3.13
|   |   |   |   |   |--- x4 <= 1.12
|   |   |   |   |   |   |--- x10 <= 0.04
|   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |--- x10 >  0.04
|   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |--- x4 >  1.12
|   |   |   |   |   |   |--- class: 0
|   |   |   |--- x46 >  0.10
|   |   |   |   |--- x25 <= 240.30
|   |   |   |   |   |--- x41 <= 3.33
|   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |--- x41 >  3.33
|   |   |   |   |   |   |--- x23 <= 0.17
|   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |--- x23 >  0.17
|   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |--- x25 >  240.30
|   |   |   |   |   |--- x5 <= 0.02
|   |   |   |   |   |   |--- x5 <= 0.02
|   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |--- x5 >  0.02
|   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |--- x5 >  0.02
|   |   |   |   |   |   |--- class: 0
|   |--- x14 >  3.44
|   |   |--- x5 <= 0.09
|   |   |   |--- x54 <= 1165.84
|   |   |   |   |--- x37 <= 0.14
|   |   |   |   |   |--- class: 0
|   |   |   |   |--- x37 >  0.14
|   |   |   |   |   |--- x41 <= 2.36
|   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |--- x41 >  2.36
|   |   |   |   |   |   |--- x80 <= 10.34
|   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |--- x80 >  10.34
|   |   |   |   |   |   |   |--- class: 1
|   |   |   |--- x54 >  1165.84
|   |   |   |   |--- class: 0
|   |   |--- x5 >  0.09
|   |   |   |--- x70 <= 16.37
|   |   |   |   |--- x23 <= 0.08
|   |   |   |   |   |--- class: 1
|   |   |   |   |--- x23 >  0.08
|   |   |   |   |   |--- x54 <= 150.56
|   |   |   |   |   |   |--- x10 <= 0.04
|   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |--- x10 >  0.04
|   |   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |--- x54 >  150.56
|   |   |   |   |   |   |--- class: 0
|   |   |   |--- x70 >  16.37
|   |   |   |   |--- x54 <= 911.20
|   |   |   |   |   |--- x41 <= 4.52
|   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |--- x41 >  4.52
|   |   |   |   |   |   |--- class: 0
|   |   |   |   |--- x54 >  911.20
|   |   |   |   |   |--- x25 <= 2874.98
|   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |--- x25 >  2874.98
|   |   |   |   |   |   |--- class: 1

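The same structure is available programmatically via the estimator's tree_ attribute; for example, the root split printed above ("x46 <= 0.07") is node 0:

t = fitted_model.tree_
names = Xc_train.columns.values.tolist()
print(names[t.feature[0]], "<=", round(float(t.threshold[0]), 2))  # x46 <= 0.07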
In [28]:
import pickle

# Persist the fitted tree; a context manager ensures the file handle is closed.
with open("data-distress/tree.model.sav", "wb") as f:
    pickle.dump(fitted_model, f)
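The saved classifier can be restored later with pickle.load:

with open("data-distress/tree.model.sav", "rb") as f:
    restored = pickle.load(f)
# restored is the same DecisionTreeClassifier(max_depth=7, random_state=9)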