fuzzy-rules-generator/viscosity_regression.ipynb
2024-11-01 11:04:05 +04:00

26 KiB

In [1]:
import pandas as pd

# Both splits are parsed with the same options: ";" as the field separator
# and "," as the decimal mark.
viscosity_train, viscosity_test = (
    pd.read_csv(csv_path, sep=";", decimal=",")
    for csv_path in ("data/viscosity_train.csv", "data/viscosity_test.csv")
)

# Preview the first three rows of each split.
display(viscosity_train.head(3), viscosity_test.head(3))
T Al2O3 TiO2 Viscosity
0 20 0.0 0.0 3.707
1 25 0.0 0.0 3.180
2 35 0.0 0.0 2.361
T Al2O3 TiO2 Viscosity
0 30 0.0 0.0 2.716
1 40 0.0 0.0 2.073
2 60 0.0 0.0 1.329
In [2]:
# Separate the "Viscosity" target from the feature columns of the training split.
# NOTE: `viscosity_train` is rebound to the frame without the target column, so
# running this cell a second time raises KeyError on the "Viscosity" lookup.
viscosity_y_train = viscosity_train["Viscosity"]
viscosity_train = viscosity_train.drop(columns="Viscosity")
display(viscosity_train.head(3), viscosity_y_train.head(3))

# Same separation for the test split.
viscosity_y_test = viscosity_test["Viscosity"]
viscosity_test = viscosity_test.drop(columns="Viscosity")
display(viscosity_test.head(3), viscosity_y_test.head(3))
T Al2O3 TiO2
0 20 0.0 0.0
1 25 0.0 0.0
2 35 0.0 0.0
0    3.707
1    3.180
2    2.361
Name: Viscosity, dtype: float64
T Al2O3 TiO2
0 30 0.0 0.0
1 40 0.0 0.0
2 60 0.0 0.0
0    2.716
1    2.073
2    1.329
Name: Viscosity, dtype: float64
In [3]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn import linear_model, tree, neighbors, ensemble

# Seed shared by every estimator below that accepts `random_state`.
random_state = 9

# Registry of candidate regressors: name -> {"model": <unfitted estimator>}.
# Later cells add more keys to each inner dict, so every entry gets its own dict.
models = {
    name: {"model": estimator}
    for name, estimator in (
        ("linear", linear_model.LinearRegression(n_jobs=-1)),
        (
            "linear_poly",
            # Degree-2 polynomial expansion; the intercept is disabled in the
            # linear step of the pipeline.
            make_pipeline(
                PolynomialFeatures(degree=2),
                linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),
            ),
        ),
        (
            "linear_interact",
            # Interaction-only expansion, again with the intercept disabled
            # in the linear step.
            make_pipeline(
                PolynomialFeatures(interaction_only=True),
                linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),
            ),
        ),
        ("ridge", linear_model.RidgeCV()),
        (
            "decision_tree",
            tree.DecisionTreeRegressor(max_depth=7, random_state=random_state),
        ),
        ("knn", neighbors.KNeighborsRegressor(n_neighbors=7, n_jobs=-1)),
        (
            "random_forest",
            ensemble.RandomForestRegressor(
                max_depth=7, random_state=random_state, n_jobs=-1
            ),
        ),
    )
}
In [4]:
import math
from sklearn import metrics

# Fit every registered estimator on the training split and record its
# predictions and error metrics in the same registry entry.
for model_name, entry in models.items():
    print(f"Model: {model_name}")

    # Estimators are trained and queried on plain NumPy arrays.
    fitted_model = entry["model"].fit(
        viscosity_train.values, viscosity_y_train.values.ravel()
    )
    y_train_pred = fitted_model.predict(viscosity_train.values)
    y_test_pred = fitted_model.predict(viscosity_test.values)

    entry.update(
        {
            "fitted": fitted_model,
            "train_preds": y_train_pred,
            "preds": y_test_pred,
            "RMSE_train": math.sqrt(
                metrics.mean_squared_error(viscosity_y_train, y_train_pred)
            ),
            "RMSE_test": math.sqrt(
                metrics.mean_squared_error(viscosity_y_test, y_test_pred)
            ),
            # NOTE(review): this is the square root of the mean absolute error,
            # not the MAE itself -- confirm that this is the intended metric.
            "RMAE_test": math.sqrt(
                metrics.mean_absolute_error(viscosity_y_test, y_test_pred)
            ),
            "R2_test": metrics.r2_score(viscosity_y_test, y_test_pred),
        }
    )
Model: linear
Model: linear_poly
Model: linear_interact
Model: ridge
Model: decision_tree
Model: knn
Model: random_forest
In [5]:
# One row per model, keeping only the scalar metric columns of the registry.
reg_metrics = pd.DataFrame.from_dict(models, orient="index").loc[
    :, ["RMSE_train", "RMSE_test", "RMAE_test", "R2_test"]
]

# Rank by test RMSE and colour the two column groups with different colormaps.
(
    reg_metrics.sort_values(by="RMSE_test")
    .style.background_gradient(
        cmap="viridis",
        low=1,
        high=0.3,
        subset=["RMSE_train", "RMSE_test"],
    )
    .background_gradient(
        cmap="plasma",
        low=0.3,
        high=1,
        subset=["RMAE_test", "R2_test"],
    )
)
Out[5]:
  RMSE_train RMSE_test RMAE_test R2_test
linear_poly 0.150745 0.139507 0.336239 0.978119
linear_interact 0.361309 0.303389 0.527911 0.896517
random_forest 0.226420 0.341014 0.545765 0.869259
ridge 0.472399 0.378573 0.559409 0.838873
decision_tree 0.054533 0.379017 0.587467 0.838495
linear 0.441760 0.428940 0.617212 0.793147
knn 0.666903 0.566901 0.702700 0.638689
In [10]:
import numpy as np
from sklearn import model_selection

# Hyper-parameter grid for the decision tree:
#   max_depth          -> odd values 1, 3, ..., 19
#   min_samples_split  -> even values 2, 4, ..., 18
parameters = {
    "criterion": ["squared_error", "absolute_error", "friedman_mse", "poisson"],
    "max_depth": list(range(1, 21, 2)),
    "min_samples_split": list(range(2, 20, 2)),
}

# Exhaustive search over the grid with GridSearchCV's default cv and scoring.
grid = model_selection.GridSearchCV(
    estimator=tree.DecisionTreeRegressor(random_state=random_state),
    param_grid=parameters,
    n_jobs=-1,
)

# The search is fitted on the DataFrame/Series directly (not on `.values`).
grid.fit(viscosity_train, viscosity_y_train)
grid.best_params_
Out[10]:
{'criterion': 'poisson', 'max_depth': 9, 'min_samples_split': 2}
In [11]:
# Score the best estimator found by the grid search on the test split.
model = grid.best_estimator_
y_pred = model.predict(viscosity_test)

# Metrics recorded earlier for the baseline decision tree.
old_metrics = {
    metric: models["decision_tree"][metric]
    for metric in ("RMSE_test", "RMAE_test", "R2_test")
}

# Same three metrics for the tuned tree, computed with the same formulas
# (both RMSE and RMAE are square roots of the respective mean errors).
new_metrics = {
    "RMSE_test": math.sqrt(metrics.mean_squared_error(viscosity_y_test, y_pred)),
    "RMAE_test": math.sqrt(metrics.mean_absolute_error(viscosity_y_test, y_pred)),
    "R2_test": metrics.r2_score(viscosity_y_test, y_pred),
}

display(old_metrics, new_metrics)
{'RMSE_test': 0.37901722760783496,
 'RMAE_test': 0.5874671455143883,
 'R2_test': 0.8384951109125148}
{'RMSE_test': 0.39412315184917696,
 'RMAE_test': 0.593196723643326,
 'R2_test': 0.8253648477295591}
In [12]:
# Render the baseline decision tree as indented if/else text rules, labelling
# each split with the feature column names instead of positional indices.
rules = tree.export_text(
    decision_tree=models["decision_tree"]["fitted"],
    feature_names=list(viscosity_train.columns),
)
print(rules)
|--- T <= 32.50
|   |--- TiO2 <= 0.18
|   |   |--- Al2O3 <= 0.18
|   |   |   |--- T <= 22.50
|   |   |   |   |--- TiO2 <= 0.03
|   |   |   |   |   |--- Al2O3 <= 0.03
|   |   |   |   |   |   |--- value: [3.71]
|   |   |   |   |   |--- Al2O3 >  0.03
|   |   |   |   |   |   |--- value: [4.66]
|   |   |   |   |--- TiO2 >  0.03
|   |   |   |   |   |--- value: [4.88]
|   |   |   |--- T >  22.50
|   |   |   |   |--- TiO2 <= 0.03
|   |   |   |   |   |--- Al2O3 <= 0.03
|   |   |   |   |   |   |--- value: [3.18]
|   |   |   |   |   |--- Al2O3 >  0.03
|   |   |   |   |   |   |--- value: [3.38]
|   |   |   |   |--- TiO2 >  0.03
|   |   |   |   |   |--- value: [4.24]
|   |   |--- Al2O3 >  0.18
|   |   |   |--- T <= 22.50
|   |   |   |   |--- value: [6.67]
|   |   |   |--- T >  22.50
|   |   |   |   |--- T <= 27.50
|   |   |   |   |   |--- value: [5.59]
|   |   |   |   |--- T >  27.50
|   |   |   |   |   |--- value: [4.73]
|   |--- TiO2 >  0.18
|   |   |--- T <= 22.50
|   |   |   |--- value: [7.13]
|   |   |--- T >  22.50
|   |   |   |--- T <= 27.50
|   |   |   |   |--- value: [5.87]
|   |   |   |--- T >  27.50
|   |   |   |   |--- value: [4.94]
|--- T >  32.50
|   |--- T <= 47.50
|   |   |--- TiO2 <= 0.18
|   |   |   |--- Al2O3 <= 0.18
|   |   |   |   |--- T <= 42.50
|   |   |   |   |   |--- TiO2 <= 0.03
|   |   |   |   |   |   |--- Al2O3 <= 0.03
|   |   |   |   |   |   |   |--- value: [2.36]
|   |   |   |   |   |   |--- Al2O3 >  0.03
|   |   |   |   |   |   |   |--- value: [2.68]
|   |   |   |   |   |--- TiO2 >  0.03
|   |   |   |   |   |   |--- T <= 37.50
|   |   |   |   |   |   |   |--- value: [3.12]
|   |   |   |   |   |   |--- T >  37.50
|   |   |   |   |   |   |   |--- value: [2.65]
|   |   |   |   |--- T >  42.50
|   |   |   |   |   |--- TiO2 <= 0.03
|   |   |   |   |   |   |--- value: [1.83]
|   |   |   |   |   |--- TiO2 >  0.03
|   |   |   |   |   |   |--- value: [2.40]
|   |   |   |--- Al2O3 >  0.18
|   |   |   |   |--- T <= 37.50
|   |   |   |   |   |--- value: [4.12]
|   |   |   |   |--- T >  37.50
|   |   |   |   |   |--- value: [3.56]
|   |   |--- TiO2 >  0.18
|   |   |   |--- T <= 40.00
|   |   |   |   |--- value: [4.35]
|   |   |   |--- T >  40.00
|   |   |   |   |--- value: [3.56]
|   |--- T >  47.50
|   |   |--- TiO2 <= 0.18
|   |   |   |--- Al2O3 <= 0.18
|   |   |   |   |--- T <= 52.50
|   |   |   |   |   |--- TiO2 <= 0.03
|   |   |   |   |   |   |--- Al2O3 <= 0.03
|   |   |   |   |   |   |   |--- value: [1.63]
|   |   |   |   |   |   |--- Al2O3 >  0.03
|   |   |   |   |   |   |   |--- value: [1.90]
|   |   |   |   |   |--- TiO2 >  0.03
|   |   |   |   |   |   |--- value: [2.11]
|   |   |   |   |--- T >  52.50
|   |   |   |   |   |--- T <= 65.00
|   |   |   |   |   |   |--- TiO2 <= 0.03
|   |   |   |   |   |   |   |--- value: [1.55]
|   |   |   |   |   |   |--- TiO2 >  0.03
|   |   |   |   |   |   |   |--- value: [1.66]
|   |   |   |   |   |--- T >  65.00
|   |   |   |   |   |   |--- TiO2 <= 0.03
|   |   |   |   |   |   |   |--- value: [1.19]
|   |   |   |   |   |   |--- TiO2 >  0.03
|   |   |   |   |   |   |   |--- value: [1.29]
|   |   |   |--- Al2O3 >  0.18
|   |   |   |   |--- T <= 65.00
|   |   |   |   |   |--- T <= 57.50
|   |   |   |   |   |   |--- value: [2.43]
|   |   |   |   |   |--- T >  57.50
|   |   |   |   |   |   |--- value: [2.16]
|   |   |   |   |--- T >  65.00
|   |   |   |   |   |--- value: [1.73]
|   |   |--- TiO2 >  0.18
|   |   |   |--- T <= 65.00
|   |   |   |   |--- T <= 57.50
|   |   |   |   |   |--- value: [2.84]
|   |   |   |   |--- T >  57.50
|   |   |   |   |   |--- value: [2.54]
|   |   |   |--- T >  65.00
|   |   |   |   |--- value: [1.91]

In [13]:
import pickle

# Persist the fitted baseline decision tree to disk.
# The context manager guarantees the file handle is flushed and closed even if
# pickling fails; the bare `open(...)` used before left that to garbage collection.
# Reminder for consumers: only unpickle this file from a trusted location.
with open("data/vtree.model.sav", "wb") as model_file:
    pickle.dump(models["decision_tree"]["fitted"], model_file)