fuzzy-rules-generator/temp_viscosity_regression.ipynb

26 KiB

In [2]:
import pandas as pd

# Load the train and test splits. Both CSV files are parsed with ';' as the
# field separator and ',' as the decimal mark.
train, test = (
    pd.read_csv(csv_path, sep=";", decimal=",")
    for csv_path in ("data/viscosity_train.csv", "data/viscosity_test.csv")
)

# Preview the first rows of each split (train first, then test).
display(train.head(), test.head())
T Al2O3 TiO2 Viscosity
0 20 0.0 0.0 3.707
1 25 0.0 0.0 3.180
2 35 0.0 0.0 2.361
3 45 0.0 0.0 1.832
4 50 0.0 0.0 1.629
T Al2O3 TiO2 Viscosity
0 30 0.00 0.0 2.716
1 40 0.00 0.0 2.073
2 60 0.00 0.0 1.329
3 65 0.00 0.0 1.211
4 25 0.05 0.0 4.120
In [3]:
# The "T" column is the regression target; every remaining column is a feature.
X_train, y_train = train.drop(columns=["T"]), train["T"]
display(X_train.head(), y_train.head())

# Same split for the held-out test data.
X_test, y_test = test.drop(columns=["T"]), test["T"]
display(X_test.head(), y_test.head())
Al2O3 TiO2 Viscosity
0 0.0 0.0 3.707
1 0.0 0.0 3.180
2 0.0 0.0 2.361
3 0.0 0.0 1.832
4 0.0 0.0 1.629
0    20
1    25
2    35
3    45
4    50
Name: T, dtype: int64
Al2O3 TiO2 Viscosity
0 0.00 0.0 2.716
1 0.00 0.0 2.073
2 0.00 0.0 1.329
3 0.00 0.0 1.211
4 0.05 0.0 4.120
0    30
1    40
2    60
3    65
4    25
Name: T, dtype: int64
In [10]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn import linear_model, tree, neighbors, ensemble

# Seed shared by the estimators that accept a random_state argument.
random_state = 9

# Registry of candidate regressors. Each label maps to its own dict holding
# the unfitted estimator under the "model" key; the order of the pairs below
# is the order of the entries in the resulting dict.
models = {
    label: {"model": estimator}
    for label, estimator in (
        ("linear", linear_model.LinearRegression(n_jobs=-1)),
        (
            # Degree-2 polynomial expansion followed by a linear fit without
            # a separate intercept term.
            "linear_poly",
            make_pipeline(
                PolynomialFeatures(degree=2),
                linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),
            ),
        ),
        (
            # Interaction-only feature expansion followed by the same kind
            # of linear fit.
            "linear_interact",
            make_pipeline(
                PolynomialFeatures(interaction_only=True),
                linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),
            ),
        ),
        ("ridge", linear_model.RidgeCV()),
        (
            "decision_tree",
            tree.DecisionTreeRegressor(
                random_state=random_state,
                max_depth=6,
                criterion="absolute_error",
            ),
        ),
        ("knn", neighbors.KNeighborsRegressor(n_neighbors=7, n_jobs=-1)),
        (
            "random_forest",
            ensemble.RandomForestRegressor(
                max_depth=7, random_state=random_state, n_jobs=-1
            ),
        ),
    )
}
In [11]:
import math
from sklearn import metrics

# Fit every registered estimator on the training split, predict on both
# splits, and store the fitted estimator plus MSE / MAE / R2 scores next to
# the "model" entry of the same inner dict.
for model_name, entry in models.items():
    print(f"Model: {model_name}")

    # The value returned by fit() is what gets used for prediction and is
    # kept under the "fitted" key.
    fitted_model = entry["model"].fit(X_train.values, y_train.values.ravel())
    y_train_pred = fitted_model.predict(X_train.values)
    y_test_pred = fitted_model.predict(X_test.values)

    entry.update(
        {
            "fitted": fitted_model,
            "MSE_train": metrics.mean_squared_error(y_train, y_train_pred),
            "MSE_test": metrics.mean_squared_error(y_test, y_test_pred),
            "MAE_train": metrics.mean_absolute_error(y_train, y_train_pred),
            "MAE_test": metrics.mean_absolute_error(y_test, y_test_pred),
            "R2_train": metrics.r2_score(y_train, y_train_pred),
            "R2_test": metrics.r2_score(y_test, y_test_pred),
        }
    )
Model: linear
Model: linear_poly
Model: linear_interact
Model: ridge
Model: decision_tree
Model: knn
Model: random_forest
In [12]:
# One row per model label, restricted to the score columns.
metric_columns = [
    "MSE_train",
    "MSE_test",
    "MAE_train",
    "MAE_test",
    "R2_train",
    "R2_test",
]
reg_metrics = pd.DataFrame.from_dict(models, orient="index")[metric_columns]

# Order the rows by test MAE, then colour the MSE columns with the viridis
# colormap and the MAE_test / R2_test columns with the plasma colormap.
styled_metrics = reg_metrics.sort_values(by="MAE_test").style
styled_metrics = styled_metrics.background_gradient(
    cmap="viridis", low=1, high=0.3, subset=["MSE_train", "MSE_test"]
)
styled_metrics = styled_metrics.background_gradient(
    cmap="plasma", low=0.3, high=1, subset=["MAE_test", "R2_test"]
)
styled_metrics
Out[12]:
  MSE_train MSE_test MAE_train MAE_test R2_train R2_test
linear_poly 4.827768 4.877296 1.522643 1.743058 0.980864 0.974964
linear_interact 21.786348 23.459572 3.830996 4.381115 0.913644 0.879577
linear 27.766510 35.430313 4.088006 5.106782 0.889940 0.818129
ridge 31.827476 36.230606 4.383008 5.226480 0.873843 0.814021
random_forest 8.525285 45.444651 2.542935 5.749510 0.966208 0.766723
decision_tree 3.289474 45.588235 0.921053 6.176471 0.986961 0.765986
knn 61.855532 64.165666 6.522556 6.806723 0.754819 0.670624
In [13]:
# Take the fitted decision tree and render its splits as indented text,
# labelling each split with the corresponding training column name.
model = models["decision_tree"]["fitted"]
feature_names = list(X_train.columns)
rules = tree.export_text(model, feature_names=feature_names)
print(rules)
|--- Viscosity <= 2.86
|   |--- Viscosity <= 1.38
|   |   |--- value: [70.00]
|   |--- Viscosity >  1.38
|   |   |--- Viscosity <= 1.78
|   |   |   |--- Viscosity <= 1.72
|   |   |   |   |--- TiO2 <= 0.03
|   |   |   |   |   |--- Al2O3 <= 0.03
|   |   |   |   |   |   |--- value: [52.50]
|   |   |   |   |   |--- Al2O3 >  0.03
|   |   |   |   |   |   |--- value: [57.50]
|   |   |   |   |--- TiO2 >  0.03
|   |   |   |   |   |--- value: [60.00]
|   |   |   |--- Viscosity >  1.72
|   |   |   |   |--- value: [70.00]
|   |   |--- Viscosity >  1.78
|   |   |   |--- TiO2 <= 0.18
|   |   |   |   |--- Al2O3 <= 0.18
|   |   |   |   |   |--- Viscosity <= 2.24
|   |   |   |   |   |   |--- value: [50.00]
|   |   |   |   |   |--- Viscosity >  2.24
|   |   |   |   |   |   |--- value: [40.00]
|   |   |   |   |--- Al2O3 >  0.18
|   |   |   |   |   |--- Viscosity <= 2.29
|   |   |   |   |   |   |--- value: [60.00]
|   |   |   |   |   |--- Viscosity >  2.29
|   |   |   |   |   |   |--- value: [55.00]
|   |   |   |--- TiO2 >  0.18
|   |   |   |   |--- Viscosity <= 2.22
|   |   |   |   |   |--- value: [70.00]
|   |   |   |   |--- Viscosity >  2.22
|   |   |   |   |   |--- Viscosity <= 2.69
|   |   |   |   |   |   |--- value: [60.00]
|   |   |   |   |   |--- Viscosity >  2.69
|   |   |   |   |   |   |--- value: [55.00]
|--- Viscosity >  2.86
|   |--- Viscosity <= 3.64
|   |   |--- TiO2 <= 0.18
|   |   |   |--- Viscosity <= 3.15
|   |   |   |   |--- value: [35.00]
|   |   |   |--- Viscosity >  3.15
|   |   |   |   |--- Viscosity <= 3.47
|   |   |   |   |   |--- Al2O3 <= 0.03
|   |   |   |   |   |   |--- value: [25.00]
|   |   |   |   |   |--- Al2O3 >  0.03
|   |   |   |   |   |   |--- value: [30.00]
|   |   |   |   |--- Viscosity >  3.47
|   |   |   |   |   |--- value: [40.00]
|   |   |--- TiO2 >  0.18
|   |   |   |--- value: [45.00]
|   |--- Viscosity >  3.64
|   |   |--- Viscosity <= 6.27
|   |   |   |--- TiO2 <= 0.18
|   |   |   |   |--- Al2O3 <= 0.18
|   |   |   |   |   |--- TiO2 <= 0.03
|   |   |   |   |   |   |--- value: [20.00]
|   |   |   |   |   |--- TiO2 >  0.03
|   |   |   |   |   |   |--- value: [22.50]
|   |   |   |   |--- Al2O3 >  0.18
|   |   |   |   |   |--- Viscosity <= 4.42
|   |   |   |   |   |   |--- value: [35.00]
|   |   |   |   |   |--- Viscosity >  4.42
|   |   |   |   |   |   |--- value: [27.50]
|   |   |   |--- TiO2 >  0.18
|   |   |   |   |--- Viscosity <= 4.65
|   |   |   |   |   |--- value: [35.00]
|   |   |   |   |--- Viscosity >  4.65
|   |   |   |   |   |--- Viscosity <= 5.40
|   |   |   |   |   |   |--- value: [30.00]
|   |   |   |   |   |--- Viscosity >  5.40
|   |   |   |   |   |   |--- value: [25.00]
|   |   |--- Viscosity >  6.27
|   |   |   |--- value: [20.00]

In [14]:
import pickle

# Persist the fitted decision tree to disk.
# The context manager closes the file as soon as the dump finishes; the
# previous inline open() call never closed the handle, so it leaked.
# NOTE: pickle files can execute arbitrary code when loaded, so only load
# this file from a trusted location.
with open("data/temp_viscosity_tree.model.sav", "wb") as model_file:
    pickle.dump(model, model_file)