{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" T | \n",
" Al2O3 | \n",
" TiO2 | \n",
" Viscosity | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 20 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 3.707 | \n",
"
\n",
" \n",
" 1 | \n",
" 25 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 3.180 | \n",
"
\n",
" \n",
" 2 | \n",
" 35 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 2.361 | \n",
"
\n",
" \n",
" 3 | \n",
" 45 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.832 | \n",
"
\n",
" \n",
" 4 | \n",
" 50 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.629 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" T Al2O3 TiO2 Viscosity\n",
"0 20 0.0 0.0 3.707\n",
"1 25 0.0 0.0 3.180\n",
"2 35 0.0 0.0 2.361\n",
"3 45 0.0 0.0 1.832\n",
"4 50 0.0 0.0 1.629"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" T | \n",
" Al2O3 | \n",
" TiO2 | \n",
" Viscosity | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 30 | \n",
" 0.00 | \n",
" 0.0 | \n",
" 2.716 | \n",
"
\n",
" \n",
" 1 | \n",
" 40 | \n",
" 0.00 | \n",
" 0.0 | \n",
" 2.073 | \n",
"
\n",
" \n",
" 2 | \n",
" 60 | \n",
" 0.00 | \n",
" 0.0 | \n",
" 1.329 | \n",
"
\n",
" \n",
" 3 | \n",
" 65 | \n",
" 0.00 | \n",
" 0.0 | \n",
" 1.211 | \n",
"
\n",
" \n",
" 4 | \n",
" 25 | \n",
" 0.05 | \n",
" 0.0 | \n",
" 4.120 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" T Al2O3 TiO2 Viscosity\n",
"0 30 0.00 0.0 2.716\n",
"1 40 0.00 0.0 2.073\n",
"2 60 0.00 0.0 1.329\n",
"3 65 0.00 0.0 1.211\n",
"4 25 0.05 0.0 4.120"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import pandas as pd\n",
"\n",
"train = pd.read_csv(\"data/viscosity_train.csv\", sep=\";\", decimal=\",\")\n",
"test = pd.read_csv(\"data/viscosity_test.csv\", sep=\";\", decimal=\",\")\n",
"\n",
"display(train.head())\n",
"display(test.head())"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Al2O3 | \n",
" TiO2 | \n",
" Viscosity | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 3.707 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 3.180 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 2.361 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.832 | \n",
"
\n",
" \n",
" 4 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.629 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Al2O3 TiO2 Viscosity\n",
"0 0.0 0.0 3.707\n",
"1 0.0 0.0 3.180\n",
"2 0.0 0.0 2.361\n",
"3 0.0 0.0 1.832\n",
"4 0.0 0.0 1.629"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"0 20\n",
"1 25\n",
"2 35\n",
"3 45\n",
"4 50\n",
"Name: T, dtype: int64"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Al2O3 | \n",
" TiO2 | \n",
" Viscosity | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.00 | \n",
" 0.0 | \n",
" 2.716 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.00 | \n",
" 0.0 | \n",
" 2.073 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.00 | \n",
" 0.0 | \n",
" 1.329 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.00 | \n",
" 0.0 | \n",
" 1.211 | \n",
"
\n",
" \n",
" 4 | \n",
" 0.05 | \n",
" 0.0 | \n",
" 4.120 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Al2O3 TiO2 Viscosity\n",
"0 0.00 0.0 2.716\n",
"1 0.00 0.0 2.073\n",
"2 0.00 0.0 1.329\n",
"3 0.00 0.0 1.211\n",
"4 0.05 0.0 4.120"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"0 30\n",
"1 40\n",
"2 60\n",
"3 65\n",
"4 25\n",
"Name: T, dtype: int64"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"y_train = train[\"T\"]\n",
"X_train = train.drop([\"T\"], axis=1)\n",
"\n",
"display(X_train.head())\n",
"display(y_train.head())\n",
"\n",
"y_test = test[\"T\"]\n",
"X_test = test.drop([\"T\"], axis=1)\n",
"\n",
"display(X_test.head())\n",
"display(y_test.head())"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.pipeline import make_pipeline\n",
"from sklearn.preprocessing import PolynomialFeatures\n",
"from sklearn import linear_model, tree, neighbors, ensemble\n",
"\n",
"random_state = 9\n",
"\n",
"models = {\n",
" \"linear\": {\"model\": linear_model.LinearRegression(n_jobs=-1)},\n",
" \"linear_poly\": {\n",
" \"model\": make_pipeline(\n",
" PolynomialFeatures(degree=2),\n",
" linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n",
" )\n",
" },\n",
" \"linear_interact\": {\n",
" \"model\": make_pipeline(\n",
" PolynomialFeatures(interaction_only=True),\n",
" linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n",
" )\n",
" },\n",
" \"ridge\": {\"model\": linear_model.RidgeCV()},\n",
" \"decision_tree\": {\n",
" \"model\": tree.DecisionTreeRegressor(random_state=random_state, max_depth=6, criterion=\"absolute_error\")\n",
" },\n",
" \"knn\": {\"model\": neighbors.KNeighborsRegressor(n_neighbors=7, n_jobs=-1)},\n",
" \"random_forest\": {\n",
" \"model\": ensemble.RandomForestRegressor(\n",
" max_depth=7, random_state=random_state, n_jobs=-1\n",
" )\n",
" },\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model: linear\n",
"Model: linear_poly\n",
"Model: linear_interact\n",
"Model: ridge\n",
"Model: decision_tree\n",
"Model: knn\n",
"Model: random_forest\n"
]
}
],
"source": [
"import math\n",
"from sklearn import metrics\n",
"\n",
"for model_name in models.keys():\n",
" print(f\"Model: {model_name}\")\n",
" fitted_model = models[model_name][\"model\"].fit(\n",
" X_train.values, y_train.values.ravel()\n",
" )\n",
" y_train_pred = fitted_model.predict(X_train.values)\n",
" y_test_pred = fitted_model.predict(X_test.values)\n",
" models[model_name][\"fitted\"] = fitted_model\n",
" models[model_name][\"MSE_train\"] = metrics.mean_squared_error(y_train, y_train_pred)\n",
" models[model_name][\"MSE_test\"] = metrics.mean_squared_error(y_test, y_test_pred)\n",
" models[model_name][\"MAE_train\"] = metrics.mean_absolute_error(y_train, y_train_pred)\n",
" models[model_name][\"MAE_test\"] = metrics.mean_absolute_error(y_test, y_test_pred)\n",
" models[model_name][\"R2_train\"] = metrics.r2_score(y_train, y_train_pred)\n",
" models[model_name][\"R2_test\"] = metrics.r2_score(y_test, y_test_pred)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" | \n",
" MSE_train | \n",
" MSE_test | \n",
" MAE_train | \n",
" MAE_test | \n",
" R2_train | \n",
" R2_test | \n",
"
\n",
" \n",
" \n",
" \n",
" linear_poly | \n",
" 4.827768 | \n",
" 4.877296 | \n",
" 1.522643 | \n",
" 1.743058 | \n",
" 0.980864 | \n",
" 0.974964 | \n",
"
\n",
" \n",
" linear_interact | \n",
" 21.786348 | \n",
" 23.459572 | \n",
" 3.830996 | \n",
" 4.381115 | \n",
" 0.913644 | \n",
" 0.879577 | \n",
"
\n",
" \n",
" linear | \n",
" 27.766510 | \n",
" 35.430313 | \n",
" 4.088006 | \n",
" 5.106782 | \n",
" 0.889940 | \n",
" 0.818129 | \n",
"
\n",
" \n",
" ridge | \n",
" 31.827476 | \n",
" 36.230606 | \n",
" 4.383008 | \n",
" 5.226480 | \n",
" 0.873843 | \n",
" 0.814021 | \n",
"
\n",
" \n",
" random_forest | \n",
" 8.525285 | \n",
" 45.444651 | \n",
" 2.542935 | \n",
" 5.749510 | \n",
" 0.966208 | \n",
" 0.766723 | \n",
"
\n",
" \n",
" decision_tree | \n",
" 3.289474 | \n",
" 45.588235 | \n",
" 0.921053 | \n",
" 6.176471 | \n",
" 0.986961 | \n",
" 0.765986 | \n",
"
\n",
" \n",
" knn | \n",
" 61.855532 | \n",
" 64.165666 | \n",
" 6.522556 | \n",
" 6.806723 | \n",
" 0.754819 | \n",
" 0.670624 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"reg_metrics = pd.DataFrame.from_dict(models, \"index\")[\n",
" [\"MSE_train\", \"MSE_test\", \"MAE_train\", \"MAE_test\", \"R2_train\", \"R2_test\"]\n",
"]\n",
"reg_metrics.sort_values(by=\"MAE_test\").style.background_gradient(\n",
" cmap=\"viridis\", low=1, high=0.3, subset=[\"MSE_train\", \"MSE_test\"]\n",
").background_gradient(cmap=\"plasma\", low=0.3, high=1, subset=[\"MAE_test\", \"R2_test\"])"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"|--- Viscosity <= 2.86\n",
"| |--- Viscosity <= 1.38\n",
"| | |--- value: [70.00]\n",
"| |--- Viscosity > 1.38\n",
"| | |--- Viscosity <= 1.78\n",
"| | | |--- Viscosity <= 1.72\n",
"| | | | |--- TiO2 <= 0.03\n",
"| | | | | |--- Al2O3 <= 0.03\n",
"| | | | | | |--- value: [52.50]\n",
"| | | | | |--- Al2O3 > 0.03\n",
"| | | | | | |--- value: [57.50]\n",
"| | | | |--- TiO2 > 0.03\n",
"| | | | | |--- value: [60.00]\n",
"| | | |--- Viscosity > 1.72\n",
"| | | | |--- value: [70.00]\n",
"| | |--- Viscosity > 1.78\n",
"| | | |--- TiO2 <= 0.18\n",
"| | | | |--- Al2O3 <= 0.18\n",
"| | | | | |--- Viscosity <= 2.24\n",
"| | | | | | |--- value: [50.00]\n",
"| | | | | |--- Viscosity > 2.24\n",
"| | | | | | |--- value: [40.00]\n",
"| | | | |--- Al2O3 > 0.18\n",
"| | | | | |--- Viscosity <= 2.29\n",
"| | | | | | |--- value: [60.00]\n",
"| | | | | |--- Viscosity > 2.29\n",
"| | | | | | |--- value: [55.00]\n",
"| | | |--- TiO2 > 0.18\n",
"| | | | |--- Viscosity <= 2.22\n",
"| | | | | |--- value: [70.00]\n",
"| | | | |--- Viscosity > 2.22\n",
"| | | | | |--- Viscosity <= 2.69\n",
"| | | | | | |--- value: [60.00]\n",
"| | | | | |--- Viscosity > 2.69\n",
"| | | | | | |--- value: [55.00]\n",
"|--- Viscosity > 2.86\n",
"| |--- Viscosity <= 3.64\n",
"| | |--- TiO2 <= 0.18\n",
"| | | |--- Viscosity <= 3.15\n",
"| | | | |--- value: [35.00]\n",
"| | | |--- Viscosity > 3.15\n",
"| | | | |--- Viscosity <= 3.47\n",
"| | | | | |--- Al2O3 <= 0.03\n",
"| | | | | | |--- value: [25.00]\n",
"| | | | | |--- Al2O3 > 0.03\n",
"| | | | | | |--- value: [30.00]\n",
"| | | | |--- Viscosity > 3.47\n",
"| | | | | |--- value: [40.00]\n",
"| | |--- TiO2 > 0.18\n",
"| | | |--- value: [45.00]\n",
"| |--- Viscosity > 3.64\n",
"| | |--- Viscosity <= 6.27\n",
"| | | |--- TiO2 <= 0.18\n",
"| | | | |--- Al2O3 <= 0.18\n",
"| | | | | |--- TiO2 <= 0.03\n",
"| | | | | | |--- value: [20.00]\n",
"| | | | | |--- TiO2 > 0.03\n",
"| | | | | | |--- value: [22.50]\n",
"| | | | |--- Al2O3 > 0.18\n",
"| | | | | |--- Viscosity <= 4.42\n",
"| | | | | | |--- value: [35.00]\n",
"| | | | | |--- Viscosity > 4.42\n",
"| | | | | | |--- value: [27.50]\n",
"| | | |--- TiO2 > 0.18\n",
"| | | | |--- Viscosity <= 4.65\n",
"| | | | | |--- value: [35.00]\n",
"| | | | |--- Viscosity > 4.65\n",
"| | | | | |--- Viscosity <= 5.40\n",
"| | | | | | |--- value: [30.00]\n",
"| | | | | |--- Viscosity > 5.40\n",
"| | | | | | |--- value: [25.00]\n",
"| | |--- Viscosity > 6.27\n",
"| | | |--- value: [20.00]\n",
"\n"
]
}
],
"source": [
"model = models[\"decision_tree\"][\"fitted\"]\n",
"rules = tree.export_text(\n",
" model, feature_names=X_train.columns.values.tolist()\n",
")\n",
"print(rules)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"import pickle\n",
"\n",
"pickle.dump(model, open(\"data/temp_viscosity_tree.model.sav\", \"wb\"))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}