{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" T | \n",
" Al2O3 | \n",
" TiO2 | \n",
" Viscosity | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 20 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 3.707 | \n",
"
\n",
" \n",
" 1 | \n",
" 25 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 3.180 | \n",
"
\n",
" \n",
" 2 | \n",
" 35 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 2.361 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" T Al2O3 TiO2 Viscosity\n",
"0 20 0.0 0.0 3.707\n",
"1 25 0.0 0.0 3.180\n",
"2 35 0.0 0.0 2.361"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" T | \n",
" Al2O3 | \n",
" TiO2 | \n",
" Viscosity | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 30 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 2.716 | \n",
"
\n",
" \n",
" 1 | \n",
" 40 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 2.073 | \n",
"
\n",
" \n",
" 2 | \n",
" 60 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.329 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" T Al2O3 TiO2 Viscosity\n",
"0 30 0.0 0.0 2.716\n",
"1 40 0.0 0.0 2.073\n",
"2 60 0.0 0.0 1.329"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import pandas as pd\n",
"\n",
"viscosity_train = pd.read_csv(\"data/viscosity_train.csv\", sep=\";\", decimal=\",\")\n",
"viscosity_test = pd.read_csv(\"data/viscosity_test.csv\", sep=\";\", decimal=\",\")\n",
"\n",
"display(viscosity_train.head(3))\n",
"display(viscosity_test.head(3))"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" T | \n",
" Al2O3 | \n",
" TiO2 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 20 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 25 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 35 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" T Al2O3 TiO2\n",
"0 20 0.0 0.0\n",
"1 25 0.0 0.0\n",
"2 35 0.0 0.0"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"0 3.707\n",
"1 3.180\n",
"2 2.361\n",
"Name: Viscosity, dtype: float64"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" T | \n",
" Al2O3 | \n",
" TiO2 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 30 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 40 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 60 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" T Al2O3 TiO2\n",
"0 30 0.0 0.0\n",
"1 40 0.0 0.0\n",
"2 60 0.0 0.0"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"0 2.716\n",
"1 2.073\n",
"2 1.329\n",
"Name: Viscosity, dtype: float64"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"viscosity_y_train = viscosity_train[\"Viscosity\"]\n",
"viscosity_train = viscosity_train.drop([\"Viscosity\"], axis=1)\n",
"\n",
"display(viscosity_train.head(3))\n",
"display(viscosity_y_train.head(3))\n",
"\n",
"viscosity_y_test = viscosity_test[\"Viscosity\"]\n",
"viscosity_test = viscosity_test.drop([\"Viscosity\"], axis=1)\n",
"\n",
"display(viscosity_test.head(3))\n",
"display(viscosity_y_test.head(3))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.pipeline import make_pipeline\n",
"from sklearn.preprocessing import PolynomialFeatures\n",
"from sklearn import linear_model, tree, neighbors, ensemble\n",
"\n",
"random_state = 9\n",
"\n",
"models = {\n",
" \"linear\": {\"model\": linear_model.LinearRegression(n_jobs=-1)},\n",
" \"linear_poly\": {\n",
" \"model\": make_pipeline(\n",
" PolynomialFeatures(degree=2),\n",
" linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n",
" )\n",
" },\n",
" \"linear_interact\": {\n",
" \"model\": make_pipeline(\n",
" PolynomialFeatures(interaction_only=True),\n",
" linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n",
" )\n",
" },\n",
" \"ridge\": {\"model\": linear_model.RidgeCV()},\n",
" \"decision_tree\": {\n",
" \"model\": tree.DecisionTreeRegressor(max_depth=7, random_state=random_state)\n",
" },\n",
" \"knn\": {\"model\": neighbors.KNeighborsRegressor(n_neighbors=7, n_jobs=-1)},\n",
" \"random_forest\": {\n",
" \"model\": ensemble.RandomForestRegressor(\n",
" max_depth=7, random_state=random_state, n_jobs=-1\n",
" )\n",
" },\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model: linear\n",
"Model: linear_poly\n",
"Model: linear_interact\n",
"Model: ridge\n",
"Model: decision_tree\n",
"Model: knn\n",
"Model: random_forest\n"
]
}
],
"source": [
"import math\n",
"from sklearn import metrics\n",
"\n",
"for model_name in models.keys():\n",
" print(f\"Model: {model_name}\")\n",
" fitted_model = models[model_name][\"model\"].fit(\n",
" viscosity_train.values, viscosity_y_train.values.ravel()\n",
" )\n",
" y_train_pred = fitted_model.predict(viscosity_train.values)\n",
" y_test_pred = fitted_model.predict(viscosity_test.values)\n",
" models[model_name][\"fitted\"] = fitted_model\n",
" models[model_name][\"train_preds\"] = y_train_pred\n",
" models[model_name][\"preds\"] = y_test_pred\n",
" models[model_name][\"RMSE_train\"] = math.sqrt(\n",
" metrics.mean_squared_error(viscosity_y_train, y_train_pred)\n",
" )\n",
" models[model_name][\"RMSE_test\"] = math.sqrt(\n",
" metrics.mean_squared_error(viscosity_y_test, y_test_pred)\n",
" )\n",
" models[model_name][\"RMAE_test\"] = math.sqrt(\n",
" metrics.mean_absolute_error(viscosity_y_test, y_test_pred)\n",
" )\n",
" models[model_name][\"R2_test\"] = metrics.r2_score(viscosity_y_test, y_test_pred)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" | \n",
" RMSE_train | \n",
" RMSE_test | \n",
" RMAE_test | \n",
" R2_test | \n",
"
\n",
" \n",
" \n",
" \n",
" linear_poly | \n",
" 0.150745 | \n",
" 0.139507 | \n",
" 0.336239 | \n",
" 0.978119 | \n",
"
\n",
" \n",
" linear_interact | \n",
" 0.361309 | \n",
" 0.303389 | \n",
" 0.527911 | \n",
" 0.896517 | \n",
"
\n",
" \n",
" random_forest | \n",
" 0.226420 | \n",
" 0.341014 | \n",
" 0.545765 | \n",
" 0.869259 | \n",
"
\n",
" \n",
" ridge | \n",
" 0.472399 | \n",
" 0.378573 | \n",
" 0.559409 | \n",
" 0.838873 | \n",
"
\n",
" \n",
" decision_tree | \n",
" 0.054533 | \n",
" 0.379017 | \n",
" 0.587467 | \n",
" 0.838495 | \n",
"
\n",
" \n",
" linear | \n",
" 0.441760 | \n",
" 0.428940 | \n",
" 0.617212 | \n",
" 0.793147 | \n",
"
\n",
" \n",
" knn | \n",
" 0.666903 | \n",
" 0.566901 | \n",
" 0.702700 | \n",
" 0.638689 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"reg_metrics = pd.DataFrame.from_dict(models, \"index\")[\n",
" [\"RMSE_train\", \"RMSE_test\", \"RMAE_test\", \"R2_test\"]\n",
"]\n",
"reg_metrics.sort_values(by=\"RMSE_test\").style.background_gradient(\n",
" cmap=\"viridis\", low=1, high=0.3, subset=[\"RMSE_train\", \"RMSE_test\"]\n",
").background_gradient(cmap=\"plasma\", low=0.3, high=1, subset=[\"RMAE_test\", \"R2_test\"])"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'criterion': 'poisson', 'max_depth': 9, 'min_samples_split': 2}"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"from sklearn import model_selection\n",
"\n",
"parameters = {\n",
" \"criterion\": [\"squared_error\", \"absolute_error\", \"friedman_mse\", \"poisson\"],\n",
" \"max_depth\": np.arange(1, 21).tolist()[0::2],\n",
" \"min_samples_split\": np.arange(2, 20).tolist()[0::2],\n",
"}\n",
"\n",
"grid = model_selection.GridSearchCV(\n",
" tree.DecisionTreeRegressor(random_state=random_state), parameters, n_jobs=-1\n",
")\n",
"\n",
"grid.fit(viscosity_train, viscosity_y_train)\n",
"grid.best_params_"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'RMSE_test': 0.37901722760783496,\n",
" 'RMAE_test': 0.5874671455143883,\n",
" 'R2_test': 0.8384951109125148}"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"{'RMSE_test': 0.39412315184917696,\n",
" 'RMAE_test': 0.593196723643326,\n",
" 'R2_test': 0.8253648477295591}"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"model = grid.best_estimator_\n",
"y_pred = model.predict(viscosity_test)\n",
"old_metrics = {\n",
" \"RMSE_test\": models[\"decision_tree\"][\"RMSE_test\"],\n",
" \"RMAE_test\": models[\"decision_tree\"][\"RMAE_test\"],\n",
" \"R2_test\": models[\"decision_tree\"][\"R2_test\"],\n",
"}\n",
"new_metrics = {}\n",
"new_metrics[\"RMSE_test\"] = math.sqrt(\n",
" metrics.mean_squared_error(viscosity_y_test, y_pred)\n",
")\n",
"new_metrics[\"RMAE_test\"] = math.sqrt(\n",
" metrics.mean_absolute_error(viscosity_y_test, y_pred)\n",
")\n",
"new_metrics[\"R2_test\"] = metrics.r2_score(viscosity_y_test, y_pred)\n",
"\n",
"display(old_metrics)\n",
"display(new_metrics)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"|--- T <= 32.50\n",
"| |--- TiO2 <= 0.18\n",
"| | |--- Al2O3 <= 0.18\n",
"| | | |--- T <= 22.50\n",
"| | | | |--- TiO2 <= 0.03\n",
"| | | | | |--- Al2O3 <= 0.03\n",
"| | | | | | |--- value: [3.71]\n",
"| | | | | |--- Al2O3 > 0.03\n",
"| | | | | | |--- value: [4.66]\n",
"| | | | |--- TiO2 > 0.03\n",
"| | | | | |--- value: [4.88]\n",
"| | | |--- T > 22.50\n",
"| | | | |--- TiO2 <= 0.03\n",
"| | | | | |--- Al2O3 <= 0.03\n",
"| | | | | | |--- value: [3.18]\n",
"| | | | | |--- Al2O3 > 0.03\n",
"| | | | | | |--- value: [3.38]\n",
"| | | | |--- TiO2 > 0.03\n",
"| | | | | |--- value: [4.24]\n",
"| | |--- Al2O3 > 0.18\n",
"| | | |--- T <= 22.50\n",
"| | | | |--- value: [6.67]\n",
"| | | |--- T > 22.50\n",
"| | | | |--- T <= 27.50\n",
"| | | | | |--- value: [5.59]\n",
"| | | | |--- T > 27.50\n",
"| | | | | |--- value: [4.73]\n",
"| |--- TiO2 > 0.18\n",
"| | |--- T <= 22.50\n",
"| | | |--- value: [7.13]\n",
"| | |--- T > 22.50\n",
"| | | |--- T <= 27.50\n",
"| | | | |--- value: [5.87]\n",
"| | | |--- T > 27.50\n",
"| | | | |--- value: [4.94]\n",
"|--- T > 32.50\n",
"| |--- T <= 47.50\n",
"| | |--- TiO2 <= 0.18\n",
"| | | |--- Al2O3 <= 0.18\n",
"| | | | |--- T <= 42.50\n",
"| | | | | |--- TiO2 <= 0.03\n",
"| | | | | | |--- Al2O3 <= 0.03\n",
"| | | | | | | |--- value: [2.36]\n",
"| | | | | | |--- Al2O3 > 0.03\n",
"| | | | | | | |--- value: [2.68]\n",
"| | | | | |--- TiO2 > 0.03\n",
"| | | | | | |--- T <= 37.50\n",
"| | | | | | | |--- value: [3.12]\n",
"| | | | | | |--- T > 37.50\n",
"| | | | | | | |--- value: [2.65]\n",
"| | | | |--- T > 42.50\n",
"| | | | | |--- TiO2 <= 0.03\n",
"| | | | | | |--- value: [1.83]\n",
"| | | | | |--- TiO2 > 0.03\n",
"| | | | | | |--- value: [2.40]\n",
"| | | |--- Al2O3 > 0.18\n",
"| | | | |--- T <= 37.50\n",
"| | | | | |--- value: [4.12]\n",
"| | | | |--- T > 37.50\n",
"| | | | | |--- value: [3.56]\n",
"| | |--- TiO2 > 0.18\n",
"| | | |--- T <= 40.00\n",
"| | | | |--- value: [4.35]\n",
"| | | |--- T > 40.00\n",
"| | | | |--- value: [3.56]\n",
"| |--- T > 47.50\n",
"| | |--- TiO2 <= 0.18\n",
"| | | |--- Al2O3 <= 0.18\n",
"| | | | |--- T <= 52.50\n",
"| | | | | |--- TiO2 <= 0.03\n",
"| | | | | | |--- Al2O3 <= 0.03\n",
"| | | | | | | |--- value: [1.63]\n",
"| | | | | | |--- Al2O3 > 0.03\n",
"| | | | | | | |--- value: [1.90]\n",
"| | | | | |--- TiO2 > 0.03\n",
"| | | | | | |--- value: [2.11]\n",
"| | | | |--- T > 52.50\n",
"| | | | | |--- T <= 65.00\n",
"| | | | | | |--- TiO2 <= 0.03\n",
"| | | | | | | |--- value: [1.55]\n",
"| | | | | | |--- TiO2 > 0.03\n",
"| | | | | | | |--- value: [1.66]\n",
"| | | | | |--- T > 65.00\n",
"| | | | | | |--- TiO2 <= 0.03\n",
"| | | | | | | |--- value: [1.19]\n",
"| | | | | | |--- TiO2 > 0.03\n",
"| | | | | | | |--- value: [1.29]\n",
"| | | |--- Al2O3 > 0.18\n",
"| | | | |--- T <= 65.00\n",
"| | | | | |--- T <= 57.50\n",
"| | | | | | |--- value: [2.43]\n",
"| | | | | |--- T > 57.50\n",
"| | | | | | |--- value: [2.16]\n",
"| | | | |--- T > 65.00\n",
"| | | | | |--- value: [1.73]\n",
"| | |--- TiO2 > 0.18\n",
"| | | |--- T <= 65.00\n",
"| | | | |--- T <= 57.50\n",
"| | | | | |--- value: [2.84]\n",
"| | | | |--- T > 57.50\n",
"| | | | | |--- value: [2.54]\n",
"| | | |--- T > 65.00\n",
"| | | | |--- value: [1.91]\n",
"\n"
]
}
],
"source": [
"rules = tree.export_text(\n",
" models[\"decision_tree\"][\"fitted\"],\n",
" feature_names=viscosity_train.columns.values.tolist(),\n",
")\n",
"print(rules)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"import pickle\n",
"\n",
"pickle.dump(models[\"decision_tree\"][\"fitted\"], open(\"data/vtree.model.sav\", \"wb\"))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}