{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TAl2O3TiO2Viscosity
0200.00.03.707
1250.00.03.180
2350.00.02.361
\n", "
" ], "text/plain": [ " T Al2O3 TiO2 Viscosity\n", "0 20 0.0 0.0 3.707\n", "1 25 0.0 0.0 3.180\n", "2 35 0.0 0.0 2.361" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TAl2O3TiO2Viscosity
0300.00.02.716
1400.00.02.073
2600.00.01.329
\n", "
" ], "text/plain": [ " T Al2O3 TiO2 Viscosity\n", "0 30 0.0 0.0 2.716\n", "1 40 0.0 0.0 2.073\n", "2 60 0.0 0.0 1.329" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import pandas as pd\n", "\n", "# Both CSV files are semicolon-delimited and use a comma as the decimal mark.\n", "viscosity_train = pd.read_csv(\n", "    \"data/viscosity_train.csv\",\n", "    sep=\";\",\n", "    decimal=\",\",\n", ")\n", "viscosity_test = pd.read_csv(\n", "    \"data/viscosity_test.csv\",\n", "    sep=\";\",\n", "    decimal=\",\",\n", ")\n", "\n", "# Preview the first three rows of each frame.\n", "display(viscosity_train.head(3), viscosity_test.head(3))" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TAl2O3TiO2
0200.00.0
1250.00.0
2350.00.0
\n", "
" ], "text/plain": [ " T Al2O3 TiO2\n", "0 20 0.0 0.0\n", "1 25 0.0 0.0\n", "2 35 0.0 0.0" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "0 3.707\n", "1 3.180\n", "2 2.361\n", "Name: Viscosity, dtype: float64" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TAl2O3TiO2
0300.00.0
1400.00.0
2600.00.0
\n", "
" ], "text/plain": [ " T Al2O3 TiO2\n", "0 30 0.0 0.0\n", "1 40 0.0 0.0\n", "2 60 0.0 0.0" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "0 2.716\n", "1 2.073\n", "2 1.329\n", "Name: Viscosity, dtype: float64" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "viscosity_y_train = viscosity_train[\"Viscosity\"]\n", "viscosity_train = viscosity_train.drop([\"Viscosity\"], axis=1)\n", "\n", "display(viscosity_train.head(3))\n", "display(viscosity_y_train.head(3))\n", "\n", "viscosity_y_test = viscosity_test[\"Viscosity\"]\n", "viscosity_test = viscosity_test.drop([\"Viscosity\"], axis=1)\n", "\n", "display(viscosity_test.head(3))\n", "display(viscosity_y_test.head(3))" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "from sklearn.pipeline import make_pipeline\n", "from sklearn.preprocessing import PolynomialFeatures\n", "from sklearn import linear_model, tree, neighbors, ensemble\n", "\n", "random_state = 9\n", "\n", "models = {\n", " \"linear\": {\"model\": linear_model.LinearRegression(n_jobs=-1)},\n", " \"linear_poly\": {\n", " \"model\": make_pipeline(\n", " PolynomialFeatures(degree=2),\n", " linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n", " )\n", " },\n", " \"linear_interact\": {\n", " \"model\": make_pipeline(\n", " PolynomialFeatures(interaction_only=True),\n", " linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n", " )\n", " },\n", " \"ridge\": {\"model\": linear_model.RidgeCV()},\n", " \"decision_tree\": {\n", " \"model\": tree.DecisionTreeRegressor(max_depth=7, random_state=random_state)\n", " },\n", " \"knn\": {\"model\": neighbors.KNeighborsRegressor(n_neighbors=7, n_jobs=-1)},\n", " \"random_forest\": {\n", " \"model\": ensemble.RandomForestRegressor(\n", " max_depth=7, random_state=random_state, n_jobs=-1\n", " )\n", " },\n", "}" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": 
"stdout", "output_type": "stream", "text": [ "Model: linear\n", "Model: linear_poly\n", "Model: linear_interact\n", "Model: ridge\n", "Model: decision_tree\n", "Model: knn\n", "Model: random_forest\n" ] } ], "source": [ "import math\n", "from sklearn import metrics\n", "\n", "for model_name in models.keys():\n", " print(f\"Model: {model_name}\")\n", " fitted_model = models[model_name][\"model\"].fit(\n", " viscosity_train.values, viscosity_y_train.values.ravel()\n", " )\n", " y_train_pred = fitted_model.predict(viscosity_train.values)\n", " y_test_pred = fitted_model.predict(viscosity_test.values)\n", " models[model_name][\"fitted\"] = fitted_model\n", " models[model_name][\"train_preds\"] = y_train_pred\n", " models[model_name][\"preds\"] = y_test_pred\n", " models[model_name][\"RMSE_train\"] = math.sqrt(\n", " metrics.mean_squared_error(viscosity_y_train, y_train_pred)\n", " )\n", " models[model_name][\"RMSE_test\"] = math.sqrt(\n", " metrics.mean_squared_error(viscosity_y_test, y_test_pred)\n", " )\n", " models[model_name][\"RMAE_test\"] = math.sqrt(\n", " metrics.mean_absolute_error(viscosity_y_test, y_test_pred)\n", " )\n", " models[model_name][\"R2_test\"] = metrics.r2_score(viscosity_y_test, y_test_pred)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 RMSE_trainRMSE_testRMAE_testR2_test
linear_poly0.1507450.1395070.3362390.978119
linear_interact0.3613090.3033890.5279110.896517
random_forest0.2264200.3410140.5457650.869259
ridge0.4723990.3785730.5594090.838873
decision_tree0.0545330.3790170.5874670.838495
linear0.4417600.4289400.6172120.793147
knn0.6669030.5669010.7027000.638689
\n" ], "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Keep only the scalar score columns, one row per model.\n", "reg_metrics = pd.DataFrame.from_dict(models, orient=\"index\").loc[\n", "    :, [\"RMSE_train\", \"RMSE_test\", \"RMAE_test\", \"R2_test\"]\n", "]\n", "\n", "# Order by test RMSE and colour the two column groups with separate gradients.\n", "(\n", "    reg_metrics.sort_values(by=\"RMSE_test\")\n", "    .style.background_gradient(\n", "        cmap=\"viridis\", low=1, high=0.3, subset=[\"RMSE_train\", \"RMSE_test\"]\n", "    )\n", "    .background_gradient(\n", "        cmap=\"plasma\", low=0.3, high=1, subset=[\"RMAE_test\", \"R2_test\"]\n", "    )\n", ")" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'criterion': 'poisson', 'max_depth': 9, 'min_samples_split': 2}" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import numpy as np\n", "from sklearn import model_selection\n", "\n", "# Search space: four split criteria, odd depths 1..19, even split sizes 2..18.\n", "parameters = {\n", "    \"criterion\": [\"squared_error\", \"absolute_error\", \"friedman_mse\", \"poisson\"],\n", "    \"max_depth\": list(range(1, 21, 2)),\n", "    \"min_samples_split\": list(range(2, 20, 2)),\n", "}\n", "\n", "# Exhaustive search; cross-validation and scoring are left at their defaults.\n", "grid = model_selection.GridSearchCV(\n", "    estimator=tree.DecisionTreeRegressor(random_state=random_state),\n", "    param_grid=parameters,\n", "    n_jobs=-1,\n", ")\n", "\n", "grid.fit(viscosity_train, viscosity_y_train)\n", "grid.best_params_" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'RMSE_test': 0.37901722760783496,\n", " 'RMAE_test': 0.5874671455143883,\n", " 'R2_test': 0.8384951109125148}" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "{'RMSE_test': 0.39412315184917696,\n", " 'RMAE_test': 0.593196723643326,\n", " 'R2_test': 0.8253648477295591}" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Score the grid-searched tree on the test set and show it next to the scores\n", "# already stored for the baseline decision tree.\n", "model = grid.best_estimator_\n", "y_pred = model.predict(viscosity_test)\n", "\n", "old_metrics = {\n", "    metric: models[\"decision_tree\"][metric]\n", "    for metric in (\"RMSE_test\", \"RMAE_test\", \"R2_test\")\n", "}\n", "new_metrics = {\n", "    \"RMSE_test\": math.sqrt(metrics.mean_squared_error(viscosity_y_test, y_pred)),\n", "    \"RMAE_test\": math.sqrt(metrics.mean_absolute_error(viscosity_y_test, y_pred)),\n", "    \"R2_test\": metrics.r2_score(viscosity_y_test, y_pred),\n", "}\n", "\n", "display(old_metrics, new_metrics)" ] }, 
{ "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "|--- T <= 32.50\n", "| |--- TiO2 <= 0.18\n", "| | |--- Al2O3 <= 0.18\n", "| | | |--- T <= 22.50\n", "| | | | |--- TiO2 <= 0.03\n", "| | | | | |--- Al2O3 <= 0.03\n", "| | | | | | |--- value: [3.71]\n", "| | | | | |--- Al2O3 > 0.03\n", "| | | | | | |--- value: [4.66]\n", "| | | | |--- TiO2 > 0.03\n", "| | | | | |--- value: [4.88]\n", "| | | |--- T > 22.50\n", "| | | | |--- TiO2 <= 0.03\n", "| | | | | |--- Al2O3 <= 0.03\n", "| | | | | | |--- value: [3.18]\n", "| | | | | |--- Al2O3 > 0.03\n", "| | | | | | |--- value: [3.38]\n", "| | | | |--- TiO2 > 0.03\n", "| | | | | |--- value: [4.24]\n", "| | |--- Al2O3 > 0.18\n", "| | | |--- T <= 22.50\n", "| | | | |--- value: [6.67]\n", "| | | |--- T > 22.50\n", "| | | | |--- T <= 27.50\n", "| | | | | |--- value: [5.59]\n", "| | | | |--- T > 27.50\n", "| | | | | |--- value: [4.73]\n", "| |--- TiO2 > 0.18\n", "| | |--- T <= 22.50\n", "| | | |--- value: [7.13]\n", "| | |--- T > 22.50\n", "| | | |--- T <= 27.50\n", "| | | | |--- value: [5.87]\n", "| | | |--- T > 27.50\n", "| | | | |--- value: [4.94]\n", "|--- T > 32.50\n", "| |--- T <= 47.50\n", "| | |--- TiO2 <= 0.18\n", "| | | |--- Al2O3 <= 0.18\n", "| | | | |--- T <= 42.50\n", "| | | | | |--- TiO2 <= 0.03\n", "| | | | | | |--- Al2O3 <= 0.03\n", "| | | | | | | |--- value: [2.36]\n", "| | | | | | |--- Al2O3 > 0.03\n", "| | | | | | | |--- value: [2.68]\n", "| | | | | |--- TiO2 > 0.03\n", "| | | | | | |--- T <= 
37.50\n", "| | | | | | | |--- value: [3.12]\n", "| | | | | | |--- T > 37.50\n", "| | | | | | | |--- value: [2.65]\n", "| | | | |--- T > 42.50\n", "| | | | | |--- TiO2 <= 0.03\n", "| | | | | | |--- value: [1.83]\n", "| | | | | |--- TiO2 > 0.03\n", "| | | | | | |--- value: [2.40]\n", "| | | |--- Al2O3 > 0.18\n", "| | | | |--- T <= 37.50\n", "| | | | | |--- value: [4.12]\n", "| | | | |--- T > 37.50\n", "| | | | | |--- value: [3.56]\n", "| | |--- TiO2 > 0.18\n", "| | | |--- T <= 40.00\n", "| | | | |--- value: [4.35]\n", "| | | |--- T > 40.00\n", "| | | | |--- value: [3.56]\n", "| |--- T > 47.50\n", "| | |--- TiO2 <= 0.18\n", "| | | |--- Al2O3 <= 0.18\n", "| | | | |--- T <= 52.50\n", "| | | | | |--- TiO2 <= 0.03\n", "| | | | | | |--- Al2O3 <= 0.03\n", "| | | | | | | |--- value: [1.63]\n", "| | | | | | |--- Al2O3 > 0.03\n", "| | | | | | | |--- value: [1.90]\n", "| | | | | |--- TiO2 > 0.03\n", "| | | | | | |--- value: [2.11]\n", "| | | | |--- T > 52.50\n", "| | | | | |--- T <= 65.00\n", "| | | | | | |--- TiO2 <= 0.03\n", "| | | | | | | |--- value: [1.55]\n", "| | | | | | |--- TiO2 > 0.03\n", "| | | | | | | |--- value: [1.66]\n", "| | | | | |--- T > 65.00\n", "| | | | | | |--- TiO2 <= 0.03\n", "| | | | | | | |--- value: [1.19]\n", "| | | | | | |--- TiO2 > 0.03\n", "| | | | | | | |--- value: [1.29]\n", "| | | |--- Al2O3 > 0.18\n", "| | | | |--- T <= 65.00\n", "| | | | | |--- T <= 57.50\n", "| | | | | | |--- value: [2.43]\n", "| | | | | |--- T > 57.50\n", "| | | | | | |--- value: [2.16]\n", "| | | | |--- T > 65.00\n", "| | | | | |--- value: [1.73]\n", "| | |--- TiO2 > 0.18\n", "| | | |--- T <= 65.00\n", "| | | | |--- T <= 57.50\n", "| | | | | |--- value: [2.84]\n", "| | | | |--- T > 57.50\n", "| | | | | |--- value: [2.54]\n", "| | | |--- T > 65.00\n", "| | | | |--- value: [1.91]\n", "\n" ] } ], "source": [ "# Print the decision rules of the baseline tree (the one fitted in the model\n", "# loop, not the grid-searched estimator), labelling splits by column name.\n", "rules = tree.export_text(\n", "    decision_tree=models[\"decision_tree\"][\"fitted\"],\n", "    feature_names=viscosity_train.columns.tolist(),\n", ")\n", "print(rules)" ] }, { 
"cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "import pickle\n", "\n", "# Persist the baseline decision tree (not the grid-searched one) to disk.\n", "# The context manager closes the file even if pickling raises; passing a bare\n", "# open() call to pickle.dump leaves the handle open and the write unflushed\n", "# until the object happens to be garbage-collected.\n", "with open(\"data/vtree.model.sav\", \"wb\") as model_file:\n", "    pickle.dump(models[\"decision_tree\"][\"fitted\"], model_file)" ] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.7" } }, "nbformat": 4, "nbformat_minor": 2 }