{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TAl2O3TiO2Density
0200.00.01.06250
1250.00.01.05979
2350.00.01.05404
\n", "
" ], "text/plain": [ " T Al2O3 TiO2 Density\n", "0 20 0.0 0.0 1.06250\n", "1 25 0.0 0.0 1.05979\n", "2 35 0.0 0.0 1.05404" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TAl2O3TiO2Density
0300.000.01.05696
1550.000.01.04158
2250.050.01.08438
\n", "
" ], "text/plain": [ " T Al2O3 TiO2 Density\n", "0 30 0.00 0.0 1.05696\n", "1 55 0.00 0.0 1.04158\n", "2 25 0.05 0.0 1.08438" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import pandas as pd\n", "\n", "# Parse both CSV files with ';' as the field separator and ',' as the decimal mark.\n", "density_train = pd.read_csv(\n", "    \"data/density_train.csv\",\n", "    sep=\";\",\n", "    decimal=\",\",\n", ")\n", "density_test = pd.read_csv(\n", "    \"data/density_test.csv\",\n", "    sep=\";\",\n", "    decimal=\",\",\n", ")\n", "\n", "# Preview the first three rows of each split.\n", "display(density_train.head(3), density_test.head(3))" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TAl2O3TiO2
0200.00.0
1250.00.0
2350.00.0
\n", "
" ], "text/plain": [ " T Al2O3 TiO2\n", "0 20 0.0 0.0\n", "1 25 0.0 0.0\n", "2 35 0.0 0.0" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "0 1.06250\n", "1 1.05979\n", "2 1.05404\n", "Name: Density, dtype: float64" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TAl2O3TiO2
0300.000.0
1550.000.0
2250.050.0
\n", "
" ], "text/plain": [ " T Al2O3 TiO2\n", "0 30 0.00 0.0\n", "1 55 0.00 0.0\n", "2 25 0.05 0.0" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "0 1.05696\n", "1 1.04158\n", "2 1.08438\n", "Name: Density, dtype: float64" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "density_y_train = density_train[\"Density\"]\n", "density_train = density_train.drop([\"Density\"], axis=1)\n", "\n", "display(density_train.head(3))\n", "display(density_y_train.head(3))\n", "\n", "density_y_test = density_test[\"Density\"]\n", "density_test = density_test.drop([\"Density\"], axis=1)\n", "\n", "display(density_test.head(3))\n", "display(density_y_test.head(3))" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "from sklearn.pipeline import make_pipeline\n", "from sklearn.preprocessing import PolynomialFeatures\n", "from sklearn import linear_model, tree, neighbors, ensemble\n", "\n", "random_state = 9\n", "\n", "models = {\n", " \"linear\": {\"model\": linear_model.LinearRegression(n_jobs=-1)},\n", " \"linear_poly\": {\n", " \"model\": make_pipeline(\n", " PolynomialFeatures(degree=2),\n", " linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n", " )\n", " },\n", " \"linear_interact\": {\n", " \"model\": make_pipeline(\n", " PolynomialFeatures(interaction_only=True),\n", " linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n", " )\n", " },\n", " \"ridge\": {\"model\": linear_model.RidgeCV()},\n", " \"decision_tree\": {\n", " \"model\": tree.DecisionTreeRegressor(max_depth=7, random_state=random_state)\n", " },\n", " \"knn\": {\"model\": neighbors.KNeighborsRegressor(n_neighbors=7, n_jobs=-1)},\n", " \"random_forest\": {\n", " \"model\": ensemble.RandomForestRegressor(\n", " max_depth=7, random_state=random_state, n_jobs=-1\n", " )\n", " },\n", "}" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": 
"stream", "text": [ "Model: linear\n", "Model: linear_poly\n", "Model: linear_interact\n", "Model: ridge\n", "Model: decision_tree\n", "Model: knn\n", "Model: random_forest\n" ] } ], "source": [ "import math\n", "from sklearn import metrics\n", "\n", "for model_name in models.keys():\n", " print(f\"Model: {model_name}\")\n", " fitted_model = models[model_name][\"model\"].fit(\n", " density_train.values, density_y_train.values.ravel()\n", " )\n", " y_train_pred = fitted_model.predict(density_train.values)\n", " y_test_pred = fitted_model.predict(density_test.values)\n", " models[model_name][\"fitted\"] = fitted_model\n", " models[model_name][\"train_preds\"] = y_train_pred\n", " models[model_name][\"preds\"] = y_test_pred\n", " models[model_name][\"RMSE_train\"] = math.sqrt(\n", " metrics.mean_squared_error(density_y_train, y_train_pred)\n", " )\n", " models[model_name][\"RMSE_test\"] = math.sqrt(\n", " metrics.mean_squared_error(density_y_test, y_test_pred)\n", " )\n", " models[model_name][\"RMAE_test\"] = math.sqrt(\n", " metrics.mean_absolute_error(density_y_test, y_test_pred)\n", " )\n", " models[model_name][\"R2_test\"] = metrics.r2_score(density_y_test, y_test_pred)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 RMSE_trainRMSE_testRMAE_testR2_test
linear_poly0.0003190.0003620.0166430.999965
linear_interact0.0011310.0014910.0331980.999413
linear0.0024640.0032610.0498910.997191
random_forest0.0027160.0055750.0672980.991788
decision_tree0.0003460.0064330.0761380.989067
ridge0.0139890.0153560.1163800.937703
knn0.0531080.0567760.2176110.148414
\n" ], "text/plain": [ "" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# One row per model, restricted to the scalar score columns.\n", "reg_metrics = pd.DataFrame.from_dict(models, orient=\"index\")[\n", "    [\"RMSE_train\", \"RMSE_test\", \"RMAE_test\", \"R2_test\"]\n", "]\n", "\n", "# Rank by test RMSE; colour the RMSE columns with viridis and the\n", "# RMAE / R2 columns with plasma.\n", "(\n", "    reg_metrics.sort_values(by=\"RMSE_test\")\n", "    .style.background_gradient(\n", "        cmap=\"viridis\", low=1, high=0.3, subset=[\"RMSE_train\", \"RMSE_test\"]\n", "    )\n", "    .background_gradient(\n", "        cmap=\"plasma\", low=0.3, high=1, subset=[\"RMAE_test\", \"R2_test\"]\n", "    )\n", ")" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\user\\Projects\\python\\fuzzy\\.venv\\Lib\\site-packages\\numpy\\ma\\core.py:2881: RuntimeWarning: invalid value encountered in cast\n", " _data = np.array(data, dtype=dtype, copy=copy,\n" ] }, { "data": { "text/plain": [ "{'criterion': 'absolute_error', 'max_depth': 7}" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import numpy as np\n", "from sklearn import model_selection\n", "\n", "# Search grid for the decision tree: four split criteria combined with\n", "# the odd depths 1, 3, ..., 19.\n", "parameters = {\n", "    \"criterion\": [\"squared_error\", \"absolute_error\", \"friedman_mse\", \"poisson\"],\n", "    \"max_depth\": np.arange(1, 21, 2).tolist(),\n", "    # \"min_samples_split\": np.arange(2, 11).tolist()[0::2],\n", "}\n", "\n", "grid = model_selection.GridSearchCV(\n", "    estimator=tree.DecisionTreeRegressor(random_state=random_state),\n", "    param_grid=parameters,\n", "    n_jobs=-1,\n", ")\n", "\n", "grid.fit(density_train, density_y_train)\n", "grid.best_params_" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'RMSE_test': 0.006433043831746894,\n", " 'RMAE_test': 0.07613841884048704,\n", " 'R2_test': 0.989067217447684}" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "{'RMSE_test': 0.005040505635233745,\n", " 'RMAE_test': 0.06943469212568175,\n", " 'R2_test': 0.9932880934907101}" ] }, "metadata": {}, "output_type": 
"display_data" } ], "source": [ "# Score the tuned tree on the test split and show it next to the scores of the\n", "# \"decision_tree\" entry fitted earlier.\n", "model = grid.best_estimator_\n", "y_pred = model.predict(density_test)\n", "\n", "old_metrics = {\n", "    key: models[\"decision_tree\"][key]\n", "    for key in (\"RMSE_test\", \"RMAE_test\", \"R2_test\")\n", "}\n", "new_metrics = {\n", "    \"RMSE_test\": math.sqrt(metrics.mean_squared_error(density_y_test, y_pred)),\n", "    \"RMAE_test\": math.sqrt(metrics.mean_absolute_error(density_y_test, y_pred)),\n", "    \"R2_test\": metrics.r2_score(density_y_test, y_pred),\n", "}\n", "\n", "display(old_metrics, new_metrics)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "|--- Al2O3 <= 0.18\n", "| |--- TiO2 <= 0.18\n", "| | |--- T <= 32.50\n", "| | | |--- TiO2 <= 0.03\n", "| | | | |--- Al2O3 <= 0.03\n", "| | | | | |--- T <= 22.50\n", "| | | | | | |--- value: [1.06]\n", "| | | | | |--- T > 22.50\n", "| | | | | | |--- value: [1.06]\n", "| | | | |--- Al2O3 > 0.03\n", "| | | | | |--- value: [1.09]\n", "| | | |--- TiO2 > 0.03\n", "| | | | |--- T <= 27.50\n", "| | | | | |--- T <= 22.50\n", "| | | | | | |--- value: [1.09]\n", "| | | | | |--- T > 22.50\n", "| | | | | | |--- value: [1.09]\n", "| | | | |--- T > 27.50\n", "| | | | | |--- value: [1.08]\n", "| | |--- T > 32.50\n", "| | | |--- TiO2 <= 0.03\n", "| | | | |--- Al2O3 <= 0.03\n", "| | | | | |--- T <= 55.00\n", "| | | | | | |--- T <= 47.50\n", "| | | | | | | |--- value: [1.05]\n", "| | | | | | |--- T > 47.50\n", "| | | | | | | |--- value: [1.04]\n", "| | | | | |--- T > 55.00\n", "| | | | | | |--- T <= 62.50\n", "| | | | | | | |--- value: [1.04]\n", "| | | | | | |--- T > 62.50\n", "| | | | | | | |--- value: [1.03]\n", "| | | | |--- Al2O3 > 0.03\n", "| | | | | |--- T <= 60.00\n", "| | | | | | |--- T <= 52.50\n", "| | | | | | | |--- value: [1.07]\n", "| | | | | | |--- T > 52.50\n", "| | | | | | | |--- 
value: [1.06]\n", "| | | | | |--- T > 60.00\n", "| | | | | | |--- T <= 67.50\n", "| | | | | | | |--- value: [1.06]\n", "| | | | | | |--- T > 67.50\n", "| | | | | | | |--- value: [1.05]\n", "| | | |--- TiO2 > 0.03\n", "| | | | |--- T <= 50.00\n", "| | | | | |--- T <= 37.50\n", "| | | | | | |--- value: [1.08]\n", "| | | | | |--- T > 37.50\n", "| | | | | | |--- value: [1.08]\n", "| | | | |--- T > 50.00\n", "| | | | | |--- T <= 67.50\n", "| | | | | | |--- T <= 62.50\n", "| | | | | | | |--- value: [1.06]\n", "| | | | | | |--- T > 62.50\n", "| | | | | | | |--- value: [1.06]\n", "| | | | | |--- T > 67.50\n", "| | | | | | |--- value: [1.06]\n", "| |--- TiO2 > 0.18\n", "| | |--- T <= 40.00\n", "| | | |--- T <= 30.00\n", "| | | | |--- value: [1.22]\n", "| | | |--- T > 30.00\n", "| | | | |--- value: [1.21]\n", "| | |--- T > 40.00\n", "| | | |--- T <= 60.00\n", "| | | | |--- T <= 52.50\n", "| | | | | |--- T <= 47.50\n", "| | | | | | |--- value: [1.20]\n", "| | | | | |--- T > 47.50\n", "| | | | | | |--- value: [1.19]\n", "| | | | |--- T > 52.50\n", "| | | | | |--- value: [1.19]\n", "| | | |--- T > 60.00\n", "| | | | |--- value: [1.18]\n", "|--- Al2O3 > 0.18\n", "| |--- T <= 35.00\n", "| | |--- T <= 22.50\n", "| | | |--- value: [1.19]\n", "| | |--- T > 22.50\n", "| | | |--- T <= 27.50\n", "| | | | |--- value: [1.18]\n", "| | | |--- T > 27.50\n", "| | | | |--- value: [1.18]\n", "| |--- T > 35.00\n", "| | |--- T <= 52.50\n", "| | | |--- T <= 42.50\n", "| | | | |--- value: [1.17]\n", "| | | |--- T > 42.50\n", "| | | | |--- T <= 47.50\n", "| | | | | |--- value: [1.17]\n", "| | | | |--- T > 47.50\n", "| | | | | |--- value: [1.16]\n", "| | |--- T > 52.50\n", "| | | |--- T <= 65.00\n", "| | | | |--- T <= 57.50\n", "| | | | | |--- value: [1.16]\n", "| | | | |--- T > 57.50\n", "| | | | | |--- value: [1.15]\n", "| | | |--- T > 65.00\n", "| | | | |--- value: [1.14]\n", "\n" ] } ], "source": [ "rules = tree.export_text(\n", " model,\n", " 
feature_names=density_train.columns.values.tolist()\n", ")\n", "print(rules)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "import pickle\n", "\n", "# Persist the tuned tree. The context manager closes the file handle (and so\n", "# flushes the pickle to disk) even if pickling raises.\n", "with open(\"data/dtree.model.sav\", \"wb\") as model_file:\n", "    pickle.dump(model, model_file)" ] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.7" } }, "nbformat": 4, "nbformat_minor": 2 }