{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TAl2O3TiO2Density
0200.00.01.06250
1250.00.01.05979
2350.00.01.05404
3400.00.01.05103
4450.00.01.04794
\n", "
" ], "text/plain": [ " T Al2O3 TiO2 Density\n", "0 20 0.0 0.0 1.06250\n", "1 25 0.0 0.0 1.05979\n", "2 35 0.0 0.0 1.05404\n", "3 40 0.0 0.0 1.05103\n", "4 45 0.0 0.0 1.04794" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TAl2O3TiO2Density
0300.000.01.05696
1550.000.01.04158
2250.050.01.08438
3300.050.01.08112
4350.050.01.07781
\n", "
" ], "text/plain": [ " T Al2O3 TiO2 Density\n", "0 30 0.00 0.0 1.05696\n", "1 55 0.00 0.0 1.04158\n", "2 25 0.05 0.0 1.08438\n", "3 30 0.05 0.0 1.08112\n", "4 35 0.05 0.0 1.07781" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import pandas as pd\n", "\n", "# Both files are semicolon-delimited and use a comma as the decimal mark.\n", "train, test = (\n", "    pd.read_csv(csv_path, sep=\";\", decimal=\",\")\n", "    for csv_path in (\"data/density_train.csv\", \"data/density_test.csv\")\n", ")\n", "\n", "# Show the first rows of each split as two separate rich outputs.\n", "display(train.head(), test.head())" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Al2O3TiO2Density
00.00.01.06250
10.00.01.05979
20.00.01.05404
30.00.01.05103
40.00.01.04794
\n", "
" ], "text/plain": [ " Al2O3 TiO2 Density\n", "0 0.0 0.0 1.06250\n", "1 0.0 0.0 1.05979\n", "2 0.0 0.0 1.05404\n", "3 0.0 0.0 1.05103\n", "4 0.0 0.0 1.04794" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "0 20\n", "1 25\n", "2 35\n", "3 40\n", "4 45\n", "Name: T, dtype: int64" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Al2O3TiO2Density
00.000.01.05696
10.000.01.04158
20.050.01.08438
30.050.01.08112
40.050.01.07781
\n", "
" ], "text/plain": [ " Al2O3 TiO2 Density\n", "0 0.00 0.0 1.05696\n", "1 0.00 0.0 1.04158\n", "2 0.05 0.0 1.08438\n", "3 0.05 0.0 1.08112\n", "4 0.05 0.0 1.07781" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "0 30\n", "1 55\n", "2 25\n", "3 30\n", "4 35\n", "Name: T, dtype: int64" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "y_train = train[\"T\"]\n", "X_train = train.drop([\"T\"], axis=1)\n", "\n", "display(X_train.head())\n", "display(y_train.head())\n", "\n", "y_test = test[\"T\"]\n", "X_test = test.drop([\"T\"], axis=1)\n", "\n", "display(X_test.head())\n", "display(y_test.head())" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "from sklearn.pipeline import make_pipeline\n", "from sklearn.preprocessing import PolynomialFeatures\n", "from sklearn import linear_model, tree, neighbors, ensemble\n", "\n", "random_state = 9\n", "\n", "models = {\n", " \"linear\": {\"model\": linear_model.LinearRegression(n_jobs=-1)},\n", " \"linear_poly\": {\n", " \"model\": make_pipeline(\n", " PolynomialFeatures(degree=2),\n", " linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n", " )\n", " },\n", " \"linear_interact\": {\n", " \"model\": make_pipeline(\n", " PolynomialFeatures(interaction_only=True),\n", " linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n", " )\n", " },\n", " \"ridge\": {\"model\": linear_model.RidgeCV()},\n", " \"decision_tree\": {\n", " \"model\": tree.DecisionTreeRegressor(random_state=random_state, max_depth=6, criterion=\"absolute_error\")\n", " },\n", " \"knn\": {\"model\": neighbors.KNeighborsRegressor(n_neighbors=7, n_jobs=-1)},\n", " \"random_forest\": {\n", " \"model\": ensemble.RandomForestRegressor(\n", " max_depth=7, random_state=random_state, n_jobs=-1\n", " )\n", " },\n", "}" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ 
"Model: linear\n", "Model: linear_poly\n", "Model: linear_interact\n", "Model: ridge\n", "Model: decision_tree\n", "Model: knn\n", "Model: random_forest\n" ] } ], "source": [ "import math\n", "from sklearn import metrics\n", "\n", "for model_name in models.keys():\n", " print(f\"Model: {model_name}\")\n", " fitted_model = models[model_name][\"model\"].fit(\n", " X_train.values, y_train.values.ravel()\n", " )\n", " y_train_pred = fitted_model.predict(X_train.values)\n", " y_test_pred = fitted_model.predict(X_test.values)\n", " models[model_name][\"fitted\"] = fitted_model\n", " models[model_name][\"MSE_train\"] = metrics.mean_squared_error(y_train, y_train_pred)\n", " models[model_name][\"MSE_test\"] = metrics.mean_squared_error(y_test, y_test_pred)\n", " models[model_name][\"MAE_train\"] = metrics.mean_absolute_error(y_train, y_train_pred)\n", " models[model_name][\"MAE_test\"] = metrics.mean_absolute_error(y_test, y_test_pred)\n", " models[model_name][\"R2_train\"] = metrics.r2_score(y_train, y_train_pred)\n", " models[model_name][\"R2_test\"] = metrics.r2_score(y_test, y_test_pred)" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 MSE_trainMSE_testMAE_trainMAE_testR2_trainR2_test
linear_poly0.3027680.2032930.4194670.3926870.9988600.999047
linear_interact9.69332310.8754422.5449442.7184240.9634920.949019
linear10.46850314.8203152.6574762.9302290.9605720.930526
decision_tree10.52631647.4264711.8421055.7352940.9603550.777676
random_forest20.24387654.5012403.5929536.5981330.9237550.744512
knn174.100430191.17647110.80827111.6806720.3442850.103812
ridge243.364664199.60147713.47272412.3967990.0834150.064317
\n" ], "text/plain": [ "" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# One row per model; keep only the score columns.\n", "reg_metrics = pd.DataFrame.from_dict(models, orient=\"index\").loc[\n", "    :, [\"MSE_train\", \"MSE_test\", \"MAE_train\", \"MAE_test\", \"R2_train\", \"R2_test\"]\n", "]\n", "\n", "# Rank by test MAE and colour the score columns; the styled table is the cell output.\n", "(\n", "    reg_metrics.sort_values(by=\"MAE_test\")\n", "    .style.background_gradient(\n", "        cmap=\"viridis\", low=1, high=0.3, subset=[\"MSE_train\", \"MSE_test\"]\n", "    )\n", "    .background_gradient(cmap=\"plasma\", low=0.3, high=1, subset=[\"MAE_test\", \"R2_test\"])\n", ")" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "|--- Density <= 1.04\n", "| |--- Density <= 1.03\n", "| | |--- value: [70.00]\n", "| |--- Density > 1.03\n", "| | |--- Density <= 1.04\n", "| | | |--- value: [65.00]\n", "| | |--- Density > 1.04\n", "| | | |--- value: [60.00]\n", "|--- Density > 1.04\n", "| |--- Density <= 1.07\n", "| | |--- TiO2 <= 0.03\n", "| | | |--- Al2O3 <= 0.03\n", "| | | | |--- Density <= 1.05\n", "| | | | | |--- Density <= 1.05\n", "| | | | | | |--- value: [50.00]\n", "| | | | | |--- Density > 1.05\n", "| | | | | | |--- value: [42.50]\n", "| | | | |--- Density > 1.05\n", "| | | | | |--- Density <= 1.06\n", "| | | | | | |--- value: [35.00]\n", "| | | | | |--- Density > 1.06\n", "| | | | | | |--- value: [22.50]\n", "| | | |--- Al2O3 > 0.03\n", "| | | | |--- Density <= 1.06\n", "| | | | | |--- Density <= 1.05\n", "| | | | | | |--- value: [70.00]\n", "| | | | | |--- Density > 1.05\n", "| | | | | | |--- value: [65.00]\n", "| | | | |--- Density > 1.06\n", "| | | | | |--- Density <= 1.07\n", "| | | | | | |--- value: [55.00]\n", "| | | | | |--- Density > 1.07\n", "| | | | | | |--- value: [50.00]\n", "| | |--- TiO2 > 0.03\n", "| | | |--- Density <= 1.06\n", "| | | | |--- value: [70.00]\n", "| | | |--- Density > 1.06\n", "| | | | |--- Density <= 1.06\n", "| | | | | |--- value: [65.00]\n", "| | | | |--- Density > 1.06\n", "| | | | | |--- value: 
[60.00]\n", "| |--- Density > 1.07\n", "| | |--- Density <= 1.12\n", "| | | |--- Density <= 1.08\n", "| | | | |--- Density <= 1.07\n", "| | | | | |--- value: [45.00]\n", "| | | | |--- Density > 1.07\n", "| | | | | |--- Density <= 1.08\n", "| | | | | | |--- value: [40.00]\n", "| | | | | |--- Density > 1.08\n", "| | | | | | |--- value: [35.00]\n", "| | | |--- Density > 1.08\n", "| | | | |--- Density <= 1.09\n", "| | | | | |--- value: [30.00]\n", "| | | | |--- Density > 1.09\n", "| | | | | |--- Al2O3 <= 0.03\n", "| | | | | | |--- value: [22.50]\n", "| | | | | |--- Al2O3 > 0.03\n", "| | | | | | |--- value: [20.00]\n", "| | |--- Density > 1.12\n", "| | | |--- Density <= 1.18\n", "| | | | |--- Density <= 1.15\n", "| | | | | |--- value: [70.00]\n", "| | | | |--- Density > 1.15\n", "| | | | | |--- Al2O3 <= 0.15\n", "| | | | | | |--- value: [65.00]\n", "| | | | | |--- Al2O3 > 0.15\n", "| | | | | | |--- value: [50.00]\n", "| | | |--- Density > 1.18\n", "| | | | |--- Al2O3 <= 0.15\n", "| | | | | |--- Density <= 1.20\n", "| | | | | | |--- value: [50.00]\n", "| | | | | |--- Density > 1.20\n", "| | | | | | |--- value: [30.00]\n", "| | | | |--- Al2O3 > 0.15\n", "| | | | | |--- Density <= 1.18\n", "| | | | | | |--- value: [30.00]\n", "| | | | | |--- Density > 1.18\n", "| | | | | | |--- value: [22.50]\n", "\n" ] } ], "source": [ "# Print the fitted decision tree as text rules, labelled with the feature column names.\n", "model = models[\"decision_tree\"][\"fitted\"]\n", "rules = tree.export_text(model, feature_names=X_train.columns.tolist())\n", "print(rules)" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [], "source": [ "import pickle\n", "\n", "# Persist the fitted tree. The context manager guarantees the file is flushed\n", "# and closed even if pickling raises, instead of leaking the handle.\n", "with open(\"data/temp_density_tree.model.sav\", \"wb\") as model_file:\n", "    pickle.dump(model, model_file)" ] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": 
"ipython3", "version": "3.12.9" } }, "nbformat": 4, "nbformat_minor": 2 }