{ "cells": [ { "cell_type": "code", "execution_count": 163, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TAl2O3TiO2Density
0200.00.01.274429
1250.00.01.261477
2350.00.01.234322
3400.00.01.220283
4450.00.01.205995
\n", "
" ], "text/plain": [ " T Al2O3 TiO2 Density\n", "0 20 0.0 0.0 1.274429\n", "1 25 0.0 0.0 1.261477\n", "2 35 0.0 0.0 1.234322\n", "3 40 0.0 0.0 1.220283\n", "4 45 0.0 0.0 1.205995" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TAl2O3TiO2Density
0300.000.01.248056
1550.000.01.176984
2250.050.01.382694
3300.050.01.366141
4350.050.01.349487
\n", "
" ], "text/plain": [ " T Al2O3 TiO2 Density\n", "0 30 0.00 0.0 1.248056\n", "1 55 0.00 0.0 1.176984\n", "2 25 0.05 0.0 1.382694\n", "3 30 0.05 0.0 1.366141\n", "4 35 0.05 0.0 1.349487" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import pandas as pd\n", "\n", "train = pd.read_csv(\"data/density_train.csv\", sep=\";\", decimal=\",\")\n", "test = pd.read_csv(\"data/density_test.csv\", sep=\";\", decimal=\",\")\n", "\n", "train[\"Density\"] = pow(train[\"Density\"], 4)\n", "test[\"Density\"] = pow(test[\"Density\"], 4)\n", "\n", "display(train.head())\n", "display(test.head())" ] }, { "cell_type": "code", "execution_count": 164, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Al2O3TiO2Density
00.00.01.274429
10.00.01.261477
20.00.01.234322
30.00.01.220283
40.00.01.205995
\n", "
" ], "text/plain": [ " Al2O3 TiO2 Density\n", "0 0.0 0.0 1.274429\n", "1 0.0 0.0 1.261477\n", "2 0.0 0.0 1.234322\n", "3 0.0 0.0 1.220283\n", "4 0.0 0.0 1.205995" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "0 20\n", "1 25\n", "2 35\n", "3 40\n", "4 45\n", "Name: T, dtype: int64" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Al2O3TiO2Density
00.000.01.248056
10.000.01.176984
20.050.01.382694
30.050.01.366141
40.050.01.349487
\n", "
" ], "text/plain": [ " Al2O3 TiO2 Density\n", "0 0.00 0.0 1.248056\n", "1 0.00 0.0 1.176984\n", "2 0.05 0.0 1.382694\n", "3 0.05 0.0 1.366141\n", "4 0.05 0.0 1.349487" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "0 30\n", "1 55\n", "2 25\n", "3 30\n", "4 35\n", "Name: T, dtype: int64" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "y_train = train[\"T\"]\n", "X_train = train.drop([\"T\"], axis=1)\n", "\n", "display(X_train.head())\n", "display(y_train.head())\n", "\n", "y_test = test[\"T\"]\n", "X_test = test.drop([\"T\"], axis=1)\n", "\n", "display(X_test.head())\n", "display(y_test.head())" ] }, { "cell_type": "code", "execution_count": 165, "metadata": {}, "outputs": [], "source": [ "from sklearn.pipeline import make_pipeline\n", "from sklearn.preprocessing import PolynomialFeatures\n", "from sklearn import linear_model, tree, neighbors, ensemble\n", "\n", "random_state = 9\n", "\n", "models = {\n", " \"linear\": {\"model\": linear_model.LinearRegression(n_jobs=-1)},\n", " \"linear_poly\": {\n", " \"model\": make_pipeline(\n", " PolynomialFeatures(degree=2),\n", " linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n", " )\n", " },\n", " \"linear_interact\": {\n", " \"model\": make_pipeline(\n", " PolynomialFeatures(interaction_only=True),\n", " linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n", " )\n", " },\n", " \"ridge\": {\"model\": linear_model.RidgeCV()},\n", " \"decision_tree\": {\n", " \"model\": tree.DecisionTreeRegressor(random_state=random_state, max_depth=6, criterion=\"absolute_error\")\n", " },\n", " \"knn\": {\"model\": neighbors.KNeighborsRegressor(n_neighbors=7, n_jobs=-1)},\n", " \"random_forest\": {\n", " \"model\": ensemble.RandomForestRegressor(\n", " max_depth=7, random_state=random_state, n_jobs=-1\n", " )\n", " },\n", "}" ] }, { "cell_type": "code", "execution_count": 166, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model: linear\n", "Model: linear_poly\n", "Model: linear_interact\n", "Model: ridge\n", "Model: decision_tree\n", "Model: knn\n", "Model: random_forest\n" ] } ], "source": [ "import math\n", "from sklearn import metrics\n", "\n", "for model_name in models.keys():\n", " print(f\"Model: {model_name}\")\n", " fitted_model = models[model_name][\"model\"].fit(\n", " X_train.values, y_train.values.ravel()\n", " )\n", " y_train_pred = fitted_model.predict(X_train.values)\n", " y_test_pred = fitted_model.predict(X_test.values)\n", " models[model_name][\"fitted\"] = fitted_model\n", " models[model_name][\"MSE_train\"] = metrics.mean_squared_error(y_train, y_train_pred)\n", " models[model_name][\"MSE_test\"] = metrics.mean_squared_error(y_test, y_test_pred)\n", " models[model_name][\"MAE_train\"] = metrics.mean_absolute_error(y_train, y_train_pred)\n", " models[model_name][\"MAE_test\"] = metrics.mean_absolute_error(y_test, y_test_pred)\n", " models[model_name][\"R2_train\"] = metrics.r2_score(y_train, y_train_pred)\n", " models[model_name][\"R2_test\"] = metrics.r2_score(y_test, y_test_pred)" ] }, { "cell_type": "code", "execution_count": 167, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 MSE_trainMSE_testMAE_trainMAE_testR2_trainR2_test
linear_poly0.4652830.2099210.5135270.3749800.9982480.999016
linear_interact16.02192916.8810613.2686163.2667390.9396570.920866
linear30.84039836.8821074.6795034.5944000.8838460.827106
decision_tree10.52631647.4264711.8421055.7352940.9603550.777676
random_forest20.21464554.5012403.5708926.5981330.9238660.744512
knn161.291622140.00600210.2067679.5378150.3925270.343686
ridge204.018844162.07869612.35318810.7986420.2316040.240215
\n" ], "text/plain": [ "" ] }, "execution_count": 167, "metadata": {}, "output_type": "execute_result" } ], "source": [ "reg_metrics = pd.DataFrame.from_dict(models, \"index\")[\n", " [\"MSE_train\", \"MSE_test\", \"MAE_train\", \"MAE_test\", \"R2_train\", \"R2_test\"]\n", "]\n", "reg_metrics.sort_values(by=\"MAE_test\").style.background_gradient(\n", " cmap=\"viridis\", low=1, high=0.3, subset=[\"MSE_train\", \"MSE_test\"]\n", ").background_gradient(cmap=\"plasma\", low=0.3, high=1, subset=[\"MAE_test\", \"R2_test\"])" ] }, { "cell_type": "code", "execution_count": 168, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "|--- Density <= 1.18\n", "| |--- Density <= 1.14\n", "| | |--- value: [70.00]\n", "| |--- Density > 1.14\n", "| | |--- Density <= 1.15\n", "| | | |--- value: [65.00]\n", "| | |--- Density > 1.15\n", "| | | |--- value: [60.00]\n", "|--- Density > 1.18\n", "| |--- Density <= 1.31\n", "| | |--- TiO2 <= 0.03\n", "| | | |--- Al2O3 <= 0.03\n", "| | | | |--- Density <= 1.23\n", "| | | | | |--- Density <= 1.20\n", "| | | | | | |--- value: [50.00]\n", "| | | | | |--- Density > 1.20\n", "| | | | | | |--- value: [42.50]\n", "| | | | |--- Density > 1.23\n", "| | | | | |--- Density <= 1.25\n", "| | | | | | |--- value: [35.00]\n", "| | | | | |--- Density > 1.25\n", "| | | | | | |--- value: [22.50]\n", "| | | |--- Al2O3 > 0.03\n", "| | | | |--- Density <= 1.26\n", "| | | | | |--- Density <= 1.24\n", "| | | | | | |--- value: [70.00]\n", "| | | | | |--- Density > 1.24\n", "| | | | | | |--- value: [65.00]\n", "| | | | |--- Density > 1.26\n", "| | | | | |--- Density <= 1.29\n", "| | | | | | |--- value: [55.00]\n", "| | | | | |--- Density > 1.29\n", "| | | | | | |--- value: [50.00]\n", "| | |--- TiO2 > 0.03\n", "| | | |--- Density <= 1.25\n", "| | | | |--- value: [70.00]\n", "| | | |--- Density > 1.25\n", "| | | | |--- Density <= 1.27\n", "| | | | | |--- value: [65.00]\n", "| | | | |--- Density > 1.27\n", "| | | | | |--- value: [60.00]\n", "| |--- Density > 1.31\n", "| | |--- Density <= 1.57\n", "| | | |--- Density <= 1.37\n", "| | | | |--- Density <= 1.33\n", "| | | | | |--- value: [45.00]\n", "| | | | |--- Density > 1.33\n", "| | | | | |--- Density <= 1.36\n", "| | | | | | |--- value: [40.00]\n", "| | | | | |--- Density > 1.36\n", "| | | | | | |--- value: [35.00]\n", "| | | |--- Density > 1.37\n", "| | | | |--- Density <= 1.39\n", "| | | | | |--- value: [30.00]\n", "| | | | |--- Density > 1.39\n", "| | | | | |--- Al2O3 <= 0.03\n", "| | | | | | |--- value: [22.50]\n", "| | | | | |--- Al2O3 > 0.03\n", "| | | | | | |--- value: [20.00]\n", "| | |--- Density > 1.57\n", "| | | |--- Density <= 1.93\n", "| | | | |--- Density <= 1.74\n", "| | | | | |--- value: [70.00]\n", "| | | | |--- Density > 1.74\n", "| | | | | |--- Al2O3 <= 0.15\n", "| | | | | | |--- value: [65.00]\n", "| | | | | |--- Al2O3 > 0.15\n", "| | | | | | |--- value: [50.00]\n", "| | | |--- Density > 1.93\n", "| | | | |--- Al2O3 <= 0.15\n", "| | | | | |--- Density <= 2.09\n", "| | | | | | |--- value: [50.00]\n", "| | | | | |--- Density > 2.09\n", "| | | | | | |--- value: [30.00]\n", "| | | | |--- Al2O3 > 0.15\n", "| | | | | |--- Density <= 1.95\n", "| | | | | | |--- value: [30.00]\n", "| | | | | |--- Density > 1.95\n", "| | | | | | |--- value: [22.50]\n", "\n" ] } ], "source": [ "model = models[\"decision_tree\"][\"fitted\"]\n", "rules = tree.export_text(\n", " model, feature_names=X_train.columns.values.tolist()\n", ")\n", "print(rules)" ] }, { "cell_type": "code", "execution_count": 169, "metadata": {}, "outputs": [], "source": [ "import pickle\n", "\n", "pickle.dump(model, open(\"data/temp_density_tree.model.sav\", \"wb\"))" ] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.9" } }, "nbformat": 4, "nbformat_minor": 2 }