{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TAl2O3TiO2Viscosity
0200.00.03.707
1250.00.03.180
2350.00.02.361
3450.00.01.832
4500.00.01.629
\n", "
" ], "text/plain": [ " T Al2O3 TiO2 Viscosity\n", "0 20 0.0 0.0 3.707\n", "1 25 0.0 0.0 3.180\n", "2 35 0.0 0.0 2.361\n", "3 45 0.0 0.0 1.832\n", "4 50 0.0 0.0 1.629" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TAl2O3TiO2Viscosity
0300.000.02.716
1400.000.02.073
2600.000.01.329
3650.000.01.211
4250.050.04.120
\n", "
" ], "text/plain": [ " T Al2O3 TiO2 Viscosity\n", "0 30 0.00 0.0 2.716\n", "1 40 0.00 0.0 2.073\n", "2 60 0.00 0.0 1.329\n", "3 65 0.00 0.0 1.211\n", "4 25 0.05 0.0 4.120" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import pandas as pd\n", "\n", "# Read both splits with ';' as the field separator and ',' as the decimal mark.\n", "train, test = (\n", "    pd.read_csv(csv_path, sep=\";\", decimal=\",\")\n", "    for csv_path in (\"data/viscosity_train.csv\", \"data/viscosity_test.csv\")\n", ")\n", "\n", "# Preview the first rows of each split.\n", "display(train.head(), test.head())" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Al2O3TiO2Viscosity
00.00.03.707
10.00.03.180
20.00.02.361
30.00.01.832
40.00.01.629
\n", "
" ], "text/plain": [ " Al2O3 TiO2 Viscosity\n", "0 0.0 0.0 3.707\n", "1 0.0 0.0 3.180\n", "2 0.0 0.0 2.361\n", "3 0.0 0.0 1.832\n", "4 0.0 0.0 1.629" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "0 20\n", "1 25\n", "2 35\n", "3 45\n", "4 50\n", "Name: T, dtype: int64" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Al2O3TiO2Viscosity
00.000.02.716
10.000.02.073
20.000.01.329
30.000.01.211
40.050.04.120
\n", "
" ], "text/plain": [ " Al2O3 TiO2 Viscosity\n", "0 0.00 0.0 2.716\n", "1 0.00 0.0 2.073\n", "2 0.00 0.0 1.329\n", "3 0.00 0.0 1.211\n", "4 0.05 0.0 4.120" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "0 30\n", "1 40\n", "2 60\n", "3 65\n", "4 25\n", "Name: T, dtype: int64" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Column \"T\" is the regression target; every other column is a feature.\n", "y_train = train[\"T\"]\n", "X_train = train.drop(columns=[\"T\"])\n", "\n", "display(X_train.head(), y_train.head())\n", "\n", "y_test = test[\"T\"]\n", "X_test = test.drop(columns=[\"T\"])\n", "\n", "display(X_test.head(), y_test.head())" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "from sklearn.pipeline import make_pipeline\n", "from sklearn.preprocessing import PolynomialFeatures\n", "from sklearn import linear_model, tree, neighbors, ensemble\n", "\n", "# Seed shared by the estimators that take a random_state.\n", "random_state = 9\n", "\n", "# Candidate regressors keyed by name; each entry starts as a dict holding the\n", "# unfitted estimator under \"model\".\n", "models = {\n", "    name: {\"model\": estimator}\n", "    for name, estimator in {\n", "        \"linear\": linear_model.LinearRegression(n_jobs=-1),\n", "        \"linear_poly\": make_pipeline(\n", "            PolynomialFeatures(degree=2),\n", "            linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n", "        ),\n", "        \"linear_interact\": make_pipeline(\n", "            PolynomialFeatures(interaction_only=True),\n", "            linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n", "        ),\n", "        \"ridge\": linear_model.RidgeCV(),\n", "        \"decision_tree\": tree.DecisionTreeRegressor(\n", "            random_state=random_state, max_depth=6, criterion=\"absolute_error\"\n", "        ),\n", "        \"knn\": neighbors.KNeighborsRegressor(n_neighbors=7, n_jobs=-1),\n", "        \"random_forest\": ensemble.RandomForestRegressor(\n", "            max_depth=7, random_state=random_state, n_jobs=-1\n", "        ),\n", "    }.items()\n", "}" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model: 
linear\n", "Model: linear_poly\n", "Model: linear_interact\n", "Model: ridge\n", "Model: decision_tree\n", "Model: knn\n", "Model: random_forest\n" ] } ], "source": [ "import math\n", "from sklearn import metrics\n", "\n", "for model_name in models.keys():\n", " print(f\"Model: {model_name}\")\n", " fitted_model = models[model_name][\"model\"].fit(\n", " X_train.values, y_train.values.ravel()\n", " )\n", " y_train_pred = fitted_model.predict(X_train.values)\n", " y_test_pred = fitted_model.predict(X_test.values)\n", " models[model_name][\"fitted\"] = fitted_model\n", " models[model_name][\"MSE_train\"] = metrics.mean_squared_error(y_train, y_train_pred)\n", " models[model_name][\"MSE_test\"] = metrics.mean_squared_error(y_test, y_test_pred)\n", " models[model_name][\"MAE_train\"] = metrics.mean_absolute_error(y_train, y_train_pred)\n", " models[model_name][\"MAE_test\"] = metrics.mean_absolute_error(y_test, y_test_pred)\n", " models[model_name][\"R2_train\"] = metrics.r2_score(y_train, y_train_pred)\n", " models[model_name][\"R2_test\"] = metrics.r2_score(y_test, y_test_pred)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 MSE_trainMSE_testMAE_trainMAE_testR2_trainR2_test
linear_poly4.8277684.8772961.5226431.7430580.9808640.974964
linear_interact21.78634823.4595723.8309964.3811150.9136440.879577
linear27.76651035.4303134.0880065.1067820.8899400.818129
ridge31.82747636.2306064.3830085.2264800.8738430.814021
random_forest8.52528545.4446512.5429355.7495100.9662080.766723
decision_tree3.28947445.5882350.9210536.1764710.9869610.765986
knn61.85553264.1656666.5225566.8067230.7548190.670624
\n" ], "text/plain": [ "" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# One row per model, keeping only the score columns.\n", "reg_metrics = pd.DataFrame.from_dict(models, orient=\"index\").loc[\n", "    :, [\"MSE_train\", \"MSE_test\", \"MAE_train\", \"MAE_test\", \"R2_train\", \"R2_test\"]\n", "]\n", "\n", "# Order by ascending test MAE and colour-code the selected score columns.\n", "(\n", "    reg_metrics.sort_values(by=\"MAE_test\")\n", "    .style.background_gradient(\n", "        cmap=\"viridis\", low=1, high=0.3, subset=[\"MSE_train\", \"MSE_test\"]\n", "    )\n", "    .background_gradient(\n", "        cmap=\"plasma\", low=0.3, high=1, subset=[\"MAE_test\", \"R2_test\"]\n", "    )\n", ")" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "|--- Viscosity <= 2.86\n", "| |--- Viscosity <= 1.38\n", "| | |--- value: [70.00]\n", "| |--- Viscosity > 1.38\n", "| | |--- Viscosity <= 1.78\n", "| | | |--- Viscosity <= 1.72\n", "| | | | |--- TiO2 <= 0.03\n", "| | | | | |--- Al2O3 <= 0.03\n", "| | | | | | |--- value: [52.50]\n", "| | | | | |--- Al2O3 > 0.03\n", "| | | | | | |--- value: [57.50]\n", "| | | | |--- TiO2 > 0.03\n", "| | | | | |--- value: [60.00]\n", "| | | |--- Viscosity > 1.72\n", "| | | | |--- value: [70.00]\n", "| | |--- Viscosity > 1.78\n", "| | | |--- TiO2 <= 0.18\n", "| | | | |--- Al2O3 <= 0.18\n", "| | | | | |--- Viscosity <= 2.24\n", "| | | | | | |--- value: [50.00]\n", "| | | | | |--- Viscosity > 2.24\n", "| | | | | | |--- value: [40.00]\n", "| | | | |--- Al2O3 > 0.18\n", "| | | | | |--- Viscosity <= 2.29\n", "| | | | | | |--- value: [60.00]\n", "| | | | | |--- Viscosity > 2.29\n", "| | | | | | |--- value: [55.00]\n", "| | | |--- TiO2 > 0.18\n", "| | | | |--- Viscosity <= 2.22\n", "| | | | | |--- value: [70.00]\n", "| | | | |--- Viscosity > 2.22\n", "| | | | | |--- Viscosity <= 2.69\n", "| | | | | | |--- value: [60.00]\n", "| | | | | |--- Viscosity > 2.69\n", "| | | | | | |--- value: [55.00]\n", "|--- Viscosity > 2.86\n", "| |--- Viscosity <= 3.64\n", "| | |--- TiO2 <= 0.18\n", "| | | |--- Viscosity <= 3.15\n", "| | | | |--- value: [35.00]\n", 
"| | | |--- Viscosity > 3.15\n", "| | | | |--- Viscosity <= 3.47\n", "| | | | | |--- Al2O3 <= 0.03\n", "| | | | | | |--- value: [25.00]\n", "| | | | | |--- Al2O3 > 0.03\n", "| | | | | | |--- value: [30.00]\n", "| | | | |--- Viscosity > 3.47\n", "| | | | | |--- value: [40.00]\n", "| | |--- TiO2 > 0.18\n", "| | | |--- value: [45.00]\n", "| |--- Viscosity > 3.64\n", "| | |--- Viscosity <= 6.27\n", "| | | |--- TiO2 <= 0.18\n", "| | | | |--- Al2O3 <= 0.18\n", "| | | | | |--- TiO2 <= 0.03\n", "| | | | | | |--- value: [20.00]\n", "| | | | | |--- TiO2 > 0.03\n", "| | | | | | |--- value: [22.50]\n", "| | | | |--- Al2O3 > 0.18\n", "| | | | | |--- Viscosity <= 4.42\n", "| | | | | | |--- value: [35.00]\n", "| | | | | |--- Viscosity > 4.42\n", "| | | | | | |--- value: [27.50]\n", "| | | |--- TiO2 > 0.18\n", "| | | | |--- Viscosity <= 4.65\n", "| | | | | |--- value: [35.00]\n", "| | | | |--- Viscosity > 4.65\n", "| | | | | |--- Viscosity <= 5.40\n", "| | | | | | |--- value: [30.00]\n", "| | | | | |--- Viscosity > 5.40\n", "| | | | | | |--- value: [25.00]\n", "| | |--- Viscosity > 6.27\n", "| | | |--- value: [20.00]\n", "\n" ] } ], "source": [ "# Print the fitted decision tree as text rules, labelled with the feature column names.\n", "model = models[\"decision_tree\"][\"fitted\"]\n", "rules = tree.export_text(\n", "    model, feature_names=X_train.columns.values.tolist()\n", ")\n", "print(rules)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "import pickle\n", "\n", "# Write through a context manager so the file handle is closed (and the data\n", "# flushed) as soon as pickling finishes, even if pickle.dump raises.\n", "with open(\"data/temp_viscosity_tree.model.sav\", \"wb\") as model_file:\n", "    pickle.dump(model, model_file)" ] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.9" } }, "nbformat": 4, "nbformat_minor": 2 }