{
"cells": [
{
"cell_type": "code",
"execution_count": 163,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" T | \n",
" Al2O3 | \n",
" TiO2 | \n",
" Density | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 20 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.274429 | \n",
"
\n",
" \n",
" 1 | \n",
" 25 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.261477 | \n",
"
\n",
" \n",
" 2 | \n",
" 35 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.234322 | \n",
"
\n",
" \n",
" 3 | \n",
" 40 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.220283 | \n",
"
\n",
" \n",
" 4 | \n",
" 45 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.205995 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" T Al2O3 TiO2 Density\n",
"0 20 0.0 0.0 1.274429\n",
"1 25 0.0 0.0 1.261477\n",
"2 35 0.0 0.0 1.234322\n",
"3 40 0.0 0.0 1.220283\n",
"4 45 0.0 0.0 1.205995"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" T | \n",
" Al2O3 | \n",
" TiO2 | \n",
" Density | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 30 | \n",
" 0.00 | \n",
" 0.0 | \n",
" 1.248056 | \n",
"
\n",
" \n",
" 1 | \n",
" 55 | \n",
" 0.00 | \n",
" 0.0 | \n",
" 1.176984 | \n",
"
\n",
" \n",
" 2 | \n",
" 25 | \n",
" 0.05 | \n",
" 0.0 | \n",
" 1.382694 | \n",
"
\n",
" \n",
" 3 | \n",
" 30 | \n",
" 0.05 | \n",
" 0.0 | \n",
" 1.366141 | \n",
"
\n",
" \n",
" 4 | \n",
" 35 | \n",
" 0.05 | \n",
" 0.0 | \n",
" 1.349487 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" T Al2O3 TiO2 Density\n",
"0 30 0.00 0.0 1.248056\n",
"1 55 0.00 0.0 1.176984\n",
"2 25 0.05 0.0 1.382694\n",
"3 30 0.05 0.0 1.366141\n",
"4 35 0.05 0.0 1.349487"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import pandas as pd\n",
"\n",
"train = pd.read_csv(\"data/density_train.csv\", sep=\";\", decimal=\",\")\n",
"test = pd.read_csv(\"data/density_test.csv\", sep=\";\", decimal=\",\")\n",
"\n",
"train[\"Density\"] = pow(train[\"Density\"], 4)\n",
"test[\"Density\"] = pow(test[\"Density\"], 4)\n",
"\n",
"display(train.head())\n",
"display(test.head())"
]
},
{
"cell_type": "code",
"execution_count": 164,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Al2O3 | \n",
" TiO2 | \n",
" Density | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.274429 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.261477 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.234322 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.220283 | \n",
"
\n",
" \n",
" 4 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.205995 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Al2O3 TiO2 Density\n",
"0 0.0 0.0 1.274429\n",
"1 0.0 0.0 1.261477\n",
"2 0.0 0.0 1.234322\n",
"3 0.0 0.0 1.220283\n",
"4 0.0 0.0 1.205995"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"0 20\n",
"1 25\n",
"2 35\n",
"3 40\n",
"4 45\n",
"Name: T, dtype: int64"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Al2O3 | \n",
" TiO2 | \n",
" Density | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.00 | \n",
" 0.0 | \n",
" 1.248056 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.00 | \n",
" 0.0 | \n",
" 1.176984 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.05 | \n",
" 0.0 | \n",
" 1.382694 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.05 | \n",
" 0.0 | \n",
" 1.366141 | \n",
"
\n",
" \n",
" 4 | \n",
" 0.05 | \n",
" 0.0 | \n",
" 1.349487 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Al2O3 TiO2 Density\n",
"0 0.00 0.0 1.248056\n",
"1 0.00 0.0 1.176984\n",
"2 0.05 0.0 1.382694\n",
"3 0.05 0.0 1.366141\n",
"4 0.05 0.0 1.349487"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"0 30\n",
"1 55\n",
"2 25\n",
"3 30\n",
"4 35\n",
"Name: T, dtype: int64"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"y_train = train[\"T\"]\n",
"X_train = train.drop([\"T\"], axis=1)\n",
"\n",
"display(X_train.head())\n",
"display(y_train.head())\n",
"\n",
"y_test = test[\"T\"]\n",
"X_test = test.drop([\"T\"], axis=1)\n",
"\n",
"display(X_test.head())\n",
"display(y_test.head())"
]
},
{
"cell_type": "code",
"execution_count": 165,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.pipeline import make_pipeline\n",
"from sklearn.preprocessing import PolynomialFeatures\n",
"from sklearn import linear_model, tree, neighbors, ensemble\n",
"\n",
"random_state = 9\n",
"\n",
"models = {\n",
" \"linear\": {\"model\": linear_model.LinearRegression(n_jobs=-1)},\n",
" \"linear_poly\": {\n",
" \"model\": make_pipeline(\n",
" PolynomialFeatures(degree=2),\n",
" linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n",
" )\n",
" },\n",
" \"linear_interact\": {\n",
" \"model\": make_pipeline(\n",
" PolynomialFeatures(interaction_only=True),\n",
" linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n",
" )\n",
" },\n",
" \"ridge\": {\"model\": linear_model.RidgeCV()},\n",
" \"decision_tree\": {\n",
" \"model\": tree.DecisionTreeRegressor(random_state=random_state, max_depth=6, criterion=\"absolute_error\")\n",
" },\n",
" \"knn\": {\"model\": neighbors.KNeighborsRegressor(n_neighbors=7, n_jobs=-1)},\n",
" \"random_forest\": {\n",
" \"model\": ensemble.RandomForestRegressor(\n",
" max_depth=7, random_state=random_state, n_jobs=-1\n",
" )\n",
" },\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 166,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model: linear\n",
"Model: linear_poly\n",
"Model: linear_interact\n",
"Model: ridge\n",
"Model: decision_tree\n",
"Model: knn\n",
"Model: random_forest\n"
]
}
],
"source": [
"import math\n",
"from sklearn import metrics\n",
"\n",
"for model_name in models.keys():\n",
" print(f\"Model: {model_name}\")\n",
" fitted_model = models[model_name][\"model\"].fit(\n",
" X_train.values, y_train.values.ravel()\n",
" )\n",
" y_train_pred = fitted_model.predict(X_train.values)\n",
" y_test_pred = fitted_model.predict(X_test.values)\n",
" models[model_name][\"fitted\"] = fitted_model\n",
" models[model_name][\"MSE_train\"] = metrics.mean_squared_error(y_train, y_train_pred)\n",
" models[model_name][\"MSE_test\"] = metrics.mean_squared_error(y_test, y_test_pred)\n",
" models[model_name][\"MAE_train\"] = metrics.mean_absolute_error(y_train, y_train_pred)\n",
" models[model_name][\"MAE_test\"] = metrics.mean_absolute_error(y_test, y_test_pred)\n",
" models[model_name][\"R2_train\"] = metrics.r2_score(y_train, y_train_pred)\n",
" models[model_name][\"R2_test\"] = metrics.r2_score(y_test, y_test_pred)"
]
},
{
"cell_type": "code",
"execution_count": 167,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" | \n",
" MSE_train | \n",
" MSE_test | \n",
" MAE_train | \n",
" MAE_test | \n",
" R2_train | \n",
" R2_test | \n",
"
\n",
" \n",
" \n",
" \n",
" linear_poly | \n",
" 0.465283 | \n",
" 0.209921 | \n",
" 0.513527 | \n",
" 0.374980 | \n",
" 0.998248 | \n",
" 0.999016 | \n",
"
\n",
" \n",
" linear_interact | \n",
" 16.021929 | \n",
" 16.881061 | \n",
" 3.268616 | \n",
" 3.266739 | \n",
" 0.939657 | \n",
" 0.920866 | \n",
"
\n",
" \n",
" linear | \n",
" 30.840398 | \n",
" 36.882107 | \n",
" 4.679503 | \n",
" 4.594400 | \n",
" 0.883846 | \n",
" 0.827106 | \n",
"
\n",
" \n",
" decision_tree | \n",
" 10.526316 | \n",
" 47.426471 | \n",
" 1.842105 | \n",
" 5.735294 | \n",
" 0.960355 | \n",
" 0.777676 | \n",
"
\n",
" \n",
" random_forest | \n",
" 20.214645 | \n",
" 54.501240 | \n",
" 3.570892 | \n",
" 6.598133 | \n",
" 0.923866 | \n",
" 0.744512 | \n",
"
\n",
" \n",
" knn | \n",
" 161.291622 | \n",
" 140.006002 | \n",
" 10.206767 | \n",
" 9.537815 | \n",
" 0.392527 | \n",
" 0.343686 | \n",
"
\n",
" \n",
" ridge | \n",
" 204.018844 | \n",
" 162.078696 | \n",
" 12.353188 | \n",
" 10.798642 | \n",
" 0.231604 | \n",
" 0.240215 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"execution_count": 167,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"reg_metrics = pd.DataFrame.from_dict(models, \"index\")[\n",
" [\"MSE_train\", \"MSE_test\", \"MAE_train\", \"MAE_test\", \"R2_train\", \"R2_test\"]\n",
"]\n",
"reg_metrics.sort_values(by=\"MAE_test\").style.background_gradient(\n",
" cmap=\"viridis\", low=1, high=0.3, subset=[\"MSE_train\", \"MSE_test\"]\n",
").background_gradient(cmap=\"plasma\", low=0.3, high=1, subset=[\"MAE_test\", \"R2_test\"])"
]
},
{
"cell_type": "code",
"execution_count": 168,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"|--- Density <= 1.18\n",
"| |--- Density <= 1.14\n",
"| | |--- value: [70.00]\n",
"| |--- Density > 1.14\n",
"| | |--- Density <= 1.15\n",
"| | | |--- value: [65.00]\n",
"| | |--- Density > 1.15\n",
"| | | |--- value: [60.00]\n",
"|--- Density > 1.18\n",
"| |--- Density <= 1.31\n",
"| | |--- TiO2 <= 0.03\n",
"| | | |--- Al2O3 <= 0.03\n",
"| | | | |--- Density <= 1.23\n",
"| | | | | |--- Density <= 1.20\n",
"| | | | | | |--- value: [50.00]\n",
"| | | | | |--- Density > 1.20\n",
"| | | | | | |--- value: [42.50]\n",
"| | | | |--- Density > 1.23\n",
"| | | | | |--- Density <= 1.25\n",
"| | | | | | |--- value: [35.00]\n",
"| | | | | |--- Density > 1.25\n",
"| | | | | | |--- value: [22.50]\n",
"| | | |--- Al2O3 > 0.03\n",
"| | | | |--- Density <= 1.26\n",
"| | | | | |--- Density <= 1.24\n",
"| | | | | | |--- value: [70.00]\n",
"| | | | | |--- Density > 1.24\n",
"| | | | | | |--- value: [65.00]\n",
"| | | | |--- Density > 1.26\n",
"| | | | | |--- Density <= 1.29\n",
"| | | | | | |--- value: [55.00]\n",
"| | | | | |--- Density > 1.29\n",
"| | | | | | |--- value: [50.00]\n",
"| | |--- TiO2 > 0.03\n",
"| | | |--- Density <= 1.25\n",
"| | | | |--- value: [70.00]\n",
"| | | |--- Density > 1.25\n",
"| | | | |--- Density <= 1.27\n",
"| | | | | |--- value: [65.00]\n",
"| | | | |--- Density > 1.27\n",
"| | | | | |--- value: [60.00]\n",
"| |--- Density > 1.31\n",
"| | |--- Density <= 1.57\n",
"| | | |--- Density <= 1.37\n",
"| | | | |--- Density <= 1.33\n",
"| | | | | |--- value: [45.00]\n",
"| | | | |--- Density > 1.33\n",
"| | | | | |--- Density <= 1.36\n",
"| | | | | | |--- value: [40.00]\n",
"| | | | | |--- Density > 1.36\n",
"| | | | | | |--- value: [35.00]\n",
"| | | |--- Density > 1.37\n",
"| | | | |--- Density <= 1.39\n",
"| | | | | |--- value: [30.00]\n",
"| | | | |--- Density > 1.39\n",
"| | | | | |--- Al2O3 <= 0.03\n",
"| | | | | | |--- value: [22.50]\n",
"| | | | | |--- Al2O3 > 0.03\n",
"| | | | | | |--- value: [20.00]\n",
"| | |--- Density > 1.57\n",
"| | | |--- Density <= 1.93\n",
"| | | | |--- Density <= 1.74\n",
"| | | | | |--- value: [70.00]\n",
"| | | | |--- Density > 1.74\n",
"| | | | | |--- Al2O3 <= 0.15\n",
"| | | | | | |--- value: [65.00]\n",
"| | | | | |--- Al2O3 > 0.15\n",
"| | | | | | |--- value: [50.00]\n",
"| | | |--- Density > 1.93\n",
"| | | | |--- Al2O3 <= 0.15\n",
"| | | | | |--- Density <= 2.09\n",
"| | | | | | |--- value: [50.00]\n",
"| | | | | |--- Density > 2.09\n",
"| | | | | | |--- value: [30.00]\n",
"| | | | |--- Al2O3 > 0.15\n",
"| | | | | |--- Density <= 1.95\n",
"| | | | | | |--- value: [30.00]\n",
"| | | | | |--- Density > 1.95\n",
"| | | | | | |--- value: [22.50]\n",
"\n"
]
}
],
"source": [
"model = models[\"decision_tree\"][\"fitted\"]\n",
"rules = tree.export_text(\n",
" model, feature_names=X_train.columns.values.tolist()\n",
")\n",
"print(rules)"
]
},
{
"cell_type": "code",
"execution_count": 169,
"metadata": {},
"outputs": [],
"source": [
"import pickle\n",
"\n",
"pickle.dump(model, open(\"data/temp_density_tree.model.sav\", \"wb\"))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}