fuzzy-rules-generator/distress.ipynb

3062 lines
192 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Financial Distress</th>\n",
" <th>x1</th>\n",
" <th>x2</th>\n",
" <th>x3</th>\n",
" <th>x4</th>\n",
" <th>x5</th>\n",
" <th>x6</th>\n",
" <th>x7</th>\n",
" <th>x8</th>\n",
" <th>x9</th>\n",
" <th>...</th>\n",
" <th>x74</th>\n",
" <th>x75</th>\n",
" <th>x76</th>\n",
" <th>x77</th>\n",
" <th>x78</th>\n",
" <th>x79</th>\n",
" <th>x80</th>\n",
" <th>x81</th>\n",
" <th>x82</th>\n",
" <th>x83</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.010636</td>\n",
" <td>1.2810</td>\n",
" <td>0.022934</td>\n",
" <td>0.87454</td>\n",
" <td>1.21640</td>\n",
" <td>0.060940</td>\n",
" <td>0.188270</td>\n",
" <td>0.52510</td>\n",
" <td>0.018854</td>\n",
" <td>0.182790</td>\n",
" <td>...</td>\n",
" <td>85.437</td>\n",
" <td>27.07</td>\n",
" <td>26.102</td>\n",
" <td>16.000</td>\n",
" <td>16.0</td>\n",
" <td>0.2</td>\n",
" <td>22</td>\n",
" <td>0.060390</td>\n",
" <td>30</td>\n",
" <td>49</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>-0.455970</td>\n",
" <td>1.2700</td>\n",
" <td>0.006454</td>\n",
" <td>0.82067</td>\n",
" <td>1.00490</td>\n",
" <td>-0.014080</td>\n",
" <td>0.181040</td>\n",
" <td>0.62288</td>\n",
" <td>0.006423</td>\n",
" <td>0.035991</td>\n",
" <td>...</td>\n",
" <td>107.090</td>\n",
" <td>31.31</td>\n",
" <td>30.194</td>\n",
" <td>17.000</td>\n",
" <td>16.0</td>\n",
" <td>0.4</td>\n",
" <td>22</td>\n",
" <td>0.010636</td>\n",
" <td>31</td>\n",
" <td>50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>-0.325390</td>\n",
" <td>1.0529</td>\n",
" <td>-0.059379</td>\n",
" <td>0.92242</td>\n",
" <td>0.72926</td>\n",
" <td>0.020476</td>\n",
" <td>0.044865</td>\n",
" <td>0.43292</td>\n",
" <td>-0.081423</td>\n",
" <td>-0.765400</td>\n",
" <td>...</td>\n",
" <td>120.870</td>\n",
" <td>36.07</td>\n",
" <td>35.273</td>\n",
" <td>17.000</td>\n",
" <td>15.0</td>\n",
" <td>-0.2</td>\n",
" <td>22</td>\n",
" <td>-0.455970</td>\n",
" <td>32</td>\n",
" <td>51</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>-0.566570</td>\n",
" <td>1.1131</td>\n",
" <td>-0.015229</td>\n",
" <td>0.85888</td>\n",
" <td>0.80974</td>\n",
" <td>0.076037</td>\n",
" <td>0.091033</td>\n",
" <td>0.67546</td>\n",
" <td>-0.018807</td>\n",
" <td>-0.107910</td>\n",
" <td>...</td>\n",
" <td>54.806</td>\n",
" <td>39.80</td>\n",
" <td>38.377</td>\n",
" <td>17.167</td>\n",
" <td>16.0</td>\n",
" <td>5.6</td>\n",
" <td>22</td>\n",
" <td>-0.325390</td>\n",
" <td>33</td>\n",
" <td>52</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1.357300</td>\n",
" <td>1.0623</td>\n",
" <td>0.107020</td>\n",
" <td>0.81460</td>\n",
" <td>0.83593</td>\n",
" <td>0.199960</td>\n",
" <td>0.047800</td>\n",
" <td>0.74200</td>\n",
" <td>0.128030</td>\n",
" <td>0.577250</td>\n",
" <td>...</td>\n",
" <td>85.437</td>\n",
" <td>27.07</td>\n",
" <td>26.102</td>\n",
" <td>16.000</td>\n",
" <td>16.0</td>\n",
" <td>0.2</td>\n",
" <td>29</td>\n",
" <td>1.251000</td>\n",
" <td>7</td>\n",
" <td>27</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3667</th>\n",
" <td>0.438020</td>\n",
" <td>2.2605</td>\n",
" <td>0.202890</td>\n",
" <td>0.16037</td>\n",
" <td>0.18588</td>\n",
" <td>0.175970</td>\n",
" <td>0.198400</td>\n",
" <td>2.22360</td>\n",
" <td>1.091500</td>\n",
" <td>0.241640</td>\n",
" <td>...</td>\n",
" <td>100.000</td>\n",
" <td>100.00</td>\n",
" <td>100.000</td>\n",
" <td>17.125</td>\n",
" <td>14.5</td>\n",
" <td>-7.0</td>\n",
" <td>37</td>\n",
" <td>0.436380</td>\n",
" <td>4</td>\n",
" <td>41</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3668</th>\n",
" <td>0.482410</td>\n",
" <td>1.9615</td>\n",
" <td>0.216440</td>\n",
" <td>0.20095</td>\n",
" <td>0.21642</td>\n",
" <td>0.203590</td>\n",
" <td>0.189870</td>\n",
" <td>1.93820</td>\n",
" <td>1.000100</td>\n",
" <td>0.270870</td>\n",
" <td>...</td>\n",
" <td>91.500</td>\n",
" <td>130.50</td>\n",
" <td>132.400</td>\n",
" <td>20.000</td>\n",
" <td>14.5</td>\n",
" <td>-16.0</td>\n",
" <td>37</td>\n",
" <td>0.438020</td>\n",
" <td>5</td>\n",
" <td>42</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3669</th>\n",
" <td>0.500770</td>\n",
" <td>1.7099</td>\n",
" <td>0.207970</td>\n",
" <td>0.26136</td>\n",
" <td>0.21399</td>\n",
" <td>0.193670</td>\n",
" <td>0.183890</td>\n",
" <td>1.68980</td>\n",
" <td>0.971860</td>\n",
" <td>0.281560</td>\n",
" <td>...</td>\n",
" <td>87.100</td>\n",
" <td>175.90</td>\n",
" <td>178.100</td>\n",
" <td>20.000</td>\n",
" <td>14.5</td>\n",
" <td>-20.2</td>\n",
" <td>37</td>\n",
" <td>0.482410</td>\n",
" <td>6</td>\n",
" <td>43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3670</th>\n",
" <td>0.611030</td>\n",
" <td>1.5590</td>\n",
" <td>0.185450</td>\n",
" <td>0.30728</td>\n",
" <td>0.19307</td>\n",
" <td>0.172140</td>\n",
" <td>0.170680</td>\n",
" <td>1.53890</td>\n",
" <td>0.960570</td>\n",
" <td>0.267720</td>\n",
" <td>...</td>\n",
" <td>92.900</td>\n",
" <td>203.20</td>\n",
" <td>204.500</td>\n",
" <td>22.000</td>\n",
" <td>22.0</td>\n",
" <td>6.4</td>\n",
" <td>37</td>\n",
" <td>0.500770</td>\n",
" <td>7</td>\n",
" <td>44</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3671</th>\n",
" <td>0.518650</td>\n",
" <td>1.6148</td>\n",
" <td>0.176760</td>\n",
" <td>0.36369</td>\n",
" <td>0.18442</td>\n",
" <td>0.169550</td>\n",
" <td>0.197860</td>\n",
" <td>1.58420</td>\n",
" <td>0.958450</td>\n",
" <td>0.277780</td>\n",
" <td>...</td>\n",
" <td>91.700</td>\n",
" <td>227.50</td>\n",
" <td>214.500</td>\n",
" <td>21.000</td>\n",
" <td>20.5</td>\n",
" <td>8.6</td>\n",
" <td>37</td>\n",
" <td>0.611030</td>\n",
" <td>8</td>\n",
" <td>45</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3672 rows × 84 columns</p>\n",
"</div>"
],
"text/plain": [
" Financial Distress x1 x2 x3 x4 x5 \\\n",
"0 0.010636 1.2810 0.022934 0.87454 1.21640 0.060940 \n",
"1 -0.455970 1.2700 0.006454 0.82067 1.00490 -0.014080 \n",
"2 -0.325390 1.0529 -0.059379 0.92242 0.72926 0.020476 \n",
"3 -0.566570 1.1131 -0.015229 0.85888 0.80974 0.076037 \n",
"4 1.357300 1.0623 0.107020 0.81460 0.83593 0.199960 \n",
"... ... ... ... ... ... ... \n",
"3667 0.438020 2.2605 0.202890 0.16037 0.18588 0.175970 \n",
"3668 0.482410 1.9615 0.216440 0.20095 0.21642 0.203590 \n",
"3669 0.500770 1.7099 0.207970 0.26136 0.21399 0.193670 \n",
"3670 0.611030 1.5590 0.185450 0.30728 0.19307 0.172140 \n",
"3671 0.518650 1.6148 0.176760 0.36369 0.18442 0.169550 \n",
"\n",
" x6 x7 x8 x9 ... x74 x75 x76 \\\n",
"0 0.188270 0.52510 0.018854 0.182790 ... 85.437 27.07 26.102 \n",
"1 0.181040 0.62288 0.006423 0.035991 ... 107.090 31.31 30.194 \n",
"2 0.044865 0.43292 -0.081423 -0.765400 ... 120.870 36.07 35.273 \n",
"3 0.091033 0.67546 -0.018807 -0.107910 ... 54.806 39.80 38.377 \n",
"4 0.047800 0.74200 0.128030 0.577250 ... 85.437 27.07 26.102 \n",
"... ... ... ... ... ... ... ... ... \n",
"3667 0.198400 2.22360 1.091500 0.241640 ... 100.000 100.00 100.000 \n",
"3668 0.189870 1.93820 1.000100 0.270870 ... 91.500 130.50 132.400 \n",
"3669 0.183890 1.68980 0.971860 0.281560 ... 87.100 175.90 178.100 \n",
"3670 0.170680 1.53890 0.960570 0.267720 ... 92.900 203.20 204.500 \n",
"3671 0.197860 1.58420 0.958450 0.277780 ... 91.700 227.50 214.500 \n",
"\n",
" x77 x78 x79 x80 x81 x82 x83 \n",
"0 16.000 16.0 0.2 22 0.060390 30 49 \n",
"1 17.000 16.0 0.4 22 0.010636 31 50 \n",
"2 17.000 15.0 -0.2 22 -0.455970 32 51 \n",
"3 17.167 16.0 5.6 22 -0.325390 33 52 \n",
"4 16.000 16.0 0.2 29 1.251000 7 27 \n",
"... ... ... ... ... ... ... ... \n",
"3667 17.125 14.5 -7.0 37 0.436380 4 41 \n",
"3668 20.000 14.5 -16.0 37 0.438020 5 42 \n",
"3669 20.000 14.5 -20.2 37 0.482410 6 43 \n",
"3670 22.000 22.0 6.4 37 0.500770 7 44 \n",
"3671 21.000 20.5 8.6 37 0.611030 8 45 \n",
"\n",
"[3672 rows x 84 columns]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"<Axes: >"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
"<Figure size 640x480 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import seaborn as sns\n",
"\n",
"# Seed value; it is not used in this cell (presumably consumed by later cells).\n",
"random_state = 9\n",
"\n",
"# Read the dataset and drop the Company and Time columns, leaving the\n",
"# \"Financial Distress\" column followed by the x* columns.\n",
"df = pd.read_csv(\"data-distress/FinancialDistress.csv\").drop(\n",
"    columns=[\"Company\", \"Time\"]\n",
")\n",
"\n",
"# Pairwise correlation matrix of every remaining column.\n",
"corr = df.corr()\n",
"\n",
"display(df)\n",
"sns.heatmap(corr)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(68,)"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Mark a column for removal when its correlation with any column positioned\n",
"# before it in `corr` satisfies |r| >= 0.9.  Only the strict upper triangle is\n",
"# inspected, so the first column is never marked.\n",
"# NOTE(review): `corr` is built from all of `df`, so its first column takes part\n",
"# in the comparison too -- confirm that dropping columns because of their\n",
"# correlation with that first column is intended.\n",
"columns = ~np.triu(np.abs(corr.to_numpy()) >= 0.9, k=1).any(axis=0)\n",
"\n",
"selected_columns = df.columns[columns]\n",
"selected_columns.shape"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Financial Distress</th>\n",
" <th>x1</th>\n",
" <th>x2</th>\n",
" <th>x3</th>\n",
" <th>x4</th>\n",
" <th>x5</th>\n",
" <th>x6</th>\n",
" <th>x8</th>\n",
" <th>x9</th>\n",
" <th>x10</th>\n",
" <th>...</th>\n",
" <th>x69</th>\n",
" <th>x70</th>\n",
" <th>x71</th>\n",
" <th>x72</th>\n",
" <th>x73</th>\n",
" <th>x74</th>\n",
" <th>x78</th>\n",
" <th>x80</th>\n",
" <th>x82</th>\n",
" <th>x83</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.010636</td>\n",
" <td>1.2810</td>\n",
" <td>0.022934</td>\n",
" <td>0.87454</td>\n",
" <td>1.21640</td>\n",
" <td>0.060940</td>\n",
" <td>0.188270</td>\n",
" <td>0.018854</td>\n",
" <td>0.182790</td>\n",
" <td>0.006449</td>\n",
" <td>...</td>\n",
" <td>364.9500</td>\n",
" <td>15.8</td>\n",
" <td>61.476</td>\n",
" <td>4.0</td>\n",
" <td>36.0</td>\n",
" <td>85.437</td>\n",
" <td>16.0</td>\n",
" <td>22</td>\n",
" <td>30</td>\n",
" <td>49</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>-0.455970</td>\n",
" <td>1.2700</td>\n",
" <td>0.006454</td>\n",
" <td>0.82067</td>\n",
" <td>1.00490</td>\n",
" <td>-0.014080</td>\n",
" <td>0.181040</td>\n",
" <td>0.006423</td>\n",
" <td>0.035991</td>\n",
" <td>0.001795</td>\n",
" <td>...</td>\n",
" <td>0.1896</td>\n",
" <td>15.6</td>\n",
" <td>24.579</td>\n",
" <td>0.0</td>\n",
" <td>36.0</td>\n",
" <td>107.090</td>\n",
" <td>16.0</td>\n",
" <td>22</td>\n",
" <td>31</td>\n",
" <td>50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>-0.325390</td>\n",
" <td>1.0529</td>\n",
" <td>-0.059379</td>\n",
" <td>0.92242</td>\n",
" <td>0.72926</td>\n",
" <td>0.020476</td>\n",
" <td>0.044865</td>\n",
" <td>-0.081423</td>\n",
" <td>-0.765400</td>\n",
" <td>-0.054324</td>\n",
" <td>...</td>\n",
" <td>11.9460</td>\n",
" <td>15.2</td>\n",
" <td>20.700</td>\n",
" <td>0.0</td>\n",
" <td>35.0</td>\n",
" <td>120.870</td>\n",
" <td>15.0</td>\n",
" <td>22</td>\n",
" <td>32</td>\n",
" <td>51</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>-0.566570</td>\n",
" <td>1.1131</td>\n",
" <td>-0.015229</td>\n",
" <td>0.85888</td>\n",
" <td>0.80974</td>\n",
" <td>0.076037</td>\n",
" <td>0.091033</td>\n",
" <td>-0.018807</td>\n",
" <td>-0.107910</td>\n",
" <td>-0.065316</td>\n",
" <td>...</td>\n",
" <td>-18.7480</td>\n",
" <td>10.4</td>\n",
" <td>47.429</td>\n",
" <td>4.0</td>\n",
" <td>33.0</td>\n",
" <td>54.806</td>\n",
" <td>16.0</td>\n",
" <td>22</td>\n",
" <td>33</td>\n",
" <td>52</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1.357300</td>\n",
" <td>1.0623</td>\n",
" <td>0.107020</td>\n",
" <td>0.81460</td>\n",
" <td>0.83593</td>\n",
" <td>0.199960</td>\n",
" <td>0.047800</td>\n",
" <td>0.128030</td>\n",
" <td>0.577250</td>\n",
" <td>0.094075</td>\n",
" <td>...</td>\n",
" <td>364.9500</td>\n",
" <td>15.8</td>\n",
" <td>61.476</td>\n",
" <td>4.0</td>\n",
" <td>36.0</td>\n",
" <td>85.437</td>\n",
" <td>16.0</td>\n",
" <td>29</td>\n",
" <td>7</td>\n",
" <td>27</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3667</th>\n",
" <td>0.438020</td>\n",
" <td>2.2605</td>\n",
" <td>0.202890</td>\n",
" <td>0.16037</td>\n",
" <td>0.18588</td>\n",
" <td>0.175970</td>\n",
" <td>0.198400</td>\n",
" <td>1.091500</td>\n",
" <td>0.241640</td>\n",
" <td>0.226860</td>\n",
" <td>...</td>\n",
" <td>14.5290</td>\n",
" <td>21.5</td>\n",
" <td>33.768</td>\n",
" <td>2.0</td>\n",
" <td>22.0</td>\n",
" <td>100.000</td>\n",
" <td>14.5</td>\n",
" <td>37</td>\n",
" <td>4</td>\n",
" <td>41</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3668</th>\n",
" <td>0.482410</td>\n",
" <td>1.9615</td>\n",
" <td>0.216440</td>\n",
" <td>0.20095</td>\n",
" <td>0.21642</td>\n",
" <td>0.203590</td>\n",
" <td>0.189870</td>\n",
" <td>1.000100</td>\n",
" <td>0.270870</td>\n",
" <td>0.213610</td>\n",
" <td>...</td>\n",
" <td>3.8523</td>\n",
" <td>30.5</td>\n",
" <td>-10.665</td>\n",
" <td>0.0</td>\n",
" <td>28.0</td>\n",
" <td>91.500</td>\n",
" <td>14.5</td>\n",
" <td>37</td>\n",
" <td>5</td>\n",
" <td>42</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3669</th>\n",
" <td>0.500770</td>\n",
" <td>1.7099</td>\n",
" <td>0.207970</td>\n",
" <td>0.26136</td>\n",
" <td>0.21399</td>\n",
" <td>0.193670</td>\n",
" <td>0.183890</td>\n",
" <td>0.971860</td>\n",
" <td>0.281560</td>\n",
" <td>0.210970</td>\n",
" <td>...</td>\n",
" <td>-25.8410</td>\n",
" <td>34.7</td>\n",
" <td>36.030</td>\n",
" <td>2.0</td>\n",
" <td>32.0</td>\n",
" <td>87.100</td>\n",
" <td>14.5</td>\n",
" <td>37</td>\n",
" <td>6</td>\n",
" <td>43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3670</th>\n",
" <td>0.611030</td>\n",
" <td>1.5590</td>\n",
" <td>0.185450</td>\n",
" <td>0.30728</td>\n",
" <td>0.19307</td>\n",
" <td>0.172140</td>\n",
" <td>0.170680</td>\n",
" <td>0.960570</td>\n",
" <td>0.267720</td>\n",
" <td>0.203190</td>\n",
" <td>...</td>\n",
" <td>-58.1220</td>\n",
" <td>15.6</td>\n",
" <td>22.571</td>\n",
" <td>2.0</td>\n",
" <td>30.0</td>\n",
" <td>92.900</td>\n",
" <td>22.0</td>\n",
" <td>37</td>\n",
" <td>7</td>\n",
" <td>44</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3671</th>\n",
" <td>0.518650</td>\n",
" <td>1.6148</td>\n",
" <td>0.176760</td>\n",
" <td>0.36369</td>\n",
" <td>0.18442</td>\n",
" <td>0.169550</td>\n",
" <td>0.197860</td>\n",
" <td>0.958450</td>\n",
" <td>0.277780</td>\n",
" <td>0.213850</td>\n",
" <td>...</td>\n",
" <td>-32.2090</td>\n",
" <td>11.9</td>\n",
" <td>13.871</td>\n",
" <td>1.0</td>\n",
" <td>29.0</td>\n",
" <td>91.700</td>\n",
" <td>20.5</td>\n",
" <td>37</td>\n",
" <td>8</td>\n",
" <td>45</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3672 rows × 68 columns</p>\n",
"</div>"
],
"text/plain": [
" Financial Distress x1 x2 x3 x4 x5 \\\n",
"0 0.010636 1.2810 0.022934 0.87454 1.21640 0.060940 \n",
"1 -0.455970 1.2700 0.006454 0.82067 1.00490 -0.014080 \n",
"2 -0.325390 1.0529 -0.059379 0.92242 0.72926 0.020476 \n",
"3 -0.566570 1.1131 -0.015229 0.85888 0.80974 0.076037 \n",
"4 1.357300 1.0623 0.107020 0.81460 0.83593 0.199960 \n",
"... ... ... ... ... ... ... \n",
"3667 0.438020 2.2605 0.202890 0.16037 0.18588 0.175970 \n",
"3668 0.482410 1.9615 0.216440 0.20095 0.21642 0.203590 \n",
"3669 0.500770 1.7099 0.207970 0.26136 0.21399 0.193670 \n",
"3670 0.611030 1.5590 0.185450 0.30728 0.19307 0.172140 \n",
"3671 0.518650 1.6148 0.176760 0.36369 0.18442 0.169550 \n",
"\n",
" x6 x8 x9 x10 ... x69 x70 x71 \\\n",
"0 0.188270 0.018854 0.182790 0.006449 ... 364.9500 15.8 61.476 \n",
"1 0.181040 0.006423 0.035991 0.001795 ... 0.1896 15.6 24.579 \n",
"2 0.044865 -0.081423 -0.765400 -0.054324 ... 11.9460 15.2 20.700 \n",
"3 0.091033 -0.018807 -0.107910 -0.065316 ... -18.7480 10.4 47.429 \n",
"4 0.047800 0.128030 0.577250 0.094075 ... 364.9500 15.8 61.476 \n",
"... ... ... ... ... ... ... ... ... \n",
"3667 0.198400 1.091500 0.241640 0.226860 ... 14.5290 21.5 33.768 \n",
"3668 0.189870 1.000100 0.270870 0.213610 ... 3.8523 30.5 -10.665 \n",
"3669 0.183890 0.971860 0.281560 0.210970 ... -25.8410 34.7 36.030 \n",
"3670 0.170680 0.960570 0.267720 0.203190 ... -58.1220 15.6 22.571 \n",
"3671 0.197860 0.958450 0.277780 0.213850 ... -32.2090 11.9 13.871 \n",
"\n",
" x72 x73 x74 x78 x80 x82 x83 \n",
"0 4.0 36.0 85.437 16.0 22 30 49 \n",
"1 0.0 36.0 107.090 16.0 22 31 50 \n",
"2 0.0 35.0 120.870 15.0 22 32 51 \n",
"3 4.0 33.0 54.806 16.0 22 33 52 \n",
"4 4.0 36.0 85.437 16.0 29 7 27 \n",
"... ... ... ... ... ... ... ... \n",
"3667 2.0 22.0 100.000 14.5 37 4 41 \n",
"3668 0.0 28.0 91.500 14.5 37 5 42 \n",
"3669 2.0 32.0 87.100 14.5 37 6 43 \n",
"3670 2.0 30.0 92.900 22.0 37 7 44 \n",
"3671 1.0 29.0 91.700 20.5 37 8 45 \n",
"\n",
"[3672 rows x 68 columns]"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Restrict the frame to the column labels held in `selected_columns`.\n",
"df = df.loc[:, selected_columns]\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x8', 'x9', 'x10', 'x11', 'x12',\n",
" 'x14', 'x15', 'x16', 'x17', 'x18', 'x19', 'x20', 'x21', 'x22', 'x23',\n",
" 'x24', 'x25', 'x26', 'x27', 'x28', 'x29', 'x30', 'x31', 'x32', 'x35',\n",
" 'x36', 'x37', 'x39', 'x40', 'x41', 'x42', 'x43', 'x44', 'x45', 'x46',\n",
" 'x47', 'x51', 'x54', 'x55', 'x56', 'x57', 'x58', 'x59', 'x60', 'x61',\n",
" 'x63', 'x64', 'x65', 'x66', 'x67', 'x68', 'x69', 'x70', 'x71', 'x72',\n",
" 'x73', 'x74', 'x78', 'x80', 'x82', 'x83'],\n",
" dtype='object')"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import statsmodels.api as sm\n",
"\n",
"def backwardElimination(x, Y, sl, columns):\n",
"    \"\"\"Drop regressors by backward elimination on OLS p-values.\n",
"\n",
"    An OLS model of ``Y`` on ``x`` is refitted repeatedly.  After every fit the\n",
"    regressor with the largest p-value is removed if that p-value exceeds\n",
"    ``sl``; the loop ends as soon as no p-value exceeds ``sl`` or no regressor\n",
"    is left.\n",
"\n",
"    Args:\n",
"        x: 2-D array-like of regressors, one column per candidate variable.\n",
"        Y: Response values passed to ``sm.OLS`` as the dependent variable.\n",
"        sl: Significance level; regressors whose p-value is above it are removed.\n",
"        columns: Labels for the columns of ``x``, in the same order.\n",
"\n",
"    Returns:\n",
"        A ``(x, columns)`` tuple with the remaining regressors and their labels.\n",
"        ``columns`` is returned as passed in when nothing was removed, otherwise\n",
"        as the array produced by ``np.delete``.\n",
"\n",
"    NOTE(review): ``x`` is handed to ``sm.OLS`` unchanged, so the model has an\n",
"    intercept only if the caller already put a constant column in ``x`` -- confirm\n",
"    that this is intended.\n",
"    \"\"\"\n",
"    x = np.asarray(x)\n",
"    while x.shape[1] > 0:\n",
"        pvalues = np.asarray(sm.OLS(Y, x).fit().pvalues, dtype=float)\n",
"        # Remove exactly one column per fit: the other p-values belong to the old\n",
"        # model and their positions go stale as soon as a column is deleted.\n",
"        worst = int(np.argmax(pvalues))\n",
"        # Written as `not (p > sl)` so that a NaN in the selected p-value ends the\n",
"        # loop instead of triggering a deletion.\n",
"        if not pvalues[worst] > sl:\n",
"            break\n",
"        x = np.delete(x, worst, axis=1)\n",
"        columns = np.delete(columns, worst)\n",
"    return x, columns\n",
"\n",
"# Remove the \"Financial Distress\" label (if it is present) from the list of\n",
"# selected column labels.\n",
"selected_columns = selected_columns.drop(labels=[\"Financial Distress\"], errors=\"ignore\")\n",
"selected_columns"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>x3</th>\n",
" <th>x4</th>\n",
" <th>x5</th>\n",
" <th>x10</th>\n",
" <th>x14</th>\n",
" <th>x18</th>\n",
" <th>x23</th>\n",
" <th>x24</th>\n",
" <th>x25</th>\n",
" <th>x29</th>\n",
" <th>x37</th>\n",
" <th>x41</th>\n",
" <th>x46</th>\n",
" <th>x54</th>\n",
" <th>x63</th>\n",
" <th>x70</th>\n",
" <th>x73</th>\n",
" <th>x80</th>\n",
" <th>Distress</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.87454</td>\n",
" <td>1.21640</td>\n",
" <td>0.060940</td>\n",
" <td>0.006449</td>\n",
" <td>6.97060</td>\n",
" <td>0.018265</td>\n",
" <td>0.148720</td>\n",
" <td>0.66995</td>\n",
" <td>214.760</td>\n",
" <td>0.204590</td>\n",
" <td>1.630700</td>\n",
" <td>9.69510</td>\n",
" <td>0.026224</td>\n",
" <td>209.87</td>\n",
" <td>3.27020</td>\n",
" <td>15.8</td>\n",
" <td>36.0</td>\n",
" <td>22.0</td>\n",
" <td>0.010636</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.82067</td>\n",
" <td>1.00490</td>\n",
" <td>-0.014080</td>\n",
" <td>0.001795</td>\n",
" <td>4.57640</td>\n",
" <td>0.027558</td>\n",
" <td>0.056026</td>\n",
" <td>0.67048</td>\n",
" <td>38.242</td>\n",
" <td>0.150190</td>\n",
" <td>0.837540</td>\n",
" <td>5.60350</td>\n",
" <td>0.007864</td>\n",
" <td>250.14</td>\n",
" <td>14.32100</td>\n",
" <td>15.6</td>\n",
" <td>36.0</td>\n",
" <td>22.0</td>\n",
" <td>-0.455970</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.92242</td>\n",
" <td>0.72926</td>\n",
" <td>0.020476</td>\n",
" <td>-0.054324</td>\n",
" <td>11.89000</td>\n",
" <td>0.012595</td>\n",
" <td>0.065220</td>\n",
" <td>0.84827</td>\n",
" <td>-498.390</td>\n",
" <td>0.074149</td>\n",
" <td>0.955790</td>\n",
" <td>9.40030</td>\n",
" <td>-0.064373</td>\n",
" <td>280.55</td>\n",
" <td>1.15380</td>\n",
" <td>15.2</td>\n",
" <td>35.0</td>\n",
" <td>22.0</td>\n",
" <td>-0.325390</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.85888</td>\n",
" <td>0.80974</td>\n",
" <td>0.076037</td>\n",
" <td>-0.065316</td>\n",
" <td>6.08620</td>\n",
" <td>0.011601</td>\n",
" <td>0.125160</td>\n",
" <td>0.80478</td>\n",
" <td>-75.867</td>\n",
" <td>0.054098</td>\n",
" <td>0.383350</td>\n",
" <td>5.73790</td>\n",
" <td>-0.017731</td>\n",
" <td>413.74</td>\n",
" <td>2.04080</td>\n",
" <td>10.4</td>\n",
" <td>33.0</td>\n",
" <td>22.0</td>\n",
" <td>-0.566570</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.81460</td>\n",
" <td>0.83593</td>\n",
" <td>0.199960</td>\n",
" <td>0.094075</td>\n",
" <td>4.39380</td>\n",
" <td>0.006814</td>\n",
" <td>0.266020</td>\n",
" <td>0.76770</td>\n",
" <td>1423.100</td>\n",
" <td>0.046907</td>\n",
" <td>0.253010</td>\n",
" <td>4.50880</td>\n",
" <td>0.131380</td>\n",
" <td>315.34</td>\n",
" <td>3.27020</td>\n",
" <td>15.8</td>\n",
" <td>36.0</td>\n",
" <td>29.0</td>\n",
" <td>1.357300</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3667</th>\n",
" <td>0.16037</td>\n",
" <td>0.18588</td>\n",
" <td>0.175970</td>\n",
" <td>0.226860</td>\n",
" <td>0.19101</td>\n",
" <td>0.014077</td>\n",
" <td>0.994340</td>\n",
" <td>0.15740</td>\n",
" <td>390.260</td>\n",
" <td>0.002976</td>\n",
" <td>0.003544</td>\n",
" <td>0.22138</td>\n",
" <td>1.265100</td>\n",
" <td>16961.00</td>\n",
" <td>-0.53449</td>\n",
" <td>21.5</td>\n",
" <td>22.0</td>\n",
" <td>37.0</td>\n",
" <td>0.438020</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3668</th>\n",
" <td>0.20095</td>\n",
" <td>0.21642</td>\n",
" <td>0.203590</td>\n",
" <td>0.213610</td>\n",
" <td>0.25149</td>\n",
" <td>0.018249</td>\n",
" <td>0.992440</td>\n",
" <td>0.19747</td>\n",
" <td>443.840</td>\n",
" <td>0.003484</td>\n",
" <td>0.004359</td>\n",
" <td>0.27085</td>\n",
" <td>1.077100</td>\n",
" <td>20689.00</td>\n",
" <td>-25.73600</td>\n",
" <td>30.5</td>\n",
" <td>28.0</td>\n",
" <td>37.0</td>\n",
" <td>0.482410</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3669</th>\n",
" <td>0.26136</td>\n",
" <td>0.21399</td>\n",
" <td>0.193670</td>\n",
" <td>0.210970</td>\n",
" <td>0.35384</td>\n",
" <td>0.007451</td>\n",
" <td>0.982420</td>\n",
" <td>0.25902</td>\n",
" <td>475.560</td>\n",
" <td>0.002343</td>\n",
" <td>0.003172</td>\n",
" <td>0.28971</td>\n",
" <td>0.795720</td>\n",
" <td>34012.00</td>\n",
" <td>-3.06590</td>\n",
" <td>34.7</td>\n",
" <td>32.0</td>\n",
" <td>37.0</td>\n",
" <td>0.500770</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3670</th>\n",
" <td>0.30728</td>\n",
" <td>0.19307</td>\n",
" <td>0.172140</td>\n",
" <td>0.203190</td>\n",
" <td>0.44358</td>\n",
" <td>0.021239</td>\n",
" <td>0.985230</td>\n",
" <td>0.30533</td>\n",
" <td>457.060</td>\n",
" <td>0.001942</td>\n",
" <td>0.002803</td>\n",
" <td>0.27871</td>\n",
" <td>0.603540</td>\n",
" <td>35901.00</td>\n",
" <td>7.15620</td>\n",
" <td>15.6</td>\n",
" <td>30.0</td>\n",
" <td>37.0</td>\n",
" <td>0.611030</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3671</th>\n",
" <td>0.36369</td>\n",
" <td>0.18442</td>\n",
" <td>0.169550</td>\n",
" <td>0.213850</td>\n",
" <td>0.57156</td>\n",
" <td>0.013783</td>\n",
" <td>0.994000</td>\n",
" <td>0.32184</td>\n",
" <td>505.040</td>\n",
" <td>0.041852</td>\n",
" <td>0.065773</td>\n",
" <td>0.28982</td>\n",
" <td>0.486010</td>\n",
" <td>28173.00</td>\n",
" <td>12.14500</td>\n",
" <td>11.9</td>\n",
" <td>29.0</td>\n",
" <td>37.0</td>\n",
" <td>0.518650</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3672 rows × 19 columns</p>\n",
"</div>"
],
"text/plain": [
" x3 x4 x5 x10 x14 x18 x23 \\\n",
"0 0.87454 1.21640 0.060940 0.006449 6.97060 0.018265 0.148720 \n",
"1 0.82067 1.00490 -0.014080 0.001795 4.57640 0.027558 0.056026 \n",
"2 0.92242 0.72926 0.020476 -0.054324 11.89000 0.012595 0.065220 \n",
"3 0.85888 0.80974 0.076037 -0.065316 6.08620 0.011601 0.125160 \n",
"4 0.81460 0.83593 0.199960 0.094075 4.39380 0.006814 0.266020 \n",
"... ... ... ... ... ... ... ... \n",
"3667 0.16037 0.18588 0.175970 0.226860 0.19101 0.014077 0.994340 \n",
"3668 0.20095 0.21642 0.203590 0.213610 0.25149 0.018249 0.992440 \n",
"3669 0.26136 0.21399 0.193670 0.210970 0.35384 0.007451 0.982420 \n",
"3670 0.30728 0.19307 0.172140 0.203190 0.44358 0.021239 0.985230 \n",
"3671 0.36369 0.18442 0.169550 0.213850 0.57156 0.013783 0.994000 \n",
"\n",
" x24 x25 x29 x37 x41 x46 x54 \\\n",
"0 0.66995 214.760 0.204590 1.630700 9.69510 0.026224 209.87 \n",
"1 0.67048 38.242 0.150190 0.837540 5.60350 0.007864 250.14 \n",
"2 0.84827 -498.390 0.074149 0.955790 9.40030 -0.064373 280.55 \n",
"3 0.80478 -75.867 0.054098 0.383350 5.73790 -0.017731 413.74 \n",
"4 0.76770 1423.100 0.046907 0.253010 4.50880 0.131380 315.34 \n",
"... ... ... ... ... ... ... ... \n",
"3667 0.15740 390.260 0.002976 0.003544 0.22138 1.265100 16961.00 \n",
"3668 0.19747 443.840 0.003484 0.004359 0.27085 1.077100 20689.00 \n",
"3669 0.25902 475.560 0.002343 0.003172 0.28971 0.795720 34012.00 \n",
"3670 0.30533 457.060 0.001942 0.002803 0.27871 0.603540 35901.00 \n",
"3671 0.32184 505.040 0.041852 0.065773 0.28982 0.486010 28173.00 \n",
"\n",
" x63 x70 x73 x80 Distress \n",
"0 3.27020 15.8 36.0 22.0 0.010636 \n",
"1 14.32100 15.6 36.0 22.0 -0.455970 \n",
"2 1.15380 15.2 35.0 22.0 -0.325390 \n",
"3 2.04080 10.4 33.0 22.0 -0.566570 \n",
"4 3.27020 15.8 36.0 29.0 1.357300 \n",
"... ... ... ... ... ... \n",
"3667 -0.53449 21.5 22.0 37.0 0.438020 \n",
"3668 -25.73600 30.5 28.0 37.0 0.482410 \n",
"3669 -3.06590 34.7 32.0 37.0 0.500770 \n",
"3670 7.15620 15.6 30.0 37.0 0.611030 \n",
"3671 12.14500 11.9 29.0 37.0 0.518650 \n",
"\n",
"[3672 rows x 19 columns]"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"SL = 0.05\n",
"\n",
"# Column 0 of `df` is used as the response and the remaining columns as the\n",
"# candidate regressors; `selected_columns` supplies the regressor labels.\n",
"new_data, new_columns = backwardElimination(\n",
"    x=df.iloc[:, 1:].to_numpy(),\n",
"    Y=df.iloc[:, 0].to_numpy(),\n",
"    sl=SL,\n",
"    columns=selected_columns,\n",
")\n",
"\n",
"# Rebuild a frame from the surviving regressors and append the response as `Distress`.\n",
"data = pd.DataFrame(new_data, columns=new_columns).assign(\n",
"    Distress=df[\"Financial Distress\"]\n",
")\n",
"data"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>x3</th>\n",
" <th>x4</th>\n",
" <th>x5</th>\n",
" <th>x10</th>\n",
" <th>x14</th>\n",
" <th>x18</th>\n",
" <th>x23</th>\n",
" <th>x24</th>\n",
" <th>x25</th>\n",
" <th>x29</th>\n",
" <th>x37</th>\n",
" <th>x41</th>\n",
" <th>x46</th>\n",
" <th>x54</th>\n",
" <th>x63</th>\n",
" <th>x70</th>\n",
" <th>x73</th>\n",
" <th>x80</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1156</th>\n",
" <td>0.71056</td>\n",
" <td>0.93446</td>\n",
" <td>0.14445</td>\n",
" <td>0.14572</td>\n",
" <td>2.45500</td>\n",
" <td>0.045089</td>\n",
" <td>0.19754</td>\n",
" <td>0.66553</td>\n",
" <td>625.41</td>\n",
" <td>0.045031</td>\n",
" <td>0.155580</td>\n",
" <td>3.22850</td>\n",
" <td>0.11500</td>\n",
" <td>874.69</td>\n",
" <td>-3.0266</td>\n",
" <td>25.4</td>\n",
" <td>28.0</td>\n",
" <td>9.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1993</th>\n",
" <td>0.21104</td>\n",
" <td>0.59523</td>\n",
" <td>0.30998</td>\n",
" <td>0.48288</td>\n",
" <td>0.26750</td>\n",
" <td>0.001754</td>\n",
" <td>0.56306</td>\n",
" <td>0.19858</td>\n",
" <td>1600.20</td>\n",
" <td>0.012465</td>\n",
" <td>0.015800</td>\n",
" <td>0.75445</td>\n",
" <td>2.10980</td>\n",
" <td>47173.00</td>\n",
" <td>-3.0659</td>\n",
" <td>34.7</td>\n",
" <td>32.0</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1924</th>\n",
" <td>0.46072</td>\n",
" <td>0.90327</td>\n",
" <td>0.28563</td>\n",
" <td>0.45008</td>\n",
" <td>0.85431</td>\n",
" <td>0.024656</td>\n",
" <td>0.43336</td>\n",
" <td>0.45475</td>\n",
" <td>4659.80</td>\n",
" <td>0.005962</td>\n",
" <td>0.011055</td>\n",
" <td>1.67490</td>\n",
" <td>0.81567</td>\n",
" <td>12851.00</td>\n",
" <td>7.1562</td>\n",
" <td>15.6</td>\n",
" <td>30.0</td>\n",
" <td>25.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" x3 x4 x5 x10 x14 x18 x23 x24 \\\n",
"1156 0.71056 0.93446 0.14445 0.14572 2.45500 0.045089 0.19754 0.66553 \n",
"1993 0.21104 0.59523 0.30998 0.48288 0.26750 0.001754 0.56306 0.19858 \n",
"1924 0.46072 0.90327 0.28563 0.45008 0.85431 0.024656 0.43336 0.45475 \n",
"\n",
" x25 x29 x37 x41 x46 x54 x63 x70 \\\n",
"1156 625.41 0.045031 0.155580 3.22850 0.11500 874.69 -3.0266 25.4 \n",
"1993 1600.20 0.012465 0.015800 0.75445 2.10980 47173.00 -3.0659 34.7 \n",
"1924 4659.80 0.005962 0.011055 1.67490 0.81567 12851.00 7.1562 15.6 \n",
"\n",
" x73 x80 \n",
"1156 28.0 9.0 \n",
"1993 32.0 4.0 \n",
"1924 30.0 25.0 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Distress</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1156</th>\n",
" <td>0.6382</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1993</th>\n",
" <td>0.4402</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1924</th>\n",
" <td>3.2629</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Distress\n",
"1156 0.6382\n",
"1993 0.4402\n",
"1924 3.2629"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>x3</th>\n",
" <th>x4</th>\n",
" <th>x5</th>\n",
" <th>x10</th>\n",
" <th>x14</th>\n",
" <th>x18</th>\n",
" <th>x23</th>\n",
" <th>x24</th>\n",
" <th>x25</th>\n",
" <th>x29</th>\n",
" <th>x37</th>\n",
" <th>x41</th>\n",
" <th>x46</th>\n",
" <th>x54</th>\n",
" <th>x63</th>\n",
" <th>x70</th>\n",
" <th>x73</th>\n",
" <th>x80</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3379</th>\n",
" <td>0.62266</td>\n",
" <td>0.74377</td>\n",
" <td>0.13716</td>\n",
" <td>0.008050</td>\n",
" <td>1.65010</td>\n",
" <td>0.034872</td>\n",
" <td>0.20639</td>\n",
" <td>0.42211</td>\n",
" <td>734.24</td>\n",
" <td>0.20055</td>\n",
" <td>0.53147</td>\n",
" <td>1.9711</td>\n",
" <td>0.207370</td>\n",
" <td>620.53</td>\n",
" <td>7.7373</td>\n",
" <td>15.400</td>\n",
" <td>35.5</td>\n",
" <td>25.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>156</th>\n",
" <td>0.79108</td>\n",
" <td>0.68615</td>\n",
" <td>0.10943</td>\n",
" <td>0.011391</td>\n",
" <td>3.78650</td>\n",
" <td>0.002455</td>\n",
" <td>0.19456</td>\n",
" <td>0.56425</td>\n",
" <td>653.83</td>\n",
" <td>0.22683</td>\n",
" <td>1.08570</td>\n",
" <td>3.2842</td>\n",
" <td>0.061802</td>\n",
" <td>225.64</td>\n",
" <td>1.1538</td>\n",
" <td>15.200</td>\n",
" <td>35.0</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2215</th>\n",
" <td>0.46538</td>\n",
" <td>0.54146</td>\n",
" <td>0.25140</td>\n",
" <td>0.187750</td>\n",
" <td>0.87049</td>\n",
" <td>0.027462</td>\n",
" <td>0.46916</td>\n",
" <td>0.22192</td>\n",
" <td>601.83</td>\n",
" <td>0.24346</td>\n",
" <td>0.45540</td>\n",
" <td>1.0128</td>\n",
" <td>0.431220</td>\n",
" <td>473.60</td>\n",
" <td>9.7164</td>\n",
" <td>15.683</td>\n",
" <td>36.0</td>\n",
" <td>15.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" x3 x4 x5 x10 x14 x18 x23 \\\n",
"3379 0.62266 0.74377 0.13716 0.008050 1.65010 0.034872 0.20639 \n",
"156 0.79108 0.68615 0.10943 0.011391 3.78650 0.002455 0.19456 \n",
"2215 0.46538 0.54146 0.25140 0.187750 0.87049 0.027462 0.46916 \n",
"\n",
" x24 x25 x29 x37 x41 x46 x54 x63 \\\n",
"3379 0.42211 734.24 0.20055 0.53147 1.9711 0.207370 620.53 7.7373 \n",
"156 0.56425 653.83 0.22683 1.08570 3.2842 0.061802 225.64 1.1538 \n",
"2215 0.22192 601.83 0.24346 0.45540 1.0128 0.431220 473.60 9.7164 \n",
"\n",
" x70 x73 x80 \n",
"3379 15.400 35.5 25.0 \n",
"156 15.200 35.0 12.0 \n",
"2215 15.683 36.0 15.0 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Distress</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3379</th>\n",
" <td>0.121330</td>\n",
" </tr>\n",
" <tr>\n",
" <th>156</th>\n",
" <td>0.080083</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2215</th>\n",
" <td>1.164000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Distress\n",
"3379 0.121330\n",
"156 0.080083\n",
"2215 1.164000"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from src.utils import split_stratified_into_train_val_test\n",
"\n",
"\n",
"X_train, X_test, y_train, y_test = split_stratified_into_train_val_test(\n",
" data,\n",
" stratify_colname=\"Distress\",\n",
" frac_train=0.8,\n",
" frac_val=0,\n",
" frac_test=0.2,\n",
" random_state=random_state,\n",
")\n",
"\n",
"display(X_train.head(3))\n",
"display(y_train.head(3))\n",
"display(X_test.head(3))\n",
"display(y_test.head(3))"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.pipeline import make_pipeline\n",
"from sklearn.preprocessing import PolynomialFeatures\n",
"from sklearn import linear_model, tree, neighbors, ensemble\n",
"\n",
"models = {\n",
" \"linear\": {\"model\": linear_model.LinearRegression(n_jobs=-1)},\n",
" \"linear_poly\": {\n",
" \"model\": make_pipeline(\n",
" PolynomialFeatures(degree=2),\n",
" linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n",
" )\n",
" },\n",
" \"linear_interact\": {\n",
" \"model\": make_pipeline(\n",
" PolynomialFeatures(interaction_only=True),\n",
" linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n",
" )\n",
" },\n",
" \"ridge\": {\"model\": linear_model.RidgeCV()},\n",
" \"decision_tree\": {\n",
" \"model\": tree.DecisionTreeRegressor(max_depth=7, random_state=random_state)\n",
" },\n",
" \"knn\": {\"model\": neighbors.KNeighborsRegressor(n_neighbors=7, n_jobs=-1)},\n",
" \"random_forest\": {\n",
" \"model\": ensemble.RandomForestRegressor(\n",
" max_depth=7, random_state=random_state, n_jobs=-1\n",
" )\n",
" },\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model: linear\n",
"Model: linear_poly\n",
"Model: linear_interact\n",
"Model: ridge\n",
"Model: decision_tree\n",
"Model: knn\n",
"Model: random_forest\n"
]
}
],
"source": [
"from src.utils import run_regression\n",
"\n",
"for model_name in models.keys():\n",
" print(f\"Model: {model_name}\")\n",
" fitted_model = models[model_name][\"model\"].fit(\n",
" X_train.values, y_train.values.ravel()\n",
" )\n",
" models[model_name] = run_regression(\n",
" fitted_model, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style type=\"text/css\">\n",
"#T_e49d6_row0_col0 {\n",
" background-color: #1fa287;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_e49d6_row0_col1, #T_e49d6_row6_col0 {\n",
" background-color: #26818e;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_e49d6_row0_col2, #T_e49d6_row6_col3 {\n",
" background-color: #4e02a2;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_e49d6_row0_col3, #T_e49d6_row2_col2 {\n",
" background-color: #da5a6a;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_e49d6_row1_col0, #T_e49d6_row6_col1 {\n",
" background-color: #a8db34;\n",
" color: #000000;\n",
"}\n",
"#T_e49d6_row1_col1 {\n",
" background-color: #1fa088;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_e49d6_row1_col2 {\n",
" background-color: #c7427c;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_e49d6_row1_col3 {\n",
" background-color: #c03a83;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_e49d6_row2_col0 {\n",
" background-color: #a5db36;\n",
" color: #000000;\n",
"}\n",
"#T_e49d6_row2_col1 {\n",
" background-color: #21a585;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_e49d6_row2_col3 {\n",
" background-color: #ba3388;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_e49d6_row3_col0 {\n",
" background-color: #24878e;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_e49d6_row3_col1 {\n",
" background-color: #26ad81;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_e49d6_row3_col2 {\n",
" background-color: #a21d9a;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_e49d6_row3_col3 {\n",
" background-color: #b12a90;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_e49d6_row4_col0 {\n",
" background-color: #238a8d;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_e49d6_row4_col1 {\n",
" background-color: #73d056;\n",
" color: #000000;\n",
"}\n",
"#T_e49d6_row4_col2 {\n",
" background-color: #b7318a;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_e49d6_row4_col3 {\n",
" background-color: #7501a8;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_e49d6_row5_col0 {\n",
" background-color: #93d741;\n",
" color: #000000;\n",
"}\n",
"#T_e49d6_row5_col1 {\n",
" background-color: #9bd93c;\n",
" color: #000000;\n",
"}\n",
"#T_e49d6_row5_col2 {\n",
" background-color: #d5536f;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_e49d6_row5_col3 {\n",
" background-color: #5801a4;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_e49d6_row6_col2 {\n",
" background-color: #b6308b;\n",
" color: #f1f1f1;\n",
"}\n",
"</style>\n",
"<table id=\"T_e49d6\">\n",
" <thead>\n",
" <tr>\n",
" <th class=\"blank level0\" >&nbsp;</th>\n",
" <th id=\"T_e49d6_level0_col0\" class=\"col_heading level0 col0\" >RMSE_train</th>\n",
" <th id=\"T_e49d6_level0_col1\" class=\"col_heading level0 col1\" >RMSE_test</th>\n",
" <th id=\"T_e49d6_level0_col2\" class=\"col_heading level0 col2\" >RMAE_test</th>\n",
" <th id=\"T_e49d6_level0_col3\" class=\"col_heading level0 col3\" >R2_test</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th id=\"T_e49d6_level0_row0\" class=\"row_heading level0 row0\" >random_forest</th>\n",
" <td id=\"T_e49d6_row0_col0\" class=\"data row0 col0\" >1.394198</td>\n",
" <td id=\"T_e49d6_row0_col1\" class=\"data row0 col1\" >1.042729</td>\n",
" <td id=\"T_e49d6_row0_col2\" class=\"data row0 col2\" >0.778401</td>\n",
" <td id=\"T_e49d6_row0_col3\" class=\"data row0 col3\" >0.456952</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_e49d6_level0_row1\" class=\"row_heading level0 row1\" >ridge</th>\n",
" <td id=\"T_e49d6_row1_col0\" class=\"data row1 col0\" >2.488097</td>\n",
" <td id=\"T_e49d6_row1_col1\" class=\"data row1 col1\" >1.198888</td>\n",
" <td id=\"T_e49d6_row1_col2\" class=\"data row1 col2\" >0.865585</td>\n",
" <td id=\"T_e49d6_row1_col3\" class=\"data row1 col3\" >0.282120</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_e49d6_level0_row2\" class=\"row_heading level0 row2\" >linear</th>\n",
" <td id=\"T_e49d6_row2_col0\" class=\"data row2 col0\" >2.474171</td>\n",
" <td id=\"T_e49d6_row2_col1\" class=\"data row2 col1\" >1.228277</td>\n",
" <td id=\"T_e49d6_row2_col2\" class=\"data row2 col2\" >0.885807</td>\n",
" <td id=\"T_e49d6_row2_col3\" class=\"data row2 col3\" >0.246493</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_e49d6_level0_row3\" class=\"row_heading level0 row3\" >linear_poly</th>\n",
" <td id=\"T_e49d6_row3_col0\" class=\"data row3 col0\" >0.981309</td>\n",
" <td id=\"T_e49d6_row3_col1\" class=\"data row3 col1\" >1.267218</td>\n",
" <td id=\"T_e49d6_row3_col2\" class=\"data row3 col2\" >0.833594</td>\n",
" <td id=\"T_e49d6_row3_col3\" class=\"data row3 col3\" >0.197957</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_e49d6_level0_row4\" class=\"row_heading level0 row4\" >linear_interact</th>\n",
" <td id=\"T_e49d6_row4_col0\" class=\"data row4 col0\" >1.025112</td>\n",
" <td id=\"T_e49d6_row4_col1\" class=\"data row4 col1\" >1.466789</td>\n",
" <td id=\"T_e49d6_row4_col2\" class=\"data row4 col2\" >0.850850</td>\n",
" <td id=\"T_e49d6_row4_col3\" class=\"data row4 col3\" >-0.074560</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_e49d6_level0_row5\" class=\"row_heading level0 row5\" >knn</th>\n",
" <td id=\"T_e49d6_row5_col0\" class=\"data row5 col0\" >2.376262</td>\n",
" <td id=\"T_e49d6_row5_col1\" class=\"data row5 col1\" >1.541027</td>\n",
" <td id=\"T_e49d6_row5_col2\" class=\"data row5 col2\" >0.879611</td>\n",
" <td id=\"T_e49d6_row5_col3\" class=\"data row5 col3\" >-0.186083</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_e49d6_level0_row6\" class=\"row_heading level0 row6\" >decision_tree</th>\n",
" <td id=\"T_e49d6_row6_col0\" class=\"data row6 col0\" >0.872007</td>\n",
" <td id=\"T_e49d6_row6_col1\" class=\"data row6 col1\" >1.566888</td>\n",
" <td id=\"T_e49d6_row6_col2\" class=\"data row6 col2\" >0.850226</td>\n",
" <td id=\"T_e49d6_row6_col3\" class=\"data row6 col3\" >-0.226227</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n"
],
"text/plain": [
"<pandas.io.formats.style.Styler at 0x227bc70f2f0>"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"reg_metrics = pd.DataFrame.from_dict(models, \"index\")[\n",
" [\"RMSE_train\", \"RMSE_test\", \"RMAE_test\", \"R2_test\"]\n",
"]\n",
"reg_metrics.sort_values(by=\"RMSE_test\").style.background_gradient(\n",
" cmap=\"viridis\", low=1, high=0.3, subset=[\"RMSE_train\", \"RMSE_test\"]\n",
").background_gradient(cmap=\"plasma\", low=0.3, high=1, subset=[\"RMAE_test\", \"R2_test\"])"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>x3</th>\n",
" <th>x4</th>\n",
" <th>x5</th>\n",
" <th>x10</th>\n",
" <th>x14</th>\n",
" <th>x18</th>\n",
" <th>x23</th>\n",
" <th>x24</th>\n",
" <th>x25</th>\n",
" <th>x29</th>\n",
" <th>x37</th>\n",
" <th>x41</th>\n",
" <th>x46</th>\n",
" <th>x54</th>\n",
" <th>x63</th>\n",
" <th>x70</th>\n",
" <th>x73</th>\n",
" <th>x80</th>\n",
" <th>Distress</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.87454</td>\n",
" <td>1.21640</td>\n",
" <td>0.060940</td>\n",
" <td>0.006449</td>\n",
" <td>6.97060</td>\n",
" <td>0.018265</td>\n",
" <td>0.148720</td>\n",
" <td>0.66995</td>\n",
" <td>214.760</td>\n",
" <td>0.204590</td>\n",
" <td>1.630700</td>\n",
" <td>9.69510</td>\n",
" <td>0.026224</td>\n",
" <td>209.87</td>\n",
" <td>3.27020</td>\n",
" <td>15.8</td>\n",
" <td>36.0</td>\n",
" <td>22.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.82067</td>\n",
" <td>1.00490</td>\n",
" <td>-0.014080</td>\n",
" <td>0.001795</td>\n",
" <td>4.57640</td>\n",
" <td>0.027558</td>\n",
" <td>0.056026</td>\n",
" <td>0.67048</td>\n",
" <td>38.242</td>\n",
" <td>0.150190</td>\n",
" <td>0.837540</td>\n",
" <td>5.60350</td>\n",
" <td>0.007864</td>\n",
" <td>250.14</td>\n",
" <td>14.32100</td>\n",
" <td>15.6</td>\n",
" <td>36.0</td>\n",
" <td>22.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.92242</td>\n",
" <td>0.72926</td>\n",
" <td>0.020476</td>\n",
" <td>-0.054324</td>\n",
" <td>11.89000</td>\n",
" <td>0.012595</td>\n",
" <td>0.065220</td>\n",
" <td>0.84827</td>\n",
" <td>-498.390</td>\n",
" <td>0.074149</td>\n",
" <td>0.955790</td>\n",
" <td>9.40030</td>\n",
" <td>-0.064373</td>\n",
" <td>280.55</td>\n",
" <td>1.15380</td>\n",
" <td>15.2</td>\n",
" <td>35.0</td>\n",
" <td>22.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.85888</td>\n",
" <td>0.80974</td>\n",
" <td>0.076037</td>\n",
" <td>-0.065316</td>\n",
" <td>6.08620</td>\n",
" <td>0.011601</td>\n",
" <td>0.125160</td>\n",
" <td>0.80478</td>\n",
" <td>-75.867</td>\n",
" <td>0.054098</td>\n",
" <td>0.383350</td>\n",
" <td>5.73790</td>\n",
" <td>-0.017731</td>\n",
" <td>413.74</td>\n",
" <td>2.04080</td>\n",
" <td>10.4</td>\n",
" <td>33.0</td>\n",
" <td>22.0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.81460</td>\n",
" <td>0.83593</td>\n",
" <td>0.199960</td>\n",
" <td>0.094075</td>\n",
" <td>4.39380</td>\n",
" <td>0.006814</td>\n",
" <td>0.266020</td>\n",
" <td>0.76770</td>\n",
" <td>1423.100</td>\n",
" <td>0.046907</td>\n",
" <td>0.253010</td>\n",
" <td>4.50880</td>\n",
" <td>0.131380</td>\n",
" <td>315.34</td>\n",
" <td>3.27020</td>\n",
" <td>15.8</td>\n",
" <td>36.0</td>\n",
" <td>29.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3667</th>\n",
" <td>0.16037</td>\n",
" <td>0.18588</td>\n",
" <td>0.175970</td>\n",
" <td>0.226860</td>\n",
" <td>0.19101</td>\n",
" <td>0.014077</td>\n",
" <td>0.994340</td>\n",
" <td>0.15740</td>\n",
" <td>390.260</td>\n",
" <td>0.002976</td>\n",
" <td>0.003544</td>\n",
" <td>0.22138</td>\n",
" <td>1.265100</td>\n",
" <td>16961.00</td>\n",
" <td>-0.53449</td>\n",
" <td>21.5</td>\n",
" <td>22.0</td>\n",
" <td>37.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3668</th>\n",
" <td>0.20095</td>\n",
" <td>0.21642</td>\n",
" <td>0.203590</td>\n",
" <td>0.213610</td>\n",
" <td>0.25149</td>\n",
" <td>0.018249</td>\n",
" <td>0.992440</td>\n",
" <td>0.19747</td>\n",
" <td>443.840</td>\n",
" <td>0.003484</td>\n",
" <td>0.004359</td>\n",
" <td>0.27085</td>\n",
" <td>1.077100</td>\n",
" <td>20689.00</td>\n",
" <td>-25.73600</td>\n",
" <td>30.5</td>\n",
" <td>28.0</td>\n",
" <td>37.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3669</th>\n",
" <td>0.26136</td>\n",
" <td>0.21399</td>\n",
" <td>0.193670</td>\n",
" <td>0.210970</td>\n",
" <td>0.35384</td>\n",
" <td>0.007451</td>\n",
" <td>0.982420</td>\n",
" <td>0.25902</td>\n",
" <td>475.560</td>\n",
" <td>0.002343</td>\n",
" <td>0.003172</td>\n",
" <td>0.28971</td>\n",
" <td>0.795720</td>\n",
" <td>34012.00</td>\n",
" <td>-3.06590</td>\n",
" <td>34.7</td>\n",
" <td>32.0</td>\n",
" <td>37.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3670</th>\n",
" <td>0.30728</td>\n",
" <td>0.19307</td>\n",
" <td>0.172140</td>\n",
" <td>0.203190</td>\n",
" <td>0.44358</td>\n",
" <td>0.021239</td>\n",
" <td>0.985230</td>\n",
" <td>0.30533</td>\n",
" <td>457.060</td>\n",
" <td>0.001942</td>\n",
" <td>0.002803</td>\n",
" <td>0.27871</td>\n",
" <td>0.603540</td>\n",
" <td>35901.00</td>\n",
" <td>7.15620</td>\n",
" <td>15.6</td>\n",
" <td>30.0</td>\n",
" <td>37.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3671</th>\n",
" <td>0.36369</td>\n",
" <td>0.18442</td>\n",
" <td>0.169550</td>\n",
" <td>0.213850</td>\n",
" <td>0.57156</td>\n",
" <td>0.013783</td>\n",
" <td>0.994000</td>\n",
" <td>0.32184</td>\n",
" <td>505.040</td>\n",
" <td>0.041852</td>\n",
" <td>0.065773</td>\n",
" <td>0.28982</td>\n",
" <td>0.486010</td>\n",
" <td>28173.00</td>\n",
" <td>12.14500</td>\n",
" <td>11.9</td>\n",
" <td>29.0</td>\n",
" <td>37.0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3672 rows × 19 columns</p>\n",
"</div>"
],
"text/plain": [
" x3 x4 x5 x10 x14 x18 x23 \\\n",
"0 0.87454 1.21640 0.060940 0.006449 6.97060 0.018265 0.148720 \n",
"1 0.82067 1.00490 -0.014080 0.001795 4.57640 0.027558 0.056026 \n",
"2 0.92242 0.72926 0.020476 -0.054324 11.89000 0.012595 0.065220 \n",
"3 0.85888 0.80974 0.076037 -0.065316 6.08620 0.011601 0.125160 \n",
"4 0.81460 0.83593 0.199960 0.094075 4.39380 0.006814 0.266020 \n",
"... ... ... ... ... ... ... ... \n",
"3667 0.16037 0.18588 0.175970 0.226860 0.19101 0.014077 0.994340 \n",
"3668 0.20095 0.21642 0.203590 0.213610 0.25149 0.018249 0.992440 \n",
"3669 0.26136 0.21399 0.193670 0.210970 0.35384 0.007451 0.982420 \n",
"3670 0.30728 0.19307 0.172140 0.203190 0.44358 0.021239 0.985230 \n",
"3671 0.36369 0.18442 0.169550 0.213850 0.57156 0.013783 0.994000 \n",
"\n",
" x24 x25 x29 x37 x41 x46 x54 \\\n",
"0 0.66995 214.760 0.204590 1.630700 9.69510 0.026224 209.87 \n",
"1 0.67048 38.242 0.150190 0.837540 5.60350 0.007864 250.14 \n",
"2 0.84827 -498.390 0.074149 0.955790 9.40030 -0.064373 280.55 \n",
"3 0.80478 -75.867 0.054098 0.383350 5.73790 -0.017731 413.74 \n",
"4 0.76770 1423.100 0.046907 0.253010 4.50880 0.131380 315.34 \n",
"... ... ... ... ... ... ... ... \n",
"3667 0.15740 390.260 0.002976 0.003544 0.22138 1.265100 16961.00 \n",
"3668 0.19747 443.840 0.003484 0.004359 0.27085 1.077100 20689.00 \n",
"3669 0.25902 475.560 0.002343 0.003172 0.28971 0.795720 34012.00 \n",
"3670 0.30533 457.060 0.001942 0.002803 0.27871 0.603540 35901.00 \n",
"3671 0.32184 505.040 0.041852 0.065773 0.28982 0.486010 28173.00 \n",
"\n",
" x63 x70 x73 x80 Distress \n",
"0 3.27020 15.8 36.0 22.0 0 \n",
"1 14.32100 15.6 36.0 22.0 0 \n",
"2 1.15380 15.2 35.0 22.0 0 \n",
"3 2.04080 10.4 33.0 22.0 1 \n",
"4 3.27020 15.8 36.0 29.0 0 \n",
"... ... ... ... ... ... \n",
"3667 -0.53449 21.5 22.0 37.0 0 \n",
"3668 -25.73600 30.5 28.0 37.0 0 \n",
"3669 -3.06590 34.7 32.0 37.0 0 \n",
"3670 7.15620 15.6 30.0 37.0 0 \n",
"3671 12.14500 11.9 29.0 37.0 0 \n",
"\n",
"[3672 rows x 19 columns]"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from src.utils import run_classification\n",
"\n",
"\n",
"def get_class(row):\n",
" return 0 if row[\"Distress\"] > -0.5 else 1\n",
"\n",
"datac = data.copy()\n",
"datac[\"Distress\"] = datac.apply(get_class, axis=1)\n",
"datac"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>x3</th>\n",
" <th>x4</th>\n",
" <th>x5</th>\n",
" <th>x10</th>\n",
" <th>x14</th>\n",
" <th>x18</th>\n",
" <th>x23</th>\n",
" <th>x24</th>\n",
" <th>x25</th>\n",
" <th>x29</th>\n",
" <th>x37</th>\n",
" <th>x41</th>\n",
" <th>x46</th>\n",
" <th>x54</th>\n",
" <th>x63</th>\n",
" <th>x70</th>\n",
" <th>x73</th>\n",
" <th>x80</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.71056</td>\n",
" <td>0.93446</td>\n",
" <td>0.14445</td>\n",
" <td>0.14572</td>\n",
" <td>2.45500</td>\n",
" <td>0.045089</td>\n",
" <td>0.19754</td>\n",
" <td>0.66553</td>\n",
" <td>625.41</td>\n",
" <td>0.045031</td>\n",
" <td>0.155580</td>\n",
" <td>3.22850</td>\n",
" <td>0.11500</td>\n",
" <td>874.69</td>\n",
" <td>-3.0266</td>\n",
" <td>25.4</td>\n",
" <td>28.0</td>\n",
" <td>9.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.21104</td>\n",
" <td>0.59523</td>\n",
" <td>0.30998</td>\n",
" <td>0.48288</td>\n",
" <td>0.26750</td>\n",
" <td>0.001754</td>\n",
" <td>0.56306</td>\n",
" <td>0.19858</td>\n",
" <td>1600.20</td>\n",
" <td>0.012465</td>\n",
" <td>0.015800</td>\n",
" <td>0.75445</td>\n",
" <td>2.10980</td>\n",
" <td>47173.00</td>\n",
" <td>-3.0659</td>\n",
" <td>34.7</td>\n",
" <td>32.0</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.46072</td>\n",
" <td>0.90327</td>\n",
" <td>0.28563</td>\n",
" <td>0.45008</td>\n",
" <td>0.85431</td>\n",
" <td>0.024656</td>\n",
" <td>0.43336</td>\n",
" <td>0.45475</td>\n",
" <td>4659.80</td>\n",
" <td>0.005962</td>\n",
" <td>0.011055</td>\n",
" <td>1.67490</td>\n",
" <td>0.81567</td>\n",
" <td>12851.00</td>\n",
" <td>7.1562</td>\n",
" <td>15.6</td>\n",
" <td>30.0</td>\n",
" <td>25.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" x3 x4 x5 x10 x14 x18 x23 x24 \\\n",
"0 0.71056 0.93446 0.14445 0.14572 2.45500 0.045089 0.19754 0.66553 \n",
"1 0.21104 0.59523 0.30998 0.48288 0.26750 0.001754 0.56306 0.19858 \n",
"2 0.46072 0.90327 0.28563 0.45008 0.85431 0.024656 0.43336 0.45475 \n",
"\n",
" x25 x29 x37 x41 x46 x54 x63 x70 \\\n",
"0 625.41 0.045031 0.155580 3.22850 0.11500 874.69 -3.0266 25.4 \n",
"1 1600.20 0.012465 0.015800 0.75445 2.10980 47173.00 -3.0659 34.7 \n",
"2 4659.80 0.005962 0.011055 1.67490 0.81567 12851.00 7.1562 15.6 \n",
"\n",
" x73 x80 \n",
"0 28.0 9.0 \n",
"1 32.0 4.0 \n",
"2 30.0 25.0 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Distress</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Distress\n",
"0 0\n",
"1 0\n",
"2 0"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>x3</th>\n",
" <th>x4</th>\n",
" <th>x5</th>\n",
" <th>x10</th>\n",
" <th>x14</th>\n",
" <th>x18</th>\n",
" <th>x23</th>\n",
" <th>x24</th>\n",
" <th>x25</th>\n",
" <th>x29</th>\n",
" <th>x37</th>\n",
" <th>x41</th>\n",
" <th>x46</th>\n",
" <th>x54</th>\n",
" <th>x63</th>\n",
" <th>x70</th>\n",
" <th>x73</th>\n",
" <th>x80</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3379</th>\n",
" <td>0.62266</td>\n",
" <td>0.74377</td>\n",
" <td>0.13716</td>\n",
" <td>0.008050</td>\n",
" <td>1.65010</td>\n",
" <td>0.034872</td>\n",
" <td>0.20639</td>\n",
" <td>0.42211</td>\n",
" <td>734.24</td>\n",
" <td>0.20055</td>\n",
" <td>0.53147</td>\n",
" <td>1.9711</td>\n",
" <td>0.207370</td>\n",
" <td>620.53</td>\n",
" <td>7.7373</td>\n",
" <td>15.400</td>\n",
" <td>35.5</td>\n",
" <td>25.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>156</th>\n",
" <td>0.79108</td>\n",
" <td>0.68615</td>\n",
" <td>0.10943</td>\n",
" <td>0.011391</td>\n",
" <td>3.78650</td>\n",
" <td>0.002455</td>\n",
" <td>0.19456</td>\n",
" <td>0.56425</td>\n",
" <td>653.83</td>\n",
" <td>0.22683</td>\n",
" <td>1.08570</td>\n",
" <td>3.2842</td>\n",
" <td>0.061802</td>\n",
" <td>225.64</td>\n",
" <td>1.1538</td>\n",
" <td>15.200</td>\n",
" <td>35.0</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2215</th>\n",
" <td>0.46538</td>\n",
" <td>0.54146</td>\n",
" <td>0.25140</td>\n",
" <td>0.187750</td>\n",
" <td>0.87049</td>\n",
" <td>0.027462</td>\n",
" <td>0.46916</td>\n",
" <td>0.22192</td>\n",
" <td>601.83</td>\n",
" <td>0.24346</td>\n",
" <td>0.45540</td>\n",
" <td>1.0128</td>\n",
" <td>0.431220</td>\n",
" <td>473.60</td>\n",
" <td>9.7164</td>\n",
" <td>15.683</td>\n",
" <td>36.0</td>\n",
" <td>15.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" x3 x4 x5 x10 x14 x18 x23 \\\n",
"3379 0.62266 0.74377 0.13716 0.008050 1.65010 0.034872 0.20639 \n",
"156 0.79108 0.68615 0.10943 0.011391 3.78650 0.002455 0.19456 \n",
"2215 0.46538 0.54146 0.25140 0.187750 0.87049 0.027462 0.46916 \n",
"\n",
" x24 x25 x29 x37 x41 x46 x54 x63 \\\n",
"3379 0.42211 734.24 0.20055 0.53147 1.9711 0.207370 620.53 7.7373 \n",
"156 0.56425 653.83 0.22683 1.08570 3.2842 0.061802 225.64 1.1538 \n",
"2215 0.22192 601.83 0.24346 0.45540 1.0128 0.431220 473.60 9.7164 \n",
"\n",
" x70 x73 x80 \n",
"3379 15.400 35.5 25.0 \n",
"156 15.200 35.0 12.0 \n",
"2215 15.683 36.0 15.0 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Distress</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3379</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>156</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2215</th>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Distress\n",
"3379 0\n",
"156 0\n",
"2215 0"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from imblearn.over_sampling import ADASYN\n",
"\n",
"Xc_train, Xc_test, yc_train, yc_test = split_stratified_into_train_val_test(\n",
" datac,\n",
" stratify_colname=\"Distress\",\n",
" frac_train=0.8,\n",
" frac_val=0,\n",
" frac_test=0.2,\n",
" random_state=random_state,\n",
")\n",
"\n",
"ada = ADASYN()\n",
"\n",
"Xc_train, yc_train = ada.fit_resample(Xc_train, yc_train)\n",
"\n",
"display(Xc_train.head(3))\n",
"display(yc_train.head(3))\n",
"display(Xc_test.head(3))\n",
"display(yc_test.head(3))"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\user\\Projects\\python\\fuzzy\\.venv\\Lib\\site-packages\\sklearn\\base.py:486: UserWarning: X has feature names, but DecisionTreeClassifier was fitted without feature names\n",
" warnings.warn(\n",
"c:\\Users\\user\\Projects\\python\\fuzzy\\.venv\\Lib\\site-packages\\sklearn\\base.py:486: UserWarning: X has feature names, but DecisionTreeClassifier was fitted without feature names\n",
" warnings.warn(\n"
]
},
{
"data": {
"text/plain": [
"{'pipeline': DecisionTreeClassifier(max_depth=7, random_state=9),\n",
" 'probs': array([1. , 0.17698154, 1. , 0.04407713, 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 0.04407713, 1. , 1. , 1. , 1. ,\n",
" 1. , 0.08695652, 1. , 1. , 1. ,\n",
" 0.04407713, 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 0.04407713, 1. ,\n",
" 1. , 1. , 1. , 0.04407713, 1. ,\n",
" 0.04407713, 1. , 0.17698154, 1. , 1. ,\n",
" 1. , 0.17698154, 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 0.17698154, 1. , 1. , 1. ,\n",
" 1. , 1. , 0.04407713, 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 0.17698154, 1. , 1. , 0. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 0.04407713, 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 0.17698154, 0.04407713, 1. , 1. ,\n",
" 1. , 0.04407713, 1. , 1. , 0.94736842,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 0.17698154, 1. ,\n",
" 0.17698154, 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 0.04407713,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 0.04407713, 1. , 0.08695652, 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 0.17698154, 0.17698154, 0.17698154, 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 0.04407713, 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 0.04407713, 1. , 1. ,\n",
" 1. , 0.04407713, 1. , 0.04407713, 0.17698154,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 0.04407713, 1. , 0.17698154,\n",
" 1. , 0.04407713, 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 0.5 , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 0.17698154, 1. , 1. , 1. , 1. ,\n",
" 1. , 0.17698154, 1. , 1. , 1. ,\n",
" 1. , 1. , 0.17698154, 0.13207547, 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 0.17698154, 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 0.13207547, 0.17698154, 1. , 1. , 1. ,\n",
" 1. , 1. , 0.17698154, 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 0.04407713, 1. , 1. , 1. ,\n",
" 0.04407713, 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 0.625 , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 0.17698154,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 0.04407713, 1. , 1. , 1. , 1. ,\n",
" 0.17698154, 1. , 0.13207547, 0.04407713, 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 0.17698154, 1. ,\n",
" 0.04407713, 1. , 0.04407713, 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 0.04407713,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 0.17698154, 1. , 0.08695652,\n",
" 0.04407713, 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 0. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 0.17698154, 1. , 0.17698154, 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 0.17698154,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 0.04407713, 1. , 1. , 0.13207547,\n",
" 1. , 1. , 0.04407713, 1. , 1. ,\n",
" 0.625 , 1. , 1. , 1. , 1. ,\n",
" 0.17698154, 1. , 1. , 0.13207547, 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 0.17698154,\n",
" 1. , 1. , 0.17698154, 1. , 1. ,\n",
" 1. , 1. , 0.04407713, 1. , 1. ,\n",
" 1. , 1. , 0.17698154, 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 0.17698154, 1. , 1. ,\n",
" 0.17698154, 1. , 1. , 0.17698154, 0. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 0.04407713, 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 0.04407713, 1. , 1. ,\n",
" 0.04407713, 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 0.17698154,\n",
" 1. , 0.17698154, 1. , 1. , 1. ,\n",
" 0.17698154, 1. , 1. , 0.17698154, 0.05454545,\n",
" 0.04407713, 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 0.17698154, 1. , 1. ,\n",
" 1. , 1. , 0.17698154, 1. , 1. ,\n",
" 1. , 0.04407713, 1. , 1. , 0.04407713,\n",
" 0.17698154, 1. , 1. , 1. , 0.04407713,\n",
" 1. , 1. , 1. , 0.17698154, 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 0.17698154, 1. , 1. , 0.04407713,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 0.17698154,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 0.04407713, 1. , 1. , 1. , 1. ,\n",
" 0.17698154, 1. , 0.04407713, 1. , 1. ,\n",
" 1. , 1. , 1. , 0.04407713, 1. ,\n",
" 1. , 1. , 1. , 0.17698154, 1. ,\n",
" 1. , 0.04407713, 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 0.17698154, 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 0.04407713, 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 0.17698154,\n",
" 1. , 1. , 0.04407713, 1. , 0.17698154,\n",
" 1. , 1. , 1. , 0.04407713, 0.04407713,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 0.04407713, 1. , 1. , 1. ,\n",
" 1. , 1. , 0. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 0. , 1. , 0.17698154, 1. ,\n",
" 1. , 1. , 0.04407713, 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 0.17698154, 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 0.17698154, 1. , 0. ,\n",
" 1. , 0.08695652, 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 1. , 1. , 1. , 1. ,\n",
" 1. , 0. , 1. , 1. , 0.17698154,\n",
" 0.04407713, 1. , 1. , 1. , 1. ]),\n",
" 'preds': array([1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1,\n",
" 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1,\n",
" 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,\n",
" 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,\n",
" 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,\n",
" 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,\n",
" 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,\n",
" 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1,\n",
" 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1,\n",
" 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1,\n",
" 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,\n",
" 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
" 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
" 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,\n",
" 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1,\n",
" 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
" 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1,\n",
" 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1,\n",
" 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0,\n",
" 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
" 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1,\n",
" 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
" 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0,\n",
" 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1,\n",
" 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1,\n",
" 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,\n",
" 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
" 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1,\n",
" 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,\n",
" 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0,\n",
" 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,\n",
" 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,\n",
" 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
" 0, 1, 1, 0, 0, 1, 1, 1, 1]),\n",
" 'Precision_train': np.float64(0.9157792836398838),\n",
" 'Precision_test': np.float64(0.011345218800648298),\n",
" 'Recall_train': np.float64(0.9978902953586498),\n",
" 'Recall_test': np.float64(0.3181818181818182),\n",
" 'Accuracy_train': 0.9528851244044468,\n",
" 'Accuracy_test': 0.14965986394557823,\n",
" 'ROC_AUC_test': np.float64(0.21318373071528754),\n",
" 'F1_train': np.float64(0.9550731953558809),\n",
" 'F1_test': np.float64(0.02190923317683881),\n",
" 'MCC_test': np.float64(-0.2494229220759723),\n",
" 'Cohen_kappa_test': np.float64(-0.03809571157718228),\n",
" 'Confusion_matrix': array([[103, 610],\n",
" [ 15, 7]])}"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn import tree\n",
"\n",
"from src.utils import run_classification\n",
"\n",
"# Depth-limited decision tree; random_state is defined in an earlier cell.\n",
"# The estimator is fitted on the underlying arrays of Xc_train / yc_train\n",
"# (ravel() flattens the target to 1-D). fit() returns the estimator itself,\n",
"# so fitted_model refers to the trained tree after this call.\n",
"fitted_model = tree.DecisionTreeClassifier(\n",
"    max_depth=7,\n",
"    random_state=random_state,\n",
")\n",
"fitted_model.fit(Xc_train.values, yc_train.values.ravel())\n",
"\n",
"# run_classification is a project helper from src.utils; the value it\n",
"# returns is shown as this cell's output.\n",
"result = run_classification(\n",
"    fitted_model,\n",
"    X_train=Xc_train,\n",
"    X_test=Xc_test,\n",
"    y_train=yc_train,\n",
"    y_test=yc_test,\n",
")\n",
"result"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"|--- x46 <= 0.07\n",
"| |--- x10 <= -0.00\n",
"| | |--- x14 <= 1.65\n",
"| | | |--- x23 <= 0.16\n",
"| | | | |--- x54 <= 148.12\n",
"| | | | | |--- x70 <= 15.70\n",
"| | | | | | |--- x54 <= 40.29\n",
"| | | | | | | |--- class: 0\n",
"| | | | | | |--- x54 > 40.29\n",
"| | | | | | | |--- class: 1\n",
"| | | | | |--- x70 > 15.70\n",
"| | | | | | |--- class: 0\n",
"| | | | |--- x54 > 148.12\n",
"| | | | | |--- class: 0\n",
"| | | |--- x23 > 0.16\n",
"| | | | |--- x24 <= 0.31\n",
"| | | | | |--- class: 0\n",
"| | | | |--- x24 > 0.31\n",
"| | | | | |--- class: 1\n",
"| | |--- x14 > 1.65\n",
"| | | |--- x41 <= 0.27\n",
"| | | | |--- class: 0\n",
"| | | |--- x41 > 0.27\n",
"| | | | |--- x70 <= 10.43\n",
"| | | | | |--- x46 <= -0.02\n",
"| | | | | | |--- x37 <= 0.39\n",
"| | | | | | | |--- class: 1\n",
"| | | | | | |--- x37 > 0.39\n",
"| | | | | | | |--- class: 0\n",
"| | | | | |--- x46 > -0.02\n",
"| | | | | | |--- class: 0\n",
"| | | | |--- x70 > 10.43\n",
"| | | | | |--- x41 <= 24.10\n",
"| | | | | | |--- x73 <= 22.02\n",
"| | | | | | | |--- class: 0\n",
"| | | | | | |--- x73 > 22.02\n",
"| | | | | | | |--- class: 1\n",
"| | | | | |--- x41 > 24.10\n",
"| | | | | | |--- class: 0\n",
"| |--- x10 > -0.00\n",
"| | |--- x41 <= 2.26\n",
"| | | |--- x37 <= 0.37\n",
"| | | | |--- class: 0\n",
"| | | |--- x37 > 0.37\n",
"| | | | |--- x5 <= 0.03\n",
"| | | | | |--- x73 <= 29.22\n",
"| | | | | | |--- x14 <= 6.58\n",
"| | | | | | | |--- class: 0\n",
"| | | | | | |--- x14 > 6.58\n",
"| | | | | | | |--- class: 1\n",
"| | | | | |--- x73 > 29.22\n",
"| | | | | | |--- x4 <= 0.20\n",
"| | | | | | | |--- class: 0\n",
"| | | | | | |--- x4 > 0.20\n",
"| | | | | | | |--- class: 1\n",
"| | | | |--- x5 > 0.03\n",
"| | | | | |--- x29 <= 0.09\n",
"| | | | | | |--- class: 1\n",
"| | | | | |--- x29 > 0.09\n",
"| | | | | | |--- x41 <= 2.15\n",
"| | | | | | | |--- class: 0\n",
"| | | | | | |--- x41 > 2.15\n",
"| | | | | | | |--- class: 0\n",
"| | |--- x41 > 2.26\n",
"| | | |--- x37 <= 0.17\n",
"| | | | |--- x25 <= 112.70\n",
"| | | | | |--- x41 <= 3.17\n",
"| | | | | | |--- x23 <= 0.26\n",
"| | | | | | | |--- class: 0\n",
"| | | | | | |--- x23 > 0.26\n",
"| | | | | | | |--- class: 1\n",
"| | | | | |--- x41 > 3.17\n",
"| | | | | | |--- x4 <= 1.30\n",
"| | | | | | | |--- class: 1\n",
"| | | | | | |--- x4 > 1.30\n",
"| | | | | | | |--- class: 0\n",
"| | | | |--- x25 > 112.70\n",
"| | | | | |--- x3 <= 0.69\n",
"| | | | | | |--- x23 <= 0.16\n",
"| | | | | | | |--- class: 0\n",
"| | | | | | |--- x23 > 0.16\n",
"| | | | | | | |--- class: 1\n",
"| | | | | |--- x3 > 0.69\n",
"| | | | | | |--- x46 <= 0.07\n",
"| | | | | | | |--- class: 0\n",
"| | | | | | |--- x46 > 0.07\n",
"| | | | | | | |--- class: 1\n",
"| | | |--- x37 > 0.17\n",
"| | | | |--- x73 <= 27.06\n",
"| | | | | |--- x29 <= 0.03\n",
"| | | | | | |--- x80 <= 19.00\n",
"| | | | | | | |--- class: 1\n",
"| | | | | | |--- x80 > 19.00\n",
"| | | | | | | |--- class: 0\n",
"| | | | | |--- x29 > 0.03\n",
"| | | | | | |--- class: 0\n",
"| | | | |--- x73 > 27.06\n",
"| | | | | |--- x70 <= 30.33\n",
"| | | | | | |--- x80 <= 27.95\n",
"| | | | | | | |--- class: 1\n",
"| | | | | | |--- x80 > 27.95\n",
"| | | | | | | |--- class: 0\n",
"| | | | | |--- x70 > 30.33\n",
"| | | | | | |--- x37 <= 0.19\n",
"| | | | | | | |--- class: 1\n",
"| | | | | | |--- x37 > 0.19\n",
"| | | | | | | |--- class: 0\n",
"|--- x46 > 0.07\n",
"| |--- x14 <= 3.44\n",
"| | |--- x25 <= 152.01\n",
"| | | |--- x37 <= 0.25\n",
"| | | | |--- class: 0\n",
"| | | |--- x37 > 0.25\n",
"| | | | |--- x41 <= 1.66\n",
"| | | | | |--- class: 0\n",
"| | | | |--- x41 > 1.66\n",
"| | | | | |--- class: 1\n",
"| | |--- x25 > 152.01\n",
"| | | |--- x46 <= 0.10\n",
"| | | | |--- x41 <= 3.13\n",
"| | | | | |--- x25 <= 1069.03\n",
"| | | | | | |--- class: 0\n",
"| | | | | |--- x25 > 1069.03\n",
"| | | | | | |--- x23 <= 0.24\n",
"| | | | | | | |--- class: 1\n",
"| | | | | | |--- x23 > 0.24\n",
"| | | | | | | |--- class: 0\n",
"| | | | |--- x41 > 3.13\n",
"| | | | | |--- x4 <= 1.12\n",
"| | | | | | |--- x10 <= 0.04\n",
"| | | | | | | |--- class: 0\n",
"| | | | | | |--- x10 > 0.04\n",
"| | | | | | | |--- class: 1\n",
"| | | | | |--- x4 > 1.12\n",
"| | | | | | |--- class: 0\n",
"| | | |--- x46 > 0.10\n",
"| | | | |--- x25 <= 240.30\n",
"| | | | | |--- x41 <= 3.33\n",
"| | | | | | |--- class: 0\n",
"| | | | | |--- x41 > 3.33\n",
"| | | | | | |--- x23 <= 0.17\n",
"| | | | | | | |--- class: 0\n",
"| | | | | | |--- x23 > 0.17\n",
"| | | | | | | |--- class: 1\n",
"| | | | |--- x25 > 240.30\n",
"| | | | | |--- x5 <= 0.02\n",
"| | | | | | |--- x5 <= 0.02\n",
"| | | | | | | |--- class: 0\n",
"| | | | | | |--- x5 > 0.02\n",
"| | | | | | | |--- class: 1\n",
"| | | | | |--- x5 > 0.02\n",
"| | | | | | |--- class: 0\n",
"| |--- x14 > 3.44\n",
"| | |--- x5 <= 0.09\n",
"| | | |--- x54 <= 1165.84\n",
"| | | | |--- x37 <= 0.14\n",
"| | | | | |--- class: 0\n",
"| | | | |--- x37 > 0.14\n",
"| | | | | |--- x41 <= 2.36\n",
"| | | | | | |--- class: 0\n",
"| | | | | |--- x41 > 2.36\n",
"| | | | | | |--- x80 <= 10.34\n",
"| | | | | | | |--- class: 0\n",
"| | | | | | |--- x80 > 10.34\n",
"| | | | | | | |--- class: 1\n",
"| | | |--- x54 > 1165.84\n",
"| | | | |--- class: 0\n",
"| | |--- x5 > 0.09\n",
"| | | |--- x70 <= 16.37\n",
"| | | | |--- x23 <= 0.08\n",
"| | | | | |--- class: 1\n",
"| | | | |--- x23 > 0.08\n",
"| | | | | |--- x54 <= 150.56\n",
"| | | | | | |--- x10 <= 0.04\n",
"| | | | | | | |--- class: 0\n",
"| | | | | | |--- x10 > 0.04\n",
"| | | | | | | |--- class: 1\n",
"| | | | | |--- x54 > 150.56\n",
"| | | | | | |--- class: 0\n",
"| | | |--- x70 > 16.37\n",
"| | | | |--- x54 <= 911.20\n",
"| | | | | |--- x41 <= 4.52\n",
"| | | | | | |--- class: 1\n",
"| | | | | |--- x41 > 4.52\n",
"| | | | | | |--- class: 0\n",
"| | | | |--- x54 > 911.20\n",
"| | | | | |--- x25 <= 2874.98\n",
"| | | | | | |--- class: 0\n",
"| | | | | |--- x25 > 2874.98\n",
"| | | | | | |--- class: 1\n",
"\n"
]
}
],
"source": [
"# Render the fitted tree as indented if/else text rules.\n",
"# Feature names come from Xc_train, the frame whose values the tree was\n",
"# fitted on, so each name is guaranteed to line up with its split index\n",
"# (previously they were read from the unrelated X_train frame).\n",
"rules = tree.export_text(\n",
"    fitted_model,\n",
"    feature_names=Xc_train.columns.tolist(),\n",
")\n",
"print(rules)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"import pickle\n",
"\n",
"# Persist the fitted tree to disk. The context manager guarantees the file\n",
"# handle is flushed and closed even if pickling raises; passing a bare\n",
"# open() call leaves the handle open until garbage collection.\n",
"with open(\"data-distress/tree.model.sav\", \"wb\") as model_file:\n",
"    pickle.dump(fitted_model, model_file)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}