3414 lines
158 KiB
Plaintext
3414 lines
158 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "Y5dMmHXIRYEg"
|
||
},
|
||
"source": [
|
||
"#### Загрузка и распаковка данных"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import os\n",
"from urllib.request import urlretrieve\n",
"from zipfile import ZipFile\n",
"\n",
"ds_url = \"https://github.com/PacktPublishing/Interpretable-Machine-Learning-with-Python/raw/master/datasets/aa-domestic-delays-2018.csv.zip\"\n",
"ds_zip_filename = \"data/aa-domestic-delays-2018.csv.zip\"\n",
"\n",
"# urlretrieve() does not create missing parent directories, so make sure the\n",
"# download folder exists; otherwise this cell fails on a fresh checkout.\n",
"os.makedirs(os.path.dirname(ds_zip_filename), exist_ok=True)\n",
"urlretrieve(ds_url, ds_zip_filename)\n",
"\n",
"# Unpack the archive into the folder the loading cell reads the CSV from.\n",
"with ZipFile(ds_zip_filename, \"r\") as zObject:\n",
"    zObject.extractall(path=\"data\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Загрузка данных в Dataframe"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 22,
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "YvuKMosoRY7K",
|
||
"outputId": "f9f05784-9c91-4869-a02e-e1ccc8115a8d"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||
"RangeIndex: 899527 entries, 0 to 899526\n",
|
||
"Data columns (total 23 columns):\n",
|
||
" # Column Non-Null Count Dtype \n",
|
||
"--- ------ -------------- ----- \n",
|
||
" 0 FL_NUM 899527 non-null int64 \n",
|
||
" 1 ORIGIN 899527 non-null object \n",
|
||
" 2 DEST 899527 non-null object \n",
|
||
" 3 PLANNED_DEP_DATETIME 899527 non-null object \n",
|
||
" 4 CRS_DEP_TIME 899527 non-null int64 \n",
|
||
" 5 DEP_TIME 899527 non-null float64\n",
|
||
" 6 DEP_DELAY 899527 non-null float64\n",
|
||
" 7 DEP_AFPH 899527 non-null float64\n",
|
||
" 8 DEP_RFPH 899527 non-null float64\n",
|
||
" 9 TAXI_OUT 899527 non-null float64\n",
|
||
" 10 WHEELS_OFF 899527 non-null float64\n",
|
||
" 11 CRS_ELAPSED_TIME 899527 non-null float64\n",
|
||
" 12 PCT_ELAPSED_TIME 899527 non-null float64\n",
|
||
" 13 DISTANCE 899527 non-null float64\n",
|
||
" 14 CRS_ARR_TIME 899527 non-null int64 \n",
|
||
" 15 ARR_AFPH 899527 non-null float64\n",
|
||
" 16 ARR_RFPH 899527 non-null float64\n",
|
||
" 17 ARR_DELAY 899527 non-null float64\n",
|
||
" 18 CARRIER_DELAY 899527 non-null float64\n",
|
||
" 19 WEATHER_DELAY 899527 non-null float64\n",
|
||
" 20 NAS_DELAY 899527 non-null float64\n",
|
||
" 21 SECURITY_DELAY 899527 non-null float64\n",
|
||
" 22 LATE_AIRCRAFT_DELAY 899527 non-null float64\n",
|
||
"dtypes: float64(17), int64(3), object(3)\n",
|
||
"memory usage: 157.8+ MB\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"import pandas as pd\n",
"\n",
"# Load the extracted CSV into orig_df.\n",
"csv_path = \"data/aa-domestic-delays-2018.csv\"\n",
"orig_df = pd.read_csv(csv_path)\n",
"# Summarise columns, non-null counts and dtypes.\n",
"orig_df.info()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "Hen4OWkxSEsb"
|
||
},
|
||
"source": [
|
||
"#### Подготовка данных и конструирование признаков"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 23,
|
||
"metadata": {
|
||
"id": "qnyD6ZeLSGwL"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>CRS_DEP_TIME</th>\n",
|
||
" <th>DEP_TIME</th>\n",
|
||
" <th>DEP_DELAY</th>\n",
|
||
" <th>DEP_AFPH</th>\n",
|
||
" <th>DEP_RFPH</th>\n",
|
||
" <th>TAXI_OUT</th>\n",
|
||
" <th>WHEELS_OFF</th>\n",
|
||
" <th>CRS_ELAPSED_TIME</th>\n",
|
||
" <th>PCT_ELAPSED_TIME</th>\n",
|
||
" <th>DISTANCE</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>ARR_RFPH</th>\n",
|
||
" <th>CARRIER_DELAY</th>\n",
|
||
" <th>WEATHER_DELAY</th>\n",
|
||
" <th>NAS_DELAY</th>\n",
|
||
" <th>SECURITY_DELAY</th>\n",
|
||
" <th>LATE_AIRCRAFT_DELAY</th>\n",
|
||
" <th>DEP_MONTH</th>\n",
|
||
" <th>DEP_DOW</th>\n",
|
||
" <th>ORIGIN_HUB</th>\n",
|
||
" <th>DEST_HUB</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1155</td>\n",
|
||
" <td>1149.0</td>\n",
|
||
" <td>-6.0</td>\n",
|
||
" <td>34.444444</td>\n",
|
||
" <td>0.956790</td>\n",
|
||
" <td>14.0</td>\n",
|
||
" <td>1203.0</td>\n",
|
||
" <td>219.0</td>\n",
|
||
" <td>0.963470</td>\n",
|
||
" <td>1192.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.854573</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>705</td>\n",
|
||
" <td>700.0</td>\n",
|
||
" <td>-5.0</td>\n",
|
||
" <td>17.454545</td>\n",
|
||
" <td>0.242424</td>\n",
|
||
" <td>16.0</td>\n",
|
||
" <td>716.0</td>\n",
|
||
" <td>171.0</td>\n",
|
||
" <td>0.918129</td>\n",
|
||
" <td>1192.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.731707</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>1148</td>\n",
|
||
" <td>1145.0</td>\n",
|
||
" <td>-3.0</td>\n",
|
||
" <td>94.736842</td>\n",
|
||
" <td>0.947368</td>\n",
|
||
" <td>14.0</td>\n",
|
||
" <td>1159.0</td>\n",
|
||
" <td>212.0</td>\n",
|
||
" <td>0.971698</td>\n",
|
||
" <td>1558.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1.092437</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>825</td>\n",
|
||
" <td>824.0</td>\n",
|
||
" <td>-1.0</td>\n",
|
||
" <td>33.559322</td>\n",
|
||
" <td>0.860495</td>\n",
|
||
" <td>16.0</td>\n",
|
||
" <td>840.0</td>\n",
|
||
" <td>271.0</td>\n",
|
||
" <td>0.918819</td>\n",
|
||
" <td>1558.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.867379</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>1155</td>\n",
|
||
" <td>1147.0</td>\n",
|
||
" <td>-8.0</td>\n",
|
||
" <td>33.461538</td>\n",
|
||
" <td>0.929487</td>\n",
|
||
" <td>13.0</td>\n",
|
||
" <td>1200.0</td>\n",
|
||
" <td>99.0</td>\n",
|
||
" <td>0.969697</td>\n",
|
||
" <td>331.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1.006803</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>899522</th>\n",
|
||
" <td>1534</td>\n",
|
||
" <td>1530.0</td>\n",
|
||
" <td>-4.0</td>\n",
|
||
" <td>35.357143</td>\n",
|
||
" <td>0.822259</td>\n",
|
||
" <td>20.0</td>\n",
|
||
" <td>1550.0</td>\n",
|
||
" <td>100.0</td>\n",
|
||
" <td>0.990000</td>\n",
|
||
" <td>331.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.837945</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>12</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>899523</th>\n",
|
||
" <td>1751</td>\n",
|
||
" <td>1757.0</td>\n",
|
||
" <td>6.0</td>\n",
|
||
" <td>71.818182</td>\n",
|
||
" <td>1.040843</td>\n",
|
||
" <td>18.0</td>\n",
|
||
" <td>1815.0</td>\n",
|
||
" <td>181.0</td>\n",
|
||
" <td>0.972376</td>\n",
|
||
" <td>936.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.697674</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>12</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>899524</th>\n",
|
||
" <td>2015</td>\n",
|
||
" <td>2010.0</td>\n",
|
||
" <td>-5.0</td>\n",
|
||
" <td>63.272727</td>\n",
|
||
" <td>1.193825</td>\n",
|
||
" <td>36.0</td>\n",
|
||
" <td>2046.0</td>\n",
|
||
" <td>112.0</td>\n",
|
||
" <td>1.142857</td>\n",
|
||
" <td>511.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.482897</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>12</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>899525</th>\n",
|
||
" <td>1300</td>\n",
|
||
" <td>1323.0</td>\n",
|
||
" <td>23.0</td>\n",
|
||
" <td>70.843373</td>\n",
|
||
" <td>0.770037</td>\n",
|
||
" <td>11.0</td>\n",
|
||
" <td>1334.0</td>\n",
|
||
" <td>50.0</td>\n",
|
||
" <td>0.820000</td>\n",
|
||
" <td>130.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.888031</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>12</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>899526</th>\n",
|
||
" <td>1435</td>\n",
|
||
" <td>1443.0</td>\n",
|
||
" <td>8.0</td>\n",
|
||
" <td>19.411765</td>\n",
|
||
" <td>0.924370</td>\n",
|
||
" <td>8.0</td>\n",
|
||
" <td>1451.0</td>\n",
|
||
" <td>71.0</td>\n",
|
||
" <td>0.830986</td>\n",
|
||
" <td>130.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1.011905</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>12</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>899527 rows × 22 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" CRS_DEP_TIME DEP_TIME DEP_DELAY DEP_AFPH DEP_RFPH TAXI_OUT \\\n",
|
||
"0 1155 1149.0 -6.0 34.444444 0.956790 14.0 \n",
|
||
"1 705 700.0 -5.0 17.454545 0.242424 16.0 \n",
|
||
"2 1148 1145.0 -3.0 94.736842 0.947368 14.0 \n",
|
||
"3 825 824.0 -1.0 33.559322 0.860495 16.0 \n",
|
||
"4 1155 1147.0 -8.0 33.461538 0.929487 13.0 \n",
|
||
"... ... ... ... ... ... ... \n",
|
||
"899522 1534 1530.0 -4.0 35.357143 0.822259 20.0 \n",
|
||
"899523 1751 1757.0 6.0 71.818182 1.040843 18.0 \n",
|
||
"899524 2015 2010.0 -5.0 63.272727 1.193825 36.0 \n",
|
||
"899525 1300 1323.0 23.0 70.843373 0.770037 11.0 \n",
|
||
"899526 1435 1443.0 8.0 19.411765 0.924370 8.0 \n",
|
||
"\n",
|
||
" WHEELS_OFF CRS_ELAPSED_TIME PCT_ELAPSED_TIME DISTANCE ... \\\n",
|
||
"0 1203.0 219.0 0.963470 1192.0 ... \n",
|
||
"1 716.0 171.0 0.918129 1192.0 ... \n",
|
||
"2 1159.0 212.0 0.971698 1558.0 ... \n",
|
||
"3 840.0 271.0 0.918819 1558.0 ... \n",
|
||
"4 1200.0 99.0 0.969697 331.0 ... \n",
|
||
"... ... ... ... ... ... \n",
|
||
"899522 1550.0 100.0 0.990000 331.0 ... \n",
|
||
"899523 1815.0 181.0 0.972376 936.0 ... \n",
|
||
"899524 2046.0 112.0 1.142857 511.0 ... \n",
|
||
"899525 1334.0 50.0 0.820000 130.0 ... \n",
|
||
"899526 1451.0 71.0 0.830986 130.0 ... \n",
|
||
"\n",
|
||
" ARR_RFPH CARRIER_DELAY WEATHER_DELAY NAS_DELAY SECURITY_DELAY \\\n",
|
||
"0 0.854573 0.0 0.0 0.0 0.0 \n",
|
||
"1 0.731707 0.0 0.0 0.0 0.0 \n",
|
||
"2 1.092437 0.0 0.0 0.0 0.0 \n",
|
||
"3 0.867379 0.0 0.0 0.0 0.0 \n",
|
||
"4 1.006803 0.0 0.0 0.0 0.0 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"899522 0.837945 0.0 0.0 0.0 0.0 \n",
|
||
"899523 0.697674 0.0 0.0 0.0 0.0 \n",
|
||
"899524 0.482897 0.0 0.0 0.0 0.0 \n",
|
||
"899525 0.888031 0.0 0.0 0.0 0.0 \n",
|
||
"899526 1.011905 0.0 0.0 0.0 0.0 \n",
|
||
"\n",
|
||
" LATE_AIRCRAFT_DELAY DEP_MONTH DEP_DOW ORIGIN_HUB DEST_HUB \n",
|
||
"0 0.0 1 0 1 1 \n",
|
||
"1 0.0 1 0 1 1 \n",
|
||
"2 0.0 1 0 0 1 \n",
|
||
"3 0.0 1 0 1 0 \n",
|
||
"4 0.0 1 0 1 1 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"899522 0.0 12 0 1 1 \n",
|
||
"899523 0.0 12 0 1 1 \n",
|
||
"899524 0.0 12 0 1 0 \n",
|
||
"899525 0.0 12 0 1 0 \n",
|
||
"899526 0.0 12 0 0 1 \n",
|
||
"\n",
|
||
"[899527 rows x 22 columns]"
|
||
]
|
||
},
|
||
"execution_count": 23,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Hub airports\n",
"hubs = [\"CLT\", \"ORD\", \"DFW\", \"LAX\", \"MIA\", \"JFK\", \"LGA\", \"PHL\", \"PHX\", \"DCA\"]\n",
"# Parse the planned departure timestamp from its string form\n",
"planned_dep = pd.to_datetime(orig_df[\"PLANNED_DEP_DATETIME\"])\n",
"# Boolean masks: is the origin / destination airport a hub?\n",
"is_origin_hub = orig_df[\"ORIGIN\"].isin(hubs)\n",
"is_dest_hub = orig_df[\"DEST\"].isin(hubs)\n",
"\n",
"df = (\n",
"    orig_df.assign(\n",
"        # Departure month and day of week, to account for seasonality and weekday effects\n",
"        DEP_MONTH=planned_dep.dt.month,\n",
"        DEP_DOW=planned_dep.dt.dayofweek,\n",
"        # Hub indicators stored as 0/1 integers\n",
"        ORIGIN_HUB=is_origin_hub.astype(\"int64\"),\n",
"        DEST_HUB=is_dest_hub.astype(\"int64\"),\n",
"    )\n",
"    # Drop the raw timestamp and the columns that are no longer needed\n",
"    .drop(columns=[\"PLANNED_DEP_DATETIME\", \"FL_NUM\", \"ORIGIN\", \"DEST\"])\n",
"    # Drop the total arrival delay, since its values would strongly influence the result\n",
"    .drop(columns=[\"ARR_DELAY\"])\n",
")\n",
"df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "y6f3Z4UwUAUI"
|
||
},
|
||
"source": [
|
||
"#### Формирование тестовой и обучающей выборок данных"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 24,
|
||
"metadata": {
|
||
"id": "6LudeeYUUEPt"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>CRS_DEP_TIME</th>\n",
|
||
" <th>DEP_TIME</th>\n",
|
||
" <th>DEP_DELAY</th>\n",
|
||
" <th>DEP_AFPH</th>\n",
|
||
" <th>DEP_RFPH</th>\n",
|
||
" <th>TAXI_OUT</th>\n",
|
||
" <th>WHEELS_OFF</th>\n",
|
||
" <th>CRS_ELAPSED_TIME</th>\n",
|
||
" <th>PCT_ELAPSED_TIME</th>\n",
|
||
" <th>DISTANCE</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>ARR_AFPH</th>\n",
|
||
" <th>ARR_RFPH</th>\n",
|
||
" <th>WEATHER_DELAY</th>\n",
|
||
" <th>NAS_DELAY</th>\n",
|
||
" <th>SECURITY_DELAY</th>\n",
|
||
" <th>LATE_AIRCRAFT_DELAY</th>\n",
|
||
" <th>DEP_MONTH</th>\n",
|
||
" <th>DEP_DOW</th>\n",
|
||
" <th>ORIGIN_HUB</th>\n",
|
||
" <th>DEST_HUB</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>31121</th>\n",
|
||
" <td>845</td>\n",
|
||
" <td>842.0</td>\n",
|
||
" <td>-3.0</td>\n",
|
||
" <td>16.842105</td>\n",
|
||
" <td>0.443213</td>\n",
|
||
" <td>21.0</td>\n",
|
||
" <td>903.0</td>\n",
|
||
" <td>106.0</td>\n",
|
||
" <td>0.886792</td>\n",
|
||
" <td>331.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>85.333333</td>\n",
|
||
" <td>1.145414</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>633500</th>\n",
|
||
" <td>1315</td>\n",
|
||
" <td>1316.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>4.918033</td>\n",
|
||
" <td>0.983607</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>1325.0</td>\n",
|
||
" <td>121.0</td>\n",
|
||
" <td>1.107438</td>\n",
|
||
" <td>624.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>111.891892</td>\n",
|
||
" <td>1.286114</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>747737</th>\n",
|
||
" <td>1710</td>\n",
|
||
" <td>1704.0</td>\n",
|
||
" <td>-6.0</td>\n",
|
||
" <td>55.555556</td>\n",
|
||
" <td>1.028807</td>\n",
|
||
" <td>14.0</td>\n",
|
||
" <td>1718.0</td>\n",
|
||
" <td>67.0</td>\n",
|
||
" <td>1.074627</td>\n",
|
||
" <td>304.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>17.288136</td>\n",
|
||
" <td>0.751658</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>298943</th>\n",
|
||
" <td>1840</td>\n",
|
||
" <td>1920.0</td>\n",
|
||
" <td>40.0</td>\n",
|
||
" <td>28.200000</td>\n",
|
||
" <td>0.587500</td>\n",
|
||
" <td>18.0</td>\n",
|
||
" <td>1938.0</td>\n",
|
||
" <td>161.0</td>\n",
|
||
" <td>0.888199</td>\n",
|
||
" <td>852.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>38.780488</td>\n",
|
||
" <td>1.157627</td>\n",
|
||
" <td>22.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>843932</th>\n",
|
||
" <td>1830</td>\n",
|
||
" <td>1822.0</td>\n",
|
||
" <td>-8.0</td>\n",
|
||
" <td>28.846154</td>\n",
|
||
" <td>0.901442</td>\n",
|
||
" <td>13.0</td>\n",
|
||
" <td>1835.0</td>\n",
|
||
" <td>215.0</td>\n",
|
||
" <td>0.930233</td>\n",
|
||
" <td>1192.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>90.810811</td>\n",
|
||
" <td>1.121121</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>12</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>720822</th>\n",
|
||
" <td>1359</td>\n",
|
||
" <td>1410.0</td>\n",
|
||
" <td>11.0</td>\n",
|
||
" <td>53.239437</td>\n",
|
||
" <td>0.995130</td>\n",
|
||
" <td>26.0</td>\n",
|
||
" <td>1436.0</td>\n",
|
||
" <td>147.0</td>\n",
|
||
" <td>1.006803</td>\n",
|
||
" <td>814.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>25.000000</td>\n",
|
||
" <td>1.136364</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>459253</th>\n",
|
||
" <td>2209</td>\n",
|
||
" <td>2207.0</td>\n",
|
||
" <td>-2.0</td>\n",
|
||
" <td>80.689655</td>\n",
|
||
" <td>1.021388</td>\n",
|
||
" <td>16.0</td>\n",
|
||
" <td>2223.0</td>\n",
|
||
" <td>86.0</td>\n",
|
||
" <td>0.918605</td>\n",
|
||
" <td>413.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>2.352941</td>\n",
|
||
" <td>1.176471</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>711294</th>\n",
|
||
" <td>530</td>\n",
|
||
" <td>523.0</td>\n",
|
||
" <td>-7.0</td>\n",
|
||
" <td>12.452830</td>\n",
|
||
" <td>0.830189</td>\n",
|
||
" <td>17.0</td>\n",
|
||
" <td>540.0</td>\n",
|
||
" <td>119.0</td>\n",
|
||
" <td>0.957983</td>\n",
|
||
" <td>666.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>37.500000</td>\n",
|
||
" <td>0.892857</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>872796</th>\n",
|
||
" <td>709</td>\n",
|
||
" <td>706.0</td>\n",
|
||
" <td>-3.0</td>\n",
|
||
" <td>120.000000</td>\n",
|
||
" <td>1.030043</td>\n",
|
||
" <td>12.0</td>\n",
|
||
" <td>718.0</td>\n",
|
||
" <td>169.0</td>\n",
|
||
" <td>1.295858</td>\n",
|
||
" <td>1120.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>12.336449</td>\n",
|
||
" <td>0.850790</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>47.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>12</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>516478</th>\n",
|
||
" <td>1925</td>\n",
|
||
" <td>2030.0</td>\n",
|
||
" <td>65.0</td>\n",
|
||
" <td>14.880000</td>\n",
|
||
" <td>1.352727</td>\n",
|
||
" <td>8.0</td>\n",
|
||
" <td>2038.0</td>\n",
|
||
" <td>135.0</td>\n",
|
||
" <td>0.977778</td>\n",
|
||
" <td>761.0</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>93.442623</td>\n",
|
||
" <td>1.112412</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>62.0</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>764597 rows × 21 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" CRS_DEP_TIME DEP_TIME DEP_DELAY DEP_AFPH DEP_RFPH TAXI_OUT \\\n",
|
||
"31121 845 842.0 -3.0 16.842105 0.443213 21.0 \n",
|
||
"633500 1315 1316.0 1.0 4.918033 0.983607 9.0 \n",
|
||
"747737 1710 1704.0 -6.0 55.555556 1.028807 14.0 \n",
|
||
"298943 1840 1920.0 40.0 28.200000 0.587500 18.0 \n",
|
||
"843932 1830 1822.0 -8.0 28.846154 0.901442 13.0 \n",
|
||
"... ... ... ... ... ... ... \n",
|
||
"720822 1359 1410.0 11.0 53.239437 0.995130 26.0 \n",
|
||
"459253 2209 2207.0 -2.0 80.689655 1.021388 16.0 \n",
|
||
"711294 530 523.0 -7.0 12.452830 0.830189 17.0 \n",
|
||
"872796 709 706.0 -3.0 120.000000 1.030043 12.0 \n",
|
||
"516478 1925 2030.0 65.0 14.880000 1.352727 8.0 \n",
|
||
"\n",
|
||
" WHEELS_OFF CRS_ELAPSED_TIME PCT_ELAPSED_TIME DISTANCE ... \\\n",
|
||
"31121 903.0 106.0 0.886792 331.0 ... \n",
|
||
"633500 1325.0 121.0 1.107438 624.0 ... \n",
|
||
"747737 1718.0 67.0 1.074627 304.0 ... \n",
|
||
"298943 1938.0 161.0 0.888199 852.0 ... \n",
|
||
"843932 1835.0 215.0 0.930233 1192.0 ... \n",
|
||
"... ... ... ... ... ... \n",
|
||
"720822 1436.0 147.0 1.006803 814.0 ... \n",
|
||
"459253 2223.0 86.0 0.918605 413.0 ... \n",
|
||
"711294 540.0 119.0 0.957983 666.0 ... \n",
|
||
"872796 718.0 169.0 1.295858 1120.0 ... \n",
|
||
"516478 2038.0 135.0 0.977778 761.0 ... \n",
|
||
"\n",
|
||
" ARR_AFPH ARR_RFPH WEATHER_DELAY NAS_DELAY SECURITY_DELAY \\\n",
|
||
"31121 85.333333 1.145414 0.0 0.0 0.0 \n",
|
||
"633500 111.891892 1.286114 0.0 0.0 0.0 \n",
|
||
"747737 17.288136 0.751658 0.0 0.0 0.0 \n",
|
||
"298943 38.780488 1.157627 22.0 0.0 0.0 \n",
|
||
"843932 90.810811 1.121121 0.0 0.0 0.0 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"720822 25.000000 1.136364 0.0 0.0 0.0 \n",
|
||
"459253 2.352941 1.176471 0.0 0.0 0.0 \n",
|
||
"711294 37.500000 0.892857 0.0 0.0 0.0 \n",
|
||
"872796 12.336449 0.850790 0.0 47.0 0.0 \n",
|
||
"516478 93.442623 1.112412 0.0 0.0 0.0 \n",
|
||
"\n",
|
||
" LATE_AIRCRAFT_DELAY DEP_MONTH DEP_DOW ORIGIN_HUB DEST_HUB \n",
|
||
"31121 0.0 1 6 1 1 \n",
|
||
"633500 0.0 9 4 0 1 \n",
|
||
"747737 0.0 10 1 1 0 \n",
|
||
"298943 0.0 5 5 0 1 \n",
|
||
"843932 0.0 12 5 1 1 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"720822 0.0 10 4 1 0 \n",
|
||
"459253 0.0 7 6 1 0 \n",
|
||
"711294 0.0 10 1 0 1 \n",
|
||
"872796 0.0 12 3 1 0 \n",
|
||
"516478 62.0 7 0 0 1 \n",
|
||
"\n",
|
||
"[764597 rows x 21 columns]"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"31121 0.0\n",
|
||
"633500 0.0\n",
|
||
"747737 0.0\n",
|
||
"298943 0.0\n",
|
||
"843932 0.0\n",
|
||
" ... \n",
|
||
"720822 0.0\n",
|
||
"459253 0.0\n",
|
||
"711294 0.0\n",
|
||
"872796 0.0\n",
|
||
"516478 0.0\n",
|
||
"Name: CARRIER_DELAY, Length: 764597, dtype: float64"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"31121 0\n",
|
||
"633500 0\n",
|
||
"747737 0\n",
|
||
"298943 0\n",
|
||
"843932 0\n",
|
||
" ..\n",
|
||
"720822 0\n",
|
||
"459253 0\n",
|
||
"711294 0\n",
|
||
"872796 0\n",
|
||
"516478 0\n",
|
||
"Name: CARRIER_DELAY, Length: 764597, dtype: int64"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||
"Int64Index: 764597 entries, 31121 to 516478\n",
|
||
"Data columns (total 21 columns):\n",
|
||
" # Column Non-Null Count Dtype \n",
|
||
"--- ------ -------------- ----- \n",
|
||
" 0 CRS_DEP_TIME 764597 non-null int64 \n",
|
||
" 1 DEP_TIME 764597 non-null float64\n",
|
||
" 2 DEP_DELAY 764597 non-null float64\n",
|
||
" 3 DEP_AFPH 764597 non-null float64\n",
|
||
" 4 DEP_RFPH 764597 non-null float64\n",
|
||
" 5 TAXI_OUT 764597 non-null float64\n",
|
||
" 6 WHEELS_OFF 764597 non-null float64\n",
|
||
" 7 CRS_ELAPSED_TIME 764597 non-null float64\n",
|
||
" 8 PCT_ELAPSED_TIME 764597 non-null float64\n",
|
||
" 9 DISTANCE 764597 non-null float64\n",
|
||
" 10 CRS_ARR_TIME 764597 non-null int64 \n",
|
||
" 11 ARR_AFPH 764597 non-null float64\n",
|
||
" 12 ARR_RFPH 764597 non-null float64\n",
|
||
" 13 WEATHER_DELAY 764597 non-null float64\n",
|
||
" 14 NAS_DELAY 764597 non-null float64\n",
|
||
" 15 SECURITY_DELAY 764597 non-null float64\n",
|
||
" 16 LATE_AIRCRAFT_DELAY 764597 non-null float64\n",
|
||
" 17 DEP_MONTH 764597 non-null int64 \n",
|
||
" 18 DEP_DOW 764597 non-null int64 \n",
|
||
" 19 ORIGIN_HUB 764597 non-null int64 \n",
|
||
" 20 DEST_HUB 764597 non-null int64 \n",
|
||
"dtypes: float64(15), int64(6)\n",
|
||
"memory usage: 128.3 MB\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Fixed random state so the split (and the models that reuse it) are reproducible\n",
"rand = 9\n",
"# A flight is labelled as delayed when the carrier delay exceeds this many minutes\n",
"DELAY_THRESHOLD_MIN = 15\n",
"\n",
"# Target the models have to predict\n",
"y = df[\"CARRIER_DELAY\"]\n",
"# Feature matrix: every column except the target\n",
"X = df.drop([\"CARRIER_DELAY\"], axis=1).copy()\n",
"X_train, X_test, y_train_reg, y_test_reg = train_test_split(\n",
"    X, y, test_size=0.15, random_state=rand\n",
")\n",
"# Binary labels for the classifiers (1 = delay above the threshold, 0 otherwise).\n",
"# A vectorised comparison replaces the per-row lambda; index, name and int64 dtype are kept.\n",
"y_train_class = (y_train_reg > DELAY_THRESHOLD_MIN).astype(\"int64\")\n",
"y_test_class = (y_test_reg > DELAY_THRESHOLD_MIN).astype(\"int64\")\n",
"\n",
"display(X_train)\n",
"display(y_train_reg)\n",
"display(y_train_class)\n",
"X_train.info()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "aVjZol-yrYbH"
|
||
},
|
||
"source": [
|
||
"#### Определение линейной корреляции признаков с целевым признаком с помощью корреляции Пирсона"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 25,
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "_1e7XE_Orf0r",
|
||
"outputId": "efbb620c-c92b-481e-d613-a34633a4ba86"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"CARRIER_DELAY 1.000000\n",
|
||
"DEP_DELAY 0.703935\n",
|
||
"ARR_RFPH 0.101742\n",
|
||
"LATE_AIRCRAFT_DELAY 0.083166\n",
|
||
"DEP_RFPH 0.058659\n",
|
||
"ARR_AFPH 0.035135\n",
|
||
"DEP_TIME 0.030941\n",
|
||
"NAS_DELAY 0.026792\n",
|
||
"WHEELS_OFF 0.026787\n",
|
||
"TAXI_OUT 0.024635\n",
|
||
"PCT_ELAPSED_TIME 0.020980\n",
|
||
"CRS_DEP_TIME 0.016032\n",
|
||
"ORIGIN_HUB 0.015334\n",
|
||
"DEST_HUB 0.013932\n",
|
||
"DISTANCE 0.010680\n",
|
||
"DEP_MONTH 0.009728\n",
|
||
"CRS_ELAPSED_TIME 0.008801\n",
|
||
"DEP_DOW 0.007043\n",
|
||
"CRS_ARR_TIME 0.007029\n",
|
||
"DEP_AFPH 0.006053\n",
|
||
"WEATHER_DELAY 0.003002\n",
|
||
"SECURITY_DELAY 0.000460\n",
|
||
"Name: CARRIER_DELAY, dtype: float64"
|
||
]
|
||
},
|
||
"execution_count": 25,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Pairwise Pearson correlations (the DataFrame.corr default) between all columns\n",
"corr = df.corr()\n",
"# Rank the columns by the magnitude of their correlation with the target\n",
"corr[\"CARRIER_DELAY\"].abs().sort_values(ascending=False)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "xxPlvpOdrnY_"
|
||
},
|
||
"source": [
|
||
"#### Использование регрессионных моделей для предсказания задержки рейса"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 26,
|
||
"metadata": {
|
||
"id": "2m81QO87rzal"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"from sklearn.pipeline import make_pipeline\n",
"from sklearn.preprocessing import PolynomialFeatures, StandardScaler\n",
"from sklearn import linear_model, tree, neighbors, ensemble, neural_network\n",
"\n",
"\n",
"def _poly_linear_pipeline(interaction_only):\n",
"    \"\"\"Build a degree-2 feature expansion followed by an intercept-free linear regression.\"\"\"\n",
"    return make_pipeline(\n",
"        PolynomialFeatures(degree=2, interaction_only=interaction_only),\n",
"        linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n",
"        memory=None,\n",
"    )\n",
"\n",
"\n",
"# Maps a model name to a dict holding the unfitted estimator under \"model\"\n",
"reg_models = {\n",
"    # Generalized linear models (GLMs)\n",
"    \"linear\": {\"model\": linear_model.LinearRegression(n_jobs=-1)},\n",
"    \"linear_poly\": {\"model\": _poly_linear_pipeline(interaction_only=False)},\n",
"    \"linear_interact\": {\"model\": _poly_linear_pipeline(interaction_only=True)},\n",
"    \"ridge\": {\"model\": linear_model.RidgeCV()},\n",
"    # Trees\n",
"    \"decision_tree\": {\n",
"        \"model\": tree.DecisionTreeRegressor(max_depth=7, random_state=rand)\n",
"    },\n",
"    # Nearest neighbors\n",
"    \"knn\": {\"model\": neighbors.KNeighborsRegressor(n_neighbors=7, n_jobs=-1)},\n",
"    # Ensemble methods\n",
"    \"random_forest\": {\n",
"        \"model\": ensemble.RandomForestRegressor(\n",
"            max_depth=7, random_state=rand, n_jobs=-1\n",
"        )\n",
"    },\n",
"    # Neural networks\n",
"    \"mlp\": {\n",
"        \"model\": neural_network.MLPRegressor(\n",
"            hidden_layer_sizes=(21,),\n",
"            max_iter=500,\n",
"            early_stopping=True,\n",
"            random_state=rand,\n",
"        )\n",
"    },\n",
"}"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "DFJudSP_tFec"
|
||
},
|
||
"source": [
|
||
"#### Обучение и оценка регрессионных моделей"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 27,
|
||
"metadata": {
|
||
"colab": {
|
||
"base_uri": "https://localhost:8080/"
|
||
},
|
||
"id": "XZEDVAR0tIzN",
|
||
"outputId": "4f1f55ff-85dd-49d9-ddaf-3758419bdd6a"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Model: linear\n",
|
||
"Model: linear_poly\n",
|
||
"Model: linear_interact\n",
|
||
"Model: ridge\n",
|
||
"Model: decision_tree\n",
|
||
"Model: knn\n",
|
||
"Model: random_forest\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"0.00s - Debugger warning: It seems that frozen modules are being used, which may\n",
|
||
"0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off\n",
|
||
"0.00s - to python to disable frozen modules.\n",
|
||
"0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Model: mlp\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"import math\n",
|
||
"from sklearn import metrics\n",
|
||
"\n",
|
||
"for model_name in reg_models.keys():\n",
|
||
" print(f'Model: {model_name}')\n",
|
||
" fitted_model = reg_models[model_name][\"model\"].fit(\n",
|
||
" X_train.values, y_train_reg.to_numpy().ravel()\n",
|
||
" )\n",
|
||
" y_train_pred = fitted_model.predict(X_train.values)\n",
|
||
" y_test_pred = fitted_model.predict(X_test.values)\n",
|
||
" reg_models[model_name][\"fitted\"] = fitted_model\n",
|
||
" reg_models[model_name][\"preds\"] = y_test_pred\n",
|
||
" reg_models[model_name][\"RMSE_train\"] = math.sqrt(\n",
|
||
" metrics.mean_squared_error(y_train_reg, y_train_pred)\n",
|
||
" )\n",
|
||
" reg_models[model_name][\"RMSE_test\"] = math.sqrt(\n",
|
||
" metrics.mean_squared_error(y_test_reg, y_test_pred)\n",
|
||
" )\n",
|
||
" reg_models[model_name][\"R2_test\"] = metrics.r2_score(y_test_reg, y_test_pred)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "6GD_HZhGHPXK"
|
||
},
|
||
"source": [
|
||
"#### Вывод оценки в виде таблицы"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 28,
|
||
"metadata": {
|
||
"id": "lvcbKDfmHQ6p"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<style type=\"text/css\">\n",
|
||
"#T_b990e_row0_col0, #T_b990e_row0_col1 {\n",
|
||
" background-color: #26818e;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"#T_b990e_row0_col2 {\n",
|
||
" background-color: #da5a6a;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"#T_b990e_row1_col0 {\n",
|
||
" background-color: #25ac82;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"#T_b990e_row1_col1 {\n",
|
||
" background-color: #2cb17e;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"#T_b990e_row1_col2 {\n",
|
||
" background-color: #b42e8d;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"#T_b990e_row2_col0 {\n",
|
||
" background-color: #4cc26c;\n",
|
||
" color: #000000;\n",
|
||
"}\n",
|
||
"#T_b990e_row2_col1 {\n",
|
||
" background-color: #32b67a;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"#T_b990e_row2_col2 {\n",
|
||
" background-color: #b02991;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"#T_b990e_row3_col0 {\n",
|
||
" background-color: #58c765;\n",
|
||
" color: #000000;\n",
|
||
"}\n",
|
||
"#T_b990e_row3_col1 {\n",
|
||
" background-color: #38b977;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"#T_b990e_row3_col2 {\n",
|
||
" background-color: #aa2395;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"#T_b990e_row4_col0 {\n",
|
||
" background-color: #5cc863;\n",
|
||
" color: #000000;\n",
|
||
"}\n",
|
||
"#T_b990e_row4_col1 {\n",
|
||
" background-color: #56c667;\n",
|
||
" color: #000000;\n",
|
||
"}\n",
|
||
"#T_b990e_row4_col2 {\n",
|
||
" background-color: #920fa3;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"#T_b990e_row5_col0, #T_b990e_row6_col0, #T_b990e_row7_col1 {\n",
|
||
" background-color: #a8db34;\n",
|
||
" color: #000000;\n",
|
||
"}\n",
|
||
"#T_b990e_row5_col1 {\n",
|
||
" background-color: #67cc5c;\n",
|
||
" color: #000000;\n",
|
||
"}\n",
|
||
"#T_b990e_row5_col2, #T_b990e_row6_col2 {\n",
|
||
" background-color: #8405a7;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"#T_b990e_row6_col1 {\n",
|
||
" background-color: #69cd5b;\n",
|
||
" color: #000000;\n",
|
||
"}\n",
|
||
"#T_b990e_row7_col0 {\n",
|
||
" background-color: #8bd646;\n",
|
||
" color: #000000;\n",
|
||
"}\n",
|
||
"#T_b990e_row7_col2 {\n",
|
||
" background-color: #4e02a2;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"</style>\n",
|
||
"<table id=\"T_b990e\">\n",
|
||
" <thead>\n",
|
||
" <tr>\n",
|
||
" <th class=\"blank level0\" > </th>\n",
|
||
" <th id=\"T_b990e_level0_col0\" class=\"col_heading level0 col0\" >RMSE_train</th>\n",
|
||
" <th id=\"T_b990e_level0_col1\" class=\"col_heading level0 col1\" >RMSE_test</th>\n",
|
||
" <th id=\"T_b990e_level0_col2\" class=\"col_heading level0 col2\" >R2_test</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_b990e_level0_row0\" class=\"row_heading level0 row0\" >mlp</th>\n",
|
||
" <td id=\"T_b990e_row0_col0\" class=\"data row0 col0\" >3.243516</td>\n",
|
||
" <td id=\"T_b990e_row0_col1\" class=\"data row0 col1\" >3.308597</td>\n",
|
||
" <td id=\"T_b990e_row0_col2\" class=\"data row0 col2\" >0.987025</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_b990e_level0_row1\" class=\"row_heading level0 row1\" >random_forest</th>\n",
|
||
" <td id=\"T_b990e_row1_col0\" class=\"data row1 col0\" >5.143267</td>\n",
|
||
" <td id=\"T_b990e_row1_col1\" class=\"data row1 col1\" >6.088249</td>\n",
|
||
" <td id=\"T_b990e_row1_col2\" class=\"data row1 col2\" >0.956065</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_b990e_level0_row2\" class=\"row_heading level0 row2\" >linear_poly</th>\n",
|
||
" <td id=\"T_b990e_row2_col0\" class=\"data row2 col0\" >6.214010</td>\n",
|
||
" <td id=\"T_b990e_row2_col1\" class=\"data row2 col1\" >6.339843</td>\n",
|
||
" <td id=\"T_b990e_row2_col2\" class=\"data row2 col2\" >0.952359</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_b990e_level0_row3\" class=\"row_heading level0 row3\" >linear_interact</th>\n",
|
||
" <td id=\"T_b990e_row3_col0\" class=\"data row3 col0\" >6.454314</td>\n",
|
||
" <td id=\"T_b990e_row3_col1\" class=\"data row3 col1\" >6.562284</td>\n",
|
||
" <td id=\"T_b990e_row3_col2\" class=\"data row3 col2\" >0.948957</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_b990e_level0_row4\" class=\"row_heading level0 row4\" >decision_tree</th>\n",
|
||
" <td id=\"T_b990e_row4_col0\" class=\"data row4 col0\" >6.542924</td>\n",
|
||
" <td id=\"T_b990e_row4_col1\" class=\"data row4 col1\" >7.456335</td>\n",
|
||
" <td id=\"T_b990e_row4_col2\" class=\"data row4 col2\" >0.934102</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_b990e_level0_row5\" class=\"row_heading level0 row5\" >linear</th>\n",
|
||
" <td id=\"T_b990e_row5_col0\" class=\"data row5 col0\" >7.819643</td>\n",
|
||
" <td id=\"T_b990e_row5_col1\" class=\"data row5 col1\" >7.882875</td>\n",
|
||
" <td id=\"T_b990e_row5_col2\" class=\"data row5 col2\" >0.926347</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_b990e_level0_row6\" class=\"row_heading level0 row6\" >ridge</th>\n",
|
||
" <td id=\"T_b990e_row6_col0\" class=\"data row6 col0\" >7.832066</td>\n",
|
||
" <td id=\"T_b990e_row6_col1\" class=\"data row6 col1\" >7.898189</td>\n",
|
||
" <td id=\"T_b990e_row6_col2\" class=\"data row6 col2\" >0.926060</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_b990e_level0_row7\" class=\"row_heading level0 row7\" >knn</th>\n",
|
||
" <td id=\"T_b990e_row7_col0\" class=\"data row7 col0\" >7.360098</td>\n",
|
||
" <td id=\"T_b990e_row7_col1\" class=\"data row7 col1\" >9.259422</td>\n",
|
||
" <td id=\"T_b990e_row7_col2\" class=\"data row7 col2\" >0.898377</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n"
|
||
],
|
||
"text/plain": [
|
||
"<pandas.io.formats.style.Styler at 0x31847a090>"
|
||
]
|
||
},
|
||
"execution_count": 28,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"reg_metrics = pd.DataFrame.from_dict(reg_models, \"index\")[\n",
|
||
" [\"RMSE_train\", \"RMSE_test\", \"R2_test\"]\n",
|
||
"]\n",
|
||
"reg_metrics.sort_values(by=\"RMSE_test\").style.background_gradient(\n",
|
||
" cmap=\"viridis\", low=1, high=0.3, subset=[\"RMSE_train\", \"RMSE_test\"]\n",
|
||
").background_gradient(cmap=\"plasma\", low=0.3, high=1, subset=[\"R2_test\"])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "MZrTDbnjJMrB"
|
||
},
|
||
"source": [
|
||
"#### Использование классификаторов для предсказания задержки рейса"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 29,
|
||
"metadata": {
|
||
"id": "c_1jMq5IJMSL"
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"from sklearn import naive_bayes\n",
|
||
"\n",
|
||
"class_models = {\n",
|
||
" # Обобщенные линейные модели (GLM-модели)\n",
|
||
" \"logistic\": {\"model\": linear_model.LogisticRegression()},\n",
|
||
" \"ridge\": {\n",
|
||
" \"model\": linear_model.LogisticRegression(penalty=\"l2\", class_weight=\"balanced\")\n",
|
||
" },\n",
|
||
" # Дерево\n",
|
||
" \"decision_tree\": {\n",
|
||
" \"model\": tree.DecisionTreeClassifier(max_depth=7, random_state=rand)\n",
|
||
" },\n",
|
||
" # Ближайшие соседи\n",
|
||
" \"knn\": {\"model\": neighbors.KNeighborsClassifier(n_neighbors=7)},\n",
|
||
" # Наивный Байес\n",
|
||
" \"naive_bayes\": {\"model\": naive_bayes.GaussianNB()},\n",
|
||
" # Ансамблевые методы\n",
|
||
" \"gradient_boosting\": {\n",
|
||
" \"model\": ensemble.GradientBoostingClassifier(n_estimators=210)\n",
|
||
" },\n",
|
||
" \"random_forest\": {\n",
|
||
" \"model\": ensemble.RandomForestClassifier(\n",
|
||
" max_depth=11, class_weight=\"balanced\", random_state=rand\n",
|
||
" )\n",
|
||
" },\n",
|
||
" # Нейронные сети\n",
|
||
" \"mlp\": {\n",
|
||
" \"model\": make_pipeline(\n",
|
||
" StandardScaler(),\n",
|
||
" neural_network.MLPClassifier(\n",
|
||
" hidden_layer_sizes=(7,),\n",
|
||
" max_iter=500,\n",
|
||
" early_stopping=True,\n",
|
||
" random_state=rand,\n",
|
||
" ),\n",
|
||
" memory=None\n",
|
||
" )\n",
|
||
" },\n",
|
||
"}"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "rMzuL4OxKAns"
|
||
},
|
||
"source": [
|
||
"#### Определение сбалансированности выборки для классификации"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 30,
|
||
"metadata": {
|
||
"id": "BOVmTCPHJ5Au"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"0.061283264255549"
|
||
]
|
||
},
|
||
"execution_count": 30,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"y_train_class[y_train_class == 1].shape[0] / y_train_class.shape[0]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "H8Z7-KugJ7xR"
|
||
},
|
||
"source": [
|
||
"#### Обучение и оценка классификаторов"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 31,
|
||
"metadata": {
|
||
"id": "RyR1_5m4KBTs"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Model: logistic\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/Users/user/Projects/python/ckexp/.venv/lib/python3.11/site-packages/sklearn/linear_model/_logistic.py:465: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
|
||
"STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.\n",
|
||
"\n",
|
||
"Increase the number of iterations (max_iter) or scale the data as shown in:\n",
|
||
" https://scikit-learn.org/stable/modules/preprocessing.html\n",
|
||
"Please also refer to the documentation for alternative solver options:\n",
|
||
" https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
|
||
" n_iter_i = _check_optimize_result(\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Model: ridge\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"/Users/user/Projects/python/ckexp/.venv/lib/python3.11/site-packages/sklearn/linear_model/_logistic.py:465: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
|
||
"STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.\n",
|
||
"\n",
|
||
"Increase the number of iterations (max_iter) or scale the data as shown in:\n",
|
||
" https://scikit-learn.org/stable/modules/preprocessing.html\n",
|
||
"Please also refer to the documentation for alternative solver options:\n",
|
||
" https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
|
||
" n_iter_i = _check_optimize_result(\n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Model: decision_tree\n",
|
||
"Model: knn\n",
|
||
"Model: naive_bayes\n",
|
||
"Model: gradient_boosting\n",
|
||
"Model: random_forest\n",
|
||
"Model: mlp\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"import numpy as np\n",
|
||
"\n",
|
||
"for model_name in class_models.keys():\n",
|
||
" print(f\"Model: {model_name}\")\n",
|
||
" fitted_model = class_models[model_name][\"model\"].fit(\n",
|
||
" X_train.values,\n",
|
||
" y_train_class.to_numpy().ravel(),\n",
|
||
" )\n",
|
||
" y_train_pred = fitted_model.predict(X_train.values)\n",
|
||
" y_test_prob = fitted_model.predict_proba(X_test.values)[:, 1]\n",
|
||
" y_test_pred = fitted_model.predict(X_test.values)\n",
|
||
"\n",
|
||
" class_models[model_name][\"fitted\"] = fitted_model\n",
|
||
" class_models[model_name][\"probs\"] = y_test_prob\n",
|
||
" class_models[model_name][\"preds\"] = y_test_pred\n",
|
||
"\n",
|
||
" class_models[model_name][\"Accuracy_train\"] = metrics.accuracy_score(\n",
|
||
" y_train_class, y_train_pred\n",
|
||
" )\n",
|
||
" class_models[model_name][\"Accuracy_test\"] = metrics.accuracy_score(\n",
|
||
" y_test_class, y_test_pred\n",
|
||
" )\n",
|
||
" class_models[model_name][\"Recall_train\"] = metrics.recall_score(\n",
|
||
" y_train_class, y_train_pred\n",
|
||
" )\n",
|
||
" class_models[model_name][\"Recall_test\"] = metrics.recall_score(\n",
|
||
" y_test_class, y_test_pred\n",
|
||
" )\n",
|
||
" class_models[model_name][\"ROC_AUC_test\"] = metrics.roc_auc_score(\n",
|
||
" y_test_class, y_test_prob\n",
|
||
" )\n",
|
||
" class_models[model_name][\"F1_test\"] = metrics.f1_score(y_test_class, y_test_pred)\n",
|
||
" class_models[model_name][\"MCC_test\"] = metrics.matthews_corrcoef(\n",
|
||
" y_test_class, y_test_pred\n",
|
||
" )"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"id": "NzCyXXXFKdx2"
|
||
},
|
||
"source": [
|
||
"#### Вывод оценки в виде таблицы"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 32,
|
||
"metadata": {
|
||
"id": "VOhaFiEYKeN5"
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<style type=\"text/css\">\n",
|
||
"#T_93e7c_row0_col0, #T_93e7c_row0_col1 {\n",
|
||
" background-color: #da5a6a;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"#T_93e7c_row0_col2 {\n",
|
||
" background-color: #a2da37;\n",
|
||
" color: #000000;\n",
|
||
"}\n",
|
||
"#T_93e7c_row0_col3, #T_93e7c_row0_col4, #T_93e7c_row0_col5, #T_93e7c_row0_col6, #T_93e7c_row1_col4, #T_93e7c_row2_col2, #T_93e7c_row2_col3 {\n",
|
||
" background-color: #a8db34;\n",
|
||
" color: #000000;\n",
|
||
"}\n",
|
||
"#T_93e7c_row1_col0, #T_93e7c_row1_col1 {\n",
|
||
" background-color: #d14e72;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"#T_93e7c_row1_col2 {\n",
|
||
" background-color: #7fd34e;\n",
|
||
" color: #000000;\n",
|
||
"}\n",
|
||
"#T_93e7c_row1_col3 {\n",
|
||
" background-color: #81d34d;\n",
|
||
" color: #000000;\n",
|
||
"}\n",
|
||
"#T_93e7c_row1_col5, #T_93e7c_row1_col6 {\n",
|
||
" background-color: #8ed645;\n",
|
||
" color: #000000;\n",
|
||
"}\n",
|
||
"#T_93e7c_row2_col0 {\n",
|
||
" background-color: #7401a8;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"#T_93e7c_row2_col1 {\n",
|
||
" background-color: #7100a8;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"#T_93e7c_row2_col4, #T_93e7c_row3_col4 {\n",
|
||
" background-color: #a0da39;\n",
|
||
" color: #000000;\n",
|
||
"}\n",
|
||
"#T_93e7c_row2_col5 {\n",
|
||
" background-color: #35b779;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"#T_93e7c_row2_col6 {\n",
|
||
" background-color: #3dbc74;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"#T_93e7c_row3_col0 {\n",
|
||
" background-color: #c5407e;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"#T_93e7c_row3_col1 {\n",
|
||
" background-color: #c43e7f;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"#T_93e7c_row3_col2, #T_93e7c_row3_col3 {\n",
|
||
" background-color: #70cf57;\n",
|
||
" color: #000000;\n",
|
||
"}\n",
|
||
"#T_93e7c_row3_col5, #T_93e7c_row3_col6 {\n",
|
||
" background-color: #73d056;\n",
|
||
" color: #000000;\n",
|
||
"}\n",
|
||
"#T_93e7c_row4_col0 {\n",
|
||
" background-color: #7a02a8;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"#T_93e7c_row4_col1 {\n",
|
||
" background-color: #7701a8;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"#T_93e7c_row4_col2, #T_93e7c_row4_col3 {\n",
|
||
" background-color: #93d741;\n",
|
||
" color: #000000;\n",
|
||
"}\n",
|
||
"#T_93e7c_row4_col4 {\n",
|
||
" background-color: #90d743;\n",
|
||
" color: #000000;\n",
|
||
"}\n",
|
||
"#T_93e7c_row4_col5 {\n",
|
||
" background-color: #34b679;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"#T_93e7c_row4_col6, #T_93e7c_row5_col2, #T_93e7c_row6_col2 {\n",
|
||
" background-color: #3aba76;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"#T_93e7c_row5_col0 {\n",
|
||
" background-color: #b83289;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"#T_93e7c_row5_col1 {\n",
|
||
" background-color: #b7318a;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"#T_93e7c_row5_col3 {\n",
|
||
" background-color: #3bbb75;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"#T_93e7c_row5_col4 {\n",
|
||
" background-color: #6ece58;\n",
|
||
" color: #000000;\n",
|
||
"}\n",
|
||
"#T_93e7c_row5_col5 {\n",
|
||
" background-color: #52c569;\n",
|
||
" color: #000000;\n",
|
||
"}\n",
|
||
"#T_93e7c_row5_col6 {\n",
|
||
" background-color: #54c568;\n",
|
||
" color: #000000;\n",
|
||
"}\n",
|
||
"#T_93e7c_row6_col0 {\n",
|
||
" background-color: #b42e8d;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"#T_93e7c_row6_col1 {\n",
|
||
" background-color: #a62098;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"#T_93e7c_row6_col3 {\n",
|
||
" background-color: #2ab07f;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"#T_93e7c_row6_col4 {\n",
|
||
" background-color: #5ec962;\n",
|
||
" color: #000000;\n",
|
||
"}\n",
|
||
"#T_93e7c_row6_col5, #T_93e7c_row6_col6 {\n",
|
||
" background-color: #38b977;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"#T_93e7c_row7_col0, #T_93e7c_row7_col1 {\n",
|
||
" background-color: #4e02a2;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"#T_93e7c_row7_col2, #T_93e7c_row7_col3, #T_93e7c_row7_col4, #T_93e7c_row7_col5, #T_93e7c_row7_col6 {\n",
|
||
" background-color: #26818e;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"</style>\n",
|
||
"<table id=\"T_93e7c\">\n",
|
||
" <thead>\n",
|
||
" <tr>\n",
|
||
" <th class=\"blank level0\" > </th>\n",
|
||
" <th id=\"T_93e7c_level0_col0\" class=\"col_heading level0 col0\" >Accuracy_train</th>\n",
|
||
" <th id=\"T_93e7c_level0_col1\" class=\"col_heading level0 col1\" >Accuracy_test</th>\n",
|
||
" <th id=\"T_93e7c_level0_col2\" class=\"col_heading level0 col2\" >Recall_train</th>\n",
|
||
" <th id=\"T_93e7c_level0_col3\" class=\"col_heading level0 col3\" >Recall_test</th>\n",
|
||
" <th id=\"T_93e7c_level0_col4\" class=\"col_heading level0 col4\" >ROC_AUC_test</th>\n",
|
||
" <th id=\"T_93e7c_level0_col5\" class=\"col_heading level0 col5\" >F1_test</th>\n",
|
||
" <th id=\"T_93e7c_level0_col6\" class=\"col_heading level0 col6\" >MCC_test</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_93e7c_level0_row0\" class=\"row_heading level0 row0\" >mlp</th>\n",
|
||
" <td id=\"T_93e7c_row0_col0\" class=\"data row0 col0\" >0.998482</td>\n",
|
||
" <td id=\"T_93e7c_row0_col1\" class=\"data row0 col1\" >0.998555</td>\n",
|
||
" <td id=\"T_93e7c_row0_col2\" class=\"data row0 col2\" >0.987131</td>\n",
|
||
" <td id=\"T_93e7c_row0_col3\" class=\"data row0 col3\" >0.988865</td>\n",
|
||
" <td id=\"T_93e7c_row0_col4\" class=\"data row0 col4\" >0.999877</td>\n",
|
||
" <td id=\"T_93e7c_row0_col5\" class=\"data row0 col5\" >0.988207</td>\n",
|
||
" <td id=\"T_93e7c_row0_col6\" class=\"data row0 col6\" >0.987437</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_93e7c_level0_row1\" class=\"row_heading level0 row1\" >gradient_boosting</th>\n",
|
||
" <td id=\"T_93e7c_row1_col0\" class=\"data row1 col0\" >0.991725</td>\n",
|
||
" <td id=\"T_93e7c_row1_col1\" class=\"data row1 col1\" >0.991662</td>\n",
|
||
" <td id=\"T_93e7c_row1_col2\" class=\"data row1 col2\" >0.892930</td>\n",
|
||
" <td id=\"T_93e7c_row1_col3\" class=\"data row1 col3\" >0.893851</td>\n",
|
||
" <td id=\"T_93e7c_row1_col4\" class=\"data row1 col4\" >0.998885</td>\n",
|
||
" <td id=\"T_93e7c_row1_col5\" class=\"data row1 col5\" >0.929223</td>\n",
|
||
" <td id=\"T_93e7c_row1_col6\" class=\"data row1 col6\" >0.925619</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_93e7c_level0_row2\" class=\"row_heading level0 row2\" >random_forest</th>\n",
|
||
" <td id=\"T_93e7c_row2_col0\" class=\"data row2 col0\" >0.941166</td>\n",
|
||
" <td id=\"T_93e7c_row2_col1\" class=\"data row2 col1\" >0.940325</td>\n",
|
||
" <td id=\"T_93e7c_row2_col2\" class=\"data row2 col2\" >0.999552</td>\n",
|
||
" <td id=\"T_93e7c_row2_col3\" class=\"data row2 col3\" >0.992375</td>\n",
|
||
" <td id=\"T_93e7c_row2_col4\" class=\"data row2 col4\" >0.995145</td>\n",
|
||
" <td id=\"T_93e7c_row2_col5\" class=\"data row2 col5\" >0.670675</td>\n",
|
||
" <td id=\"T_93e7c_row2_col6\" class=\"data row2 col6\" >0.685702</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_93e7c_level0_row3\" class=\"row_heading level0 row3\" >decision_tree</th>\n",
|
||
" <td id=\"T_93e7c_row3_col0\" class=\"data row3 col0\" >0.983297</td>\n",
|
||
" <td id=\"T_93e7c_row3_col1\" class=\"data row3 col1\" >0.982895</td>\n",
|
||
" <td id=\"T_93e7c_row3_col2\" class=\"data row3 col2\" >0.856969</td>\n",
|
||
" <td id=\"T_93e7c_row3_col3\" class=\"data row3 col3\" >0.852215</td>\n",
|
||
" <td id=\"T_93e7c_row3_col4\" class=\"data row3 col4\" >0.994932</td>\n",
|
||
" <td id=\"T_93e7c_row3_col5\" class=\"data row3 col5\" >0.859182</td>\n",
|
||
" <td id=\"T_93e7c_row3_col6\" class=\"data row3 col6\" >0.850110</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_93e7c_level0_row4\" class=\"row_heading level0 row4\" >ridge</th>\n",
|
||
" <td id=\"T_93e7c_row4_col0\" class=\"data row4 col0\" >0.943453</td>\n",
|
||
" <td id=\"T_93e7c_row4_col1\" class=\"data row4 col1\" >0.942526</td>\n",
|
||
" <td id=\"T_93e7c_row4_col2\" class=\"data row4 col2\" >0.945579</td>\n",
|
||
" <td id=\"T_93e7c_row4_col3\" class=\"data row4 col3\" >0.940934</td>\n",
|
||
" <td id=\"T_93e7c_row4_col4\" class=\"data row4 col4\" >0.983777</td>\n",
|
||
" <td id=\"T_93e7c_row4_col5\" class=\"data row4 col5\" >0.667210</td>\n",
|
||
" <td id=\"T_93e7c_row4_col6\" class=\"data row4 col6\" >0.673110</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_93e7c_level0_row5\" class=\"row_heading level0 row5\" >logistic</th>\n",
|
||
" <td id=\"T_93e7c_row5_col0\" class=\"data row5 col0\" >0.975054</td>\n",
|
||
" <td id=\"T_93e7c_row5_col1\" class=\"data row5 col1\" >0.975031</td>\n",
|
||
" <td id=\"T_93e7c_row5_col2\" class=\"data row5 col2\" >0.683292</td>\n",
|
||
" <td id=\"T_93e7c_row5_col3\" class=\"data row5 col3\" >0.680828</td>\n",
|
||
" <td id=\"T_93e7c_row5_col4\" class=\"data row5 col4\" >0.960287</td>\n",
|
||
" <td id=\"T_93e7c_row5_col5\" class=\"data row5 col5\" >0.769546</td>\n",
|
||
" <td id=\"T_93e7c_row5_col6\" class=\"data row5 col6\" >0.763854</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_93e7c_level0_row6\" class=\"row_heading level0 row6\" >knn</th>\n",
|
||
" <td id=\"T_93e7c_row6_col0\" class=\"data row6 col0\" >0.972886</td>\n",
|
||
" <td id=\"T_93e7c_row6_col1\" class=\"data row6 col1\" >0.965123</td>\n",
|
||
" <td id=\"T_93e7c_row6_col2\" class=\"data row6 col2\" >0.680645</td>\n",
|
||
" <td id=\"T_93e7c_row6_col3\" class=\"data row6 col3\" >0.607722</td>\n",
|
||
" <td id=\"T_93e7c_row6_col4\" class=\"data row6 col4\" >0.948387</td>\n",
|
||
" <td id=\"T_93e7c_row6_col5\" class=\"data row6 col5\" >0.680906</td>\n",
|
||
" <td id=\"T_93e7c_row6_col6\" class=\"data row6 col6\" >0.668176</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_93e7c_level0_row7\" class=\"row_heading level0 row7\" >naive_bayes</th>\n",
|
||
" <td id=\"T_93e7c_row7_col0\" class=\"data row7 col0\" >0.925119</td>\n",
|
||
" <td id=\"T_93e7c_row7_col1\" class=\"data row7 col1\" >0.925539</td>\n",
|
||
" <td id=\"T_93e7c_row7_col2\" class=\"data row7 col2\" >0.279126</td>\n",
|
||
" <td id=\"T_93e7c_row7_col3\" class=\"data row7 col3\" >0.274268</td>\n",
|
||
" <td id=\"T_93e7c_row7_col4\" class=\"data row7 col4\" >0.811869</td>\n",
|
||
" <td id=\"T_93e7c_row7_col5\" class=\"data row7 col5\" >0.310858</td>\n",
|
||
" <td id=\"T_93e7c_row7_col6\" class=\"data row7 col6\" >0.274984</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n"
|
||
],
|
||
"text/plain": [
|
||
"<pandas.io.formats.style.Styler at 0x32ad627d0>"
|
||
]
|
||
},
|
||
"execution_count": 32,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"class_metrics = pd.DataFrame.from_dict(class_models, \"index\")[\n",
|
||
" [\n",
|
||
" \"Accuracy_train\",\n",
|
||
" \"Accuracy_test\",\n",
|
||
" \"Recall_train\",\n",
|
||
" \"Recall_test\",\n",
|
||
" \"ROC_AUC_test\",\n",
|
||
" \"F1_test\",\n",
|
||
" \"MCC_test\",\n",
|
||
" ]\n",
|
||
"]\n",
|
||
"class_metrics.sort_values(by=\"ROC_AUC_test\", ascending=False).style.background_gradient(\n",
|
||
" cmap=\"plasma\", low=0.3, high=1, subset=[\"Accuracy_train\", \"Accuracy_test\"]\n",
|
||
").background_gradient(\n",
|
||
" cmap=\"viridis\",\n",
|
||
" low=1,\n",
|
||
" high=0.3,\n",
|
||
" subset=[\"Recall_train\", \"Recall_test\", \"ROC_AUC_test\", \"F1_test\", \"MCC_test\"],\n",
|
||
")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Интерпретация результатов для моделей на основе \"белого ящика\""
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Линейная регрессия"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 33,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"coefficients:\t[ 4.54984539e-03 -5.25067742e-03 8.94125541e-01 -1.52961053e-02\n",
|
||
" -4.69623002e-01 1.25277815e-01 -6.46744472e-04 -1.26240049e-02\n",
|
||
" 4.50112895e+01 6.76385421e-04 -3.69920254e-04 5.47855860e-04\n",
|
||
" 3.73866548e-01 -9.06364154e-01 -6.74052666e-01 -9.17411191e-01\n",
|
||
" -9.29843952e-01 -3.96621856e-02 -1.79666480e-02 -1.02912927e+00\n",
|
||
" -3.94934854e-01]\n",
|
||
"intercept:\t-37.86177932752649\n",
|
||
"y = -37.86 + 0.0045X1 + -0.0053X2 + 0.894X3 + ...\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"coefs_lm = reg_models[\"linear\"][\"fitted\"].coef_\n",
|
||
"intercept_lm = reg_models[\"linear\"][\"fitted\"].intercept_\n",
|
||
"print(\"coefficients:\\t%s\" % coefs_lm)\n",
|
||
"print(\"intercept:\\t%s\" % intercept_lm)\n",
|
||
"print(\n",
|
||
" \"y = %0.2f + %0.4fX1 + %0.4fX2 + %0.3fX3 + ...\"\n",
|
||
" % (intercept_lm, coefs_lm[0], coefs_lm[1], coefs_lm[2])\n",
|
||
")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 34,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>feature</th>\n",
|
||
" <th>coef</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>CRS_DEP_TIME</td>\n",
|
||
" <td>0.004550</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>DEP_TIME</td>\n",
|
||
" <td>-0.005251</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>DEP_DELAY</td>\n",
|
||
" <td>0.894126</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>DEP_AFPH</td>\n",
|
||
" <td>-0.015296</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>DEP_RFPH</td>\n",
|
||
" <td>-0.469623</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>TAXI_OUT</td>\n",
|
||
" <td>0.125278</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>WHEELS_OFF</td>\n",
|
||
" <td>-0.000647</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7</th>\n",
|
||
" <td>CRS_ELAPSED_TIME</td>\n",
|
||
" <td>-0.012624</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>8</th>\n",
|
||
" <td>PCT_ELAPSED_TIME</td>\n",
|
||
" <td>45.011289</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>9</th>\n",
|
||
" <td>DISTANCE</td>\n",
|
||
" <td>0.000676</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>10</th>\n",
|
||
" <td>CRS_ARR_TIME</td>\n",
|
||
" <td>-0.000370</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>11</th>\n",
|
||
" <td>ARR_AFPH</td>\n",
|
||
" <td>0.000548</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>12</th>\n",
|
||
" <td>ARR_RFPH</td>\n",
|
||
" <td>0.373867</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>13</th>\n",
|
||
" <td>WEATHER_DELAY</td>\n",
|
||
" <td>-0.906364</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>14</th>\n",
|
||
" <td>NAS_DELAY</td>\n",
|
||
" <td>-0.674053</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>15</th>\n",
|
||
" <td>SECURITY_DELAY</td>\n",
|
||
" <td>-0.917411</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>16</th>\n",
|
||
" <td>LATE_AIRCRAFT_DELAY</td>\n",
|
||
" <td>-0.929844</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>17</th>\n",
|
||
" <td>DEP_MONTH</td>\n",
|
||
" <td>-0.039662</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>18</th>\n",
|
||
" <td>DEP_DOW</td>\n",
|
||
" <td>-0.017967</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>19</th>\n",
|
||
" <td>ORIGIN_HUB</td>\n",
|
||
" <td>-1.029129</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>20</th>\n",
|
||
" <td>DEST_HUB</td>\n",
|
||
" <td>-0.394935</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" feature coef\n",
|
||
"0 CRS_DEP_TIME 0.004550\n",
|
||
"1 DEP_TIME -0.005251\n",
|
||
"2 DEP_DELAY 0.894126\n",
|
||
"3 DEP_AFPH -0.015296\n",
|
||
"4 DEP_RFPH -0.469623\n",
|
||
"5 TAXI_OUT 0.125278\n",
|
||
"6 WHEELS_OFF -0.000647\n",
|
||
"7 CRS_ELAPSED_TIME -0.012624\n",
|
||
"8 PCT_ELAPSED_TIME 45.011289\n",
|
||
"9 DISTANCE 0.000676\n",
|
||
"10 CRS_ARR_TIME -0.000370\n",
|
||
"11 ARR_AFPH 0.000548\n",
|
||
"12 ARR_RFPH 0.373867\n",
|
||
"13 WEATHER_DELAY -0.906364\n",
|
||
"14 NAS_DELAY -0.674053\n",
|
||
"15 SECURITY_DELAY -0.917411\n",
|
||
"16 LATE_AIRCRAFT_DELAY -0.929844\n",
|
||
"17 DEP_MONTH -0.039662\n",
|
||
"18 DEP_DOW -0.017967\n",
|
||
"19 ORIGIN_HUB -1.029129\n",
|
||
"20 DEST_HUB -0.394935"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"coef_df = pd.DataFrame({\"feature\": X_train.columns.values.tolist(), \"coef\": coefs_lm})\n",
|
||
"display(coef_df)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 35,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<style type=\"text/css\">\n",
|
||
"#T_be19c_row0_col4, #T_be19c_row1_col4, #T_be19c_row2_col4, #T_be19c_row3_col4, #T_be19c_row4_col4, #T_be19c_row5_col4, #T_be19c_row6_col4, #T_be19c_row7_col4, #T_be19c_row8_col4, #T_be19c_row9_col4, #T_be19c_row10_col4, #T_be19c_row11_col4, #T_be19c_row12_col4, #T_be19c_row13_col4, #T_be19c_row14_col4, #T_be19c_row15_col4, #T_be19c_row16_col4, #T_be19c_row17_col4, #T_be19c_row17_col7, #T_be19c_row18_col4, #T_be19c_row18_col7, #T_be19c_row19_col4, #T_be19c_row19_col7, #T_be19c_row20_col7 {\n",
|
||
" background-color: #f0f921;\n",
|
||
" color: #000000;\n",
|
||
"}\n",
|
||
"#T_be19c_row0_col7, #T_be19c_row20_col4 {\n",
|
||
" background-color: #3e049c;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"#T_be19c_row1_col7 {\n",
|
||
" background-color: #bb3488;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"#T_be19c_row2_col7 {\n",
|
||
" background-color: #f1814d;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"#T_be19c_row3_col7 {\n",
|
||
" background-color: #f79342;\n",
|
||
" color: #000000;\n",
|
||
"}\n",
|
||
"#T_be19c_row4_col7 {\n",
|
||
" background-color: #fdc627;\n",
|
||
" color: #000000;\n",
|
||
"}\n",
|
||
"#T_be19c_row5_col7 {\n",
|
||
" background-color: #f7e225;\n",
|
||
" color: #000000;\n",
|
||
"}\n",
|
||
"#T_be19c_row6_col7 {\n",
|
||
" background-color: #f5eb27;\n",
|
||
" color: #000000;\n",
|
||
"}\n",
|
||
"#T_be19c_row7_col7, #T_be19c_row8_col7 {\n",
|
||
" background-color: #f2f227;\n",
|
||
" color: #000000;\n",
|
||
"}\n",
|
||
"#T_be19c_row9_col7 {\n",
|
||
" background-color: #f1f426;\n",
|
||
" color: #000000;\n",
|
||
"}\n",
|
||
"#T_be19c_row10_col7, #T_be19c_row11_col7, #T_be19c_row12_col7 {\n",
|
||
" background-color: #f1f525;\n",
|
||
" color: #000000;\n",
|
||
"}\n",
|
||
"#T_be19c_row13_col7, #T_be19c_row14_col7, #T_be19c_row15_col7, #T_be19c_row16_col7 {\n",
|
||
" background-color: #f0f724;\n",
|
||
" color: #000000;\n",
|
||
"}\n",
|
||
"</style>\n",
|
||
"<table id=\"T_be19c\">\n",
|
||
" <thead>\n",
|
||
" <tr>\n",
|
||
" <th class=\"blank level0\" > </th>\n",
|
||
" <th id=\"T_be19c_level0_col0\" class=\"col_heading level0 col0\" >feature</th>\n",
|
||
" <th id=\"T_be19c_level0_col1\" class=\"col_heading level0 col1\" >Coef.</th>\n",
|
||
" <th id=\"T_be19c_level0_col2\" class=\"col_heading level0 col2\" >Std.Err.</th>\n",
|
||
" <th id=\"T_be19c_level0_col3\" class=\"col_heading level0 col3\" >t</th>\n",
|
||
" <th id=\"T_be19c_level0_col4\" class=\"col_heading level0 col4\" >P>|t|</th>\n",
|
||
" <th id=\"T_be19c_level0_col5\" class=\"col_heading level0 col5\" >[0.025</th>\n",
|
||
" <th id=\"T_be19c_level0_col6\" class=\"col_heading level0 col6\" >0.975]</th>\n",
|
||
" <th id=\"T_be19c_level0_col7\" class=\"col_heading level0 col7\" >t_abs</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_be19c_level0_row0\" class=\"row_heading level0 row0\" >2</th>\n",
|
||
" <td id=\"T_be19c_row0_col0\" class=\"data row0 col0\" >DEP_DELAY</td>\n",
|
||
" <td id=\"T_be19c_row0_col1\" class=\"data row0 col1\" >0.894126</td>\n",
|
||
" <td id=\"T_be19c_row0_col2\" class=\"data row0 col2\" >0.000303</td>\n",
|
||
" <td id=\"T_be19c_row0_col3\" class=\"data row0 col3\" >2951.055978</td>\n",
|
||
" <td id=\"T_be19c_row0_col4\" class=\"data row0 col4\" >0.000000</td>\n",
|
||
" <td id=\"T_be19c_row0_col5\" class=\"data row0 col5\" >0.893532</td>\n",
|
||
" <td id=\"T_be19c_row0_col6\" class=\"data row0 col6\" >0.894719</td>\n",
|
||
" <td id=\"T_be19c_row0_col7\" class=\"data row0 col7\" >2951.055978</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_be19c_level0_row1\" class=\"row_heading level0 row1\" >16</th>\n",
|
||
" <td id=\"T_be19c_row1_col0\" class=\"data row1 col0\" >LATE_AIRCRAFT_DELAY</td>\n",
|
||
" <td id=\"T_be19c_row1_col1\" class=\"data row1 col1\" >-0.929844</td>\n",
|
||
" <td id=\"T_be19c_row1_col2\" class=\"data row1 col2\" >0.000509</td>\n",
|
||
" <td id=\"T_be19c_row1_col3\" class=\"data row1 col3\" >-1827.018082</td>\n",
|
||
" <td id=\"T_be19c_row1_col4\" class=\"data row1 col4\" >0.000000</td>\n",
|
||
" <td id=\"T_be19c_row1_col5\" class=\"data row1 col5\" >-0.930841</td>\n",
|
||
" <td id=\"T_be19c_row1_col6\" class=\"data row1 col6\" >-0.928846</td>\n",
|
||
" <td id=\"T_be19c_row1_col7\" class=\"data row1 col7\" >1827.018082</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_be19c_level0_row2\" class=\"row_heading level0 row2\" >13</th>\n",
|
||
" <td id=\"T_be19c_row2_col0\" class=\"data row2 col0\" >WEATHER_DELAY</td>\n",
|
||
" <td id=\"T_be19c_row2_col1\" class=\"data row2 col1\" >-0.906364</td>\n",
|
||
" <td id=\"T_be19c_row2_col2\" class=\"data row2 col2\" >0.000911</td>\n",
|
||
" <td id=\"T_be19c_row2_col3\" class=\"data row2 col3\" >-995.366423</td>\n",
|
||
" <td id=\"T_be19c_row2_col4\" class=\"data row2 col4\" >0.000000</td>\n",
|
||
" <td id=\"T_be19c_row2_col5\" class=\"data row2 col5\" >-0.908149</td>\n",
|
||
" <td id=\"T_be19c_row2_col6\" class=\"data row2 col6\" >-0.904579</td>\n",
|
||
" <td id=\"T_be19c_row2_col7\" class=\"data row2 col7\" >995.366423</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_be19c_level0_row3\" class=\"row_heading level0 row3\" >14</th>\n",
|
||
" <td id=\"T_be19c_row3_col0\" class=\"data row3 col0\" >NAS_DELAY</td>\n",
|
||
" <td id=\"T_be19c_row3_col1\" class=\"data row3 col1\" >-0.674053</td>\n",
|
||
" <td id=\"T_be19c_row3_col2\" class=\"data row3 col2\" >0.000813</td>\n",
|
||
" <td id=\"T_be19c_row3_col3\" class=\"data row3 col3\" >-829.128657</td>\n",
|
||
" <td id=\"T_be19c_row3_col4\" class=\"data row3 col4\" >0.000000</td>\n",
|
||
" <td id=\"T_be19c_row3_col5\" class=\"data row3 col5\" >-0.675646</td>\n",
|
||
" <td id=\"T_be19c_row3_col6\" class=\"data row3 col6\" >-0.672459</td>\n",
|
||
" <td id=\"T_be19c_row3_col7\" class=\"data row3 col7\" >829.128657</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_be19c_level0_row4\" class=\"row_heading level0 row4\" >8</th>\n",
|
||
" <td id=\"T_be19c_row4_col0\" class=\"data row4 col0\" >PCT_ELAPSED_TIME</td>\n",
|
||
" <td id=\"T_be19c_row4_col1\" class=\"data row4 col1\" >45.011289</td>\n",
|
||
" <td id=\"T_be19c_row4_col2\" class=\"data row4 col2\" >0.117195</td>\n",
|
||
" <td id=\"T_be19c_row4_col3\" class=\"data row4 col3\" >384.072566</td>\n",
|
||
" <td id=\"T_be19c_row4_col4\" class=\"data row4 col4\" >0.000000</td>\n",
|
||
" <td id=\"T_be19c_row4_col5\" class=\"data row4 col5\" >44.781592</td>\n",
|
||
" <td id=\"T_be19c_row4_col6\" class=\"data row4 col6\" >45.240987</td>\n",
|
||
" <td id=\"T_be19c_row4_col7\" class=\"data row4 col7\" >384.072566</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_be19c_level0_row5\" class=\"row_heading level0 row5\" >15</th>\n",
|
||
" <td id=\"T_be19c_row5_col0\" class=\"data row5 col0\" >SECURITY_DELAY</td>\n",
|
||
" <td id=\"T_be19c_row5_col1\" class=\"data row5 col1\" >-0.917411</td>\n",
|
||
" <td id=\"T_be19c_row5_col2\" class=\"data row5 col2\" >0.005465</td>\n",
|
||
" <td id=\"T_be19c_row5_col3\" class=\"data row5 col3\" >-167.857085</td>\n",
|
||
" <td id=\"T_be19c_row5_col4\" class=\"data row5 col4\" >0.000000</td>\n",
|
||
" <td id=\"T_be19c_row5_col5\" class=\"data row5 col5\" >-0.928123</td>\n",
|
||
" <td id=\"T_be19c_row5_col6\" class=\"data row5 col6\" >-0.906699</td>\n",
|
||
" <td id=\"T_be19c_row5_col7\" class=\"data row5 col7\" >167.857085</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_be19c_level0_row6\" class=\"row_heading level0 row6\" >5</th>\n",
|
||
" <td id=\"T_be19c_row6_col0\" class=\"data row6 col0\" >TAXI_OUT</td>\n",
|
||
" <td id=\"T_be19c_row6_col1\" class=\"data row6 col1\" >0.125278</td>\n",
|
||
" <td id=\"T_be19c_row6_col2\" class=\"data row6 col2\" >0.001203</td>\n",
|
||
" <td id=\"T_be19c_row6_col3\" class=\"data row6 col3\" >104.119579</td>\n",
|
||
" <td id=\"T_be19c_row6_col4\" class=\"data row6 col4\" >0.000000</td>\n",
|
||
" <td id=\"T_be19c_row6_col5\" class=\"data row6 col5\" >0.122920</td>\n",
|
||
" <td id=\"T_be19c_row6_col6\" class=\"data row6 col6\" >0.127636</td>\n",
|
||
" <td id=\"T_be19c_row6_col7\" class=\"data row6 col7\" >104.119579</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_be19c_level0_row7\" class=\"row_heading level0 row7\" >0</th>\n",
|
||
" <td id=\"T_be19c_row7_col0\" class=\"data row7 col0\" >CRS_DEP_TIME</td>\n",
|
||
" <td id=\"T_be19c_row7_col1\" class=\"data row7 col1\" >0.004550</td>\n",
|
||
" <td id=\"T_be19c_row7_col2\" class=\"data row7 col2\" >0.000072</td>\n",
|
||
" <td id=\"T_be19c_row7_col3\" class=\"data row7 col3\" >62.871693</td>\n",
|
||
" <td id=\"T_be19c_row7_col4\" class=\"data row7 col4\" >0.000000</td>\n",
|
||
" <td id=\"T_be19c_row7_col5\" class=\"data row7 col5\" >0.004408</td>\n",
|
||
" <td id=\"T_be19c_row7_col6\" class=\"data row7 col6\" >0.004692</td>\n",
|
||
" <td id=\"T_be19c_row7_col7\" class=\"data row7 col7\" >62.871693</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_be19c_level0_row8\" class=\"row_heading level0 row8\" >1</th>\n",
|
||
" <td id=\"T_be19c_row8_col0\" class=\"data row8 col0\" >DEP_TIME</td>\n",
|
||
" <td id=\"T_be19c_row8_col1\" class=\"data row8 col1\" >-0.005251</td>\n",
|
||
" <td id=\"T_be19c_row8_col2\" class=\"data row8 col2\" >0.000092</td>\n",
|
||
" <td id=\"T_be19c_row8_col3\" class=\"data row8 col3\" >-57.115895</td>\n",
|
||
" <td id=\"T_be19c_row8_col4\" class=\"data row8 col4\" >0.000000</td>\n",
|
||
" <td id=\"T_be19c_row8_col5\" class=\"data row8 col5\" >-0.005431</td>\n",
|
||
" <td id=\"T_be19c_row8_col6\" class=\"data row8 col6\" >-0.005070</td>\n",
|
||
" <td id=\"T_be19c_row8_col7\" class=\"data row8 col7\" >57.115895</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_be19c_level0_row9\" class=\"row_heading level0 row9\" >3</th>\n",
|
||
" <td id=\"T_be19c_row9_col0\" class=\"data row9 col0\" >DEP_AFPH</td>\n",
|
||
" <td id=\"T_be19c_row9_col1\" class=\"data row9 col1\" >-0.015296</td>\n",
|
||
" <td id=\"T_be19c_row9_col2\" class=\"data row9 col2\" >0.000321</td>\n",
|
||
" <td id=\"T_be19c_row9_col3\" class=\"data row9 col3\" >-47.724506</td>\n",
|
||
" <td id=\"T_be19c_row9_col4\" class=\"data row9 col4\" >0.000000</td>\n",
|
||
" <td id=\"T_be19c_row9_col5\" class=\"data row9 col5\" >-0.015924</td>\n",
|
||
" <td id=\"T_be19c_row9_col6\" class=\"data row9 col6\" >-0.014668</td>\n",
|
||
" <td id=\"T_be19c_row9_col7\" class=\"data row9 col7\" >47.724506</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_be19c_level0_row10\" class=\"row_heading level0 row10\" >19</th>\n",
|
||
" <td id=\"T_be19c_row10_col0\" class=\"data row10 col0\" >ORIGIN_HUB</td>\n",
|
||
" <td id=\"T_be19c_row10_col1\" class=\"data row10 col1\" >-1.029129</td>\n",
|
||
" <td id=\"T_be19c_row10_col2\" class=\"data row10 col2\" >0.026669</td>\n",
|
||
" <td id=\"T_be19c_row10_col3\" class=\"data row10 col3\" >-38.589411</td>\n",
|
||
" <td id=\"T_be19c_row10_col4\" class=\"data row10 col4\" >0.000000</td>\n",
|
||
" <td id=\"T_be19c_row10_col5\" class=\"data row10 col5\" >-1.081399</td>\n",
|
||
" <td id=\"T_be19c_row10_col6\" class=\"data row10 col6\" >-0.976860</td>\n",
|
||
" <td id=\"T_be19c_row10_col7\" class=\"data row10 col7\" >38.589411</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_be19c_level0_row11\" class=\"row_heading level0 row11\" >12</th>\n",
|
||
" <td id=\"T_be19c_row11_col0\" class=\"data row11 col0\" >ARR_RFPH</td>\n",
|
||
" <td id=\"T_be19c_row11_col1\" class=\"data row11 col1\" >0.373867</td>\n",
|
||
" <td id=\"T_be19c_row11_col2\" class=\"data row11 col2\" >0.013171</td>\n",
|
||
" <td id=\"T_be19c_row11_col3\" class=\"data row11 col3\" >28.386031</td>\n",
|
||
" <td id=\"T_be19c_row11_col4\" class=\"data row11 col4\" >0.000000</td>\n",
|
||
" <td id=\"T_be19c_row11_col5\" class=\"data row11 col5\" >0.348052</td>\n",
|
||
" <td id=\"T_be19c_row11_col6\" class=\"data row11 col6\" >0.399681</td>\n",
|
||
" <td id=\"T_be19c_row11_col7\" class=\"data row11 col7\" >28.386031</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_be19c_level0_row12\" class=\"row_heading level0 row12\" >4</th>\n",
|
||
" <td id=\"T_be19c_row12_col0\" class=\"data row12 col0\" >DEP_RFPH</td>\n",
|
||
" <td id=\"T_be19c_row12_col1\" class=\"data row12 col1\" >-0.469623</td>\n",
|
||
" <td id=\"T_be19c_row12_col2\" class=\"data row12 col2\" >0.017169</td>\n",
|
||
" <td id=\"T_be19c_row12_col3\" class=\"data row12 col3\" >-27.353179</td>\n",
|
||
" <td id=\"T_be19c_row12_col4\" class=\"data row12 col4\" >0.000000</td>\n",
|
||
" <td id=\"T_be19c_row12_col5\" class=\"data row12 col5\" >-0.503273</td>\n",
|
||
" <td id=\"T_be19c_row12_col6\" class=\"data row12 col6\" >-0.435973</td>\n",
|
||
" <td id=\"T_be19c_row12_col7\" class=\"data row12 col7\" >27.353179</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_be19c_level0_row13\" class=\"row_heading level0 row13\" >7</th>\n",
|
||
" <td id=\"T_be19c_row13_col0\" class=\"data row13 col0\" >CRS_ELAPSED_TIME</td>\n",
|
||
" <td id=\"T_be19c_row13_col1\" class=\"data row13 col1\" >-0.012624</td>\n",
|
||
" <td id=\"T_be19c_row13_col2\" class=\"data row13 col2\" >0.000660</td>\n",
|
||
" <td id=\"T_be19c_row13_col3\" class=\"data row13 col3\" >-19.131516</td>\n",
|
||
" <td id=\"T_be19c_row13_col4\" class=\"data row13 col4\" >0.000000</td>\n",
|
||
" <td id=\"T_be19c_row13_col5\" class=\"data row13 col5\" >-0.013917</td>\n",
|
||
" <td id=\"T_be19c_row13_col6\" class=\"data row13 col6\" >-0.011331</td>\n",
|
||
" <td id=\"T_be19c_row13_col7\" class=\"data row13 col7\" >19.131516</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_be19c_level0_row14\" class=\"row_heading level0 row14\" >10</th>\n",
|
||
" <td id=\"T_be19c_row14_col0\" class=\"data row14 col0\" >CRS_ARR_TIME</td>\n",
|
||
" <td id=\"T_be19c_row14_col1\" class=\"data row14 col1\" >-0.000370</td>\n",
|
||
" <td id=\"T_be19c_row14_col2\" class=\"data row14 col2\" >0.000022</td>\n",
|
||
" <td id=\"T_be19c_row14_col3\" class=\"data row14 col3\" >-16.938661</td>\n",
|
||
" <td id=\"T_be19c_row14_col4\" class=\"data row14 col4\" >0.000000</td>\n",
|
||
" <td id=\"T_be19c_row14_col5\" class=\"data row14 col5\" >-0.000413</td>\n",
|
||
" <td id=\"T_be19c_row14_col6\" class=\"data row14 col6\" >-0.000327</td>\n",
|
||
" <td id=\"T_be19c_row14_col7\" class=\"data row14 col7\" >16.938661</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_be19c_level0_row15\" class=\"row_heading level0 row15\" >20</th>\n",
|
||
" <td id=\"T_be19c_row15_col0\" class=\"data row15 col0\" >DEST_HUB</td>\n",
|
||
" <td id=\"T_be19c_row15_col1\" class=\"data row15 col1\" >-0.394935</td>\n",
|
||
" <td id=\"T_be19c_row15_col2\" class=\"data row15 col2\" >0.026256</td>\n",
|
||
" <td id=\"T_be19c_row15_col3\" class=\"data row15 col3\" >-15.041459</td>\n",
|
||
" <td id=\"T_be19c_row15_col4\" class=\"data row15 col4\" >0.000000</td>\n",
|
||
" <td id=\"T_be19c_row15_col5\" class=\"data row15 col5\" >-0.446397</td>\n",
|
||
" <td id=\"T_be19c_row15_col6\" class=\"data row15 col6\" >-0.343473</td>\n",
|
||
" <td id=\"T_be19c_row15_col7\" class=\"data row15 col7\" >15.041459</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_be19c_level0_row16\" class=\"row_heading level0 row16\" >17</th>\n",
|
||
" <td id=\"T_be19c_row16_col0\" class=\"data row16 col0\" >DEP_MONTH</td>\n",
|
||
" <td id=\"T_be19c_row16_col1\" class=\"data row16 col1\" >-0.039662</td>\n",
|
||
" <td id=\"T_be19c_row16_col2\" class=\"data row16 col2\" >0.002641</td>\n",
|
||
" <td id=\"T_be19c_row16_col3\" class=\"data row16 col3\" >-15.018808</td>\n",
|
||
" <td id=\"T_be19c_row16_col4\" class=\"data row16 col4\" >0.000000</td>\n",
|
||
" <td id=\"T_be19c_row16_col5\" class=\"data row16 col5\" >-0.044838</td>\n",
|
||
" <td id=\"T_be19c_row16_col6\" class=\"data row16 col6\" >-0.034486</td>\n",
|
||
" <td id=\"T_be19c_row16_col7\" class=\"data row16 col7\" >15.018808</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_be19c_level0_row17\" class=\"row_heading level0 row17\" >6</th>\n",
|
||
" <td id=\"T_be19c_row17_col0\" class=\"data row17 col0\" >WHEELS_OFF</td>\n",
|
||
" <td id=\"T_be19c_row17_col1\" class=\"data row17 col1\" >-0.000647</td>\n",
|
||
" <td id=\"T_be19c_row17_col2\" class=\"data row17 col2\" >0.000067</td>\n",
|
||
" <td id=\"T_be19c_row17_col3\" class=\"data row17 col3\" >-9.646104</td>\n",
|
||
" <td id=\"T_be19c_row17_col4\" class=\"data row17 col4\" >0.000000</td>\n",
|
||
" <td id=\"T_be19c_row17_col5\" class=\"data row17 col5\" >-0.000778</td>\n",
|
||
" <td id=\"T_be19c_row17_col6\" class=\"data row17 col6\" >-0.000515</td>\n",
|
||
" <td id=\"T_be19c_row17_col7\" class=\"data row17 col7\" >9.646104</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_be19c_level0_row18\" class=\"row_heading level0 row18\" >9</th>\n",
|
||
" <td id=\"T_be19c_row18_col0\" class=\"data row18 col0\" >DISTANCE</td>\n",
|
||
" <td id=\"T_be19c_row18_col1\" class=\"data row18 col1\" >0.000676</td>\n",
|
||
" <td id=\"T_be19c_row18_col2\" class=\"data row18 col2\" >0.000080</td>\n",
|
||
" <td id=\"T_be19c_row18_col3\" class=\"data row18 col3\" >8.428835</td>\n",
|
||
" <td id=\"T_be19c_row18_col4\" class=\"data row18 col4\" >0.000000</td>\n",
|
||
" <td id=\"T_be19c_row18_col5\" class=\"data row18 col5\" >0.000519</td>\n",
|
||
" <td id=\"T_be19c_row18_col6\" class=\"data row18 col6\" >0.000834</td>\n",
|
||
" <td id=\"T_be19c_row18_col7\" class=\"data row18 col7\" >8.428835</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_be19c_level0_row19\" class=\"row_heading level0 row19\" >18</th>\n",
|
||
" <td id=\"T_be19c_row19_col0\" class=\"data row19 col0\" >DEP_DOW</td>\n",
|
||
" <td id=\"T_be19c_row19_col1\" class=\"data row19 col1\" >-0.017967</td>\n",
|
||
" <td id=\"T_be19c_row19_col2\" class=\"data row19 col2\" >0.004487</td>\n",
|
||
" <td id=\"T_be19c_row19_col3\" class=\"data row19 col3\" >-4.004561</td>\n",
|
||
" <td id=\"T_be19c_row19_col4\" class=\"data row19 col4\" >0.000062</td>\n",
|
||
" <td id=\"T_be19c_row19_col5\" class=\"data row19 col5\" >-0.026760</td>\n",
|
||
" <td id=\"T_be19c_row19_col6\" class=\"data row19 col6\" >-0.009173</td>\n",
|
||
" <td id=\"T_be19c_row19_col7\" class=\"data row19 col7\" >4.004561</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_be19c_level0_row20\" class=\"row_heading level0 row20\" >11</th>\n",
|
||
" <td id=\"T_be19c_row20_col0\" class=\"data row20 col0\" >ARR_AFPH</td>\n",
|
||
" <td id=\"T_be19c_row20_col1\" class=\"data row20 col1\" >0.000548</td>\n",
|
||
" <td id=\"T_be19c_row20_col2\" class=\"data row20 col2\" >0.000332</td>\n",
|
||
" <td id=\"T_be19c_row20_col3\" class=\"data row20 col3\" >1.650788</td>\n",
|
||
" <td id=\"T_be19c_row20_col4\" class=\"data row20 col4\" >0.098782</td>\n",
|
||
" <td id=\"T_be19c_row20_col5\" class=\"data row20 col5\" >-0.000103</td>\n",
|
||
" <td id=\"T_be19c_row20_col6\" class=\"data row20 col6\" >0.001198</td>\n",
|
||
" <td id=\"T_be19c_row20_col7\" class=\"data row20 col7\" >1.650788</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n"
|
||
],
|
||
"text/plain": [
|
||
"<pandas.io.formats.style.Styler at 0x32ca95810>"
|
||
]
|
||
},
|
||
"execution_count": 35,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"import statsmodels.api as sm\n",
|
||
"\n",
|
||
"linreg_mdl = sm.OLS(y_train_reg, sm.add_constant(X_train))\n",
|
||
"linreg_mdl = linreg_mdl.fit()\n",
|
||
"summary_df = linreg_mdl.summary2().tables[1]\n",
|
||
"summary_df = (\n",
|
||
" summary_df.drop([\"const\"]).reset_index().rename(columns={\"index\": \"feature\"})\n",
|
||
")\n",
|
||
"summary_df[\"t_abs\"] = abs(summary_df[\"t\"])\n",
|
||
"summary_df.sort_values(by=\"t_abs\", ascending=False).style.background_gradient(\n",
|
||
" cmap=\"plasma_r\", low=0, high=0.1, subset=[\"P>|t|\"]\n",
|
||
").background_gradient(cmap=\"plasma_r\", low=0, high=0.1, subset=[\"t_abs\"])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Гребневая регрессия"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 36,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<style type=\"text/css\">\n",
|
||
"#T_cfee5_row0_col1, #T_cfee5_row1_col1, #T_cfee5_row2_col2, #T_cfee5_row3_col1, #T_cfee5_row4_col1, #T_cfee5_row5_col2, #T_cfee5_row6_col1, #T_cfee5_row7_col2, #T_cfee5_row8_col1, #T_cfee5_row9_col2, #T_cfee5_row10_col1, #T_cfee5_row11_col2, #T_cfee5_row12_col1, #T_cfee5_row13_col2, #T_cfee5_row14_col2, #T_cfee5_row15_col2, #T_cfee5_row16_col2, #T_cfee5_row17_col2, #T_cfee5_row18_col1, #T_cfee5_row19_col1, #T_cfee5_row20_col1 {\n",
|
||
" background-color: #472f7d;\n",
|
||
" color: #f1f1f1;\n",
|
||
"}\n",
|
||
"#T_cfee5_row0_col2, #T_cfee5_row1_col2, #T_cfee5_row2_col1, #T_cfee5_row3_col2, #T_cfee5_row4_col2, #T_cfee5_row5_col1, #T_cfee5_row6_col2, #T_cfee5_row7_col1, #T_cfee5_row8_col2, #T_cfee5_row9_col1, #T_cfee5_row10_col2, #T_cfee5_row11_col1, #T_cfee5_row12_col2, #T_cfee5_row13_col1, #T_cfee5_row14_col1, #T_cfee5_row15_col1, #T_cfee5_row16_col1, #T_cfee5_row17_col1, #T_cfee5_row18_col2, #T_cfee5_row19_col2, #T_cfee5_row20_col2 {\n",
|
||
" background-color: #7ad151;\n",
|
||
" color: #000000;\n",
|
||
"}\n",
|
||
"</style>\n",
|
||
"<table id=\"T_cfee5\">\n",
|
||
" <thead>\n",
|
||
" <tr>\n",
|
||
" <th class=\"blank level0\" > </th>\n",
|
||
" <th id=\"T_cfee5_level0_col0\" class=\"col_heading level0 col0\" >feature</th>\n",
|
||
" <th id=\"T_cfee5_level0_col1\" class=\"col_heading level0 col1\" >coef_linear</th>\n",
|
||
" <th id=\"T_cfee5_level0_col2\" class=\"col_heading level0 col2\" >coef_ridge</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_cfee5_level0_row0\" class=\"row_heading level0 row0\" >0</th>\n",
|
||
" <td id=\"T_cfee5_row0_col0\" class=\"data row0 col0\" >CRS_DEP_TIME</td>\n",
|
||
" <td id=\"T_cfee5_row0_col1\" class=\"data row0 col1\" >0.004550</td>\n",
|
||
" <td id=\"T_cfee5_row0_col2\" class=\"data row0 col2\" >0.004275</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_cfee5_level0_row1\" class=\"row_heading level0 row1\" >1</th>\n",
|
||
" <td id=\"T_cfee5_row1_col0\" class=\"data row1 col0\" >DEP_TIME</td>\n",
|
||
" <td id=\"T_cfee5_row1_col1\" class=\"data row1 col1\" >-0.005251</td>\n",
|
||
" <td id=\"T_cfee5_row1_col2\" class=\"data row1 col2\" >-0.005485</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_cfee5_level0_row2\" class=\"row_heading level0 row2\" >2</th>\n",
|
||
" <td id=\"T_cfee5_row2_col0\" class=\"data row2 col0\" >DEP_DELAY</td>\n",
|
||
" <td id=\"T_cfee5_row2_col1\" class=\"data row2 col1\" >0.894126</td>\n",
|
||
" <td id=\"T_cfee5_row2_col2\" class=\"data row2 col2\" >0.894229</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_cfee5_level0_row3\" class=\"row_heading level0 row3\" >3</th>\n",
|
||
" <td id=\"T_cfee5_row3_col0\" class=\"data row3 col0\" >DEP_AFPH</td>\n",
|
||
" <td id=\"T_cfee5_row3_col1\" class=\"data row3 col1\" >-0.015296</td>\n",
|
||
" <td id=\"T_cfee5_row3_col2\" class=\"data row3 col2\" >-0.015304</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_cfee5_level0_row4\" class=\"row_heading level0 row4\" >4</th>\n",
|
||
" <td id=\"T_cfee5_row4_col0\" class=\"data row4 col0\" >DEP_RFPH</td>\n",
|
||
" <td id=\"T_cfee5_row4_col1\" class=\"data row4 col1\" >-0.469623</td>\n",
|
||
" <td id=\"T_cfee5_row4_col2\" class=\"data row4 col2\" >-0.469623</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_cfee5_level0_row5\" class=\"row_heading level0 row5\" >5</th>\n",
|
||
" <td id=\"T_cfee5_row5_col0\" class=\"data row5 col0\" >TAXI_OUT</td>\n",
|
||
" <td id=\"T_cfee5_row5_col1\" class=\"data row5 col1\" >0.125278</td>\n",
|
||
" <td id=\"T_cfee5_row5_col2\" class=\"data row5 col2\" >0.125284</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_cfee5_level0_row6\" class=\"row_heading level0 row6\" >6</th>\n",
|
||
" <td id=\"T_cfee5_row6_col0\" class=\"data row6 col0\" >WHEELS_OFF</td>\n",
|
||
" <td id=\"T_cfee5_row6_col1\" class=\"data row6 col1\" >-0.000647</td>\n",
|
||
" <td id=\"T_cfee5_row6_col2\" class=\"data row6 col2\" >-0.000889</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_cfee5_level0_row7\" class=\"row_heading level0 row7\" >7</th>\n",
|
||
" <td id=\"T_cfee5_row7_col0\" class=\"data row7 col0\" >CRS_ELAPSED_TIME</td>\n",
|
||
" <td id=\"T_cfee5_row7_col1\" class=\"data row7 col1\" >-0.012624</td>\n",
|
||
" <td id=\"T_cfee5_row7_col2\" class=\"data row7 col2\" >-0.012618</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_cfee5_level0_row8\" class=\"row_heading level0 row8\" >8</th>\n",
|
||
" <td id=\"T_cfee5_row8_col0\" class=\"data row8 col0\" >PCT_ELAPSED_TIME</td>\n",
|
||
" <td id=\"T_cfee5_row8_col1\" class=\"data row8 col1\" >45.011289</td>\n",
|
||
" <td id=\"T_cfee5_row8_col2\" class=\"data row8 col2\" >45.010279</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_cfee5_level0_row9\" class=\"row_heading level0 row9\" >9</th>\n",
|
||
" <td id=\"T_cfee5_row9_col0\" class=\"data row9 col0\" >DISTANCE</td>\n",
|
||
" <td id=\"T_cfee5_row9_col1\" class=\"data row9 col1\" >0.000676</td>\n",
|
||
" <td id=\"T_cfee5_row9_col2\" class=\"data row9 col2\" >0.000718</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_cfee5_level0_row10\" class=\"row_heading level0 row10\" >10</th>\n",
|
||
" <td id=\"T_cfee5_row10_col0\" class=\"data row10 col0\" >CRS_ARR_TIME</td>\n",
|
||
" <td id=\"T_cfee5_row10_col1\" class=\"data row10 col1\" >-0.000370</td>\n",
|
||
" <td id=\"T_cfee5_row10_col2\" class=\"data row10 col2\" >-0.000546</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_cfee5_level0_row11\" class=\"row_heading level0 row11\" >11</th>\n",
|
||
" <td id=\"T_cfee5_row11_col0\" class=\"data row11 col0\" >ARR_AFPH</td>\n",
|
||
" <td id=\"T_cfee5_row11_col1\" class=\"data row11 col1\" >0.000548</td>\n",
|
||
" <td id=\"T_cfee5_row11_col2\" class=\"data row11 col2\" >0.000550</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_cfee5_level0_row12\" class=\"row_heading level0 row12\" >12</th>\n",
|
||
" <td id=\"T_cfee5_row12_col0\" class=\"data row12 col0\" >ARR_RFPH</td>\n",
|
||
" <td id=\"T_cfee5_row12_col1\" class=\"data row12 col1\" >0.373867</td>\n",
|
||
" <td id=\"T_cfee5_row12_col2\" class=\"data row12 col2\" >0.373865</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_cfee5_level0_row13\" class=\"row_heading level0 row13\" >13</th>\n",
|
||
" <td id=\"T_cfee5_row13_col0\" class=\"data row13 col0\" >WEATHER_DELAY</td>\n",
|
||
" <td id=\"T_cfee5_row13_col1\" class=\"data row13 col1\" >-0.906364</td>\n",
|
||
" <td id=\"T_cfee5_row13_col2\" class=\"data row13 col2\" >-0.906358</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_cfee5_level0_row14\" class=\"row_heading level0 row14\" >14</th>\n",
|
||
" <td id=\"T_cfee5_row14_col0\" class=\"data row14 col0\" >NAS_DELAY</td>\n",
|
||
" <td id=\"T_cfee5_row14_col1\" class=\"data row14 col1\" >-0.674053</td>\n",
|
||
" <td id=\"T_cfee5_row14_col2\" class=\"data row14 col2\" >-0.674045</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_cfee5_level0_row15\" class=\"row_heading level0 row15\" >15</th>\n",
|
||
" <td id=\"T_cfee5_row15_col0\" class=\"data row15 col0\" >SECURITY_DELAY</td>\n",
|
||
" <td id=\"T_cfee5_row15_col1\" class=\"data row15 col1\" >-0.917411</td>\n",
|
||
" <td id=\"T_cfee5_row15_col2\" class=\"data row15 col2\" >-0.917411</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_cfee5_level0_row16\" class=\"row_heading level0 row16\" >16</th>\n",
|
||
" <td id=\"T_cfee5_row16_col0\" class=\"data row16 col0\" >LATE_AIRCRAFT_DELAY</td>\n",
|
||
" <td id=\"T_cfee5_row16_col1\" class=\"data row16 col1\" >-0.929844</td>\n",
|
||
" <td id=\"T_cfee5_row16_col2\" class=\"data row16 col2\" >-0.929805</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_cfee5_level0_row17\" class=\"row_heading level0 row17\" >17</th>\n",
|
||
" <td id=\"T_cfee5_row17_col0\" class=\"data row17 col0\" >DEP_MONTH</td>\n",
|
||
" <td id=\"T_cfee5_row17_col1\" class=\"data row17 col1\" >-0.039662</td>\n",
|
||
" <td id=\"T_cfee5_row17_col2\" class=\"data row17 col2\" >-0.039661</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_cfee5_level0_row18\" class=\"row_heading level0 row18\" >18</th>\n",
|
||
" <td id=\"T_cfee5_row18_col0\" class=\"data row18 col0\" >DEP_DOW</td>\n",
|
||
" <td id=\"T_cfee5_row18_col1\" class=\"data row18 col1\" >-0.017967</td>\n",
|
||
" <td id=\"T_cfee5_row18_col2\" class=\"data row18 col2\" >-0.017967</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_cfee5_level0_row19\" class=\"row_heading level0 row19\" >19</th>\n",
|
||
" <td id=\"T_cfee5_row19_col0\" class=\"data row19 col0\" >ORIGIN_HUB</td>\n",
|
||
" <td id=\"T_cfee5_row19_col1\" class=\"data row19 col1\" >-1.029129</td>\n",
|
||
" <td id=\"T_cfee5_row19_col2\" class=\"data row19 col2\" >-1.029140</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th id=\"T_cfee5_level0_row20\" class=\"row_heading level0 row20\" >20</th>\n",
|
||
" <td id=\"T_cfee5_row20_col0\" class=\"data row20 col0\" >DEST_HUB</td>\n",
|
||
" <td id=\"T_cfee5_row20_col1\" class=\"data row20 col1\" >-0.394935</td>\n",
|
||
" <td id=\"T_cfee5_row20_col2\" class=\"data row20 col2\" >-0.394948</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n"
|
||
],
|
||
"text/plain": [
|
||
"<pandas.io.formats.style.Styler at 0x17753cf50>"
|
||
]
|
||
},
|
||
"execution_count": 36,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"coefs_ridge = reg_models[\"ridge\"][\"fitted\"].coef_\n",
|
||
"coef_ridge_df = pd.DataFrame(\n",
|
||
" {\n",
|
||
" \"feature\": X_train.columns.values.tolist(),\n",
|
||
" \"coef_linear\": coefs_lm,\n",
|
||
" \"coef_ridge\": coefs_ridge,\n",
|
||
" }\n",
|
||
")\n",
|
||
"coef_ridge_df.style.background_gradient(cmap=\"viridis_r\", low=0.3, high=0.2, axis=1)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Полиномиальная регрессия"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 38,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"253"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"232"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
}
|
||
],
|
||
"source": [
|
||
"display(\n",
|
||
" reg_models[\"linear_poly\"][\"fitted\"].get_params()[\"linearregression\"].coef_.shape[0]\n",
|
||
")\n",
|
||
"display(\n",
|
||
" reg_models[\"linear_interact\"][\"fitted\"]\n",
|
||
" .get_params()[\"linearregression\"]\n",
|
||
" .coef_.shape[0]\n",
|
||
")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"Логистическая регрессия"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 39,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"coefficients:\t[[-0.00132811 0.00034525 0.15746107 0.00349808 -0.00215053 -0.00445293\n",
|
||
" 0.00029184 -0.05167613 -0.00175222 0.0055682 -0.00031922 -0.00757532\n",
|
||
" -0.00273998 -0.15351444 -0.12133964 -0.00595224 -0.16451117 -0.01303235\n",
|
||
" -0.0052911 0.00048854 -0.00206977]]\n",
|
||
"intercept:\t[-0.00229272]\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"DEP_DELAY 6.969920\n",
|
||
"CRS_ELAPSED_TIME 4.101834\n",
|
||
"LATE_AIRCRAFT_DELAY 4.065346\n",
|
||
"DISTANCE 3.616141\n",
|
||
"NAS_DELAY 1.672065\n",
|
||
"WEATHER_DELAY 1.604186\n",
|
||
"CRS_DEP_TIME 0.665926\n",
|
||
"ARR_AFPH 0.267888\n",
|
||
"DEP_TIME 0.177772\n",
|
||
"CRS_ARR_TIME 0.168589\n",
|
||
"WHEELS_OFF 0.150765\n",
|
||
"DEP_AFPH 0.124024\n",
|
||
"DEP_MONTH 0.044475\n",
|
||
"TAXI_OUT 0.043947\n",
|
||
"DEP_DOW 0.010574\n",
|
||
"SECURITY_DELAY 0.009756\n",
|
||
"ARR_RFPH 0.001976\n",
|
||
"DEP_RFPH 0.001215\n",
|
||
"DEST_HUB 0.001007\n",
|
||
"ORIGIN_HUB 0.000238\n",
|
||
"PCT_ELAPSED_TIME 0.000185\n",
|
||
"dtype: float64"
|
||
]
|
||
},
|
||
"execution_count": 39,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"coefs_log = class_models[\"logistic\"][\"fitted\"].coef_\n",
|
||
"intercept_log = class_models[\"logistic\"][\"fitted\"].intercept_\n",
|
||
"print(\"coefficients:\\t%s\" % coefs_log)\n",
|
||
"print(\"intercept:\\t%s\" % intercept_log)\n",
|
||
"stdv = np.std(X_train, 0)\n",
|
||
"abs(\n",
|
||
" coefs_log.reshape(\n",
|
||
" 21,\n",
|
||
" )\n",
|
||
" * stdv\n",
|
||
").sort_values(ascending=False)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Дерево решений"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 40,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"|--- DEP_DELAY <= 20.50\n",
|
||
"| |--- DEP_DELAY <= 15.50\n",
|
||
"| | |--- class: 0\n",
|
||
"| |--- DEP_DELAY > 15.50\n",
|
||
"| | |--- PCT_ELAPSED_TIME <= 0.99\n",
|
||
"| | | |--- PCT_ELAPSED_TIME <= 0.98\n",
|
||
"| | | | |--- PCT_ELAPSED_TIME <= 0.96\n",
|
||
"| | | | | |--- CRS_ELAPSED_TIME <= 65.50\n",
|
||
"| | | | | | |--- PCT_ELAPSED_TIME <= 0.94\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | | |--- PCT_ELAPSED_TIME > 0.94\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | |--- CRS_ELAPSED_TIME > 65.50\n",
|
||
"| | | | | | |--- PCT_ELAPSED_TIME <= 0.95\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | | |--- PCT_ELAPSED_TIME > 0.95\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | |--- PCT_ELAPSED_TIME > 0.96\n",
|
||
"| | | | | |--- CRS_ELAPSED_TIME <= 140.50\n",
|
||
"| | | | | | |--- DEP_DELAY <= 18.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | | |--- DEP_DELAY > 18.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | |--- CRS_ELAPSED_TIME > 140.50\n",
|
||
"| | | | | | |--- DEP_DELAY <= 19.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | | |--- DEP_DELAY > 19.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | |--- PCT_ELAPSED_TIME > 0.98\n",
|
||
"| | | | |--- DEP_DELAY <= 18.50\n",
|
||
"| | | | | |--- DISTANCE <= 326.50\n",
|
||
"| | | | | | |--- LATE_AIRCRAFT_DELAY <= 0.50\n",
|
||
"| | | | | | | |--- class: 1\n",
|
||
"| | | | | | |--- LATE_AIRCRAFT_DELAY > 0.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | |--- DISTANCE > 326.50\n",
|
||
"| | | | | | |--- DEP_DELAY <= 17.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | | |--- DEP_DELAY > 17.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | |--- DEP_DELAY > 18.50\n",
|
||
"| | | | | |--- LATE_AIRCRAFT_DELAY <= 1.50\n",
|
||
"| | | | | | |--- DISTANCE <= 1358.50\n",
|
||
"| | | | | | | |--- class: 1\n",
|
||
"| | | | | | |--- DISTANCE > 1358.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | |--- LATE_AIRCRAFT_DELAY > 1.50\n",
|
||
"| | | | | | |--- class: 0\n",
|
||
"| | |--- PCT_ELAPSED_TIME > 0.99\n",
|
||
"| | | |--- LATE_AIRCRAFT_DELAY <= 1.50\n",
|
||
"| | | | |--- WEATHER_DELAY <= 2.00\n",
|
||
"| | | | | |--- NAS_DELAY <= 17.50\n",
|
||
"| | | | | | |--- PCT_ELAPSED_TIME <= 1.00\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | | |--- PCT_ELAPSED_TIME > 1.00\n",
|
||
"| | | | | | | |--- class: 1\n",
|
||
"| | | | | |--- NAS_DELAY > 17.50\n",
|
||
"| | | | | | |--- PCT_ELAPSED_TIME <= 1.09\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | | |--- PCT_ELAPSED_TIME > 1.09\n",
|
||
"| | | | | | | |--- class: 1\n",
|
||
"| | | | |--- WEATHER_DELAY > 2.00\n",
|
||
"| | | | | |--- class: 0\n",
|
||
"| | | |--- LATE_AIRCRAFT_DELAY > 1.50\n",
|
||
"| | | | |--- LATE_AIRCRAFT_DELAY <= 3.50\n",
|
||
"| | | | | |--- DEP_DELAY <= 18.50\n",
|
||
"| | | | | | |--- DISTANCE <= 153.50\n",
|
||
"| | | | | | | |--- class: 1\n",
|
||
"| | | | | | |--- DISTANCE > 153.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | |--- DEP_DELAY > 18.50\n",
|
||
"| | | | | | |--- WEATHER_DELAY <= 2.50\n",
|
||
"| | | | | | | |--- class: 1\n",
|
||
"| | | | | | |--- WEATHER_DELAY > 2.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | |--- LATE_AIRCRAFT_DELAY > 3.50\n",
|
||
"| | | | | |--- LATE_AIRCRAFT_DELAY <= 4.50\n",
|
||
"| | | | | | |--- DEP_DELAY <= 19.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | | |--- DEP_DELAY > 19.50\n",
|
||
"| | | | | | | |--- class: 1\n",
|
||
"| | | | | |--- LATE_AIRCRAFT_DELAY > 4.50\n",
|
||
"| | | | | | |--- class: 0\n",
|
||
"|--- DEP_DELAY > 20.50\n",
|
||
"| |--- LATE_AIRCRAFT_DELAY <= 11.50\n",
|
||
"| | |--- NAS_DELAY <= 27.50\n",
|
||
"| | | |--- DEP_DELAY <= 35.50\n",
|
||
"| | | | |--- PCT_ELAPSED_TIME <= 0.96\n",
|
||
"| | | | | |--- DEP_DELAY <= 28.50\n",
|
||
"| | | | | | |--- PCT_ELAPSED_TIME <= 0.93\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | | |--- PCT_ELAPSED_TIME > 0.93\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | |--- DEP_DELAY > 28.50\n",
|
||
"| | | | | | |--- PCT_ELAPSED_TIME <= 0.92\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | | |--- PCT_ELAPSED_TIME > 0.92\n",
|
||
"| | | | | | | |--- class: 1\n",
|
||
"| | | | |--- PCT_ELAPSED_TIME > 0.96\n",
|
||
"| | | | | |--- WEATHER_DELAY <= 4.50\n",
|
||
"| | | | | | |--- LATE_AIRCRAFT_DELAY <= 6.50\n",
|
||
"| | | | | | | |--- class: 1\n",
|
||
"| | | | | | |--- LATE_AIRCRAFT_DELAY > 6.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | |--- WEATHER_DELAY > 4.50\n",
|
||
"| | | | | | |--- WEATHER_DELAY <= 10.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | | |--- WEATHER_DELAY > 10.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | |--- DEP_DELAY > 35.50\n",
|
||
"| | | | |--- WEATHER_DELAY <= 16.50\n",
|
||
"| | | | | |--- DEP_DELAY <= 44.50\n",
|
||
"| | | | | | |--- PCT_ELAPSED_TIME <= 0.93\n",
|
||
"| | | | | | | |--- class: 1\n",
|
||
"| | | | | | |--- PCT_ELAPSED_TIME > 0.93\n",
|
||
"| | | | | | | |--- class: 1\n",
|
||
"| | | | | |--- DEP_DELAY > 44.50\n",
|
||
"| | | | | | |--- SECURITY_DELAY <= 20.50\n",
|
||
"| | | | | | | |--- class: 1\n",
|
||
"| | | | | | |--- SECURITY_DELAY > 20.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | |--- WEATHER_DELAY > 16.50\n",
|
||
"| | | | | |--- WEATHER_DELAY <= 23.50\n",
|
||
"| | | | | | |--- DEP_DELAY <= 57.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | | |--- DEP_DELAY > 57.50\n",
|
||
"| | | | | | | |--- class: 1\n",
|
||
"| | | | | |--- WEATHER_DELAY > 23.50\n",
|
||
"| | | | | | |--- DEP_DELAY <= 88.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | | |--- DEP_DELAY > 88.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | |--- NAS_DELAY > 27.50\n",
|
||
"| | | |--- PCT_ELAPSED_TIME <= 1.11\n",
|
||
"| | | | |--- NAS_DELAY <= 31.50\n",
|
||
"| | | | | |--- PCT_ELAPSED_TIME <= 1.07\n",
|
||
"| | | | | | |--- DEP_DELAY <= 69.00\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | | |--- DEP_DELAY > 69.00\n",
|
||
"| | | | | | | |--- class: 1\n",
|
||
"| | | | | |--- PCT_ELAPSED_TIME > 1.07\n",
|
||
"| | | | | | |--- WEATHER_DELAY <= 10.50\n",
|
||
"| | | | | | | |--- class: 1\n",
|
||
"| | | | | | |--- WEATHER_DELAY > 10.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | |--- NAS_DELAY > 31.50\n",
|
||
"| | | | | |--- DEP_DELAY <= 471.50\n",
|
||
"| | | | | | |--- CRS_ELAPSED_TIME <= 420.00\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | | |--- CRS_ELAPSED_TIME > 420.00\n",
|
||
"| | | | | | | |--- class: 1\n",
|
||
"| | | | | |--- DEP_DELAY > 471.50\n",
|
||
"| | | | | | |--- NAS_DELAY <= 388.00\n",
|
||
"| | | | | | | |--- class: 1\n",
|
||
"| | | | | | |--- NAS_DELAY > 388.00\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | |--- PCT_ELAPSED_TIME > 1.11\n",
|
||
"| | | | |--- NAS_DELAY <= 64.50\n",
|
||
"| | | | | |--- WEATHER_DELAY <= 20.50\n",
|
||
"| | | | | | |--- DEP_DELAY <= 43.50\n",
|
||
"| | | | | | | |--- class: 1\n",
|
||
"| | | | | | |--- DEP_DELAY > 43.50\n",
|
||
"| | | | | | | |--- class: 1\n",
|
||
"| | | | | |--- WEATHER_DELAY > 20.50\n",
|
||
"| | | | | | |--- WHEELS_OFF <= 36.00\n",
|
||
"| | | | | | | |--- class: 1\n",
|
||
"| | | | | | |--- WHEELS_OFF > 36.00\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | |--- NAS_DELAY > 64.50\n",
|
||
"| | | | | |--- PCT_ELAPSED_TIME <= 1.44\n",
|
||
"| | | | | | |--- NAS_DELAY <= 78.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | | |--- NAS_DELAY > 78.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | |--- PCT_ELAPSED_TIME > 1.44\n",
|
||
"| | | | | | |--- NAS_DELAY <= 119.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | | |--- NAS_DELAY > 119.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| |--- LATE_AIRCRAFT_DELAY > 11.50\n",
|
||
"| | |--- DEP_DELAY <= 75.50\n",
|
||
"| | | |--- DEP_DELAY <= 41.50\n",
|
||
"| | | | |--- LATE_AIRCRAFT_DELAY <= 14.50\n",
|
||
"| | | | | |--- DEP_DELAY <= 29.50\n",
|
||
"| | | | | | |--- DEP_DELAY <= 27.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | | |--- DEP_DELAY > 27.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | |--- DEP_DELAY > 29.50\n",
|
||
"| | | | | | |--- PCT_ELAPSED_TIME <= 0.97\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | | |--- PCT_ELAPSED_TIME > 0.97\n",
|
||
"| | | | | | | |--- class: 1\n",
|
||
"| | | | |--- LATE_AIRCRAFT_DELAY > 14.50\n",
|
||
"| | | | | |--- LATE_AIRCRAFT_DELAY <= 20.50\n",
|
||
"| | | | | | |--- DEP_DELAY <= 32.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | | |--- DEP_DELAY > 32.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | |--- LATE_AIRCRAFT_DELAY > 20.50\n",
|
||
"| | | | | | |--- DEP_DELAY <= 38.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | | |--- DEP_DELAY > 38.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | |--- DEP_DELAY > 41.50\n",
|
||
"| | | | |--- LATE_AIRCRAFT_DELAY <= 29.50\n",
|
||
"| | | | | |--- PCT_ELAPSED_TIME <= 0.94\n",
|
||
"| | | | | | |--- DEP_DELAY <= 55.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | | |--- DEP_DELAY > 55.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | |--- PCT_ELAPSED_TIME > 0.94\n",
|
||
"| | | | | | |--- WEATHER_DELAY <= 0.50\n",
|
||
"| | | | | | | |--- class: 1\n",
|
||
"| | | | | | |--- WEATHER_DELAY > 0.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | |--- LATE_AIRCRAFT_DELAY > 29.50\n",
|
||
"| | | | | |--- LATE_AIRCRAFT_DELAY <= 38.50\n",
|
||
"| | | | | | |--- DEP_DELAY <= 59.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | | |--- DEP_DELAY > 59.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | |--- LATE_AIRCRAFT_DELAY > 38.50\n",
|
||
"| | | | | | |--- DEP_DELAY <= 60.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | | |--- DEP_DELAY > 60.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | |--- DEP_DELAY > 75.50\n",
|
||
"| | | |--- LATE_AIRCRAFT_DELAY <= 60.50\n",
|
||
"| | | | |--- WEATHER_DELAY <= 0.50\n",
|
||
"| | | | | |--- NAS_DELAY <= 38.50\n",
|
||
"| | | | | | |--- DEP_DELAY <= 88.50\n",
|
||
"| | | | | | | |--- class: 1\n",
|
||
"| | | | | | |--- DEP_DELAY > 88.50\n",
|
||
"| | | | | | | |--- class: 1\n",
|
||
"| | | | | |--- NAS_DELAY > 38.50\n",
|
||
"| | | | | | |--- TAXI_OUT <= 63.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | | |--- TAXI_OUT > 63.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | |--- WEATHER_DELAY > 0.50\n",
|
||
"| | | | | |--- WEATHER_DELAY <= 18.50\n",
|
||
"| | | | | | |--- LATE_AIRCRAFT_DELAY <= 31.50\n",
|
||
"| | | | | | | |--- class: 1\n",
|
||
"| | | | | | |--- LATE_AIRCRAFT_DELAY > 31.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | |--- WEATHER_DELAY > 18.50\n",
|
||
"| | | | | | |--- DEP_AFPH <= 99.64\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | | |--- DEP_AFPH > 99.64\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | |--- LATE_AIRCRAFT_DELAY > 60.50\n",
|
||
"| | | | |--- DEP_DELAY <= 114.50\n",
|
||
"| | | | | |--- LATE_AIRCRAFT_DELAY <= 71.50\n",
|
||
"| | | | | | |--- DEP_DELAY <= 95.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | | |--- DEP_DELAY > 95.50\n",
|
||
"| | | | | | | |--- class: 1\n",
|
||
"| | | | | |--- LATE_AIRCRAFT_DELAY > 71.50\n",
|
||
"| | | | | | |--- DEP_DELAY <= 96.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | | |--- DEP_DELAY > 96.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | |--- DEP_DELAY > 114.50\n",
|
||
"| | | | | |--- LATE_AIRCRAFT_DELAY <= 98.50\n",
|
||
"| | | | | | |--- WEATHER_DELAY <= 1.00\n",
|
||
"| | | | | | | |--- class: 1\n",
|
||
"| | | | | | |--- WEATHER_DELAY > 1.00\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | |--- LATE_AIRCRAFT_DELAY > 98.50\n",
|
||
"| | | | | | |--- DEP_DELAY <= 171.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"| | | | | | |--- DEP_DELAY > 171.50\n",
|
||
"| | | | | | | |--- class: 0\n",
|
||
"\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"text_tree = tree.export_text(\n",
|
||
" class_models[\"decision_tree\"][\"fitted\"],\n",
|
||
" feature_names=X_train.columns.values.tolist(),\n",
|
||
")\n",
|
||
"print(text_tree)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 41,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>feature</th>\n",
|
||
" <th>importance</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>DEP_DELAY</td>\n",
|
||
" <td>0.527482</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>16</th>\n",
|
||
" <td>LATE_AIRCRAFT_DELAY</td>\n",
|
||
" <td>0.199153</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>8</th>\n",
|
||
" <td>PCT_ELAPSED_TIME</td>\n",
|
||
" <td>0.105381</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>13</th>\n",
|
||
" <td>WEATHER_DELAY</td>\n",
|
||
" <td>0.101649</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>14</th>\n",
|
||
" <td>NAS_DELAY</td>\n",
|
||
" <td>0.062732</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>15</th>\n",
|
||
" <td>SECURITY_DELAY</td>\n",
|
||
" <td>0.001998</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>9</th>\n",
|
||
" <td>DISTANCE</td>\n",
|
||
" <td>0.001019</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7</th>\n",
|
||
" <td>CRS_ELAPSED_TIME</td>\n",
|
||
" <td>0.000281</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>TAXI_OUT</td>\n",
|
||
" <td>0.000239</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>WHEELS_OFF</td>\n",
|
||
" <td>0.000035</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>DEP_AFPH</td>\n",
|
||
" <td>0.000031</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>CRS_DEP_TIME</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>19</th>\n",
|
||
" <td>ORIGIN_HUB</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>18</th>\n",
|
||
" <td>DEP_DOW</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>17</th>\n",
|
||
" <td>DEP_MONTH</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>10</th>\n",
|
||
" <td>CRS_ARR_TIME</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>12</th>\n",
|
||
" <td>ARR_RFPH</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>11</th>\n",
|
||
" <td>ARR_AFPH</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>DEP_TIME</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>DEP_RFPH</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>20</th>\n",
|
||
" <td>DEST_HUB</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" feature importance\n",
|
||
"2 DEP_DELAY 0.527482\n",
|
||
"16 LATE_AIRCRAFT_DELAY 0.199153\n",
|
||
"8 PCT_ELAPSED_TIME 0.105381\n",
|
||
"13 WEATHER_DELAY 0.101649\n",
|
||
"14 NAS_DELAY 0.062732\n",
|
||
"15 SECURITY_DELAY 0.001998\n",
|
||
"9 DISTANCE 0.001019\n",
|
||
"7 CRS_ELAPSED_TIME 0.000281\n",
|
||
"5 TAXI_OUT 0.000239\n",
|
||
"6 WHEELS_OFF 0.000035\n",
|
||
"3 DEP_AFPH 0.000031\n",
|
||
"0 CRS_DEP_TIME 0.000000\n",
|
||
"19 ORIGIN_HUB 0.000000\n",
|
||
"18 DEP_DOW 0.000000\n",
|
||
"17 DEP_MONTH 0.000000\n",
|
||
"10 CRS_ARR_TIME 0.000000\n",
|
||
"12 ARR_RFPH 0.000000\n",
|
||
"11 ARR_AFPH 0.000000\n",
|
||
"1 DEP_TIME 0.000000\n",
|
||
"4 DEP_RFPH 0.000000\n",
|
||
"20 DEST_HUB 0.000000"
|
||
]
|
||
},
|
||
"execution_count": 41,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"dt_imp_df = pd.DataFrame(\n",
|
||
" {\n",
|
||
" \"feature\": X_train.columns.values.tolist(),\n",
|
||
" \"importance\": class_models[\"decision_tree\"][\"fitted\"].feature_importances_,\n",
|
||
" }\n",
|
||
").sort_values(by=\"importance\", ascending=False)\n",
|
||
"dt_imp_df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### k ближайших соседей"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 42,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"CRS_DEP_TIME 655.000000\n",
|
||
"DEP_TIME 1055.000000\n",
|
||
"DEP_DELAY 240.000000\n",
|
||
"DEP_AFPH 90.800000\n",
|
||
"DEP_RFPH 0.890196\n",
|
||
"TAXI_OUT 35.000000\n",
|
||
"WHEELS_OFF 1130.000000\n",
|
||
"CRS_ELAPSED_TIME 259.000000\n",
|
||
"PCT_ELAPSED_TIME 1.084942\n",
|
||
"DISTANCE 1660.000000\n",
|
||
"CRS_ARR_TIME 914.000000\n",
|
||
"ARR_AFPH 40.434783\n",
|
||
"ARR_RFPH 1.064073\n",
|
||
"WEATHER_DELAY 0.000000\n",
|
||
"NAS_DELAY 22.000000\n",
|
||
"SECURITY_DELAY 0.000000\n",
|
||
"LATE_AIRCRAFT_DELAY 221.000000\n",
|
||
"DEP_MONTH 10.000000\n",
|
||
"DEP_DOW 4.000000\n",
|
||
"ORIGIN_HUB 1.000000\n",
|
||
"DEST_HUB 0.000000\n",
|
||
"Name: 721043, dtype: float64\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"print(X_test.loc[721043, :])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 43,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"(array([[143.3160128 , 173.90740076, 192.66705727, 211.57109221,\n",
|
||
" 243.57211853, 259.61593993, 259.77507391]]),\n",
|
||
" array([[105172, 571912, 73409, 89450, 77474, 705972, 706911]]))"
|
||
]
|
||
},
|
||
"execution_count": 43,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"class_models[\"knn\"][\"fitted\"].kneighbors(\n",
|
||
" X_test.loc[721043, :].values.reshape(1, 21), 7\n",
|
||
")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 44,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"3813 0\n",
|
||
"229062 1\n",
|
||
"283316 0\n",
|
||
"385831 0\n",
|
||
"581905 1\n",
|
||
"726784 1\n",
|
||
"179364 0\n",
|
||
"Name: CARRIER_DELAY, dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 44,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"y_train_class.iloc[[105172, 571912, 73409, 89450, 77474, 705972, 706911]]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 45,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"'euclidean'"
|
||
]
|
||
},
|
||
"execution_count": 45,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"class_models[\"knn\"][\"fitted\"].effective_metric_"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"#### Гауссов наивный Байес"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 46,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"array([0.93871674, 0.06128326])"
|
||
]
|
||
},
|
||
"execution_count": 46,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"class_models[\"naive_bayes\"][\"fitted\"].class_prior_"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 47,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"array([[2.50123026e+05, 2.61324730e+05, 9.21572605e+02, 1.26123968e+03,\n",
|
||
" 2.08339528e-01, 9.58074414e+01, 2.62606651e+05, 6.30102550e+03,\n",
|
||
" 1.13475535e-02, 4.22470414e+05, 2.75433641e+05, 1.25314386e+03,\n",
|
||
" 3.48655340e-01, 1.11234714e+02, 1.91877186e+02, 2.80302201e+00,\n",
|
||
" 5.06561612e+02, 1.17346654e+01, 3.99122491e+00, 2.39015406e-01,\n",
|
||
" 2.34996222e-01],\n",
|
||
" [2.60629652e+05, 2.96009867e+05, 1.19307931e+04, 1.14839167e+03,\n",
|
||
" 1.99929921e+00, 1.20404927e+02, 3.08568277e+05, 6.29066219e+03,\n",
|
||
" 1.38936741e-02, 4.10198938e+05, 3.28574000e+05, 1.09023147e+03,\n",
|
||
" 3.08997044e+00, 7.79140423e+01, 1.56184090e+02, 9.12112286e-01,\n",
|
||
" 2.11279954e+03, 1.02712368e+01, 4.02943162e+00, 1.77750796e-01,\n",
|
||
" 2.50208354e-01]])"
|
||
]
|
||
},
|
||
"execution_count": 47,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"class_models[\"naive_bayes\"][\"fitted\"].var_"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 48,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"array([[1.30740577e+03, 1.31006271e+03, 5.14196506e+00, 5.45864877e+01,\n",
|
||
" 1.09377996e+00, 1.87120810e+01, 1.33552258e+03, 1.70734929e+02,\n",
|
||
" 9.71131781e-01, 1.01824369e+03, 1.48438931e+03, 5.39873058e+01,\n",
|
||
" 1.09644787e+00, 7.39971299e-01, 2.85434558e+00, 2.41814585e-02,\n",
|
||
" 4.14674395e+00, 6.55045281e+00, 2.95035528e+00, 6.06800513e-01,\n",
|
||
" 6.24199571e-01],\n",
|
||
" [1.41305545e+03, 1.48087887e+03, 8.45867640e+01, 6.14731036e+01,\n",
|
||
" 1.25429654e+00, 1.99378321e+01, 1.49409412e+03, 1.72229998e+02,\n",
|
||
" 9.83974416e-01, 1.04363666e+03, 1.54821862e+03, 4.26486417e+01,\n",
|
||
" 1.36373798e+00, 4.50733082e-01, 4.71991378e+00, 2.11281132e-02,\n",
|
||
" 1.40744819e+01, 6.73367907e+00, 3.04251232e+00, 7.69575517e-01,\n",
|
||
" 4.85391724e-01]])"
|
||
]
|
||
},
|
||
"execution_count": 48,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"class_models[\"naive_bayes\"][\"fitted\"].theta_"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"colab": {
|
||
"provenance": []
|
||
},
|
||
"kernelspec": {
|
||
"display_name": ".venv (3.11.12)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.11.12"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 0
|
||
}
|