Add additional example for Lec3

This commit is contained in:
Aleksey Filippov 2025-04-24 14:25:52 +04:00
parent 10ebe528bb
commit 7368833434

View File

@ -11,7 +11,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
@ -65,7 +65,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 3,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
@ -273,7 +273,7 @@
"cardio 0.000000 1.000000 1.000000 "
]
},
"execution_count": 20,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@ -293,7 +293,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 4,
"metadata": {
"id": "1BXW8--WKI3b"
},
@ -319,7 +319,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 5,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
@ -348,7 +348,7 @@
" <td>Dependent Variable:</td> <td>cardio</td> <td>Pseudo R-squared:</td> <td>0.194</td> \n",
"</tr>\n",
"<tr>\n",
" <td>Date:</td> <td>2025-04-24 09:36</td> <td>AIC:</td> <td>66539.7930</td>\n",
" <td>Date:</td> <td>2025-04-24 12:35</td> <td>AIC:</td> <td>66539.7930</td>\n",
"</tr>\n",
"<tr>\n",
" <td>No. Observations:</td> <td>59500</td> <td>BIC:</td> <td>66647.7178</td>\n",
@ -417,7 +417,7 @@
"\\hline\n",
"Model: & Logit & Method: & MLE \\\\\n",
"Dependent Variable: & cardio & Pseudo R-squared: & 0.194 \\\\\n",
"Date: & 2025-04-24 09:36 & AIC: & 66539.7930 \\\\\n",
"Date: & 2025-04-24 12:35 & AIC: & 66539.7930 \\\\\n",
"No. Observations: & 59500 & BIC: & 66647.7178 \\\\\n",
"Df Model: & 11 & Log-Likelihood: & -33258. \\\\\n",
"Df Residuals: & 59488 & LL-Null: & -41242. \\\\\n",
@ -457,7 +457,7 @@
"=================================================================\n",
"Model: Logit Method: MLE \n",
"Dependent Variable: cardio Pseudo R-squared: 0.194 \n",
"Date: 2025-04-24 09:36 AIC: 66539.7930\n",
"Date: 2025-04-24 12:35 AIC: 66539.7930\n",
"No. Observations: 59500 BIC: 66647.7178\n",
"Df Model: 11 Log-Likelihood: -33258. \n",
"Df Residuals: 59488 LL-Null: -41242. \n",
@ -483,7 +483,7 @@
"\"\"\""
]
},
"execution_count": 22,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@ -507,7 +507,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 6,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
@ -534,7 +534,7 @@
"dtype: float64"
]
},
"execution_count": 23,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@ -545,6 +545,87 @@
"np.exp(log_result.params).sort_values(ascending=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Вычисление среднего квадратичного отклонения значений признаков"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"age 6.758844\n",
"gender 0.476715\n",
"height 7.821231\n",
"weight 13.472656\n",
"ap_hi 16.366878\n",
"ap_lo 9.071287\n",
"cholesterol 0.682134\n",
"gluc 0.571848\n",
"smoke 0.284051\n",
"alco 0.225918\n",
"active 0.397418\n",
"dtype: float64"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.std(X_train, 0)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Вычисление значимости признаков относительно текущей модели\n",
"\n",
"Признак const был добавлен искусственно и должен быть исключен из рассмотрения.\n",
"\n",
"Признак gender статистически незначим (P>|z| = 0.6515, что много больше уровня значимости 0.05) и поэтому исключается из рассмотрения."
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"ap_hi 0.839295\n",
"age 0.348905\n",
"cholesterol 0.337191\n",
"ap_lo 0.189439\n",
"weight 0.171091\n",
"active 0.087144\n",
"gluc 0.070209\n",
"smoke 0.047513\n",
"alco 0.043046\n",
"height 0.034657\n",
"dtype: float64"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"coefs = log_result.params.drop(labels=[\"const\",\"gender\"])\n",
"stdv = np.std(X_train, 0).drop(labels=\"gender\")\n",
"abs(coefs * stdv).sort_values(ascending=False)"
]
},
{
"cell_type": "markdown",
"metadata": {
@ -691,7 +772,7 @@
},
{
"cell_type": "code",
"execution_count": 40,
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
@ -929,7 +1010,7 @@
},
{
"cell_type": "code",
"execution_count": 46,
"execution_count": null,
"metadata": {},
"outputs": [
{