Add additional example for Lec3

This commit is contained in:
Aleksey Filippov 2025-04-24 14:25:52 +04:00
parent 10ebe528bb
commit 7368833434

View File

@ -11,7 +11,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 2,
"metadata": { "metadata": {
"colab": { "colab": {
"base_uri": "https://localhost:8080/" "base_uri": "https://localhost:8080/"
@ -65,7 +65,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 20, "execution_count": 3,
"metadata": { "metadata": {
"colab": { "colab": {
"base_uri": "https://localhost:8080/", "base_uri": "https://localhost:8080/",
@ -273,7 +273,7 @@
"cardio 0.000000 1.000000 1.000000 " "cardio 0.000000 1.000000 1.000000 "
] ]
}, },
"execution_count": 20, "execution_count": 3,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -293,7 +293,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 21, "execution_count": 4,
"metadata": { "metadata": {
"id": "1BXW8--WKI3b" "id": "1BXW8--WKI3b"
}, },
@ -319,7 +319,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 22, "execution_count": 5,
"metadata": { "metadata": {
"colab": { "colab": {
"base_uri": "https://localhost:8080/" "base_uri": "https://localhost:8080/"
@ -348,7 +348,7 @@
" <td>Dependent Variable:</td> <td>cardio</td> <td>Pseudo R-squared:</td> <td>0.194</td> \n", " <td>Dependent Variable:</td> <td>cardio</td> <td>Pseudo R-squared:</td> <td>0.194</td> \n",
"</tr>\n", "</tr>\n",
"<tr>\n", "<tr>\n",
" <td>Date:</td> <td>2025-04-24 09:36</td> <td>AIC:</td> <td>66539.7930</td>\n", " <td>Date:</td> <td>2025-04-24 12:35</td> <td>AIC:</td> <td>66539.7930</td>\n",
"</tr>\n", "</tr>\n",
"<tr>\n", "<tr>\n",
" <td>No. Observations:</td> <td>59500</td> <td>BIC:</td> <td>66647.7178</td>\n", " <td>No. Observations:</td> <td>59500</td> <td>BIC:</td> <td>66647.7178</td>\n",
@ -417,7 +417,7 @@
"\\hline\n", "\\hline\n",
"Model: & Logit & Method: & MLE \\\\\n", "Model: & Logit & Method: & MLE \\\\\n",
"Dependent Variable: & cardio & Pseudo R-squared: & 0.194 \\\\\n", "Dependent Variable: & cardio & Pseudo R-squared: & 0.194 \\\\\n",
"Date: & 2025-04-24 09:36 & AIC: & 66539.7930 \\\\\n", "Date: & 2025-04-24 12:35 & AIC: & 66539.7930 \\\\\n",
"No. Observations: & 59500 & BIC: & 66647.7178 \\\\\n", "No. Observations: & 59500 & BIC: & 66647.7178 \\\\\n",
"Df Model: & 11 & Log-Likelihood: & -33258. \\\\\n", "Df Model: & 11 & Log-Likelihood: & -33258. \\\\\n",
"Df Residuals: & 59488 & LL-Null: & -41242. \\\\\n", "Df Residuals: & 59488 & LL-Null: & -41242. \\\\\n",
@ -457,7 +457,7 @@
"=================================================================\n", "=================================================================\n",
"Model: Logit Method: MLE \n", "Model: Logit Method: MLE \n",
"Dependent Variable: cardio Pseudo R-squared: 0.194 \n", "Dependent Variable: cardio Pseudo R-squared: 0.194 \n",
"Date: 2025-04-24 09:36 AIC: 66539.7930\n", "Date: 2025-04-24 12:35 AIC: 66539.7930\n",
"No. Observations: 59500 BIC: 66647.7178\n", "No. Observations: 59500 BIC: 66647.7178\n",
"Df Model: 11 Log-Likelihood: -33258. \n", "Df Model: 11 Log-Likelihood: -33258. \n",
"Df Residuals: 59488 LL-Null: -41242. \n", "Df Residuals: 59488 LL-Null: -41242. \n",
@ -483,7 +483,7 @@
"\"\"\"" "\"\"\""
] ]
}, },
"execution_count": 22, "execution_count": 5,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -507,7 +507,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 23, "execution_count": 6,
"metadata": { "metadata": {
"colab": { "colab": {
"base_uri": "https://localhost:8080/" "base_uri": "https://localhost:8080/"
@ -534,7 +534,7 @@
"dtype: float64" "dtype: float64"
] ]
}, },
"execution_count": 23, "execution_count": 6,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -545,6 +545,87 @@
"np.exp(log_result.params).sort_values(ascending=False)" "np.exp(log_result.params).sort_values(ascending=False)"
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Вычисление среднего квадратичного отклонения значений признаков"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"age 6.758844\n",
"gender 0.476715\n",
"height 7.821231\n",
"weight 13.472656\n",
"ap_hi 16.366878\n",
"ap_lo 9.071287\n",
"cholesterol 0.682134\n",
"gluc 0.571848\n",
"smoke 0.284051\n",
"alco 0.225918\n",
"active 0.397418\n",
"dtype: float64"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.std(X_train, 0)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
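"#### Вычисление значимости признаков относительно текущей модели\n",
"\n",
"Значимость признака оценивается как модуль произведения его коэффициента в модели на среднеквадратическое отклонение значений признака\n",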
"\n",
"Признак const был добавлен искусственно и должен быть исключен из рассмотрения\n",
"\n",
"Признак gender статистически незначим (p-значение P>|z| = 0.6515 много больше уровня значимости 5 %) и поэтому исключается из рассмотрения"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"ap_hi 0.839295\n",
"age 0.348905\n",
"cholesterol 0.337191\n",
"ap_lo 0.189439\n",
"weight 0.171091\n",
"active 0.087144\n",
"gluc 0.070209\n",
"smoke 0.047513\n",
"alco 0.043046\n",
"height 0.034657\n",
"dtype: float64"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"coefs = log_result.params.drop(labels=[\"const\",\"gender\"])\n",
"stdv = np.std(X_train, 0).drop(labels=\"gender\")\n",
"abs(coefs * stdv).sort_values(ascending=False)"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": { "metadata": {
@ -691,7 +772,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 40, "execution_count": null,
"metadata": { "metadata": {
"colab": { "colab": {
"base_uri": "https://localhost:8080/", "base_uri": "https://localhost:8080/",
@ -929,7 +1010,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 46, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {