This commit is contained in:
Aleksey Filippov 2024-12-13 10:00:11 +04:00
parent 9c09239b02
commit 94caec03d6
5 changed files with 4511 additions and 1 deletions

View File

@ -0,0 +1,18 @@
T;Al2O3;TiO2;Density
30;0;0;1,05696
55;0;0;1,04158
25;0,05;0;1,08438
30;0,05;0;1,08112
35;0,05;0;1,07781
40;0,05;0;1,07446
60;0,05;0;1,06053
35;0,3;0;1,17459
65;0,3;0;1,14812
45;0;0,05;1,07424
50;0;0,05;1,07075
55;0;0,05;1,06721
20;0;0,3;1,22417
30;0;0,3;1,2131
40;0;0,3;1,20265
60;0;0,3;1,18265
70;0;0,3;1,17261
1 T Al2O3 TiO2 Density
2 30 0 0 1,05696
3 55 0 0 1,04158
4 25 0,05 0 1,08438
5 30 0,05 0 1,08112
6 35 0,05 0 1,07781
7 40 0,05 0 1,07446
8 60 0,05 0 1,06053
9 35 0,3 0 1,17459
10 65 0,3 0 1,14812
11 45 0 0,05 1,07424
12 50 0 0,05 1,07075
13 55 0 0,05 1,06721
14 20 0 0,3 1,22417
15 30 0 0,3 1,2131
16 40 0 0,3 1,20265
17 60 0 0,3 1,18265
18 70 0 0,3 1,17261

View File

@ -0,0 +1,39 @@
T;Al2O3;TiO2;Density
20;0;0;1,0625
25;0;0;1,05979
35;0;0;1,05404
40;0;0;1,05103
45;0;0;1,04794
50;0;0;1,04477
60;0;0;1,03826
65;0;0;1,03484
70;0;0;1,03182
20;0,05;0;1,08755
45;0,05;0;1,07105
50;0,05;0;1,0676
55;0,05;0;1,06409
65;0,05;0;1,05691
70;0,05;0;1,05291
20;0,3;0;1,18861
25;0,3;0;1,18389
30;0,3;0;1,1792
40;0,3;0;1,17017
45;0,3;0;1,16572
50;0,3;0;1,16138
55;0,3;0;1,15668
60;0,3;0;1,15233
70;0,3;0;1,14414
20;0;0,05;1,09098
25;0;0,05;1,08775
30;0;0,05;1,08443
35;0;0,05;1,08108
40;0;0,05;1,07768
60;0;0,05;1,06362
65;0;0,05;1,05999
70;0;0,05;1,05601
25;0;0,3;1,2186
35;0;0,3;1,20776
45;0;0,3;1,19759
50;0;0,3;1,19268
55;0;0,3;1,18746
65;0;0,3;1,178
1 T Al2O3 TiO2 Density
2 20 0 0 1,0625
3 25 0 0 1,05979
4 35 0 0 1,05404
5 40 0 0 1,05103
6 45 0 0 1,04794
7 50 0 0 1,04477
8 60 0 0 1,03826
9 65 0 0 1,03484
10 70 0 0 1,03182
11 20 0,05 0 1,08755
12 45 0,05 0 1,07105
13 50 0,05 0 1,0676
14 55 0,05 0 1,06409
15 65 0,05 0 1,05691
16 70 0,05 0 1,05291
17 20 0,3 0 1,18861
18 25 0,3 0 1,18389
19 30 0,3 0 1,1792
20 40 0,3 0 1,17017
21 45 0,3 0 1,16572
22 50 0,3 0 1,16138
23 55 0,3 0 1,15668
24 60 0,3 0 1,15233
25 70 0,3 0 1,14414
26 20 0 0,05 1,09098
27 25 0 0,05 1,08775
28 30 0 0,05 1,08443
29 35 0 0,05 1,08108
30 40 0 0,05 1,07768
31 60 0 0,05 1,06362
32 65 0 0,05 1,05999
33 70 0 0,05 1,05601
34 25 0 0,3 1,2186
35 35 0 0,3 1,20776
36 45 0 0,3 1,19759
37 50 0 0,3 1,19268
38 55 0 0,3 1,18746
39 65 0 0,3 1,178

4367
lec4.ipynb Normal file

File diff suppressed because one or more lines are too long

27
src/transformers.py Normal file
View File

@ -0,0 +1,27 @@
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
class TitanicFeatures(BaseEstimator, TransformerMixin):
    """Stateless transformer that derives two extra columns.

    Added columns:
      * ``Is_married`` -- 1 when the title parsed from ``Name`` equals
        ``"Mrs"``, otherwise 0.
      * ``Cabin_type`` -- first character of ``Cabin``, or ``"unknown"``
        when the cabin value is missing.
    """

    def __init__(self):
        pass

    def fit(self, X, y=None):
        """Nothing is learned from the data; return ``self`` unchanged."""
        return self

    @staticmethod
    def _get_title(name) -> str:
        """Return the text between the first comma and the following period."""
        return name.split(",")[1].split(".")[0].strip()

    @staticmethod
    def _get_cabin_type(cabin) -> str:
        """Return the first character of ``cabin`` or ``"unknown"`` if missing."""
        if pd.isna(cabin):
            return "unknown"
        return cabin[0]

    def transform(self, X, y=None):
        """Return a copy of ``X`` with ``Is_married`` and ``Cabin_type`` added.

        ``X`` must provide ``Name`` and ``Cabin`` columns. The input frame is
        left untouched: the new columns are written to a copy so that calling
        the transformer does not silently alter the caller's data.
        """
        # Work on a copy -- assigning columns on X directly would mutate the
        # caller's DataFrame as a side effect of transform().
        result = X.copy()
        result["Is_married"] = [
            1 if self._get_title(name) == "Mrs" else 0 for name in result["Name"]
        ]
        result["Cabin_type"] = [
            self._get_cabin_type(cabin) for cabin in result["Cabin"]
        ]
        return result

    def get_feature_names_out(self, features_in):
        """Return ``features_in`` followed by the two added column names."""
        return np.append(features_in, ["Is_married", "Cabin_type"], axis=0)

View File

@ -1,8 +1,12 @@
from typing import Tuple import math
from typing import Dict, Tuple
import numpy as np
import pandas as pd import pandas as pd
from pandas import DataFrame from pandas import DataFrame
from sklearn import metrics
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
def split_stratified_into_train_val_test( def split_stratified_into_train_val_test(
@ -77,3 +81,58 @@ def split_stratified_into_train_val_test(
assert len(df_input) == len(df_train) + len(df_val) + len(df_test) assert len(df_input) == len(df_train) + len(df_val) + len(df_test)
return df_train, df_val, df_test, y_train, y_val, y_test return df_train, df_val, df_test, y_train, y_val, y_test
def run_classification(
    model: Pipeline,
    X_train: DataFrame,
    X_test: DataFrame,
    y_train: DataFrame,
    y_test: DataFrame,
) -> Dict:
    """Score a binary classifier on the train and test sets.

    The function only calls ``model.predict`` and ``model.predict_proba``;
    it does not fit the model itself.

    Train labels come from ``model.predict``. Test labels are obtained by
    thresholding column 1 of ``model.predict_proba`` at 0.5.

    Returns:
        A dict with the model (``"pipeline"``), the test probabilities
        (``"probs"``), the test labels (``"preds"``) and the computed
        metrics (precision, recall, accuracy and F1 for train and test;
        ROC AUC, MCC, Cohen's kappa and the confusion matrix for test).
    """
    train_labels = model.predict(X_train)
    test_scores = model.predict_proba(X_test)[:, 1]
    # Strictly-greater comparison: a probability of exactly 0.5 maps to 0.
    test_labels = np.where(test_scores > 0.5, 1, 0)

    return {
        "pipeline": model,
        "probs": test_scores,
        "preds": test_labels,
        "Precision_train": metrics.precision_score(y_train, train_labels),
        "Precision_test": metrics.precision_score(y_test, test_labels),
        "Recall_train": metrics.recall_score(y_train, train_labels),
        "Recall_test": metrics.recall_score(y_test, test_labels),
        "Accuracy_train": metrics.accuracy_score(y_train, train_labels),
        "Accuracy_test": metrics.accuracy_score(y_test, test_labels),
        "ROC_AUC_test": metrics.roc_auc_score(y_test, test_scores),
        "F1_train": metrics.f1_score(y_train, train_labels),
        "F1_test": metrics.f1_score(y_test, test_labels),
        "MCC_test": metrics.matthews_corrcoef(y_test, test_labels),
        "Cohen_kappa_test": metrics.cohen_kappa_score(y_test, test_labels),
        "Confusion_matrix": metrics.confusion_matrix(y_test, test_labels),
    }
def run_regression(
    model: Pipeline,
    X_train: DataFrame,
    X_test: DataFrame,
    y_train: DataFrame,
    y_test: DataFrame,
) -> Dict:
    """Score a regressor on the train and test sets.

    The function only calls ``model.predict``; it does not fit the model.
    Features are handed to the model as ``X_train.values`` /
    ``X_test.values``, i.e. without the DataFrame wrapper.

    Returns:
        A dict with the model (``"fitted"``), train and test predictions
        (``"train_preds"``, ``"preds"``), RMSE for train and test, the
        square root of the test mean absolute error (``"RMAE_test"``) and
        the test R^2 score.
    """

    def _root_mse(actual, predicted) -> float:
        # Square root of the mean squared error.
        return math.sqrt(metrics.mean_squared_error(actual, predicted))

    train_predictions = model.predict(X_train.values)
    test_predictions = model.predict(X_test.values)

    return {
        "fitted": model,
        "train_preds": train_predictions,
        "preds": test_predictions,
        "RMSE_train": _root_mse(y_train, train_predictions),
        "RMSE_test": _root_mse(y_test, test_predictions),
        "RMAE_test": math.sqrt(
            metrics.mean_absolute_error(y_test, test_predictions)
        ),
        "R2_test": metrics.r2_score(y_test, test_predictions),
    }