This commit is contained in:
Aleksey Filippov 2024-12-13 10:00:11 +04:00
parent 9c09239b02
commit 94caec03d6
5 changed files with 4511 additions and 1 deletions

View File

@ -0,0 +1,18 @@
T;Al2O3;TiO2;Density
30;0;0;1,05696
55;0;0;1,04158
25;0,05;0;1,08438
30;0,05;0;1,08112
35;0,05;0;1,07781
40;0,05;0;1,07446
60;0,05;0;1,06053
35;0,3;0;1,17459
65;0,3;0;1,14812
45;0;0,05;1,07424
50;0;0,05;1,07075
55;0;0,05;1,06721
20;0;0,3;1,22417
30;0;0,3;1,2131
40;0;0,3;1,20265
60;0;0,3;1,18265
70;0;0,3;1,17261
1 T Al2O3 TiO2 Density
2 30 0 0 1,05696
3 55 0 0 1,04158
4 25 0,05 0 1,08438
5 30 0,05 0 1,08112
6 35 0,05 0 1,07781
7 40 0,05 0 1,07446
8 60 0,05 0 1,06053
9 35 0,3 0 1,17459
10 65 0,3 0 1,14812
11 45 0 0,05 1,07424
12 50 0 0,05 1,07075
13 55 0 0,05 1,06721
14 20 0 0,3 1,22417
15 30 0 0,3 1,2131
16 40 0 0,3 1,20265
17 60 0 0,3 1,18265
18 70 0 0,3 1,17261

View File

@ -0,0 +1,39 @@
T;Al2O3;TiO2;Density
20;0;0;1,0625
25;0;0;1,05979
35;0;0;1,05404
40;0;0;1,05103
45;0;0;1,04794
50;0;0;1,04477
60;0;0;1,03826
65;0;0;1,03484
70;0;0;1,03182
20;0,05;0;1,08755
45;0,05;0;1,07105
50;0,05;0;1,0676
55;0,05;0;1,06409
65;0,05;0;1,05691
70;0,05;0;1,05291
20;0,3;0;1,18861
25;0,3;0;1,18389
30;0,3;0;1,1792
40;0,3;0;1,17017
45;0,3;0;1,16572
50;0,3;0;1,16138
55;0,3;0;1,15668
60;0,3;0;1,15233
70;0,3;0;1,14414
20;0;0,05;1,09098
25;0;0,05;1,08775
30;0;0,05;1,08443
35;0;0,05;1,08108
40;0;0,05;1,07768
60;0;0,05;1,06362
65;0;0,05;1,05999
70;0;0,05;1,05601
25;0;0,3;1,2186
35;0;0,3;1,20776
45;0;0,3;1,19759
50;0;0,3;1,19268
55;0;0,3;1,18746
65;0;0,3;1,178
1 T Al2O3 TiO2 Density
2 20 0 0 1,0625
3 25 0 0 1,05979
4 35 0 0 1,05404
5 40 0 0 1,05103
6 45 0 0 1,04794
7 50 0 0 1,04477
8 60 0 0 1,03826
9 65 0 0 1,03484
10 70 0 0 1,03182
11 20 0,05 0 1,08755
12 45 0,05 0 1,07105
13 50 0,05 0 1,0676
14 55 0,05 0 1,06409
15 65 0,05 0 1,05691
16 70 0,05 0 1,05291
17 20 0,3 0 1,18861
18 25 0,3 0 1,18389
19 30 0,3 0 1,1792
20 40 0,3 0 1,17017
21 45 0,3 0 1,16572
22 50 0,3 0 1,16138
23 55 0,3 0 1,15668
24 60 0,3 0 1,15233
25 70 0,3 0 1,14414
26 20 0 0,05 1,09098
27 25 0 0,05 1,08775
28 30 0 0,05 1,08443
29 35 0 0,05 1,08108
30 40 0 0,05 1,07768
31 60 0 0,05 1,06362
32 65 0 0,05 1,05999
33 70 0 0,05 1,05601
34 25 0 0,3 1,2186
35 35 0 0,3 1,20776
36 45 0 0,3 1,19759
37 50 0 0,3 1,19268
38 55 0 0,3 1,18746
39 65 0 0,3 1,178

4367
lec4.ipynb Normal file

File diff suppressed because one or more lines are too long

27
src/transformers.py Normal file
View File

@ -0,0 +1,27 @@
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
class TitanicFeatures(BaseEstimator, TransformerMixin):
    """Stateless transformer that derives two extra columns from a frame.

    ``transform`` reads the ``Name`` and ``Cabin`` columns of its input and
    appends:

    * ``Is_married`` -- 1 when the title parsed from ``Name`` is exactly
      ``"Mrs"``, otherwise 0.
    * ``Cabin_type`` -- the first character of ``Cabin``, or ``"unknown"``
      when the cabin value is missing.
    """

    def __init__(self):
        pass

    def fit(self, X, y=None):
        """Do nothing and return ``self``; there is nothing to learn."""
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` with ``Is_married`` and ``Cabin_type`` added.

        The input frame is left untouched: the new columns are written to a
        copy, so calling ``transform`` never has side effects on the
        caller's data.

        Args:
            X: Frame providing ``Name`` and ``Cabin`` columns. Each name is
                parsed as the text between the first ``","`` and the
                following ``"."``; a name without a comma raises
                ``IndexError``.
            y: Ignored.

        Returns:
            A new frame with all columns of ``X`` plus the two derived ones.
        """

        def get_title(name) -> str:
            # Title is the token between the first comma and the next dot.
            return name.split(",")[1].split(".")[0].strip()

        def get_cabin_type(cabin) -> str:
            if pd.isna(cabin):
                return "unknown"
            return cabin[0]

        # Work on a copy so the caller's frame is not mutated in place.
        X = X.copy()
        X["Is_married"] = [1 if get_title(name) == "Mrs" else 0 for name in X["Name"]]
        X["Cabin_type"] = [get_cabin_type(cabin) for cabin in X["Cabin"]]
        return X

    def get_feature_names_out(self, features_in):
        """Return ``features_in`` followed by the two derived column names."""
        return np.append(features_in, ["Is_married", "Cabin_type"], axis=0)

View File

@ -1,8 +1,12 @@
from typing import Tuple
import math
from typing import Dict, Tuple
import numpy as np
import pandas as pd
from pandas import DataFrame
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
def split_stratified_into_train_val_test(
@ -77,3 +81,58 @@ def split_stratified_into_train_val_test(
assert len(df_input) == len(df_train) + len(df_val) + len(df_test)
return df_train, df_val, df_test, y_train, y_val, y_test
def run_classification(
    model: Pipeline,
    X_train: DataFrame,
    X_test: DataFrame,
    y_train: DataFrame,
    y_test: DataFrame,
) -> Dict:
    """Score an already-fitted classifier and collect its metrics in a dict.

    Train predictions come from ``model.predict``. Test predictions are
    obtained by thresholding the second column of ``model.predict_proba``
    at 0.5 (strictly greater than 0.5 maps to 1, otherwise 0).

    Args:
        model: Estimator exposing ``predict`` and ``predict_proba``; it is
            not fitted here.
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels.

    Returns:
        Dict holding the model itself (``"pipeline"``), the test
        probabilities (``"probs"``), the thresholded test predictions
        (``"preds"``) and the train/test metrics keyed by name.
    """
    train_labels = model.predict(X_train)
    test_scores = model.predict_proba(X_test)[:, 1]
    test_labels = np.where(test_scores > 0.5, 1, 0)

    # Key order is kept as-is: callers may turn this dict into table columns.
    return {
        "pipeline": model,
        "probs": test_scores,
        "preds": test_labels,
        "Precision_train": metrics.precision_score(y_train, train_labels),
        "Precision_test": metrics.precision_score(y_test, test_labels),
        "Recall_train": metrics.recall_score(y_train, train_labels),
        "Recall_test": metrics.recall_score(y_test, test_labels),
        "Accuracy_train": metrics.accuracy_score(y_train, train_labels),
        "Accuracy_test": metrics.accuracy_score(y_test, test_labels),
        "ROC_AUC_test": metrics.roc_auc_score(y_test, test_scores),
        "F1_train": metrics.f1_score(y_train, train_labels),
        "F1_test": metrics.f1_score(y_test, test_labels),
        "MCC_test": metrics.matthews_corrcoef(y_test, test_labels),
        "Cohen_kappa_test": metrics.cohen_kappa_score(y_test, test_labels),
        "Confusion_matrix": metrics.confusion_matrix(y_test, test_labels),
    }
def run_regression(
    model: Pipeline,
    X_train: DataFrame,
    X_test: DataFrame,
    y_train: DataFrame,
    y_test: DataFrame,
) -> Dict:
    """Score an already-fitted regressor and collect its metrics in a dict.

    Predictions are made on the ``.values`` of both feature frames.

    Args:
        model: Estimator exposing ``predict``; it is not fitted here.
        X_train: Training features.
        X_test: Test features.
        y_train: Training targets.
        y_test: Test targets.

    Returns:
        Dict holding the model (``"fitted"``), train and test predictions
        (``"train_preds"``, ``"preds"``), root mean squared error on both
        splits, the square root of the test mean absolute error
        (``"RMAE_test"``) and the test R^2 score.
    """
    fitted_train = model.predict(X_train.values)
    fitted_test = model.predict(X_test.values)

    mse_train = metrics.mean_squared_error(y_train, fitted_train)
    mse_test = metrics.mean_squared_error(y_test, fitted_test)
    mae_test = metrics.mean_absolute_error(y_test, fitted_test)

    return {
        "fitted": model,
        "train_preds": fitted_train,
        "preds": fitted_test,
        "RMSE_train": math.sqrt(mse_train),
        "RMSE_test": math.sqrt(mse_test),
        "RMAE_test": math.sqrt(mae_test),
        "R2_test": metrics.r2_score(y_test, fitted_test),
    }