Lec4
This commit is contained in:
parent
9c09239b02
commit
94caec03d6
18
data/density/density_test.csv
Normal file
18
data/density/density_test.csv
Normal file
@ -0,0 +1,18 @@
|
||||
T;Al2O3;TiO2;Density
|
||||
30;0;0;1,05696
|
||||
55;0;0;1,04158
|
||||
25;0,05;0;1,08438
|
||||
30;0,05;0;1,08112
|
||||
35;0,05;0;1,07781
|
||||
40;0,05;0;1,07446
|
||||
60;0,05;0;1,06053
|
||||
35;0,3;0;1,17459
|
||||
65;0,3;0;1,14812
|
||||
45;0;0,05;1,07424
|
||||
50;0;0,05;1,07075
|
||||
55;0;0,05;1,06721
|
||||
20;0;0,3;1,22417
|
||||
30;0;0,3;1,2131
|
||||
40;0;0,3;1,20265
|
||||
60;0;0,3;1,18265
|
||||
70;0;0,3;1,17261
|
|
39
data/density/density_train.csv
Normal file
39
data/density/density_train.csv
Normal file
@ -0,0 +1,39 @@
|
||||
T;Al2O3;TiO2;Density
|
||||
20;0;0;1,0625
|
||||
25;0;0;1,05979
|
||||
35;0;0;1,05404
|
||||
40;0;0;1,05103
|
||||
45;0;0;1,04794
|
||||
50;0;0;1,04477
|
||||
60;0;0;1,03826
|
||||
65;0;0;1,03484
|
||||
70;0;0;1,03182
|
||||
20;0,05;0;1,08755
|
||||
45;0,05;0;1,07105
|
||||
50;0,05;0;1,0676
|
||||
55;0,05;0;1,06409
|
||||
65;0,05;0;1,05691
|
||||
70;0,05;0;1,05291
|
||||
20;0,3;0;1,18861
|
||||
25;0,3;0;1,18389
|
||||
30;0,3;0;1,1792
|
||||
40;0,3;0;1,17017
|
||||
45;0,3;0;1,16572
|
||||
50;0,3;0;1,16138
|
||||
55;0,3;0;1,15668
|
||||
60;0,3;0;1,15233
|
||||
70;0,3;0;1,14414
|
||||
20;0;0,05;1,09098
|
||||
25;0;0,05;1,08775
|
||||
30;0;0,05;1,08443
|
||||
35;0;0,05;1,08108
|
||||
40;0;0,05;1,07768
|
||||
60;0;0,05;1,06362
|
||||
65;0;0,05;1,05999
|
||||
70;0;0,05;1,05601
|
||||
25;0;0,3;1,2186
|
||||
35;0;0,3;1,20776
|
||||
45;0;0,3;1,19759
|
||||
50;0;0,3;1,19268
|
||||
55;0;0,3;1,18746
|
||||
65;0;0,3;1,178
|
|
4367
lec4.ipynb
Normal file
4367
lec4.ipynb
Normal file
File diff suppressed because one or more lines are too long
27
src/transformers.py
Normal file
27
src/transformers.py
Normal file
@ -0,0 +1,27 @@
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from sklearn.base import BaseEstimator, TransformerMixin
|
||||
|
||||
|
||||
class TitanicFeatures(BaseEstimator, TransformerMixin):
    """Stateless transformer that derives two extra columns from passenger data.

    The transformer reads the ``Name`` and ``Cabin`` columns and appends:

    * ``Is_married`` -- 1 when the title parsed from ``Name`` is ``"Mrs"``,
      otherwise 0.
    * ``Cabin_type`` -- the first character of ``Cabin``, or ``"unknown"``
      when the cabin value is missing or empty.
    """

    def __init__(self):
        pass

    def fit(self, X, y=None):
        """Return ``self`` unchanged; nothing is learned from the data."""
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` with ``Is_married`` and ``Cabin_type`` added.

        The input frame is copied first so the caller's DataFrame is not
        modified as a side effect of transforming it.
        """

        def get_title(name) -> str:
            # The title is the text between the first comma and the
            # following period. Values that are not strings or contain no
            # comma yield an empty title instead of raising.
            if not isinstance(name, str):
                return ""
            parts = name.split(",")
            if len(parts) < 2:
                return ""
            return parts[1].split(".")[0].strip()

        def get_cabin_type(cabin) -> str:
            # An empty string has no first character, so treat it like a
            # missing cabin.
            if pd.isna(cabin) or not cabin:
                return "unknown"
            return cabin[0]

        X = X.copy()
        X["Is_married"] = [1 if get_title(name) == "Mrs" else 0 for name in X["Name"]]
        X["Cabin_type"] = [get_cabin_type(cabin) for cabin in X["Cabin"]]
        return X

    def get_feature_names_out(self, features_in):
        """Return ``features_in`` followed by the two added column names."""
        return np.append(features_in, ["Is_married", "Cabin_type"], axis=0)
|
61
src/utils.py
61
src/utils.py
@ -1,8 +1,12 @@
|
||||
from typing import Tuple
|
||||
import math
|
||||
from typing import Dict, Tuple
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from pandas import DataFrame
|
||||
from sklearn import metrics
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.pipeline import Pipeline
|
||||
|
||||
|
||||
def split_stratified_into_train_val_test(
|
||||
@ -77,3 +81,58 @@ def split_stratified_into_train_val_test(
|
||||
|
||||
assert len(df_input) == len(df_train) + len(df_val) + len(df_test)
|
||||
return df_train, df_val, df_test, y_train, y_val, y_test
|
||||
|
||||
|
||||
def run_classification(
    model: Pipeline,
    X_train: DataFrame,
    X_test: DataFrame,
    y_train: DataFrame,
    y_test: DataFrame,
) -> Dict:
    """Score ``model`` on the train and test splits and collect the metrics.

    No fitting happens here: ``model.predict`` and ``model.predict_proba``
    are called directly on the object that is passed in.

    Train labels come from ``model.predict``. Test labels are obtained by
    thresholding column 1 of ``model.predict_proba`` at 0.5.

    Returns a dict holding the model (``"pipeline"``), the test
    probabilities (``"probs"``), the thresholded test labels (``"preds"``)
    and the train/test metric values.
    """
    train_labels = model.predict(X_train)
    positive_probs = model.predict_proba(X_test)[:, 1]
    # Strictly greater than 0.5 maps to class 1, everything else to class 0.
    test_labels = np.where(positive_probs > 0.5, 1, 0)

    return {
        "pipeline": model,
        "probs": positive_probs,
        "preds": test_labels,
        "Precision_train": metrics.precision_score(y_train, train_labels),
        "Precision_test": metrics.precision_score(y_test, test_labels),
        "Recall_train": metrics.recall_score(y_train, train_labels),
        "Recall_test": metrics.recall_score(y_test, test_labels),
        "Accuracy_train": metrics.accuracy_score(y_train, train_labels),
        "Accuracy_test": metrics.accuracy_score(y_test, test_labels),
        "ROC_AUC_test": metrics.roc_auc_score(y_test, positive_probs),
        "F1_train": metrics.f1_score(y_train, train_labels),
        "F1_test": metrics.f1_score(y_test, test_labels),
        "MCC_test": metrics.matthews_corrcoef(y_test, test_labels),
        "Cohen_kappa_test": metrics.cohen_kappa_score(y_test, test_labels),
        "Confusion_matrix": metrics.confusion_matrix(y_test, test_labels),
    }
|
||||
|
||||
|
||||
def run_regression(
    model: Pipeline,
    X_train: DataFrame,
    X_test: DataFrame,
    y_train: DataFrame,
    y_test: DataFrame,
) -> Dict:
    """Score ``model`` on the train and test splits and collect the metrics.

    No fitting happens here: ``model.predict`` is called directly on the
    object that is passed in. Features are handed to the model as plain
    arrays (``.values``), without column labels.

    Returns a dict holding the model (``"fitted"``), the train and test
    predictions (``"train_preds"``, ``"preds"``) and these metrics:
    ``RMSE_train``, ``RMSE_test``, ``RMAE_test``, ``MAE_test``, ``R2_test``.
    """
    result = {}
    y_train_pred = model.predict(X_train.values)
    y_test_pred = model.predict(X_test.values)

    result["fitted"] = model
    result["train_preds"] = y_train_pred
    result["preds"] = y_test_pred

    result["RMSE_train"] = math.sqrt(metrics.mean_squared_error(y_train, y_train_pred))
    result["RMSE_test"] = math.sqrt(metrics.mean_squared_error(y_test, y_test_pred))

    test_mae = metrics.mean_absolute_error(y_test, y_test_pred)
    # "RMAE_test" is the square root of the MAE. It is kept so existing
    # consumers of this key see the same value, but it is not expressed in
    # the target's units; "MAE_test" holds the plain mean absolute error.
    result["RMAE_test"] = math.sqrt(test_mae)
    result["MAE_test"] = test_mae
    result["R2_test"] = metrics.r2_score(y_test, y_test_pred)

    return result
|
||||
|
Loading…
Reference in New Issue
Block a user