Lec4
This commit is contained in:
parent
9c09239b02
commit
94caec03d6
18
data/density/density_test.csv
Normal file
18
data/density/density_test.csv
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
T;Al2O3;TiO2;Density
|
||||||
|
30;0;0;1,05696
|
||||||
|
55;0;0;1,04158
|
||||||
|
25;0,05;0;1,08438
|
||||||
|
30;0,05;0;1,08112
|
||||||
|
35;0,05;0;1,07781
|
||||||
|
40;0,05;0;1,07446
|
||||||
|
60;0,05;0;1,06053
|
||||||
|
35;0,3;0;1,17459
|
||||||
|
65;0,3;0;1,14812
|
||||||
|
45;0;0,05;1,07424
|
||||||
|
50;0;0,05;1,07075
|
||||||
|
55;0;0,05;1,06721
|
||||||
|
20;0;0,3;1,22417
|
||||||
|
30;0;0,3;1,2131
|
||||||
|
40;0;0,3;1,20265
|
||||||
|
60;0;0,3;1,18265
|
||||||
|
70;0;0,3;1,17261
|
|
39
data/density/density_train.csv
Normal file
39
data/density/density_train.csv
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
T;Al2O3;TiO2;Density
|
||||||
|
20;0;0;1,0625
|
||||||
|
25;0;0;1,05979
|
||||||
|
35;0;0;1,05404
|
||||||
|
40;0;0;1,05103
|
||||||
|
45;0;0;1,04794
|
||||||
|
50;0;0;1,04477
|
||||||
|
60;0;0;1,03826
|
||||||
|
65;0;0;1,03484
|
||||||
|
70;0;0;1,03182
|
||||||
|
20;0,05;0;1,08755
|
||||||
|
45;0,05;0;1,07105
|
||||||
|
50;0,05;0;1,0676
|
||||||
|
55;0,05;0;1,06409
|
||||||
|
65;0,05;0;1,05691
|
||||||
|
70;0,05;0;1,05291
|
||||||
|
20;0,3;0;1,18861
|
||||||
|
25;0,3;0;1,18389
|
||||||
|
30;0,3;0;1,1792
|
||||||
|
40;0,3;0;1,17017
|
||||||
|
45;0,3;0;1,16572
|
||||||
|
50;0,3;0;1,16138
|
||||||
|
55;0,3;0;1,15668
|
||||||
|
60;0,3;0;1,15233
|
||||||
|
70;0,3;0;1,14414
|
||||||
|
20;0;0,05;1,09098
|
||||||
|
25;0;0,05;1,08775
|
||||||
|
30;0;0,05;1,08443
|
||||||
|
35;0;0,05;1,08108
|
||||||
|
40;0;0,05;1,07768
|
||||||
|
60;0;0,05;1,06362
|
||||||
|
65;0;0,05;1,05999
|
||||||
|
70;0;0,05;1,05601
|
||||||
|
25;0;0,3;1,2186
|
||||||
|
35;0;0,3;1,20776
|
||||||
|
45;0;0,3;1,19759
|
||||||
|
50;0;0,3;1,19268
|
||||||
|
55;0;0,3;1,18746
|
||||||
|
65;0;0,3;1,178
|
|
4367
lec4.ipynb
Normal file
4367
lec4.ipynb
Normal file
File diff suppressed because one or more lines are too long
27
src/transformers.py
Normal file
27
src/transformers.py
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
from sklearn.base import BaseEstimator, TransformerMixin
|
||||||
|
|
||||||
|
|
||||||
|
class TitanicFeatures(BaseEstimator, TransformerMixin):
    """Append two engineered columns to a Titanic passenger DataFrame.

    Added columns:

    * ``Is_married`` -- 1 when the title parsed from ``Name`` equals
      ``"Mrs"``, otherwise 0.
    * ``Cabin_type`` -- the first character of ``Cabin``, or ``"unknown"``
      when the cabin value is missing.

    The transformer is stateless: ``fit`` learns nothing.
    """

    def __init__(self):
        pass

    def fit(self, X, y=None):
        """Do nothing and return ``self`` (no parameters are learned)."""
        return self

    @staticmethod
    def _get_title(name) -> str:
        """Return the text between the first comma and the following period.

        For ``"Braund, Mr. Owen Harris"`` this yields ``"Mr"``. A name
        without a comma raises ``IndexError``, as in the original code.
        """
        return name.split(",")[1].split(".")[0].strip()

    @staticmethod
    def _get_cabin_type(cabin) -> str:
        """Return the first character of *cabin*, or ``"unknown"`` if missing."""
        if pd.isna(cabin):
            return "unknown"
        return cabin[0]

    def transform(self, X, y=None):
        """Return a copy of *X* with ``Is_married`` and ``Cabin_type`` added.

        *X* must provide ``Name`` and ``Cabin`` columns. The input frame is
        left untouched: the new columns are written to a copy, so calling
        ``transform`` repeatedly (for example inside cross-validation) never
        alters the caller's data and never assigns into a slice of another
        frame.
        """
        # Work on a copy so the caller's DataFrame is not mutated.
        X = X.copy()
        X["Is_married"] = [
            1 if self._get_title(name) == "Mrs" else 0 for name in X["Name"]
        ]
        X["Cabin_type"] = [self._get_cabin_type(cabin) for cabin in X["Cabin"]]
        return X

    def get_feature_names_out(self, features_in):
        """Return *features_in* followed by the two added column names."""
        return np.append(features_in, ["Is_married", "Cabin_type"], axis=0)
|
61
src/utils.py
61
src/utils.py
@ -1,8 +1,12 @@
|
|||||||
from typing import Tuple
|
import math
|
||||||
|
from typing import Dict, Tuple
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from pandas import DataFrame
|
from pandas import DataFrame
|
||||||
|
from sklearn import metrics
|
||||||
from sklearn.model_selection import train_test_split
|
from sklearn.model_selection import train_test_split
|
||||||
|
from sklearn.pipeline import Pipeline
|
||||||
|
|
||||||
|
|
||||||
def split_stratified_into_train_val_test(
|
def split_stratified_into_train_val_test(
|
||||||
@ -77,3 +81,58 @@ def split_stratified_into_train_val_test(
|
|||||||
|
|
||||||
assert len(df_input) == len(df_train) + len(df_val) + len(df_test)
|
assert len(df_input) == len(df_train) + len(df_val) + len(df_test)
|
||||||
return df_train, df_val, df_test, y_train, y_val, y_test
|
return df_train, df_val, df_test, y_train, y_val, y_test
|
||||||
|
|
||||||
|
|
||||||
|
def run_classification(
    model: Pipeline,
    X_train: DataFrame,
    X_test: DataFrame,
    y_train: DataFrame,
    y_test: DataFrame,
) -> Dict:
    """Score a binary classifier on train and test data.

    The function does not fit ``model``; it only calls ``predict`` on the
    training features and ``predict_proba`` on the test features. Test labels
    are obtained by thresholding column 1 of ``predict_proba`` at 0.5
    (strictly greater than 0.5 maps to 1, everything else to 0).

    Returns a dict holding the model (``"pipeline"``), the test
    probabilities (``"probs"``), the thresholded test labels (``"preds"``)
    and the metrics: precision, recall, accuracy and F1 on both splits,
    plus ROC AUC, Matthews correlation, Cohen's kappa and the confusion
    matrix on the test split.
    """
    train_labels = model.predict(X_train)
    positive_probs = model.predict_proba(X_test)[:, 1]
    test_labels = np.where(positive_probs > 0.5, 1, 0)

    # Key order matches the original incremental construction.
    return {
        "pipeline": model,
        "probs": positive_probs,
        "preds": test_labels,
        "Precision_train": metrics.precision_score(y_train, train_labels),
        "Precision_test": metrics.precision_score(y_test, test_labels),
        "Recall_train": metrics.recall_score(y_train, train_labels),
        "Recall_test": metrics.recall_score(y_test, test_labels),
        "Accuracy_train": metrics.accuracy_score(y_train, train_labels),
        "Accuracy_test": metrics.accuracy_score(y_test, test_labels),
        "ROC_AUC_test": metrics.roc_auc_score(y_test, positive_probs),
        "F1_train": metrics.f1_score(y_train, train_labels),
        "F1_test": metrics.f1_score(y_test, test_labels),
        "MCC_test": metrics.matthews_corrcoef(y_test, test_labels),
        "Cohen_kappa_test": metrics.cohen_kappa_score(y_test, test_labels),
        "Confusion_matrix": metrics.confusion_matrix(y_test, test_labels),
    }
|
||||||
|
|
||||||
|
|
||||||
|
def run_regression(
    model: Pipeline,
    X_train: DataFrame,
    X_test: DataFrame,
    y_train: DataFrame,
    y_test: DataFrame,
) -> Dict:
    """Score a regressor on train and test data.

    The function does not fit ``model``; it only calls ``predict``. Features
    are passed to the model as ``.values`` of each frame rather than as the
    frames themselves.

    Returns a dict holding the model (``"fitted"``), the train and test
    predictions (``"train_preds"``, ``"preds"``) and the metrics
    ``RMSE_train``, ``RMSE_test``, ``RMAE_test`` and ``R2_test``.
    Note that ``RMAE_test`` is the square root of the mean absolute error.
    """
    train_estimates = model.predict(X_train.values)
    test_estimates = model.predict(X_test.values)

    mse_train = metrics.mean_squared_error(y_train, train_estimates)
    mse_test = metrics.mean_squared_error(y_test, test_estimates)
    mae_test = metrics.mean_absolute_error(y_test, test_estimates)

    # Key order matches the original incremental construction.
    return {
        "fitted": model,
        "train_preds": train_estimates,
        "preds": test_estimates,
        "RMSE_train": math.sqrt(mse_train),
        "RMSE_test": math.sqrt(mse_test),
        "RMAE_test": math.sqrt(mae_test),
        "R2_test": metrics.r2_score(y_test, test_estimates),
    }
|
||||||
|
Loading…
Reference in New Issue
Block a user