356 KiB
356 KiB
In [1]:
import pickle
import pandas as pd
from sklearn import tree
# Load the pickled model. The context manager closes the file handle as soon
# as loading finishes instead of leaving it open until garbage collection.
# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only load model files that come from a trusted source.
with open("data/dtree.model.sav", "rb") as model_file:
    model = pickle.load(model_file)

# Feature names: the columns of the training CSV with "Density" dropped.
features = (
    pd.read_csv("data/density_train.csv", sep=";", decimal=",")
    .drop(["Density"], axis=1)
    .columns.values.tolist()
)

# Text rendering of the tree, with splits labelled by the feature names.
rules = tree.export_text(model, feature_names=features)
print(rules)
In [2]:
from src.rules import get_rules
# Build rules from the model and its feature names with the project helper
# get_rules (the output format is defined in src.rules, not shown here).
# Note: this rebinds `rules`, which held the export_text string until now.
rules = get_rules(model, features)
# Show the number of rules, then the rules themselves as the cell output.
display(len(rules))
rules
Out[2]:
In [3]:
from src.rules import normalise_rules
# Pass the rules through the project helper normalise_rules and keep the result
# under the same name (what "normalise" covers is defined in src.rules — TODO confirm).
rules = normalise_rules(rules)
# Show the rule count after this step, then the rules as the cell output.
display(len(rules))
rules
Out[3]:
In [4]:
from src.rules import delete_same_rules
# Pass the rules through the project helper delete_same_rules; presumably this
# removes duplicates — verify against src.rules.
rules = delete_same_rules(rules)
# Show the rule count after this step, then the rules as the cell output.
display(len(rules))
rules
Out[4]:
In [5]:
from src.rules import get_features, vectorize_rules
# NOTE: `features` is rebound here; until now it held the training-set column
# names. Re-running earlier cells after this one would see this new value.
# The meaning of the ["T"] argument is defined by get_features — TODO confirm.
features = get_features(rules, ["T"])
print(features)
# Build a tabular form of the rules from the rules and the feature list.
# Later cells drop a "consequent" column from it before clustering.
df_rules = vectorize_rules(rules, features)
# Preview the first five rows as the cell output.
df_rules.head(5)
Out[5]:
In [6]:
from src.cluster_helper import draw_best_clusters_plot, get_best_clusters_num
# Seed shared by the cluster-count search here and the KMeans fit later on.
random_state = 9

# Clustering input: every column of the vectorised rules except "consequent".
# DataFrame.drop already returns a new frame, so df_rules is left untouched.
X = df_rules.drop(columns=["consequent"])

# Score the candidate cluster counts, then show the scores and their plot.
clusters_score = get_best_clusters_num(X, random_state)
display(clusters_score)
draw_best_clusters_plot(clusters_score)

# Rank the candidate counts by score, best first; the sort is stable, so on a
# tie the count that appears first in clusters_score wins.
ranked_counts = sorted(clusters_score, key=lambda count: clusters_score[count], reverse=True)
clusters_num = ranked_counts[0]
display(f"The best clusters count is {clusters_num}")
In [7]:
from sklearn import cluster
from src.cluster_helper import print_cluster_result
# Fit KMeans on the rule vectors with the cluster count and seed chosen in the
# previous cell.
kmeans = cluster.KMeans(n_clusters=clusters_num, random_state=random_state)
kmeans.fit(X)
# Report the clustering via the project helper, passing the per-row labels
# produced by the fit.
print_cluster_result(X, clusters_num, kmeans.labels_)
In [8]:
# Both density files use the same CSV dialect: ";" between fields and ","
# as the decimal mark.
csv_options = {"sep": ";", "decimal": ","}
density_train = pd.read_csv("data/density_train.csv", **csv_options)
density_test = pd.read_csv("data/density_test.csv", **csv_options)

# Preview the first three rows of each frame.
display(density_train.head(3), density_test.head(3))
In [9]:
from src.rules import simplify_and_group_rules
# Combine the training data, the rules, the chosen cluster count and the KMeans
# labels through the project helper simplify_and_group_rules (its grouping and
# simplification logic lives in src.rules and is not shown here).
clustered_rules = simplify_and_group_rules(density_train, rules, clusters_num, kmeans.labels_)
# Display the grouped rules as the cell output.
clustered_rules
Out[9]:
In [10]:
import numpy as np
from skfuzzy import control as ctrl
import skfuzzy as fuzz
# Input "temp": its universe is the sorted set of distinct "T" values found in
# the training data.
temp_universe = density_train["T"].sort_values().unique()
temp = ctrl.Antecedent(temp_universe, "temp")

# Inputs "al" and "ti": both span 0 up to (but excluding) 0.3 in steps of 0.005.
al = ctrl.Antecedent(np.arange(0, 0.3, 0.005), "al")
ti = ctrl.Antecedent(np.arange(0, 0.3, 0.005), "ti")

# Output "density": 1.03 up to (but excluding) 1.22 in steps of 0.00001.
density = ctrl.Consequent(np.arange(1.03, 1.22, 0.00001), "density")

# Auto-generate membership functions (three terms per input, five for the
# output) and plot each variable right after its terms are created.
for fuzzy_var, term_count in ((temp, 3), (al, 3), (ti, 3), (density, 5)):
    fuzzy_var.automf(term_count, variable_type="quant")
    fuzzy_var.view()
In [11]:
from src.rules import get_fuzzy_rules
# Map the names used in the clustered rules ("Al2O3", "TiO2", "T",
# "consequent") to the fuzzy variables defined in the previous cell.
fuzzy_variables = {"Al2O3": al, "TiO2": ti, "T": temp, "consequent": density}
# Translate the clustered rules into fuzzy rules with the project helper.
fuzzy_rules = get_fuzzy_rules(clustered_rules, fuzzy_variables)
# Assemble the control system from the rules and create a simulation on it.
fuzzy_cntrl = ctrl.ControlSystem(fuzzy_rules)
# NOTE(review): lenient=False is passed explicitly — confirm its exact effect
# in the scikit-fuzzy documentation.
sim = ctrl.ControlSystemSimulation(fuzzy_cntrl, lenient=False)
# Show the number of fuzzy rules, then the rules as the cell output.
display(len(fuzzy_rules))
fuzzy_rules
Out[11]:
In [12]:
# Single hand-picked inference: temp = 25 with al and ti both 0.0.
sim.input["temp"] = 25
sim.input["al"] = 0.0
sim.input["ti"] = 0.0
# Run the simulation for these inputs.
sim.compute()
# Print the simulation's internal state, then show the inferred density value.
sim.print_state()
display(sim.output["density"])
# Plot the density variable with the simulation result applied.
density.view(sim=sim)
In [22]:
from sklearn import metrics
import math
def fuzzy_pred(row):
    """Run the fuzzy simulation for one data row and return the inferred density.

    Reads the "T", "Al2O3" and "TiO2" entries of ``row``, assigns them to the
    "temp", "al" and "ti" inputs of the module-level ``sim`` (in that order),
    calls ``sim.compute()`` and returns ``sim.output["density"]``.
    """
    input_columns = (("temp", "T"), ("al", "Al2O3"), ("ti", "TiO2"))
    for input_name, column in input_columns:
        sim.input[input_name] = row[column]
    sim.compute()
    return sim.output["density"]
def rmse(row):
    """Return the error between ``row["Real"]`` and ``row["Inferred"]`` for one row.

    The root-mean-squared error of a single sample is the absolute difference
    of the two values, so it is computed directly. This avoids a scikit-learn
    metric call (with its input validation) for every row of the frame and
    makes explicit that the per-row "RMSE" column is an absolute error.

    Args:
        row: mapping-like object (e.g. a DataFrame row) with numeric "Real"
            and "Inferred" entries.

    Returns:
        The non-negative absolute difference as a Python float. A NaN input
        yields NaN rather than raising.
    """
    return float(abs(row["Real"] - row["Inferred"]))
# Build the training results on a new frame (density_train is left untouched).
# assign evaluates the columns in order, so "Inferred" sees "Real" and "RMSE"
# sees both "Real" and "Inferred".
result_train = density_train.assign(
    Real=lambda frame: frame["Density"],
    Inferred=lambda frame: frame.apply(fuzzy_pred, axis=1),
    RMSE=lambda frame: frame.apply(rmse, axis=1),
)
# Preview the first fifteen rows.
result_train.head(15)
Out[22]:
In [26]:
# Build the test results on a new frame (density_test is left untouched).
# assign evaluates the columns in order, so "Inferred" sees "Real" and "RMSE"
# sees both "Real" and "Inferred". Only the "RMSE" column is rounded, to three
# decimals; "Real" and "Inferred" keep full precision.
result_test = density_test.assign(
    Real=lambda frame: frame["Density"],
    Inferred=lambda frame: frame.apply(fuzzy_pred, axis=1),
    RMSE=lambda frame: frame.apply(rmse, axis=1),
).round({"RMSE": 3})
# Display the complete test result table.
result_test
Out[26]:
In [25]:
# Aggregate quality metrics of the fuzzy model, keyed by metric name.
test_real = result_test["Real"]
test_inferred = result_test["Inferred"]
rmetrics = {
    # Root-mean-squared error on the training and the test set.
    "RMSE_train": math.sqrt(
        metrics.mean_squared_error(result_train["Real"], result_train["Inferred"])
    ),
    "RMSE_test": math.sqrt(metrics.mean_squared_error(test_real, test_inferred)),
    # NOTE(review): this is the square root of the mean absolute error, which
    # is not a standard metric — confirm that plain MAE was not intended.
    "RMAE_test": math.sqrt(metrics.mean_absolute_error(test_real, test_inferred)),
    # Coefficient of determination on the test set.
    "R2_test": metrics.r2_score(test_real, test_inferred),
}
rmetrics
Out[25]: