357 KiB
357 KiB
In [17]:
import pickle
import pandas as pd
from sklearn import tree
# Load the previously fitted decision tree.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only load model files that come from a trusted source.
# The context manager closes the file handle even if unpickling fails.
with open("data/dtree.model.sav", "rb") as model_file:
    model = pickle.load(model_file)

# Feature names are the training-set columns without "Density"
# (the column used further down as the real value to compare against).
features = (
    pd.read_csv("data/density_train.csv", sep=";", decimal=",")
    .drop(["Density"], axis=1)
    .columns.values.tolist()
)

# Plain-text dump of the tree, labelled with the feature names above.
rules = tree.export_text(model, feature_names=features)
print(rules)
In [18]:
from src.rules import get_rules
# Extract rules from the fitted tree with the project helper get_rules.
# This rebinds `rules`, which until now held the text dump from tree.export_text.
# NOTE(review): the structure of the returned rules is defined in src.rules — confirm there.
rules = get_rules(model, features)
# Show the rule count first; the bare `rules` expression is the cell's output.
display(len(rules))
rules
Out[18]:
In [19]:
from src.rules import normalise_rules
# Run the extracted rules through the project helper normalise_rules.
# NOTE(review): `rules` is overwritten with the helper's result, so re-running this
# cell feeds already-normalised rules back in — confirm the helper tolerates that.
rules = normalise_rules(rules)
# Show the rule count after normalisation, then the rules themselves as the cell output.
display(len(rules))
rules
Out[19]:
In [20]:
from src.rules import delete_same_rules
# Run the rules through the project helper delete_same_rules
# (presumably removes duplicate rules — confirm in src.rules).
rules = delete_same_rules(rules)
# Show how many rules remain.
display(len(rules))
# Keep a copy of the rules at this stage: `rules` is overwritten again by
# simplify_rules later, while the clustering cells work from `for_cluster`.
for_cluster = rules.copy()
rules
Out[20]:
In [21]:
# Load the train and test splits; both files use ";" as the field separator
# and "," as the decimal mark.
density_train, density_test = (
    pd.read_csv(csv_path, sep=";", decimal=",")
    for csv_path in ("data/density_train.csv", "data/density_test.csv")
)

# Preview the first three rows of each split.
display(density_train.head(3), density_test.head(3))
In [22]:
from src.rules import simplify_rules
# Simplify the rules with the project helper, which also receives the training data.
# NOTE(review): how the training frame is used is defined in src.rules.simplify_rules — confirm there.
rules = simplify_rules(density_train, rules)
# Render the simplified rules as the cell output.
rules
Out[22]:
In [23]:
import numpy as np
from skfuzzy import control as ctrl
import skfuzzy as fuzz
# Fuzzy input variables (antecedents) and the output variable (consequent).
# Temperature takes the sorted distinct "T" values seen in the training data
# as its universe; the other universes are fixed numeric grids.
temp = ctrl.Antecedent(density_train["T"].sort_values().unique(), "temp")
al = ctrl.Antecedent(np.arange(0, 0.3, 0.005), "al")
ti = ctrl.Antecedent(np.arange(0, 0.3, 0.005), "ti")
density = ctrl.Consequent(np.arange(1.03, 1.22, 0.00001), "density")

# Auto-populate the membership functions (3 per input, 5 for the output)
# and plot every variable right after it has been populated.
for fuzzy_var, n_terms in ((temp, 3), (al, 3), (ti, 3), (density, 5)):
    fuzzy_var.automf(n_terms, variable_type="quant")
    fuzzy_var.view()
In [24]:
from src.rules import get_fuzzy_rules
# Map rule variable names to the fuzzy variables built above. The keys "Al2O3",
# "TiO2" and "T" are the same column names fuzzy_pred reads from the data rows;
# "consequent" points at the output variable.
fuzzy_variables = {"Al2O3": al, "TiO2": ti, "T": temp, "consequent": density}
# Translate the simplified rules into fuzzy rules with the project helper.
# NOTE(review): the element type of the result is defined in src.rules — confirm there.
fuzzy_rules = get_fuzzy_rules(rules, fuzzy_variables)
# Build the control system from those rules and a simulation object on top of it.
fuzzy_cntrl = ctrl.ControlSystem(fuzzy_rules)
# lenient=False is passed explicitly; see the scikit-fuzzy documentation for its exact effect.
sim = ctrl.ControlSystemSimulation(fuzzy_cntrl, lenient=False)
# Show the number of fuzzy rules, then the rules themselves as the cell output.
display(len(fuzzy_rules))
fuzzy_rules
Out[24]:
In [25]:
# Smoke-test the controller on one hand-picked input point.
for input_name, value in (("temp", 25), ("al", 0.0), ("ti", 0.0)):
    sim.input[input_name] = value
sim.compute()

# Print the simulation state, show the computed output value and plot the
# output variable with this simulation's result.
sim.print_state()
display(sim.output["density"])
density.view(sim=sim)
In [26]:
from sklearn import metrics
import math
def fuzzy_pred(row):
    """Run the shared fuzzy simulation `sim` on one data row.

    Reads the "T", "Al2O3" and "TiO2" entries of `row`, feeds them to the
    "temp", "al" and "ti" inputs of the module-level simulation, computes it
    and returns the resulting "density" output.
    """
    for input_name, column in (("temp", "T"), ("al", "Al2O3"), ("ti", "TiO2")):
        sim.input[input_name] = row[column]
    sim.compute()
    return sim.output["density"]
def rmse(row):
    """Return the root of the squared error between row["Real"] and row["Inferred"].

    The error is computed for this single row only, by passing one-element
    lists to sklearn's mean_squared_error and taking the square root.
    """
    squared_error = metrics.mean_squared_error([row["Real"]], [row["Inferred"]])
    return math.sqrt(squared_error)
# Evaluate the fuzzy system on the training split. assign() works on a copy of
# density_train and adds the columns in order, so each later column can use
# the ones added before it.
result_train = density_train.assign(
    Real=lambda frame: frame["Density"],
    Inferred=lambda frame: frame.apply(fuzzy_pred, axis=1),
    RMSE=lambda frame: frame.apply(rmse, axis=1),
)
result_train.head(15)
Out[26]:
In [27]:
# Evaluate the fuzzy system on the test split, same steps as for the training split.
# The per-row error is rounded numerically to 3 decimals so the column stays
# numeric rather than being turned into formatted strings.
result_test = density_test.assign(
    Real=lambda frame: frame["Density"],
    Inferred=lambda frame: frame.apply(fuzzy_pred, axis=1),
    RMSE=lambda frame: frame.apply(rmse, axis=1),
).round({"RMSE": 3})
result_test
Out[27]:
In [28]:
# Aggregate quality metrics of the fuzzy system, computed over whole splits.
# NOTE(review): "RMAE_test" is the square root of the mean absolute error —
# confirm this is the intended definition of RMAE.
rmetrics = {
    "RMSE_train": math.sqrt(
        metrics.mean_squared_error(result_train["Real"], result_train["Inferred"])
    ),
    "RMSE_test": math.sqrt(
        metrics.mean_squared_error(result_test["Real"], result_test["Inferred"])
    ),
    "RMAE_test": math.sqrt(
        metrics.mean_absolute_error(result_test["Real"], result_test["Inferred"])
    ),
    "R2_test": metrics.r2_score(result_test["Real"], result_test["Inferred"]),
}
rmetrics
Out[28]:
In [29]:
from src.rules import get_features, vectorize_rules
# Derive the feature list for clustering from the rule snapshot `for_cluster`.
# This rebinds `features`, which earlier held the training column names.
# NOTE(review): the meaning of the ["T"] argument is defined in src.rules.get_features — confirm there.
features = get_features(for_cluster, ["T"])
print(features)
# Turn the rules into a table with the project helper and preview the first five rows.
# The result has a "consequent" column, which the clustering cell drops before fitting.
df_rules = vectorize_rules(for_cluster, features)
df_rules.head(5)
Out[29]:
In [30]:
from src.cluster_helper import draw_best_clusters_plot, get_best_clusters_num
# Fixed seed, reused for the cluster-count search and for the final KMeans fit.
random_state = 9

# Cluster on the rule features only. drop() already returns a new frame, so
# df_rules is left untouched without an extra copy().
X = df_rules.drop(columns=["consequent"])

# Score the candidate cluster counts with the project helper and plot them.
clusters_score = get_best_clusters_num(X, random_state)
display(clusters_score)
draw_best_clusters_plot(clusters_score)

# Pick the cluster count with the highest score. max() scans the items once and,
# like the stable descending sort it replaces, keeps the first entry on ties.
clusters_num = max(clusters_score.items(), key=lambda item: item[1])[0]
display(f"The best clusters count is {clusters_num}")
In [31]:
from sklearn import cluster
from src.cluster_helper import print_cluster_result
# Fit KMeans with the selected cluster count; fit() returns the estimator
# itself, so construction and fitting can be chained.
kmeans = cluster.KMeans(n_clusters=clusters_num, random_state=random_state).fit(X)

# Report the cluster assignment of every rule vector via the project helper.
print_cluster_result(X, clusters_num, kmeans.labels_)