import enum
import sys
from functools import reduce
from operator import and_
from typing import Dict, List, Tuple

import numpy as np
import pandas as pd
from skfuzzy.control.fuzzyvariable import FuzzyVariable
from skfuzzy.control.rule import Rule as FuzzyRule
from skfuzzy.control.term import Term
from sklearn.tree._tree import TREE_UNDEFINED  # type: ignore


class ComparisonType(enum.Enum):
    LESS = "<="
    GREATER = ">"
    EQUALS = "="


class RuleAtom:
    """A single condition of the form ``<variable> <operator> <value>``."""

    def __init__(self, variable: str, type: ComparisonType, value: float) -> None:
        self._variable = variable
        self._type = type
        self._value = value

    def get_variable(self) -> str:
        return self._variable

    def get_type(self) -> ComparisonType:
        return self._type

    def get_value(self) -> float:
        return self._value

    def __repr__(self) -> str:
        return f"({self._variable} {self._type.value} {np.round(self._value, 3)})"

    def __eq__(self, other: object) -> bool:
        if self is other:
            return True
        if not isinstance(other, RuleAtom):
            return False
        return (
            self._variable == other._variable
            and self._type == other._type
            and self._value == other._value
        )


class Rule:
    """A crisp rule: a conjunction of RuleAtoms implying a consequent value."""

    def __init__(self, antecedent: List[RuleAtom], consequent: float) -> None:
        self._antecedent = antecedent
        self._consequent = consequent

    def get_antecedent(self) -> List[RuleAtom]:
        return self._antecedent

    def set_antecedent(self, antecedent: List[RuleAtom]):
        self._antecedent = []
        self._antecedent.extend(antecedent)

    def get_consequent(self) -> float:
        return self._consequent

    def set_consequent(self, value: float):
        self._consequent = value

    def __repr__(self) -> str:
        conditions = " and ".join(str(atom) for atom in self._antecedent)
        return f"if {conditions} -> {np.round(self._consequent, 3)}"


# https://mljar.com/blog/extract-rules-decision-tree/
def get_rules(tree, feature_names, classes=None) -> List[Rule]:
    """Extract one crisp Rule per leaf of a fitted sklearn decision tree."""
    tree_ = tree.tree_
    feature_name = [
        feature_names[i] if i != TREE_UNDEFINED else "undefined!" for i in tree_.feature
    ]

    rules: List[Rule] = []
    antecedent: List[RuleAtom] = []

    def recurse(node, antecedent, rules):
        if tree_.feature[node] != TREE_UNDEFINED:
            # Internal node: branch on "feature <= threshold" / "feature > threshold".
            name = feature_name[node]
            threshold = tree_.threshold[node]
            p1, p2 = list(antecedent), list(antecedent)
            p1.append(RuleAtom(name, ComparisonType.LESS, threshold))
            recurse(tree_.children_left[node], p1, rules)
            p2.append(RuleAtom(name, ComparisonType.GREATER, threshold))
            recurse(tree_.children_right[node], p2, rules)
        else:
            # Leaf node: the consequent is the leaf value (regression)
            # or the majority class (classification).
            if classes is None:
                rules.append(Rule(antecedent, tree_.value[node][0][0]))
            else:
                value = np.argmax(tree_.value[node][0])
                rules.append(Rule(antecedent, classes[value]))

    recurse(0, antecedent, rules)

    # sort rules by consequent value
    values = [rule.get_consequent() for rule in rules]
    sorted_index = list(np.argsort(values))
    rules = [rules[i] for i in sorted_index]

    return rules


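# Usage sketch (illustrative only, not part of this module's API): extract crisp
# rules from a fitted regression tree and clean them up with the helpers defined
# below. `X` (a pandas DataFrame) and `y` are assumed to exist; the estimator
# settings are arbitrary.
#
#   from sklearn.tree import DecisionTreeRegressor
#
#   tree = DecisionTreeRegressor(max_depth=4).fit(X, y)
#   crisp_rules = get_rules(tree, feature_names=list(X.columns))
#   crisp_rules = delete_same_rules(normalise_rules(crisp_rules))

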
# Collapse repeated conditions on the same variable. For example:
#   if (Al2O3 <= 0.175) and (TiO2 <= 0.175) and (T > 32.5) and (TiO2 <= 0.025)
#      and (Al2O3 <= 0.025) and (T > 55.0) and (T > 62.5)
# becomes
#   if (Al2O3 <= 0.175) and (TiO2 <= 0.175) and (T > 32.5)
# i.e. the maximum threshold is kept for "<=" atoms and the minimum for ">" atoms.
def normalise_rules(rules: List[Rule]) -> List[Rule]:
    for rule in rules:
        bounds: Dict[str, Dict[ComparisonType, float]] = {}
        new_antecedent: List[RuleAtom] = []
        for atom in rule.get_antecedent():
            old_value: float | None = bounds.get(atom.get_variable(), {}).get(
                atom.get_type(), None
            )
            new_value = atom.get_value()
            if atom.get_type() == ComparisonType.GREATER:
                new_value = min(
                    old_value if old_value is not None else sys.maxsize,
                    atom.get_value(),
                )
            if atom.get_type() == ComparisonType.LESS:
                new_value = max(
                    old_value if old_value is not None else -sys.maxsize - 1,
                    atom.get_value(),
                )
            if bounds.get(atom.get_variable(), None) is None:
                bounds[atom.get_variable()] = {}
            bounds[atom.get_variable()][atom.get_type()] = new_value
        for key_var, by_type in bounds.items():
            for key_type, value in by_type.items():
                new_antecedent.append(RuleAtom(key_var, key_type, value))
        rule.set_antecedent(new_antecedent)
    return rules


def _is_same_rules(rule1: Rule, rule2: Rule) -> bool:
    antecedent1 = rule1.get_antecedent()
    antecedent2 = rule2.get_antecedent()
    if len(antecedent1) != len(antecedent2):
        return False
    match: int = len([atom for atom in antecedent1 if atom not in antecedent2])
    return match == 0


def _get_rules_accum(rules: List[Rule]) -> Dict[str, Dict[str, float]]:
    """Accumulate the sum ("V") and count ("C") of consequents per antecedent key."""
    accum_dict: Dict[str, Dict[str, float]] = {}
    for rule in rules:
        key = str(rule.get_antecedent())
        if accum_dict.get(key, None) is None:
            accum_dict[key] = {}
        cv = accum_dict[key].get("V", 0)
        cv += rule.get_consequent()
        cc = accum_dict[key].get("C", 0)
        cc += 1
        accum_dict[key]["V"] = cv
        accum_dict[key]["C"] = cc
    return accum_dict


def _recalculate_consequents(
    accum_dict: Dict[str, Dict[str, float]], rules: List[Rule]
) -> List[Rule]:
    """Replace each rule's consequent with the mean over rules sharing its antecedent."""
    for rule in rules:
        key: str = str(rule.get_antecedent())
        value: float = accum_dict[key]["V"]
        count: int = int(accum_dict[key]["C"])
        if count == 1:
            continue
        rule.set_consequent(value / count)
    return rules


def delete_same_rules(rules: List[Rule]) -> List[Rule]:
    """Drop rules with duplicate antecedents and average the consequents of duplicates."""
    same_rules: List[int] = []
    accum_dict: Dict[str, Dict[str, float]] = _get_rules_accum(rules)
    for rule1_index, rule1 in enumerate(rules):
        for rule2_index, rule2 in enumerate(rules):
            if rule1_index >= rule2_index:
                continue
            if _is_same_rules(rule1, rule2):
                same_rules.append(rule1_index)
                break
    cleared_rules = [
        rule for index, rule in enumerate(rules) if index not in same_rules
    ]
    return _recalculate_consequents(accum_dict, cleared_rules)


def get_features(rules: List[Rule], exclude: List[str] | None = None) -> List[str]:
    """Collect the distinct rule atoms (as strings) used across all rules."""
    atoms: List[str] = []
    for rule in rules:
        for atom in rule.get_antecedent():
            if exclude is not None and atom.get_variable() in exclude:
                continue
            if str(atom) in atoms:
                continue
            atoms.append(str(atom))
    atoms.sort()
    return atoms


def vectorize_rules(rules: List[Rule], features: List[str]) -> pd.DataFrame:
    """Encode each rule as a binary vector over `features`, plus its consequent."""
    columns: List[str] = []
    columns.append("rule")
    columns.extend(features)
    columns.append("consequent")
    df = pd.DataFrame(columns=columns)
    for rule in rules:
        data = [str(rule)]
        # 1 if the rule's antecedent contains the atom, 0 otherwise.
        mask = np.isin(list(features), [str(atom) for atom in rule.get_antecedent()])
        data = np.append(data, mask.astype(int))
        data = np.append(data, rule.get_consequent())
        df.loc[len(df)] = pd.Series(data=data, index=df.columns)
    df = df.set_index("rule")
    return df


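# Usage sketch (illustrative only): the vectorised rules can be clustered, e.g.
# with scikit-learn's KMeans, and the labels fed to simplify_and_group_rules()
# below. `crisp_rules` and `X` are assumed to exist; note that np.append() above
# stores everything as strings, so the frame is cast back to float first.
#
#   from sklearn.cluster import KMeans
#
#   features = get_features(crisp_rules)
#   vectors = vectorize_rules(crisp_rules, features).astype(float)
#   km = KMeans(n_clusters=3, n_init=10).fit(vectors)
#   grouped = simplify_and_group_rules(X, crisp_rules, 3, km.labels_)

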
def _get_clustered_rules(
    rules: List[Rule], clusters_num: int, labels: np.ndarray
) -> List[List[Rule]]:
    clustered_rules: List[List[Rule]] = []
    for cluster_id in range(clusters_num):
        cluster_indices = np.where(labels == cluster_id)[0]
        clustered_rules.append([rules[idx] for idx in cluster_indices])
    return clustered_rules


def _get_variables_minmax(X: pd.DataFrame) -> Dict[str, Tuple[float, float]]:
    intervals: Dict[str, Tuple[float, float]] = {}
    for column in X.columns:
        intervals[column] = (X[column].min(), X[column].max())
    return intervals


def _get_variables_interval(
    antecedent: List[RuleAtom],
) -> Dict[str, Tuple[float | None, float | None]]:
    """Map each variable to the (lower, upper) bound implied by the antecedent."""
    intervals: Dict[str, Tuple[float | None, float | None]] = {}
    for atom in antecedent:
        if intervals.get(atom.get_variable(), None) is None:
            intervals[atom.get_variable()] = (None, None)
        if atom.get_type() == ComparisonType.GREATER:
            intervals[atom.get_variable()] = (
                atom.get_value(),
                intervals[atom.get_variable()][1],
            )
        if atom.get_type() == ComparisonType.LESS:
            intervals[atom.get_variable()] = (
                intervals[atom.get_variable()][0],
                atom.get_value(),
            )
    return intervals


def simplify_and_group_rules(
    X: pd.DataFrame, rules: List[Rule], clusters_num: int, clusters_labels: np.ndarray
):
    """Group rules by cluster and replace each variable's interval with a single point."""
    minmax = _get_variables_minmax(X)

    new_rules: List[List[Rule]] = []
    for cluster in _get_clustered_rules(rules, clusters_num, clusters_labels):
        cl_rules: List[Rule] = []
        for rule in cluster:
            intervals = _get_variables_interval(rule.get_antecedent())
            new_atoms = []
            for key, value in intervals.items():
                val: float = 0
                # Only an upper bound: fall back to the variable's observed minimum.
                if value[0] is None and value[1] is not None:
                    val = minmax[key][0]
                # Only a lower bound: fall back to the variable's observed maximum.
                if value[1] is None and value[0] is not None:
                    val = minmax[key][1]
                # Both bounds present: take the midpoint of the interval.
                if value[0] is not None and value[1] is not None:
                    val = (value[0] + value[1]) / 2
                new_atoms.append(RuleAtom(key, ComparisonType.EQUALS, val))
            cl_rules.append(Rule(new_atoms, rule.get_consequent()))
        new_rules.append(cl_rules)
    return new_rules


def _get_fuzzy_rule_atom(
    fuzzy_variable: FuzzyVariable, value: float
) -> Tuple[Term, float]:
    """Return the term of `fuzzy_variable` with the highest membership at `value`."""
    values = {}
    for term in fuzzy_variable.terms:
        mval = np.interp(value, fuzzy_variable.universe, fuzzy_variable[term].mf)
        values[term] = mval
    best_value = sorted(values.items(), key=lambda x: x[1], reverse=True)[0]
    return (fuzzy_variable[best_value[0]], best_value[1])


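# Usage sketch (illustrative only): _get_fuzzy_rules() and get_fuzzy_rules() below
# expect a dict of skfuzzy variables keyed by feature name, plus a "consequent"
# entry. One possible way to build it (variable names, universes and term counts
# are arbitrary):
#
#   from skfuzzy import control as ctrl
#
#   temperature = ctrl.Antecedent(np.arange(0, 101, 1), "T")
#   temperature.automf(3)
#   output = ctrl.Consequent(np.arange(0, 1.01, 0.01), "consequent")
#   output.automf(5)
#   fuzzy_variables = {"T": temperature, "consequent": output}

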
def _get_fuzzy_rules(
    rules: List[Rule], fuzzy_variables: Dict[str, FuzzyVariable]
) -> List[Tuple[List[Term], Term, float]]:
    fuzzy_rules: List[Tuple[List[Term], Term, float]] = []
    for rule in rules:
        antecedent = []
        for atom in rule.get_antecedent():
            if fuzzy_variables.get(atom.get_variable(), None) is None:
                continue
            antecedent.append(
                _get_fuzzy_rule_atom(
                    fuzzy_variables[atom.get_variable()], atom.get_value()
                )
            )
        consequent = _get_fuzzy_rule_atom(
            fuzzy_variables["consequent"], rule.get_consequent()
        )[0]
        fuzzy_rules.append(
            (
                # FuzzyRule(reduce(and_, [atom[0] for atom in antecedent]), consequent),
                [atom[0] for atom in antecedent],
                consequent,
                sum([atom[1] for atom in antecedent]),
            )
        )
    return fuzzy_rules


def _delete_same_fuzzy_rules(
    rules_cluster: List[Tuple[List[Term], Term, float]]
) -> List[Tuple[List[Term], Term, float]]:
    same_rules: List[int] = []
    for rule1_index, rule1 in enumerate(rules_cluster):
        for rule2_index, rule2 in enumerate(rules_cluster):
            if rule1_index >= rule2_index:
                continue
            # Identical rules (same antecedent and consequent): keep only one.
            if str(rule1[0]) == str(rule2[0]) and str(rule1[1]) == str(rule2[1]):
                same_rules.append(rule1_index)
                break
            # If the antecedents are equal but the consequents differ,
            # remove the rule with the higher antecedent weight.
            if str(rule1[0]) == str(rule2[0]) and rule1[2] <= rule2[2]:
                same_rules.append(rule2_index)
                break
            if str(rule1[0]) == str(rule2[0]) and rule1[2] > rule2[2]:
                same_rules.append(rule1_index)
                break
    return [rule for index, rule in enumerate(rules_cluster) if index not in same_rules]


def get_fuzzy_rules(
    clustered_rules: List[List[Rule]], fuzzy_variables: Dict[str, FuzzyVariable]
) -> List[FuzzyRule]:
    """Convert clustered crisp rules into skfuzzy Rules, dropping duplicates per cluster."""
    fuzzy_rules: List[List[Tuple[List[Term], Term, float]]] = []
    fuzzy_rules = [
        _get_fuzzy_rules(rules, fuzzy_variables) for rules in clustered_rules
    ]
    fuzzy_rules = [_delete_same_fuzzy_rules(cluster) for cluster in fuzzy_rules]
    return [
        FuzzyRule(reduce(and_, item[0]), item[1])
        for cluster in fuzzy_rules
        for item in cluster
    ]
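
# Usage sketch (illustrative only): the resulting rules can be plugged into a
# skfuzzy control system. `grouped` and `fuzzy_variables` are assumed to come
# from the steps sketched above; "consequent" is the label of the output variable.
#
#   from skfuzzy import control as ctrl
#
#   rules = get_fuzzy_rules(grouped, fuzzy_variables)
#   system = ctrl.ControlSystemSimulation(ctrl.ControlSystem(rules))
#   system.input["T"] = 42.0
#   system.compute()
#   print(system.output["consequent"])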