From bb983b53dda603fa83802a19ddff6866cc6398b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido?= Date: Tue, 18 Sep 2018 14:59:03 -0300 Subject: [PATCH] Fuzzyfy optimization with binary search. Several minor parameter refactorings --- pyFTS/common/FuzzySet.py | 156 +++++++++++++++++++--------- pyFTS/common/Membership.py | 11 ++ pyFTS/models/hofts.py | 6 +- pyFTS/models/multivariate/common.py | 8 +- pyFTS/models/pwfts.py | 4 +- pyFTS/tests/general.py | 13 ++- 6 files changed, 137 insertions(+), 61 deletions(-) diff --git a/pyFTS/common/FuzzySet.py b/pyFTS/common/FuzzySet.py index 69168d2..0ca7467 100644 --- a/pyFTS/common/FuzzySet.py +++ b/pyFTS/common/FuzzySet.py @@ -63,134 +63,192 @@ class FuzzySet(object): return self.name + ": " + str(self.mf.__name__) + "(" + str(self.parameters) + ")" -def set_ordered(fuzzySets): - """Order a fuzzy set list by their centroids""" - if len(fuzzySets) > 0: - tmp1 = [fuzzySets[k] for k in fuzzySets.keys()] +def __binary_search(x, fuzzy_sets, ordered_sets): + """ + Search for elegible fuzzy sets to fuzzyfy x + + :param x: input value to be fuzzyfied + :param fuzzy_sets: a dictionary where the key is the fuzzy set name and the value is the fuzzy set object. + :param ordered_sets: a list with the fuzzy sets names ordered by their centroids. + :return: A list with the best fuzzy sets that may contain x + """ + max_len = len(fuzzy_sets) + first = 0 + last = max_len + + while first <= last: + midpoint = (first + last) // 2 + fs = ordered_sets[midpoint] + fs1 = ordered_sets[midpoint - 1] if midpoint > 0 else ordered_sets[0] + fs2 = ordered_sets[midpoint + 1] if midpoint < max_len else ordered_sets[max_len] + if fuzzy_sets[fs1].centroid <= x <= fuzzy_sets[fs2].centroid: + return (midpoint-1, midpoint, midpoint+1) + else: + if x < fuzzy_sets[fs].centroid: + last = midpoint - 1 + else: + first = midpoint + 1 + + +def fuzzyfy(data, partitioner, **kwargs): + alpha_cut = kwargs.get('alpha_cut', 0.) + mode = kwargs.get('mode', 'sets') + method = kwargs.get('method', 'fuzzy') + if isinstance(data, (list, np.ndarray)): + if mode == 'vector': + return fuzzyfy_instances(data, partitioner.sets, partitioner.ordered_sets) + else: + return fuzzyfy_series(data, partitioner.sets, method, alpha_cut, partitioner.ordered_sets) + else: + if mode == 'vector': + return fuzzyfy_instance(data, partitioner.sets, partitioner.ordered_sets) + else: + return get_fuzzysets(data, partitioner.sets, partitioner.ordered_sets, alpha_cut) + + +def set_ordered(fuzzy_sets): + """ + Order a fuzzy set list by their centroids + + :param fuzzy_sets: a dictionary where the key is the fuzzy set name and the value is the fuzzy set object. + :return: a list with the fuzzy sets names ordered by their centroids. + """ + if len(fuzzy_sets) > 0: + tmp1 = [fuzzy_sets[k] for k in fuzzy_sets.keys()] return [k.name for k in sorted(tmp1, key=lambda x: x.centroid)] -def fuzzyfy_instance(inst, fuzzySets, ordered_sets=None): +def fuzzyfy_instance(inst, fuzzy_sets, ordered_sets=None): """ Calculate the membership values for a data point given fuzzy sets :param inst: data point - :param fuzzySets: dict of fuzzy sets + :param fuzzy_sets: a dictionary where the key is the fuzzy set name and the value is the fuzzy set object. + :param ordered_sets: a list with the fuzzy sets names ordered by their centroids. :return: array of membership values """ if ordered_sets is None: - ordered_sets = set_ordered(fuzzySets) + ordered_sets = set_ordered(fuzzy_sets) - mv = [] - for key in ordered_sets: - mv.append( fuzzySets[key].membership(inst)) - return np.array(mv) + mv = np.zeros(len(fuzzy_sets)) + + for ix in __binary_search(inst, fuzzy_sets, ordered_sets): + mv[ix] = fuzzy_sets[ordered_sets[ix]].membership(inst) + + return mv -def fuzzyfy_instances(data, fuzzySets, ordered_sets=None): +def fuzzyfy_instances(data, fuzzy_sets, ordered_sets=None): """ Calculate the membership values for a data point given fuzzy sets :param inst: data point - :param fuzzySets: dict of fuzzy sets + :param fuzzy_sets: a dictionary where the key is the fuzzy set name and the value is the fuzzy set object. + :param ordered_sets: a list with the fuzzy sets names ordered by their centroids. :return: array of membership values """ ret = [] if ordered_sets is None: - ordered_sets = set_ordered(fuzzySets) + ordered_sets = set_ordered(fuzzy_sets) for inst in data: - mv = np.array([fuzzySets[key].membership(inst) for key in ordered_sets]) + mv = fuzzyfy_instance(inst, fuzzy_sets, ordered_sets) ret.append(mv) return ret -def get_fuzzysets(inst, fuzzySets, ordered_sets=None, alpha_cut=0.0): +def get_fuzzysets(inst, fuzzy_sets, ordered_sets=None, alpha_cut=0.0): """ Return the fuzzy sets which membership value for a inst is greater than the alpha_cut :param inst: data point - :param fuzzySets: dict of fuzzy sets + :param fuzzy_sets: a dictionary where the key is the fuzzy set name and the value is the fuzzy set object. + :param ordered_sets: a list with the fuzzy sets names ordered by their centroids. :param alpha_cut: Minimal membership to be considered on fuzzyfication process :return: array of membership values """ if ordered_sets is None: - ordered_sets = set_ordered(fuzzySets) + ordered_sets = set_ordered(fuzzy_sets) - fs = [key for key in ordered_sets if fuzzySets[key].membership(inst) > alpha_cut] + fs = [ordered_sets[ix] + for ix in __binary_search(inst, fuzzy_sets, ordered_sets) + if fuzzy_sets[ordered_sets[ix]].membership(inst) > alpha_cut] return fs -def get_maximum_membership_fuzzyset(inst, fuzzySets, ordered_sets=None): + +def get_maximum_membership_fuzzyset(inst, fuzzy_sets, ordered_sets=None): """ Fuzzify a data point, returning the fuzzy set with maximum membership value :param inst: data point - :param fuzzySets: dict of fuzzy sets + :param fuzzy_sets: a dictionary where the key is the fuzzy set name and the value is the fuzzy set object. + :param ordered_sets: a list with the fuzzy sets names ordered by their centroids. :return: fuzzy set with maximum membership """ if ordered_sets is None: - ordered_sets = set_ordered(fuzzySets) - mv = np.array([fuzzySets[key].membership(inst) for key in ordered_sets]) + ordered_sets = set_ordered(fuzzy_sets) + mv = np.array([fuzzy_sets[key].membership(inst) for key in ordered_sets]) key = ordered_sets[np.argwhere(mv == max(mv))[0, 0]] - return fuzzySets[key] + return fuzzy_sets[key] -def get_maximum_membership_fuzzyset_index(inst, fuzzySets): +def get_maximum_membership_fuzzyset_index(inst, fuzzy_sets): """ Fuzzify a data point, returning the fuzzy set with maximum membership value :param inst: data point - :param fuzzySets: dict of fuzzy sets + :param fuzzy_sets: dict of fuzzy sets :return: fuzzy set with maximum membership """ - mv = fuzzyfy_instance(inst, fuzzySets) + mv = fuzzyfy_instance(inst, fuzzy_sets) return np.argwhere(mv == max(mv))[0, 0] -def fuzzyfy_series_old(data, fuzzySets, method='maximum'): +def fuzzyfy_series_old(data, fuzzy_sets, method='maximum'): fts = [] for item in data: - fts.append(get_maximum_membership_fuzzyset(item, fuzzySets).name) + fts.append(get_maximum_membership_fuzzyset(item, fuzzy_sets).name) return fts -def fuzzyfy_series(data, fuzzySets, method='maximum', alpha_cut=0.0): +def fuzzyfy_series(data, fuzzy_sets, method='maximum', alpha_cut=0.0, ordered_sets=None): fts = [] - ordered_sets = set_ordered(fuzzySets) + if ordered_sets is None: + ordered_sets = set_ordered(fuzzy_sets) for t, i in enumerate(data): - mv = np.array([fuzzySets[key].membership(i) for key in ordered_sets]) + mv = fuzzyfy_instance(i, fuzzy_sets, ordered_sets) if len(mv) == 0: - sets = check_bounds(i, fuzzySets.items(), ordered_sets) + sets = check_bounds(i, fuzzy_sets.items(), ordered_sets) else: if method == 'fuzzy': ix = np.ravel(np.argwhere(mv > alpha_cut)) - sets = [fuzzySets[ordered_sets[i]].name for i in ix] + sets = [fuzzy_sets[ordered_sets[i]].name for i in ix] elif method == 'maximum': mx = max(mv) ix = np.ravel(np.argwhere(mv == mx)) - sets = fuzzySets[ordered_sets[ix[0]]].name + sets = fuzzy_sets[ordered_sets[ix[0]]].name fts.append(sets) return fts -def grant_bounds(data, sets, ordered_sets): - if data < sets[ordered_sets[0]].lower: - return sets[ordered_sets[0]].lower - elif data > sets[ordered_sets[-1]].upper: - return sets[ordered_sets[-1]].upper +def grant_bounds(data, fuzzy_sets, ordered_sets): + if data < fuzzy_sets[ordered_sets[0]].lower: + return fuzzy_sets[ordered_sets[0]].lower + elif data > fuzzy_sets[ordered_sets[-1]].upper: + return fuzzy_sets[ordered_sets[-1]].upper else: return data -def check_bounds(data, sets, ordered_sets): - if data < sets[ordered_sets[0]].lower: - return sets[ordered_sets[0]] - elif data > sets[ordered_sets[-1]].upper: - return sets[ordered_sets[-1]] +def check_bounds(data, fuzzy_sets, ordered_sets): + if data < fuzzy_sets[ordered_sets[0]].lower: + return fuzzy_sets[ordered_sets[0]] + elif data > fuzzy_sets[ordered_sets[-1]].upper: + return fuzzy_sets[ordered_sets[-1]] -def check_bounds_index(data, sets, ordered_sets): - if data < sets[ordered_sets[0]].get_lower(): +def check_bounds_index(data, fuzzy_sets, ordered_sets): + if data < fuzzy_sets[ordered_sets[0]].get_lower(): return 0 - elif data > sets[ordered_sets[-1]].get_upper(): - return len(sets) -1 + elif data > fuzzy_sets[ordered_sets[-1]].get_upper(): + return len(fuzzy_sets) - 1 diff --git a/pyFTS/common/Membership.py b/pyFTS/common/Membership.py index dbf44cb..7a2355b 100644 --- a/pyFTS/common/Membership.py +++ b/pyFTS/common/Membership.py @@ -77,3 +77,14 @@ def sigmf(x, parameters): :return: """ return 1 / (1 + math.exp(-parameters[0] * (x - parameters[1]))) + + +def singleton(x, parameters): + """ + Singleton membership function, a single value fuzzy function + + :param x: + :param parameters: a list with one real value + :returns + """ + return x == parameters[0] \ No newline at end of file diff --git a/pyFTS/models/hofts.py b/pyFTS/models/hofts.py index cb339f1..c6b724b 100644 --- a/pyFTS/models/hofts.py +++ b/pyFTS/models/hofts.py @@ -68,8 +68,7 @@ class HighOrderFTS(fts.FTS): flrgs = [] for ct, o in enumerate(self.lags): - lhs = [key for key in self.partitioner.ordered_sets - if self.sets[key].membership(sample[o-1]) > self.alpha_cut] + lhs = FuzzySet.fuzzyfy(sample[o-1], partitioner=self.partitioner, mode="sets", alpha_cut=self.alpha_cut) lags[ct] = lhs root = tree.FLRGTreeNode(None) @@ -95,8 +94,7 @@ class HighOrderFTS(fts.FTS): sample = data[k - self.max_lag: k] - rhs = [key for key in self.partitioner.ordered_sets - if self.sets[key].membership(data[k]) > self.alpha_cut] + rhs = FuzzySet.fuzzyfy(data[k], partitioner=self.partitioner, mode="sets", alpha_cut=self.alpha_cut) flrgs = self.generate_lhs_flrg(sample) diff --git a/pyFTS/models/multivariate/common.py b/pyFTS/models/multivariate/common.py index 1800d88..f3869ff 100644 --- a/pyFTS/models/multivariate/common.py +++ b/pyFTS/models/multivariate/common.py @@ -1,11 +1,9 @@ import numpy as np import pandas as pd - +from pyFTS.common import FuzzySet def fuzzyfy_instance(data_point, var): - mv = np.array([var.partitioner.sets[key].membership(data_point) for key in var.partitioner.ordered_sets]) - ix = np.ravel(np.argwhere(mv > var.alpha_cut)) - sets = [(var.name, var.partitioner.ordered_sets[i]) for i in ix] - return sets + return FuzzySet.fuzzyfy(data_point, var.partitioner, mode='sets', method='fuzzy', alpha_cut=var.alpha_cut) + diff --git a/pyFTS/models/pwfts.py b/pyFTS/models/pwfts.py index 9128608..25630bc 100644 --- a/pyFTS/models/pwfts.py +++ b/pyFTS/models/pwfts.py @@ -128,8 +128,8 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): flrgs = [] for ct, o in enumerate(self.lags): - lhs = [key for key in self.partitioner.ordered_sets - if self.sets[key].membership(sample[o-1]) > self.alpha_cut] + lhs = FuzzySet.fuzzyfy(sample[o - 1], partitioner=self.partitioner, mode="sets", alpha_cut=self.alpha_cut) + lags[ct] = lhs root = tree.FLRGTreeNode(None) diff --git a/pyFTS/tests/general.py b/pyFTS/tests/general.py index 1e9c775..b52dc79 100644 --- a/pyFTS/tests/general.py +++ b/pyFTS/tests/general.py @@ -7,6 +7,8 @@ import matplotlib.pylab as plt #from mpl_toolkits.mplot3d import Axes3D import pandas as pd + +from pyFTS.common import Util as cUtil, FuzzySet from pyFTS.common import Transformations tdiff = Transformations.Differential(1) @@ -21,8 +23,16 @@ dataset = TAIEX.get_data() from pyFTS.partitioners import Grid, Util as pUtil partitioner = Grid.GridPartitioner(data=dataset[:2000], npart=20) #, transformation=tdiff) +print(partitioner) + +#print(FuzzySet.__binary_search(7000, partitioner.sets, partitioner.ordered_sets)) + +print(FuzzySet.fuzzyfy([5000, 7000, 8000], partitioner, mode='vector', method='fuzzy', alpha_cut=.5)) +print(FuzzySet.fuzzyfy([5000, 7000, 8000], partitioner, mode='sets', method='fuzzy', alpha_cut=.5)) + + +""" -from pyFTS.common import Util as cUtil from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil, Measures, knn, quantreg, arima, naive from pyFTS.models import pwfts, song, chen, ifts, hofts @@ -250,3 +260,4 @@ f, ax = plt.subplots(1, 1, figsize=[20,15]) bchmk.plot_distribution(ax, 'blue', tmp, f, 0, reference_data=dataset[train_split:train_split+200]) ''' +""" \ No newline at end of file