From d3fa1ea534de5a51b11c70c3c21f2209349d70ce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido?=
Date: Tue, 9 Apr 2019 15:17:36 -0300
Subject: [PATCH] Replacing the binary_search for a KDTree on fuzzyfication and other minor improvements on partitioner

---
 pyFTS/common/FuzzySet.py          |  14 ++--
 pyFTS/partitioners/partitioner.py | 130 +++++++++++++++++++++++++++++-
 pyFTS/tests/general.py            |  25 +++---
 3 files changed, 148 insertions(+), 21 deletions(-)

diff --git a/pyFTS/common/FuzzySet.py b/pyFTS/common/FuzzySet.py
index 160d595..2855a47 100644
--- a/pyFTS/common/FuzzySet.py
+++ b/pyFTS/common/FuzzySet.py
@@ -7,6 +7,7 @@ class FuzzySet(object):
     """
     Fuzzy Set
     """
+
     def __init__(self, name, mf, parameters, centroid, alpha=1.0, **kwargs):
         """
         Create a Fuzzy Set
@@ -23,15 +24,15 @@ class FuzzySet(object):
         """The alpha cut value"""
         self.type = kwargs.get('type', 'common')
         """The fuzzy set type (common, composite, nonstationary, etc)"""
-        self.variable = kwargs.get('variable',None)
+        self.variable = kwargs.get('variable', None)
         """In multivariate time series, indicate for which variable this fuzzy set belogs"""
         self.Z = None
         """Partition function in respect to the membership function"""
 
         if parameters is not None:
             if self.mf == Membership.gaussmf:
-                self.lower = parameters[0] - parameters[1]*3
-                self.upper = parameters[0] + parameters[1]*3
+                self.lower = parameters[0] - parameters[1] * 3
+                self.upper = parameters[0] + parameters[1] * 3
             elif self.mf == Membership.sigmf:
                 k = (parameters[1] / (2 * parameters[0]))
                 self.lower = parameters[1] - k
@@ -61,7 +62,7 @@ class FuzzySet(object):
         """
         return self.mf(self.transform(x), self.parameters) * self.alpha
 
-    def partition_function(self,uod=None, nbins=100):
+    def partition_function(self, uod=None, nbins=100):
         """
         Calculate the partition function over the membership function.
 
@@ -101,7 +102,7 @@ def __binary_search(x, fuzzy_sets, ordered_sets):
         fs2 = ordered_sets[midpoint + 1] if midpoint < max_len else ordered_sets[max_len]
 
         if fuzzy_sets[fs1].centroid <= fuzzy_sets[fs].transform(x) <= fuzzy_sets[fs2].centroid:
-            return (midpoint-1, midpoint, midpoint+1)
+            return (midpoint - 1, midpoint, midpoint + 1)
         elif midpoint <= 1:
             return [0]
         elif midpoint >= max_len:
@@ -125,7 +126,7 @@ def fuzzyfy(data, partitioner, **kwargs):
     :keyword method: the fuzzyfication method (fuzzy: all fuzzy memberships, maximum: only the maximum membership)
     :keyword mode: the fuzzyfication mode (sets: return the fuzzy sets names, vector: return a vector with the membership
     values for all fuzzy sets, both: return a list with tuples (fuzzy set, membership value) )
-    
+
     :returns a list with the fuzzyfied values, depending on the mode
     """
     alpha_cut = kwargs.get('alpha_cut', 0.)
@@ -295,6 +296,7 @@ def grant_bounds(data, fuzzy_sets, ordered_sets):
     else:
         return data
 
+
 def check_bounds(data, fuzzy_sets, ordered_sets):
     if data < fuzzy_sets[ordered_sets[0]].lower:
         return fuzzy_sets[ordered_sets[0]]
diff --git a/pyFTS/partitioners/partitioner.py b/pyFTS/partitioners/partitioner.py
index 47ee9d2..9c70793 100644
--- a/pyFTS/partitioners/partitioner.py
+++ b/pyFTS/partitioners/partitioner.py
@@ -1,5 +1,6 @@
 from pyFTS.common import FuzzySet, Membership
 import numpy as np
+from scipy.spatial import KDTree
 import matplotlib.pylab as plt
 
 
@@ -34,6 +35,8 @@ class Partitioner(object):
         """Anonymous function used to extract a single primitive type from an object instance"""
         self.ordered_sets = None
         """A ordered list of the fuzzy sets names, sorted by their middle point"""
+        self.kdtree = None
+        """A spatial index to help in fuzzyfication"""
 
         if kwargs.get('preprocess',True):
 
@@ -105,8 +108,104 @@ class Partitioner(object):
         """
         return self.sets[self.ordered_sets[-1]]
 
+    def build_index(self):
+        """
+        Build the KDTree index over the (lower, centroid, upper) points of the ordered fuzzy sets.
+        """
+        import sys
+
+        points = []
+        for key in self.ordered_sets:
+            fset = self.sets[key]
+            points.append([fset.lower, fset.centroid, fset.upper])
+
+        # Raise the recursion limit only while the tree is built, then restore the previous limit
+        previous_limit = sys.getrecursionlimit()
+        sys.setrecursionlimit(max(previous_limit, 100000))
+        try:
+            self.kdtree = KDTree(points)
+        finally:
+            sys.setrecursionlimit(previous_limit)
+
     def fuzzyfy(self, data, **kwargs):
-        return FuzzySet.fuzzyfy(data, self, **kwargs)
+        """
+        A general method for fuzzyfication.
+
+        :param data: input value to be fuzzyfied
+        :keyword alpha_cut: the minimal membership value to be considered on fuzzyfication (only for mode='sets')
+        :keyword method: the fuzzyfication method (fuzzy: all fuzzy memberships, maximum: only the maximum membership)
+        :keyword mode: the fuzzyfication mode (sets: return the fuzzy sets names, vector: return a vector with the membership
+        values for all fuzzy sets, both: return a list with tuples (fuzzy set, membership value) )
+
+        :returns a list with the fuzzyfied values, depending on the mode
+        """
+
+        if isinstance(data, (list, np.ndarray)):
+            return [self.fuzzyfy(inst, **kwargs) for inst in data]
+
+        alpha_cut = kwargs.get('alpha_cut', 0.)
+        mode = kwargs.get('mode', 'sets')
+        method = kwargs.get('method', 'fuzzy')
+
+        nearest = self.search(data, type='index')
+
+        mv = np.zeros(self.partitions)
+
+        for ix in nearest:
+            tmp = self[ix].membership(data)
+            mv[ix] = tmp if tmp >= alpha_cut else 0.
+
+        ix = np.ravel(np.argwhere(mv > 0.))
+        if ix.size == 0:
+            bound = self.check_bounds(data)
+            # check_bounds returns None inside [min, max]; indexing mv with None would set every membership to 1
+            if bound is not None:
+                mv[bound] = 1.
+
+        if method == 'fuzzy' and mode == 'vector':
+            return mv
+        elif method == 'fuzzy' and mode == 'sets':
+            ix = np.ravel(np.argwhere(mv > 0.))
+            sets = [self.ordered_sets[i] for i in ix]
+            return sets
+        elif method == 'maximum' and mode == 'sets':
+            mx = max(mv)
+            ix = np.ravel(np.argwhere(mv == mx))
+            return self.ordered_sets[ix[0]]
+
+    def check_bounds(self, data):
+        """
+        Return the index of the first (data < self.min) or last (data > self.max) fuzzy set, otherwise None.
+        """
+        if data < self.min:
+            return 0
+        elif data > self.max:
+            return self.partitions - 1
+        return None
+
+    def search(self, data, type='index', results=3):
+        '''
+        Perform a search for the nearest fuzzy sets of the point 'data'. This function was designed to work with several
+        overlapped fuzzy sets.
+
+        :param data: the value to search for the nearest fuzzy sets
+        :param type: the return type: 'index' for the fuzzy set indexes or 'name' for fuzzy set names.
+        :param results: the number of nearest fuzzy sets to return
+        :return: a list with the nearest fuzzy sets
+        '''
+        if self.kdtree is None:
+            self.build_index()
+
+        # KDTree.query reports missing neighbors with an out-of-range index, so never ask for more sets than exist
+        k = min(results, len(self.ordered_sets))
+        _, ix = self.kdtree.query([data, data, data], k)
+        # query returns a scalar instead of an array when k == 1
+        ix = sorted(np.atleast_1d(ix))
+
+        if type == 'name':
+            return [self.ordered_sets[i] for i in ix]
+        return ix
+
 
     def plot(self, ax, rounding=0):
         """
@@ -167,3 +266,32 @@ class Partitioner(object):
         :return: number of partitions
         """
         return self.partitions
+
+    def __getitem__(self, item):
+        """
+        Return a fuzzy set by its order or its name.
+
+        :param item: If item is an integer then it represents the fuzzy set index (order), if it was a string then
+        it represents the fuzzy set name.
+        :return: the fuzzy set
+        """
+        if isinstance(item, (int, np.integer)):
+            if item < 0 or item >= self.partitions:
+                raise ValueError("The fuzzy set index must be between 0 and {}.".format(self.partitions - 1))
+            return self.sets[self.ordered_sets[item]]
+        elif isinstance(item, str):
+            if item not in self.sets:
+                raise ValueError("The fuzzy set with name {} does not exist.".format(item))
+            return self.sets[item]
+        else:
+            raise ValueError("The parameter 'item' must be an integer or a string and the value informed was {} of type {}!".format(item, type(item)))
+
+    def __iter__(self):
+        """
+        Iterate over the fuzzy sets, ordered by its midpoints.
+
+        :return: An iterator over the fuzzy sets.
+        """
+        for key in self.ordered_sets:
+            yield self.sets[key]
+
diff --git a/pyFTS/tests/general.py b/pyFTS/tests/general.py
index b3f4e9f..cc3eac3 100644
--- a/pyFTS/tests/general.py
+++ b/pyFTS/tests/general.py
@@ -14,19 +14,16 @@ from pyFTS.benchmarks import benchmarks as bchmk, Measures
 from pyFTS.models import chen, yu, cheng, ismailefendi, hofts, pwfts
 from pyFTS.common import Transformations, Membership
 
-from pyFTS.data import artificial
+from pyFTS.data import TAIEX
+
+fs = Grid.GridPartitioner(data=TAIEX.get_data(), npart=23)
+
+print(fs.min, fs.max)
+
+tmp = fs.search(5500)
+print(tmp)
+
+tmp = fs.fuzzyfy(5500, method='fuzzy', alpha_cut=0.3)
+print(tmp)
 
-"""
-cd = artificial.SignalEmulator()\
-    .stationary_gaussian(0,.2,length=10, it=1)\
-    .incremental_gaussian(0.5, 0,start=5,length=5)\
-    .blip()\
-    .stationary_gaussian(3,.2,length=10, it=1, additive=False)
-print([round(k,3) for k in cd.run()])
-"""
-signal = artificial.SignalEmulator()\
-    .stationary_gaussian(1,0.2,length=130,it=10)\
-    .periodic_gaussian('sinoidal',100, 0.5,0.5,10,1,start=100,length=2000)\
-    .blip()\
-    .run()
 