Replacing the binary_search for a KDTree on fuzzyfication and other minor improvements on partitioner

2019-04-09 15:17:36 -03:00 · 2019-04-09 15:17:36 -03:00 · d3fa1ea534
commit d3fa1ea534
parent 6b8607cf3c
3 changed files with 139 additions and 21 deletions
--- a/pyFTS/common/FuzzySet.py
+++ b/pyFTS/common/FuzzySet.py
@ -7,6 +7,7 @@ class FuzzySet(object):
    """
    Fuzzy Set
    """
+
    def __init__(self, name, mf, parameters, centroid, alpha=1.0, **kwargs):
        """
        Create a Fuzzy Set
@ -23,15 +24,15 @@ class FuzzySet(object):
        """The alpha cut value"""
        self.type = kwargs.get('type', 'common')
        """The fuzzy set type (common, composite, nonstationary, etc)"""
-        self.variable = kwargs.get('variable',None)
+        self.variable = kwargs.get('variable', None)
        """In multivariate time series, indicate for which variable this fuzzy set belogs"""
        self.Z = None
        """Partition function in respect to the membership function"""

        if parameters is not None:
            if self.mf == Membership.gaussmf:
-                self.lower = parameters[0] - parameters[1]*3
-                self.upper = parameters[0] + parameters[1]*3
+                self.lower = parameters[0] - parameters[1] * 3
+                self.upper = parameters[0] + parameters[1] * 3
            elif self.mf == Membership.sigmf:
                k = (parameters[1] / (2 * parameters[0]))
                self.lower = parameters[1] - k
@ -61,7 +62,7 @@ class FuzzySet(object):
        """
        return self.mf(self.transform(x), self.parameters) * self.alpha

-    def partition_function(self,uod=None, nbins=100):
+    def partition_function(self, uod=None, nbins=100):
        """
        Calculate the partition function over the membership function.

@ -101,7 +102,7 @@ def __binary_search(x, fuzzy_sets, ordered_sets):
        fs2 = ordered_sets[midpoint + 1] if midpoint < max_len else ordered_sets[max_len]

        if fuzzy_sets[fs1].centroid <= fuzzy_sets[fs].transform(x) <= fuzzy_sets[fs2].centroid:
-            return (midpoint-1, midpoint, midpoint+1)
+            return (midpoint - 1, midpoint, midpoint + 1)
        elif midpoint <= 1:
            return [0]
        elif midpoint >= max_len:
@ -125,7 +126,7 @@ def fuzzyfy(data, partitioner, **kwargs):
    :keyword method: the fuzzyfication method (fuzzy: all fuzzy memberships, maximum: only the maximum membership)
    :keyword mode: the fuzzyfication mode (sets: return the fuzzy sets names, vector: return a vector with the membership
    values for all fuzzy sets, both: return a list with tuples (fuzzy set, membership value) )
-    
+
    :returns a list with the fuzzyfied values, depending on the mode
    """
    alpha_cut = kwargs.get('alpha_cut', 0.)
@ -295,6 +296,7 @@ def grant_bounds(data, fuzzy_sets, ordered_sets):
    else:
        return data

+
 def check_bounds(data, fuzzy_sets, ordered_sets):
    if data < fuzzy_sets[ordered_sets[0]].lower:
        return fuzzy_sets[ordered_sets[0]]
--- a/pyFTS/partitioners/partitioner.py
+++ b/pyFTS/partitioners/partitioner.py
@ -1,5 +1,6 @@
 from pyFTS.common import FuzzySet, Membership
 import numpy as np
+from scipy.spatial import KDTree
 import matplotlib.pylab as plt


@ -34,6 +35,8 @@ class Partitioner(object):
        """Anonymous function used to extract a single primitive type from an object instance"""
        self.ordered_sets = None
        """A ordered list of the fuzzy sets names, sorted by their middle point"""
+        self.kdtree = None
+        """A spatial index to help in fuzzyfication"""

        if kwargs.get('preprocess',True):

@ -105,8 +108,95 @@ class Partitioner(object):
        """
        return self.sets[self.ordered_sets[-1]]

+    def build_index(self):
+        points = []
+
+        #self.index = {}
+
+        for ct, key in enumerate(self.ordered_sets):
+            fset = self.sets[key]
+            points.append([fset.lower, fset.centroid, fset.upper])
+            #self.index[ct] = fset.name
+
+        import sys
+        sys.setrecursionlimit(100000)
+
+        self.kdtree = KDTree(points)
+
+        sys.setrecursionlimit(1000)
+
    def fuzzyfy(self, data, **kwargs):
-        return FuzzySet.fuzzyfy(data, self, **kwargs)
+        """
+        A general method for fuzzyfication.
+
+        :param data: input value to be fuzzyfied
+        :keyword alpha_cut: the minimal membership value to be considered on fuzzyfication (only for mode='sets')
+        :keyword method: the fuzzyfication method (fuzzy: all fuzzy memberships, maximum: only the maximum membership)
+        :keyword mode: the fuzzyfication mode (sets: return the fuzzy sets names, vector: return a vector with the membership
+        values for all fuzzy sets, both: return a list with tuples (fuzzy set, membership value) )
+
+        :returns a list with the fuzzyfied values, depending on the mode
+        """
+
+        if isinstance(data, (list, np.ndarray)):
+            ret = []
+            for inst in data:
+                mv = self.fuzzyfy(inst, **kwargs)
+            return ret
+
+        alpha_cut = kwargs.get('alpha_cut', 0.)
+        mode = kwargs.get('mode', 'sets')
+        method = kwargs.get('method', 'fuzzy')
+
+        nearest = self.search(data, type='index')
+
+        mv = np.zeros(self.partitions)
+
+        for ix in nearest:
+            tmp = self[ix].membership(data)
+            mv[ix] = tmp if tmp >= alpha_cut else 0.
+
+        ix = np.ravel(np.argwhere(mv > 0.))
+        if ix.size == 0:
+            mv[self.check_bounds(data)] = 1.
+
+        if method == 'fuzzy' and mode == 'vector':
+            return mv
+        elif method == 'fuzzy' and mode == 'sets':
+            ix = np.ravel(np.argwhere(mv > 0.))
+            sets = [self.ordered_sets[i] for i in ix]
+            return sets
+        elif method == 'maximum' and mode == 'sets':
+            mx = max(mv)
+            ix = np.ravel(np.argwhere(mv == mx))
+            return self.ordered_sets[ix[0]]
+
+    def check_bounds(self, data):
+        if data < self.min:
+            return 0
+        elif data > self.max:
+            return self.partitions-1
+
+    def search(self, data, type='index', results=3):
+        '''
+        Perform a search for the nearest fuzzy sets of the point 'data'. This function were designed to work with several
+        overlapped fuzzy sets.
+
+        :param data: the value to search for the nearest fuzzy sets
+        :param type: the return type: 'index' for the fuzzy set indexes or 'name' for fuzzy set names.
+        :param results: the number of nearest fuzzy sets to return
+        :return: a list with the nearest fuzzy sets
+        '''
+        if self.kdtree is None:
+            self.build_index()
+
+        _, ix = self.kdtree.query([data, data, data], results)
+
+        if type == 'name':
+            return [self.ordered_sets[k] for k in sorted(ix)]
+        else:
+            return sorted(ix)
+

    def plot(self, ax, rounding=0):
        """
@ -167,3 +257,32 @@ class Partitioner(object):
        :return: number of partitions
        """
        return self.partitions
+
+    def __getitem__(self, item):
+        """
+        Return a fuzzy set by its order or its name.
+
+        :param item: If item is an integer then it represents the fuzzy set index (order), if it was a string then
+        it represents the fuzzy set name.
+        :return: the fuzzy set
+        """
+        if isinstance(item, (int, np.int, np.int8, np.int16, np.int32, np.int64)):
+            if item < 0 or item >= self.partitions:
+                raise ValueError("The fuzzy set index must be between 0 and {}.".format(self.partitions))
+            return self.sets[self.ordered_sets[item]]
+        elif isinstance(item, str):
+            if item not in self.sets:
+                raise ValueError("The fuzzy set with name {} does not exist.".format(item))
+            return self.sets[item]
+        else:
+            raise ValueError("The parameter 'item' must be an integer or a string and the value informed was {} of type {}!".format(item, type(item)))
+
+    def __iter__(self):
+        """
+        Iterate over the fuzzy sets, ordered by its midpoints.
+
+        :return: An iterator over the fuzzy sets.
+        """
+        for key in self.ordered_sets:
+            yield self.sets[key]
+
--- a/pyFTS/tests/general.py
+++ b/pyFTS/tests/general.py
@ -14,19 +14,16 @@ from pyFTS.benchmarks import benchmarks as bchmk, Measures
 from pyFTS.models import chen, yu, cheng, ismailefendi, hofts, pwfts
 from pyFTS.common import Transformations, Membership

-from pyFTS.data import artificial
+from pyFTS.data import TAIEX
+
+fs = Grid.GridPartitioner(data=TAIEX.get_data(), npart=23)
+
+print(fs.min, fs.max)
+
+tmp = fs.search(5500)
+print(tmp)
+
+tmp = fs.fuzzyfy(5500, method='fuzzy', alpha_cut=0.3)
+print(tmp)

-"""
-cd = artificial.SignalEmulator()\
-    .stationary_gaussian(0,.2,length=10, it=1)\
-    .incremental_gaussian(0.5, 0,start=5,length=5)\
-    .blip()\
-    .stationary_gaussian(3,.2,length=10, it=1, additive=False)
-print([round(k,3) for k in cd.run()])
-"""

-signal = artificial.SignalEmulator()\
-    .stationary_gaussian(1,0.2,length=130,it=10)\
-    .periodic_gaussian('sinoidal',100, 0.5,0.5,10,1,start=100,length=2000)\
-    .blip()\
-    .run()