Replace the binary_search with a KDTree for fuzzyfication, plus other minor improvements to the partitioner

This commit is contained in:
Petrônio Cândido 2019-04-09 15:17:36 -03:00
parent 6b8607cf3c
commit d3fa1ea534
3 changed files with 139 additions and 21 deletions

View File

@ -7,6 +7,7 @@ class FuzzySet(object):
""" """
Fuzzy Set Fuzzy Set
""" """
def __init__(self, name, mf, parameters, centroid, alpha=1.0, **kwargs): def __init__(self, name, mf, parameters, centroid, alpha=1.0, **kwargs):
""" """
Create a Fuzzy Set Create a Fuzzy Set
@ -23,15 +24,15 @@ class FuzzySet(object):
"""The alpha cut value""" """The alpha cut value"""
self.type = kwargs.get('type', 'common') self.type = kwargs.get('type', 'common')
"""The fuzzy set type (common, composite, nonstationary, etc)""" """The fuzzy set type (common, composite, nonstationary, etc)"""
self.variable = kwargs.get('variable',None) self.variable = kwargs.get('variable', None)
"""In multivariate time series, indicate for which variable this fuzzy set belogs""" """In multivariate time series, indicate for which variable this fuzzy set belogs"""
self.Z = None self.Z = None
"""Partition function in respect to the membership function""" """Partition function in respect to the membership function"""
if parameters is not None: if parameters is not None:
if self.mf == Membership.gaussmf: if self.mf == Membership.gaussmf:
self.lower = parameters[0] - parameters[1]*3 self.lower = parameters[0] - parameters[1] * 3
self.upper = parameters[0] + parameters[1]*3 self.upper = parameters[0] + parameters[1] * 3
elif self.mf == Membership.sigmf: elif self.mf == Membership.sigmf:
k = (parameters[1] / (2 * parameters[0])) k = (parameters[1] / (2 * parameters[0]))
self.lower = parameters[1] - k self.lower = parameters[1] - k
@ -61,7 +62,7 @@ class FuzzySet(object):
""" """
return self.mf(self.transform(x), self.parameters) * self.alpha return self.mf(self.transform(x), self.parameters) * self.alpha
def partition_function(self,uod=None, nbins=100): def partition_function(self, uod=None, nbins=100):
""" """
Calculate the partition function over the membership function. Calculate the partition function over the membership function.
@ -101,7 +102,7 @@ def __binary_search(x, fuzzy_sets, ordered_sets):
fs2 = ordered_sets[midpoint + 1] if midpoint < max_len else ordered_sets[max_len] fs2 = ordered_sets[midpoint + 1] if midpoint < max_len else ordered_sets[max_len]
if fuzzy_sets[fs1].centroid <= fuzzy_sets[fs].transform(x) <= fuzzy_sets[fs2].centroid: if fuzzy_sets[fs1].centroid <= fuzzy_sets[fs].transform(x) <= fuzzy_sets[fs2].centroid:
return (midpoint-1, midpoint, midpoint+1) return (midpoint - 1, midpoint, midpoint + 1)
elif midpoint <= 1: elif midpoint <= 1:
return [0] return [0]
elif midpoint >= max_len: elif midpoint >= max_len:
@ -125,7 +126,7 @@ def fuzzyfy(data, partitioner, **kwargs):
:keyword method: the fuzzyfication method (fuzzy: all fuzzy memberships, maximum: only the maximum membership) :keyword method: the fuzzyfication method (fuzzy: all fuzzy memberships, maximum: only the maximum membership)
:keyword mode: the fuzzyfication mode (sets: return the fuzzy sets names, vector: return a vector with the membership :keyword mode: the fuzzyfication mode (sets: return the fuzzy sets names, vector: return a vector with the membership
values for all fuzzy sets, both: return a list with tuples (fuzzy set, membership value) ) values for all fuzzy sets, both: return a list with tuples (fuzzy set, membership value) )
:returns a list with the fuzzyfied values, depending on the mode :returns a list with the fuzzyfied values, depending on the mode
""" """
alpha_cut = kwargs.get('alpha_cut', 0.) alpha_cut = kwargs.get('alpha_cut', 0.)
@ -295,6 +296,7 @@ def grant_bounds(data, fuzzy_sets, ordered_sets):
else: else:
return data return data
def check_bounds(data, fuzzy_sets, ordered_sets): def check_bounds(data, fuzzy_sets, ordered_sets):
if data < fuzzy_sets[ordered_sets[0]].lower: if data < fuzzy_sets[ordered_sets[0]].lower:
return fuzzy_sets[ordered_sets[0]] return fuzzy_sets[ordered_sets[0]]

View File

@ -1,5 +1,6 @@
from pyFTS.common import FuzzySet, Membership from pyFTS.common import FuzzySet, Membership
import numpy as np import numpy as np
from scipy.spatial import KDTree
import matplotlib.pylab as plt import matplotlib.pylab as plt
@ -34,6 +35,8 @@ class Partitioner(object):
"""Anonymous function used to extract a single primitive type from an object instance""" """Anonymous function used to extract a single primitive type from an object instance"""
self.ordered_sets = None self.ordered_sets = None
"""A ordered list of the fuzzy sets names, sorted by their middle point""" """A ordered list of the fuzzy sets names, sorted by their middle point"""
self.kdtree = None
"""A spatial index to help in fuzzyfication"""
if kwargs.get('preprocess',True): if kwargs.get('preprocess',True):
@ -105,8 +108,95 @@ class Partitioner(object):
""" """
return self.sets[self.ordered_sets[-1]] return self.sets[self.ordered_sets[-1]]
def build_index(self):
    """
    Build the spatial index (a KDTree) used to look up the nearest fuzzy sets.

    Every fuzzy set, taken in the order given by ``self.ordered_sets``, contributes one
    point ``[lower, centroid, upper]``; row ``i`` of the tree therefore corresponds to
    ``self.ordered_sets[i]``. The resulting tree is stored in ``self.kdtree``.
    """
    import sys

    points = [[fset.lower, fset.centroid, fset.upper]
              for fset in (self.sets[key] for key in self.ordered_sets)]

    # The recursion limit is raised only while the tree is built (presumably because
    # the KDTree construction recurses deeply for many points) and is always put back
    # to whatever the caller had configured, even if the construction fails.
    previous_limit = sys.getrecursionlimit()
    sys.setrecursionlimit(max(previous_limit, 100000))
    try:
        self.kdtree = KDTree(points)
    finally:
        sys.setrecursionlimit(previous_limit)
def fuzzyfy(self, data, **kwargs):
    """
    A general method for fuzzyfication.

    :param data: input value to be fuzzyfied. A list, tuple or numpy array is fuzzyfied
                 element by element and a list with one result per element is returned.
    :keyword alpha_cut: the minimal membership value to be considered on fuzzyfication
    :keyword method: the fuzzyfication method (fuzzy: all fuzzy memberships, maximum: only the maximum membership)
    :keyword mode: the fuzzyfication mode (sets: return the fuzzy sets names, vector: return a vector with the
                   membership values for all fuzzy sets)
    :returns: depending on method/mode: the membership vector (fuzzy/vector), a list of fuzzy set names
              (fuzzy/sets) or a single fuzzy set name (maximum/sets). Any other combination returns None.
    """
    if isinstance(data, (list, tuple, np.ndarray)):
        # Collect the result of every instance (these results used to be discarded).
        return [self.fuzzyfy(inst, **kwargs) for inst in data]

    alpha_cut = kwargs.get('alpha_cut', 0.)
    mode = kwargs.get('mode', 'sets')
    method = kwargs.get('method', 'fuzzy')

    # Only the nearest fuzzy sets are evaluated; all the others keep membership 0.
    mv = np.zeros(self.partitions)
    for ix in self.search(data, type='index'):
        tmp = self[ix].membership(data)
        mv[ix] = tmp if tmp >= alpha_cut else 0.

    if not np.any(mv > 0.):
        # No set was activated: when the value lies outside [min, max], clamp it to
        # the first/last fuzzy set. check_bounds() returns None for in-range values;
        # indexing with None would overwrite the whole vector, so it is skipped.
        bound = self.check_bounds(data)
        if bound is not None:
            mv[bound] = 1.

    if method == 'fuzzy' and mode == 'vector':
        return mv
    elif method == 'fuzzy' and mode == 'sets':
        active = np.ravel(np.argwhere(mv > 0.))
        return [self.ordered_sets[i] for i in active]
    elif method == 'maximum' and mode == 'sets':
        best = np.ravel(np.argwhere(mv == max(mv)))
        return self.ordered_sets[best[0]]
    return None
def check_bounds(self, data):
    """
    Map a value lying outside the partitioner range to the index of the closest extreme fuzzy set.

    :param data: the value to be checked against self.min and self.max
    :return: 0 if data is below self.min, self.partitions - 1 if data is above self.max,
             and None when neither comparison holds
    """
    if data < self.min:
        return 0
    if data > self.max:
        last_index = self.partitions - 1
        return last_index
    return None
def search(self, data, type='index', results=3):
    """
    Perform a search for the nearest fuzzy sets of the point 'data'. This function was designed to work with
    several overlapped fuzzy sets.

    The spatial index is built lazily on the first call. The query point is [data, data, data], matched
    against the [lower, centroid, upper] points stored in the index.

    :param data: the value to search for the nearest fuzzy sets
    :param type: the return type: 'index' for the fuzzy set indexes or 'name' for fuzzy set names.
    :param results: the number of nearest fuzzy sets to return (capped at the number of indexed sets)
    :return: a list with the nearest fuzzy sets, sorted by index
    """
    if self.kdtree is None:
        self.build_index()

    # Asking the tree for more neighbours than it holds yields the out-of-range
    # index n for the missing ones, so the request is capped.
    k = min(results, self.kdtree.n)
    _, ix = self.kdtree.query([data, data, data], k)

    # For k == 1 the query returns a scalar instead of an array.
    nearest = sorted(np.atleast_1d(ix))

    if type == 'name':
        return [self.ordered_sets[i] for i in nearest]
    return nearest
def plot(self, ax, rounding=0): def plot(self, ax, rounding=0):
""" """
@ -167,3 +257,32 @@ class Partitioner(object):
:return: number of partitions :return: number of partitions
""" """
return self.partitions return self.partitions
def __getitem__(self, item):
    """
    Return a fuzzy set by its order or its name.

    :param item: If item is an integer (Python int or any numpy integer) then it represents the fuzzy set
                 index (order), if it is a string then it represents the fuzzy set name.
    :return: the fuzzy set
    :raises ValueError: if the index is out of range, the name is unknown or item has an unsupported type
    """
    # np.integer covers every numpy integer width; the np.int alias no longer exists
    # in recent NumPy releases, so referencing it would fail before any lookup.
    if isinstance(item, (int, np.integer)):
        if item < 0 or item >= self.partitions:
            raise ValueError("The fuzzy set index must be between 0 and {}.".format(self.partitions - 1))
        return self.sets[self.ordered_sets[item]]

    if isinstance(item, str):
        if item not in self.sets:
            raise ValueError("The fuzzy set with name {} does not exist.".format(item))
        return self.sets[item]

    raise ValueError("The parameter 'item' must be an integer or a string and the value informed was {} of type {}!".format(item, type(item)))
def __iter__(self):
    """
    Walk through the fuzzy sets following the order stored in self.ordered_sets.

    :return: a generator yielding each fuzzy set object taken from self.sets
    """
    yield from (self.sets[name] for name in self.ordered_sets)

View File

@ -14,19 +14,16 @@ from pyFTS.benchmarks import benchmarks as bchmk, Measures
from pyFTS.models import chen, yu, cheng, ismailefendi, hofts, pwfts from pyFTS.models import chen, yu, cheng, ismailefendi, hofts, pwfts
from pyFTS.common import Transformations, Membership from pyFTS.common import Transformations, Membership
from pyFTS.data import artificial from pyFTS.data import TAIEX
fs = Grid.GridPartitioner(data=TAIEX.get_data(), npart=23)
print(fs.min, fs.max)
tmp = fs.search(5500)
print(tmp)
tmp = fs.fuzzyfy(5500, method='fuzzy', alpha_cut=0.3)
print(tmp)
"""
cd = artificial.SignalEmulator()\
.stationary_gaussian(0,.2,length=10, it=1)\
.incremental_gaussian(0.5, 0,start=5,length=5)\
.blip()\
.stationary_gaussian(3,.2,length=10, it=1, additive=False)
print([round(k,3) for k in cd.run()])
"""
signal = artificial.SignalEmulator()\
.stationary_gaussian(1,0.2,length=130,it=10)\
.periodic_gaussian('sinoidal',100, 0.5,0.5,10,1,start=100,length=2000)\
.blip()\
.run()