Bugfixes and improvements on multivariate methods

This commit is contained in:
Petrônio Cândido 2019-04-12 11:25:13 -03:00
parent 0e4f3c536b
commit 4b07599c43
8 changed files with 89 additions and 38 deletions

View File

@@ -38,8 +38,6 @@ class ClusteredMVFTS(mvfts.MVFTS):
ndata = [] ndata = []
for index, row in data.iterrows(): for index, row in data.iterrows():
data_point = self.format_data(row) data_point = self.format_data(row)
#ndata.append(common.fuzzyfy_instance_clustered(data_point, self.partitioner,
# alpha_cut=self.alpha_cut))
ndata.append(self.partitioner.fuzzyfy(data_point, mode='sets')) ndata.append(self.partitioner.fuzzyfy(data_point, mode='sets'))
return ndata return ndata

View File

@@ -54,7 +54,7 @@ def fuzzyfy_instance_clustered(data_point, cluster, **kwargs):
alpha_cut = kwargs.get('alpha_cut', 0.0) alpha_cut = kwargs.get('alpha_cut', 0.0)
mode = kwargs.get('mode', 'sets') mode = kwargs.get('mode', 'sets')
fsets = [] fsets = []
for fset in cluster.knn(data_point): for fset in cluster.search(data_point):
if cluster.sets[fset].membership(data_point) > alpha_cut: if cluster.sets[fset].membership(data_point) > alpha_cut:
if mode == 'sets': if mode == 'sets':
fsets.append(fset) fsets.append(fset)

View File

@ -42,18 +42,41 @@ class IncrementalGridCluster(partitioner.MultivariatePartitioner):
if isinstance(data, pd.DataFrame): if isinstance(data, pd.DataFrame):
ret = [] ret = []
for inst in data.iterrows(): for index, inst in data.iterrows():
mv = self.fuzzyfy(inst, **kwargs) mv = self.fuzzyfy(inst, **kwargs)
ret.append(mv) ret.append(mv)
return ret return ret
if self.kdtree is not None:
fsets = self.search(data, **kwargs)
else:
fsets = self.incremental_search(data, **kwargs)
if len(fsets) == 0:
fsets = self.incremental_search(data, **kwargs)
raise Exception("{}".format(data))
mode = kwargs.get('mode', 'sets')
if mode == 'sets':
return fsets
elif mode == 'vector':
raise NotImplementedError()
elif mode == 'both':
ret = []
for key in fsets:
mvfset = self.sets[key]
ret.append((key, mvfset.membership(data)))
return ret
def incremental_search(self, data, **kwargs):
alpha_cut = kwargs.get('alpha_cut', 0.) alpha_cut = kwargs.get('alpha_cut', 0.)
mode = kwargs.get('mode', 'sets') mode = kwargs.get('mode', 'sets')
fsets = {} fsets = {}
ret = [] ret = []
for var in self.explanatory_variables: for var in self.explanatory_variables:
fsets[var.name] = var.partitioner.fuzzyfy(data[var.name], mode='sets') ac = alpha_cut if alpha_cut > 0. else var.alpha_cut
fsets[var.name] = var.partitioner.fuzzyfy(data[var.name], mode='sets', alpha_cut=ac)
fset = [val for key, val in fsets.items()] fset = [val for key, val in fsets.items()]
@@ -66,17 +89,11 @@ class IncrementalGridCluster(partitioner.MultivariatePartitioner):
self.explanatory_variables[ct].partitioner[fs]) self.explanatory_variables[ct].partitioner[fs])
mvfset.name = key mvfset.name = key
self.sets[key] = mvfset self.sets[key] = mvfset
ret.append(key)
if mode=='sets':
ret.append(key)
elif mode=='vector':
raise NotImplementedError()
elif mode == 'both':
mvfset = self.sets[key]
ret.append((key, mvfset.membership(data)))
return ret return ret
def prune(self): def prune(self):
pass self.build_index()

View File

@@ -45,7 +45,6 @@ class MVFTS(fts.FTS):
def format_data(self, data): def format_data(self, data):
ndata = {} ndata = {}
for var in self.explanatory_variables: for var in self.explanatory_variables:
#ndata[var.name] = data[var.data_label]
ndata[var.name] = var.partitioner.extractor(data[var.data_label]) ndata[var.name] = var.partitioner.extractor(data[var.data_label])
return ndata return ndata

View File

@@ -27,6 +27,13 @@ class MultivariatePartitioner(partitioner.Partitioner):
data = kwargs.get('data', None) data = kwargs.get('data', None)
self.build(data) self.build(data)
def format_data(self, data):
ndata = {}
for var in self.explanatory_variables:
ndata[var.name] = var.partitioner.extractor(data[var.data_label])
return ndata
def build(self, data): def build(self, data):
pass pass
@@ -45,10 +52,22 @@ class MultivariatePartitioner(partitioner.Partitioner):
self.build_index() self.build_index()
def knn(self, data): def search(self, data, **kwargs):
tmp = [data[k.name] '''
for k in self.explanatory_variables] Perform a search for the nearest fuzzy sets of the point 'data'. This function were designed to work with several
tmp, ix = self.kdtree.query(tmp, self.neighbors) overlapped fuzzy sets.
:param data: the value to search for the nearest fuzzy sets
:param type: the return type: 'index' for the fuzzy set indexes or 'name' for fuzzy set names.
:return: a list with the nearest fuzzy sets
'''
if self.kdtree is None:
self.build_index()
type = kwargs.get('type', 'index')
ndata = [data[k.name] for k in self.explanatory_variables]
_, ix = self.kdtree.query(ndata, self.neighbors)
if not isinstance(ix, (list, np.ndarray)): if not isinstance(ix, (list, np.ndarray)):
ix = [ix] ix = [ix]
@@ -58,9 +77,14 @@ class MultivariatePartitioner(partitioner.Partitioner):
for k in ix: for k in ix:
tmp.append(self.index[k]) tmp.append(self.index[k])
self.count[self.index[k]] = 1 self.count[self.index[k]] = 1
return tmp
else: if type == 'name':
return [self.index[k] for k in ix] return [self.index[k] for k in ix]
elif type == 'index':
return sorted(ix)
def fuzzyfy(self, data, **kwargs): def fuzzyfy(self, data, **kwargs):
return fuzzyfy_instance_clustered(data, self, **kwargs) return fuzzyfy_instance_clustered(data, self, **kwargs)

View File

@@ -77,21 +77,21 @@ class TimeGridPartitioner(partitioner.Partitioner):
tmp = Composite(set_name, superset=True, **kwargs) tmp = Composite(set_name, superset=True, **kwargs)
tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf, tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf,
[self.season.value - pl2, self.season.value, [self.season.value - pl2, self.season.value,
self.season.value + 0.0000001], self.season.value, alpha=.5, self.season.value + pl2], self.season.value, alpha=1,
**kwargs)) **kwargs))
tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf, tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf,
[c - 0.0000001, c, c + partlen], c, [c - partlen, c, c + partlen], c,
**kwargs)) **kwargs))
tmp.centroid = c tmp.centroid = c
sets[set_name] = tmp sets[set_name] = tmp
elif c == self.max - partlen: elif c == self.max - partlen:
tmp = Composite(set_name, superset=True, **kwargs) tmp = Composite(set_name, superset=True, **kwargs)
tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf, tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf,
[0.0000001, 0.0, [-pl2, 0.0,
pl2], 0.0, alpha=.5, pl2], 0.0, alpha=1,
**kwargs)) **kwargs))
tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf, tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf,
[c - partlen, c, c + 0.0000001], c, [c - partlen, c, c + partlen], c,
**kwargs)) **kwargs))
tmp.centroid = c tmp.centroid = c
sets[set_name] = tmp sets[set_name] = tmp
@@ -129,14 +129,14 @@ class TimeGridPartitioner(partitioner.Partitioner):
points = [] points = []
fset = self.sets[self.ordered_sets[0]] fset = self.sets[self.ordered_sets[0]]
points.append([fset.centroid, fset.centroid, fset.centroid]) points.append([fset.sets[1].lower, fset.sets[1].centroid, fset.sets[1].upper])
for ct, key in enumerate(self.ordered_sets[1:-2]): for ct, key in enumerate(self.ordered_sets[1:-1]):
fset = self.sets[key] fset = self.sets[key]
points.append([fset.lower, fset.centroid, fset.upper]) points.append([fset.lower, fset.centroid, fset.upper])
fset = self.sets[self.ordered_sets[-1]] fset = self.sets[self.ordered_sets[-1]]
points.append([fset.centroid, fset.centroid, fset.centroid]) points.append([fset.sets[1].lower, fset.sets[1].centroid, fset.sets[1].upper])
import sys import sys
sys.setrecursionlimit(100000) sys.setrecursionlimit(100000)
@@ -145,7 +145,7 @@ class TimeGridPartitioner(partitioner.Partitioner):
sys.setrecursionlimit(1000) sys.setrecursionlimit(1000)
def search(self, data, type='index', results=3): def search(self, data, **kwargs):
''' '''
Perform a search for the nearest fuzzy sets of the point 'data'. This function were designed to work with several Perform a search for the nearest fuzzy sets of the point 'data'. This function were designed to work with several
overlapped fuzzy sets. overlapped fuzzy sets.
@@ -155,15 +155,21 @@ class TimeGridPartitioner(partitioner.Partitioner):
:param results: the number of nearest fuzzy sets to return :param results: the number of nearest fuzzy sets to return
:return: a list with the nearest fuzzy sets :return: a list with the nearest fuzzy sets
''' '''
type = kwargs.get('type','index')
results = kwargs.get('results',3)
if self.kdtree is None: if self.kdtree is None:
self.build_index() self.build_index()
_, ix = self.kdtree.query([data, data, data], results) _, ix = self.kdtree.query([data, data, data], results)
ix = ix.tolist()
if 0 in ix: if 0 in ix:
ix[-1] = self.partitions-1 ix.insert(0, self.partitions-1)
elif self.partitions-1 in ix: elif self.partitions-1 in ix:
ix[-1] = 0 ix.insert(0, 0)
if type == 'name': if type == 'name':
return [self.ordered_sets[k] for k in sorted(ix)] return [self.ordered_sets[k] for k in sorted(ix)]

View File

@@ -191,7 +191,7 @@ class Partitioner(object):
elif data > self.max: elif data > self.max:
return self.partitions-1 return self.partitions-1
def search(self, data, type='index', results=3): def search(self, data, **kwargs):
''' '''
Perform a search for the nearest fuzzy sets of the point 'data'. This function were designed to work with several Perform a search for the nearest fuzzy sets of the point 'data'. This function were designed to work with several
overlapped fuzzy sets. overlapped fuzzy sets.
@@ -204,6 +204,9 @@ class Partitioner(object):
if self.kdtree is None: if self.kdtree is None:
self.build_index() self.build_index()
type = kwargs.get('type','index')
results = kwargs.get('results', 3)
_, ix = self.kdtree.query([data, data, data], results) _, ix = self.kdtree.query([data, data, data], results)
if type == 'name': if type == 'name':

View File

@@ -171,25 +171,29 @@ from pyFTS.partitioners import Grid
sp = {'seasonality': DateTime.day_of_year , 'names': ['Jan','Fev','Mar','Abr','Mai','Jun','Jul', 'Ago','Set','Out','Nov','Dez']} sp = {'seasonality': DateTime.day_of_year , 'names': ['Jan','Fev','Mar','Abr','Mai','Jun','Jul', 'Ago','Set','Out','Nov','Dez']}
vmonth = variable.Variable("Month", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=12, vmonth = variable.Variable("Month", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=12,
data=train, partitioner_specific=sp) data=train, partitioner_specific=sp, alpha_cut=.5)
sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0,24)]} sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0,24)]}
vhour = variable.Variable("Hour", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=24, vhour = variable.Variable("Hour", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=24,
data=train, partitioner_specific=sp) data=train, partitioner_specific=sp, alpha_cut=.5)
#print(vhour.partitioner)
#print(vmonth.partitioner.fuzzyfy(180))
vavg = variable.Variable("Radiation", data_label="glo_avg", alias='rad', vavg = variable.Variable("Radiation", data_label="glo_avg", alias='rad',
partitioner=Grid.GridPartitioner, npart=30, alpha_cut=.3, partitioner=Grid.GridPartitioner, npart=25, alpha_cut=.3,
data=train) data=train)
from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid
fs = grid.IncrementalGridCluster(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg) fs = grid.IncrementalGridCluster(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg)
model = cmvfts.ClusteredMVFTS(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg, model = cmvfts.ClusteredMVFTS(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg,
partitioner=fs, knn=3) partitioner=fs, knn=3)
model.fit(train) model.fit(train)
print(fs) print(len(model))
print(model)