Bugfixes and improvements on multivariate methods
This commit is contained in:
parent
0e4f3c536b
commit
4b07599c43
@ -38,8 +38,6 @@ class ClusteredMVFTS(mvfts.MVFTS):
|
|||||||
ndata = []
|
ndata = []
|
||||||
for index, row in data.iterrows():
|
for index, row in data.iterrows():
|
||||||
data_point = self.format_data(row)
|
data_point = self.format_data(row)
|
||||||
#ndata.append(common.fuzzyfy_instance_clustered(data_point, self.partitioner,
|
|
||||||
# alpha_cut=self.alpha_cut))
|
|
||||||
ndata.append(self.partitioner.fuzzyfy(data_point, mode='sets'))
|
ndata.append(self.partitioner.fuzzyfy(data_point, mode='sets'))
|
||||||
|
|
||||||
return ndata
|
return ndata
|
||||||
|
@ -54,7 +54,7 @@ def fuzzyfy_instance_clustered(data_point, cluster, **kwargs):
|
|||||||
alpha_cut = kwargs.get('alpha_cut', 0.0)
|
alpha_cut = kwargs.get('alpha_cut', 0.0)
|
||||||
mode = kwargs.get('mode', 'sets')
|
mode = kwargs.get('mode', 'sets')
|
||||||
fsets = []
|
fsets = []
|
||||||
for fset in cluster.knn(data_point):
|
for fset in cluster.search(data_point):
|
||||||
if cluster.sets[fset].membership(data_point) > alpha_cut:
|
if cluster.sets[fset].membership(data_point) > alpha_cut:
|
||||||
if mode == 'sets':
|
if mode == 'sets':
|
||||||
fsets.append(fset)
|
fsets.append(fset)
|
||||||
|
@ -42,18 +42,41 @@ class IncrementalGridCluster(partitioner.MultivariatePartitioner):
|
|||||||
|
|
||||||
if isinstance(data, pd.DataFrame):
|
if isinstance(data, pd.DataFrame):
|
||||||
ret = []
|
ret = []
|
||||||
for inst in data.iterrows():
|
for index, inst in data.iterrows():
|
||||||
mv = self.fuzzyfy(inst, **kwargs)
|
mv = self.fuzzyfy(inst, **kwargs)
|
||||||
ret.append(mv)
|
ret.append(mv)
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
if self.kdtree is not None:
|
||||||
|
fsets = self.search(data, **kwargs)
|
||||||
|
else:
|
||||||
|
fsets = self.incremental_search(data, **kwargs)
|
||||||
|
|
||||||
|
if len(fsets) == 0:
|
||||||
|
fsets = self.incremental_search(data, **kwargs)
|
||||||
|
raise Exception("{}".format(data))
|
||||||
|
|
||||||
|
mode = kwargs.get('mode', 'sets')
|
||||||
|
if mode == 'sets':
|
||||||
|
return fsets
|
||||||
|
elif mode == 'vector':
|
||||||
|
raise NotImplementedError()
|
||||||
|
elif mode == 'both':
|
||||||
|
ret = []
|
||||||
|
for key in fsets:
|
||||||
|
mvfset = self.sets[key]
|
||||||
|
ret.append((key, mvfset.membership(data)))
|
||||||
|
return ret
|
||||||
|
|
||||||
|
def incremental_search(self, data, **kwargs):
|
||||||
alpha_cut = kwargs.get('alpha_cut', 0.)
|
alpha_cut = kwargs.get('alpha_cut', 0.)
|
||||||
mode = kwargs.get('mode', 'sets')
|
mode = kwargs.get('mode', 'sets')
|
||||||
|
|
||||||
fsets = {}
|
fsets = {}
|
||||||
ret = []
|
ret = []
|
||||||
for var in self.explanatory_variables:
|
for var in self.explanatory_variables:
|
||||||
fsets[var.name] = var.partitioner.fuzzyfy(data[var.name], mode='sets')
|
ac = alpha_cut if alpha_cut > 0. else var.alpha_cut
|
||||||
|
fsets[var.name] = var.partitioner.fuzzyfy(data[var.name], mode='sets', alpha_cut=ac)
|
||||||
|
|
||||||
fset = [val for key, val in fsets.items()]
|
fset = [val for key, val in fsets.items()]
|
||||||
|
|
||||||
@ -66,17 +89,11 @@ class IncrementalGridCluster(partitioner.MultivariatePartitioner):
|
|||||||
self.explanatory_variables[ct].partitioner[fs])
|
self.explanatory_variables[ct].partitioner[fs])
|
||||||
mvfset.name = key
|
mvfset.name = key
|
||||||
self.sets[key] = mvfset
|
self.sets[key] = mvfset
|
||||||
|
ret.append(key)
|
||||||
|
|
||||||
if mode=='sets':
|
|
||||||
ret.append(key)
|
|
||||||
elif mode=='vector':
|
|
||||||
raise NotImplementedError()
|
|
||||||
elif mode == 'both':
|
|
||||||
mvfset = self.sets[key]
|
|
||||||
ret.append((key, mvfset.membership(data)))
|
|
||||||
|
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
def prune(self):
|
def prune(self):
|
||||||
pass
|
self.build_index()
|
||||||
|
|
||||||
|
@ -45,7 +45,6 @@ class MVFTS(fts.FTS):
|
|||||||
def format_data(self, data):
|
def format_data(self, data):
|
||||||
ndata = {}
|
ndata = {}
|
||||||
for var in self.explanatory_variables:
|
for var in self.explanatory_variables:
|
||||||
#ndata[var.name] = data[var.data_label]
|
|
||||||
ndata[var.name] = var.partitioner.extractor(data[var.data_label])
|
ndata[var.name] = var.partitioner.extractor(data[var.data_label])
|
||||||
|
|
||||||
return ndata
|
return ndata
|
||||||
|
@ -27,6 +27,13 @@ class MultivariatePartitioner(partitioner.Partitioner):
|
|||||||
data = kwargs.get('data', None)
|
data = kwargs.get('data', None)
|
||||||
self.build(data)
|
self.build(data)
|
||||||
|
|
||||||
|
def format_data(self, data):
|
||||||
|
ndata = {}
|
||||||
|
for var in self.explanatory_variables:
|
||||||
|
ndata[var.name] = var.partitioner.extractor(data[var.data_label])
|
||||||
|
|
||||||
|
return ndata
|
||||||
|
|
||||||
def build(self, data):
|
def build(self, data):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -45,10 +52,22 @@ class MultivariatePartitioner(partitioner.Partitioner):
|
|||||||
|
|
||||||
self.build_index()
|
self.build_index()
|
||||||
|
|
||||||
def knn(self, data):
|
def search(self, data, **kwargs):
|
||||||
tmp = [data[k.name]
|
'''
|
||||||
for k in self.explanatory_variables]
|
Perform a search for the nearest fuzzy sets of the point 'data'. This function was designed to work with several
|
||||||
tmp, ix = self.kdtree.query(tmp, self.neighbors)
|
overlapped fuzzy sets.
|
||||||
|
|
||||||
|
:param data: the value to search for the nearest fuzzy sets
|
||||||
|
:param type: the return type: 'index' for the fuzzy set indexes or 'name' for fuzzy set names.
|
||||||
|
:return: a list with the nearest fuzzy sets
|
||||||
|
'''
|
||||||
|
if self.kdtree is None:
|
||||||
|
self.build_index()
|
||||||
|
|
||||||
|
type = kwargs.get('type', 'index')
|
||||||
|
|
||||||
|
ndata = [data[k.name] for k in self.explanatory_variables]
|
||||||
|
_, ix = self.kdtree.query(ndata, self.neighbors)
|
||||||
|
|
||||||
if not isinstance(ix, (list, np.ndarray)):
|
if not isinstance(ix, (list, np.ndarray)):
|
||||||
ix = [ix]
|
ix = [ix]
|
||||||
@ -58,9 +77,14 @@ class MultivariatePartitioner(partitioner.Partitioner):
|
|||||||
for k in ix:
|
for k in ix:
|
||||||
tmp.append(self.index[k])
|
tmp.append(self.index[k])
|
||||||
self.count[self.index[k]] = 1
|
self.count[self.index[k]] = 1
|
||||||
return tmp
|
|
||||||
else:
|
if type == 'name':
|
||||||
return [self.index[k] for k in ix]
|
return [self.index[k] for k in ix]
|
||||||
|
elif type == 'index':
|
||||||
|
return sorted(ix)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def fuzzyfy(self, data, **kwargs):
|
def fuzzyfy(self, data, **kwargs):
|
||||||
return fuzzyfy_instance_clustered(data, self, **kwargs)
|
return fuzzyfy_instance_clustered(data, self, **kwargs)
|
||||||
|
@ -77,21 +77,21 @@ class TimeGridPartitioner(partitioner.Partitioner):
|
|||||||
tmp = Composite(set_name, superset=True, **kwargs)
|
tmp = Composite(set_name, superset=True, **kwargs)
|
||||||
tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf,
|
tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf,
|
||||||
[self.season.value - pl2, self.season.value,
|
[self.season.value - pl2, self.season.value,
|
||||||
self.season.value + 0.0000001], self.season.value, alpha=.5,
|
self.season.value + pl2], self.season.value, alpha=1,
|
||||||
**kwargs))
|
**kwargs))
|
||||||
tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf,
|
tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf,
|
||||||
[c - 0.0000001, c, c + partlen], c,
|
[c - partlen, c, c + partlen], c,
|
||||||
**kwargs))
|
**kwargs))
|
||||||
tmp.centroid = c
|
tmp.centroid = c
|
||||||
sets[set_name] = tmp
|
sets[set_name] = tmp
|
||||||
elif c == self.max - partlen:
|
elif c == self.max - partlen:
|
||||||
tmp = Composite(set_name, superset=True, **kwargs)
|
tmp = Composite(set_name, superset=True, **kwargs)
|
||||||
tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf,
|
tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf,
|
||||||
[0.0000001, 0.0,
|
[-pl2, 0.0,
|
||||||
pl2], 0.0, alpha=.5,
|
pl2], 0.0, alpha=1,
|
||||||
**kwargs))
|
**kwargs))
|
||||||
tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf,
|
tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf,
|
||||||
[c - partlen, c, c + 0.0000001], c,
|
[c - partlen, c, c + partlen], c,
|
||||||
**kwargs))
|
**kwargs))
|
||||||
tmp.centroid = c
|
tmp.centroid = c
|
||||||
sets[set_name] = tmp
|
sets[set_name] = tmp
|
||||||
@ -129,14 +129,14 @@ class TimeGridPartitioner(partitioner.Partitioner):
|
|||||||
points = []
|
points = []
|
||||||
|
|
||||||
fset = self.sets[self.ordered_sets[0]]
|
fset = self.sets[self.ordered_sets[0]]
|
||||||
points.append([fset.centroid, fset.centroid, fset.centroid])
|
points.append([fset.sets[1].lower, fset.sets[1].centroid, fset.sets[1].upper])
|
||||||
|
|
||||||
for ct, key in enumerate(self.ordered_sets[1:-2]):
|
for ct, key in enumerate(self.ordered_sets[1:-1]):
|
||||||
fset = self.sets[key]
|
fset = self.sets[key]
|
||||||
points.append([fset.lower, fset.centroid, fset.upper])
|
points.append([fset.lower, fset.centroid, fset.upper])
|
||||||
|
|
||||||
fset = self.sets[self.ordered_sets[-1]]
|
fset = self.sets[self.ordered_sets[-1]]
|
||||||
points.append([fset.centroid, fset.centroid, fset.centroid])
|
points.append([fset.sets[1].lower, fset.sets[1].centroid, fset.sets[1].upper])
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
sys.setrecursionlimit(100000)
|
sys.setrecursionlimit(100000)
|
||||||
@ -145,7 +145,7 @@ class TimeGridPartitioner(partitioner.Partitioner):
|
|||||||
|
|
||||||
sys.setrecursionlimit(1000)
|
sys.setrecursionlimit(1000)
|
||||||
|
|
||||||
def search(self, data, type='index', results=3):
|
def search(self, data, **kwargs):
|
||||||
'''
|
'''
|
||||||
Perform a search for the nearest fuzzy sets of the point 'data'. This function was designed to work with several
|
Perform a search for the nearest fuzzy sets of the point 'data'. This function was designed to work with several
|
||||||
overlapped fuzzy sets.
|
overlapped fuzzy sets.
|
||||||
@ -155,15 +155,21 @@ class TimeGridPartitioner(partitioner.Partitioner):
|
|||||||
:param results: the number of nearest fuzzy sets to return
|
:param results: the number of nearest fuzzy sets to return
|
||||||
:return: a list with the nearest fuzzy sets
|
:return: a list with the nearest fuzzy sets
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
type = kwargs.get('type','index')
|
||||||
|
results = kwargs.get('results',3)
|
||||||
|
|
||||||
if self.kdtree is None:
|
if self.kdtree is None:
|
||||||
self.build_index()
|
self.build_index()
|
||||||
|
|
||||||
_, ix = self.kdtree.query([data, data, data], results)
|
_, ix = self.kdtree.query([data, data, data], results)
|
||||||
|
|
||||||
|
ix = ix.tolist()
|
||||||
|
|
||||||
if 0 in ix:
|
if 0 in ix:
|
||||||
ix[-1] = self.partitions-1
|
ix.insert(0, self.partitions-1)
|
||||||
elif self.partitions-1 in ix:
|
elif self.partitions-1 in ix:
|
||||||
ix[-1] = 0
|
ix.insert(0, 0)
|
||||||
|
|
||||||
if type == 'name':
|
if type == 'name':
|
||||||
return [self.ordered_sets[k] for k in sorted(ix)]
|
return [self.ordered_sets[k] for k in sorted(ix)]
|
||||||
|
@ -191,7 +191,7 @@ class Partitioner(object):
|
|||||||
elif data > self.max:
|
elif data > self.max:
|
||||||
return self.partitions-1
|
return self.partitions-1
|
||||||
|
|
||||||
def search(self, data, type='index', results=3):
|
def search(self, data, **kwargs):
|
||||||
'''
|
'''
|
||||||
Perform a search for the nearest fuzzy sets of the point 'data'. This function was designed to work with several
|
Perform a search for the nearest fuzzy sets of the point 'data'. This function was designed to work with several
|
||||||
overlapped fuzzy sets.
|
overlapped fuzzy sets.
|
||||||
@ -204,6 +204,9 @@ class Partitioner(object):
|
|||||||
if self.kdtree is None:
|
if self.kdtree is None:
|
||||||
self.build_index()
|
self.build_index()
|
||||||
|
|
||||||
|
type = kwargs.get('type','index')
|
||||||
|
results = kwargs.get('results', 3)
|
||||||
|
|
||||||
_, ix = self.kdtree.query([data, data, data], results)
|
_, ix = self.kdtree.query([data, data, data], results)
|
||||||
|
|
||||||
if type == 'name':
|
if type == 'name':
|
||||||
|
@ -171,25 +171,29 @@ from pyFTS.partitioners import Grid
|
|||||||
sp = {'seasonality': DateTime.day_of_year , 'names': ['Jan','Fev','Mar','Abr','Mai','Jun','Jul', 'Ago','Set','Out','Nov','Dez']}
|
sp = {'seasonality': DateTime.day_of_year , 'names': ['Jan','Fev','Mar','Abr','Mai','Jun','Jul', 'Ago','Set','Out','Nov','Dez']}
|
||||||
|
|
||||||
vmonth = variable.Variable("Month", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=12,
|
vmonth = variable.Variable("Month", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=12,
|
||||||
data=train, partitioner_specific=sp)
|
data=train, partitioner_specific=sp, alpha_cut=.5)
|
||||||
|
|
||||||
sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0,24)]}
|
sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0,24)]}
|
||||||
|
|
||||||
vhour = variable.Variable("Hour", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=24,
|
vhour = variable.Variable("Hour", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=24,
|
||||||
data=train, partitioner_specific=sp)
|
data=train, partitioner_specific=sp, alpha_cut=.5)
|
||||||
|
|
||||||
|
#print(vhour.partitioner)
|
||||||
|
|
||||||
|
#print(vmonth.partitioner.fuzzyfy(180))
|
||||||
|
|
||||||
vavg = variable.Variable("Radiation", data_label="glo_avg", alias='rad',
|
vavg = variable.Variable("Radiation", data_label="glo_avg", alias='rad',
|
||||||
partitioner=Grid.GridPartitioner, npart=30, alpha_cut=.3,
|
partitioner=Grid.GridPartitioner, npart=25, alpha_cut=.3,
|
||||||
data=train)
|
data=train)
|
||||||
|
|
||||||
from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid
|
from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid
|
||||||
|
|
||||||
fs = grid.IncrementalGridCluster(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg)
|
fs = grid.IncrementalGridCluster(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg)
|
||||||
|
|
||||||
|
|
||||||
model = cmvfts.ClusteredMVFTS(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg,
|
model = cmvfts.ClusteredMVFTS(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg,
|
||||||
partitioner=fs, knn=3)
|
partitioner=fs, knn=3)
|
||||||
|
|
||||||
model.fit(train)
|
model.fit(train)
|
||||||
|
|
||||||
print(fs)
|
print(len(model))
|
||||||
|
|
||||||
print(model)
|
|
||||||
|
Loading…
Reference in New Issue
Block a user