- First fully functional multivariate FTS method (multivariate extension of Chen's method)

This commit is contained in:
Petrônio Cândido 2018-03-01 14:13:17 -03:00
parent 0e2ea9927d
commit 5e7c4794e2
18 changed files with 337 additions and 66 deletions

View File

@ -7,7 +7,7 @@ class FuzzySet(object):
"""
Fuzzy Set
"""
def __init__(self, name, mf, parameters, centroid, alpha=1.0, type='common'):
def __init__(self, name, mf, parameters, centroid, alpha=1.0, **kwargs):
"""
Create a Fuzzy Set
:param name: fuzzy set name
@ -20,7 +20,8 @@ class FuzzySet(object):
self.parameters = parameters
self.centroid = centroid
self.alpha = alpha
self.type = type
self.type = kwargs.get('type', 'common')
self.variable = kwargs.get('variable',None)
":param Z: Partition function in respect to the membership function"
self.Z = None
if self.mf == Membership.trimf:

View File

@ -15,14 +15,14 @@ class FLRG(object):
ret = 0.0
if isinstance(self.LHS, (list, set)):
assert len(self.LHS) == len(data)
ret = min([self.LHS[ct].membership(dat) for ct, dat in enumerate(data)])
ret = np.nanmin([self.LHS[ct].membership(dat) for ct, dat in enumerate(data)])
else:
ret = self.LHS.membership(data)
return ret
def get_midpoint(self):
if self.midpoint is None:
self.midpoint = sum(self.get_midpoints())/len(self.RHS)
self.midpoint = np.nanmean(self.get_midpoints())
return self.midpoint
def get_midpoints(self):

View File

@ -53,7 +53,7 @@ def flat(dados):
yield inst
def buildTreeWithoutOrder(node, lags, level):
def build_tree_without_order(node, lags, level):
if level not in lags:
return
@ -62,4 +62,4 @@ def buildTreeWithoutOrder(node, lags, level):
node.appendChild(FLRGTreeNode(s))
for child in node.getChildren():
buildTreeWithoutOrder(child, lags, level + 1)
build_tree_without_order(child, lags, level + 1)

View File

@ -6,6 +6,13 @@ import pkg_resources
def get_data():
filename = pkg_resources.resource_filename('pyFTS', 'data/TAIEX.csv')
dat = pd.read_csv(filename, sep=";")
dat = pd.read_csv(filename, sep=",")
dat = np.array(dat["avg"])
return dat
def get_dataframe():
    """
    Load the packaged TAIEX CSV file as a pandas DataFrame.

    The file is read as comma-separated text and its "Date" column is
    converted with ``pd.to_datetime``.

    :return: the full DataFrame read from 'data/TAIEX.csv'
    """
    csv_path = pkg_resources.resource_filename('pyFTS', 'data/TAIEX.csv')
    frame = pd.read_csv(csv_path, sep=",")
    frame["Date"] = pd.to_datetime(frame["Date"])
    return frame

View File

@ -154,7 +154,7 @@ class EnsembleFTS(fts.FTS):
root = tree.FLRGTreeNode(None)
tree.buildTreeWithoutOrder(root, lags, 0)
tree.build_tree_without_order(root, lags, 0)
for p in root.paths():
path = list(reversed(list(filter(None.__ne__, p))))
@ -199,7 +199,7 @@ class EnsembleFTS(fts.FTS):
root = tree.FLRGTreeNode(None)
tree.buildTreeWithoutOrder(root, lags, 0)
tree.build_tree_without_order(root, lags, 0)
for p in root.paths():
path = list(reversed(list(filter(None.__ne__, p))))

View File

@ -1,4 +1,6 @@
import numpy as np
from pyFTS.common import flrg as flg
class FLR(object):
"""Multivariate Fuzzy Logical Relationship"""
@ -19,4 +21,40 @@ class FLR(object):
self.RHS = set
def __str__(self):
return str([k.name for k in self.LHS]) + " -> " + self.RHS.name
return str([k +":"+self.LHS[k].name for k in self.LHS.keys()]) + " -> " + self.RHS.name
class FLRG(flg.FLRG):
    """
    Multivariate Fuzzy Logical Relationship Group.

    The LHS is a dict mapping a variable name to one fuzzy set; the RHS is
    a set of fuzzy sets.
    """

    def __init__(self, **kwargs):
        """
        :param lhs: (keyword) dict used as the LHS; a new empty dict if omitted
        """
        super(FLRG, self).__init__(0, **kwargs)
        self.LHS = kwargs.get('lhs', {})
        self.RHS = set()
        # Textual key, computed on the first get_key() call and then reused.
        self.key = None

    def set_lhs(self, var, set):
        """Store ``set`` as the LHS fuzzy set of variable ``var``."""
        self.LHS[var] = set

    def append_rhs(self, set):
        """Add ``set`` to the RHS."""
        self.RHS.add(set)

    def get_key(self):
        """
        Return the comma-separated "variable:set name" pairs of the LHS.

        The string is built once and cached in ``self.key``; later changes
        to the LHS do not refresh it.
        """
        if self.key is None:
            pairs = [var + ":" + self.LHS[var].name for var in self.LHS.keys()]
            self.key = ",".join(pairs)
        return self.key

    def get_membership(self, data):
        """
        Return the smallest non-NaN membership among the LHS sets.

        :param data: mapping indexed by the same variable names as the LHS
        """
        memberships = []
        for var in self.LHS.keys():
            memberships.append(self.LHS[var].membership(data[var]))
        return np.nanmin(memberships)

    def __str__(self):
        rhs_names = ""
        for fuzzy_set in self.RHS:
            # Separator only once something has already been written.
            if len(rhs_names) > 0:
                rhs_names += ","
            rhs_names += fuzzy_set.name
        return self.get_key() + " -> " + rhs_names

View File

@ -1,17 +1,16 @@
from pyFTS.common import fts
from pyFTS.common import fts, FuzzySet, FLR, Membership, tree
from pyFTS.partitioners import Grid
from pyFTS.models.multivariate import FLR as MVFLR
import numpy as np
import pandas as pd
class Variable:
    """
    Description of one variable of a multivariate model.

    Holds the variable name, its alias, the label used to look the variable
    up in the data, an optional partitioner, a type tag and an optional
    transformation.
    """

    def __init__(self, name, **kwargs):
        """
        :param name: variable name
        :param alias: (keyword) alternative name; defaults to ``name``
        :param data_label: (keyword) label of the variable in the data;
            defaults to the alias
        :param partitioner: (keyword) partitioner of the variable; default None
        :param type: (keyword) type tag; default 'common'
        :param transformation: (keyword) transformation; default None
        """
        self.name = name
        self.alias = kwargs.get('alias', self.name)
        # The label used to be read from the 'alias' key, which silently
        # ignored an explicit data_label. Falling back to the alias keeps
        # the previous result for callers that never passed data_label.
        self.data_label = kwargs.get('data_label', self.alias)
        self.partitioner = kwargs.get('partitioner', None)
        self.type = kwargs.get('type', 'common')
        self.transformation = kwargs.get('transformation', None)

    def __str__(self):
        return self.name
def fuzzyfy_instance(data_point, var):
    """
    Select the fuzzy sets of ``var`` that ``data_point`` belongs to.

    :param data_point: value handed to each set's ``membership`` method
    :param var: object exposing ``partitioner.sets``
    :return: list of the sets whose membership is strictly greater than zero,
        in partitioner order
    """
    candidates = var.partitioner.sets
    memberships = np.array([fs.membership(data_point) for fs in candidates])
    fired = memberships > 0.0
    return [fs for fs, hit in zip(candidates, fired) if hit]
class MVFTS(fts.FTS):
@ -19,8 +18,122 @@ class MVFTS(fts.FTS):
super(MVFTS, self).__init__(1, name, **kwargs)
self.explanatory_variables = []
self.target_variable = None
self.flrgs = {}
def append_variable(self, var):
    """
    Register one more explanatory variable.

    :param var: variable appended to ``self.explanatory_variables``
    """
    self.explanatory_variables.append(var)
def format_data(self, data):
    """
    Re-key a data record by variable name.

    :param data: record indexable by each explanatory variable's ``data_label``
    :return: dict mapping each explanatory variable's ``name`` to its value
    """
    return {var.name: data[var.data_label] for var in self.explanatory_variables}
def apply_transformations(self, data, params=None, updateUoD=False, **kwargs):
    """
    Return a transformed deep copy of ``data``.

    Each explanatory variable's column (looked up by ``data_label``) is
    replaced by the result of that variable's own ``apply_transformations``
    on the column values. ``data`` itself is not modified.

    :param data: pandas DataFrame with the variables' columns
    :param params: not used by this method
    :param updateUoD: not used by this method
    :return: the transformed copy
    """
    transformed = data.copy(deep=True)
    for var in self.explanatory_variables:
        label = var.data_label
        raw_values = data[label].values
        transformed[label] = var.apply_transformations(raw_values)
    return transformed
def generate_lhs_flrs(self, data):
    """
    Build every LHS-only rule that one data record can fire.

    Each explanatory variable's value is fuzzified, the per-variable set
    lists are expanded through the FLRG tree, and one FLR is created per
    tree path with one fuzzy set per variable on its LHS.

    :param data: record indexable by each explanatory variable's ``data_label``
    :return: list of multivariate FLR objects with only the LHS filled in
    """
    lags = {}
    for position, var in enumerate(self.explanatory_variables):
        lags[position] = fuzzyfy_instance(data[var.data_label], var)

    root = tree.FLRGTreeNode(None)
    tree.build_tree_without_order(root, lags, 0)

    flrs = []
    for p in root.paths():
        # Drop the None entries of the path. An explicit identity test
        # replaces filter(None.__ne__, ...), which depended on the truth
        # value of NotImplemented (deprecated in 3.9, TypeError in 3.14).
        path = [element for element in p if element is not None]
        path.reverse()

        flr = MVFLR.FLR()
        for fuzzy_set in path:
            flr.set_lhs(fuzzy_set.variable, fuzzy_set)
        flrs.append(flr)

    return flrs
def generate_flrs(self, data):
    """
    Build the complete (LHS -> RHS) rules from consecutive rows of ``data``.

    For each pair of consecutive index labels, the earlier row supplies the
    LHS candidates (via ``generate_lhs_flrs``) and the target variable's
    value on the later row is fuzzified to supply the RHS sets. One rule is
    produced for every (LHS candidate, RHS set) combination.

    :param data: pandas DataFrame holding the explanatory and target columns
    :return: list of multivariate FLR objects, each a distinct object
    """
    flrs = []
    for ct in range(1, len(data.index)):
        ix = data.index[ct - 1]
        data_point = data.loc[ix]
        lhs_candidates = self.generate_lhs_flrs(data_point)

        target_ix = data.index[ct]
        target_point = data[self.target_variable.data_label][target_ix]
        targets = fuzzyfy_instance(target_point, self.target_variable)

        for candidate in lhs_candidates:
            for target_set in targets:
                # Create a new rule per target set. Re-using the candidate
                # and overwriting its RHS would leave every appended entry
                # pointing at one object that holds only the last RHS.
                flr = MVFLR.FLR()
                for var_name in candidate.LHS.keys():
                    flr.set_lhs(var_name, candidate.LHS[var_name])
                flr.set_rhs(target_set)
                flrs.append(flr)

    return flrs
def generate_flrg(self, flrs):
    """
    Group rules that share the same LHS.

    :param flrs: iterable of multivariate FLR objects (``LHS`` and ``RHS`` set)
    :return: dict mapping each LHS key to an FLRG holding the RHS of every
        rule with that LHS
    """
    groups = {}
    for flr in flrs:
        candidate = MVFLR.FLRG(lhs=flr.LHS)
        key = candidate.get_key()
        # The first rule seen for a key provides the group object.
        group = groups.setdefault(key, candidate)
        group.append_rhs(flr.RHS)
    return groups
def train(self, data, **kwargs):
    """
    Fit the model: transform the data, extract the rules and group them.

    The resulting groups are stored in ``self.flrgs``.

    :param data: training data, passed to ``apply_transformations``
    """
    transformed = self.apply_transformations(data)
    rules = self.generate_flrs(transformed)
    self.flrgs = self.generate_flrg(rules)
def forecast(self, data, **kwargs):
    """
    Forecast the target variable for every row of ``data``.

    For each row, the LHS rules it fires are looked up among the trained
    groups. The forecast is the average of the matched groups' midpoints,
    weighted by their memberships; rules with no trained group contribute
    a membership and midpoint of zero.

    :param data: pandas DataFrame holding the explanatory and target columns
    :return: the forecasts after the target variable's inverse
        transformation, one per row of ``data``
    """
    ret = []
    ndata = self.apply_transformations(data)

    for ix in ndata.index:
        data_point = ndata.loc[ix]
        # The same for every rule of this row, so built once.
        formatted = self.format_data(data_point)

        mvs = []
        mps = []
        for flr in self.generate_lhs_flrs(data_point):
            key = MVFLR.FLRG(lhs=flr.LHS).get_key()
            if key not in self.flrgs:
                mvs.append(0.)
                mps.append(0.)
            else:
                flrg = self.flrgs[key]
                mvs.append(flrg.get_membership(formatted))
                mps.append(flrg.get_midpoint())

        mv = np.array(mvs)
        mp = np.array(mps)
        total = np.sum(mv)
        # With no fired rule the weighted mean is 0/0; yield NaN directly
        # instead of going through a division that only emits a warning.
        ret.append(np.dot(mv, mp) / total if total != 0 else np.nan)

    # Keep the inverse-transformed values: the call returns them and they
    # were previously thrown away, leaving the forecasts in transformed space.
    ret = self.target_variable.apply_inverse_transformations(
        ret, params=data[self.target_variable.data_label].values)
    return ret
def __str__(self):
_str = self.name + ":\n"
for k in self.flrgs.keys():
_str += str(self.flrgs[k]) + "\n"
return _str

View File

@ -0,0 +1,49 @@
from pyFTS.common import fts, FuzzySet, FLR, Membership, tree
from pyFTS.partitioners import Grid
from pyFTS.models.multivariate import FLR as MVFLR
class Variable:
    """
    One variable of a multivariate model, with its optional partitioner
    and transformation.
    """

    def __init__(self, name, **kwargs):
        """
        :param name: variable name
        :param alias: (keyword) alternative name, passed to the partitioner
            as its prefix; defaults to ``name``
        :param data_label: (keyword) column label of the variable in the
            data; defaults to ``name``
        :param type: (keyword) type tag; default 'common'
        :param transformation: (keyword) transformation object; default None
        :param transformation_params: (keyword) parameters handed to the
            transformation; default None
        :param data: (keyword) when given, the partitioner is built right
            away with ``build(**kwargs)``
        """
        self.name = name
        self.partitioner = None
        self.alias = kwargs.get('alias', self.name)
        self.data_label = kwargs.get('data_label', self.name)
        self.type = kwargs.get('type', 'common')
        self.transformation = kwargs.get('transformation', None)
        self.transformation_params = kwargs.get('transformation_params', None)

        if kwargs.get('data', None) is not None:
            self.build(**kwargs)

    def build(self, **kwargs):
        """
        Create the partitioner of this variable.

        :param partitioner: (keyword) partitioner class; default Grid.GridPartitioner
        :param func: (keyword) membership function; default Membership.trimf
        :param npart: (keyword) number of partitions; default 10
        :param data: (keyword) data indexable by ``data_label`` and exposing ``.values``
        """
        partitioner_cls = kwargs.get('partitioner', Grid.GridPartitioner)
        membership_func = kwargs.get('func', Membership.trimf)
        partitions = kwargs.get('npart', 10)
        source = kwargs.get('data', None)
        self.partitioner = partitioner_cls(
            data=source[self.data_label].values,
            npart=partitions,
            func=membership_func,
            transformation=self.transformation,
            prefix=self.alias,
            variable=self.name)

    def apply_transformations(self, data, **kwargs):
        """
        Apply the variable's transformation to ``data``.

        :param params: (keyword) when not None, replaces the stored
            ``transformation_params`` before the transformation runs
        :return: the transformed data, or ``data`` itself when the variable
            has no transformation
        """
        params = kwargs.get('params', None)
        if params is not None:
            self.transformation_params = params

        if self.transformation is None:
            return data
        return self.transformation.apply(data, self.transformation_params)

    def apply_inverse_transformations(self, data, **kwargs):
        """
        Apply the inverse of the variable's transformation to ``data``.

        :param params: (keyword) when not None, replaces the stored
            ``transformation_params`` before the inverse runs
        :return: the inverse-transformed data, or ``data`` itself when the
            variable has no transformation
        """
        params = kwargs.get('params', None)
        if params is not None:
            self.transformation_params = params

        if self.transformation is None:
            return data
        return self.transformation.inverse(data, self.transformation_params)

    def __str__(self):
        return self.name

View File

@ -87,8 +87,8 @@ class FuzzySet(FuzzySet.FuzzySet):
Temporal/Seasonal Fuzzy Set
"""
def __init__(self, datepart, name, mf, parameters, centroid, alpha=1.0):
super(FuzzySet, self).__init__(name, mf, parameters, centroid, alpha)
def __init__(self, datepart, name, mf, parameters, centroid, alpha=1.0, **kwargs):
super(FuzzySet, self).__init__(name, mf, parameters, centroid, alpha, type = 'datetime', **kwargs)
self.datepart = datepart
def membership(self, x):

View File

@ -9,19 +9,20 @@ import matplotlib.pylab as plt
class TimeGridPartitioner(partitioner.Partitioner):
"""Even Length DateTime Grid Partitioner"""
def __init__(self, data, npart, season, func=Membership.trimf, names=None):
def __init__(self, **kwargs):
"""
Even Length Grid Partitioner
:param seasonality: Time granularity, from pyFTS.models.seasonal.common.DateTime
:param data: Training data of which the universe of discourse will be extracted. The universe of discourse is the open interval between the minimum and maximum values of the training data.
:param npart: The number of universe of discourse partitions, i.e., the number of fuzzy sets that will be created
:param func: Fuzzy membership function (pyFTS.common.Membership)
"""
super(TimeGridPartitioner, self).__init__("TimeGrid", data, npart, func=func, names=names, transformation=None,
indexer=None, preprocess=False)
super(TimeGridPartitioner, self).__init__(name="TimeGrid", **kwargs)
self.season = season
self.season = kwargs.get('seasonality', DateTime.day_of_year)
data = kwargs.get('data', None)
if self.season == DateTime.year:
ndata = [strip_datepart(k, season) for k in data]
ndata = [strip_datepart(k, self.season) for k in data]
self.min = min(ndata)
self.max = max(ndata)
else:
@ -34,6 +35,8 @@ class TimeGridPartitioner(partitioner.Partitioner):
def build(self, data):
sets = []
kwargs = {'variable': self.variable}
if self.season == DateTime.year:
dlen = (self.max - self.min)
partlen = dlen / self.partitions
@ -49,30 +52,37 @@ class TimeGridPartitioner(partitioner.Partitioner):
tmp = Composite(set_name, superset=True)
tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf,
[self.season.value - pl2, self.season.value,
self.season.value + 0.0000001], self.season.value, alpha=.5))
self.season.value + 0.0000001], self.season.value, alpha=.5,
**kwargs))
tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf,
[c - partlen, c, c + partlen], c))
[c - partlen, c, c + partlen], c,
**kwargs))
tmp.centroid = c
sets.append(tmp)
else:
sets.append(FuzzySet(self.season, set_name, Membership.trimf,
[c - partlen, c, c + partlen], c))
[c - partlen, c, c + partlen], c,
**kwargs))
elif self.membership_function == Membership.gaussmf:
sets.append(FuzzySet(self.season, set_name, Membership.gaussmf, [c, partlen / 3], c))
sets.append(FuzzySet(self.season, set_name, Membership.gaussmf, [c, partlen / 3], c,
**kwargs))
elif self.membership_function == Membership.trapmf:
q = partlen / 4
if c == self.min:
tmp = Composite(set_name, superset=True)
tmp.append_set(FuzzySet(self.season, set_name, Membership.trimf,
[self.season.value - pl2, self.season.value,
self.season.value + 0.0000001], 0))
self.season.value + 0.0000001], 0,
**kwargs))
tmp.append_set(FuzzySet(self.season, set_name, Membership.trapmf,
[c - partlen, c - q, c + q, c + partlen], c))
[c - partlen, c - q, c + q, c + partlen], c,
**kwargs))
tmp.centroid = c
sets.append(tmp)
else:
sets.append(FuzzySet(self.season, set_name, Membership.trapmf,
[c - partlen, c - q, c + q, c + partlen], c))
[c - partlen, c - q, c + q, c + partlen], c,
**kwargs))
count += 1
self.min = 0

View File

@ -78,8 +78,8 @@ def c_means(k, dados, tam):
class CMeansPartitioner(partitioner.Partitioner):
def __init__(self, data, npart, func = Membership.trimf, transformation=None, indexer=None):
super(CMeansPartitioner, self).__init__("CMeans", data, npart, func=func, transformation=transformation, indexer=indexer)
def __init__(self, **kwargs):
super(CMeansPartitioner, self).__init__(name="CMeans", **kwargs)
def build(self, data):
sets = []

View File

@ -79,8 +79,8 @@ def bestSplit(data, npart):
class EntropyPartitioner(partitioner.Partitioner):
"""Huarng Entropy Partitioner"""
def __init__(self, data, npart, func = Membership.trimf, transformation=None, indexer=None):
super(EntropyPartitioner, self).__init__("Entropy", data, npart, func=func, transformation=transformation, indexer=indexer)
def __init__(self, **kwargs):
super(EntropyPartitioner, self).__init__(name="Entropy", **kwargs)
def build(self, data):
sets = []

View File

@ -104,8 +104,8 @@ class FCMPartitioner(partitioner.Partitioner):
"""
"""
def __init__(self, data,npart,func = Membership.trimf, transformation=None, indexer=None):
super(FCMPartitioner, self).__init__("FCM", data, npart, func=func, transformation=transformation, indexer=indexer)
def __init__(self, **kwargs):
super(FCMPartitioner, self).__init__(name="FCM", **kwargs)
def build(self,data):
sets = []

View File

@ -9,7 +9,7 @@ from pyFTS.partitioners import partitioner
class GridPartitioner(partitioner.Partitioner):
"""Even Length Grid Partitioner"""
def __init__(self, data, npart, func = Membership.trimf, transformation=None, indexer=None):
def __init__(self, **kwargs):
"""
Even Length Grid Partitioner
:param data: Training data of which the universe of discourse will be extracted. The universe of discourse is the open interval between the minimum and maximum values of the training data.
@ -18,11 +18,13 @@ class GridPartitioner(partitioner.Partitioner):
:param transformation: data transformation to be applied on data
:param indexer:
"""
super(GridPartitioner, self).__init__("Grid", data, npart, func=func, transformation=transformation, indexer=indexer)
super(GridPartitioner, self).__init__(name="Grid", **kwargs)
def build(self, data):
sets = []
kwargs = {'type': self.type, 'variable': self.variable}
dlen = self.max - self.min
partlen = dlen / self.partitions
@ -30,14 +32,14 @@ class GridPartitioner(partitioner.Partitioner):
for c in np.arange(self.min, self.max, partlen):
if self.membership_function == Membership.trimf:
sets.append(
FuzzySet.FuzzySet(self.prefix + str(count), Membership.trimf, [c - partlen, c, c + partlen],c))
FuzzySet.FuzzySet(self.prefix + str(count), Membership.trimf, [c - partlen, c, c + partlen],c,**kwargs))
elif self.membership_function == Membership.gaussmf:
sets.append(
FuzzySet.FuzzySet(self.prefix + str(count), Membership.gaussmf, [c, partlen / 3], c))
FuzzySet.FuzzySet(self.prefix + str(count), Membership.gaussmf, [c, partlen / 3], c,**kwargs))
elif self.membership_function == Membership.trapmf:
q = partlen / 2
sets.append(
FuzzySet.FuzzySet(self.prefix + str(count), Membership.trapmf, [c - partlen, c - q, c + q, c + partlen], c))
FuzzySet.FuzzySet(self.prefix + str(count), Membership.trapmf, [c - partlen, c - q, c + q, c + partlen], c,**kwargs))
count += 1
self.min = self.min - partlen

View File

@ -12,8 +12,8 @@ from pyFTS.partitioners import partitioner
class HuarngPartitioner(partitioner.Partitioner):
"""Huarng Empirical Partitioner"""
def __init__(self, data,npart,func = Membership.trimf, transformation=None, indexer=None):
super(HuarngPartitioner, self).__init__("Huarng", data, npart, func=func, transformation=transformation, indexer=indexer)
def __init__(self, **kwargs):
super(HuarngPartitioner, self).__init__(name="Huarng", **kwargs)
def build(self, data):
diff = Transformations.Differential(1)

View File

@ -8,8 +8,7 @@ class Partitioner(object):
Universe of Discourse partitioner. Split data on several fuzzy sets
"""
def __init__(self, name, data, npart, func=Membership.trimf, names=None, prefix="A",
transformation=None, indexer=None, preprocess=True):
def __init__(self, **kwargs):
"""
Universe of Discourse partitioner scheme. Split data on several fuzzy sets
:param name: partitioner name
@ -20,24 +19,28 @@ class Partitioner(object):
:param prefix: prefix of auto generated partition names
:param transformation: data transformation to be applied on data
"""
self.name = name
self.partitions = npart
self.name = kwargs.get('name',"")
self.partitions = kwargs.get('npart',10)
self.sets = []
self.membership_function = func
self.setnames = names
self.prefix = prefix
self.transformation = transformation
self.indexer = indexer
self.membership_function = kwargs.get('func',Membership.trimf)
self.setnames = kwargs.get('names',None)
self.prefix = kwargs.get('prefix','A')
self.transformation = kwargs.get('transformation',None)
self.indexer = kwargs.get('indexer',None)
self.variable = kwargs.get('variable', None)
self.type = kwargs.get('type', 'common')
if preprocess:
if kwargs.get('preprocess',True):
data = kwargs.get('data',[None])
if self.indexer is not None:
ndata = self.indexer.get_data(data)
else:
ndata = data
if transformation is not None:
ndata = transformation.apply(ndata)
if self.transformation is not None:
ndata = self.transformation.apply(ndata)
else:
ndata = data
@ -84,7 +87,8 @@ class Partitioner(object):
self.plot_set(ax, ss)
ticks.append(str(round(s.centroid,0))+'\n'+s.name)
x.append(s.centroid)
plt.xticks(x,ticks)
ax.xaxis.set_ticklabels(ticks)
ax.xaxis.set_ticks(x)
def plot_set(self, ax, s):
if s.mf == Membership.trimf:

View File

@ -0,0 +1,48 @@
# Script exercising the multivariate FTS model on the TAIEX dataframe:
# builds five variables, trains on one slice of rows, forecasts another
# slice and prints actual versus forecasted Volume.
import pandas as pd
import matplotlib.pylab as plt
from pyFTS.data import TAIEX as tx
from pyFTS.common import Transformations
# Transformation instances; only `bc` is used below (on the Volume variable),
# `diff` is created but never referenced in this script.
bc = Transformations.BoxCox(0)
diff = Transformations.Differential(1)
# Load the data and drop the rows containing missing values.
df = tx.get_dataframe()
df = df.dropna()
#df.loc[2209]
# Positional split: rows 2000..2499 for training, rows 2500..2999 for testing.
train = df.iloc[2000:2500]
test = df.iloc[2500:3000]
from pyFTS.partitioners import Grid, Util as pUtil
from pyFTS.models.multivariate import common, variable
# Model created with an empty name.
model = common.MVFTS("")
#fig, axes = plt.subplots(nrows=5, ncols=1,figsize=[10,10])
# Each variable is partitioned with a grid partitioner built from the whole
# dataframe `df`, not only from `train`.
# NOTE(review): the trailing "#train)" remnants suggest the data argument was
# switched from `train` to `df` at some point - confirm which is intended.
vopen = variable.Variable("Open", data_label="Openly", partitioner=Grid.GridPartitioner, npart=40, data=df)
model.append_variable(vopen)
#vopen.partitioner.plot(axes[0])
vhigh = variable.Variable("High", data_label="Highest", partitioner=Grid.GridPartitioner, npart=40, data=df)#train)
model.append_variable(vhigh)
#vhigh.partitioner.plot(axes[1])
vlow = variable.Variable("Low", data_label="Lowermost", partitioner=Grid.GridPartitioner, npart=40, data=df)#train)
model.append_variable(vlow)
#vlow.partitioner.plot(axes[2])
vclose = variable.Variable("Close", data_label="Close", partitioner=Grid.GridPartitioner, npart=40, data=df)#train)
model.append_variable(vclose)
#vclose.partitioner.plot(axes[3])
# Volume uses 100 partitions and the `bc` transformation.
vvol = variable.Variable("Volume", data_label="Volume", partitioner=Grid.GridPartitioner, npart=100, data=df,
transformation=bc)#train)
model.append_variable(vvol)
#vvol.partitioner.plot(axes[4])
# Volume is both an explanatory variable (appended above) and the target.
model.target_variable = vvol
#plt.tight_layout()
model.train(train)
forecasted = model.forecast(test)
# Print the actual Volume values, then the forecasts, both rounded to 0 decimals.
print([round(k,0) for k in test['Volume'].values.tolist()])
print([round(k,0) for k in forecasted])

View File

@ -4,11 +4,10 @@ setup(
name='pyFTS',
packages=['pyFTS', 'pyFTS.benchmarks', 'pyFTS.common', 'pyFTS.data', 'pyFTS.models.ensemble',
'pyFTS.models', 'pyFTS.models.seasonal', 'pyFTS.partitioners', 'pyFTS.probabilistic',
'pyFTS.tests', 'pyFTS.models.nonstationary'],
'pyFTS.tests', 'pyFTS.models.nonstationary', 'pyFTS.models.multivariate'],
#package_dir={}
package_data={'benchmarks': ['*'], 'common': ['*'], 'data': ['*'],
'models': ['*'], 'seasonal': ['*'], 'ensemble': ['*'],
'partitioners': ['*'], 'probabilistic': ['*'], 'tests': ['*']},
'models': ['*'], 'partitioners': ['*'], 'probabilistic': ['*'], 'tests': ['*']},
#data_files=[('data', ['pyFTS/data/Enrollments.csv', 'pyFTS/data/AirPassengers.csv'])],
include_package_data=True,
version='1.1.1',