Improvements on Seasonal and Multivariate models

This commit is contained in:
Petrônio Cândido 2019-02-13 14:11:24 -02:00
parent e010df344a
commit 66e41f42ee
6 changed files with 87 additions and 59 deletions

View File

@ -177,8 +177,11 @@ def persist_obj(obj, file):
:param obj: object on memory
:param file: file name to store the object
"""
try:
with open(file, 'wb') as _file:
dill.dump(obj, _file)
except Exception as ex:
print("File {} could not be saved due exception {}".format(file, ex))
def load_obj(file):

View File

@ -42,9 +42,12 @@ class MultivariateFuzzySet(Composite.FuzzySet):
return np.nanmin(mv)
def fuzzyfy_instance(data_point, var):
def fuzzyfy_instance(data_point, var, tuples=True):
    """
    Fuzzyfy a single value using the partitioner of a variable.

    :param data_point: value to be fuzzyfied
    :param var: variable supplying the partitioner, the alpha cut and the name
    :param tuples: when True, pair every fuzzyfied set with the variable name
    :return: a list of (var.name, set) tuples if tuples is truthy, otherwise
             the result of FuzzySet.fuzzyfy unchanged
    """
    memberships = FuzzySet.fuzzyfy(data_point, var.partitioner, mode='sets',
                                   method='fuzzy', alpha_cut=var.alpha_cut)

    # Callers that key their structures by variable name only need the raw sets
    if not tuples:
        return memberships

    return [(var.name, fset) for fset in memberships]
def fuzzyfy_instance_clustered(data_point, cluster, **kwargs):

View File

@ -1,11 +1,24 @@
from pyFTS.common import fts, FuzzySet, FLR, Membership, tree
from pyFTS.common import fts, FuzzySet, FLR, Membership
from pyFTS.partitioners import Grid
from pyFTS.models.multivariate import FLR as MVFLR, common, flrg as mvflrg
from itertools import product
import numpy as np
import pandas as pd
def product_dict(**kwargs):
    '''
    Lazily enumerate the Cartesian product of the keyword arguments' values,
    yielding each combination as a dictionary keyed by the argument names.

    Code by Seth Johnson

    :param kwargs: name -> iterable of candidate values for that name
    :return: generator of dictionaries, one per combination of values
    '''
    names = list(kwargs)
    for combination in product(*kwargs.values()):
        # Re-attach each chosen value to the name it was drawn from
        yield {name: value for name, value in zip(names, combination)}
class MVFTS(fts.FTS):
"""
Multivariate extension of Chen's ConventionalFTS method
@ -52,22 +65,18 @@ class MVFTS(fts.FTS):
lags = {}
for vc, var in enumerate(self.explanatory_variables):
data_point = data[var.name]
lags[vc] = common.fuzzyfy_instance(data_point, var)
root = tree.FLRGTreeNode(None)
tree.build_tree_without_order(root, lags, 0)
for p in root.paths():
path = list(reversed(list(filter(None.__ne__, p))))
lags[var.name] = common.fuzzyfy_instance(data_point, var, tuples=False)
for path in product_dict(**lags):
flr = MVFLR.FLR()
for v, s in path:
flr.set_lhs(v, s)
for var, fset in path.items():
flr.set_lhs(var, fset)
if len(flr.LHS.keys()) == len(self.explanatory_variables):
flrs.append(flr)
else:
print(flr)
return flrs
@ -110,6 +119,7 @@ class MVFTS(fts.FTS):
def forecast(self, data, **kwargs):
ret = []
ndata = self.apply_transformations(data)
c = 0
for index, row in ndata.iterrows():
data_point = self.format_data(row)
flrs = self.generate_lhs_flrs(data_point)
@ -118,6 +128,15 @@ class MVFTS(fts.FTS):
for flr in flrs:
flrg = mvflrg.FLRG(lhs=flr.LHS)
if flrg.get_key() not in self.flrgs:
#Naïve approach is applied when no rules were found
if self.target_variable.name in flrg.LHS:
fs = flrg.LHS[self.target_variable.name]
fset = self.target_variable.partitioner.sets[fs]
mp = fset.centroid
mv = fset.membership(data_point[self.target_variable.name])
mvs.append(mv)
mps.append(mp)
else:
mvs.append(0.)
mps.append(0.)
else:

View File

@ -6,9 +6,15 @@ from pyFTS.partitioners import partitioner, Grid
from datetime import date as dt, datetime as dtm
class DateTime(Enum):
"""
Date and time granularities used for time-granularity and seasonality identification
"""
year = 1
half = 2 # six months
third = 3 # four months
quarter = 4 # three months
sixth = 6 # two months
month = 12
day_of_month = 30
day_of_year = 364
@ -37,6 +43,8 @@ def strip_datepart(date, date_part, mask=''):
tmp = date.year
elif date_part == DateTime.month:
tmp = date.month
elif date_part in (DateTime.half, DateTime.third, DateTime.quarter, DateTime.sixth):
tmp = (date.month // date_part.value) + 1
elif date_part == DateTime.day_of_year:
tmp = date.timetuple().tm_yday
elif date_part == DateTime.day_of_month:

View File

@ -55,10 +55,13 @@ class TimeGridPartitioner(partitioner.Partitioner):
dlen = (self.max - self.min)
partlen = dlen / self.partitions
elif self.season == DateTime.day_of_week:
self.min = 1
self.max = 8
partlen = 1
pl2 = 1
self.min, self.max, partlen, pl2 = 0, 7, 1, 1
elif self.season == DateTime.hour:
self.min, self.max, partlen, pl2 = 0, 24, 1, 1
elif self.season == DateTime.month:
self.min, self.max, partlen, pl2 = 1, 13, 1, 1
elif self.season in (DateTime.half, DateTime.third, DateTime.quarter, DateTime.sixth):
self.min, self.max, partlen, pl2 = 1, self.season.value+1, 1, 1
else:
partlen = self.season.value / self.partitions
pl2 = partlen / 2

View File

@ -92,14 +92,14 @@ from pyFTS.models.seasonal.common import DateTime
dataset = pd.read_csv('/home/petronio/Downloads/gefcom12.csv')
dataset = dataset.dropna()
train_mv = dataset.iloc[:25000]
test_mv = dataset.iloc[25000:]
train_mv = dataset.iloc[:15000]
test_mv = dataset.iloc[15000:]
from pyFTS.models.multivariate import common, variable, mvfts
from pyFTS.models.seasonal import partitioner as seasonal
from pyFTS.models.seasonal.common import DateTime
#'''
sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0,24)]}
vhour = variable.Variable("Hour", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=24,
@ -110,56 +110,48 @@ sp = {'seasonality': DateTime.day_of_week, 'names': ['mon','tue','wed','tur','fr
vday = variable.Variable("DayOfWeek", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=7,
data=train_mv, partitioner_specific=sp)
sp = {'seasonality': DateTime.day_of_year, 'names': ['jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec']}
#sp = {'seasonality': DateTime.day_of_month, 'names': ['jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec']}
vmonth = variable.Variable("Month", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=12,
sp = {'seasonality': DateTime.quarter}
vmonth = variable.Variable("Month", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=4,
data=train_mv, partitioner_specific=sp)
vload = variable.Variable("Load", data_label="load", alias='load',
partitioner=Grid.GridPartitioner, npart=35,
partitioner=Grid.GridPartitioner, npart=20,
data=train_mv)
vtemp = variable.Variable("Temperature", data_label="temperature", alias='temperature',
partitioner=Grid.GridPartitioner, npart=35,
partitioner=Grid.GridPartitioner, npart=20,
data=train_mv)
from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid
from itertools import combinations
models = []
vars = [vhour, vday, vload]
variables = [vhour, vday, vmonth, vtemp]
parameters = [
{}, {},
{'order': 2, 'knn': 1},
{'order': 2, 'knn': 2},
{'order': 2, 'knn': 3},
]
for ct, method in enumerate([mvfts.MVFTS, wmvfts.WeightedMVFTS,
cmvfts.ClusteredMVFTS, cmvfts.ClusteredMVFTS, cmvfts.ClusteredMVFTS]):
for nc in np.arange(1, 5):
for comb in combinations(variables, nc):
_vars = []
_vars.extend(comb)
_vars.append(vload)
if not method == cmvfts.ClusteredMVFTS:
model = method(explanatory_variables=_vars, target_variable=vload, **parameters[ct])
else:
fs = grid.GridCluster(explanatory_variables=_vars, target_variable=vload)
model = method(explanatory_variables=_vars, target_variable=vload, partitioner=fs, **parameters[ct])
for _v in comb:
model.shortname += _v.name
#fs = grid.GridCluster(explanatory_variables=vars, target_variable=vload)
#model = mvfts.MVFTS(explanatory_variables=vars, target_variable=vload)
model = wmvfts.WeightedMVFTS(explanatory_variables=vars, target_variable=vload)
#model = cmvfts.ClusteredMVFTS(explanatory_variables=vars, target_variable=vload,order=2, knn=3, partitioner=fs)
model.fit(train_mv)
print(model.shortname)
Util.persist_obj(model, model.shortname)
#'''
models.append(model.shortname)
#model = Util.load_obj('MVFTS')
#Util.persist_obj(model, model.shortname)
with open("rules.txt","w") as file:
file.write(str(model))
forecasts = model.predict(test_mv.iloc[:100])
forecasts.insert(0,None)
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[15,3])
ax.plot(test_mv['load'].values[:100],label='Original')
ax.plot(forecasts, label='predicted')
handles, labels = ax.get_legend_handles_labels()
lgd = ax.legend(handles, labels, loc=2, bbox_to_anchor=(1, 1))
Util.show_and_save_image(fig, model.shortname, True)