Improvements on Seasonal and Multivariate models

This commit is contained in:
Petrônio Cândido 2019-02-13 14:11:24 -02:00
parent e010df344a
commit 66e41f42ee
6 changed files with 87 additions and 59 deletions

View File

@ -177,8 +177,11 @@ def persist_obj(obj, file):
:param obj: object on memory :param obj: object on memory
:param file: file name to store the object :param file: file name to store the object
""" """
with open(file, 'wb') as _file: try:
dill.dump(obj, _file) with open(file, 'wb') as _file:
dill.dump(obj, _file)
except Exception as ex:
print("File {} could not be saved due exception {}".format(file, ex))
def load_obj(file): def load_obj(file):

View File

@ -42,9 +42,12 @@ class MultivariateFuzzySet(Composite.FuzzySet):
return np.nanmin(mv) return np.nanmin(mv)
def fuzzyfy_instance(data_point, var): def fuzzyfy_instance(data_point, var, tuples=True):
fsets = FuzzySet.fuzzyfy(data_point, var.partitioner, mode='sets', method='fuzzy', alpha_cut=var.alpha_cut) fsets = FuzzySet.fuzzyfy(data_point, var.partitioner, mode='sets', method='fuzzy', alpha_cut=var.alpha_cut)
return [(var.name, fs) for fs in fsets] if tuples:
return [(var.name, fs) for fs in fsets]
else:
return fsets
def fuzzyfy_instance_clustered(data_point, cluster, **kwargs): def fuzzyfy_instance_clustered(data_point, cluster, **kwargs):

View File

@ -1,11 +1,24 @@
from pyFTS.common import fts, FuzzySet, FLR, Membership, tree from pyFTS.common import fts, FuzzySet, FLR, Membership
from pyFTS.partitioners import Grid from pyFTS.partitioners import Grid
from pyFTS.models.multivariate import FLR as MVFLR, common, flrg as mvflrg from pyFTS.models.multivariate import FLR as MVFLR, common, flrg as mvflrg
from itertools import product
import numpy as np import numpy as np
import pandas as pd import pandas as pd
def product_dict(**kwargs):
    """
    Lazily enumerate every combination of the given keyword options.

    Each keyword argument names one dimension and supplies an iterable with
    the candidate values for that dimension. One dictionary is yielded for
    each element of the Cartesian product of those iterables, mapping every
    keyword to the value selected for it in that combination.

    Combinations follow the ordering of itertools.product: the last keyword
    varies fastest. If any keyword has no candidate values, nothing is
    yielded; if no keywords are given, a single empty dictionary is yielded.

    Recipe credited to Seth Johnson.

    :param kwargs: keyword -> iterable of candidate values for that keyword
    :return: generator of dictionaries, one per combination of values
    """
    # Snapshot the keys once so every yielded dict pairs them with the
    # product tuple positions in the same (insertion) order.
    keys = list(kwargs)
    for instance in product(*kwargs.values()):
        yield dict(zip(keys, instance))
class MVFTS(fts.FTS): class MVFTS(fts.FTS):
""" """
Multivariate extension of Chen's ConventionalFTS method Multivariate extension of Chen's ConventionalFTS method
@ -52,22 +65,18 @@ class MVFTS(fts.FTS):
lags = {} lags = {}
for vc, var in enumerate(self.explanatory_variables): for vc, var in enumerate(self.explanatory_variables):
data_point = data[var.name] data_point = data[var.name]
lags[vc] = common.fuzzyfy_instance(data_point, var) lags[var.name] = common.fuzzyfy_instance(data_point, var, tuples=False)
root = tree.FLRGTreeNode(None)
tree.build_tree_without_order(root, lags, 0)
for p in root.paths():
path = list(reversed(list(filter(None.__ne__, p))))
for path in product_dict(**lags):
flr = MVFLR.FLR() flr = MVFLR.FLR()
for v, s in path: for var, fset in path.items():
flr.set_lhs(v, s) flr.set_lhs(var, fset)
if len(flr.LHS.keys()) == len(self.explanatory_variables): if len(flr.LHS.keys()) == len(self.explanatory_variables):
flrs.append(flr) flrs.append(flr)
else:
print(flr)
return flrs return flrs
@ -110,6 +119,7 @@ class MVFTS(fts.FTS):
def forecast(self, data, **kwargs): def forecast(self, data, **kwargs):
ret = [] ret = []
ndata = self.apply_transformations(data) ndata = self.apply_transformations(data)
c = 0
for index, row in ndata.iterrows(): for index, row in ndata.iterrows():
data_point = self.format_data(row) data_point = self.format_data(row)
flrs = self.generate_lhs_flrs(data_point) flrs = self.generate_lhs_flrs(data_point)
@ -118,8 +128,17 @@ class MVFTS(fts.FTS):
for flr in flrs: for flr in flrs:
flrg = mvflrg.FLRG(lhs=flr.LHS) flrg = mvflrg.FLRG(lhs=flr.LHS)
if flrg.get_key() not in self.flrgs: if flrg.get_key() not in self.flrgs:
mvs.append(0.) #Naïve approach is applied when no rules were found
mps.append(0.) if self.target_variable.name in flrg.LHS:
fs = flrg.LHS[self.target_variable.name]
fset = self.target_variable.partitioner.sets[fs]
mp = fset.centroid
mv = fset.membership(data_point[self.target_variable.name])
mvs.append(mv)
mps.append(mp)
else:
mvs.append(0.)
mps.append(0.)
else: else:
mvs.append(self.flrgs[flrg.get_key()].get_membership(data_point, self.explanatory_variables)) mvs.append(self.flrgs[flrg.get_key()].get_membership(data_point, self.explanatory_variables))
mps.append(self.flrgs[flrg.get_key()].get_midpoint(self.target_variable.partitioner.sets)) mps.append(self.flrgs[flrg.get_key()].get_midpoint(self.target_variable.partitioner.sets))

View File

@ -6,9 +6,15 @@ from pyFTS.partitioners import partitioner, Grid
from datetime import date as dt, datetime as dtm from datetime import date as dt, datetime as dtm
class DateTime(Enum): class DateTime(Enum):
"""
Date and time granularities used for time partitioning and seasonality identification
"""
year = 1 year = 1
half = 2 # six months
third = 3 # four months
quarter = 4 # three months
sixth = 6 # two months
month = 12 month = 12
day_of_month = 30 day_of_month = 30
day_of_year = 364 day_of_year = 364
@ -37,6 +43,8 @@ def strip_datepart(date, date_part, mask=''):
tmp = date.year tmp = date.year
elif date_part == DateTime.month: elif date_part == DateTime.month:
tmp = date.month tmp = date.month
elif date_part in (DateTime.half, DateTime.third, DateTime.quarter, DateTime.sixth):
tmp = (date.month // date_part.value) + 1
elif date_part == DateTime.day_of_year: elif date_part == DateTime.day_of_year:
tmp = date.timetuple().tm_yday tmp = date.timetuple().tm_yday
elif date_part == DateTime.day_of_month: elif date_part == DateTime.day_of_month:

View File

@ -55,10 +55,13 @@ class TimeGridPartitioner(partitioner.Partitioner):
dlen = (self.max - self.min) dlen = (self.max - self.min)
partlen = dlen / self.partitions partlen = dlen / self.partitions
elif self.season == DateTime.day_of_week: elif self.season == DateTime.day_of_week:
self.min = 1 self.min, self.max, partlen, pl2 = 0, 7, 1, 1
self.max = 8 elif self.season == DateTime.hour:
partlen = 1 self.min, self.max, partlen, pl2 = 0, 24, 1, 1
pl2 = 1 elif self.season == DateTime.month:
self.min, self.max, partlen, pl2 = 1, 13, 1, 1
elif self.season in (DateTime.half, DateTime.third, DateTime.quarter, DateTime.sixth):
self.min, self.max, partlen, pl2 = 1, self.season.value+1, 1, 1
else: else:
partlen = self.season.value / self.partitions partlen = self.season.value / self.partitions
pl2 = partlen / 2 pl2 = partlen / 2

View File

@ -92,14 +92,14 @@ from pyFTS.models.seasonal.common import DateTime
dataset = pd.read_csv('/home/petronio/Downloads/gefcom12.csv') dataset = pd.read_csv('/home/petronio/Downloads/gefcom12.csv')
dataset = dataset.dropna() dataset = dataset.dropna()
train_mv = dataset.iloc[:25000] train_mv = dataset.iloc[:15000]
test_mv = dataset.iloc[25000:] test_mv = dataset.iloc[15000:]
from pyFTS.models.multivariate import common, variable, mvfts from pyFTS.models.multivariate import common, variable, mvfts
from pyFTS.models.seasonal import partitioner as seasonal from pyFTS.models.seasonal import partitioner as seasonal
from pyFTS.models.seasonal.common import DateTime from pyFTS.models.seasonal.common import DateTime
#'''
sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0,24)]} sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0,24)]}
vhour = variable.Variable("Hour", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=24, vhour = variable.Variable("Hour", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=24,
@ -110,56 +110,48 @@ sp = {'seasonality': DateTime.day_of_week, 'names': ['mon','tue','wed','tur','fr
vday = variable.Variable("DayOfWeek", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=7, vday = variable.Variable("DayOfWeek", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=7,
data=train_mv, partitioner_specific=sp) data=train_mv, partitioner_specific=sp)
sp = {'seasonality': DateTime.day_of_year, 'names': ['jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec']} #sp = {'seasonality': DateTime.day_of_month, 'names': ['jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec']}
vmonth = variable.Variable("Month", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=12, sp = {'seasonality': DateTime.quarter}
vmonth = variable.Variable("Month", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=4,
data=train_mv, partitioner_specific=sp) data=train_mv, partitioner_specific=sp)
vload = variable.Variable("Load", data_label="load", alias='load', vload = variable.Variable("Load", data_label="load", alias='load',
partitioner=Grid.GridPartitioner, npart=35, partitioner=Grid.GridPartitioner, npart=20,
data=train_mv) data=train_mv)
vtemp = variable.Variable("Temperature", data_label="temperature", alias='temperature', vtemp = variable.Variable("Temperature", data_label="temperature", alias='temperature',
partitioner=Grid.GridPartitioner, npart=35, partitioner=Grid.GridPartitioner, npart=20,
data=train_mv) data=train_mv)
from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid
from itertools import combinations
models = [] vars = [vhour, vday, vload]
variables = [vhour, vday, vmonth, vtemp] #fs = grid.GridCluster(explanatory_variables=vars, target_variable=vload)
parameters = [ #model = mvfts.MVFTS(explanatory_variables=vars, target_variable=vload)
{}, {}, model = wmvfts.WeightedMVFTS(explanatory_variables=vars, target_variable=vload)
{'order': 2, 'knn': 1}, #model = cmvfts.ClusteredMVFTS(explanatory_variables=vars, target_variable=vload,order=2, knn=3, partitioner=fs)
{'order': 2, 'knn': 2}, model.fit(train_mv)
{'order': 2, 'knn': 3}, print(model.shortname)
] Util.persist_obj(model, model.shortname)
#'''
#model = Util.load_obj('MVFTS')
for ct, method in enumerate([mvfts.MVFTS, wmvfts.WeightedMVFTS, with open("rules.txt","w") as file:
cmvfts.ClusteredMVFTS, cmvfts.ClusteredMVFTS, cmvfts.ClusteredMVFTS]): file.write(str(model))
for nc in np.arange(1, 5):
for comb in combinations(variables, nc):
_vars = []
_vars.extend(comb)
_vars.append(vload)
if not method == cmvfts.ClusteredMVFTS: forecasts = model.predict(test_mv.iloc[:100])
model = method(explanatory_variables=_vars, target_variable=vload, **parameters[ct]) forecasts.insert(0,None)
else:
fs = grid.GridCluster(explanatory_variables=_vars, target_variable=vload)
model = method(explanatory_variables=_vars, target_variable=vload, partitioner=fs, **parameters[ct])
for _v in comb: fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[15,3])
model.shortname += _v.name ax.plot(test_mv['load'].values[:100],label='Original')
ax.plot(forecasts, label='predicted')
handles, labels = ax.get_legend_handles_labels()
lgd = ax.legend(handles, labels, loc=2, bbox_to_anchor=(1, 1))
model.fit(train_mv) Util.show_and_save_image(fig, model.shortname, True)
models.append(model.shortname)
#Util.persist_obj(model, model.shortname)
forecasts = model.predict(test_mv.iloc[:100])