Improvements on Seasonal and Multivariate models
This commit is contained in:
parent
e010df344a
commit
66e41f42ee
@ -177,8 +177,11 @@ def persist_obj(obj, file):
|
||||
:param obj: object on memory
|
||||
:param file: file name to store the object
|
||||
"""
|
||||
with open(file, 'wb') as _file:
|
||||
dill.dump(obj, _file)
|
||||
try:
|
||||
with open(file, 'wb') as _file:
|
||||
dill.dump(obj, _file)
|
||||
except Exception as ex:
|
||||
print("File {} could not be saved due exception {}".format(file, ex))
|
||||
|
||||
|
||||
def load_obj(file):
|
||||
|
@ -42,9 +42,12 @@ class MultivariateFuzzySet(Composite.FuzzySet):
|
||||
return np.nanmin(mv)
|
||||
|
||||
|
||||
def fuzzyfy_instance(data_point, var):
|
||||
def fuzzyfy_instance(data_point, var, tuples=True):
|
||||
fsets = FuzzySet.fuzzyfy(data_point, var.partitioner, mode='sets', method='fuzzy', alpha_cut=var.alpha_cut)
|
||||
return [(var.name, fs) for fs in fsets]
|
||||
if tuples:
|
||||
return [(var.name, fs) for fs in fsets]
|
||||
else:
|
||||
return fsets
|
||||
|
||||
|
||||
def fuzzyfy_instance_clustered(data_point, cluster, **kwargs):
|
||||
|
@ -1,11 +1,24 @@
|
||||
from pyFTS.common import fts, FuzzySet, FLR, Membership, tree
|
||||
from pyFTS.common import fts, FuzzySet, FLR, Membership
|
||||
from pyFTS.partitioners import Grid
|
||||
from pyFTS.models.multivariate import FLR as MVFLR, common, flrg as mvflrg
|
||||
from itertools import product
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def product_dict(**kwargs):
    '''
    Enumerate the Cartesian product of the keyword arguments as dictionaries.

    Code by Seth Johnson

    :param kwargs: each keyword names a dimension and maps it to an iterable of candidate values
    :return: a generator yielding one dict per combination; every dict maps each keyword
             name to one of its candidate values (the last keyword varies fastest)
    '''
    names = list(kwargs)
    # product() walks the value iterables in keyword order, so zipping with
    # the names (same order) pairs every value with its own keyword.
    for combo in product(*kwargs.values()):
        yield dict(zip(names, combo))
|
||||
|
||||
|
||||
class MVFTS(fts.FTS):
|
||||
"""
|
||||
Multivariate extension of Chen's ConventionalFTS method
|
||||
@ -52,22 +65,18 @@ class MVFTS(fts.FTS):
|
||||
lags = {}
|
||||
for vc, var in enumerate(self.explanatory_variables):
|
||||
data_point = data[var.name]
|
||||
lags[vc] = common.fuzzyfy_instance(data_point, var)
|
||||
|
||||
root = tree.FLRGTreeNode(None)
|
||||
|
||||
tree.build_tree_without_order(root, lags, 0)
|
||||
|
||||
for p in root.paths():
|
||||
path = list(reversed(list(filter(None.__ne__, p))))
|
||||
lags[var.name] = common.fuzzyfy_instance(data_point, var, tuples=False)
|
||||
|
||||
for path in product_dict(**lags):
|
||||
flr = MVFLR.FLR()
|
||||
|
||||
for v, s in path:
|
||||
flr.set_lhs(v, s)
|
||||
for var, fset in path.items():
|
||||
flr.set_lhs(var, fset)
|
||||
|
||||
if len(flr.LHS.keys()) == len(self.explanatory_variables):
|
||||
flrs.append(flr)
|
||||
else:
|
||||
print(flr)
|
||||
|
||||
return flrs
|
||||
|
||||
@ -110,6 +119,7 @@ class MVFTS(fts.FTS):
|
||||
def forecast(self, data, **kwargs):
|
||||
ret = []
|
||||
ndata = self.apply_transformations(data)
|
||||
c = 0
|
||||
for index, row in ndata.iterrows():
|
||||
data_point = self.format_data(row)
|
||||
flrs = self.generate_lhs_flrs(data_point)
|
||||
@ -118,8 +128,17 @@ class MVFTS(fts.FTS):
|
||||
for flr in flrs:
|
||||
flrg = mvflrg.FLRG(lhs=flr.LHS)
|
||||
if flrg.get_key() not in self.flrgs:
|
||||
mvs.append(0.)
|
||||
mps.append(0.)
|
||||
#Naïve approach is applied when no rules were found
|
||||
if self.target_variable.name in flrg.LHS:
|
||||
fs = flrg.LHS[self.target_variable.name]
|
||||
fset = self.target_variable.partitioner.sets[fs]
|
||||
mp = fset.centroid
|
||||
mv = fset.membership(data_point[self.target_variable.name])
|
||||
mvs.append(mv)
|
||||
mps.append(mp)
|
||||
else:
|
||||
mvs.append(0.)
|
||||
mps.append(0.)
|
||||
else:
|
||||
mvs.append(self.flrgs[flrg.get_key()].get_membership(data_point, self.explanatory_variables))
|
||||
mps.append(self.flrgs[flrg.get_key()].get_midpoint(self.target_variable.partitioner.sets))
|
||||
|
@ -6,9 +6,15 @@ from pyFTS.partitioners import partitioner, Grid
|
||||
from datetime import date as dt, datetime as dtm
|
||||
|
||||
|
||||
|
||||
class DateTime(Enum):
|
||||
"""
|
||||
Date and time granularities used for seasonality identification
|
||||
"""
|
||||
year = 1
|
||||
half = 2 # six months
|
||||
third = 3 # four months
|
||||
quarter = 4 # three months
|
||||
sixth = 6 # two months
|
||||
month = 12
|
||||
day_of_month = 30
|
||||
day_of_year = 364
|
||||
@ -37,6 +43,8 @@ def strip_datepart(date, date_part, mask=''):
|
||||
tmp = date.year
|
||||
elif date_part == DateTime.month:
|
||||
tmp = date.month
|
||||
elif date_part in (DateTime.half, DateTime.third, DateTime.quarter, DateTime.sixth):
|
||||
tmp = (date.month // date_part.value) + 1
|
||||
elif date_part == DateTime.day_of_year:
|
||||
tmp = date.timetuple().tm_yday
|
||||
elif date_part == DateTime.day_of_month:
|
||||
|
@ -55,10 +55,13 @@ class TimeGridPartitioner(partitioner.Partitioner):
|
||||
dlen = (self.max - self.min)
|
||||
partlen = dlen / self.partitions
|
||||
elif self.season == DateTime.day_of_week:
|
||||
self.min = 1
|
||||
self.max = 8
|
||||
partlen = 1
|
||||
pl2 = 1
|
||||
self.min, self.max, partlen, pl2 = 0, 7, 1, 1
|
||||
elif self.season == DateTime.hour:
|
||||
self.min, self.max, partlen, pl2 = 0, 24, 1, 1
|
||||
elif self.season == DateTime.month:
|
||||
self.min, self.max, partlen, pl2 = 1, 13, 1, 1
|
||||
elif self.season in (DateTime.half, DateTime.third, DateTime.quarter, DateTime.sixth):
|
||||
self.min, self.max, partlen, pl2 = 1, self.season.value+1, 1, 1
|
||||
else:
|
||||
partlen = self.season.value / self.partitions
|
||||
pl2 = partlen / 2
|
||||
|
@ -92,14 +92,14 @@ from pyFTS.models.seasonal.common import DateTime
|
||||
dataset = pd.read_csv('/home/petronio/Downloads/gefcom12.csv')
|
||||
dataset = dataset.dropna()
|
||||
|
||||
train_mv = dataset.iloc[:25000]
|
||||
test_mv = dataset.iloc[25000:]
|
||||
train_mv = dataset.iloc[:15000]
|
||||
test_mv = dataset.iloc[15000:]
|
||||
|
||||
from pyFTS.models.multivariate import common, variable, mvfts
|
||||
from pyFTS.models.seasonal import partitioner as seasonal
|
||||
from pyFTS.models.seasonal.common import DateTime
|
||||
|
||||
|
||||
#'''
|
||||
sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0,24)]}
|
||||
|
||||
vhour = variable.Variable("Hour", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=24,
|
||||
@ -110,56 +110,48 @@ sp = {'seasonality': DateTime.day_of_week, 'names': ['mon','tue','wed','tur','fr
|
||||
vday = variable.Variable("DayOfWeek", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=7,
|
||||
data=train_mv, partitioner_specific=sp)
|
||||
|
||||
sp = {'seasonality': DateTime.day_of_year, 'names': ['jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec']}
|
||||
#sp = {'seasonality': DateTime.day_of_month, 'names': ['jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec']}
|
||||
|
||||
vmonth = variable.Variable("Month", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=12,
|
||||
sp = {'seasonality': DateTime.quarter}
|
||||
|
||||
vmonth = variable.Variable("Month", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=4,
|
||||
data=train_mv, partitioner_specific=sp)
|
||||
|
||||
|
||||
vload = variable.Variable("Load", data_label="load", alias='load',
|
||||
partitioner=Grid.GridPartitioner, npart=35,
|
||||
partitioner=Grid.GridPartitioner, npart=20,
|
||||
data=train_mv)
|
||||
|
||||
vtemp = variable.Variable("Temperature", data_label="temperature", alias='temperature',
|
||||
partitioner=Grid.GridPartitioner, npart=35,
|
||||
partitioner=Grid.GridPartitioner, npart=20,
|
||||
data=train_mv)
|
||||
|
||||
from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid
|
||||
from itertools import combinations
|
||||
|
||||
models = []
|
||||
vars = [vhour, vday, vload]
|
||||
|
||||
variables = [vhour, vday, vmonth, vtemp]
|
||||
#fs = grid.GridCluster(explanatory_variables=vars, target_variable=vload)
|
||||
|
||||
parameters = [
|
||||
{}, {},
|
||||
{'order': 2, 'knn': 1},
|
||||
{'order': 2, 'knn': 2},
|
||||
{'order': 2, 'knn': 3},
|
||||
]
|
||||
#model = mvfts.MVFTS(explanatory_variables=vars, target_variable=vload)
|
||||
model = wmvfts.WeightedMVFTS(explanatory_variables=vars, target_variable=vload)
|
||||
#model = cmvfts.ClusteredMVFTS(explanatory_variables=vars, target_variable=vload,order=2, knn=3, partitioner=fs)
|
||||
model.fit(train_mv)
|
||||
print(model.shortname)
|
||||
Util.persist_obj(model, model.shortname)
|
||||
#'''
|
||||
|
||||
#model = Util.load_obj('MVFTS')
|
||||
|
||||
for ct, method in enumerate([mvfts.MVFTS, wmvfts.WeightedMVFTS,
|
||||
cmvfts.ClusteredMVFTS, cmvfts.ClusteredMVFTS, cmvfts.ClusteredMVFTS]):
|
||||
for nc in np.arange(1, 5):
|
||||
for comb in combinations(variables, nc):
|
||||
_vars = []
|
||||
_vars.extend(comb)
|
||||
_vars.append(vload)
|
||||
with open("rules.txt","w") as file:
|
||||
file.write(str(model))
|
||||
|
||||
if not method == cmvfts.ClusteredMVFTS:
|
||||
model = method(explanatory_variables=_vars, target_variable=vload, **parameters[ct])
|
||||
else:
|
||||
fs = grid.GridCluster(explanatory_variables=_vars, target_variable=vload)
|
||||
model = method(explanatory_variables=_vars, target_variable=vload, partitioner=fs, **parameters[ct])
|
||||
forecasts = model.predict(test_mv.iloc[:100])
|
||||
forecasts.insert(0,None)
|
||||
|
||||
for _v in comb:
|
||||
model.shortname += _v.name
|
||||
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[15,3])
|
||||
ax.plot(test_mv['load'].values[:100],label='Original')
|
||||
ax.plot(forecasts, label='predicted')
|
||||
handles, labels = ax.get_legend_handles_labels()
|
||||
lgd = ax.legend(handles, labels, loc=2, bbox_to_anchor=(1, 1))
|
||||
|
||||
model.fit(train_mv)
|
||||
|
||||
models.append(model.shortname)
|
||||
|
||||
#Util.persist_obj(model, model.shortname)
|
||||
|
||||
forecasts = model.predict(test_mv.iloc[:100])
|
||||
Util.show_and_save_image(fig, model.shortname, True)
|
||||
|
Loading…
Reference in New Issue
Block a user