Improvements on Seasonal and Multivariate models
This commit is contained in:
parent
e010df344a
commit
66e41f42ee
@ -177,8 +177,11 @@ def persist_obj(obj, file):
|
|||||||
:param obj: object on memory
|
:param obj: object on memory
|
||||||
:param file: file name to store the object
|
:param file: file name to store the object
|
||||||
"""
|
"""
|
||||||
with open(file, 'wb') as _file:
|
try:
|
||||||
dill.dump(obj, _file)
|
with open(file, 'wb') as _file:
|
||||||
|
dill.dump(obj, _file)
|
||||||
|
except Exception as ex:
|
||||||
|
print("File {} could not be saved due exception {}".format(file, ex))
|
||||||
|
|
||||||
|
|
||||||
def load_obj(file):
|
def load_obj(file):
|
||||||
|
@ -42,9 +42,12 @@ class MultivariateFuzzySet(Composite.FuzzySet):
|
|||||||
return np.nanmin(mv)
|
return np.nanmin(mv)
|
||||||
|
|
||||||
|
|
||||||
def fuzzyfy_instance(data_point, var):
|
def fuzzyfy_instance(data_point, var, tuples=True):
|
||||||
fsets = FuzzySet.fuzzyfy(data_point, var.partitioner, mode='sets', method='fuzzy', alpha_cut=var.alpha_cut)
|
fsets = FuzzySet.fuzzyfy(data_point, var.partitioner, mode='sets', method='fuzzy', alpha_cut=var.alpha_cut)
|
||||||
return [(var.name, fs) for fs in fsets]
|
if tuples:
|
||||||
|
return [(var.name, fs) for fs in fsets]
|
||||||
|
else:
|
||||||
|
return fsets
|
||||||
|
|
||||||
|
|
||||||
def fuzzyfy_instance_clustered(data_point, cluster, **kwargs):
|
def fuzzyfy_instance_clustered(data_point, cluster, **kwargs):
|
||||||
|
@ -1,11 +1,24 @@
|
|||||||
from pyFTS.common import fts, FuzzySet, FLR, Membership, tree
|
from pyFTS.common import fts, FuzzySet, FLR, Membership
|
||||||
from pyFTS.partitioners import Grid
|
from pyFTS.partitioners import Grid
|
||||||
from pyFTS.models.multivariate import FLR as MVFLR, common, flrg as mvflrg
|
from pyFTS.models.multivariate import FLR as MVFLR, common, flrg as mvflrg
|
||||||
|
from itertools import product
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
def product_dict(**kwargs):
|
||||||
|
'''
|
||||||
|
Code by Seth Johnson
|
||||||
|
:param kwargs:
|
||||||
|
:return:
|
||||||
|
'''
|
||||||
|
keys = kwargs.keys()
|
||||||
|
vals = kwargs.values()
|
||||||
|
for instance in product(*vals):
|
||||||
|
yield dict(zip(keys, instance))
|
||||||
|
|
||||||
|
|
||||||
class MVFTS(fts.FTS):
|
class MVFTS(fts.FTS):
|
||||||
"""
|
"""
|
||||||
Multivariate extension of Chen's ConventionalFTS method
|
Multivariate extension of Chen's ConventionalFTS method
|
||||||
@ -52,22 +65,18 @@ class MVFTS(fts.FTS):
|
|||||||
lags = {}
|
lags = {}
|
||||||
for vc, var in enumerate(self.explanatory_variables):
|
for vc, var in enumerate(self.explanatory_variables):
|
||||||
data_point = data[var.name]
|
data_point = data[var.name]
|
||||||
lags[vc] = common.fuzzyfy_instance(data_point, var)
|
lags[var.name] = common.fuzzyfy_instance(data_point, var, tuples=False)
|
||||||
|
|
||||||
root = tree.FLRGTreeNode(None)
|
|
||||||
|
|
||||||
tree.build_tree_without_order(root, lags, 0)
|
|
||||||
|
|
||||||
for p in root.paths():
|
|
||||||
path = list(reversed(list(filter(None.__ne__, p))))
|
|
||||||
|
|
||||||
|
for path in product_dict(**lags):
|
||||||
flr = MVFLR.FLR()
|
flr = MVFLR.FLR()
|
||||||
|
|
||||||
for v, s in path:
|
for var, fset in path.items():
|
||||||
flr.set_lhs(v, s)
|
flr.set_lhs(var, fset)
|
||||||
|
|
||||||
if len(flr.LHS.keys()) == len(self.explanatory_variables):
|
if len(flr.LHS.keys()) == len(self.explanatory_variables):
|
||||||
flrs.append(flr)
|
flrs.append(flr)
|
||||||
|
else:
|
||||||
|
print(flr)
|
||||||
|
|
||||||
return flrs
|
return flrs
|
||||||
|
|
||||||
@ -110,6 +119,7 @@ class MVFTS(fts.FTS):
|
|||||||
def forecast(self, data, **kwargs):
|
def forecast(self, data, **kwargs):
|
||||||
ret = []
|
ret = []
|
||||||
ndata = self.apply_transformations(data)
|
ndata = self.apply_transformations(data)
|
||||||
|
c = 0
|
||||||
for index, row in ndata.iterrows():
|
for index, row in ndata.iterrows():
|
||||||
data_point = self.format_data(row)
|
data_point = self.format_data(row)
|
||||||
flrs = self.generate_lhs_flrs(data_point)
|
flrs = self.generate_lhs_flrs(data_point)
|
||||||
@ -118,8 +128,17 @@ class MVFTS(fts.FTS):
|
|||||||
for flr in flrs:
|
for flr in flrs:
|
||||||
flrg = mvflrg.FLRG(lhs=flr.LHS)
|
flrg = mvflrg.FLRG(lhs=flr.LHS)
|
||||||
if flrg.get_key() not in self.flrgs:
|
if flrg.get_key() not in self.flrgs:
|
||||||
mvs.append(0.)
|
#Naïve approach is applied when no rules were found
|
||||||
mps.append(0.)
|
if self.target_variable.name in flrg.LHS:
|
||||||
|
fs = flrg.LHS[self.target_variable.name]
|
||||||
|
fset = self.target_variable.partitioner.sets[fs]
|
||||||
|
mp = fset.centroid
|
||||||
|
mv = fset.membership(data_point[self.target_variable.name])
|
||||||
|
mvs.append(mv)
|
||||||
|
mps.append(mp)
|
||||||
|
else:
|
||||||
|
mvs.append(0.)
|
||||||
|
mps.append(0.)
|
||||||
else:
|
else:
|
||||||
mvs.append(self.flrgs[flrg.get_key()].get_membership(data_point, self.explanatory_variables))
|
mvs.append(self.flrgs[flrg.get_key()].get_membership(data_point, self.explanatory_variables))
|
||||||
mps.append(self.flrgs[flrg.get_key()].get_midpoint(self.target_variable.partitioner.sets))
|
mps.append(self.flrgs[flrg.get_key()].get_midpoint(self.target_variable.partitioner.sets))
|
||||||
|
@ -6,9 +6,15 @@ from pyFTS.partitioners import partitioner, Grid
|
|||||||
from datetime import date as dt, datetime as dtm
|
from datetime import date as dt, datetime as dtm
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class DateTime(Enum):
|
class DateTime(Enum):
|
||||||
|
"""
|
||||||
|
Date and time units used to identify time granularity and seasonality
|
||||||
|
"""
|
||||||
year = 1
|
year = 1
|
||||||
|
half = 2 # six months
|
||||||
|
third = 3 # four months
|
||||||
|
quarter = 4 # three months
|
||||||
|
sixth = 6 # two months
|
||||||
month = 12
|
month = 12
|
||||||
day_of_month = 30
|
day_of_month = 30
|
||||||
day_of_year = 364
|
day_of_year = 364
|
||||||
@ -37,6 +43,8 @@ def strip_datepart(date, date_part, mask=''):
|
|||||||
tmp = date.year
|
tmp = date.year
|
||||||
elif date_part == DateTime.month:
|
elif date_part == DateTime.month:
|
||||||
tmp = date.month
|
tmp = date.month
|
||||||
|
elif date_part in (DateTime.half, DateTime.third, DateTime.quarter, DateTime.sixth):
|
||||||
|
tmp = (date.month // date_part.value) + 1
|
||||||
elif date_part == DateTime.day_of_year:
|
elif date_part == DateTime.day_of_year:
|
||||||
tmp = date.timetuple().tm_yday
|
tmp = date.timetuple().tm_yday
|
||||||
elif date_part == DateTime.day_of_month:
|
elif date_part == DateTime.day_of_month:
|
||||||
|
@ -55,10 +55,13 @@ class TimeGridPartitioner(partitioner.Partitioner):
|
|||||||
dlen = (self.max - self.min)
|
dlen = (self.max - self.min)
|
||||||
partlen = dlen / self.partitions
|
partlen = dlen / self.partitions
|
||||||
elif self.season == DateTime.day_of_week:
|
elif self.season == DateTime.day_of_week:
|
||||||
self.min = 1
|
self.min, self.max, partlen, pl2 = 0, 7, 1, 1
|
||||||
self.max = 8
|
elif self.season == DateTime.hour:
|
||||||
partlen = 1
|
self.min, self.max, partlen, pl2 = 0, 24, 1, 1
|
||||||
pl2 = 1
|
elif self.season == DateTime.month:
|
||||||
|
self.min, self.max, partlen, pl2 = 1, 13, 1, 1
|
||||||
|
elif self.season in (DateTime.half, DateTime.third, DateTime.quarter, DateTime.sixth):
|
||||||
|
self.min, self.max, partlen, pl2 = 1, self.season.value+1, 1, 1
|
||||||
else:
|
else:
|
||||||
partlen = self.season.value / self.partitions
|
partlen = self.season.value / self.partitions
|
||||||
pl2 = partlen / 2
|
pl2 = partlen / 2
|
||||||
|
@ -92,14 +92,14 @@ from pyFTS.models.seasonal.common import DateTime
|
|||||||
dataset = pd.read_csv('/home/petronio/Downloads/gefcom12.csv')
|
dataset = pd.read_csv('/home/petronio/Downloads/gefcom12.csv')
|
||||||
dataset = dataset.dropna()
|
dataset = dataset.dropna()
|
||||||
|
|
||||||
train_mv = dataset.iloc[:25000]
|
train_mv = dataset.iloc[:15000]
|
||||||
test_mv = dataset.iloc[25000:]
|
test_mv = dataset.iloc[15000:]
|
||||||
|
|
||||||
from pyFTS.models.multivariate import common, variable, mvfts
|
from pyFTS.models.multivariate import common, variable, mvfts
|
||||||
from pyFTS.models.seasonal import partitioner as seasonal
|
from pyFTS.models.seasonal import partitioner as seasonal
|
||||||
from pyFTS.models.seasonal.common import DateTime
|
from pyFTS.models.seasonal.common import DateTime
|
||||||
|
|
||||||
|
#'''
|
||||||
sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0,24)]}
|
sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0,24)]}
|
||||||
|
|
||||||
vhour = variable.Variable("Hour", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=24,
|
vhour = variable.Variable("Hour", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=24,
|
||||||
@ -110,56 +110,48 @@ sp = {'seasonality': DateTime.day_of_week, 'names': ['mon','tue','wed','tur','fr
|
|||||||
vday = variable.Variable("DayOfWeek", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=7,
|
vday = variable.Variable("DayOfWeek", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=7,
|
||||||
data=train_mv, partitioner_specific=sp)
|
data=train_mv, partitioner_specific=sp)
|
||||||
|
|
||||||
sp = {'seasonality': DateTime.day_of_year, 'names': ['jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec']}
|
#sp = {'seasonality': DateTime.day_of_month, 'names': ['jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec']}
|
||||||
|
|
||||||
vmonth = variable.Variable("Month", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=12,
|
sp = {'seasonality': DateTime.quarter}
|
||||||
|
|
||||||
|
vmonth = variable.Variable("Month", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=4,
|
||||||
data=train_mv, partitioner_specific=sp)
|
data=train_mv, partitioner_specific=sp)
|
||||||
|
|
||||||
|
|
||||||
vload = variable.Variable("Load", data_label="load", alias='load',
|
vload = variable.Variable("Load", data_label="load", alias='load',
|
||||||
partitioner=Grid.GridPartitioner, npart=35,
|
partitioner=Grid.GridPartitioner, npart=20,
|
||||||
data=train_mv)
|
data=train_mv)
|
||||||
|
|
||||||
vtemp = variable.Variable("Temperature", data_label="temperature", alias='temperature',
|
vtemp = variable.Variable("Temperature", data_label="temperature", alias='temperature',
|
||||||
partitioner=Grid.GridPartitioner, npart=35,
|
partitioner=Grid.GridPartitioner, npart=20,
|
||||||
data=train_mv)
|
data=train_mv)
|
||||||
|
|
||||||
from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid
|
from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid
|
||||||
from itertools import combinations
|
|
||||||
|
|
||||||
models = []
|
vars = [vhour, vday, vload]
|
||||||
|
|
||||||
variables = [vhour, vday, vmonth, vtemp]
|
#fs = grid.GridCluster(explanatory_variables=vars, target_variable=vload)
|
||||||
|
|
||||||
parameters = [
|
#model = mvfts.MVFTS(explanatory_variables=vars, target_variable=vload)
|
||||||
{}, {},
|
model = wmvfts.WeightedMVFTS(explanatory_variables=vars, target_variable=vload)
|
||||||
{'order': 2, 'knn': 1},
|
#model = cmvfts.ClusteredMVFTS(explanatory_variables=vars, target_variable=vload,order=2, knn=3, partitioner=fs)
|
||||||
{'order': 2, 'knn': 2},
|
model.fit(train_mv)
|
||||||
{'order': 2, 'knn': 3},
|
print(model.shortname)
|
||||||
]
|
Util.persist_obj(model, model.shortname)
|
||||||
|
#'''
|
||||||
|
|
||||||
|
#model = Util.load_obj('MVFTS')
|
||||||
|
|
||||||
for ct, method in enumerate([mvfts.MVFTS, wmvfts.WeightedMVFTS,
|
with open("rules.txt","w") as file:
|
||||||
cmvfts.ClusteredMVFTS, cmvfts.ClusteredMVFTS, cmvfts.ClusteredMVFTS]):
|
file.write(str(model))
|
||||||
for nc in np.arange(1, 5):
|
|
||||||
for comb in combinations(variables, nc):
|
|
||||||
_vars = []
|
|
||||||
_vars.extend(comb)
|
|
||||||
_vars.append(vload)
|
|
||||||
|
|
||||||
if not method == cmvfts.ClusteredMVFTS:
|
forecasts = model.predict(test_mv.iloc[:100])
|
||||||
model = method(explanatory_variables=_vars, target_variable=vload, **parameters[ct])
|
forecasts.insert(0,None)
|
||||||
else:
|
|
||||||
fs = grid.GridCluster(explanatory_variables=_vars, target_variable=vload)
|
|
||||||
model = method(explanatory_variables=_vars, target_variable=vload, partitioner=fs, **parameters[ct])
|
|
||||||
|
|
||||||
for _v in comb:
|
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[15,3])
|
||||||
model.shortname += _v.name
|
ax.plot(test_mv['load'].values[:100],label='Original')
|
||||||
|
ax.plot(forecasts, label='predicted')
|
||||||
|
handles, labels = ax.get_legend_handles_labels()
|
||||||
|
lgd = ax.legend(handles, labels, loc=2, bbox_to_anchor=(1, 1))
|
||||||
|
|
||||||
model.fit(train_mv)
|
Util.show_and_save_image(fig, model.shortname, True)
|
||||||
|
|
||||||
models.append(model.shortname)
|
|
||||||
|
|
||||||
#Util.persist_obj(model, model.shortname)
|
|
||||||
|
|
||||||
forecasts = model.predict(test_mv.iloc[:100])
|
|
||||||
|
Loading…
Reference in New Issue
Block a user