From 66e41f42ee3650a3555d016da7d3a91ff1cd817f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido?= Date: Wed, 13 Feb 2019 14:11:24 -0200 Subject: [PATCH] Improvements on Seasonal and Multivariate models --- pyFTS/common/Util.py | 7 ++- pyFTS/models/multivariate/common.py | 7 ++- pyFTS/models/multivariate/mvfts.py | 45 +++++++++++++------ pyFTS/models/seasonal/common.py | 10 ++++- pyFTS/models/seasonal/partitioner.py | 11 +++-- pyFTS/tests/multivariate.py | 66 ++++++++++++---------------- 6 files changed, 87 insertions(+), 59 deletions(-) diff --git a/pyFTS/common/Util.py b/pyFTS/common/Util.py index 5bfce62..a3b33f3 100644 --- a/pyFTS/common/Util.py +++ b/pyFTS/common/Util.py @@ -177,8 +177,11 @@ def persist_obj(obj, file): :param obj: object on memory :param file: file name to store the object """ - with open(file, 'wb') as _file: - dill.dump(obj, _file) + try: + with open(file, 'wb') as _file: + dill.dump(obj, _file) + except Exception as ex: + print("File {} could not be saved due exception {}".format(file, ex)) def load_obj(file): diff --git a/pyFTS/models/multivariate/common.py b/pyFTS/models/multivariate/common.py index 53b6fdd..15475c1 100644 --- a/pyFTS/models/multivariate/common.py +++ b/pyFTS/models/multivariate/common.py @@ -42,9 +42,12 @@ class MultivariateFuzzySet(Composite.FuzzySet): return np.nanmin(mv) -def fuzzyfy_instance(data_point, var): +def fuzzyfy_instance(data_point, var, tuples=True): fsets = FuzzySet.fuzzyfy(data_point, var.partitioner, mode='sets', method='fuzzy', alpha_cut=var.alpha_cut) - return [(var.name, fs) for fs in fsets] + if tuples: + return [(var.name, fs) for fs in fsets] + else: + return fsets def fuzzyfy_instance_clustered(data_point, cluster, **kwargs): diff --git a/pyFTS/models/multivariate/mvfts.py b/pyFTS/models/multivariate/mvfts.py index 14f640f..c8c54f2 100644 --- a/pyFTS/models/multivariate/mvfts.py +++ b/pyFTS/models/multivariate/mvfts.py @@ -1,11 +1,24 @@ -from pyFTS.common import fts, FuzzySet, FLR, Membership, tree +from pyFTS.common import fts, FuzzySet, FLR, Membership from pyFTS.partitioners import Grid from pyFTS.models.multivariate import FLR as MVFLR, common, flrg as mvflrg +from itertools import product import numpy as np import pandas as pd +def product_dict(**kwargs): + ''' + Code by Seth Johnson + :param kwargs: + :return: + ''' + keys = kwargs.keys() + vals = kwargs.values() + for instance in product(*vals): + yield dict(zip(keys, instance)) + + class MVFTS(fts.FTS): """ Multivariate extension of Chen's ConventionalFTS method @@ -52,22 +65,18 @@ class MVFTS(fts.FTS): lags = {} for vc, var in enumerate(self.explanatory_variables): data_point = data[var.name] - lags[vc] = common.fuzzyfy_instance(data_point, var) - - root = tree.FLRGTreeNode(None) - - tree.build_tree_without_order(root, lags, 0) - - for p in root.paths(): - path = list(reversed(list(filter(None.__ne__, p)))) + lags[var.name] = common.fuzzyfy_instance(data_point, var, tuples=False) + for path in product_dict(**lags): flr = MVFLR.FLR() - for v, s in path: - flr.set_lhs(v, s) + for var, fset in path.items(): + flr.set_lhs(var, fset) if len(flr.LHS.keys()) == len(self.explanatory_variables): flrs.append(flr) + else: + print(flr) return flrs @@ -110,6 +119,7 @@ class MVFTS(fts.FTS): def forecast(self, data, **kwargs): ret = [] ndata = self.apply_transformations(data) + c = 0 for index, row in ndata.iterrows(): data_point = self.format_data(row) flrs = self.generate_lhs_flrs(data_point) @@ -118,8 +128,17 @@ class MVFTS(fts.FTS): for flr in flrs: flrg = mvflrg.FLRG(lhs=flr.LHS) if flrg.get_key() not in self.flrgs: - mvs.append(0.) - mps.append(0.) + #Naïve approach is applied when no rules were found + if self.target_variable.name in flrg.LHS: + fs = flrg.LHS[self.target_variable.name] + fset = self.target_variable.partitioner.sets[fs] + mp = fset.centroid + mv = fset.membership(data_point[self.target_variable.name]) + mvs.append(mv) + mps.append(mp) + else: + mvs.append(0.) + mps.append(0.) else: mvs.append(self.flrgs[flrg.get_key()].get_membership(data_point, self.explanatory_variables)) mps.append(self.flrgs[flrg.get_key()].get_midpoint(self.target_variable.partitioner.sets)) diff --git a/pyFTS/models/seasonal/common.py b/pyFTS/models/seasonal/common.py index c4e9bf2..2038ad5 100644 --- a/pyFTS/models/seasonal/common.py +++ b/pyFTS/models/seasonal/common.py @@ -6,9 +6,15 @@ from pyFTS.partitioners import partitioner, Grid from datetime import date as dt, datetime as dtm - class DateTime(Enum): + """ + Data and Time granularity for time granularity and seasonality identification + """ year = 1 + half = 2 # six months + third = 3 # four months + quarter = 4 # three months + sixth = 6 # two months month = 12 day_of_month = 30 day_of_year = 364 @@ -37,6 +43,8 @@ def strip_datepart(date, date_part, mask=''): tmp = date.year elif date_part == DateTime.month: tmp = date.month + elif date_part in (DateTime.half, DateTime.third, DateTime.quarter, DateTime.sixth): + tmp = (date.month // date_part.value) + 1 elif date_part == DateTime.day_of_year: tmp = date.timetuple().tm_yday elif date_part == DateTime.day_of_month: diff --git a/pyFTS/models/seasonal/partitioner.py b/pyFTS/models/seasonal/partitioner.py index 6f82d86..11f98a4 100644 --- a/pyFTS/models/seasonal/partitioner.py +++ b/pyFTS/models/seasonal/partitioner.py @@ -55,10 +55,13 @@ class TimeGridPartitioner(partitioner.Partitioner): dlen = (self.max - self.min) partlen = dlen / self.partitions elif self.season == DateTime.day_of_week: - self.min = 1 - self.max = 8 - partlen = 1 - pl2 = 1 + self.min, self.max, partlen, pl2 = 0, 7, 1, 1 + elif self.season == DateTime.hour: + self.min, self.max, partlen, pl2 = 0, 24, 1, 1 + elif self.season == DateTime.month: + self.min, self.max, partlen, pl2 = 1, 13, 1, 1 + elif self.season in (DateTime.half, DateTime.third, DateTime.quarter, DateTime.sixth): + self.min, self.max, partlen, pl2 = 1, self.season.value+1, 1, 1 else: partlen = self.season.value / self.partitions pl2 = partlen / 2 diff --git a/pyFTS/tests/multivariate.py b/pyFTS/tests/multivariate.py index bab41e7..3cfcc0b 100644 --- a/pyFTS/tests/multivariate.py +++ b/pyFTS/tests/multivariate.py @@ -92,14 +92,14 @@ from pyFTS.models.seasonal.common import DateTime dataset = pd.read_csv('/home/petronio/Downloads/gefcom12.csv') dataset = dataset.dropna() -train_mv = dataset.iloc[:25000] -test_mv = dataset.iloc[25000:] +train_mv = dataset.iloc[:15000] +test_mv = dataset.iloc[15000:] from pyFTS.models.multivariate import common, variable, mvfts from pyFTS.models.seasonal import partitioner as seasonal from pyFTS.models.seasonal.common import DateTime - +#''' sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0,24)]} vhour = variable.Variable("Hour", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=24, @@ -110,56 +110,48 @@ sp = {'seasonality': DateTime.day_of_week, 'names': ['mon','tue','wed','tur','fr vday = variable.Variable("DayOfWeek", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=7, data=train_mv, partitioner_specific=sp) -sp = {'seasonality': DateTime.day_of_year, 'names': ['jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec']} +#sp = {'seasonality': DateTime.day_of_month, 'names': ['jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec']} -vmonth = variable.Variable("Month", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=12, +sp = {'seasonality': DateTime.quarter} + +vmonth = variable.Variable("Month", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=4, data=train_mv, partitioner_specific=sp) vload = variable.Variable("Load", data_label="load", alias='load', - partitioner=Grid.GridPartitioner, npart=35, + partitioner=Grid.GridPartitioner, npart=20, data=train_mv) vtemp = variable.Variable("Temperature", data_label="temperature", alias='temperature', - partitioner=Grid.GridPartitioner, npart=35, + partitioner=Grid.GridPartitioner, npart=20, data=train_mv) from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid -from itertools import combinations -models = [] +vars = [vhour, vday, vload] -variables = [vhour, vday, vmonth, vtemp] +#fs = grid.GridCluster(explanatory_variables=vars, target_variable=vload) -parameters = [ - {}, {}, - {'order': 2, 'knn': 1}, - {'order': 2, 'knn': 2}, - {'order': 2, 'knn': 3}, -] +#model = mvfts.MVFTS(explanatory_variables=vars, target_variable=vload) +model = wmvfts.WeightedMVFTS(explanatory_variables=vars, target_variable=vload) +#model = cmvfts.ClusteredMVFTS(explanatory_variables=vars, target_variable=vload,order=2, knn=3, partitioner=fs) +model.fit(train_mv) +print(model.shortname) +Util.persist_obj(model, model.shortname) +#''' +#model = Util.load_obj('MVFTS') -for ct, method in enumerate([mvfts.MVFTS, wmvfts.WeightedMVFTS, - cmvfts.ClusteredMVFTS, cmvfts.ClusteredMVFTS, cmvfts.ClusteredMVFTS]): - for nc in np.arange(1, 5): - for comb in combinations(variables, nc): - _vars = [] - _vars.extend(comb) - _vars.append(vload) +with open("rules.txt","w") as file: + file.write(str(model)) - if not method == cmvfts.ClusteredMVFTS: - model = method(explanatory_variables=_vars, target_variable=vload, **parameters[ct]) - else: - fs = grid.GridCluster(explanatory_variables=_vars, target_variable=vload) - model = method(explanatory_variables=_vars, target_variable=vload, partitioner=fs, **parameters[ct]) +forecasts = model.predict(test_mv.iloc[:100]) +forecasts.insert(0,None) - for _v in comb: - model.shortname += _v.name +fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[15,3]) +ax.plot(test_mv['load'].values[:100],label='Original') +ax.plot(forecasts, label='predicted') +handles, labels = ax.get_legend_handles_labels() +lgd = ax.legend(handles, labels, loc=2, bbox_to_anchor=(1, 1)) - model.fit(train_mv) - - models.append(model.shortname) - - #Util.persist_obj(model, model.shortname) - - forecasts = model.predict(test_mv.iloc[:100]) +Util.show_and_save_image(fig, model.shortname, True)