From e010df344a89094c102c4e04c24d95a580eb0405 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido?= Date: Tue, 12 Feb 2019 15:24:01 -0200 Subject: [PATCH] MVFTS bugfixes --- .../models/incremental/IncrementalEnsemble.py | 60 +++++++++ .../{Retrainer.py => TimeVariant.py} | 0 pyFTS/models/multivariate/mvfts.py | 9 +- pyFTS/models/seasonal/common.py | 6 +- pyFTS/models/seasonal/partitioner.py | 6 +- pyFTS/tests/multivariate.py | 118 +++++++----------- 6 files changed, 118 insertions(+), 81 deletions(-) create mode 100644 pyFTS/models/incremental/IncrementalEnsemble.py rename pyFTS/models/incremental/{Retrainer.py => TimeVariant.py} (100%) diff --git a/pyFTS/models/incremental/IncrementalEnsemble.py b/pyFTS/models/incremental/IncrementalEnsemble.py new file mode 100644 index 0000000..7d8278e --- /dev/null +++ b/pyFTS/models/incremental/IncrementalEnsemble.py @@ -0,0 +1,60 @@ +''' +Incremental Ensemble of FTS methods +''' + + +import numpy as np +import pandas as pd +from pyFTS.common import FuzzySet, FLR, fts, flrg +from pyFTS.models.ensemble import ensemble + + +class IncrementalEnsembleFTS(ensemble.EnsembleFTS): + """ + Ensemble FTS + """ + def __init__(self, **kwargs): + super(IncrementalEnsembleFTS, self).__init__(**kwargs) + self.shortname = "IncrementalEnsembleFTS" + self.name = "Incremental Ensemble FTS" + + self.order = kwargs.get('order',1) + + self.order = kwargs.get('order', 1) + + self.partitioner_method = kwargs.get('partitioner_method', Grid.GridPartitioner) + """The partitioner method to be called when a new model is build""" + self.partitioner_params = kwargs.get('partitioner_params', {'npart': 10}) + """The partitioner method parameters""" + self.partitioner = None + """The most recent trained partitioner""" + + self.fts_method = kwargs.get('fts_method', None) + """The FTS method to be called when a new model is build""" + self.fts_params = kwargs.get('fts_params', {}) + """The FTS method specific parameters""" + + self.window_length = kwargs.get('window_length', 100) + """The memory window length""" + + self.batch_size = kwargs.get('batch_size', 10) + """The batch interval between each retraining""" + self.is_high_order = True + self.uod_clip = False + self.max_lag = self.window_length + self.max_lag + + def train(self, data, **kwargs): + + self.partitioner = self.partitioner_method(data=data, **self.partitioner_params) + self.model = self.fts_method(partitioner=self.partitioner, **self.fts_params) + if self.model.is_high_order: + self.model.order = self.model = self.fts_method(partitioner=self.partitioner, + order=self.order, **self.fts_params) + self.model.fit(data, **kwargs) + self.shortname = self.model.shortname + + + + + + diff --git a/pyFTS/models/incremental/Retrainer.py b/pyFTS/models/incremental/TimeVariant.py similarity index 100% rename from pyFTS/models/incremental/Retrainer.py rename to pyFTS/models/incremental/TimeVariant.py diff --git a/pyFTS/models/multivariate/mvfts.py b/pyFTS/models/multivariate/mvfts.py index 377dae4..14f640f 100644 --- a/pyFTS/models/multivariate/mvfts.py +++ b/pyFTS/models/multivariate/mvfts.py @@ -51,7 +51,7 @@ class MVFTS(fts.FTS): flrs = [] lags = {} for vc, var in enumerate(self.explanatory_variables): - data_point = data[var.data_label] + data_point = data[var.name] lags[vc] = common.fuzzyfy_instance(data_point, var) root = tree.FLRGTreeNode(None) @@ -75,7 +75,7 @@ class MVFTS(fts.FTS): flrs = [] for ct in range(1, len(data.index)): ix = data.index[ct-1] - data_point = data.loc[ix] + data_point = self.format_data( data.loc[ix] ) tmp_flrs = self.generate_lhs_flrs(data_point) @@ -111,7 +111,8 @@ class MVFTS(fts.FTS): ret = [] ndata = self.apply_transformations(data) for index, row in ndata.iterrows(): - flrs = self.generate_lhs_flrs(row) + data_point = self.format_data(row) + flrs = self.generate_lhs_flrs(data_point) mvs = [] mps = [] for flr in flrs: @@ -120,7 +121,7 @@ class MVFTS(fts.FTS): mvs.append(0.) mps.append(0.) else: - mvs.append(self.flrgs[flrg.get_key()].get_membership(self.format_data(row), self.explanatory_variables)) + mvs.append(self.flrgs[flrg.get_key()].get_membership(data_point, self.explanatory_variables)) mps.append(self.flrgs[flrg.get_key()].get_midpoint(self.target_variable.partitioner.sets)) mv = np.array(mvs) diff --git a/pyFTS/models/seasonal/common.py b/pyFTS/models/seasonal/common.py index 3ccf7f4..c4e9bf2 100644 --- a/pyFTS/models/seasonal/common.py +++ b/pyFTS/models/seasonal/common.py @@ -3,7 +3,7 @@ import pandas as pd from enum import Enum from pyFTS.common import FuzzySet, Membership from pyFTS.partitioners import partitioner, Grid -from datetime import date as dt +from datetime import date as dt, datetime as dtm @@ -30,7 +30,9 @@ class DateTime(Enum): second_of_day = 86400 -def strip_datepart(date, date_part): +def strip_datepart(date, date_part, mask=''): + if isinstance(date, str): + date = dtm.strptime(date, mask) if date_part == DateTime.year: tmp = date.year elif date_part == DateTime.month: diff --git a/pyFTS/models/seasonal/partitioner.py b/pyFTS/models/seasonal/partitioner.py index 8f48af0..6f82d86 100644 --- a/pyFTS/models/seasonal/partitioner.py +++ b/pyFTS/models/seasonal/partitioner.py @@ -20,6 +20,10 @@ class TimeGridPartitioner(partitioner.Partitioner): super(TimeGridPartitioner, self).__init__(name="TimeGrid", preprocess=False, **kwargs) self.season = kwargs.get('seasonality', DateTime.day_of_year) + '''Seasonality, a pyFTS.models.seasonal.common.DateTime object''' + self.mask = kwargs.get('mask', '%Y-%m-%d %H:%M:%S') + '''A string with datetime formating mask''' + data = kwargs.get('data', None) if self.season == DateTime.year: ndata = [strip_datepart(k, self.season) for k in data] @@ -40,7 +44,7 @@ class TimeGridPartitioner(partitioner.Partitioner): self.ordered_sets = FS.set_ordered(self.sets) if self.type == 'seasonal': - self.extractor = lambda x: strip_datepart(x, self.season) + self.extractor = lambda x: strip_datepart(x, self.season, self.mask) def build(self, data): sets = {} diff --git a/pyFTS/tests/multivariate.py b/pyFTS/tests/multivariate.py index 8f9d197..bab41e7 100644 --- a/pyFTS/tests/multivariate.py +++ b/pyFTS/tests/multivariate.py @@ -89,107 +89,77 @@ from pyFTS.models.multivariate import common, variable, mvfts, wmvfts, cmvfts, g from pyFTS.models.seasonal import partitioner as seasonal from pyFTS.models.seasonal.common import DateTime -dataset = pd.read_csv('/home/petronio/Downloads/Klang-daily Max.csv', sep=',') +dataset = pd.read_csv('/home/petronio/Downloads/gefcom12.csv') +dataset = dataset.dropna() -dataset['date'] = pd.to_datetime(dataset["Day/Month/Year"], format='%m/%d/%Y') -dataset['value'] = dataset['Daily-Max API'] +train_mv = dataset.iloc[:25000] +test_mv = dataset.iloc[25000:] + +from pyFTS.models.multivariate import common, variable, mvfts +from pyFTS.models.seasonal import partitioner as seasonal +from pyFTS.models.seasonal.common import DateTime -train_mv = dataset.iloc[:732] -test_mv = dataset.iloc[732:] +sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0,24)]} + +vhour = variable.Variable("Hour", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=24, + data=train_mv, partitioner_specific=sp) sp = {'seasonality': DateTime.day_of_week, 'names': ['mon','tue','wed','tur','fri','sat','sun']} vday = variable.Variable("DayOfWeek", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=7, data=train_mv, partitioner_specific=sp) - sp = {'seasonality': DateTime.day_of_year, 'names': ['jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec']} vmonth = variable.Variable("Month", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=12, data=train_mv, partitioner_specific=sp) -vvalue = variable.Variable("Pollution", data_label="value", alias='value', +vload = variable.Variable("Load", data_label="load", alias='load', partitioner=Grid.GridPartitioner, npart=35, data=train_mv) -fs = grid.GridCluster(explanatory_variables=[vday, vmonth, vvalue], target_variable=vvalue) +vtemp = variable.Variable("Temperature", data_label="temperature", alias='temperature', + partitioner=Grid.GridPartitioner, npart=35, + data=train_mv) -print(len(fs.sets)) +from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid +from itertools import combinations -#model = wmvfts.WeightedMVFTS(explanatory_variables=[vhour, vvalue], target_variable=vvalue) -model = cmvfts.ClusteredMVFTS(explanatory_variables=[vday, vmonth, vvalue], target_variable=vvalue, - partitioner=fs, knn=5, order=2) +models = [] -model.fit(train_mv) #, distributed='spark', url='spark://192.168.0.106:7077') -#''' -#print(model) +variables = [vhour, vday, vmonth, vtemp] -print(len(fs.sets)) +parameters = [ + {}, {}, + {'order': 2, 'knn': 1}, + {'order': 2, 'knn': 2}, + {'order': 2, 'knn': 3}, +] -from pyFTS.benchmarks import Measures -print(Measures.get_point_statistics(test_mv, model)) +for ct, method in enumerate([mvfts.MVFTS, wmvfts.WeightedMVFTS, + cmvfts.ClusteredMVFTS, cmvfts.ClusteredMVFTS, cmvfts.ClusteredMVFTS]): + for nc in np.arange(1, 5): + for comb in combinations(variables, nc): + _vars = [] + _vars.extend(comb) + _vars.append(vload) -#print(model) + if not method == cmvfts.ClusteredMVFTS: + model = method(explanatory_variables=_vars, target_variable=vload, **parameters[ct]) + else: + fs = grid.GridCluster(explanatory_variables=_vars, target_variable=vload) + model = method(explanatory_variables=_vars, target_variable=vload, partitioner=fs, **parameters[ct]) -''' -def fun(x): - return (x, x % 2) + for _v in comb: + model.shortname += _v.name + model.fit(train_mv) -def get_fs(): - fs_tmp = Simple.SimplePartitioner() - - for fset in part.value.keys(): - fz = part.value[fset] - fs_tmp.append(fset, fz.mf, fz.parameters) - - return fs_tmp - -def fuzzyfy(x): - - fs_tmp = get_fs() - - ret = [] - - for k in x: - ret.append(fs_tmp.fuzzyfy(k, mode='both')) - - return ret - - -def train(fuzzyfied): - model = hofts.WeightedHighOrderFTS(partitioner=get_fs(), order=order.value) - - ndata = [k for k in fuzzyfied] - - model.train(ndata) - - return [(k, model.flrgs[k]) for k in model.flrgs] - - -with SparkContext(conf=conf) as sc: - - part = sc.broadcast(fs.sets) - - order = sc.broadcast(2) - - #ret = sc.parallelize(np.arange(0,100)).map(fun) - - #fuzzyfied = sc.parallelize(data).mapPartitions(fuzzyfy) - - flrgs = sc.parallelize(data).mapPartitions(train) - - model = hofts.WeightedHighOrderFTS(partitioner=fs, order=order.value) - - for k in flrgs.collect(): - model.append_rule(k[1]) - - print(model) - -''' - + models.append(model.shortname) + #Util.persist_obj(model, model.shortname) + forecasts = model.predict(test_mv.iloc[:100])