MVFTS bugfixes

This commit is contained in:
Petrônio Cândido 2019-02-12 15:24:01 -02:00
parent 1fce1145cc
commit e010df344a
6 changed files with 118 additions and 81 deletions

View File

@ -0,0 +1,60 @@
'''
Incremental Ensemble of FTS methods
'''
import numpy as np
import pandas as pd

from pyFTS.common import FuzzySet, FLR, fts, flrg
from pyFTS.models.ensemble import ensemble
from pyFTS.partitioners import Grid
class IncrementalEnsembleFTS(ensemble.EnsembleFTS):
    """
    Incremental Ensemble FTS.

    Stores a partitioner factory and an FTS model factory (plus their
    parameters) and, on ``train``, builds a fresh partitioner and a fresh
    model from the supplied data.

    Keyword arguments read by the constructor (all optional):

    - ``order``: model order, default 1
    - ``partitioner_method``: partitioner class/factory,
      default ``Grid.GridPartitioner``
    - ``partitioner_params``: dict of partitioner keyword arguments,
      default ``{'npart': 10}``
    - ``fts_method``: FTS model class/factory, default ``None``; it is
      called unconditionally by ``train``, so it must be supplied before
      training
    - ``fts_params``: dict of model keyword arguments, default ``{}``
    - ``window_length``: memory window length, default 100
    - ``batch_size``: batch interval between retrainings, default 10
    """

    def __init__(self, **kwargs):
        super(IncrementalEnsembleFTS, self).__init__(**kwargs)
        self.shortname = "IncrementalEnsembleFTS"
        self.name = "Incremental Ensemble FTS"

        self.order = kwargs.get('order', 1)

        # The default is evaluated eagerly by dict.get, so ``Grid`` must be
        # importable even when the caller passes its own partitioner_method.
        self.partitioner_method = kwargs.get('partitioner_method', Grid.GridPartitioner)
        """The partitioner method to be called when a new model is built"""
        self.partitioner_params = kwargs.get('partitioner_params', {'npart': 10})
        """The partitioner method parameters"""
        self.partitioner = None
        """The most recently trained partitioner"""

        self.fts_method = kwargs.get('fts_method', None)
        """The FTS method to be called when a new model is built"""
        self.fts_params = kwargs.get('fts_params', {})
        """The FTS method specific parameters"""

        self.window_length = kwargs.get('window_length', 100)
        """The memory window length"""

        self.batch_size = kwargs.get('batch_size', 10)
        """The batch interval between each retraining"""

        self.is_high_order = True
        self.uod_clip = False

        # NOTE(review): this relies on the parent constructor having already
        # set ``self.max_lag``; confirm whether ``self.order`` was intended
        # as the second operand.
        self.max_lag = self.window_length + self.max_lag

    def train(self, data, **kwargs):
        """
        Build a new partitioner and a new FTS model from ``data`` and fit it.

        :param data: training data, passed to the partitioner factory as
            ``data=`` and to the model's ``fit``
        :param kwargs: extra keyword arguments forwarded to the model's ``fit``
        """
        self.partitioner = self.partitioner_method(data=data, **self.partitioner_params)

        # A first instance is created only to find out whether the chosen
        # method is high order; if it is, it is rebuilt with the configured order.
        self.model = self.fts_method(partitioner=self.partitioner, **self.fts_params)
        if self.model.is_high_order:
            self.model = self.fts_method(partitioner=self.partitioner,
                                         order=self.order, **self.fts_params)

        self.model.fit(data, **kwargs)

        # The ensemble takes over the short name of the trained inner model.
        self.shortname = self.model.shortname

View File

@ -51,7 +51,7 @@ class MVFTS(fts.FTS):
flrs = [] flrs = []
lags = {} lags = {}
for vc, var in enumerate(self.explanatory_variables): for vc, var in enumerate(self.explanatory_variables):
data_point = data[var.data_label] data_point = data[var.name]
lags[vc] = common.fuzzyfy_instance(data_point, var) lags[vc] = common.fuzzyfy_instance(data_point, var)
root = tree.FLRGTreeNode(None) root = tree.FLRGTreeNode(None)
@ -75,7 +75,7 @@ class MVFTS(fts.FTS):
flrs = [] flrs = []
for ct in range(1, len(data.index)): for ct in range(1, len(data.index)):
ix = data.index[ct-1] ix = data.index[ct-1]
data_point = data.loc[ix] data_point = self.format_data( data.loc[ix] )
tmp_flrs = self.generate_lhs_flrs(data_point) tmp_flrs = self.generate_lhs_flrs(data_point)
@ -111,7 +111,8 @@ class MVFTS(fts.FTS):
ret = [] ret = []
ndata = self.apply_transformations(data) ndata = self.apply_transformations(data)
for index, row in ndata.iterrows(): for index, row in ndata.iterrows():
flrs = self.generate_lhs_flrs(row) data_point = self.format_data(row)
flrs = self.generate_lhs_flrs(data_point)
mvs = [] mvs = []
mps = [] mps = []
for flr in flrs: for flr in flrs:
@ -120,7 +121,7 @@ class MVFTS(fts.FTS):
mvs.append(0.) mvs.append(0.)
mps.append(0.) mps.append(0.)
else: else:
mvs.append(self.flrgs[flrg.get_key()].get_membership(self.format_data(row), self.explanatory_variables)) mvs.append(self.flrgs[flrg.get_key()].get_membership(data_point, self.explanatory_variables))
mps.append(self.flrgs[flrg.get_key()].get_midpoint(self.target_variable.partitioner.sets)) mps.append(self.flrgs[flrg.get_key()].get_midpoint(self.target_variable.partitioner.sets))
mv = np.array(mvs) mv = np.array(mvs)

View File

@ -3,7 +3,7 @@ import pandas as pd
from enum import Enum from enum import Enum
from pyFTS.common import FuzzySet, Membership from pyFTS.common import FuzzySet, Membership
from pyFTS.partitioners import partitioner, Grid from pyFTS.partitioners import partitioner, Grid
from datetime import date as dt from datetime import date as dt, datetime as dtm
@ -30,7 +30,9 @@ class DateTime(Enum):
second_of_day = 86400 second_of_day = 86400
def strip_datepart(date, date_part): def strip_datepart(date, date_part, mask=''):
if isinstance(date, str):
date = dtm.strptime(date, mask)
if date_part == DateTime.year: if date_part == DateTime.year:
tmp = date.year tmp = date.year
elif date_part == DateTime.month: elif date_part == DateTime.month:

View File

@ -20,6 +20,10 @@ class TimeGridPartitioner(partitioner.Partitioner):
super(TimeGridPartitioner, self).__init__(name="TimeGrid", preprocess=False, **kwargs) super(TimeGridPartitioner, self).__init__(name="TimeGrid", preprocess=False, **kwargs)
self.season = kwargs.get('seasonality', DateTime.day_of_year) self.season = kwargs.get('seasonality', DateTime.day_of_year)
'''Seasonality, a pyFTS.models.seasonal.common.DateTime object'''
self.mask = kwargs.get('mask', '%Y-%m-%d %H:%M:%S')
'''A string with datetime formating mask'''
data = kwargs.get('data', None) data = kwargs.get('data', None)
if self.season == DateTime.year: if self.season == DateTime.year:
ndata = [strip_datepart(k, self.season) for k in data] ndata = [strip_datepart(k, self.season) for k in data]
@ -40,7 +44,7 @@ class TimeGridPartitioner(partitioner.Partitioner):
self.ordered_sets = FS.set_ordered(self.sets) self.ordered_sets = FS.set_ordered(self.sets)
if self.type == 'seasonal': if self.type == 'seasonal':
self.extractor = lambda x: strip_datepart(x, self.season) self.extractor = lambda x: strip_datepart(x, self.season, self.mask)
def build(self, data): def build(self, data):
sets = {} sets = {}

View File

@ -89,107 +89,77 @@ from pyFTS.models.multivariate import common, variable, mvfts, wmvfts, cmvfts, g
from pyFTS.models.seasonal import partitioner as seasonal from pyFTS.models.seasonal import partitioner as seasonal
from pyFTS.models.seasonal.common import DateTime from pyFTS.models.seasonal.common import DateTime
dataset = pd.read_csv('/home/petronio/Downloads/Klang-daily Max.csv', sep=',') dataset = pd.read_csv('/home/petronio/Downloads/gefcom12.csv')
dataset = dataset.dropna()
dataset['date'] = pd.to_datetime(dataset["Day/Month/Year"], format='%m/%d/%Y') train_mv = dataset.iloc[:25000]
dataset['value'] = dataset['Daily-Max API'] test_mv = dataset.iloc[25000:]
from pyFTS.models.multivariate import common, variable, mvfts
from pyFTS.models.seasonal import partitioner as seasonal
from pyFTS.models.seasonal.common import DateTime
train_mv = dataset.iloc[:732] sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0,24)]}
test_mv = dataset.iloc[732:]
vhour = variable.Variable("Hour", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=24,
data=train_mv, partitioner_specific=sp)
sp = {'seasonality': DateTime.day_of_week, 'names': ['mon','tue','wed','tur','fri','sat','sun']} sp = {'seasonality': DateTime.day_of_week, 'names': ['mon','tue','wed','tur','fri','sat','sun']}
vday = variable.Variable("DayOfWeek", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=7, vday = variable.Variable("DayOfWeek", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=7,
data=train_mv, partitioner_specific=sp) data=train_mv, partitioner_specific=sp)
sp = {'seasonality': DateTime.day_of_year, 'names': ['jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec']} sp = {'seasonality': DateTime.day_of_year, 'names': ['jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec']}
vmonth = variable.Variable("Month", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=12, vmonth = variable.Variable("Month", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=12,
data=train_mv, partitioner_specific=sp) data=train_mv, partitioner_specific=sp)
vvalue = variable.Variable("Pollution", data_label="value", alias='value', vload = variable.Variable("Load", data_label="load", alias='load',
partitioner=Grid.GridPartitioner, npart=35, partitioner=Grid.GridPartitioner, npart=35,
data=train_mv) data=train_mv)
fs = grid.GridCluster(explanatory_variables=[vday, vmonth, vvalue], target_variable=vvalue) vtemp = variable.Variable("Temperature", data_label="temperature", alias='temperature',
partitioner=Grid.GridPartitioner, npart=35,
data=train_mv)
print(len(fs.sets)) from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid
from itertools import combinations
#model = wmvfts.WeightedMVFTS(explanatory_variables=[vhour, vvalue], target_variable=vvalue) models = []
model = cmvfts.ClusteredMVFTS(explanatory_variables=[vday, vmonth, vvalue], target_variable=vvalue,
partitioner=fs, knn=5, order=2)
model.fit(train_mv) #, distributed='spark', url='spark://192.168.0.106:7077') variables = [vhour, vday, vmonth, vtemp]
#'''
#print(model)
print(len(fs.sets)) parameters = [
{}, {},
{'order': 2, 'knn': 1},
{'order': 2, 'knn': 2},
{'order': 2, 'knn': 3},
]
from pyFTS.benchmarks import Measures for ct, method in enumerate([mvfts.MVFTS, wmvfts.WeightedMVFTS,
print(Measures.get_point_statistics(test_mv, model)) cmvfts.ClusteredMVFTS, cmvfts.ClusteredMVFTS, cmvfts.ClusteredMVFTS]):
for nc in np.arange(1, 5):
for comb in combinations(variables, nc):
_vars = []
_vars.extend(comb)
_vars.append(vload)
#print(model) if not method == cmvfts.ClusteredMVFTS:
model = method(explanatory_variables=_vars, target_variable=vload, **parameters[ct])
else:
fs = grid.GridCluster(explanatory_variables=_vars, target_variable=vload)
model = method(explanatory_variables=_vars, target_variable=vload, partitioner=fs, **parameters[ct])
''' for _v in comb:
def fun(x): model.shortname += _v.name
return (x, x % 2)
model.fit(train_mv)
def get_fs(): models.append(model.shortname)
fs_tmp = Simple.SimplePartitioner()
for fset in part.value.keys():
fz = part.value[fset]
fs_tmp.append(fset, fz.mf, fz.parameters)
return fs_tmp
def fuzzyfy(x):
fs_tmp = get_fs()
ret = []
for k in x:
ret.append(fs_tmp.fuzzyfy(k, mode='both'))
return ret
def train(fuzzyfied):
model = hofts.WeightedHighOrderFTS(partitioner=get_fs(), order=order.value)
ndata = [k for k in fuzzyfied]
model.train(ndata)
return [(k, model.flrgs[k]) for k in model.flrgs]
with SparkContext(conf=conf) as sc:
part = sc.broadcast(fs.sets)
order = sc.broadcast(2)
#ret = sc.parallelize(np.arange(0,100)).map(fun)
#fuzzyfied = sc.parallelize(data).mapPartitions(fuzzyfy)
flrgs = sc.parallelize(data).mapPartitions(train)
model = hofts.WeightedHighOrderFTS(partitioner=fs, order=order.value)
for k in flrgs.collect():
model.append_rule(k[1])
print(model)
'''
#Util.persist_obj(model, model.shortname)
forecasts = model.predict(test_mv.iloc[:100])