MVFTS bugfixes
parent 1fce1145cc
commit e010df344a
pyFTS/models/incremental/IncrementalEnsemble.py (new file, 60 lines)
@@ -0,0 +1,60 @@
+'''
+Incremental Ensemble of FTS methods
+'''
+
+import numpy as np
+import pandas as pd
+from pyFTS.common import FuzzySet, FLR, fts, flrg
+from pyFTS.partitioners import Grid
+from pyFTS.models.ensemble import ensemble
+
+
+class IncrementalEnsembleFTS(ensemble.EnsembleFTS):
+    """
+    Incremental Ensemble of FTS methods
+    """
+    def __init__(self, **kwargs):
+        super(IncrementalEnsembleFTS, self).__init__(**kwargs)
+        self.shortname = "IncrementalEnsembleFTS"
+        self.name = "Incremental Ensemble FTS"
+
+        self.order = kwargs.get('order', 1)
+
+        self.partitioner_method = kwargs.get('partitioner_method', Grid.GridPartitioner)
+        """The partitioner method to be called when a new model is built"""
+        self.partitioner_params = kwargs.get('partitioner_params', {'npart': 10})
+        """The partitioner method parameters"""
+        self.partitioner = None
+        """The most recently trained partitioner"""
+
+        self.fts_method = kwargs.get('fts_method', None)
+        """The FTS method to be called when a new model is built"""
+        self.fts_params = kwargs.get('fts_params', {})
+        """The FTS method specific parameters"""
+
+        self.window_length = kwargs.get('window_length', 100)
+        """The memory window length"""
+
+        self.batch_size = kwargs.get('batch_size', 10)
+        """The batch interval between each retraining"""
+
+        self.is_high_order = True
+        self.uod_clip = False
+        self.max_lag = self.window_length + self.max_lag
+
+    def train(self, data, **kwargs):
+        # Rebuild the partitioner and the internal model from the current data window
+        self.partitioner = self.partitioner_method(data=data, **self.partitioner_params)
+        self.model = self.fts_method(partitioner=self.partitioner, **self.fts_params)
+        if self.model.is_high_order:
+            # high-order methods also need the order argument
+            self.model = self.fts_method(partitioner=self.partitioner,
+                                         order=self.order, **self.fts_params)
+        self.model.fit(data, **kwargs)
+        self.shortname = self.model.shortname
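For orientation, a minimal usage sketch of the new class (illustrative; not part of the commit). It assumes hofts.HighOrderFTS as the internal method, a synthetic series, and that the base fts.FTS.fit() dispatches to train() for the periodic retraining:

# Illustrative only: synthetic data, assumed wiring through fts.FTS.fit()
import numpy as np
from pyFTS.models import hofts
from pyFTS.partitioners import Grid
from pyFTS.models.incremental.IncrementalEnsemble import IncrementalEnsembleFTS

data = [10 + 5 * np.sin(k / 10) + np.random.normal(0, 0.5) for k in range(500)]

model = IncrementalEnsembleFTS(fts_method=hofts.HighOrderFTS, order=2,
                               partitioner_method=Grid.GridPartitioner,
                               partitioner_params={'npart': 15},
                               window_length=100,  # memory window for each retraining
                               batch_size=10)      # new samples between retrainings
model.fit(data)
forecasts = model.predict(data)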
pyFTS/models/multivariate/mvfts.py

@@ -51,7 +51,7 @@ class MVFTS(fts.FTS):
         flrs = []
         lags = {}
         for vc, var in enumerate(self.explanatory_variables):
-            data_point = data[var.data_label]
+            data_point = data[var.name]
             lags[vc] = common.fuzzyfy_instance(data_point, var)
 
         root = tree.FLRGTreeNode(None)

@@ -75,7 +75,7 @@ class MVFTS(fts.FTS):
         flrs = []
         for ct in range(1, len(data.index)):
             ix = data.index[ct-1]
-            data_point = data.loc[ix]
+            data_point = self.format_data(data.loc[ix])
 
             tmp_flrs = self.generate_lhs_flrs(data_point)
 

@@ -111,7 +111,8 @@ class MVFTS(fts.FTS):
         ret = []
         ndata = self.apply_transformations(data)
         for index, row in ndata.iterrows():
-            flrs = self.generate_lhs_flrs(row)
+            data_point = self.format_data(row)
+            flrs = self.generate_lhs_flrs(data_point)
             mvs = []
             mps = []
             for flr in flrs:

@@ -120,7 +121,7 @@ class MVFTS(fts.FTS):
                 mvs.append(0.)
                 mps.append(0.)
             else:
-                mvs.append(self.flrgs[flrg.get_key()].get_membership(self.format_data(row), self.explanatory_variables))
+                mvs.append(self.flrgs[flrg.get_key()].get_membership(data_point, self.explanatory_variables))
                 mps.append(self.flrgs[flrg.get_key()].get_midpoint(self.target_variable.partitioner.sets))
 
         mv = np.array(mvs)
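The common thread in these four hunks: a raw row is now passed through format_data() exactly once, and every later lookup is keyed by var.name instead of the dataset column var.data_label. A rough sketch of the re-keying format_data is assumed to perform (hypothetical; the commit does not show its body):

# Hypothetical sketch of the assumed re-keying; the real method may differ.
def format_data(self, data):
    ndata = {}
    for var in self.explanatory_variables:
        # read the raw column by its dataset label, expose it by variable name
        ndata[var.name] = data[var.data_label]
    return ndata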
pyFTS/models/seasonal/common.py

@@ -3,7 +3,7 @@ import pandas as pd
 from enum import Enum
 from pyFTS.common import FuzzySet, Membership
 from pyFTS.partitioners import partitioner, Grid
-from datetime import date as dt
+from datetime import date as dt, datetime as dtm

@@ -30,7 +30,9 @@ class DateTime(Enum):
     second_of_day = 86400
 
 
-def strip_datepart(date, date_part):
+def strip_datepart(date, date_part, mask=''):
+    if isinstance(date, str):
+        date = dtm.strptime(date, mask)
     if date_part == DateTime.year:
         tmp = date.year
     elif date_part == DateTime.month:
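strip_datepart now accepts raw strings: when the date is a str, it is parsed with the given mask before the date part is extracted. A quick sketch of both call styles (values illustrative):

from datetime import datetime
from pyFTS.models.seasonal.common import DateTime, strip_datepart

# datetime objects behave as before
strip_datepart(datetime(2012, 1, 2, 8, 30), DateTime.day_of_week)

# strings are parsed first, using the new mask argument
strip_datepart('2012-01-02 08:30:00', DateTime.day_of_week, '%Y-%m-%d %H:%M:%S')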
pyFTS/models/seasonal/partitioner.py

@@ -20,6 +20,10 @@ class TimeGridPartitioner(partitioner.Partitioner):
         super(TimeGridPartitioner, self).__init__(name="TimeGrid", preprocess=False, **kwargs)
 
         self.season = kwargs.get('seasonality', DateTime.day_of_year)
+        '''Seasonality, a pyFTS.models.seasonal.common.DateTime object'''
+        self.mask = kwargs.get('mask', '%Y-%m-%d %H:%M:%S')
+        '''A string with the datetime formatting mask'''
+
         data = kwargs.get('data', None)
         if self.season == DateTime.year:
             ndata = [strip_datepart(k, self.season) for k in data]

@@ -40,7 +44,7 @@ class TimeGridPartitioner(partitioner.Partitioner):
         self.ordered_sets = FS.set_ordered(self.sets)
 
         if self.type == 'seasonal':
-            self.extractor = lambda x: strip_datepart(x, self.season)
+            self.extractor = lambda x: strip_datepart(x, self.season, self.mask)
 
     def build(self, data):
         sets = {}
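Since a Variable forwards its partitioner_specific dict to the partitioner as keyword arguments, a string-typed date column can presumably be handled by adding the new 'mask' key there. A sketch under that assumption:

from pyFTS.models.multivariate import variable
from pyFTS.models.seasonal import partitioner as seasonal
from pyFTS.models.seasonal.common import DateTime

# train_mv: a pandas DataFrame with a string 'date' column, loaded elsewhere
sp = {'seasonality': DateTime.day_of_week,
      'names': ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'],
      'mask': '%Y-%m-%d %H:%M:%S'}  # consumed by TimeGridPartitioner

vday = variable.Variable("DayOfWeek", data_label="date",
                         partitioner=seasonal.TimeGridPartitioner, npart=7,
                         data=train_mv, partitioner_specific=sp)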
@@ -89,107 +89,77 @@ from pyFTS.models.multivariate import common, variable, mvfts, wmvfts, cmvfts, g
 from pyFTS.models.seasonal import partitioner as seasonal
 from pyFTS.models.seasonal.common import DateTime
 
-dataset = pd.read_csv('/home/petronio/Downloads/Klang-daily Max.csv', sep=',')
-
-dataset['date'] = pd.to_datetime(dataset["Day/Month/Year"], format='%m/%d/%Y')
-dataset['value'] = dataset['Daily-Max API']
-
-train_mv = dataset.iloc[:732]
-test_mv = dataset.iloc[732:]
+dataset = pd.read_csv('/home/petronio/Downloads/gefcom12.csv')
+dataset = dataset.dropna()
+
+train_mv = dataset.iloc[:25000]
+test_mv = dataset.iloc[25000:]
+
+from pyFTS.models.multivariate import common, variable, mvfts
+from pyFTS.models.seasonal import partitioner as seasonal
+from pyFTS.models.seasonal.common import DateTime
+
+sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0, 24)]}
+
+vhour = variable.Variable("Hour", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=24,
+                          data=train_mv, partitioner_specific=sp)
 
 sp = {'seasonality': DateTime.day_of_week, 'names': ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']}
 
 vday = variable.Variable("DayOfWeek", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=7,
                          data=train_mv, partitioner_specific=sp)
 
 sp = {'seasonality': DateTime.day_of_year, 'names': ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']}
 
 vmonth = variable.Variable("Month", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=12,
                            data=train_mv, partitioner_specific=sp)
 
-vvalue = variable.Variable("Pollution", data_label="value", alias='value',
-                           partitioner=Grid.GridPartitioner, npart=35,
-                           data=train_mv)
-
-fs = grid.GridCluster(explanatory_variables=[vday, vmonth, vvalue], target_variable=vvalue)
-
-print(len(fs.sets))
-
-#model = wmvfts.WeightedMVFTS(explanatory_variables=[vhour, vvalue], target_variable=vvalue)
-model = cmvfts.ClusteredMVFTS(explanatory_variables=[vday, vmonth, vvalue], target_variable=vvalue,
-                              partitioner=fs, knn=5, order=2)
-
-model.fit(train_mv) #, distributed='spark', url='spark://192.168.0.106:7077')
-#'''
-#print(model)
-
-print(len(fs.sets))
-
-from pyFTS.benchmarks import Measures
-print(Measures.get_point_statistics(test_mv, model))
-
-#print(model)
+vload = variable.Variable("Load", data_label="load", alias='load',
+                          partitioner=Grid.GridPartitioner, npart=35,
+                          data=train_mv)
+
+vtemp = variable.Variable("Temperature", data_label="temperature", alias='temperature',
+                          partitioner=Grid.GridPartitioner, npart=35,
+                          data=train_mv)
+
+from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid
+from itertools import combinations
+
+models = []
+
+variables = [vhour, vday, vmonth, vtemp]
+
+parameters = [
+    {}, {},
+    {'order': 2, 'knn': 1},
+    {'order': 2, 'knn': 2},
+    {'order': 2, 'knn': 3},
+]
+
+for ct, method in enumerate([mvfts.MVFTS, wmvfts.WeightedMVFTS,
+                             cmvfts.ClusteredMVFTS, cmvfts.ClusteredMVFTS, cmvfts.ClusteredMVFTS]):
+    for nc in np.arange(1, 5):
+        for comb in combinations(variables, nc):
+            _vars = []
+            _vars.extend(comb)
+            _vars.append(vload)
+
+            if method != cmvfts.ClusteredMVFTS:
+                model = method(explanatory_variables=_vars, target_variable=vload, **parameters[ct])
+            else:
+                fs = grid.GridCluster(explanatory_variables=_vars, target_variable=vload)
+                model = method(explanatory_variables=_vars, target_variable=vload, partitioner=fs, **parameters[ct])
+
+            for _v in comb:
+                model.shortname += _v.name
+
+            model.fit(train_mv)
+
+            models.append(model.shortname)
+
+            #Util.persist_obj(model, model.shortname)
+
+            forecasts = model.predict(test_mv.iloc[:100])
 
-'''
-def fun(x):
-    return (x, x % 2)
-
-def get_fs():
-    fs_tmp = Simple.SimplePartitioner()
-
-    for fset in part.value.keys():
-        fz = part.value[fset]
-        fs_tmp.append(fset, fz.mf, fz.parameters)
-
-    return fs_tmp
-
-def fuzzyfy(x):
-    fs_tmp = get_fs()
-    ret = []
-    for k in x:
-        ret.append(fs_tmp.fuzzyfy(k, mode='both'))
-    return ret
-
-def train(fuzzyfied):
-    model = hofts.WeightedHighOrderFTS(partitioner=get_fs(), order=order.value)
-    ndata = [k for k in fuzzyfied]
-    model.train(ndata)
-    return [(k, model.flrgs[k]) for k in model.flrgs]
-
-with SparkContext(conf=conf) as sc:
-    part = sc.broadcast(fs.sets)
-    order = sc.broadcast(2)
-
-    #ret = sc.parallelize(np.arange(0,100)).map(fun)
-    #fuzzyfied = sc.parallelize(data).mapPartitions(fuzzyfy)
-    flrgs = sc.parallelize(data).mapPartitions(train)
-
-    model = hofts.WeightedHighOrderFTS(partitioner=fs, order=order.value)
-    for k in flrgs.collect():
-        model.append_rule(k[1])
-
-    print(model)
-'''
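The rewritten script fits every model/variable combination but no longer scores them; the accuracy check from the old version could be reattached at the end of the inner loop, e.g. (a sketch reusing the removed call, assuming its usual (RMSE, SMAPE, U) return):

from pyFTS.benchmarks import Measures

# Point-forecast accuracy for the model just fitted in the loop
rmse, smape, u = Measures.get_point_statistics(test_mv.iloc[:100], model)
print(model.shortname, rmse, smape, u)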