forecast_ahead for multivariate methods; other minor improvements

This commit is contained in:
Petrônio Cândido 2018-11-13 11:54:20 -02:00
parent ef71a86a7f
commit 5b2b5ece55
4 changed files with 64 additions and 18 deletions

View File

@ -25,6 +25,7 @@ class ClusteredMVFTS(mvfts.MVFTS):
"""The FTS method specific parameters""" """The FTS method specific parameters"""
self.model = None self.model = None
"""The most recent trained model""" """The most recent trained model"""
self.knn = kwargs.get('knn', 2)
self.is_high_order = True self.is_high_order = True
@ -32,9 +33,12 @@ class ClusteredMVFTS(mvfts.MVFTS):
self.lags = kwargs.get("lags", None) self.lags = kwargs.get("lags", None)
self.alpha_cut = kwargs.get('alpha_cut', 0.25) self.alpha_cut = kwargs.get('alpha_cut', 0.25)
self.shortname = "ClusteredMVFTS"
self.name = "Clustered Multivariate FTS"
def fuzzyfy(self,data): def fuzzyfy(self,data):
ndata = [] ndata = []
for ct in range(1, len(data.index)): for ct in range(1, len(data.index)+1):
ix = data.index[ct - 1] ix = data.index[ct - 1]
data_point = self.format_data(data.loc[ix]) data_point = self.format_data(data.loc[ix])
ndata.append(common.fuzzyfy_instance_clustered(data_point, self.cluster, self.alpha_cut)) ndata.append(common.fuzzyfy_instance_clustered(data_point, self.cluster, self.alpha_cut))
@ -44,7 +48,7 @@ class ClusteredMVFTS(mvfts.MVFTS):
def train(self, data, **kwargs): def train(self, data, **kwargs):
self.cluster = self.cluster_method(data=data, mvfts=self) self.cluster = self.cluster_method(data=data, mvfts=self, neighbors=self.knn)
self.model = self.fts_method(partitioner=self.cluster, **self.fts_params) self.model = self.fts_method(partitioner=self.cluster, **self.fts_params)
if self.model.is_high_order: if self.model.is_high_order:
@ -54,8 +58,6 @@ class ClusteredMVFTS(mvfts.MVFTS):
ndata = self.fuzzyfy(data) ndata = self.fuzzyfy(data)
self.model.train(ndata, fuzzyfied=True) self.model.train(ndata, fuzzyfied=True)
self.shortname = self.model.shortname
def forecast(self, ndata, **kwargs): def forecast(self, ndata, **kwargs):
@ -63,8 +65,6 @@ class ClusteredMVFTS(mvfts.MVFTS):
return self.model.forecast(ndata, fuzzyfied=True, **kwargs) return self.model.forecast(ndata, fuzzyfied=True, **kwargs)
def __str__(self): def __str__(self):
"""String representation of the model""" """String representation of the model"""

View File

@ -18,6 +18,7 @@ class GridCluster(partitioner.Partitioner):
self.kdtree = None self.kdtree = None
self.index = {} self.index = {}
self.build(None) self.build(None)
self.neighbors = kwargs.get('neighbors', 2)
def build(self, data): def build(self, data):
@ -47,7 +48,7 @@ class GridCluster(partitioner.Partitioner):
def knn(self, data): def knn(self, data):
tmp = [data[k.name] for k in self.mvfts.explanatory_variables] tmp = [data[k.name] for k in self.mvfts.explanatory_variables]
tmp, ix = self.kdtree.query(tmp,2) tmp, ix = self.kdtree.query(tmp, self.neighbors )
if not isinstance(ix, (list, np.ndarray)): if not isinstance(ix, (list, np.ndarray)):
ix = [ix] ix = [ix]

View File

@ -113,15 +113,12 @@ class MVFTS(fts.FTS):
for flr in flrs: for flr in flrs:
flrg = mvflrg.FLRG(lhs=flr.LHS) flrg = mvflrg.FLRG(lhs=flr.LHS)
if flrg.get_key() not in self.flrgs: if flrg.get_key() not in self.flrgs:
#print('hit')
mvs.append(0.) mvs.append(0.)
mps.append(0.) mps.append(0.)
else: else:
mvs.append(self.flrgs[flrg.get_key()].get_membership(self.format_data(data_point), self.explanatory_variables)) mvs.append(self.flrgs[flrg.get_key()].get_membership(self.format_data(data_point), self.explanatory_variables))
mps.append(self.flrgs[flrg.get_key()].get_midpoint(self.target_variable.partitioner.sets)) mps.append(self.flrgs[flrg.get_key()].get_midpoint(self.target_variable.partitioner.sets))
#print('mv', mvs)
#print('mp', mps)
mv = np.array(mvs) mv = np.array(mvs)
mp = np.array(mps) mp = np.array(mps)
@ -131,6 +128,43 @@ class MVFTS(fts.FTS):
params=data[self.target_variable.data_label].values) params=data[self.target_variable.data_label].values)
return ret return ret
def forecast_ahead(self, data, steps, **kwargs):
    """Iteratively forecast ``steps`` values of the target variable.

    Each iteration forecasts from the last ``self.max_lag`` rows, stores the
    (last) forecast value, then appends a synthetic row to the working data:
    the target column receives the forecast and every other explanatory
    variable receives ``generators[var.name](last_value_of_that_variable)``.

    :param data: pandas DataFrame with the observed multivariate samples; it
        is passed through ``self.apply_transformations`` before use
    :param steps: number of iterations (forecasts) to perform
    :keyword generators: dict keyed by variable name (target variable
        excluded) whose values are callables that take the current value of
        the variable and return its next value. All keyword arguments,
        including this one, are forwarded to ``self.forecast``.
    :return: list with one forecast value per step
    :raises ValueError: if ``generators`` is missing or ``None``
    """
    # Function-scope import: this block needs pd.concat and the module's
    # top-level imports are not guaranteed to expose pandas as `pd`.
    import pandas as pd

    generators = kwargs.get('generators', None)

    if generators is None:
        # ValueError is a subclass of Exception, so callers that catch the
        # previously raised bare Exception keep working.
        raise ValueError('You must provide parameter \'generators\'! generators is a dict where the keys' +
                         ' are the variables names (except the target_variable) and the values are ' +
                         'lambda functions that accept one value (the actual value of the variable) ' +
                         ' and return the next value.')

    ndata = self.apply_transformations(data)

    target_name = self.target_variable.name
    target_label = self.target_variable.data_label

    ret = []
    for _ in np.arange(0, steps):
        # Positional slicing yields the same rows as selecting by the last
        # max_lag index labels, but stays correct with duplicate labels.
        sample = ndata.iloc[-self.max_lag:]
        tmp = self.forecast(sample, **kwargs)

        # forecast() may return a sequence; only its last value is kept.
        if isinstance(tmp, (list, np.ndarray)):
            tmp = tmp[-1]

        ret.append(tmp)

        last_data_point = sample.iloc[-1]

        # Build the next synthetic observation: generated values for the
        # non-target variables plus the forecast for the target.
        new_data_point = {}
        for var in self.explanatory_variables:
            if var.name != target_name:
                new_data_point[var.data_label] = generators[var.name](last_data_point[var.data_label])

        new_data_point[target_label] = tmp

        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # supported equivalent and works on older versions as well.
        ndata = pd.concat([ndata, pd.DataFrame([new_data_point])], ignore_index=True)

    return ret
def clone_parameters(self, model): def clone_parameters(self, model):
super(MVFTS, self).clone_parameters(model) super(MVFTS, self).clone_parameters(model)

View File

@ -93,14 +93,25 @@ vhour = variable.Variable("Hour", data_label="hour", partitioner=seasonal.TimeGr
vprice = variable.Variable("Price", data_label="price", partitioner=Grid.GridPartitioner, npart=10, vprice = variable.Variable("Price", data_label="price", partitioner=Grid.GridPartitioner, npart=10,
data=train_mv) data=train_mv)
model1 = cmvfts.ClusteredMVFTS(order=2)
model1.shortname += "1"
model1.append_variable(vhour)
model1.append_variable(vprice)
model1.target_variable = vprice
model1.fit(train_mv)
print(model1) params = [
{},
{},
{'order': 2, 'knn': 1},
{'order': 2, 'knn': 2},
{'order': 2, 'knn': 3}
]
for ct, method in enumerate([mvfts.MVFTS, wmvfts.WeightedMVFTS, cmvfts.ClusteredMVFTS, cmvfts.ClusteredMVFTS, cmvfts.ClusteredMVFTS]):
model = method(**params[ct])
model.append_variable(vhour)
model.append_variable(vprice)
model.target_variable = vprice
model.fit(train_mv)
print(model.shortname, params[ct], Measures.get_point_statistics(test_mv, model))
#print(model1)
#print(model1.predict(test_mv, steps_ahead=24, generators={'Hour': lambda x : (x+1)%24 }))
print(Measures.get_point_statistics(test_mv, model1))
#""" #"""