from itertools import product
from copy import deepcopy
from types import LambdaType

import numpy as np
import pandas as pd

from pyFTS.common import fts, FuzzySet, FLR, Membership
from pyFTS.partitioners import Grid
from pyFTS.models.multivariate import FLR as MVFLR, common, flrg as mvflrg


def product_dict(**kwargs):
    """
    Iterate over the cartesian product of the values of ``kwargs``,
    yielding one dict per combination.

    Code by Seth Johnson

    :param kwargs: keyword arguments whose values are iterables
    :return: generator of dicts, one for each combination of values
    """
    keys = kwargs.keys()
    vals = kwargs.values()
    for instance in product(*vals):
        yield dict(zip(keys, instance))

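
# A minimal sketch of what product_dict yields; the variable and fuzzy set
# names below are illustrative only:
#
#   >>> list(product_dict(Temperature=['T1', 'T2'], Humidity=['H1']))
#   [{'Temperature': 'T1', 'Humidity': 'H1'},
#    {'Temperature': 'T2', 'Humidity': 'H1'}]

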
class MVFTS(fts.FTS):
    """
    Multivariate extension of Chen's ConventionalFTS method
    """
    def __init__(self, **kwargs):
        super(MVFTS, self).__init__(**kwargs)
        self.explanatory_variables = kwargs.get('explanatory_variables', [])
        self.target_variable = kwargs.get('target_variable', None)
        self.flrgs = {}
        self.is_multivariate = True
        self.shortname = "MVFTS"
        self.name = "Multivariate FTS"
        self.uod_clip = False

    def append_variable(self, var):
        """
        Append a new explanatory variable to the model

        :param var: variable object
        :return:
        """
        self.explanatory_variables.append(var)

    def format_data(self, data):
        """
        Map a data sample from column labels (data_label) to variable names,
        applying each variable's partitioner extractor to the raw value.

        :param data: a single data sample (dict-like or pandas Series)
        :return: dict keyed by variable name
        """
        ndata = {}
        for var in self.explanatory_variables:
            ndata[var.name] = var.partitioner.extractor(data[var.data_label])

        return ndata

    def apply_transformations(self, data, params=None, updateUoD=False, **kwargs):
        """
        Apply each explanatory variable's data transformations to its column,
        optionally clipping the values to the variable's Universe of Discourse.

        :param data: pandas DataFrame with one column per variable data_label
        :return: transformed copy of the data
        """
        ndata = data.copy(deep=True)
        for var in self.explanatory_variables:
            try:
                values = ndata[var.data_label].values
                if self.uod_clip and var.partitioner.type == 'common':
                    values = np.clip(values,
                                     var.partitioner.min, var.partitioner.max)

                ndata[var.data_label] = var.apply_transformations(values)
            except KeyError:
                # Variable absent from this sample; leave its data unchanged
                pass

        return ndata

    def generate_lhs_flrs(self, data):
        """
        Fuzzify one data sample and build a FLR (rule antecedent) for each
        combination of fuzzy sets activated by the sample.

        :param data: a data sample, keyed by variable name
        :return: list of multivariate FLR objects
        """
        flrs = []
        lags = {}
        for vc, var in enumerate(self.explanatory_variables):
            data_point = data[var.name]
            lags[var.name] = common.fuzzyfy_instance(data_point, var, tuples=False)

        for path in product_dict(**lags):
            flr = MVFLR.FLR()
            flr.LHS = path

            if len(flr.LHS.keys()) == len(self.explanatory_variables):
                flrs.append(flr)

        return flrs

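    # Illustrative sketch of the expansion above (set names are hypothetical):
    # if a sample activates {'Temperature': ['T1', 'T2'], 'Humidity': ['H1']},
    # generate_lhs_flrs produces two antecedents, one per combination:
    #   Temperature = T1 AND Humidity = H1
    #   Temperature = T2 AND Humidity = H1
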
    def generate_flrs(self, data):
        """
        Build the complete FLRs by pairing, for each consecutive pair of
        samples, the antecedents fuzzified at time t-1 with the target
        variable's fuzzy sets at time t.

        :param data: pandas DataFrame with the training data
        :return: list of multivariate FLR objects with LHS and RHS set
        """
        flrs = []
        for ct in np.arange(1, len(data.index)):
            ix = data.index[ct - 1]
            data_point = self.format_data(data.loc[ix])

            tmp_flrs = self.generate_lhs_flrs(data_point)

            target_ix = data.index[ct]
            target_point = data[self.target_variable.data_label][target_ix]
            target = common.fuzzyfy_instance(target_point, self.target_variable)

            for flr in tmp_flrs:
                for v, s in target:
                    new_flr = deepcopy(flr)
                    new_flr.set_rhs(s)
                    flrs.append(new_flr)

        return flrs

    def generate_flrg(self, flrs):
        """
        Group FLRs that share the same antecedent (LHS) into FLRGs, appending
        each FLR's consequent to the matching group.

        :param flrs: list of multivariate FLR objects
        """
        for flr in flrs:
            flrg = mvflrg.FLRG(lhs=flr.LHS)

            if flrg.get_key() not in self.flrgs:
                self.flrgs[flrg.get_key()] = flrg

            self.flrgs[flrg.get_key()].append_rhs(flr.RHS)

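    # Illustrative sketch of the grouping above (set names are hypothetical):
    # the FLRs (T1, H1 -> L2) and (T1, H1 -> L3) share the antecedent (T1, H1),
    # so they are merged into a single FLRG: T1, H1 -> {L2, L3}.
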
    def train(self, data, **kwargs):
        """
        Train the model: apply the data transformations, extract the FLRs
        and group them into the rule base (FLRGs).

        :param data: pandas DataFrame with the training data
        """
        ndata = self.apply_transformations(data)

        flrs = self.generate_flrs(ndata)
        self.generate_flrg(flrs)

    def forecast(self, data, **kwargs):
        """
        Point forecast: for each sample, defuzzify the midpoints of the
        matching rules, weighted by their membership grades.

        :param data: pandas DataFrame with the input data
        :return: list of point forecasts
        """
        ret = []
        ndata = self.apply_transformations(data)
        for index, row in ndata.iterrows() if isinstance(ndata, pd.DataFrame) else enumerate(ndata):
            data_point = self.format_data(row)
            flrs = self.generate_lhs_flrs(data_point)
            mvs = []
            mps = []
            for flr in flrs:
                flrg = mvflrg.FLRG(lhs=flr.LHS)
                if flrg.get_key() not in self.flrgs:
                    # Naïve approach is applied when no rules were found
                    if self.target_variable.name in flrg.LHS:
                        fs = flrg.LHS[self.target_variable.name]
                        fset = self.target_variable.partitioner.sets[fs]
                        mp = fset.centroid
                        mv = fset.membership(data_point[self.target_variable.name])
                        mvs.append(mv)
                        mps.append(mp)
                    else:
                        mvs.append(0.)
                        mps.append(0.)
                else:
                    _flrg = self.flrgs[flrg.get_key()]
                    mvs.append(_flrg.get_membership(data_point, self.explanatory_variables))
                    mps.append(_flrg.get_midpoint(self.target_variable.partitioner.sets))

            mv = np.array(mvs)
            mp = np.array(mps)

            # Membership-weighted average of the rule midpoints
            ret.append(np.dot(mv, mp.T) / np.nansum(mv))

        ret = self.target_variable.apply_inverse_transformations(ret,
                                        params=data[self.target_variable.data_label].values)
        return ret

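    # Worked example of the defuzzification in forecast() (numbers are
    # hypothetical): with memberships mv = [0.3, 0.7] and rule midpoints
    # mp = [10.0, 20.0], the forecast is
    # (0.3 * 10.0 + 0.7 * 20.0) / (0.3 + 0.7) = 17.0.
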
    def forecast_ahead(self, data, steps, **kwargs):
        """
        Multi-step point forecast. Since the model needs values for all
        explanatory variables, a generator must be provided for each
        exogenous column to extrapolate its future values.

        :param data: pandas DataFrame with the input data
        :param steps: number of steps ahead to forecast
        :keyword generators: dict mapping column names to generators
        :return: list with the forecasted values
        """
        generators = kwargs.get('generators', None)

        if generators is None:
            raise Exception("You must provide the parameter 'generators'. It is a dict where the keys "
                            "are the dataframe column names (except the target_variable) and the values "
                            "are either lambda functions that receive the current value of the variable "
                            "and return its next value, or trained FTS models that receive the current "
                            "values and forecast new ones.")

        ndata = self.apply_transformations(data)

        start = kwargs.get('start_at', 0)

        ndata = ndata.iloc[start: start + self.max_lag]
        ret = []
        for k in np.arange(0, steps):
            sample = ndata.iloc[-self.max_lag:]
            tmp = self.forecast(sample, **kwargs)

            if isinstance(tmp, (list, np.ndarray)):
                tmp = tmp[-1]

            ret.append(tmp)

            new_data_point = {}

            for data_label in generators.keys():
                if data_label != self.target_variable.data_label:
                    if isinstance(generators[data_label], LambdaType):
                        last_data_point = ndata.loc[ndata.index[-1]]
                        new_data_point[data_label] = generators[data_label](last_data_point[data_label])
                    elif isinstance(generators[data_label], fts.FTS):
                        gen_model = generators[data_label]
                        last_data_point = sample.iloc[-gen_model.order:]

                        if not gen_model.is_multivariate:
                            last_data_point = last_data_point[data_label].values

                        new_data_point[data_label] = gen_model.forecast(last_data_point)[0]

            new_data_point[self.target_variable.data_label] = tmp

            # DataFrame.append was removed in pandas 2.x; use pd.concat instead
            ndata = pd.concat([ndata, pd.DataFrame([new_data_point])], ignore_index=True)

        return ret[-steps:]

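    # A hedged usage sketch for forecast_ahead; the column name 'radiation'
    # and the DataFrame `test_df` are illustrative:
    #
    #   forecasts = model.forecast_ahead(test_df, steps=10,
    #                                    generators={'radiation': lambda x: x})
    #
    # Here the lambda naively repeats the last observed value of the exogenous
    # column; a trained univariate FTS model could be passed instead.
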
    def forecast_interval(self, data, **kwargs):
        """
        Interval forecast: for each sample, compute the membership-weighted
        average of the upper and lower bounds of the matching rules.

        :param data: pandas DataFrame with the input data
        :return: list of [lower, upper] intervals
        """
        ret = []
        ndata = self.apply_transformations(data)
        for index, row in ndata.iterrows() if isinstance(ndata, pd.DataFrame) else enumerate(ndata):
            data_point = self.format_data(row)
            flrs = self.generate_lhs_flrs(data_point)
            mvs = []
            ups = []
            los = []
            for flr in flrs:
                flrg = mvflrg.FLRG(lhs=flr.LHS)
                if flrg.get_key() not in self.flrgs:
                    # Naïve approach is applied when no rules were found
                    if self.target_variable.name in flrg.LHS:
                        fs = flrg.LHS[self.target_variable.name]
                        fset = self.target_variable.partitioner.sets[fs]
                        up = fset.upper
                        lo = fset.lower
                        mv = fset.membership(data_point[self.target_variable.name])
                        mvs.append(mv)
                        ups.append(up)
                        los.append(lo)
                    else:
                        mvs.append(0.)
                        ups.append(0.)
                        los.append(0.)
                else:
                    _flrg = self.flrgs[flrg.get_key()]
                    mvs.append(_flrg.get_membership(data_point, self.explanatory_variables))
                    ups.append(_flrg.get_upper(self.target_variable.partitioner.sets))
                    los.append(_flrg.get_lower(self.target_variable.partitioner.sets))

            mv = np.array(mvs)
            up = np.dot(mv, np.array(ups).T) / np.nansum(mv)
            lo = np.dot(mv, np.array(los).T) / np.nansum(mv)

            ret.append([lo, up])

        ret = self.target_variable.apply_inverse_transformations(ret,
                                        params=data[self.target_variable.data_label].values)
        return ret

    def forecast_ahead_interval(self, data, steps, **kwargs):
        """
        Multi-step interval forecast, propagating the lower and upper bounds
        separately. As in forecast_ahead, a generator must be provided for
        each exogenous column.

        :param data: pandas DataFrame with the input data
        :param steps: number of steps ahead to forecast
        :keyword generators: dict mapping column names to generators
        :return: list of [lower, upper] intervals
        """
        generators = kwargs.get('generators', None)

        if generators is None:
            raise Exception("You must provide the parameter 'generators'. It is a dict where the keys "
                            "are the dataframe column names (except the target_variable) and the values "
                            "are either lambda functions that receive the current value of the variable "
                            "and return its next value, or trained FTS models that receive the current "
                            "values and forecast new ones.")

        ndata = self.apply_transformations(data)

        start = kwargs.get('start_at', 0)

        ret = []
        ix = ndata.index[start: start + self.max_lag]
        lo = ndata.loc[ix]
        up = ndata.loc[ix]
        for k in np.arange(0, steps):
            tmp_lo = self.forecast_interval(lo[-self.max_lag:], **kwargs)[0]
            tmp_up = self.forecast_interval(up[-self.max_lag:], **kwargs)[0]

            ret.append([min(tmp_lo), max(tmp_up)])

            new_data_point_lo = {}
            new_data_point_up = {}

            for data_label in generators.keys():
                if data_label != self.target_variable.data_label:
                    if isinstance(generators[data_label], LambdaType):
                        last_data_point_lo = lo.loc[lo.index[-1]]
                        new_data_point_lo[data_label] = generators[data_label](last_data_point_lo[data_label])
                        last_data_point_up = up.loc[up.index[-1]]
                        new_data_point_up[data_label] = generators[data_label](last_data_point_up[data_label])
                    elif isinstance(generators[data_label], fts.FTS):
                        model = generators[data_label]
                        last_data_point_lo = lo.loc[lo.index[-model.order:]]
                        last_data_point_up = up.loc[up.index[-model.order:]]

                        if not model.is_multivariate:
                            last_data_point_lo = last_data_point_lo[data_label].values
                            last_data_point_up = last_data_point_up[data_label].values

                        new_data_point_lo[data_label] = model.forecast(last_data_point_lo)[0]
                        new_data_point_up[data_label] = model.forecast(last_data_point_up)[0]

            new_data_point_lo[self.target_variable.data_label] = min(tmp_lo)
            new_data_point_up[self.target_variable.data_label] = max(tmp_up)

            # DataFrame.append was removed in pandas 2.x; use pd.concat instead
            lo = pd.concat([lo, pd.DataFrame([new_data_point_lo])], ignore_index=True)
            up = pd.concat([up, pd.DataFrame([new_data_point_up])], ignore_index=True)

        return ret[-steps:]

    def clone_parameters(self, model):
        """
        Import the parameters of another model of the same class.

        :param model: a pre-fitted MVFTS model
        """
        super(MVFTS, self).clone_parameters(model)

        self.explanatory_variables = model.explanatory_variables
        self.target_variable = model.target_variable

    def __str__(self):
        _str = self.name + ":\n"
        for k in self.flrgs.keys():
            _str += str(self.flrgs[k]) + "\n"

        return _str
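

if __name__ == '__main__':
    # A minimal usage sketch, assuming the pyFTS Variable / GridPartitioner
    # API; the column names and synthetic data are illustrative only.
    from pyFTS.models.multivariate import variable

    df = pd.DataFrame({
        'temperature': np.random.uniform(15, 35, 200),
        'load': np.random.uniform(100, 200, 200),
    })

    # One Variable per column, each with its own partitioner fitted on the data
    vtemp = variable.Variable("Temperature", data_label="temperature",
                              partitioner=Grid.GridPartitioner, npart=10, data=df)
    vload = variable.Variable("Load", data_label="load",
                              partitioner=Grid.GridPartitioner, npart=10, data=df)

    model = MVFTS(explanatory_variables=[vtemp, vload], target_variable=vload)
    model.fit(df)
    print(model.predict(df.iloc[-10:]))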