Introducing FTS.lags, explicit lag indexing for HOFTS, IFTS and PWFTS

Petrônio Cândido 2018-07-02 18:55:28 -03:00
parent d81900f519
commit 07bfceae78
6 changed files with 118 additions and 64 deletions
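
For orientation, a minimal sketch of how the new lags keyword fits the public API, based on the test script at the end of this commit (train, test and npart are illustrative placeholders for a 1-D numeric series and a partition count):

from pyFTS.partitioners import Grid
from pyFTS.models import hofts

fs = Grid.GridPartitioner(data=train, npart=30)

# explicit lag indexes instead of a plain contiguous order;
# configure_lags() then sets model.max_lag = max(lags)
model = hofts.HighOrderFTS(partitioner=fs, lags=[1, 2])
model.fit(train)
forecasts = model.predict(test)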

pyFTS/benchmarks/Measures.py View File

@@ -64,7 +64,7 @@ def mape(targets, forecasts):
targets = np.array(targets)
if isinstance(forecasts, list):
forecasts = np.array(forecasts)
- return np.mean(np.abs(targets - forecasts) / targets) * 100
+ return np.mean(np.abs((targets - forecasts) / targets)) * 100
def smape(targets, forecasts, type=2):
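
The added parentheses in mape matter whenever a target is negative: the old form divided a non-negative numerator by a signed target, so negative targets contributed negative percentage terms that could cancel real error. A quick check with illustrative values:

import numpy as np

targets = np.array([-10.0, 10.0])
forecasts = np.array([-8.0, 8.0])
np.mean(np.abs(targets - forecasts) / targets) * 100    # old: mean(-0.2, 0.2) * 100 = 0.0
np.mean(np.abs((targets - forecasts) / targets)) * 100  # new: mean(0.2, 0.2) * 100 = 20.0
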
@@ -334,9 +334,9 @@ def get_point_statistics(data, model, **kwargs):
nforecasts = np.array(forecasts[:-1])
- ret.append(np.round(rmse(ndata[model.order:], nforecasts), 2))
- ret.append(np.round(mape(ndata[model.order:], nforecasts), 2))
- ret.append(np.round(UStatistic(ndata[model.order:], nforecasts), 2))
+ ret.append(np.round(rmse(ndata[model.max_lag:], nforecasts), 2))
+ ret.append(np.round(mape(ndata[model.max_lag:], nforecasts), 2))
+ ret.append(np.round(UStatistic(ndata[model.max_lag:], nforecasts), 2))
else:
steps_ahead_sampler = kwargs.get('steps_ahead_sampler', 1)
nforecasts = []
@@ -345,7 +345,7 @@ def get_point_statistics(data, model, **kwargs):
tmp = model.predict(sample, **kwargs)
nforecasts.append(tmp[-1])
- start = model.order + steps_ahead -1
+ start = model.max_lag + steps_ahead -1
ret.append(np.round(rmse(ndata[start:-1:steps_ahead_sampler], nforecasts), 2))
ret.append(np.round(mape(ndata[start:-1:steps_ahead_sampler], nforecasts), 2))
ret.append(np.round(UStatistic(ndata[start:-1:steps_ahead_sampler], nforecasts), 2))
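
Switching the slice start from order to max_lag keeps targets and forecasts aligned once lag sets can be sparse: a model that looks back max_lag steps produces its first forecast for position max_lag, so the measures must skip that many initial observations. In index terms (a sketch, not library code):

# e.g. lags=[1, 4]: two inputs, but the window reaches back max_lag = 4 steps,
# so the first forecastable target is ndata[4] and forecasts[i] pairs with ndata[max_lag + i]
targets = ndata[model.max_lag:]   # ndata[model.order:] would misalign whenever max_lag > order
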
@@ -373,12 +373,12 @@ def get_interval_statistics(data, model, **kwargs):
ret.append(round(sharpness(forecasts), 2))
ret.append(round(resolution(forecasts), 2))
ret.append(round(coverage(data[model.order:], forecasts[:-1]), 2))
- ret.append(round(pinball_mean(0.05, data[model.order:], forecasts[:-1]), 2))
- ret.append(round(pinball_mean(0.25, data[model.order:], forecasts[:-1]), 2))
- ret.append(round(pinball_mean(0.75, data[model.order:], forecasts[:-1]), 2))
- ret.append(round(pinball_mean(0.95, data[model.order:], forecasts[:-1]), 2))
- ret.append(round(winkler_mean(0.05, data[model.order:], forecasts[:-1]), 2))
- ret.append(round(winkler_mean(0.25, data[model.order:], forecasts[:-1]), 2))
+ ret.append(round(pinball_mean(0.05, data[model.max_lag:], forecasts[:-1]), 2))
+ ret.append(round(pinball_mean(0.25, data[model.max_lag:], forecasts[:-1]), 2))
+ ret.append(round(pinball_mean(0.75, data[model.max_lag:], forecasts[:-1]), 2))
+ ret.append(round(pinball_mean(0.95, data[model.max_lag:], forecasts[:-1]), 2))
+ ret.append(round(winkler_mean(0.05, data[model.max_lag:], forecasts[:-1]), 2))
+ ret.append(round(winkler_mean(0.25, data[model.max_lag:], forecasts[:-1]), 2))
else:
forecasts = []
for k in np.arange(model.order, len(data) - steps_ahead):
@@ -386,10 +386,10 @@ def get_interval_statistics(data, model, **kwargs):
tmp = model.predict(sample, **kwargs)
forecasts.append(tmp[-1])
- start = model.order + steps_ahead -1
+ start = model.max_lag + steps_ahead -1
ret.append(round(sharpness(forecasts), 2))
ret.append(round(resolution(forecasts), 2))
- ret.append(round(coverage(data[model.order:], forecasts), 2))
+ ret.append(round(coverage(data[model.max_lag:], forecasts), 2))
ret.append(round(pinball_mean(0.05, data[start:], forecasts), 2))
ret.append(round(pinball_mean(0.25, data[start:], forecasts), 2))
ret.append(round(pinball_mean(0.75, data[start:], forecasts), 2))
@@ -416,20 +416,20 @@ def get_distribution_statistics(data, model, **kwargs):
_s1 = time.time()
forecasts = model.predict(data, **kwargs)
_e1 = time.time()
- ret.append(round(crps(data[model.order:], forecasts[:-1]), 3))
+ ret.append(round(crps(data[model.max_lag:], forecasts[:-1]), 3))
ret.append(round(_e1 - _s1, 3))
- ret.append(round(brier_score(data[model.order:], forecasts[:-1]), 3))
+ ret.append(round(brier_score(data[model.max_lag:], forecasts[:-1]), 3))
else:
skip = kwargs.get('steps_ahead_sampler', 1)
forecasts = []
_s1 = time.time()
- for k in np.arange(model.order, len(data) - steps_ahead, skip):
- sample = data[k - model.order: k]
+ for k in np.arange(model.max_lag, len(data) - steps_ahead, skip):
+ sample = data[k - model.max_lag: k]
tmp = model.predict(sample, **kwargs)
forecasts.append(tmp[-1])
_e1 = time.time()
- start = model.order + steps_ahead
+ start = model.max_lag + steps_ahead
ret.append(round(crps(data[start:-1:skip], forecasts), 3))
ret.append(round(_e1 - _s1, 3))
ret.append(round(brier_score(data[start:-1:skip], forecasts), 3))

pyFTS/common/fts.py View File

@@ -22,6 +22,7 @@ class FTS(object):
has_point_forecasting: Boolean, if the model support point forecasting, default: True
has_interval_forecasting: Boolean, if the model support interval forecasting, default: False
has_probability_forecasting: Boolean, if the model support probabilistic forecasting, default: False
+ max_lag: Integer, maximum lag index used by the model, default: 1
min_order: Integer, minimal order supported for the model, default: 1
name: Model name
order: model order (number of past lags are used on forecasting)
@@ -37,7 +38,7 @@ class FTS(object):
self.sets = {}
self.flrgs = {}
- self.order = kwargs.get('order',"")
+ self.order = kwargs.get('order',1)
self.shortname = kwargs.get('name',"")
self.name = kwargs.get('name',"")
self.detail = kwargs.get('name',"")
@@ -61,6 +62,7 @@ class FTS(object):
self.indexer = kwargs.get("indexer", None)
self.uod_clip = kwargs.get("uod_clip", True)
self.alpha_cut = kwargs.get("alpha_cut", 0.0)
+ self.max_lag = self.order
def fuzzy(self, data):
"""

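In the base class max_lag simply mirrors order; models that accept an explicit lag set override it in configure_lags (see hofts below). The intended relation, as an illustrative helper that is not part of the library:

def effective_max_lag(order, lags=None):
    # mirrors the value FTS.max_lag ends up with after construction / configure_lags
    return max(lags) if lags is not None else order

assert effective_max_lag(2) == 2               # no lags: contiguous lags 1..order
assert effective_max_lag(2, lags=[1, 5]) == 5  # explicit lags: reach of the furthest one
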
pyFTS/models/hofts.py View File

@@ -45,16 +45,32 @@ class HighOrderFTS(fts.FTS):
self.detail = "Chen"
self.is_high_order = True
self.min_order = 2
- self.order= kwargs.get("order", 2)
+ self.lags = kwargs.get("lags", None)
+ self.configure_lags(**kwargs)
+ def configure_lags(self, **kwargs):
+ if "order" in kwargs:
+ self.order = kwargs.get("order", 2)
+ if "lags" in kwargs:
+ self.lags = kwargs.get("lags", None)
+ if self.lags is not None:
+ self.max_lag = max(self.lags)
+ else:
+ self.max_lag = self.order
+ self.lags = np.arange(1, self.order+1)
def generate_lhs_flrg(self, sample):
lags = {}
flrgs = []
- for o in np.arange(0, self.order):
+ for ct, o in enumerate(self.lags):
lhs = [key for key in self.partitioner.ordered_sets
- if self.sets[key].membership(sample[o]) > self.alpha_cut]
- lags[o] = lhs
+ if self.sets[key].membership(sample[o-1]) > self.alpha_cut]
+ lags[ct] = lhs
root = tree.FLRGTreeNode(None)
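
Two details of the new indexing are easy to miss: configure_lags falls back to lags = 1..order when no explicit set is given, and generate_lhs_flrg reads lag value o from position o-1 of the sample window. Tracing the index arithmetic under the assumption that sample = data[k - max_lag : k]:

import numpy as np

lags = [1, 3]
max_lag = max(lags)                                  # 3
k = 10                                               # current position in data
positions = [k - max_lag + (o - 1) for o in lags]    # sample[o-1] -> data[7] and data[9]
default_lags = np.arange(1, 2 + 1)                   # with order=2 and no lags: [1, 2]
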
@@ -74,10 +90,10 @@ class HighOrderFTS(fts.FTS):
def generate_flrg(self, data):
l = len(data)
- for k in np.arange(self.order, l):
+ for k in np.arange(self.max_lag, l):
if self.dump: print("FLR: " + str(k))
- sample = data[k - self.order: k]
+ sample = data[k - self.max_lag: k]
rhs = [key for key in self.partitioner.ordered_sets
if self.sets[key].membership(data[k]) > self.alpha_cut]
@@ -91,9 +107,8 @@ class HighOrderFTS(fts.FTS):
for st in rhs:
self.flrgs[flrg.get_key()].append_rhs(st)
def train(self, data, **kwargs):
+ self.configure_lags(**kwargs)
self.generate_flrg(data)
def forecast(self, ndata, **kwargs):
@@ -102,11 +117,11 @@ class HighOrderFTS(fts.FTS):
l = len(ndata)
- if l <= self.order:
+ if l <= self.max_lag:
return ndata
- for k in np.arange(self.order, l+1):
- flrgs = self.generate_lhs_flrg(ndata[k - self.order: k])
+ for k in np.arange(self.max_lag, l+1):
+ flrgs = self.generate_lhs_flrg(ndata[k - self.max_lag: k])
tmp = []
for flrg in flrgs:

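Note the forecast loop runs to l+1, so the final window forecasts one step past the end of ndata; this is why the benchmark code above pairs data[model.max_lag:] with forecasts[:-1]. A trace with illustrative sizes:

import numpy as np

max_lag, l = 2, 4
ks = np.arange(max_lag, l + 1)             # [2, 3, 4]
windows = [(k - max_lag, k) for k in ks]   # [(0, 2), (1, 3), (2, 4)]
# the windows forecast positions 2, 3 and 4; position 4 lies beyond ndata,
# hence forecasts[:-1] when scoring against data[max_lag:]
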
pyFTS/models/ifts.py View File

@@ -62,9 +62,9 @@ class IntervalFTS(hofts.HighOrderFTS):
if l <= self.order:
return ndata
- for k in np.arange(self.order, l+1):
+ for k in np.arange(self.max_lag, l+1):
- sample = ndata[k - self.order: k]
+ sample = ndata[k - self.max_lag: k]
flrgs = self.generate_lhs_flrg(sample)

pyFTS/models/pwfts.py View File

@@ -21,7 +21,8 @@ class ProbabilisticWeightedFLRG(hofts.HighOrderFLRG):
def get_membership(self, data, sets):
if isinstance(data, (np.ndarray, list)):
- return np.nanprod([sets[key].membership(data[count]) for count, key in enumerate(self.LHS)])
+ return np.nanprod([sets[key].membership(data[count])
+ for count, key in enumerate(self.LHS, start=0)])
else:
return sets[self.LHS[0]].membership(data)
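
get_membership multiplies the membership of each LHS set at its position in the sample, via np.nanprod. A sketch with hand-rolled triangular memberships standing in for the library's fuzzy sets (not the pyFTS API):

import numpy as np

sets = {'A': lambda x: max(0.0, 1 - abs(x - 0.0)),   # triangle centered at 0
        'B': lambda x: max(0.0, 1 - abs(x - 1.0))}   # triangle centered at 1
LHS, data = ['A', 'B'], [0.2, 0.9]
mu = np.nanprod([sets[key](data[count]) for count, key in enumerate(LHS)])  # 0.8 * 0.9 = 0.72
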
@@ -107,9 +108,11 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
self.is_high_order = True
self.min_order = 1
self.auto_update = kwargs.get('update',False)
+ self.configure_lags(**kwargs)
def train(self, data, **kwargs):
+ self.configure_lags(**kwargs)
parameters = kwargs.get('parameters','fuzzy')
if parameters == 'monotonic':
@@ -124,10 +127,10 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
flrgs = []
- for o in np.arange(0, self.order):
+ for ct, o in enumerate(self.lags):
lhs = [key for key in self.partitioner.ordered_sets
- if self.sets[key].membership(sample[o]) > self.alpha_cut]
- lags[o] = lhs
+ if self.sets[key].membership(sample[o-1]) > self.alpha_cut]
+ lags[ct] = lhs
root = tree.FLRGTreeNode(None)
@@ -147,10 +150,10 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
def generate_flrg(self, data):
l = len(data)
- for k in np.arange(self.order, l):
+ for k in np.arange(self.max_lag, l):
if self.dump: print("FLR: " + str(k))
- sample = data[k - self.order: k]
+ sample = data[k - self.max_lag: k]
flrgs = self.generate_lhs_flrg(sample)
@@ -253,8 +256,8 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
ret = []
- for k in np.arange(self.order - 1, l):
- sample = data[k - (self.order - 1): k + 1]
+ for k in np.arange(self.max_lag - 1, l):
+ sample = data[k - (self.max_lag - 1): k + 1]
if method == 'heuristic':
ret.append(self.point_heuristic(sample, **kwargs))
@@ -300,9 +303,9 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
ret = []
- for k in np.arange(self.order - 1, l):
+ for k in np.arange(self.max_lag - 1, l):
- sample = ndata[k - (self.order - 1): k + 1]
+ sample = ndata[k - (self.max_lag - 1): k + 1]
if method == 'heuristic':
ret.append(self.interval_heuristic(sample))
@@ -358,8 +361,8 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
ret = []
- for k in np.arange(self.order - 1, l):
- sample = ndata[k - (self.order - 1): k + 1]
+ for k in np.arange(self.max_lag - 1, l):
+ sample = ndata[k - (self.max_lag - 1): k + 1]
flrgs = self.generate_lhs_flrg(sample)
@@ -398,19 +401,19 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
l = len(data)
- start = kwargs.get('start', self.order)
+ start = kwargs.get('start', self.max_lag)
- ret = data[start - self.order: start].tolist()
+ ret = data[start - self.max_lag: start].tolist()
- for k in np.arange(self.order, steps+self.order):
+ for k in np.arange(self.max_lag, steps+self.max_lag):
if self.__check_point_bounds(ret[-1]) :
ret.append(ret[-1])
else:
- mp = self.forecast(ret[k - self.order: k], **kwargs)
+ mp = self.forecast(ret[k - self.max_lag: k], **kwargs)
ret.append(mp[0])
- return ret[self.order:]
+ return ret[self.max_lag:]
def __check_interval_bounds(self, interval):
if len(self.transformations) > 0:
@@ -424,21 +427,21 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
l = len(data)
- start = kwargs.get('start', self.order)
+ start = kwargs.get('start', self.max_lag)
- sample = data[start - self.order: start]
+ sample = data[start - self.max_lag: start]
ret = [[k, k] for k in sample]
ret.append(self.forecast_interval(sample)[0])
- for k in np.arange(self.order+1, steps+self.order):
+ for k in np.arange(self.max_lag+1, steps+self.max_lag):
if len(ret) > 0 and self.__check_interval_bounds(ret[-1]):
ret.append(ret[-1])
else:
- lower = self.forecast_interval([ret[x][0] for x in np.arange(k - self.order, k)], **kwargs)
- upper = self.forecast_interval([ret[x][1] for x in np.arange(k - self.order, k)], **kwargs)
+ lower = self.forecast_interval([ret[x][0] for x in np.arange(k - self.max_lag, k)], **kwargs)
+ upper = self.forecast_interval([ret[x][1] for x in np.arange(k - self.max_lag, k)], **kwargs)
ret.append([np.min(lower), np.max(upper)])
@@ -459,9 +462,9 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
nbins = kwargs.get("num_bins", 100)
_bins = np.linspace(uod[0], uod[1], nbins)
- start = kwargs.get('start', self.order)
+ start = kwargs.get('start', self.max_lag)
- sample = ndata[start - self.order: start]
+ sample = ndata[start - self.max_lag: start]
for dat in sample:
if 'type' in kwargs:
@@ -474,14 +477,15 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
ret.append(dist)
- for k in np.arange(self.order+1, steps+self.order+1):
+ for k in np.arange(self.max_lag+1, steps+self.max_lag+1):
dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs)
lags = {}
# Find all bins of past distributions with probability greater than zero
- for ct, dd in enumerate(ret[k - self.order: k]):
+ for ct, d in enumerate(self.lags):
+ dd = ret[k - d]
vals = [float(v) for v in dd.bins if round(dd.density(v), 4) > 0]
lags[ct] = sorted(vals)
@@ -496,8 +500,8 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
# get the combined probabilities for this path
- pk = np.prod([ret[k - self.order + o].density(path[o])
- for o in np.arange(0, self.order)])
+ pk = np.prod([ret[k - self.max_lag + o].density(path[ct])
+ for ct, o in enumerate(self.lags)])
d = self.forecast_distribution(path)[0]

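In forecast_ahead_distribution, each lag d now selects the past distribution ret[k - d] when collecting candidate bins, instead of slicing a contiguous block of order distributions. The selection in isolation (a sketch; ret is assumed to hold one forecast distribution per step):

lags = [1, 3]
k = 5                              # step being forecast
picked = [k - d for d in lags]     # -> [4, 2]: one and three steps back
# each selected distribution contributes the bins whose density rounds above zero
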
View File

@@ -20,8 +20,45 @@ test = taiex[3000:3200]
from pyFTS.common import Transformations
tdiff = Transformations.Differential(1)
'''
from pyFTS.benchmarks import benchmarks as bchmk, Measures
from pyFTS.models import pwfts,hofts,ifts
from pyFTS.partitioners import Grid, Util as pUtil
fs = Grid.GridPartitioner(data=train, npart=30) #, transformation=tdiff)
model1 = hofts.HighOrderFTS(partitioner=fs, lags=[1,2])#lags=[0,1])
model1.shortname = "1"
model2 = pwfts.ProbabilisticWeightedFTS(partitioner=fs, lags=[1,2])
#model2.append_transformation(tdiff)
model2.shortname = "2"
#model = pwfts.ProbabilisticWeightedFTS(partitioner=fs, order=2)# lags=[1,2])
model1.fit(train)
model2.fit(train)
#print(model1)
#print(model2)
for model in [model1, model2]:
#forecasts = model.predict(test)
print(model.shortname)
print(Measures.get_point_statistics(test, model))
#handles, labels = ax.get_legend_handles_labels()
#ax.legend(handles, labels, loc=2, bbox_to_anchor=(1, 1))
#print(Measures.get_point_statistics(test,model))
'''
bchmk.sliding_window_benchmarks(train,1000,0.8,
methods=[pwfts.ProbabilisticWeightedFTS], #,ifts.IntervalFTS],
orders=[1,2,3],
partitions=[10])
'''
'''
from pyFTS.common import FLR,FuzzySet,Membership,SortedCollection
taiex_fs1 = Grid.GridPartitioner(data=train, npart=30)
taiex_fs2 = Grid.GridPartitioner(data=train, npart=10, transformation=tdiff)
@@ -39,7 +76,3 @@ print(pfts1_taiex)
'''
model = Util.load_obj('pwfts')
model.predict(test, type='distribution')
#'''