Improvements to the forecast_ahead benchmarks

Petrônio Cândido 2019-06-10 13:33:53 -03:00
parent 471e096208
commit 48fcf8daca
7 changed files with 148 additions and 88 deletions

View File

@@ -72,6 +72,9 @@ class ARIMA(fts.FTS):
     def forecast(self, ndata, **kwargs):
         raise NotImplementedError()
 
+    def forecast_ahead(self, data, steps, **kwargs):
+        return self.model.predict(steps, intervals=False).values.flatten().tolist()
+
     def forecast_interval(self, data, **kwargs):
         raise NotImplementedError()
@@ -92,7 +95,16 @@ class ARIMA(fts.FTS):
         return ret
 
     def forecast_distribution(self, data, **kwargs):
-        raise NotImplementedError()
+        sim_vector = self.inference(steps)
+
+        ret = []
+
+        for ct, sample in enumerate(sim_vector):
+            pd = ProbabilityDistribution.ProbabilityDistribution(type='histogram', data=sample, nbins=500)
+            ret.append(pd)
+
+        return ret
 
     def forecast_ahead_distribution(self, data, steps, **kwargs):

View File

@@ -105,12 +105,20 @@ def UStatistic(targets, forecasts):
     :param forecasts:
     :return:
     """
-    l = len(targets)
-
-    if isinstance(targets, list):
-        targets = np.array(targets)
-    if isinstance(forecasts, list):
-        forecasts = np.array(forecasts)
+    if not isinstance(forecasts, (list, np.ndarray)):
+        forecasts = np.array([forecasts])
+    else:
+        forecasts = np.array(forecasts)
+
+    if not isinstance(targets, (list, np.ndarray)):
+        targets = np.array([targets])
+    else:
+        targets = np.array(targets)
+
+    l = forecasts.size
+    l = 2 if l == 1 else l
 
     naive = []
     y = []
     for k in np.arange(0, l - 1):
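
The U statistic above is Theil's U, which scores a forecast's relative errors against a naive random-walk ("no change") forecast; values below 1 beat the naive baseline. The new guard l = 2 if l == 1 else l makes the loop body run at least once when a single forecast is scored. A minimal standalone sketch of the standard definition (a hypothetical helper, not the library's exact code):

    import numpy as np

    def theil_u(targets, forecasts):
        # Theil's U: relative errors of the forecast divided by those of
        # a naive "no change" forecast. U < 1 beats the naive baseline.
        targets = np.asarray(targets, dtype=float)
        forecasts = np.asarray(forecasts, dtype=float)
        model = ((forecasts[1:] - targets[1:]) / targets[:-1]) ** 2
        naive = ((targets[1:] - targets[:-1]) / targets[:-1]) ** 2
        return np.sqrt(model.sum() / naive.sum())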
@@ -359,6 +367,38 @@ def get_point_statistics(data, model, **kwargs):
 
     return ret
 
+
+def get_point_ahead_statistics(data, forecasts, **kwargs):
+    """
+    Condense all measures for point forecasters
+
+    :param data: test data
+    :param forecasts: point forecasts, one for each step ahead
+    :param kwargs:
+    :return: a dict, keyed by step, with the RMSE, MAPE and U statistic of each step ahead
+    """
+    l = len(forecasts)
+
+    if len(data) != l:
+        raise Exception("Data and forecasts have different lengths!")
+
+    lags = {}
+
+    for lag in range(l):
+        ret = {}
+        datum = data[lag]
+        forecast = forecasts[lag]
+        ret['steps'] = lag
+        ret['method'] = ''
+        ret['rmse'] = rmse(datum, forecast)
+        ret['mape'] = mape(datum, forecast)
+        sample = data[lag-1:lag+1] if lag > 0 else [datum, datum]
+        ret['u'] = UStatistic(sample, forecast)
+        lags[lag] = ret
+
+    return lags
+
 
 def get_interval_statistics(data, model, **kwargs):
     """
     Condense all measures for point interval forecasters
 
@@ -411,7 +451,7 @@ def get_interval_ahead_statistics(data, intervals, **kwargs):
     Condense all measures for point interval forecasters
 
     :param data: test data
-    :param model: FTS model with interval forecasting capability
+    :param intervals: predicted intervals for each datapoint
     :param kwargs:
     :return: a list with the sharpness, resolution, coverage, .05 pinball mean,
              .25 pinball mean, .75 pinball mean and .95 pinball mean.
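
A usage sketch for the new per-horizon statistics (names are hypothetical; it assumes predict() dispatches to forecast_ahead() when steps_ahead > 1, as run_point2 below relies on):

    # 'model' is any trained pyFTS point forecaster, 'test' a 1-D series.
    steps = 10
    forecasts = model.predict(test, steps_ahead=steps)
    stats = Measures.get_point_ahead_statistics(test[model.order:model.order + steps], forecasts)
    for step, measures in stats.items():
        # one dict per horizon: {'steps': k, 'method': '', 'rmse': ..., 'mape': ..., 'u': ...}
        print(step, measures['rmse'], measures['mape'], measures['u'])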

View File

@@ -102,18 +102,11 @@ def sliding_window_benchmarks2(data, windowsize, train=0.8, **kwargs):
     steps_ahead = [k for k in steps_ahead]
 
-    fts_methods = __pop('methods', None, kwargs)
-    methods_parameters = __pop('methods_parameters', None, kwargs)
+    fts_methods = __pop('methods', [], kwargs)
 
-    if fts_methods is None:
-        if type == 'point':
-            fts_methods = get_point_methods()
-        elif type == 'interval':
-            fts_methods = get_interval_methods()
-        elif type == 'distribution':
-            fts_methods = get_probabilistic_methods()
+    if fts_methods is not None:
+        methods_parameters = __pop('methods_parameters', None, kwargs)
 
     ix_methods = [k for k in np.arange(len(fts_methods))]
 
     benchmark_models = __pop("benchmark_models", False, kwargs)
@@ -162,7 +155,8 @@ def sliding_window_benchmarks2(data, windowsize, train=0.8, **kwargs):
             else:
                 job = cluster.submit(method, None, None, None, None, train, test, ct, **kwargs)
             jobs.append(job)
-        else:
+
+        if fts_methods is not None:
             params = [ix_methods, orders, partitioners_methods, partitions, transformations]
             for id, instance in enumerate(product(*params)):
                 fts_method = fts_methods[instance[0]]
@@ -670,6 +664,7 @@ def run_point2(fts_method, order, partitioner_method, partitions, transformation
     _end = time.time()
     times = _end - _start
 
+    if steps_ahead == 1:
         _start = time.time()
         _rmse, _smape, _u = Measures.get_point_statistics(test_data, mfts, **kwargs)
 
@@ -681,6 +676,24 @@ def run_point2(fts_method, order, partitioner_method, partitions, transformation
                'size': len(mfts), 'time': times,
                'rmse': _rmse, 'smape': _smape, 'u': _u, 'window': window_key,
                'steps': steps_ahead, 'method': method}
+    else:
+        _start = time.time()
+        forecasts = mfts.predict(test_data, **kwargs)
+        _end = time.time()
+        times += _end - _start
+
+        eval = Measures.get_point_ahead_statistics(test_data[mfts.order:mfts.order+steps_ahead], forecasts)
+
+        for key in eval.keys():
+            eval[key]["time"] = times
+            eval[key]["method"] = method
+
+        ret = {'model': mfts.shortname, 'partitioner': pttr, 'order': order, 'partitions': partitions,
+               'transformation': '' if transformation is None else transformation.name,
+               'size': len(mfts), 'time': times,
+               'window': window_key, 'steps': steps_ahead, 'method': method,
+               'ahead_results': eval
+               }
 
     return ret
@@ -812,20 +825,14 @@ def run_probabilistic2(fts_method, order, partitioner_method, partitions, transf
 
 def common_process_point_jobs(conn, data, job):
-    data.append(job['steps'])
-    data.append(job['method'])
-    rmse = deepcopy(data)
-    rmse.extend(["rmse", job["rmse"]])
-    bUtil.insert_benchmark(rmse, conn)
-    smape = deepcopy(data)
-    smape.extend(["smape", job["smape"]])
-    bUtil.insert_benchmark(smape, conn)
-    u = deepcopy(data)
-    u.extend(["u", job["u"]])
-    bUtil.insert_benchmark(u, conn)
-    time = deepcopy(data)
-    time.extend(["time", job["time"]])
-    bUtil.insert_benchmark(time, conn)
+    dta = deepcopy(data)
+    dta.append(job['steps'])
+    dta.append(job['method'])
+    for key in ["rmse", "mape", "u", "time"]:
+        if key in job:
+            data2 = deepcopy(dta)
+            data2.extend([key, job[key]])
+            bUtil.insert_benchmark(data2, conn)
 
 
 def process_point_jobs(dataset, tag, job, conn):
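
The rewrite fixes a subtle mutation bug: the old version appended 'steps' and 'method' directly to the shared data prefix, so repeated calls kept growing the caller's list, and it assumed every job carried all four metrics. A short sketch of the new flow with made-up values (the bUtil call commented out):

    from copy import deepcopy

    row = ['TAIEX', 'experiments', 'point', 'PWFTS']        # shared row prefix
    job = {'steps': 10, 'method': 'sliding', 'rmse': 12.3, 'time': 0.8}

    dta = deepcopy(row)            # leave the caller's list untouched
    dta.append(job['steps'])
    dta.append(job['method'])
    for key in ["rmse", "mape", "u", "time"]:
        if key in job:             # multi-step jobs may lack some metrics
            record = deepcopy(dta)
            record.extend([key, job[key]])
            # bUtil.insert_benchmark(record, conn)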

View File

@@ -26,6 +26,7 @@ class KNearestNeighbors(fts.FTS):
         self.benchmark_only = True
         self.min_order = 1
         self.alpha = kwargs.get("alpha", 0.05)
+        self.max_lag = self.order
         self.lag = None
         self.k = kwargs.get("k", 30)
         self.uod = None
@@ -70,8 +71,9 @@ class KNearestNeighbors(fts.FTS):
         return [self.values[k] for k in ix.flatten() ]
 
     def forecast(self, data, **kwargs):
+        l = len(data)
         ret = []
-        for k in np.arange(self.order, len(data)):
+        for k in np.arange(self.order, l+(1 if self.order == l else 0)):
             sample = data[k-self.order : k]
@@ -81,17 +83,6 @@ class KNearestNeighbors(fts.FTS):
 
         return ret
 
-    def forecast_ahead(self, data, steps, **kwargs):
-        start = kwargs.get('start', self.order)
-
-        sample = [k for k in data[start - self.order: start]]
-
-        for k in np.arange(self.order, steps + self.order):
-            tmp = self.forecast(sample[k-self.order:k])
-            sample.append(tmp)
-
-        return sample[-steps]
-
     def forecast_interval(self, data, **kwargs):
         alpha = kwargs.get('alpha', self.alpha)

View File

@@ -241,7 +241,7 @@ class FTS(object):
         start = kwargs.get('start_at', 0)
 
         ret = []
-        for k in np.arange(start+self.max_lag, steps):
+        for k in np.arange(start+self.max_lag, steps+start+self.max_lag):
            tmp = self.forecast(data[k-self.max_lag:k], **kwargs)
 
             if isinstance(tmp, (list, np.ndarray)):
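
The old upper bound treated steps as an absolute index, so the loop produced only steps - start - max_lag forecasts; the new bound yields exactly steps iterations. A quick arithmetic check:

    import numpy as np

    max_lag, start, steps = 2, 0, 10
    old = np.arange(start + max_lag, steps)                    # indices 2..9  -> 8 forecasts
    new = np.arange(start + max_lag, steps + start + max_lag)  # indices 2..11 -> 10 forecasts
    print(len(old), len(new))  # 8 10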

View File

@@ -422,9 +422,9 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
         l = len(data)
 
-        start = kwargs.get('start', self.max_lag)
-        ret = data[start - self.max_lag: start].tolist()
+        start = kwargs.get('start_at', 0)
+        ret = data[start: start+self.max_lag].tolist()
 
         for k in np.arange(self.max_lag, steps+self.max_lag):
 
@@ -434,7 +434,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
             mp = self.forecast(ret[k - self.max_lag: k], **kwargs)
             ret.append(mp[0])
 
-        return ret[self.max_lag:]
+        return ret[-steps:]
 
     def __check_interval_bounds(self, interval):
         if len(self.transformations) > 0:
 
@@ -446,11 +446,9 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
 
     def forecast_ahead_interval(self, data, steps, **kwargs):
-        l = len(data)
-
-        start = kwargs.get('start', self.max_lag)
-
-        sample = data[start - self.max_lag: start]
+        start = kwargs.get('start_at', 0)
+
+        sample = data[start: start + self.max_lag]
 
         ret = [[k, k] for k in sample]
 
@@ -466,7 +464,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
             ret.append([np.min(lower), np.max(upper)])
 
-        return ret[self.order:]
+        return ret[-steps:]
 
     def forecast_ahead_distribution(self, ndata, steps, **kwargs):
 
@@ -483,9 +481,9 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
         nbins = kwargs.get("num_bins", 100)
         _bins = np.linspace(uod[0], uod[1], nbins)
 
-        start = kwargs.get('start', self.max_lag)
-        sample = ndata[start - self.max_lag: start]
+        start = kwargs.get('start_at', 0)
+        sample = ndata[start: start + self.max_lag]
 
         for dat in sample:
             if 'type' in kwargs:
 
@@ -527,7 +525,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
             ret.append(dist)
 
-        return ret[self.order:]
+        return ret[-steps:]
 
     def __str__(self):
         tmp = self.name + ":\n"
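
Two changes run through all three PWFTS methods: start_at now counts from the beginning of the input (default 0), with the max_lag values after it priming the model, and the final slice ret[-steps:] always returns exactly steps outputs, where ret[self.order:] silently returned too many whenever order < max_lag. A slicing sketch with made-up numbers:

    max_lag, steps = 3, 4
    ret = [10, 11, 12]           # priming sample copied from the input data
    ret += [13, 14, 15, 16]      # one appended forecast per step
    assert ret[-steps:] == [13, 14, 15, 16]
    # ret[order:] with order=1 would wrongly keep [11, 12, 13, 14, 15, 16]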

View File

@@ -52,34 +52,46 @@ datasets['TAIEX'] = TAIEX.get_data()[:5000]
 datasets['NASDAQ'] = NASDAQ.get_data()[:5000]
 datasets['SP500'] = SP500.get_data()[10000:15000]
 
-methods = [
-    arima.ARIMA, arima.ARIMA,
-    quantreg.QuantileRegression,
-    BSTS.ARIMA, BSTS.ARIMA,
-    knn.KNearestNeighbors
-]
+competitor_methods = []
+competitor_methods.extend([arima.ARIMA]*3)
+competitor_methods.extend([quantreg.QuantileRegression]*2)
+competitor_methods.extend([BSTS.ARIMA]*3)
+competitor_methods.extend([knn.KNearestNeighbors]*2)
 
-methods_parameters = [
-    {'order': (1, 0, 0), 'alpha': .05},
-    {'order': (1, 0, 1), 'alpha': .05},
-    {'order': 1, 'dist': True},
-    {'order': (1, 0, 0), 'alpha': .05},
-    {'order': (1, 0, 1), 'alpha': .05},
-    {'order': 1}
-]
+competitor_methods_parameters = [
+    {'order': (1, 0, 0)},
+    {'order': (1, 0, 1)},
+    {'order': (2, 0, 0)},
+    {'order': 1, 'alpha': .5},
+    {'order': 2, 'alpha': .5},
+    {'order': (1, 0, 0)},
+    {'order': (1, 0, 1)},
+    {'order': (2, 0, 0)},
+    {'order': 1},
+    {'order': 2}
+]
+
+proposed_methods = [
+    hofts.HighOrderFTS, hofts.WeightedHighOrderFTS, pwfts.ProbabilisticWeightedFTS
+]
+proposed_methods_parameters = [
+    {}, {}, {}
+]
 
 for dataset_name, dataset in datasets.items():
     bchmk.sliding_window_benchmarks2(dataset, 1000, train=0.8, inc=0.2,
                                      benchmark_models=True,
-                                     benchmark_methods=methods,
-                                     benchmark_methods_parameters=methods_parameters,
-                                     methods=[],
-                                     methods_parameters=[{},{}],
-                                     transformations=[None],
-                                     orders=[],
-                                     steps_ahead=[10],
-                                     partitions=[],
-                                     type='distribution',
-                                     distributed=True, nodes=['192.168.0.110', '192.168.0.107', '192.168.0.106'],
-                                     file="experiments.db", dataset=dataset_name, tag="experiments")
+                                     benchmark_methods=competitor_methods,
+                                     benchmark_methods_parameters=competitor_methods_parameters,
+                                     methods=proposed_methods,
+                                     methods_parameters=proposed_methods_parameters,
+                                     orders=[1],
+                                     partitions=[35],
+                                     steps_ahead=[10],
+                                     progress=False, type='point',
+                                     distributed=False, nodes=['192.168.0.110', '192.168.0.107', '192.168.0.106'],
+                                     file="tmp.db", dataset=dataset_name,
+                                     tag="experiments")
 
 #'''
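
Each metric row lands in the SQLite file passed via the file argument. A read-back sketch; the table name 'benchmarks' is an assumption based on pyFTS.benchmarks.Util, so verify it against your version:

    import sqlite3

    conn = sqlite3.connect("tmp.db")   # the file= argument above
    # Assumed schema: one row per (model, window, step, measure) combination.
    for row in conn.execute("SELECT * FROM benchmarks LIMIT 5"):
        print(row)
    conn.close()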