Optimizations for interval distributed benchmarks

parent 3b21889d6c
commit 2c0e685e87

@@ -164,6 +164,36 @@ def coverage(targets, forecasts):
     return np.mean(preds)
 
 
+def pinball(tau, target, forecast):
+    """
+    Pinball loss function. Measure the distance of forecast to the tau-quantile of the target
+    :param tau: quantile value in the range (0,1)
+    :param target:
+    :param forecast:
+    :return: distance of forecast to the tau-quantile of the target
+    """
+    if target >= forecast:
+        return (target - forecast) * tau
+    else:
+        return (forecast - target) * (1 - tau)
+
+
+def pinball_mean(tau, targets, forecasts):
+    """
+    Mean pinball loss value of the forecast for a given tau-quantile of the targets
+    :param tau: quantile value in the range (0,1)
+    :param targets: list of target values
+    :param forecasts: list of prediction intervals
+    :return:
+    """
+    preds = []
+    if tau <= 0.5:
+        preds = [pinball(tau, targets[i], forecasts[i][0]) for i in np.arange(0, len(forecasts))]
+    else:
+        preds = [pinball(tau, targets[i], forecasts[i][1]) for i in np.arange(0, len(forecasts))]
+    return np.nanmean(preds)
+
+
 def pmf_to_cdf(density):
     ret = []
     for row in density.index:
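For reference, the pinball (quantile) loss added above is the standard asymmetric loss: for a quantile level tau, a target y and a forecast q it returns tau*(y - q) when y >= q and (1 - tau)*(q - y) otherwise, so a small tau scores the lower interval bound and a large tau the upper bound. A minimal usage sketch, assuming the functions live in the Measures module referenced elsewhere in this commit; the targets and intervals are invented for illustration:

    from pyFTS.benchmarks import Measures

    # three hypothetical observations and their [lower, upper] interval forecasts
    targets = [10.0, 12.0, 11.5]
    forecasts = [[9.0, 11.0], [11.0, 13.0], [10.5, 12.5]]

    # tau <= 0.5 scores the lower bounds, tau > 0.5 scores the upper bounds
    q05 = Measures.pinball_mean(0.05, targets, forecasts)
    q95 = Measures.pinball_mean(0.95, targets, forecasts)
    print(q05, q95)   # both evaluate to 0.05 for these toy numbers
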
@@ -248,6 +278,10 @@ def get_interval_statistics(original, model):
     ret.append(round(sharpness(forecasts), 2))
     ret.append(round(resolution(forecasts), 2))
     ret.append(round(coverage(original[model.order:], forecasts[:-1]), 2))
+    ret.append(round(pinball_mean(0.05, original[model.order:], forecasts[:-1]), 2))
+    ret.append(round(pinball_mean(0.25, original[model.order:], forecasts[:-1]), 2))
+    ret.append(round(pinball_mean(0.75, original[model.order:], forecasts[:-1]), 2))
+    ret.append(round(pinball_mean(0.95, original[model.order:], forecasts[:-1]), 2))
     return ret
@@ -240,8 +240,7 @@ def plot_dataframe_point(file_synthetic, file_analytic, experiments, tam):
     plt.show()
 
 
-def save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, synthetic, times):
+def save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, synthetic, times, q05, q25, q75, q95):
     ret = []
     if synthetic:
         for k in sorted(objs.keys()):
@@ -265,6 +264,14 @@ def save_dataframe_interval(coverage, experiments, file, objs, resolution, save,
                 mod.append(round(np.nanstd(coverage[k]), 2))
                 mod.append(round(np.nanmean(times[k]), 2))
                 mod.append(round(np.nanstd(times[k]), 2))
+                mod.append(round(np.nanmean(q05[k]), 2))
+                mod.append(round(np.nanstd(q05[k]), 2))
+                mod.append(round(np.nanmean(q25[k]), 2))
+                mod.append(round(np.nanstd(q25[k]), 2))
+                mod.append(round(np.nanmean(q75[k]), 2))
+                mod.append(round(np.nanstd(q75[k]), 2))
+                mod.append(round(np.nanmean(q95[k]), 2))
+                mod.append(round(np.nanstd(q95[k]), 2))
                 mod.append(l)
                 ret.append(mod)
@@ -296,6 +303,18 @@ def save_dataframe_interval(coverage, experiments, file, objs, resolution, save,
                 tmp = [n, o, s, p, l, 'TIME']
                 tmp.extend(times[k])
                 ret.append(deepcopy(tmp))
+                tmp = [n, o, s, p, l, 'Q05']
+                tmp.extend(q05[k])
+                ret.append(deepcopy(tmp))
+                tmp = [n, o, s, p, l, 'Q25']
+                tmp.extend(q25[k])
+                ret.append(deepcopy(tmp))
+                tmp = [n, o, s, p, l, 'Q75']
+                tmp.extend(q75[k])
+                ret.append(deepcopy(tmp))
+                tmp = [n, o, s, p, l, 'Q95']
+                tmp.extend(q95[k])
+                ret.append(deepcopy(tmp))
             except Exception as ex:
                 print("Erro ao salvar ", k)
                 print("Exceção ", ex)
@@ -317,7 +336,7 @@ def interval_dataframe_analytic_columns(experiments):
 
 def interval_dataframe_synthetic_columns():
     columns = ["Model", "Order", "Scheme", "Partitions", "SHARPAVG", "SHARPSTD", "RESAVG", "RESSTD", "COVAVG",
-               "COVSTD", "TIMEAVG", "TIMESTD", "SIZE"]
+               "COVSTD", "TIMEAVG", "TIMESTD", "Q05AVG", "Q05STD", "Q25AVG", "Q25STD", "Q75AVG", "Q75STD", "Q95AVG", "Q95STD", "SIZE"]
     return columns
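The eight new quantile columns line up, in order, with the mean/std pairs appended per model in save_dataframe_interval above. A small sketch of one synthetic-summary row against these headers, with all numeric values invented for illustration:

    import pandas as pd

    columns = ["Model", "Order", "Scheme", "Partitions", "SHARPAVG", "SHARPSTD", "RESAVG", "RESSTD", "COVAVG",
               "COVSTD", "TIMEAVG", "TIMESTD", "Q05AVG", "Q05STD", "Q25AVG", "Q25STD", "Q75AVG", "Q75STD",
               "Q95AVG", "Q95STD", "SIZE"]

    # identification fields first, then mean/std pairs in the same order they are appended above
    row = ["ARIMA", 2, "Grid", 10, 250.1, 12.3, 240.8, 11.9, 0.91, 0.05,
           1.7, 0.2, 30.2, 4.1, 12.5, 2.2, 13.1, 2.4, 31.0, 4.5, 128]
    print(pd.DataFrame([row], columns=columns))
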
@@ -3,6 +3,7 @@
 import numpy as np
 from statsmodels.tsa.arima_model import ARIMA as stats_arima
+import scipy.stats as st
 from pyFTS import fts
@@ -15,6 +16,8 @@ class ARIMA(fts.FTS):
         self.name = "ARIMA"
         self.detail = "Auto Regressive Integrated Moving Average"
         self.is_high_order = True
+        self.has_point_forecasting = True
+        self.has_interval_forecasting = True
         self.model = None
         self.model_fit = None
         self.trained_data = None
@@ -23,6 +26,7 @@ class ARIMA(fts.FTS):
         self.q = 0
         self.benchmark_only = True
         self.min_order = 1
+        self.alpha = (1 - kwargs.get("alpha", 0.90))/2
 
     def train(self, data, sets, order, parameters=None):
         self.p = order[0]
@@ -35,6 +39,7 @@ class ARIMA(fts.FTS):
         try:
             self.model = stats_arima(data, order=(self.p, self.d, self.q))
             self.model_fit = self.model.fit(disp=0)
+            print(np.sqrt(self.model_fit.sigma2))
         except Exception as ex:
             print(ex)
             self.model_fit = None
@@ -71,4 +76,65 @@ class ARIMA(fts.FTS):
 
         ret = self.doInverseTransformations(ret, params=[data[self.order - 1:]])
 
+        return ret
+
+    def forecastInterval(self, data, **kwargs):
+
+        if self.model_fit is None:
+            return np.nan
+
+        sigma = np.sqrt(self.model_fit.sigma2)
+
+        ndata = np.array(self.doTransformations(data))
+
+        l = len(ndata)
+
+        ret = []
+
+        for k in np.arange(self.order, l+1):
+            tmp = []
+
+            sample = [ndata[i] for i in np.arange(k - self.order, k)]
+
+            mean = self.forecast(sample)[0]
+
+            tmp.append(mean + st.norm.ppf(self.alpha) * sigma)
+            tmp.append(mean + st.norm.ppf(1 - self.alpha) * sigma)
+
+            ret.append(tmp)
+
+        ret = self.doInverseTransformations(ret, params=[data[self.order - 1:]])
+
+        return ret
+
+    def forecastAheadInterval(self, data, steps, **kwargs):
+        if self.model_fit is None:
+            return np.nan
+
+        smoothing = kwargs.get("smoothing",0.2)
+
+        alpha = (1 - kwargs.get("alpha", 0.95))/2
+
+        sigma = np.sqrt(self.model_fit.sigma2)
+
+        ndata = np.array(self.doTransformations(data))
+
+        l = len(ndata)
+
+        means = self.forecastAhead(data,steps,kwargs)
+
+        ret = []
+
+        for k in np.arange(0, steps):
+            tmp = []
+
+            hsigma = (1 + k*smoothing)*sigma
+
+            tmp.append(means[k] + st.norm.ppf(alpha) * hsigma)
+            tmp.append(means[k] + st.norm.ppf(1 - alpha) * hsigma)
+
+            ret.append(tmp)
+
+        ret = self.doInverseTransformations(ret, params=[data[self.order - 1:]])
+
         return ret
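The two new methods build symmetric Gaussian intervals around the ARIMA point forecast: the residual standard deviation sqrt(sigma2) is scaled by the normal quantile for the requested tail probability, and the ahead-forecast variant widens the band with the horizon through the smoothing factor. A self-contained sketch of the arithmetic, with invented numbers:

    import scipy.stats as st

    alpha = (1 - 0.90) / 2     # two-sided 90% interval, mirroring the constructor default above
    sigma = 2.5                # hypothetical residual standard deviation (sqrt of sigma2)
    mean = 100.0               # hypothetical one-step point forecast

    lower = mean + st.norm.ppf(alpha) * sigma        # st.norm.ppf(0.05) is about -1.64
    upper = mean + st.norm.ppf(1 - alpha) * sigma
    print([lower, upper])                            # roughly [95.9, 104.1]

    # multi-step intervals widen with the step index k: hsigma = (1 + k * smoothing) * sigma
    smoothing = 0.2
    for k in range(3):
        hsigma = (1 + k * smoothing) * sigma
        print(k, mean + st.norm.ppf(alpha) * hsigma, mean + st.norm.ppf(1 - alpha) * hsigma)
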
@@ -73,7 +73,7 @@ def run_point(mfts, partitioner, train_data, test_data, window_key=None, transfo
     return ret
 
 
-def point_sliding_window(data, windowsize, train=0.8, models=None, partitioners=[Grid.GridPartitioner],
+def point_sliding_window(data, windowsize, train=0.8, inc=0.1, models=None, partitioners=[Grid.GridPartitioner],
                          partitions=[10], max_order=3, transformation=None, indexer=None, dump=False,
                          benchmark_models=None, benchmark_models_parameters = None,
                          save=False, file=None, sintetic=False,nodes=None, depends=None):
@@ -82,6 +82,7 @@ def point_sliding_window(data, windowsize, train=0.8, models=None, partitioners=
     :param data:
     :param windowsize: size of sliding window
     :param train: percentual of sliding window data used to train the models
+    :param inc: percentual of window is used do increment
     :param models: FTS point forecasters
     :param partitioners: Universe of Discourse partitioner
     :param partitions: the max number of partitions on the Universe of Discourse
@@ -146,7 +147,7 @@ def point_sliding_window(data, windowsize, train=0.8, models=None, partitioners=
                     pool.append(mfts)
 
     experiments = 0
-    for ct, train, test in Util.sliding_window(data, windowsize, train):
+    for ct, train, test in Util.sliding_window(data, windowsize, train, inc):
         experiments += 1
 
         benchmarks_only = {}
@@ -223,13 +224,18 @@ def run_interval(mfts, partitioner, train_data, test_data, window_key=None, tran
 
     tmp2 = [Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner]
 
+    tmp4 = [arima.ARIMA, quantreg.QuantileRegression]
+
     tmp3 = [Measures.get_interval_statistics]
 
-    pttr = str(partitioner.__module__).split('.')[-1]
-    _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
-    mfts.partitioner = partitioner
-    if transformation is not None:
-        mfts.appendTransformation(transformation)
+    if mfts.benchmark_only:
+        _key = mfts.shortname + str(mfts.order if mfts.order is not None else "") + str(mfts.alpha)
+    else:
+        pttr = str(partitioner.__module__).split('.')[-1]
+        _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
+        mfts.partitioner = partitioner
+    if transformation is not None:
+        mfts.appendTransformation(transformation)
 
     _start = time.time()
     mfts.train(train_data, partitioner.sets, order=mfts.order)
@@ -237,24 +243,26 @@ def run_interval(mfts, partitioner, train_data, test_data, window_key=None, tran
     times = _end - _start
 
     _start = time.time()
-    _sharp, _res, _cov = Measures.get_interval_statistics(test_data, mfts)
+    _sharp, _res, _cov, _q05, _q25, _q75, _q95 = Measures.get_interval_statistics(test_data, mfts)
     _end = time.time()
     times += _end - _start
 
     ret = {'key': _key, 'obj': mfts, 'sharpness': _sharp, 'resolution': _res, 'coverage': _cov, 'time': times,
-           'window': window_key}
+           'Q05': _q05, 'Q25': _q25, 'Q75': _q75, 'Q95': _q95, 'window': window_key}
 
     return ret
 
 
-def interval_sliding_window(data, windowsize, train=0.8, models=None, partitioners=[Grid.GridPartitioner],
+def interval_sliding_window(data, windowsize, train=0.8, inc=0.1, models=None, partitioners=[Grid.GridPartitioner],
                             partitions=[10], max_order=3, transformation=None, indexer=None, dump=False,
-                            save=False, file=None, sintetic=False,nodes=None, depends=None):
+                            benchmark_models=None, benchmark_models_parameters = None,
+                            save=False, file=None, sintetic=False,nodes=None, depends=None):
     """
     Distributed sliding window benchmarks for FTS interval forecasters
     :param data:
     :param windowsize: size of sliding window
     :param train: percentual of sliding window data used to train the models
@@ -262,6 +270,8 @@ def interval_sliding_window(data, windowsize, train=0.8, models=None, partitione
     :param transformation: data transformation
     :param indexer: seasonal indexer
     :param dump:
+    :param benchmark_models:
+    :param benchmark_models_parameters:
     :param save: save results
     :param file: file path to save the results
     :param sintetic: if true only the average and standard deviation of the results
@@ -270,6 +280,15 @@ def interval_sliding_window(data, windowsize, train=0.8, models=None, partitione
     :return: DataFrame with the results
     """
 
+    alphas = [0.5, 0.25]
+
+    if benchmark_models is None and models is None:
+        benchmark_models = [arima.ARIMA, arima.ARIMA, arima.ARIMA, arima.ARIMA,
+                            quantreg.QuantileRegression, quantreg.QuantileRegression]
+
+    if benchmark_models_parameters is None:
+        benchmark_models_parameters = [(1, 0, 0), (1, 0, 1), (2, 0, 1), (2, 0, 2), 1, 2]
+
     cluster = dispy.JobCluster(run_point, nodes=nodes) #, depends=dependencies)
 
     http_server = dispy.httpd.DispyHTTPServer(cluster)
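When neither models nor benchmark_models is supplied, the defaults above pair each benchmark class with the parameter at the same index, and the pool-building loop further down instantiates every pair once per entry in alphas. A sketch of the combinations those defaults would yield (the enumeration mirrors the loop added below; output is illustrative):

    from pyFTS.benchmarks import arima, quantreg

    alphas = [0.5, 0.25]
    benchmark_models = [arima.ARIMA, arima.ARIMA, arima.ARIMA, arima.ARIMA,
                        quantreg.QuantileRegression, quantreg.QuantileRegression]
    benchmark_models_parameters = [(1, 0, 0), (1, 0, 1), (2, 0, 1), (2, 0, 2), 1, 2]

    # each (class, parameter) pair is built once per alpha: 6 x 2 = 12 benchmark instances
    for count, model in enumerate(benchmark_models, start=0):
        for a in alphas:
            par = benchmark_models_parameters[count]
            print(model.__name__, par, a)
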
@@ -284,6 +303,10 @@ def interval_sliding_window(data, windowsize, train=0.8, models=None, partitione
     sharpness = {}
     resolution = {}
     coverage = {}
+    q05 = {}
+    q25 = {}
+    q75 = {}
+    q95 = {}
     times = {}
 
     if models is None:
@@ -299,12 +322,23 @@ def interval_sliding_window(data, windowsize, train=0.8, models=None, partitione
                    mfts.order = order
                    pool.append(mfts)
        else:
+           mfts.order = 1
            pool.append(mfts)
 
+    if benchmark_models is not None:
+        for count, model in enumerate(benchmark_models, start=0):
+            for a in alphas:
+                par = benchmark_models_parameters[count]
+                mfts = model(str(par if par is not None else ""), alpha=a)
+                mfts.order = par
+                pool.append(mfts)
+
     experiments = 0
-    for ct, train, test in Util.sliding_window(data, windowsize, train):
+    for ct, train, test in Util.sliding_window(data, windowsize, train, inc=inc):
         experiments += 1
 
+        benchmarks_only = {}
+
         if dump: print('\nWindow: {0}\n'.format(ct))
 
         for partition in partitions:
@@ -314,6 +348,10 @@ def interval_sliding_window(data, windowsize, train=0.8, models=None, partitione
             data_train_fs = partitioner(train, partition, transformation=transformation)
 
             for id, m in enumerate(pool,start=0):
+                if m.benchmark_only and m.shortname in benchmarks_only:
+                    continue
+                else:
+                    benchmarks_only[m.shortname] = m
                 job = cluster.submit(m, data_train_fs, train, test, ct, transformation)
                 job.id = id  # associate an ID to identify jobs (if needed later)
                 jobs.append(job)
@@ -327,11 +365,19 @@ def interval_sliding_window(data, windowsize, train=0.8, models=None, partitione
                    resolution[tmp['key']] = []
                    coverage[tmp['key']] = []
                    times[tmp['key']] = []
+                   q05[tmp['key']] = []
+                   q25[tmp['key']] = []
+                   q75[tmp['key']] = []
+                   q95[tmp['key']] = []
 
                sharpness[tmp['key']].append(tmp['sharpness'])
                resolution[tmp['key']].append(tmp['resolution'])
                coverage[tmp['key']].append(tmp['coverage'])
                times[tmp['key']].append(tmp['time'])
+               q05[tmp['key']].append(tmp['Q05'])
+               q25[tmp['key']].append(tmp['Q25'])
+               q75[tmp['key']].append(tmp['Q75'])
+               q95[tmp['key']].append(tmp['Q95'])
                print(tmp['key'])
            else:
                print(job.exception)
@@ -350,8 +396,8 @@ def interval_sliding_window(data, windowsize, train=0.8, models=None, partitione
     http_server.shutdown() # this waits until browser gets all updates
     cluster.close()
 
-    return benchmarks.save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, sintetic,
-                                              times)
+    return bUtil.save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, sintetic,
+                                         times, q05, q25, q75, q95)
 
 
 def run_ahead(mfts, partitioner, train_data, test_data, steps, resolution, window_key=None, transformation=None, indexer=None):
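With the updated signature, a call to the distributed interval benchmark now takes the window increment and, optionally, explicit benchmark models and parameters. A hypothetical invocation under these assumptions (the series, node address and output file are placeholders, not from this commit):

    import numpy as np
    from pyFTS.benchmarks import distributed_benchmarks as bchmk
    from pyFTS.partitioners import Grid

    data = np.random.normal(0, 1.0, 5000)   # placeholder series

    bchmk.interval_sliding_window(data, 2000, train=0.8, inc=0.2,
                                  partitioners=[Grid.GridPartitioner],
                                  partitions=np.arange(10, 50, step=10),
                                  dump=True, save=True,
                                  file="experiments/example_interval.csv",
                                  nodes=['127.0.0.1'])
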
@@ -19,7 +19,7 @@ class QuantileRegression(fts.FTS):
         self.has_probability_forecasting = True
         self.benchmark_only = True
         self.minOrder = 1
-        self.alpha = 0.5
+        self.alpha = kwargs.get("alpha", 0.05)
         self.upper_qt = None
         self.mean_qt = None
         self.lower_qt = None
@@ -27,8 +27,6 @@ class QuantileRegression(fts.FTS):
     def train(self, data, sets, order=1, parameters=None):
         self.order = order
 
-        self.alpha = parameters
-
         tmp = np.array(self.doTransformations(data))
 
         lagdata, ndata = lagmat(tmp, maxlag=order, trim="both", original='sep')
@@ -72,7 +72,7 @@ class EnsembleFTS(fts.FTS):
 
         ret = []
 
-        for k in np.arange(0, l):
+        for k in np.arange(0, l+1):
             tmp = []
             for model in self.models:
                 if k >= model.minOrder - 1:
fts.py
@@ -88,7 +88,13 @@ class FTS(object):
         :param kwargs:
         :return:
         """
-        pass
+        ret = []
+        for k in np.arange(0,steps):
+            tmp = self.forecast(data[-self.order:],kwargs)
+            ret.append(tmp)
+            data.append(tmp)
+
+        return ret
 
     def forecastAheadInterval(self, data, steps, **kwargs):
         """
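The forecastAhead default added above replaces the old pass with a recursive multi-step strategy: each one-step forecast is appended to the input series and fed back in for the next step. A standalone toy sketch of the same idea (the NaiveModel class is invented for illustration and is not part of pyFTS):

    import numpy as np

    class NaiveModel:
        order = 1

        def forecast(self, data, **kwargs):
            return data[-1]                  # toy point forecaster: repeat the last value

        def forecastAhead(self, data, steps, **kwargs):
            ret = []
            for k in np.arange(0, steps):
                tmp = self.forecast(data[-self.order:], **kwargs)
                ret.append(tmp)
                data.append(tmp)             # recursive feedback: the forecast becomes the next input
            return ret

    print(NaiveModel().forecastAhead([1.0, 2.0, 3.5], 3))   # [3.5, 3.5, 3.5]
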
@@ -22,40 +22,36 @@ os.chdir("/home/petronio/dados/Dropbox/Doutorado/Codigos/")
 #enrollments = pd.read_csv("DataSets/Enrollments.csv", sep=";")
 #enrollments = np.array(enrollments["Enrollments"])
 
-#from pyFTS import song
-
-#enrollments_fs = Grid.GridPartitioner(enrollments, 10).sets
-
-#model = song.ConventionalFTS('')
-#model.train(enrollments,enrollments_fs)
-#teste = model.forecast(enrollments)
-
-#print(teste)
-
-#print(FCM.FCMPartitionerTrimf.__module__)
+"""
+DATASETS
+"""
 
 #gauss = random.normal(0,1.0,5000)
 #gauss_teste = random.normal(0,1.0,400)
 
-taiexpd = pd.read_csv("DataSets/TAIEX.csv", sep=",")
-taiex = np.array(taiexpd["avg"][:2000])
+#taiexpd = pd.read_csv("DataSets/TAIEX.csv", sep=",")
+#taiex = np.array(taiexpd["avg"][:5000])
 
 #nasdaqpd = pd.read_csv("DataSets/NASDAQ_IXIC.csv", sep=",")
 #nasdaq = np.array(nasdaqpd["avg"][0:5000])
 
-#from statsmodels.tsa.arima_model import ARIMA as stats_arima
-from statsmodels.tsa.tsatools import lagmat
+#sp500pd = pd.read_csv("DataSets/S&P500.csv", sep=",")
+#sp500 = np.array(sp500pd["Avg"][11000:])
+#del(sp500pd)
 
-#tmp = np.arange(10)
+sondapd = pd.read_csv("DataSets/SONDA_BSB_HOURLY_AVG.csv", sep=";")
+sondapd = sondapd.dropna(axis=0, how='any')
+sonda = np.array(sondapd["ws_10m"])
+del(sondapd)
 
-#lag, a = lagmat(tmp, maxlag=2, trim="both", original='sep')
+#bestpd = pd.read_csv("DataSets/BEST_TAVG.csv", sep=";")
+#best = np.array(bestpd["Anomaly"])
 
 #print(lag)
 #print(a)
 
-from pyFTS.benchmarks import benchmarks as bchmk
-#from pyFTS.benchmarks import distributed_benchmarks as bchmk
+#from pyFTS.benchmarks import benchmarks as bchmk
+from pyFTS.benchmarks import distributed_benchmarks as bchmk
 #from pyFTS.benchmarks import parallel_benchmarks as bchmk
 from pyFTS.benchmarks import Util
 from pyFTS.benchmarks import arima, quantreg
@@ -66,9 +62,9 @@ from pyFTS.benchmarks import arima, quantreg
 
 #tmp = arima.ARIMA("")
 #tmp.train(taiex[:1600], None, order=(2,0,2))
-#teste = tmp.forecast(taiex[1600:1605])
+#teste = tmp.forecastInterval(taiex[1600:1605])
 
-#tmp = quantreg.QuantileRegression("")
+#tmp = quan#treg.QuantileRegression("")
 #tmp.train(taiex[:1600], None, order=2)
 #teste = tmp.forecast(taiex[1600:1605])
 
@@ -80,21 +76,20 @@ from pyFTS.benchmarks import arima, quantreg
 from pyFTS import song, chen, yu, cheng
 
 
-bchmk.point_sliding_window(taiex,1000,train=0.8, models=[], #song.ConventionalFTS, chen.ConventionalFTS], #[yu.WeightedFTS, cheng.TrendWeightedFTS], # #
+bchmk.point_sliding_window(sonda, 9000, train=0.8, inc=0.4,#models=[yu.WeightedFTS], # #
                      partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ],
-                     partitions= [10], #np.arange(10,200,step=10), #transformation=diff,
-                     dump=True, save=True, file="experiments/XXXtaiex_point_analytic.csv") #,
-                     # nodes=['192.168.0.102', '192.168.0.109', '192.168.0.106']) #, depends=[hofts, ifts])
+                     partitions= np.arange(10,200,step=10), #transformation=diff,
+                     dump=True, save=True, file="experiments/sondaws_point_analytic.csv",
+                     nodes=['192.168.0.103', '192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts])
 
-"""
 diff = Transformations.Differential(1)
 
-bchmk.point_sliding_window(taiex,2000,train=0.8, #models=[yu.WeightedFTS], # #
+bchmk.point_sliding_window(sonda, 9000, train=0.8, inc=0.4, #models=[yu.WeightedFTS], # #
                      partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ],
-                     partitions= np.arange(10,200,step=10), transformation=diff,
-                     dump=True, save=True, file="experiments/taiex_point_analytic_diff.csv",
-                     nodes=['192.168.0.102', '192.168.0.109', '192.168.0.106']) #, depends=[hofts, ifts])
-"""
+                     partitions= np.arange(3,20,step=2), #transformation=diff,
+                     dump=True, save=True, file="experiments/sondaws_point_analytic_diff.csv",
+                     nodes=['192.168.0.103', '192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts])
+#"""
 
 #bchmk.testa(taiex,[10,20],partitioners=[Grid.GridPartitioner], nodes=['192.168.0.109', '192.168.0.101'])
 
 #parallel_util.explore_partitioners(taiex,20)