New version of sliding_window_benchmarks; Bugfixes in benchmarks, ifts and ensemble
This commit is contained in:
parent
9b90853f6b
commit
0dc2fabdcc
@@ -397,7 +397,7 @@ def get_interval_statistics(data, model, **kwargs):
     forecasts = model.predict(data, **kwargs)
     ret.append(round(sharpness(forecasts), 2))
     ret.append(round(resolution(forecasts), 2))
-    ret.append(round(coverage(data[model.order:], forecasts[:-1]), 2))
+    ret.append(round(coverage(data[model.max_lag:], forecasts[:-1]), 2))
     ret.append(round(pinball_mean(0.05, data[model.max_lag:], forecasts[:-1]), 2))
     ret.append(round(pinball_mean(0.25, data[model.max_lag:], forecasts[:-1]), 2))
     ret.append(round(pinball_mean(0.75, data[model.max_lag:], forecasts[:-1]), 2))
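Note: the fix above matters for models whose lags are not contiguous, where the first forecast corresponds to position max_lag rather than order. A toy sketch of the alignment (illustrative numbers and a simplified coverage function, not the pyFTS API):

    # a model with lags {1, 3} has order == 2 but max_lag == 3, so its
    # first interval forecast refers to data[3], not data[2]
    data = [10, 12, 11, 13, 14, 15]
    max_lag = 3
    intervals = [(12, 14), (13, 15), (14, 16)]   # forecasts for data[3:]

    def coverage(targets, forecasts):
        # fraction of observations falling inside their interval
        hits = [lo <= y <= hi for y, (lo, hi) in zip(targets, forecasts)]
        return sum(hits) / len(hits)

    print(coverage(data[max_lag:], intervals))   # -> 1.0, correctly aligned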
@@ -108,6 +108,30 @@ def process_common_data(dataset, tag, type, job):
     return data


+def process_common_data2(dataset, tag, type, job):
+    """
+    Wraps benchmark information on a tuple for sqlite database
+
+    :param dataset: benchmark dataset
+    :param tag: benchmark set alias
+    :param type: forecasting type
+    :param job: a dictionary with benchmark data
+    :return: tuple for sqlite database
+    """
+    data = [dataset, tag, type,
+            job['model'],
+            job['transformation'],
+            job['order'],
+            job['partitioner'],
+            job['partitions'],
+            job['size'],
+            job['steps'],
+            job['method']
+            ]
+
+    return data
+
+
 def get_dataframe_from_bd(file, filter):
     """
     Query the sqlite benchmark database and return a pandas dataframe with the results
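Note: a sketch of the row prefix the new helper builds, with made-up field values shaped like the job dictionaries produced by the run_*2 functions later in this commit:

    job = {'model': 'WIFTS', 'transformation': '', 'order': 3,
           'partitioner': 'Grid', 'partitions': 35, 'size': 120,
           'steps': 1, 'method': None}

    data = process_common_data2('TAIEX', 'gridsearch', 'interval', job)
    # -> ['TAIEX', 'gridsearch', 'interval', 'WIFTS', '', 3, 'Grid', 35, 120, 1, None]
    # each metric row later extends this prefix with [measure, value]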
@@ -14,6 +14,7 @@ import matplotlib as plt
 import matplotlib.pyplot as plt
 import numpy as np
 from mpl_toolkits.mplot3d import Axes3D
+from itertools import product

 from pyFTS.common import Transformations
 from pyFTS.models import song, chen, yu, ismailefendi, sadaei, hofts, pwfts, ifts, cheng, hwang
@@ -67,7 +68,7 @@ def get_benchmark_interval_methods():

 def get_interval_methods():
     """Return all FTS methods for point_to_interval forecasting"""
-    return [ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS]
+    return [ifts.IntervalFTS, ifts.WeightedIntervalFTS, pwfts.ProbabilisticWeightedFTS]


 def get_probabilistic_methods():
@@ -80,6 +81,126 @@ def get_benchmark_probabilistic_methods():
     return [arima.ARIMA, quantreg.QuantileRegression, knn.KNearestNeighbors]


+def sliding_window_benchmarks2(data, windowsize, train=0.8, **kwargs):
+    tag = __pop('tag', None, kwargs)
+    dataset = __pop('dataset', None, kwargs)
+
+    distributed = __pop('distributed', False, kwargs)
+
+    transformations = kwargs.get('transformations', [None])
+
+    type = kwargs.get("type", 'point')
+
+    orders = __pop("orders", [1, 2, 3], kwargs)
+
+    partitioners_methods = __pop("partitioners_methods", [Grid.GridPartitioner], kwargs)
+    partitions = __pop("partitions", [10], kwargs)
+
+    partitions = [k for k in partitions]
+
+    steps_ahead = __pop('steps_ahead', [1], kwargs)
+
+    steps_ahead = [k for k in steps_ahead]
+
+    fts_methods = __pop('methods', None, kwargs)
+
+    methods_parameters = __pop('methods_parameters', None, kwargs)
+
+    if fts_methods is None:
+        if type == 'point':
+            fts_methods = get_point_methods()
+        elif type == 'interval':
+            fts_methods = get_interval_methods()
+        elif type == 'distribution':
+            fts_methods = get_probabilistic_methods()
+
+    ix_methods = [k for k in np.arange(len(fts_methods))]
+
+    benchmark_models = __pop("benchmark_models", False, kwargs)
+    benchmark_methods = __pop("benchmark_methods", None, kwargs)
+    benchmark_methods_parameters = __pop("benchmark_methods_parameters", None, kwargs)
+
+    if type == 'point':
+        experiment_method = run_point2
+        synthesis_method = process_point_jobs2
+    elif type == 'interval':
+        experiment_method = run_interval2
+        synthesis_method = process_interval_jobs2
+    elif type == 'distribution':
+        experiment_method = run_probabilistic2
+        synthesis_method = process_probabilistic_jobs2
+    else:
+        raise ValueError("Type parameter has an unknown value!")
+
+    if distributed:
+        import pyFTS.distributed.dispy as dispy
+
+        nodes = kwargs.get("nodes", ['127.0.0.1'])
+        cluster, http_server = dispy.start_dispy_cluster(experiment_method, nodes)
+
+    inc = __pop("inc", 0.1, kwargs)
+
+    file = kwargs.get('file', "benchmarks.db")
+
+    conn = bUtil.open_benchmark_db(file)
+
+    jobs = []
+    for ct, train, test in cUtil.sliding_window(data, windowsize, train, inc=inc, **kwargs):
+
+        if benchmark_models:
+            for bm, method in enumerate(benchmark_methods):
+                for step in steps_ahead:
+
+                    kwargs['steps_ahead'] = step
+                    kwargs['parameters'] = benchmark_methods_parameters[bm]
+
+                    if not distributed:
+                        try:
+                            job = experiment_method(method, None, None, None, train, test, ct, **kwargs)
+                            synthesis_method(dataset, tag, job, conn)
+                        except Exception as ex:
+                            print('EXCEPTION! ', method, benchmark_methods_parameters[bm])
+                            traceback.print_exc()
+                    else:
+                        job = cluster.submit(method, None, None, None, train, test, ct, **kwargs)
+                        jobs.append(job)
+        else:
+            params = [ix_methods, orders, partitioners_methods, partitions, transformations, steps_ahead]
+            for id, instance in enumerate(product(*params)):
+                fts_method = fts_methods[instance[0]]
+                kwargs['steps_ahead'] = instance[5]
+                if methods_parameters is not None:
+                    kwargs['parameters'] = methods_parameters[instance[0]]
+                if not distributed:
+                    try:
+                        job = experiment_method(fts_method, instance[1], instance[2], instance[3], instance[4], train, test, ct, **kwargs)
+                        synthesis_method(dataset, tag, job, conn)
+                    except Exception as ex:
+                        print('EXCEPTION! ', instance)
+                        traceback.print_exc()
+                else:
+                    job = cluster.submit(fts_method, instance[1], instance[2], instance[3], instance[4], train, test, ct, **kwargs)
+                    job.id = id
+                    jobs.append(job)
+
+    if distributed:
+        for job in jobs:
+            job()
+            if job.status == dispy.dispy.DispyJob.Finished and job is not None:
+                tmp = job.result
+                synthesis_method(dataset, tag, tmp, conn)
+            else:
+                print("status", job.status)
+                print("result", job.result)
+                print("stdout", job.stdout)
+                print("stderr", job.exception)
+
+        cluster.wait()  # wait for all jobs to finish
+        dispy.stop_dispy_cluster(cluster, http_server)
+
+    conn.close()
+
+
 def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
     """
     Sliding window benchmarks for FTS forecasters.
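Note: a minimal usage sketch of the new entry point, mirroring the test script changed at the end of this commit (dataset slice and parameters are illustrative):

    from pyFTS.benchmarks import benchmarks as bchmk
    from pyFTS.models import hofts
    from pyFTS.data import TAIEX

    data = TAIEX.get_data()[:5000]

    # one experiment per (method, order, partitioner, partitions,
    # transformation, steps_ahead) combination; results go to sqlite
    bchmk.sliding_window_benchmarks2(data, 1000, train=0.8, inc=0.2,
                                     methods=[hofts.HighOrderFTS],
                                     orders=[1, 2, 3],
                                     partitions=[35],
                                     transformations=[None],
                                     type='point',
                                     file="benchmarks.db",
                                     dataset='TAIEX', tag="example")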
@@ -314,8 +435,6 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
     conn.close()
-
-


 def run_point(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
     """
     Run the point forecasting benchmarks
@@ -502,6 +621,159 @@ def run_probabilistic(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
     return ret


+def __build_model(fts_method, order, parameters, partitioner_method, partitions, train_data, transformation):
+    mfts = fts_method(**parameters)
+    if mfts.benchmark_only or mfts.is_wrapper:
+        pttr = ''
+    else:
+        fs = partitioner_method(npart=partitions, data=train_data, transformation=transformation)
+        pttr = str(fs.__module__).split('.')[-1]
+        if order > 1:
+            mfts = fts_method(partitioner=fs, order=order, **parameters)
+        else:
+            mfts.partitioner = fs
+
+    if transformation is not None:
+        mfts.append_transformation(transformation)
+    return mfts, pttr
+
+
+def run_point2(fts_method, order, partitioner_method, partitions, transformation, train_data, test_data, window_key=None, **kwargs):
+
+    import time
+    from pyFTS.models import yu, chen, hofts, pwfts, ismailefendi, sadaei, song, cheng, hwang
+    from pyFTS.partitioners import Grid, Entropy, FCM
+    from pyFTS.benchmarks import Measures, naive, arima, quantreg
+    from pyFTS.common import Transformations
+
+    tmp = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, ismailefendi.ImprovedWeightedFTS,
+           cheng.TrendWeightedFTS, sadaei.ExponentialyWeightedFTS, hofts.HighOrderFTS, hwang.HighOrderFTS,
+           pwfts.ProbabilisticWeightedFTS]
+
+    tmp2 = [Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner]
+
+    tmp4 = [naive.Naive, arima.ARIMA, quantreg.QuantileRegression]
+
+    tmp3 = [Measures.get_point_statistics]
+
+    tmp5 = [Transformations.Differential]
+
+    indexer = kwargs.get('indexer', None)
+
+    steps_ahead = kwargs.get('steps_ahead', 1)
+    method = kwargs.get('method', None)
+    parameters = kwargs.get('parameters', {})
+
+    mfts, pttr = __build_model(fts_method, order, parameters, partitioner_method, partitions, train_data,
+                               transformation)
+
+    _start = time.time()
+    mfts.fit(train_data, **kwargs)
+    _end = time.time()
+    times = _end - _start
+
+    _start = time.time()
+    _rmse, _smape, _u = Measures.get_point_statistics(test_data, mfts, **kwargs)
+    _end = time.time()
+    times += _end - _start
+
+    ret = {'model': mfts.shortname, 'partitioner': pttr, 'order': order, 'partitions': partitions,
+           'transformation': '' if transformation is None else transformation.name,
+           'size': len(mfts), 'time': times,
+           'rmse': _rmse, 'smape': _smape, 'u': _u, 'window': window_key,
+           'steps': steps_ahead, 'method': method}
+
+    return ret
+
+
+def run_interval2(fts_method, order, partitioner_method, partitions, transformation, train_data, test_data, window_key=None, **kwargs):
+
+    import time
+    from pyFTS.models import hofts, ifts, pwfts
+    from pyFTS.partitioners import Grid, Entropy, FCM
+    from pyFTS.benchmarks import Measures, arima, quantreg, BSTS
+
+    tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, ifts.WeightedIntervalFTS, pwfts.ProbabilisticWeightedFTS]
+
+    tmp2 = [Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner]
+
+    tmp4 = [arima.ARIMA, quantreg.QuantileRegression, BSTS.ARIMA]
+
+    tmp3 = [Measures.get_interval_statistics]
+
+    steps_ahead = kwargs.get('steps_ahead', 1)
+    method = kwargs.get('method', None)
+    parameters = kwargs.get('parameters', {})
+
+    mfts, pttr = __build_model(fts_method, order, parameters, partitioner_method, partitions, train_data,
+                               transformation)
+
+    _start = time.time()
+    mfts.fit(train_data, **kwargs)
+    _end = time.time()
+    times = _end - _start
+
+    _start = time.time()
+    # _sharp, _res, _cov, _q05, _q25, _q75, _q95, _w05, _w25
+    metrics = Measures.get_interval_statistics(test_data, mfts, **kwargs)
+    _end = time.time()
+    times += _end - _start
+
+    ret = {'model': mfts.shortname, 'partitioner': pttr, 'order': order, 'partitions': partitions,
+           'transformation': '' if transformation is None else transformation.name,
+           'size': len(mfts), 'time': times,
+           'sharpness': metrics[0], 'resolution': metrics[1], 'coverage': metrics[2],
+           'Q05': metrics[3], 'Q25': metrics[4], 'Q75': metrics[5], 'Q95': metrics[6],
+           'winkler05': metrics[7], 'winkler25': metrics[8],
+           'window': window_key, 'steps': steps_ahead, 'method': method}
+
+    return ret
+
+
+def run_probabilistic2(fts_method, order, partitioner_method, partitions, transformation, train_data, test_data, window_key=None, **kwargs):
+
+    import time
+    import numpy as np
+    from pyFTS.models import hofts, ifts, pwfts
+    from pyFTS.models.ensemble import ensemble
+    from pyFTS.partitioners import Grid, Entropy, FCM
+    from pyFTS.benchmarks import Measures, arima, quantreg, knn
+    from pyFTS.models.seasonal import SeasonalIndexer
+
+    tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS, arima.ARIMA,
+           ensemble.AllMethodEnsembleFTS, knn.KNearestNeighbors]
+
+    tmp2 = [Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner]
+
+    tmp3 = [Measures.get_distribution_statistics, SeasonalIndexer.SeasonalIndexer, SeasonalIndexer.LinearSeasonalIndexer]
+
+    indexer = kwargs.get('indexer', None)
+
+    steps_ahead = kwargs.get('steps_ahead', 1)
+    method = kwargs.get('method', None)
+    parameters = kwargs.get('parameters', {})
+
+    mfts, pttr = __build_model(fts_method, order, parameters, partitioner_method, partitions, train_data,
+                               transformation)
+
+    if mfts.has_seasonality:
+        mfts.indexer = indexer
+
+    _start = time.time()
+    mfts.fit(train_data, **kwargs)
+    _end = time.time()
+    times = _end - _start
+
+    _crps1, _t1, _brier = Measures.get_distribution_statistics(test_data, mfts, **kwargs)
+    _t1 += times
+
+    ret = {'model': mfts.shortname, 'partitioner': pttr, 'order': order, 'partitions': partitions,
+           'transformation': '' if transformation is None else transformation.name,
+           'size': len(mfts), 'time': times,
+           'CRPS': _crps1, 'brier': _brier, 'window': window_key,
+           'steps': steps_ahead, 'method': method}
+
+    return ret
+
+
 def process_point_jobs(dataset, tag, job, conn):
     """
     Extract information from a dictionary with point benchmark results and save it on a database
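Note: __build_model centralizes what each run_*2 function needs; for order > 1 it effectively does the following (sketch with standard pyFTS classes and a toy series):

    from pyFTS.models import hofts
    from pyFTS.partitioners import Grid

    train_data = [k % 10 for k in range(100)]
    fs = Grid.GridPartitioner(npart=10, data=train_data, transformation=None)
    pttr = str(fs.__module__).split('.')[-1]        # -> 'Grid'
    mfts = hofts.HighOrderFTS(partitioner=fs, order=2)
    print(mfts.shortname, pttr)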
@@ -528,6 +800,32 @@ def process_point_jobs(dataset, tag, job, conn):
     time.extend(["time", job["time"]])
     bUtil.insert_benchmark(time, conn)


+def process_point_jobs2(dataset, tag, job, conn):
+    """
+    Extract information from a dictionary with point benchmark results and save it on a database
+
+    :param dataset: the benchmark dataset name
+    :param tag: alias for the benchmark group being executed
+    :param job: a dictionary with the benchmark results
+    :param conn: a connection to a Sqlite database
+    :return:
+    """
+
+    data = bUtil.process_common_data2(dataset, tag, 'point', job)
+
+    rmse = deepcopy(data)
+    rmse.extend(["rmse", job["rmse"]])
+    bUtil.insert_benchmark(rmse, conn)
+    smape = deepcopy(data)
+    smape.extend(["smape", job["smape"]])
+    bUtil.insert_benchmark(smape, conn)
+    u = deepcopy(data)
+    u.extend(["u", job["u"]])
+    bUtil.insert_benchmark(u, conn)
+    time = deepcopy(data)
+    time.extend(["time", job["time"]])
+    bUtil.insert_benchmark(time, conn)
+
+
 def process_interval_jobs(dataset, tag, job, conn):
     """
@@ -574,6 +872,42 @@ def process_interval_jobs(dataset, tag, job, conn):
     bUtil.insert_benchmark(W25, conn)


+def process_interval_jobs2(dataset, tag, job, conn):
+
+    data = bUtil.process_common_data2(dataset, tag, 'interval', job)
+
+    sharpness = deepcopy(data)
+    sharpness.extend(["sharpness", job["sharpness"]])
+    bUtil.insert_benchmark(sharpness, conn)
+    resolution = deepcopy(data)
+    resolution.extend(["resolution", job["resolution"]])
+    bUtil.insert_benchmark(resolution, conn)
+    coverage = deepcopy(data)
+    coverage.extend(["coverage", job["coverage"]])
+    bUtil.insert_benchmark(coverage, conn)
+    time = deepcopy(data)
+    time.extend(["time", job["time"]])
+    bUtil.insert_benchmark(time, conn)
+    Q05 = deepcopy(data)
+    Q05.extend(["Q05", job["Q05"]])
+    bUtil.insert_benchmark(Q05, conn)
+    Q25 = deepcopy(data)
+    Q25.extend(["Q25", job["Q25"]])
+    bUtil.insert_benchmark(Q25, conn)
+    Q75 = deepcopy(data)
+    Q75.extend(["Q75", job["Q75"]])
+    bUtil.insert_benchmark(Q75, conn)
+    Q95 = deepcopy(data)
+    Q95.extend(["Q95", job["Q95"]])
+    bUtil.insert_benchmark(Q95, conn)
+    W05 = deepcopy(data)
+    W05.extend(["winkler05", job["winkler05"]])
+    bUtil.insert_benchmark(W05, conn)
+    W25 = deepcopy(data)
+    W25.extend(["winkler25", job["winkler25"]])
+    bUtil.insert_benchmark(W25, conn)
+
+
 def process_probabilistic_jobs(dataset, tag, job, conn):
     """
     Extract information from a dictionary with probabilistic benchmark results and save it on a database
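Note: all three process_*_jobs2 functions repeat the same deepcopy/extend/insert pattern; it could be condensed as below (a sketch only, assuming the module-level bUtil import; 'metrics' maps column names to job keys):

    from copy import deepcopy

    def _save_metrics(data, job, conn, metrics):
        for name, key in metrics.items():
            row = deepcopy(data)
            row.extend([name, job[key]])
            bUtil.insert_benchmark(row, conn)

    # e.g. _save_metrics(data, job, conn,
    #                    {'rmse': 'rmse', 'smape': 'smape', 'u': 'u', 'time': 'time'})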
@@ -598,6 +932,30 @@ def process_probabilistic_jobs(dataset, tag, job, conn):
     bUtil.insert_benchmark(brier, conn)


+def process_probabilistic_jobs2(dataset, tag, job, conn):
+    """
+    Extract information from a dictionary with probabilistic benchmark results and save it on a database
+
+    :param dataset: the benchmark dataset name
+    :param tag: alias for the benchmark group being executed
+    :param job: a dictionary with the benchmark results
+    :param conn: a connection to a Sqlite database
+    :return:
+    """
+
+    data = bUtil.process_common_data2(dataset, tag, 'density', job)
+
+    crps = deepcopy(data)
+    crps.extend(["crps", job["CRPS"]])
+    bUtil.insert_benchmark(crps, conn)
+    time = deepcopy(data)
+    time.extend(["time", job["time"]])
+    bUtil.insert_benchmark(time, conn)
+    brier = deepcopy(data)
+    brier.extend(["brier", job["brier"]])
+    bUtil.insert_benchmark(brier, conn)
+
+
 def print_point_statistics(data, models, externalmodels=None, externalforecasts=None, indexers=None):
     """
     Run point benchmarks on given models and data and print the results
@@ -672,13 +1030,6 @@ def print_distribution_statistics(original, models, steps, resolution):
     print(ret)
-
-
-
-
-
-
-


 def plot_point(axis, points, order, label, color='red', ls='-', linewidth=1):
     mi = min(points) * 0.95
     ma = max(points) * 1.05
@@ -758,10 +1109,6 @@ def plot_compared_series(original, models, colors, typeonlegend=False, save=False
     #Util.show_and_save_image(fig, file, save, lgd=legends)
-
-
-
-


 def plotCompared(original, forecasts, labels, title):
     fig = plt.figure(figsize=[13, 6])
     ax = fig.add_subplot(111)
@@ -15,6 +15,7 @@ class Transformation(object):
     def __init__(self, **kwargs):
         self.is_invertible = True
         self.minimal_length = 1
+        self.name = ''

     def apply(self, data, param, **kwargs):
         """
@@ -38,7 +39,7 @@ class Transformation(object):
         pass

     def __str__(self):
-        return self.__class__.__name__ + '(' + str(self.parameters) + ')'
+        return self.name


 class Differential(Transformation):
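Note: with the name attribute introduced above, transformations now print as their short alias. A quick check of the changed behavior (lag argument as in the Differential constructor in the next hunk):

    from pyFTS.common import Transformations

    tx = Transformations.Differential(1)
    print(str(tx))   # now prints 'Diff' instead of 'Differential(1)'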
@@ -49,6 +50,7 @@ class Differential(Transformation):
         super(Differential, self).__init__()
         self.lag = lag
         self.minimal_length = 2
+        self.name = 'Diff'

     @property
     def parameters(self):
@@ -128,6 +130,7 @@ class Scale(Transformation):
         self.data_min = None
         self.transf_max = max
         self.transf_min = min
+        self.name = 'Scale'

     @property
     def parameters(self):
@@ -167,6 +170,7 @@ class AdaptiveExpectation(Transformation):
     def __init__(self, parameters):
         super(AdaptiveExpectation, self).__init__(parameters)
         self.h = parameters
+        self.name = 'AdaptExpect'

     @property
     def parameters(self):
@@ -193,6 +197,7 @@ class BoxCox(Transformation):
     def __init__(self, plambda):
         super(BoxCox, self).__init__()
         self.plambda = plambda
+        self.name = 'BoxCox'

     @property
     def parameters(self):
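Note: these name attributes feed the 'transformation' column written by the run_*2 functions earlier in this commit, via the same expression used there:

    from pyFTS.common import Transformations

    transformation = Transformations.BoxCox(0.5)
    column = '' if transformation is None else transformation.name
    print(column)   # -> 'BoxCox'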
|
@ -305,6 +305,8 @@ class SimpleEnsembleFTS(EnsembleFTS):
|
|||||||
"""Possible variations of order on internal models"""
|
"""Possible variations of order on internal models"""
|
||||||
self.uod_clip = False
|
self.uod_clip = False
|
||||||
|
|
||||||
|
self.shortname = kwargs.get('name', 'EnsembleFTS-' + str(self.method.__module__).split('.')[-1])
|
||||||
|
|
||||||
def train(self, data, **kwargs):
|
def train(self, data, **kwargs):
|
||||||
for k in self.partitions:
|
for k in self.partitions:
|
||||||
fs = self.partitioner_method(data=data, npart=k)
|
fs = self.partitioner_method(data=data, npart=k)
|
||||||
|
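Note: a quick sketch of the string the new default shortname produces (assuming hofts.HighOrderFTS as the inner method):

    from pyFTS.models import hofts

    method = hofts.HighOrderFTS
    print('EnsembleFTS-' + str(method.__module__).split('.')[-1])
    # -> 'EnsembleFTS-hofts'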
@@ -62,7 +62,7 @@ class IntervalFTS(hofts.HighOrderFTS):
         if l <= self.order:
             return ndata

-        for k in np.arange(self.max_lag, l+1):
+        for k in np.arange(self.max_lag, l):

             sample = ndata[k - self.max_lag: k]
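Note: the bound change fixes an off-by-one. A toy trace of the corrected loop (illustrative data only); with the old l+1 bound, k would also reach l, so a fourth window ndata[3:5] would be processed and one extra forecast emitted:

    import numpy as np

    ndata = [1, 2, 3, 4, 5]
    l, max_lag = len(ndata), 2

    for k in np.arange(max_lag, l):
        print(k, ndata[k - max_lag: k])   # windows [1,2], [2,3], [3,4]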
@@ -94,9 +94,9 @@ class WeightedIntervalFTS(hofts.WeightedHighOrderFTS):
     Weighted High Order Interval Fuzzy Time Series
     """
     def __init__(self, **kwargs):
-        super(IntervalFTS, self).__init__(**kwargs)
-        self.shortname = "IFTS"
-        self.name = "Interval FTS"
+        super(WeightedIntervalFTS, self).__init__(**kwargs)
+        self.shortname = "WIFTS"
+        self.name = "Weighted Interval FTS"
         self.detail = "Silva, P.; Guimarães, F.; Sadaei, H. (2016)"
         self.flrgs = {}
         self.has_point_forecasting = False
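Note: the super() fix above is not cosmetic. WeightedIntervalFTS subclasses hofts.WeightedHighOrderFTS, and IntervalFTS is not in its MRO, so the old call would fail at construction time. A minimal reproduction of the pitfall with toy classes (not pyFTS):

    class A:
        pass

    class B(A):
        pass

    class Other(A):
        pass

    class Child(B):
        def __init__(self):
            # bug pattern: Other is not in Child's MRO
            super(Other, self).__init__()

    try:
        Child()
    except TypeError as ex:
        print(ex)   # super(type, obj): obj must be an instance or subtype of type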
@@ -138,7 +138,7 @@ class WeightedIntervalFTS(hofts.WeightedHighOrderFTS):
         if l <= self.order:
             return ndata

-        for k in np.arange(self.max_lag, l+1):
+        for k in np.arange(self.max_lag, l):

             sample = ndata[k - self.max_lag: k]
@@ -11,146 +11,41 @@ import pandas as pd
 from pyFTS.common import Util as cUtil, FuzzySet
 from pyFTS.partitioners import Grid, Entropy, Util as pUtil, Simple
 from pyFTS.benchmarks import benchmarks as bchmk, Measures
-from pyFTS.models import chen, yu, cheng, ismailefendi, hofts, pwfts, tsaur, song, sadaei
+from pyFTS.models import chen, yu, cheng, ismailefendi, hofts, pwfts, tsaur, song, sadaei, ifts
 from pyFTS.models.ensemble import ensemble
 from pyFTS.common import Transformations, Membership
 from pyFTS.benchmarks import arima, quantreg, BSTS, gaussianproc
 from pyFTS.fcm import fts, common, GA

-from pyFTS.data import Enrollments, TAIEX
+from pyFTS.data import TAIEX, NASDAQ, SP500

-data = TAIEX.get_data()
+datasets = {}

-train = data[:800]
-test = data[800:1000]
+datasets['TAIEX'] = TAIEX.get_data()[:5000]
+datasets['NASDAQ'] = NASDAQ.get_data()[:5000]
+datasets['SP500'] = SP500.get_data()[10000:15000]

-#model = ensemble.SimpleEnsembleFTS(fts_method=hofts.HighOrderFTS)
-#model = quantreg.QuantileRegression(order=2, dist=True)
-#model = arima.ARIMA(order = (2,0,0))
-#model = BSTS.ARIMA(order=(2,0,0))
-model = gaussianproc.GPR(order=2)
-model.fit(train)
+methods = [ensemble.SimpleEnsembleFTS]*8

-horizon=5
-
-#points = model.predict(test[:10], type='point', steps_ahead=horizon)
-
-intervals = model.predict(test[:10], type='point', alpha=.05, steps_ahead=horizon)
-print(test[:10])
-print(intervals)
-#distributions = model.predict(test[:10], type='distribution', steps_ahead=horizon, num_bins=100)
-
-fig, ax = plt.subplots(nrows=1, ncols=1,figsize=[15,5])
-
-ax.plot(test[:10], label='Original',color='black')
-cUtil.plot_interval2(intervals, test[:10], start_at=model.order, ax=ax)
-#cUtil.plot_distribution2(distributions, test[:10], start_at=model.order, ax=ax, cmap="Blues")
-
-print("")
-
-'''
-model = fts.FCM_FTS(partitioner=fs, order=1)
-
-model.fcm.weights = np.array([
-    [1, 1, 0, -1, -1],
-    [1, 1, 1, 0, -1],
-    [0, 1, 1, 1, 0],
-    [-1, 0, 1, 1, 1],
-    [-1, -1, 0, 1, 1]
-])
-
-print(data)
-print(model.forecast(data))
-'''
-'''
-dataset = pd.read_csv('https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj', sep=';')
-
-dataset['data'] = pd.to_datetime(dataset["data"], format='%Y-%m-%d %H:%M:%S')
-
-train_mv = dataset.iloc[:24505]
-test_mv = dataset.iloc[24505:24605]
-
-from itertools import product
-
-levels = ['VL', 'L', 'M', 'H', 'VH']
-sublevels = [str(k) for k in np.arange(0, 7)]
-names = []
-for combination in product(*[levels, sublevels]):
-    names.append(combination[0] + combination[1])
-
-print(names)
-
-from pyFTS.models.multivariate import common, variable, mvfts
-from pyFTS.models.seasonal import partitioner as seasonal
-from pyFTS.models.seasonal.common import DateTime
-
-sp = {'seasonality': DateTime.day_of_year, 'names': ['Jan','Feb','Mar','Apr','May',
-                                                     'Jun','Jul','Aug','Sep','Oct',
-                                                     'Nov','Dec']}
-
-vmonth = variable.Variable("Month", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=12,
-                           data=train_mv, partitioner_specific=sp)
-
-sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k)+'hs' for k in range(0,24)]}
-
-vhour = variable.Variable("Hour", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=24,
-                          data=train_mv, partitioner_specific=sp)
-
-vavg = variable.Variable("Radiation", data_label="glo_avg", alias='rad',
-                         partitioner=Grid.GridPartitioner, npart=35, partitioner_specific={'names': names},
-                         data=train_mv)
-
-from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid
-
-parameters = [
-    {}, {},
-    {'order': 2, 'knn': 1},
-    {'order': 2, 'knn': 2},
-    {'order': 2, 'knn': 3},
-]
+methods_parameters = [
+    {'name': 'EnsembleFTS-HOFTS-10-.05', 'fts_method': hofts.HighOrderFTS, 'partitions': np.arange(20,50,10), 'alpha': .05},
+    {'name': 'EnsembleFTS-HOFTS-5-.05', 'fts_method': hofts.HighOrderFTS, 'partitions': np.arange(20,50,5), 'alpha': .05},
+    {'name': 'EnsembleFTS-HOFTS-10-.25', 'fts_method': hofts.HighOrderFTS, 'partitions': np.arange(20,50,10), 'alpha': .25},
+    {'name': 'EnsembleFTS-HOFTS-5-.25', 'fts_method': hofts.HighOrderFTS, 'partitions': np.arange(20,50,5), 'alpha': .25},
+    {'name': 'EnsembleFTS-WHOFTS-10-.05', 'fts_method': hofts.WeightedHighOrderFTS, 'partitions': np.arange(20,50,10), 'alpha': .05},
+    {'name': 'EnsembleFTS-WHOFTS-5-.05', 'fts_method': hofts.WeightedHighOrderFTS, 'partitions': np.arange(20,50,5), 'alpha': .05},
+    {'name': 'EnsembleFTS-WHOFTS-10-.25', 'fts_method': hofts.WeightedHighOrderFTS, 'partitions': np.arange(20,50,10), 'alpha': .25},
+    {'name': 'EnsembleFTS-WHOFTS-5-.25', 'fts_method': hofts.WeightedHighOrderFTS, 'partitions': np.arange(20,50,5), 'alpha': .25},
+]

-from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid, granular
-from pyFTS.benchmarks import Measures
-
-time_generator = lambda x : pd.to_datetime(x) + pd.to_timedelta(1, unit='h')
-
-model = granular.GranularWMVFTS(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg, order=2, knn=2)
-
-model.fit(train_mv)
-
-forecasts = model.predict(test_mv, type='multivariate', generators={'data': time_generator}, steps_ahead=24)
-
-print(forecasts)
-'''
-'''
-from pyFTS.data import lorentz
-df = lorentz.get_dataframe(iterations=5000)
-
-train = df.iloc[:4000]
-test = df.iloc[4000:]
-
-from pyFTS.models.multivariate import common, variable, mvfts
-from pyFTS.partitioners import Grid
-
-vx = variable.Variable("x", data_label="x", alias='x', partitioner=Grid.GridPartitioner, npart=45, data=train)
-vy = variable.Variable("y", data_label="y", alias='y', partitioner=Grid.GridPartitioner, npart=45, data=train)
-vz = variable.Variable("z", data_label="z", alias='z', partitioner=Grid.GridPartitioner, npart=45, data=train)
-
-from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid, granular
-from pyFTS.benchmarks import Measures
-
-model = granular.GranularWMVFTS(explanatory_variables=[vx, vy, vz], target_variable=vx, order=5, knn=2)
-
-model.fit(train)
-
-forecasts = model.predict(test, type='multivariate', steps_ahead=20)
-
-print(forecasts)
-'''
+for dataset_name, dataset in datasets.items():
+    bchmk.sliding_window_benchmarks2(dataset, 1000, train=0.8, inc=0.2,
+                                     methods=methods,
+                                     methods_parameters=methods_parameters,
+                                     benchmark_models=False,
+                                     transformations=[None],
+                                     orders=[3],
+                                     partitions=[None],
+                                     type='interval',
+                                     #distributed=True, nodes=['192.168.0.110', '192.168.0.107','192.168.0.106'],
+                                     file="tmp.db", dataset=dataset_name, tag="gridsearch")