New version of sliding_window_benchmarks; bugfixes in benchmarks, ifts and ensemble

Petrônio Cândido 2019-05-31 18:32:53 -03:00
parent 9b90853f6b
commit 0dc2fabdcc
7 changed files with 427 additions and 154 deletions

View File

@@ -397,7 +397,7 @@ def get_interval_statistics(data, model, **kwargs):
        forecasts = model.predict(data, **kwargs)
        ret.append(round(sharpness(forecasts), 2))
        ret.append(round(resolution(forecasts), 2))
-       ret.append(round(coverage(data[model.order:], forecasts[:-1]), 2))
+       ret.append(round(coverage(data[model.max_lag:], forecasts[:-1]), 2))
        ret.append(round(pinball_mean(0.05, data[model.max_lag:], forecasts[:-1]), 2))
        ret.append(round(pinball_mean(0.25, data[model.max_lag:], forecasts[:-1]), 2))
        ret.append(round(pinball_mean(0.75, data[model.max_lag:], forecasts[:-1]), 2))
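The coverage line was the only interval metric still slicing the targets with model.order; for models whose lags are not contiguous (e.g. lags [1, 3]) max_lag is larger than order, so the fix keeps the targets aligned with the pinball lines above. A toy alignment with plain lists standing in for model.predict (hypothetical values):

data = [10, 12, 11, 13, 14, 15]
max_lag = 3     # largest lag used by the model
order = 2       # number of lags, e.g. lags [1, 3]
forecasts = [[9, 11], [10, 12], [12, 14], [13, 15]]   # one interval per window, plus one for the next unseen point
print(len(data[max_lag:]), len(forecasts[:-1]))       # 3 and 3: aligned
print(len(data[order:]), len(forecasts[:-1]))         # 4 and 3: the old slice was misaligned by max_lag - order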

View File

@@ -108,6 +108,30 @@ def process_common_data(dataset, tag, type, job):
    return data


def process_common_data2(dataset, tag, type, job):
    """
    Wraps benchmark information on a tuple for sqlite database

    :param dataset: benchmark dataset
    :param tag: benchmark set alias
    :param type: forecasting type
    :param job: a dictionary with benchmark data
    :return: tuple for sqlite database
    """
    data = [dataset, tag, type,
            job['model'],
            job['transformation'],
            job['order'],
            job['partitioner'],
            job['partitions'],
            job['size'],
            job['steps'],
            job['method']
            ]

    return data
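A hypothetical example of the common row prefix, using the keys produced by the run_*2 functions in benchmarks.py; callers append a metric name and value before inserting:

job = {'model': 'HOFTS', 'transformation': '', 'order': 2, 'partitioner': 'Grid',
       'partitions': 30, 'size': 150, 'steps': 1, 'method': None}
row = process_common_data2('TAIEX', 'experiments', 'point', job)
# row == ['TAIEX', 'experiments', 'point', 'HOFTS', '', 2, 'Grid', 30, 150, 1, None]
# e.g. bUtil.insert_benchmark(row + ['rmse', 118.3], conn)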
def get_dataframe_from_bd(file, filter):
    """
    Query the sqlite benchmark database and return a pandas dataframe with the results

View File

@@ -14,6 +14,7 @@ import matplotlib as plt
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
from itertools import product

from pyFTS.common import Transformations
from pyFTS.models import song, chen, yu, ismailefendi, sadaei, hofts, pwfts, ifts, cheng, hwang

@@ -67,7 +68,7 @@ def get_benchmark_interval_methods():

def get_interval_methods():
    """Return all FTS methods for point_to_interval forecasting"""
-   return [ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS]
+   return [ifts.IntervalFTS, ifts.WeightedIntervalFTS, pwfts.ProbabilisticWeightedFTS]


def get_probabilistic_methods():

@@ -80,6 +81,126 @@ def get_benchmark_probabilistic_methods():
    return [arima.ARIMA, quantreg.QuantileRegression, knn.KNearestNeighbors]
def sliding_window_benchmarks2(data, windowsize, train=0.8, **kwargs):
    """
    Sliding window benchmarks for FTS forecasters, writing each partial result
    directly to the sqlite database given by the 'file' argument.
    """
    tag = __pop('tag', None, kwargs)
    dataset = __pop('dataset', None, kwargs)

    distributed = __pop('distributed', False, kwargs)

    transformations = kwargs.get('transformations', [None])

    type = kwargs.get("type", 'point')

    orders = __pop("orders", [1, 2, 3], kwargs)

    partitioners_methods = __pop("partitioners_methods", [Grid.GridPartitioner], kwargs)
    partitions = __pop("partitions", [10], kwargs)
    partitions = [k for k in partitions]

    steps_ahead = __pop('steps_ahead', [1], kwargs)
    steps_ahead = [k for k in steps_ahead]

    fts_methods = __pop('methods', None, kwargs)
    methods_parameters = __pop('methods_parameters', None, kwargs)

    if fts_methods is None:
        if type == 'point':
            fts_methods = get_point_methods()
        elif type == 'interval':
            fts_methods = get_interval_methods()
        elif type == 'distribution':
            fts_methods = get_probabilistic_methods()

    ix_methods = [k for k in np.arange(len(fts_methods))]

    benchmark_models = __pop("benchmark_models", False, kwargs)
    benchmark_methods = __pop("benchmark_methods", None, kwargs)
    benchmark_methods_parameters = __pop("benchmark_methods_parameters", None, kwargs)

    if type == 'point':
        experiment_method = run_point2
        synthesis_method = process_point_jobs2
    elif type == 'interval':
        experiment_method = run_interval2
        synthesis_method = process_interval_jobs2
    elif type == 'distribution':
        experiment_method = run_probabilistic2
        synthesis_method = process_probabilistic_jobs2
    else:
        raise ValueError("Type parameter has an unknown value!")

    if distributed:
        import pyFTS.distributed.dispy as dispy

        nodes = kwargs.get("nodes", ['127.0.0.1'])
        cluster, http_server = dispy.start_dispy_cluster(experiment_method, nodes)

    inc = __pop("inc", 0.1, kwargs)

    file = kwargs.get('file', "benchmarks.db")

    conn = bUtil.open_benchmark_db(file)

    jobs = []
    for ct, train, test in cUtil.sliding_window(data, windowsize, train, inc=inc, **kwargs):
        if benchmark_models:
            for bm, method in enumerate(benchmark_methods):
                for step in steps_ahead:
                    kwargs['steps_ahead'] = step
                    kwargs['parameters'] = benchmark_methods_parameters[bm]

                    if not distributed:
                        try:
                            job = experiment_method(method, None, None, None, None, train, test, ct, **kwargs)
                            synthesis_method(dataset, tag, job, conn)
                        except Exception as ex:
                            print('EXCEPTION! ', method, benchmark_methods_parameters[bm])
                            traceback.print_exc()
                    else:
                        job = cluster.submit(method, None, None, None, None, train, test, ct, **kwargs)
                        jobs.append(job)
        else:
            params = [ix_methods, orders, partitioners_methods, partitions, transformations, steps_ahead]
            for id, instance in enumerate(product(*params)):
                fts_method = fts_methods[instance[0]]
                kwargs['steps_ahead'] = instance[5]
                if methods_parameters is not None:
                    kwargs['parameters'] = methods_parameters[instance[0]]
                if not distributed:
                    try:
                        job = experiment_method(fts_method, instance[1], instance[2], instance[3], instance[4],
                                                train, test, ct, **kwargs)
                        synthesis_method(dataset, tag, job, conn)
                    except Exception as ex:
                        print('EXCEPTION! ', instance)
                        traceback.print_exc()
                else:
                    job = cluster.submit(fts_method, instance[1], instance[2], instance[3], instance[4],
                                         train, test, ct, **kwargs)
                    job.id = id
                    jobs.append(job)

    if distributed:
        for job in jobs:
            job()
            if job.status == dispy.dispy.DispyJob.Finished and job is not None:
                tmp = job.result
                synthesis_method(dataset, tag, tmp, conn)
            else:
                print("status", job.status)
                print("result", job.result)
                print("stdout", job.stdout)
                print("stderr", job.exception)

        cluster.wait()  # wait for all jobs to finish

        dispy.stop_dispy_cluster(cluster, http_server)

    conn.close()
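A minimal usage sketch of the new entry point (slice size, tag and database file are arbitrary choices here; the test script at the end of this commit shows a full interval grid search):

from pyFTS.data import TAIEX
from pyFTS.benchmarks import benchmarks as bchmk
from pyFTS.models import hofts, pwfts

data = TAIEX.get_data()[:2000]

bchmk.sliding_window_benchmarks2(data, 1000, train=0.8, inc=0.5,
                                 methods=[hofts.HighOrderFTS, pwfts.ProbabilisticWeightedFTS],
                                 orders=[1, 2],
                                 partitions=[20, 35],
                                 type='point',
                                 file='benchmarks.db',
                                 dataset='TAIEX', tag='sketch')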
def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
    """
    Sliding window benchmarks for FTS forecasters.

@@ -314,8 +435,6 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
    conn.close()


def run_point(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
    """
    Run the point forecasting benchmarks

@@ -502,6 +621,159 @@ def run_probabilistic(mfts, partitioner, train_data, test_data, window_key=None,
    return ret
def __build_model(fts_method, order, parameters, partitioner_method, partitions, train_data, transformation):
    mfts = fts_method(**parameters)

    if mfts.benchmark_only or mfts.is_wrapper:
        pttr = ''
    else:
        fs = partitioner_method(npart=partitions, data=train_data, transformation=transformation)
        pttr = str(fs.__module__).split('.')[-1]
        if order > 1:
            mfts = fts_method(partitioner=fs, order=order, **parameters)
        else:
            mfts.partitioner = fs

    if transformation is not None:
        mfts.append_transformation(transformation)

    return mfts, pttr
def run_point2(fts_method, order, partitioner_method, partitions, transformation, train_data, test_data, window_key=None, **kwargs):
    import time
    from pyFTS.models import yu, chen, hofts, pwfts, ismailefendi, sadaei, song, cheng, hwang
    from pyFTS.partitioners import Grid, Entropy, FCM
    from pyFTS.benchmarks import Measures, naive, arima, quantreg
    from pyFTS.common import Transformations

    # these lists are not used directly below; they only keep references to the supported classes
    tmp = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, ismailefendi.ImprovedWeightedFTS,
           cheng.TrendWeightedFTS, sadaei.ExponentialyWeightedFTS, hofts.HighOrderFTS, hwang.HighOrderFTS,
           pwfts.ProbabilisticWeightedFTS]

    tmp2 = [Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner]

    tmp4 = [naive.Naive, arima.ARIMA, quantreg.QuantileRegression]

    tmp3 = [Measures.get_point_statistics]

    tmp5 = [Transformations.Differential]

    indexer = kwargs.get('indexer', None)

    steps_ahead = kwargs.get('steps_ahead', 1)
    method = kwargs.get('method', None)
    parameters = kwargs.get('parameters', {})

    mfts, pttr = __build_model(fts_method, order, parameters, partitioner_method, partitions, train_data,
                               transformation)

    _start = time.time()
    mfts.fit(train_data, **kwargs)
    _end = time.time()
    times = _end - _start

    _start = time.time()
    _rmse, _smape, _u = Measures.get_point_statistics(test_data, mfts, **kwargs)
    _end = time.time()
    times += _end - _start

    ret = {'model': mfts.shortname, 'partitioner': pttr, 'order': order, 'partitions': partitions,
           'transformation': '' if transformation is None else transformation.name,
           'size': len(mfts), 'time': times,
           'rmse': _rmse, 'smape': _smape, 'u': _u, 'window': window_key,
           'steps': steps_ahead, 'method': method}

    return ret
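For a quick local check the worker can also be called directly, outside the sliding-window loop; a sketch with arbitrary split points:

from pyFTS.data import TAIEX
from pyFTS.partitioners import Grid
from pyFTS.models import hofts
from pyFTS.benchmarks import benchmarks as bchmk

data = TAIEX.get_data()
job = bchmk.run_point2(hofts.HighOrderFTS, 2, Grid.GridPartitioner, 35, None,
                       data[:800], data[800:1000], window_key=0)
print(job['rmse'], job['smape'], job['u'])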
def run_interval2(fts_method, order, partitioner_method, partitions, transformation, train_data, test_data, window_key=None, **kwargs):
    import time
    from pyFTS.models import hofts, ifts, pwfts
    from pyFTS.partitioners import Grid, Entropy, FCM
    from pyFTS.benchmarks import Measures, arima, quantreg, BSTS

    tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, ifts.WeightedIntervalFTS, pwfts.ProbabilisticWeightedFTS]

    tmp2 = [Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner]

    tmp4 = [arima.ARIMA, quantreg.QuantileRegression, BSTS.ARIMA]

    tmp3 = [Measures.get_interval_statistics]

    steps_ahead = kwargs.get('steps_ahead', 1)
    method = kwargs.get('method', None)
    parameters = kwargs.get('parameters', {})

    mfts, pttr = __build_model(fts_method, order, parameters, partitioner_method, partitions, train_data,
                               transformation)

    _start = time.time()
    mfts.fit(train_data, **kwargs)
    _end = time.time()
    times = _end - _start

    _start = time.time()
    # metrics: sharpness, resolution, coverage, Q05, Q25, Q75, Q95, winkler05, winkler25
    metrics = Measures.get_interval_statistics(test_data, mfts, **kwargs)
    _end = time.time()
    times += _end - _start

    ret = {'model': mfts.shortname, 'partitioner': pttr, 'order': order, 'partitions': partitions,
           'transformation': '' if transformation is None else transformation.name,
           'size': len(mfts), 'time': times,
           'sharpness': metrics[0], 'resolution': metrics[1], 'coverage': metrics[2],
           'Q05': metrics[3], 'Q25': metrics[4], 'Q75': metrics[5], 'Q95': metrics[6],
           'winkler05': metrics[7], 'winkler25': metrics[8],
           'window': window_key, 'steps': steps_ahead, 'method': method}

    return ret
def run_probabilistic2(fts_method, order, partitioner_method, partitions, transformation, train_data, test_data, window_key=None, **kwargs):
    import time
    import numpy as np
    from pyFTS.models import hofts, ifts, pwfts
    from pyFTS.models.ensemble import ensemble
    from pyFTS.partitioners import Grid, Entropy, FCM
    from pyFTS.benchmarks import Measures, arima, quantreg, knn
    from pyFTS.models.seasonal import SeasonalIndexer

    tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS, arima.ARIMA,
           ensemble.AllMethodEnsembleFTS, knn.KNearestNeighbors]

    tmp2 = [Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner]

    tmp3 = [Measures.get_distribution_statistics, SeasonalIndexer.SeasonalIndexer, SeasonalIndexer.LinearSeasonalIndexer]

    indexer = kwargs.get('indexer', None)

    steps_ahead = kwargs.get('steps_ahead', 1)
    method = kwargs.get('method', None)
    parameters = kwargs.get('parameters', {})

    mfts, pttr = __build_model(fts_method, order, parameters, partitioner_method, partitions, train_data,
                               transformation)

    if mfts.has_seasonality:
        mfts.indexer = indexer

    _start = time.time()
    mfts.fit(train_data, **kwargs)
    _end = time.time()
    times = _end - _start

    _crps1, _t1, _brier = Measures.get_distribution_statistics(test_data, mfts, **kwargs)
    _t1 += times

    ret = {'model': mfts.shortname, 'partitioner': pttr, 'order': order, 'partitions': partitions,
           'transformation': '' if transformation is None else transformation.name,
           'size': len(mfts), 'time': times,
           'CRPS': _crps1, 'brier': _brier, 'window': window_key,
           'steps': steps_ahead, 'method': method}

    return ret
def process_point_jobs(dataset, tag, job, conn):
    """
    Extract information from a dictionary with point benchmark results and save it on a database

@@ -528,6 +800,32 @@ def process_point_jobs(dataset, tag, job, conn):
    time.extend(["time", job["time"]])
    bUtil.insert_benchmark(time, conn)
def process_point_jobs2(dataset, tag, job, conn):
    """
    Extract information from a dictionary with point benchmark results and save it on a database

    :param dataset: the benchmark dataset name
    :param tag: alias for the benchmark group being executed
    :param job: a dictionary with the benchmark results
    :param conn: a connection to a Sqlite database
    :return:
    """
    data = bUtil.process_common_data2(dataset, tag, 'point', job)

    rmse = deepcopy(data)
    rmse.extend(["rmse", job["rmse"]])
    bUtil.insert_benchmark(rmse, conn)
    smape = deepcopy(data)
    smape.extend(["smape", job["smape"]])
    bUtil.insert_benchmark(smape, conn)
    u = deepcopy(data)
    u.extend(["u", job["u"]])
    bUtil.insert_benchmark(u, conn)
    time = deepcopy(data)
    time.extend(["time", job["time"]])
    bUtil.insert_benchmark(time, conn)
def process_interval_jobs(dataset, tag, job, conn):
    """

@@ -574,6 +872,42 @@ def process_interval_jobs(dataset, tag, job, conn):
    bUtil.insert_benchmark(W25, conn)
def process_interval_jobs2(dataset, tag, job, conn):
    """
    Extract information from a dictionary with interval benchmark results and save it on a database

    :param dataset: the benchmark dataset name
    :param tag: alias for the benchmark group being executed
    :param job: a dictionary with the benchmark results
    :param conn: a connection to a Sqlite database
    :return:
    """
    data = bUtil.process_common_data2(dataset, tag, 'interval', job)

    sharpness = deepcopy(data)
    sharpness.extend(["sharpness", job["sharpness"]])
    bUtil.insert_benchmark(sharpness, conn)

    resolution = deepcopy(data)
    resolution.extend(["resolution", job["resolution"]])
    bUtil.insert_benchmark(resolution, conn)

    coverage = deepcopy(data)
    coverage.extend(["coverage", job["coverage"]])
    bUtil.insert_benchmark(coverage, conn)

    time = deepcopy(data)
    time.extend(["time", job["time"]])
    bUtil.insert_benchmark(time, conn)

    Q05 = deepcopy(data)
    Q05.extend(["Q05", job["Q05"]])
    bUtil.insert_benchmark(Q05, conn)

    Q25 = deepcopy(data)
    Q25.extend(["Q25", job["Q25"]])
    bUtil.insert_benchmark(Q25, conn)

    Q75 = deepcopy(data)
    Q75.extend(["Q75", job["Q75"]])
    bUtil.insert_benchmark(Q75, conn)

    Q95 = deepcopy(data)
    Q95.extend(["Q95", job["Q95"]])
    bUtil.insert_benchmark(Q95, conn)

    W05 = deepcopy(data)
    W05.extend(["winkler05", job["winkler05"]])
    bUtil.insert_benchmark(W05, conn)

    W25 = deepcopy(data)
    W25.extend(["winkler25", job["winkler25"]])
    bUtil.insert_benchmark(W25, conn)
def process_probabilistic_jobs(dataset, tag, job, conn):
    """
    Extract information from an dictionary with probabilistic benchmark results and save it on a database

@@ -598,6 +932,30 @@ def process_probabilistic_jobs(dataset, tag, job, conn):
    bUtil.insert_benchmark(brier, conn)
def process_probabilistic_jobs2(dataset, tag, job, conn):
    """
    Extract information from a dictionary with probabilistic benchmark results and save it on a database

    :param dataset: the benchmark dataset name
    :param tag: alias for the benchmark group being executed
    :param job: a dictionary with the benchmark results
    :param conn: a connection to a Sqlite database
    :return:
    """
    data = bUtil.process_common_data2(dataset, tag, 'density', job)

    crps = deepcopy(data)
    crps.extend(["crps", job["CRPS"]])
    bUtil.insert_benchmark(crps, conn)

    time = deepcopy(data)
    time.extend(["time", job["time"]])
    bUtil.insert_benchmark(time, conn)

    brier = deepcopy(data)
    brier.extend(["brier", job["brier"]])
    bUtil.insert_benchmark(brier, conn)
def print_point_statistics(data, models, externalmodels = None, externalforecasts = None, indexers=None):
    """
    Run point benchmarks on given models and data and print the results

@@ -672,13 +1030,6 @@ def print_distribution_statistics(original, models, steps, resolution):
    print(ret)


def plot_point(axis, points, order, label, color='red', ls='-', linewidth=1):
    mi = min(points) * 0.95
    ma = max(points) * 1.05

@@ -758,10 +1109,6 @@ def plot_compared_series(original, models, colors, typeonlegend=False, save=Fals
    #Util.show_and_save_image(fig, file, save, lgd=legends)


def plotCompared(original, forecasts, labels, title):
    fig = plt.figure(figsize=[13, 6])
    ax = fig.add_subplot(111)

View File

@@ -15,6 +15,7 @@ class Transformation(object):
    def __init__(self, **kwargs):
        self.is_invertible = True
        self.minimal_length = 1
        self.name = ''

    def apply(self, data, param, **kwargs):
        """

@@ -38,7 +39,7 @@
        pass

    def __str__(self):
-       return self.__class__.__name__ + '(' + str(self.parameters) + ')'
+       return self.name


class Differential(Transformation):

@@ -49,6 +50,7 @@ class Differential(Transformation):
        super(Differential, self).__init__()
        self.lag = lag
        self.minimal_length = 2
        self.name = 'Diff'

    @property
    def parameters(self):

@@ -128,6 +130,7 @@ class Scale(Transformation):
        self.data_min = None
        self.transf_max = max
        self.transf_min = min
        self.name = 'Scale'

    @property
    def parameters(self):

@@ -167,6 +170,7 @@ class AdaptiveExpectation(Transformation):
    def __init__(self, parameters):
        super(AdaptiveExpectation, self).__init__(parameters)
        self.h = parameters
        self.name = 'AdaptExpect'

    @property
    def parameters(self):

@@ -193,6 +197,7 @@ class BoxCox(Transformation):
    def __init__(self, plambda):
        super(BoxCox, self).__init__()
        self.plambda = plambda
        self.name = 'BoxCox'

    @property
    def parameters(self):
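Since the benchmark result rows store '' if transformation is None else transformation.name, the new attribute is what ends up in the database; a quick check of the changed __str__ behaviour:

from pyFTS.common import Transformations

diff = Transformations.Differential(1)
boxcox = Transformations.BoxCox(0.5)
print(str(diff), str(boxcox))   # expected output: Diff BoxCox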

View File

@@ -305,6 +305,8 @@ class SimpleEnsembleFTS(EnsembleFTS):
        """Possible variations of order on internal models"""
        self.uod_clip = False

        self.shortname = kwargs.get('name', 'EnsembleFTS-' + str(self.method.__module__).split('.')[-1])

    def train(self, data, **kwargs):
        for k in self.partitions:
            fs = self.partitioner_method(data=data, npart=k)
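A sketch of the default naming, assuming SimpleEnsembleFTS keeps its fts_method argument in self.method as the line above implies; an explicit name kwarg (as used in the grid-search script below) overrides it:

from pyFTS.models import hofts
from pyFTS.models.ensemble import ensemble

model = ensemble.SimpleEnsembleFTS(fts_method=hofts.HighOrderFTS)
print(model.shortname)    # expected: EnsembleFTS-hofts

named = ensemble.SimpleEnsembleFTS(fts_method=hofts.HighOrderFTS, name='EnsembleFTS-HOFTS-10-.05')
print(named.shortname)    # expected: EnsembleFTS-HOFTS-10-.05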

View File

@@ -62,7 +62,7 @@ class IntervalFTS(hofts.HighOrderFTS):
        if l <= self.order:
            return ndata

-       for k in np.arange(self.max_lag, l+1):
+       for k in np.arange(self.max_lag, l):

            sample = ndata[k - self.max_lag: k]
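The bound change only affects the last iteration; a toy illustration of the corrected indices (hypothetical series, max_lag = 3):

import numpy as np

ndata = [10, 11, 12, 13, 14, 15]
max_lag, l = 3, len(ndata)

for k in np.arange(max_lag, l):
    print(k, ndata[k - max_lag: k])
# windows: [10, 11, 12], [11, 12, 13], [12, 13, 14]
# the old upper bound l+1 added one extra iteration (k == 6) whose window already
# reaches the end of ndata, i.e. an interval for a point beyond the input series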
@@ -94,9 +94,9 @@ class WeightedIntervalFTS(hofts.WeightedHighOrderFTS):
    Weighted High Order Interval Fuzzy Time Series
    """
    def __init__(self, **kwargs):
-       super(IntervalFTS, self).__init__(**kwargs)
-       self.shortname = "IFTS"
-       self.name = "Interval FTS"
+       super(WeightedIntervalFTS, self).__init__(**kwargs)
+       self.shortname = "WIFTS"
+       self.name = "Weighted Interval FTS"
        self.detail = "Silva, P.; Guimarães, F.; Sadaei, H. (2016)"
        self.flrgs = {}
        self.has_point_forecasting = False

@@ -138,7 +138,7 @@ class WeightedIntervalFTS(hofts.WeightedHighOrderFTS):
        if l <= self.order:
            return ndata

-       for k in np.arange(self.max_lag, l+1):
+       for k in np.arange(self.max_lag, l):

            sample = ndata[k - self.max_lag: k]

@@ -162,4 +162,4 @@ class WeightedIntervalFTS(hofts.WeightedHighOrderFTS):
            up_ = sum(up) / norm
            ret.append([lo_, up_])

        return ret

View File

@@ -11,146 +11,41 @@ import pandas as pd
from pyFTS.common import Util as cUtil, FuzzySet
from pyFTS.partitioners import Grid, Entropy, Util as pUtil, Simple
from pyFTS.benchmarks import benchmarks as bchmk, Measures
-from pyFTS.models import chen, yu, cheng, ismailefendi, hofts, pwfts, tsaur, song, sadaei
+from pyFTS.models import chen, yu, cheng, ismailefendi, hofts, pwfts, tsaur, song, sadaei, ifts
from pyFTS.models.ensemble import ensemble
from pyFTS.common import Transformations, Membership
from pyFTS.benchmarks import arima, quantreg, BSTS, gaussianproc
from pyFTS.fcm import fts, common, GA
-from pyFTS.data import Enrollments, TAIEX
+from pyFTS.data import TAIEX, NASDAQ, SP500

-data = TAIEX.get_data()
-train = data[:800]
-test = data[800:1000]
-
-#model = ensemble.SimpleEnsembleFTS(fts_method=hofts.HighOrderFTS)
-#model = quantreg.QuantileRegression(order=2, dist=True)
-#model = arima.ARIMA(order = (2,0,0))
-#model = BSTS.ARIMA(order=(2,0,0))
-model = gaussianproc.GPR(order=2)
-model.fit(train)
-
-horizon=5
-
-#points = model.predict(test[:10], type='point', steps_ahead=horizon)
-
-intervals = model.predict(test[:10], type='point', alpha=.05, steps_ahead=horizon)
-print(test[:10])
-print(intervals)
-#distributions = model.predict(test[:10], type='distribution', steps_ahead=horizon, num_bins=100)
-
-fig, ax = plt.subplots(nrows=1, ncols=1,figsize=[15,5])
-ax.plot(test[:10], label='Original',color='black')
-cUtil.plot_interval2(intervals, test[:10], start_at=model.order, ax=ax)
-#cUtil.plot_distribution2(distributions, test[:10], start_at=model.order, ax=ax, cmap="Blues")
-print("")
-
-'''
-model = fts.FCM_FTS(partitioner=fs, order=1)
-
-model.fcm.weights = np.array([
-    [1, 1, 0, -1, -1],
-    [1, 1, 1, 0, -1],
-    [0, 1, 1, 1, 0],
-    [-1, 0, 1, 1, 1],
-    [-1, -1, 0, 1, 1]
-])
-
-print(data)
-
-print(model.forecast(data))
-'''
-
-'''
-dataset = pd.read_csv('https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj', sep=';')
-dataset['data'] = pd.to_datetime(dataset["data"], format='%Y-%m-%d %H:%M:%S')
-
-train_mv = dataset.iloc[:24505]
-test_mv = dataset.iloc[24505:24605]
-
-from itertools import product
-
-levels = ['VL', 'L', 'M', 'H', 'VH']
-sublevels = [str(k) for k in np.arange(0, 7)]
-names = []
-for combination in product(*[levels, sublevels]):
-    names.append(combination[0] + combination[1])
-
-print(names)
-
-from pyFTS.models.multivariate import common, variable, mvfts
-from pyFTS.models.seasonal import partitioner as seasonal
-from pyFTS.models.seasonal.common import DateTime
-
-sp = {'seasonality': DateTime.day_of_year , 'names': ['Jan','Feb','Mar','Apr','May',
-                                                      'Jun','Jul', 'Aug','Sep','Oct',
-                                                      'Nov','Dec']}
-
-vmonth = variable.Variable("Month", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=12,
-                           data=train_mv, partitioner_specific=sp)
-
-sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k)+'hs' for k in range(0,24)]}
-
-vhour = variable.Variable("Hour", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=24,
-                          data=train_mv, partitioner_specific=sp)
-
-vavg = variable.Variable("Radiation", data_label="glo_avg", alias='rad',
-                         partitioner=Grid.GridPartitioner, npart=35, partitioner_specific={'names': names},
-                         data=train_mv)
-
-from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid
-
-parameters = [
-    {}, {},
-    {'order': 2, 'knn': 1},
-    {'order': 2, 'knn': 2},
-    {'order': 2, 'knn': 3},
-]
-
-from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid, granular
-from pyFTS.benchmarks import Measures
-
-time_generator = lambda x : pd.to_datetime(x) + pd.to_timedelta(1, unit='h')
-
-model = granular.GranularWMVFTS(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg, order=2, knn=2)
-
-model.fit(train_mv)
-
-forecasts = model.predict(test_mv, type='multivariate', generators={'data': time_generator}, steps_ahead=24 )
-
-print(forecasts)
-'''
-
-'''
-from pyFTS.data import lorentz
-df = lorentz.get_dataframe(iterations=5000)
-train = df.iloc[:4000]
-test = df.iloc[4000:]
-
-from pyFTS.models.multivariate import common, variable, mvfts
-from pyFTS.partitioners import Grid
-
-vx = variable.Variable("x", data_label="x", alias='x', partitioner=Grid.GridPartitioner, npart=45, data=train)
-vy = variable.Variable("y", data_label="y", alias='y', partitioner=Grid.GridPartitioner, npart=45, data=train)
-vz = variable.Variable("z", data_label="z", alias='z', partitioner=Grid.GridPartitioner, npart=45, data=train)
-
-from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid, granular
-from pyFTS.benchmarks import Measures
-
-model = granular.GranularWMVFTS(explanatory_variables=[vx, vy, vz], target_variable=vx, order=5, knn=2)
-
-model.fit(train)
-
-forecasts = model.predict(test, type='multivariate', steps_ahead=20)
-
-print(forecasts)
-'''
+datasets = {}
+datasets['TAIEX'] = TAIEX.get_data()[:5000]
+datasets['NASDAQ'] = NASDAQ.get_data()[:5000]
+datasets['SP500'] = SP500.get_data()[10000:15000]
+
+methods = [ensemble.SimpleEnsembleFTS]*8
+
+methods_parameters = [
+    {'name': 'EnsembleFTS-HOFTS-10-.05', 'fts_method': hofts.HighOrderFTS, 'partitions': np.arange(20,50,10), 'alpha': .05},
+    {'name': 'EnsembleFTS-HOFTS-5-.05', 'fts_method': hofts.HighOrderFTS, 'partitions': np.arange(20,50,5), 'alpha': .05},
+    {'name': 'EnsembleFTS-HOFTS-10-.25', 'fts_method': hofts.HighOrderFTS, 'partitions': np.arange(20,50,10), 'alpha': .25},
+    {'name': 'EnsembleFTS-HOFTS-5-.25', 'fts_method': hofts.HighOrderFTS, 'partitions': np.arange(20,50,5), 'alpha': .25},
+    {'name': 'EnsembleFTS-WHOFTS-10-.05', 'fts_method': hofts.WeightedHighOrderFTS, 'partitions': np.arange(20,50,10), 'alpha': .05},
+    {'name': 'EnsembleFTS-WHOFTS-5-.05', 'fts_method': hofts.WeightedHighOrderFTS, 'partitions': np.arange(20,50,5), 'alpha': .05},
+    {'name': 'EnsembleFTS-WHOFTS-10-.25', 'fts_method': hofts.WeightedHighOrderFTS, 'partitions': np.arange(20,50,10), 'alpha': .25},
+    {'name': 'EnsembleFTS-WHOFTS-5-.25', 'fts_method': hofts.WeightedHighOrderFTS, 'partitions': np.arange(20,50,5), 'alpha': .25},
+]
+
+for dataset_name, dataset in datasets.items():
+    bchmk.sliding_window_benchmarks2(dataset, 1000, train=0.8, inc=0.2,
+                                     methods=methods,
+                                     methods_parameters=methods_parameters,
+                                     benchmark_models=False,
+                                     transformations=[None],
+                                     orders=[3],
+                                     partitions=[None],
+                                     type='interval',
+                                     #distributed=True, nodes=['192.168.0.110', '192.168.0.107','192.168.0.106'],
+                                     file="tmp.db", dataset=dataset_name, tag="gridsearch")