Minor code standardization on models; improvements and bugfixes on benchmarks
This commit is contained in:
parent 191ddf90d8
commit ffd97bacfc
@@ -205,14 +205,35 @@ def pinball_mean(tau, targets, forecasts):
    :param forecasts: list of prediction intervals
    :return: float, the mean pinball loss for the tau quantile
    """
    try:
        if tau <= 0.5:
            preds = [pinball(tau, targets[i], forecasts[i][0]) for i in np.arange(0, len(forecasts))]
        else:
            preds = [pinball(tau, targets[i], forecasts[i][1]) for i in np.arange(0, len(forecasts))]
        return np.nanmean(preds)
    except Exception as ex:
        print(ex)


def winkler_score(tau, target, forecast):
    '''R. L. Winkler, A Decision-Theoretic Approach to Interval Estimation, J. Am. Stat. Assoc. 67 (337) (1972) 187–191. doi:10.2307/2284720.'''
    delta = forecast[1] - forecast[0]
    if forecast[0] <= target <= forecast[1]:  # targets on the bounds count as covered
        return delta
    elif forecast[0] > target:
        return delta + 2 * (forecast[0] - target) / tau
    elif forecast[1] < target:
        return delta + 2 * (target - forecast[1]) / tau


def winkler_mean(tau, targets, forecasts):
    """
    Mean Winkler score value of the forecast for a given tau-quantile of the targets

    :param tau: quantile value in the range (0,1)
    :param targets: list of target values
    :param forecasts: list of prediction intervals
    :return: float, the mean Winkler score for the tau quantile
    """
    preds = [winkler_score(tau, targets[i], forecasts[i]) for i in np.arange(0, len(forecasts))]

    return np.nanmean(preds)


def brier_score(targets, densities):
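For reference, a minimal sketch of how the two interval measures above (module-level functions in pyFTS.benchmarks.Measures) could be exercised; the target values and intervals are made up for illustration:

    from pyFTS.benchmarks import Measures

    targets = [10.0, 12.0, 11.5]                            # hypothetical observed values
    forecasts = [[9.0, 11.0], [10.5, 13.0], [12.0, 14.0]]   # [lower, upper] prediction intervals

    # tau = 0.05 scores a 95% interval: the width, plus a 2/tau penalty per miss
    print(Measures.winkler_mean(0.05, targets, forecasts))

    # tau <= 0.5 scores the lower bounds, tau > 0.5 the upper bounds
    print(Measures.pinball_mean(0.25, targets, forecasts))
    print(Measures.pinball_mean(0.75, targets, forecasts))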
@@ -348,6 +369,8 @@ def get_interval_statistics(data, model, **kwargs):
        ret.append(round(pinball_mean(0.25, data[model.order:], forecasts[:-1]), 2))
        ret.append(round(pinball_mean(0.75, data[model.order:], forecasts[:-1]), 2))
        ret.append(round(pinball_mean(0.95, data[model.order:], forecasts[:-1]), 2))
        ret.append(round(winkler_mean(0.05, data[model.order:], forecasts[:-1]), 2))
        ret.append(round(winkler_mean(0.25, data[model.order:], forecasts[:-1]), 2))
    else:
        forecasts = []
        for k in np.arange(model.order, len(data) - steps_ahead):
@@ -363,6 +386,8 @@ def get_interval_statistics(data, model, **kwargs):
        ret.append(round(pinball_mean(0.25, data[start:], forecasts), 2))
        ret.append(round(pinball_mean(0.75, data[start:], forecasts), 2))
        ret.append(round(pinball_mean(0.95, data[start:], forecasts), 2))
        ret.append(round(winkler_mean(0.05, data[start:], forecasts), 2))
        ret.append(round(winkler_mean(0.25, data[start:], forecasts), 2))
    return ret


@@ -18,6 +18,11 @@ from pyFTS.common import Util


def open_benchmark_db(name):
    conn = sqlite3.connect(name)

    # performance optimizations
    conn.execute("PRAGMA journal_mode = WAL")
    conn.execute("PRAGMA synchronous = NORMAL")

    create_benchmark_tables(conn)
    return conn


@@ -31,7 +36,6 @@ def create_benchmark_tables(conn):
                 Scheme text, Partitions int,
                 Size int, Steps int, Method text, Measure text, Value real)''')

    # Save (commit) the changes
    conn.commit()
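The WAL journal mode lets readers proceed while benchmark workers append results, and synchronous = NORMAL trades a little durability for faster writes. A hedged sketch of how the helper is consumed, following the one-row-per-measure pattern of process_interval_jobs further down in this commit (the shared-column list is a placeholder):

    from copy import deepcopy
    from pyFTS.benchmarks import Util as bUtil

    conn = bUtil.open_benchmark_db("benchmarks.db")

    data = []  # placeholder: the shared experiment columns (dataset, tag, model, scheme, ...)
    row = deepcopy(data)
    row.extend(["winkler05", 0.42])   # one (Measure, Value) row per metric; the value is hypothetical
    bUtil.insert_benchmark(row, conn)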
@@ -13,8 +13,8 @@ class ARIMA(fts.FTS):
    """
    Façade for statsmodels.tsa.arima_model
    """
    def __init__(self, name, **kwargs):
        super(ARIMA, self).__init__(1, "ARIMA"+name)
    def __init__(self, **kwargs):
        super(ARIMA, self).__init__(**kwargs)
        self.name = "ARIMA"
        self.detail = "Auto Regressive Integrated Moving Average"
        self.is_high_order = True
@@ -44,11 +44,8 @@ class ARIMA(fts.FTS):

    def train(self, data, **kwargs):

        self.original_min = np.nanmin(data)
        self.original_max = np.nanmax(data)

        if kwargs.get('order', None) is not None:
            order = kwargs.get('order', (1,0,0))
        if 'order' in kwargs:
            order = kwargs.pop('order')
        self._decompose_order(order)

        if self.indexer is not None:
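The same signature migration recurs across every model touched by this commit: the positional name argument is dropped and everything flows through **kwargs. A hedged before/after sketch (train_data is a stand-in for any series):

    from pyFTS.benchmarks import arima

    # before: model = arima.ARIMA("", order=(2, 0, 2))
    model = arima.ARIMA()
    model.fit(train_data, order=(2, 0, 2))   # the 'order' keyword is consumed during fitting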
@@ -156,12 +156,12 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):

    if models is None:
        for method in methods:
            mfts = method("")
            mfts = method()

            if mfts.is_high_order:
                for order in orders:
                    if order >= mfts.min_order:
                        mfts = method("")
                        mfts = method()
                        mfts.order = order
                        pool.append(mfts)
            else:
@@ -190,7 +190,7 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
            for transformation in transformations:
                for count, model in enumerate(benchmark_methods, start=0):
                    par = benchmark_methods_parameters[count]
                    mfts = model("", **par)
                    mfts = model(**par)
                    mfts.append_transformation(transformation)
                    benchmark_pool.append(mfts)

@@ -203,6 +203,8 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
    elif type == 'distribution':
        experiment_method = run_probabilistic
        synthesis_method = process_probabilistic_jobs
    else:
        raise ValueError("Type parameter has an unknown value!")

    if distributed:
        import dispy, dispy.httpd
@@ -213,28 +215,29 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
    experiments = 0
    jobs = []

    inc = __pop("inc", 0.1, kwargs)

    if progress:
        from tqdm import tqdm
        progressbar = tqdm(total=len(data), desc="Sliding Window:")

    inc = __pop("inc", 0.1, kwargs)
        _tdata = len(data) / (windowsize * inc)
        _tasks = (len(partitioners_models) * len(orders) * len(partitions) * len(transformations) * len(steps_ahead))
        _tbcmk = len(benchmark_pool)*len(steps_ahead)
        progressbar = tqdm(total=_tdata*_tasks + _tdata*_tbcmk, desc="Benchmarks:")

    file = kwargs.get('file', "benchmarks.db")

    conn = bUtil.open_benchmark_db(file)

    for ct, train, test in cUtil.sliding_window(data, windowsize, train, inc=inc, **kwargs):
        experiments += 1

        if progress:
            progressbar.update(windowsize * inc)

        if benchmark_models != False:
            for model in benchmark_pool:
                for step in steps_ahead:

                    kwargs['steps_ahead'] = step

                    if not distributed:
                        if progress:
                            progressbar.update(1)
                        job = experiment_method(deepcopy(model), None, train, test, **kwargs)
                        synthesis_method(dataset, tag, job, conn)
                    else:
@@ -257,28 +260,17 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
        else:
            partitioners_pool = partitioners_models

        rng1 = steps_ahead
        if progress:
            rng1 = tqdm(steps_ahead, desc="Steps")
        for step in steps_ahead:

        for step in rng1:
            rng2 = partitioners_pool
            for partitioner in partitioners_pool:

            if progress:
                rng2 = tqdm(partitioners_pool, desc="Partitioners")

            for partitioner in rng2:

                rng3 = enumerate(pool, start=0)

                if progress:
                    rng3 = enumerate(tqdm(pool, desc="Models"), start=0)

                for _id, model in rng3:
                for _id, model in enumerate(pool, start=0):

                    kwargs['steps_ahead'] = step

                    if not distributed:
                        if progress:
                            progressbar.update(1)
                        job = experiment_method(deepcopy(model), deepcopy(partitioner), train, test, **kwargs)
                        synthesis_method(dataset, tag, job, conn)
                    else:
@@ -291,12 +283,9 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):

    if distributed:

        rng = jobs

        for job in jobs:
        if progress:
            rng = tqdm(jobs)

        for job in rng:
            progressbar.update(1)
            job()
            if job is not None and job.status == dispy.DispyJob.Finished:
                tmp = job.result
@@ -424,13 +413,15 @@ def run_interval(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
    times = _end - _start

    _start = time.time()
    _sharp, _res, _cov, _q05, _q25, _q75, _q95 = Measures.get_interval_statistics(test_data, mfts, **kwargs)
    #_sharp, _res, _cov, _q05, _q25, _q75, _q95, _w05, _w25
    metrics = Measures.get_interval_statistics(test_data, mfts, **kwargs)
    _end = time.time()
    times += _end - _start

    ret = {'key': _key, 'obj': mfts, 'sharpness': _sharp, 'resolution': _res, 'coverage': _cov, 'time': times,
           'Q05': _q05, 'Q25': _q25, 'Q75': _q75, 'Q95': _q95, 'window': window_key,
           'steps': steps_ahead, 'method': method}
    ret = {'key': _key, 'obj': mfts, 'sharpness': metrics[0], 'resolution': metrics[1], 'coverage': metrics[2],
           'time': times, 'Q05': metrics[3], 'Q25': metrics[4], 'Q75': metrics[5], 'Q95': metrics[6],
           'winkler05': metrics[7], 'winkler25': metrics[8],
           'window': window_key, 'steps': steps_ahead, 'method': method}

    return ret
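With the two Winkler means appended, get_interval_statistics now returns nine values in a fixed order; a sketch of the positional layout the new run_interval relies on:

    metrics = Measures.get_interval_statistics(test_data, model, **kwargs)
    # metrics[0] sharpness, metrics[1] resolution, metrics[2] coverage,
    # metrics[3:7] pinball losses Q05, Q25, Q75, Q95,
    # metrics[7] winkler05, metrics[8] winkler25
    sharpness, resolution, coverage, q05, q25, q75, q95, w05, w25 = metrics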
@@ -543,6 +534,12 @@ def process_interval_jobs(dataset, tag, job, conn):
    Q95 = deepcopy(data)
    Q95.extend(["Q95", job["Q95"]])
    bUtil.insert_benchmark(Q95, conn)
    W05 = deepcopy(data)
    W05.extend(["winkler05", job["winkler05"]])
    bUtil.insert_benchmark(W05, conn)
    W25 = deepcopy(data)
    W25.extend(["winkler25", job["winkler25"]])
    bUtil.insert_benchmark(W25, conn)


def process_probabilistic_jobs(dataset, tag, job, conn):
@@ -11,8 +11,8 @@ class KNearestNeighbors(fts.FTS):
    """
    K-Nearest Neighbors
    """
    def __init__(self, name, **kwargs):
        super(KNearestNeighbors, self).__init__(1, "kNN"+name)
    def __init__(self, **kwargs):
        super(KNearestNeighbors, self).__init__(**kwargs)
        self.name = "kNN"
        self.shortname = "kNN"
        self.detail = "K-Nearest Neighbors"
@@ -23,20 +23,12 @@ class KNearestNeighbors(fts.FTS):
        self.benchmark_only = True
        self.min_order = 1
        self.alpha = kwargs.get("alpha", 0.05)
        self.order = kwargs.get("order", 1)
        self.lag = None
        self.k = kwargs.get("k", 30)
        self.uod = None

    def train(self, data, **kwargs):
        if kwargs.get('order', None) is not None:
            self.order = kwargs.get('order', 1)

        self.data = np.array(data)
        self.original_max = max(data)
        self.original_min = min(data)

        #self.lagdata, = lagmat(data, maxlag=self.order, trim="both", original='sep')


    def knn(self, sample):
@@ -6,8 +6,8 @@ from pyFTS.common import fts

class Naive(fts.FTS):
    """Naïve Forecasting method"""
    def __init__(self, name, **kwargs):
        super(Naive, self).__init__(1, "Naive")
    def __init__(self, **kwargs):
        super(Naive, self).__init__(order=1, name="Naive", **kwargs)
        self.name = "Naïve Model"
        self.detail = "Naïve Model"
        self.benchmark_only = True
@@ -11,8 +11,8 @@ from pyFTS.probabilistic import ProbabilityDistribution

class QuantileRegression(fts.FTS):
    """Façade for statsmodels.regression.quantile_regression"""
    def __init__(self, name, **kwargs):
        super(QuantileRegression, self).__init__(1, "")
    def __init__(self, **kwargs):
        super(QuantileRegression, self).__init__(**kwargs)
        self.name = "QR"
        self.detail = "Quantile Regression"
        self.is_high_order = True
@@ -27,13 +27,8 @@ class QuantileRegression(fts.FTS):
        self.mean_qt = None
        self.lower_qt = None
        self.dist_qt = None
        self.order = kwargs.get('order', 1)
        self.shortname = "QAR("+str(self.order)+","+str(self.alpha)+")"

    def train(self, data, **kwargs):
        if 'order' in kwargs:
            self.order = kwargs.get('order', 1)

        if self.indexer is not None and isinstance(data, pd.DataFrame):
            data = self.indexer.get_data(data)

@@ -58,9 +53,6 @@ class QuantileRegression(fts.FTS):
            up_qt = [k for k in uqt.params]
            self.dist_qt.append([lo_qt, up_qt])

        self.original_min = min(data)
        self.original_max = max(data)

        self.shortname = "QAR(" + str(self.order) + ") - " + str(self.alpha)

    def linearmodel(self, data, params):
@@ -38,10 +38,10 @@ class FLRG(object):
            self.key = self.key + n
        return self.key

    def get_membership(self, data, sets):
        ret = 0.0
        if isinstance(self.LHS, (list, set)):
            if len(self.LHS) == len(data):
                ret = np.nanmin([sets[self.LHS[ct]].membership(dat) for ct, dat in enumerate(data)])
        else:
            ret = sets[self.LHS].membership(data)
@@ -7,7 +7,7 @@ class FTS(object):
    """
    Fuzzy Time Series object model
    """
    def __init__(self, order, name, **kwargs):
    def __init__(self, **kwargs):
        """
        Create a Fuzzy Time Series model
        :param order: model order
@@ -16,10 +16,10 @@ class FTS(object):
        """
        self.sets = {}
        self.flrgs = {}
        self.order = order
        self.shortname = name
        self.name = name
        self.detail = name
        self.order = kwargs.get('order', 1)
        self.shortname = kwargs.get('name', "")
        self.name = kwargs.get('name', "")
        self.detail = kwargs.get('name', "")
        self.is_high_order = False
        self.min_order = 1
        self.has_seasonality = False
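With the base class reading everything from **kwargs, models are now built with keyword arguments only; a minimal sketch using constructs that appear elsewhere in this commit (train_data is a stand-in series):

    from pyFTS.partitioners import Grid
    from pyFTS.models import hofts

    part = Grid.GridPartitioner(data=train_data, npart=35)
    model = hofts.HighOrderFTS(partitioner=part, order=2)   # was: hofts.HighOrderFTS("", ...)
    model.fit(train_data)
    print(model.shortname, model.order)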
@@ -75,6 +75,8 @@ class FTS(object):
        else:
            ndata = self.apply_transformations(data)

        ndata = np.clip(ndata, self.original_min, self.original_max)

        if 'distributed' in kwargs:
            distributed = kwargs.pop('distributed')
        else:
@@ -222,6 +224,24 @@ class FTS(object):
        else:
            data = self.apply_transformations(ndata)

        self.original_min = np.nanmin(data)
        self.original_max = np.nanmax(data)

        if 'sets' in kwargs:
            self.sets = kwargs.pop('sets')

        if 'partitioner' in kwargs:
            self.partitioner = kwargs.pop('partitioner')

        if (self.sets is None or len(self.sets) == 0) and not self.benchmark_only:
            if self.partitioner is not None:
                self.sets = self.partitioner.sets
            else:
                raise Exception("Fuzzy sets were not provided for the model. Use the 'sets' or 'partitioner' parameters.")

        if 'order' in kwargs:
            self.order = kwargs.pop('order')

        dump = kwargs.get('dump', None)

        num_batches = kwargs.get('num_batches', None)
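This centralization is what lets the per-model train() methods shed their boilerplate (the removed sets/order handling visible in the model hunks below); callers hand everything to fit(). A hedged sketch, reusing the names from the sketch above:

    # either via a partitioner...
    model.fit(train_data, partitioner=part, order=2)
    # ...or explicit fuzzy sets; with neither (and benchmark_only unset) fit() raises
    model.fit(train_data, sets=part.sets, order=2)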
@@ -3,7 +3,7 @@ import pandas as pd
import numpy as np


def get_data(field):
def get_data(field="avg"):
    """
    Get a simple univariate time series data.
    :param field: the dataset field name to extract
@@ -21,6 +21,6 @@ def get_dataframe():
    """
    dat = common.get_dataframe('NASDAQ.csv.bz2',
                               'https://github.com/petroniocandido/pyFTS/raw/8f20f3634aa6a8f58083bdcd1bbf93795e6ed767/pyFTS/data/NASDAQ.csv.bz2',
                               sep=";", compression='bz2')
                               sep=",", compression='bz2')
    return dat
@@ -33,10 +33,11 @@ class ConventionalFLRG(flrg.FLRG):

class ConventionalFTS(fts.FTS):
    """Conventional Fuzzy Time Series"""
    def __init__(self, name, **kwargs):
        super(ConventionalFTS, self).__init__(1, "CFTS " + name, **kwargs)
    def __init__(self, **kwargs):
        super(ConventionalFTS, self).__init__(order=1, **kwargs)
        self.name = "Conventional FTS"
        self.detail = "Chen"
        self.shortname = "CFTS"
        self.flrgs = {}

    def generate_flrg(self, flrs):
@@ -48,10 +49,6 @@ class ConventionalFTS(fts.FTS):
            self.flrgs[flr.LHS].append_rhs(flr.RHS)

    def train(self, data, **kwargs):
        if kwargs.get('sets', None) is not None:
            self.sets = kwargs.get('sets', None)
        else:
            self.sets = self.partitioner.sets

        tmpdata = FuzzySet.fuzzyfy_series(data, self.sets, method='maximum')
        flrs = FLR.generate_non_recurrent_flrs(tmpdata)
@@ -45,9 +45,9 @@ class TrendWeightedFLRG(yu.WeightedFLRG):

class TrendWeightedFTS(yu.WeightedFTS):
    """First Order Trend Weighted Fuzzy Time Series"""
    def __init__(self, name, **kwargs):
        super(TrendWeightedFTS, self).__init__("", **kwargs)
        self.shortname = "TWFTS " + name
    def __init__(self, **kwargs):
        super(TrendWeightedFTS, self).__init__(**kwargs)
        self.shortname = "TWFTS"
        self.name = "Trend Weighted FTS"
        self.detail = "Cheng"
        self.is_high_order = False
@@ -17,9 +17,9 @@ def sampler(data, quantiles):


class EnsembleFTS(fts.FTS):
    def __init__(self, name, **kwargs):
        super(EnsembleFTS, self).__init__(1, "Ensemble FTS", **kwargs)
        self.shortname = "Ensemble FTS " + name
    def __init__(self, **kwargs):
        super(EnsembleFTS, self).__init__(**kwargs)
        self.shortname = "Ensemble FTS"
        self.name = "Ensemble FTS"
        self.flrgs = {}
        self.has_point_forecasting = True
@@ -29,7 +29,6 @@ class EnsembleFTS(fts.FTS):
        self.models = []
        self.parameters = []
        self.alpha = kwargs.get("alpha", 0.05)
        self.order = 1
        self.point_method = kwargs.get('point_method', 'mean')
        self.interval_method = kwargs.get('interval_method', 'quantile')

@@ -39,8 +38,7 @@ class EnsembleFTS(fts.FTS):
            self.order = model.order

    def train(self, data, **kwargs):
        self.original_max = max(data)
        self.original_min = min(data)
        pass

    def get_models_forecasts(self, data):
        tmp = []
@@ -246,8 +244,8 @@ class EnsembleFTS(fts.FTS):


class AllMethodEnsembleFTS(EnsembleFTS):
    def __init__(self, name, **kwargs):
        super(AllMethodEnsembleFTS, self).__init__(name="Ensemble FTS"+name, **kwargs)
    def __init__(self, **kwargs):
        super(AllMethodEnsembleFTS, self).__init__(**kwargs)
        self.min_order = 3
        self.shortname = "Ensemble FTS"

@@ -256,26 +254,22 @@ class AllMethodEnsembleFTS(EnsembleFTS):
            model.append_transformation(t)

    def train(self, data, **kwargs):
        self.original_max = max(data)
        self.original_min = min(data)

        order = kwargs.get('order', 2)

        fo_methods = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, cheng.TrendWeightedFTS,
                      sadaei.ExponentialyWeightedFTS, ismailefendi.ImprovedWeightedFTS]

        ho_methods = [hofts.HighOrderFTS, hwang.HighOrderFTS]

        for method in fo_methods:
            model = method("", partitioner=self.partitioner)
            model = method(partitioner=self.partitioner)
            self.set_transformations(model)
            model.fit(data, **kwargs)
            self.append_model(model)

        for method in ho_methods:
            for o in np.arange(1, order+1):
                model = method("", partitioner=self.partitioner)
            for o in np.arange(1, self.order+1):
                model = method(partitioner=self.partitioner)
                if model.min_order >= o:
                    model.order = o
                    self.set_transformations(model)
                    model.fit(data, **kwargs)
                    self.append_model(model)
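A hedged sketch of building the refactored ensemble, which now takes its maximum order at construction time instead of through train() (part and train_data as in the earlier sketches):

    from pyFTS.models.ensemble import ensemble

    model = ensemble.AllMethodEnsembleFTS(partitioner=part, order=3)
    model.fit(train_data)  # fits each first-order method once, and high-order methods per order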
@@ -38,13 +38,11 @@ class HighOrderFLRG(flrg.FLRG):

class HighOrderFTS(fts.FTS):
    """Conventional High Order Fuzzy Time Series"""
    def __init__(self, name, **kwargs):
        super(HighOrderFTS, self).__init__(1, name="HOFTS" + name, **kwargs)
    def __init__(self, **kwargs):
        super(HighOrderFTS, self).__init__(**kwargs)
        self.name = "High Order FTS"
        self.shortname = "HOFTS" + name
        self.shortname = "HOFTS"
        self.detail = "Chen"
        self.order = kwargs.get('order', 1)
        self.setsDict = {}
        self.is_high_order = True
        self.min_order = 2

@@ -94,13 +92,6 @@ class HighOrderFTS(fts.FTS):

    def train(self, data, **kwargs):

        self.order = kwargs.get('order', 2)

        if kwargs.get('sets', None) is not None:
            self.sets = kwargs.get('sets', None)
        else:
            self.sets = self.partitioner.sets

        self.generate_flrg(data)

    def forecast(self, ndata, **kwargs):
@@ -115,9 +106,11 @@ class HighOrderFTS(fts.FTS):
        for k in np.arange(self.order, l+1):
            flrgs = self.generate_lhs_flrg(ndata[k - self.order: k])

            for flrg in flrgs:
            tmp = []
            for flrg in flrgs:

                if flrg.get_key() not in self.flrgs:
                    if len(flrg.LHS) > 0:
                        tmp.append(self.sets[flrg.LHS[-1]].centroid)
                else:
                    flrg = self.flrgs[flrg.get_key()]
@@ -10,12 +10,12 @@ from pyFTS.common import FuzzySet, FLR, Transformations, fts


class HighOrderFTS(fts.FTS):
    def __init__(self, name, **kwargs):
        super(HighOrderFTS, self).__init__(1, name, **kwargs)
    def __init__(self, **kwargs):
        super(HighOrderFTS, self).__init__(**kwargs)
        self.is_high_order = True
        self.min_order = 2
        self.name = "Hwang High Order FTS"
        self.shortname = "Hwang" + name
        self.shortname = "Hwang"
        self.detail = "Hwang"

    def forecast(self, ndata, **kwargs):
@@ -57,9 +57,4 @@ class HighOrderFTS(fts.FTS):
        return ret

    def train(self, data, **kwargs):
        if kwargs.get('sets', None) is not None:
            self.sets = kwargs.get('sets', None)
        else:
            self.sets = self.partitioner.sets

        self.order = kwargs.get('order', 2)
        pass
@@ -17,9 +17,9 @@ class IntervalFTS(hofts.HighOrderFTS):
    """
    High Order Interval Fuzzy Time Series
    """
    def __init__(self, name, **kwargs):
        super(IntervalFTS, self).__init__(name="IFTS " + name, **kwargs)
        self.shortname = "IFTS " + name
    def __init__(self, **kwargs):
        super(IntervalFTS, self).__init__(**kwargs)
        self.shortname = "IFTS"
        self.name = "Interval FTS"
        self.detail = "Silva, P.; Guimarães, F.; Sadaei, H. (2016)"
        self.flrgs = {}
@@ -29,6 +29,8 @@ class IntervalFTS(hofts.HighOrderFTS):
        self.min_order = 1

    def get_upper(self, flrg):
        ret = np.nan
        if len(flrg.LHS) > 0:
            if flrg.get_key() in self.flrgs:
                tmp = self.flrgs[flrg.get_key()]
                ret = tmp.get_upper(self.sets)
@@ -37,6 +39,8 @@ class IntervalFTS(hofts.HighOrderFTS):
        return ret

    def get_lower(self, flrg):
        ret = np.nan
        if len(flrg.LHS) > 0:
            if flrg.get_key() in self.flrgs:
                tmp = self.flrgs[flrg.get_key()]
                ret = tmp.get_lower(self.sets)
@@ -69,6 +73,7 @@ class IntervalFTS(hofts.HighOrderFTS):
        affected_flrgs_memberships = []

        for flrg in flrgs:
            if len(flrg.LHS) > 0:
                # find the bounds of each FLRG, weighted by its membership value
                mv = flrg.get_membership(sample, self.sets)
                up.append(mv * self.get_upper(flrg))
@@ -47,8 +47,8 @@ class ImprovedWeightedFLRG(flrg.FLRG):

class ImprovedWeightedFTS(fts.FTS):
    """First Order Improved Weighted Fuzzy Time Series"""
    def __init__(self, name, **kwargs):
        super(ImprovedWeightedFTS, self).__init__(1, "IWFTS " + name, **kwargs)
    def __init__(self, **kwargs):
        super(ImprovedWeightedFTS, self).__init__(order=1, name="IWFTS", **kwargs)
        self.name = "Improved Weighted FTS"
        self.detail = "Ismail & Efendi"

@@ -61,10 +61,6 @@ class ImprovedWeightedFTS(fts.FTS):
            self.flrgs[flr.LHS].append_rhs(flr.RHS)

    def train(self, ndata, **kwargs):
        if kwargs.get('sets', None) is not None:
            self.sets = kwargs.get('sets', None)
        else:
            self.sets = self.partitioner.sets

        tmpdata = FuzzySet.fuzzyfy_series(ndata, self.sets, method='maximum')
        flrs = FLR.generate_recurrent_flrs(tmpdata)
@@ -73,6 +69,9 @@ class ImprovedWeightedFTS(fts.FTS):
    def forecast(self, ndata, **kwargs):
        l = 1

        if self.partitioner is not None:
            ordered_sets = self.partitioner.ordered_sets
        else:
            ordered_sets = FuzzySet.set_ordered(self.sets)

        ndata = np.array(ndata)
@@ -11,7 +11,7 @@ class MVFTS(fts.FTS):
    Multivariate extension of Chen's ConventionalFTS method
    """
    def __init__(self, name, **kwargs):
        super(MVFTS, self).__init__(1, name, **kwargs)
        super(MVFTS, self).__init__(order=1, name=name, **kwargs)
        self.explanatory_variables = []
        self.target_variable = None
        self.flrgs = {}
@@ -91,8 +91,6 @@ class MVFTS(fts.FTS):

        ndata = self.apply_transformations(data)

        self.order = kwargs.get('order', 1)

        flrs = self.generate_flrs(ndata)
        self.generate_flrg(flrs)
@@ -94,9 +94,9 @@ class ProbabilisticWeightedFLRG(hofts.HighOrderFLRG):

class ProbabilisticWeightedFTS(ifts.IntervalFTS):
    """High Order Probabilistic Weighted Fuzzy Time Series"""
    def __init__(self, name, **kwargs):
        super(ProbabilisticWeightedFTS, self).__init__(name=name, **kwargs)
        self.shortname = "PWFTS " + name
    def __init__(self, **kwargs):
        super(ProbabilisticWeightedFTS, self).__init__(**kwargs)
        self.shortname = "PWFTS"
        self.name = "Probabilistic FTS"
        self.detail = "Silva, P.; Guimarães, F.; Sadaei, H."
        self.flrgs = {}
@@ -108,22 +108,10 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
        self.min_order = 1
        self.auto_update = kwargs.get('update', False)

    def train(self, data, **kwargs):

        data = self.apply_transformations(data, updateUoD=True)

        parameters = kwargs.get('parameters', 'fuzzy')

        self.order = kwargs.get('order', 1)

        if kwargs.get('sets', None) is None and self.partitioner is not None:
            self.sets = self.partitioner.sets
            self.original_min = self.partitioner.min
            self.original_max = self.partitioner.max
        else:
            self.sets = kwargs.get('sets', None)

        if parameters == 'monotonic':
            tmpdata = FuzzySet.fuzzyfy_series_old(data, self.sets)
            flrs = FLR.generate_recurrent_flrs(tmpdata)
@@ -237,7 +225,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
        # this may be the problem! TEST IT!!!
        ##########################################
        pi = 1 / len(flrg.LHS)
        ret = sum(np.array([pi * self.setsDict[s].membership(x) for s in flrg.LHS]))
        ret = sum(np.array([pi * self.sets[s].membership(x) for s in flrg.LHS]))
        return ret

    def get_upper(self, flrg):
@@ -51,8 +51,8 @@ class ExponentialyWeightedFLRG(flrg.FLRG):

class ExponentialyWeightedFTS(fts.FTS):
    """First Order Exponentially Weighted Fuzzy Time Series"""
    def __init__(self, name, **kwargs):
        super(ExponentialyWeightedFTS, self).__init__(1, "EWFTS", **kwargs)
    def __init__(self, **kwargs):
        super(ExponentialyWeightedFTS, self).__init__(order=1, name="EWFTS", **kwargs)
        self.name = "Exponentialy Weighted FTS"
        self.detail = "Sadaei"
        self.c = kwargs.get('c', default_c)
@@ -66,12 +66,6 @@ class ExponentialyWeightedFTS(fts.FTS):
            self.flrgs[flr.LHS].append_rhs(flr.RHS)

    def train(self, data, **kwargs):
        self.c = kwargs.get('parameters', default_c)
        if kwargs.get('sets', None) is not None:
            self.sets = kwargs.get('sets', None)
        else:
            self.sets = self.partitioner.sets

        tmpdata = FuzzySet.fuzzyfy_series(data, self.sets, method='maximum')
        flrs = FLR.generate_recurrent_flrs(tmpdata)
        self.generate_flrg(flrs, self.c)
@@ -79,6 +73,9 @@ class ExponentialyWeightedFTS(fts.FTS):
    def forecast(self, ndata, **kwargs):
        l = 1

        if self.partitioner is not None:
            ordered_sets = self.partitioner.ordered_sets
        else:
            ordered_sets = FuzzySet.set_ordered(self.sets)

        data = np.array(ndata)
@@ -10,8 +10,8 @@ from pyFTS.common import FuzzySet, FLR, fts

class ConventionalFTS(fts.FTS):
    """Traditional Fuzzy Time Series"""
    def __init__(self, name, **kwargs):
        super(ConventionalFTS, self).__init__(1, "FTS " + name, **kwargs)
    def __init__(self, **kwargs):
        super(ConventionalFTS, self).__init__(order=1, name="FTS", **kwargs)
        self.name = "Traditional FTS"
        self.detail = "Song & Chissom"
        if self.sets is not None and self.partitioner is not None:
@@ -49,10 +49,6 @@ class ConventionalFTS(fts.FTS):


    def train(self, data, **kwargs):
        if kwargs.get('sets', None) is not None:
            self.sets = kwargs.get('sets', None)
        else:
            self.sets = self.partitioner.sets

        tmpdata = FuzzySet.fuzzyfy_series(data, self.sets, method='maximum')
        flrs = FLR.generate_non_recurrent_flrs(tmpdata)
@@ -60,7 +56,10 @@ class ConventionalFTS(fts.FTS):

    def forecast(self, ndata, **kwargs):

        ordered_set = FuzzySet.set_ordered(self.sets)
        if self.partitioner is not None:
            ordered_sets = self.partitioner.ordered_sets
        else:
            ordered_sets = FuzzySet.set_ordered(self.sets)

        l = len(ndata)
        npart = len(self.sets)
@@ -75,9 +74,9 @@ class ConventionalFTS(fts.FTS):
            fs = np.ravel(np.argwhere(r == max(r)))

            if len(fs) == 1:
                ret.append(self.sets[ordered_set[fs[0]]].centroid)
                ret.append(self.sets[ordered_sets[fs[0]]].centroid)
            else:
                mp = [self.sets[ordered_set[s]].centroid for s in fs]
                mp = [self.sets[ordered_sets[s]].centroid for s in fs]

                ret.append(sum(mp) / len(mp))
@@ -44,8 +44,8 @@ class WeightedFLRG(flrg.FLRG):

class WeightedFTS(fts.FTS):
    """First Order Weighted Fuzzy Time Series"""
    def __init__(self, name, **kwargs):
        super(WeightedFTS, self).__init__(1, "WFTS " + name, **kwargs)
    def __init__(self, **kwargs):
        super(WeightedFTS, self).__init__(order=1, name="WFTS", **kwargs)
        self.name = "Weighted FTS"
        self.detail = "Yu"

@@ -58,17 +58,15 @@ class WeightedFTS(fts.FTS):
            self.flrgs[flr.LHS].append_rhs(flr.RHS)

    def train(self, ndata, **kwargs):
        if kwargs.get('sets', None) is not None:
            self.sets = kwargs.get('sets', None)
        else:
            self.sets = self.partitioner.sets

        tmpdata = FuzzySet.fuzzyfy_series(ndata, self.sets, method='maximum')
        flrs = FLR.generate_recurrent_flrs(tmpdata)
        self.generate_FLRG(flrs)

    def forecast(self, ndata, **kwargs):

        if self.partitioner is not None:
            ordered_sets = self.partitioner.ordered_sets
        else:
            ordered_sets = FuzzySet.set_ordered(self.sets)

        ndata = np.array(ndata)
@@ -11,35 +11,61 @@ from pyFTS.common import Transformations

tdiff = Transformations.Differential(1)

from pyFTS.data import TAIEX, SP500
from pyFTS.data import TAIEX, SP500, NASDAQ

#dataset = TAIEX.get_data()
dataset = SP500.get_data()[11500:16000]
#dataset = NASDAQ.get_data()
#print(len(dataset))
'''
from pyFTS.partitioners import Grid, Util as pUtil
partitioner = Grid.GridPartitioner(data=dataset[:800], npart=10) #, transformation=tdiff)
'''
from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil, Measures, knn, quantreg, arima
from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil, Measures, knn, quantreg, arima, naive


from pyFTS.models import pwfts, song, ifts
from pyFTS.models.ensemble import ensemble

'''
#model = knn.KNearestNeighbors("")
model = knn.KNearestNeighbors(order=3)
#model = ensemble.AllMethodEnsembleFTS("", partitioner=partitioner)
#model = arima.ARIMA("", order=(2,0,2))
#model = quantreg.QuantileRegression("", order=2, dist=True)
model.append_transformation(tdiff)
#model.append_transformation(tdiff)
model.fit(dataset[:800])
Measures.get_distribution_statistics(dataset[800:1000], model)
print(Measures.get_distribution_statistics(dataset[800:1000], model))
#tmp = model.predict(dataset[800:1000], type='distribution')
#for tmp2 in tmp:
#    print(tmp2)
#'''
#'''

bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2,
                                methods=[pwfts.ProbabilisticWeightedFTS],
                                benchmark_models=False,
                                transformations=[None],
                                orders=[1, 2, 3],
                                partitions=np.arange(10, 90, 5),
                                progress=False, type="point",
                                #steps_ahead=[1,2,4,6,8,10],
                                distributed=True, nodes=['192.168.0.110', '192.168.0.107', '192.168.0.106'],
                                file="benchmarks.db", dataset="SP500", tag="partitioning")


bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2,
                                methods=[pwfts.ProbabilisticWeightedFTS],
                                benchmark_models=False,
                                transformations=[tdiff],
                                orders=[1, 2, 3],
                                partitions=np.arange(3, 30, 2),
                                progress=False, type="point",
                                #steps_ahead=[1,2,4,6,8,10],
                                distributed=True, nodes=['192.168.0.110', '192.168.0.107', '192.168.0.106'],
                                file="benchmarks.db", dataset="SP500", tag="partitioning")

#'''
'''
from pyFTS.partitioners import Grid, Util as pUtil
partitioner = Grid.GridPartitioner(data=dataset[:800], npart=10, transformation=tdiff)
@@ -52,24 +78,76 @@ print(Measures.get_distribution_statistics(dataset[800:1000], model, steps_ahead
#for tmp2 in tmp:
#    print(tmp2)
'''
'''

#'''
types = ['point', 'interval', 'distribution']
benchmark_methods = [
    [arima.ARIMA for k in range(4)] + [naive.Naive],
    [arima.ARIMA for k in range(8)] + [quantreg.QuantileRegression for k in range(4)],
    [arima.ARIMA for k in range(4)] + [quantreg.QuantileRegression for k in range(2)]
    + [knn.KNearestNeighbors for k in range(3)]
]
benchmark_methods_parameters = [
    [
        {'order': (1, 0, 0)},
        {'order': (1, 0, 1)},
        {'order': (2, 0, 1)},
        {'order': (2, 0, 2)},
        {},
    ], [
        {'order': (1, 0, 0), 'alpha': .05},
        {'order': (1, 0, 0), 'alpha': .25},
        {'order': (1, 0, 1), 'alpha': .05},
        {'order': (1, 0, 1), 'alpha': .25},
        {'order': (2, 0, 1), 'alpha': .05},
        {'order': (2, 0, 1), 'alpha': .25},
        {'order': (2, 0, 2), 'alpha': .05},
        {'order': (2, 0, 2), 'alpha': .25},
        {'order': 1, 'alpha': .05},
        {'order': 1, 'alpha': .25},
        {'order': 2, 'alpha': .05},
        {'order': 2, 'alpha': .25}
    ], [
        {'order': (1, 0, 0)},
        {'order': (1, 0, 1)},
        {'order': (2, 0, 1)},
        {'order': (2, 0, 2)},
        {'order': 1, 'dist': True},
        {'order': 2, 'dist': True},
        {'order': 1}, {'order': 2}, {'order': 3},
    ]
]
dataset_name = "NASDAQ"
tag = "comparisons"

from pyFTS.benchmarks import arima, naive, quantreg

for ct, type in enumerate(types):

    bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2,
                                    methods=[pwfts.ProbabilisticWeightedFTS],
                                    benchmark_models=False,
                                    benchmark_models=True,
                                    benchmark_methods=benchmark_methods[ct],
                                    benchmark_methods_parameters=benchmark_methods_parameters[ct],
                                    transformations=[None],
                                    orders=[1, 2, 3],
                                    partitions=np.arange(15, 85, 5),
                                    progress=False, type=type,
                                    distributed=True, nodes=['192.168.0.110', '192.168.0.107', '192.168.0.106'],
                                    file="benchmarks.db", dataset=dataset_name, tag=tag)

    bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2,
                                    benchmark_models=True,
                                    benchmark_methods=benchmark_methods[ct],
                                    benchmark_methods_parameters=benchmark_methods_parameters[ct],
                                    transformations=[tdiff],
                                    orders=[1, 2, 3],
                                    partitions=np.arange(3, 50, 2),
                                    progress=False, type='point',
                                    #steps_ahead=[1,4,7,10], #steps_ahead=[1]
                                    partitions=np.arange(3, 35, 2),
                                    progress=False, type=type,
                                    distributed=True, nodes=['192.168.0.110', '192.168.0.107', '192.168.0.106'],
                                    file="benchmarks.db", dataset="SP500", tag="partitioning")
                                    file="benchmarks.db", dataset=dataset_name, tag=tag)


#'''
'''
'''
dat = pd.read_csv('pwfts_taiex_partitioning.csv', sep=';')
print(bUtil.analytic_tabular_dataframe(dat))