Minor code standardization on models; improvements and bug fixes on benchmarks

Petrônio Cândido 2018-05-08 17:59:53 -03:00
parent 191ddf90d8
commit ffd97bacfc
23 changed files with 287 additions and 219 deletions

View File

@ -205,14 +205,35 @@ def pinball_mean(tau, targets, forecasts):
:param forecasts: list of prediction intervals
:return: float, the pinball loss mean for tau quantile
"""
try:
if tau <= 0.5:
preds = [pinball(tau, targets[i], forecasts[i][0]) for i in np.arange(0, len(forecasts))]
else:
preds = [pinball(tau, targets[i], forecasts[i][1]) for i in np.arange(0, len(forecasts))]
return np.nanmean(preds)
except Exception as ex:
print(ex)
if tau <= 0.5:
preds = [pinball(tau, targets[i], forecasts[i][0]) for i in np.arange(0, len(forecasts))]
else:
preds = [pinball(tau, targets[i], forecasts[i][1]) for i in np.arange(0, len(forecasts))]
return np.nanmean(preds)
def winkler_score(tau, target, forecast):
'''R. L. Winkler, A Decision-Theoretic Approach to Interval Estimation, J. Am. Stat. Assoc. 67 (337) (1972) 187–191. doi:10.2307/2284720.'''
delta = forecast[1] - forecast[0]
if forecast[0] <= target <= forecast[1]:
return delta
elif forecast[0] > target:
return delta + 2*(forecast[0] - target)/tau
elif forecast[1] < target:
return delta + 2*(target - forecast[1])/tau
def winkler_mean(tau, targets, forecasts):
"""
Mean Winkler score value of the forecast for a given tau-quantile of the targets
:param tau: quantile value in the range (0,1)
:param targets: list of target values
:param forecasts: list of prediction intervals
:return: float, the Winkler score mean for tau quantile
"""
preds = [winkler_score(tau, targets[i], forecasts[i]) for i in np.arange(0, len(forecasts))]
return np.nanmean(preds)
def brier_score(targets, densities):
@ -348,6 +369,8 @@ def get_interval_statistics(data, model, **kwargs):
ret.append(round(pinball_mean(0.25, data[model.order:], forecasts[:-1]), 2))
ret.append(round(pinball_mean(0.75, data[model.order:], forecasts[:-1]), 2))
ret.append(round(pinball_mean(0.95, data[model.order:], forecasts[:-1]), 2))
ret.append(round(winkler_mean(0.05, data[model.order:], forecasts[:-1]), 2))
ret.append(round(winkler_mean(0.25, data[model.order:], forecasts[:-1]), 2))
else:
forecasts = []
for k in np.arange(model.order, len(data) - steps_ahead):
@ -363,6 +386,8 @@ def get_interval_statistics(data, model, **kwargs):
ret.append(round(pinball_mean(0.25, data[start:], forecasts), 2))
ret.append(round(pinball_mean(0.75, data[start:], forecasts), 2))
ret.append(round(pinball_mean(0.95, data[start:], forecasts), 2))
ret.append(round(winkler_mean(0.05, data[start:], forecasts), 2))
ret.append(round(winkler_mean(0.25, data[start:], forecasts), 2))
return ret
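For illustration only (a minimal sketch, not part of this commit, assuming pyFTS is installed and intervals are given as [lower, upper]):

from pyFTS.benchmarks import Measures

interval = [10.0, 20.0]  # hypothetical prediction interval
# target inside the interval: the score is just the interval width
print(Measures.winkler_score(0.05, 15.0, interval))  # 10.0
# target above the upper bound: width plus a penalty scaled by 2/tau
print(Measures.winkler_score(0.05, 25.0, interval))  # 10 + 2*(25 - 20)/0.05 = 210.0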

View File

@ -18,6 +18,11 @@ from pyFTS.common import Util
def open_benchmark_db(name):
conn = sqlite3.connect(name)
#performance optimizations
conn.execute("PRAGMA journal_mode = WAL")
conn.execute("PRAGMA synchronous = NORMAL")
create_benchmark_tables(conn)
return conn
@ -31,7 +36,6 @@ def create_benchmark_tables(conn):
Scheme text, Partitions int,
Size int, Steps int, Method text, Measure text, Value real)''')
# Save (commit) the changes
conn.commit()
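The same tuning can be checked in isolation; a minimal standard-library sketch (the file name is just this module's default):

import sqlite3

conn = sqlite3.connect("benchmarks.db")
conn.execute("PRAGMA journal_mode = WAL")    # write-ahead log: readers no longer block writers
conn.execute("PRAGMA synchronous = NORMAL")  # fewer fsyncs per transaction; safe under WAL
conn.close()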

View File

@ -13,8 +13,8 @@ class ARIMA(fts.FTS):
"""
Façade for statsmodels.tsa.arima_model
"""
def __init__(self, name, **kwargs):
super(ARIMA, self).__init__(1, "ARIMA"+name)
def __init__(self, **kwargs):
super(ARIMA, self).__init__(**kwargs)
self.name = "ARIMA"
self.detail = "Auto Regressive Integrated Moving Average"
self.is_high_order = True
@ -44,11 +44,8 @@ class ARIMA(fts.FTS):
def train(self, data, **kwargs):
self.original_min = np.nanmin(data)
self.original_max = np.nanmax(data)
if kwargs.get('order', None) is not None:
order = kwargs.get('order', (1,0,0))
if 'order' in kwargs:
order = kwargs.pop('order')
self._decompose_order(order)
if self.indexer is not None:

View File

@ -156,12 +156,12 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
if models is None:
for method in methods:
mfts = method("")
mfts = method()
if mfts.is_high_order:
for order in orders:
if order >= mfts.min_order:
mfts = method("")
mfts = method()
mfts.order = order
pool.append(mfts)
else:
@ -190,7 +190,7 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
for transformation in transformations:
for count, model in enumerate(benchmark_methods, start=0):
par = benchmark_methods_parameters[count]
mfts = model("", **par)
mfts = model(**par)
mfts.append_transformation(transformation)
benchmark_pool.append(mfts)
@ -203,6 +203,8 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
elif type == 'distribution':
experiment_method = run_probabilistic
synthesis_method = process_probabilistic_jobs
else:
raise ValueError("Type parameter has an unknown value!")
if distributed:
import dispy, dispy.httpd
@ -213,28 +215,29 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
experiments = 0
jobs = []
inc = __pop("inc", 0.1, kwargs)
if progress:
from tqdm import tqdm
progressbar = tqdm(total=len(data), desc="Sliding Window:")
inc = __pop("inc", 0.1, kwargs)
_tdata = len(data) / (windowsize * inc)
_tasks = (len(partitioners_models) * len(orders) * len(partitions) * len(transformations) * len(steps_ahead))
_tbcmk = len(benchmark_pool)*len(steps_ahead)
progressbar = tqdm(total=_tdata*_tasks + _tdata*_tbcmk, desc="Benchmarks:")
file = kwargs.get('file', "benchmarks.db")
conn = bUtil.open_benchmark_db(file)
for ct, train, test in cUtil.sliding_window(data, windowsize, train, inc=inc, **kwargs):
experiments += 1
if progress:
progressbar.update(windowsize * inc)
if benchmark_models != False:
for model in benchmark_pool:
for step in steps_ahead:
kwargs['steps_ahead'] = step
if not distributed:
if progress:
progressbar.update(1)
job = experiment_method(deepcopy(model), None, train, test, **kwargs)
synthesis_method(dataset, tag, job, conn)
else:
@ -257,28 +260,17 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
else:
partitioners_pool = partitioners_models
rng1 = steps_ahead
if progress:
rng1 = tqdm(steps_ahead, desc="Steps")
for step in steps_ahead:
for step in rng1:
rng2 = partitioners_pool
for partitioner in partitioners_pool:
if progress:
rng2 = tqdm(partitioners_pool, desc="Partitioners")
for partitioner in rng2:
rng3 = enumerate(pool,start=0)
if progress:
rng3 = enumerate(tqdm(pool, desc="Models"),start=0)
for _id, model in rng3:
for _id, model in enumerate(pool,start=0):
kwargs['steps_ahead'] = step
if not distributed:
if progress:
progressbar.update(1)
job = experiment_method(deepcopy(model), deepcopy(partitioner), train, test, **kwargs)
synthesis_method(dataset, tag, job, conn)
else:
@ -291,12 +283,9 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
if distributed:
rng = jobs
if progress:
rng = tqdm(jobs)
for job in rng:
for job in jobs:
if progress:
progressbar.update(1)
job()
if job.status == dispy.DispyJob.Finished and job is not None:
tmp = job.result
@ -424,13 +413,15 @@ def run_interval(mfts, partitioner, train_data, test_data, window_key=None, **kw
times = _end - _start
_start = time.time()
_sharp, _res, _cov, _q05, _q25, _q75, _q95 = Measures.get_interval_statistics(test_data, mfts, **kwargs)
#_sharp, _res, _cov, _q05, _q25, _q75, _q95, _w05, _w25
metrics = Measures.get_interval_statistics(test_data, mfts, **kwargs)
_end = time.time()
times += _end - _start
ret = {'key': _key, 'obj': mfts, 'sharpness': _sharp, 'resolution': _res, 'coverage': _cov, 'time': times,
'Q05': _q05, 'Q25': _q25, 'Q75': _q75, 'Q95': _q95, 'window': window_key,
'steps': steps_ahead, 'method': method}
ret = {'key': _key, 'obj': mfts, 'sharpness': metrics[0], 'resolution': metrics[1], 'coverage': metrics[2],
'time': times,'Q05': metrics[3], 'Q25': metrics[4], 'Q75': metrics[5], 'Q95': metrics[6],
'winkler05': metrics[7], 'winkler25': metrics[8],
'window': window_key,'steps': steps_ahead, 'method': method}
return ret
@ -543,6 +534,12 @@ def process_interval_jobs(dataset, tag, job, conn):
Q95 = deepcopy(data)
Q95.extend(["Q95", job["Q95"]])
bUtil.insert_benchmark(Q95, conn)
W05 = deepcopy(data)
W05.extend(["winkler05", job["winkler05"]])
bUtil.insert_benchmark(W05, conn)
W25 = deepcopy(data)
W25.extend(["winkler25", job["winkler25"]])
bUtil.insert_benchmark(W25, conn)
def process_probabilistic_jobs(dataset, tag, job, conn):
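With the two Winkler entries appended, get_interval_statistics now returns nine values and two extra rows land in the results database. A hedged sketch of consuming both (test_data and model are placeholders; the table name 'benchmarks' is an assumption, not shown in this diff):

from pyFTS.benchmarks import Util as bUtil, Measures

metrics = Measures.get_interval_statistics(test_data, model)
sharpness, resolution, coverage = metrics[0:3]
q05, q25, q75, q95 = metrics[3:7]    # pinball losses
winkler05, winkler25 = metrics[7:9]  # new in this commit (tau = 0.05 and 0.25)

conn = bUtil.open_benchmark_db("benchmarks.db")
rows = conn.execute("SELECT Method, AVG(Value) FROM benchmarks "
                    "WHERE Measure = 'winkler05' GROUP BY Method").fetchall()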

View File

@ -11,8 +11,8 @@ class KNearestNeighbors(fts.FTS):
"""
K-Nearest Neighbors
"""
def __init__(self, name, **kwargs):
super(KNearestNeighbors, self).__init__(1, "kNN"+name)
def __init__(self, **kwargs):
super(KNearestNeighbors, self).__init__(**kwargs)
self.name = "kNN"
self.shortname = "kNN"
self.detail = "K-Nearest Neighbors"
@ -23,20 +23,12 @@ class KNearestNeighbors(fts.FTS):
self.benchmark_only = True
self.min_order = 1
self.alpha = kwargs.get("alpha", 0.05)
self.order = kwargs.get("order", 1)
self.lag = None
self.k = kwargs.get("k", 30)
self.uod = None
def train(self, data, **kwargs):
if kwargs.get('order', None) is not None:
self.order = kwargs.get('order', 1)
self.data = np.array(data)
self.original_max = max(data)
self.original_min = min(data)
#self.lagdata, = lagmat(data, maxlag=self.order, trim="both", original='sep')
def knn(self, sample):
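Usage mirrors the updated test script at the end of this commit (dataset is any univariate series):

from pyFTS.benchmarks import knn, Measures

model = knn.KNearestNeighbors(order=3)  # was: knn.KNearestNeighbors("")
model.fit(dataset[:800])
print(Measures.get_distribution_statistics(dataset[800:1000], model))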

View File

@ -6,8 +6,8 @@ from pyFTS.common import fts
class Naive(fts.FTS):
"""Naïve Forecasting method"""
def __init__(self, name, **kwargs):
super(Naive, self).__init__(1, "Naive")
def __init__(self, **kwargs):
super(Naive, self).__init__(order=1, name="Naive",**kwargs)
self.name = "Naïve Model"
self.detail = "Naïve Model"
self.benchmark_only = True

View File

@ -11,8 +11,8 @@ from pyFTS.probabilistic import ProbabilityDistribution
class QuantileRegression(fts.FTS):
"""Façade for statsmodels.regression.quantile_regression"""
def __init__(self, name, **kwargs):
super(QuantileRegression, self).__init__(1, "")
def __init__(self, **kwargs):
super(QuantileRegression, self).__init__(**kwargs)
self.name = "QR"
self.detail = "Quantile Regression"
self.is_high_order = True
@ -27,13 +27,8 @@ class QuantileRegression(fts.FTS):
self.mean_qt = None
self.lower_qt = None
self.dist_qt = None
self.order = kwargs.get('order', 1)
self.shortname = "QAR("+str(self.order)+","+str(self.alpha)+")"
def train(self, data, **kwargs):
if 'order' in kwargs:
self.order = kwargs.get('order', 1)
if self.indexer is not None and isinstance(data, pd.DataFrame):
data = self.indexer.get_data(data)
@ -58,9 +53,6 @@ class QuantileRegression(fts.FTS):
up_qt = [k for k in uqt.params]
self.dist_qt.append([lo_qt, up_qt])
self.original_min = min(data)
self.original_max = max(data)
self.shortname = "QAR(" + str(self.order) + ") - " + str(self.alpha)
def linearmodel(self,data,params):
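Because the shortname is now derived at construction time, it reflects the actual hyperparameters immediately; a hedged sketch (the order, alpha and dist values are illustrative):

from pyFTS.benchmarks import quantreg

model = quantreg.QuantileRegression(order=2, alpha=.25, dist=True)
print(model.shortname)  # QAR(2,0.25)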

View File

@ -38,11 +38,11 @@ class FLRG(object):
self.key = self.key + n
return self.key
def get_membership(self, data, sets):
ret = 0.0
if isinstance(self.LHS, (list, set)):
ret = np.nanmin([sets[self.LHS[ct]].membership(dat) for ct, dat in enumerate(data)])
if len(self.LHS) == len(data):
ret = np.nanmin([sets[self.LHS[ct]].membership(dat) for ct, dat in enumerate(data)])
else:
ret = sets[self.LHS].membership(data)
return ret

View File

@ -7,7 +7,7 @@ class FTS(object):
"""
Fuzzy Time Series object model
"""
def __init__(self, order, name, **kwargs):
def __init__(self, **kwargs):
"""
Create a Fuzzy Time Series model
:param order: model order
@ -16,10 +16,10 @@ class FTS(object):
"""
self.sets = {}
self.flrgs = {}
self.order = order
self.shortname = name
self.name = name
self.detail = name
self.order = kwargs.get('order',"")
self.shortname = kwargs.get('name',"")
self.name = kwargs.get('name',"")
self.detail = kwargs.get('name',"")
self.is_high_order = False
self.min_order = 1
self.has_seasonality = False
@ -75,6 +75,8 @@ class FTS(object):
else:
ndata = self.apply_transformations(data)
ndata = np.clip(ndata, self.original_min, self.original_max)
if 'distributed' in kwargs:
distributed = kwargs.pop('distributed')
else:
@ -222,6 +224,24 @@ class FTS(object):
else:
data = self.apply_transformations(ndata)
self.original_min = np.nanmin(data)
self.original_max = np.nanmax(data)
if 'sets' in kwargs:
self.sets = kwargs.pop('sets')
if 'partitioner' in kwargs:
self.partitioner = kwargs.pop('partitioner')
if (self.sets is None or len(self.sets) == 0) and not self.benchmark_only:
if self.partitioner is not None:
self.sets = self.partitioner.sets
else:
raise Exception("Fuzzy sets were not provided for the model. Use the 'sets' or 'partitioner' parameters.")
if 'order' in kwargs:
self.order = kwargs.pop('order')
dump = kwargs.get('dump', None)
num_batches = kwargs.get('num_batches', None)
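The net effect of the kwargs-only constructor is that every model is now built the same way; a minimal sketch with placeholder data, assuming a Grid partitioner:

from pyFTS.partitioners import Grid
from pyFTS.models import hofts

partitioner = Grid.GridPartitioner(data=train_data, npart=10)
model = hofts.HighOrderFTS(partitioner=partitioner, order=2)  # was: hofts.HighOrderFTS("")
model.fit(train_data)
forecasts = model.predict(test_data)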

View File

@ -3,7 +3,7 @@ import pandas as pd
import numpy as np
def get_data(field):
def get_data(field="avg"):
"""
Get a simple univariate time series data.
:param field: the dataset field name to extract
@ -21,6 +21,6 @@ def get_dataframe():
"""
dat = common.get_dataframe('NASDAQ.csv.bz2',
'https://github.com/petroniocandido/pyFTS/raw/8f20f3634aa6a8f58083bdcd1bbf93795e6ed767/pyFTS/data/NASDAQ.csv.bz2',
sep=";", compression='bz2')
sep=",", compression='bz2')
return dat
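With the new default field (and the corrected comma separator), the common case needs no arguments; a one-line sketch:

from pyFTS.data import NASDAQ

dataset = NASDAQ.get_data()  # equivalent to get_data("avg") after this change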

View File

@ -33,10 +33,11 @@ class ConventionalFLRG(flrg.FLRG):
class ConventionalFTS(fts.FTS):
"""Conventional Fuzzy Time Series"""
def __init__(self, name, **kwargs):
super(ConventionalFTS, self).__init__(1, "CFTS " + name, **kwargs)
def __init__(self, **kwargs):
super(ConventionalFTS, self).__init__(order=1, **kwargs)
self.name = "Conventional FTS"
self.detail = "Chen"
self.shortname = "CFTS"
self.flrgs = {}
def generate_flrg(self, flrs):
@ -48,10 +49,6 @@ class ConventionalFTS(fts.FTS):
self.flrgs[flr.LHS].append_rhs(flr.RHS)
def train(self, data, **kwargs):
if kwargs.get('sets', None) is not None:
self.sets = kwargs.get('sets', None)
else:
self.sets = self.partitioner.sets
tmpdata = FuzzySet.fuzzyfy_series(data, self.sets, method='maximum')
flrs = FLR.generate_non_recurrent_flrs(tmpdata)

View File

@ -45,9 +45,9 @@ class TrendWeightedFLRG(yu.WeightedFLRG):
class TrendWeightedFTS(yu.WeightedFTS):
"""First Order Trend Weighted Fuzzy Time Series"""
def __init__(self, name, **kwargs):
super(TrendWeightedFTS, self).__init__("", **kwargs)
self.shortname = "TWFTS " + name
def __init__(self, **kwargs):
super(TrendWeightedFTS, self).__init__(**kwargs)
self.shortname = "TWFTS"
self.name = "Trend Weighted FTS"
self.detail = "Cheng"
self.is_high_order = False

View File

@ -17,9 +17,9 @@ def sampler(data, quantiles):
class EnsembleFTS(fts.FTS):
def __init__(self, name, **kwargs):
super(EnsembleFTS, self).__init__(1, "Ensemble FTS", **kwargs)
self.shortname = "Ensemble FTS " + name
def __init__(self, **kwargs):
super(EnsembleFTS, self).__init__(**kwargs)
self.shortname = "Ensemble FTS"
self.name = "Ensemble FTS"
self.flrgs = {}
self.has_point_forecasting = True
@ -29,7 +29,6 @@ class EnsembleFTS(fts.FTS):
self.models = []
self.parameters = []
self.alpha = kwargs.get("alpha", 0.05)
self.order = 1
self.point_method = kwargs.get('point_method', 'mean')
self.interval_method = kwargs.get('interval_method', 'quantile')
@ -39,8 +38,7 @@ class EnsembleFTS(fts.FTS):
self.order = model.order
def train(self, data, **kwargs):
self.original_max = max(data)
self.original_min = min(data)
pass
def get_models_forecasts(self,data):
tmp = []
@ -246,8 +244,8 @@ class EnsembleFTS(fts.FTS):
class AllMethodEnsembleFTS(EnsembleFTS):
def __init__(self, name, **kwargs):
super(AllMethodEnsembleFTS, self).__init__(name="Ensemble FTS"+name, **kwargs)
def __init__(self, **kwargs):
super(AllMethodEnsembleFTS, self).__init__(**kwargs)
self.min_order = 3
self.shortname ="Ensemble FTS"
@ -256,26 +254,22 @@ class AllMethodEnsembleFTS(EnsembleFTS):
model.append_transformation(t)
def train(self, data, **kwargs):
self.original_max = max(data)
self.original_min = min(data)
order = kwargs.get('order',2)
fo_methods = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, cheng.TrendWeightedFTS,
sadaei.ExponentialyWeightedFTS, ismailefendi.ImprovedWeightedFTS]
ho_methods = [hofts.HighOrderFTS, hwang.HighOrderFTS]
for method in fo_methods:
model = method("", partitioner=self.partitioner)
model = method(partitioner=self.partitioner)
self.set_transformations(model)
model.fit(data, **kwargs)
self.append_model(model)
for method in ho_methods:
for o in np.arange(1, order+1):
model = method("", partitioner=self.partitioner)
for o in np.arange(1, self.order+1):
model = method(partitioner=self.partitioner)
if model.min_order >= o:
model.order = o
self.set_transformations(model)
model.fit(data, **kwargs)
self.append_model(model)

View File

@ -38,13 +38,11 @@ class HighOrderFLRG(flrg.FLRG):
class HighOrderFTS(fts.FTS):
"""Conventional High Order Fuzzy Time Series"""
def __init__(self, name, **kwargs):
super(HighOrderFTS, self).__init__(1, name="HOFTS" + name, **kwargs)
def __init__(self, **kwargs):
super(HighOrderFTS, self).__init__(**kwargs)
self.name = "High Order FTS"
self.shortname = "HOFTS" + name
self.shortname = "HOFTS"
self.detail = "Chen"
self.order = kwargs.get('order',1)
self.setsDict = {}
self.is_high_order = True
self.min_order = 2
@ -94,13 +92,6 @@ class HighOrderFTS(fts.FTS):
def train(self, data, **kwargs):
self.order = kwargs.get('order',2)
if kwargs.get('sets', None) is not None:
self.sets = kwargs.get('sets', None)
else:
self.sets = self.partitioner.sets
self.generate_flrg(data)
def forecast(self, ndata, **kwargs):
@ -115,10 +106,12 @@ class HighOrderFTS(fts.FTS):
for k in np.arange(self.order, l+1):
flrgs = self.generate_lhs_flrg(ndata[k - self.order: k])
tmp = []
for flrg in flrgs:
tmp = []
if flrg.get_key() not in self.flrgs:
tmp.append(self.sets[flrg.LHS[-1]].centroid)
if len(flrg.LHS) > 0:
tmp.append(self.sets[flrg.LHS[-1]].centroid)
else:
flrg = self.flrgs[flrg.get_key()]
tmp.append(flrg.get_midpoint(self.sets))

View File

@ -10,12 +10,12 @@ from pyFTS.common import FuzzySet, FLR, Transformations, fts
class HighOrderFTS(fts.FTS):
def __init__(self, name, **kwargs):
super(HighOrderFTS, self).__init__(1, name, **kwargs)
def __init__(self, **kwargs):
super(HighOrderFTS, self).__init__(**kwargs)
self.is_high_order = True
self.min_order = 2
self.name = "Hwang High Order FTS"
self.shortname = "Hwang" + name
self.shortname = "Hwang"
self.detail = "Hwang"
def forecast(self, ndata, **kwargs):
@ -57,9 +57,4 @@ class HighOrderFTS(fts.FTS):
return ret
def train(self, data, **kwargs):
if kwargs.get('sets', None) is not None:
self.sets = kwargs.get('sets', None)
else:
self.sets = self.partitioner.sets
self.order = kwargs.get('order', 2)
pass

View File

@ -17,9 +17,9 @@ class IntervalFTS(hofts.HighOrderFTS):
"""
High Order Interval Fuzzy Time Series
"""
def __init__(self, name, **kwargs):
super(IntervalFTS, self).__init__(name="IFTS " + name, **kwargs)
self.shortname = "IFTS " + name
def __init__(self, **kwargs):
super(IntervalFTS, self).__init__(**kwargs)
self.shortname = "IFTS"
self.name = "Interval FTS"
self.detail = "Silva, P.; Guimarães, F.; Sadaei, H. (2016)"
self.flrgs = {}
@ -29,19 +29,23 @@ class IntervalFTS(hofts.HighOrderFTS):
self.min_order = 1
def get_upper(self, flrg):
if flrg.get_key() in self.flrgs:
tmp = self.flrgs[flrg.get_key()]
ret = tmp.get_upper(self.sets)
else:
ret = self.sets[flrg.LHS[-1]].upper
ret = np.nan
if len(flrg.LHS) > 0:
if flrg.get_key() in self.flrgs:
tmp = self.flrgs[flrg.get_key()]
ret = tmp.get_upper(self.sets)
else:
ret = self.sets[flrg.LHS[-1]].upper
return ret
def get_lower(self, flrg):
if flrg.get_key() in self.flrgs:
tmp = self.flrgs[flrg.get_key()]
ret = tmp.get_lower(self.sets)
else:
ret = self.sets[flrg.LHS[-1]].lower
ret = np.nan
if len(flrg.LHS) > 0:
if flrg.get_key() in self.flrgs:
tmp = self.flrgs[flrg.get_key()]
ret = tmp.get_lower(self.sets)
else:
ret = self.sets[flrg.LHS[-1]].lower
return ret
def get_sequence_membership(self, data, fuzzySets):
@ -69,11 +73,12 @@ class IntervalFTS(hofts.HighOrderFTS):
affected_flrgs_memberships = []
for flrg in flrgs:
# find the bounds of each FLRG, weighted by membership
mv = flrg.get_membership(sample, self.sets)
up.append(mv * self.get_upper(flrg))
lo.append(mv * self.get_lower(flrg))
affected_flrgs_memberships.append(mv)
if len(flrg.LHS) > 0:
# find the bounds of each FLRG, weighted by membership
mv = flrg.get_membership(sample, self.sets)
up.append(mv * self.get_upper(flrg))
lo.append(mv * self.get_lower(flrg))
affected_flrgs_memberships.append(mv)
# generate the interval
norm = sum(affected_flrgs_memberships)

View File

@ -47,8 +47,8 @@ class ImprovedWeightedFLRG(flrg.FLRG):
class ImprovedWeightedFTS(fts.FTS):
"""First Order Improved Weighted Fuzzy Time Series"""
def __init__(self, name, **kwargs):
super(ImprovedWeightedFTS, self).__init__(1, "IWFTS " + name, **kwargs)
def __init__(self, **kwargs):
super(ImprovedWeightedFTS, self).__init__(order=1, name="IWFTS", **kwargs)
self.name = "Improved Weighted FTS"
self.detail = "Ismail & Efendi"
@ -61,10 +61,6 @@ class ImprovedWeightedFTS(fts.FTS):
self.flrgs[flr.LHS].append_rhs(flr.RHS)
def train(self, ndata, **kwargs):
if kwargs.get('sets', None) is not None:
self.sets = kwargs.get('sets', None)
else:
self.sets = self.partitioner.sets
tmpdata = FuzzySet.fuzzyfy_series(ndata, self.sets, method='maximum')
flrs = FLR.generate_recurrent_flrs(tmpdata)
@ -73,7 +69,10 @@ class ImprovedWeightedFTS(fts.FTS):
def forecast(self, ndata, **kwargs):
l = 1
ordered_sets = FuzzySet.set_ordered(self.sets)
if self.partitioner is not None:
ordered_sets = self.partitioner.ordered_sets
else:
ordered_sets = FuzzySet.set_ordered(self.sets)
ndata = np.array(ndata)
l = len(ndata)

View File

@ -11,7 +11,7 @@ class MVFTS(fts.FTS):
Multivariate extension of Chen's ConventionalFTS method
"""
def __init__(self, name, **kwargs):
super(MVFTS, self).__init__(1, name, **kwargs)
super(MVFTS, self).__init__(order=1, name=name, **kwargs)
self.explanatory_variables = []
self.target_variable = None
self.flrgs = {}
@ -91,8 +91,6 @@ class MVFTS(fts.FTS):
ndata = self.apply_transformations(data)
self.order = kwargs.get('order',1)
flrs = self.generate_flrs(ndata)
self.generate_flrg(flrs)

View File

@ -94,9 +94,9 @@ class ProbabilisticWeightedFLRG(hofts.HighOrderFLRG):
class ProbabilisticWeightedFTS(ifts.IntervalFTS):
"""High Order Probabilistic Weighted Fuzzy Time Series"""
def __init__(self, name, **kwargs):
super(ProbabilisticWeightedFTS, self).__init__(name=name, **kwargs)
self.shortname = "PWFTS " + name
def __init__(self, **kwargs):
super(ProbabilisticWeightedFTS, self).__init__(**kwargs)
self.shortname = "PWFTS"
self.name = "Probabilistic FTS"
self.detail = "Silva, P.; Guimarães, F.; Sadaei, H."
self.flrgs = {}
@ -108,22 +108,10 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
self.min_order = 1
self.auto_update = kwargs.get('update',False)
def train(self, data, **kwargs):
data = self.apply_transformations(data, updateUoD=True)
parameters = kwargs.get('parameters','fuzzy')
self.order = kwargs.get('order',1)
if kwargs.get('sets', None) is None and self.partitioner is not None:
self.sets = self.partitioner.sets
self.original_min = self.partitioner.min
self.original_max = self.partitioner.max
else:
self.sets = kwargs.get('sets',None)
if parameters == 'monotonic':
tmpdata = FuzzySet.fuzzyfy_series_old(data, self.sets)
flrs = FLR.generate_recurrent_flrs(tmpdata)
@ -237,7 +225,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
# this may be the problem! TEST IT!!!
##########################################
pi = 1 / len(flrg.LHS)
ret = sum(np.array([pi * self.setsDict[s].membership(x) for s in flrg.LHS]))
ret = sum(np.array([pi * self.sets[s].membership(x) for s in flrg.LHS]))
return ret
def get_upper(self, flrg):

View File

@ -51,8 +51,8 @@ class ExponentialyWeightedFLRG(flrg.FLRG):
class ExponentialyWeightedFTS(fts.FTS):
"""First Order Exponentialy Weighted Fuzzy Time Series"""
def __init__(self, name, **kwargs):
super(ExponentialyWeightedFTS, self).__init__(1, "EWFTS", **kwargs)
def __init__(self, **kwargs):
super(ExponentialyWeightedFTS, self).__init__(order=1, name="EWFTS", **kwargs)
self.name = "Exponentialy Weighted FTS"
self.detail = "Sadaei"
self.c = kwargs.get('c', default_c)
@ -66,12 +66,6 @@ class ExponentialyWeightedFTS(fts.FTS):
self.flrgs[flr.LHS].append_rhs(flr.RHS)
def train(self, data, **kwargs):
self.c = kwargs.get('parameters', default_c)
if kwargs.get('sets', None) is not None:
self.sets = kwargs.get('sets', None)
else:
self.sets = self.partitioner.sets
tmpdata = FuzzySet.fuzzyfy_series(data, self.sets, method='maximum')
flrs = FLR.generate_recurrent_flrs(tmpdata)
self.generate_flrg(flrs, self.c)
@ -79,7 +73,10 @@ class ExponentialyWeightedFTS(fts.FTS):
def forecast(self, ndata, **kwargs):
l = 1
ordered_sets = FuzzySet.set_ordered(self.sets)
if self.partitioner is not None:
ordered_sets = self.partitioner.ordered_sets
else:
ordered_sets = FuzzySet.set_ordered(self.sets)
data = np.array(ndata)

View File

@ -10,8 +10,8 @@ from pyFTS.common import FuzzySet, FLR, fts
class ConventionalFTS(fts.FTS):
"""Traditional Fuzzy Time Series"""
def __init__(self, name, **kwargs):
super(ConventionalFTS, self).__init__(1, "FTS " + name, **kwargs)
def __init__(self, **kwargs):
super(ConventionalFTS, self).__init__(order=1, name="FTS", **kwargs)
self.name = "Traditional FTS"
self.detail = "Song & Chissom"
if self.sets is not None and self.partitioner is not None:
@ -49,10 +49,6 @@ class ConventionalFTS(fts.FTS):
def train(self, data, **kwargs):
if kwargs.get('sets', None) is not None:
self.sets = kwargs.get('sets', None)
else:
self.sets = self.partitioner.sets
tmpdata = FuzzySet.fuzzyfy_series(data, self.sets, method='maximum')
flrs = FLR.generate_non_recurrent_flrs(tmpdata)
@ -60,7 +56,10 @@ class ConventionalFTS(fts.FTS):
def forecast(self, ndata, **kwargs):
ordered_set = FuzzySet.set_ordered(self.sets)
if self.partitioner is not None:
ordered_sets = self.partitioner.ordered_sets
else:
ordered_sets = FuzzySet.set_ordered(self.sets)
l = len(ndata)
npart = len(self.sets)
@ -75,9 +74,9 @@ class ConventionalFTS(fts.FTS):
fs = np.ravel(np.argwhere(r == max(r)))
if len(fs) == 1:
ret.append(self.sets[ordered_set[fs[0]]].centroid)
ret.append(self.sets[ordered_sets[fs[0]]].centroid)
else:
mp = [self.sets[ordered_set[s]].centroid for s in fs]
mp = [self.sets[ordered_sets[s]].centroid for s in fs]
ret.append( sum(mp)/len(mp))

View File

@ -44,8 +44,8 @@ class WeightedFLRG(flrg.FLRG):
class WeightedFTS(fts.FTS):
"""First Order Weighted Fuzzy Time Series"""
def __init__(self, name, **kwargs):
super(WeightedFTS, self).__init__(1, "WFTS " + name, **kwargs)
def __init__(self, **kwargs):
super(WeightedFTS, self).__init__(order=1, name="WFTS", **kwargs)
self.name = "Weighted FTS"
self.detail = "Yu"
@ -58,18 +58,16 @@ class WeightedFTS(fts.FTS):
self.flrgs[flr.LHS].append_rhs(flr.RHS)
def train(self, ndata, **kwargs):
if kwargs.get('sets', None) is not None:
self.sets = kwargs.get('sets', None)
else:
self.sets = self.partitioner.sets
tmpdata = FuzzySet.fuzzyfy_series(ndata, self.sets, method='maximum')
flrs = FLR.generate_recurrent_flrs(tmpdata)
self.generate_FLRG(flrs)
def forecast(self, ndata, **kwargs):
ordered_sets = FuzzySet.set_ordered(self.sets)
if self.partitioner is not None:
ordered_sets = self.partitioner.ordered_sets
else:
ordered_sets = FuzzySet.set_ordered(self.sets)
ndata = np.array(ndata)

View File

@ -11,35 +11,61 @@ from pyFTS.common import Transformations
tdiff = Transformations.Differential(1)
from pyFTS.data import TAIEX, SP500
from pyFTS.data import TAIEX, SP500, NASDAQ
#dataset = TAIEX.get_data()
dataset = SP500.get_data()[11500:16000]
#dataset = NASDAQ.get_data()
#print(len(dataset))
'''
from pyFTS.partitioners import Grid, Util as pUtil
partitioner = Grid.GridPartitioner(data=dataset[:800], npart=10) #, transformation=tdiff)
'''
from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil, Measures, knn, quantreg, arima
from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil, Measures, knn, quantreg, arima, naive
from pyFTS.models import pwfts, song, ifts
from pyFTS.models.ensemble import ensemble
'''
#model = knn.KNearestNeighbors("")
model = knn.KNearestNeighbors(order=3)
#model = ensemble.AllMethodEnsembleFTS("", partitioner=partitioner)
#model = arima.ARIMA("", order=(2,0,2))
#model = quantreg.QuantileRegression("", order=2, dist=True)
model.append_transformation(tdiff)
#model.append_transformation(tdiff)
model.fit(dataset[:800])
Measures.get_distribution_statistics(dataset[800:1000], model)
print(Measures.get_distribution_statistics(dataset[800:1000], model))
#tmp = model.predict(dataset[800:1000], type='distribution')
#for tmp2 in tmp:
# print(tmp2)
#'''
#'''
bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2,
methods=[pwfts.ProbabilisticWeightedFTS],
benchmark_models=False,
transformations=[None],
orders=[1, 2, 3],
partitions=np.arange(10, 90, 5),
progress=False, type="point",
#steps_ahead=[1,2,4,6,8,10],
distributed=True, nodes=['192.168.0.110', '192.168.0.107', '192.168.0.106'],
file="benchmarks.db", dataset="SP500", tag="partitioning")
bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2,
methods=[pwfts.ProbabilisticWeightedFTS],
benchmark_models=False,
transformations=[tdiff],
orders=[1, 2, 3],
partitions=np.arange(3, 30, 2),
progress=False, type="point",
#steps_ahead=[1,2,4,6,8,10],
distributed=True, nodes=['192.168.0.110', '192.168.0.107', '192.168.0.106'],
file="benchmarks.db", dataset="SP500", tag="partitioning")
#'''
'''
from pyFTS.partitioners import Grid, Util as pUtil
partitioner = Grid.GridPartitioner(data=dataset[:800], npart=10, transformation=tdiff)
@ -52,24 +78,76 @@ print(Measures.get_distribution_statistics(dataset[800:1000], model, steps_ahead
#for tmp2 in tmp:
# print(tmp2)
'''
'''
#'''
types = ['point','interval','distribution']
benchmark_methods=[
[arima.ARIMA for k in range(4)] + [naive.Naive],
[arima.ARIMA for k in range(8)] + [quantreg.QuantileRegression for k in range(4)],
[arima.ARIMA for k in range(4)] + [quantreg.QuantileRegression for k in range(2)]
+ [knn.KNearestNeighbors for k in range(3)]
]
benchmark_methods_parameters= [
[
{'order': (1, 0, 0)},
{'order': (1, 0, 1)},
{'order': (2, 0, 1)},
{'order': (2, 0, 2)},
{},
],[
{'order': (1, 0, 0), 'alpha': .05},
{'order': (1, 0, 0), 'alpha': .25},
{'order': (1, 0, 1), 'alpha': .05},
{'order': (1, 0, 1), 'alpha': .25},
{'order': (2, 0, 1), 'alpha': .05},
{'order': (2, 0, 1), 'alpha': .25},
{'order': (2, 0, 2), 'alpha': .05},
{'order': (2, 0, 2), 'alpha': .25},
{'order': 1, 'alpha': .05},
{'order': 1, 'alpha': .25},
{'order': 2, 'alpha': .05},
{'order': 2, 'alpha': .25}
],[
{'order': (1, 0, 0)},
{'order': (1, 0, 1)},
{'order': (2, 0, 1)},
{'order': (2, 0, 2)},
{'order': 1, 'dist': True},
{'order': 2, 'dist': True},
{'order': 1}, {'order': 2}, {'order': 3},
]
]
dataset_name = "NASDAQ"
tag = "comparisons"
from pyFTS.benchmarks import arima, naive, quantreg
bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2,
methods=[pwfts.ProbabilisticWeightedFTS],
benchmark_models=False,
transformations=[tdiff],
orders=[1,2,3],
partitions=np.arange(3, 50, 2),
progress=False, type='point',
#steps_ahead=[1,4,7,10], #steps_ahead=[1]
distributed=True, nodes=['192.168.0.110', '192.168.0.107','192.168.0.106'],
file="benchmarks.db", dataset="SP500", tag="partitioning")
for ct, type in enumerate(types):
bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2,
benchmark_models=True,
benchmark_methods=benchmark_methods[ct],
benchmark_methods_parameters=benchmark_methods_parameters[ct],
transformations=[None],
orders=[1,2,3],
partitions=np.arange(15, 85, 5),
progress=False, type=type,
distributed=True, nodes=['192.168.0.110', '192.168.0.107','192.168.0.106'],
file="benchmarks.db", dataset=dataset_name, tag=tag)
bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2,
benchmark_models=True,
benchmark_methods=benchmark_methods[ct],
benchmark_methods_parameters=benchmark_methods_parameters[ct],
transformations=[tdiff],
orders=[1, 2, 3],
partitions=np.arange(3, 35, 2),
progress=False, type=type,
distributed=True, nodes=['192.168.0.110', '192.168.0.107', '192.168.0.106'],
file="benchmarks.db", dataset=dataset_name, tag=tag)
#'''
'''
'''
dat = pd.read_csv('pwfts_taiex_partitioning.csv', sep=';')
print(bUtil.analytic_tabular_dataframe(dat))