diff --git a/pyFTS/benchmarks/Measures.py b/pyFTS/benchmarks/Measures.py index 7bb4f52..d046b63 100644 --- a/pyFTS/benchmarks/Measures.py +++ b/pyFTS/benchmarks/Measures.py @@ -205,14 +205,35 @@ def pinball_mean(tau, targets, forecasts): :param forecasts: list of prediction intervals :return: float, the pinball loss mean for tau quantile """ - try: - if tau <= 0.5: - preds = [pinball(tau, targets[i], forecasts[i][0]) for i in np.arange(0, len(forecasts))] - else: - preds = [pinball(tau, targets[i], forecasts[i][1]) for i in np.arange(0, len(forecasts))] - return np.nanmean(preds) - except Exception as ex: - print(ex) + if tau <= 0.5: + preds = [pinball(tau, targets[i], forecasts[i][0]) for i in np.arange(0, len(forecasts))] + else: + preds = [pinball(tau, targets[i], forecasts[i][1]) for i in np.arange(0, len(forecasts))] + return np.nanmean(preds) + + +def winkler_score(tau, target, forecast): + '''R. L. Winkler, A Decision-Theoretic Approach to Interval Estimation, J. Am. Stat. Assoc. 67 (337) (1972) 187–191. doi:10.2307/2284720. 
''' + delta = forecast[1] - forecast[0] + if forecast[0] <= target <= forecast[1]: # bounds are inclusive + return delta + elif forecast[0] > target: + return delta + 2*(forecast[0] - target)/tau + else: # target above the interval; NaN inputs yield NaN, not None + return delta + 2*(target - forecast[1])/tau + + +def winkler_mean(tau, targets, forecasts): + """ + Mean Winkler score value of the forecast for a given tau-quantile of the targets + :param tau: quantile value in the range (0,1) + :param targets: list of target values + :param forecasts: list of prediction intervals + :return: float, the Winkler score mean for tau quantile + """ + preds = [winkler_score(tau, targets[i], forecasts[i]) for i in np.arange(0, len(forecasts))] + + return np.nanmean(preds) def brier_score(targets, densities): @@ -348,6 +369,8 @@ def get_interval_statistics(data, model, **kwargs): ret.append(round(pinball_mean(0.25, data[model.order:], forecasts[:-1]), 2)) ret.append(round(pinball_mean(0.75, data[model.order:], forecasts[:-1]), 2)) ret.append(round(pinball_mean(0.95, data[model.order:], forecasts[:-1]), 2)) + ret.append(round(winkler_mean(0.05, data[model.order:], forecasts[:-1]), 2)) + ret.append(round(winkler_mean(0.25, data[model.order:], forecasts[:-1]), 2)) else: forecasts = [] for k in np.arange(model.order, len(data) - steps_ahead): @@ -363,6 +386,8 @@ def get_interval_statistics(data, model, **kwargs): ret.append(round(pinball_mean(0.25, data[start:], forecasts), 2)) ret.append(round(pinball_mean(0.75, data[start:], forecasts), 2)) ret.append(round(pinball_mean(0.95, data[start:], forecasts), 2)) + ret.append(round(winkler_mean(0.05, data[start:], forecasts), 2)) + ret.append(round(winkler_mean(0.25, data[start:], forecasts), 2)) return ret diff --git a/pyFTS/benchmarks/Util.py b/pyFTS/benchmarks/Util.py index 361e90c..7a68d9c 100644 --- a/pyFTS/benchmarks/Util.py +++ b/pyFTS/benchmarks/Util.py @@ -18,6 +18,11 @@ from pyFTS.common import Util def open_benchmark_db(name): conn = sqlite3.connect(name) + + #performance
optimizations + conn.execute("PRAGMA journal_mode = WAL") + conn.execute("PRAGMA synchronous = NORMAL") + create_benchmark_tables(conn) return conn @@ -31,7 +36,6 @@ def create_benchmark_tables(conn): Scheme text, Partitions int, Size int, Steps int, Method text, Measure text, Value real)''') - # Save (commit) the changes conn.commit() diff --git a/pyFTS/benchmarks/arima.py b/pyFTS/benchmarks/arima.py index c853b10..b524f94 100644 --- a/pyFTS/benchmarks/arima.py +++ b/pyFTS/benchmarks/arima.py @@ -13,8 +13,8 @@ class ARIMA(fts.FTS): """ Façade for statsmodels.tsa.arima_model """ - def __init__(self, name, **kwargs): - super(ARIMA, self).__init__(1, "ARIMA"+name) + def __init__(self, **kwargs): + super(ARIMA, self).__init__(**kwargs) self.name = "ARIMA" self.detail = "Auto Regressive Integrated Moving Average" self.is_high_order = True @@ -44,11 +44,8 @@ class ARIMA(fts.FTS): def train(self, data, **kwargs): - self.original_min = np.nanmin(data) - self.original_max = np.nanmax(data) - - if kwargs.get('order', None) is not None: - order = kwargs.get('order', (1,0,0)) + if 'order' in kwargs: + order = kwargs.pop('order') self._decompose_order(order) if self.indexer is not None: diff --git a/pyFTS/benchmarks/benchmarks.py b/pyFTS/benchmarks/benchmarks.py index 9c8852a..0b3e20b 100644 --- a/pyFTS/benchmarks/benchmarks.py +++ b/pyFTS/benchmarks/benchmarks.py @@ -156,12 +156,12 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs): if models is None: for method in methods: - mfts = method("") + mfts = method() if mfts.is_high_order: for order in orders: if order >= mfts.min_order: - mfts = method("") + mfts = method() mfts.order = order pool.append(mfts) else: @@ -190,7 +190,7 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs): for transformation in transformations: for count, model in enumerate(benchmark_methods, start=0): par = benchmark_methods_parameters[count] - mfts = model("", **par) + mfts = model(**par) 
mfts.append_transformation(transformation) benchmark_pool.append(mfts) @@ -203,6 +203,8 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs): elif type == 'distribution': experiment_method = run_probabilistic synthesis_method = process_probabilistic_jobs + else: + raise ValueError("Type parameter has an unknown value!") if distributed: import dispy, dispy.httpd @@ -213,28 +215,29 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs): experiments = 0 jobs = [] + inc = __pop("inc", 0.1, kwargs) + if progress: from tqdm import tqdm - progressbar = tqdm(total=len(data), desc="Sliding Window:") - - inc = __pop("inc", 0.1, kwargs) + _tdata = len(data) / (windowsize * inc) + _tasks = (len(partitioners_models) * len(orders) * len(partitions) * len(transformations) * len(steps_ahead)) + _tbcmk = len(benchmark_pool)*len(steps_ahead) + progressbar = tqdm(total=_tdata*_tasks + _tdata*_tbcmk, desc="Benchmarks:") file = kwargs.get('file', "benchmarks.db") conn = bUtil.open_benchmark_db(file) for ct, train, test in cUtil.sliding_window(data, windowsize, train, inc=inc, **kwargs): - experiments += 1 - - if progress: - progressbar.update(windowsize * inc) - if benchmark_models != False: for model in benchmark_pool: for step in steps_ahead: + kwargs['steps_ahead'] = step if not distributed: + if progress: + progressbar.update(1) job = experiment_method(deepcopy(model), None, train, test, **kwargs) synthesis_method(dataset, tag, job, conn) else: @@ -257,28 +260,17 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs): else: partitioners_pool = partitioners_models - rng1 = steps_ahead - if progress: - rng1 = tqdm(steps_ahead, desc="Steps") + for step in steps_ahead: - for step in rng1: - rng2 = partitioners_pool + for partitioner in partitioners_pool: - if progress: - rng2 = tqdm(partitioners_pool, desc="Partitioners") - - for partitioner in rng2: - - rng3 = enumerate(pool,start=0) - - if progress: - rng3 = enumerate(tqdm(pool,
desc="Models"),start=0) - - for _id, model in rng3: + for _id, model in enumerate(pool,start=0): kwargs['steps_ahead'] = step if not distributed: + if progress: + progressbar.update(1) job = experiment_method(deepcopy(model), deepcopy(partitioner), train, test, **kwargs) synthesis_method(dataset, tag, job, conn) else: @@ -291,12 +283,9 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs): if distributed: - rng = jobs - - if progress: - rng = tqdm(jobs) - - for job in rng: + for job in jobs: + if progress: + progressbar.update(1) job() if job.status == dispy.DispyJob.Finished and job is not None: tmp = job.result @@ -424,13 +413,15 @@ def run_interval(mfts, partitioner, train_data, test_data, window_key=None, **kw times = _end - _start _start = time.time() - _sharp, _res, _cov, _q05, _q25, _q75, _q95 = Measures.get_interval_statistics(test_data, mfts, **kwargs) + #_sharp, _res, _cov, _q05, _q25, _q75, _q95, _w05, _w25 + metrics = Measures.get_interval_statistics(test_data, mfts, **kwargs) _end = time.time() times += _end - _start - ret = {'key': _key, 'obj': mfts, 'sharpness': _sharp, 'resolution': _res, 'coverage': _cov, 'time': times, - 'Q05': _q05, 'Q25': _q25, 'Q75': _q75, 'Q95': _q95, 'window': window_key, - 'steps': steps_ahead, 'method': method} + ret = {'key': _key, 'obj': mfts, 'sharpness': metrics[0], 'resolution': metrics[1], 'coverage': metrics[2], + 'time': times,'Q05': metrics[3], 'Q25': metrics[4], 'Q75': metrics[5], 'Q95': metrics[6], + 'winkler05': metrics[7], 'winkler25': metrics[8], + 'window': window_key,'steps': steps_ahead, 'method': method} return ret @@ -543,6 +534,12 @@ def process_interval_jobs(dataset, tag, job, conn): Q95 = deepcopy(data) Q95.extend(["Q95", job["Q95"]]) bUtil.insert_benchmark(Q95, conn) + W05 = deepcopy(data) + W05.extend(["winkler05", job["winkler05"]]) + bUtil.insert_benchmark(W05, conn) + W25 = deepcopy(data) + W25.extend(["winkler25", job["winkler25"]]) + bUtil.insert_benchmark(W25, conn) def 
process_probabilistic_jobs(dataset, tag, job, conn): diff --git a/pyFTS/benchmarks/knn.py b/pyFTS/benchmarks/knn.py index 036e510..f26a3c8 100644 --- a/pyFTS/benchmarks/knn.py +++ b/pyFTS/benchmarks/knn.py @@ -11,8 +11,8 @@ class KNearestNeighbors(fts.FTS): """ K-Nearest Neighbors """ - def __init__(self, name, **kwargs): - super(KNearestNeighbors, self).__init__(1, "kNN"+name) + def __init__(self, **kwargs): + super(KNearestNeighbors, self).__init__(**kwargs) self.name = "kNN" self.shortname = "kNN" self.detail = "K-Nearest Neighbors" @@ -23,20 +23,12 @@ class KNearestNeighbors(fts.FTS): self.benchmark_only = True self.min_order = 1 self.alpha = kwargs.get("alpha", 0.05) - self.order = kwargs.get("order", 1) self.lag = None self.k = kwargs.get("k", 30) self.uod = None def train(self, data, **kwargs): - if kwargs.get('order', None) is not None: - self.order = kwargs.get('order', 1) - self.data = np.array(data) - self.original_max = max(data) - self.original_min = min(data) - - #self.lagdata, = lagmat(data, maxlag=self.order, trim="both", original='sep') def knn(self, sample): diff --git a/pyFTS/benchmarks/naive.py b/pyFTS/benchmarks/naive.py index bf9e1c1..d2dd86c 100644 --- a/pyFTS/benchmarks/naive.py +++ b/pyFTS/benchmarks/naive.py @@ -6,8 +6,8 @@ from pyFTS.common import fts class Naive(fts.FTS): """Naïve Forecasting method""" - def __init__(self, name, **kwargs): - super(Naive, self).__init__(1, "Naive") + def __init__(self, **kwargs): + super(Naive, self).__init__(order=1, name="Naive",**kwargs) self.name = "Naïve Model" self.detail = "Naïve Model" self.benchmark_only = True diff --git a/pyFTS/benchmarks/quantreg.py b/pyFTS/benchmarks/quantreg.py index fe6e3f4..f87e409 100644 --- a/pyFTS/benchmarks/quantreg.py +++ b/pyFTS/benchmarks/quantreg.py @@ -11,8 +11,8 @@ from pyFTS.probabilistic import ProbabilityDistribution class QuantileRegression(fts.FTS): """Façade for statsmodels.regression.quantile_regression""" - def __init__(self, name, **kwargs): - 
super(QuantileRegression, self).__init__(1, "") + def __init__(self, **kwargs): + super(QuantileRegression, self).__init__(**kwargs) self.name = "QR" self.detail = "Quantile Regression" self.is_high_order = True @@ -27,13 +27,8 @@ class QuantileRegression(fts.FTS): self.mean_qt = None self.lower_qt = None self.dist_qt = None - self.order = kwargs.get('order', 1) - self.shortname = "QAR("+str(self.order)+","+str(self.alpha)+")" def train(self, data, **kwargs): - if 'order' in kwargs: - self.order = kwargs.get('order', 1) - if self.indexer is not None and isinstance(data, pd.DataFrame): data = self.indexer.get_data(data) @@ -58,9 +53,6 @@ class QuantileRegression(fts.FTS): up_qt = [k for k in uqt.params] self.dist_qt.append([lo_qt, up_qt]) - self.original_min = min(data) - self.original_max = max(data) - self.shortname = "QAR(" + str(self.order) + ") - " + str(self.alpha) def linearmodel(self,data,params): diff --git a/pyFTS/common/flrg.py b/pyFTS/common/flrg.py index b1c7ce6..7e8f990 100644 --- a/pyFTS/common/flrg.py +++ b/pyFTS/common/flrg.py @@ -38,11 +38,11 @@ class FLRG(object): self.key = self.key + n return self.key - def get_membership(self, data, sets): ret = 0.0 if isinstance(self.LHS, (list, set)): - ret = np.nanmin([sets[self.LHS[ct]].membership(dat) for ct, dat in enumerate(data)]) + if len(self.LHS) == len(data): + ret = np.nanmin([sets[self.LHS[ct]].membership(dat) for ct, dat in enumerate(data)]) else: ret = sets[self.LHS].membership(data) return ret diff --git a/pyFTS/common/fts.py b/pyFTS/common/fts.py index 8ed15d0..d84308c 100644 --- a/pyFTS/common/fts.py +++ b/pyFTS/common/fts.py @@ -7,7 +7,7 @@ class FTS(object): """ Fuzzy Time Series object model """ - def __init__(self, order, name, **kwargs): + def __init__(self, **kwargs): """ Create a Fuzzy Time Series model :param order: model order @@ -16,10 +16,10 @@ class FTS(object): """ self.sets = {} self.flrgs = {} - self.order = order - self.shortname = name - self.name = name - self.detail = name 
+ self.order = kwargs.get('order',1) # first order by default; an empty string breaks order arithmetic + self.shortname = kwargs.get('name',"") + self.name = kwargs.get('name',"") + self.detail = kwargs.get('name',"") self.is_high_order = False self.min_order = 1 self.has_seasonality = False @@ -75,6 +75,8 @@ class FTS(object): else: ndata = self.apply_transformations(data) + ndata = np.clip(ndata, self.original_min, self.original_max) + if 'distributed' in kwargs: distributed = kwargs.pop('distributed') else: @@ -222,6 +224,24 @@ class FTS(object): else: data = self.apply_transformations(ndata) + self.original_min = np.nanmin(data) + self.original_max = np.nanmax(data) + + if 'sets' in kwargs: + self.sets = kwargs.pop('sets') + + if 'partitioner' in kwargs: + self.partitioner = kwargs.pop('partitioner') + + if (self.sets is None or len(self.sets) == 0) and not self.benchmark_only: + if self.partitioner is not None: + self.sets = self.partitioner.sets + else: + raise Exception("Fuzzy sets were not provided for the model. Use 'sets' parameter or 'partitioner'. ") + + if 'order' in kwargs: + self.order = kwargs.pop('order') + dump = kwargs.get('dump', None) num_batches = kwargs.get('num_batches', None) diff --git a/pyFTS/data/NASDAQ.py b/pyFTS/data/NASDAQ.py index 83a7787..c61e513 100644 --- a/pyFTS/data/NASDAQ.py +++ b/pyFTS/data/NASDAQ.py @@ -3,7 +3,7 @@ import pandas as pd import numpy as np -def get_data(field): +def get_data(field="avg"): """ Get a simple univariate time series data.
:param field: the dataset field name to extract @@ -21,6 +21,6 @@ def get_dataframe(): """ dat = common.get_dataframe('NASDAQ.csv.bz2', 'https://github.com/petroniocandido/pyFTS/raw/8f20f3634aa6a8f58083bdcd1bbf93795e6ed767/pyFTS/data/NASDAQ.csv.bz2', - sep=";", compression='bz2') + sep=",", compression='bz2') return dat diff --git a/pyFTS/models/chen.py b/pyFTS/models/chen.py index f157428..ebba22b 100644 --- a/pyFTS/models/chen.py +++ b/pyFTS/models/chen.py @@ -33,10 +33,11 @@ class ConventionalFLRG(flrg.FLRG): class ConventionalFTS(fts.FTS): """Conventional Fuzzy Time Series""" - def __init__(self, name, **kwargs): - super(ConventionalFTS, self).__init__(1, "CFTS " + name, **kwargs) + def __init__(self, **kwargs): + super(ConventionalFTS, self).__init__(order=1, **kwargs) self.name = "Conventional FTS" self.detail = "Chen" + self.shortname = "CFTS" self.flrgs = {} def generate_flrg(self, flrs): @@ -48,10 +49,6 @@ class ConventionalFTS(fts.FTS): self.flrgs[flr.LHS].append_rhs(flr.RHS) def train(self, data, **kwargs): - if kwargs.get('sets', None) is not None: - self.sets = kwargs.get('sets', None) - else: - self.sets = self.partitioner.sets tmpdata = FuzzySet.fuzzyfy_series(data, self.sets, method='maximum') flrs = FLR.generate_non_recurrent_flrs(tmpdata) diff --git a/pyFTS/models/cheng.py b/pyFTS/models/cheng.py index 37194e7..436d9ba 100644 --- a/pyFTS/models/cheng.py +++ b/pyFTS/models/cheng.py @@ -45,9 +45,9 @@ class TrendWeightedFLRG(yu.WeightedFLRG): class TrendWeightedFTS(yu.WeightedFTS): """First Order Trend Weighted Fuzzy Time Series""" - def __init__(self, name, **kwargs): - super(TrendWeightedFTS, self).__init__("", **kwargs) - self.shortname = "TWFTS " + name + def __init__(self, **kwargs): + super(TrendWeightedFTS, self).__init__(**kwargs) + self.shortname = "TWFTS" self.name = "Trend Weighted FTS" self.detail = "Cheng" self.is_high_order = False diff --git a/pyFTS/models/ensemble/ensemble.py b/pyFTS/models/ensemble/ensemble.py index 3282b7c..b2b8715 
100644 --- a/pyFTS/models/ensemble/ensemble.py +++ b/pyFTS/models/ensemble/ensemble.py @@ -17,9 +17,9 @@ def sampler(data, quantiles): class EnsembleFTS(fts.FTS): - def __init__(self, name, **kwargs): - super(EnsembleFTS, self).__init__(1, "Ensemble FTS", **kwargs) - self.shortname = "Ensemble FTS " + name + def __init__(self, **kwargs): + super(EnsembleFTS, self).__init__(**kwargs) + self.shortname = "Ensemble FTS" self.name = "Ensemble FTS" self.flrgs = {} self.has_point_forecasting = True @@ -29,7 +29,6 @@ class EnsembleFTS(fts.FTS): self.models = [] self.parameters = [] self.alpha = kwargs.get("alpha", 0.05) - self.order = 1 self.point_method = kwargs.get('point_method', 'mean') self.interval_method = kwargs.get('interval_method', 'quantile') @@ -39,8 +38,7 @@ class EnsembleFTS(fts.FTS): self.order = model.order def train(self, data, **kwargs): - self.original_max = max(data) - self.original_min = min(data) + pass def get_models_forecasts(self,data): tmp = [] @@ -246,8 +244,8 @@ class EnsembleFTS(fts.FTS): class AllMethodEnsembleFTS(EnsembleFTS): - def __init__(self, name, **kwargs): - super(AllMethodEnsembleFTS, self).__init__(name="Ensemble FTS"+name, **kwargs) + def __init__(self, **kwargs): + super(AllMethodEnsembleFTS, self).__init__(**kwargs) self.min_order = 3 self.shortname ="Ensemble FTS" @@ -256,26 +254,22 @@ class AllMethodEnsembleFTS(EnsembleFTS): model.append_transformation(t) def train(self, data, **kwargs): - self.original_max = max(data) - self.original_min = min(data) - - order = kwargs.get('order',2) - fo_methods = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, cheng.TrendWeightedFTS, sadaei.ExponentialyWeightedFTS, ismailefendi.ImprovedWeightedFTS] ho_methods = [hofts.HighOrderFTS, hwang.HighOrderFTS] for method in fo_methods: - model = method("", partitioner=self.partitioner) + model = method(partitioner=self.partitioner) self.set_transformations(model) model.fit(data, **kwargs) self.append_model(model) for method in 
ho_methods: - for o in np.arange(1, order+1): - model = method("", partitioner=self.partitioner) + for o in np.arange(1, self.order+1): + model = method(partitioner=self.partitioner) if model.min_order >= o: + model.order = o self.set_transformations(model) model.fit(data, **kwargs) self.append_model(model) diff --git a/pyFTS/models/hofts.py b/pyFTS/models/hofts.py index 16a4224..79ab2f5 100644 --- a/pyFTS/models/hofts.py +++ b/pyFTS/models/hofts.py @@ -38,13 +38,11 @@ class HighOrderFLRG(flrg.FLRG): class HighOrderFTS(fts.FTS): """Conventional High Order Fuzzy Time Series""" - def __init__(self, name, **kwargs): - super(HighOrderFTS, self).__init__(1, name="HOFTS" + name, **kwargs) + def __init__(self, **kwargs): + super(HighOrderFTS, self).__init__(**kwargs) self.name = "High Order FTS" - self.shortname = "HOFTS" + name + self.shortname = "HOFTS" self.detail = "Chen" - self.order = kwargs.get('order',1) - self.setsDict = {} self.is_high_order = True self.min_order = 2 @@ -94,13 +92,6 @@ class HighOrderFTS(fts.FTS): def train(self, data, **kwargs): - self.order = kwargs.get('order',2) - - if kwargs.get('sets', None) is not None: - self.sets = kwargs.get('sets', None) - else: - self.sets = self.partitioner.sets - self.generate_flrg(data) def forecast(self, ndata, **kwargs): @@ -115,10 +106,12 @@ class HighOrderFTS(fts.FTS): for k in np.arange(self.order, l+1): flrgs = self.generate_lhs_flrg(ndata[k - self.order: k]) + tmp = [] for flrg in flrgs: - tmp = [] + if flrg.get_key() not in self.flrgs: - tmp.append(self.sets[flrg.LHS[-1]].centroid) + if len(flrg.LHS) > 0: + tmp.append(self.sets[flrg.LHS[-1]].centroid) else: flrg = self.flrgs[flrg.get_key()] tmp.append(flrg.get_midpoint(self.sets)) diff --git a/pyFTS/models/hwang.py b/pyFTS/models/hwang.py index dcb3eeb..6f862d7 100644 --- a/pyFTS/models/hwang.py +++ b/pyFTS/models/hwang.py @@ -10,12 +10,12 @@ from pyFTS.common import FuzzySet, FLR, Transformations, fts class HighOrderFTS(fts.FTS): - def __init__(self, 
name, **kwargs): - super(HighOrderFTS, self).__init__(1, name, **kwargs) + def __init__(self, **kwargs): + super(HighOrderFTS, self).__init__(**kwargs) self.is_high_order = True self.min_order = 2 self.name = "Hwang High Order FTS" - self.shortname = "Hwang" + name + self.shortname = "Hwang" self.detail = "Hwang" def forecast(self, ndata, **kwargs): @@ -57,9 +57,4 @@ class HighOrderFTS(fts.FTS): return ret def train(self, data, **kwargs): - if kwargs.get('sets', None) is not None: - self.sets = kwargs.get('sets', None) - else: - self.sets = self.partitioner.sets - - self.order = kwargs.get('order', 2) \ No newline at end of file + pass \ No newline at end of file diff --git a/pyFTS/models/ifts.py b/pyFTS/models/ifts.py index 8999c2b..002ed3e 100644 --- a/pyFTS/models/ifts.py +++ b/pyFTS/models/ifts.py @@ -17,9 +17,9 @@ class IntervalFTS(hofts.HighOrderFTS): """ High Order Interval Fuzzy Time Series """ - def __init__(self, name, **kwargs): - super(IntervalFTS, self).__init__(name="IFTS " + name, **kwargs) - self.shortname = "IFTS " + name + def __init__(self, **kwargs): + super(IntervalFTS, self).__init__(**kwargs) + self.shortname = "IFTS" self.name = "Interval FTS" self.detail = "Silva, P.; Guimarães, F.; Sadaei, H. 
(2016)" self.flrgs = {} @@ -29,19 +29,23 @@ class IntervalFTS(hofts.HighOrderFTS): self.min_order = 1 def get_upper(self, flrg): - if flrg.get_key() in self.flrgs: - tmp = self.flrgs[flrg.get_key()] - ret = tmp.get_upper(self.sets) - else: - ret = self.sets[flrg.LHS[-1]].upper + ret = np.nan + if len(flrg.LHS) > 0: + if flrg.get_key() in self.flrgs: + tmp = self.flrgs[flrg.get_key()] + ret = tmp.get_upper(self.sets) + else: + ret = self.sets[flrg.LHS[-1]].upper return ret def get_lower(self, flrg): - if flrg.get_key() in self.flrgs: - tmp = self.flrgs[flrg.get_key()] - ret = tmp.get_lower(self.sets) - else: - ret = self.sets[flrg.LHS[-1]].lower + ret = np.nan + if len(flrg.LHS) > 0: + if flrg.get_key() in self.flrgs: + tmp = self.flrgs[flrg.get_key()] + ret = tmp.get_lower(self.sets) + else: + ret = self.sets[flrg.LHS[-1]].lower return ret def get_sequence_membership(self, data, fuzzySets): @@ -69,11 +73,12 @@ class IntervalFTS(hofts.HighOrderFTS): affected_flrgs_memberships = [] for flrg in flrgs: - # achar o os bounds de cada FLRG, ponderados pela pertinência - mv = flrg.get_membership(sample, self.sets) - up.append(mv * self.get_upper(flrg)) - lo.append(mv * self.get_lower(flrg)) - affected_flrgs_memberships.append(mv) + if len(flrg.LHS) > 0: + # achar o os bounds de cada FLRG, ponderados pela pertinência + mv = flrg.get_membership(sample, self.sets) + up.append(mv * self.get_upper(flrg)) + lo.append(mv * self.get_lower(flrg)) + affected_flrgs_memberships.append(mv) # gerar o intervalo norm = sum(affected_flrgs_memberships) diff --git a/pyFTS/models/ismailefendi.py b/pyFTS/models/ismailefendi.py index 130be84..0764762 100644 --- a/pyFTS/models/ismailefendi.py +++ b/pyFTS/models/ismailefendi.py @@ -47,8 +47,8 @@ class ImprovedWeightedFLRG(flrg.FLRG): class ImprovedWeightedFTS(fts.FTS): """First Order Improved Weighted Fuzzy Time Series""" - def __init__(self, name, **kwargs): - super(ImprovedWeightedFTS, self).__init__(1, "IWFTS " + name, **kwargs) + def 
__init__(self, **kwargs): + super(ImprovedWeightedFTS, self).__init__(order=1, name="IWFTS", **kwargs) self.name = "Improved Weighted FTS" self.detail = "Ismail & Efendi" @@ -61,10 +61,6 @@ class ImprovedWeightedFTS(fts.FTS): self.flrgs[flr.LHS].append_rhs(flr.RHS) def train(self, ndata, **kwargs): - if kwargs.get('sets', None) is not None: - self.sets = kwargs.get('sets', None) - else: - self.sets = self.partitioner.sets tmpdata = FuzzySet.fuzzyfy_series(ndata, self.sets, method='maximum') flrs = FLR.generate_recurrent_flrs(tmpdata) @@ -73,7 +69,10 @@ class ImprovedWeightedFTS(fts.FTS): def forecast(self, ndata, **kwargs): l = 1 - ordered_sets = FuzzySet.set_ordered(self.sets) + if self.partitioner is not None: + ordered_sets = self.partitioner.ordered_sets + else: + ordered_sets = FuzzySet.set_ordered(self.sets) ndata = np.array(ndata) l = len(ndata) diff --git a/pyFTS/models/multivariate/mvfts.py b/pyFTS/models/multivariate/mvfts.py index a0831a5..296d99c 100644 --- a/pyFTS/models/multivariate/mvfts.py +++ b/pyFTS/models/multivariate/mvfts.py @@ -11,7 +11,7 @@ class MVFTS(fts.FTS): Multivariate extension of Chen's ConventionalFTS method """ def __init__(self, name, **kwargs): - super(MVFTS, self).__init__(1, name, **kwargs) + super(MVFTS, self).__init__(order=1, name=name, **kwargs) self.explanatory_variables = [] self.target_variable = None self.flrgs = {} @@ -91,8 +91,6 @@ class MVFTS(fts.FTS): ndata = self.apply_transformations(data) - self.order = kwargs.get('order',1) - flrs = self.generate_flrs(ndata) self.generate_flrg(flrs) diff --git a/pyFTS/models/pwfts.py b/pyFTS/models/pwfts.py index 45b9b6c..fe93abb 100644 --- a/pyFTS/models/pwfts.py +++ b/pyFTS/models/pwfts.py @@ -94,9 +94,9 @@ class ProbabilisticWeightedFLRG(hofts.HighOrderFLRG): class ProbabilisticWeightedFTS(ifts.IntervalFTS): """High Order Probabilistic Weighted Fuzzy Time Series""" - def __init__(self, name, **kwargs): - super(ProbabilisticWeightedFTS, self).__init__(name=name, **kwargs) - 
self.shortname = "PWFTS " + name + def __init__(self, **kwargs): + super(ProbabilisticWeightedFTS, self).__init__(**kwargs) + self.shortname = "PWFTS" self.name = "Probabilistic FTS" self.detail = "Silva, P.; Guimarães, F.; Sadaei, H." self.flrgs = {} @@ -108,22 +108,10 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): self.min_order = 1 self.auto_update = kwargs.get('update',False) - def train(self, data, **kwargs): - data = self.apply_transformations(data, updateUoD=True) - parameters = kwargs.get('parameters','fuzzy') - self.order = kwargs.get('order',1) - - if kwargs.get('sets', None) is None and self.partitioner is not None: - self.sets = self.partitioner.sets - self.original_min = self.partitioner.min - self.original_max = self.partitioner.max - else: - self.sets = kwargs.get('sets',None) - if parameters == 'monotonic': tmpdata = FuzzySet.fuzzyfy_series_old(data, self.sets) flrs = FLR.generate_recurrent_flrs(tmpdata) @@ -237,7 +225,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): # this may be the problem! TEST IT!!! 
########################################## pi = 1 / len(flrg.LHS) - ret = sum(np.array([pi * self.setsDict[s].membership(x) for s in flrg.LHS])) + ret = sum(np.array([pi * self.sets[s].membership(x) for s in flrg.LHS])) return ret def get_upper(self, flrg): diff --git a/pyFTS/models/sadaei.py b/pyFTS/models/sadaei.py index 22e7188..5c81648 100644 --- a/pyFTS/models/sadaei.py +++ b/pyFTS/models/sadaei.py @@ -51,8 +51,8 @@ class ExponentialyWeightedFLRG(flrg.FLRG): class ExponentialyWeightedFTS(fts.FTS): """First Order Exponentialy Weighted Fuzzy Time Series""" - def __init__(self, name, **kwargs): - super(ExponentialyWeightedFTS, self).__init__(1, "EWFTS", **kwargs) + def __init__(self, **kwargs): + super(ExponentialyWeightedFTS, self).__init__(order=1, name="EWFTS", **kwargs) self.name = "Exponentialy Weighted FTS" self.detail = "Sadaei" self.c = kwargs.get('c', default_c) @@ -66,12 +66,6 @@ class ExponentialyWeightedFTS(fts.FTS): self.flrgs[flr.LHS].append_rhs(flr.RHS) def train(self, data, **kwargs): - self.c = kwargs.get('parameters', default_c) - if kwargs.get('sets', None) is not None: - self.sets = kwargs.get('sets', None) - else: - self.sets = self.partitioner.sets - tmpdata = FuzzySet.fuzzyfy_series(data, self.sets, method='maximum') flrs = FLR.generate_recurrent_flrs(tmpdata) self.generate_flrg(flrs, self.c) @@ -79,7 +73,10 @@ class ExponentialyWeightedFTS(fts.FTS): def forecast(self, ndata, **kwargs): l = 1 - ordered_sets = FuzzySet.set_ordered(self.sets) + if self.partitioner is not None: + ordered_sets = self.partitioner.ordered_sets + else: + ordered_sets = FuzzySet.set_ordered(self.sets) data = np.array(ndata) diff --git a/pyFTS/models/song.py b/pyFTS/models/song.py index 326265c..f7b4b77 100644 --- a/pyFTS/models/song.py +++ b/pyFTS/models/song.py @@ -10,8 +10,8 @@ from pyFTS.common import FuzzySet, FLR, fts class ConventionalFTS(fts.FTS): """Traditional Fuzzy Time Series""" - def __init__(self, name, **kwargs): - super(ConventionalFTS, 
self).__init__(1, "FTS " + name, **kwargs) + def __init__(self, **kwargs): + super(ConventionalFTS, self).__init__(order=1, name="FTS", **kwargs) self.name = "Traditional FTS" self.detail = "Song & Chissom" if self.sets is not None and self.partitioner is not None: @@ -49,10 +49,6 @@ class ConventionalFTS(fts.FTS): def train(self, data, **kwargs): - if kwargs.get('sets', None) is not None: - self.sets = kwargs.get('sets', None) - else: - self.sets = self.partitioner.sets tmpdata = FuzzySet.fuzzyfy_series(data, self.sets, method='maximum') flrs = FLR.generate_non_recurrent_flrs(tmpdata) @@ -60,7 +56,10 @@ class ConventionalFTS(fts.FTS): def forecast(self, ndata, **kwargs): - ordered_set = FuzzySet.set_ordered(self.sets) + if self.partitioner is not None: + ordered_sets = self.partitioner.ordered_sets + else: + ordered_sets = FuzzySet.set_ordered(self.sets) l = len(ndata) npart = len(self.sets) @@ -75,9 +74,9 @@ class ConventionalFTS(fts.FTS): fs = np.ravel(np.argwhere(r == max(r))) if len(fs) == 1: - ret.append(self.sets[ordered_set[fs[0]]].centroid) + ret.append(self.sets[ordered_sets[fs[0]]].centroid) else: - mp = [self.sets[ordered_set[s]].centroid for s in fs] + mp = [self.sets[ordered_sets[s]].centroid for s in fs] ret.append( sum(mp)/len(mp)) diff --git a/pyFTS/models/yu.py b/pyFTS/models/yu.py index 427d642..95090be 100644 --- a/pyFTS/models/yu.py +++ b/pyFTS/models/yu.py @@ -44,8 +44,8 @@ class WeightedFLRG(flrg.FLRG): class WeightedFTS(fts.FTS): """First Order Weighted Fuzzy Time Series""" - def __init__(self, name, **kwargs): - super(WeightedFTS, self).__init__(1, "WFTS " + name, **kwargs) + def __init__(self, **kwargs): + super(WeightedFTS, self).__init__(order=1, name="WFTS", **kwargs) self.name = "Weighted FTS" self.detail = "Yu" @@ -58,18 +58,16 @@ class WeightedFTS(fts.FTS): self.flrgs[flr.LHS].append_rhs(flr.RHS) def train(self, ndata, **kwargs): - if kwargs.get('sets', None) is not None: - self.sets = kwargs.get('sets', None) - else: - self.sets = 
self.partitioner.sets - tmpdata = FuzzySet.fuzzyfy_series(ndata, self.sets, method='maximum') flrs = FLR.generate_recurrent_flrs(tmpdata) self.generate_FLRG(flrs) def forecast(self, ndata, **kwargs): - ordered_sets = FuzzySet.set_ordered(self.sets) + if self.partitioner is not None: + ordered_sets = self.partitioner.ordered_sets + else: + ordered_sets = FuzzySet.set_ordered(self.sets) ndata = np.array(ndata) diff --git a/pyFTS/tests/general.py b/pyFTS/tests/general.py index d934f6f..b31c57b 100644 --- a/pyFTS/tests/general.py +++ b/pyFTS/tests/general.py @@ -11,35 +11,61 @@ from pyFTS.common import Transformations tdiff = Transformations.Differential(1) -from pyFTS.data import TAIEX, SP500 +from pyFTS.data import TAIEX, SP500, NASDAQ #dataset = TAIEX.get_data() dataset = SP500.get_data()[11500:16000] +#dataset = NASDAQ.get_data() #print(len(dataset)) ''' from pyFTS.partitioners import Grid, Util as pUtil partitioner = Grid.GridPartitioner(data=dataset[:800], npart=10) #, transformation=tdiff) ''' -from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil, Measures, knn, quantreg, arima +from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil, Measures, knn, quantreg, arima, naive from pyFTS.models import pwfts, song, ifts from pyFTS.models.ensemble import ensemble ''' -#model = knn.KNearestNeighbors("") +model = knn.KNearestNeighbors(order=3) #model = ensemble.AllMethodEnsembleFTS("", partitioner=partitioner) #model = arima.ARIMA("", order=(2,0,2)) #model = quantreg.QuantileRegression("", order=2, dist=True) -model.append_transformation(tdiff) +#model.append_transformation(tdiff) model.fit(dataset[:800]) -Measures.get_distribution_statistics(dataset[800:1000], model) +print(Measures.get_distribution_statistics(dataset[800:1000], model)) #tmp = model.predict(dataset[800:1000], type='distribution') #for tmp2 in tmp: # print(tmp2) #''' +#''' + +bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2, + methods=[pwfts.ProbabilisticWeightedFTS], 
+ benchmark_models=False, + transformations=[None], + orders=[1, 2, 3], + partitions=np.arange(10, 90, 5), + progress=False, type="point", + #steps_ahead=[1,2,4,6,8,10], + distributed=True, nodes=['192.168.0.110', '192.168.0.107', '192.168.0.106'], + file="benchmarks.db", dataset="SP500", tag="partitioning") + +bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2, + methods=[pwfts.ProbabilisticWeightedFTS], + benchmark_models=False, + transformations=[tdiff], + orders=[1, 2, 3], + partitions=np.arange(3, 30, 2), + progress=False, type="point", + #steps_ahead=[1,2,4,6,8,10], + distributed=True, nodes=['192.168.0.110', '192.168.0.107', '192.168.0.106'], + file="benchmarks.db", dataset="SP500", tag="partitioning") + +#''' ''' from pyFTS.partitioners import Grid, Util as pUtil partitioner = Grid.GridPartitioner(data=dataset[:800], npart=10, transformation=tdiff) @@ -52,24 +78,76 @@ print(Measures.get_distribution_statistics(dataset[800:1000], model, steps_ahead #for tmp2 in tmp: # print(tmp2) ''' +''' -#''' +types = ['point','interval','distribution'] +benchmark_methods=[ + [arima.ARIMA for k in range(4)] + [naive.Naive], + [arima.ARIMA for k in range(8)] + [quantreg.QuantileRegression for k in range(4)], + [arima.ARIMA for k in range(4)] + [quantreg.QuantileRegression for k in range(2)] + + [knn.KNearestNeighbors for k in range(3)] + ] +benchmark_methods_parameters= [ + [ + {'order': (1, 0, 0)}, + {'order': (1, 0, 1)}, + {'order': (2, 0, 1)}, + {'order': (2, 0, 2)}, + {}, + ],[ + {'order': (1, 0, 0), 'alpha': .05}, + {'order': (1, 0, 0), 'alpha': .25}, + {'order': (1, 0, 1), 'alpha': .05}, + {'order': (1, 0, 1), 'alpha': .25}, + {'order': (2, 0, 1), 'alpha': .05}, + {'order': (2, 0, 1), 'alpha': .25}, + {'order': (2, 0, 2), 'alpha': .05}, + {'order': (2, 0, 2), 'alpha': .25}, + {'order': 1, 'alpha': .05}, + {'order': 1, 'alpha': .25}, + {'order': 2, 'alpha': .05}, + {'order': 2, 'alpha': .25} + ],[ + {'order': (1, 0, 0)}, + {'order': (1, 0, 1)}, + 
{'order': (2, 0, 1)}, + {'order': (2, 0, 2)}, + {'order': 1, 'dist': True}, + {'order': 2, 'dist': True}, + {'order': 1}, {'order': 2}, {'order': 3}, + ] +] +dataset_name = "NASDAQ" +tag = "comparisons" from pyFTS.benchmarks import arima, naive, quantreg -bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2, - methods=[pwfts.ProbabilisticWeightedFTS], - benchmark_models=False, - transformations=[tdiff], - orders=[1,2,3], - partitions=np.arange(3, 50, 2), - progress=False, type='point', - #steps_ahead=[1,4,7,10], #steps_ahead=[1] - distributed=True, nodes=['192.168.0.110', '192.168.0.107','192.168.0.106'], - file="benchmarks.db", dataset="SP500", tag="partitioning") +for ct, type in enumerate(types): + + bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2, + benchmark_models=True, + benchmark_methods=benchmark_methods[ct], + benchmark_methods_parameters=benchmark_methods_parameters[ct], + transformations=[None], + orders=[1,2,3], + partitions=np.arange(15, 85, 5), + progress=False, type=type, + distributed=True, nodes=['192.168.0.110', '192.168.0.107','192.168.0.106'], + file="benchmarks.db", dataset=dataset_name, tag=tag) + + bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2, + benchmark_models=True, + benchmark_methods=benchmark_methods[ct], + benchmark_methods_parameters=benchmark_methods_parameters[ct], + transformations=[tdiff], + orders=[1, 2, 3], + partitiTAIEXons=np.arange(3, 35, 2), + progress=False, type=type, + distributed=True, nodes=['192.168.0.110', '192.168.0.107', '192.168.0.106'], + file="benchmarks.db", dataset=dataset_name, tag=tag) -#''' +''' ''' dat = pd.read_csv('pwfts_taiex_partitioning.csv', sep=';') print(bUtil.analytic_tabular_dataframe(dat))