From 33dbeb8965fefb846aa78f8273c0f244203f8122 Mon Sep 17 00:00:00 2001
From: Petrônio Cândido
Date: Tue, 24 Apr 2018 12:57:40 -0300
Subject: [PATCH] Minor bugfixes on pwfts.models

---
 pyFTS/benchmarks/Measures.py    | 23 ++++++++++++++----
 pyFTS/benchmarks/benchmarks.py  | 41 ++++++++++++++++-----------------
 pyFTS/common/Transformations.py | 33 +++++++++++++++++++-------
 pyFTS/common/fts.py             |  6 ++---
 pyFTS/models/chen.py            |  5 +++-
 pyFTS/models/hofts.py           |  2 ++
 pyFTS/models/ismailefendi.py    |  4 +++-
 pyFTS/models/sadaei.py          |  5 +++-
 pyFTS/models/song.py            |  4 +++-
 pyFTS/models/yu.py              |  4 +++-
 pyFTS/tests/general.py          | 30 +++++++++++++-----------
 11 files changed, 102 insertions(+), 55 deletions(-)

diff --git a/pyFTS/benchmarks/Measures.py b/pyFTS/benchmarks/Measures.py
index 3ae8685..b8dae1a 100644
--- a/pyFTS/benchmarks/Measures.py
+++ b/pyFTS/benchmarks/Measures.py
@@ -215,6 +215,17 @@ def pinball_mean(tau, targets, forecasts):
         print(ex)
 
 
+def brier_score(targets, densities):
+    '''Brier (1950). "Verification of Forecasts Expressed in Terms of Probability". Monthly Weather Review. 78: 1–3.'''
+    ret = []
+    for ct, d in enumerate(densities):
+        v = d.bin_index.find_ge(targets[ct])
+        score = sum([d.distribution[k] ** 2 for k in d.bins if k != v])
+        score += (d.distribution[v] - 1) ** 2
+        ret.append(score)
+    return sum(ret)/len(ret)
+
+
 def pmf_to_cdf(density):
     ret = []
     for row in density.index:
@@ -236,7 +247,6 @@ def heavyside_cdf(bins, targets):
     df = pd.DataFrame(ret, columns=bins)
     return df
 
-
 def crps(targets, densities):
     '''
     Continuous Ranked Probability Score
@@ -277,6 +287,7 @@ def get_point_statistics(data, model, **kwargs):
     '''
 
     steps_ahead = kwargs.get('steps_ahead',1)
+    kwargs['type'] = 'point'
 
     indexer = kwargs.get('indexer', None)
 
@@ -301,7 +312,7 @@ def get_point_statistics(data, model, **kwargs):
         nforecasts = []
         for k in np.arange(model.order, len(ndata)-steps_ahead,steps_ahead_sampler):
             sample = ndata[k - model.order: k]
-            tmp = model.forecast_ahead(sample, steps_ahead, **kwargs)
+            tmp = model.predict(sample, **kwargs)
             nforecasts.append(tmp[-1])
 
         start = model.order + steps_ahead -1
@@ -323,6 +334,7 @@ def get_interval_statistics(data, model, **kwargs):
     '''
 
     steps_ahead = kwargs.get('steps_ahead', 1)
+    kwargs['type'] = 'interval'
 
     ret = list()
 
@@ -339,7 +351,7 @@ def get_interval_statistics(data, model, **kwargs):
         forecasts = []
         for k in np.arange(model.order, len(data) - steps_ahead):
             sample = data[k - model.order: k]
-            tmp = model.predict(sample, steps_ahead, **kwargs)
+            tmp = model.predict(sample, **kwargs)
             forecasts.append(tmp[-1])
 
         start = model.order + steps_ahead -1
@@ -362,12 +374,13 @@ def get_distribution_statistics(data, model, **kwargs):
     :return: a list with the CRPS and execution time
     '''
     steps_ahead = kwargs.get('steps_ahead', 1)
+    kwargs['type'] = 'distribution'
 
     ret = list()
 
     if steps_ahead == 1:
         _s1 = time.time()
-        forecasts = model.forecast_distribution(data, **kwargs)
+        forecasts = model.predict(data, **kwargs)
         _e1 = time.time()
         ret.append(round(crps(data, forecasts), 3))
         ret.append(round(_e1 - _s1, 3))
@@ -377,7 +390,7 @@ def get_distribution_statistics(data, model, **kwargs):
         _s1 = time.time()
         for k in np.arange(model.order, len(data) - steps_ahead, skip):
             sample = data[k - model.order: k]
-            tmp = model.forecast_ahead_distribution(sample, steps_ahead, **kwargs)
+            tmp = model.predict(sample, **kwargs)
             forecasts.append(tmp[-1])
         _e1 = time.time()
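
The brier_score measure added above computes, for each observation, the sum over bins k != v of p[k]**2 plus (p[v] - 1)**2, where v is the bin where the observation fell; that is exactly the mean squared difference between the forecasted probabilities and the one-hot indicator of the observed bin. A minimal self-contained sketch of the same computation, using plain dicts as a stand-in for pyFTS's ProbabilityDistribution objects (d.bin_index.find_ge is emulated by taking the first bin >= target; all names below are illustrative):

import numpy as np

def brier_score_sketch(targets, densities):
    # densities: list of dicts mapping bin value -> forecasted probability,
    # a stand-in for the pyFTS ProbabilityDistribution objects used above
    scores = []
    for target, dist in zip(targets, densities):
        bins = sorted(dist.keys())
        # first bin >= target plays the role of d.bin_index.find_ge(target)
        hit = next(b for b in bins if b >= target)
        # squared error against the one-hot indicator of the observed bin
        score = sum((p - (1.0 if b == hit else 0.0)) ** 2
                    for b, p in dist.items())
        scores.append(score)
    return np.mean(scores)

# a perfect forecast scores 0, a maximal miss scores 2
print(brier_score_sketch([10.0], [{5.0: 0.0, 10.0: 1.0, 15.0: 0.0}]))  # 0.0
print(brier_score_sketch([10.0], [{5.0: 1.0, 10.0: 0.0, 15.0: 0.0}]))  # 2.0
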
diff --git a/pyFTS/benchmarks/benchmarks.py b/pyFTS/benchmarks/benchmarks.py
index 4b7d154..02f39a3 100644
--- a/pyFTS/benchmarks/benchmarks.py
+++ b/pyFTS/benchmarks/benchmarks.py
@@ -56,10 +56,13 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
     partitions and partitioning method will be created a partitioner model. And for each partitioner, order,
     steps ahead and FTS method a foreasting model will be trained.
 
-    Then all trained models are benchmarked on the test data and the metrics are stored in a datafame for
-    posterior analysis.
+    Then all trained models are benchmarked on the test data and the metrics are stored in a sqlite3 database
+    (identified by the 'file' parameter) for later analysis.
 
-    The number of experiments is determined by the windowsize and inc.
+    All this processing can be distributed over a dispy cluster, by setting the 'distributed' attribute to True
+    and informing the list of dispy nodes in the 'nodes' parameter.
+
+    The number of experiments is determined by the 'windowsize' and 'inc' parameters.
 
     :param data: test data
     :param windowsize: size of sliding window
@@ -67,35 +70,31 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
     :param kwargs: dict, optional arguments
 
     :keyword
+        benchmark_methods: a list with non-FTS models to benchmark. The default is None.
+        benchmark_methods_parameters: a list with non-FTS model parameters. The default is None.
+        dataset: the dataset name, used to identify the current set of benchmark results in the database.
+        distributed: a boolean value indicating if the forecasting procedure will be distributed in a dispy cluster. The default is False.
+        file: file path to save the results. The default is benchmarks.db.
        inc: a float on interval [0,1] indicating the percentage of the windowsize to move the window
-        models: a list with prebuilt FTS objects. The default is None.
        methods: a list with FTS class names. The default depends on the forecasting type and contains the list of all FTS methods.
+        models: a list with prebuilt FTS objects. The default is None.
+        nodes: a list with the dispy cluster nodes addresses. The default is [127.0.0.1].
+        orders: a list with orders of the models (for high order models). The default is [1,2,3].
+        partitions: a list with the numbers of partitions on the Universe of Discourse. The default is [10].
        partitioners_models: a list with prebuilt Universe of Discourse partitioners objects. The default is None.
        partitioners_methods: a list with Universe of Discourse partitioners class names. The default is [partitioners.Grid.GridPartitioner].
-        partitions: a list with the numbers of partitions on the Universe of Discourse. The default is [10].
-        orders: a list with orders of the models (for high order models). The default is [1,2,3].
-        type: the forecasting type, one of these values: point(default), interval or distribution. . The default is point.
-        steps_ahead: a list with the forecasting horizons, i. e., the number of steps ahead to forecast. The default is 1.
-        start: in the multi step forecasting, the index of the data where to start forecasting. The default is 0.
-        transformation: data transformation . The default is None.
-        indexer: seasonal indexer. . The default is None.
        progress: If true a progress bar will be displayed during the benchmarks. The default is False.
-        distributed: A boolean value indicating if the forecasting procedure will be distributed in a dispy cluster. . The default is False
-        nodes: a list with the dispy cluster nodes addresses. The default is [127.0.0.1].
-        benchmark_methods: a list with Non FTS models to benchmark. The default is None.
-        benchmark_methods_parameters: a list with Non FTS models parameters. . The default is None.
-        save: save results. The default is False.
-        file: file path to save the results. The default is None.
-        sintetic: if true only the average and standard deviation of the results. The de fault is False.
-
-    :return: DataFrame with the benchmark results
+        start: in the multi step forecasting, the index of the data where to start forecasting. The default is 0.
+        steps_ahead: a list with the forecasting horizons, i.e., the number of steps ahead to forecast. The default is 1.
+        tag: a name to identify the current set of benchmark results in the database.
+        type: the forecasting type, one of these values: point (default), interval or distribution.
+        transformations: a list with data transformations to apply. The default is [None].
    """
    tag = __pop('tag', None, kwargs)
    dataset = __pop('dataset', None, kwargs)
 
    distributed = __pop('distributed', False, kwargs)
-    save = __pop('save', False, kwargs)
 
    transformations = kwargs.get('transformations', [None])
    progress = kwargs.get('progress', None)
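
For illustration, a hypothetical invocation of sliding_window_benchmarks exercising the keyword arguments documented above; the synthetic series and every keyword value shown are arbitrary examples, not part of the patch:

import numpy as np
from pyFTS.benchmarks import benchmarks as bchmk

# any 1-D numeric series works; a synthetic random walk keeps the sketch self-contained
dataset = np.cumsum(np.random.normal(0, 1, 3000)) + 100

# one experiment per window: 1000-point windows moved by windowsize * inc,
# 80% of each window used for training, metrics appended to the sqlite3 file
bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.5,
                                orders=[1, 2],
                                partitions=[10, 20],
                                type='point',
                                progress=True,
                                file='benchmarks.db',
                                dataset='random_walk', tag='example')
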
diff --git a/pyFTS/common/Transformations.py b/pyFTS/common/Transformations.py
index 6291daf..6693614 100644
--- a/pyFTS/common/Transformations.py
+++ b/pyFTS/common/Transformations.py
@@ -71,6 +71,7 @@ class Differential(Transformation):
 
     def inverse(self, data, param, **kwargs):
         type = kwargs.get("type","point")
+        steps_ahead = kwargs.get("steps_ahead", 1)
 
         if isinstance(data, (np.ndarray, np.generic)):
             data = data.tolist()
@@ -83,14 +84,30 @@ class Differential(Transformation):
#        print(n)
#        print(len(param))
 
-        if type == "point":
-            inc = [data[t] + param[t] for t in np.arange(0, n)]
-        elif type == "interval":
-            inc = [[data[t][0] + param[t], data[t][1] + param[t]] for t in np.arange(0, n)]
-        elif type == "distribution":
-            for t in np.arange(0, n):
-                data[t].differential_offset(param[t])
-            inc = data
+        if steps_ahead == 1:
+            if type == "point":
+                inc = [data[t] + param[t] for t in np.arange(0, n)]
+            elif type == "interval":
+                inc = [[data[t][0] + param[t], data[t][1] + param[t]] for t in np.arange(0, n)]
+            elif type == "distribution":
+                for t in np.arange(0, n):
+                    data[t].differential_offset(param[t])
+                inc = data
+        else:
+            if type == "point":
+                inc = [data[0] + param[0]]
+                for t in np.arange(1, steps_ahead):
+                    inc.append(data[t] + inc[t-1])
+            elif type == "interval":
+                inc = [[data[0][0] + param[0], data[0][1] + param[0]]]
+                for t in np.arange(1, steps_ahead):
+                    inc.append([data[t][0] + np.nanmean(inc[t-1]), data[t][1] + np.nanmean(inc[t-1])])
+            elif type == "distribution":
+                data[0].differential_offset(param[0])
+                for t in np.arange(1, steps_ahead):
+                    ex = data[t-1].expected_value()
+                    data[t].differential_offset(ex)
+                inc = data
 
         if n == 1:
             return inc[0]
diff --git a/pyFTS/common/fts.py b/pyFTS/common/fts.py
index 3e3b435..8ed15d0 100644
--- a/pyFTS/common/fts.py
+++ b/pyFTS/common/fts.py
@@ -114,9 +114,9 @@ class FTS(object):
 
             ret = Util.distributed_predict(self, kwargs, nodes, ndata, num_batches)
 
-        if type != 'distribution' and not self.is_multivariate:
-            interval = True if type == 'interval' else False
-            ret = self.apply_inverse_transformations(ret, params=[data[self.order - 1:]], interval=interval)
+        if not self.is_multivariate:
+            kwargs['type'] = type
+            ret = self.apply_inverse_transformations(ret, params=[data[self.order - 1:]], **kwargs)
 
         return ret
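
The multi-step branch added to Differential.inverse reintegrates differenced forecasts cumulatively: only the first step ahead can be anchored on the last observed value (param[0]); each later step must be anchored on the previously reconstructed forecast (or on its mean/expected value, for intervals and distributions). A standalone sketch of that recurrence for point forecasts (function and variable names are illustrative, not the pyFTS API):

def undifference_ahead(diff_forecasts, last_observed):
    # first step: anchor on the last observed (undifferenced) value
    levels = [diff_forecasts[0] + last_observed]
    # later steps: anchor on the previously reconstructed level
    for d in diff_forecasts[1:]:
        levels.append(d + levels[-1])
    return levels

# if the series ended at 100 and the forecasted differences are [1, 2, -1],
# the reconstructed levels are [101, 103, 102]
print(undifference_ahead([1, 2, -1], 100))
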
diff --git a/pyFTS/models/chen.py b/pyFTS/models/chen.py
index 207a15e..f157428 100644
--- a/pyFTS/models/chen.py
+++ b/pyFTS/models/chen.py
@@ -50,7 +50,10 @@ class ConventionalFTS(fts.FTS):
     def train(self, data, **kwargs):
         if kwargs.get('sets', None) is not None:
             self.sets = kwargs.get('sets', None)
-        tmpdata = FuzzySet.fuzzyfy_series_old(data, self.sets)
+        else:
+            self.sets = self.partitioner.sets
+
+        tmpdata = FuzzySet.fuzzyfy_series(data, self.sets, method='maximum')
         flrs = FLR.generate_non_recurrent_flrs(tmpdata)
         self.generate_flrg(flrs)
diff --git a/pyFTS/models/hofts.py b/pyFTS/models/hofts.py
index 9e1e5ff..bdc565d 100644
--- a/pyFTS/models/hofts.py
+++ b/pyFTS/models/hofts.py
@@ -97,6 +97,8 @@ class HighOrderFTS(fts.FTS):
 
         if kwargs.get('sets', None) is not None:
             self.sets = kwargs.get('sets', None)
+        else:
+            self.sets = self.partitioner.sets
 
         self.generate_flrg(data)
diff --git a/pyFTS/models/ismailefendi.py b/pyFTS/models/ismailefendi.py
index ecd5122..130be84 100644
--- a/pyFTS/models/ismailefendi.py
+++ b/pyFTS/models/ismailefendi.py
@@ -63,8 +63,10 @@ class ImprovedWeightedFTS(fts.FTS):
     def train(self, ndata, **kwargs):
         if kwargs.get('sets', None) is not None:
             self.sets = kwargs.get('sets', None)
+        else:
+            self.sets = self.partitioner.sets
 
-        tmpdata = FuzzySet.fuzzyfy_series(ndata, self.sets, method="maximum")
+        tmpdata = FuzzySet.fuzzyfy_series(ndata, self.sets, method='maximum')
         flrs = FLR.generate_recurrent_flrs(tmpdata)
         self.generate_flrg(flrs)
diff --git a/pyFTS/models/sadaei.py b/pyFTS/models/sadaei.py
index 8803e41..22e7188 100644
--- a/pyFTS/models/sadaei.py
+++ b/pyFTS/models/sadaei.py
@@ -69,6 +69,9 @@ class ExponentialyWeightedFTS(fts.FTS):
         self.c = kwargs.get('parameters', default_c)
         if kwargs.get('sets', None) is not None:
             self.sets = kwargs.get('sets', None)
+        else:
+            self.sets = self.partitioner.sets
+
         tmpdata = FuzzySet.fuzzyfy_series(data, self.sets, method='maximum')
         flrs = FLR.generate_recurrent_flrs(tmpdata)
         self.generate_flrg(flrs, self.c)
@@ -78,7 +81,7 @@ class ExponentialyWeightedFTS(fts.FTS):
 
         ordered_sets = FuzzySet.set_ordered(self.sets)
 
-        data = np.array(data)
+        data = np.array(ndata)
 
         l = len(ndata)
diff --git a/pyFTS/models/song.py b/pyFTS/models/song.py
index 136934b..326265c 100644
--- a/pyFTS/models/song.py
+++ b/pyFTS/models/song.py
@@ -39,7 +39,7 @@ class ConventionalFTS(fts.FTS):
     def operation_matrix(self, flrs):
         l = len(self.sets)
-        if self.R is None:
+        if self.R is None or len(self.R) == 0:
             self.R = np.zeros((l, l))
         for k in flrs:
             mm = self.flr_membership_matrix(k)
@@ -51,6 +51,8 @@ class ConventionalFTS(fts.FTS):
     def train(self, data, **kwargs):
         if kwargs.get('sets', None) is not None:
             self.sets = kwargs.get('sets', None)
+        else:
+            self.sets = self.partitioner.sets
 
         tmpdata = FuzzySet.fuzzyfy_series(data, self.sets, method='maximum')
         flrs = FLR.generate_non_recurrent_flrs(tmpdata)
diff --git a/pyFTS/models/yu.py b/pyFTS/models/yu.py
index 349a571..427d642 100644
--- a/pyFTS/models/yu.py
+++ b/pyFTS/models/yu.py
@@ -60,8 +60,10 @@ class WeightedFTS(fts.FTS):
     def train(self, ndata, **kwargs):
         if kwargs.get('sets', None) is not None:
             self.sets = kwargs.get('sets', None)
+        else:
+            self.sets = self.partitioner.sets
 
-        tmpdata = FuzzySet.fuzzyfy_series_old(ndata, self.sets)
+        tmpdata = FuzzySet.fuzzyfy_series(ndata, self.sets, method='maximum')
        flrs = FLR.generate_recurrent_flrs(tmpdata)
        self.generate_FLRG(flrs)
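
The recurring change in chen, hofts, ismailefendi, sadaei, song and yu is the same fallback: when train() receives no 'sets' keyword, the fuzzy sets are now taken from the attached partitioner instead of being left unset. A hypothetical usage showing both paths, mirroring the constructor style used in pyFTS/tests/general.py below (the slice sizes are arbitrary):

from pyFTS.data import TAIEX
from pyFTS.models import chen
from pyFTS.partitioners import Grid

data = TAIEX.get_data()
partitioner = Grid.GridPartitioner(data=data[:800], npart=10)

# path 1: no 'sets' kwarg -> train() now falls back to partitioner.sets
model = chen.ConventionalFTS('', partitioner=partitioner)
model.fit(data[:800])

# path 2: an explicit 'sets' kwarg still takes precedence
model2 = chen.ConventionalFTS('', partitioner=partitioner)
model2.fit(data[:800], sets=partitioner.sets)

print(model.predict(data[800:820]))
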
diff --git a/pyFTS/tests/general.py b/pyFTS/tests/general.py
index ba45a18..062e4bc 100644
--- a/pyFTS/tests/general.py
+++ b/pyFTS/tests/general.py
@@ -15,33 +15,37 @@ from pyFTS.data import TAIEX
 
 dataset = TAIEX.get_data()
 
-from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil
+from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil, Measures
 
 from pyFTS.models import pwfts
 
-
+'''
 from pyFTS.partitioners import Grid, Util as pUtil
 partitioner = Grid.GridPartitioner(data=dataset[:800], npart=10, transformation=tdiff)
 
 model = pwfts.ProbabilisticWeightedFTS('',partitioner=partitioner)
-#model.append_transformation(tdiff)
+model.append_transformation(tdiff)
 model.fit(dataset[:800])
 
-print(model.predict(dataset[800:1000], type='interval'))
-
-
+print(Measures.get_distribution_statistics(dataset[800:1000], model, steps_ahead=7))
+#tmp = model.predict(dataset[800:1000], type='distribution', steps_ahead=7)
+#for tmp2 in tmp:
+#    print(tmp2)
 '''
-bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2, methods=[pwfts.ProbabilisticWeightedFTS],
+
+#'''
+bchmk.sliding_window_benchmarks(dataset[:1000], 1000, train=0.8, inc=0.2,
+                                #methods=[pwfts.ProbabilisticWeightedFTS],
                                 benchmark_models=False,
                                 #transformations=[tdiff],
-                                orders=[1, 2, 3],
-                                partitions=np.arange(10, 100, 5),
-                                progress=False, type='distribution',
+                                orders=[1], #[1, 2, 3],
+                                partitions=[20], #np.arange(10, 100, 5),
+                                progress=True, type='point',
                                 #steps_ahead=[1,4,7,10],
                                 #steps_ahead=[1]
-                                distributed=True, nodes=['192.168.0.110', '192.168.0.100','192.168.0.106'],
-                                file="benchmarks.db", dataset="TAIEX", tag="partitioning")
+                                #distributed=True, nodes=['192.168.0.110', '192.168.0.105','192.168.0.106'],
+                                file="benchmarks.tmp", dataset="TAIEX", tag="comparisons")
                                 #save=True, file="tmp.db")
-'''
+#'''
 
 '''
 dat = pd.read_csv('pwfts_taiex_partitioning.csv', sep=';')
 print(bUtil.analytic_tabular_dataframe(dat))