From 33dbeb8965fefb846aa78f8273c0f244203f8122 Mon Sep 17 00:00:00 2001
From: Petrônio Cândido
Date: Tue, 24 Apr 2018 12:57:40 -0300
Subject: [PATCH] Minor bugfixes on pwfts.models

---
 pyFTS/benchmarks/Measures.py    | 23 ++++++++++++++----
 pyFTS/benchmarks/benchmarks.py  | 41 ++++++++++++++++-----------------
 pyFTS/common/Transformations.py | 33 +++++++++++++++++++-------
 pyFTS/common/fts.py             |  6 ++---
 pyFTS/models/chen.py            |  5 +++-
 pyFTS/models/hofts.py           |  2 ++
 pyFTS/models/ismailefendi.py    |  4 +++-
 pyFTS/models/sadaei.py          |  5 +++-
 pyFTS/models/song.py            |  4 +++-
 pyFTS/models/yu.py              |  4 +++-
 pyFTS/tests/general.py          | 30 +++++++++++++-----------
 11 files changed, 102 insertions(+), 55 deletions(-)

diff --git a/pyFTS/benchmarks/Measures.py b/pyFTS/benchmarks/Measures.py
index 3ae8685..b8dae1a 100644
--- a/pyFTS/benchmarks/Measures.py
+++ b/pyFTS/benchmarks/Measures.py
@@ -215,6 +215,17 @@ def pinball_mean(tau, targets, forecasts):
         print(ex)
 
 
+def brier_score(targets, densities):
+    '''Brier (1950). "Verification of Forecasts Expressed in Terms of Probability". Monthly Weather Review. 78: 1–3.'''
+    ret = []
+    for ct, d in enumerate(densities):
+        v = d.bin_index.find_ge(targets[ct])
+        score = sum([d.distribution[k] ** 2 for k in d.bins if k != v])
+        score += (d.distribution[v] - 1) ** 2
+        ret.append(score)
+    return sum(ret)/len(ret)
+
+
 def pmf_to_cdf(density):
     ret = []
     for row in density.index:
@@ -236,7 +247,6 @@ def heavyside_cdf(bins, targets):
     df = pd.DataFrame(ret, columns=bins)
     return df
 
-
 def crps(targets, densities):
     '''
     Continuous Ranked Probability Score
@@ -277,6 +287,7 @@ def get_point_statistics(data, model, **kwargs):
     '''
 
     steps_ahead = kwargs.get('steps_ahead',1)
+    kwargs['type'] = 'point'
 
     indexer = kwargs.get('indexer', None)
 
@@ -301,7 +312,7 @@ def get_point_statistics(data, model, **kwargs):
         nforecasts = []
         for k in np.arange(model.order, len(ndata)-steps_ahead,steps_ahead_sampler):
             sample = ndata[k - model.order: k]
-            tmp = model.forecast_ahead(sample, steps_ahead, **kwargs)
+            tmp = model.predict(sample, **kwargs)
             nforecasts.append(tmp[-1])
 
         start = model.order + steps_ahead -1
@@ -323,6 +334,7 @@ def get_interval_statistics(data, model, **kwargs):
     '''
 
     steps_ahead = kwargs.get('steps_ahead', 1)
+    kwargs['type'] = 'interval'
 
     ret = list()
 
@@ -339,7 +351,7 @@ def get_interval_statistics(data, model, **kwargs):
         forecasts = []
         for k in np.arange(model.order, len(data) - steps_ahead):
             sample = data[k - model.order: k]
-            tmp = model.predict(sample, steps_ahead, **kwargs)
+            tmp = model.predict(sample, **kwargs)
             forecasts.append(tmp[-1])
 
         start = model.order + steps_ahead -1
@@ -362,12 +374,13 @@ def get_distribution_statistics(data, model, **kwargs):
     :return: a list with the CRPS and execution time
     '''
     steps_ahead = kwargs.get('steps_ahead', 1)
+    kwargs['type'] = 'distribution'
 
     ret = list()
 
     if steps_ahead == 1:
         _s1 = time.time()
-        forecasts = model.forecast_distribution(data, **kwargs)
+        forecasts = model.predict(data, **kwargs)
         _e1 = time.time()
         ret.append(round(crps(data, forecasts), 3))
         ret.append(round(_e1 - _s1, 3))
@@ -377,7 +390,7 @@ def get_distribution_statistics(data, model, **kwargs):
         _s1 = time.time()
         for k in np.arange(model.order, len(data) - steps_ahead, skip):
             sample = data[k - model.order: k]
-            tmp = model.forecast_ahead_distribution(sample, steps_ahead, **kwargs)
+            tmp = model.predict(sample, **kwargs)
             forecasts.append(tmp[-1])
         _e1 = time.time()
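
The brier_score measure added above computes, for each observation, the sum over bins k != v of p[k]**2 plus (p[v] - 1)**2, where v is the bin where the observation fell; that is exactly the mean squared difference between the forecasted probabilities and the one-hot indicator of the observed bin. A minimal self-contained sketch of the same computation, using plain dicts as a stand-in for pyFTS's ProbabilityDistribution objects (d.bin_index.find_ge is emulated by taking the first bin >= target; all names below are illustrative):

import numpy as np

def brier_score_sketch(targets, densities):
    # densities: list of dicts mapping bin value -> forecasted probability,
    # a stand-in for the pyFTS ProbabilityDistribution objects used above
    scores = []
    for target, dist in zip(targets, densities):
        bins = sorted(dist.keys())
        # first bin >= target plays the role of d.bin_index.find_ge(target)
        hit = next(b for b in bins if b >= target)
        # squared error against the one-hot indicator of the observed bin
        score = sum((p - (1.0 if b == hit else 0.0)) ** 2
                    for b, p in dist.items())
        scores.append(score)
    return np.mean(scores)

# a perfect forecast scores 0, a maximal miss scores 2
print(brier_score_sketch([10.0], [{5.0: 0.0, 10.0: 1.0, 15.0: 0.0}]))  # 0.0
print(brier_score_sketch([10.0], [{5.0: 1.0, 10.0: 0.0, 15.0: 0.0}]))  # 2.0
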
diff --git a/pyFTS/benchmarks/benchmarks.py b/pyFTS/benchmarks/benchmarks.py
index 4b7d154..02f39a3 100644
--- a/pyFTS/benchmarks/benchmarks.py
+++ b/pyFTS/benchmarks/benchmarks.py
@@ -56,10 +56,13 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
     partitions and partitioning method will be created a partitioner model. And for each partitioner, order,
     steps ahead and FTS method a foreasting model will be trained.
 
-    Then all trained models are benchmarked on the test data and the metrics are stored in a datafame for
-    posterior analysis.
+    Then all trained models are benchmarked on the test data and the metrics are stored in a sqlite3 database
+    (identified by the 'file' parameter) for later analysis.
 
-    The number of experiments is determined by the windowsize and inc.
+    All this processing can be distributed over a dispy cluster, by setting the 'distributed' attribute to True
+    and informing the list of dispy nodes in the 'nodes' parameter.
+
+    The number of experiments is determined by the 'windowsize' and 'inc' parameters.
 
     :param data: test data
     :param windowsize: size of sliding window
@@ -67,35 +70,31 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
     :param kwargs: dict, optional arguments
 
     :keyword
+        benchmark_methods: a list with non-FTS models to benchmark. The default is None.
+        benchmark_methods_parameters: a list with non-FTS model parameters. The default is None.
+        dataset: the dataset name, used to identify the current set of benchmark results in the database.
+        distributed: a boolean value indicating if the forecasting procedure will be distributed in a dispy cluster. The default is False.
+        file: file path to save the results. The default is benchmarks.db.
        inc: a float on interval [0,1] indicating the percentage of the windowsize to move the window
-        models: a list with prebuilt FTS objects. The default is None.
        methods: a list with FTS class names. The default depends on the forecasting type and contains the list of all FTS methods.
+        models: a list with prebuilt FTS objects. The default is None.
+        nodes: a list with the dispy cluster nodes addresses. The default is [127.0.0.1].
+        orders: a list with orders of the models (for high order models). The default is [1,2,3].
+        partitions: a list with the numbers of partitions on the Universe of Discourse. The default is [10].
        partitioners_models: a list with prebuilt Universe of Discourse partitioners objects. The default is None.
        partitioners_methods: a list with Universe of Discourse partitioners class names. The default is [partitioners.Grid.GridPartitioner].
-        partitions: a list with the numbers of partitions on the Universe of Discourse. The default is [10].
-        orders: a list with orders of the models (for high order models). The default is [1,2,3].
-        type: the forecasting type, one of these values: point(default), interval or distribution. . The default is point.
-        steps_ahead: a list with the forecasting horizons, i. e., the number of steps ahead to forecast. The default is 1.
-        start: in the multi step forecasting, the index of the data where to start forecasting. The default is 0.
-        transformation: data transformation . The default is None.
-        indexer: seasonal indexer. . The default is None.
        progress: If true a progress bar will be displayed during the benchmarks. The default is False.
-        distributed: A boolean value indicating if the forecasting procedure will be distributed in a dispy cluster. . The default is False
-        nodes: a list with the dispy cluster nodes addresses. The default is [127.0.0.1].
-        benchmark_methods: a list with Non FTS models to benchmark. The default is None.
-        benchmark_methods_parameters: a list with Non FTS models parameters. . The default is None.
-        save: save results. The default is False.
-        file: file path to save the results. The default is None.
-        sintetic: if true only the average and standard deviation of the results. The de fault is False.
-
-    :return: DataFrame with the benchmark results
+        start: in the multi step forecasting, the index of the data where to start forecasting. The default is 0.
+        steps_ahead: a list with the forecasting horizons, i.e., the number of steps ahead to forecast. The default is 1.
+        tag: a name to identify the current set of benchmark results in the database.
+        type: the forecasting type, one of these values: point (default), interval or distribution.
+        transformations: a list with data transformations to apply. The default is [None].
    """
    tag = __pop('tag', None, kwargs)
    dataset = __pop('dataset', None, kwargs)
 
    distributed = __pop('distributed', False, kwargs)
-    save = __pop('save', False, kwargs)
 
    transformations = kwargs.get('transformations', [None])
    progress = kwargs.get('progress', None)
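
For illustration, a hypothetical invocation of sliding_window_benchmarks exercising the keyword arguments documented above; the synthetic series and every keyword value shown are arbitrary examples, not part of the patch:

import numpy as np
from pyFTS.benchmarks import benchmarks as bchmk

# any 1-D numeric series works; a synthetic random walk keeps the sketch self-contained
dataset = np.cumsum(np.random.normal(0, 1, 3000)) + 100

# one experiment per window: 1000-point windows moved by windowsize * inc,
# 80% of each window used for training, metrics appended to the sqlite3 file
bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.5,
                                orders=[1, 2],
                                partitions=[10, 20],
                                type='point',
                                progress=True,
                                file='benchmarks.db',
                                dataset='random_walk', tag='example')
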
diff --git a/pyFTS/common/Transformations.py b/pyFTS/common/Transformations.py
index 6291daf..6693614 100644
--- a/pyFTS/common/Transformations.py
+++ b/pyFTS/common/Transformations.py
@@ -71,6 +71,7 @@ class Differential(Transformation):
 
     def inverse(self, data, param, **kwargs):
         type = kwargs.get("type","point")
+        steps_ahead = kwargs.get("steps_ahead", 1)
 
         if isinstance(data, (np.ndarray, np.generic)):
             data = data.tolist()
@@ -83,14 +84,30 @@ class Differential(Transformation):
#        print(n)
#        print(len(param))
 
-        if type == "point":
-            inc = [data[t] + param[t] for t in np.arange(0, n)]
-        elif type == "interval":
-            inc = [[data[t][0] + param[t], data[t][1] + param[t]] for t in np.arange(0, n)]
-        elif type == "distribution":
-            for t in np.arange(0, n):
-                data[t].differential_offset(param[t])
-            inc = data
+        if steps_ahead == 1:
+            if type == "point":
+                inc = [data[t] + param[t] for t in np.arange(0, n)]
+            elif type == "interval":
+                inc = [[data[t][0] + param[t], data[t][1] + param[t]] for t in np.arange(0, n)]
+            elif type == "distribution":
+                for t in np.arange(0, n):
+                    data[t].differential_offset(param[t])
+                inc = data
+        else:
+            if type == "point":
+                inc = [data[0] + param[0]]
+                for t in np.arange(1, steps_ahead):
+                    inc.append(data[t] + inc[t-1])
+            elif type == "interval":
+                inc = [[data[0][0] + param[0], data[0][1] + param[0]]]
+                for t in np.arange(1, steps_ahead):
+                    inc.append([data[t][0] + np.nanmean(inc[t-1]), data[t][1] + np.nanmean(inc[t-1])])
+            elif type == "distribution":
+                data[0].differential_offset(param[0])
+                for t in np.arange(1, steps_ahead):
+                    ex = data[t-1].expected_value()
+                    data[t].differential_offset(ex)
+                inc = data
 
         if n == 1:
             return inc[0]
diff --git a/pyFTS/common/fts.py b/pyFTS/common/fts.py
index 3e3b435..8ed15d0 100644
--- a/pyFTS/common/fts.py
+++ b/pyFTS/common/fts.py
@@ -114,9 +114,9 @@ class FTS(object):
 
             ret = Util.distributed_predict(self, kwargs, nodes, ndata, num_batches)
 
-        if type != 'distribution' and not self.is_multivariate:
-            interval = True if type == 'interval' else False
-            ret = self.apply_inverse_transformations(ret, params=[data[self.order - 1:]], interval=interval)
+        if not self.is_multivariate:
+            kwargs['type'] = type
+            ret = self.apply_inverse_transformations(ret, params=[data[self.order - 1:]], **kwargs)
 
         return ret
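
The multi-step branch added to Differential.inverse reintegrates differenced forecasts cumulatively: only the first step ahead can be anchored on the last observed value (param[0]); each later step must be anchored on the previously reconstructed forecast (or on its mean/expected value, for intervals and distributions). A standalone sketch of that recurrence for point forecasts (function and variable names are illustrative, not the pyFTS API):

def undifference_ahead(diff_forecasts, last_observed):
    # first step: anchor on the last observed (undifferenced) value
    levels = [diff_forecasts[0] + last_observed]
    # later steps: anchor on the previously reconstructed level
    for d in diff_forecasts[1:]:
        levels.append(d + levels[-1])
    return levels

# if the series ended at 100 and the forecasted differences are [1, 2, -1],
# the reconstructed levels are [101, 103, 102]
print(undifference_ahead([1, 2, -1], 100))
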
diff --git a/pyFTS/models/chen.py b/pyFTS/models/chen.py
index 207a15e..f157428 100644
--- a/pyFTS/models/chen.py
+++ b/pyFTS/models/chen.py
@@ -50,7 +50,10 @@ class ConventionalFTS(fts.FTS):
     def train(self, data, **kwargs):
         if kwargs.get('sets', None) is not None:
             self.sets = kwargs.get('sets', None)
-        tmpdata = FuzzySet.fuzzyfy_series_old(data, self.sets)
+        else:
+            self.sets = self.partitioner.sets
+
+        tmpdata = FuzzySet.fuzzyfy_series(data, self.sets, method='maximum')
         flrs = FLR.generate_non_recurrent_flrs(tmpdata)
         self.generate_flrg(flrs)
diff --git a/pyFTS/models/hofts.py b/pyFTS/models/hofts.py
index 9e1e5ff..bdc565d 100644
--- a/pyFTS/models/hofts.py
+++ b/pyFTS/models/hofts.py
@@ -97,6 +97,8 @@ class HighOrderFTS(fts.FTS):
 
         if kwargs.get('sets', None) is not None:
             self.sets = kwargs.get('sets', None)
+        else:
+            self.sets = self.partitioner.sets
 
         self.generate_flrg(data)
diff --git a/pyFTS/models/ismailefendi.py b/pyFTS/models/ismailefendi.py
index ecd5122..130be84 100644
--- a/pyFTS/models/ismailefendi.py
+++ b/pyFTS/models/ismailefendi.py
@@ -63,8 +63,10 @@ class ImprovedWeightedFTS(fts.FTS):
     def train(self, ndata, **kwargs):
         if kwargs.get('sets', None) is not None:
             self.sets = kwargs.get('sets', None)
+        else:
+            self.sets = self.partitioner.sets
 
-        tmpdata = FuzzySet.fuzzyfy_series(ndata, self.sets, method="maximum")
+        tmpdata = FuzzySet.fuzzyfy_series(ndata, self.sets, method='maximum')
         flrs = FLR.generate_recurrent_flrs(tmpdata)
         self.generate_flrg(flrs)
diff --git a/pyFTS/models/sadaei.py b/pyFTS/models/sadaei.py
index 8803e41..22e7188 100644
--- a/pyFTS/models/sadaei.py
+++ b/pyFTS/models/sadaei.py
@@ -69,6 +69,9 @@ class ExponentialyWeightedFTS(fts.FTS):
         self.c = kwargs.get('parameters', default_c)
         if kwargs.get('sets', None) is not None:
             self.sets = kwargs.get('sets', None)
+        else:
+            self.sets = self.partitioner.sets
+
         tmpdata = FuzzySet.fuzzyfy_series(data, self.sets, method='maximum')
         flrs = FLR.generate_recurrent_flrs(tmpdata)
         self.generate_flrg(flrs, self.c)
@@ -78,7 +81,7 @@ class ExponentialyWeightedFTS(fts.FTS):
 
         ordered_sets = FuzzySet.set_ordered(self.sets)
 
-        data = np.array(data)
+        data = np.array(ndata)
 
         l = len(ndata)
diff --git a/pyFTS/models/song.py b/pyFTS/models/song.py
index 136934b..326265c 100644
--- a/pyFTS/models/song.py
+++ b/pyFTS/models/song.py
@@ -39,7 +39,7 @@ class ConventionalFTS(fts.FTS):
     def operation_matrix(self, flrs):
         l = len(self.sets)
-        if self.R is None:
+        if self.R is None or len(self.R) == 0:
             self.R = np.zeros((l, l))
         for k in flrs:
             mm = self.flr_membership_matrix(k)
@@ -51,6 +51,8 @@ class ConventionalFTS(fts.FTS):
     def train(self, data, **kwargs):
         if kwargs.get('sets', None) is not None:
             self.sets = kwargs.get('sets', None)
+        else:
+            self.sets = self.partitioner.sets
 
         tmpdata = FuzzySet.fuzzyfy_series(data, self.sets, method='maximum')
         flrs = FLR.generate_non_recurrent_flrs(tmpdata)
diff --git a/pyFTS/models/yu.py b/pyFTS/models/yu.py
index 349a571..427d642 100644
--- a/pyFTS/models/yu.py
+++ b/pyFTS/models/yu.py
@@ -60,8 +60,10 @@ class WeightedFTS(fts.FTS):
     def train(self, ndata, **kwargs):
         if kwargs.get('sets', None) is not None:
             self.sets = kwargs.get('sets', None)
+        else:
+            self.sets = self.partitioner.sets
 
-        tmpdata = FuzzySet.fuzzyfy_series_old(ndata, self.sets)
+        tmpdata = FuzzySet.fuzzyfy_series(ndata, self.sets, method='maximum')
        flrs = FLR.generate_recurrent_flrs(tmpdata)
        self.generate_FLRG(flrs)
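
The recurring change in chen, hofts, ismailefendi, sadaei, song and yu is the same fallback: when train() receives no 'sets' keyword, the fuzzy sets are now taken from the attached partitioner instead of being left unset. A hypothetical usage showing both paths, mirroring the constructor style used in pyFTS/tests/general.py below (the slice sizes are arbitrary):

from pyFTS.data import TAIEX
from pyFTS.models import chen
from pyFTS.partitioners import Grid

data = TAIEX.get_data()
partitioner = Grid.GridPartitioner(data=data[:800], npart=10)

# path 1: no 'sets' kwarg -> train() now falls back to partitioner.sets
model = chen.ConventionalFTS('', partitioner=partitioner)
model.fit(data[:800])

# path 2: an explicit 'sets' kwarg still takes precedence
model2 = chen.ConventionalFTS('', partitioner=partitioner)
model2.fit(data[:800], sets=partitioner.sets)

print(model.predict(data[800:820]))
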
diff --git a/pyFTS/tests/general.py b/pyFTS/tests/general.py
index ba45a18..062e4bc 100644
--- a/pyFTS/tests/general.py
+++ b/pyFTS/tests/general.py
@@ -15,33 +15,37 @@ from pyFTS.data import TAIEX
 
 dataset = TAIEX.get_data()
 
-from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil
+from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil, Measures
 
 from pyFTS.models import pwfts
 
-
+'''
 from pyFTS.partitioners import Grid, Util as pUtil
 partitioner = Grid.GridPartitioner(data=dataset[:800], npart=10, transformation=tdiff)
 
 model = pwfts.ProbabilisticWeightedFTS('',partitioner=partitioner)
-#model.append_transformation(tdiff)
+model.append_transformation(tdiff)
 model.fit(dataset[:800])
 
-print(model.predict(dataset[800:1000], type='interval'))
-
-
+print(Measures.get_distribution_statistics(dataset[800:1000], model, steps_ahead=7))
+#tmp = model.predict(dataset[800:1000], type='distribution', steps_ahead=7)
+#for tmp2 in tmp:
+#    print(tmp2)
 '''
-bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2, methods=[pwfts.ProbabilisticWeightedFTS],
+
+#'''
+bchmk.sliding_window_benchmarks(dataset[:1000], 1000, train=0.8, inc=0.2,
+                                #methods=[pwfts.ProbabilisticWeightedFTS],
                                 benchmark_models=False,
                                 #transformations=[tdiff],
-                                orders=[1, 2, 3],
-                                partitions=np.arange(10, 100, 5),
-                                progress=False, type='distribution',
+                                orders=[1], #[1, 2, 3],
+                                partitions=[20], #np.arange(10, 100, 5),
+                                progress=True, type='point',
                                 #steps_ahead=[1,4,7,10],
                                 #steps_ahead=[1]
-                                distributed=True, nodes=['192.168.0.110', '192.168.0.100','192.168.0.106'],
-                                file="benchmarks.db", dataset="TAIEX", tag="partitioning")
+                                #distributed=True, nodes=['192.168.0.110', '192.168.0.105','192.168.0.106'],
+                                file="benchmarks.tmp", dataset="TAIEX", tag="comparisons")
                                 #save=True, file="tmp.db")
-'''
+#'''
 
 '''
 dat = pd.read_csv('pwfts_taiex_partitioning.csv', sep=';')
 print(bUtil.analytic_tabular_dataframe(dat))