Minor bugfixes on pwfts.models

Petrônio Cândido 2018-04-24 12:57:40 -03:00
parent f3c6eda2ec
commit 33dbeb8965
11 changed files with 102 additions and 55 deletions

View File

@@ -215,6 +215,17 @@ def pinball_mean(tau, targets, forecasts):
         print(ex)

+def brier_score(targets, densities):
+    '''Brier (1950). "Verification of Forecasts Expressed in Terms of Probability". Monthly Weather Review. 78: 13. '''
+    ret = []
+    for ct, d in enumerate(densities):
+        v = d.bin_index.find_ge(targets[ct])
+        score = sum([d.distribution[k] ** 2 for k in d.bins if k != v])
+        score += (d.distribution[v] - 1) ** 2
+        ret.append(score)
+    return sum(ret) / len(ret)
+
+
 def pmf_to_cdf(density):
     ret = []
     for row in density.index:
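
The new brier_score follows Brier (1950): for each observation it sums the squared probability mass assigned to every bin except the observed one, adds the squared shortfall of the observed bin from probability 1, and averages over all observations. A minimal sketch of the same arithmetic, using a plain dict as a hypothetical stand-in for pyFTS's density objects:

# Sketch of the Brier score arithmetic; a plain dict {bin: probability}
# stands in for pyFTS's probabilistic density objects (hypothetical).
def brier_score_sketch(targets, densities):
    scores = []
    for target, density in zip(targets, densities):
        # squared mass on every bin except the observed one...
        score = sum(p ** 2 for b, p in density.items() if b != target)
        # ...plus the squared shortfall of the observed bin from certainty
        score += (density[target] - 1) ** 2
        scores.append(score)
    return sum(scores) / len(scores)

# A uniform forecast over 4 bins scores 3*(0.25**2) + (0.25-1)**2 = 0.75;
# putting all mass on the observed bin would score a perfect 0.
print(brier_score_sketch([2], [{1: 0.25, 2: 0.25, 3: 0.25, 4: 0.25}]))  # 0.75
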
@@ -236,7 +247,6 @@ def heavyside_cdf(bins, targets):
     df = pd.DataFrame(ret, columns=bins)
     return df

-
 def crps(targets, densities):
     '''
     Continuous Ranked Probability Score
@@ -277,6 +287,7 @@ def get_point_statistics(data, model, **kwargs):
     '''
     steps_ahead = kwargs.get('steps_ahead', 1)
+    kwargs['type'] = 'point'

     indexer = kwargs.get('indexer', None)
@@ -301,7 +312,7 @@ def get_point_statistics(data, model, **kwargs):
         nforecasts = []
         for k in np.arange(model.order, len(ndata) - steps_ahead, steps_ahead_sampler):
             sample = ndata[k - model.order: k]
-            tmp = model.forecast_ahead(sample, steps_ahead, **kwargs)
+            tmp = model.predict(sample, **kwargs)
             nforecasts.append(tmp[-1])

         start = model.order + steps_ahead - 1
@@ -323,6 +334,7 @@ def get_interval_statistics(data, model, **kwargs):
     '''
     steps_ahead = kwargs.get('steps_ahead', 1)
+    kwargs['type'] = 'interval'

     ret = list()
@@ -339,7 +351,7 @@ def get_interval_statistics(data, model, **kwargs):
         forecasts = []
         for k in np.arange(model.order, len(data) - steps_ahead):
             sample = data[k - model.order: k]
-            tmp = model.predict(sample, steps_ahead, **kwargs)
+            tmp = model.predict(sample, **kwargs)
             forecasts.append(tmp[-1])

         start = model.order + steps_ahead - 1
@@ -362,12 +374,13 @@ def get_distribution_statistics(data, model, **kwargs):
     :return: a list with the CRPS and execution time
     '''
     steps_ahead = kwargs.get('steps_ahead', 1)
+    kwargs['type'] = 'distribution'

     ret = list()

     if steps_ahead == 1:
         _s1 = time.time()
-        forecasts = model.forecast_distribution(data, **kwargs)
+        forecasts = model.predict(data, **kwargs)
         _e1 = time.time()
         ret.append(round(crps(data, forecasts), 3))
         ret.append(round(_e1 - _s1, 3))
@@ -377,7 +390,7 @@ def get_distribution_statistics(data, model, **kwargs):
         _s1 = time.time()
         for k in np.arange(model.order, len(data) - steps_ahead, skip):
             sample = data[k - model.order: k]
-            tmp = model.forecast_ahead_distribution(sample, steps_ahead, **kwargs)
+            tmp = model.predict(sample, **kwargs)
             forecasts.append(tmp[-1])
         _e1 = time.time()
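
All the hunks above make the same API change: the statistics helpers now record the forecasting type in kwargs and call the single model.predict entry point instead of the type-specific forecast_ahead / forecast_distribution methods. A hedged sketch of the resulting calling pattern; rolling_predict is a hypothetical helper, and model/data are assumed to be a fitted pyFTS model and a sequence:

import numpy as np

# Sketch of the unified convention after this commit: the forecasting type
# travels through kwargs and model.predict dispatches internally.
def rolling_predict(model, data, **kwargs):
    kwargs.setdefault('type', 'point')      # point, interval or distribution
    steps_ahead = kwargs.get('steps_ahead', 1)
    forecasts = []
    for k in np.arange(model.order, len(data) - steps_ahead):
        sample = data[k - model.order: k]   # one window of length model.order
        tmp = model.predict(sample, **kwargs)
        forecasts.append(tmp[-1])           # keep only the final horizon
    return forecasts
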

View File

@@ -56,10 +56,13 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
     partitions and partitioning method will be created a partitioner model. And for each partitioner, order,
     steps ahead and FTS method a forecasting model will be trained.

-    Then all trained models are benchmarked on the test data and the metrics are stored in a dataframe for
-    posterior analysis.
+    Then all trained models are benchmarked on the test data and the metrics are stored on a sqlite3 database
+    (identified by the 'file' parameter) for posterior analysis.

-    The number of experiments is determined by the windowsize and inc.
+    All this process can be distributed on a dispy cluster, by setting the attribute 'distributed' to true and
+    informing the list of dispy nodes in the 'nodes' parameter.
+
+    The number of experiments is determined by the 'windowsize' and 'inc' parameters.

     :param data: test data
     :param windowsize: size of sliding window
@@ -67,35 +70,31 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
     :param kwargs: dict, optional arguments

     :keyword
+    benchmark_methods: a list with non-FTS models to benchmark. The default is None.
+    benchmark_methods_parameters: a list with non-FTS models parameters. The default is None.
+    dataset: the dataset name to identify the current set of benchmarks results on database.
+    distributed: a boolean value indicating if the forecasting procedure will be distributed in a dispy cluster. The default is False.
+    file: file path to save the results. The default is benchmarks.db.
     inc: a float on interval [0,1] indicating the percentage of the windowsize to move the window
-    models: a list with prebuilt FTS objects. The default is None.
     methods: a list with FTS class names. The default depends on the forecasting type and contains the list of all FTS methods.
+    models: a list with prebuilt FTS objects. The default is None.
+    nodes: a list with the dispy cluster nodes addresses. The default is [127.0.0.1].
+    orders: a list with orders of the models (for high order models). The default is [1,2,3].
+    partitions: a list with the numbers of partitions on the Universe of Discourse. The default is [10].
     partitioners_models: a list with prebuilt Universe of Discourse partitioners objects. The default is None.
     partitioners_methods: a list with Universe of Discourse partitioners class names. The default is [partitioners.Grid.GridPartitioner].
-    partitions: a list with the numbers of partitions on the Universe of Discourse. The default is [10].
-    orders: a list with orders of the models (for high order models). The default is [1,2,3].
-    type: the forecasting type, one of these values: point (default), interval or distribution.
-    steps_ahead: a list with the forecasting horizons, i.e., the number of steps ahead to forecast. The default is 1.
-    start: in the multi step forecasting, the index of the data where to start forecasting. The default is 0.
-    transformation: data transformation. The default is None.
-    indexer: seasonal indexer. The default is None.
     progress: if true a progress bar will be displayed during the benchmarks. The default is False.
-    distributed: a boolean value indicating if the forecasting procedure will be distributed in a dispy cluster. The default is False.
-    nodes: a list with the dispy cluster nodes addresses. The default is [127.0.0.1].
-    benchmark_methods: a list with non-FTS models to benchmark. The default is None.
-    benchmark_methods_parameters: a list with non-FTS models parameters. The default is None.
-    save: save results. The default is False.
-    file: file path to save the results. The default is None.
-    sintetic: if true, only the average and standard deviation of the results are returned. The default is False.
+    start: in the multi step forecasting, the index of the data where to start forecasting. The default is 0.
+    steps_ahead: a list with the forecasting horizons, i.e., the number of steps ahead to forecast. The default is 1.
+    tag: a name to identify the current set of benchmarks results on database.
+    type: the forecasting type, one of these values: point (default), interval or distribution.
+    transformations: a list with data transformations to apply. The default is [None].
-
-    :return: DataFrame with the benchmark results
     """
     tag = __pop('tag', None, kwargs)
     dataset = __pop('dataset', None, kwargs)

     distributed = __pop('distributed', False, kwargs)
-    save = __pop('save', False, kwargs)

     transformations = kwargs.get('transformations', [None])
     progress = kwargs.get('progress', None)
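
Taken together, the rewritten docstring and the dropped 'save' option describe the new workflow: results always go to the sqlite3 database named by 'file', keyed by 'dataset' and 'tag'. A minimal call matching the documented keywords (the dataset and window sizes are illustrative, mirroring the test script changed later in this commit):

from pyFTS.benchmarks import benchmarks as bchmk
from pyFTS.data import TAIEX

data = TAIEX.get_data()

# Point-forecasting benchmark over sliding windows of 1000 points, moving
# 20% of the window each step; metrics are persisted in benchmarks.db.
bchmk.sliding_window_benchmarks(data, 1000, train=0.8, inc=0.2,
                                orders=[1, 2, 3], partitions=[10, 20],
                                type='point', progress=False,
                                file='benchmarks.db', dataset='TAIEX',
                                tag='example')
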

View File

@@ -71,6 +71,7 @@ class Differential(Transformation):
     def inverse(self, data, param, **kwargs):

         type = kwargs.get("type", "point")
+        steps_ahead = kwargs.get("steps_ahead", 1)

         if isinstance(data, (np.ndarray, np.generic)):
             data = data.tolist()
@@ -83,6 +84,7 @@ class Differential(Transformation):
         # print(n)
         # print(len(param))

+        if steps_ahead == 1:
             if type == "point":
                 inc = [data[t] + param[t] for t in np.arange(0, n)]
             elif type == "interval":
@@ -91,6 +93,21 @@ class Differential(Transformation):
                 for t in np.arange(0, n):
                     data[t].differential_offset(param[t])
                 inc = data
+        else:
+            if type == "point":
+                inc = [data[0] + param[0]]
+                for t in np.arange(1, steps_ahead):
+                    inc.append(data[t] + inc[t-1])
+            elif type == "interval":
+                inc = [[data[0][0] + param[0], data[0][1] + param[0]]]
+                for t in np.arange(1, steps_ahead):
+                    inc.append([data[t][0] + np.nanmean(inc[t-1]), data[t][1] + np.nanmean(inc[t-1])])
+            elif type == "distribution":
+                data[0].differential_offset(param[0])
+                for t in np.arange(1, steps_ahead):
+                    ex = data[t-1].expected_value()
+                    data[t].differential_offset(ex)
+                inc = data

         if n == 1:
             return inc[0]
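
The new else branch handles the multi-step case: only the first forecasted difference can be anchored on an observed level (param[0]); every later step must be stacked on the level reconstructed at the previous step (or, for distributions, on its expected value). A worked point-forecast example of the accumulation:

# Worked example: inverting first differences over a 3-step horizon.
# The last observed level is 100, so param[0] = 100; the model produced
# the differences +2, -1 and +4 for the next three steps.
diffs = [2, -1, 4]
inc = [diffs[0] + 100]                # step 1 anchored on the observed level: 102
for t in range(1, len(diffs)):
    inc.append(diffs[t] + inc[t-1])   # later steps stack on reconstructed levels
print(inc)                            # [102, 101, 105]
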

View File

@@ -114,9 +114,9 @@ class FTS(object):
             ret = Util.distributed_predict(self, kwargs, nodes, ndata, num_batches)

-        if type != 'distribution' and not self.is_multivariate:
-            interval = True if type == 'interval' else False
-            ret = self.apply_inverse_transformations(ret, params=[data[self.order - 1:]], interval=interval)
+        if not self.is_multivariate:
+            kwargs['type'] = type
+            ret = self.apply_inverse_transformations(ret, params=[data[self.order - 1:]], **kwargs)

         return ret
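
The predict wrapper no longer special-cases distribution forecasts: it stores the forecasting type back into kwargs and forwards everything to apply_inverse_transformations, which is what lets Differential.inverse (above) branch on 'type' and 'steps_ahead'. A simplified, hypothetical sketch of that flow, not the actual pyFTS implementation (real signatures live in pyFTS.common.fts):

# Hypothetical sketch of the kwargs plumbing after this change.
def predict_sketch(model, data, **kwargs):
    type_ = kwargs.get('type', 'point')
    ret = model.forecast(data, **kwargs)      # type-specific work happens inside
    if not model.is_multivariate:
        kwargs['type'] = type_
        # 'type' and 'steps_ahead' now reach every Transformation.inverse
        ret = model.apply_inverse_transformations(
            ret, params=[data[model.order - 1:]], **kwargs)
    return ret
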

View File

@@ -50,7 +50,10 @@ class ConventionalFTS(fts.FTS):
     def train(self, data, **kwargs):
         if kwargs.get('sets', None) is not None:
             self.sets = kwargs.get('sets', None)
-        tmpdata = FuzzySet.fuzzyfy_series_old(data, self.sets)
+        else:
+            self.sets = self.partitioner.sets
+
+        tmpdata = FuzzySet.fuzzyfy_series(data, self.sets, method='maximum')
         flrs = FLR.generate_non_recurrent_flrs(tmpdata)
         self.generate_flrg(flrs)
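
This is the first of six identical fixes in this commit (chen, hofts, ismailefendi, sadaei, song and yu all gain the same else branch): when train() receives no explicit 'sets' keyword, the fuzzy sets now default to those of the model's partitioner. In practice a model can therefore be trained straight from its partitioner, a sketch following the pattern in the test script below:

from pyFTS.partitioners import Grid
from pyFTS.models import chen
from pyFTS.data import TAIEX

data = TAIEX.get_data()

# No 'sets' keyword needed: train() falls back to partitioner.sets.
partitioner = Grid.GridPartitioner(data=data[:800], npart=10)
model = chen.ConventionalFTS('', partitioner=partitioner)
model.fit(data[:800])
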

View File

@@ -97,6 +97,8 @@ class HighOrderFTS(fts.FTS):
         if kwargs.get('sets', None) is not None:
             self.sets = kwargs.get('sets', None)
+        else:
+            self.sets = self.partitioner.sets
         self.generate_flrg(data)

View File

@@ -63,8 +63,10 @@ class ImprovedWeightedFTS(fts.FTS):
     def train(self, ndata, **kwargs):
         if kwargs.get('sets', None) is not None:
             self.sets = kwargs.get('sets', None)
+        else:
+            self.sets = self.partitioner.sets

-        tmpdata = FuzzySet.fuzzyfy_series(ndata, self.sets, method="maximum")
+        tmpdata = FuzzySet.fuzzyfy_series(ndata, self.sets, method='maximum')
         flrs = FLR.generate_recurrent_flrs(tmpdata)
         self.generate_flrg(flrs)

View File

@@ -69,6 +69,9 @@ class ExponentialyWeightedFTS(fts.FTS):
         self.c = kwargs.get('parameters', default_c)
         if kwargs.get('sets', None) is not None:
             self.sets = kwargs.get('sets', None)
+        else:
+            self.sets = self.partitioner.sets
+
         tmpdata = FuzzySet.fuzzyfy_series(data, self.sets, method='maximum')
         flrs = FLR.generate_recurrent_flrs(tmpdata)
         self.generate_flrg(flrs, self.c)
@@ -78,7 +81,7 @@ class ExponentialyWeightedFTS(fts.FTS):
         ordered_sets = FuzzySet.set_ordered(self.sets)

-        data = np.array(data)
+        data = np.array(ndata)

         l = len(ndata)

View File

@@ -39,7 +39,7 @@ class ConventionalFTS(fts.FTS):
     def operation_matrix(self, flrs):
         l = len(self.sets)
-        if self.R is None:
+        if self.R is None or len(self.R) == 0:
             self.R = np.zeros((l, l))
         for k in flrs:
             mm = self.flr_membership_matrix(k)
@@ -51,6 +51,8 @@ class ConventionalFTS(fts.FTS):
     def train(self, data, **kwargs):
         if kwargs.get('sets', None) is not None:
             self.sets = kwargs.get('sets', None)
+        else:
+            self.sets = self.partitioner.sets
         tmpdata = FuzzySet.fuzzyfy_series(data, self.sets, method='maximum')
         flrs = FLR.generate_non_recurrent_flrs(tmpdata)

View File

@@ -60,8 +60,10 @@ class WeightedFTS(fts.FTS):
     def train(self, ndata, **kwargs):
         if kwargs.get('sets', None) is not None:
             self.sets = kwargs.get('sets', None)
+        else:
+            self.sets = self.partitioner.sets

-        tmpdata = FuzzySet.fuzzyfy_series_old(ndata, self.sets)
+        tmpdata = FuzzySet.fuzzyfy_series(ndata, self.sets, method='maximum')
         flrs = FLR.generate_recurrent_flrs(tmpdata)
         self.generate_FLRG(flrs)

View File

@@ -15,33 +15,37 @@ from pyFTS.data import TAIEX
 dataset = TAIEX.get_data()

-from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil
+from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil, Measures
 from pyFTS.models import pwfts

-'''
 from pyFTS.partitioners import Grid, Util as pUtil
 partitioner = Grid.GridPartitioner(data=dataset[:800], npart=10, transformation=tdiff)

 model = pwfts.ProbabilisticWeightedFTS('', partitioner=partitioner)
-#model.append_transformation(tdiff)
+model.append_transformation(tdiff)
 model.fit(dataset[:800])

-print(model.predict(dataset[800:1000], type='interval'))
+print(Measures.get_distribution_statistics(dataset[800:1000], model, steps_ahead=7))
+
+#tmp = model.predict(dataset[800:1000], type='distribution', steps_ahead=7)
+#for tmp2 in tmp:
+#    print(tmp2)

 '''
-bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2, methods=[pwfts.ProbabilisticWeightedFTS],
+#'''
+bchmk.sliding_window_benchmarks(dataset[:1000], 1000, train=0.8, inc=0.2,
+                                #methods=[pwfts.ProbabilisticWeightedFTS],
                                 benchmark_models=False,
                                 #transformations=[tdiff],
-                                orders=[1, 2, 3],
-                                partitions=np.arange(10, 100, 5),
-                                progress=False, type='distribution',
+                                orders=[1], #[1, 2, 3],
+                                partitions=[20], #np.arange(10, 100, 5),
+                                progress=True, type='point',
                                 #steps_ahead=[1,4,7,10], #steps_ahead=[1]
-                                distributed=True, nodes=['192.168.0.110', '192.168.0.100','192.168.0.106'],
-                                file="benchmarks.db", dataset="TAIEX", tag="partitioning")
+                                #distributed=True, nodes=['192.168.0.110', '192.168.0.105','192.168.0.106'],
+                                file="benchmarks.tmp", dataset="TAIEX", tag="comparisons")
                                 #save=True, file="tmp.db")
-'''
+#'''

 '''
 dat = pd.read_csv('pwfts_taiex_partitioning.csv', sep=';')
 print(bUtil.analytic_tabular_dataframe(dat))