From 8fea727560ec904c1e9b42b034b38633fca1718b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido?= Date: Mon, 10 Jul 2017 23:37:09 -0300 Subject: [PATCH] Bugfix at fts.forecastAhead; forecastDistribution with ProbabilityDistribution on benchmarks.arima --- pyFTS/benchmarks/Measures.py | 28 ++++++--- pyFTS/benchmarks/arima.py | 63 +++++++++++++------ pyFTS/fts.py | 8 +-- .../probabilistic/ProbabilityDistribution.py | 37 ++++++++++- 4 files changed, 100 insertions(+), 36 deletions(-) diff --git a/pyFTS/benchmarks/Measures.py b/pyFTS/benchmarks/Measures.py index 21fd558..0a84dd5 100644 --- a/pyFTS/benchmarks/Measures.py +++ b/pyFTS/benchmarks/Measures.py @@ -8,6 +8,7 @@ import time import numpy as np import pandas as pd from pyFTS.common import FuzzySet,SortedCollection +from pyFTS.probabilistic import ProbabilityDistribution def acf(data, k): @@ -154,7 +155,7 @@ def resolution(forecasts): def coverage(targets, forecasts): - """Percent of""" + """Percent of target values that fall inside forecasted interval""" preds = [] for i in np.arange(0, len(forecasts)): if targets[i] >= forecasts[i][0] and targets[i] <= forecasts[i][1]: @@ -218,14 +219,25 @@ def heavyside_cdf(bins, targets): def crps(targets, densities): """Continuous Ranked Probability Score""" - l = len(densities.columns) - n = len(densities.index) - Ff = pmf_to_cdf(densities) - Fa = heavyside_cdf(densities.columns, targets) - _crps = float(0.0) - for k in densities.index: - _crps += sum([ (Ff[col][k]-Fa[col][k])**2 for col in densities.columns]) + if isinstance(densities, pd.DataFrame): + l = len(densities.columns) + n = len(densities.index) + Ff = pmf_to_cdf(densities) + Fa = heavyside_cdf(densities.columns, targets) + for k in densities.index: + _crps += sum([ (Ff[col][k]-Fa[col][k])**2 for col in densities.columns]) + elif isinstance(densities, ProbabilityDistribution): + l = len(densities.bins) + n = 1 + Fa = heavyside_cdf(densities.bin, targets) + _crps = sum([(densities.cdf(val) - Fa[val][0]) ** 2 for val in densities.bins]) + elif isinstance(densities, list): + l = len(densities[0].bins) + n = len(densities) + Fa = heavyside_cdf(densities[0].bin, targets) + for df in densities: + _crps += sum([(df.cdf(val) - Fa[val][0]) ** 2 for val in df.bins]) return _crps / float(l * n) diff --git a/pyFTS/benchmarks/arima.py b/pyFTS/benchmarks/arima.py index 797146a..4b4a41d 100644 --- a/pyFTS/benchmarks/arima.py +++ b/pyFTS/benchmarks/arima.py @@ -7,6 +7,7 @@ from statsmodels.tsa.arima_model import ARIMA as stats_arima import scipy.stats as st from pyFTS import fts from pyFTS.common import SortedCollection +from pyFTS.probabilistic import ProbabilityDistribution class ARIMA(fts.FTS): @@ -148,29 +149,55 @@ class ARIMA(fts.FTS): def empty_grid(self, resolution): return self.get_empty_grid(-(self.original_max*2), self.original_max*2, resolution) + def forecastDistribution(self, data, **kwargs): + + sigma = np.sqrt(self.model_fit.sigma2) + + l = len(data) + + ret = [] + + for k in np.arange(self.order, l + 1): + tmp = [] + + sample = [data[i] for i in np.arange(k - self.order, k)] + + mean = self.forecast(sample) + + if isinstance(mean, (list, np.ndarray)): + mean = mean[0] + + dist = ProbabilityDistribution.ProbabilityDistribution(type="histogram", uod=[self.original_min, self.original_max]) + intervals = [] + for alpha in np.arange(0.05, 0.5, 0.05): + + qt1 = mean + st.norm.ppf(alpha) * sigma + qt2 = mean + st.norm.ppf(1 - alpha) * sigma + + intervals.append([qt1, qt2]) + + dist.appendInterval(intervals) + + ret.append(dist) + + return ret + + def forecastAheadDistribution(self, data, steps, **kwargs): smoothing = kwargs.get("smoothing", 0.5) sigma = np.sqrt(self.model_fit.sigma2) - ndata = np.array(self.doTransformations(data)) - - l = len(ndata) - - percentile_size = (self.original_max - self.original_min)/100 - - resolution = kwargs.get('resolution', percentile_size) - - grid = self.empty_grid(resolution) - - index = SortedCollection.SortedCollection(iterable=grid.keys()) + l = len(data) ret = [] - nmeans = self.forecastAhead(ndata, steps, **kwargs) + nmeans = self.forecastAhead(data, steps, **kwargs) for k in np.arange(0, steps): - grid = self.empty_grid(resolution) + dist = ProbabilityDistribution.ProbabilityDistribution(type="histogram", + uod=[self.original_min, self.original_max]) + intervals = [] for alpha in np.arange(0.05, 0.5, 0.05): tmp = [] @@ -179,12 +206,10 @@ class ARIMA(fts.FTS): tmp.append(nmeans[k] + st.norm.ppf(alpha) * hsigma) tmp.append(nmeans[k] + st.norm.ppf(1 - alpha) * hsigma) - grid = self.gridCount(grid, resolution, index, tmp) + intervals.append(tmp) - tmp = np.array([grid[i] for i in sorted(grid)]) + dist.appendInterval(intervals) - ret.append(tmp / sum(tmp)) + ret.append(dist) - grid = self.empty_grid(resolution) - df = pd.DataFrame(ret, columns=sorted(grid)) - return df \ No newline at end of file + return ret \ No newline at end of file diff --git a/pyFTS/fts.py b/pyFTS/fts.py index 018794a..e04342f 100644 --- a/pyFTS/fts.py +++ b/pyFTS/fts.py @@ -89,19 +89,15 @@ class FTS(object): :param kwargs: :return: """ - ndata = [k for k in self.doTransformations(data[- self.order:])] - ret = [] for k in np.arange(0,steps): - tmp = self.forecast(ndata[-self.order:], **kwargs) + tmp = self.forecast(data[-self.order:], **kwargs) if isinstance(tmp,(list, np.ndarray)): tmp = tmp[0] ret.append(tmp) - ndata.append(tmp) - - ret = self.doInverseTransformations(ret, params=[ndata[self.order - 1:]]) + data.append(tmp) return ret diff --git a/pyFTS/probabilistic/ProbabilityDistribution.py b/pyFTS/probabilistic/ProbabilityDistribution.py index e9fbd88..4bd580e 100644 --- a/pyFTS/probabilistic/ProbabilityDistribution.py +++ b/pyFTS/probabilistic/ProbabilityDistribution.py @@ -3,7 +3,8 @@ import pandas as pd import matplotlib.pyplot as plt from pyFTS.common import FuzzySet,SortedCollection from pyFTS.probabilistic import kde - +from pyFTS import tree +from pyFTS.common import SortedCollection class ProbabilityDistribution(object): """ @@ -42,6 +43,10 @@ class ProbabilityDistribution(object): self.name = kwargs.get("name", "") + def set(self, value, density): + k = self.index.find_ge(value) + self.distribution[k] = density + def append(self, values): if self.type == "histogram": for k in values: @@ -55,20 +60,46 @@ class ProbabilityDistribution(object): for v,d in enumerate(dens): self.distribution[self.bins[v]] = d + def appendInterval(self, intervals): + if self.type == "histogram": + for interval in intervals: + for k in self.index.inside(interval[0], interval[1]): + self.distribution[k] += 1 + self.count += 1 + def density(self, values): ret = [] for k in values: if self.type == "histogram": v = self.index.find_ge(k) ret.append(self.distribution[v] / self.count) - else: + elif self.type == "KDE": v = self.kde.probability(k, self.data) ret.append(v) + else: + v = self.index.find_ge(k) + ret.append(self.distribution[v]) + return ret + + def cdf(self, value): + ret = 0 + for k in self.bins: + if k < value: + ret += self.distribution[k] + else: + return ret + return ret def cummulative(self, values): - pass + if isinstance(values, list): + ret = [] + for k in values: + ret.append(self.cdf(k)) + else: + return self.cdf(values) + def quantile(self, qt): pass