Bugfix at fts.forecastAhead; forecastDistribution with ProbabilityDistribution on benchmarks.arima

This commit is contained in:
Petrônio Cândido 2017-07-10 23:37:09 -03:00
parent 5503cd26a4
commit 8fea727560
4 changed files with 100 additions and 36 deletions

View File

@ -8,6 +8,7 @@ import time
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from pyFTS.common import FuzzySet,SortedCollection from pyFTS.common import FuzzySet,SortedCollection
from pyFTS.probabilistic import ProbabilityDistribution
def acf(data, k): def acf(data, k):
@ -154,7 +155,7 @@ def resolution(forecasts):
def coverage(targets, forecasts): def coverage(targets, forecasts):
"""Percent of""" """Percent of target values that fall inside forecasted interval"""
preds = [] preds = []
for i in np.arange(0, len(forecasts)): for i in np.arange(0, len(forecasts)):
if targets[i] >= forecasts[i][0] and targets[i] <= forecasts[i][1]: if targets[i] >= forecasts[i][0] and targets[i] <= forecasts[i][1]:
@ -218,14 +219,25 @@ def heavyside_cdf(bins, targets):
def crps(targets, densities): def crps(targets, densities):
"""Continuous Ranked Probability Score""" """Continuous Ranked Probability Score"""
l = len(densities.columns)
n = len(densities.index)
Ff = pmf_to_cdf(densities)
Fa = heavyside_cdf(densities.columns, targets)
_crps = float(0.0) _crps = float(0.0)
for k in densities.index: if isinstance(densities, pd.DataFrame):
_crps += sum([ (Ff[col][k]-Fa[col][k])**2 for col in densities.columns]) l = len(densities.columns)
n = len(densities.index)
Ff = pmf_to_cdf(densities)
Fa = heavyside_cdf(densities.columns, targets)
for k in densities.index:
_crps += sum([ (Ff[col][k]-Fa[col][k])**2 for col in densities.columns])
elif isinstance(densities, ProbabilityDistribution):
l = len(densities.bins)
n = 1
Fa = heavyside_cdf(densities.bin, targets)
_crps = sum([(densities.cdf(val) - Fa[val][0]) ** 2 for val in densities.bins])
elif isinstance(densities, list):
l = len(densities[0].bins)
n = len(densities)
Fa = heavyside_cdf(densities[0].bin, targets)
for df in densities:
_crps += sum([(df.cdf(val) - Fa[val][0]) ** 2 for val in df.bins])
return _crps / float(l * n) return _crps / float(l * n)

View File

@ -7,6 +7,7 @@ from statsmodels.tsa.arima_model import ARIMA as stats_arima
import scipy.stats as st import scipy.stats as st
from pyFTS import fts from pyFTS import fts
from pyFTS.common import SortedCollection from pyFTS.common import SortedCollection
from pyFTS.probabilistic import ProbabilityDistribution
class ARIMA(fts.FTS): class ARIMA(fts.FTS):
@ -148,29 +149,55 @@ class ARIMA(fts.FTS):
def empty_grid(self, resolution): def empty_grid(self, resolution):
return self.get_empty_grid(-(self.original_max*2), self.original_max*2, resolution) return self.get_empty_grid(-(self.original_max*2), self.original_max*2, resolution)
def forecastDistribution(self, data, **kwargs):
sigma = np.sqrt(self.model_fit.sigma2)
l = len(data)
ret = []
for k in np.arange(self.order, l + 1):
tmp = []
sample = [data[i] for i in np.arange(k - self.order, k)]
mean = self.forecast(sample)
if isinstance(mean, (list, np.ndarray)):
mean = mean[0]
dist = ProbabilityDistribution.ProbabilityDistribution(type="histogram", uod=[self.original_min, self.original_max])
intervals = []
for alpha in np.arange(0.05, 0.5, 0.05):
qt1 = mean + st.norm.ppf(alpha) * sigma
qt2 = mean + st.norm.ppf(1 - alpha) * sigma
intervals.append([qt1, qt2])
dist.appendInterval(intervals)
ret.append(dist)
return ret
def forecastAheadDistribution(self, data, steps, **kwargs): def forecastAheadDistribution(self, data, steps, **kwargs):
smoothing = kwargs.get("smoothing", 0.5) smoothing = kwargs.get("smoothing", 0.5)
sigma = np.sqrt(self.model_fit.sigma2) sigma = np.sqrt(self.model_fit.sigma2)
ndata = np.array(self.doTransformations(data)) l = len(data)
l = len(ndata)
percentile_size = (self.original_max - self.original_min)/100
resolution = kwargs.get('resolution', percentile_size)
grid = self.empty_grid(resolution)
index = SortedCollection.SortedCollection(iterable=grid.keys())
ret = [] ret = []
nmeans = self.forecastAhead(ndata, steps, **kwargs) nmeans = self.forecastAhead(data, steps, **kwargs)
for k in np.arange(0, steps): for k in np.arange(0, steps):
grid = self.empty_grid(resolution) dist = ProbabilityDistribution.ProbabilityDistribution(type="histogram",
uod=[self.original_min, self.original_max])
intervals = []
for alpha in np.arange(0.05, 0.5, 0.05): for alpha in np.arange(0.05, 0.5, 0.05):
tmp = [] tmp = []
@ -179,12 +206,10 @@ class ARIMA(fts.FTS):
tmp.append(nmeans[k] + st.norm.ppf(alpha) * hsigma) tmp.append(nmeans[k] + st.norm.ppf(alpha) * hsigma)
tmp.append(nmeans[k] + st.norm.ppf(1 - alpha) * hsigma) tmp.append(nmeans[k] + st.norm.ppf(1 - alpha) * hsigma)
grid = self.gridCount(grid, resolution, index, tmp) intervals.append(tmp)
tmp = np.array([grid[i] for i in sorted(grid)]) dist.appendInterval(intervals)
ret.append(tmp / sum(tmp)) ret.append(dist)
grid = self.empty_grid(resolution) return ret
df = pd.DataFrame(ret, columns=sorted(grid))
return df

View File

@ -89,19 +89,15 @@ class FTS(object):
:param kwargs: :param kwargs:
:return: :return:
""" """
ndata = [k for k in self.doTransformations(data[- self.order:])]
ret = [] ret = []
for k in np.arange(0,steps): for k in np.arange(0,steps):
tmp = self.forecast(ndata[-self.order:], **kwargs) tmp = self.forecast(data[-self.order:], **kwargs)
if isinstance(tmp,(list, np.ndarray)): if isinstance(tmp,(list, np.ndarray)):
tmp = tmp[0] tmp = tmp[0]
ret.append(tmp) ret.append(tmp)
ndata.append(tmp) data.append(tmp)
ret = self.doInverseTransformations(ret, params=[ndata[self.order - 1:]])
return ret return ret

View File

@ -3,7 +3,8 @@ import pandas as pd
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from pyFTS.common import FuzzySet,SortedCollection from pyFTS.common import FuzzySet,SortedCollection
from pyFTS.probabilistic import kde from pyFTS.probabilistic import kde
from pyFTS import tree
from pyFTS.common import SortedCollection
class ProbabilityDistribution(object): class ProbabilityDistribution(object):
""" """
@ -42,6 +43,10 @@ class ProbabilityDistribution(object):
self.name = kwargs.get("name", "") self.name = kwargs.get("name", "")
def set(self, value, density):
k = self.index.find_ge(value)
self.distribution[k] = density
def append(self, values): def append(self, values):
if self.type == "histogram": if self.type == "histogram":
for k in values: for k in values:
@ -55,20 +60,46 @@ class ProbabilityDistribution(object):
for v,d in enumerate(dens): for v,d in enumerate(dens):
self.distribution[self.bins[v]] = d self.distribution[self.bins[v]] = d
def appendInterval(self, intervals):
if self.type == "histogram":
for interval in intervals:
for k in self.index.inside(interval[0], interval[1]):
self.distribution[k] += 1
self.count += 1
def density(self, values): def density(self, values):
ret = [] ret = []
for k in values: for k in values:
if self.type == "histogram": if self.type == "histogram":
v = self.index.find_ge(k) v = self.index.find_ge(k)
ret.append(self.distribution[v] / self.count) ret.append(self.distribution[v] / self.count)
else: elif self.type == "KDE":
v = self.kde.probability(k, self.data) v = self.kde.probability(k, self.data)
ret.append(v) ret.append(v)
else:
v = self.index.find_ge(k)
ret.append(self.distribution[v])
return ret
def cdf(self, value):
ret = 0
for k in self.bins:
if k < value:
ret += self.distribution[k]
else:
return ret
return ret return ret
def cummulative(self, values): def cummulative(self, values):
pass if isinstance(values, list):
ret = []
for k in values:
ret.append(self.cdf(k))
else:
return self.cdf(values)
def quantile(self, qt): def quantile(self, qt):
pass pass