Bugfix in fts.forecastAhead; forecastDistribution with ProbabilityDistribution in benchmarks.arima
parent 5503cd26a4
commit 8fea727560
pyFTS/benchmarks/Measures.py

@@ -8,6 +8,7 @@ import time
 import numpy as np
 import pandas as pd
 from pyFTS.common import FuzzySet,SortedCollection
+from pyFTS.probabilistic import ProbabilityDistribution
 
 
 def acf(data, k):
@@ -154,7 +155,7 @@ def resolution(forecasts):
 
 
 def coverage(targets, forecasts):
-    """Percent of"""
+    """Percent of target values that fall inside forecasted interval"""
    preds = []
    for i in np.arange(0, len(forecasts)):
        if targets[i] >= forecasts[i][0] and targets[i] <= forecasts[i][1]:
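For context, coverage averages the hit indicators, i.e. the fraction of targets that land inside their forecast interval. A minimal check (the values are invented, and it assumes the function returns the mean of preds):

    from pyFTS.benchmarks import Measures

    targets = [10.0, 12.0, 15.0]
    intervals = [[9.0, 11.0], [11.5, 13.0], [16.0, 18.0]]  # last interval misses
    print(Measures.coverage(targets, intervals))           # expected ~0.67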
@@ -218,14 +219,25 @@ def heavyside_cdf(bins, targets):
 
 
 def crps(targets, densities):
     """Continuous Ranked Probability Score"""
-    l = len(densities.columns)
-    n = len(densities.index)
-    Ff = pmf_to_cdf(densities)
-    Fa = heavyside_cdf(densities.columns, targets)
-
     _crps = float(0.0)
-    for k in densities.index:
-        _crps += sum([ (Ff[col][k]-Fa[col][k])**2 for col in densities.columns])
+    if isinstance(densities, pd.DataFrame):
+        l = len(densities.columns)
+        n = len(densities.index)
+        Ff = pmf_to_cdf(densities)
+        Fa = heavyside_cdf(densities.columns, targets)
+        for k in densities.index:
+            _crps += sum([(Ff[col][k] - Fa[col][k]) ** 2 for col in densities.columns])
+    elif isinstance(densities, ProbabilityDistribution):
+        l = len(densities.bins)
+        n = 1
+        Fa = heavyside_cdf(densities.bins, targets)
+        _crps = sum([(densities.cdf(val) - Fa[val][0]) ** 2 for val in densities.bins])
+    elif isinstance(densities, list):
+        l = len(densities[0].bins)
+        n = len(densities)
+        Fa = heavyside_cdf(densities[0].bins, targets)
+        for df in densities:
+            _crps += sum([(df.cdf(val) - Fa[val][0]) ** 2 for val in df.bins])
 
     return _crps / float(l * n)
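For reference, the quantity this function discretizes is the continuous ranked probability score of a predictive CDF F against an observation x,

    \mathrm{CRPS}(F, x) = \int_{-\infty}^{\infty} \big( F(y) - \mathbb{1}\{ y \ge x \} \big)^2 \, dy

where the indicator CDF \mathbb{1}\{y \ge x\} is what heavyside_cdf supplies; the code sums the squared CDF differences over the l bins and averages over the n forecasts, hence the final _crps / (l * n).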
pyFTS/benchmarks/arima.py

@@ -7,6 +7,7 @@ from statsmodels.tsa.arima_model import ARIMA as stats_arima
 import scipy.stats as st
 from pyFTS import fts
 from pyFTS.common import SortedCollection
+from pyFTS.probabilistic import ProbabilityDistribution
 
 
 class ARIMA(fts.FTS):
@@ -148,29 +149,55 @@ class ARIMA(fts.FTS):
     def empty_grid(self, resolution):
         return self.get_empty_grid(-(self.original_max*2), self.original_max*2, resolution)
 
+    def forecastDistribution(self, data, **kwargs):
+
+        sigma = np.sqrt(self.model_fit.sigma2)
+
+        l = len(data)
+
+        ret = []
+
+        for k in np.arange(self.order, l + 1):
+            sample = [data[i] for i in np.arange(k - self.order, k)]
+
+            mean = self.forecast(sample)
+
+            if isinstance(mean, (list, np.ndarray)):
+                mean = mean[0]
+
+            dist = ProbabilityDistribution.ProbabilityDistribution(type="histogram", uod=[self.original_min, self.original_max])
+            intervals = []
+            for alpha in np.arange(0.05, 0.5, 0.05):
+                qt1 = mean + st.norm.ppf(alpha) * sigma
+                qt2 = mean + st.norm.ppf(1 - alpha) * sigma
+                intervals.append([qt1, qt2])
+
+            dist.appendInterval(intervals)
+
+            ret.append(dist)
+
+        return ret
+
     def forecastAheadDistribution(self, data, steps, **kwargs):
         smoothing = kwargs.get("smoothing", 0.5)
 
         sigma = np.sqrt(self.model_fit.sigma2)
 
-        ndata = np.array(self.doTransformations(data))
+        l = len(data)
 
-        l = len(ndata)
-
-        percentile_size = (self.original_max - self.original_min)/100
-
-        resolution = kwargs.get('resolution', percentile_size)
-
-        grid = self.empty_grid(resolution)
-
-        index = SortedCollection.SortedCollection(iterable=grid.keys())
-
         ret = []
 
-        nmeans = self.forecastAhead(ndata, steps, **kwargs)
+        nmeans = self.forecastAhead(data, steps, **kwargs)
 
         for k in np.arange(0, steps):
-            grid = self.empty_grid(resolution)
+            dist = ProbabilityDistribution.ProbabilityDistribution(type="histogram",
+                                                                   uod=[self.original_min, self.original_max])
+            intervals = []
             for alpha in np.arange(0.05, 0.5, 0.05):
                 tmp = []
 
@@ -179,12 +206,10 @@ class ARIMA(fts.FTS):
                 tmp.append(nmeans[k] + st.norm.ppf(alpha) * hsigma)
                 tmp.append(nmeans[k] + st.norm.ppf(1 - alpha) * hsigma)
 
-                grid = self.gridCount(grid, resolution, index, tmp)
+                intervals.append(tmp)
 
-            tmp = np.array([grid[i] for i in sorted(grid)])
+            dist.appendInterval(intervals)
 
-            ret.append(tmp / sum(tmp))
+            ret.append(dist)
 
-        grid = self.empty_grid(resolution)
+        return ret
 
-        df = pd.DataFrame(ret, columns=sorted(grid))
-
-        return df
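forecastDistribution builds the predictive density by stacking the nested (alpha, 1 - alpha) normal quantile intervals into a histogram. A standalone sketch of that idea using only numpy/scipy (the mean, sigma and bin values are invented for illustration, not taken from the commit):

    import numpy as np
    import scipy.stats as st

    mean, sigma = 10.0, 2.0            # invented point forecast and residual std
    bins = np.linspace(0.0, 20.0, 41)  # invented universe of discourse
    counts = np.zeros(len(bins))

    # Each nested (alpha, 1 - alpha) quantile interval of N(mean, sigma**2) adds
    # one count to every bin it covers, so bins near the mean accumulate more mass.
    for alpha in np.arange(0.05, 0.5, 0.05):
        qt1 = mean + st.norm.ppf(alpha) * sigma
        qt2 = mean + st.norm.ppf(1 - alpha) * sigma
        counts[(bins >= qt1) & (bins <= qt2)] += 1

    density = counts / counts.sum()    # coarse histogram approximation of the normal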
pyFTS/fts.py

@@ -89,19 +89,15 @@ class FTS(object):
         :param kwargs:
         :return:
         """
-        ndata = [k for k in self.doTransformations(data[- self.order:])]
-
         ret = []
         for k in np.arange(0,steps):
-            tmp = self.forecast(ndata[-self.order:], **kwargs)
+            tmp = self.forecast(data[-self.order:], **kwargs)
 
             if isinstance(tmp,(list, np.ndarray)):
                 tmp = tmp[0]
 
             ret.append(tmp)
-            ndata.append(tmp)
+            data.append(tmp)
 
-        ret = self.doInverseTransformations(ret, params=[ndata[self.order - 1:]])
-
         return ret
 
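Worth noting about the fixed forecastAhead: forecasts are now appended directly onto the data argument, so the caller's list grows by steps elements and must support append. If the original series should stay intact, pass a copy (model here stands for any fitted fts.FTS subclass):

    history = [1.0, 2.0, 3.0]
    forecasts = model.forecastAhead(list(history), 5)  # forecast on a copy
    # history still has 3 elements; passing it directly would mutate it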
pyFTS/probabilistic/ProbabilityDistribution.py

@@ -3,7 +3,8 @@ import pandas as pd
 import matplotlib.pyplot as plt
 from pyFTS.common import FuzzySet,SortedCollection
 from pyFTS.probabilistic import kde
+from pyFTS import tree
+from pyFTS.common import SortedCollection
 
 
 class ProbabilityDistribution(object):
     """
@@ -42,6 +43,10 @@ class ProbabilityDistribution(object):
 
         self.name = kwargs.get("name", "")
 
+    def set(self, value, density):
+        k = self.index.find_ge(value)
+        self.distribution[k] = density
+
     def append(self, values):
         if self.type == "histogram":
             for k in values:
@@ -55,20 +60,46 @@ class ProbabilityDistribution(object):
             for v,d in enumerate(dens):
                 self.distribution[self.bins[v]] = d
 
+    def appendInterval(self, intervals):
+        if self.type == "histogram":
+            for interval in intervals:
+                for k in self.index.inside(interval[0], interval[1]):
+                    self.distribution[k] += 1
+                self.count += 1
+
     def density(self, values):
         ret = []
         for k in values:
             if self.type == "histogram":
                 v = self.index.find_ge(k)
                 ret.append(self.distribution[v] / self.count)
-            else:
+            elif self.type == "KDE":
                 v = self.kde.probability(k, self.data)
                 ret.append(v)
+            else:
+                v = self.index.find_ge(k)
+                ret.append(self.distribution[v])
         return ret
 
+    def cdf(self, value):
+        ret = 0
+        for k in self.bins:
+            if k < value:
+                ret += self.distribution[k]
+            else:
+                return ret
+
+        return ret
+
     def cummulative(self, values):
-        pass
+        if isinstance(values, list):
+            ret = []
+            for k in values:
+                ret.append(self.cdf(k))
+            return ret
+        else:
+            return self.cdf(values)
 
     def quantile(self, qt):
         pass
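Taken together, the new methods let a distribution be built from stacked quantile intervals and then queried as a CDF. A rough usage sketch, assuming the histogram constructor derives its bins from uod alone (this pyFTS version may require extra kwargs such as a bin count; the interval values are invented):

    from pyFTS.probabilistic import ProbabilityDistribution

    dist = ProbabilityDistribution.ProbabilityDistribution(type="histogram", uod=[0, 20])
    # nested intervals, as produced by arima.ARIMA.forecastDistribution
    dist.appendInterval([[8.0, 12.0], [9.0, 11.0], [9.5, 10.5]])

    print(dist.density([10.0]))                 # relative mass of the bin covering 10.0
    print(dist.cummulative(10.0))               # cdf(10.0): summed mass of bins below 10.0
    print(dist.cummulative([9.0, 10.0, 11.0]))  # list input returns a list of cdf values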