Bugfixes in ProbabilityDistribution

This commit is contained in:
Petrônio Cândido 2019-05-28 14:32:06 -03:00
parent e3f5faee2b
commit 60785ac89f
5 changed files with 126 additions and 57 deletions

86
pyFTS/benchmarks/BSTS.py Normal file
View File

@ -0,0 +1,86 @@
#!/usr/bin/python
# -*- coding: utf8 -*-
import numpy as np
import pandas as pd
import pyflux as pf
import scipy.stats as st
from pyFTS.common import SortedCollection, fts
from pyFTS.probabilistic import ProbabilityDistribution
class ARIMA(fts.FTS):
"""
Façade for statsmodels.tsa.arima_model
"""
def __init__(self, **kwargs):
super(ARIMA, self).__init__(**kwargs)
self.name = "BSTS"
self.detail = "Bayesian Structural Time Series"
self.is_high_order = True
self.has_point_forecasting = True
self.has_interval_forecasting = True
self.has_probability_forecasting = True
self.model = None
self.model_fit = None
self.trained_data = None
self.p = 1
self.d = 0
self.q = 0
self.benchmark_only = True
self.min_order = 1
self.alpha = kwargs.get("alpha", 0.05)
self.order = kwargs.get("order", (1,0,0))
self._decompose_order(self.order)
self.model = None
def _decompose_order(self, order):
if isinstance(order, (tuple, set, list)):
self.p = order[0]
self.d = order[1]
self.q = order[2]
self.order = self.p + self.q + (self.q - 1 if self.q > 0 else 0)
self.max_lag = self.order
self.d = len(self.transformations)
self.shortname = "ARIMA(" + str(self.p) + "," + str(self.d) + "," + str(self.q) + ") - " + str(self.alpha)
def train(self, data, **kwargs):
if 'order' in kwargs:
order = kwargs.pop('order')
self._decompose_order(order)
if self.indexer is not None:
data = self.indexer.get_data(data)
try:
self.model = pf.ARIMA(data=data, ar=self.p, ma=self.q, integ=self.d, family=pf.Normal())
self.model_fit = self.model.fit('M-H', nsims=20000)
except Exception as ex:
print(ex)
self.model_fit = None
def forecast(self, ndata, **kwargs):
return ret
def forecast_interval(self, data, **kwargs):
return ret
def forecast_ahead_interval(self, ndata, steps, **kwargs):
return ret
def forecast_distribution(self, data, **kwargs):
return ret
def forecast_ahead_distribution(self, data, steps, **kwargs):
return ret

View File

@ -239,7 +239,6 @@ class EnsembleFTS(fts.FTS):
return ret return ret
def forecast_ahead_distribution(self, data, steps, **kwargs): def forecast_ahead_distribution(self, data, steps, **kwargs):
if 'method' in kwargs: if 'method' in kwargs:
self.point_method = kwargs.get('method','mean') self.point_method = kwargs.get('method','mean')

View File

@ -1,8 +1,7 @@
import numpy as np import numpy as np
import pandas as pd import pandas as pd
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from pyFTS.common import FuzzySet from pyFTS.common import FuzzySet,SortedCollection,tree
from scipy.spatial import KDTree
from pyFTS.probabilistic import kde from pyFTS.probabilistic import kde
@ -32,7 +31,9 @@ class ProbabilityDistribution(object):
data = kwargs.get("data", None) data = kwargs.get("data", None)
if self.type == "KDE": if self.type == "KDE":
self.kde = kde.KernelSmoothing(kwargs.get("h", 0.5), kwargs.get("kernel", "epanechnikov")) self.kde = kde.KernelSmoothing(h=kwargs.get("h", 0.5), kernel=kwargs.get("kernel", "epanechnikov"))
if self.data is not None:
_min = np.nanmin(data) _min = np.nanmin(data)
_min = _min * .7 if _min > 0 else _min * 1.3 _min = _min * .7 if _min > 0 else _min * 1.3
_max = np.nanmax(data) _max = np.nanmax(data)
@ -42,13 +43,13 @@ class ProbabilityDistribution(object):
self.nbins = kwargs.get("num_bins", 100) self.nbins = kwargs.get("num_bins", 100)
if self.bins is None: if self.bins is None:
self.bins = np.linspace(self.uod[0], self.uod[1], int(self.nbins)).tolist() self.bins = np.linspace(int(self.uod[0]), int(self.uod[1]), int(self.nbins)).tolist()
self.labels = [str(k) for k in self.bins] self.labels = [str(k) for k in self.bins]
if self.uod is not None: if self.uod is not None:
self.resolution = (self.uod[1] - self.uod[0]) / self.nbins self.resolution = (self.uod[1] - self.uod[0]) / self.nbins
self.bin_index = KDTree(self.bins) self.bin_index = SortedCollection.SortedCollection(iterable=sorted(self.bins))
self.quantile_index = None self.quantile_index = None
self.distribution = {} self.distribution = {}
self.cdf = None self.cdf = None
@ -68,8 +69,7 @@ class ProbabilityDistribution(object):
:param value: A value in the universe of discourse from the distribution :param value: A value in the universe of discourse from the distribution
:param density: The probability density to assign to the value :param density: The probability density to assign to the value
""" """
_, ix = self.bin_index.query([value], 1) k = self.bin_index.find_ge(value)
k = self.labels[ix]
self.distribution[k] = density self.distribution[k] = density
def append(self, values): def append(self, values):
@ -80,8 +80,7 @@ class ProbabilityDistribution(object):
""" """
if self.type == "histogram": if self.type == "histogram":
for k in values: for k in values:
_, ix = self.bin_index.query([k], 1) v = self.bin_index.find_ge(k)
v = self.labels[ix]
self.distribution[v] += 1 self.distribution[v] += 1
self.count += 1 self.count += 1
else: else:
@ -99,12 +98,8 @@ class ProbabilityDistribution(object):
""" """
if self.type == "histogram": if self.type == "histogram":
for interval in intervals: for interval in intervals:
_, ix1 = self.bin_index.query([interval[0]], 1) for k in self.bin_index.inside(interval[0], interval[1]):
_, ix2 = self.bin_index.query([interval[1]], 1) self.distribution[k] += 1
for k in range(ix1, ix2):
v = self.labels[k]
self.distribution[v] += 1
self.count += 1 self.count += 1
def density(self, values): def density(self, values):
@ -123,15 +118,13 @@ class ProbabilityDistribution(object):
for k in values: for k in values:
if self.type == "histogram": if self.type == "histogram":
_, ix = self.bin_index.query([k], 1) v = self.bin_index.find_ge(k)
v = self.labels[ix]
ret.append(self.distribution[v] / (self.count + 1e-5)) ret.append(self.distribution[v] / (self.count + 1e-5))
elif self.type == "KDE": elif self.type == "KDE":
v = self.kde.probability(k, self.data) v = self.kde.probability(k, data=self.data)
ret.append(v) ret.append(v)
else: else:
_, ix = self.bin_index.query([k], 1) v = self.bin_index.find_ge(k)
v = self.labels[ix]
ret.append(self.distribution[v]) ret.append(self.distribution[v])
if scalar: if scalar:
@ -158,7 +151,7 @@ class ProbabilityDistribution(object):
self.distribution = dist self.distribution = dist
self.labels = [str(k) for k in self.bins] self.labels = [str(k) for k in self.bins]
self.bin_index = KDTree(self.bins) self.bin_index = SortedCollection.SortedCollection(iterable=sorted(self.bins))
self.quantile_index = None self.quantile_index = None
self.cdf = None self.cdf = None
self.qtl = None self.qtl = None
@ -187,11 +180,11 @@ class ProbabilityDistribution(object):
_keys = [float(k) for k in sorted(self.qtl.keys())] _keys = [float(k) for k in sorted(self.qtl.keys())]
self.quantile_index = KDTree(_keys) self.quantile_index = SortedCollection.SortedCollection(iterable=_keys)
def cumulative(self, values): def cumulative(self, values):
""" """
Return the cumulative probability densities for the input values, Return the cumulative probability densities for the input values,
such that F(x) = P(X <= x) such that F(x) = P(X <= x)
:param values: A list of input values :param values: A list of input values
@ -203,17 +196,15 @@ class ProbabilityDistribution(object):
if isinstance(values, list): if isinstance(values, list):
ret = [] ret = []
for val in values: for val in values:
_, ix = self.bin_index.query([val], 1) k = self.bin_index.find_ge(val)
k = self.labels[ix]
ret.append(self.cdf[k]) ret.append(self.cdf[k])
else: else:
_, ix = self.bin_index.query([values], 1) k = self.bin_index.find_ge(values)
k = self.labels[ix]
return self.cdf[values] return self.cdf[values]
def quantile(self, values): def quantile(self, values):
""" """
Return the Universe of Discourse values in relation to the quantile input values, Return the Universe of Discourse values in relation to the quantile input values,
such that Q(tau) = min( {x | F(x) >= tau }) such that Q(tau) = min( {x | F(x) >= tau })
:param values: input values :param values: input values
@ -225,12 +216,10 @@ class ProbabilityDistribution(object):
if isinstance(values, list): if isinstance(values, list):
ret = [] ret = []
for val in values: for val in values:
_, ix = self.quantile_index.query([val], 1) k = self.quantile_index.find_ge(val)
k = self.labels[ix]
ret.append(self.qtl[str(k)][0]) ret.append(self.qtl[str(k)][0])
else: else:
_, ix = self.quantile_index.query([values], 1) k = self.quantile_index.find_ge(values)
k = self.labels[ix]
ret = self.qtl[str(k)] ret = self.qtl[str(k)]
return ret return ret

View File

@ -10,13 +10,17 @@ import numpy as np
class KernelSmoothing(object): class KernelSmoothing(object):
"""Kernel Density Estimation""" """Kernel Density Estimation"""
def __init__(self,h, kernel="epanechnikov"): def __init__(self, **kwargs):
self.h = h self.h = kwargs.get('h',.5)
"""Width parameter""" """Width parameter"""
self.kernel = kernel self.kernel = kwargs.get("kernel", "epanechnikov")
"""Kernel function""" """Kernel function"""
self.data = kwargs.get("data",None)
self.transf = Transformations.Scale(min=0,max=1) self.transf = Transformations.Scale(min=0,max=1)
if self.data is not None:
self.data = self.transf.apply(self.data)
def kernel_function(self, u): def kernel_function(self, u):
""" """
Apply the kernel Apply the kernel
@ -45,7 +49,7 @@ class KernelSmoothing(object):
elif self.kernel == "exponential": elif self.kernel == "exponential":
return 0.5 * np.exp(-np.abs(u)) return 0.5 * np.exp(-np.abs(u))
def probability(self, x, data): def probability(self, x, **kwargs):
""" """
Probability of the point x on data Probability of the point x on data
@ -53,10 +57,14 @@ class KernelSmoothing(object):
:param data: :param data:
:return: :return:
""" """
l = len(data)
ndata = self.transf.apply(data) if self.data is None:
self.data = kwargs.get('data',None)
self.data = self.transf.apply(self.data)
l = len(self.data)
nx = self.transf.apply(x) nx = self.transf.apply(x)
p = sum([self.kernel_function((nx - k)/self.h) for k in ndata]) / l*self.h p = sum([self.kernel_function((nx - k)/self.h) for k in self.data]) / l*self.h
return p return p

View File

@ -21,25 +21,12 @@ from pyFTS.data import Enrollments, TAIEX
x = [k for k in np.arange(-2*np.pi, 2*np.pi, 0.15)] x = [k for k in np.arange(-2*np.pi, 2*np.pi, 0.15)]
y = [np.sin(k) for k in x] y = [np.sin(k) for k in x]
from pyFTS.models import hofts from pyFTS.probabilistic import ProbabilityDistribution
from pyFTS.partitioners import Grid, FCM, CMeans, Entropy
from pyFTS.benchmarks import Measures
metodos = [FCM.FCMPartitioner] pd = ProbabilityDistribution.ProbabilityDistribution(type='histogram', data=y, num_bins=30)
k = 35
rows = []
for contador, metodo in enumerate(metodos):
print(metodo)
part = metodo(data=y, npart=k)
model = hofts.HighOrderFTS(order=2, partitioner=part)
model.fit(y)
forecasts = model.predict(y)
for o in range(model.order):
forecasts.insert(0, None)
print(pd.quantile([.5]))
print(pd.cdf)
''' '''
model = fts.FCM_FTS(partitioner=fs, order=1) model = fts.FCM_FTS(partitioner=fs, order=1)