Bugfixes in ProbabilityDistribution
This commit is contained in:
parent
e3f5faee2b
commit
60785ac89f
86
pyFTS/benchmarks/BSTS.py
Normal file
86
pyFTS/benchmarks/BSTS.py
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
#!/usr/bin/python
|
||||||
|
# -*- coding: utf8 -*-
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import pyflux as pf
|
||||||
|
import scipy.stats as st
|
||||||
|
from pyFTS.common import SortedCollection, fts
|
||||||
|
from pyFTS.probabilistic import ProbabilityDistribution
|
||||||
|
|
||||||
|
|
||||||
|
class ARIMA(fts.FTS):
|
||||||
|
"""
|
||||||
|
Façade for statsmodels.tsa.arima_model
|
||||||
|
"""
|
||||||
|
def __init__(self, **kwargs):
|
||||||
|
super(ARIMA, self).__init__(**kwargs)
|
||||||
|
self.name = "BSTS"
|
||||||
|
self.detail = "Bayesian Structural Time Series"
|
||||||
|
self.is_high_order = True
|
||||||
|
self.has_point_forecasting = True
|
||||||
|
self.has_interval_forecasting = True
|
||||||
|
self.has_probability_forecasting = True
|
||||||
|
self.model = None
|
||||||
|
self.model_fit = None
|
||||||
|
self.trained_data = None
|
||||||
|
self.p = 1
|
||||||
|
self.d = 0
|
||||||
|
self.q = 0
|
||||||
|
self.benchmark_only = True
|
||||||
|
self.min_order = 1
|
||||||
|
self.alpha = kwargs.get("alpha", 0.05)
|
||||||
|
self.order = kwargs.get("order", (1,0,0))
|
||||||
|
self._decompose_order(self.order)
|
||||||
|
self.model = None
|
||||||
|
|
||||||
|
def _decompose_order(self, order):
|
||||||
|
if isinstance(order, (tuple, set, list)):
|
||||||
|
self.p = order[0]
|
||||||
|
self.d = order[1]
|
||||||
|
self.q = order[2]
|
||||||
|
self.order = self.p + self.q + (self.q - 1 if self.q > 0 else 0)
|
||||||
|
self.max_lag = self.order
|
||||||
|
self.d = len(self.transformations)
|
||||||
|
self.shortname = "ARIMA(" + str(self.p) + "," + str(self.d) + "," + str(self.q) + ") - " + str(self.alpha)
|
||||||
|
|
||||||
|
def train(self, data, **kwargs):
|
||||||
|
|
||||||
|
if 'order' in kwargs:
|
||||||
|
order = kwargs.pop('order')
|
||||||
|
self._decompose_order(order)
|
||||||
|
|
||||||
|
if self.indexer is not None:
|
||||||
|
data = self.indexer.get_data(data)
|
||||||
|
|
||||||
|
try:
|
||||||
|
self.model = pf.ARIMA(data=data, ar=self.p, ma=self.q, integ=self.d, family=pf.Normal())
|
||||||
|
self.model_fit = self.model.fit('M-H', nsims=20000)
|
||||||
|
except Exception as ex:
|
||||||
|
print(ex)
|
||||||
|
self.model_fit = None
|
||||||
|
|
||||||
|
def forecast(self, ndata, **kwargs):
|
||||||
|
|
||||||
|
return ret
|
||||||
|
|
||||||
|
def forecast_interval(self, data, **kwargs):
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
return ret
|
||||||
|
|
||||||
|
def forecast_ahead_interval(self, ndata, steps, **kwargs):
|
||||||
|
|
||||||
|
return ret
|
||||||
|
|
||||||
|
def forecast_distribution(self, data, **kwargs):
|
||||||
|
|
||||||
|
|
||||||
|
return ret
|
||||||
|
|
||||||
|
|
||||||
|
def forecast_ahead_distribution(self, data, steps, **kwargs):
|
||||||
|
|
||||||
|
|
||||||
|
return ret
|
@ -239,7 +239,6 @@ class EnsembleFTS(fts.FTS):
|
|||||||
|
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
|
||||||
def forecast_ahead_distribution(self, data, steps, **kwargs):
|
def forecast_ahead_distribution(self, data, steps, **kwargs):
|
||||||
if 'method' in kwargs:
|
if 'method' in kwargs:
|
||||||
self.point_method = kwargs.get('method','mean')
|
self.point_method = kwargs.get('method','mean')
|
||||||
|
@ -1,8 +1,7 @@
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
from pyFTS.common import FuzzySet
|
from pyFTS.common import FuzzySet,SortedCollection,tree
|
||||||
from scipy.spatial import KDTree
|
|
||||||
from pyFTS.probabilistic import kde
|
from pyFTS.probabilistic import kde
|
||||||
|
|
||||||
|
|
||||||
@ -32,7 +31,9 @@ class ProbabilityDistribution(object):
|
|||||||
data = kwargs.get("data", None)
|
data = kwargs.get("data", None)
|
||||||
|
|
||||||
if self.type == "KDE":
|
if self.type == "KDE":
|
||||||
self.kde = kde.KernelSmoothing(kwargs.get("h", 0.5), kwargs.get("kernel", "epanechnikov"))
|
self.kde = kde.KernelSmoothing(h=kwargs.get("h", 0.5), kernel=kwargs.get("kernel", "epanechnikov"))
|
||||||
|
|
||||||
|
if self.data is not None:
|
||||||
_min = np.nanmin(data)
|
_min = np.nanmin(data)
|
||||||
_min = _min * .7 if _min > 0 else _min * 1.3
|
_min = _min * .7 if _min > 0 else _min * 1.3
|
||||||
_max = np.nanmax(data)
|
_max = np.nanmax(data)
|
||||||
@ -42,13 +43,13 @@ class ProbabilityDistribution(object):
|
|||||||
self.nbins = kwargs.get("num_bins", 100)
|
self.nbins = kwargs.get("num_bins", 100)
|
||||||
|
|
||||||
if self.bins is None:
|
if self.bins is None:
|
||||||
self.bins = np.linspace(self.uod[0], self.uod[1], int(self.nbins)).tolist()
|
self.bins = np.linspace(int(self.uod[0]), int(self.uod[1]), int(self.nbins)).tolist()
|
||||||
self.labels = [str(k) for k in self.bins]
|
self.labels = [str(k) for k in self.bins]
|
||||||
|
|
||||||
if self.uod is not None:
|
if self.uod is not None:
|
||||||
self.resolution = (self.uod[1] - self.uod[0]) / self.nbins
|
self.resolution = (self.uod[1] - self.uod[0]) / self.nbins
|
||||||
|
|
||||||
self.bin_index = KDTree(self.bins)
|
self.bin_index = SortedCollection.SortedCollection(iterable=sorted(self.bins))
|
||||||
self.quantile_index = None
|
self.quantile_index = None
|
||||||
self.distribution = {}
|
self.distribution = {}
|
||||||
self.cdf = None
|
self.cdf = None
|
||||||
@ -68,8 +69,7 @@ class ProbabilityDistribution(object):
|
|||||||
:param value: A value in the universe of discourse from the distribution
|
:param value: A value in the universe of discourse from the distribution
|
||||||
:param density: The probability density to assign to the value
|
:param density: The probability density to assign to the value
|
||||||
"""
|
"""
|
||||||
_, ix = self.bin_index.query([value], 1)
|
k = self.bin_index.find_ge(value)
|
||||||
k = self.labels[ix]
|
|
||||||
self.distribution[k] = density
|
self.distribution[k] = density
|
||||||
|
|
||||||
def append(self, values):
|
def append(self, values):
|
||||||
@ -80,8 +80,7 @@ class ProbabilityDistribution(object):
|
|||||||
"""
|
"""
|
||||||
if self.type == "histogram":
|
if self.type == "histogram":
|
||||||
for k in values:
|
for k in values:
|
||||||
_, ix = self.bin_index.query([k], 1)
|
v = self.bin_index.find_ge(k)
|
||||||
v = self.labels[ix]
|
|
||||||
self.distribution[v] += 1
|
self.distribution[v] += 1
|
||||||
self.count += 1
|
self.count += 1
|
||||||
else:
|
else:
|
||||||
@ -99,12 +98,8 @@ class ProbabilityDistribution(object):
|
|||||||
"""
|
"""
|
||||||
if self.type == "histogram":
|
if self.type == "histogram":
|
||||||
for interval in intervals:
|
for interval in intervals:
|
||||||
_, ix1 = self.bin_index.query([interval[0]], 1)
|
for k in self.bin_index.inside(interval[0], interval[1]):
|
||||||
_, ix2 = self.bin_index.query([interval[1]], 1)
|
self.distribution[k] += 1
|
||||||
|
|
||||||
for k in range(ix1, ix2):
|
|
||||||
v = self.labels[k]
|
|
||||||
self.distribution[v] += 1
|
|
||||||
self.count += 1
|
self.count += 1
|
||||||
|
|
||||||
def density(self, values):
|
def density(self, values):
|
||||||
@ -123,15 +118,13 @@ class ProbabilityDistribution(object):
|
|||||||
|
|
||||||
for k in values:
|
for k in values:
|
||||||
if self.type == "histogram":
|
if self.type == "histogram":
|
||||||
_, ix = self.bin_index.query([k], 1)
|
v = self.bin_index.find_ge(k)
|
||||||
v = self.labels[ix]
|
|
||||||
ret.append(self.distribution[v] / (self.count + 1e-5))
|
ret.append(self.distribution[v] / (self.count + 1e-5))
|
||||||
elif self.type == "KDE":
|
elif self.type == "KDE":
|
||||||
v = self.kde.probability(k, self.data)
|
v = self.kde.probability(k, data=self.data)
|
||||||
ret.append(v)
|
ret.append(v)
|
||||||
else:
|
else:
|
||||||
_, ix = self.bin_index.query([k], 1)
|
v = self.bin_index.find_ge(k)
|
||||||
v = self.labels[ix]
|
|
||||||
ret.append(self.distribution[v])
|
ret.append(self.distribution[v])
|
||||||
|
|
||||||
if scalar:
|
if scalar:
|
||||||
@ -158,7 +151,7 @@ class ProbabilityDistribution(object):
|
|||||||
self.distribution = dist
|
self.distribution = dist
|
||||||
self.labels = [str(k) for k in self.bins]
|
self.labels = [str(k) for k in self.bins]
|
||||||
|
|
||||||
self.bin_index = KDTree(self.bins)
|
self.bin_index = SortedCollection.SortedCollection(iterable=sorted(self.bins))
|
||||||
self.quantile_index = None
|
self.quantile_index = None
|
||||||
self.cdf = None
|
self.cdf = None
|
||||||
self.qtl = None
|
self.qtl = None
|
||||||
@ -187,7 +180,7 @@ class ProbabilityDistribution(object):
|
|||||||
|
|
||||||
_keys = [float(k) for k in sorted(self.qtl.keys())]
|
_keys = [float(k) for k in sorted(self.qtl.keys())]
|
||||||
|
|
||||||
self.quantile_index = KDTree(_keys)
|
self.quantile_index = SortedCollection.SortedCollection(iterable=_keys)
|
||||||
|
|
||||||
def cumulative(self, values):
|
def cumulative(self, values):
|
||||||
"""
|
"""
|
||||||
@ -203,12 +196,10 @@ class ProbabilityDistribution(object):
|
|||||||
if isinstance(values, list):
|
if isinstance(values, list):
|
||||||
ret = []
|
ret = []
|
||||||
for val in values:
|
for val in values:
|
||||||
_, ix = self.bin_index.query([val], 1)
|
k = self.bin_index.find_ge(val)
|
||||||
k = self.labels[ix]
|
|
||||||
ret.append(self.cdf[k])
|
ret.append(self.cdf[k])
|
||||||
else:
|
else:
|
||||||
_, ix = self.bin_index.query([values], 1)
|
k = self.bin_index.find_ge(values)
|
||||||
k = self.labels[ix]
|
|
||||||
return self.cdf[values]
|
return self.cdf[values]
|
||||||
|
|
||||||
def quantile(self, values):
|
def quantile(self, values):
|
||||||
@ -225,12 +216,10 @@ class ProbabilityDistribution(object):
|
|||||||
if isinstance(values, list):
|
if isinstance(values, list):
|
||||||
ret = []
|
ret = []
|
||||||
for val in values:
|
for val in values:
|
||||||
_, ix = self.quantile_index.query([val], 1)
|
k = self.quantile_index.find_ge(val)
|
||||||
k = self.labels[ix]
|
|
||||||
ret.append(self.qtl[str(k)][0])
|
ret.append(self.qtl[str(k)][0])
|
||||||
else:
|
else:
|
||||||
_, ix = self.quantile_index.query([values], 1)
|
k = self.quantile_index.find_ge(values)
|
||||||
k = self.labels[ix]
|
|
||||||
ret = self.qtl[str(k)]
|
ret = self.qtl[str(k)]
|
||||||
|
|
||||||
return ret
|
return ret
|
||||||
|
@ -10,13 +10,17 @@ import numpy as np
|
|||||||
|
|
||||||
class KernelSmoothing(object):
|
class KernelSmoothing(object):
|
||||||
"""Kernel Density Estimation"""
|
"""Kernel Density Estimation"""
|
||||||
def __init__(self,h, kernel="epanechnikov"):
|
def __init__(self, **kwargs):
|
||||||
self.h = h
|
self.h = kwargs.get('h',.5)
|
||||||
"""Width parameter"""
|
"""Width parameter"""
|
||||||
self.kernel = kernel
|
self.kernel = kwargs.get("kernel", "epanechnikov")
|
||||||
"""Kernel function"""
|
"""Kernel function"""
|
||||||
|
self.data = kwargs.get("data",None)
|
||||||
self.transf = Transformations.Scale(min=0,max=1)
|
self.transf = Transformations.Scale(min=0,max=1)
|
||||||
|
|
||||||
|
if self.data is not None:
|
||||||
|
self.data = self.transf.apply(self.data)
|
||||||
|
|
||||||
def kernel_function(self, u):
|
def kernel_function(self, u):
|
||||||
"""
|
"""
|
||||||
Apply the kernel
|
Apply the kernel
|
||||||
@ -45,7 +49,7 @@ class KernelSmoothing(object):
|
|||||||
elif self.kernel == "exponential":
|
elif self.kernel == "exponential":
|
||||||
return 0.5 * np.exp(-np.abs(u))
|
return 0.5 * np.exp(-np.abs(u))
|
||||||
|
|
||||||
def probability(self, x, data):
|
def probability(self, x, **kwargs):
|
||||||
"""
|
"""
|
||||||
Probability of the point x on data
|
Probability of the point x on data
|
||||||
|
|
||||||
@ -53,10 +57,14 @@ class KernelSmoothing(object):
|
|||||||
:param data:
|
:param data:
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
l = len(data)
|
|
||||||
|
|
||||||
ndata = self.transf.apply(data)
|
if self.data is None:
|
||||||
|
self.data = kwargs.get('data',None)
|
||||||
|
self.data = self.transf.apply(self.data)
|
||||||
|
|
||||||
|
l = len(self.data)
|
||||||
|
|
||||||
nx = self.transf.apply(x)
|
nx = self.transf.apply(x)
|
||||||
p = sum([self.kernel_function((nx - k)/self.h) for k in ndata]) / l*self.h
|
p = sum([self.kernel_function((nx - k)/self.h) for k in self.data]) / l*self.h
|
||||||
|
|
||||||
return p
|
return p
|
@ -21,25 +21,12 @@ from pyFTS.data import Enrollments, TAIEX
|
|||||||
x = [k for k in np.arange(-2*np.pi, 2*np.pi, 0.15)]
|
x = [k for k in np.arange(-2*np.pi, 2*np.pi, 0.15)]
|
||||||
y = [np.sin(k) for k in x]
|
y = [np.sin(k) for k in x]
|
||||||
|
|
||||||
from pyFTS.models import hofts
|
from pyFTS.probabilistic import ProbabilityDistribution
|
||||||
from pyFTS.partitioners import Grid, FCM, CMeans, Entropy
|
|
||||||
from pyFTS.benchmarks import Measures
|
|
||||||
|
|
||||||
metodos = [FCM.FCMPartitioner]
|
pd = ProbabilityDistribution.ProbabilityDistribution(type='histogram', data=y, num_bins=30)
|
||||||
|
|
||||||
k = 35
|
|
||||||
|
|
||||||
rows = []
|
|
||||||
|
|
||||||
for contador, metodo in enumerate(metodos):
|
|
||||||
print(metodo)
|
|
||||||
part = metodo(data=y, npart=k)
|
|
||||||
model = hofts.HighOrderFTS(order=2, partitioner=part)
|
|
||||||
model.fit(y)
|
|
||||||
forecasts = model.predict(y)
|
|
||||||
for o in range(model.order):
|
|
||||||
forecasts.insert(0, None)
|
|
||||||
|
|
||||||
|
print(pd.quantile([.5]))
|
||||||
|
print(pd.cdf)
|
||||||
|
|
||||||
'''
|
'''
|
||||||
model = fts.FCM_FTS(partitioner=fs, order=1)
|
model = fts.FCM_FTS(partitioner=fs, order=1)
|
||||||
|
Loading…
Reference in New Issue
Block a user