Several bugfixes in benchmarks; ProbabilityDistribution support in the ARIMA and QuantileRegression models; include pyFTS.models.seasonal in setup.py

This commit is contained in:
Petrônio Cândido 2017-07-11 21:43:13 -03:00
parent 8fea727560
commit 862382aa50
7 changed files with 78 additions and 28 deletions

View File

@ -227,15 +227,15 @@ def crps(targets, densities):
Fa = heavyside_cdf(densities.columns, targets)
for k in densities.index:
_crps += sum([ (Ff[col][k]-Fa[col][k])**2 for col in densities.columns])
elif isinstance(densities, ProbabilityDistribution):
elif isinstance(densities, ProbabilityDistribution.ProbabilityDistribution):
l = len(densities.bins)
n = 1
Fa = heavyside_cdf(densities.bin, targets)
Fa = heavyside_cdf(densities.bins, targets)
_crps = sum([(densities.cdf(val) - Fa[val][0]) ** 2 for val in densities.bins])
elif isinstance(densities, list):
l = len(densities[0].bins)
n = len(densities)
Fa = heavyside_cdf(densities[0].bin, targets)
Fa = heavyside_cdf(densities[0].bins, targets)
for df in densities:
_crps += sum([(df.cdf(val) - Fa[val][0]) ** 2 for val in df.bins])

View File

@ -8,7 +8,6 @@ import matplotlib.colors as pltcolors
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from checkbox_support.parsers.tests.test_modinfo import testMultipleModinfoParser
#from mpl_toolkits.mplot3d import Axes3D

View File

@ -40,6 +40,9 @@ class ARIMA(fts.FTS):
self.order = self.p + self.q
self.shortname = "ARIMA(" + str(self.p) + "," + str(self.d) + "," + str(self.q) + ") - " + str(self.alpha)
if self.indexer is not None:
data = self.indexer.get_data(data)
data = self.doTransformations(data, updateUoD=True)
old_fit = self.model_fit
@ -60,6 +63,9 @@ class ARIMA(fts.FTS):
if self.model_fit is None:
return np.nan
if self.indexer is not None and isinstance(data, pd.DataFrame):
data = self.indexer.get_data(data)
ndata = np.array(self.doTransformations(data))
l = len(ndata)
@ -151,6 +157,9 @@ class ARIMA(fts.FTS):
def forecastDistribution(self, data, **kwargs):
if self.indexer is not None and isinstance(data, pd.DataFrame):
data = self.indexer.get_data(data)
sigma = np.sqrt(self.model_fit.sigma2)
l = len(data)

View File

@ -825,6 +825,7 @@ def plot_density_rectange(ax, cmap, density, fig, resolution, time_from, time_to
cb = fig.colorbar(pc, ax=ax)
cb.set_label('Density')
from pyFTS.common import Transformations
def plot_probabilitydistribution_density(ax, cmap, probabilitydist, fig, time_from):
from matplotlib.patches import Rectangle
@ -836,7 +837,9 @@ def plot_probabilitydistribution_density(ax, cmap, probabilitydist, fig, time_fr
for y in dt.bins:
s = Rectangle((time_from+ct, y), 1, dt.resolution, fill=True, lw = 0)
patches.append(s)
colors.append(dt.distribution[y]*5)
colors.append(dt.density(y))
scale = Transformations.Scale()
colors = scale.apply(colors)
pc = PatchCollection(patches=patches, match_original=True)
pc.set_clim([0, 1])
pc.set_cmap(cmap)

View File

@ -7,7 +7,7 @@ from statsmodels.regression.quantile_regression import QuantReg
from statsmodels.tsa.tsatools import lagmat
from pyFTS import fts
from pyFTS.common import SortedCollection
from pyFTS.probabilistic import ProbabilityDistribution
class QuantileRegression(fts.FTS):
"""Façade for statsmodels.regression.quantile_regression"""
@ -32,6 +32,9 @@ class QuantileRegression(fts.FTS):
def train(self, data, sets, order=1, parameters=None):
self.order = order
if self.indexer is not None and isinstance(data, pd.DataFrame):
data = self.indexer.get_data(data)
tmp = np.array(self.doTransformations(data, updateUoD=True))
lagdata, ndata = lagmat(tmp, maxlag=order, trim="both", original='sep')
@ -75,6 +78,10 @@ class QuantileRegression(fts.FTS):
return [lo, up]
def forecast(self, data, **kwargs):
if self.indexer is not None and isinstance(data, pd.DataFrame):
data = self.indexer.get_data(data)
ndata = np.array(self.doTransformations(data))
l = len(ndata)
@ -91,6 +98,9 @@ class QuantileRegression(fts.FTS):
def forecastInterval(self, data, **kwargs):
if self.indexer is not None and isinstance(data, pd.DataFrame):
data = self.indexer.get_data(data)
ndata = np.array(self.doTransformations(data))
l = len(ndata)
@ -106,6 +116,10 @@ class QuantileRegression(fts.FTS):
return ret
def forecastAheadInterval(self, data, steps, **kwargs):
if self.indexer is not None and isinstance(data, pd.DataFrame):
data = self.indexer.get_data(data)
ndata = np.array(self.doTransformations(data))
smoothing = kwargs.get("smoothing", 0.9)
@ -128,35 +142,50 @@ class QuantileRegression(fts.FTS):
return ret[-steps:]
def forecastAheadDistribution(self, data, steps, **kwargs):
def forecastDistribution(self, data, **kwargs):
if self.indexer is not None and isinstance(data, pd.DataFrame):
data = self.indexer.get_data(data)
ndata = np.array(self.doTransformations(data))
percentile_size = (self.original_max - self.original_min) / 100
ret = []
resolution = kwargs.get('resolution', percentile_size)
l = len(data)
grid = self.get_empty_grid(self.original_min, self.original_max, resolution)
for k in np.arange(self.order, l + 1):
dist = ProbabilityDistribution.ProbabilityDistribution(type="histogram",
uod=[self.original_min, self.original_max])
intervals = []
for qt in self.dist_qt:
sample = ndata[k - self.order: k]
intl = self.point_to_interval(sample, qt[0], qt[1])
intervals.append(intl)
index = SortedCollection.SortedCollection(iterable=grid.keys())
dist.appendInterval(intervals)
ret.append(dist)
return ret
def forecastAheadDistribution(self, data, steps, **kwargs):
if self.indexer is not None and isinstance(data, pd.DataFrame):
data = self.indexer.get_data(data)
ndata = np.array(self.doTransformations(data))
ret = []
tmps = []
grids = {}
for k in np.arange(self.order, steps + self.order):
grids[k] = self.get_empty_grid(self.original_min, self.original_max, resolution)
for qt in self.dist_qt:
dist = ProbabilityDistribution.ProbabilityDistribution(type="histogram",
uod=[self.original_min, self.original_max])
intervals = [[k, k] for k in ndata[-self.order:]]
for k in np.arange(self.order, steps + self.order):
for qt in self.dist_qt:
intl = self.interval_to_interval([intervals[x] for x in np.arange(k - self.order, k)], qt[0], qt[1])
intervals.append(intl)
grids[k] = self.gridCount(grids[k], resolution, index, intl)
dist.appendInterval(intervals)
for k in np.arange(self.order, steps + self.order):
tmp = np.array([grids[k][i] for i in sorted(grids[k])])
ret.append(tmp / sum(tmp))
ret.append(dist)
grid = self.get_empty_grid(self.original_min, self.original_max, resolution)
df = pd.DataFrame(ret, columns=sorted(grid))
return df
return ret

View File

@ -69,6 +69,12 @@ class ProbabilityDistribution(object):
def density(self, values):
ret = []
scalar = False
if not isinstance(values, list):
values = [values]
scalar = True
for k in values:
if self.type == "histogram":
v = self.index.find_ge(k)
@ -79,6 +85,10 @@ class ProbabilityDistribution(object):
else:
v = self.index.find_ge(k)
ret.append(self.distribution[v])
if scalar:
return ret[0]
return ret
def cdf(self, value):

View File

@ -1,14 +1,14 @@
from distutils.core import setup
setup(
name = 'pyFTS',
packages = ['pyFTS','pyFTS.benchmarks','pyFTS.common','pyFTS.data', 'pyFTS.ensemble','pyFTS.models','pyFTS.partitioners','pyFTS.probabilistic','pyFTS.tests'],
package_data = {'benchmarks':['*'], 'common':['*'], 'data':['*'], 'ensemble':['*'], 'models':['*'], 'partitioners':['*'], 'probabilistic':['*'], 'tests':['*']},
version = '1.1',
packages = ['pyFTS','pyFTS.benchmarks','pyFTS.common','pyFTS.data', 'pyFTS.ensemble','pyFTS.models','pyFTS.models.seasonal','pyFTS.partitioners','pyFTS.probabilistic','pyFTS.tests'],
package_data = {'benchmarks':['*'], 'common':['*'], 'data':['*'], 'ensemble':['*'], 'models':['*'], 'seasonal':['*'], 'partitioners':['*'], 'probabilistic':['*'], 'tests':['*']},
version = '1.2',
description = 'Fuzzy Time Series for Python',
author = 'Petronio Candido L. e Silva',
author_email = 'petronio.candido@gmail.com',
url = 'https://github.com/petroniocandido/pyFTS',
download_url = 'https://github.com/petroniocandido/pyFTS/archive/pkg1.1.tar.gz',
download_url = 'https://github.com/petroniocandido/pyFTS/archive/pkg1.2.tar.gz',
keywords = ['forecasting', 'fuzzy time series', 'fuzzy', 'time series forecasting'],
classifiers = [],
)