Several bugfixes in benchmarks; ProbabilityDistribution in arima and quantreg; include models.seasonal in setup.py

This commit is contained in:
Petrônio Cândido 2017-07-11 21:43:13 -03:00
parent 8fea727560
commit 862382aa50
7 changed files with 78 additions and 28 deletions

View File

@@ -227,15 +227,15 @@ def crps(targets, densities):
Fa = heavyside_cdf(densities.columns, targets) Fa = heavyside_cdf(densities.columns, targets)
for k in densities.index: for k in densities.index:
_crps += sum([ (Ff[col][k]-Fa[col][k])**2 for col in densities.columns]) _crps += sum([ (Ff[col][k]-Fa[col][k])**2 for col in densities.columns])
elif isinstance(densities, ProbabilityDistribution): elif isinstance(densities, ProbabilityDistribution.ProbabilityDistribution):
l = len(densities.bins) l = len(densities.bins)
n = 1 n = 1
Fa = heavyside_cdf(densities.bin, targets) Fa = heavyside_cdf(densities.bins, targets)
_crps = sum([(densities.cdf(val) - Fa[val][0]) ** 2 for val in densities.bins]) _crps = sum([(densities.cdf(val) - Fa[val][0]) ** 2 for val in densities.bins])
elif isinstance(densities, list): elif isinstance(densities, list):
l = len(densities[0].bins) l = len(densities[0].bins)
n = len(densities) n = len(densities)
Fa = heavyside_cdf(densities[0].bin, targets) Fa = heavyside_cdf(densities[0].bins, targets)
for df in densities: for df in densities:
_crps += sum([(df.cdf(val) - Fa[val][0]) ** 2 for val in df.bins]) _crps += sum([(df.cdf(val) - Fa[val][0]) ** 2 for val in df.bins])

View File

@@ -8,7 +8,6 @@ import matplotlib.colors as pltcolors
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from checkbox_support.parsers.tests.test_modinfo import testMultipleModinfoParser
#from mpl_toolkits.mplot3d import Axes3D #from mpl_toolkits.mplot3d import Axes3D

View File

@@ -40,6 +40,9 @@ class ARIMA(fts.FTS):
self.order = self.p + self.q self.order = self.p + self.q
self.shortname = "ARIMA(" + str(self.p) + "," + str(self.d) + "," + str(self.q) + ") - " + str(self.alpha) self.shortname = "ARIMA(" + str(self.p) + "," + str(self.d) + "," + str(self.q) + ") - " + str(self.alpha)
if self.indexer is not None:
data = self.indexer.get_data(data)
data = self.doTransformations(data, updateUoD=True) data = self.doTransformations(data, updateUoD=True)
old_fit = self.model_fit old_fit = self.model_fit
@@ -60,6 +63,9 @@ class ARIMA(fts.FTS):
if self.model_fit is None: if self.model_fit is None:
return np.nan return np.nan
if self.indexer is not None and isinstance(data, pd.DataFrame):
data = self.indexer.get_data(data)
ndata = np.array(self.doTransformations(data)) ndata = np.array(self.doTransformations(data))
l = len(ndata) l = len(ndata)
@@ -151,6 +157,9 @@ class ARIMA(fts.FTS):
def forecastDistribution(self, data, **kwargs): def forecastDistribution(self, data, **kwargs):
if self.indexer is not None and isinstance(data, pd.DataFrame):
data = self.indexer.get_data(data)
sigma = np.sqrt(self.model_fit.sigma2) sigma = np.sqrt(self.model_fit.sigma2)
l = len(data) l = len(data)

View File

@@ -825,6 +825,7 @@ def plot_density_rectange(ax, cmap, density, fig, resolution, time_from, time_to
cb = fig.colorbar(pc, ax=ax) cb = fig.colorbar(pc, ax=ax)
cb.set_label('Density') cb.set_label('Density')
from pyFTS.common import Transformations
def plot_probabilitydistribution_density(ax, cmap, probabilitydist, fig, time_from): def plot_probabilitydistribution_density(ax, cmap, probabilitydist, fig, time_from):
from matplotlib.patches import Rectangle from matplotlib.patches import Rectangle
@@ -836,7 +837,9 @@ def plot_probabilitydistribution_density(ax, cmap, probabilitydist, fig, time_fr
for y in dt.bins: for y in dt.bins:
s = Rectangle((time_from+ct, y), 1, dt.resolution, fill=True, lw = 0) s = Rectangle((time_from+ct, y), 1, dt.resolution, fill=True, lw = 0)
patches.append(s) patches.append(s)
colors.append(dt.distribution[y]*5) colors.append(dt.density(y))
scale = Transformations.Scale()
colors = scale.apply(colors)
pc = PatchCollection(patches=patches, match_original=True) pc = PatchCollection(patches=patches, match_original=True)
pc.set_clim([0, 1]) pc.set_clim([0, 1])
pc.set_cmap(cmap) pc.set_cmap(cmap)

View File

@@ -7,7 +7,7 @@ from statsmodels.regression.quantile_regression import QuantReg
from statsmodels.tsa.tsatools import lagmat from statsmodels.tsa.tsatools import lagmat
from pyFTS import fts from pyFTS import fts
from pyFTS.common import SortedCollection from pyFTS.common import SortedCollection
from pyFTS.probabilistic import ProbabilityDistribution
class QuantileRegression(fts.FTS): class QuantileRegression(fts.FTS):
"""Façade for statsmodels.regression.quantile_regression""" """Façade for statsmodels.regression.quantile_regression"""
@@ -32,6 +32,9 @@ class QuantileRegression(fts.FTS):
def train(self, data, sets, order=1, parameters=None): def train(self, data, sets, order=1, parameters=None):
self.order = order self.order = order
if self.indexer is not None and isinstance(data, pd.DataFrame):
data = self.indexer.get_data(data)
tmp = np.array(self.doTransformations(data, updateUoD=True)) tmp = np.array(self.doTransformations(data, updateUoD=True))
lagdata, ndata = lagmat(tmp, maxlag=order, trim="both", original='sep') lagdata, ndata = lagmat(tmp, maxlag=order, trim="both", original='sep')
@@ -75,6 +78,10 @@ class QuantileRegression(fts.FTS):
return [lo, up] return [lo, up]
def forecast(self, data, **kwargs): def forecast(self, data, **kwargs):
if self.indexer is not None and isinstance(data, pd.DataFrame):
data = self.indexer.get_data(data)
ndata = np.array(self.doTransformations(data)) ndata = np.array(self.doTransformations(data))
l = len(ndata) l = len(ndata)
@@ -91,6 +98,9 @@ class QuantileRegression(fts.FTS):
def forecastInterval(self, data, **kwargs): def forecastInterval(self, data, **kwargs):
if self.indexer is not None and isinstance(data, pd.DataFrame):
data = self.indexer.get_data(data)
ndata = np.array(self.doTransformations(data)) ndata = np.array(self.doTransformations(data))
l = len(ndata) l = len(ndata)
@@ -106,6 +116,10 @@ class QuantileRegression(fts.FTS):
return ret return ret
def forecastAheadInterval(self, data, steps, **kwargs): def forecastAheadInterval(self, data, steps, **kwargs):
if self.indexer is not None and isinstance(data, pd.DataFrame):
data = self.indexer.get_data(data)
ndata = np.array(self.doTransformations(data)) ndata = np.array(self.doTransformations(data))
smoothing = kwargs.get("smoothing", 0.9) smoothing = kwargs.get("smoothing", 0.9)
@@ -128,35 +142,50 @@ class QuantileRegression(fts.FTS):
return ret[-steps:] return ret[-steps:]
def forecastAheadDistribution(self, data, steps, **kwargs): def forecastDistribution(self, data, **kwargs):
if self.indexer is not None and isinstance(data, pd.DataFrame):
data = self.indexer.get_data(data)
ndata = np.array(self.doTransformations(data)) ndata = np.array(self.doTransformations(data))
percentile_size = (self.original_max - self.original_min) / 100 ret = []
resolution = kwargs.get('resolution', percentile_size) l = len(data)
grid = self.get_empty_grid(self.original_min, self.original_max, resolution) for k in np.arange(self.order, l + 1):
dist = ProbabilityDistribution.ProbabilityDistribution(type="histogram",
uod=[self.original_min, self.original_max])
intervals = []
for qt in self.dist_qt:
sample = ndata[k - self.order: k]
intl = self.point_to_interval(sample, qt[0], qt[1])
intervals.append(intl)
index = SortedCollection.SortedCollection(iterable=grid.keys()) dist.appendInterval(intervals)
ret.append(dist)
return ret
def forecastAheadDistribution(self, data, steps, **kwargs):
if self.indexer is not None and isinstance(data, pd.DataFrame):
data = self.indexer.get_data(data)
ndata = np.array(self.doTransformations(data))
ret = [] ret = []
tmps = []
grids = {}
for k in np.arange(self.order, steps + self.order): for k in np.arange(self.order, steps + self.order):
grids[k] = self.get_empty_grid(self.original_min, self.original_max, resolution) dist = ProbabilityDistribution.ProbabilityDistribution(type="histogram",
uod=[self.original_min, self.original_max])
for qt in self.dist_qt:
intervals = [[k, k] for k in ndata[-self.order:]] intervals = [[k, k] for k in ndata[-self.order:]]
for k in np.arange(self.order, steps + self.order): for qt in self.dist_qt:
intl = self.interval_to_interval([intervals[x] for x in np.arange(k - self.order, k)], qt[0], qt[1]) intl = self.interval_to_interval([intervals[x] for x in np.arange(k - self.order, k)], qt[0], qt[1])
intervals.append(intl) intervals.append(intl)
grids[k] = self.gridCount(grids[k], resolution, index, intl) dist.appendInterval(intervals)
for k in np.arange(self.order, steps + self.order): ret.append(dist)
tmp = np.array([grids[k][i] for i in sorted(grids[k])])
ret.append(tmp / sum(tmp))
grid = self.get_empty_grid(self.original_min, self.original_max, resolution) return ret
df = pd.DataFrame(ret, columns=sorted(grid))
return df

View File

@@ -69,6 +69,12 @@ class ProbabilityDistribution(object):
def density(self, values): def density(self, values):
ret = [] ret = []
scalar = False
if not isinstance(values, list):
values = [values]
scalar = True
for k in values: for k in values:
if self.type == "histogram": if self.type == "histogram":
v = self.index.find_ge(k) v = self.index.find_ge(k)
@@ -79,6 +85,10 @@ class ProbabilityDistribution(object):
else: else:
v = self.index.find_ge(k) v = self.index.find_ge(k)
ret.append(self.distribution[v]) ret.append(self.distribution[v])
if scalar:
return ret[0]
return ret return ret
def cdf(self, value): def cdf(self, value):

View File

@@ -1,14 +1,14 @@
from distutils.core import setup from distutils.core import setup
setup( setup(
name = 'pyFTS', name = 'pyFTS',
packages = ['pyFTS','pyFTS.benchmarks','pyFTS.common','pyFTS.data', 'pyFTS.ensemble','pyFTS.models','pyFTS.partitioners','pyFTS.probabilistic','pyFTS.tests'], packages = ['pyFTS','pyFTS.benchmarks','pyFTS.common','pyFTS.data', 'pyFTS.ensemble','pyFTS.models','pyFTS.models.seasonal','pyFTS.partitioners','pyFTS.probabilistic','pyFTS.tests'],
package_data = {'benchmarks':['*'], 'common':['*'], 'data':['*'], 'ensemble':['*'], 'models':['*'], 'partitioners':['*'], 'probabilistic':['*'], 'tests':['*']}, package_data = {'benchmarks':['*'], 'common':['*'], 'data':['*'], 'ensemble':['*'], 'models':['*'], 'seasonal':['*'], 'partitioners':['*'], 'probabilistic':['*'], 'tests':['*']},
version = '1.1', version = '1.2',
description = 'Fuzzy Time Series for Python', description = 'Fuzzy Time Series for Python',
author = 'Petronio Candido L. e Silva', author = 'Petronio Candido L. e Silva',
author_email = 'petronio.candido@gmail.com', author_email = 'petronio.candido@gmail.com',
url = 'https://github.com/petroniocandido/pyFTS', url = 'https://github.com/petroniocandido/pyFTS',
download_url = 'https://github.com/petroniocandido/pyFTS/archive/pkg1.1.tar.gz', download_url = 'https://github.com/petroniocandido/pyFTS/archive/pkg1.2.tar.gz',
keywords = ['forecasting', 'fuzzy time series', 'fuzzy', 'time series forecasting'], keywords = ['forecasting', 'fuzzy time series', 'fuzzy', 'time series forecasting'],
classifiers = [], classifiers = [],
) )