Box-Pierce and Box-Ljang tests form residuals and many bugfixes

This commit is contained in:
Petrônio Cândido de Lima e Silva 2017-01-25 12:17:07 -02:00
parent f53eded47e
commit 664fec513e
7 changed files with 125 additions and 30 deletions

View File

@ -1,9 +1,21 @@
#!/usr/bin/python
# -*- coding: utf8 -*- # -*- coding: utf8 -*-
import numpy as np import numpy as np
import pandas as pd import pandas as pd
# Autocorrelation function estimative
def acf(data, k):
mu = np.mean(data)
sigma = np.var(data)
n = len(data)
s = 0
for t in np.arange(0,n-k):
s += (data[t]-mu) * (data[t+k] - mu)
return 1/((n-k)*sigma)*s
# Erro quadrático médio # Erro quadrático médio
def rmse(targets, forecasts): def rmse(targets, forecasts):
return np.sqrt(np.nanmean((forecasts - targets) ** 2)) return np.sqrt(np.nanmean((forecasts - targets) ** 2))
@ -25,16 +37,36 @@ def mape_interval(targets, forecasts):
# Theil's U Statistic # Theil's U Statistic
def U(targets, forecasts): def UStatistic(targets, forecasts):
l = len(targets) l = len(targets)
naive = [] naive = []
y = [] y = []
for k in np.arange(0,l-1): for k in np.arange(0,l-1):
y.append(((forecasts[k] - targets[k+1])/targets[k]) ** 2) y.append(((forecasts[k+1] - targets[k+1])/targets[k]) ** 2)
naive.append(((targets[k + 1] - targets[k]) / targets[k]) ** 2) naive.append(((targets[k + 1] - targets[k]) / targets[k]) ** 2)
return np.sqrt(sum(y)/sum(naive)) return np.sqrt(sum(y)/sum(naive))
# Q Statistic for Box-Pierce test
def BoxPierceStatistic(data, h):
n = len(data)
s = 0
for k in np.arange(1,h+1):
r = acf(data, k)
s += r**2
return n*s
# Q Statistic for LjungBox test
def BoxLjungStatistic(data, h):
n = len(data)
s = 0
for k in np.arange(1,h+1):
r = acf(data, k)
s += r**2 / (n -k)
return n*(n-2)*s
# Sharpness - Mean size of the intervals # Sharpness - Mean size of the intervals
def sharpness(forecasts): def sharpness(forecasts):
tmp = [i[1] - i[0] for i in forecasts] tmp = [i[1] - i[0] for i in forecasts]
@ -57,3 +89,4 @@ def coverage(targets, forecasts):
else: else:
preds.append(0) preds.append(0)
return np.mean(preds) return np.mean(preds)

View File

@ -6,21 +6,68 @@ import pandas as pd
import matplotlib as plt import matplotlib as plt
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from pyFTS.common import Transformations,Util from pyFTS.common import Transformations,Util
from pyFTS.benchmarks import Measures
from scipy import stats
def residuals(targets, forecasts, order=1): def residuals(targets, forecasts, order=1):
return np.array(targets[order:]) - np.array(forecasts[:-order]) return np.array(targets[order:]) - np.array(forecasts[:-1])
def plotResiduals(targets, forecasts, order=1, tam=[8, 8], save=False, file=None): def ChiSquared(q,h):
res = residuals(targets,forecasts,order) p = stats.chi2.sf(q, h)
fig = plt.figure(figsize=tam) return p
ax1 = fig.add_axes([0, 1, 0.9, 0.3]) # left, bottom, width, height
ax1.plot(res)
ax2 = fig.add_axes([0, 0.65, 0.9, 0.3])
ax2.acorr(res) def compareResiduals(data, models):
ax3 = fig.add_axes([0, 0.3, 0.9, 0.3]) ret = "Model & Order & Mean & STD & Box-Pierce & Box-Ljung & P-value \\\\ \n"
ax3.hist(res) for mfts in models:
forecasts = mfts.forecast(data)
res = residuals(data, forecasts, mfts.order)
mu = np.mean(res)
sig = np.std(res)
ret += mfts.shortname + " & "
ret += str(mfts.order) + " & "
ret += str(mu) + " & "
ret += str(sig) + " & "
q1 = Measures.BoxPierceStatistic(res, 10)
ret += str(q1) + " & "
q2 = Measures.BoxLjungStatistic(res, 10)
ret += str(q2) + " & "
ret += str(ChiSquared(q2, 10))
ret += " \\\\ \n"
return ret
def plotResiduals(targets, models, tam=[8, 8], save=False, file=None):
fig, axes = plt.subplots(nrows=len(models), ncols=3, figsize=tam)
c = 0
for mfts in models:
forecasts = mfts.forecast(targets)
res = residuals(targets,forecasts,mfts.order)
mu = np.mean(res)
sig = np.std(res)
axes[c][0].set_title("Residuals Mean=" + str(mu) + " STD = " + str(sig))
axes[c][0].set_ylabel('E')
axes[c][0].set_xlabel('T')
axes[c][0].plot(res)
axes[c][1].set_title("Residuals Autocorrelation")
axes[c][1].set_ylabel('ACS')
axes[c][1].set_xlabel('Lag')
axes[c][1].acorr(res)
axes[c][2].set_title("Residuals Histogram")
axes[c][2].set_ylabel('Freq')
axes[c][2].set_xlabel('Bins')
axes[c][2].hist(res)
c += 1
plt.tight_layout()
Util.showAndSaveImage(fig, file, save) Util.showAndSaveImage(fig, file, save)

View File

@ -8,16 +8,15 @@ import matplotlib.colors as pltcolors
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D from mpl_toolkits.mplot3d import Axes3D
# from sklearn.cross_validation import KFold # from sklearn.cross_validation import KFold
from pyFTS.benchmarks import Measures from pyFTS.benchmarks import Measures, naive, ResidualAnalysis
from pyFTS.partitioners import Grid from pyFTS.partitioners import Grid
from pyFTS.common import Membership, FuzzySet, FLR, Transformations, Util from pyFTS.common import Membership, FuzzySet, FLR, Transformations, Util
from pyFTS import fts, chen, yu, ismailefendi, sadaei, hofts, hwang, pfts, ifts from pyFTS import fts, chen, yu, ismailefendi, sadaei, hofts, hwang, pfts, ifts
def allPointForecasters(data_train, data_test, partitions, max_order=3,save=False, file=None, tam=[20, 5]): def allPointForecasters(data_train, data_test, partitions, max_order=3,save=False, file=None, tam=[20, 5]):
models = [chen.ConventionalFTS, yu.WeightedFTS, ismailefendi.ImprovedWeightedFTS, models = [naive.Naive, chen.ConventionalFTS, yu.WeightedFTS, ismailefendi.ImprovedWeightedFTS,
sadaei.ExponentialyWeightedFTS, hwang.HighOrderFTS, hofts.HighOrderFTS, sadaei.ExponentialyWeightedFTS, hofts.HighOrderFTS, pfts.ProbabilisticFTS]
pfts.ProbabilisticFTS]
objs = [] objs = []
@ -38,25 +37,31 @@ def allPointForecasters(data_train, data_test, partitions, max_order=3,save=Fals
colors.append( all_colors[count] ) colors.append( all_colors[count] )
else: else:
for order in np.arange(1,max_order+1): for order in np.arange(1,max_order+1):
mfts = model(" n = " + str(order)) if order >= mfts.minOrder:
mfts.train(data_train, data_train_fs, order=order) mfts = model(" n = " + str(order))
objs.append(mfts) mfts.train(data_train, data_train_fs, order=order)
colors.append(all_colors[count]) objs.append(mfts)
count += 5 colors.append(all_colors[count])
count += 10
print(getPointStatistics(data_test, objs)) print(getPointStatistics(data_test, objs))
print(ResidualAnalysis.compareResiduals(data_test, objs))
plotComparedSeries(data_test, objs, colors, typeonlegend=False, save=save, file=file, tam=tam, intervals=False) plotComparedSeries(data_test, objs, colors, typeonlegend=False, save=save, file=file, tam=tam, intervals=False)
ResidualAnalysis.plotResiduals(data_test, objs, save=save, file=file, tam=[tam[0],5*tam[1]])
def getPointStatistics(data, models, externalmodels = None, externalforecasts = None): def getPointStatistics(data, models, externalmodels = None, externalforecasts = None):
ret = "Model & Order & RMSE & MAPE \\\\ \n" ret = "Model & Order & RMSE & MAPE & Theil's U \\\\ \n"
for fts in models: for fts in models:
forecasts = fts.forecast(data) forecasts = fts.forecast(data)
ret += fts.shortname + " & " ret += fts.shortname + " & "
ret += str(fts.order) + " & " ret += str(fts.order) + " & "
ret += str(round(Measures.rmse(np.array(data[fts.order:]), np.array(forecasts[:-1])), 2)) + " & " ret += str(round(Measures.rmse(np.array(data[fts.order:]), np.array(forecasts[:-1])), 2)) + " & "
ret += str(round(Measures.mape(np.array(data[fts.order:]), np.array(forecasts[:-1])), 2)) ret += str(round(Measures.mape(np.array(data[fts.order:]), np.array(forecasts[:-1])), 2))+ " & "
ret += str(round(Measures.UStatistic(np.array(data[fts.order:]), np.array(forecasts[:-1])), 2))
ret += " \\\\ \n" ret += " \\\\ \n"
if externalmodels is not None: if externalmodels is not None:
l = len(externalmodels) l = len(externalmodels)
@ -64,7 +69,8 @@ def getPointStatistics(data, models, externalmodels = None, externalforecasts =
ret += externalmodels[k] + " & " ret += externalmodels[k] + " & "
ret += " 1 & " ret += " 1 & "
ret += str(round(Measures.rmse(data[fts.order:], externalforecasts[k][:-1]), 2)) + " & " ret += str(round(Measures.rmse(data[fts.order:], externalforecasts[k][:-1]), 2)) + " & "
ret += str(round(Measures.mape(data[fts.order:], externalforecasts[k][:-1]), 2)) ret += str(round(Measures.mape(data[fts.order:], externalforecasts[k][:-1]), 2))+ " & "
ret += str(round(Measures.UStatistic(np.array(data[fts.order:]), np.array(forecasts[:-1])), 2))
ret += " \\\\ \n" ret += " \\\\ \n"
return ret return ret
@ -91,10 +97,11 @@ def allIntervalForecasters(data_train, data_test, partitions, max_order=3,save=F
colors.append( all_colors[count] ) colors.append( all_colors[count] )
else: else:
for order in np.arange(1,max_order+1): for order in np.arange(1,max_order+1):
mfts = model(" n = " + str(order)) if order >= mfts.minOrder:
mfts.train(data_train, data_train_fs, order=order) mfts = model(" n = " + str(order))
objs.append(mfts) mfts.train(data_train, data_train_fs, order=order)
colors.append(all_colors[count]) objs.append(mfts)
colors.append(all_colors[count])
count += 5 count += 5
print(getIntervalStatistics(data_test, objs)) print(getIntervalStatistics(data_test, objs))

View File

@ -11,4 +11,4 @@ class Naive(fts.FTS):
self.detail = "Naïve Model" self.detail = "Naïve Model"
def forecast(self, data): def forecast(self, data):
return data return [k for k in data]

View File

@ -33,3 +33,9 @@ def roi(original):
for t in np.arange(0, n-1): for t in np.arange(0, n-1):
roi.append( (original[t+1] - original[t])/original[t] ) roi.append( (original[t+1] - original[t])/original[t] )
return roi return roi
def smoothing(original, lags):
pass
def aggregate(original, operation):
pass

1
fts.py
View File

@ -11,6 +11,7 @@ class FTS(object):
self.name = name self.name = name
self.detail = name self.detail = name
self.isHighOrder = False self.isHighOrder = False
self.minOrder = 1
self.hasSeasonality = False self.hasSeasonality = False
self.hasPointForecasting = True self.hasPointForecasting = True
self.hasIntervalForecasting = False self.hasIntervalForecasting = False

View File

@ -7,6 +7,7 @@ class HighOrderFTS(fts.FTS):
def __init__(self, name): def __init__(self, name):
super(HighOrderFTS, self).__init__(1, name) super(HighOrderFTS, self).__init__(1, name)
self.isHighOrder = True self.isHighOrder = True
self.minOrder = 2
self.name = "Hwang High Order FTS" self.name = "Hwang High Order FTS"
self.shortname = "Hwang" + name self.shortname = "Hwang" + name
self.detail = "Hwang" self.detail = "Hwang"