Box-Pierce and Box-Ljang tests form residuals and many bugfixes

This commit is contained in:
Petrônio Cândido de Lima e Silva 2017-01-25 12:17:07 -02:00
parent f53eded47e
commit 664fec513e
7 changed files with 125 additions and 30 deletions

View File

@ -1,9 +1,21 @@
#!/usr/bin/python
# -*- coding: utf8 -*-
import numpy as np
import pandas as pd
# Autocorrelation function estimative
def acf(data, k):
mu = np.mean(data)
sigma = np.var(data)
n = len(data)
s = 0
for t in np.arange(0,n-k):
s += (data[t]-mu) * (data[t+k] - mu)
return 1/((n-k)*sigma)*s
# Erro quadrático médio
def rmse(targets, forecasts):
return np.sqrt(np.nanmean((forecasts - targets) ** 2))
@ -25,16 +37,36 @@ def mape_interval(targets, forecasts):
# Theil's U Statistic
def U(targets, forecasts):
def UStatistic(targets, forecasts):
l = len(targets)
naive = []
y = []
for k in np.arange(0,l-1):
y.append(((forecasts[k] - targets[k+1])/targets[k]) ** 2)
y.append(((forecasts[k+1] - targets[k+1])/targets[k]) ** 2)
naive.append(((targets[k + 1] - targets[k]) / targets[k]) ** 2)
return np.sqrt(sum(y)/sum(naive))
# Q Statistic for Box-Pierce test
def BoxPierceStatistic(data, h):
n = len(data)
s = 0
for k in np.arange(1,h+1):
r = acf(data, k)
s += r**2
return n*s
# Q Statistic for LjungBox test
def BoxLjungStatistic(data, h):
n = len(data)
s = 0
for k in np.arange(1,h+1):
r = acf(data, k)
s += r**2 / (n -k)
return n*(n-2)*s
# Sharpness - Mean size of the intervals
def sharpness(forecasts):
tmp = [i[1] - i[0] for i in forecasts]
@ -57,3 +89,4 @@ def coverage(targets, forecasts):
else:
preds.append(0)
return np.mean(preds)

View File

@ -6,21 +6,68 @@ import pandas as pd
import matplotlib as plt
import matplotlib.pyplot as plt
from pyFTS.common import Transformations,Util
from pyFTS.benchmarks import Measures
from scipy import stats
def residuals(targets, forecasts, order=1):
return np.array(targets[order:]) - np.array(forecasts[:-order])
return np.array(targets[order:]) - np.array(forecasts[:-1])
def plotResiduals(targets, forecasts, order=1, tam=[8, 8], save=False, file=None):
res = residuals(targets,forecasts,order)
fig = plt.figure(figsize=tam)
ax1 = fig.add_axes([0, 1, 0.9, 0.3]) # left, bottom, width, height
ax1.plot(res)
ax2 = fig.add_axes([0, 0.65, 0.9, 0.3])
ax2.acorr(res)
ax3 = fig.add_axes([0, 0.3, 0.9, 0.3])
ax3.hist(res)
def ChiSquared(q,h):
p = stats.chi2.sf(q, h)
return p
def compareResiduals(data, models):
ret = "Model & Order & Mean & STD & Box-Pierce & Box-Ljung & P-value \\\\ \n"
for mfts in models:
forecasts = mfts.forecast(data)
res = residuals(data, forecasts, mfts.order)
mu = np.mean(res)
sig = np.std(res)
ret += mfts.shortname + " & "
ret += str(mfts.order) + " & "
ret += str(mu) + " & "
ret += str(sig) + " & "
q1 = Measures.BoxPierceStatistic(res, 10)
ret += str(q1) + " & "
q2 = Measures.BoxLjungStatistic(res, 10)
ret += str(q2) + " & "
ret += str(ChiSquared(q2, 10))
ret += " \\\\ \n"
return ret
def plotResiduals(targets, models, tam=[8, 8], save=False, file=None):
fig, axes = plt.subplots(nrows=len(models), ncols=3, figsize=tam)
c = 0
for mfts in models:
forecasts = mfts.forecast(targets)
res = residuals(targets,forecasts,mfts.order)
mu = np.mean(res)
sig = np.std(res)
axes[c][0].set_title("Residuals Mean=" + str(mu) + " STD = " + str(sig))
axes[c][0].set_ylabel('E')
axes[c][0].set_xlabel('T')
axes[c][0].plot(res)
axes[c][1].set_title("Residuals Autocorrelation")
axes[c][1].set_ylabel('ACS')
axes[c][1].set_xlabel('Lag')
axes[c][1].acorr(res)
axes[c][2].set_title("Residuals Histogram")
axes[c][2].set_ylabel('Freq')
axes[c][2].set_xlabel('Bins')
axes[c][2].hist(res)
c += 1
plt.tight_layout()
Util.showAndSaveImage(fig, file, save)

View File

@ -8,16 +8,15 @@ import matplotlib.colors as pltcolors
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# from sklearn.cross_validation import KFold
from pyFTS.benchmarks import Measures
from pyFTS.benchmarks import Measures, naive, ResidualAnalysis
from pyFTS.partitioners import Grid
from pyFTS.common import Membership, FuzzySet, FLR, Transformations, Util
from pyFTS import fts, chen, yu, ismailefendi, sadaei, hofts, hwang, pfts, ifts
def allPointForecasters(data_train, data_test, partitions, max_order=3,save=False, file=None, tam=[20, 5]):
models = [chen.ConventionalFTS, yu.WeightedFTS, ismailefendi.ImprovedWeightedFTS,
sadaei.ExponentialyWeightedFTS, hwang.HighOrderFTS, hofts.HighOrderFTS,
pfts.ProbabilisticFTS]
models = [naive.Naive, chen.ConventionalFTS, yu.WeightedFTS, ismailefendi.ImprovedWeightedFTS,
sadaei.ExponentialyWeightedFTS, hofts.HighOrderFTS, pfts.ProbabilisticFTS]
objs = []
@ -38,25 +37,31 @@ def allPointForecasters(data_train, data_test, partitions, max_order=3,save=Fals
colors.append( all_colors[count] )
else:
for order in np.arange(1,max_order+1):
if order >= mfts.minOrder:
mfts = model(" n = " + str(order))
mfts.train(data_train, data_train_fs, order=order)
objs.append(mfts)
colors.append(all_colors[count])
count += 5
count += 10
print(getPointStatistics(data_test, objs))
print(ResidualAnalysis.compareResiduals(data_test, objs))
plotComparedSeries(data_test, objs, colors, typeonlegend=False, save=save, file=file, tam=tam, intervals=False)
ResidualAnalysis.plotResiduals(data_test, objs, save=save, file=file, tam=[tam[0],5*tam[1]])
def getPointStatistics(data, models, externalmodels = None, externalforecasts = None):
ret = "Model & Order & RMSE & MAPE \\\\ \n"
ret = "Model & Order & RMSE & MAPE & Theil's U \\\\ \n"
for fts in models:
forecasts = fts.forecast(data)
ret += fts.shortname + " & "
ret += str(fts.order) + " & "
ret += str(round(Measures.rmse(np.array(data[fts.order:]), np.array(forecasts[:-1])), 2)) + " & "
ret += str(round(Measures.mape(np.array(data[fts.order:]), np.array(forecasts[:-1])), 2))
ret += str(round(Measures.mape(np.array(data[fts.order:]), np.array(forecasts[:-1])), 2))+ " & "
ret += str(round(Measures.UStatistic(np.array(data[fts.order:]), np.array(forecasts[:-1])), 2))
ret += " \\\\ \n"
if externalmodels is not None:
l = len(externalmodels)
@ -64,7 +69,8 @@ def getPointStatistics(data, models, externalmodels = None, externalforecasts =
ret += externalmodels[k] + " & "
ret += " 1 & "
ret += str(round(Measures.rmse(data[fts.order:], externalforecasts[k][:-1]), 2)) + " & "
ret += str(round(Measures.mape(data[fts.order:], externalforecasts[k][:-1]), 2))
ret += str(round(Measures.mape(data[fts.order:], externalforecasts[k][:-1]), 2))+ " & "
ret += str(round(Measures.UStatistic(np.array(data[fts.order:]), np.array(forecasts[:-1])), 2))
ret += " \\\\ \n"
return ret
@ -91,6 +97,7 @@ def allIntervalForecasters(data_train, data_test, partitions, max_order=3,save=F
colors.append( all_colors[count] )
else:
for order in np.arange(1,max_order+1):
if order >= mfts.minOrder:
mfts = model(" n = " + str(order))
mfts.train(data_train, data_train_fs, order=order)
objs.append(mfts)

View File

@ -11,4 +11,4 @@ class Naive(fts.FTS):
self.detail = "Naïve Model"
def forecast(self, data):
return data
return [k for k in data]

View File

@ -33,3 +33,9 @@ def roi(original):
for t in np.arange(0, n-1):
roi.append( (original[t+1] - original[t])/original[t] )
return roi
def smoothing(original, lags):
pass
def aggregate(original, operation):
pass

1
fts.py
View File

@ -11,6 +11,7 @@ class FTS(object):
self.name = name
self.detail = name
self.isHighOrder = False
self.minOrder = 1
self.hasSeasonality = False
self.hasPointForecasting = True
self.hasIntervalForecasting = False

View File

@ -7,6 +7,7 @@ class HighOrderFTS(fts.FTS):
def __init__(self, name):
super(HighOrderFTS, self).__init__(1, name)
self.isHighOrder = True
self.minOrder = 2
self.name = "Hwang High Order FTS"
self.shortname = "Hwang" + name
self.detail = "Hwang"