From 664fec513e578904f83670a3be582b1250ab0402 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido=20de=20Lima=20e=20Silva?= Date: Wed, 25 Jan 2017 12:17:07 -0200 Subject: [PATCH] Box-Pierce and Box-Ljang tests form residuals and many bugfixes --- benchmarks/Measures.py | 39 ++++++++++++++++++-- benchmarks/ResidualAnalysis.py | 67 +++++++++++++++++++++++++++++----- benchmarks/benchmarks.py | 39 ++++++++++++-------- benchmarks/naive.py | 2 +- common/Transformations.py | 6 +++ fts.py | 1 + hwang.py | 1 + 7 files changed, 125 insertions(+), 30 deletions(-) diff --git a/benchmarks/Measures.py b/benchmarks/Measures.py index e9b8f13..990131f 100644 --- a/benchmarks/Measures.py +++ b/benchmarks/Measures.py @@ -1,9 +1,21 @@ -#!/usr/bin/python # -*- coding: utf8 -*- import numpy as np import pandas as pd + +# Autocorrelation function estimative +def acf(data, k): + mu = np.mean(data) + sigma = np.var(data) + n = len(data) + s = 0 + for t in np.arange(0,n-k): + s += (data[t]-mu) * (data[t+k] - mu) + + return 1/((n-k)*sigma)*s + + # Erro quadrático médio def rmse(targets, forecasts): return np.sqrt(np.nanmean((forecasts - targets) ** 2)) @@ -25,16 +37,36 @@ def mape_interval(targets, forecasts): # Theil's U Statistic -def U(targets, forecasts): +def UStatistic(targets, forecasts): l = len(targets) naive = [] y = [] for k in np.arange(0,l-1): - y.append(((forecasts[k] - targets[k+1])/targets[k]) ** 2) + y.append(((forecasts[k+1] - targets[k+1])/targets[k]) ** 2) naive.append(((targets[k + 1] - targets[k]) / targets[k]) ** 2) return np.sqrt(sum(y)/sum(naive)) +# Q Statistic for Box-Pierce test +def BoxPierceStatistic(data, h): + n = len(data) + s = 0 + for k in np.arange(1,h+1): + r = acf(data, k) + s += r**2 + return n*s + + +# Q Statistic for Ljung–Box test +def BoxLjungStatistic(data, h): + n = len(data) + s = 0 + for k in np.arange(1,h+1): + r = acf(data, k) + s += r**2 / (n -k) + return n*(n-2)*s + + # Sharpness - Mean size of the intervals def sharpness(forecasts): tmp = [i[1] - i[0] for i in forecasts] @@ -57,3 +89,4 @@ def coverage(targets, forecasts): else: preds.append(0) return np.mean(preds) + diff --git a/benchmarks/ResidualAnalysis.py b/benchmarks/ResidualAnalysis.py index db1fa1f..a81361f 100644 --- a/benchmarks/ResidualAnalysis.py +++ b/benchmarks/ResidualAnalysis.py @@ -6,21 +6,68 @@ import pandas as pd import matplotlib as plt import matplotlib.pyplot as plt from pyFTS.common import Transformations,Util +from pyFTS.benchmarks import Measures +from scipy import stats def residuals(targets, forecasts, order=1): - return np.array(targets[order:]) - np.array(forecasts[:-order]) + return np.array(targets[order:]) - np.array(forecasts[:-1]) -def plotResiduals(targets, forecasts, order=1, tam=[8, 8], save=False, file=None): - res = residuals(targets,forecasts,order) - fig = plt.figure(figsize=tam) - ax1 = fig.add_axes([0, 1, 0.9, 0.3]) # left, bottom, width, height - ax1.plot(res) - ax2 = fig.add_axes([0, 0.65, 0.9, 0.3]) - ax2.acorr(res) - ax3 = fig.add_axes([0, 0.3, 0.9, 0.3]) - ax3.hist(res) +def ChiSquared(q,h): + p = stats.chi2.sf(q, h) + return p + + + +def compareResiduals(data, models): + ret = "Model & Order & Mean & STD & Box-Pierce & Box-Ljung & P-value \\\\ \n" + for mfts in models: + forecasts = mfts.forecast(data) + res = residuals(data, forecasts, mfts.order) + mu = np.mean(res) + sig = np.std(res) + ret += mfts.shortname + " & " + ret += str(mfts.order) + " & " + ret += str(mu) + " & " + ret += str(sig) + " & " + q1 = Measures.BoxPierceStatistic(res, 10) + ret += str(q1) + " & " + q2 = Measures.BoxLjungStatistic(res, 10) + ret += str(q2) + " & " + ret += str(ChiSquared(q2, 10)) + ret += " \\\\ \n" + return ret + + +def plotResiduals(targets, models, tam=[8, 8], save=False, file=None): + + fig, axes = plt.subplots(nrows=len(models), ncols=3, figsize=tam) + c = 0 + for mfts in models: + forecasts = mfts.forecast(targets) + res = residuals(targets,forecasts,mfts.order) + mu = np.mean(res) + sig = np.std(res) + + axes[c][0].set_title("Residuals Mean=" + str(mu) + " STD = " + str(sig)) + axes[c][0].set_ylabel('E') + axes[c][0].set_xlabel('T') + axes[c][0].plot(res) + + axes[c][1].set_title("Residuals Autocorrelation") + axes[c][1].set_ylabel('ACS') + axes[c][1].set_xlabel('Lag') + axes[c][1].acorr(res) + + axes[c][2].set_title("Residuals Histogram") + axes[c][2].set_ylabel('Freq') + axes[c][2].set_xlabel('Bins') + axes[c][2].hist(res) + + c += 1 + + plt.tight_layout() Util.showAndSaveImage(fig, file, save) diff --git a/benchmarks/benchmarks.py b/benchmarks/benchmarks.py index 1829fcd..aa042aa 100644 --- a/benchmarks/benchmarks.py +++ b/benchmarks/benchmarks.py @@ -8,16 +8,15 @@ import matplotlib.colors as pltcolors import matplotlib.pyplot as plt from mpl_toolkits.mplot3d import Axes3D # from sklearn.cross_validation import KFold -from pyFTS.benchmarks import Measures +from pyFTS.benchmarks import Measures, naive, ResidualAnalysis from pyFTS.partitioners import Grid from pyFTS.common import Membership, FuzzySet, FLR, Transformations, Util from pyFTS import fts, chen, yu, ismailefendi, sadaei, hofts, hwang, pfts, ifts def allPointForecasters(data_train, data_test, partitions, max_order=3,save=False, file=None, tam=[20, 5]): - models = [chen.ConventionalFTS, yu.WeightedFTS, ismailefendi.ImprovedWeightedFTS, - sadaei.ExponentialyWeightedFTS, hwang.HighOrderFTS, hofts.HighOrderFTS, - pfts.ProbabilisticFTS] + models = [naive.Naive, chen.ConventionalFTS, yu.WeightedFTS, ismailefendi.ImprovedWeightedFTS, + sadaei.ExponentialyWeightedFTS, hofts.HighOrderFTS, pfts.ProbabilisticFTS] objs = [] @@ -38,25 +37,31 @@ def allPointForecasters(data_train, data_test, partitions, max_order=3,save=Fals colors.append( all_colors[count] ) else: for order in np.arange(1,max_order+1): - mfts = model(" n = " + str(order)) - mfts.train(data_train, data_train_fs, order=order) - objs.append(mfts) - colors.append(all_colors[count]) - count += 5 + if order >= mfts.minOrder: + mfts = model(" n = " + str(order)) + mfts.train(data_train, data_train_fs, order=order) + objs.append(mfts) + colors.append(all_colors[count]) + count += 10 print(getPointStatistics(data_test, objs)) + print(ResidualAnalysis.compareResiduals(data_test, objs)) + plotComparedSeries(data_test, objs, colors, typeonlegend=False, save=save, file=file, tam=tam, intervals=False) + ResidualAnalysis.plotResiduals(data_test, objs, save=save, file=file, tam=[tam[0],5*tam[1]]) + def getPointStatistics(data, models, externalmodels = None, externalforecasts = None): - ret = "Model & Order & RMSE & MAPE \\\\ \n" + ret = "Model & Order & RMSE & MAPE & Theil's U \\\\ \n" for fts in models: forecasts = fts.forecast(data) ret += fts.shortname + " & " ret += str(fts.order) + " & " ret += str(round(Measures.rmse(np.array(data[fts.order:]), np.array(forecasts[:-1])), 2)) + " & " - ret += str(round(Measures.mape(np.array(data[fts.order:]), np.array(forecasts[:-1])), 2)) + ret += str(round(Measures.mape(np.array(data[fts.order:]), np.array(forecasts[:-1])), 2))+ " & " + ret += str(round(Measures.UStatistic(np.array(data[fts.order:]), np.array(forecasts[:-1])), 2)) ret += " \\\\ \n" if externalmodels is not None: l = len(externalmodels) @@ -64,7 +69,8 @@ def getPointStatistics(data, models, externalmodels = None, externalforecasts = ret += externalmodels[k] + " & " ret += " 1 & " ret += str(round(Measures.rmse(data[fts.order:], externalforecasts[k][:-1]), 2)) + " & " - ret += str(round(Measures.mape(data[fts.order:], externalforecasts[k][:-1]), 2)) + ret += str(round(Measures.mape(data[fts.order:], externalforecasts[k][:-1]), 2))+ " & " + ret += str(round(Measures.UStatistic(np.array(data[fts.order:]), np.array(forecasts[:-1])), 2)) ret += " \\\\ \n" return ret @@ -91,10 +97,11 @@ def allIntervalForecasters(data_train, data_test, partitions, max_order=3,save=F colors.append( all_colors[count] ) else: for order in np.arange(1,max_order+1): - mfts = model(" n = " + str(order)) - mfts.train(data_train, data_train_fs, order=order) - objs.append(mfts) - colors.append(all_colors[count]) + if order >= mfts.minOrder: + mfts = model(" n = " + str(order)) + mfts.train(data_train, data_train_fs, order=order) + objs.append(mfts) + colors.append(all_colors[count]) count += 5 print(getIntervalStatistics(data_test, objs)) diff --git a/benchmarks/naive.py b/benchmarks/naive.py index 8f0e257..9a86875 100644 --- a/benchmarks/naive.py +++ b/benchmarks/naive.py @@ -11,4 +11,4 @@ class Naive(fts.FTS): self.detail = "Naïve Model" def forecast(self, data): - return data \ No newline at end of file + return [k for k in data] diff --git a/common/Transformations.py b/common/Transformations.py index ad435f2..24fb58b 100644 --- a/common/Transformations.py +++ b/common/Transformations.py @@ -33,3 +33,9 @@ def roi(original): for t in np.arange(0, n-1): roi.append( (original[t+1] - original[t])/original[t] ) return roi + +def smoothing(original, lags): + pass + +def aggregate(original, operation): + pass diff --git a/fts.py b/fts.py index 8c8998e..1431160 100644 --- a/fts.py +++ b/fts.py @@ -11,6 +11,7 @@ class FTS(object): self.name = name self.detail = name self.isHighOrder = False + self.minOrder = 1 self.hasSeasonality = False self.hasPointForecasting = True self.hasIntervalForecasting = False diff --git a/hwang.py b/hwang.py index 39248a6..26465b7 100644 --- a/hwang.py +++ b/hwang.py @@ -7,6 +7,7 @@ class HighOrderFTS(fts.FTS): def __init__(self, name): super(HighOrderFTS, self).__init__(1, name) self.isHighOrder = True + self.minOrder = 2 self.name = "Hwang High Order FTS" self.shortname = "Hwang" + name self.detail = "Hwang"