Gaussian Process Regression benchmark method

Petrônio Cândido 2019-05-30 15:28:50 -03:00
parent 5f667c8680
commit 18c0fed021
3 changed files with 181 additions and 3 deletions

View File

@@ -86,7 +86,7 @@ class ARIMA(fts.FTS):
 ret = []
 for ct, sample in enumerate(sim_vector):
-    i = [np.percentile(sample, qt) for qt in [alpha, 1-alpha]]
+    i = np.percentile(sample, [alpha*100, (1-alpha)*100]).tolist()
     ret.append(i)
 return ret
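The change above matters because np.percentile expects percentages in [0, 100], not quantiles in [0, 1]: the old code requested the 0.05th and 0.95th percentiles, both deep in the lower tail, instead of a 90% interval. A minimal sketch of the difference (toy data, illustrative values only):

import numpy as np

sample = np.random.normal(0, 1, 10_000)
alpha = 0.05

# old (buggy): 0.05 and 0.95 are read as *percentages*, i.e. the
# 0.05th and 0.95th percentiles -- both in the extreme lower tail
bad = [np.percentile(sample, qt) for qt in [alpha, 1 - alpha]]

# fixed: scale the quantiles to the 0-100 range np.percentile expects
good = np.percentile(sample, [alpha * 100, (1 - alpha) * 100]).tolist()

print(bad)   # e.g. [-3.7, -3.2] -- not a 90% interval
print(good)  # e.g. [-1.64, 1.64] -- the expected 90% interval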

View File

@@ -0,0 +1,176 @@
#!/usr/bin/python
# -*- coding: utf8 -*-

import numpy as np
import scipy.stats as st
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel

from pyFTS.common import fts
from pyFTS.probabilistic import ProbabilityDistribution
class GPR(fts.FTS):
    """
    Façade for sklearn.gaussian_process
    """
    def __init__(self, **kwargs):
        super(GPR, self).__init__(**kwargs)
        self.name = "GPR"
        self.detail = "Gaussian Process Regression"
        self.is_high_order = True
        self.has_point_forecasting = True
        self.has_interval_forecasting = True
        self.has_probability_forecasting = True
        self.uod_clip = False
        self.benchmark_only = True
        self.min_order = 1
        self.alpha = kwargs.get("alpha", 0.05)
        self.lscale = kwargs.get('length_scale', 1)
        self.kernel = ConstantKernel(1.0) * RBF(length_scale=self.lscale)
        # note: the alpha below is the noise term added to the kernel diagonal,
        # unrelated to the significance level stored in self.alpha
        self.model = GaussianProcessRegressor(kernel=self.kernel, alpha=.05,
                                              n_restarts_optimizer=10,
                                              normalize_y=False)
        self.model_fit = None

    def train(self, data, **kwargs):
        l = len(data)
        X = []
        Y = []
        # lag-embedded design matrix: the features of y(t) are [y(t-1), ..., y(t-order)]
        for t in np.arange(self.order, l):
            X.append([data[t - k - 1] for k in np.arange(self.order)])
            Y.append(data[t])

        self.model_fit = self.model.fit(X, Y)

    def forecast(self, data, **kwargs):
        l = len(data)
        X = []
        for t in np.arange(self.order, l):
            X.append([data[t - k - 1] for k in np.arange(self.order)])

        return self.model_fit.predict(X)

    def forecast_interval(self, data, **kwargs):
        alpha = kwargs.get('alpha', self.alpha)

        l = len(data)
        X = []
        for t in np.arange(self.order, l):
            X.append([data[t - k - 1] for k in np.arange(self.order)])

        # return_std yields the predictive standard deviation directly,
        # avoiding the full covariance matrix of return_cov
        Y, sigma = self.model_fit.predict(X, return_std=True)

        # ppf(1 - alpha) > 0, so [Y - u, Y + u] is ordered [lower, upper];
        # the original ppf(alpha) < 0 swapped the bounds
        uncertainty = st.norm.ppf(1 - alpha) * sigma

        return [[Y[k] - uncertainty[k], Y[k] + uncertainty[k]] for k in range(len(Y))]

    def forecast_ahead_interval(self, data, steps, **kwargs):
        alpha = kwargs.get('alpha', self.alpha)
        smoothing = kwargs.get("smoothing", 0.5)

        X = [data[t] for t in np.arange(self.order)]
        S = []
        # iterated one-step-ahead forecasting: feed each prediction back as input
        for k in np.arange(self.order, steps + self.order):
            sample = [X[k - t - 1] for t in np.arange(self.order)]
            Y, sigma = self.model_fit.predict([sample], return_std=True)
            X.append(Y[0])
            S.append(sigma[0])

        X = X[-steps:]

        intervals = []
        for k in range(steps):
            # heuristically inflate the spread as the horizon grows;
            # ppf(alpha) < 0 gives the lower bound, ppf(1 - alpha) > 0 the upper
            i = [X[k] + st.norm.ppf(alpha) * (1 + k * smoothing) * S[k],
                 X[k] + st.norm.ppf(1 - alpha) * (1 + k * smoothing) * S[k]]
            intervals.append(i)

        return intervals

    def forecast_distribution(self, data, **kwargs):
        ret = []

        l = len(data)
        X = []
        for t in np.arange(self.order, l):
            X.append([data[t - k - 1] for k in np.arange(self.order)])

        Y, sigma = self.model_fit.predict(X, return_std=True)

        for k in range(len(Y)):
            dist = ProbabilityDistribution.ProbabilityDistribution(type="histogram",
                                                                   uod=[self.original_min, self.original_max])
            intervals = []
            for alpha in np.arange(0.05, 0.5, 0.05):
                qt1 = Y[k] + st.norm.ppf(alpha) * sigma[k]
                qt2 = Y[k] + st.norm.ppf(1 - alpha) * sigma[k]
                intervals.append([qt1, qt2])

            dist.append_interval(intervals)
            ret.append(dist)

        return ret

    def forecast_ahead_distribution(self, data, steps, **kwargs):
        smoothing = kwargs.get("smoothing", 0.5)

        X = [data[t] for t in np.arange(self.order)]
        S = []
        for k in np.arange(self.order, steps + self.order):
            sample = [X[k - t - 1] for t in np.arange(self.order)]
            Y, sigma = self.model_fit.predict([sample], return_std=True)
            X.append(Y[0])
            # store the predicted standard deviation of each step (not S[0])
            S.append(sigma[0])

        X = X[-steps:]

        ret = []
        for k in range(steps):
            dist = ProbabilityDistribution.ProbabilityDistribution(type="histogram",
                                                                   uod=[self.original_min, self.original_max])
            intervals = []
            for alpha in np.arange(0.05, 0.5, 0.05):
                qt1 = X[k] + st.norm.ppf(alpha) * (1 + k * smoothing) * S[k]
                qt2 = X[k] + st.norm.ppf(1 - alpha) * (1 + k * smoothing) * S[k]
                intervals.append([qt1, qt2])

            dist.append_interval(intervals)
            ret.append(dist)

        return ret
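
The interval construction used throughout the class is the standard Gaussian-quantile recipe: a GP's predictive distribution is Gaussian, so (mean, std) determine any quantile. A minimal self-contained sketch of the same idea outside pyFTS (toy data and parameter choices are illustrative only):

import numpy as np
import scipy.stats as st
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel

# toy series: a noisy sine wave, lag-embedded exactly as in GPR.train()
data = np.sin(np.arange(0, 20, 0.1)) + np.random.normal(0, 0.1, 200)
order = 2
X = [[data[t - k - 1] for k in range(order)] for t in range(order, len(data))]
y = data[order:]

gp = GaussianProcessRegressor(kernel=ConstantKernel(1.0) * RBF(length_scale=1),
                              alpha=.05, n_restarts_optimizer=10)
gp.fit(X, y)

# ppf(alpha) < 0 yields the lower bound, ppf(1 - alpha) > 0 the upper
mean, std = gp.predict(X[-5:], return_std=True)
alpha = 0.05
lower = mean + st.norm.ppf(alpha) * std
upper = mean + st.norm.ppf(1 - alpha) * std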

View File

@@ -14,7 +14,7 @@ from pyFTS.benchmarks import benchmarks as bchmk, Measures
 from pyFTS.models import chen, yu, cheng, ismailefendi, hofts, pwfts, tsaur, song, sadaei
 from pyFTS.models.ensemble import ensemble
 from pyFTS.common import Transformations, Membership
-from pyFTS.benchmarks import arima, quantreg, BSTS
+from pyFTS.benchmarks import arima, quantreg, BSTS, gaussianproc
 from pyFTS.fcm import fts, common, GA
 from pyFTS.data import Enrollments, TAIEX
@@ -24,9 +24,11 @@ data = TAIEX.get_data()
 train = data[:800]
 test = data[800:1000]

-model = ensemble.SimpleEnsembleFTS(fts_method=hofts.HighOrderFTS)
+#model = ensemble.SimpleEnsembleFTS(fts_method=hofts.HighOrderFTS)
 #model = quantreg.QuantileRegression(order=2, dist=True)
 #model = arima.ARIMA(order = (2,0,0))
 #model = BSTS.ARIMA(order=(2,0,0))
+model = gaussianproc.GPR(order=2)

 model.fit(train)

 horizon=5
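
The script stops after fitting. A plausible continuation, assuming the usual pyFTS predict() front end (keyword dispatch on type= and steps_ahead=; this part is not in the commit), would exercise the new forecasters like this:

# sketch: point, interval and multi-step interval forecasts through the
# generic pyFTS API (assumed, not part of this commit)
point = model.predict(test[:10])
intervals = model.predict(test[:10], type='interval', alpha=.05)
ahead = model.predict(test[:model.order], type='interval', steps_ahead=horizon)
print(intervals)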