From 18c0fed02185bb4377afeefd396fa93dc143fa5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido?= Date: Thu, 30 May 2019 15:28:50 -0300 Subject: [PATCH] Gaussian Process Regression benchmark method --- pyFTS/benchmarks/BSTS.py | 2 +- pyFTS/benchmarks/gaussianproc.py | 176 +++++++++++++++++++++++++++++++ pyFTS/tests/general.py | 6 +- 3 files changed, 181 insertions(+), 3 deletions(-) create mode 100644 pyFTS/benchmarks/gaussianproc.py diff --git a/pyFTS/benchmarks/BSTS.py b/pyFTS/benchmarks/BSTS.py index 7cf6b27..35bd014 100644 --- a/pyFTS/benchmarks/BSTS.py +++ b/pyFTS/benchmarks/BSTS.py @@ -86,7 +86,7 @@ class ARIMA(fts.FTS): ret = [] for ct, sample in enumerate(sim_vector): - i = [np.percentile(sample, qt) for qt in [alpha, 1-alpha]] + i = np.percentile(sample, [alpha*100, (1-alpha)*100]).tolist() ret.append(i) return ret diff --git a/pyFTS/benchmarks/gaussianproc.py b/pyFTS/benchmarks/gaussianproc.py new file mode 100644 index 0000000..a856fba --- /dev/null +++ b/pyFTS/benchmarks/gaussianproc.py @@ -0,0 +1,176 @@ +#!/usr/bin/python +# -*- coding: utf8 -*- + +import numpy as np +import pandas as pd +import pyflux as pf +from sklearn.gaussian_process import GaussianProcessRegressor +from sklearn.gaussian_process.kernels import RBF, ConstantKernel +import scipy.stats as st +from pyFTS.common import SortedCollection, fts +from pyFTS.probabilistic import ProbabilityDistribution + + +class GPR(fts.FTS): + """ + Façade for sklearn.gaussian_proces + """ + def __init__(self, **kwargs): + super(GPR, self).__init__(**kwargs) + self.name = "GPR" + self.detail = "Gaussian Process Regression" + self.is_high_order = True + self.has_point_forecasting = True + self.has_interval_forecasting = True + self.has_probability_forecasting = True + self.uod_clip = False + self.benchmark_only = True + self.min_order = 1 + self.alpha = kwargs.get("alpha", 0.05) + + self.lscale = kwargs.get('length_scale', 1) + + self.kernel = ConstantKernel(1.0) * RBF(length_scale=self.lscale) + self.model = GaussianProcessRegressor(kernel=self.kernel, alpha=.05, + n_restarts_optimizer=10, + normalize_y=False) + self.model_fit = None + + + def train(self, data, **kwargs): + l = len(data) + X = [] + Y = [] + + for t in np.arange(self.order, l): + X.append([data[t - k - 1] for k in np.arange(self.order)]) + Y.append(data[t]) + + self.model_fit = self.model.fit(X, Y) + + + def forecast(self, data, **kwargs): + X = [] + l = len(data) + for t in np.arange(self.order, l): + X.append([data[t - k - 1] for k in np.arange(self.order)]) + + return self.model_fit.predict(X) + + def forecast_interval(self, data, **kwargs): + + if 'alpha' in kwargs: + alpha = kwargs.get('alpha') + else: + alpha = self.alpha + + X = [] + l = len(data) + for t in np.arange(self.order, l): + X.append([data[t - k - 1] for k in np.arange(self.order)]) + + Y, sigma = self.model_fit.predict(X, return_cov=True) + + uncertainty = st.norm.ppf(alpha) * np.sqrt(np.diag(sigma)) + + l = len(Y) + intervals = [[Y[k] - uncertainty[k], Y[k] + uncertainty[k]] for k in range(l)] + + return intervals + + def forecast_ahead_interval(self, data, steps, **kwargs): + + if 'alpha' in kwargs: + alpha = kwargs.get('alpha') + else: + alpha = self.alpha + + smoothing = kwargs.get("smoothing", 0.5) + + X = [data[t] for t in np.arange(self.order+10)] + S = [] + + for k in np.arange(self.order, steps+self.order): + sample = [[X[k-t-1] for t in np.arange(self.order)] for p in range(5)] + Y, sigma = self.model_fit.predict([sample], return_std=True) + X.append(Y[0]) + S.append(sigma[0]) + + X = X[-steps:] + + intervals = [] + for k in range(steps): + i = [] + i.append(X[k] - st.norm.ppf(alpha) * (1 + k*smoothing)*np.sqrt(S[k])) + i.append(X[k] - st.norm.ppf(1-alpha) * (1 + k * smoothing) * np.sqrt(S[k])) + intervals.append(i) + + return intervals + + def forecast_distribution(self, data, **kwargs): + + ret = [] + + X = [] + l = len(data) + for t in np.arange(self.order, l): + X.append([data[t - k - 1] for k in np.arange(self.order)]) + + Y, sigma = self.model_fit.predict(X, return_std=True) + + for k in len(Y): + + dist = ProbabilityDistribution.ProbabilityDistribution(type="histogram", uod=[self.original_min, self.original_max]) + intervals = [] + for alpha in np.arange(0.05, 0.5, 0.05): + + qt1 = Y[k] + st.norm.ppf(alpha) * sigma[k] + qt2 = Y[k] + st.norm.ppf(1 - alpha) * sigma[k] + + intervals.append([qt1, qt2]) + + dist.append_interval(intervals) + + ret.append(dist) + + return ret + + + def forecast_ahead_distribution(self, data, steps, **kwargs): + smoothing = kwargs.get("smoothing", 0.5) + + X = [data[t] for t in np.arange(self.order)] + S = [] + + for k in np.arange(self.order, steps+self.order): + sample = [X[k-t-1] for t in np.arange(self.order)] + Y, sigma = self.model_fit.predict([sample], return_std=True) + X.append(Y[0]) + S.append(S[0]) + + X = X[-steps:] + #uncertainty = st.norm.ppf(alpha) * np.sqrt(np.diag(sigma)) + ret = [] + for k in range(steps): + dist = ProbabilityDistribution.ProbabilityDistribution(type="histogram", + uod=[self.original_min, self.original_max]) + intervals = [] + for alpha in np.arange(0.05, 0.5, 0.05): + qt1 = X[k] - st.norm.ppf(alpha) * (1 + k*smoothing)*np.sqrt(S[k]) + qt2 = X[k] - st.norm.ppf(1-alpha) * (1 + k * smoothing) * np.sqrt(S[k]) + + intervals.append([qt1, qt2]) + + dist.append_interval(intervals) + + ret.append(dist) + + return ret + + + + + + + + diff --git a/pyFTS/tests/general.py b/pyFTS/tests/general.py index 971e76f..eed82d5 100644 --- a/pyFTS/tests/general.py +++ b/pyFTS/tests/general.py @@ -14,7 +14,7 @@ from pyFTS.benchmarks import benchmarks as bchmk, Measures from pyFTS.models import chen, yu, cheng, ismailefendi, hofts, pwfts, tsaur, song, sadaei from pyFTS.models.ensemble import ensemble from pyFTS.common import Transformations, Membership -from pyFTS.benchmarks import arima, quantreg, BSTS +from pyFTS.benchmarks import arima, quantreg, BSTS, gaussianproc from pyFTS.fcm import fts, common, GA from pyFTS.data import Enrollments, TAIEX @@ -24,9 +24,11 @@ data = TAIEX.get_data() train = data[:800] test = data[800:1000] -model = ensemble.SimpleEnsembleFTS(fts_method=hofts.HighOrderFTS) +#model = ensemble.SimpleEnsembleFTS(fts_method=hofts.HighOrderFTS) #model = quantreg.QuantileRegression(order=2, dist=True) #model = arima.ARIMA(order = (2,0,0)) +#model = BSTS.ARIMA(order=(2,0,0)) +model = gaussianproc.GPR(order=2) model.fit(train) horizon=5