k-Nearest Neighbors benchmark method

commit abe9a45a47 (parent 34995b72f8)
@@ -387,7 +387,7 @@ def get_distribution_statistics(data, model, **kwargs):
         _s1 = time.time()
         forecasts = model.predict(data, **kwargs)
         _e1 = time.time()
-        ret.append(round(crps(data, forecasts), 3))
+        ret.append(round(crps(data[model.order:], forecasts), 3))
         ret.append(round(_e1 - _s1, 3))
     else:
         skip = kwargs.get('steps_ahead_sampler', 1)
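
The CRPS fix above aligns the evaluation targets with the forecasts: a model of order n produces its first forecast only after consuming n observations, so the first n entries of data have no matching forecast and must be skipped. A minimal sketch of the alignment (sizes are illustrative, assuming predict() returns one forecast per conditioning window, as the FTS benchmark models do):

    # 100 observations and an order-2 model yield 98 forecastable points
    data = list(range(100))
    order = 2
    forecasts = [None] * (len(data) - order)  # one forecast per target
    targets = data[order:]                    # what crps() should score against
    assert len(targets) == len(forecasts)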
@@ -30,7 +30,7 @@ class ARIMA(fts.FTS):
         self.benchmark_only = True
         self.min_order = 1
         self.alpha = kwargs.get("alpha", 0.05)
-        self.shortname += str(self.alpha)
+        self.order = kwargs.get("order", (1,0,0))
         self._decompose_order(self.order)

     def _decompose_order(self, order):
@@ -43,6 +43,10 @@ class ARIMA(fts.FTS):
         self.shortname = "ARIMA(" + str(self.p) + "," + str(self.d) + "," + str(self.q) + ") - " + str(self.alpha)

     def train(self, data, **kwargs):
+
+        self.original_min = np.nanmin(data)
+        self.original_max = np.nanmax(data)
+
         if kwargs.get('order', None) is not None:
             order = kwargs.get('order', (1,0,0))
             self._decompose_order(order)
@@ -50,8 +54,6 @@ class ARIMA(fts.FTS):
         if self.indexer is not None:
             data = self.indexer.get_data(data)

-        #data = self.apply_transformations(data, updateUoD=True)
-
         try:
             self.model = stats_arima(data, order=(self.p, self.d, self.q))
             self.model_fit = self.model.fit(disp=0)
@@ -69,9 +71,6 @@ class ARIMA(fts.FTS):
         if self.model_fit is None:
             return np.nan

-        if self.indexer is not None and isinstance(ndata, pd.DataFrame):
-            data = self.indexer.get_data(ndata)
-
         ndata = np.array(ndata)

         l = len(ndata)
@@ -101,8 +100,6 @@ class ARIMA(fts.FTS):

         sigma = np.sqrt(self.model_fit.sigma2)

-        #ndata = np.array(self.apply_transformations(data))
-
         l = len(data)

         ret = []
@@ -122,8 +119,6 @@ class ARIMA(fts.FTS):

             ret.append(tmp)

-        #ret = self.apply_inverse_transformations(ret, params=[data[self.order - 1:]], point_to_interval=True)
-
         return ret

     def forecast_ahead_interval(self, ndata, steps, **kwargs):
@@ -134,8 +129,6 @@ class ARIMA(fts.FTS):

         sigma = np.sqrt(self.model_fit.sigma2)

-        #ndata = np.array(self.apply_transformations(data))
-
         l = len(ndata)

         nmeans = self.forecast_ahead(ndata, steps, **kwargs)
@@ -152,15 +145,10 @@ class ARIMA(fts.FTS):

             ret.append(tmp)

-        #ret = self.apply_inverse_transformations(ret, params=[[data[-1] for a in np.arange(0, steps)]], interval=True)
-
         return ret

     def forecast_distribution(self, data, **kwargs):

-        if self.indexer is not None and isinstance(data, pd.DataFrame):
-            data = self.indexer.get_data(data)
-
         sigma = np.sqrt(self.model_fit.sigma2)

         l = len(data)
@@ -168,8 +156,6 @@ class ARIMA(fts.FTS):
         ret = []

         for k in np.arange(self.order, l + 1):
-            tmp = []
-
             sample = [data[i] for i in np.arange(k - self.order, k)]

             mean = self.forecast(sample)

@@ -19,7 +19,7 @@ from pyFTS.probabilistic import ProbabilityDistribution
 from pyFTS.common import Transformations
 from pyFTS.models import song, chen, yu, ismailefendi, sadaei, hofts, pwfts, ifts, cheng, hwang
 from pyFTS.models.ensemble import ensemble
-from pyFTS.benchmarks import Measures, naive, arima, ResidualAnalysis, quantreg
+from pyFTS.benchmarks import Measures, naive, arima, ResidualAnalysis, quantreg, knn
 from pyFTS.benchmarks import Util as bUtil
 from pyFTS.common import Util as cUtil
 # from sklearn.cross_validation import KFold
@@ -156,8 +156,7 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
     elif benchmark_methods is not None:
         for count, model in enumerate(benchmark_methods, start=0):
             par = benchmark_methods_parameters[count]
-            mfts = model(str(par if par is not None else ""))
-            mfts.order = par
+            mfts = model("", **par)
             pool.append(mfts)

     if type == 'point':
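
With this change each entry of benchmark_methods_parameters is expected to be a dict of keyword arguments that is unpacked straight into the model constructor, instead of a bare order value. A sketch of the new contract (the parameter values are illustrative):

    from pyFTS.benchmarks import arima

    par = {'order': (1, 0, 0), 'alpha': .05}
    mfts = arima.ARIMA("", **par)  # same as arima.ARIMA("", order=(1, 0, 0), alpha=.05)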
@@ -244,7 +243,6 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
     progressbar.close()

     if distributed:
-        jobs2 = []

         rng = jobs

@@ -268,10 +266,6 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):

         conn.close()

-    sintetic = kwargs.get('sintetic', False)
-
-    #return synthesis_method(jobs, experiments, save, file, sintetic)
-

 def get_benchmark_point_methods():
     """Return all non FTS methods for point forecasting"""
@@ -287,7 +281,7 @@ def get_point_methods():

 def get_benchmark_interval_methods():
     """Return all non FTS methods for point_to_interval forecasting"""
-    return [quantreg.QuantileRegression]
+    return [ arima.ARIMA, quantreg.QuantileRegression]


 def get_interval_methods():
@@ -302,7 +296,7 @@ def get_probabilistic_methods():

 def get_benchmark_probabilistic_methods():
     """Return all FTS methods for probabilistic forecasting"""
-    return [arima.ARIMA, quantreg.QuantileRegression]
+    return [arima.ARIMA, quantreg.QuantileRegression, knn.KNearestNeighbors]


 def run_point(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
@@ -398,6 +392,7 @@ def run_interval(mfts, partitioner, train_data, test_data, window_key=None, **kw
     method = kwargs.get('method', None)

     if mfts.benchmark_only:
+        mfts.append_transformation(partitioner.transformation)
         _key = mfts.shortname + str(mfts.order if mfts.order is not None else "") + str(mfts.alpha)
     else:
         pttr = str(partitioner.__module__).split('.')[-1]
@@ -444,10 +439,11 @@ def run_probabilistic(mfts, partitioner, train_data, test_data, window_key=None,
     from pyFTS.models import hofts, ifts, pwfts
     from pyFTS.models.ensemble import ensemble
     from pyFTS.partitioners import Grid, Entropy, FCM
-    from pyFTS.benchmarks import Measures, arima
+    from pyFTS.benchmarks import Measures, arima, quantreg, knn
     from pyFTS.models.seasonal import SeasonalIndexer

-    tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS, arima.ARIMA, ensemble.AllMethodEnsembleFTS]
+    tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS, arima.ARIMA,
+           ensemble.AllMethodEnsembleFTS, knn.KNearestNeighbors]

     tmp2 = [Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner]

@@ -460,6 +456,7 @@ def run_probabilistic(mfts, partitioner, train_data, test_data, window_key=None,

     if mfts.benchmark_only:
         _key = mfts.shortname + str(mfts.order if mfts.order is not None else "") + str(mfts.alpha)
+        mfts.append_transformation(partitioner.transformation)
     else:
         pttr = str(partitioner.__module__).split('.')[-1]
         _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)

pyFTS/benchmarks/knn.py (new file, 71 lines)
@@ -0,0 +1,71 @@
+#!/usr/bin/python
+# -*- coding: utf8 -*-
+
+import numpy as np
+from statsmodels.tsa.tsatools import lagmat
+from pyFTS.common import fts
+from pyFTS.probabilistic import ProbabilityDistribution
+
+
+class KNearestNeighbors(fts.FTS):
+    """
+    K-Nearest Neighbors
+    """
+    def __init__(self, name, **kwargs):
+        super(KNearestNeighbors, self).__init__(1, "kNN"+name)
+        self.name = "kNN"
+        self.detail = "K-Nearest Neighbors"
+        self.is_high_order = True
+        self.has_point_forecasting = True
+        self.has_interval_forecasting = True
+        self.has_probability_forecasting = True
+        self.benchmark_only = True
+        self.min_order = 1
+        self.alpha = kwargs.get("alpha", 0.05)
+        self.order = kwargs.get("order", 1)
+        self.lag = None
+        self.k = kwargs.get("k", 30)
+
+    def train(self, data, **kwargs):
+        if kwargs.get('order', None) is not None:
+            self.order = kwargs.get('order', 1)
+
+        self.data = data
+
+        #self.lagdata, = lagmat(data, maxlag=self.order, trim="both", original='sep')
+
+    def knn(self, sample):
+
+        if self.order == 1:
+            dist = np.apply_along_axis(lambda x: (x - sample) ** 2, 0, self.data)
+            ix = np.argsort(dist) + 1
+        else:
+            dist = []
+            for k in np.arange(self.order, len(self.data)):
+                dist.append(sum([(self.data[k - kk] - sample[kk]) ** 2 for kk in range(self.order)]))
+            ix = np.argsort(np.array(dist)) + self.order + 1
+
+        ix = np.clip(ix, 0, len(self.data) - 1)
+        return self.data[ix[:self.k]]
+
+    def forecast_distribution(self, data, **kwargs):
+        ret = []
+
+        smooth = kwargs.get("smooth", "KDE")
+        alpha = kwargs.get("alpha", None)
+
+        uod = self.get_UoD()
+
+        for k in np.arange(self.order, len(data)):
+
+            sample = data[k - self.order: k]
+
+            forecasts = self.knn(sample)
+
+            dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, data=forecasts,
+                                                                   name="", **kwargs)
+            ret.append(dist)
+
+        return ret
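
For reference, a minimal usage sketch of the new benchmark model, mirroring the fit/predict flow that the test script at the end of this commit uses for ARIMA (the dataset and slice bounds are illustrative, and this assumes the same fts.FTS fit/predict wrappers the other benchmark models rely on):

    from pyFTS.benchmarks import knn
    from pyFTS.data import TAIEX

    dataset = TAIEX.get_data()

    model = knn.KNearestNeighbors("", order=2, k=30)
    model.fit(dataset[:800])
    distributions = model.predict(dataset[800:1000], type='distribution')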
|
@@ -19,7 +19,7 @@ class QuantileRegression(fts.FTS):
         self.has_interval_forecasting = True
         self.has_probability_forecasting = True
         self.benchmark_only = True
-        self.minOrder = 1
+        self.min_order = 1
         self.alpha = kwargs.get("alpha", 0.05)
         self.dist = kwargs.get("dist", False)
         self.upper_qt = None
@@ -28,15 +28,14 @@ class QuantileRegression(fts.FTS):
         self.dist_qt = None
         self.shortname = "QAR("+str(self.order)+","+str(self.alpha)+")"

-    def train(self, data, sets, order=1, parameters=None):
-        self.order = order
+    def train(self, data, **kwargs):
+        if kwargs.get('order', None) is not None:
+            self.order = kwargs.get('order', 1)

         if self.indexer is not None and isinstance(data, pd.DataFrame):
             data = self.indexer.get_data(data)

-        tmp = np.array(self.apply_transformations(data, updateUoD=True))
-
-        lagdata, ndata = lagmat(tmp, maxlag=order, trim="both", original='sep')
+        lagdata, ndata = lagmat(data, maxlag=self.order, trim="both", original='sep')

         mqt = QuantReg(ndata, lagdata).fit(0.5)
         if self.alpha is not None:
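
train() now builds its regression matrices directly with statsmodels' lagmat: with original='sep' it returns the lagged regressors and the aligned target column as separate arrays. A small illustration:

    import numpy as np
    from statsmodels.tsa.tsatools import lagmat

    x = np.arange(10.0, 20.0)
    lags, target = lagmat(x, maxlag=2, trim="both", original="sep")
    # each row of lags holds the previous 2 values for the
    # corresponding entry of target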
@@ -76,12 +75,8 @@ class QuantileRegression(fts.FTS):
         up = self.linearmodel([k[1] for k in data], up_params)
         return [lo, up]

-    def forecast(self, data, **kwargs):
+    def forecast(self, ndata, **kwargs):

-        if self.indexer is not None and isinstance(data, pd.DataFrame):
-            data = self.indexer.get_data(data)
-
-        ndata = np.array(self.apply_transformations(data))
         l = len(ndata)

         ret = []
@@ -91,16 +86,9 @@ class QuantileRegression(fts.FTS):

             ret.append(self.linearmodel(sample, self.mean_qt))

-        ret = self.apply_inverse_transformations(ret, params=[data[self.order - 1:]])
-
         return ret

-    def forecast_interval(self, data, **kwargs):
+    def forecast_interval(self, ndata, **kwargs):

-        if self.indexer is not None and isinstance(data, pd.DataFrame):
-            data = self.indexer.get_data(data)
-
-        ndata = np.array(self.apply_transformations(data))
-
         l = len(ndata)

@@ -110,16 +98,9 @@ class QuantileRegression(fts.FTS):
             sample = ndata[k - self.order: k]
             ret.append(self.point_to_interval(sample, self.lower_qt, self.upper_qt))

-        ret = self.apply_inverse_transformations(ret, params=[data[self.order - 1:]], interval=True)
-
         return ret

-    def forecast_ahead_interval(self, data, steps, **kwargs):
+    def forecast_ahead_interval(self, ndata, steps, **kwargs):

-        if self.indexer is not None and isinstance(data, pd.DataFrame):
-            data = self.indexer.get_data(data)
-
-        ndata = np.array(self.apply_transformations(data))
-
         smoothing = kwargs.get("smoothing", 0.9)

@@ -137,20 +118,13 @@ class QuantileRegression(fts.FTS):

             ret.append([intl[0]*(1 + k*smoothing), intl[1]*(1 + k*smoothing)])

-        ret = self.apply_inverse_transformations(ret, params=[[data[-1] for a in np.arange(0, steps + self.order)]], interval=True)
-
         return ret[-steps:]

-    def forecast_distribution(self, data, **kwargs):
+    def forecast_distribution(self, ndata, **kwargs):

-        if self.indexer is not None and isinstance(data, pd.DataFrame):
-            data = self.indexer.get_data(data)
-
-        ndata = np.array(self.apply_transformations(data))
-
         ret = []

-        l = len(data)
+        l = len(ndata)

         for k in np.arange(self.order, l + 1):
             dist = ProbabilityDistribution.ProbabilityDistribution(type="histogram",
@@ -167,12 +141,7 @@ class QuantileRegression(fts.FTS):

         return ret

-    def forecast_ahead_distribution(self, data, steps, **kwargs):
+    def forecast_ahead_distribution(self, ndata, steps, **kwargs):

-        if self.indexer is not None and isinstance(data, pd.DataFrame):
-            data = self.indexer.get_data(data)
-
-        ndata = np.array(self.apply_transformations(data))
-
         ret = []

@@ -42,7 +42,6 @@ class FLRG(object):
     def get_membership(self, data, sets):
         ret = 0.0
         if isinstance(self.LHS, (list, set)):
-            assert len(self.LHS) == len(data)
             ret = np.nanmin([sets[self.LHS[ct]].membership(dat) for ct, dat in enumerate(data)])
         else:
             ret = sets[self.LHS].membership(data)
@@ -5,6 +5,7 @@ import numpy as np
 import pandas as pd
 from pyFTS.common import SortedCollection, fts, tree
 from pyFTS.models import chen, cheng, hofts, hwang, ismailefendi, sadaei, song, yu
+from pyFTS.probabilistic import ProbabilityDistribution
 import scipy.stats as st


@@ -171,29 +172,52 @@ class EnsembleFTS(fts.FTS):

         return ret

-    def empty_grid(self, resolution):
-        return self.get_empty_grid(-(self.original_max*2), self.original_max*2, resolution)
+    def forecast_distribution(self, data, **kwargs):
+        ret = []

+        smooth = kwargs.get("smooth", "KDE")
+        alpha = kwargs.get("alpha", None)
+
+        uod = self.get_UoD()
+
+        for k in np.arange(self.order, len(data)):
+
+            sample = data[k-self.order : k]
+
+            forecasts = self.get_models_forecasts(sample)
+
+            if alpha is None:
+                forecasts = np.ravel(forecasts).tolist()
+            else:
+                forecasts = self.get_distribution_interquantile(np.ravel(forecasts).tolist(), alpha)
+
+            dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, data=forecasts,
+                                                                   name="", **kwargs)
+
+            ret.append(dist)
+
+        return ret
+
     def forecast_ahead_distribution(self, data, steps, **kwargs):
         if 'method' in kwargs:
             self.point_method = kwargs.get('method','mean')

-        percentile_size = (self.original_max - self.original_min) / 100
-
-        resolution = kwargs.get('resolution', percentile_size)
-
-        grid = self.empty_grid(resolution)
-
-        index = SortedCollection.SortedCollection(iterable=grid.keys())
+        smooth = kwargs.get("smooth", "KDE")
+        alpha = kwargs.get("alpha", None)

         ret = []

-        samples = [[k] for k in data[-self.order:]]
+        start = kwargs.get('start', self.order)

-        for k in np.arange(self.order, steps + self.order):
+        uod = self.get_UoD()
+
+        sample = data[start - self.order: start]
+
+        for k in np.arange(self.order, steps+self.order):
             forecasts = []
             lags = {}
-            for i in np.arange(0, self.order): lags[i] = samples[k - self.order + i]
+            for i in np.arange(0, self.order): lags[i] = sample[k-self.order]

             # Build the tree with all possible paths

@@ -206,17 +230,19 @@ class EnsembleFTS(fts.FTS):

                 forecasts.extend(self.get_models_forecasts(path))

-            samples.append(sampler(forecasts, np.arange(0.1, 1, 0.1)))
+            sample.append(sampler(forecasts, np.arange(0.1, 1, 0.1)))

-            grid = self.gridCountPoint(grid, resolution, index, forecasts)
+            if alpha is None:
+                forecasts = np.ravel(forecasts).tolist()
+            else:
+                forecasts = self.get_distribution_interquantile(np.ravel(forecasts).tolist(), alpha)

-        tmp = np.array([grid[i] for i in sorted(grid)])
+            dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, data=forecasts,
+                                                                   name="", **kwargs)

-        ret.append(tmp / sum(tmp))
+            ret.append(dist)

-        grid = self.empty_grid(resolution)
-        df = pd.DataFrame(ret, columns=sorted(grid))
-        return df
+        return ret


 class AllMethodEnsembleFTS(EnsembleFTS):
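
Both ensemble distribution methods now return ProbabilityDistribution objects built from the pooled member forecasts instead of normalized histogram grids, so the old empty_grid/gridCountPoint machinery goes away. The new code relies on get_distribution_interquantile to trim outlying member forecasts when alpha is given; that method's body is not shown in this commit, so the following is only a plausible sketch of what such a trim might do, not the actual implementation:

    import numpy as np

    def interquantile(forecasts, alpha):
        # hypothetical sketch: keep only values between the alpha/2
        # and 1 - alpha/2 empirical quantiles of the pooled forecasts
        lo = np.percentile(forecasts, 100 * alpha / 2)
        hi = np.percentile(forecasts, 100 * (1 - alpha / 2))
        return [x for x in forecasts if lo <= x <= hi]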
@@ -26,6 +26,7 @@ class IntervalFTS(hofts.HighOrderFTS):
         self.has_point_forecasting = False
         self.has_interval_forecasting = True
         self.is_high_order = True
+        self.min_order = 1

     def get_upper(self, flrg):
         if flrg.get_key() in self.flrgs:
@@ -105,6 +105,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
         self.has_interval_forecasting = True
         self.has_probability_forecasting = True
         self.is_high_order = True
+        self.min_order = 1
         self.auto_update = kwargs.get('update',False)


@@ -14,15 +14,25 @@ class ProbabilityDistribution(object):
     def __init__(self, type = "KDE", **kwargs):
         self.uod = kwargs.get("uod", None)

-        self.type = type
-        if self.type == "KDE":
-            self.kde = kde.KernelSmoothing(kwargs.get("h", 0.5), kwargs.get("kernel", "epanechnikov"))
-
-        self.nbins = kwargs.get("num_bins", 100)
+        self.data = []
+
+        self.type = type

         self.bins = kwargs.get("bins", None)
         self.labels = kwargs.get("bins_labels", None)

+        data = kwargs.get("data", None)
+
+        if self.type == "KDE":
+            self.kde = kde.KernelSmoothing(kwargs.get("h", 0.5), kwargs.get("kernel", "epanechnikov"))
+            _min = np.nanmin(data)
+            _min = _min * .7 if _min > 0 else _min * 1.3
+            _max = np.nanmax(data)
+            _max = _max * 1.3 if _max > 0 else _max * .7
+            self.uod = [_min, _max]
+
+        self.nbins = kwargs.get("num_bins", 100)
+
         if self.bins is None:
             self.bins = np.linspace(int(self.uod[0]), int(self.uod[1]), int(self.nbins)).tolist()
             self.labels = [str(k) for k in self.bins]
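
When a KDE distribution is built from data, the universe of discourse is now padded around the observed extremes so the smoothed density has room in the tails: each bound is pushed 30% further from zero. A numeric illustration of the padding rule (values are made up):

    import numpy as np

    data = [4000.0, 5000.0, 6000.0]
    _min = np.nanmin(data)
    _min = _min * .7 if _min > 0 else _min * 1.3   # -> 2800.0
    _max = np.nanmax(data)
    _max = _max * 1.3 if _max > 0 else _max * .7   # -> 7800.0
    uod = [_min, _max]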
@@ -38,10 +48,6 @@ class ProbabilityDistribution(object):
         self.count = 0
         for k in self.bins: self.distribution[k] = 0

-        self.data = []
-
-        data = kwargs.get("data",None)
-
         if data is not None:
             self.append(data)

@@ -228,10 +234,12 @@ class ProbabilityDistribution(object):

     def __str__(self):
         ret = ""
-        for k in sorted(self.distribution.keys()):
+        for k in sorted(self.bins):
             ret += str(round(k,2)) + ':\t'
             if self.type == "histogram":
                 ret += str(round(self.distribution[k] / self.count,3))
+            elif self.type == "KDE":
+                ret += str(round(self.density(k),3))
             else:
                 ret += str(round(self.distribution[k], 6))
             ret += '\n'
@@ -15,9 +15,18 @@ from pyFTS.data import TAIEX

 dataset = TAIEX.get_data()

-from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil, Measures
+from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil, Measures, knn, quantreg, arima

-from pyFTS.models import pwfts, song
+from pyFTS.models import pwfts, song, ifts
+
+model = arima.ARIMA("", order=(1,0,0))
+model.fit(dataset[:800])
+tmp = model.predict(dataset[800:1000], type='distribution')
+for tmp2 in tmp:
+    print(tmp2)
+
 '''
 from pyFTS.partitioners import Grid, Util as pUtil
 partitioner = Grid.GridPartitioner(data=dataset[:800], npart=10, transformation=tdiff)
@@ -31,28 +40,39 @@ print(Measures.get_distribution_statistics(dataset[800:1000], model, steps_ahead
 # print(tmp2)
 '''

-#'''
+'''

 from pyFTS.benchmarks import arima, naive, quantreg

-bchmk.sliding_window_benchmarks(dataset[:1000], 1000, train=0.8, inc=0.2,
-                                #methods=[song.ConventionalFTS], #[pwfts.ProbabilisticWeightedFTS],
+bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2,
+                                methods=[ifts.IntervalFTS], #[pwfts.ProbabilisticWeightedFTS],
                                 benchmark_models=True,
-                                benchmark_methods=[naive.Naive, arima.ARIMA,arima.ARIMA], #arima.ARIMA,arima.ARIMA],
-                                #benchmark_methods=[arima.ARIMA],
-                                benchmark_methods_parameters=[1,(1,0,0),(1,0,1)], #(2,0,1),(2,0,2)],
-                                #benchmark_methods_parameters=[(1,0,0)],
+                                benchmark_methods=[arima.ARIMA for k in range(8)]
+                                    + [quantreg.QuantileRegression for k in range(4)],
+                                benchmark_methods_parameters=[
+                                    {'order': (1, 0, 0), 'alpha': .05},
+                                    {'order': (1, 0, 0), 'alpha': .25},
+                                    {'order': (1, 0, 1), 'alpha': .05},
+                                    {'order': (1, 0, 1), 'alpha': .25},
+                                    {'order': (2, 0, 1), 'alpha': .05},
+                                    {'order': (2, 0, 1), 'alpha': .25},
+                                    {'order': (2, 0, 2), 'alpha': .05},
+                                    {'order': (2, 0, 2), 'alpha': .25},
+                                    {'order': 1, 'alpha': .05},
+                                    {'order': 1, 'alpha': .25},
+                                    {'order': 2, 'alpha': .05},
+                                    {'order': 2, 'alpha': .25},
+                                ],
                                 transformations=[None, tdiff],
-                                orders=[1, 2, 3],
-                                partitions=[35], #np.arange(10, 100, 5),
-                                progress=True, type='point',
+                                orders=[1], #2, 3],
+                                partitions=[3], #np.arange(3, 25, 2),
+                                progress=False, type='interval',
                                 #steps_ahead=[1,4,7,10], #steps_ahead=[1]
-                                #distributed=True, nodes=['192.168.0.110', '192.168.0.105','192.168.0.106'],
-                                file="benchmarks.tmp", dataset="TAIEX", tag="comparisons")
-#save=True, file="tmp.db")
+                                distributed=True, nodes=['192.168.0.110', '192.168.0.107','192.168.0.106'],
+                                file="benchmarks.db", dataset="TAIEX", tag="comparisons")


-#'''
+'''
 '''
 dat = pd.read_csv('pwfts_taiex_partitioning.csv', sep=';')
 print(bUtil.analytic_tabular_dataframe(dat))
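
The rewritten call pairs the two lists positionally: the k-th dict in benchmark_methods_parameters supplies the constructor kwargs for the k-th entry of benchmark_methods, so the eight ARIMA configurations come first, followed by the four quantile regression configurations. A compressed sketch of that pairing (the parameter dicts are abbreviated for illustration):

    from pyFTS.benchmarks import arima, quantreg

    methods = [arima.ARIMA] * 8 + [quantreg.QuantileRegression] * 4
    parameters = ([{'order': (1, 0, 0), 'alpha': .05}] * 8
                  + [{'order': 1, 'alpha': .05}] * 4)

    # positional pairing, as done inside sliding_window_benchmarks:
    pool = [model("", **par) for model, par in zip(methods, parameters)]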