Improvements on benchmarks.knn

This commit is contained in:
Petrônio Cândido 2019-06-01 14:58:39 -03:00
parent 0dc2fabdcc
commit f4dec685bc
5 changed files with 178 additions and 44 deletions

View File

@ -643,7 +643,7 @@ def run_point2(fts_method, order, partitioner_method, partitions, transformation
import time
from pyFTS.models import yu, chen, hofts, pwfts,ismailefendi,sadaei, song, cheng, hwang
from pyFTS.partitioners import Grid, Entropy, FCM
from pyFTS.benchmarks import Measures, naive, arima, quantreg
from pyFTS.benchmarks import Measures, naive, arima, quantreg, benchmarks
from pyFTS.common import Transformations
tmp = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, ismailefendi.ImprovedWeightedFTS,
@ -664,7 +664,7 @@ def run_point2(fts_method, order, partitioner_method, partitions, transformation
method = kwargs.get('method', None)
parameters = kwargs.get('parameters', {})
mfts, pttr = __build_model(fts_method, order, parameters, partitioner_method, partitions, train_data,
mfts, pttr = benchmarks.__build_model(fts_method, order, parameters, partitioner_method, partitions, train_data,
transformation)
_start = time.time()
@ -691,7 +691,7 @@ def run_interval2(fts_method, order, partitioner_method, partitions, transformat
import time
from pyFTS.models import hofts,ifts,pwfts
from pyFTS.partitioners import Grid, Entropy, FCM
from pyFTS.benchmarks import Measures, arima, quantreg, BSTS
from pyFTS.benchmarks import Measures, arima, quantreg, BSTS, benchmarks
tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, ifts.WeightedIntervalFTS, pwfts.ProbabilisticWeightedFTS]
@ -705,7 +705,7 @@ def run_interval2(fts_method, order, partitioner_method, partitions, transformat
method = kwargs.get('method', None)
parameters = kwargs.get('parameters',{})
mfts, pttr = __build_model(fts_method, order, parameters, partitioner_method, partitions, train_data,
mfts, pttr = benchmarks.__build_model(fts_method, order, parameters, partitioner_method, partitions, train_data,
transformation)
_start = time.time()
mfts.fit(train_data, **kwargs)
@ -735,7 +735,7 @@ def run_probabilistic2(fts_method, order, partitioner_method, partitions, transf
from pyFTS.models import hofts, ifts, pwfts
from pyFTS.models.ensemble import ensemble
from pyFTS.partitioners import Grid, Entropy, FCM
from pyFTS.benchmarks import Measures, arima, quantreg, knn
from pyFTS.benchmarks import Measures, arima, quantreg, knn, benchmarks
from pyFTS.models.seasonal import SeasonalIndexer
tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS, arima.ARIMA,
@ -751,9 +751,8 @@ def run_probabilistic2(fts_method, order, partitioner_method, partitions, transf
method = kwargs.get('method', None)
parameters = kwargs.get('parameters', {})
mfts, pttr = __build_model(fts_method, order, parameters, partitioner_method, partitions, train_data,
mfts, pttr = benchmarks.__build_model(fts_method, order, parameters, partitioner_method, partitions, train_data,
transformation)
if mfts.has_seasonality:
mfts.indexer = indexer

View File

@ -5,17 +5,20 @@ import numpy as np
from statsmodels.tsa.tsatools import lagmat
from pyFTS.common import fts
from pyFTS.probabilistic import ProbabilityDistribution
from sklearn.neighbors import KDTree
from itertools import product
from pyFTS.models.ensemble.ensemble import sampler
class KNearestNeighbors(fts.FTS):
"""
K-Nearest Neighbors
A façade for sklearn.neighbors
"""
def __init__(self, **kwargs):
super(KNearestNeighbors, self).__init__(**kwargs)
self.name = "kNN"
self.shortname = "kNN"
self.detail = "K-Nearest Neighbors"
self.uod_clip = False
self.is_high_order = True
self.has_point_forecasting = True
self.has_interval_forecasting = True
@ -26,30 +29,113 @@ class KNearestNeighbors(fts.FTS):
self.lag = None
self.k = kwargs.get("k", 30)
self.uod = None
self.kdtree = None
self.values = None
def _prepare_x(self, data):
    """Build the lagged input vectors (one per forecastable point) of *data*.

    Each vector holds the previous ``self.order`` values in reverse
    chronological order (most recent first).  When *data* has exactly
    ``self.order`` points, a single vector covering the whole sample is
    still produced.
    """
    limit = len(data)
    # Guarantee at least one window when the sample length equals the order.
    if limit == self.order:
        limit += 1
    return [[data[t - lag - 1] for lag in np.arange(self.order)]
            for t in np.arange(self.order, limit)]
def _prepare_xy(self, data):
    """Split *data* into lagged input vectors X and their target values Y.

    For each index ``t >= self.order``, X gets the previous ``self.order``
    values (most recent first) and Y gets ``data[t]``.
    """
    inputs, targets = [], []
    for t in np.arange(self.order, len(data)):
        inputs.append([data[t - lag - 1] for lag in np.arange(self.order)])
        targets.append(data[t])
    return (inputs, targets)
def train(self, data, **kwargs):
    """Fit the model: keep the raw series and index the lag vectors in a KDTree.

    The KDTree gives fast nearest-neighbour queries over the lagged inputs;
    ``self.values`` stores the successor value of each indexed vector.
    """
    self.data = np.array(data)
    inputs, targets = self._prepare_xy(data)
    self.kdtree = KDTree(inputs)
    self.values = targets
def knn(self, sample):
# NOTE(review): this span is a merged diff rendering — the new KDTree-based
# lookup and the old brute-force distance search are interleaved here, and the
# first `return` below makes the last two lines unreachable as shown.
# Reconstruct the authoritative version from the repository before editing.
X = self._prepare_x(sample)
# New implementation: query the fitted KDTree for the k nearest lag vectors.
_, ix = self.kdtree.query(X, self.k)
if self.order == 1:
# Old implementation (order 1): squared distance of every stored value to the sample.
dist = np.apply_along_axis(lambda x: (x - sample) ** 2, 0, self.data)
ix = np.argsort(dist) + 1
else:
# Old implementation (order > 1): sum of squared lag-wise distances.
dist = []
for k in np.arange(self.order, len(self.data)):
dist.append(sum([ (self.data[k - kk] - sample[kk])**2 for kk in range(self.order)]))
ix = np.argsort(np.array(dist)) + self.order + 1
# Old return: successor values of the nearest neighbours.
return [self.values[k] for k in ix.flatten() ]
# New return tail: clip the k nearest indices into range and return raw values.
ix2 = np.clip(ix[:self.k], 0, len(self.data)-1)
return self.data[ix2]
def forecast(self, data, **kwargs):
    """Point forecast: the NaN-safe mean of the k nearest neighbours.

    Produces one forecast per index ``t >= self.order`` of *data*, using
    the window of the previous ``self.order`` values as the query sample.
    """
    predictions = []
    for t in np.arange(self.order, len(data)):
        window = data[t - self.order: t]
        neighbours = self.knn(window)
        predictions.append(np.nanmean(neighbours))
    return predictions
def forecast_ahead(self, data, steps, **kwargs):
    """Iterated point forecast: each forecast is fed back as input for the next step.

    :param data: historical values; needs at least ``self.order`` points before *start*
    :param steps: number of steps ahead to forecast
    :keyword start: index in *data* where forecasting begins (default: ``self.order``)
    :return: list with the ``steps`` forecasted values
    """
    start = kwargs.get('start', self.order)
    # Seed the rolling window with the last `order` known values.
    sample = [value for value in data[start - self.order: start]]
    for k in np.arange(self.order, steps + self.order):
        # self.forecast returns a one-element list for a window of length `order`;
        # append the scalar (not the list) so later windows stay purely numeric.
        tmp = self.forecast(sample[k - self.order: k])
        sample.append(tmp[-1])
    # Return all forecasted values, consistent with forecast_ahead_interval /
    # forecast_ahead_distribution which return one result per step.
    return sample[-steps:]
def forecast_interval(self, data, **kwargs):
    """Interval forecast: the [alpha, 1-alpha] percentiles of the neighbours.

    :keyword alpha: significance level (default: ``self.alpha``)
    :return: list of ``[lower, upper]`` pairs, one per forecastable point
    """
    significance = kwargs.get('alpha', self.alpha)
    intervals = []
    for t in np.arange(self.order, len(data)):
        window = data[t - self.order: t]
        neighbours = self.knn(window)
        bounds = np.percentile(neighbours,
                               [significance * 100, (1 - significance) * 100]).tolist()
        intervals.append(bounds)
    return intervals
def forecast_ahead_interval(self, data, steps, **kwargs):
    """Iterated interval forecast.

    Every combination of sampled lag values is traced; the pooled neighbour
    values of all paths give the [alpha, 1-alpha] percentile interval of
    each step.

    :keyword alpha: significance level (default: ``self.alpha``)
    :keyword start: index in *data* where forecasting begins (default: ``self.order``)
    :return: list of ``[lower, upper]`` pairs, one per step
    """
    significance = kwargs.get('alpha', self.alpha)
    intervals = []
    start = kwargs.get('start', self.order)
    # Each position of `sample` holds a *list* of candidate values for that lag.
    sample = [[value] for value in data[start - self.order: start]]
    for t in np.arange(self.order, steps + self.order):
        pooled = []
        lags = [sample[t - i - 1] for i in np.arange(0, self.order)]
        # Trace the possible paths
        for path in product(*lags):
            pooled.extend(self.knn(path))
        # Keep a reduced, representative set of values to seed the next step.
        sample.append(sampler(pooled, np.arange(.1, 1, 0.1), bounds=True))
        bounds = np.percentile(pooled,
                               [significance * 100, (1 - significance) * 100]).tolist()
        intervals.append(bounds)
    return intervals
def forecast_distribution(self, data, **kwargs):
ret = []
smooth = kwargs.get("smooth", "KDE")
alpha = kwargs.get("alpha", None)
smooth = kwargs.get("smooth", "histogram")
uod = self.get_UoD()
@ -65,4 +151,32 @@ class KNearestNeighbors(fts.FTS):
return ret
def forecast_ahead_distribution(self, data, steps, **kwargs):
    """Iterated probabilistic forecast.

    For each step, traces every combination of sampled lag values, pools the
    neighbour values of all paths and builds one ProbabilityDistribution
    over the Universe of Discourse.

    :keyword smooth: smoothing method of the distribution (default: "histogram")
    :keyword start: index in *data* where forecasting begins (default: ``self.order``)
    :return: list of ProbabilityDistribution objects, one per step
    """
    smooth = kwargs.get("smooth", "histogram")
    distributions = []
    start = kwargs.get('start', self.order)
    uod = self.get_UoD()
    # Each position of `sample` holds a list of candidate values for that lag.
    sample = [[value] for value in data[start - self.order: start]]
    for t in np.arange(self.order, steps + self.order):
        pooled = []
        lags = [sample[t - i - 1] for i in np.arange(0, self.order)]
        # Trace the possible paths
        for path in product(*lags):
            pooled.extend(self.knn(path))
        dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, data=pooled,
                                                               name="", **kwargs)
        distributions.append(dist)
        # Sample representative values to seed the next step's lags.
        sample.append(sampler(pooled, np.arange(.1, 1, 0.1), bounds=True))
    return distributions

View File

@ -519,8 +519,10 @@ class FTS(object):
return data
def get_UoD(self):
    """Return the Universe of Discourse as ``[min, max]``.

    Prefers the partitioner's bounds; falls back to the extremes observed
    in the training data when the model has no partitioner (e.g. kNN and
    regression benchmark models).
    """
    if self.partitioner is not None:
        return [self.partitioner.min, self.partitioner.max]
    else:
        return [self.original_min, self.original_max]
def __str__(self):
"""String representation of the model"""

View File

@ -167,8 +167,7 @@ class EnsembleFTS(fts.FTS):
if "method" in kwargs:
self.interval_method = kwargs.get('method','quantile')
if 'alpha' in kwargs:
self.alpha = kwargs.get('alpha',0.05)
self.alpha = kwargs.get('alpha', self.alpha)
l = len(data)
@ -189,15 +188,12 @@ class EnsembleFTS(fts.FTS):
if 'method' in kwargs:
self.interval_method = kwargs.get('method','quantile')
if 'alpha' in kwargs:
self.alpha = kwargs.get('alpha', self.alpha)
self.alpha = kwargs.get('alpha', self.alpha)
ret = []
start = kwargs.get('start', self.order)
uod = self.get_UoD()
sample = [[k] for k in data[start - self.order: start]]
for k in np.arange(self.order, steps + self.order):

View File

@ -13,29 +13,51 @@ from pyFTS.partitioners import Grid, Entropy, Util as pUtil, Simple
from pyFTS.benchmarks import benchmarks as bchmk, Measures
from pyFTS.models import chen, yu, cheng, ismailefendi, hofts, pwfts, tsaur, song, sadaei, ifts
from pyFTS.models.ensemble import ensemble
from pyFTS.common import Transformations, Membership
from pyFTS.benchmarks import arima, quantreg, BSTS, gaussianproc
from pyFTS.common import Transformations, Membership, Util
from pyFTS.benchmarks import arima, quantreg, BSTS, gaussianproc, knn
from pyFTS.fcm import fts, common, GA
from pyFTS.data import TAIEX, NASDAQ, SP500
train = TAIEX.get_data()[:800]
test = TAIEX.get_data()[800:1000]
order = 2
model = knn.KNearestNeighbors(order=order)
model.fit(train)
horizon=7
intervals05 = model.predict(test[:10], type='interval', alpha=.05, steps_ahead=horizon)
print(test[:10])
print(intervals05)
intervals25 = model.predict(test[:10], type='interval', alpha=.25, steps_ahead=horizon)
distributions = model.predict(test[:10], type='distribution', steps_ahead=horizon, smoothing=0.01, num_bins=100)
fig, ax = plt.subplots(nrows=1, ncols=1,figsize=[15,5])
ax.plot(test[:10], label='Original',color='black')
Util.plot_interval2(intervals05, test[:10], start_at=model.order, ax=ax, color='green', label='alpha=.05'.format(model.order))
Util.plot_interval2(intervals25, test[:10], start_at=model.order, ax=ax, color='green', label='alpha=.25'.format(model.order))
Util.plot_distribution2(distributions, test[:10], start_at=model.order, ax=ax, cmap="Blues")
print("")
'''
datasets = {}
datasets['TAIEX'] = TAIEX.get_data()[:5000]
datasets['NASDAQ'] = NASDAQ.get_data()[:5000]
datasets['SP500'] = SP500.get_data()[10000:15000]
methods = [ensemble.SimpleEnsembleFTS]*8
methods = [ensemble.SimpleEnsembleFTS]*4
methods_parameters = [
{'name': 'EnsembleFTS-HOFTS-10-.05', 'fts_method': hofts.HighOrderFTS, 'partitions': np.arange(20,50,10), 'alpha': .05},
{'name': 'EnsembleFTS-HOFTS-5-.05', 'fts_method': hofts.HighOrderFTS, 'partitions': np.arange(20,50,5), 'alpha': .05},
{'name': 'EnsembleFTS-HOFTS-10-.25', 'fts_method': hofts.HighOrderFTS, 'partitions': np.arange(20,50,10), 'alpha': .25},
{'name': 'EnsembleFTS-HOFTS-5-.25', 'fts_method': hofts.HighOrderFTS, 'partitions': np.arange(20,50,5), 'alpha': .25},
{'name': 'EnsembleFTS-WHOFTS-10-.05', 'fts_method': hofts.WeightedHighOrderFTS, 'partitions': np.arange(20,50,10), 'alpha': .05},
{'name': 'EnsembleFTS-WHOFTS-5-.05', 'fts_method': hofts.WeightedHighOrderFTS, 'partitions': np.arange(20,50,5), 'alpha': .05},
{'name': 'EnsembleFTS-WHOFTS-10-.25', 'fts_method': hofts.WeightedHighOrderFTS, 'partitions': np.arange(20,50,10), 'alpha': .25},
{'name': 'EnsembleFTS-WHOFTS-5-.25', 'fts_method': hofts.WeightedHighOrderFTS, 'partitions': np.arange(20,50,5), 'alpha': .25},
{'name': 'EnsembleFTS-HOFTS-10', 'fts_method': hofts.HighOrderFTS, 'partitions': np.arange(20,50,10)},
{'name': 'EnsembleFTS-HOFTS-5', 'fts_method': hofts.HighOrderFTS, 'partitions': np.arange(20,50,5)},
{'name': 'EnsembleFTS-WHOFTS-10', 'fts_method': hofts.WeightedHighOrderFTS, 'partitions': np.arange(20,50,10)},
{'name': 'EnsembleFTS-WHOFTS-5', 'fts_method': hofts.WeightedHighOrderFTS, 'partitions': np.arange(20,50,5)}
]
for dataset_name, dataset in datasets.items():
@ -46,6 +68,7 @@ for dataset_name, dataset in datasets.items():
transformations=[None],
orders=[3],
partitions=[None],
type='interval',
#distributed=True, nodes=['192.168.0.110', '192.168.0.107','192.168.0.106'],
file="tmp.db", dataset=dataset_name, tag="gridsearch")
type='distribution',
distributed=True, nodes=['192.168.0.110', '192.168.0.107','192.168.0.106'],
file="experiments.db", dataset=dataset_name, tag="gridsearch")
'''