Improvements on benchmarks.knn
This commit is contained in:
parent
0dc2fabdcc
commit
f4dec685bc
pyFTS
@ -643,7 +643,7 @@ def run_point2(fts_method, order, partitioner_method, partitions, transformation
|
|||||||
import time
|
import time
|
||||||
from pyFTS.models import yu, chen, hofts, pwfts,ismailefendi,sadaei, song, cheng, hwang
|
from pyFTS.models import yu, chen, hofts, pwfts,ismailefendi,sadaei, song, cheng, hwang
|
||||||
from pyFTS.partitioners import Grid, Entropy, FCM
|
from pyFTS.partitioners import Grid, Entropy, FCM
|
||||||
from pyFTS.benchmarks import Measures, naive, arima, quantreg
|
from pyFTS.benchmarks import Measures, naive, arima, quantreg, benchmarks
|
||||||
from pyFTS.common import Transformations
|
from pyFTS.common import Transformations
|
||||||
|
|
||||||
tmp = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, ismailefendi.ImprovedWeightedFTS,
|
tmp = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, ismailefendi.ImprovedWeightedFTS,
|
||||||
@ -664,7 +664,7 @@ def run_point2(fts_method, order, partitioner_method, partitions, transformation
|
|||||||
method = kwargs.get('method', None)
|
method = kwargs.get('method', None)
|
||||||
parameters = kwargs.get('parameters', {})
|
parameters = kwargs.get('parameters', {})
|
||||||
|
|
||||||
mfts, pttr = __build_model(fts_method, order, parameters, partitioner_method, partitions, train_data,
|
mfts, pttr = benchmarks.__build_model(fts_method, order, parameters, partitioner_method, partitions, train_data,
|
||||||
transformation)
|
transformation)
|
||||||
|
|
||||||
_start = time.time()
|
_start = time.time()
|
||||||
@ -691,7 +691,7 @@ def run_interval2(fts_method, order, partitioner_method, partitions, transformat
|
|||||||
import time
|
import time
|
||||||
from pyFTS.models import hofts,ifts,pwfts
|
from pyFTS.models import hofts,ifts,pwfts
|
||||||
from pyFTS.partitioners import Grid, Entropy, FCM
|
from pyFTS.partitioners import Grid, Entropy, FCM
|
||||||
from pyFTS.benchmarks import Measures, arima, quantreg, BSTS
|
from pyFTS.benchmarks import Measures, arima, quantreg, BSTS, benchmarks
|
||||||
|
|
||||||
tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, ifts.WeightedIntervalFTS, pwfts.ProbabilisticWeightedFTS]
|
tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, ifts.WeightedIntervalFTS, pwfts.ProbabilisticWeightedFTS]
|
||||||
|
|
||||||
@ -705,7 +705,7 @@ def run_interval2(fts_method, order, partitioner_method, partitions, transformat
|
|||||||
method = kwargs.get('method', None)
|
method = kwargs.get('method', None)
|
||||||
parameters = kwargs.get('parameters',{})
|
parameters = kwargs.get('parameters',{})
|
||||||
|
|
||||||
mfts, pttr = __build_model(fts_method, order, parameters, partitioner_method, partitions, train_data,
|
mfts, pttr = benchmarks.__build_model(fts_method, order, parameters, partitioner_method, partitions, train_data,
|
||||||
transformation)
|
transformation)
|
||||||
_start = time.time()
|
_start = time.time()
|
||||||
mfts.fit(train_data, **kwargs)
|
mfts.fit(train_data, **kwargs)
|
||||||
@ -735,7 +735,7 @@ def run_probabilistic2(fts_method, order, partitioner_method, partitions, transf
|
|||||||
from pyFTS.models import hofts, ifts, pwfts
|
from pyFTS.models import hofts, ifts, pwfts
|
||||||
from pyFTS.models.ensemble import ensemble
|
from pyFTS.models.ensemble import ensemble
|
||||||
from pyFTS.partitioners import Grid, Entropy, FCM
|
from pyFTS.partitioners import Grid, Entropy, FCM
|
||||||
from pyFTS.benchmarks import Measures, arima, quantreg, knn
|
from pyFTS.benchmarks import Measures, arima, quantreg, knn, benchmarks
|
||||||
from pyFTS.models.seasonal import SeasonalIndexer
|
from pyFTS.models.seasonal import SeasonalIndexer
|
||||||
|
|
||||||
tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS, arima.ARIMA,
|
tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS, arima.ARIMA,
|
||||||
@ -751,9 +751,8 @@ def run_probabilistic2(fts_method, order, partitioner_method, partitions, transf
|
|||||||
method = kwargs.get('method', None)
|
method = kwargs.get('method', None)
|
||||||
parameters = kwargs.get('parameters', {})
|
parameters = kwargs.get('parameters', {})
|
||||||
|
|
||||||
mfts, pttr = __build_model(fts_method, order, parameters, partitioner_method, partitions, train_data,
|
mfts, pttr = benchmarks.__build_model(fts_method, order, parameters, partitioner_method, partitions, train_data,
|
||||||
transformation)
|
transformation)
|
||||||
|
|
||||||
if mfts.has_seasonality:
|
if mfts.has_seasonality:
|
||||||
mfts.indexer = indexer
|
mfts.indexer = indexer
|
||||||
|
|
||||||
|
@ -5,17 +5,20 @@ import numpy as np
|
|||||||
from statsmodels.tsa.tsatools import lagmat
|
from statsmodels.tsa.tsatools import lagmat
|
||||||
from pyFTS.common import fts
|
from pyFTS.common import fts
|
||||||
from pyFTS.probabilistic import ProbabilityDistribution
|
from pyFTS.probabilistic import ProbabilityDistribution
|
||||||
|
from sklearn.neighbors import KDTree
|
||||||
|
from itertools import product
|
||||||
|
from pyFTS.models.ensemble.ensemble import sampler
|
||||||
|
|
||||||
class KNearestNeighbors(fts.FTS):
|
class KNearestNeighbors(fts.FTS):
|
||||||
"""
|
"""
|
||||||
K-Nearest Neighbors
|
A façade for sklearn.neighbors
|
||||||
"""
|
"""
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
super(KNearestNeighbors, self).__init__(**kwargs)
|
super(KNearestNeighbors, self).__init__(**kwargs)
|
||||||
self.name = "kNN"
|
self.name = "kNN"
|
||||||
self.shortname = "kNN"
|
self.shortname = "kNN"
|
||||||
self.detail = "K-Nearest Neighbors"
|
self.detail = "K-Nearest Neighbors"
|
||||||
|
self.uod_clip = False
|
||||||
self.is_high_order = True
|
self.is_high_order = True
|
||||||
self.has_point_forecasting = True
|
self.has_point_forecasting = True
|
||||||
self.has_interval_forecasting = True
|
self.has_interval_forecasting = True
|
||||||
@ -26,30 +29,113 @@ class KNearestNeighbors(fts.FTS):
|
|||||||
self.lag = None
|
self.lag = None
|
||||||
self.k = kwargs.get("k", 30)
|
self.k = kwargs.get("k", 30)
|
||||||
self.uod = None
|
self.uod = None
|
||||||
|
self.kdtree = None
|
||||||
|
self.values = None
|
||||||
|
|
||||||
|
def _prepare_x(self, data):
    """Build the lag vectors used to query the neighbour index.

    One vector is produced for every position ``t`` in
    ``order .. len(data) - 1``.  Each vector holds the ``order`` values
    that precede ``t``, ordered from the most recent to the oldest
    (``data[t-1], data[t-2], ...``), which is the same layout
    ``_prepare_xy`` gives to the training rows.

    :param data: indexable sequence of observations, oldest first
    :return: list of lag vectors (each a list of ``order`` values); empty
             when ``data`` holds fewer than ``order`` values
    """
    n_points = len(data)

    # A sample with exactly ``order`` values would yield an empty range
    # below; widen the bound by one so it produces a single lag vector.
    if n_points == self.order:
        n_points += 1

    return [[data[t - lag - 1] for lag in range(self.order)]
            for t in range(self.order, n_points)]
|
||||||
|
|
||||||
|
def _prepare_xy(self, data):
    """Build the training pairs (lag vector, next value).

    For every position ``t`` in ``order .. len(data) - 1`` the lag vector
    is ``[data[t-1], data[t-2], ..., data[t-order]]`` (most recent first)
    and the target is ``data[t]``.

    :param data: indexable sequence of observations, oldest first
    :return: tuple ``(X, Y)`` where ``X`` is the list of lag vectors and
             ``Y`` the list of matching targets; both are empty when
             ``data`` holds no more than ``order`` values
    """
    X = []
    Y = []

    for t in range(self.order, len(data)):
        X.append([data[t - lag - 1] for lag in range(self.order)])
        Y.append(data[t])

    return (X, Y)
|
||||||
|
|
||||||
def train(self, data, **kwargs):
|
def train(self, data, **kwargs):
|
||||||
self.data = np.array(data)
|
X,Y = self._prepare_xy(data)
|
||||||
|
|
||||||
|
self.kdtree = KDTree(X)
|
||||||
|
self.values = Y
|
||||||
|
|
||||||
def knn(self, sample):
|
def knn(self, sample):
|
||||||
|
X = self._prepare_x(sample)
|
||||||
|
_, ix = self.kdtree.query(X, self.k)
|
||||||
|
|
||||||
if self.order == 1:
|
return [self.values[k] for k in ix.flatten() ]
|
||||||
dist = np.apply_along_axis(lambda x: (x - sample) ** 2, 0, self.data)
|
|
||||||
ix = np.argsort(dist) + 1
|
|
||||||
else:
|
|
||||||
dist = []
|
|
||||||
for k in np.arange(self.order, len(self.data)):
|
|
||||||
dist.append(sum([ (self.data[k - kk] - sample[kk])**2 for kk in range(self.order)]))
|
|
||||||
ix = np.argsort(np.array(dist)) + self.order + 1
|
|
||||||
|
|
||||||
ix2 = np.clip(ix[:self.k], 0, len(self.data)-1)
|
def forecast(self, data, **kwargs):
|
||||||
return self.data[ix2]
|
ret = []
|
||||||
|
for k in np.arange(self.order, len(data)):
|
||||||
|
|
||||||
|
sample = data[k-self.order : k]
|
||||||
|
|
||||||
|
forecasts = self.knn(sample)
|
||||||
|
|
||||||
|
ret.append(np.nanmean(forecasts))
|
||||||
|
|
||||||
|
return ret
|
||||||
|
|
||||||
|
def forecast_ahead(self, data, steps, **kwargs):
    """Forecast ``steps`` values ahead by feeding each forecast back in.

    The last ``order`` values before ``start`` seed the window.  At every
    step the neighbours of the current window are looked up with
    ``self.knn`` and their NaN-ignoring mean (the same aggregation
    ``forecast`` uses) is appended and becomes part of the next window.

    :param data: indexable sequence of observations, oldest first
    :param steps: number of steps to forecast
    :keyword start: index in ``data`` right after the seed window
                    (default: ``self.order``)
    :return: list with the ``steps`` forecasted values
    """
    start = kwargs.get('start', self.order)

    history = list(data[start - self.order: start])
    n_seed = len(history)

    for _ in range(steps):
        # Query knn directly: ``forecast`` iterates over
        # ``arange(order, len(data))`` and therefore yields nothing for a
        # window that holds exactly ``order`` values.
        neighbours = self.knn(history[-self.order:])
        history.append(np.nanmean(neighbours))

    # Return every forecast, not just the element at index ``-steps``;
    # slicing from the seed length also stays correct for ``steps == 0``.
    return history[n_seed:]
|
||||||
|
|
||||||
|
def forecast_interval(self, data, **kwargs):
    """One-step-ahead interval forecasts from the neighbours' targets.

    For every position ``k`` in ``order .. len(data) - 1`` the window
    ``data[k-order:k]`` is passed to ``self.knn`` and the interval is
    the pair of percentiles ``alpha*100`` and ``(1-alpha)*100`` of the
    values it returns.

    :param data: indexable sequence of observations, oldest first
    :keyword alpha: tail fraction cut from each side of the interval
                    (default: ``self.alpha``)
    :return: list of ``[lower, upper]`` pairs, one per forecasted position
    """
    alpha = kwargs.get('alpha', self.alpha)
    percentiles = [alpha * 100, (1 - alpha) * 100]

    ret = []
    for k in range(self.order, len(data)):
        window = data[k - self.order: k]
        neighbours = self.knn(window)
        ret.append(np.percentile(neighbours, percentiles).tolist())

    return ret
|
||||||
|
|
||||||
|
def forecast_ahead_interval(self, data, steps, **kwargs):
    """Forecast ``steps`` intervals ahead by propagating candidate values.

    Every time step is represented by a list of candidate values: the
    seed steps hold a single observed value each, the forecasted steps
    hold whatever ``sampler`` returns for that step's neighbour targets.
    At each step all combinations of candidates over the last ``order``
    steps are queried through ``self.knn``; the interval is the pair of
    percentiles ``alpha*100`` and ``(1-alpha)*100`` of the pooled results.

    :param data: indexable sequence of observations, oldest first
    :param steps: number of steps to forecast
    :keyword alpha: tail fraction cut from each side of the interval
                    (default: ``self.alpha``)
    :keyword start: index in ``data`` right after the seed window
                    (default: ``self.order``)
    :return: list of ``[lower, upper]`` pairs, one per forecasted step
    """
    alpha = kwargs.get('alpha', self.alpha)
    start = kwargs.get('start', self.order)
    percentiles = [alpha * 100, (1 - alpha) * 100]

    sample = [[value] for value in data[start - self.order: start]]

    ret = []
    for k in range(self.order, steps + self.order):
        # Keep the window in chronological order (oldest first), exactly
        # like the windows ``forecast`` hands to ``knn``: ``_prepare_x``
        # is what flips it to the most-recent-first layout of the
        # training rows, so a pre-reversed window would be flipped back
        # and matched against time-reversed patterns when order > 1.
        lags = sample[k - self.order: k]

        # Trace the possible paths
        forecasts = []
        for path in product(*lags):
            forecasts.extend(self.knn(path))

        # Condensed candidate set carried forward as this step's values.
        sample.append(sampler(forecasts, np.arange(.1, 1, 0.1), bounds=True))

        ret.append(np.percentile(forecasts, percentiles).tolist())

    return ret
|
||||||
|
|
||||||
def forecast_distribution(self, data, **kwargs):
|
def forecast_distribution(self, data, **kwargs):
|
||||||
ret = []
|
ret = []
|
||||||
|
|
||||||
smooth = kwargs.get("smooth", "KDE")
|
smooth = kwargs.get("smooth", "histogram")
|
||||||
alpha = kwargs.get("alpha", None)
|
|
||||||
|
|
||||||
uod = self.get_UoD()
|
uod = self.get_UoD()
|
||||||
|
|
||||||
@ -65,4 +151,32 @@ class KNearestNeighbors(fts.FTS):
|
|||||||
|
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
def forecast_ahead_distribution(self, data, steps, **kwargs):
    """Forecast ``steps`` probability distributions ahead.

    Every time step is represented by a list of candidate values: the
    seed steps hold a single observed value each, the forecasted steps
    hold whatever ``sampler`` returns for that step's neighbour targets.
    At each step all combinations of candidates over the last ``order``
    steps are queried through ``self.knn`` and the pooled results are
    wrapped in a ``ProbabilityDistribution`` built over the model's
    universe of discourse.

    :param data: indexable sequence of observations, oldest first
    :param steps: number of steps to forecast
    :keyword smooth: distribution type passed as the first argument of
                     ``ProbabilityDistribution`` (default: ``"histogram"``)
    :keyword start: index in ``data`` right after the seed window
                    (default: ``self.order``)
    :return: list of ``ProbabilityDistribution`` objects, one per step

    All keyword arguments are also forwarded to ``ProbabilityDistribution``.
    """
    smooth = kwargs.get("smooth", "histogram")
    start = kwargs.get('start', self.order)

    uod = self.get_UoD()

    sample = [[value] for value in data[start - self.order: start]]

    ret = []
    for k in range(self.order, steps + self.order):
        # Keep the window in chronological order (oldest first), exactly
        # like the windows ``forecast`` hands to ``knn``: ``_prepare_x``
        # is what flips it to the most-recent-first layout of the
        # training rows, so a pre-reversed window would be flipped back
        # and matched against time-reversed patterns when order > 1.
        lags = sample[k - self.order: k]

        # Trace the possible paths
        forecasts = []
        for path in product(*lags):
            forecasts.extend(self.knn(path))

        dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, data=forecasts,
                                                               name="", **kwargs)
        ret.append(dist)

        # Condensed candidate set carried forward as this step's values.
        sample.append(sampler(forecasts, np.arange(.1, 1, 0.1), bounds=True))

    return ret
|
||||||
|
|
||||||
|
|
||||||
|
@ -519,8 +519,10 @@ class FTS(object):
|
|||||||
return data
|
return data
|
||||||
|
|
||||||
def get_UoD(self):
|
def get_UoD(self):
|
||||||
#return [self.original_min, self.original_max]
|
if self.partitioner is not None:
|
||||||
return [self.partitioner.min, self.partitioner.max]
|
return [self.partitioner.min, self.partitioner.max]
|
||||||
|
else:
|
||||||
|
return [self.original_min, self.original_max]
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
"""String representation of the model"""
|
"""String representation of the model"""
|
||||||
|
@ -167,8 +167,7 @@ class EnsembleFTS(fts.FTS):
|
|||||||
if "method" in kwargs:
|
if "method" in kwargs:
|
||||||
self.interval_method = kwargs.get('method','quantile')
|
self.interval_method = kwargs.get('method','quantile')
|
||||||
|
|
||||||
if 'alpha' in kwargs:
|
self.alpha = kwargs.get('alpha', self.alpha)
|
||||||
self.alpha = kwargs.get('alpha',0.05)
|
|
||||||
|
|
||||||
l = len(data)
|
l = len(data)
|
||||||
|
|
||||||
@ -189,15 +188,12 @@ class EnsembleFTS(fts.FTS):
|
|||||||
if 'method' in kwargs:
|
if 'method' in kwargs:
|
||||||
self.interval_method = kwargs.get('method','quantile')
|
self.interval_method = kwargs.get('method','quantile')
|
||||||
|
|
||||||
if 'alpha' in kwargs:
|
|
||||||
self.alpha = kwargs.get('alpha', self.alpha)
|
self.alpha = kwargs.get('alpha', self.alpha)
|
||||||
|
|
||||||
ret = []
|
ret = []
|
||||||
|
|
||||||
start = kwargs.get('start', self.order)
|
start = kwargs.get('start', self.order)
|
||||||
|
|
||||||
uod = self.get_UoD()
|
|
||||||
|
|
||||||
sample = [[k] for k in data[start - self.order: start]]
|
sample = [[k] for k in data[start - self.order: start]]
|
||||||
|
|
||||||
for k in np.arange(self.order, steps + self.order):
|
for k in np.arange(self.order, steps + self.order):
|
||||||
|
@ -13,29 +13,51 @@ from pyFTS.partitioners import Grid, Entropy, Util as pUtil, Simple
|
|||||||
from pyFTS.benchmarks import benchmarks as bchmk, Measures
|
from pyFTS.benchmarks import benchmarks as bchmk, Measures
|
||||||
from pyFTS.models import chen, yu, cheng, ismailefendi, hofts, pwfts, tsaur, song, sadaei, ifts
|
from pyFTS.models import chen, yu, cheng, ismailefendi, hofts, pwfts, tsaur, song, sadaei, ifts
|
||||||
from pyFTS.models.ensemble import ensemble
|
from pyFTS.models.ensemble import ensemble
|
||||||
from pyFTS.common import Transformations, Membership
|
from pyFTS.common import Transformations, Membership, Util
|
||||||
from pyFTS.benchmarks import arima, quantreg, BSTS, gaussianproc
|
from pyFTS.benchmarks import arima, quantreg, BSTS, gaussianproc, knn
|
||||||
from pyFTS.fcm import fts, common, GA
|
from pyFTS.fcm import fts, common, GA
|
||||||
|
|
||||||
from pyFTS.data import TAIEX, NASDAQ, SP500
|
from pyFTS.data import TAIEX, NASDAQ, SP500
|
||||||
|
|
||||||
|
# Scratch check of the kNN benchmark: fit on TAIEX, then plot multi-step
# interval and distribution forecasts against the first test points.

# Load the series once and split it, instead of fetching it twice.
taiex = TAIEX.get_data()
train = taiex[:800]
test = taiex[800:1000]

order = 2
model = knn.KNearestNeighbors(order=order)
model.fit(train)

horizon = 7

intervals05 = model.predict(test[:10], type='interval', alpha=.05, steps_ahead=horizon)

print(test[:10])
print(intervals05)

intervals25 = model.predict(test[:10], type='interval', alpha=.25, steps_ahead=horizon)
distributions = model.predict(test[:10], type='distribution', steps_ahead=horizon, smoothing=0.01, num_bins=100)

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[15, 5])
ax.plot(test[:10], label='Original', color='black')
# The labels carry no placeholders, so the former ``.format(model.order)``
# calls were no-ops; the two bands get distinct colours so they can be
# told apart in the plot.
Util.plot_interval2(intervals05, test[:10], start_at=model.order, ax=ax, color='green', label='alpha=.05')
Util.plot_interval2(intervals25, test[:10], start_at=model.order, ax=ax, color='blue', label='alpha=.25')
Util.plot_distribution2(distributions, test[:10], start_at=model.order, ax=ax, cmap="Blues")

print("")
|
||||||
|
'''
|
||||||
|
|
||||||
datasets = {}
|
datasets = {}
|
||||||
|
|
||||||
datasets['TAIEX'] = TAIEX.get_data()[:5000]
|
datasets['TAIEX'] = TAIEX.get_data()[:5000]
|
||||||
datasets['NASDAQ'] = NASDAQ.get_data()[:5000]
|
datasets['NASDAQ'] = NASDAQ.get_data()[:5000]
|
||||||
datasets['SP500'] = SP500.get_data()[10000:15000]
|
datasets['SP500'] = SP500.get_data()[10000:15000]
|
||||||
|
|
||||||
methods = [ensemble.SimpleEnsembleFTS]*8
|
methods = [ensemble.SimpleEnsembleFTS]*4
|
||||||
|
|
||||||
methods_parameters = [
|
methods_parameters = [
|
||||||
{'name': 'EnsembleFTS-HOFTS-10-.05', 'fts_method': hofts.HighOrderFTS, 'partitions': np.arange(20,50,10), 'alpha': .05},
|
{'name': 'EnsembleFTS-HOFTS-10', 'fts_method': hofts.HighOrderFTS, 'partitions': np.arange(20,50,10)},
|
||||||
{'name': 'EnsembleFTS-HOFTS-5-.05', 'fts_method': hofts.HighOrderFTS, 'partitions': np.arange(20,50,5), 'alpha': .05},
|
{'name': 'EnsembleFTS-HOFTS-5', 'fts_method': hofts.HighOrderFTS, 'partitions': np.arange(20,50,5)},
|
||||||
{'name': 'EnsembleFTS-HOFTS-10-.25', 'fts_method': hofts.HighOrderFTS, 'partitions': np.arange(20,50,10), 'alpha': .25},
|
{'name': 'EnsembleFTS-WHOFTS-10', 'fts_method': hofts.WeightedHighOrderFTS, 'partitions': np.arange(20,50,10)},
|
||||||
{'name': 'EnsembleFTS-HOFTS-5-.25', 'fts_method': hofts.HighOrderFTS, 'partitions': np.arange(20,50,5), 'alpha': .25},
|
{'name': 'EnsembleFTS-WHOFTS-5', 'fts_method': hofts.WeightedHighOrderFTS, 'partitions': np.arange(20,50,5)}
|
||||||
{'name': 'EnsembleFTS-WHOFTS-10-.05', 'fts_method': hofts.WeightedHighOrderFTS, 'partitions': np.arange(20,50,10), 'alpha': .05},
|
|
||||||
{'name': 'EnsembleFTS-WHOFTS-5-.05', 'fts_method': hofts.WeightedHighOrderFTS, 'partitions': np.arange(20,50,5), 'alpha': .05},
|
|
||||||
{'name': 'EnsembleFTS-WHOFTS-10-.25', 'fts_method': hofts.WeightedHighOrderFTS, 'partitions': np.arange(20,50,10), 'alpha': .25},
|
|
||||||
{'name': 'EnsembleFTS-WHOFTS-5-.25', 'fts_method': hofts.WeightedHighOrderFTS, 'partitions': np.arange(20,50,5), 'alpha': .25},
|
|
||||||
]
|
]
|
||||||
|
|
||||||
for dataset_name, dataset in datasets.items():
|
for dataset_name, dataset in datasets.items():
|
||||||
@ -46,6 +68,7 @@ for dataset_name, dataset in datasets.items():
|
|||||||
transformations=[None],
|
transformations=[None],
|
||||||
orders=[3],
|
orders=[3],
|
||||||
partitions=[None],
|
partitions=[None],
|
||||||
type='interval',
|
type='distribution',
|
||||||
#distributed=True, nodes=['192.168.0.110', '192.168.0.107','192.168.0.106'],
|
distributed=True, nodes=['192.168.0.110', '192.168.0.107','192.168.0.106'],
|
||||||
file="tmp.db", dataset=dataset_name, tag="gridsearch")
|
file="experiments.db", dataset=dataset_name, tag="gridsearch")
|
||||||
|
'''
|
Loading…
Reference in New Issue
Block a user