k-Nearest Neighbors benchmark method

Petrônio Cândido 2018-04-25 11:36:01 -03:00
parent 34995b72f8
commit abe9a45a47
11 changed files with 198 additions and 120 deletions

View File

@@ -387,7 +387,7 @@ def get_distribution_statistics(data, model, **kwargs):
        _s1 = time.time()
        forecasts = model.predict(data, **kwargs)
        _e1 = time.time()
-        ret.append(round(crps(data, forecasts), 3))
+        ret.append(round(crps(data[model.order:], forecasts), 3))
        ret.append(round(_e1 - _s1, 3))
    else:
        skip = kwargs.get('steps_ahead_sampler', 1)
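The crps fix above aligns targets with forecasts: a model of order n produces its first forecast only at position n, so the first n observations must be dropped from the reference series. A minimal sketch of the alignment (hypothetical lengths, not code from this commit):

import numpy as np

order = 2
data = np.arange(10)       # 10 observations
forecasts = np.zeros(8)    # an order-2 model yields len(data) - order forecasts

targets = data[order:]     # drop the first `order` observations, as crps now receives
assert len(targets) == len(forecasts)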

View File

@@ -30,7 +30,7 @@ class ARIMA(fts.FTS):
        self.benchmark_only = True
        self.min_order = 1
        self.alpha = kwargs.get("alpha", 0.05)
-        self.shortname += str(self.alpha)
+        self.order = kwargs.get("order", (1,0,0))
        self._decompose_order(self.order)

    def _decompose_order(self, order):
@@ -43,6 +43,10 @@ class ARIMA(fts.FTS):
        self.shortname = "ARIMA(" + str(self.p) + "," + str(self.d) + "," + str(self.q) + ") - " + str(self.alpha)

    def train(self, data, **kwargs):
+        self.original_min = np.nanmin(data)
+        self.original_max = np.nanmax(data)
+
        if kwargs.get('order', None) is not None:
            order = kwargs.get('order', (1,0,0))
            self._decompose_order(order)
@@ -50,8 +54,6 @@ class ARIMA(fts.FTS):
        if self.indexer is not None:
            data = self.indexer.get_data(data)

-        #data = self.apply_transformations(data, updateUoD=True)
-
        try:
            self.model = stats_arima(data, order=(self.p, self.d, self.q))
            self.model_fit = self.model.fit(disp=0)
@@ -69,9 +71,6 @@ class ARIMA(fts.FTS):
        if self.model_fit is None:
            return np.nan

-        if self.indexer is not None and isinstance(ndata, pd.DataFrame):
-            data = self.indexer.get_data(ndata)
-
        ndata = np.array(ndata)

        l = len(ndata)
@@ -101,8 +100,6 @@ class ARIMA(fts.FTS):
        sigma = np.sqrt(self.model_fit.sigma2)

-        #ndata = np.array(self.apply_transformations(data))
-
        l = len(data)

        ret = []
@@ -122,8 +119,6 @@ class ARIMA(fts.FTS):
            ret.append(tmp)

-        #ret = self.apply_inverse_transformations(ret, params=[data[self.order - 1:]], point_to_interval=True)
-
        return ret

    def forecast_ahead_interval(self, ndata, steps, **kwargs):
@@ -134,8 +129,6 @@ class ARIMA(fts.FTS):
        sigma = np.sqrt(self.model_fit.sigma2)

-        #ndata = np.array(self.apply_transformations(data))
-
        l = len(ndata)

        nmeans = self.forecast_ahead(ndata, steps, **kwargs)
@@ -152,15 +145,10 @@ class ARIMA(fts.FTS):
            ret.append(tmp)

-        #ret = self.apply_inverse_transformations(ret, params=[[data[-1] for a in np.arange(0, steps)]], interval=True)
-
        return ret

    def forecast_distribution(self, data, **kwargs):
-        if self.indexer is not None and isinstance(data, pd.DataFrame):
-            data = self.indexer.get_data(data)
-
        sigma = np.sqrt(self.model_fit.sigma2)

        l = len(data)
@@ -168,8 +156,6 @@ class ARIMA(fts.FTS):
        ret = []

        for k in np.arange(self.order, l + 1):
-            tmp = []
-
            sample = [data[i] for i in np.arange(k - self.order, k)]

            mean = self.forecast(sample)
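With order and alpha now read from the constructor kwargs (defaulting to (1,0,0) and 0.05), an ARIMA benchmark can be built and queried directly, as the updated test script at the end of this commit also does. A minimal sketch, assuming the TAIEX sample data used elsewhere in this commit:

from pyFTS.data import TAIEX
from pyFTS.benchmarks import arima

dataset = TAIEX.get_data()

model = arima.ARIMA("", order=(2, 0, 1), alpha=.05)   # order/alpha taken from kwargs
model.fit(dataset[:800])
intervals = model.predict(dataset[800:1000], type='interval')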

View File

@@ -19,7 +19,7 @@ from pyFTS.probabilistic import ProbabilityDistribution
from pyFTS.common import Transformations
from pyFTS.models import song, chen, yu, ismailefendi, sadaei, hofts, pwfts, ifts, cheng, hwang
from pyFTS.models.ensemble import ensemble
-from pyFTS.benchmarks import Measures, naive, arima, ResidualAnalysis, quantreg
+from pyFTS.benchmarks import Measures, naive, arima, ResidualAnalysis, quantreg, knn
from pyFTS.benchmarks import Util as bUtil
from pyFTS.common import Util as cUtil
# from sklearn.cross_validation import KFold
@@ -156,8 +156,7 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
    elif benchmark_methods is not None:
        for count, model in enumerate(benchmark_methods, start=0):
            par = benchmark_methods_parameters[count]
-            mfts = model(str(par if par is not None else ""))
-            mfts.order = par
+            mfts = model("", **par)
            pool.append(mfts)

    if type == 'point':
@@ -244,7 +243,6 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
    progressbar.close()

    if distributed:
-        jobs2 = []
        rng = jobs
@@ -268,10 +266,6 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
    conn.close()

-    sintetic = kwargs.get('sintetic', False)
-
-    #return synthesis_method(jobs, experiments, save, file, sintetic)
-

def get_benchmark_point_methods():
    """Return all non FTS methods for point forecasting"""
@@ -287,7 +281,7 @@ def get_point_methods():

def get_benchmark_interval_methods():
    """Return all non FTS methods for point_to_interval forecasting"""
-    return [quantreg.QuantileRegression]
+    return [ arima.ARIMA, quantreg.QuantileRegression]

def get_interval_methods():
@@ -302,7 +296,7 @@ def get_probabilistic_methods():

def get_benchmark_probabilistic_methods():
    """Return all FTS methods for probabilistic forecasting"""
-    return [arima.ARIMA, quantreg.QuantileRegression]
+    return [arima.ARIMA, quantreg.QuantileRegression, knn.KNearestNeighbors]

def run_point(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
@@ -398,6 +392,7 @@ def run_interval(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
    method = kwargs.get('method', None)

    if mfts.benchmark_only:
+        mfts.append_transformation(partitioner.transformation)
        _key = mfts.shortname + str(mfts.order if mfts.order is not None else "") + str(mfts.alpha)
    else:
        pttr = str(partitioner.__module__).split('.')[-1]
@@ -444,10 +439,11 @@ def run_probabilistic(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
    from pyFTS.models import hofts, ifts, pwfts
    from pyFTS.models.ensemble import ensemble
    from pyFTS.partitioners import Grid, Entropy, FCM
-    from pyFTS.benchmarks import Measures, arima
+    from pyFTS.benchmarks import Measures, arima, quantreg, knn
    from pyFTS.models.seasonal import SeasonalIndexer

-    tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS, arima.ARIMA, ensemble.AllMethodEnsembleFTS]
+    tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS, arima.ARIMA,
+           ensemble.AllMethodEnsembleFTS, knn.KNearestNeighbors]

    tmp2 = [Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner]
@@ -460,6 +456,7 @@ def run_probabilistic(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
    if mfts.benchmark_only:
        _key = mfts.shortname + str(mfts.order if mfts.order is not None else "") + str(mfts.alpha)
+        mfts.append_transformation(partitioner.transformation)
    else:
        pttr = str(partitioner.__module__).split('.')[-1]
        _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)

pyFTS/benchmarks/knn.py (new file, 71 lines)
View File

@@ -0,0 +1,71 @@
+#!/usr/bin/python
+# -*- coding: utf8 -*-
+
+import numpy as np
+from statsmodels.tsa.tsatools import lagmat
+
+from pyFTS.common import fts
+from pyFTS.probabilistic import ProbabilityDistribution
+
+
+class KNearestNeighbors(fts.FTS):
+    """
+    K-Nearest Neighbors
+    """
+    def __init__(self, name, **kwargs):
+        super(KNearestNeighbors, self).__init__(1, "kNN"+name)
+        self.name = "kNN"
+        self.detail = "K-Nearest Neighbors"
+        self.is_high_order = True
+        self.has_point_forecasting = True
+        self.has_interval_forecasting = True
+        self.has_probability_forecasting = True
+        self.benchmark_only = True
+        self.min_order = 1
+        self.alpha = kwargs.get("alpha", 0.05)
+        self.order = kwargs.get("order", 1)
+        self.lag = None
+        self.k = kwargs.get("k", 30)
+
+    def train(self, data, **kwargs):
+        if kwargs.get('order', None) is not None:
+            self.order = kwargs.get('order', 1)
+
+        self.data = data
+        #self.lagdata, = lagmat(data, maxlag=self.order, trim="both", original='sep')
+
+    def knn(self, sample):
+        if self.order == 1:
+            dist = np.apply_along_axis(lambda x: (x - sample) ** 2, 0, self.data)
+            ix = np.argsort(dist) + 1
+        else:
+            dist = []
+            for k in np.arange(self.order, len(self.data)):
+                dist.append(sum([(self.data[k - kk] - sample[kk]) ** 2 for kk in range(self.order)]))
+            ix = np.argsort(np.array(dist)) + self.order + 1
+
+        ix = np.clip(ix, 0, len(self.data) - 1)
+        return self.data[ix[:self.k]]
+
+    def forecast_distribution(self, data, **kwargs):
+        ret = []
+
+        smooth = kwargs.get("smooth", "KDE")
+        alpha = kwargs.get("alpha", None)
+        uod = self.get_UoD()
+
+        for k in np.arange(self.order, len(data)):
+            sample = data[k - self.order: k]
+            forecasts = self.knn(sample)
+
+            dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, data=forecasts,
+                                                                   name="", **kwargs)
+            ret.append(dist)
+
+        return ret
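A minimal usage sketch for the new benchmark, mirroring the ARIMA calls in the test script below (TAIEX data assumed; k and order are the constructor kwargs defined above):

from pyFTS.data import TAIEX
from pyFTS.benchmarks import knn

dataset = TAIEX.get_data()

model = knn.KNearestNeighbors("", order=2, k=30)
model.fit(dataset[:800])                 # stores the training series for neighbor search
distributions = model.predict(dataset[800:1000], type='distribution')
for d in distributions:
    print(d)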

View File

@@ -19,7 +19,7 @@ class QuantileRegression(fts.FTS):
        self.has_interval_forecasting = True
        self.has_probability_forecasting = True
        self.benchmark_only = True
-        self.minOrder = 1
+        self.min_order = 1
        self.alpha = kwargs.get("alpha", 0.05)
        self.dist = kwargs.get("dist", False)
        self.upper_qt = None
@@ -28,15 +28,14 @@ class QuantileRegression(fts.FTS):
        self.dist_qt = None
        self.shortname = "QAR("+str(self.order)+","+str(self.alpha)+")"

-    def train(self, data, sets, order=1, parameters=None):
-        self.order = order
+    def train(self, data, **kwargs):
+        if kwargs.get('order', None) is not None:
+            self.order = kwargs.get('order', 1)

        if self.indexer is not None and isinstance(data, pd.DataFrame):
            data = self.indexer.get_data(data)

-        tmp = np.array(self.apply_transformations(data, updateUoD=True))
-
-        lagdata, ndata = lagmat(tmp, maxlag=order, trim="both", original='sep')
+        lagdata, ndata = lagmat(data, maxlag=self.order, trim="both", original='sep')

        mqt = QuantReg(ndata, lagdata).fit(0.5)
        if self.alpha is not None:
@@ -76,12 +75,8 @@ class QuantileRegression(fts.FTS):
        up = self.linearmodel([k[1] for k in data], up_params)
        return [lo, up]

-    def forecast(self, data, **kwargs):
-        if self.indexer is not None and isinstance(data, pd.DataFrame):
-            data = self.indexer.get_data(data)
-
-        ndata = np.array(self.apply_transformations(data))
+    def forecast(self, ndata, **kwargs):

        l = len(ndata)

        ret = []
@@ -91,16 +86,9 @@ class QuantileRegression(fts.FTS):
            ret.append(self.linearmodel(sample, self.mean_qt))

-        ret = self.apply_inverse_transformations(ret, params=[data[self.order - 1:]])
-
        return ret

-    def forecast_interval(self, data, **kwargs):
-        if self.indexer is not None and isinstance(data, pd.DataFrame):
-            data = self.indexer.get_data(data)
-
-        ndata = np.array(self.apply_transformations(data))
+    def forecast_interval(self, ndata, **kwargs):

        l = len(ndata)
@@ -110,16 +98,9 @@ class QuantileRegression(fts.FTS):
            sample = ndata[k - self.order: k]
            ret.append(self.point_to_interval(sample, self.lower_qt, self.upper_qt))

-        ret = self.apply_inverse_transformations(ret, params=[data[self.order - 1:]], interval=True)
-
        return ret

-    def forecast_ahead_interval(self, data, steps, **kwargs):
-        if self.indexer is not None and isinstance(data, pd.DataFrame):
-            data = self.indexer.get_data(data)
-
-        ndata = np.array(self.apply_transformations(data))
+    def forecast_ahead_interval(self, ndata, steps, **kwargs):

        smoothing = kwargs.get("smoothing", 0.9)
@@ -137,20 +118,13 @@ class QuantileRegression(fts.FTS):
            ret.append([intl[0]*(1 + k*smoothing), intl[1]*(1 + k*smoothing)])

-        ret = self.apply_inverse_transformations(ret, params=[[data[-1] for a in np.arange(0, steps + self.order)]], interval=True)
-
        return ret[-steps:]

-    def forecast_distribution(self, data, **kwargs):
-        if self.indexer is not None and isinstance(data, pd.DataFrame):
-            data = self.indexer.get_data(data)
-
-        ndata = np.array(self.apply_transformations(data))
+    def forecast_distribution(self, ndata, **kwargs):

        ret = []

-        l = len(data)
+        l = len(ndata)

        for k in np.arange(self.order, l + 1):
            dist = ProbabilityDistribution.ProbabilityDistribution(type="histogram",
@@ -167,12 +141,7 @@ class QuantileRegression(fts.FTS):
        return ret

-    def forecast_ahead_distribution(self, data, steps, **kwargs):
-        if self.indexer is not None and isinstance(data, pd.DataFrame):
-            data = self.indexer.get_data(data)
-
-        ndata = np.array(self.apply_transformations(data))
+    def forecast_ahead_distribution(self, ndata, steps, **kwargs):

        ret = []
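QuantileRegression now follows the same kwargs-based train signature as the other benchmark models, so the order can be given at construction or at fit time. A sketch, assuming the generic fit/predict front-end used by the other benchmarks in this commit:

from pyFTS.benchmarks import quantreg

model = quantreg.QuantileRegression("", order=2, alpha=.25)
model.fit(dataset[:800])                  # or: model.fit(dataset[:800], order=2)
intervals = model.predict(dataset[800:1000], type='interval')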

View File

@@ -42,7 +42,6 @@ class FLRG(object):
    def get_membership(self, data, sets):
        ret = 0.0
        if isinstance(self.LHS, (list, set)):
-            assert len(self.LHS) == len(data)
            ret = np.nanmin([sets[self.LHS[ct]].membership(dat) for ct, dat in enumerate(data)])
        else:
            ret = sets[self.LHS].membership(data)

View File

@@ -5,6 +5,7 @@ import numpy as np
import pandas as pd
from pyFTS.common import SortedCollection, fts, tree
from pyFTS.models import chen, cheng, hofts, hwang, ismailefendi, sadaei, song, yu
+from pyFTS.probabilistic import ProbabilityDistribution
import scipy.stats as st
@@ -171,29 +172,52 @@ class EnsembleFTS(fts.FTS):
        return ret

-    def empty_grid(self, resolution):
-        return self.get_empty_grid(-(self.original_max*2), self.original_max*2, resolution)
+    def forecast_distribution(self, data, **kwargs):
+        ret = []
+
+        smooth = kwargs.get("smooth", "KDE")
+        alpha = kwargs.get("alpha", None)
+        uod = self.get_UoD()
+
+        for k in np.arange(self.order, len(data)):
+            sample = data[k-self.order : k]
+            forecasts = self.get_models_forecasts(sample)
+
+            if alpha is None:
+                forecasts = np.ravel(forecasts).tolist()
+            else:
+                forecasts = self.get_distribution_interquantile(np.ravel(forecasts).tolist(), alpha)
+
+            dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, data=forecasts,
+                                                                   name="", **kwargs)
+            ret.append(dist)
+
+        return ret

    def forecast_ahead_distribution(self, data, steps, **kwargs):
        if 'method' in kwargs:
            self.point_method = kwargs.get('method','mean')

-        percentile_size = (self.original_max - self.original_min) / 100
-
-        resolution = kwargs.get('resolution', percentile_size)
-
-        grid = self.empty_grid(resolution)
-
-        index = SortedCollection.SortedCollection(iterable=grid.keys())
+        smooth = kwargs.get("smooth", "KDE")
+        alpha = kwargs.get("alpha", None)

        ret = []

-        samples = [[k] for k in data[-self.order:]]
+        start = kwargs.get('start', self.order)
+
+        uod = self.get_UoD()
+
+        sample = data[start - self.order: start]

-        for k in np.arange(self.order, steps + self.order):
+        for k in np.arange(self.order, steps+self.order):
            forecasts = []

            lags = {}
-            for i in np.arange(0, self.order): lags[i] = samples[k - self.order + i]
+            for i in np.arange(0, self.order): lags[i] = sample[k-self.order]

            # Build the tree with all possible paths
@@ -206,17 +230,19 @@ class EnsembleFTS(fts.FTS):
            forecasts.extend(self.get_models_forecasts(path))

-            samples.append(sampler(forecasts, np.arange(0.1, 1, 0.1)))
+            sample.append(sampler(forecasts, np.arange(0.1, 1, 0.1)))

-            grid = self.gridCountPoint(grid, resolution, index, forecasts)
-
-            tmp = np.array([grid[i] for i in sorted(grid)])
-
-            ret.append(tmp / sum(tmp))
-            grid = self.empty_grid(resolution)
+            if alpha is None:
+                forecasts = np.ravel(forecasts).tolist()
+            else:
+                forecasts = self.get_distribution_interquantile(np.ravel(forecasts).tolist(), alpha)

-        df = pd.DataFrame(ret, columns=sorted(grid))
-        return df
+            dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, data=forecasts,
+                                                                   name="", **kwargs)
+            ret.append(dist)
+
+        return ret

class AllMethodEnsembleFTS(EnsembleFTS):
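The ensemble's distribution forecasts now return ProbabilityDistribution objects (KDE-smoothed by default) instead of a normalized grid DataFrame. A small sketch of how the pooled member forecasts become one distribution (hypothetical forecast values):

from pyFTS.probabilistic import ProbabilityDistribution

forecasts = [10.1, 9.8, 10.4, 10.0, 9.9]   # pooled point forecasts from ensemble members

dist = ProbabilityDistribution.ProbabilityDistribution("KDE", data=forecasts, name="")
print(dist.density(10.0))                   # kernel-smoothed density at a query point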

View File

@@ -26,6 +26,7 @@ class IntervalFTS(hofts.HighOrderFTS):
        self.has_point_forecasting = False
        self.has_interval_forecasting = True
        self.is_high_order = True
+        self.min_order = 1

    def get_upper(self, flrg):
        if flrg.get_key() in self.flrgs:

View File

@@ -105,6 +105,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
        self.has_interval_forecasting = True
        self.has_probability_forecasting = True
        self.is_high_order = True
+        self.min_order = 1
        self.auto_update = kwargs.get('update',False)

View File

@@ -14,15 +14,25 @@ class ProbabilityDistribution(object):
    def __init__(self, type = "KDE", **kwargs):
        self.uod = kwargs.get("uod", None)

-        self.type = type
-        if self.type == "KDE":
-            self.kde = kde.KernelSmoothing(kwargs.get("h", 0.5), kwargs.get("kernel", "epanechnikov"))
-
-        self.nbins = kwargs.get("num_bins", 100)
+        self.data = []

+        self.type = type
        self.bins = kwargs.get("bins", None)
        self.labels = kwargs.get("bins_labels", None)

+        data = kwargs.get("data", None)
+
+        if self.type == "KDE":
+            self.kde = kde.KernelSmoothing(kwargs.get("h", 0.5), kwargs.get("kernel", "epanechnikov"))
+            _min = np.nanmin(data)
+            _min = _min * .7 if _min > 0 else _min * 1.3
+            _max = np.nanmax(data)
+            _max = _max * 1.3 if _max > 0 else _max * .7
+            self.uod = [_min, _max]
+
+        self.nbins = kwargs.get("num_bins", 100)
+
        if self.bins is None:
            self.bins = np.linspace(int(self.uod[0]), int(self.uod[1]), int(self.nbins)).tolist()
            self.labels = [str(k) for k in self.bins]
@@ -38,10 +48,6 @@ class ProbabilityDistribution(object):
        self.count = 0
        for k in self.bins: self.distribution[k] = 0

-        self.data = []
-
-        data = kwargs.get("data",None)
-
        if data is not None:
            self.append(data)
@@ -228,10 +234,12 @@ class ProbabilityDistribution(object):
    def __str__(self):
        ret = ""
-        for k in sorted(self.distribution.keys()):
+        for k in sorted(self.bins):
            ret += str(round(k,2)) + ':\t'
            if self.type == "histogram":
                ret += str(round(self.distribution[k] / self.count,3))
+            elif self.type == "KDE":
+                ret += str(round(self.density(k),3))
            else:
                ret += str(round(self.distribution[k], 6))
            ret += '\n'
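For KDE-type distributions the universe of discourse is now inferred from the data, widening each extreme by 30% of its magnitude. A quick worked check of the rule above:

import numpy as np

data = [10.0, 20.0, 30.0]
_min = np.nanmin(data)                          # 10.0
_min = _min * .7 if _min > 0 else _min * 1.3    # positive min: 10.0 -> 7.0
_max = np.nanmax(data)                          # 30.0
_max = _max * 1.3 if _max > 0 else _max * .7    # positive max: 30.0 -> 39.0
uod = [_min, _max]                              # [7.0, 39.0]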

View File

@@ -15,9 +15,18 @@ from pyFTS.data import TAIEX

dataset = TAIEX.get_data()

-from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil, Measures
-from pyFTS.models import pwfts, song
+from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil, Measures, knn, quantreg, arima
+from pyFTS.models import pwfts, song, ifts
+
+model = arima.ARIMA("", order=(1,0,0))
+model.fit(dataset[:800])
+tmp = model.predict(dataset[800:1000], type='distribution')
+
+for tmp2 in tmp:
+    print(tmp2)

'''
from pyFTS.partitioners import Grid, Util as pUtil
partitioner = Grid.GridPartitioner(data=dataset[:800], npart=10, transformation=tdiff)
@@ -31,28 +40,39 @@ print(Measures.get_distribution_statistics(dataset[800:1000], model, steps_ahead
# print(tmp2)
'''

-#'''
+'''
from pyFTS.benchmarks import arima, naive, quantreg

-bchmk.sliding_window_benchmarks(dataset[:1000], 1000, train=0.8, inc=0.2,
-                                #methods=[song.ConventionalFTS], #[pwfts.ProbabilisticWeightedFTS],
+bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2,
+                                methods=[ifts.IntervalFTS], #[pwfts.ProbabilisticWeightedFTS],
                                benchmark_models=True,
-                                benchmark_methods=[naive.Naive, arima.ARIMA,arima.ARIMA], #arima.ARIMA,arima.ARIMA],
-                                #benchmark_methods=[arima.ARIMA],
-                                benchmark_methods_parameters=[1,(1,0,0),(1,0,1)], #(2,0,1),(2,0,2)],
-                                #benchmark_methods_parameters=[(1,0,0)],
+                                benchmark_methods=[arima.ARIMA for k in range(8)]
+                                                  + [quantreg.QuantileRegression for k in range(4)],
+                                benchmark_methods_parameters=[
+                                    {'order': (1, 0, 0), 'alpha': .05},
+                                    {'order': (1, 0, 0), 'alpha': .25},
+                                    {'order': (1, 0, 1), 'alpha': .05},
+                                    {'order': (1, 0, 1), 'alpha': .25},
+                                    {'order': (2, 0, 1), 'alpha': .05},
+                                    {'order': (2, 0, 1), 'alpha': .25},
+                                    {'order': (2, 0, 2), 'alpha': .05},
+                                    {'order': (2, 0, 2), 'alpha': .25},
+                                    {'order': 1, 'alpha': .05},
+                                    {'order': 1, 'alpha': .25},
+                                    {'order': 2, 'alpha': .05},
+                                    {'order': 2, 'alpha': .25},
+                                ],
                                transformations=[None, tdiff],
-                                orders=[1, 2, 3],
-                                partitions=[35], #np.arange(10, 100, 5),
-                                progress=True, type='point',
+                                orders=[1], #2, 3],
+                                partitions=[3], #np.arange(3, 25, 2),
+                                progress=False, type='interval',
                                #steps_ahead=[1,4,7,10], #steps_ahead=[1]
-                                #distributed=True, nodes=['192.168.0.110', '192.168.0.105','192.168.0.106'],
-                                file="benchmarks.tmp", dataset="TAIEX", tag="comparisons")
-                                #save=True, file="tmp.db")
-#'''
+                                distributed=True, nodes=['192.168.0.110', '192.168.0.107','192.168.0.106'],
+                                file="benchmarks.db", dataset="TAIEX", tag="comparisons")
+'''

'''
dat = pd.read_csv('pwfts_taiex_partitioning.csv', sep=';')
print(bUtil.analytic_tabular_dataframe(dat))
@@ -111,4 +131,4 @@ tmp[20].plot(ax[2][2], title='t=200')

f, ax = plt.subplots(1, 1, figsize=[20,15])
bchmk.plot_distribution(ax, 'blue', tmp, f, 0, reference_data=dataset[train_split:train_split+200])
'''