k-Nearest Neighbors benchmark method
This commit is contained in:
parent
34995b72f8
commit
abe9a45a47
@ -387,7 +387,7 @@ def get_distribution_statistics(data, model, **kwargs):
|
||||
_s1 = time.time()
|
||||
forecasts = model.predict(data, **kwargs)
|
||||
_e1 = time.time()
|
||||
ret.append(round(crps(data, forecasts), 3))
|
||||
ret.append(round(crps(data[model.order:], forecasts), 3))
|
||||
ret.append(round(_e1 - _s1, 3))
|
||||
else:
|
||||
skip = kwargs.get('steps_ahead_sampler', 1)
|
||||
|
@ -30,7 +30,7 @@ class ARIMA(fts.FTS):
|
||||
self.benchmark_only = True
|
||||
self.min_order = 1
|
||||
self.alpha = kwargs.get("alpha", 0.05)
|
||||
self.shortname += str(self.alpha)
|
||||
self.order = kwargs.get("order", (1,0,0))
|
||||
self._decompose_order(self.order)
|
||||
|
||||
def _decompose_order(self, order):
|
||||
@ -43,6 +43,10 @@ class ARIMA(fts.FTS):
|
||||
self.shortname = "ARIMA(" + str(self.p) + "," + str(self.d) + "," + str(self.q) + ") - " + str(self.alpha)
|
||||
|
||||
def train(self, data, **kwargs):
|
||||
|
||||
self.original_min = np.nanmin(data)
|
||||
self.original_max = np.nanmax(data)
|
||||
|
||||
if kwargs.get('order', None) is not None:
|
||||
order = kwargs.get('order', (1,0,0))
|
||||
self._decompose_order(order)
|
||||
@ -50,8 +54,6 @@ class ARIMA(fts.FTS):
|
||||
if self.indexer is not None:
|
||||
data = self.indexer.get_data(data)
|
||||
|
||||
#data = self.apply_transformations(data, updateUoD=True)
|
||||
|
||||
try:
|
||||
self.model = stats_arima(data, order=(self.p, self.d, self.q))
|
||||
self.model_fit = self.model.fit(disp=0)
|
||||
@ -69,9 +71,6 @@ class ARIMA(fts.FTS):
|
||||
if self.model_fit is None:
|
||||
return np.nan
|
||||
|
||||
if self.indexer is not None and isinstance(ndata, pd.DataFrame):
|
||||
data = self.indexer.get_data(ndata)
|
||||
|
||||
ndata = np.array(ndata)
|
||||
|
||||
l = len(ndata)
|
||||
@ -101,8 +100,6 @@ class ARIMA(fts.FTS):
|
||||
|
||||
sigma = np.sqrt(self.model_fit.sigma2)
|
||||
|
||||
#ndata = np.array(self.apply_transformations(data))
|
||||
|
||||
l = len(data)
|
||||
|
||||
ret = []
|
||||
@ -122,8 +119,6 @@ class ARIMA(fts.FTS):
|
||||
|
||||
ret.append(tmp)
|
||||
|
||||
#ret = self.apply_inverse_transformations(ret, params=[data[self.order - 1:]], point_to_interval=True)
|
||||
|
||||
return ret
|
||||
|
||||
def forecast_ahead_interval(self, ndata, steps, **kwargs):
|
||||
@ -134,8 +129,6 @@ class ARIMA(fts.FTS):
|
||||
|
||||
sigma = np.sqrt(self.model_fit.sigma2)
|
||||
|
||||
#ndata = np.array(self.apply_transformations(data))
|
||||
|
||||
l = len(ndata)
|
||||
|
||||
nmeans = self.forecast_ahead(ndata, steps, **kwargs)
|
||||
@ -152,15 +145,10 @@ class ARIMA(fts.FTS):
|
||||
|
||||
ret.append(tmp)
|
||||
|
||||
#ret = self.apply_inverse_transformations(ret, params=[[data[-1] for a in np.arange(0, steps)]], interval=True)
|
||||
|
||||
return ret
|
||||
|
||||
def forecast_distribution(self, data, **kwargs):
|
||||
|
||||
if self.indexer is not None and isinstance(data, pd.DataFrame):
|
||||
data = self.indexer.get_data(data)
|
||||
|
||||
sigma = np.sqrt(self.model_fit.sigma2)
|
||||
|
||||
l = len(data)
|
||||
@ -168,8 +156,6 @@ class ARIMA(fts.FTS):
|
||||
ret = []
|
||||
|
||||
for k in np.arange(self.order, l + 1):
|
||||
tmp = []
|
||||
|
||||
sample = [data[i] for i in np.arange(k - self.order, k)]
|
||||
|
||||
mean = self.forecast(sample)
|
||||
|
@ -19,7 +19,7 @@ from pyFTS.probabilistic import ProbabilityDistribution
|
||||
from pyFTS.common import Transformations
|
||||
from pyFTS.models import song, chen, yu, ismailefendi, sadaei, hofts, pwfts, ifts, cheng, hwang
|
||||
from pyFTS.models.ensemble import ensemble
|
||||
from pyFTS.benchmarks import Measures, naive, arima, ResidualAnalysis, quantreg
|
||||
from pyFTS.benchmarks import Measures, naive, arima, ResidualAnalysis, quantreg, knn
|
||||
from pyFTS.benchmarks import Util as bUtil
|
||||
from pyFTS.common import Util as cUtil
|
||||
# from sklearn.cross_validation import KFold
|
||||
@ -156,8 +156,7 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
|
||||
elif benchmark_methods is not None:
|
||||
for count, model in enumerate(benchmark_methods, start=0):
|
||||
par = benchmark_methods_parameters[count]
|
||||
mfts = model(str(par if par is not None else ""))
|
||||
mfts.order = par
|
||||
mfts = model("", **par)
|
||||
pool.append(mfts)
|
||||
|
||||
if type == 'point':
|
||||
@ -244,7 +243,6 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
|
||||
progressbar.close()
|
||||
|
||||
if distributed:
|
||||
jobs2 = []
|
||||
|
||||
rng = jobs
|
||||
|
||||
@ -268,10 +266,6 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
|
||||
|
||||
conn.close()
|
||||
|
||||
sintetic = kwargs.get('sintetic', False)
|
||||
|
||||
#return synthesis_method(jobs, experiments, save, file, sintetic)
|
||||
|
||||
|
||||
def get_benchmark_point_methods():
|
||||
"""Return all non FTS methods for point forecasting"""
|
||||
@ -287,7 +281,7 @@ def get_point_methods():
|
||||
|
||||
def get_benchmark_interval_methods():
    """Return all non FTS methods for interval forecasting"""
    # A stale `return [quantreg.QuantileRegression]` used to precede this line and
    # made the list below unreachable; only the list that includes ARIMA is kept.
    return [arima.ARIMA, quantreg.QuantileRegression]
|
||||
|
||||
|
||||
def get_interval_methods():
|
||||
@ -302,7 +296,7 @@ def get_probabilistic_methods():
|
||||
|
||||
def get_benchmark_probabilistic_methods():
    """Return all non FTS methods for probabilistic forecasting"""
    # A stale `return [arima.ARIMA, quantreg.QuantileRegression]` used to precede this
    # line and made the list below (which adds kNN) unreachable; only this one is kept.
    return [arima.ARIMA, quantreg.QuantileRegression, knn.KNearestNeighbors]
|
||||
|
||||
|
||||
def run_point(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
|
||||
@ -398,6 +392,7 @@ def run_interval(mfts, partitioner, train_data, test_data, window_key=None, **kw
|
||||
method = kwargs.get('method', None)
|
||||
|
||||
if mfts.benchmark_only:
|
||||
mfts.append_transformation(partitioner.transformation)
|
||||
_key = mfts.shortname + str(mfts.order if mfts.order is not None else "") + str(mfts.alpha)
|
||||
else:
|
||||
pttr = str(partitioner.__module__).split('.')[-1]
|
||||
@ -444,10 +439,11 @@ def run_probabilistic(mfts, partitioner, train_data, test_data, window_key=None,
|
||||
from pyFTS.models import hofts, ifts, pwfts
|
||||
from pyFTS.models.ensemble import ensemble
|
||||
from pyFTS.partitioners import Grid, Entropy, FCM
|
||||
from pyFTS.benchmarks import Measures, arima
|
||||
from pyFTS.benchmarks import Measures, arima, quantreg, knn
|
||||
from pyFTS.models.seasonal import SeasonalIndexer
|
||||
|
||||
tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS, arima.ARIMA, ensemble.AllMethodEnsembleFTS]
|
||||
tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS, arima.ARIMA,
|
||||
ensemble.AllMethodEnsembleFTS, knn.KNearestNeighbors]
|
||||
|
||||
tmp2 = [Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner]
|
||||
|
||||
@ -460,6 +456,7 @@ def run_probabilistic(mfts, partitioner, train_data, test_data, window_key=None,
|
||||
|
||||
if mfts.benchmark_only:
|
||||
_key = mfts.shortname + str(mfts.order if mfts.order is not None else "") + str(mfts.alpha)
|
||||
mfts.append_transformation(partitioner.transformation)
|
||||
else:
|
||||
pttr = str(partitioner.__module__).split('.')[-1]
|
||||
_key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
|
||||
|
71
pyFTS/benchmarks/knn.py
Normal file
71
pyFTS/benchmarks/knn.py
Normal file
@ -0,0 +1,71 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf8 -*-
|
||||
|
||||
import numpy as np
|
||||
from statsmodels.tsa.tsatools import lagmat
|
||||
from pyFTS.common import fts
|
||||
from pyFTS.probabilistic import ProbabilityDistribution
|
||||
|
||||
class KNearestNeighbors(fts.FTS):
    """
    K-Nearest Neighbors

    Benchmark forecaster that memorises the training series. For a query made of
    the last ``order`` observations it looks up the ``k`` most similar windows of
    ``order`` consecutive training values (squared Euclidean distance) and uses the
    values that followed those windows as the sample of a probability distribution.

    NOTE(review): only ``forecast_distribution`` is implemented in this class,
    although the point and interval capability flags are also set to True.
    """
    def __init__(self, name, **kwargs):
        """
        :param name: suffix appended to "kNN" and handed to the base class constructor
        :keyword alpha: stored in ``self.alpha`` (default 0.05)
        :keyword order: number of lagged values in a query window (default 1)
        :keyword k: number of neighbours returned by :meth:`knn` (default 30)
        """
        super(KNearestNeighbors, self).__init__(1, "kNN"+name)
        self.name = "kNN"
        self.detail = "K-Nearest Neighbors"
        self.is_high_order = True
        self.has_point_forecasting = True
        self.has_interval_forecasting = True
        self.has_probability_forecasting = True
        self.benchmark_only = True
        self.min_order = 1
        self.alpha = kwargs.get("alpha", 0.05)
        self.order = kwargs.get("order", 1)
        self.lag = None
        self.k = kwargs.get("k", 30)

    def train(self, data, **kwargs):
        """
        Memorise the training series.

        :param data: training series (assumed one-dimensional -- TODO confirm with callers)
        :keyword order: when given and not None, replaces ``self.order``
        """
        order = kwargs.get('order', None)
        if order is not None:
            self.order = order

        # Keep an ndarray so knn() can select neighbours with an index array;
        # a plain Python list would raise on ``data[index_array]``.
        self.data = np.asarray(data)

    def knn(self, sample):
        """
        Return the successors of the ``k`` training windows closest to ``sample``.

        :param sample: the ``order`` most recent observations, oldest first
        :return: array with at most ``k`` training values, nearest window first;
                 empty when the training series is not longer than ``order``
        :raises ValueError: if ``sample`` does not contain exactly ``order`` values
        """
        order = self.order
        query = np.ravel(sample)
        if len(query) != order:
            raise ValueError("sample must contain exactly %d value(s), got %d" % (order, len(query)))

        # Only windows that have a following observation are candidates, so the
        # series' last value can never be returned as its own "successor".
        n_windows = len(self.data) - order
        if n_windows <= 0:
            return self.data[:0]

        # Row i holds data[i : i + order] in chronological order, i.e. the same
        # orientation as ``query``, so lags are compared like with like.
        lagged = np.array([self.data[i:i + order] for i in range(n_windows)])
        dist = np.sum((lagged - query) ** 2, axis=1)

        nearest = np.argsort(dist)[:self.k]
        # The value following window i sits at index i + order.
        return self.data[nearest + order]

    def forecast_distribution(self, data, **kwargs):
        """
        Build one probability distribution per window ``data[k - order : k]``,
        for ``k`` ranging from ``order`` to ``len(data) - 1``.

        :param data: series to forecast over
        :keyword smooth: distribution type handed to ProbabilityDistribution (default "KDE")
        :return: list of ProbabilityDistribution objects; all ``kwargs`` are forwarded to each
        """
        ret = []

        smooth = kwargs.get("smooth", "KDE")

        uod = self.get_UoD()

        for k in np.arange(self.order, len(data)):

            sample = data[k - self.order: k]

            forecasts = self.knn(sample)

            dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, data=forecasts,
                                                                   name="", **kwargs)
            ret.append(dist)

        return ret
|
||||
|
||||
|
@ -19,7 +19,7 @@ class QuantileRegression(fts.FTS):
|
||||
self.has_interval_forecasting = True
|
||||
self.has_probability_forecasting = True
|
||||
self.benchmark_only = True
|
||||
self.minOrder = 1
|
||||
self.min_order = 1
|
||||
self.alpha = kwargs.get("alpha", 0.05)
|
||||
self.dist = kwargs.get("dist", False)
|
||||
self.upper_qt = None
|
||||
@ -28,15 +28,14 @@ class QuantileRegression(fts.FTS):
|
||||
self.dist_qt = None
|
||||
self.shortname = "QAR("+str(self.order)+","+str(self.alpha)+")"
|
||||
|
||||
def train(self, data, sets, order=1, parameters=None):
|
||||
self.order = order
|
||||
def train(self, data, **kwargs):
|
||||
if kwargs.get('order', None) is not None:
|
||||
self.order = kwargs.get('order', 1)
|
||||
|
||||
if self.indexer is not None and isinstance(data, pd.DataFrame):
|
||||
data = self.indexer.get_data(data)
|
||||
|
||||
tmp = np.array(self.apply_transformations(data, updateUoD=True))
|
||||
|
||||
lagdata, ndata = lagmat(tmp, maxlag=order, trim="both", original='sep')
|
||||
lagdata, ndata = lagmat(data, maxlag=self.order, trim="both", original='sep')
|
||||
|
||||
mqt = QuantReg(ndata, lagdata).fit(0.5)
|
||||
if self.alpha is not None:
|
||||
@ -76,12 +75,8 @@ class QuantileRegression(fts.FTS):
|
||||
up = self.linearmodel([k[1] for k in data], up_params)
|
||||
return [lo, up]
|
||||
|
||||
def forecast(self, data, **kwargs):
|
||||
def forecast(self, ndata, **kwargs):
|
||||
|
||||
if self.indexer is not None and isinstance(data, pd.DataFrame):
|
||||
data = self.indexer.get_data(data)
|
||||
|
||||
ndata = np.array(self.apply_transformations(data))
|
||||
l = len(ndata)
|
||||
|
||||
ret = []
|
||||
@ -91,16 +86,9 @@ class QuantileRegression(fts.FTS):
|
||||
|
||||
ret.append(self.linearmodel(sample, self.mean_qt))
|
||||
|
||||
ret = self.apply_inverse_transformations(ret, params=[data[self.order - 1:]])
|
||||
|
||||
return ret
|
||||
|
||||
def forecast_interval(self, data, **kwargs):
|
||||
|
||||
if self.indexer is not None and isinstance(data, pd.DataFrame):
|
||||
data = self.indexer.get_data(data)
|
||||
|
||||
ndata = np.array(self.apply_transformations(data))
|
||||
def forecast_interval(self, ndata, **kwargs):
|
||||
|
||||
l = len(ndata)
|
||||
|
||||
@ -110,16 +98,9 @@ class QuantileRegression(fts.FTS):
|
||||
sample = ndata[k - self.order: k]
|
||||
ret.append(self.point_to_interval(sample, self.lower_qt, self.upper_qt))
|
||||
|
||||
ret = self.apply_inverse_transformations(ret, params=[data[self.order - 1:]], interval=True)
|
||||
|
||||
return ret
|
||||
|
||||
def forecast_ahead_interval(self, data, steps, **kwargs):
|
||||
|
||||
if self.indexer is not None and isinstance(data, pd.DataFrame):
|
||||
data = self.indexer.get_data(data)
|
||||
|
||||
ndata = np.array(self.apply_transformations(data))
|
||||
def forecast_ahead_interval(self, ndata, steps, **kwargs):
|
||||
|
||||
smoothing = kwargs.get("smoothing", 0.9)
|
||||
|
||||
@ -137,20 +118,13 @@ class QuantileRegression(fts.FTS):
|
||||
|
||||
ret.append([intl[0]*(1 + k*smoothing), intl[1]*(1 + k*smoothing)])
|
||||
|
||||
ret = self.apply_inverse_transformations(ret, params=[[data[-1] for a in np.arange(0, steps + self.order)]], interval=True)
|
||||
|
||||
return ret[-steps:]
|
||||
|
||||
def forecast_distribution(self, data, **kwargs):
|
||||
|
||||
if self.indexer is not None and isinstance(data, pd.DataFrame):
|
||||
data = self.indexer.get_data(data)
|
||||
|
||||
ndata = np.array(self.apply_transformations(data))
|
||||
def forecast_distribution(self, ndata, **kwargs):
|
||||
|
||||
ret = []
|
||||
|
||||
l = len(data)
|
||||
l = len(ndata)
|
||||
|
||||
for k in np.arange(self.order, l + 1):
|
||||
dist = ProbabilityDistribution.ProbabilityDistribution(type="histogram",
|
||||
@ -167,12 +141,7 @@ class QuantileRegression(fts.FTS):
|
||||
|
||||
return ret
|
||||
|
||||
def forecast_ahead_distribution(self, data, steps, **kwargs):
|
||||
|
||||
if self.indexer is not None and isinstance(data, pd.DataFrame):
|
||||
data = self.indexer.get_data(data)
|
||||
|
||||
ndata = np.array(self.apply_transformations(data))
|
||||
def forecast_ahead_distribution(self, ndata, steps, **kwargs):
|
||||
|
||||
ret = []
|
||||
|
||||
|
@ -42,7 +42,6 @@ class FLRG(object):
|
||||
def get_membership(self, data, sets):
|
||||
ret = 0.0
|
||||
if isinstance(self.LHS, (list, set)):
|
||||
assert len(self.LHS) == len(data)
|
||||
ret = np.nanmin([sets[self.LHS[ct]].membership(dat) for ct, dat in enumerate(data)])
|
||||
else:
|
||||
ret = sets[self.LHS].membership(data)
|
||||
|
@ -5,6 +5,7 @@ import numpy as np
|
||||
import pandas as pd
|
||||
from pyFTS.common import SortedCollection, fts, tree
|
||||
from pyFTS.models import chen, cheng, hofts, hwang, ismailefendi, sadaei, song, yu
|
||||
from pyFTS.probabilistic import ProbabilityDistribution
|
||||
import scipy.stats as st
|
||||
|
||||
|
||||
@ -171,29 +172,52 @@ class EnsembleFTS(fts.FTS):
|
||||
|
||||
return ret
|
||||
|
||||
def empty_grid(self, resolution):
    """Return ``self.get_empty_grid`` evaluated over the range [-2 * original_max, 2 * original_max]."""
    bound = self.original_max * 2
    return self.get_empty_grid(-bound, bound, resolution)
|
||||
def forecast_distribution(self, data, **kwargs):
    """
    Build one probability distribution per window ``data[k - order : k]``,
    for ``k`` ranging from ``order`` to ``len(data) - 1``.

    :param data: series to forecast over
    :keyword smooth: distribution type handed to ProbabilityDistribution (default "KDE")
    :keyword alpha: when not None, the flattened member forecasts are first passed
                    through ``get_distribution_interquantile`` with this value
    :return: list of ProbabilityDistribution objects; all ``kwargs`` are forwarded to each
    """
    smooth = kwargs.get("smooth", "KDE")
    alpha = kwargs.get("alpha", None)
    uod = self.get_UoD()

    distributions = []
    for end in np.arange(self.order, len(data)):
        window = data[end - self.order: end]

        # Flatten whatever the member models return into a plain list of values.
        values = np.ravel(self.get_models_forecasts(window)).tolist()
        if alpha is not None:
            values = self.get_distribution_interquantile(values, alpha)

        distributions.append(
            ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, data=values,
                                                            name="", **kwargs))

    return distributions
|
||||
|
||||
|
||||
def forecast_ahead_distribution(self, data, steps, **kwargs):
|
||||
if 'method' in kwargs:
|
||||
self.point_method = kwargs.get('method','mean')
|
||||
|
||||
percentile_size = (self.original_max - self.original_min) / 100
|
||||
|
||||
resolution = kwargs.get('resolution', percentile_size)
|
||||
|
||||
grid = self.empty_grid(resolution)
|
||||
|
||||
index = SortedCollection.SortedCollection(iterable=grid.keys())
|
||||
smooth = kwargs.get("smooth", "KDE")
|
||||
alpha = kwargs.get("alpha", None)
|
||||
|
||||
ret = []
|
||||
|
||||
samples = [[k] for k in data[-self.order:]]
|
||||
start = kwargs.get('start', self.order)
|
||||
|
||||
for k in np.arange(self.order, steps + self.order):
|
||||
uod = self.get_UoD()
|
||||
|
||||
sample = data[start - self.order: start]
|
||||
|
||||
for k in np.arange(self.order, steps+self.order):
|
||||
forecasts = []
|
||||
lags = {}
|
||||
for i in np.arange(0, self.order): lags[i] = samples[k - self.order + i]
|
||||
for i in np.arange(0, self.order): lags[i] = sample[k-self.order]
|
||||
|
||||
# Build the tree with all possible paths
|
||||
|
||||
@ -206,17 +230,19 @@ class EnsembleFTS(fts.FTS):
|
||||
|
||||
forecasts.extend(self.get_models_forecasts(path))
|
||||
|
||||
samples.append(sampler(forecasts, np.arange(0.1, 1, 0.1)))
|
||||
sample.append(sampler(forecasts, np.arange(0.1, 1, 0.1)))
|
||||
|
||||
grid = self.gridCountPoint(grid, resolution, index, forecasts)
|
||||
if alpha is None:
|
||||
forecasts = np.ravel(forecasts).tolist()
|
||||
else:
|
||||
forecasts = self.get_distribution_interquantile(np.ravel(forecasts).tolist(), alpha)
|
||||
|
||||
tmp = np.array([grid[i] for i in sorted(grid)])
|
||||
dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, data=forecasts,
|
||||
name="", **kwargs)
|
||||
|
||||
ret.append(tmp / sum(tmp))
|
||||
ret.append(dist)
|
||||
|
||||
grid = self.empty_grid(resolution)
|
||||
df = pd.DataFrame(ret, columns=sorted(grid))
|
||||
return df
|
||||
return ret
|
||||
|
||||
|
||||
class AllMethodEnsembleFTS(EnsembleFTS):
|
||||
|
@ -26,6 +26,7 @@ class IntervalFTS(hofts.HighOrderFTS):
|
||||
self.has_point_forecasting = False
|
||||
self.has_interval_forecasting = True
|
||||
self.is_high_order = True
|
||||
self.min_order = 1
|
||||
|
||||
def get_upper(self, flrg):
|
||||
if flrg.get_key() in self.flrgs:
|
||||
|
@ -105,6 +105,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
|
||||
self.has_interval_forecasting = True
|
||||
self.has_probability_forecasting = True
|
||||
self.is_high_order = True
|
||||
self.min_order = 1
|
||||
self.auto_update = kwargs.get('update',False)
|
||||
|
||||
|
||||
|
@ -14,15 +14,25 @@ class ProbabilityDistribution(object):
|
||||
def __init__(self, type = "KDE", **kwargs):
|
||||
self.uod = kwargs.get("uod", None)
|
||||
|
||||
self.type = type
|
||||
if self.type == "KDE":
|
||||
self.kde = kde.KernelSmoothing(kwargs.get("h", 0.5), kwargs.get("kernel", "epanechnikov"))
|
||||
self.data = []
|
||||
|
||||
self.nbins = kwargs.get("num_bins", 100)
|
||||
self.type = type
|
||||
|
||||
self.bins = kwargs.get("bins", None)
|
||||
self.labels = kwargs.get("bins_labels", None)
|
||||
|
||||
data = kwargs.get("data", None)
|
||||
|
||||
if self.type == "KDE":
|
||||
self.kde = kde.KernelSmoothing(kwargs.get("h", 0.5), kwargs.get("kernel", "epanechnikov"))
|
||||
_min = np.nanmin(data)
|
||||
_min = _min * .7 if _min > 0 else _min * 1.3
|
||||
_max = np.nanmax(data)
|
||||
_max = _max * 1.3 if _max > 0 else _max * .7
|
||||
self.uod = [_min, _max]
|
||||
|
||||
self.nbins = kwargs.get("num_bins", 100)
|
||||
|
||||
if self.bins is None:
|
||||
self.bins = np.linspace(int(self.uod[0]), int(self.uod[1]), int(self.nbins)).tolist()
|
||||
self.labels = [str(k) for k in self.bins]
|
||||
@ -38,10 +48,6 @@ class ProbabilityDistribution(object):
|
||||
self.count = 0
|
||||
for k in self.bins: self.distribution[k] = 0
|
||||
|
||||
self.data = []
|
||||
|
||||
data = kwargs.get("data",None)
|
||||
|
||||
if data is not None:
|
||||
self.append(data)
|
||||
|
||||
@ -228,10 +234,12 @@ class ProbabilityDistribution(object):
|
||||
|
||||
def __str__(self):
|
||||
ret = ""
|
||||
for k in sorted(self.distribution.keys()):
|
||||
for k in sorted(self.bins):
|
||||
ret += str(round(k,2)) + ':\t'
|
||||
if self.type == "histogram":
|
||||
ret += str(round(self.distribution[k] / self.count,3))
|
||||
elif self.type == "KDE":
|
||||
ret += str(round(self.density(k),3))
|
||||
else:
|
||||
ret += str(round(self.distribution[k], 6))
|
||||
ret += '\n'
|
||||
|
@ -15,9 +15,18 @@ from pyFTS.data import TAIEX
|
||||
|
||||
dataset = TAIEX.get_data()
|
||||
|
||||
from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil, Measures
|
||||
from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil, Measures, knn, quantreg, arima
|
||||
|
||||
from pyFTS.models import pwfts, song, ifts
|
||||
|
||||
model = arima.ARIMA("", order=(1,0,0))
|
||||
model.fit(dataset[:800])
|
||||
tmp = model.predict(dataset[800:1000], type='distribution')
|
||||
for tmp2 in tmp:
|
||||
print(tmp2)
|
||||
|
||||
|
||||
|
||||
from pyFTS.models import pwfts, song
|
||||
'''
|
||||
from pyFTS.partitioners import Grid, Util as pUtil
|
||||
partitioner = Grid.GridPartitioner(data=dataset[:800], npart=10, transformation=tdiff)
|
||||
@ -31,28 +40,39 @@ print(Measures.get_distribution_statistics(dataset[800:1000], model, steps_ahead
|
||||
# print(tmp2)
|
||||
'''
|
||||
|
||||
#'''
|
||||
'''
|
||||
|
||||
from pyFTS.benchmarks import arima, naive, quantreg
|
||||
|
||||
bchmk.sliding_window_benchmarks(dataset[:1000], 1000, train=0.8, inc=0.2,
|
||||
#methods=[song.ConventionalFTS], #[pwfts.ProbabilisticWeightedFTS],
|
||||
bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2,
|
||||
methods=[ifts.IntervalFTS], #[pwfts.ProbabilisticWeightedFTS],
|
||||
benchmark_models=True,
|
||||
benchmark_methods=[naive.Naive, arima.ARIMA,arima.ARIMA], #arima.ARIMA,arima.ARIMA],
|
||||
#benchmark_methods=[arima.ARIMA],
|
||||
benchmark_methods_parameters=[1,(1,0,0),(1,0,1)], #(2,0,1),(2,0,2)],
|
||||
#benchmark_methods_parameters=[(1,0,0)],
|
||||
benchmark_methods=[arima.ARIMA for k in range(8)]
|
||||
+ [quantreg.QuantileRegression for k in range(4)],
|
||||
benchmark_methods_parameters=[
|
||||
{'order': (1, 0, 0), 'alpha': .05},
|
||||
{'order': (1, 0, 0), 'alpha': .25},
|
||||
{'order': (1, 0, 1), 'alpha': .05},
|
||||
{'order': (1, 0, 1), 'alpha': .25},
|
||||
{'order': (2, 0, 1), 'alpha': .05},
|
||||
{'order': (2, 0, 1), 'alpha': .25},
|
||||
{'order': (2, 0, 2), 'alpha': .05},
|
||||
{'order': (2, 0, 2), 'alpha': .25},
|
||||
{'order': 1, 'alpha': .05},
|
||||
{'order': 1, 'alpha': .25},
|
||||
{'order': 2, 'alpha': .05},
|
||||
{'order': 2, 'alpha': .25},
|
||||
],
|
||||
transformations=[None, tdiff],
|
||||
orders=[1, 2, 3],
|
||||
partitions=[35], #np.arange(10, 100, 5),
|
||||
progress=True, type='point',
|
||||
orders=[1], #2, 3],
|
||||
partitions=[3], #np.arange(3, 25, 2),
|
||||
progress=False, type='interval',
|
||||
#steps_ahead=[1,4,7,10], #steps_ahead=[1]
|
||||
#distributed=True, nodes=['192.168.0.110', '192.168.0.105','192.168.0.106'],
|
||||
file="benchmarks.tmp", dataset="TAIEX", tag="comparisons")
|
||||
#save=True, file="tmp.db")
|
||||
distributed=True, nodes=['192.168.0.110', '192.168.0.107','192.168.0.106'],
|
||||
file="benchmarks.db", dataset="TAIEX", tag="comparisons")
|
||||
|
||||
|
||||
#'''
|
||||
'''
|
||||
'''
|
||||
dat = pd.read_csv('pwfts_taiex_partitioning.csv', sep=';')
|
||||
print(bUtil.analytic_tabular_dataframe(dat))
|
||||
@ -111,4 +131,4 @@ tmp[20].plot(ax[2][2], title='t=200')
|
||||
f, ax = plt.subplots(1, 1, figsize=[20,15])
|
||||
bchmk.plot_distribution(ax, 'blue', tmp, f, 0, reference_data=dataset[train_split:train_split+200])
|
||||
|
||||
'''
|
||||
'''
|
||||
|
Loading…
Reference in New Issue
Block a user