Minor bugfixes

Petrônio Cândido 2018-04-24 15:47:28 -03:00
parent 33dbeb8965
commit 34995b72f8
8 changed files with 83 additions and 48 deletions

View File

@@ -294,19 +294,24 @@ def get_point_statistics(data, model, **kwargs):
     if indexer is not None:
         ndata = np.array(indexer.get_data(data))
     else:
-        ndata = np.array(data[model.order:])
+        ndata = np.array(data)

     ret = list()

     if steps_ahead == 1:
-        forecasts = model.predict(data, **kwargs)
+        forecasts = model.predict(ndata, **kwargs)
         if not isinstance(forecasts, (list, np.ndarray)):
             forecasts = [forecasts]

         if model.has_seasonality:
             nforecasts = np.array(forecasts)
         else:
             nforecasts = np.array(forecasts[:-1])
-        ret.append(np.round(rmse(ndata, nforecasts), 2))
-        ret.append(np.round(smape(ndata, nforecasts), 2))
-        ret.append(np.round(UStatistic(ndata, nforecasts), 2))
+        ret.append(np.round(rmse(ndata[model.order:], nforecasts), 2))
+        ret.append(np.round(smape(ndata[model.order:], nforecasts), 2))
+        ret.append(np.round(UStatistic(ndata[model.order:], nforecasts), 2))
     else:
         steps_ahead_sampler = kwargs.get('steps_ahead_sampler', 1)
         nforecasts = []
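
The change keeps the full series in ndata (so the model still sees its first model.order lags when predicting) and trims those first model.order points only at comparison time, since an order-n model produces no forecast for its first n samples. A minimal sketch of the alignment this enforces, in plain numpy outside pyFTS:

    import numpy as np

    def rmse(targets, forecasts):
        # root mean squared error over aligned arrays
        return np.sqrt(np.nanmean((np.array(targets) - np.array(forecasts)) ** 2))

    order = 2
    ndata = np.array([10., 12., 11., 13., 14., 15.])
    # hypothetical one-step forecasts, one per forecastable point ndata[2:]
    forecasts = np.array([11.5, 12.5, 13.5, 14.5])

    print(rmse(ndata[order:], forecasts))  # shapes agree: 4 targets vs 4 forecasts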

View File

@@ -47,14 +47,17 @@ def insert_benchmark(data, conn):
 def process_common_data(dataset, tag, type, job):
     model = job["obj"]
-    if not model.benchmark_only:
+    if model.benchmark_only:
+        data = [dataset, tag, type, model.shortname,
+                str(model.transformations[0]) if len(model.transformations) > 0 else None,
+                model.order, None, None,
+                None, job['steps'], job['method']]
+    else:
         data = [dataset, tag, type, model.shortname,
                 str(model.partitioner.transformation) if model.partitioner.transformation is not None else None,
                 model.order, model.partitioner.name, str(model.partitioner.partitions),
                 len(model), job['steps'], job['method']]
-    else:
-        data = [tag, type, model.shortname, None, model.order, None, None,
-                None, job['steps'], job['method']]
     return data
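
Before the fix, rows for benchmark-only models started at tag and carried one field fewer than the rows built for FTS models; the rewritten branch records the dataset column and the model's transformation in both cases. A standalone sketch of the column mismatch (field values are illustrative stand-ins, not the actual benchmark DB schema):

    old_benchmark_row = ["tag", "point", "ARIMA", None, 1, None, None,
                         None, 1, "sliding_window"]                     # 10 fields, dataset missing
    new_benchmark_row = ["TAIEX", "tag", "point", "ARIMA", None, 1, None,
                         None, None, 1, "sliding_window"]               # 11 fields
    fts_row = ["TAIEX", "tag", "point", "HOFTS", None, 2, "Grid",
               "35", 120, 1, "sliding_window"]                          # 11 fields

    assert len(new_benchmark_row) == len(fts_row)  # columns line up again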

View File

@@ -31,20 +31,27 @@ class ARIMA(fts.FTS):
         self.min_order = 1
         self.alpha = kwargs.get("alpha", 0.05)
         self.shortname += str(self.alpha)
+        self._decompose_order(self.order)

-    def train(self, data, sets, order, parameters=None):
+    def _decompose_order(self, order):
         if isinstance(order, (tuple, set, list)):
             self.p = order[0]
             self.d = order[1]
             self.q = order[2]
-            self.order = self.p + self.q
+            self.order = self.p + self.q + (self.q - 1 if self.q > 0 else 0)
+            self.d = len(self.transformations)
+            self.shortname = "ARIMA(" + str(self.p) + "," + str(self.d) + "," + str(self.q) + ") - " + str(self.alpha)

+    def train(self, data, **kwargs):
+        if kwargs.get('order', None) is not None:
+            order = kwargs.get('order', (1,0,0))
+            self._decompose_order(order)

         if self.indexer is not None:
             data = self.indexer.get_data(data)

-        data = self.apply_transformations(data, updateUoD=True)
+        #data = self.apply_transformations(data, updateUoD=True)

         old_fit = self.model_fit
         try:
             self.model = stats_arima(data, order=(self.p, self.d, self.q))
             self.model_fit = self.model.fit(disp=0)
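
The new _decompose_order centralizes the (p,d,q) bookkeeping: the effective number of lags the model consumes is now p + q + (q - 1 if q > 0 else 0), where the extra (q - 1) term reserves additional history for building the MA residuals, and d is re-derived from len(self.transformations) so the differencing degree tracks the transformations attached to the model. A small sketch of the order rule:

    # effective-order rule from _decompose_order
    def effective_order(p, q):
        return p + q + (q - 1 if q > 0 else 0)

    print(effective_order(1, 0))  # ARIMA(1,d,0) -> 1
    print(effective_order(1, 1))  # ARIMA(1,d,1) -> 2
    print(effective_order(2, 2))  # ARIMA(2,d,2) -> 5
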
@@ -58,34 +65,32 @@ class ARIMA(fts.FTS):
     def ma(self, data):
         return data.dot(self.model_fit.maparams)

-    def forecast(self, data, **kwargs):
+    def forecast(self, ndata, **kwargs):
         if self.model_fit is None:
             return np.nan

-        if self.indexer is not None and isinstance(data, pd.DataFrame):
-            data = self.indexer.get_data(data)
+        if self.indexer is not None and isinstance(ndata, pd.DataFrame):
+            data = self.indexer.get_data(ndata)

-        ndata = np.array(self.apply_transformations(data))
+        ndata = np.array(ndata)

         l = len(ndata)

         ret = []

         if self.d == 0:
             ar = np.array([self.ar(ndata[k - self.p: k]) for k in np.arange(self.p, l+1)]) #+1 to forecast one step ahead given all available lags
         else:
             ar = np.array([ndata[k] + self.ar(ndata[k - self.p: k]) for k in np.arange(self.p, l+1)])

         if self.q > 0:
-            residuals = np.array([ndata[k] - ar[k - self.p] for k in np.arange(self.p, l)])
+            residuals = ndata[self.p-1:] - ar

-            ma = np.array([self.ma(residuals[k - self.q: k]) for k in np.arange(self.q, len(residuals)+1)])
+            ma = np.array([self.ma(residuals[k - self.q: k]) for k in np.arange(self.q, len(residuals) + 1)])

-            ret = ar[self.q:] + ma
+            ret = ar[self.q - 1:] + ma
+            ret = ret[self.q:]
         else:
             ret = ar

-        ret = self.apply_inverse_transformations(ret, params=[data[self.order - 1:]])
+        #ret = self.apply_inverse_transformations(ret, params=[data[self.order - 1:]])

         return ret
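
A shape check for the rewritten MA path, in plain numpy with hypothetical coefficients standing in for the fitted statsmodels parameters: with p AR lags over a series of length l, ar has l - p + 1 entries (including the one-step-ahead forecast), the vectorized residuals = ndata[p-1:] - ar subtracts two same-length arrays, and ma lines up with ar[q-1:] before the final ret[q:] trim:

    import numpy as np

    p, q = 2, 1
    arparams = np.array([0.6, 0.3])  # hypothetical AR coefficients
    maparams = np.array([0.4])       # hypothetical MA coefficient

    ndata = np.array([10., 12., 11., 13., 14., 15.])
    l = len(ndata)

    ar = np.array([ndata[k - p:k].dot(arparams) for k in np.arange(p, l + 1)])
    residuals = ndata[p - 1:] - ar   # both sides have l - p + 1 entries
    ma = np.array([residuals[k - q:k].dot(maparams) for k in np.arange(q, len(residuals) + 1)])

    ret = (ar[q - 1:] + ma)[q:]
    print(len(ar), len(residuals), len(ma), len(ret))  # 5 5 5 4
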
@@ -121,7 +126,7 @@ class ARIMA(fts.FTS):
         return ret

-    def forecast_ahead_interval(self, data, steps, **kwargs):
+    def forecast_ahead_interval(self, ndata, steps, **kwargs):
         if self.model_fit is None:
             return np.nan
@@ -129,7 +134,7 @@ class ARIMA(fts.FTS):
         sigma = np.sqrt(self.model_fit.sigma2)

-        ndata = np.array(self.apply_transformations(data))
+        #ndata = np.array(self.apply_transformations(data))

         l = len(ndata)
@@ -147,13 +152,10 @@ class ARIMA(fts.FTS):
             ret.append(tmp)

-        ret = self.apply_inverse_transformations(ret, params=[[data[-1] for a in np.arange(0, steps)]], interval=True)
+        #ret = self.apply_inverse_transformations(ret, params=[[data[-1] for a in np.arange(0, steps)]], interval=True)

         return ret

     def empty_grid(self, resolution):
         return self.get_empty_grid(-(self.original_max*2), self.original_max*2, resolution)

     def forecast_distribution(self, data, **kwargs):
         if self.indexer is not None and isinstance(data, pd.DataFrame):

View File

@@ -151,7 +151,7 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
             elif type == 'distribution':
                 benchmark_methods = get_benchmark_probabilistic_methods()

-            if benchmark_models is not None:
+            if isinstance(benchmark_models, list) :
                 pool.extend(benchmark_models)
             elif benchmark_methods is not None:
                 for count, model in enumerate(benchmark_methods, start=0):
@@ -342,6 +342,7 @@ def run_point(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
     if mfts.benchmark_only:
         _key = mfts.shortname + str(mfts.order if mfts.order is not None else "")
+        mfts.append_transformation(partitioner.transformation)
     else:
         pttr = str(partitioner.__module__).split('.')[-1]
         _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
@@ -356,6 +357,7 @@ def run_point(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
     _end = time.time()
     times = _end - _start

+    _start = time.time()
     _rmse, _smape, _u = Measures.get_point_statistics(test_data, mfts, **kwargs)
     _end = time.time()
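
The guard change matters because sliding_window_benchmarks also accepts benchmark_models as a plain boolean flag (the test script below passes benchmark_models=True), and `is not None` lets a boolean fall through into pool.extend(); run_point additionally resets the timer before get_point_statistics and lets benchmark-only models inherit the partitioner's transformation. A minimal repro of the guard distinction:

    pool = []
    benchmark_models = True                 # flag form, not a list of model instances
    # old guard: `if benchmark_models is not None:` passes for True/False, and
    # pool.extend(True) raises TypeError: 'bool' object is not iterable
    if isinstance(benchmark_models, list):  # new guard: only real lists extend the pool
        pool.extend(benchmark_models)
    print(pool)  # [] -- the flag is left to the benchmark_methods branch instead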

View File

@@ -147,10 +147,18 @@ def fuzzyfy_series(data, fuzzySets, method='maximum'):
     return fts

+def grant_bounds(data, sets, ordered_sets):
+    if data < sets[ordered_sets[0]].lower:
+        return sets[ordered_sets[0]].lower
+    elif data > sets[ordered_sets[-1]].upper:
+        return sets[ordered_sets[-1]].upper
+    else:
+        return data

 def check_bounds(data, sets, ordered_sets):
-    if data < sets[ordered_sets[0]].get_lower():
+    if data < sets[ordered_sets[0]].lower:
         return sets[ordered_sets[0]]
-    elif data > sets[ordered_sets[-1]].get_upper():
+    elif data > sets[ordered_sets[-1]].upper:
         return sets[ordered_sets[-1]]
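
grant_bounds clamps a value into the universe of discourse and returns a number, while check_bounds keeps returning the boundary fuzzy set itself; both now read the lower/upper attributes directly instead of the old get_lower()/get_upper() accessors. A standalone sketch of the clamp, with a stub standing in for the real pyFTS set objects:

    class StubSet:
        def __init__(self, lower, upper):
            self.lower, self.upper = lower, upper

    sets = {'A0': StubSet(0.0, 10.0), 'A9': StubSet(90.0, 100.0)}
    ordered_sets = ['A0', 'A9']

    def grant_bounds(data, sets, ordered_sets):
        # clamp data into [first set's lower bound, last set's upper bound]
        if data < sets[ordered_sets[0]].lower:
            return sets[ordered_sets[0]].lower
        elif data > sets[ordered_sets[-1]].upper:
            return sets[ordered_sets[-1]].upper
        return data

    print(grant_bounds(-5.0, sets, ordered_sets))   # 0.0   (clamped up)
    print(grant_bounds(50.0, sets, ordered_sets))   # 50.0  (unchanged)
    print(grant_bounds(120.0, sets, ordered_sets))  # 100.0 (clamped down)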

View File

@@ -46,6 +46,7 @@ class HighOrderFTS(fts.FTS):
         self.order = kwargs.get('order',1)
         self.setsDict = {}
         self.is_high_order = True
+        self.min_order = 2

     def generate_lhs_flrg(self, sample):
         lags = {}
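
Declaring min_order = 2 gives callers a floor to validate requested orders against before fitting. A hypothetical sketch of the kind of guard this enables (the actual enforcement point in pyFTS may differ):

    class Model:
        min_order = 2

    def validate_order(model, order):
        # reject orders below the model's declared minimum
        if order < model.min_order:
            raise ValueError("order %d is below the minimum order %d" % (order, model.min_order))
        return order

    validate_order(Model(), 3)    # ok
    # validate_order(Model(), 1)  # would raise ValueError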

View File

@@ -20,6 +20,10 @@ class HighOrderFTS(fts.FTS):
     def forecast(self, ndata, **kwargs):

+        if self.sets == None:
+            self.sets = self.partitioner.sets
+            ordered_sets = self.partitioner.ordered_sets
+        else:
+            ordered_sets = FuzzySet.set_ordered(self.sets)

         l = len(self.sets)
@@ -35,9 +39,9 @@ class HighOrderFTS(fts.FTS):
             for ix in range(l):
                 s = ordered_sets[ix]
-                cn[ix] = self.sets[s].membership(ndata[t])
+                cn[ix] = self.sets[s].membership( FuzzySet.grant_bounds(ndata[t], self.sets, ordered_sets))
                 for w in range(self.order - 1):
-                    ow[w, ix] = self.sets[s].membership(ndata[t - w])
+                    ow[w, ix] = self.sets[s].membership(FuzzySet.grant_bounds(ndata[t - w], self.sets, ordered_sets))
                     rn[w, ix] = ow[w, ix] * cn[ix]
                     ft[ix] = max(ft[ix], rn[w, ix])

             mft = max(ft)
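
The clamping matters because a test sample can fall just outside the universe of discourse fitted on the training window; without it, every set can report zero membership and the whole rule activation collapses. A small standalone illustration with a triangular membership function:

    def trimf(x, a, b, c):
        # triangular membership function with peak at b
        if x <= a or x >= c:
            return 0.0
        return (x - a) / (b - a) if x <= b else (c - x) / (c - b)

    lower, upper = 0.0, 100.0                    # universe of discourse seen in training
    clamp = lambda x: min(max(x, lower), upper)  # what FuzzySet.grant_bounds does

    last_set = (90.0, 100.0, 110.0)              # rightmost partition, peak at the upper bound
    x = 115.0                                    # test value beyond every partition
    print(trimf(x, *last_set))                   # 0.0 -> activations collapse
    print(trimf(clamp(x), *last_set))            # 1.0 -> boundary set stays active
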
@@ -55,4 +59,7 @@ class HighOrderFTS(fts.FTS):
     def train(self, data, **kwargs):
         if kwargs.get('sets', None) is not None:
             self.sets = kwargs.get('sets', None)
-        self.order = kwargs.get('order', 1)
+        else:
+            self.sets = self.partitioner.sets
+
+        self.order = kwargs.get('order', 2)
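
train now falls back to the partitioner's sets when none are passed, and the default order rises from 1 to 2: this model weighs each forecastable point against its order - 1 preceding lags, so order 1 leaves nothing to weigh. The degenerate loop, in isolation:

    order = 1
    for w in range(order - 1):  # range(0): the body never runs at order 1
        print("lag", w)
    print("lags inspected:", order - 1)  # 0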

View File

@@ -17,7 +17,7 @@ dataset = TAIEX.get_data()
 from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil, Measures

-from pyFTS.models import pwfts
+from pyFTS.models import pwfts, song

 '''
 from pyFTS.partitioners import Grid, Util as pUtil
 partitioner = Grid.GridPartitioner(data=dataset[:800], npart=10, transformation=tdiff)
@@ -32,12 +32,19 @@ print(Measures.get_distribution_statistics(dataset[800:1000], model, steps_ahead
 '''

 #'''
+from pyFTS.benchmarks import arima, naive, quantreg

 bchmk.sliding_window_benchmarks(dataset[:1000], 1000, train=0.8, inc=0.2,
-                                #methods=[pwfts.ProbabilisticWeightedFTS],
-                                benchmark_models=False,
-                                #transformations=[tdiff],
-                                orders=[1], #[1, 2, 3],
-                                partitions=[20], #np.arange(10, 100, 5),
+                                #methods=[song.ConventionalFTS], #[pwfts.ProbabilisticWeightedFTS],
+                                benchmark_models=True,
+                                benchmark_methods=[naive.Naive, arima.ARIMA,arima.ARIMA], #arima.ARIMA,arima.ARIMA],
+                                #benchmark_methods=[arima.ARIMA],
+                                benchmark_methods_parameters=[1,(1,0,0),(1,0,1)], #(2,0,1),(2,0,2)],
+                                #benchmark_methods_parameters=[(1,0,0)],
+                                transformations=[None, tdiff],
+                                orders=[1, 2, 3],
+                                partitions=[35], #np.arange(10, 100, 5),
                                 progress=True, type='point',
                                 #steps_ahead=[1,4,7,10], #steps_ahead=[1]
                                 #distributed=True, nodes=['192.168.0.110', '192.168.0.105','192.168.0.106'],
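
The test script now exercises the benchmark path end to end: benchmark_methods and benchmark_methods_parameters are paired by position, so naive.Naive runs with order 1 while the two arima.ARIMA entries get the (p,d,q) tuples (1,0,0) and (1,0,1). Assuming that positional pairing, the mapping is simply:

    methods = ["naive.Naive", "arima.ARIMA", "arima.ARIMA"]
    parameters = [1, (1, 0, 0), (1, 0, 1)]
    for m, p in zip(methods, parameters):
        print(m, "->", p)  # Naive gets a plain order; each ARIMA gets a (p,d,q) tuple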