Minor bugfixes

Petrônio Cândido 2018-04-24 15:47:28 -03:00
parent 33dbeb8965
commit 34995b72f8
8 changed files with 83 additions and 48 deletions

View File

@@ -294,19 +294,24 @@ def get_point_statistics(data, model, **kwargs):
     if indexer is not None:
         ndata = np.array(indexer.get_data(data))
     else:
-        ndata = np.array(data[model.order:])
+        ndata = np.array(data)

     ret = list()

     if steps_ahead == 1:
-        forecasts = model.predict(data, **kwargs)
+        forecasts = model.predict(ndata, **kwargs)
+        if not isinstance(forecasts, (list, np.ndarray)):
+            forecasts = [forecasts]
+
         if model.has_seasonality:
             nforecasts = np.array(forecasts)
         else:
             nforecasts = np.array(forecasts[:-1])
-        ret.append(np.round(rmse(ndata, nforecasts), 2))
-        ret.append(np.round(smape(ndata, nforecasts), 2))
-        ret.append(np.round(UStatistic(ndata, nforecasts), 2))
+        ret.append(np.round(rmse(ndata[model.order:], nforecasts), 2))
+        ret.append(np.round(smape(ndata[model.order:], nforecasts), 2))
+        ret.append(np.round(UStatistic(ndata[model.order:], nforecasts), 2))
     else:
         steps_ahead_sampler = kwargs.get('steps_ahead_sampler', 1)
         nforecasts = []
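
The slicing moved from the input series to the metric calls: a model of order k produces len(data) - k point forecasts, so the targets, not the raw data, must be cut before computing errors. A minimal standalone sketch of that alignment (made-up numbers and plain NumPy, not the pyFTS API):

    import numpy as np

    # Toy alignment check: a model of order 2 only forecasts from the
    # third point on, so the targets must be sliced by the order.
    order = 2
    data = np.array([10., 12., 11., 13., 14., 13.])
    forecasts = np.array([11.5, 12.0, 13.5, 13.0])  # made-up predictions

    targets = data[order:]                 # mirrors ndata[model.order:]
    assert len(targets) == len(forecasts)  # now the metrics line up

    print(np.round(np.sqrt(np.mean((targets - forecasts) ** 2)), 2))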

View File

@@ -47,14 +47,17 @@ def insert_benchmark(data, conn):
 def process_common_data(dataset, tag, type, job):
     model = job["obj"]
-    if not model.benchmark_only:
+    if model.benchmark_only:
+        data = [dataset, tag, type, model.shortname,
+                str(model.transformations[0]) if len(model.transformations) > 0 else None,
+                model.order, None, None,
+                None, job['steps'], job['method']]
+    else:
         data = [dataset, tag, type, model.shortname,
                 str(model.partitioner.transformation) if model.partitioner.transformation is not None else None,
                 model.order, model.partitioner.name, str(model.partitioner.partitions),
                 len(model), job['steps'], job['method']]
-    else:
-        data = [tag, type, model.shortname, None, model.order, None, None,
-                None, job['steps'], job['method']]
     return data

View File

@@ -31,20 +31,27 @@ class ARIMA(fts.FTS):
         self.min_order = 1
         self.alpha = kwargs.get("alpha", 0.05)
         self.shortname += str(self.alpha)
+        self._decompose_order(self.order)

-    def train(self, data, sets, order, parameters=None):
-        self.p = order[0]
-        self.d = order[1]
-        self.q = order[2]
-        self.order = self.p + self.q
-        self.shortname = "ARIMA(" + str(self.p) + "," + str(self.d) + "," + str(self.q) + ") - " + str(self.alpha)
+    def _decompose_order(self, order):
+        if isinstance(order, (tuple, set, list)):
+            self.p = order[0]
+            self.d = order[1]
+            self.q = order[2]
+            self.order = self.p + self.q + (self.q - 1 if self.q > 0 else 0)
+            self.d = len(self.transformations)
+            self.shortname = "ARIMA(" + str(self.p) + "," + str(self.d) + "," + str(self.q) + ") - " + str(self.alpha)
+
+    def train(self, data, **kwargs):
+        if kwargs.get('order', None) is not None:
+            order = kwargs.get('order', (1,0,0))
+            self._decompose_order(order)

         if self.indexer is not None:
             data = self.indexer.get_data(data)

-        data = self.apply_transformations(data, updateUoD=True)
+        #data = self.apply_transformations(data, updateUoD=True)
+
+        old_fit = self.model_fit
         try:
             self.model = stats_arima(data, order=(self.p, self.d, self.q))
             self.model_fit = self.model.fit(disp=0)
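
The new `self.order` expression counts the lags the wrapper must consume before its first usable forecast: p for the AR terms, q for the MA residuals, plus q - 1 more because each residual itself needs a prior AR estimate. A hedged standalone sketch of that rule (hypothetical helper name; the sample orders match the (p, d, q) tuples used in the test script below):

    def required_lags(p, q):
        # Lags consumed before the first usable forecast: p for the AR
        # terms, q for the MA residuals, and q - 1 extra because each
        # residual needs a prior AR estimate of its own.
        return p + q + (q - 1 if q > 0 else 0)

    print(required_lags(1, 0))  # ARIMA(1,0,0) -> 1
    print(required_lags(1, 1))  # ARIMA(1,0,1) -> 2
    print(required_lags(2, 2))  # ARIMA(2,0,2) -> 5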
@@ -58,34 +65,32 @@ class ARIMA(fts.FTS):
     def ma(self, data):
         return data.dot(self.model_fit.maparams)

-    def forecast(self, data, **kwargs):
+    def forecast(self, ndata, **kwargs):
         if self.model_fit is None:
             return np.nan

-        if self.indexer is not None and isinstance(data, pd.DataFrame):
-            data = self.indexer.get_data(data)
+        if self.indexer is not None and isinstance(ndata, pd.DataFrame):
+            data = self.indexer.get_data(ndata)

-        ndata = np.array(self.apply_transformations(data))
+        ndata = np.array(ndata)

         l = len(ndata)

         ret = []

-        if self.d == 0:
-            ar = np.array([self.ar(ndata[k - self.p: k]) for k in np.arange(self.p, l+1)]) #+1 to forecast one step ahead given all available lags
-        else:
-            ar = np.array([ndata[k] + self.ar(ndata[k - self.p: k]) for k in np.arange(self.p, l+1)])
+        ar = np.array([self.ar(ndata[k - self.p: k]) for k in np.arange(self.p, l+1)]) #+1 to forecast one step ahead given all available lags

         if self.q > 0:
-            residuals = np.array([ndata[k] - ar[k - self.p] for k in np.arange(self.p, l)])
-            ma = np.array([self.ma(residuals[k - self.q: k]) for k in np.arange(self.q, len(residuals)+1)])
-            ret = ar[self.q:] + ma
+            residuals = ndata[self.p-1:] - ar
+            ma = np.array([self.ma(residuals[k - self.q: k]) for k in np.arange(self.q, len(residuals) + 1)])
+            ret = ar[self.q - 1:] + ma
+            ret = ret[self.q:]
         else:
             ret = ar

-        ret = self.apply_inverse_transformations(ret, params=[data[self.order - 1:]])
+        #ret = self.apply_inverse_transformations(ret, params=[data[self.order - 1:]])

         return ret
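
The rewritten `forecast` drops the d == 0 branch (differencing is now delegated to the transformations) and computes the residuals in one vectorized step: `ndata[p-1:]` and `ar` both have l - p + 1 entries, so they subtract elementwise. A self-contained numeric sketch of the same index arithmetic, with `self.ar`/`self.ma` replaced by dot products against made-up coefficients:

    import numpy as np

    p, q = 2, 1
    phi = np.array([0.6, 0.3])   # made-up AR coefficients
    theta = np.array([0.4])      # made-up MA coefficient
    ndata = np.array([10., 12., 11., 13., 14., 13., 15.])
    l = len(ndata)

    # One AR estimate per p-lag window; the +1 yields one forecast
    # past the end of the data, as in the commit.
    ar = np.array([ndata[k - p: k].dot(phi) for k in np.arange(p, l + 1)])

    # ndata[p-1:] and ar both have l - p + 1 entries, so the
    # residuals come out of a single vectorized subtraction.
    residuals = ndata[p - 1:] - ar
    ma = np.array([residuals[k - q: k].dot(theta)
                   for k in np.arange(q, len(residuals) + 1)])

    ret = ar[q - 1:] + ma  # align the AR and MA contributions
    ret = ret[q:]          # drop estimates lacking full MA history
    print(ret)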
@@ -121,7 +126,7 @@ class ARIMA(fts.FTS):

         return ret

-    def forecast_ahead_interval(self, data, steps, **kwargs):
+    def forecast_ahead_interval(self, ndata, steps, **kwargs):
         if self.model_fit is None:
             return np.nan

@@ -129,7 +134,7 @@ class ARIMA(fts.FTS):
         sigma = np.sqrt(self.model_fit.sigma2)

-        ndata = np.array(self.apply_transformations(data))
+        #ndata = np.array(self.apply_transformations(data))

         l = len(ndata)

@@ -147,13 +152,10 @@ class ARIMA(fts.FTS):
             ret.append(tmp)

-        ret = self.apply_inverse_transformations(ret, params=[[data[-1] for a in np.arange(0, steps)]], interval=True)
+        #ret = self.apply_inverse_transformations(ret, params=[[data[-1] for a in np.arange(0, steps)]], interval=True)

         return ret

-    def empty_grid(self, resolution):
-        return self.get_empty_grid(-(self.original_max*2), self.original_max*2, resolution)
-
     def forecast_distribution(self, data, **kwargs):
         if self.indexer is not None and isinstance(data, pd.DataFrame):

View File

@@ -151,7 +151,7 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
         elif type == 'distribution':
             benchmark_methods = get_benchmark_probabilistic_methods()

-    if benchmark_models is not None:
+    if isinstance(benchmark_models, list):
        pool.extend(benchmark_models)
     elif benchmark_methods is not None:
         for count, model in enumerate(benchmark_methods, start=0):

@@ -342,6 +342,7 @@ def run_point(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
     if mfts.benchmark_only:
         _key = mfts.shortname + str(mfts.order if mfts.order is not None else "")
+        mfts.append_transformation(partitioner.transformation)
     else:
         pttr = str(partitioner.__module__).split('.')[-1]
         _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)

@@ -356,6 +357,7 @@ def run_point(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
     _end = time.time()
     times = _end - _start

+    _start = time.time()
     _rmse, _smape, _u = Measures.get_point_statistics(test_data, mfts, **kwargs)
     _end = time.time()

View File

@@ -147,10 +147,18 @@ def fuzzyfy_series(data, fuzzySets, method='maximum'):
     return fts

+def grant_bounds(data, sets, ordered_sets):
+    if data < sets[ordered_sets[0]].lower:
+        return sets[ordered_sets[0]].lower
+    elif data > sets[ordered_sets[-1]].upper:
+        return sets[ordered_sets[-1]].upper
+    else:
+        return data
+
 def check_bounds(data, sets, ordered_sets):
-    if data < sets[ordered_sets[0]].get_lower():
+    if data < sets[ordered_sets[0]].lower:
         return sets[ordered_sets[0]]
-    elif data > sets[ordered_sets[-1]].get_upper():
+    elif data > sets[ordered_sets[-1]].upper:
         return sets[ordered_sets[-1]]
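
Unlike `check_bounds`, which returns the boundary *set* for an out-of-range value, the new `grant_bounds` returns a clamped *value*, so membership functions never receive inputs outside the universe of discourse. A minimal sketch of the clamping behaviour, with a hypothetical stand-in for the set objects:

    from collections import namedtuple

    # Hypothetical stand-in for pyFTS fuzzy sets: only bounds matter here.
    Bounds = namedtuple('Bounds', ['lower', 'upper'])
    sets = {'A0': Bounds(0, 10), 'A1': Bounds(10, 20), 'A2': Bounds(20, 30)}
    ordered_sets = ['A0', 'A1', 'A2']

    def grant_bounds(data, sets, ordered_sets):
        # Clamp data into [lower of first set, upper of last set].
        if data < sets[ordered_sets[0]].lower:
            return sets[ordered_sets[0]].lower
        elif data > sets[ordered_sets[-1]].upper:
            return sets[ordered_sets[-1]].upper
        return data

    print(grant_bounds(-5, sets, ordered_sets))  # -> 0  (clamped up)
    print(grant_bounds(15, sets, ordered_sets))  # -> 15 (unchanged)
    print(grant_bounds(42, sets, ordered_sets))  # -> 30 (clamped down)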

View File

@@ -46,6 +46,7 @@ class HighOrderFTS(fts.FTS):
         self.order = kwargs.get('order',1)
         self.setsDict = {}
         self.is_high_order = True
+        self.min_order = 2

     def generate_lhs_flrg(self, sample):
         lags = {}

View File

@@ -20,7 +20,11 @@ class HighOrderFTS(fts.FTS):
     def forecast(self, ndata, **kwargs):

-        ordered_sets = FuzzySet.set_ordered(self.sets)
+        if self.sets == None:
+            self.sets = self.partitioner.sets
+            ordered_sets = self.partitioner.ordered_sets
+        else:
+            ordered_sets = FuzzySet.set_ordered(self.sets)

         l = len(self.sets)

@@ -35,9 +39,9 @@ class HighOrderFTS(fts.FTS):
             for ix in range(l):
                 s = ordered_sets[ix]
-                cn[ix] = self.sets[s].membership(ndata[t])
+                cn[ix] = self.sets[s].membership(FuzzySet.grant_bounds(ndata[t], self.sets, ordered_sets))
                 for w in range(self.order - 1):
-                    ow[w, ix] = self.sets[s].membership(ndata[t - w])
+                    ow[w, ix] = self.sets[s].membership(FuzzySet.grant_bounds(ndata[t - w], self.sets, ordered_sets))
                     rn[w, ix] = ow[w, ix] * cn[ix]
                     ft[ix] = max(ft[ix], rn[w, ix])

             mft = max(ft)

@@ -55,4 +59,7 @@ class HighOrderFTS(fts.FTS):
     def train(self, data, **kwargs):
         if kwargs.get('sets', None) is not None:
             self.sets = kwargs.get('sets', None)
-        self.order = kwargs.get('order', 1)
+        else:
+            self.sets = self.partitioner.sets
+        self.order = kwargs.get('order', 2)

View File

@@ -17,7 +17,7 @@ dataset = TAIEX.get_data()
 from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil, Measures
-from pyFTS.models import pwfts
+from pyFTS.models import pwfts, song

 '''
 from pyFTS.partitioners import Grid, Util as pUtil
 partitioner = Grid.GridPartitioner(data=dataset[:800], npart=10, transformation=tdiff)

@@ -32,12 +32,19 @@ print(Measures.get_distribution_statistics(dataset[800:1000], model, steps_ahead
 '''
 #'''
+from pyFTS.benchmarks import arima, naive, quantreg

 bchmk.sliding_window_benchmarks(dataset[:1000], 1000, train=0.8, inc=0.2,
-                                #methods=[pwfts.ProbabilisticWeightedFTS],
-                                benchmark_models=False,
-                                #transformations=[tdiff],
-                                orders=[1], #[1, 2, 3],
-                                partitions=[20], #np.arange(10, 100, 5),
+                                #methods=[song.ConventionalFTS], #[pwfts.ProbabilisticWeightedFTS],
+                                benchmark_models=True,
+                                benchmark_methods=[naive.Naive, arima.ARIMA, arima.ARIMA],
+                                #benchmark_methods=[arima.ARIMA],
+                                benchmark_methods_parameters=[1, (1,0,0), (1,0,1)], #(2,0,1),(2,0,2)],
+                                #benchmark_methods_parameters=[(1,0,0)],
+                                transformations=[None, tdiff],
+                                orders=[1, 2, 3],
+                                partitions=[35], #np.arange(10, 100, 5),
                                 progress=True, type='point',
                                 #steps_ahead=[1,4,7,10], #steps_ahead=[1]
                                 #distributed=True, nodes=['192.168.0.110', '192.168.0.105','192.168.0.106'],
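
A hedged reading of the new arguments, inferred from the call rather than documented here: each entry of `benchmark_methods` appears to be paired positionally with `benchmark_methods_parameters`, so `naive.Naive` gets order 1 and the two `arima.ARIMA` entries get the (p, d, q) tuples (1,0,0) and (1,0,1). A trivial sketch of that assumed pairing, with strings standing in for the classes:

    # Stand-ins for the benchmark classes; only the positional
    # pairing of method -> parameter is illustrated.
    methods = ['Naive', 'ARIMA', 'ARIMA']
    parameters = [1, (1, 0, 0), (1, 0, 1)]

    for method, param in zip(methods, parameters):
        print(method, '<-', param)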