ARIMA façade for benchmarks; Sliding Window benchmarks; small bugfixes and optimizations

Petrônio Cândido de Lima e Silva 2017-03-21 23:17:06 -03:00
parent 5c8c80cd8d
commit 5a59d91816
8 changed files with 235 additions and 103 deletions


@@ -117,7 +117,7 @@ def pmf_to_cdf(density):
         tmp = []
         prev = 0
         for col in density.columns:
-            prev += density[col][row]
+            prev += density[col][row] if not np.isnan(density[col][row]) else 0
             tmp.append( prev )
         ret.append(tmp)
     df = pd.DataFrame(ret, columns=density.columns)
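
Note: this change makes pmf_to_cdf() treat NaN cells as zero mass, instead of letting one missing value turn the whole cumulative row into NaN. A minimal, self-contained sketch of the same accumulation; the toy `density` frame and its bin labels are illustrative, not from the library:

import numpy as np
import pandas as pd

# toy probability mass function: one row per forecast, one column per histogram bin
density = pd.DataFrame([[0.2, np.nan, 0.8],
                        [0.5, 0.5, np.nan]], columns=[10, 20, 30])

ret = []
for row in density.index:
    tmp = []
    prev = 0
    for col in density.columns:
        # treat missing mass as zero instead of propagating NaN through the running sum
        prev += density[col][row] if not np.isnan(density[col][row]) else 0
        tmp.append(prev)
    ret.append(tmp)

cdf = pd.DataFrame(ret, columns=density.columns)
print(cdf)  # each row is now a non-decreasing cumulative distribution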


@@ -43,6 +43,8 @@ class ARIMA(fts.FTS):
         self.trained_data = data #.tolist()

     def forecast(self, data):
+        if self.model_fit is None:
+            return np.nan
         ret = []
         for t in data:
             output = self.model_fit.forecast()
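
Note: the added guard lets the ARIMA façade degrade to NaN when the underlying model never finished fitting, instead of raising on `None`. A runnable sketch of that defensive pattern; the class and the `fitter` hook are stand-ins, not the library's API (the real façade presumably keeps a fitted statsmodels result in `model_fit`):

import numpy as np

class ARIMAFacade:
    """Minimal stand-in for the benchmark façade; in the real class `model_fit`
    would hold a fitted statsmodels result object (assumption based on the diff)."""

    def __init__(self):
        self.model_fit = None

    def train(self, data, fitter=None):
        try:
            # `fitter` stands in for the real call into the ARIMA backend; it may fail
            self.model_fit = fitter(data) if fitter else None
        except Exception:
            self.model_fit = None   # leave the façade unfitted on failure

    def forecast(self, data):
        if self.model_fit is None:   # same guard the commit adds
            return np.nan
        return [self.model_fit.forecast()[0] for _ in data]

m = ARIMAFacade()
m.train([1, 2, 3])          # no fitter supplied, so the model stays unfitted
print(m.forecast([4, 5]))   # nan instead of an AttributeError on None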


@@ -10,8 +10,8 @@ import matplotlib.cm as cmx
 import matplotlib.pyplot as plt
 from mpl_toolkits.mplot3d import Axes3D
 # from sklearn.cross_validation import KFold
+from pyFTS.partitioners import partitioner, Grid, Huarng, Entropy, FCM
 from pyFTS.benchmarks import Measures, naive, arima, ResidualAnalysis, ProbabilityDistribution
-from pyFTS.partitioners import Grid
 from pyFTS.common import Membership, FuzzySet, FLR, Transformations, Util
 from pyFTS import fts, chen, yu, ismailefendi, sadaei, hofts, hwang, pwfts, ifts
@@ -62,32 +62,41 @@ def external_point_sliding_window(models, parameters, data, windowsize,train=0.8
                 u[_key] = []
                 times[_key] = []

-            times[_key].append(_end - _start)
+            _tdiff = _end - _start
+            try:
                 _start = time.time()
                 _rmse, _smape, _u = get_point_statistics(test, model, None)
                 _end = time.time()
                 rmse[_key].append(_rmse)
                 smape[_key].append(_smape)
                 u[_key].append(_u)
-                times[_key].append(_end - _start)
-                if dump: print(_rmse, _smape, _u)
+                _tdiff += _end - _start
+                times[_key].append(_tdiff)
+                if dump: print(_rmse, _smape, _u, _tdiff)
+            except:
+                rmse[_key].append(np.nan)
+                smape[_key].append(np.nan)
+                u[_key].append(np.nan)
+                times[_key].append(np.nan)

     ret = []
     for k in sorted(objs.keys()):
+        try:
             mod = []
             mfts = objs[k]
             mod.append(mfts.shortname)
-            mod.append(round(np.nanmean(rmse[k]), 2))
-            mod.append(round(np.nanstd(rmse[k]), 2))
-            mod.append(round(np.nanmean(smape[k]), 2))
-            mod.append(round(np.nanstd(smape[k]), 2))
-            mod.append(round(np.nanmean(u[k]), 2))
-            mod.append(round(np.nanstd(u[k]), 2))
-            mod.append(round(np.nanmean(times[k]), 4))
+            mod.append(np.round(np.nanmean(rmse[k]), 2))
+            mod.append(np.round(np.nanstd(rmse[k]), 2))
+            mod.append(np.round(np.nanmean(smape[k]), 2))
+            mod.append(np.round(np.nanstd(smape[k]), 2))
+            mod.append(np.round(np.nanmean(u[k]), 2))
+            mod.append(np.round(np.nanstd(u[k]), 2))
+            mod.append(np.round(np.nanmean(times[k]), 4))
             ret.append(mod)
+        except Exception as ex:
+            print("Erro ao salvar ", k)
+            print("Exceção ", ex)

     columns = ["Model", "RMSEAVG", "RMSESTD", "SMAPEAVG", "SMAPESTD", "UAVG", "USTD", "TIMEAVG"]
@@ -154,6 +163,9 @@ def point_sliding_window(data, windowsize, train=0.8,models=None,partitioners=[G
                     smape[_key].append(_smape)
                     u[_key].append(_u)
                     times[_key].append(_end - _start)
+
+                    if dump: print(_rmse, _smape, _u)
+
             else:
                 for order in np.arange(1, max_order + 1):
                     if order >= mfts.minOrder:
@@ -176,6 +188,7 @@ def point_sliding_window(data, windowsize, train=0.8,models=None,partitioners=[G
                         if transformation is not None:
                             mfts.appendTransformation(transformation)

+                        try:
                             _start = time.time()
                             mfts.train(train, data_train_fs.sets, order=order)
                             _end = time.time()
@@ -188,25 +201,39 @@ def point_sliding_window(data, windowsize, train=0.8,models=None,partitioners=[G
                             smape[_key].append(_smape)
                             u[_key].append(_u)
                             times[_key].append(_end - _start)
+
+                            if dump: print(_rmse, _smape, _u)
+
+                        except Exception as e:
+                            print(e)
+                            rmse[_key].append(np.nan)
+                            smape[_key].append(np.nan)
+                            u[_key].append(np.nan)
+                            times[_key].append(np.nan)

     ret = []
     for k in sorted(objs.keys()):
+        try:
             mod = []
-            mfts = objs[k]
-            mod.append(mfts.shortname)
-            mod.append(mfts.order)
-            mod.append(mfts.partitioner.name)
-            mod.append(mfts.partitioner.partitions)
-            mod.append(round(np.nanmean(rmse[k]),2))
-            mod.append(round(np.nanstd(rmse[k]), 2))
-            mod.append(round(np.nanmean(smape[k]), 2))
-            mod.append(round(np.nanstd(smape[k]), 2))
-            mod.append(round(np.nanmean(u[k]), 2))
-            mod.append(round(np.nanstd(u[k]), 2))
-            mod.append(len(mfts))
-            mod.append(round(np.nanmean(times[k]),4))
+            tmp = objs[k]
+            mod.append(tmp.shortname)
+            mod.append(tmp.order)
+            mod.append(tmp.partitioner.name)
+            mod.append(tmp.partitioner.partitions)
+            mod.append(np.round(np.nanmean(rmse[k]),2))
+            mod.append(np.round(np.nanstd(rmse[k]), 2))
+            mod.append(np.round(np.nanmean(smape[k]), 2))
+            mod.append(np.round(np.nanstd(smape[k]), 2))
+            mod.append(np.round(np.nanmean(u[k]), 2))
+            mod.append(np.round(np.nanstd(u[k]), 2))
+            mod.append(np.round(np.nanmean(times[k]), 4))
+            mod.append(np.round(np.nanstd(times[k]), 4))
+            mod.append(len(tmp))
             ret.append(mod)
+        except Exception as ex:
+            print("Erro ao salvar ", k)
+            print("Exceção ", ex)

-    columns = ["Model","Order","Scheme","Partitions","RMSEAVG","RMSESTD","SMAPEAVG","SMAPESTD","UAVG","USTD","SIZE","TIMEAVG"]
+    columns = ["Model","Order","Scheme","Partitions","RMSEAVG","RMSESTD","SMAPEAVG","SMAPESTD","UAVG","USTD","TIMEAVG","TIMESTD","SIZE"]

     dat = pd.DataFrame(ret,columns=columns)
@@ -355,12 +382,13 @@ def interval_sliding_window(data, windowsize, train=0.8,models=None,partitioners
     sharpness = {}
     resolution = {}
     coverage = {}
+    times = {}

-    for ct, train,test in Util.sliding_window(data, windowsize, train):
+    for ct, training,test in Util.sliding_window(data, windowsize, train):
         for partition in partitions:
             for partitioner in partitioners:
                 pttr = str(partitioner.__module__).split('.')[-1]
-                data_train_fs = partitioner(train, partition, transformation=transformation)
+                data_train_fs = partitioner(training, partition, transformation=transformation)

                 for count, model in enumerate(models, start=0):
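
Note: the loop variable is renamed to `training` so it no longer shadows the `train` ratio argument that Util.sliding_window still needs on every window. A sketch of a sliding-window generator with the same calling convention; the implementation here is illustrative, not the library's:

import numpy as np

def sliding_window(data, windowsize, train=0.8):
    # yield (window start, training slice, test slice) for consecutive windows
    cut = int(round(windowsize * train))
    for start in np.arange(0, len(data) - windowsize + 1, windowsize):
        window = data[start:start + windowsize]
        yield start, window[:cut], window[cut:]

data = np.arange(100)
for ct, training, test in sliding_window(data, windowsize=50, train=0.8):
    print(ct, len(training), len(test))   # 0 40 10  /  50 40 10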
@@ -378,16 +406,24 @@ def interval_sliding_window(data, windowsize, train=0.8,models=None,partitioners
                         sharpness[_key] = []
                         resolution[_key] = []
                         coverage[_key] = []
+                        times[_key] = []

                     if transformation is not None:
                         mfts.appendTransformation(transformation)

-                    mfts.train(train, data_train_fs.sets)
+                    _start = time.time()
+                    mfts.train(training, data_train_fs.sets)
+                    _end = time.time()
+                    _tdiff = _end - _start

+                    _start = time.time()
                     _sharp, _res, _cov = get_interval_statistics(test, mfts)
+                    _end = time.time()
+                    _tdiff += _end - _start

                     sharpness[_key].append(_sharp)
                     resolution[_key].append(_res)
                     coverage[_key].append(_cov)
+                    times[_key].append(_tdiff)

                 else:
                     for order in np.arange(1, max_order + 1):
@@ -404,16 +440,25 @@ def interval_sliding_window(data, windowsize, train=0.8,models=None,partitioners
                             sharpness[_key] = []
                             resolution[_key] = []
                             coverage[_key] = []
+                            times[_key] = []

                             if transformation is not None:
                                 mfts.appendTransformation(transformation)

-                            mfts.train(train, data_train_fs.sets, order=order)
+                            _start = time.time()
+                            mfts.train(training, data_train_fs.sets, order=order)
+                            _end = time.time()
+                            _tdiff = _end - _start

+                            _start = time.time()
                             _sharp, _res, _cov = get_interval_statistics(test, mfts)
+                            _end = time.time()
+                            _tdiff += _end - _start

                             sharpness[_key].append(_sharp)
                             resolution[_key].append(_res)
                             coverage[_key].append(_cov)
+                            times[_key].append(_tdiff)

     ret = []
     for k in sorted(objs.keys()):
@@ -429,10 +474,12 @@ def interval_sliding_window(data, windowsize, train=0.8,models=None,partitioners
         mod.append(round(np.nanstd(resolution[k]), 2))
         mod.append(round(np.nanmean(coverage[k]), 2))
         mod.append(round(np.nanstd(coverage[k]), 2))
+        mod.append(round(np.nanmean(times[k]), 2))
+        mod.append(round(np.nanstd(times[k]), 2))
         mod.append(len(mfts))
         ret.append(mod)

-    columns = ["Model","Order","Scheme","Partitions","SHARPAVG","SHARPSTD","RESAVG","RESSTD","COVAVG","COVSTD","SIZE"]
+    columns = ["Model","Order","Scheme","Partitions","SHARPAVG","SHARPSTD","RESAVG","RESSTD","COVAVG","COVSTD","TIMEAVG","TIMESTD","SIZE"]

     dat = pd.DataFrame(ret,columns=columns)
@@ -566,7 +613,7 @@ def plot_probability_distributions(pmfs, lcolors, tam=[15, 7]):
     ax.legend(handles0, labels0)


-def ahead_sliding_window(data, windowsize, train=0.9,models=None, resolution = None, partitioners=[Grid.GridPartitioner],
+def ahead_sliding_window(data, windowsize, train, steps, models=None, resolution = None, partitioners=[Grid.GridPartitioner],
                          partitions=[10], max_order=3,transformation=None,indexer=None,dump=False,
                          save=False, file=None):
     if models is None:
@@ -576,8 +623,8 @@ def ahead_sliding_window(data, windowsize, train=0.9,models=None, resolution = N
     lcolors = {}
     crps_interval = {}
     crps_distr = {}
-    steps = int(round(windowsize*(1.0-train),0))
+    times1 = {}
+    times2 = {}

     for ct, train,test in Util.sliding_window(data, windowsize, train):
         for partition in partitions:
@@ -600,15 +647,26 @@ def ahead_sliding_window(data, windowsize, train=0.9,models=None, resolution = N
                         lcolors[_key] = colors[count % ncol]
                         crps_interval[_key] = []
                         crps_distr[_key] = []
+                        times1[_key] = []
+                        times2[_key] = []

                     if transformation is not None:
                         mfts.appendTransformation(transformation)

+                    _start = time.time()
                     mfts.train(train, data_train_fs.sets)
+                    _end = time.time()
+                    _tdiff = _end - _start

-                    _crps1, _crps2 = get_distribution_statistics(test,mfts,steps=steps,resolution=resolution)
+                    _crps1, _crps2, _t1, _t2 = get_distribution_statistics(test,mfts,steps=steps,resolution=resolution)

                     crps_interval[_key].append(_crps1)
                     crps_distr[_key].append(_crps2)
+                    times1[_key] = _tdiff + _t1
+                    times2[_key] = _tdiff + _t2
+
+                    if dump: print(_crps1, _crps2, _tdiff, _t1, _t2)

                 else:
                     for order in np.arange(1, max_order + 1):
@@ -624,32 +682,49 @@ def ahead_sliding_window(data, windowsize, train=0.9,models=None, resolution = N
                             lcolors[_key] = colors[count % ncol]
                             crps_interval[_key] = []
                             crps_distr[_key] = []
+                            times1[_key] = []
+                            times2[_key] = []

                             if transformation is not None:
                                 mfts.appendTransformation(transformation)

+                            _start = time.time()
                             mfts.train(train, data_train_fs.sets, order=order)
+                            _end = time.time()
+                            _tdiff = _end - _start

-                            _crps1, _crps2 = get_distribution_statistics(test,mfts,steps=steps,resolution=resolution)
+                            _crps1, _crps2, _t1, _t2 = get_distribution_statistics(test, mfts, steps=steps,
+                                                                                    resolution=resolution)

                             crps_interval[_key].append(_crps1)
                             crps_distr[_key].append(_crps2)
+                            times1[_key] = _tdiff + _t1
+                            times2[_key] = _tdiff + _t2
+
+                            if dump: print(_crps1, _crps2, _tdiff, _t1, _t2)

     ret = []
     for k in sorted(objs.keys()):
+        try:
             mod = []
             mfts = objs[k]
             mod.append(mfts.shortname)
             mod.append(mfts.order)
             mod.append(mfts.partitioner.name)
             mod.append(mfts.partitioner.partitions)
-            mod.append(round(np.nanmean(crps_interval[k]),2))
-            mod.append(round(np.nanstd(crps_interval[k]), 2))
-            mod.append(round(np.nanmean(crps_distr[k]), 2))
-            mod.append(round(np.nanstd(crps_distr[k]), 2))
+            mod.append(np.round(np.nanmean(crps_interval[k]),2))
+            mod.append(np.round(np.nanstd(crps_interval[k]), 2))
+            mod.append(np.round(np.nanmean(crps_distr[k]), 2))
+            mod.append(np.round(np.nanstd(crps_distr[k]), 2))
             mod.append(len(mfts))
+            mod.append(np.round(np.nanmean(times1[k]), 4))
+            mod.append(np.round(np.nanmean(times2[k]), 4))
             ret.append(mod)
+        except Exception as e:
+            print('Erro: %s' % e)

-    columns = ["Model","Order","Scheme","Partitions","CRPS1AVG","CRPS1STD","CRPS2AVG","CRPS2STD","SIZE"]
+    columns = ["Model","Order","Scheme","Partitions","CRPS1AVG","CRPS1STD","CRPS2AVG","CRPS2STD","SIZE","TIME1AVG","TIME2AVG"]

     dat = pd.DataFrame(ret,columns=columns)
@@ -694,20 +769,40 @@ def all_ahead_forecasters(data_train, data_test, partitions, start, steps, resol
     print_distribution_statistics(data_test[start:], objs, steps, resolution)

-    #plotComparedIntervalsAhead(data_test, objs, lcolors, distributions=, save=save, file=file, tam=tam, intervals=True)
+    plotComparedIntervalsAhead(data_test, objs, lcolors, distributions=distributions, time_from=start, time_to=steps,
+                               interpol=False, save=save, file=file, tam=tam, resolution=resolution, option=option)


 def get_distribution_statistics(original, model, steps, resolution):
     ret = list()
+    try:
+        _s1 = time.time()
         densities1 = model.forecastAheadDistribution(original,steps,resolution, parameters=3)
+        _e1 = time.time()
+        ret.append(round(Measures.crps(original, densities1), 3))
+        ret.append(round(_e1 - _s1, 3))
+    except Exception as e:
+        print('Erro: ', e)
+        ret.append(np.nan)
+        ret.append(np.nan)
+    try:
+        _s2 = time.time()
         densities2 = model.forecastAheadDistribution(original, steps, resolution, parameters=2)
-    ret.append( round(Measures.crps(original, densities1), 3))
+        _e2 = time.time()
         ret.append( round(Measures.crps(original, densities2), 3))
+        ret.append(round(_e2 - _s2, 3))
+    except:
+        ret.append(np.nan)
+        ret.append(np.nan)
     return ret


 def print_distribution_statistics(original, models, steps, resolution):
     ret = "Model & Order & Interval & Distribution \\\\ \n"
     for fts in models:
-        _crps1, _crps2 = get_distribution_statistics(original, fts, steps, resolution)
+        _crps1, _crps2, _t1, _t2 = get_distribution_statistics(original, fts, steps, resolution)
         ret += fts.shortname + " & "
         ret += str(fts.order) + " & "
         ret += str(_crps1) + " & "
@@ -977,7 +1072,8 @@ def compareModelsTable(original, models_fo, models_ho):

 def simpleSearch_RMSE(train, test, model, partitions, orders, save=False, file=None, tam=[10, 15],
-                      plotforecasts=False, elev=30, azim=144, intervals=False,parameters=None):
+                      plotforecasts=False, elev=30, azim=144, intervals=False,parameters=None,
+                      partitioner=Grid.GridPartitioner,transformation=None,indexer=None):
     _3d = len(orders) > 1
     ret = []
     errors = np.array([[0 for k in range(len(partitions))] for kk in range(len(orders))])
@@ -996,10 +1092,11 @@ def simpleSearch_RMSE(train, test, model, partitions, orders, save=False, file=N
     for pc, p in enumerate(partitions, start=0):
-        sets = Grid.GridPartitioner(train, p).sets
+        sets = partitioner(train, p, transformation=transformation).sets
         for oc, o in enumerate(orders, start=0):
             fts = model("q = " + str(p) + " n = " + str(o))
-            fts.train(train, sets, o,parameters=parameters)
+            fts.appendTransformation(transformation)
+            fts.train(train, sets, o, parameters=parameters)
             if not intervals:
                 forecasted = fts.forecast(test)
                 if not fts.hasSeasonality:
@@ -1041,6 +1138,7 @@ def simpleSearch_RMSE(train, test, model, partitions, orders, save=False, file=N
         ax0.plot(errors,partitions)

     ret.append(best)
     ret.append(forecasted_best)
+    ret.append(min_rmse)

     # plt.tight_layout()
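
Note: simpleSearch_RMSE now takes the partitioner and transformation as arguments and also returns the best RMSE it found. A condensed sketch of the partitions × orders grid search it performs; `build_model` and `get_rmse` are hypothetical stand-ins for the training and scoring steps:

import numpy as np

def simple_search_rmse(train, test, build_model, partitions, orders, get_rmse):
    errors = np.zeros((len(orders), len(partitions)))
    best, forecasted_best, min_rmse = None, None, np.inf
    for pc, p in enumerate(partitions):
        for oc, o in enumerate(orders):
            model = build_model(p, o)               # train a candidate configuration
            rmse, forecasted = get_rmse(model, test)
            errors[oc, pc] = rmse
            if rmse < min_rmse:                     # keep the best configuration seen so far
                best, forecasted_best, min_rmse = model, forecasted, rmse
    return best, forecasted_best, min_rmse          # min_rmse is the extra value the commit returns

# toy usage with stand-in callables
build = lambda p, o: ("model", p, o)
rmse_of = lambda m, test: (abs(m[1] - 35) + m[2], None)
print(simple_search_rmse(None, None, build, [10, 35, 60], [1, 2], rmse_of))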


@@ -206,8 +206,10 @@ class SortedCollection(object):

     def inside(self, ge, le):
         g = bisect_right(self._keys, ge)
         l = bisect_left(self._keys, le)
-        if g != len(self) and l != len(self):
+        if g != len(self) and l != len(self) and g != l:
             return self._items[g : l]
+        elif g != len(self) and l != len(self) and g == l:
+            return [ self._items[g] ]
         elif g != len(self):
             return self._items[g-1: l]
         elif l != len(self):
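
Note: the new `g == l` branch covers queries where bisect_right(ge) and bisect_left(le) land on the same index, which previously produced an empty slice; the method now returns that single neighbouring item. A standalone sketch of the same bisect arithmetic on a plain sorted list:

from bisect import bisect_left, bisect_right

keys = [1.0, 5.0, 9.0]
items = ["a", "b", "c"]          # kept sorted by key, as in SortedCollection

def inside(ge, le):
    g = bisect_right(keys, ge)   # first index with key strictly greater than ge
    l = bisect_left(keys, le)    # first index with key >= le
    if g != len(keys) and l != len(keys) and g != l:
        return items[g:l]
    if g != len(keys) and l != len(keys) and g == l:
        return [items[g]]        # the old code returned an empty slice here
    return items[g:]             # remaining boundary cases simplified for this sketch

print(inside(2.0, 8.0))   # ['b']  (one key strictly between the bounds)
print(inside(4.0, 5.0))   # ['b']  (g == l: the new single-item branch)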

fts.py

@@ -66,6 +66,7 @@ class FTS(object):
         return ret

     def appendTransformation(self, transformation):
+        if transformation is not None:
             self.transformations.append(transformation)

     def doTransformations(self,data,params=None,updateUoD=False):

ifts.py

@@ -73,6 +73,16 @@ class IntervalFTS(hofts.HighOrderFTS):
                 mb = FuzzySet.fuzzyInstance(instance, self.sets)
                 tmp = np.argwhere(mb)
                 idx = np.ravel(tmp)  # flat the array
+
+                if idx.size == 0:  # the element is out of the bounds of the Universe of Discourse
+                    if instance <= self.sets[0].lower:
+                        idx = [0]
+                    elif instance >= self.sets[-1].upper:
+                        idx = [len(self.sets) - 1]
+                    else:
+                        raise Exception(instance)
+
                 lags[count] = idx
                 count = count + 1
@@ -98,6 +108,15 @@ class IntervalFTS(hofts.HighOrderFTS):
             mv = FuzzySet.fuzzyInstance(ndata[k], self.sets)
             tmp = np.argwhere(mv)
             idx = np.ravel(tmp)
+
+            if idx.size == 0:  # the element is out of the bounds of the Universe of Discourse
+                if ndata[k] <= self.sets[0].lower:
+                    idx = [0]
+                elif ndata[k] >= self.sets[-1].upper:
+                    idx = [len(self.sets) - 1]
+                else:
+                    raise Exception(ndata[k])
+
             for kk in idx:
                 flrg = hofts.HighOrderFLRG(self.order)
                 flrg.appendLHS(self.sets[kk])


@@ -327,12 +327,13 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
                 idx = np.ravel(tmp)  # flatten the array

                 if idx.size == 0:  # the element is out of the bounds of the Universe of Discourse
-                    if instance <= np.ceil(self.sets[0].lower):
+                    if math.isclose(instance, self.sets[0].lower) or instance < self.sets[0].lower:
                         idx = [0]
-                    elif instance >= np.floor(self.sets[-1].upper):
+                    elif math.isclose(instance, self.sets[-1].upper) or instance > self.sets[-1].upper:
                         idx = [len(self.sets) - 1]
                     else:
-                        raise Exception(instance)
+                        raise Exception("Data exceed the known bounds [%s, %s] of universe of discourse: %s" %
+                                        (self.sets[0].lower, self.sets[-1].upper, instance))

                 lags[count] = idx
                 count += 1
@@ -365,12 +366,13 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
             idx = np.ravel(tmp)  # flatten the array

             if idx.size == 0:  # the element is out of the bounds of the Universe of Discourse
-                if ndata[k] <= self.sets[0].lower:
+                if math.isclose(ndata[k], self.sets[0].lower) or ndata[k] < self.sets[0].lower:
                     idx = [0]
-                elif ndata[k] >= self.sets[-1].upper:
+                elif math.isclose(ndata[k], self.sets[-1].upper) or ndata[k] > self.sets[-1].upper:
                     idx = [len(self.sets) - 1]
                 else:
-                    raise Exception(ndata[k])
+                    raise Exception("Data exceed the known bounds [%s, %s] of universe of discourse: %s" %
+                                    (self.sets[0].lower, self.sets[-1].upper, ndata[k]))

             for kk in idx:
                 flrg = hofts.HighOrderFLRG(self.order)
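
Note: replacing `instance <= np.ceil(self.sets[0].lower)` with `math.isclose(...) or instance < ...` restricts the out-of-bounds clamp to values that really sit at or beyond the universe of discourse, instead of silently clamping anything below the next integer, and it keeps tolerating floating-point values that are only numerically at the boundary. A small illustration with made-up bounds; the helper names are hypothetical:

import math
import numpy as np

lower, upper = 4998.3, 6232.7          # made-up bounds of the universe of discourse

def clamp_index_old(x, nsets):
    if x <= np.ceil(lower):            # everything up to 4999.0 counted as "below"
        return [0]
    elif x >= np.floor(upper):         # everything from 6232.0 up counted as "above"
        return [nsets - 1]
    raise Exception(x)

def clamp_index_new(x, nsets):
    if math.isclose(x, lower) or x < lower:
        return [0]
    elif math.isclose(x, upper) or x > upper:
        return [nsets - 1]
    raise Exception("Data exceed the known bounds [%s, %s]: %s" % (lower, upper, x))

print(clamp_index_old(4998.9, 10))     # [0] -- clamped although 4998.9 is inside the bounds
try:
    clamp_index_new(4998.9, 10)        # inside the bounds, so no silent clamp: it raises
except Exception as ex:
    print(ex)
print(clamp_index_new(4998.3000000001, 10))   # [0] via math.isclose right at the boundary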


@@ -41,30 +41,38 @@ nasdaq = np.array(nasdaqpd["avg"][:5000])
 #, ,

-#diff = Transformations.Differential(1)
+diff = Transformations.Differential(1)

-bchmk.external_point_sliding_window([naive.Naive, arima.ARIMA, arima.ARIMA, arima.ARIMA, arima.ARIMA, arima.ARIMA, arima.ARIMA],
-                                    [None, (1,0,0),(1,1,0),(2,0,0), (2,1,0), (1,1,1), (1,0,1)],
-                                    nasdaq,2000,train=0.8, #transformation=diff, #models=[pwfts.ProbabilisticWeightedFTS], # #
-                                    dump=True, save=True, file="experiments/arima_nasdaq.csv")
+#bchmk.external_point_sliding_window([naive.Naive, arima.ARIMA, arima.ARIMA, arima.ARIMA, arima.ARIMA, arima.ARIMA, arima.ARIMA],
+#                                    [None, (1,0,0),(1,1,0),(2,0,0), (2,1,0), (1,1,1), (1,0,1)],
+#                                    gauss,2000,train=0.8, dump=True, save=True, file="experiments/arima_gauss.csv")

-#bchmk.point_sliding_window(taiex,2000,train=0.8, #transformation=diff, #models=[pwfts.ProbabilisticWeightedFTS], # #
+bchmk.interval_sliding_window(nasdaq,2000,train=0.8, #transformation=diff, #models=[pwfts.ProbabilisticWeightedFTS], # #
+                     partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ],
+                     partitions= np.arange(10,200,step=5), #
+                     dump=True, save=True, file="experiments/nasdaq_interval.csv")
+
+#3bchmk.ahead_sliding_window(taiex,2000,train=0.8, steps=20, resolution=250, #transformation=diff, #models=[pwfts.ProbabilisticWeightedFTS], # #
 #                     partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ],
-#                     partitions= [45,55, 65, 75, 85, 95,105,115,125,135, 150], #np.arange(5,150,step=10), #
-#                     dump=True, save=True, file="experiments/taiex_point_new.csv")
+#                     partitions= np.arange(10,200,step=10), #
+#                     dump=True, save=True, file="experiments/taiex_ahead.csv")


 #bchmk.allPointForecasters(taiex_treino, taiex_treino, 95, #transformation=diff,
 #                          models=[ naive.Naive, pfts.ProbabilisticFTS, pwfts.ProbabilisticWeightedFTS],
 #                          statistics=True, residuals=False, series=False)

-#data_train_fs = Grid.GridPartitioner(taiex_treino, 10, transformation=diff).sets
+#data_train_fs = Grid.GridPartitioner(nasdaq[:1600], 95).sets

-#fts1 = pfts.ProbabilisticFTS("")
+#fts1 = pwfts.ProbabilisticWeightedFTS("")

 #fts1.appendTransformation(diff)

-#fts1.train(taiex_treino, data_train_fs, order=1)
+#fts1.train(nasdaq[:1600], data_train_fs, order=1)
+
+#_crps1, _crps2, _t1, _t2 = bchmk.get_distribution_statistics(nasdaq[1600:2000], fts1, steps=20, resolution=200)
+
+#print(_crps1, _crps2, _t1, _t2)

 #print(fts1.forecast([5000, 5000]))