- Issue #3 - Code documentation with PEP 257 compliance

- Benchmarks refactoring and optimizations
- Probabilistic package, with Kernel Density Estimation
Petrônio Cândido de Lima e Silva 2017-05-08 13:12:08 -03:00
parent ea7a0bbd62
commit ff67356d64
8 changed files with 428 additions and 192 deletions

View File

@@ -2,12 +2,68 @@
Benchmark utility functions
"""
import matplotlib.cm as cmx
import matplotlib.colors as pltcolors
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from mpl_toolkits.mplot3d import Axes3D
from copy import deepcopy
from pyFTS.common import Util
def extract_measure(dataframe,measure,data_columns):
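    """Extract, from an analytic results dataframe, the raw values of a given measure."""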
if not dataframe.empty:
tmp = dataframe[(dataframe.Measure == measure)][data_columns].to_dict(orient="records")[0]
ret = [k for k in tmp.values()]
return ret
else:
return None
def find_best(dataframe, criteria, ascending):
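    """Find the best configuration (scheme and partitions) of each model and order, ranked by the given criteria."""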
models = dataframe.Model.unique()
orders = dataframe.Order.unique()
ret = {}
for m in models:
for o in orders:
mod = {}
df = dataframe[(dataframe.Model == m) & (dataframe.Order == o)].sort_values(by=criteria, ascending=ascending)
if not df.empty:
_key = str(m) + str(o)
best = df.loc[df.index[0]]
mod['Model'] = m
mod['Order'] = o
mod['Scheme'] = best["Scheme"]
mod['Partitions'] = best["Partitions"]
ret[_key] = mod
return ret
def point_dataframe_sintetic_columns():
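    """Column names for sintetic (aggregated) point-forecasting results."""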
return ["Model", "Order", "Scheme", "Partitions", "Size", "RMSEAVG", "RMSESTD", "SMAPEAVG", "SMAPESTD", "UAVG",
"USTD", "TIMEAVG", "TIMESTD"]
def point_dataframe_analytic_columns(experiments):
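    """Column names for analytic (one value per experiment) point-forecasting results."""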
columns = [str(k) for k in np.arange(0, experiments)]
columns.insert(0, "Model")
columns.insert(1, "Order")
columns.insert(2, "Scheme")
columns.insert(3, "Partitions")
columns.insert(4, "Size")
columns.insert(5, "Measure")
return columns
def save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, times, u):
    """
    Create a dataframe to store the benchmark results
@@ -32,9 +88,14 @@ def save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, t
                mfts = objs[k]
                mod.append(mfts.shortname)
                mod.append(mfts.order)
                if not mfts.benchmark_only:
                    mod.append(mfts.partitioner.name)
                    mod.append(mfts.partitioner.partitions)
                    mod.append(len(mfts))
                else:
                    mod.append('-')
                    mod.append('-')
                    mod.append('-')
                mod.append(np.round(np.nanmean(rmse[k]), 2))
                mod.append(np.round(np.nanstd(rmse[k]), 2))
                mod.append(np.round(np.nanmean(smape[k]), 2))
@@ -42,38 +103,315 @@ def save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, t
                mod.append(np.round(np.nanmean(u[k]), 2))
                mod.append(np.round(np.nanstd(u[k]), 2))
                mod.append(np.round(np.nanmean(times[k]), 4))
                mod.append(np.round(np.nanstd(times[k]), 4))
                ret.append(mod)
            except Exception as ex:
                print("Error saving ", k)
                print("Exception ", ex)
        columns = point_dataframe_sintetic_columns()
    else:
        for k in sorted(objs.keys()):
            try:
                mfts = objs[k]
                n = mfts.shortname
                o = mfts.order
                if not mfts.benchmark_only:
                    s = mfts.partitioner.name
                    p = mfts.partitioner.partitions
                    l = len(mfts)
                else:
                    s = '-'
                    p = '-'
                    l = '-'
                tmp = [n, o, s, p, l, 'RMSE']
                tmp.extend(rmse[k])
                ret.append(deepcopy(tmp))
                tmp = [n, o, s, p, l, 'SMAPE']
                tmp.extend(smape[k])
                ret.append(deepcopy(tmp))
                tmp = [n, o, s, p, l, 'U']
                tmp.extend(u[k])
                ret.append(deepcopy(tmp))
                tmp = [n, o, s, p, l, 'TIME']
                tmp.extend(times[k])
                ret.append(deepcopy(tmp))
            except Exception as ex:
                print("Error saving ", k)
                print("Exception ", ex)
        columns = point_dataframe_analytic_columns(experiments)
    dat = pd.DataFrame(ret, columns=columns)
    if save: dat.to_csv(Util.uniquefilename(file), sep=";", index=False)
    return dat


def cast_dataframe_to_sintetic_point(infile, outfile, experiments):
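    """Read an analytic point-forecasting results file and save its sintetic (aggregated) counterpart."""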
columns = point_dataframe_analytic_columns(experiments)
dat = pd.read_csv(infile, sep=";", usecols=columns)
models = dat.Model.unique()
orders = dat.Order.unique()
schemes = dat.Scheme.unique()
partitions = dat.Partitions.unique()
data_columns = analytical_data_columns(experiments)
ret = []
for m in models:
for o in orders:
for s in schemes:
for p in partitions:
mod = []
df = dat[(dat.Model == m) & (dat.Order == o) & (dat.Scheme == s) & (dat.Partitions == p)]
if not df.empty:
rmse = extract_measure(df, 'RMSE', data_columns)
smape = extract_measure(df, 'SMAPE', data_columns)
u = extract_measure(df, 'U', data_columns)
times = extract_measure(df, 'TIME', data_columns)
mod.append(m)
mod.append(o)
mod.append(s)
mod.append(p)
mod.append(extract_measure(df, 'RMSE', ['Size'])[0])
mod.append(np.round(np.nanmean(rmse), 2))
mod.append(np.round(np.nanstd(rmse), 2))
mod.append(np.round(np.nanmean(smape), 2))
mod.append(np.round(np.nanstd(smape), 2))
mod.append(np.round(np.nanmean(u), 2))
mod.append(np.round(np.nanstd(u), 2))
mod.append(np.round(np.nanmean(times), 4))
mod.append(np.round(np.nanstd(times), 4))
ret.append(mod)
dat = pd.DataFrame(ret, columns=point_dataframe_sintetic_columns())
dat.to_csv(Util.uniquefilename(outfile), sep=";", index=False)
def analytical_data_columns(experiments):
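    """Names of the columns that hold the raw values of each experiment."""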
data_columns = [str(k) for k in np.arange(0, experiments)]
return data_columns
def plot_dataframe_point(file_synthetic, file_analytic, experiments):
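    """Boxplot RMSE, SMAPE, U and execution time of the best models found in the results files."""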
fig, axes = plt.subplots(nrows=4, ncols=1, figsize=[6, 8])
axes[0].set_title('RMSE')
axes[1].set_title('SMAPE')
axes[2].set_title('U Statistic')
axes[3].set_title('Execution Time')
dat_syn = pd.read_csv(file_synthetic, sep=";", usecols=point_dataframe_sintetic_columns())
bests = find_best(dat_syn, ['UAVG','RMSEAVG','USTD','RMSESTD'], [1,1,1,1])
dat_ana = pd.read_csv(file_analytic, sep=";", usecols=point_dataframe_analytic_columns(experiments))
data_columns = analytical_data_columns(experiments)
rmse = []
smape = []
u = []
times = []
labels = []
for b in bests.keys():
best = bests[b]
tmp = dat_ana[(dat_ana.Model == best["Model"]) & (dat_ana.Order == best["Order"])
& (dat_ana.Scheme == best["Scheme"]) & (dat_ana.Partitions == best["Partitions"])]
rmse.append( extract_measure(tmp,'RMSE',data_columns) )
smape.append(extract_measure(tmp, 'SMAPE', data_columns))
u.append(extract_measure(tmp, 'U', data_columns))
times.append(extract_measure(tmp, 'TIME', data_columns))
labels.append(best["Model"] + " " + str(best["Order"]))
axes[0].boxplot(rmse, labels=labels, showmeans=True)
axes[1].boxplot(smape, labels=labels, showmeans=True)
axes[2].boxplot(u, labels=labels, showmeans=True)
axes[3].boxplot(times, labels=labels, showmeans=True)
plt.show()
def save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, sintetic, times):
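    """Create a dataframe to store the benchmark results of interval forecasters."""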
ret = []
if sintetic:
for k in sorted(objs.keys()):
mod = []
mfts = objs[k]
mod.append(mfts.shortname)
mod.append(mfts.order)
if not mfts.benchmark_only:
mod.append(mfts.partitioner.name)
mod.append(mfts.partitioner.partitions)
l = len(mfts)
else:
mod.append('-')
mod.append('-')
l = '-'
mod.append(round(np.nanmean(sharpness[k]), 2))
mod.append(round(np.nanstd(sharpness[k]), 2))
mod.append(round(np.nanmean(resolution[k]), 2))
mod.append(round(np.nanstd(resolution[k]), 2))
mod.append(round(np.nanmean(coverage[k]), 2))
mod.append(round(np.nanstd(coverage[k]), 2))
mod.append(round(np.nanmean(times[k]), 2))
mod.append(round(np.nanstd(times[k]), 2))
mod.append(l)
ret.append(mod)
columns = interval_dataframe_sintetic_columns()
else:
for k in sorted(objs.keys()):
try:
mfts = objs[k]
n = mfts.shortname
o = mfts.order
if not mfts.benchmark_only:
s = mfts.partitioner.name
p = mfts.partitioner.partitions
l = len(mfts)
else:
s = '-'
p = '-'
l = '-'
tmp = [n, o, s, p, l, 'Sharpness']
tmp.extend(sharpness[k])
ret.append(deepcopy(tmp))
tmp = [n, o, s, p, l, 'Resolution']
tmp.extend(resolution[k])
ret.append(deepcopy(tmp))
tmp = [n, o, s, p, l, 'Coverage']
tmp.extend(coverage[k])
ret.append(deepcopy(tmp))
tmp = [n, o, s, p, l, 'TIME']
tmp.extend(times[k])
ret.append(deepcopy(tmp))
except Exception as ex:
print("Erro ao salvar ", k)
print("Exceção ", ex)
columns = interval_dataframe_analytic_columns(experiments)
    dat = pd.DataFrame(ret, columns=columns)
    if save: dat.to_csv(Util.uniquefilename(file), sep=";")
    return dat
def interval_dataframe_analytic_columns(experiments):
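    """Column names for analytic (one value per experiment) interval-forecasting results."""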
columns = [str(k) for k in np.arange(0, experiments)]
columns.insert(0, "Model")
columns.insert(1, "Order")
columns.insert(2, "Scheme")
columns.insert(3, "Partitions")
columns.insert(4, "Size")
columns.insert(5, "Measure")
return columns
def interval_dataframe_sintetic_columns():
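    """Column names for sintetic (aggregated) interval-forecasting results."""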
columns = ["Model", "Order", "Scheme", "Partitions", "SHARPAVG", "SHARPSTD", "RESAVG", "RESSTD", "COVAVG",
"COVSTD", "TIMEAVG", "TIMESTD", "SIZE"]
return columns
def save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, times1, times2, save, sintetic):
"""
Save benchmark results for m-step ahead probabilistic forecasters
:param experiments:
:param file:
:param objs:
:param crps_interval:
:param crps_distr:
:param times1:
:param times2:
:param save:
:param sintetic:
:return:
"""
ret = []
if sintetic:
        for k in sorted(objs.keys()):
            try:
                mod = []
mfts = objs[k]
mod.append(mfts.shortname)
mod.append(mfts.order)
if not mfts.benchmark_only:
mod.append(mfts.partitioner.name)
mod.append(mfts.partitioner.partitions)
l = len(mfts)
else:
mod.append('-')
mod.append('-')
l = '-'
mod.append(np.round(np.nanmean(crps_interval[k]), 2))
mod.append(np.round(np.nanstd(crps_interval[k]), 2))
mod.append(np.round(np.nanmean(crps_distr[k]), 2))
mod.append(np.round(np.nanstd(crps_distr[k]), 2))
mod.append(l)
mod.append(np.round(np.nanmean(times1[k]), 4))
mod.append(np.round(np.nanmean(times2[k]), 4))
ret.append(mod)
            except Exception as ex:
                print("Error saving ", k)
                print("Exception ", ex)
columns = ahead_dataframe_sintetic_columns()
else:
for k in sorted(objs.keys()):
try:
mfts = objs[k]
n = mfts.shortname
o = mfts.order
if not mfts.benchmark_only:
s = mfts.partitioner.name
p = mfts.partitioner.partitions
l = len(mfts)
else:
s = '-'
p = '-'
l = '-'
tmp = [n, o, s, p, l, 'CRPS_Interval']
tmp.extend(crps_interval[k])
ret.append(deepcopy(tmp))
tmp = [n, o, s, p, l, 'CRPS_Distribution']
tmp.extend(crps_distr[k])
ret.append(deepcopy(tmp))
tmp = [n, o, s, p, l, 'TIME_Interval']
tmp.extend(times1[k])
ret.append(deepcopy(tmp))
tmp = [n, o, s, p, l, 'TIME_Distribution']
tmp.extend(times2[k])
ret.append(deepcopy(tmp))
except Exception as ex:
print("Erro ao salvar ", k)
print("Exceção ", ex)
columns = ahead_dataframe_analytic_columns(experiments)
dat = pd.DataFrame(ret, columns=columns)
if save: dat.to_csv(Util.uniquefilename(file), sep=";")
return dat
def ahead_dataframe_analytic_columns(experiments):
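    """Column names for analytic (one value per experiment) m-step ahead results."""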
columns = [str(k) for k in np.arange(0, experiments)]
columns.insert(0, "Model")
columns.insert(1, "Order")
columns.insert(2, "Scheme")
columns.insert(3, "Partitions")
columns.insert(4, "Size")
columns.insert(5, "Measure")
return columns
def ahead_dataframe_sintetic_columns():
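    """Column names for sintetic (aggregated) m-step ahead results."""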
columns = ["Model", "Order", "Scheme", "Partitions", "CRPS1AVG", "CRPS1STD", "CRPS2AVG", "CRPS2STD",
"SIZE", "TIME1AVG", "TIME2AVG"]
return columns

View File

@@ -4,21 +4,24 @@
"""Benchmarks to FTS methods""" """Benchmarks to FTS methods"""
import datetime
import time
from copy import deepcopy

import matplotlib.cm as cmx
import matplotlib.colors as pltcolors
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from mpl_toolkits.mplot3d import Axes3D

from probabilistic import ProbabilityDistribution
from pyFTS import song, chen, yu, ismailefendi, sadaei, hofts, pwfts, ifts, cheng, ensemble, hwang
from pyFTS.benchmarks import Measures, naive, arima, ResidualAnalysis, quantreg
from pyFTS.benchmarks import Util as bUtil  # aliased: pyFTS.common.Util is imported below
from pyFTS.common import Transformations, Util
# from sklearn.cross_validation import KFold
from pyFTS.partitioners import Grid
colors = ['grey', 'rosybrown', 'maroon', 'red', 'orange', 'yellow', 'olive', 'green',
          'cyan', 'blue', 'darkblue', 'purple', 'darkviolet']
@@ -369,62 +372,6 @@ def getProbabilityDistributionStatistics(pmfs, data):
ret += " \\\\ \n" ret += " \\\\ \n"
return ret return ret
def save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, sintetic, times):
ret = []
if sintetic:
for k in sorted(objs.keys()):
mod = []
mfts = objs[k]
mod.append(mfts.shortname)
mod.append(mfts.order)
mod.append(mfts.partitioner.name)
mod.append(mfts.partitioner.partitions)
mod.append(round(np.nanmean(sharpness[k]), 2))
mod.append(round(np.nanstd(sharpness[k]), 2))
mod.append(round(np.nanmean(resolution[k]), 2))
mod.append(round(np.nanstd(resolution[k]), 2))
mod.append(round(np.nanmean(coverage[k]), 2))
mod.append(round(np.nanstd(coverage[k]), 2))
mod.append(round(np.nanmean(times[k]), 2))
mod.append(round(np.nanstd(times[k]), 2))
mod.append(len(mfts))
ret.append(mod)
columns = ["Model", "Order", "Scheme", "Partitions", "SHARPAVG", "SHARPSTD", "RESAVG", "RESSTD", "COVAVG",
"COVSTD", "TIMEAVG", "TIMESTD", "SIZE"]
else:
for k in sorted(objs.keys()):
try:
mfts = objs[k]
tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts),
'Sharpness']
tmp.extend(sharpness[k])
ret.append(deepcopy(tmp))
tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts),
'Resolution']
tmp.extend(resolution[k])
ret.append(deepcopy(tmp))
tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts),
'Coverage']
tmp.extend(coverage[k])
ret.append(deepcopy(tmp))
tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts),
'TIME']
tmp.extend(times[k])
ret.append(deepcopy(tmp))
except Exception as ex:
print("Erro ao salvar ", k)
print("Exceção ", ex)
columns = [str(k) for k in np.arange(0, experiments)]
columns.insert(0, "Model")
columns.insert(1, "Order")
columns.insert(2, "Scheme")
columns.insert(3, "Partitions")
columns.insert(4, "Size")
columns.insert(5, "Measure")
dat = pd.DataFrame(ret, columns=columns)
if save: dat.to_csv(Util.uniquefilename(file), sep=";")
return dat
def interval_sliding_window(data, windowsize, train=0.8, models=None, partitioners=[Grid.GridPartitioner],
@@ -518,7 +465,7 @@ def interval_sliding_window(data, windowsize, train=0.8,models=None,partitioners
                    coverage[_key].append(_cov)
                    times[_key].append(_tdiff)
    return bUtil.save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, sintetic, times)
def all_interval_forecasters(data_train, data_test, partitions, max_order=3, save=False, file=None, tam=[20, 5],
@@ -637,80 +584,6 @@ def plot_probability_distributions(pmfs, lcolors, tam=[15, 7]):
    ax.legend(handles0, labels0)
def save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, times1, times2, save, sintetic):
"""
Save benchmark results for m-step ahead probabilistic forecasters
:param experiments:
:param file:
:param objs:
:param crps_interval:
:param crps_distr:
:param times1:
:param times2:
:param save:
:param sintetic:
:return:
"""
ret = []
if sintetic:
for k in sorted(objs.keys()):
try:
ret = []
for k in sorted(objs.keys()):
try:
mod = []
mfts = objs[k]
mod.append(mfts.shortname)
mod.append(mfts.order)
mod.append(mfts.partitioner.name)
mod.append(mfts.partitioner.partitions)
mod.append(np.round(np.nanmean(crps_interval[k]), 2))
mod.append(np.round(np.nanstd(crps_interval[k]), 2))
mod.append(np.round(np.nanmean(crps_distr[k]), 2))
mod.append(np.round(np.nanstd(crps_distr[k]), 2))
mod.append(len(mfts))
mod.append(np.round(np.nanmean(times1[k]), 4))
mod.append(np.round(np.nanmean(times2[k]), 4))
ret.append(mod)
except Exception as e:
                print('Error: %s' % e)
except Exception as ex:
print("Erro ao salvar ", k)
print("Exceção ", ex)
columns = ["Model", "Order", "Scheme", "Partitions", "CRPS1AVG", "CRPS1STD", "CRPS2AVG", "CRPS2STD",
"SIZE", "TIME1AVG", "TIME2AVG"]
else:
for k in sorted(objs.keys()):
try:
mfts = objs[k]
tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts), 'CRPS_Interval']
tmp.extend(crps_interval[k])
ret.append(deepcopy(tmp))
tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts), 'CRPS_Distribution']
tmp.extend(crps_distr[k])
ret.append(deepcopy(tmp))
tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts), 'TIME_Interval']
tmp.extend(times1[k])
ret.append(deepcopy(tmp))
tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts), 'TIME_Distribution']
tmp.extend(times2[k])
ret.append(deepcopy(tmp))
except Exception as ex:
print("Erro ao salvar ", k)
print("Exceção ", ex)
columns = [str(k) for k in np.arange(0, experiments)]
columns.insert(0, "Model")
columns.insert(1, "Order")
columns.insert(2, "Scheme")
columns.insert(3, "Partitions")
columns.insert(4, "Size")
columns.insert(5, "Measure")
dat = pd.DataFrame(ret, columns=columns)
if save: dat.to_csv(Util.uniquefilename(file), sep=";")
return dat
def ahead_sliding_window(data, windowsize, train, steps, models=None, resolution=None, partitioners=[Grid.GridPartitioner],
@@ -806,7 +679,7 @@ def ahead_sliding_window(data, windowsize, train, steps, models=None, resolution
                if dump: print(_crps1, _crps2, _tdiff, _t1, _t2)
    return bUtil.save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, times1, times2, save, sintetic)
def all_ahead_forecasters(data_train, data_test, partitions, start, steps, resolution=None, max_order=3, save=False, file=None, tam=[20, 5],
@@ -979,6 +852,7 @@ def plotCompared(original, forecasts, labels, title):
    ax.set_xlim([0, len(original)])
    ax.set_ylim([min(original), max(original)])

def SelecaoSimples_MenorRMSE(original, parameters, modelo):
    ret = []
    errors = []

View File

@@ -6,20 +6,16 @@ To enable a dispy cluster node:
python3 /usr/local/bin/dispynode.py -i [local IP] -d
"""
import datetime
import time

import dispy
import dispy.httpd
from copy import deepcopy
import numpy as np

from pyFTS.benchmarks import benchmarks, Util as bUtil
from pyFTS.common import Util
from pyFTS.partitioners import Grid
def run_point(mfts, partitioner, train_data, test_data, window_key=None, transformation=None, indexer=None):

View File

@@ -2,19 +2,17 @@
joblib Parallelized Benchmarks to FTS methods
"""
import datetime
import multiprocessing
import time
from copy import deepcopy

import numpy as np
from joblib import Parallel, delayed

from pyFTS.benchmarks import benchmarks
from pyFTS.benchmarks import Util as bUtil  # aliased: pyFTS.common.Util is imported below
from pyFTS.common import Util
from pyFTS.partitioners import Grid
def run_point(mfts, partitioner, train_data, test_data, transformation=None, indexer=None):
@@ -140,7 +138,7 @@ def point_sliding_window(data, windowsize, train=0.8, models=None, partitioners=
print("Process Duration: {0}".format(_process_end - _process_start)) print("Process Duration: {0}".format(_process_end - _process_start))
    return bUtil.save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, times, u)
def run_interval(mfts, partitioner, train_data, test_data, transformation=None, indexer=None):
@@ -267,7 +265,7 @@ def interval_sliding_window(data, windowsize, train=0.8, models=None, partitione
print("Process Duration: {0}".format(_process_end - _process_start)) print("Process Duration: {0}".format(_process_end - _process_start))
    return bUtil.save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, sintetic, times)
def run_ahead(mfts, partitioner, train_data, test_data, steps, resolution, transformation=None, indexer=None):
@@ -397,4 +395,4 @@ def ahead_sliding_window(data, windowsize, train, steps,resolution, models=None,
print("Process Duration: {0}".format(_process_end - _process_start)) print("Process Duration: {0}".format(_process_end - _process_start))
    return bUtil.save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, times1, times2, save, sintetic)

View File

@@ -98,7 +98,6 @@ class ProbabilityDistribution(object):
        axis.set_xlabel('Universe of Discourse')
        axis.set_ylabel('Probability')

    def __str__(self):
head = '|' head = '|'
body = '|' body = '|'

View File

probabilistic/kde.py (new file, 25 lines)

@@ -0,0 +1,25 @@
"""
Kernel Density Estimation
"""
class KernelSmoothing(object):
    """Kernel Density Estimation"""
    def __init__(self, h, data, method="epanechnikov"):
        self.h = h              # bandwidth
        self.data = data
        self.method = method    # kernel function name

    def kernel(self, u):
        """Kernel function, with support restricted to |u| <= 1."""
        if abs(u) > 1:
            return 0
        if self.method == "epanechnikov":
            return (3/4) * (1 - u**2)
        elif self.method == "uniform":
            return 0.5

    def probability(self, x):
        """Kernel density estimate at x: p(x) = sum(K((x - k)/h)) / (n*h)."""
        l = len(self.data)
        p = sum([self.kernel((x - k) / self.h) for k in self.data]) / (l * self.h)
        return p
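A minimal usage sketch of the estimator above (the sample data, bandwidth and evaluation point are illustrative assumptions, not part of this commit; it also assumes the probabilistic package is on the import path):

import numpy as np
from probabilistic import kde

data = np.random.normal(0, 1.0, 100)        # hypothetical sample
ks = kde.KernelSmoothing(h=0.5, data=data)  # Epanechnikov kernel by default
print(ks.probability(0.0))                  # estimated density at x = 0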

View File

@@ -35,12 +35,15 @@ os.chdir("/home/petronio/dados/Dropbox/Doutorado/Codigos/")
#print(FCM.FCMPartitionerTrimf.__module__)

#gauss = random.normal(0,1.0,5000)
#gauss_teste = random.normal(0,1.0,400)

#taiexpd = pd.read_csv("DataSets/TAIEX.csv", sep=",")
#taiex = np.array(taiexpd["avg"][:5000])
#nasdaqpd = pd.read_csv("DataSets/NASDAQ_IXIC.csv", sep=",")
#nasdaq = np.array(nasdaqpd["avg"][0:5000])
#from statsmodels.tsa.arima_model import ARIMA as stats_arima
from statsmodels.tsa.tsatools import lagmat
@@ -54,9 +57,12 @@ from statsmodels.tsa.tsatools import lagmat
from pyFTS.benchmarks import distributed_benchmarks as bchmk
#from pyFTS.benchmarks import parallel_benchmarks as bchmk
from pyFTS.benchmarks import Util
#from pyFTS.benchmarks import arima
#Util.cast_dataframe_to_sintetic_point("experiments/taiex_point_analitic.csv","experiments/taiex_point_sintetic.csv",11)
Util.plot_dataframe_point("experiments/taiex_point_sintetic.csv","experiments/taiex_point_analitic.csv",11)
#tmp = arima.ARIMA("") #tmp = arima.ARIMA("")
#tmp.train(taiex[:1600],None,parameters=(2,0,1)) #tmp.train(taiex[:1600],None,parameters=(2,0,1))
@@ -66,11 +72,11 @@ from pyFTS.benchmarks import distributed_benchmarks as bchmk
#bchmk.teste(taiex,['192.168.0.109', '192.168.0.101'])
#bchmk.point_sliding_window(gauss,2000,train=0.8, #models=[yu.WeightedFTS], # #
#                   partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ],
#                   partitions= np.arange(3,10,step=1), #transformation=diff,
#                   dump=True, save=True, file="experiments/gauss_point_distributed.csv",
#                   nodes=['192.168.0.102', '192.168.0.109']) #, depends=[hofts, ifts])
#bchmk.testa(taiex,[10,20],partitioners=[Grid.GridPartitioner], nodes=['192.168.0.109', '192.168.0.101'])