- Issue #3 - Code documentation with PEP 257 compliance
- Benchmarks refactoring and optimizations - Probabilistic package, with Kernel Density Estimation
This commit is contained in:
parent
ea7a0bbd62
commit
ff67356d64
@ -2,12 +2,68 @@
|
||||
Benchmark utility functions
|
||||
"""
|
||||
|
||||
import matplotlib as plt
|
||||
import matplotlib.cm as cmx
|
||||
import matplotlib.colors as pltcolors
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from checkbox_support.parsers.tests.test_modinfo import testMultipleModinfoParser
|
||||
from mpl_toolkits.mplot3d import Axes3D
|
||||
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from copy import deepcopy
|
||||
from pyFTS.common import Util
|
||||
|
||||
|
||||
def extract_measure(dataframe, measure, data_columns):
    """
    Extract the per-experiment values of one benchmark measure from an analytic dataframe.

    :param dataframe: pandas DataFrame in the analytic benchmark layout (one row
        per measure, with a ``Measure`` column and one column per experiment)
    :param measure: measure name to select (e.g. 'RMSE', 'SMAPE', 'U', 'TIME')
    :param data_columns: names of the per-experiment columns to extract
    :return: list with the row's values in *data_columns* order, or None when
        *dataframe* is empty
    :raises IndexError: if *dataframe* is non-empty but contains no row whose
        Measure equals *measure* (preserved from the original behavior)
    """
    if dataframe.empty:
        return None
    # to_dict(orient="records") keeps the data_columns ordering, so the
    # returned list follows the experiment order.
    record = dataframe[dataframe.Measure == measure][data_columns].to_dict(orient="records")[0]
    return list(record.values())
|
||||
|
||||
|
||||
def find_best(dataframe, criteria, ascending):
    """
    Find, for every (Model, Order) pair, the best-ranked configuration.

    :param dataframe: synthetic benchmark DataFrame with Model, Order, Scheme
        and Partitions columns plus the sortable measure columns
    :param criteria: column name(s) passed to ``sort_values(by=...)``
    :param ascending: sort direction(s) passed to ``sort_values(ascending=...)``
    :return: dict keyed by ``str(model) + str(order)``; each value is a dict
        with the winning row's 'Model', 'Order', 'Scheme' and 'Partitions'
    """
    best_by_key = {}
    for model in dataframe.Model.unique():
        for order in dataframe.Order.unique():
            subset = dataframe[(dataframe.Model == model) & (dataframe.Order == order)]
            ranked = subset.sort_values(by=criteria, ascending=ascending)
            if ranked.empty:
                # No rows for this (model, order) combination.
                continue
            top = ranked.iloc[0]
            best_by_key[str(model) + str(order)] = {
                'Model': model,
                'Order': order,
                'Scheme': top["Scheme"],
                'Partitions': top["Partitions"],
            }
    return best_by_key
|
||||
|
||||
|
||||
def point_dataframe_sintetic_columns():
    """Column names of the synthetic (aggregated) point-forecast benchmark dataframe."""
    identification = ["Model", "Order", "Scheme", "Partitions", "Size"]
    statistics = ["RMSEAVG", "RMSESTD", "SMAPEAVG", "SMAPESTD",
                  "UAVG", "USTD", "TIMEAVG", "TIMESTD"]
    return identification + statistics
|
||||
|
||||
|
||||
def point_dataframe_analytic_columns(experiments):
    """
    Column names of the analytic (per-experiment) point-forecast benchmark dataframe.

    :param experiments: number of experiment runs; produces one column per run,
        named "0" .. str(experiments - 1)
    :return: list of column names — the six identification columns followed by
        the per-experiment columns
    """
    # Building the list directly avoids six sequential O(n) insert(0..5) calls
    # and the needless numpy round-trip of the original implementation.
    return (["Model", "Order", "Scheme", "Partitions", "Size", "Measure"]
            + [str(k) for k in range(experiments)])
|
||||
|
||||
|
||||
def save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, times, u):
|
||||
"""
|
||||
Create a dataframe to store the benchmark results
|
||||
@ -32,9 +88,14 @@ def save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, t
|
||||
mfts = objs[k]
|
||||
mod.append(mfts.shortname)
|
||||
mod.append(mfts.order)
|
||||
if not mfts.benchmark_only:
|
||||
mod.append(mfts.partitioner.name)
|
||||
mod.append(mfts.partitioner.partitions)
|
||||
mod.append(len(mfts))
|
||||
else:
|
||||
mod.append('-')
|
||||
mod.append('-')
|
||||
mod.append('-')
|
||||
mod.append(np.round(np.nanmean(rmse[k]), 2))
|
||||
mod.append(np.round(np.nanstd(rmse[k]), 2))
|
||||
mod.append(np.round(np.nanmean(smape[k]), 2))
|
||||
@ -42,31 +103,202 @@ def save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, t
|
||||
mod.append(np.round(np.nanmean(u[k]), 2))
|
||||
mod.append(np.round(np.nanstd(u[k]), 2))
|
||||
mod.append(np.round(np.nanmean(times[k]), 4))
|
||||
mod.append(np.round(np.nanstd(times[k]), 4))
|
||||
ret.append(mod)
|
||||
except Exception as ex:
|
||||
print("Erro ao salvar ", k)
|
||||
print("Exceção ", ex)
|
||||
|
||||
columns = ["Model", "Order", "Scheme","Partitions", "Size", "RMSEAVG", "RMSESTD", "SMAPEAVG", "SMAPESTD", "UAVG", "USTD", "TIMEAVG"]
|
||||
columns = point_dataframe_sintetic_columns()
|
||||
else:
|
||||
for k in sorted(objs.keys()):
|
||||
try:
|
||||
mfts = objs[k]
|
||||
tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts), 'RMSE']
|
||||
n = mfts.shortname
|
||||
o = mfts.order
|
||||
if not mfts.benchmark_only:
|
||||
s = mfts.partitioner.name
|
||||
p = mfts.partitioner.partitions
|
||||
l = len(mfts)
|
||||
else:
|
||||
s = '-'
|
||||
p = '-'
|
||||
l = '-'
|
||||
|
||||
tmp = [n, o, s, p, l, 'RMSE']
|
||||
tmp.extend(rmse[k])
|
||||
ret.append(deepcopy(tmp))
|
||||
tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts), 'SMAPE']
|
||||
tmp = [n, o, s, p, l, 'SMAPE']
|
||||
tmp.extend(smape[k])
|
||||
ret.append(deepcopy(tmp))
|
||||
tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts), 'U']
|
||||
tmp = [n, o, s, p, l, 'U']
|
||||
tmp.extend(u[k])
|
||||
ret.append(deepcopy(tmp))
|
||||
tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts), 'TIME']
|
||||
tmp = [n, o, s, p, l, 'TIME']
|
||||
tmp.extend(times[k])
|
||||
ret.append(deepcopy(tmp))
|
||||
except Exception as ex:
|
||||
print("Erro ao salvar ", k)
|
||||
print("Exceção ", ex)
|
||||
columns = point_dataframe_analytic_columns(experiments)
|
||||
dat = pd.DataFrame(ret, columns=columns)
|
||||
if save: dat.to_csv(Util.uniquefilename(file), sep=";", index=False)
|
||||
return dat
|
||||
|
||||
|
||||
def cast_dataframe_to_sintetic_point(infile, outfile, experiments):
    """
    Aggregate an analytic point-forecast CSV into the synthetic (summary) format.

    Reads *infile* (analytic layout: one row per model/measure with one column
    per experiment), computes mean/std statistics per (Model, Order, Scheme,
    Partitions) combination, and writes the summary CSV to a unique variant of
    *outfile*.

    :param infile: path of the analytic CSV (';'-separated)
    :param outfile: base path of the synthetic CSV to write
    :param experiments: number of per-experiment data columns in *infile*
    """
    source = pd.read_csv(infile, sep=";", usecols=point_dataframe_analytic_columns(experiments))

    data_columns = analytical_data_columns(experiments)

    rows = []
    for model in source.Model.unique():
        for order in source.Order.unique():
            for scheme in source.Scheme.unique():
                for partitions in source.Partitions.unique():
                    subset = source[(source.Model == model) & (source.Order == order)
                                    & (source.Scheme == scheme) & (source.Partitions == partitions)]
                    if subset.empty:
                        # Not every combination of the four keys exists.
                        continue
                    rmse = extract_measure(subset, 'RMSE', data_columns)
                    smape = extract_measure(subset, 'SMAPE', data_columns)
                    u = extract_measure(subset, 'U', data_columns)
                    times = extract_measure(subset, 'TIME', data_columns)
                    rows.append([
                        model, order, scheme, partitions,
                        # Size is constant per combination; read it off the RMSE row.
                        extract_measure(subset, 'RMSE', ['Size'])[0],
                        np.round(np.nanmean(rmse), 2), np.round(np.nanstd(rmse), 2),
                        np.round(np.nanmean(smape), 2), np.round(np.nanstd(smape), 2),
                        np.round(np.nanmean(u), 2), np.round(np.nanstd(u), 2),
                        # Times keep 4 decimals, matching save_dataframe_point.
                        np.round(np.nanmean(times), 4), np.round(np.nanstd(times), 4),
                    ])

    summary = pd.DataFrame(rows, columns=point_dataframe_sintetic_columns())
    summary.to_csv(Util.uniquefilename(outfile), sep=";", index=False)
|
||||
|
||||
|
||||
def analytical_data_columns(experiments):
    """
    Names of the per-experiment data columns of an analytic benchmark dataframe.

    :param experiments: number of experiment runs
    :return: list of column names "0" .. str(experiments - 1)
    """
    # Plain range is enough here; the original np.arange only produced ints to
    # be stringified anyway.
    return [str(k) for k in range(experiments)]
|
||||
|
||||
|
||||
def plot_dataframe_point(file_synthetic, file_analytic, experiments):
    """
    Plot boxplots of RMSE, SMAPE, U and execution time for the best model
    configurations found in a synthetic benchmark CSV.

    :param file_synthetic: path of the synthetic (aggregated) results CSV
    :param file_analytic: path of the analytic (per-experiment) results CSV
    :param experiments: number of per-experiment columns in the analytic CSV
    """

    # One stacked subplot per measure.
    fig, axes = plt.subplots(nrows=4, ncols=1, figsize=[6, 8])

    axes[0].set_title('RMSE')
    axes[1].set_title('SMAPE')
    axes[2].set_title('U Statistic')
    axes[3].set_title('Execution Time')

    dat_syn = pd.read_csv(file_synthetic, sep=";", usecols=point_dataframe_sintetic_columns())

    # Rank by U first, then RMSE (means before stds); [1,1,1,1] sorts ascending.
    bests = find_best(dat_syn, ['UAVG','RMSEAVG','USTD','RMSESTD'], [1,1,1,1])

    dat_ana = pd.read_csv(file_analytic, sep=";", usecols=point_dataframe_analytic_columns(experiments))

    data_columns = analytical_data_columns(experiments)

    rmse = []
    smape = []
    u = []
    times = []
    labels = []

    # For each winning configuration, pull its raw per-experiment values from
    # the analytic dataframe.
    for b in bests.keys():
        best = bests[b]
        tmp = dat_ana[(dat_ana.Model == best["Model"]) & (dat_ana.Order == best["Order"])
            & (dat_ana.Scheme == best["Scheme"]) & (dat_ana.Partitions == best["Partitions"])]
        rmse.append( extract_measure(tmp,'RMSE',data_columns) )
        smape.append(extract_measure(tmp, 'SMAPE', data_columns))
        u.append(extract_measure(tmp, 'U', data_columns))
        times.append(extract_measure(tmp, 'TIME', data_columns))
        labels.append(best["Model"] + " " + str(best["Order"]))

    axes[0].boxplot(rmse, labels=labels, showmeans=True)
    axes[1].boxplot(smape, labels=labels, showmeans=True)
    axes[2].boxplot(u, labels=labels, showmeans=True)
    axes[3].boxplot(times, labels=labels, showmeans=True)

    plt.show()
|
||||
|
||||
|
||||
|
||||
def save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, sintetic, times):
    """
    Build (and optionally save) a dataframe with interval-forecast benchmark results.

    :param coverage: dict key -> list of coverage values per experiment
    :param experiments: number of experiment runs (analytic column count)
    :param file: output CSV path (made unique by Util.uniquefilename)
    :param objs: dict of fitted forecaster objects, keyed by model id
    :param resolution: dict key -> list of resolution values per experiment
    :param save: when True, also write the dataframe to *file*
    :param sharpness: dict key -> list of sharpness values per experiment
    :param sintetic: when True, aggregate statistics per model; otherwise emit
        one row per (model, measure) with the raw per-experiment values
    :param times: dict key -> list of execution times per experiment
    :return: pandas DataFrame with the results
    """
    ret = []
    if sintetic:
        # Aggregated layout: one row per model with mean/std of each measure.
        # NOTE(review): unlike the analytic branch below, this loop has no
        # try/except guard — a single bad entry aborts the whole save.
        for k in sorted(objs.keys()):
            mod = []
            mfts = objs[k]
            mod.append(mfts.shortname)
            mod.append(mfts.order)
            if not mfts.benchmark_only:
                mod.append(mfts.partitioner.name)
                mod.append(mfts.partitioner.partitions)
                l = len(mfts)
            else:
                # Benchmark-only models carry no partitioner/size information.
                mod.append('-')
                mod.append('-')
                l = '-'
            mod.append(round(np.nanmean(sharpness[k]), 2))
            mod.append(round(np.nanstd(sharpness[k]), 2))
            mod.append(round(np.nanmean(resolution[k]), 2))
            mod.append(round(np.nanstd(resolution[k]), 2))
            mod.append(round(np.nanmean(coverage[k]), 2))
            mod.append(round(np.nanstd(coverage[k]), 2))
            mod.append(round(np.nanmean(times[k]), 2))
            mod.append(round(np.nanstd(times[k]), 2))
            # Size goes last, matching interval_dataframe_sintetic_columns().
            mod.append(l)
            ret.append(mod)

        columns = interval_dataframe_sintetic_columns()
    else:
        # Analytic layout: one row per (model, measure) holding the raw
        # per-experiment values.
        for k in sorted(objs.keys()):
            try:
                mfts = objs[k]
                n = mfts.shortname
                o = mfts.order
                if not mfts.benchmark_only:
                    s = mfts.partitioner.name
                    p = mfts.partitioner.partitions
                    l = len(mfts)
                else:
                    s = '-'
                    p = '-'
                    l = '-'

                # deepcopy defends against a shared tmp list being mutated later.
                tmp = [n, o, s, p, l, 'Sharpness']
                tmp.extend(sharpness[k])
                ret.append(deepcopy(tmp))
                tmp = [n, o, s, p, l, 'Resolution']
                tmp.extend(resolution[k])
                ret.append(deepcopy(tmp))
                tmp = [n, o, s, p, l, 'Coverage']
                tmp.extend(coverage[k])
                ret.append(deepcopy(tmp))
                tmp = [n, o, s, p, l, 'TIME']
                tmp.extend(times[k])
                ret.append(deepcopy(tmp))
            except Exception as ex:
                # Best-effort: log the failing model and keep saving the rest.
                print("Erro ao salvar ", k)
                print("Exceção ", ex)
        columns = interval_dataframe_analytic_columns(experiments)
    dat = pd.DataFrame(ret, columns=columns)
    # NOTE(review): no index=False here (unlike save_dataframe_point), so the
    # CSV gains an index column — confirm whether downstream readers expect it.
    if save: dat.to_csv(Util.uniquefilename(file), sep=";")
    return dat
|
||||
|
||||
def interval_dataframe_analytic_columns(experiments):
|
||||
columns = [str(k) for k in np.arange(0, experiments)]
|
||||
columns.insert(0, "Model")
|
||||
columns.insert(1, "Order")
|
||||
@ -74,6 +306,112 @@ def save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, t
|
||||
columns.insert(3, "Partitions")
|
||||
columns.insert(4, "Size")
|
||||
columns.insert(5, "Measure")
|
||||
return columns
|
||||
|
||||
|
||||
def interval_dataframe_sintetic_columns():
    """Column names of the synthetic (aggregated) interval-forecast benchmark dataframe."""
    identification = ["Model", "Order", "Scheme", "Partitions"]
    statistics = ["SHARPAVG", "SHARPSTD", "RESAVG", "RESSTD",
                  "COVAVG", "COVSTD", "TIMEAVG", "TIMESTD", "SIZE"]
    return identification + statistics
|
||||
|
||||
|
||||
def save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, times1, times2, save, sintetic):
    """
    Save benchmark results for m-step ahead probabilistic forecasters.

    :param experiments: number of experiment runs (analytic column count)
    :param file: output CSV path (made unique by Util.uniquefilename)
    :param objs: dict of fitted forecaster objects, keyed by model id
    :param crps_interval: dict key -> list of CRPS values for interval forecasts
    :param crps_distr: dict key -> list of CRPS values for distribution forecasts
    :param times1: dict key -> list of interval-forecast execution times
    :param times2: dict key -> list of distribution-forecast execution times
    :param save: when True, also write the dataframe to *file*
    :param sintetic: when True, aggregate statistics per model; otherwise emit
        one row per (model, measure) with the raw per-experiment values
    :return: pandas DataFrame with the results
    """
    ret = []

    if sintetic:
        # Aggregated layout: one row per model with mean/std of each measure.
        # (Fixed: the original nested a second identical loop inside this one
        # and re-initialized `ret` on every outer iteration — a merge artifact
        # that made the accumulation quadratic; a single pass suffices.)
        for k in sorted(objs.keys()):
            try:
                mod = []
                mfts = objs[k]
                mod.append(mfts.shortname)
                mod.append(mfts.order)
                if not mfts.benchmark_only:
                    mod.append(mfts.partitioner.name)
                    mod.append(mfts.partitioner.partitions)
                    l = len(mfts)
                else:
                    # Benchmark-only models carry no partitioner/size information.
                    mod.append('-')
                    mod.append('-')
                    l = '-'
                mod.append(np.round(np.nanmean(crps_interval[k]), 2))
                mod.append(np.round(np.nanstd(crps_interval[k]), 2))
                mod.append(np.round(np.nanmean(crps_distr[k]), 2))
                mod.append(np.round(np.nanstd(crps_distr[k]), 2))
                mod.append(l)
                mod.append(np.round(np.nanmean(times1[k]), 4))
                mod.append(np.round(np.nanmean(times2[k]), 4))
                ret.append(mod)
            except Exception as ex:
                # Best-effort: log the failing model and keep saving the rest.
                print("Erro ao salvar ", k)
                print("Exceção ", ex)

        columns = ahead_dataframe_sintetic_columns()
    else:
        # Analytic layout: one row per (model, measure) holding the raw
        # per-experiment values.
        for k in sorted(objs.keys()):
            try:
                mfts = objs[k]
                n = mfts.shortname
                o = mfts.order
                if not mfts.benchmark_only:
                    s = mfts.partitioner.name
                    p = mfts.partitioner.partitions
                    l = len(mfts)
                else:
                    s = '-'
                    p = '-'
                    l = '-'
                # deepcopy defends against a shared tmp list being mutated later.
                tmp = [n, o, s, p, l, 'CRPS_Interval']
                tmp.extend(crps_interval[k])
                ret.append(deepcopy(tmp))
                tmp = [n, o, s, p, l, 'CRPS_Distribution']
                tmp.extend(crps_distr[k])
                ret.append(deepcopy(tmp))
                tmp = [n, o, s, p, l, 'TIME_Interval']
                tmp.extend(times1[k])
                ret.append(deepcopy(tmp))
                tmp = [n, o, s, p, l, 'TIME_Distribution']
                tmp.extend(times2[k])
                ret.append(deepcopy(tmp))
            except Exception as ex:
                print("Erro ao salvar ", k)
                print("Exceção ", ex)
        columns = ahead_dataframe_analytic_columns(experiments)
    dat = pd.DataFrame(ret, columns=columns)
    if save: dat.to_csv(Util.uniquefilename(file), sep=";")
    return dat
|
||||
|
||||
|
||||
def ahead_dataframe_analytic_columns(experiments):
    """
    Column names of the analytic (per-experiment) ahead-forecast benchmark dataframe.

    :param experiments: number of experiment runs; produces one column per run,
        named "0" .. str(experiments - 1)
    :return: list of column names — the six identification columns followed by
        the per-experiment columns
    """
    # Direct construction replaces six sequential insert(0..5) calls and the
    # numpy round-trip of the original implementation.
    return (["Model", "Order", "Scheme", "Partitions", "Size", "Measure"]
            + [str(k) for k in range(experiments)])
|
||||
|
||||
|
||||
def ahead_dataframe_sintetic_columns():
    """Column names of the synthetic (aggregated) ahead-forecast benchmark dataframe."""
    identification = ["Model", "Order", "Scheme", "Partitions"]
    statistics = ["CRPS1AVG", "CRPS1STD", "CRPS2AVG", "CRPS2STD",
                  "SIZE", "TIME1AVG", "TIME2AVG"]
    return identification + statistics
|
||||
|
@ -4,21 +4,24 @@
|
||||
"""Benchmarks to FTS methods"""
|
||||
|
||||
|
||||
import datetime
|
||||
import time
|
||||
from copy import deepcopy
|
||||
|
||||
import matplotlib as plt
|
||||
import matplotlib.cm as cmx
|
||||
import matplotlib.colors as pltcolors
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import time
|
||||
import datetime
|
||||
import matplotlib as plt
|
||||
import matplotlib.colors as pltcolors
|
||||
import matplotlib.cm as cmx
|
||||
import matplotlib.pyplot as plt
|
||||
from mpl_toolkits.mplot3d import Axes3D
|
||||
|
||||
from probabilistic import ProbabilityDistribution
|
||||
from pyFTS import song, chen, yu, ismailefendi, sadaei, hofts, pwfts, ifts, cheng, ensemble, hwang
|
||||
from pyFTS.benchmarks import Measures, naive, arima, ResidualAnalysis, Util, quantreg
|
||||
from pyFTS.common import Transformations, Util
|
||||
# from sklearn.cross_validation import KFold
|
||||
from pyFTS.partitioners import partitioner, Grid, Huarng, Entropy, FCM
|
||||
from pyFTS.benchmarks import Measures, naive, arima, ResidualAnalysis, ProbabilityDistribution, Util, quantreg
|
||||
from pyFTS.common import Membership, FuzzySet, FLR, Transformations, Util
|
||||
from pyFTS import fts, song, chen, yu, ismailefendi, sadaei, hofts, hwang, pwfts, ifts, cheng, ensemble, hwang
|
||||
from copy import deepcopy
|
||||
from pyFTS.partitioners import Grid
|
||||
|
||||
colors = ['grey', 'rosybrown', 'maroon', 'red','orange', 'yellow', 'olive', 'green',
|
||||
'cyan', 'blue', 'darkblue', 'purple', 'darkviolet']
|
||||
@ -369,62 +372,6 @@ def getProbabilityDistributionStatistics(pmfs, data):
|
||||
ret += " \\\\ \n"
|
||||
return ret
|
||||
|
||||
def save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, sintetic, times):
|
||||
ret = []
|
||||
if sintetic:
|
||||
for k in sorted(objs.keys()):
|
||||
mod = []
|
||||
mfts = objs[k]
|
||||
mod.append(mfts.shortname)
|
||||
mod.append(mfts.order)
|
||||
mod.append(mfts.partitioner.name)
|
||||
mod.append(mfts.partitioner.partitions)
|
||||
mod.append(round(np.nanmean(sharpness[k]), 2))
|
||||
mod.append(round(np.nanstd(sharpness[k]), 2))
|
||||
mod.append(round(np.nanmean(resolution[k]), 2))
|
||||
mod.append(round(np.nanstd(resolution[k]), 2))
|
||||
mod.append(round(np.nanmean(coverage[k]), 2))
|
||||
mod.append(round(np.nanstd(coverage[k]), 2))
|
||||
mod.append(round(np.nanmean(times[k]), 2))
|
||||
mod.append(round(np.nanstd(times[k]), 2))
|
||||
mod.append(len(mfts))
|
||||
ret.append(mod)
|
||||
|
||||
columns = ["Model", "Order", "Scheme", "Partitions", "SHARPAVG", "SHARPSTD", "RESAVG", "RESSTD", "COVAVG",
|
||||
"COVSTD", "TIMEAVG", "TIMESTD", "SIZE"]
|
||||
else:
|
||||
for k in sorted(objs.keys()):
|
||||
try:
|
||||
mfts = objs[k]
|
||||
tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts),
|
||||
'Sharpness']
|
||||
tmp.extend(sharpness[k])
|
||||
ret.append(deepcopy(tmp))
|
||||
tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts),
|
||||
'Resolution']
|
||||
tmp.extend(resolution[k])
|
||||
ret.append(deepcopy(tmp))
|
||||
tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts),
|
||||
'Coverage']
|
||||
tmp.extend(coverage[k])
|
||||
ret.append(deepcopy(tmp))
|
||||
tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts),
|
||||
'TIME']
|
||||
tmp.extend(times[k])
|
||||
ret.append(deepcopy(tmp))
|
||||
except Exception as ex:
|
||||
print("Erro ao salvar ", k)
|
||||
print("Exceção ", ex)
|
||||
columns = [str(k) for k in np.arange(0, experiments)]
|
||||
columns.insert(0, "Model")
|
||||
columns.insert(1, "Order")
|
||||
columns.insert(2, "Scheme")
|
||||
columns.insert(3, "Partitions")
|
||||
columns.insert(4, "Size")
|
||||
columns.insert(5, "Measure")
|
||||
dat = pd.DataFrame(ret, columns=columns)
|
||||
if save: dat.to_csv(Util.uniquefilename(file), sep=";")
|
||||
return dat
|
||||
|
||||
|
||||
def interval_sliding_window(data, windowsize, train=0.8,models=None,partitioners=[Grid.GridPartitioner],
|
||||
@ -518,7 +465,7 @@ def interval_sliding_window(data, windowsize, train=0.8,models=None,partitioners
|
||||
coverage[_key].append(_cov)
|
||||
times[_key].append(_tdiff)
|
||||
|
||||
return save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, sintetic, times)
|
||||
return Util.save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, sintetic, times)
|
||||
|
||||
|
||||
def all_interval_forecasters(data_train, data_test, partitions, max_order=3,save=False, file=None, tam=[20, 5],
|
||||
@ -637,80 +584,6 @@ def plot_probability_distributions(pmfs, lcolors, tam=[15, 7]):
|
||||
ax.legend(handles0, labels0)
|
||||
|
||||
|
||||
def save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, times1, times2, save, sintetic):
|
||||
"""
|
||||
Save benchmark results for m-step ahead probabilistic forecasters
|
||||
:param experiments:
|
||||
:param file:
|
||||
:param objs:
|
||||
:param crps_interval:
|
||||
:param crps_distr:
|
||||
:param times1:
|
||||
:param times2:
|
||||
:param save:
|
||||
:param sintetic:
|
||||
:return:
|
||||
"""
|
||||
ret = []
|
||||
|
||||
if sintetic:
|
||||
|
||||
for k in sorted(objs.keys()):
|
||||
try:
|
||||
ret = []
|
||||
for k in sorted(objs.keys()):
|
||||
try:
|
||||
mod = []
|
||||
mfts = objs[k]
|
||||
mod.append(mfts.shortname)
|
||||
mod.append(mfts.order)
|
||||
mod.append(mfts.partitioner.name)
|
||||
mod.append(mfts.partitioner.partitions)
|
||||
mod.append(np.round(np.nanmean(crps_interval[k]), 2))
|
||||
mod.append(np.round(np.nanstd(crps_interval[k]), 2))
|
||||
mod.append(np.round(np.nanmean(crps_distr[k]), 2))
|
||||
mod.append(np.round(np.nanstd(crps_distr[k]), 2))
|
||||
mod.append(len(mfts))
|
||||
mod.append(np.round(np.nanmean(times1[k]), 4))
|
||||
mod.append(np.round(np.nanmean(times2[k]), 4))
|
||||
ret.append(mod)
|
||||
except Exception as e:
|
||||
print('Erro: %s' % e)
|
||||
except Exception as ex:
|
||||
print("Erro ao salvar ", k)
|
||||
print("Exceção ", ex)
|
||||
|
||||
columns = ["Model", "Order", "Scheme", "Partitions", "CRPS1AVG", "CRPS1STD", "CRPS2AVG", "CRPS2STD",
|
||||
"SIZE", "TIME1AVG", "TIME2AVG"]
|
||||
else:
|
||||
for k in sorted(objs.keys()):
|
||||
try:
|
||||
mfts = objs[k]
|
||||
tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts), 'CRPS_Interval']
|
||||
tmp.extend(crps_interval[k])
|
||||
ret.append(deepcopy(tmp))
|
||||
tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts), 'CRPS_Distribution']
|
||||
tmp.extend(crps_distr[k])
|
||||
ret.append(deepcopy(tmp))
|
||||
tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts), 'TIME_Interval']
|
||||
tmp.extend(times1[k])
|
||||
ret.append(deepcopy(tmp))
|
||||
tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts), 'TIME_Distribution']
|
||||
tmp.extend(times2[k])
|
||||
ret.append(deepcopy(tmp))
|
||||
except Exception as ex:
|
||||
print("Erro ao salvar ", k)
|
||||
print("Exceção ", ex)
|
||||
columns = [str(k) for k in np.arange(0, experiments)]
|
||||
columns.insert(0, "Model")
|
||||
columns.insert(1, "Order")
|
||||
columns.insert(2, "Scheme")
|
||||
columns.insert(3, "Partitions")
|
||||
columns.insert(4, "Size")
|
||||
columns.insert(5, "Measure")
|
||||
dat = pd.DataFrame(ret, columns=columns)
|
||||
if save: dat.to_csv(Util.uniquefilename(file), sep=";")
|
||||
return dat
|
||||
|
||||
|
||||
def ahead_sliding_window(data, windowsize, train, steps, models=None, resolution = None, partitioners=[Grid.GridPartitioner],
|
||||
@ -806,7 +679,7 @@ def ahead_sliding_window(data, windowsize, train, steps, models=None, resolution
|
||||
|
||||
if dump: print(_crps1, _crps2, _tdiff, _t1, _t2)
|
||||
|
||||
return save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, times1, times2, save, sintetic)
|
||||
return Util.save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, times1, times2, save, sintetic)
|
||||
|
||||
|
||||
def all_ahead_forecasters(data_train, data_test, partitions, start, steps, resolution = None, max_order=3,save=False, file=None, tam=[20, 5],
|
||||
@ -979,6 +852,7 @@ def plotCompared(original, forecasts, labels, title):
|
||||
ax.set_xlim([0, len(original)])
|
||||
ax.set_ylim([min(original), max(original)])
|
||||
|
||||
|
||||
def SelecaoSimples_MenorRMSE(original, parameters, modelo):
|
||||
ret = []
|
||||
errors = []
|
||||
|
@ -6,20 +6,16 @@ To enable a dispy cluster node:
|
||||
python3 /usr/local/bin/dispynode.py -i [local IP] -d
|
||||
"""
|
||||
|
||||
import random
|
||||
import datetime
|
||||
import time
|
||||
|
||||
import dispy
|
||||
import dispy.httpd
|
||||
from copy import deepcopy
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import time
|
||||
import datetime
|
||||
import pyFTS
|
||||
from pyFTS.partitioners import partitioner, Grid, Huarng, Entropy, FCM
|
||||
from pyFTS.benchmarks import Measures, naive, arima, ResidualAnalysis, ProbabilityDistribution
|
||||
from pyFTS.common import Membership, FuzzySet, FLR, Transformations, Util
|
||||
from pyFTS import fts, chen, yu, ismailefendi, sadaei, hofts, hwang, pwfts, ifts
|
||||
from pyFTS.benchmarks import benchmarks, parallel_benchmarks, Util as bUtil
|
||||
|
||||
from pyFTS.benchmarks import benchmarks, Util as bUtil
|
||||
from pyFTS.common import Util
|
||||
from pyFTS.partitioners import Grid
|
||||
|
||||
|
||||
def run_point(mfts, partitioner, train_data, test_data, window_key=None, transformation=None, indexer=None):
|
||||
|
@ -2,19 +2,17 @@
|
||||
joblib Parallelized Benchmarks to FTS methods
|
||||
"""
|
||||
|
||||
from copy import deepcopy
|
||||
from joblib import Parallel, delayed
|
||||
import datetime
|
||||
import multiprocessing
|
||||
import time
|
||||
from copy import deepcopy
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import time
|
||||
import datetime
|
||||
from pyFTS.partitioners import partitioner, Grid, Huarng, Entropy, FCM
|
||||
from pyFTS.benchmarks import Measures, naive, arima, ResidualAnalysis, ProbabilityDistribution
|
||||
from pyFTS.common import Membership, FuzzySet, FLR, Transformations, Util
|
||||
from pyFTS import fts, chen, yu, ismailefendi, sadaei, hofts, hwang, pwfts, ifts
|
||||
from pyFTS.benchmarks import benchmarks
|
||||
from joblib import Parallel, delayed
|
||||
|
||||
from pyFTS.benchmarks import benchmarks, Util
|
||||
from pyFTS.common import Util
|
||||
from pyFTS.partitioners import Grid
|
||||
|
||||
|
||||
def run_point(mfts, partitioner, train_data, test_data, transformation=None, indexer=None):
|
||||
@ -140,7 +138,7 @@ def point_sliding_window(data, windowsize, train=0.8, models=None, partitioners=
|
||||
|
||||
print("Process Duration: {0}".format(_process_end - _process_start))
|
||||
|
||||
return benchmarks.save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, times, u)
|
||||
return Util.save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, times, u)
|
||||
|
||||
|
||||
def run_interval(mfts, partitioner, train_data, test_data, transformation=None, indexer=None):
|
||||
@ -267,7 +265,7 @@ def interval_sliding_window(data, windowsize, train=0.8, models=None, partitione
|
||||
|
||||
print("Process Duration: {0}".format(_process_end - _process_start))
|
||||
|
||||
return benchmarks.save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, sintetic, times)
|
||||
return Util.save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, sintetic, times)
|
||||
|
||||
|
||||
def run_ahead(mfts, partitioner, train_data, test_data, steps, resolution, transformation=None, indexer=None):
|
||||
@ -397,4 +395,4 @@ def ahead_sliding_window(data, windowsize, train, steps,resolution, models=None,
|
||||
|
||||
print("Process Duration: {0}".format(_process_end - _process_start))
|
||||
|
||||
return benchmarks.save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, times1, times2, save, sintetic)
|
||||
return Util.save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, times1, times2, save, sintetic)
|
||||
|
@ -98,7 +98,6 @@ class ProbabilityDistribution(object):
|
||||
axis.set_xlabel('Universe of Discourse')
|
||||
axis.set_ylabel('Probability')
|
||||
|
||||
|
||||
def __str__(self):
|
||||
head = '|'
|
||||
body = '|'
|
0
probabilistic/__init__.py
Normal file
0
probabilistic/__init__.py
Normal file
25
probabilistic/kde.py
Normal file
25
probabilistic/kde.py
Normal file
@ -0,0 +1,25 @@
|
||||
"""
|
||||
Kernel Density Estimation
|
||||
"""
|
||||
|
||||
|
||||
class KernelSmoothing(object):
    """Kernel Density Estimation with a fixed bandwidth.

    Estimates a probability density from a sample via
    f(x) = (1 / (n * h)) * sum_i K((x - x_i) / h).
    """

    def __init__(self, h, data, method="epanechnikov"):
        """
        :param h: bandwidth (smoothing parameter); must be positive
        :param data: sample (iterable of numbers) the density is estimated from
        :param method: kernel name, "epanechnikov" or "uniform"
        """
        self.h = h
        self.data = data
        self.method = method

    def kernel(self, u):
        """Evaluate the kernel at the scaled distance *u*.

        Both supported kernels have support [-1, 1]; outside it the kernel is
        zero. (Fixed: the original did not clamp the support, so the
        Epanechnikov kernel returned negative values for |u| > 1, and a
        duplicated dead `elif` branch repeated the uniform case.)

        :raises ValueError: for an unsupported kernel name (the original
            silently returned None, which crashed later in probability()).
        """
        if abs(u) > 1:
            return 0.0
        if self.method == "epanechnikov":
            return (3/4) * (1 - u**2)
        elif self.method == "uniform":
            return 0.5
        raise ValueError("Unknown kernel method: %s" % self.method)

    def probability(self, x):
        """Return the estimated density at point *x*.

        Fixed operator precedence: the original `sum(...) / l*self.h` divided
        by n and then *multiplied* by h; the KDE formula divides by n * h.
        """
        n = len(self.data)
        return sum([self.kernel((x - k) / self.h) for k in self.data]) / (n * self.h)
|
@ -35,12 +35,15 @@ os.chdir("/home/petronio/dados/Dropbox/Doutorado/Codigos/")
|
||||
|
||||
#print(FCM.FCMPartitionerTrimf.__module__)
|
||||
|
||||
#gauss = random.normal(0,1.0,1000)
|
||||
#gauss = random.normal(0,1.0,5000)
|
||||
#gauss_teste = random.normal(0,1.0,400)
|
||||
|
||||
|
||||
taiexpd = pd.read_csv("DataSets/TAIEX.csv", sep=",")
|
||||
taiex = np.array(taiexpd["avg"][:5000])
|
||||
#taiexpd = pd.read_csv("DataSets/TAIEX.csv", sep=",")
|
||||
#taiex = np.array(taiexpd["avg"][:5000])
|
||||
|
||||
#nasdaqpd = pd.read_csv("DataSets/NASDAQ_IXIC.csv", sep=",")
|
||||
#nasdaq = np.array(nasdaqpd["avg"][0:5000])
|
||||
|
||||
#from statsmodels.tsa.arima_model import ARIMA as stats_arima
|
||||
from statsmodels.tsa.tsatools import lagmat
|
||||
@ -54,9 +57,12 @@ from statsmodels.tsa.tsatools import lagmat
|
||||
|
||||
from pyFTS.benchmarks import distributed_benchmarks as bchmk
|
||||
#from pyFTS.benchmarks import parallel_benchmarks as bchmk
|
||||
#from pyFTS.benchmarks import benchmarks as bchmk
|
||||
from pyFTS.benchmarks import Util
|
||||
#from pyFTS.benchmarks import arima
|
||||
|
||||
#Util.cast_dataframe_to_sintetic_point("experiments/taiex_point_analitic.csv","experiments/taiex_point_sintetic.csv",11)
|
||||
|
||||
Util.plot_dataframe_point("experiments/taiex_point_sintetic.csv","experiments/taiex_point_analitic.csv",11)
|
||||
|
||||
#tmp = arima.ARIMA("")
|
||||
#tmp.train(taiex[:1600],None,parameters=(2,0,1))
|
||||
@ -66,11 +72,11 @@ from pyFTS.benchmarks import distributed_benchmarks as bchmk
|
||||
|
||||
#bchmk.teste(taiex,['192.168.0.109', '192.168.0.101'])
|
||||
|
||||
bchmk.point_sliding_window(taiex,2000,train=0.8, #models=[yu.WeightedFTS], # #
|
||||
partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ],
|
||||
partitions= np.arange(10,200,step=5), #transformation=diff,
|
||||
dump=True, save=True, file="experiments/taiex_point_distributed.csv",
|
||||
nodes=['192.168.0.102', '192.168.0.109']) #, depends=[hofts, ifts])
|
||||
#bchmk.point_sliding_window(gauss,2000,train=0.8, #models=[yu.WeightedFTS], # #
|
||||
# partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ],
|
||||
# partitions= np.arange(3,10,step=1), #transformation=diff,
|
||||
# dump=True, save=True, file="experiments/gauss_point_distributed.csv",
|
||||
# nodes=['192.168.0.102', '192.168.0.109']) #, depends=[hofts, ifts])
|
||||
|
||||
#bchmk.testa(taiex,[10,20],partitioners=[Grid.GridPartitioner], nodes=['192.168.0.109', '192.168.0.101'])
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user