From ff67356d646c4458ee951510e593bef4b33c0800 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido=20de=20Lima=20e=20Silva?= Date: Mon, 8 May 2017 13:12:08 -0300 Subject: [PATCH] - Issue #3 - Code documentation with PEP 257 compliance - Benchmarks refactoring and optimizations - Probabilistic package, with Kernel Density Estimation --- benchmarks/Util.py | 368 +++++++++++++++++- benchmarks/benchmarks.py | 160 +------- benchmarks/distributed_benchmarks.py | 18 +- benchmarks/parallel_benchmarks.py | 24 +- .../ProbabilityDistribution.py | 1 - probabilistic/__init__.py | 0 probabilistic/kde.py | 25 ++ tests/general.py | 24 +- 8 files changed, 428 insertions(+), 192 deletions(-) rename {benchmarks => probabilistic}/ProbabilityDistribution.py (99%) create mode 100644 probabilistic/__init__.py create mode 100644 probabilistic/kde.py diff --git a/benchmarks/Util.py b/benchmarks/Util.py index 5742ae1..9bead11 100644 --- a/benchmarks/Util.py +++ b/benchmarks/Util.py @@ -2,12 +2,68 @@ Benchmark utility functions """ +import matplotlib as plt +import matplotlib.cm as cmx +import matplotlib.colors as pltcolors +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from checkbox_support.parsers.tests.test_modinfo import testMultipleModinfoParser +from mpl_toolkits.mplot3d import Axes3D + + import numpy as np import pandas as pd from copy import deepcopy from pyFTS.common import Util +def extract_measure(dataframe,measure,data_columns): + if not dataframe.empty: + tmp = dataframe[(dataframe.Measure == measure)][data_columns].to_dict(orient="records")[0] + ret = [k for k in tmp.values()] + return ret + else: + return None + + +def find_best(dataframe, criteria, ascending): + models = dataframe.Model.unique() + orders = dataframe.Order.unique() + ret = {} + for m in models: + for o in orders: + mod = {} + df = dataframe[(dataframe.Model == m) & (dataframe.Order == o)].sort_values(by=criteria, ascending=ascending) + if not df.empty: + _key = str(m) + str(o) + best = df.loc[df.index[0]] + mod['Model'] = m + mod['Order'] = o + mod['Scheme'] = best["Scheme"] + mod['Partitions'] = best["Partitions"] + + ret[_key] = mod + + return ret + + +def point_dataframe_sintetic_columns(): + return ["Model", "Order", "Scheme", "Partitions", "Size", "RMSEAVG", "RMSESTD", "SMAPEAVG", "SMAPESTD", "UAVG", + "USTD", "TIMEAVG", "TIMESTD"] + + +def point_dataframe_analytic_columns(experiments): + columns = [str(k) for k in np.arange(0, experiments)] + columns.insert(0, "Model") + columns.insert(1, "Order") + columns.insert(2, "Scheme") + columns.insert(3, "Partitions") + columns.insert(4, "Size") + columns.insert(5, "Measure") + return columns + + def save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, times, u): """ Create a dataframe to store the benchmark results @@ -32,9 +88,14 @@ def save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, t mfts = objs[k] mod.append(mfts.shortname) mod.append(mfts.order) - mod.append(mfts.partitioner.name) - mod.append(mfts.partitioner.partitions) - mod.append(len(mfts)) + if not mfts.benchmark_only: + mod.append(mfts.partitioner.name) + mod.append(mfts.partitioner.partitions) + mod.append(len(mfts)) + else: + mod.append('-') + mod.append('-') + mod.append('-') mod.append(np.round(np.nanmean(rmse[k]), 2)) mod.append(np.round(np.nanstd(rmse[k]), 2)) mod.append(np.round(np.nanmean(smape[k]), 2)) @@ -42,38 +103,315 @@ def save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, t mod.append(np.round(np.nanmean(u[k]), 2)) mod.append(np.round(np.nanstd(u[k]), 2)) mod.append(np.round(np.nanmean(times[k]), 4)) + mod.append(np.round(np.nanstd(times[k]), 4)) ret.append(mod) except Exception as ex: print("Erro ao salvar ", k) print("Exceção ", ex) - columns = ["Model", "Order", "Scheme","Partitions", "Size", "RMSEAVG", "RMSESTD", "SMAPEAVG", "SMAPESTD", "UAVG", "USTD", "TIMEAVG"] + columns = point_dataframe_sintetic_columns() else: for k in sorted(objs.keys()): try: mfts = objs[k] - tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts), 'RMSE'] + n = mfts.shortname + o = mfts.order + if not mfts.benchmark_only: + s = mfts.partitioner.name + p = mfts.partitioner.partitions + l = len(mfts) + else: + s = '-' + p = '-' + l = '-' + + tmp = [n, o, s, p, l, 'RMSE'] tmp.extend(rmse[k]) ret.append(deepcopy(tmp)) - tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts), 'SMAPE'] + tmp = [n, o, s, p, l, 'SMAPE'] tmp.extend(smape[k]) ret.append(deepcopy(tmp)) - tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts), 'U'] + tmp = [n, o, s, p, l, 'U'] tmp.extend(u[k]) ret.append(deepcopy(tmp)) - tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts), 'TIME'] + tmp = [n, o, s, p, l, 'TIME'] tmp.extend(times[k]) ret.append(deepcopy(tmp)) except Exception as ex: print("Erro ao salvar ", k) print("Exceção ", ex) - columns = [str(k) for k in np.arange(0, experiments)] - columns.insert(0, "Model") - columns.insert(1, "Order") - columns.insert(2, "Scheme") - columns.insert(3, "Partitions") - columns.insert(4, "Size") - columns.insert(5, "Measure") + columns = point_dataframe_analytic_columns(experiments) + dat = pd.DataFrame(ret, columns=columns) + if save: dat.to_csv(Util.uniquefilename(file), sep=";", index=False) + return dat + + +def cast_dataframe_to_sintetic_point(infile, outfile, experiments): + columns = point_dataframe_analytic_columns(experiments) + dat = pd.read_csv(infile, sep=";", usecols=columns) + models = dat.Model.unique() + orders = dat.Order.unique() + schemes = dat.Scheme.unique() + partitions = dat.Partitions.unique() + + data_columns = analytical_data_columns(experiments) + + ret = [] + + for m in models: + for o in orders: + for s in schemes: + for p in partitions: + mod = [] + df = dat[(dat.Model == m) & (dat.Order == o) & (dat.Scheme == s) & (dat.Partitions == p)] + if not df.empty: + rmse = extract_measure(df, 'RMSE', data_columns) + smape = extract_measure(df, 'SMAPE', data_columns) + u = extract_measure(df, 'U', data_columns) + times = extract_measure(df, 'TIME', data_columns) + mod.append(m) + mod.append(o) + mod.append(s) + mod.append(p) + mod.append(extract_measure(df, 'RMSE', ['Size'])[0]) + mod.append(np.round(np.nanmean(rmse), 2)) + mod.append(np.round(np.nanstd(rmse), 2)) + mod.append(np.round(np.nanmean(smape), 2)) + mod.append(np.round(np.nanstd(smape), 2)) + mod.append(np.round(np.nanmean(u), 2)) + mod.append(np.round(np.nanstd(u), 2)) + mod.append(np.round(np.nanmean(times), 4)) + mod.append(np.round(np.nanstd(times), 4)) + ret.append(mod) + + dat = pd.DataFrame(ret, columns=point_dataframe_sintetic_columns()) + dat.to_csv(Util.uniquefilename(outfile), sep=";", index=False) + + +def analytical_data_columns(experiments): + data_columns = [str(k) for k in np.arange(0, experiments)] + return data_columns + + +def plot_dataframe_point(file_synthetic, file_analytic, experiments): + + fig, axes = plt.subplots(nrows=4, ncols=1, figsize=[6, 8]) + + axes[0].set_title('RMSE') + axes[1].set_title('SMAPE') + axes[2].set_title('U Statistic') + axes[3].set_title('Execution Time') + + dat_syn = pd.read_csv(file_synthetic, sep=";", usecols=point_dataframe_sintetic_columns()) + + bests = find_best(dat_syn, ['UAVG','RMSEAVG','USTD','RMSESTD'], [1,1,1,1]) + + dat_ana = pd.read_csv(file_analytic, sep=";", usecols=point_dataframe_analytic_columns(experiments)) + + data_columns = analytical_data_columns(experiments) + + rmse = [] + smape = [] + u = [] + times = [] + labels = [] + + for b in bests.keys(): + best = bests[b] + tmp = dat_ana[(dat_ana.Model == best["Model"]) & (dat_ana.Order == best["Order"]) + & (dat_ana.Scheme == best["Scheme"]) & (dat_ana.Partitions == best["Partitions"])] + rmse.append( extract_measure(tmp,'RMSE',data_columns) ) + smape.append(extract_measure(tmp, 'SMAPE', data_columns)) + u.append(extract_measure(tmp, 'U', data_columns)) + times.append(extract_measure(tmp, 'TIME', data_columns)) + labels.append(best["Model"] + " " + str(best["Order"])) + + axes[0].boxplot(rmse, labels=labels, showmeans=True) + axes[1].boxplot(smape, labels=labels, showmeans=True) + axes[2].boxplot(u, labels=labels, showmeans=True) + axes[3].boxplot(times, labels=labels, showmeans=True) + + plt.show() + + + +def save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, sintetic, times): + ret = [] + if sintetic: + for k in sorted(objs.keys()): + mod = [] + mfts = objs[k] + mod.append(mfts.shortname) + mod.append(mfts.order) + if not mfts.benchmark_only: + mod.append(mfts.partitioner.name) + mod.append(mfts.partitioner.partitions) + l = len(mfts) + else: + mod.append('-') + mod.append('-') + l = '-' + mod.append(round(np.nanmean(sharpness[k]), 2)) + mod.append(round(np.nanstd(sharpness[k]), 2)) + mod.append(round(np.nanmean(resolution[k]), 2)) + mod.append(round(np.nanstd(resolution[k]), 2)) + mod.append(round(np.nanmean(coverage[k]), 2)) + mod.append(round(np.nanstd(coverage[k]), 2)) + mod.append(round(np.nanmean(times[k]), 2)) + mod.append(round(np.nanstd(times[k]), 2)) + mod.append(l) + ret.append(mod) + + columns = interval_dataframe_sintetic_columns() + else: + for k in sorted(objs.keys()): + try: + mfts = objs[k] + n = mfts.shortname + o = mfts.order + if not mfts.benchmark_only: + s = mfts.partitioner.name + p = mfts.partitioner.partitions + l = len(mfts) + else: + s = '-' + p = '-' + l = '-' + + tmp = [n, o, s, p, l, 'Sharpness'] + tmp.extend(sharpness[k]) + ret.append(deepcopy(tmp)) + tmp = [n, o, s, p, l, 'Resolution'] + tmp.extend(resolution[k]) + ret.append(deepcopy(tmp)) + tmp = [n, o, s, p, l, 'Coverage'] + tmp.extend(coverage[k]) + ret.append(deepcopy(tmp)) + tmp = [n, o, s, p, l, 'TIME'] + tmp.extend(times[k]) + ret.append(deepcopy(tmp)) + except Exception as ex: + print("Erro ao salvar ", k) + print("Exceção ", ex) + columns = interval_dataframe_analytic_columns(experiments) dat = pd.DataFrame(ret, columns=columns) if save: dat.to_csv(Util.uniquefilename(file), sep=";") return dat + +def interval_dataframe_analytic_columns(experiments): + columns = [str(k) for k in np.arange(0, experiments)] + columns.insert(0, "Model") + columns.insert(1, "Order") + columns.insert(2, "Scheme") + columns.insert(3, "Partitions") + columns.insert(4, "Size") + columns.insert(5, "Measure") + return columns + + +def interval_dataframe_sintetic_columns(): + columns = ["Model", "Order", "Scheme", "Partitions", "SHARPAVG", "SHARPSTD", "RESAVG", "RESSTD", "COVAVG", + "COVSTD", "TIMEAVG", "TIMESTD", "SIZE"] + return columns + + +def save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, times1, times2, save, sintetic): + """ + Save benchmark results for m-step ahead probabilistic forecasters + :param experiments: + :param file: + :param objs: + :param crps_interval: + :param crps_distr: + :param times1: + :param times2: + :param save: + :param sintetic: + :return: + """ + ret = [] + + if sintetic: + + for k in sorted(objs.keys()): + try: + ret = [] + for k in sorted(objs.keys()): + try: + mod = [] + mfts = objs[k] + mod.append(mfts.shortname) + mod.append(mfts.order) + if not mfts.benchmark_only: + mod.append(mfts.partitioner.name) + mod.append(mfts.partitioner.partitions) + l = len(mfts) + else: + mod.append('-') + mod.append('-') + l = '-' + mod.append(np.round(np.nanmean(crps_interval[k]), 2)) + mod.append(np.round(np.nanstd(crps_interval[k]), 2)) + mod.append(np.round(np.nanmean(crps_distr[k]), 2)) + mod.append(np.round(np.nanstd(crps_distr[k]), 2)) + mod.append(l) + mod.append(np.round(np.nanmean(times1[k]), 4)) + mod.append(np.round(np.nanmean(times2[k]), 4)) + ret.append(mod) + except Exception as e: + print('Erro: %s' % e) + except Exception as ex: + print("Erro ao salvar ", k) + print("Exceção ", ex) + + columns = ahead_dataframe_sintetic_columns() + else: + for k in sorted(objs.keys()): + try: + mfts = objs[k] + n = mfts.shortname + o = mfts.order + if not mfts.benchmark_only: + s = mfts.partitioner.name + p = mfts.partitioner.partitions + l = len(mfts) + else: + s = '-' + p = '-' + l = '-' + tmp = [n, o, s, p, l, 'CRPS_Interval'] + tmp.extend(crps_interval[k]) + ret.append(deepcopy(tmp)) + tmp = [n, o, s, p, l, 'CRPS_Distribution'] + tmp.extend(crps_distr[k]) + ret.append(deepcopy(tmp)) + tmp = [n, o, s, p, l, 'TIME_Interval'] + tmp.extend(times1[k]) + ret.append(deepcopy(tmp)) + tmp = [n, o, s, p, l, 'TIME_Distribution'] + tmp.extend(times2[k]) + ret.append(deepcopy(tmp)) + except Exception as ex: + print("Erro ao salvar ", k) + print("Exceção ", ex) + columns = ahead_dataframe_analytic_columns(experiments) + dat = pd.DataFrame(ret, columns=columns) + if save: dat.to_csv(Util.uniquefilename(file), sep=";") + return dat + + +def ahead_dataframe_analytic_columns(experiments): + columns = [str(k) for k in np.arange(0, experiments)] + columns.insert(0, "Model") + columns.insert(1, "Order") + columns.insert(2, "Scheme") + columns.insert(3, "Partitions") + columns.insert(4, "Size") + columns.insert(5, "Measure") + return columns + + +def ahead_dataframe_sintetic_columns(): + columns = ["Model", "Order", "Scheme", "Partitions", "CRPS1AVG", "CRPS1STD", "CRPS2AVG", "CRPS2STD", + "SIZE", "TIME1AVG", "TIME2AVG"] + return columns diff --git a/benchmarks/benchmarks.py b/benchmarks/benchmarks.py index 0851771..4bd2d12 100644 --- a/benchmarks/benchmarks.py +++ b/benchmarks/benchmarks.py @@ -4,21 +4,24 @@ """Benchmarks to FTS methods""" +import datetime +import time +from copy import deepcopy + +import matplotlib as plt +import matplotlib.cm as cmx +import matplotlib.colors as pltcolors +import matplotlib.pyplot as plt import numpy as np import pandas as pd -import time -import datetime -import matplotlib as plt -import matplotlib.colors as pltcolors -import matplotlib.cm as cmx -import matplotlib.pyplot as plt from mpl_toolkits.mplot3d import Axes3D + +from probabilistic import ProbabilityDistribution +from pyFTS import song, chen, yu, ismailefendi, sadaei, hofts, pwfts, ifts, cheng, ensemble, hwang +from pyFTS.benchmarks import Measures, naive, arima, ResidualAnalysis, Util, quantreg +from pyFTS.common import Transformations, Util # from sklearn.cross_validation import KFold -from pyFTS.partitioners import partitioner, Grid, Huarng, Entropy, FCM -from pyFTS.benchmarks import Measures, naive, arima, ResidualAnalysis, ProbabilityDistribution, Util, quantreg -from pyFTS.common import Membership, FuzzySet, FLR, Transformations, Util -from pyFTS import fts, song, chen, yu, ismailefendi, sadaei, hofts, hwang, pwfts, ifts, cheng, ensemble, hwang -from copy import deepcopy +from pyFTS.partitioners import Grid colors = ['grey', 'rosybrown', 'maroon', 'red','orange', 'yellow', 'olive', 'green', 'cyan', 'blue', 'darkblue', 'purple', 'darkviolet'] @@ -369,62 +372,6 @@ def getProbabilityDistributionStatistics(pmfs, data): ret += " \\\\ \n" return ret -def save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, sintetic, times): - ret = [] - if sintetic: - for k in sorted(objs.keys()): - mod = [] - mfts = objs[k] - mod.append(mfts.shortname) - mod.append(mfts.order) - mod.append(mfts.partitioner.name) - mod.append(mfts.partitioner.partitions) - mod.append(round(np.nanmean(sharpness[k]), 2)) - mod.append(round(np.nanstd(sharpness[k]), 2)) - mod.append(round(np.nanmean(resolution[k]), 2)) - mod.append(round(np.nanstd(resolution[k]), 2)) - mod.append(round(np.nanmean(coverage[k]), 2)) - mod.append(round(np.nanstd(coverage[k]), 2)) - mod.append(round(np.nanmean(times[k]), 2)) - mod.append(round(np.nanstd(times[k]), 2)) - mod.append(len(mfts)) - ret.append(mod) - - columns = ["Model", "Order", "Scheme", "Partitions", "SHARPAVG", "SHARPSTD", "RESAVG", "RESSTD", "COVAVG", - "COVSTD", "TIMEAVG", "TIMESTD", "SIZE"] - else: - for k in sorted(objs.keys()): - try: - mfts = objs[k] - tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts), - 'Sharpness'] - tmp.extend(sharpness[k]) - ret.append(deepcopy(tmp)) - tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts), - 'Resolution'] - tmp.extend(resolution[k]) - ret.append(deepcopy(tmp)) - tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts), - 'Coverage'] - tmp.extend(coverage[k]) - ret.append(deepcopy(tmp)) - tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts), - 'TIME'] - tmp.extend(times[k]) - ret.append(deepcopy(tmp)) - except Exception as ex: - print("Erro ao salvar ", k) - print("Exceção ", ex) - columns = [str(k) for k in np.arange(0, experiments)] - columns.insert(0, "Model") - columns.insert(1, "Order") - columns.insert(2, "Scheme") - columns.insert(3, "Partitions") - columns.insert(4, "Size") - columns.insert(5, "Measure") - dat = pd.DataFrame(ret, columns=columns) - if save: dat.to_csv(Util.uniquefilename(file), sep=";") - return dat def interval_sliding_window(data, windowsize, train=0.8,models=None,partitioners=[Grid.GridPartitioner], @@ -518,7 +465,7 @@ def interval_sliding_window(data, windowsize, train=0.8,models=None,partitioners coverage[_key].append(_cov) times[_key].append(_tdiff) - return save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, sintetic, times) + return Util.save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, sintetic, times) def all_interval_forecasters(data_train, data_test, partitions, max_order=3,save=False, file=None, tam=[20, 5], @@ -637,80 +584,6 @@ def plot_probability_distributions(pmfs, lcolors, tam=[15, 7]): ax.legend(handles0, labels0) -def save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, times1, times2, save, sintetic): - """ - Save benchmark results for m-step ahead probabilistic forecasters - :param experiments: - :param file: - :param objs: - :param crps_interval: - :param crps_distr: - :param times1: - :param times2: - :param save: - :param sintetic: - :return: - """ - ret = [] - - if sintetic: - - for k in sorted(objs.keys()): - try: - ret = [] - for k in sorted(objs.keys()): - try: - mod = [] - mfts = objs[k] - mod.append(mfts.shortname) - mod.append(mfts.order) - mod.append(mfts.partitioner.name) - mod.append(mfts.partitioner.partitions) - mod.append(np.round(np.nanmean(crps_interval[k]), 2)) - mod.append(np.round(np.nanstd(crps_interval[k]), 2)) - mod.append(np.round(np.nanmean(crps_distr[k]), 2)) - mod.append(np.round(np.nanstd(crps_distr[k]), 2)) - mod.append(len(mfts)) - mod.append(np.round(np.nanmean(times1[k]), 4)) - mod.append(np.round(np.nanmean(times2[k]), 4)) - ret.append(mod) - except Exception as e: - print('Erro: %s' % e) - except Exception as ex: - print("Erro ao salvar ", k) - print("Exceção ", ex) - - columns = ["Model", "Order", "Scheme", "Partitions", "CRPS1AVG", "CRPS1STD", "CRPS2AVG", "CRPS2STD", - "SIZE", "TIME1AVG", "TIME2AVG"] - else: - for k in sorted(objs.keys()): - try: - mfts = objs[k] - tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts), 'CRPS_Interval'] - tmp.extend(crps_interval[k]) - ret.append(deepcopy(tmp)) - tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts), 'CRPS_Distribution'] - tmp.extend(crps_distr[k]) - ret.append(deepcopy(tmp)) - tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts), 'TIME_Interval'] - tmp.extend(times1[k]) - ret.append(deepcopy(tmp)) - tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts), 'TIME_Distribution'] - tmp.extend(times2[k]) - ret.append(deepcopy(tmp)) - except Exception as ex: - print("Erro ao salvar ", k) - print("Exceção ", ex) - columns = [str(k) for k in np.arange(0, experiments)] - columns.insert(0, "Model") - columns.insert(1, "Order") - columns.insert(2, "Scheme") - columns.insert(3, "Partitions") - columns.insert(4, "Size") - columns.insert(5, "Measure") - dat = pd.DataFrame(ret, columns=columns) - if save: dat.to_csv(Util.uniquefilename(file), sep=";") - return dat def ahead_sliding_window(data, windowsize, train, steps, models=None, resolution = None, partitioners=[Grid.GridPartitioner], @@ -806,7 +679,7 @@ def ahead_sliding_window(data, windowsize, train, steps, models=None, resolution if dump: print(_crps1, _crps2, _tdiff, _t1, _t2) - return save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, times1, times2, save, sintetic) + return Util.save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, times1, times2, save, sintetic) def all_ahead_forecasters(data_train, data_test, partitions, start, steps, resolution = None, max_order=3,save=False, file=None, tam=[20, 5], @@ -979,6 +852,7 @@ def plotCompared(original, forecasts, labels, title): ax.set_xlim([0, len(original)]) ax.set_ylim([min(original), max(original)]) + def SelecaoSimples_MenorRMSE(original, parameters, modelo): ret = [] errors = [] diff --git a/benchmarks/distributed_benchmarks.py b/benchmarks/distributed_benchmarks.py index 67755f4..e1f20f7 100644 --- a/benchmarks/distributed_benchmarks.py +++ b/benchmarks/distributed_benchmarks.py @@ -6,20 +6,16 @@ To enable a dispy cluster node: python3 /usr/local/bin/dispynode.py -i [local IP] -d """ -import random +import datetime +import time + import dispy import dispy.httpd -from copy import deepcopy import numpy as np -import pandas as pd -import time -import datetime -import pyFTS -from pyFTS.partitioners import partitioner, Grid, Huarng, Entropy, FCM -from pyFTS.benchmarks import Measures, naive, arima, ResidualAnalysis, ProbabilityDistribution -from pyFTS.common import Membership, FuzzySet, FLR, Transformations, Util -from pyFTS import fts, chen, yu, ismailefendi, sadaei, hofts, hwang, pwfts, ifts -from pyFTS.benchmarks import benchmarks, parallel_benchmarks, Util as bUtil + +from pyFTS.benchmarks import benchmarks, Util as bUtil +from pyFTS.common import Util +from pyFTS.partitioners import Grid def run_point(mfts, partitioner, train_data, test_data, window_key=None, transformation=None, indexer=None): diff --git a/benchmarks/parallel_benchmarks.py b/benchmarks/parallel_benchmarks.py index b2db134..4365a26 100644 --- a/benchmarks/parallel_benchmarks.py +++ b/benchmarks/parallel_benchmarks.py @@ -2,19 +2,17 @@ joblib Parallelized Benchmarks to FTS methods """ -from copy import deepcopy -from joblib import Parallel, delayed +import datetime import multiprocessing +import time +from copy import deepcopy import numpy as np -import pandas as pd -import time -import datetime -from pyFTS.partitioners import partitioner, Grid, Huarng, Entropy, FCM -from pyFTS.benchmarks import Measures, naive, arima, ResidualAnalysis, ProbabilityDistribution -from pyFTS.common import Membership, FuzzySet, FLR, Transformations, Util -from pyFTS import fts, chen, yu, ismailefendi, sadaei, hofts, hwang, pwfts, ifts -from pyFTS.benchmarks import benchmarks +from joblib import Parallel, delayed + +from pyFTS.benchmarks import benchmarks, Util +from pyFTS.common import Util +from pyFTS.partitioners import Grid def run_point(mfts, partitioner, train_data, test_data, transformation=None, indexer=None): @@ -140,7 +138,7 @@ def point_sliding_window(data, windowsize, train=0.8, models=None, partitioners= print("Process Duration: {0}".format(_process_end - _process_start)) - return benchmarks.save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, times, u) + return Util.save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, times, u) def run_interval(mfts, partitioner, train_data, test_data, transformation=None, indexer=None): @@ -267,7 +265,7 @@ def interval_sliding_window(data, windowsize, train=0.8, models=None, partitione print("Process Duration: {0}".format(_process_end - _process_start)) - return benchmarks.save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, sintetic, times) + return Util.save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, sintetic, times) def run_ahead(mfts, partitioner, train_data, test_data, steps, resolution, transformation=None, indexer=None): @@ -397,4 +395,4 @@ def ahead_sliding_window(data, windowsize, train, steps,resolution, models=None, print("Process Duration: {0}".format(_process_end - _process_start)) - return benchmarks.save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, times1, times2, save, sintetic) + return Util.save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, times1, times2, save, sintetic) diff --git a/benchmarks/ProbabilityDistribution.py b/probabilistic/ProbabilityDistribution.py similarity index 99% rename from benchmarks/ProbabilityDistribution.py rename to probabilistic/ProbabilityDistribution.py index c42d73c..3d70dad 100644 --- a/benchmarks/ProbabilityDistribution.py +++ b/probabilistic/ProbabilityDistribution.py @@ -98,7 +98,6 @@ class ProbabilityDistribution(object): axis.set_xlabel('Universe of Discourse') axis.set_ylabel('Probability') - def __str__(self): head = '|' body = '|' diff --git a/probabilistic/__init__.py b/probabilistic/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/probabilistic/kde.py b/probabilistic/kde.py new file mode 100644 index 0000000..724dbbb --- /dev/null +++ b/probabilistic/kde.py @@ -0,0 +1,25 @@ +""" +Kernel Density Estimation +""" + + +class KernelSmoothing(object): + """Kernel Density Estimation""" + def __init__(self,h, data, method="epanechnikov"): + self.h = h + self.data = data + self.method = method + + def kernel(self, u): + if self.method == "epanechnikov": + return (3/4) * (1 - u**2) + elif self.method == "uniform": + return 0.5 + elif self.method == "uniform": + return 0.5 + + def probability(self, x): + l = len(self.data) + p = sum([self.kernel((x - k)/self.h) for k in self.data]) / l*self.h + + return p \ No newline at end of file diff --git a/tests/general.py b/tests/general.py index 30576f4..ec794ab 100644 --- a/tests/general.py +++ b/tests/general.py @@ -35,12 +35,15 @@ os.chdir("/home/petronio/dados/Dropbox/Doutorado/Codigos/") #print(FCM.FCMPartitionerTrimf.__module__) -#gauss = random.normal(0,1.0,1000) +#gauss = random.normal(0,1.0,5000) #gauss_teste = random.normal(0,1.0,400) -taiexpd = pd.read_csv("DataSets/TAIEX.csv", sep=",") -taiex = np.array(taiexpd["avg"][:5000]) +#taiexpd = pd.read_csv("DataSets/TAIEX.csv", sep=",") +#taiex = np.array(taiexpd["avg"][:5000]) + +#nasdaqpd = pd.read_csv("DataSets/NASDAQ_IXIC.csv", sep=",") +#nasdaq = np.array(nasdaqpd["avg"][0:5000]) #from statsmodels.tsa.arima_model import ARIMA as stats_arima from statsmodels.tsa.tsatools import lagmat @@ -54,9 +57,12 @@ from statsmodels.tsa.tsatools import lagmat from pyFTS.benchmarks import distributed_benchmarks as bchmk #from pyFTS.benchmarks import parallel_benchmarks as bchmk -#from pyFTS.benchmarks import benchmarks as bchmk +from pyFTS.benchmarks import Util #from pyFTS.benchmarks import arima +#Util.cast_dataframe_to_sintetic_point("experiments/taiex_point_analitic.csv","experiments/taiex_point_sintetic.csv",11) + +Util.plot_dataframe_point("experiments/taiex_point_sintetic.csv","experiments/taiex_point_analitic.csv",11) #tmp = arima.ARIMA("") #tmp.train(taiex[:1600],None,parameters=(2,0,1)) @@ -66,11 +72,11 @@ from pyFTS.benchmarks import distributed_benchmarks as bchmk #bchmk.teste(taiex,['192.168.0.109', '192.168.0.101']) -bchmk.point_sliding_window(taiex,2000,train=0.8, #models=[yu.WeightedFTS], # # - partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ], - partitions= np.arange(10,200,step=5), #transformation=diff, - dump=True, save=True, file="experiments/taiex_point_distributed.csv", - nodes=['192.168.0.102', '192.168.0.109']) #, depends=[hofts, ifts]) +#bchmk.point_sliding_window(gauss,2000,train=0.8, #models=[yu.WeightedFTS], # # +# partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ], +# partitions= np.arange(3,10,step=1), #transformation=diff, +# dump=True, save=True, file="experiments/gauss_point_distributed.csv", +# nodes=['192.168.0.102', '192.168.0.109']) #, depends=[hofts, ifts]) #bchmk.testa(taiex,[10,20],partitioners=[Grid.GridPartitioner], nodes=['192.168.0.109', '192.168.0.101'])