- Issue #3 - Code documentation with PEP 257 compliance

- Benchmarks refactoring and optimizations
 - Probabilistic package, with Kernel Density Estimation
This commit is contained in:
Petrônio Cândido de Lima e Silva 2017-05-08 13:12:08 -03:00
parent ea7a0bbd62
commit ff67356d64
8 changed files with 428 additions and 192 deletions

View File

@ -2,12 +2,68 @@
Benchmark utility functions
"""
import matplotlib as plt
import matplotlib.cm as cmx
import matplotlib.colors as pltcolors
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from checkbox_support.parsers.tests.test_modinfo import testMultipleModinfoParser
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import pandas as pd
from copy import deepcopy
from pyFTS.common import Util
def extract_measure(dataframe, measure, data_columns):
    """
    Extract the values of one measure from an analytic benchmark dataframe.

    :param dataframe: pandas DataFrame holding a ``Measure`` column
    :param measure: value looked up in the ``Measure`` column
    :param data_columns: list of column names whose values are returned
    :return: list with the values of ``data_columns`` taken from the first row
             whose ``Measure`` equals ``measure``; None when the dataframe is
             empty or contains no such row
    """
    if dataframe.empty:
        return None
    records = dataframe[dataframe.Measure == measure][data_columns].to_dict(orient="records")
    # A missing measure is reported the same way as an empty dataframe
    # instead of failing with IndexError on records[0].
    if not records:
        return None
    return list(records[0].values())
def find_best(dataframe, criteria, ascending):
    """
    Pick the best-ranked configuration for every (Model, Order) pair.

    :param dataframe: pandas DataFrame with Model, Order, Scheme and Partitions columns
    :param criteria: column name(s) passed to ``sort_values`` as ``by``
    :param ascending: sort direction(s) passed to ``sort_values``
    :return: dict keyed by ``str(model) + str(order)``; each value is a dict
             with the Model, Order, Scheme and Partitions of the first row
             after sorting
    """
    model_names = dataframe.Model.unique()
    model_orders = dataframe.Order.unique()
    winners = {}
    for name in model_names:
        for order in model_orders:
            selection = (dataframe.Model == name) & (dataframe.Order == order)
            ranked = dataframe[selection].sort_values(by=criteria, ascending=ascending)
            # Combinations that never occur in the data are simply skipped.
            if ranked.empty:
                continue
            top = ranked.loc[ranked.index[0]]
            winners[str(name) + str(order)] = {
                'Model': name,
                'Order': order,
                'Scheme': top["Scheme"],
                'Partitions': top["Partitions"],
            }
    return winners
def point_dataframe_sintetic_columns():
    """
    Column names of the synthetic (aggregated) point forecasting dataframe.

    :return: list of column names
    """
    identification = ["Model", "Order", "Scheme", "Partitions", "Size"]
    statistics = ["RMSEAVG", "RMSESTD", "SMAPEAVG", "SMAPESTD", "UAVG", "USTD", "TIMEAVG", "TIMESTD"]
    return identification + statistics
def point_dataframe_analytic_columns(experiments):
    """
    Column names of the analytic (per experiment) point forecasting dataframe.

    :param experiments: number of experiments; one data column named after
                        each index in ``np.arange(0, experiments)`` is produced
    :return: list with the six identification columns followed by the data columns
    """
    header = ["Model", "Order", "Scheme", "Partitions", "Size", "Measure"]
    return header + [str(index) for index in np.arange(0, experiments)]
def save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, times, u):
"""
Create a dataframe to store the benchmark results
@ -32,9 +88,14 @@ def save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, t
mfts = objs[k]
mod.append(mfts.shortname)
mod.append(mfts.order)
mod.append(mfts.partitioner.name)
mod.append(mfts.partitioner.partitions)
mod.append(len(mfts))
if not mfts.benchmark_only:
mod.append(mfts.partitioner.name)
mod.append(mfts.partitioner.partitions)
mod.append(len(mfts))
else:
mod.append('-')
mod.append('-')
mod.append('-')
mod.append(np.round(np.nanmean(rmse[k]), 2))
mod.append(np.round(np.nanstd(rmse[k]), 2))
mod.append(np.round(np.nanmean(smape[k]), 2))
@ -42,38 +103,315 @@ def save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, t
mod.append(np.round(np.nanmean(u[k]), 2))
mod.append(np.round(np.nanstd(u[k]), 2))
mod.append(np.round(np.nanmean(times[k]), 4))
mod.append(np.round(np.nanstd(times[k]), 4))
ret.append(mod)
except Exception as ex:
print("Erro ao salvar ", k)
print("Exceção ", ex)
columns = ["Model", "Order", "Scheme","Partitions", "Size", "RMSEAVG", "RMSESTD", "SMAPEAVG", "SMAPESTD", "UAVG", "USTD", "TIMEAVG"]
columns = point_dataframe_sintetic_columns()
else:
for k in sorted(objs.keys()):
try:
mfts = objs[k]
tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts), 'RMSE']
n = mfts.shortname
o = mfts.order
if not mfts.benchmark_only:
s = mfts.partitioner.name
p = mfts.partitioner.partitions
l = len(mfts)
else:
s = '-'
p = '-'
l = '-'
tmp = [n, o, s, p, l, 'RMSE']
tmp.extend(rmse[k])
ret.append(deepcopy(tmp))
tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts), 'SMAPE']
tmp = [n, o, s, p, l, 'SMAPE']
tmp.extend(smape[k])
ret.append(deepcopy(tmp))
tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts), 'U']
tmp = [n, o, s, p, l, 'U']
tmp.extend(u[k])
ret.append(deepcopy(tmp))
tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts), 'TIME']
tmp = [n, o, s, p, l, 'TIME']
tmp.extend(times[k])
ret.append(deepcopy(tmp))
except Exception as ex:
print("Erro ao salvar ", k)
print("Exceção ", ex)
columns = [str(k) for k in np.arange(0, experiments)]
columns.insert(0, "Model")
columns.insert(1, "Order")
columns.insert(2, "Scheme")
columns.insert(3, "Partitions")
columns.insert(4, "Size")
columns.insert(5, "Measure")
columns = point_dataframe_analytic_columns(experiments)
dat = pd.DataFrame(ret, columns=columns)
if save: dat.to_csv(Util.uniquefilename(file), sep=";", index=False)
return dat
def cast_dataframe_to_sintetic_point(infile, outfile, experiments):
    """
    Convert an analytic point forecasting CSV into its synthetic form.

    The analytic file is read with ';' as separator; for every
    (Model, Order, Scheme, Partitions) combination present in it the RMSE,
    SMAPE, U and TIME samples are reduced to mean and standard deviation, and
    the result is written to ``Util.uniquefilename(outfile)``.

    :param infile: path of the analytic CSV file
    :param outfile: base name of the synthetic CSV file to write
    :param experiments: number of experiment columns in the analytic file
    """
    analytic = pd.read_csv(infile, sep=";", usecols=point_dataframe_analytic_columns(experiments))
    value_columns = analytical_data_columns(experiments)

    model_names = analytic.Model.unique()
    model_orders = analytic.Order.unique()
    scheme_names = analytic.Scheme.unique()
    partition_counts = analytic.Partitions.unique()

    rows = []
    for model in model_names:
        for order in model_orders:
            for scheme in scheme_names:
                for partitions in partition_counts:
                    subset = analytic[(analytic.Model == model) & (analytic.Order == order)
                                      & (analytic.Scheme == scheme) & (analytic.Partitions == partitions)]
                    if subset.empty:
                        continue
                    rmse = extract_measure(subset, 'RMSE', value_columns)
                    smape = extract_measure(subset, 'SMAPE', value_columns)
                    u = extract_measure(subset, 'U', value_columns)
                    times = extract_measure(subset, 'TIME', value_columns)
                    # The size is read from the RMSE row of the combination.
                    size = extract_measure(subset, 'RMSE', ['Size'])[0]
                    rows.append([
                        model, order, scheme, partitions, size,
                        np.round(np.nanmean(rmse), 2), np.round(np.nanstd(rmse), 2),
                        np.round(np.nanmean(smape), 2), np.round(np.nanstd(smape), 2),
                        np.round(np.nanmean(u), 2), np.round(np.nanstd(u), 2),
                        np.round(np.nanmean(times), 4), np.round(np.nanstd(times), 4),
                    ])

    synthetic = pd.DataFrame(rows, columns=point_dataframe_sintetic_columns())
    synthetic.to_csv(Util.uniquefilename(outfile), sep=";", index=False)
def analytical_data_columns(experiments):
    """
    Names of the data (per experiment) columns of an analytic dataframe.

    :param experiments: number of experiments
    :return: list with the string form of each index in ``np.arange(0, experiments)``
    """
    return [str(index) for index in np.arange(0, experiments)]
def plot_dataframe_point(file_synthetic, file_analytic, experiments):
    """
    Draw box plots of the point forecasting measures of the best models.

    The best configuration of each (Model, Order) pair is chosen from the
    synthetic file by UAVG, RMSEAVG, USTD and RMSESTD; its samples are then
    taken from the analytic file and shown in four stacked box plots
    (RMSE, SMAPE, U Statistic, Execution Time).

    :param file_synthetic: path of the synthetic CSV file (';' separated)
    :param file_analytic: path of the analytic CSV file (';' separated)
    :param experiments: number of experiment columns in the analytic file
    """
    fig, axes = plt.subplots(nrows=4, ncols=1, figsize=[6, 8])
    for axis, title in zip(axes, ('RMSE', 'SMAPE', 'U Statistic', 'Execution Time')):
        axis.set_title(title)

    synthetic = pd.read_csv(file_synthetic, sep=";", usecols=point_dataframe_sintetic_columns())
    bests = find_best(synthetic, ['UAVG','RMSEAVG','USTD','RMSESTD'], [1,1,1,1])

    analytic = pd.read_csv(file_analytic, sep=";", usecols=point_dataframe_analytic_columns(experiments))
    value_columns = analytical_data_columns(experiments)

    # One list of samples per measure, in the same order as the axes.
    measures = ('RMSE', 'SMAPE', 'U', 'TIME')
    samples = {measure: [] for measure in measures}
    labels = []

    for key in bests.keys():
        best = bests[key]
        rows = analytic[(analytic.Model == best["Model"]) & (analytic.Order == best["Order"])
                        & (analytic.Scheme == best["Scheme"]) & (analytic.Partitions == best["Partitions"])]
        for measure in measures:
            samples[measure].append(extract_measure(rows, measure, value_columns))
        labels.append(best["Model"] + " " + str(best["Order"]))

    for axis, measure in zip(axes, measures):
        axis.boxplot(samples[measure], labels=labels, showmeans=True)

    plt.show()
def save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, sintetic, times):
    """
    Create a dataframe with the benchmark results of interval forecasters.

    :param coverage: dict key -> list of coverage values
    :param experiments: number of experiments (data columns of the analytic layout)
    :param file: file name handed to Util.uniquefilename when save is true
    :param objs: dict key -> benchmarked model; models expose shortname, order,
                 benchmark_only, partitioner and len()
    :param resolution: dict key -> list of resolution values
    :param save: if true the dataframe is also written as a ';' separated CSV
    :param sharpness: dict key -> list of sharpness values
    :param sintetic: if true one aggregated row (mean/std) per model is built,
                     otherwise one row per model and measure with the raw values
    :param times: dict key -> list of execution times
    :return: pandas DataFrame with the results
    """
    rows = []
    if sintetic:
        for key in sorted(objs.keys()):
            model = objs[key]
            row = [model.shortname, model.order]
            # Benchmark-only models have no partitioner data; '-' is stored instead.
            if not model.benchmark_only:
                row.append(model.partitioner.name)
                row.append(model.partitioner.partitions)
                size = len(model)
            else:
                row.extend(['-', '-'])
                size = '-'
            for series in (sharpness, resolution, coverage, times):
                row.append(round(np.nanmean(series[key]), 2))
                row.append(round(np.nanstd(series[key]), 2))
            row.append(size)
            rows.append(row)
        columns = interval_dataframe_sintetic_columns()
    else:
        measures = (('Sharpness', sharpness), ('Resolution', resolution),
                    ('Coverage', coverage), ('TIME', times))
        for key in sorted(objs.keys()):
            try:
                model = objs[key]
                name = model.shortname
                order = model.order
                if not model.benchmark_only:
                    scheme = model.partitioner.name
                    partitions = model.partitioner.partitions
                    size = len(model)
                else:
                    scheme = partitions = size = '-'
                for label, series in measures:
                    row = [name, order, scheme, partitions, size, label]
                    row.extend(series[key])
                    rows.append(deepcopy(row))
            except Exception as ex:
                print("Erro ao salvar ", key)
                print("Exceção ", ex)
        columns = interval_dataframe_analytic_columns(experiments)
    dat = pd.DataFrame(rows, columns=columns)
    if save:
        dat.to_csv(Util.uniquefilename(file), sep=";")
    return dat
def interval_dataframe_analytic_columns(experiments):
    """
    Column names of the analytic (per experiment) interval forecasting dataframe.

    :param experiments: number of experiments; one data column named after
                        each index in ``np.arange(0, experiments)`` is produced
    :return: list with the six identification columns followed by the data columns
    """
    header = ["Model", "Order", "Scheme", "Partitions", "Size", "Measure"]
    return header + [str(index) for index in np.arange(0, experiments)]
def interval_dataframe_sintetic_columns():
    """
    Column names of the synthetic (aggregated) interval forecasting dataframe.

    :return: list of column names
    """
    return [
        "Model", "Order", "Scheme", "Partitions",
        "SHARPAVG", "SHARPSTD",
        "RESAVG", "RESSTD",
        "COVAVG", "COVSTD",
        "TIMEAVG", "TIMESTD",
        "SIZE",
    ]
def save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, times1, times2, save, sintetic):
    """
    Save benchmark results for m-step ahead probabilistic forecasters

    :param experiments: number of experiments (data columns of the analytic layout)
    :param file: file name handed to Util.uniquefilename when save is true
    :param objs: dict key -> benchmarked model; models expose shortname, order,
                 benchmark_only, partitioner and len()
    :param crps_interval: dict key -> list of values stored as CRPS1* / 'CRPS_Interval'
    :param crps_distr: dict key -> list of values stored as CRPS2* / 'CRPS_Distribution'
    :param times1: dict key -> list of times stored as TIME1AVG / 'TIME_Interval'
    :param times2: dict key -> list of times stored as TIME2AVG / 'TIME_Distribution'
    :param save: if true the dataframe is also written as a ';' separated CSV
    :param sintetic: if true one aggregated row per model is built, otherwise
                     one row per model and measure with the raw values
    :return: pandas DataFrame with the results
    """
    ret = []
    if sintetic:
        # Single pass over the models. The former enclosing loop over the same
        # keys only rebuilt the identical list once per model.
        for k in sorted(objs.keys()):
            try:
                mod = []
                mfts = objs[k]
                mod.append(mfts.shortname)
                mod.append(mfts.order)
                # Benchmark-only models have no partitioner data; '-' is stored instead.
                if not mfts.benchmark_only:
                    mod.append(mfts.partitioner.name)
                    mod.append(mfts.partitioner.partitions)
                    l = len(mfts)
                else:
                    mod.append('-')
                    mod.append('-')
                    l = '-'
                mod.append(np.round(np.nanmean(crps_interval[k]), 2))
                mod.append(np.round(np.nanstd(crps_interval[k]), 2))
                mod.append(np.round(np.nanmean(crps_distr[k]), 2))
                mod.append(np.round(np.nanstd(crps_distr[k]), 2))
                mod.append(l)
                mod.append(np.round(np.nanmean(times1[k]), 4))
                mod.append(np.round(np.nanmean(times2[k]), 4))
                ret.append(mod)
            except Exception as e:
                # Best effort: a failing model is reported and skipped.
                print('Erro: %s' % e)
        columns = ahead_dataframe_sintetic_columns()
    else:
        for k in sorted(objs.keys()):
            try:
                mfts = objs[k]
                n = mfts.shortname
                o = mfts.order
                if not mfts.benchmark_only:
                    s = mfts.partitioner.name
                    p = mfts.partitioner.partitions
                    l = len(mfts)
                else:
                    s = '-'
                    p = '-'
                    l = '-'
                tmp = [n, o, s, p, l, 'CRPS_Interval']
                tmp.extend(crps_interval[k])
                ret.append(deepcopy(tmp))
                tmp = [n, o, s, p, l, 'CRPS_Distribution']
                tmp.extend(crps_distr[k])
                ret.append(deepcopy(tmp))
                tmp = [n, o, s, p, l, 'TIME_Interval']
                tmp.extend(times1[k])
                ret.append(deepcopy(tmp))
                tmp = [n, o, s, p, l, 'TIME_Distribution']
                tmp.extend(times2[k])
                ret.append(deepcopy(tmp))
            except Exception as ex:
                print("Erro ao salvar ", k)
                print("Exceção ", ex)
        columns = ahead_dataframe_analytic_columns(experiments)
    dat = pd.DataFrame(ret, columns=columns)
    if save:
        dat.to_csv(Util.uniquefilename(file), sep=";")
    return dat
def ahead_dataframe_analytic_columns(experiments):
    """
    Column names of the analytic (per experiment) m-step ahead dataframe.

    :param experiments: number of experiments; one data column named after
                        each index in ``np.arange(0, experiments)`` is produced
    :return: list with the six identification columns followed by the data columns
    """
    header = ["Model", "Order", "Scheme", "Partitions", "Size", "Measure"]
    return header + [str(index) for index in np.arange(0, experiments)]
def ahead_dataframe_sintetic_columns():
    """
    Column names of the synthetic (aggregated) m-step ahead dataframe.

    :return: list of column names
    """
    return [
        "Model", "Order", "Scheme", "Partitions",
        "CRPS1AVG", "CRPS1STD",
        "CRPS2AVG", "CRPS2STD",
        "SIZE",
        "TIME1AVG", "TIME2AVG",
    ]

View File

@ -4,21 +4,24 @@
"""Benchmarks to FTS methods"""
import datetime
import time
from copy import deepcopy
import matplotlib as plt
import matplotlib.cm as cmx
import matplotlib.colors as pltcolors
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import time
import datetime
import matplotlib as plt
import matplotlib.colors as pltcolors
import matplotlib.cm as cmx
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from probabilistic import ProbabilityDistribution
from pyFTS import song, chen, yu, ismailefendi, sadaei, hofts, pwfts, ifts, cheng, ensemble, hwang
from pyFTS.benchmarks import Measures, naive, arima, ResidualAnalysis, Util, quantreg
from pyFTS.common import Transformations, Util
# from sklearn.cross_validation import KFold
from pyFTS.partitioners import partitioner, Grid, Huarng, Entropy, FCM
from pyFTS.benchmarks import Measures, naive, arima, ResidualAnalysis, ProbabilityDistribution, Util, quantreg
from pyFTS.common import Membership, FuzzySet, FLR, Transformations, Util
from pyFTS import fts, song, chen, yu, ismailefendi, sadaei, hofts, hwang, pwfts, ifts, cheng, ensemble, hwang
from copy import deepcopy
from pyFTS.partitioners import Grid
colors = ['grey', 'rosybrown', 'maroon', 'red','orange', 'yellow', 'olive', 'green',
'cyan', 'blue', 'darkblue', 'purple', 'darkviolet']
@ -369,62 +372,6 @@ def getProbabilityDistributionStatistics(pmfs, data):
ret += " \\\\ \n"
return ret
def save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, sintetic, times):
    """
    Create a dataframe with the benchmark results of interval forecasters.

    :param coverage: dict key -> list of coverage values
    :param experiments: number of experiments (data columns of the analytic layout)
    :param file: file name handed to Util.uniquefilename when save is true
    :param objs: dict key -> benchmarked model; models expose shortname, order,
                 partitioner and len()
    :param resolution: dict key -> list of resolution values
    :param save: if true the dataframe is also written as a ';' separated CSV
    :param sharpness: dict key -> list of sharpness values
    :param sintetic: if true one aggregated row (mean/std) per model is built,
                     otherwise one row per model and measure with the raw values
    :param times: dict key -> list of execution times
    :return: pandas DataFrame with the results
    """
    rows = []
    if sintetic:
        for key in sorted(objs.keys()):
            model = objs[key]
            row = [model.shortname, model.order,
                   model.partitioner.name, model.partitioner.partitions]
            for series in (sharpness, resolution, coverage, times):
                row.append(round(np.nanmean(series[key]), 2))
                row.append(round(np.nanstd(series[key]), 2))
            row.append(len(model))
            rows.append(row)
        columns = ["Model", "Order", "Scheme", "Partitions", "SHARPAVG", "SHARPSTD", "RESAVG", "RESSTD", "COVAVG",
                   "COVSTD", "TIMEAVG", "TIMESTD", "SIZE"]
    else:
        measures = (('Sharpness', sharpness), ('Resolution', resolution),
                    ('Coverage', coverage), ('TIME', times))
        for key in sorted(objs.keys()):
            try:
                model = objs[key]
                for label, series in measures:
                    row = [model.shortname, model.order, model.partitioner.name,
                           model.partitioner.partitions, len(model), label]
                    row.extend(series[key])
                    rows.append(deepcopy(row))
            except Exception as ex:
                print("Erro ao salvar ", key)
                print("Exceção ", ex)
        header = ["Model", "Order", "Scheme", "Partitions", "Size", "Measure"]
        columns = header + [str(index) for index in np.arange(0, experiments)]
    dat = pd.DataFrame(rows, columns=columns)
    if save:
        dat.to_csv(Util.uniquefilename(file), sep=";")
    return dat
def interval_sliding_window(data, windowsize, train=0.8,models=None,partitioners=[Grid.GridPartitioner],
@ -518,7 +465,7 @@ def interval_sliding_window(data, windowsize, train=0.8,models=None,partitioners
coverage[_key].append(_cov)
times[_key].append(_tdiff)
return save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, sintetic, times)
return Util.save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, sintetic, times)
def all_interval_forecasters(data_train, data_test, partitions, max_order=3,save=False, file=None, tam=[20, 5],
@ -637,80 +584,6 @@ def plot_probability_distributions(pmfs, lcolors, tam=[15, 7]):
ax.legend(handles0, labels0)
def save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, times1, times2, save, sintetic):
    """
    Save benchmark results for m-step ahead probabilistic forecasters

    :param experiments: number of experiments (data columns of the analytic layout)
    :param file: file name handed to Util.uniquefilename when save is true
    :param objs: dict key -> benchmarked model; models expose shortname, order,
                 partitioner and len()
    :param crps_interval: dict key -> list of values stored as CRPS1* / 'CRPS_Interval'
    :param crps_distr: dict key -> list of values stored as CRPS2* / 'CRPS_Distribution'
    :param times1: dict key -> list of times stored as TIME1AVG / 'TIME_Interval'
    :param times2: dict key -> list of times stored as TIME2AVG / 'TIME_Distribution'
    :param save: if true the dataframe is also written as a ';' separated CSV
    :param sintetic: if true one aggregated row per model is built, otherwise
                     one row per model and measure with the raw values
    :return: pandas DataFrame with the results
    """
    ret = []
    if sintetic:
        # Single pass over the models. The former enclosing loop over the same
        # keys only rebuilt the identical list once per model.
        for k in sorted(objs.keys()):
            try:
                mod = []
                mfts = objs[k]
                mod.append(mfts.shortname)
                mod.append(mfts.order)
                mod.append(mfts.partitioner.name)
                mod.append(mfts.partitioner.partitions)
                mod.append(np.round(np.nanmean(crps_interval[k]), 2))
                mod.append(np.round(np.nanstd(crps_interval[k]), 2))
                mod.append(np.round(np.nanmean(crps_distr[k]), 2))
                mod.append(np.round(np.nanstd(crps_distr[k]), 2))
                mod.append(len(mfts))
                mod.append(np.round(np.nanmean(times1[k]), 4))
                mod.append(np.round(np.nanmean(times2[k]), 4))
                ret.append(mod)
            except Exception as e:
                # Best effort: a failing model is reported and skipped.
                print('Erro: %s' % e)
        columns = ["Model", "Order", "Scheme", "Partitions", "CRPS1AVG", "CRPS1STD", "CRPS2AVG", "CRPS2STD",
                   "SIZE", "TIME1AVG", "TIME2AVG"]
    else:
        for k in sorted(objs.keys()):
            try:
                mfts = objs[k]
                tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts), 'CRPS_Interval']
                tmp.extend(crps_interval[k])
                ret.append(deepcopy(tmp))
                tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts), 'CRPS_Distribution']
                tmp.extend(crps_distr[k])
                ret.append(deepcopy(tmp))
                tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts), 'TIME_Interval']
                tmp.extend(times1[k])
                ret.append(deepcopy(tmp))
                tmp = [mfts.shortname, mfts.order, mfts.partitioner.name, mfts.partitioner.partitions, len(mfts), 'TIME_Distribution']
                tmp.extend(times2[k])
                ret.append(deepcopy(tmp))
            except Exception as ex:
                print("Erro ao salvar ", k)
                print("Exceção ", ex)
        columns = [str(k) for k in np.arange(0, experiments)]
        columns.insert(0, "Model")
        columns.insert(1, "Order")
        columns.insert(2, "Scheme")
        columns.insert(3, "Partitions")
        columns.insert(4, "Size")
        columns.insert(5, "Measure")
    dat = pd.DataFrame(ret, columns=columns)
    if save:
        dat.to_csv(Util.uniquefilename(file), sep=";")
    return dat
def ahead_sliding_window(data, windowsize, train, steps, models=None, resolution = None, partitioners=[Grid.GridPartitioner],
@ -806,7 +679,7 @@ def ahead_sliding_window(data, windowsize, train, steps, models=None, resolution
if dump: print(_crps1, _crps2, _tdiff, _t1, _t2)
return save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, times1, times2, save, sintetic)
return Util.save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, times1, times2, save, sintetic)
def all_ahead_forecasters(data_train, data_test, partitions, start, steps, resolution = None, max_order=3,save=False, file=None, tam=[20, 5],
@ -979,6 +852,7 @@ def plotCompared(original, forecasts, labels, title):
ax.set_xlim([0, len(original)])
ax.set_ylim([min(original), max(original)])
def SelecaoSimples_MenorRMSE(original, parameters, modelo):
ret = []
errors = []

View File

@ -6,20 +6,16 @@ To enable a dispy cluster node:
python3 /usr/local/bin/dispynode.py -i [local IP] -d
"""
import random
import datetime
import time
import dispy
import dispy.httpd
from copy import deepcopy
import numpy as np
import pandas as pd
import time
import datetime
import pyFTS
from pyFTS.partitioners import partitioner, Grid, Huarng, Entropy, FCM
from pyFTS.benchmarks import Measures, naive, arima, ResidualAnalysis, ProbabilityDistribution
from pyFTS.common import Membership, FuzzySet, FLR, Transformations, Util
from pyFTS import fts, chen, yu, ismailefendi, sadaei, hofts, hwang, pwfts, ifts
from pyFTS.benchmarks import benchmarks, parallel_benchmarks, Util as bUtil
from pyFTS.benchmarks import benchmarks, Util as bUtil
from pyFTS.common import Util
from pyFTS.partitioners import Grid
def run_point(mfts, partitioner, train_data, test_data, window_key=None, transformation=None, indexer=None):

View File

@ -2,19 +2,17 @@
joblib Parallelized Benchmarks to FTS methods
"""
from copy import deepcopy
from joblib import Parallel, delayed
import datetime
import multiprocessing
import time
from copy import deepcopy
import numpy as np
import pandas as pd
import time
import datetime
from pyFTS.partitioners import partitioner, Grid, Huarng, Entropy, FCM
from pyFTS.benchmarks import Measures, naive, arima, ResidualAnalysis, ProbabilityDistribution
from pyFTS.common import Membership, FuzzySet, FLR, Transformations, Util
from pyFTS import fts, chen, yu, ismailefendi, sadaei, hofts, hwang, pwfts, ifts
from pyFTS.benchmarks import benchmarks
from joblib import Parallel, delayed
from pyFTS.benchmarks import benchmarks, Util
from pyFTS.common import Util
from pyFTS.partitioners import Grid
def run_point(mfts, partitioner, train_data, test_data, transformation=None, indexer=None):
@ -140,7 +138,7 @@ def point_sliding_window(data, windowsize, train=0.8, models=None, partitioners=
print("Process Duration: {0}".format(_process_end - _process_start))
return benchmarks.save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, times, u)
return Util.save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, times, u)
def run_interval(mfts, partitioner, train_data, test_data, transformation=None, indexer=None):
@ -267,7 +265,7 @@ def interval_sliding_window(data, windowsize, train=0.8, models=None, partitione
print("Process Duration: {0}".format(_process_end - _process_start))
return benchmarks.save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, sintetic, times)
return Util.save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, sintetic, times)
def run_ahead(mfts, partitioner, train_data, test_data, steps, resolution, transformation=None, indexer=None):
@ -397,4 +395,4 @@ def ahead_sliding_window(data, windowsize, train, steps,resolution, models=None,
print("Process Duration: {0}".format(_process_end - _process_start))
return benchmarks.save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, times1, times2, save, sintetic)
return Util.save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, times1, times2, save, sintetic)

View File

@ -98,7 +98,6 @@ class ProbabilityDistribution(object):
axis.set_xlabel('Universe of Discourse')
axis.set_ylabel('Probability')
def __str__(self):
head = '|'
body = '|'

View File

25
probabilistic/kde.py Normal file
View File

@ -0,0 +1,25 @@
"""
Kernel Density Estimation
"""
class KernelSmoothing(object):
    """Kernel Density Estimation"""

    def __init__(self, h, data, method="epanechnikov"):
        """
        :param h: bandwidth (smoothing parameter) of the estimator
        :param data: sample the density is estimated from
        :param method: kernel name, "epanechnikov" or "uniform"
        """
        self.h = h
        self.data = data
        self.method = method

    def kernel(self, u):
        """
        Evaluate the selected kernel function.

        :param u: scaled distance (x - x_i) / h
        :return: kernel value; 0 outside the support |u| <= 1
        :raises ValueError: if ``method`` is not a known kernel name
        """
        if self.method == "epanechnikov":
            inside = 0.75 * (1 - u**2)
        elif self.method == "uniform":
            inside = 0.5
        else:
            raise ValueError("Unknown kernel method: %s" % self.method)
        # Both kernels vanish outside [-1, 1]; without this the Epanechnikov
        # parabola would contribute negative mass for distant points.
        return inside if abs(u) <= 1 else 0.0

    def probability(self, x):
        """
        Estimate the density at x.

        :param x: point where the density is evaluated
        :return: sum of the kernel values of (x - x_i) / h over the sample,
                 divided by (len(data) * h)
        """
        l = len(self.data)
        # Normalize by (n * h); "/ l*self.h" divided by n and then
        # multiplied by h.
        p = sum([self.kernel((x - k)/self.h) for k in self.data]) / (l * self.h)
        return p

View File

@ -35,12 +35,15 @@ os.chdir("/home/petronio/dados/Dropbox/Doutorado/Codigos/")
#print(FCM.FCMPartitionerTrimf.__module__)
#gauss = random.normal(0,1.0,1000)
#gauss = random.normal(0,1.0,5000)
#gauss_teste = random.normal(0,1.0,400)
taiexpd = pd.read_csv("DataSets/TAIEX.csv", sep=",")
taiex = np.array(taiexpd["avg"][:5000])
#taiexpd = pd.read_csv("DataSets/TAIEX.csv", sep=",")
#taiex = np.array(taiexpd["avg"][:5000])
#nasdaqpd = pd.read_csv("DataSets/NASDAQ_IXIC.csv", sep=",")
#nasdaq = np.array(nasdaqpd["avg"][0:5000])
#from statsmodels.tsa.arima_model import ARIMA as stats_arima
from statsmodels.tsa.tsatools import lagmat
@ -54,9 +57,12 @@ from statsmodels.tsa.tsatools import lagmat
from pyFTS.benchmarks import distributed_benchmarks as bchmk
#from pyFTS.benchmarks import parallel_benchmarks as bchmk
#from pyFTS.benchmarks import benchmarks as bchmk
from pyFTS.benchmarks import Util
#from pyFTS.benchmarks import arima
#Util.cast_dataframe_to_sintetic_point("experiments/taiex_point_analitic.csv","experiments/taiex_point_sintetic.csv",11)
Util.plot_dataframe_point("experiments/taiex_point_sintetic.csv","experiments/taiex_point_analitic.csv",11)
#tmp = arima.ARIMA("")
#tmp.train(taiex[:1600],None,parameters=(2,0,1))
@ -66,11 +72,11 @@ from pyFTS.benchmarks import distributed_benchmarks as bchmk
#bchmk.teste(taiex,['192.168.0.109', '192.168.0.101'])
bchmk.point_sliding_window(taiex,2000,train=0.8, #models=[yu.WeightedFTS], # #
partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ],
partitions= np.arange(10,200,step=5), #transformation=diff,
dump=True, save=True, file="experiments/taiex_point_distributed.csv",
nodes=['192.168.0.102', '192.168.0.109']) #, depends=[hofts, ifts])
#bchmk.point_sliding_window(gauss,2000,train=0.8, #models=[yu.WeightedFTS], # #
# partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ],
# partitions= np.arange(3,10,step=1), #transformation=diff,
# dump=True, save=True, file="experiments/gauss_point_distributed.csv",
# nodes=['192.168.0.102', '192.168.0.109']) #, depends=[hofts, ifts])
#bchmk.testa(taiex,[10,20],partitioners=[Grid.GridPartitioner], nodes=['192.168.0.109', '192.168.0.101'])