- Issue #3 - Code documentation with PEP 257 compliance

Petrônio Cândido de Lima e Silva 2017-05-06 21:19:04 -03:00
parent fb9c3585be
commit cbfbf47f54
5 changed files with 313 additions and 38 deletions

View File

@@ -4,6 +4,7 @@
 pyFTS module for common benchmark metrics
 """
+import time
 import numpy as np
 import pandas as pd
 from pyFTS.common import FuzzySet,SortedCollection

@@ -240,3 +241,29 @@ def get_interval_statistics(original, model):
     ret.append(round(resolution(forecasts), 2))
     ret.append(round(coverage(original[model.order:], forecasts[:-1]), 2))
     return ret
+def get_distribution_statistics(original, model, steps, resolution):
+    """
+    Get CRPS and execution time statistics for m-step ahead probabilistic forecasts
+    :param original: original time series data
+    :param model: FTS model
+    :param steps: number of steps ahead to forecast
+    :param resolution: resolution of the forecast probability grid
+    :return: a list with the CRPS and execution time of each forecasting strategy
+    """
+    try:
+        _s1 = time.time()
+        densities1 = model.forecastAheadDistribution(original, steps, parameters=3)
+        _e1 = time.time()
+        _crps1 = round(crps(original, densities1), 3)
+        _t1 = round(_e1 - _s1, 3)
+    except Exception as e:
+        print('Error: ', e)
+        _crps1, _t1 = np.nan, np.nan
+    try:
+        _s2 = time.time()
+        densities2 = model.forecastAheadDistribution(original, steps, parameters=2)
+        _e2 = time.time()
+        _crps2 = round(crps(original, densities2), 3)
+        _t2 = round(_e2 - _s2, 3)
+    except Exception as e:
+        print('Error: ', e)
+        _crps2, _t2 = np.nan, np.nan
+    # Return CRPS values first, then times, matching the unpacking order used by all callers
+    return [_crps1, _crps2, _t1, _t2]
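
A minimal usage sketch for the new Measures.get_distribution_statistics (the synthetic series, partition count, and resolution below are illustrative assumptions, not part of this commit):

    import numpy as np
    from pyFTS.partitioners import Grid
    from pyFTS.benchmarks import Measures
    from pyFTS import pwfts

    data = np.random.normal(20, 2, 300)        # synthetic series, illustration only
    fs = Grid.GridPartitioner(data[:200], 10)  # 10 grid partitions over the training range
    model = pwfts.ProbabilisticWeightedFTS("")
    model.train(data[:200], fs.sets, order=1)
    # CRPS and execution time for each of the two distribution strategies
    crps1, crps2, t1, t2 = Measures.get_distribution_statistics(data[200:], model,
                                                                steps=20, resolution=2)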

View File

@@ -17,7 +17,7 @@ from mpl_toolkits.mplot3d import Axes3D
 from pyFTS.partitioners import partitioner, Grid, Huarng, Entropy, FCM
 from pyFTS.benchmarks import Measures, naive, arima, ResidualAnalysis, ProbabilityDistribution, Util, quantreg
 from pyFTS.common import Membership, FuzzySet, FLR, Transformations, Util
-from pyFTS import fts, chen, yu, ismailefendi, sadaei, hofts, hwang, pwfts, ifts, cheng
+from pyFTS import fts, song, chen, yu, ismailefendi, sadaei, hofts, hwang, pwfts, ifts, cheng, ensemble
 from copy import deepcopy

 colors = ['grey', 'rosybrown', 'maroon', 'red','orange', 'yellow', 'olive', 'green',
@@ -29,24 +29,34 @@ styles = ['-','--','-.',':','.']
 nsty = len(styles)

 def get_benchmark_point_methods():
-    """Return all non-FTS methods for point forecast"""
+    """Return all non-FTS methods for point forecasting"""
     return [naive.Naive, arima.ARIMA, quantreg.QuantileRegression]

 def get_point_methods():
-    """Return all FTS methods for point forecast"""
-    return [chen.ConventionalFTS, yu.WeightedFTS, ismailefendi.ImprovedWeightedFTS, cheng.TrendWeightedFTS,
-            sadaei.ExponentialyWeightedFTS, hofts.HighOrderFTS, pwfts.ProbabilisticWeightedFTS]
+    """Return all FTS methods for point forecasting"""
+    return [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, ismailefendi.ImprovedWeightedFTS,
+            cheng.TrendWeightedFTS, sadaei.ExponentialyWeightedFTS, hofts.HighOrderFTS,
+            pwfts.ProbabilisticWeightedFTS]

 def get_benchmark_interval_methods():
-    """Return all non-FTS methods for interval forecast"""
+    """Return all non-FTS methods for interval forecasting"""
     return [quantreg.QuantileRegression]

 def get_interval_methods():
-    """Return all FTS methods for interval forecast"""
+    """Return all FTS methods for interval forecasting"""
     return [ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS]

+def get_probabilistic_methods():
+    """Return all FTS methods for probabilistic forecasting"""
+    return [quantreg.QuantileRegression, ensemble.EnsembleFTS, pwfts.ProbabilisticWeightedFTS]
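
These method lists hold classes, not instances; the sliding-window benchmarks instantiate each entry with an empty name, roughly as in this sketch (hedged: it mirrors the pool-building pattern used by ahead_sliding_window later in this commit):

    from pyFTS.benchmarks import benchmarks

    for method in benchmarks.get_probabilistic_methods():
        model = method("")  # each entry is a class; the benchmark builds the instance
        print(model.shortname, model.min_order)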
 def external_point_sliding_window(models, parameters, data, windowsize,train=0.8, dump=False,
                                   save=False, file=None, sintetic=True):
     """
@@ -628,6 +638,19 @@ def plot_probability_distributions(pmfs, lcolors, tam=[15, 7]):

 def save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, times1, times2, save, sintetic):
"""
Save benchmark results for m-step ahead probabilistic forecasters
:param experiments:
:param file:
:param objs:
:param crps_interval:
:param crps_distr:
:param times1:
:param times2:
:param save:
:param sintetic:
:return:
"""
     ret = []

     if sintetic:
@@ -738,7 +761,7 @@ def ahead_sliding_window(data, windowsize, train, steps, models=None, resolution
             _tdiff = _end - _start

-            _crps1, _crps2, _t1, _t2 = get_distribution_statistics(test, mfts, steps=steps, resolution=resolution)
+            _crps1, _crps2, _t1, _t2 = Measures.get_distribution_statistics(test, mfts, steps=steps, resolution=resolution)

             crps_interval[_key].append(_crps1)
             crps_distr[_key].append(_crps2)
@@ -773,7 +796,7 @@ def ahead_sliding_window(data, windowsize, train, steps, models=None, resolution
                 _tdiff = _end - _start

-                _crps1, _crps2, _t1, _t2 = get_distribution_statistics(test, mfts, steps=steps,
+                _crps1, _crps2, _t1, _t2 = Measures.get_distribution_statistics(test, mfts, steps=steps,
                                                                        resolution=resolution)

                 crps_interval[_key].append(_crps1)
@@ -826,36 +849,13 @@ def all_ahead_forecasters(data_train, data_test, partitions, start, steps, resol
                 interpol=False, save=save, file=file, tam=tam, resolution=resolution, option=option)
-def get_distribution_statistics(original, model, steps, resolution):
-    ret = list()
-    try:
-        _s1 = time.time()
-        densities1 = model.forecastAheadDistribution(original, steps, parameters=3)
-        _e1 = time.time()
-        ret.append(round(Measures.crps(original, densities1), 3))
-        ret.append(round(_e1 - _s1, 3))
-    except Exception as e:
-        print('Erro: ', e)
-        ret.append(np.nan)
-        ret.append(np.nan)
-    try:
-        _s2 = time.time()
-        densities2 = model.forecastAheadDistribution(original, steps, parameters=2)
-        _e2 = time.time()
-        ret.append(round(Measures.crps(original, densities2), 3))
-        ret.append(round(_e2 - _s2, 3))
-    except:
-        ret.append(np.nan)
-        ret.append(np.nan)
-    return ret
 def print_distribution_statistics(original, models, steps, resolution):
     ret = "Model & Order & Interval & Distribution \\\\ \n"
     for fts in models:
-        _crps1, _crps2, _t1, _t2 = get_distribution_statistics(original, fts, steps, resolution)
+        _crps1, _crps2, _t1, _t2 = Measures.get_distribution_statistics(original, fts, steps, resolution)
         ret += fts.shortname + " & "
         ret += str(fts.order) + " & "
         ret += str(_crps1) + " & "
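
print_distribution_statistics renders one LaTeX table row per model from these statistics; a call sketch (the trained model and its held-out test data are assumed to exist):

    from pyFTS.benchmarks import benchmarks

    # 'model' is an already-trained probabilistic forecaster, 'test' its held-out data
    benchmarks.print_distribution_statistics(test, [model], steps=20, resolution=2)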

View File

@@ -172,7 +172,7 @@ def point_sliding_window(data, windowsize, train=0.8, models=None, partitioners=
     return bUtil.save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, times, u)

-def run_interval(mfts, partitioner, train_data, test_data, transformation=None, indexer=None):
+def run_interval(mfts, partitioner, train_data, test_data, window_key=None, transformation=None, indexer=None):
     """
     Interval forecast benchmark function to be executed on cluster nodes
     :param mfts: FTS model
@@ -211,7 +211,8 @@ def run_interval(mfts, partitioner, train_data, test_data, transformation=None,
     _end = time.time()
     times += _end - _start

-    ret = {'key': _key, 'obj': mfts, 'sharpness': _sharp, 'resolution': _res, 'coverage': _cov, 'time': times}
+    ret = {'key': _key, 'obj': mfts, 'sharpness': _sharp, 'resolution': _res, 'coverage': _cov, 'time': times,
+           'window': window_key}

     return ret
@@ -321,3 +322,163 @@ def interval_sliding_window(data, windowsize, train=0.8, models=None, partitione
     return benchmarks.save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, sintetic,
                                               times)
+def run_ahead(mfts, partitioner, train_data, test_data, steps, resolution, window_key=None, transformation=None, indexer=None):
+    """
+    Probabilistic m-step ahead forecast benchmark function to be executed on cluster nodes
+    :param mfts: FTS model
+    :param partitioner: Universe of Discourse partitioner
+    :param train_data: data used to train the model
+    :param test_data: data used to test the model
+    :param steps: number of steps ahead to forecast
+    :param resolution: resolution of the forecast probability grid
+    :param window_key: id of the sliding window
+    :param transformation: data transformation
+    :param indexer: seasonal indexer
+    :return: a dictionary with the benchmark results
+    """
+    import time
+    import numpy as np  # needed on the remote node for the nan fallbacks below
+    from pyFTS import hofts, ifts, pwfts
+    from pyFTS.partitioners import Grid, Entropy, FCM
+    from pyFTS.benchmarks import Measures, arima, quantreg
+
+    tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS, arima.ARIMA, quantreg.QuantileRegression]
+    tmp2 = [Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner]
+    tmp3 = [Measures.get_distribution_statistics]
+
+    pttr = str(partitioner.__module__).split('.')[-1]
+    _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
+    mfts.partitioner = partitioner
+
+    if transformation is not None:
+        mfts.appendTransformation(transformation)
+
+    try:
+        _start = time.time()
+        mfts.train(train_data, partitioner.sets, order=mfts.order)
+        _end = time.time()
+        times = _end - _start
+
+        _crps1, _crps2, _t1, _t2 = Measures.get_distribution_statistics(test_data, mfts, steps=steps,
+                                                                        resolution=resolution)
+        # Charge the training time to both strategies
+        _t1 += times
+        _t2 += times
+    except Exception as e:
+        print(e)
+        _crps1 = np.nan
+        _crps2 = np.nan
+        _t1 = np.nan
+        _t2 = np.nan
+
+    ret = {'key': _key, 'obj': mfts, 'CRPS_Interval': _crps1, 'CRPS_Distribution': _crps2, 'TIME_Interval': _t1,
+           'TIME_Distribution': _t2, 'window': window_key}
+
+    return ret
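
run_ahead can also be exercised locally, outside dispy, for a single window (a sketch; the train and test arrays are assumed to exist):

    from pyFTS.partitioners import Grid
    from pyFTS import pwfts

    fs = Grid.GridPartitioner(train, 10)
    model = pwfts.ProbabilisticWeightedFTS("")
    model.order = 1
    result = run_ahead(model, fs, train, test, steps=20, resolution=2, window_key=0)
    print(result['key'], result['CRPS_Interval'], result['TIME_Interval'])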
+def ahead_sliding_window(data, windowsize, train, steps, resolution, models=None, partitioners=[Grid.GridPartitioner],
+                         partitions=[10], max_order=3, transformation=None, indexer=None, dump=False,
+                         save=False, file=None, sintetic=False, nodes=None, depends=None):
+    """
+    Distributed sliding window benchmarks for FTS probabilistic forecasters
+    :param data: time series data
+    :param windowsize: size of sliding window
+    :param train: percentage of sliding window data used to train the models
+    :param steps: number of steps ahead to forecast
+    :param resolution: resolution of the forecast probability grid
+    :param models: FTS probabilistic forecasters
+    :param partitioners: Universe of Discourse partitioners
+    :param partitions: the max number of partitions on the Universe of Discourse
+    :param max_order: the max order of the models (for high order models)
+    :param transformation: data transformation
+    :param indexer: seasonal indexer
+    :param dump: if True, print debug information
+    :param save: save results
+    :param file: file path to save the results
+    :param sintetic: if True, save only the average and standard deviation of the results
+    :param nodes: list of cluster nodes to distribute tasks
+    :param depends: list of module dependencies
+    :return: DataFrame with the results
+    """
+    cluster = dispy.JobCluster(run_ahead, nodes=nodes)  # , depends=dependencies)
+    http_server = dispy.httpd.DispyHTTPServer(cluster)
+
+    _process_start = time.time()
+
+    print("Process Start: {0: %H:%M:%S}".format(datetime.datetime.now()))
+
+    pool = []
+    jobs = []
+    objs = {}
+    crps_interval = {}
+    crps_distr = {}
+    times1 = {}
+    times2 = {}
+
+    if models is None:
+        models = benchmarks.get_probabilistic_methods()
+
+    for model in models:
+        mfts = model("")
+
+        if mfts.is_high_order:
+            for order in np.arange(1, max_order + 1):
+                if order >= mfts.min_order:
+                    mfts = model("")
+                    mfts.order = order
+                    pool.append(mfts)
+        else:
+            pool.append(mfts)
+
+    experiments = 0
+    for ct, train, test in Util.sliding_window(data, windowsize, train):
+        experiments += 1
+
+        if dump: print('\nWindow: {0}\n'.format(ct))
+
+        for partition in partitions:
+            for partitioner in partitioners:
+                data_train_fs = partitioner(train, partition, transformation=transformation)
+
+                for id, m in enumerate(pool, start=0):
+                    # argument order must match run_ahead: steps and resolution before the window key
+                    job = cluster.submit(m, data_train_fs, train, test, steps, resolution, ct, transformation)
+                    job.id = id  # associate an ID to identify jobs (if needed later)
+                    jobs.append(job)
+
+    for job in jobs:
+        tmp = job()
+        if job.status == dispy.DispyJob.Finished and tmp is not None:
+            if tmp['key'] not in objs:
+                objs[tmp['key']] = tmp['obj']
+                crps_interval[tmp['key']] = []
+                crps_distr[tmp['key']] = []
+                times1[tmp['key']] = []
+                times2[tmp['key']] = []
+            crps_interval[tmp['key']].append(tmp['CRPS_Interval'])
+            crps_distr[tmp['key']].append(tmp['CRPS_Distribution'])
+            times1[tmp['key']].append(tmp['TIME_Interval'])
+            times2[tmp['key']].append(tmp['TIME_Distribution'])
+        else:
+            print(job.exception)
+            print(job.stdout)
+
+    _process_end = time.time()
+
+    print("Process End: {0: %H:%M:%S}".format(datetime.datetime.now()))
+    print("Process Duration: {0}".format(_process_end - _process_start))
+
+    cluster.wait()  # wait for all jobs to finish
+    cluster.print_status()
+    http_server.shutdown()  # this waits until browser gets all updates
+    cluster.close()
+
+    return benchmarks.save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, times1, times2, save, sintetic)
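
A sketch of invoking the distributed benchmark (the node address, window size, and synthetic data are illustrative, and the module path is an assumption since file names are not shown in this diff):

    import numpy as np
    from pyFTS.benchmarks import distributed_benchmarks as bchmk  # module path assumed

    data = np.random.normal(20, 2, 2000)  # synthetic series, illustration only
    bchmk.ahead_sliding_window(data, windowsize=600, train=0.8, steps=20, resolution=2,
                               partitions=[10], max_order=3, save=True, file='ahead.db',
                               nodes=['192.168.0.100'])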

View File

@@ -18,6 +18,17 @@ from pyFTS.benchmarks import benchmarks

 def run_point(mfts, partitioner, train_data, test_data, transformation=None, indexer=None):
"""
Point forecast benchmark function to be executed on threads
:param mfts: FTS model
:param partitioner: Universe of Discourse partitioner
:param train_data: data used to train the model
:param test_data: ata used to test the model
:param window_key: id of the sliding window
:param transformation: data transformation
:param indexer: seasonal indexer
:return: a dictionary with the benchmark results
"""
     pttr = str(partitioner.__module__).split('.')[-1]
     _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
     mfts.partitioner = partitioner
@@ -51,6 +62,23 @@ def run_point(mfts, partitioner, train_data, test_data, transformation=None, ind

 def point_sliding_window(data, windowsize, train=0.8, models=None, partitioners=[Grid.GridPartitioner],
                          partitions=[10], max_order=3, transformation=None, indexer=None, dump=False,
                          save=False, file=None, sintetic=False):
"""
Parallel sliding window benchmarks for FTS point forecasters
:param data:
:param windowsize: size of sliding window
:param train: percentual of sliding window data used to train the models
:param models: FTS point forecasters
:param partitioners: Universe of Discourse partitioner
:param partitions: the max number of partitions on the Universe of Discourse
:param max_order: the max order of the models (for high order models)
:param transformation: data transformation
:param indexer: seasonal indexer
:param dump:
:param save: save results
:param file: file path to save the results
:param sintetic: if true only the average and standard deviation of the results
:return: DataFrame with the results
"""
     _process_start = time.time()

     print("Process Start: {0: %H:%M:%S}".format(datetime.datetime.now()))
@@ -116,6 +144,17 @@ def point_sliding_window(data, windowsize, train=0.8, models=None, partitioners=

 def run_interval(mfts, partitioner, train_data, test_data, transformation=None, indexer=None):
"""
Interval forecast benchmark function to be executed on threads
:param mfts: FTS model
:param partitioner: Universe of Discourse partitioner
:param train_data: data used to train the model
:param test_data: ata used to test the model
:param window_key: id of the sliding window
:param transformation: data transformation
:param indexer: seasonal indexer
:return: a dictionary with the benchmark results
"""
     pttr = str(partitioner.__module__).split('.')[-1]
     _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
     mfts.partitioner = partitioner
@@ -149,6 +188,23 @@ def run_interval(mfts, partitioner, train_data, test_data, transformation=None,

 def interval_sliding_window(data, windowsize, train=0.8, models=None, partitioners=[Grid.GridPartitioner],
                             partitions=[10], max_order=3, transformation=None, indexer=None, dump=False,
                             save=False, file=None, sintetic=False):
"""
Parallel sliding window benchmarks for FTS interval forecasters
:param data:
:param windowsize: size of sliding window
:param train: percentual of sliding window data used to train the models
:param models: FTS point forecasters
:param partitioners: Universe of Discourse partitioner
:param partitions: the max number of partitions on the Universe of Discourse
:param max_order: the max order of the models (for high order models)
:param transformation: data transformation
:param indexer: seasonal indexer
:param dump:
:param save: save results
:param file: file path to save the results
:param sintetic: if true only the average and standard deviation of the results
:return: DataFrame with the results
"""
     _process_start = time.time()

     print("Process Start: {0: %H:%M:%S}".format(datetime.datetime.now()))
@@ -215,6 +271,18 @@ def interval_sliding_window(data, windowsize, train=0.8, models=None, partitione

 def run_ahead(mfts, partitioner, train_data, test_data, steps, resolution, transformation=None, indexer=None):
"""
Probabilistic m-step ahead forecast benchmark function to be executed on threads
:param mfts: FTS model
:param partitioner: Universe of Discourse partitioner
:param train_data: data used to train the model
:param test_data: ata used to test the model
:param steps:
:param resolution:
:param transformation: data transformation
:param indexer: seasonal indexer
:return: a dictionary with the benchmark results
"""
     pttr = str(partitioner.__module__).split('.')[-1]
     _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
     mfts.partitioner = partitioner
@@ -248,6 +316,25 @@ def run_ahead(mfts, partitioner, train_data, test_data, steps, resolution, trans

 def ahead_sliding_window(data, windowsize, train, steps, resolution, models=None, partitioners=[Grid.GridPartitioner],
                          partitions=[10], max_order=3, transformation=None, indexer=None, dump=False,
                          save=False, file=None, sintetic=False):
"""
Parallel sliding window benchmarks for FTS probabilistic forecasters
:param data:
:param windowsize: size of sliding window
:param train: percentual of sliding window data used to train the models
:param steps:
:param resolution:
:param models: FTS point forecasters
:param partitioners: Universe of Discourse partitioner
:param partitions: the max number of partitions on the Universe of Discourse
:param max_order: the max order of the models (for high order models)
:param transformation: data transformation
:param indexer: seasonal indexer
:param dump:
:param save: save results
:param file: file path to save the results
:param sintetic: if true only the average and standard deviation of the results
:return: DataFrame with the results
"""
     _process_start = time.time()

     print("Process Start: {0: %H:%M:%S}".format(datetime.datetime.now()))

View File

@@ -5,8 +5,8 @@ from pyFTS import fts

 class ConventionalFTS(fts.FTS):
     """Conventional Fuzzy Time Series"""
     def __init__(self, name, **kwargs):
-        super(ConventionalFTS, self).__init__(1, "CFTS " + name)
-        self.name = "Conventional FTS"
+        super(ConventionalFTS, self).__init__(1, "FTS " + name)
+        self.name = "Traditional FTS"
         self.detail = "Song & Chissom"
         self.R = None
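
With the renaming above, the Song & Chissom model keeps its public interface; a usage sketch (the train and test arrays are assumed to exist, and the train call follows the signature used elsewhere in this commit):

    from pyFTS.partitioners import Grid
    from pyFTS import song

    fs = Grid.GridPartitioner(train, 10)
    model = song.ConventionalFTS("TAIEX")  # shortname becomes "FTS TAIEX"
    model.train(train, fs.sets)
    forecasts = model.forecast(test)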