- Distributed sliding window benchmarks with dispy
This commit is contained in:
parent
97246f1510
commit
3ec95232bd
@ -177,3 +177,12 @@ def get_point_statistics(data, model, indexer=None):
|
|||||||
ret.append(np.nan)
|
ret.append(np.nan)
|
||||||
|
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
|
||||||
|
def get_interval_statistics(original, model):
|
||||||
|
ret = list()
|
||||||
|
forecasts = model.forecastInterval(original)
|
||||||
|
ret.append(round(sharpness(forecasts), 2))
|
||||||
|
ret.append(round(resolution(forecasts), 2))
|
||||||
|
ret.append(round(coverage(original[model.order:], forecasts[:-1]), 2))
|
||||||
|
return ret
|
||||||
|
@ -408,7 +408,7 @@ def interval_sliding_window(data, windowsize, train=0.8,models=None,partitioners
|
|||||||
_tdiff = _end - _start
|
_tdiff = _end - _start
|
||||||
|
|
||||||
_start = time.time()
|
_start = time.time()
|
||||||
_sharp, _res, _cov = get_interval_statistics(test, mfts)
|
_sharp, _res, _cov = Measures.get_interval_statistics(test, mfts)
|
||||||
_end = time.time()
|
_end = time.time()
|
||||||
_tdiff += _end - _start
|
_tdiff += _end - _start
|
||||||
sharpness[_key].append(_sharp)
|
sharpness[_key].append(_sharp)
|
||||||
@ -443,7 +443,7 @@ def interval_sliding_window(data, windowsize, train=0.8,models=None,partitioners
|
|||||||
_tdiff = _end - _start
|
_tdiff = _end - _start
|
||||||
|
|
||||||
_start = time.time()
|
_start = time.time()
|
||||||
_sharp, _res, _cov = get_interval_statistics(test, mfts)
|
_sharp, _res, _cov = Measures.get_interval_statistics(test, mfts)
|
||||||
_end = time.time()
|
_end = time.time()
|
||||||
_tdiff += _end - _start
|
_tdiff += _end - _start
|
||||||
sharpness[_key].append(_sharp)
|
sharpness[_key].append(_sharp)
|
||||||
@ -488,19 +488,10 @@ def all_interval_forecasters(data_train, data_test, partitions, max_order=3,save
|
|||||||
plot_compared_series(data_test, objs, lcolors, typeonlegend=False, save=save, file=file, tam=tam, intervals=True)
|
plot_compared_series(data_test, objs, lcolors, typeonlegend=False, save=save, file=file, tam=tam, intervals=True)
|
||||||
|
|
||||||
|
|
||||||
def get_interval_statistics(original, model):
|
|
||||||
ret = list()
|
|
||||||
forecasts = model.forecastInterval(original)
|
|
||||||
ret.append(round(Measures.sharpness(forecasts), 2))
|
|
||||||
ret.append(round(Measures.resolution(forecasts), 2))
|
|
||||||
ret.append(round(Measures.coverage(original[model.order:], forecasts[:-1]), 2))
|
|
||||||
return ret
|
|
||||||
|
|
||||||
|
|
||||||
def print_interval_statistics(original, models):
|
def print_interval_statistics(original, models):
|
||||||
ret = "Model & Order & Sharpness & Resolution & Coverage \\\\ \n"
|
ret = "Model & Order & Sharpness & Resolution & Coverage \\\\ \n"
|
||||||
for fts in models:
|
for fts in models:
|
||||||
_sharp, _res, _cov = get_interval_statistics(original, fts)
|
_sharp, _res, _cov = Measures.get_interval_statistics(original, fts)
|
||||||
ret += fts.shortname + " & "
|
ret += fts.shortname + " & "
|
||||||
ret += str(fts.order) + " & "
|
ret += str(fts.order) + " & "
|
||||||
ret += str(_sharp) + " & "
|
ret += str(_sharp) + " & "
|
||||||
|
@ -33,7 +33,6 @@ def run_point(mfts, partitioner, train_data, test_data, window_key=None, transfo
|
|||||||
if transformation is not None:
|
if transformation is not None:
|
||||||
mfts.appendTransformation(transformation)
|
mfts.appendTransformation(transformation)
|
||||||
|
|
||||||
# try:
|
|
||||||
_start = time.time()
|
_start = time.time()
|
||||||
mfts.train(train_data, partitioner.sets, order=mfts.order)
|
mfts.train(train_data, partitioner.sets, order=mfts.order)
|
||||||
_end = time.time()
|
_end = time.time()
|
||||||
@ -43,17 +42,9 @@ def run_point(mfts, partitioner, train_data, test_data, window_key=None, transfo
|
|||||||
_rmse, _smape, _u = Measures.get_point_statistics(test_data, mfts, indexer)
|
_rmse, _smape, _u = Measures.get_point_statistics(test_data, mfts, indexer)
|
||||||
_end = time.time()
|
_end = time.time()
|
||||||
times += _end - _start
|
times += _end - _start
|
||||||
# except Exception as e:
|
|
||||||
# print(e)
|
|
||||||
# _rmse = np.nan
|
|
||||||
# _smape = np.nan
|
|
||||||
# _u = np.nan
|
|
||||||
# times = np.nan
|
|
||||||
|
|
||||||
ret = {'key': _key, 'obj': mfts, 'rmse': _rmse, 'smape': _smape, 'u': _u, 'time': times, 'window': window_key}
|
ret = {'key': _key, 'obj': mfts, 'rmse': _rmse, 'smape': _smape, 'u': _u, 'time': times, 'window': window_key}
|
||||||
|
|
||||||
# print(ret)
|
|
||||||
|
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
|
||||||
@ -61,22 +52,8 @@ def point_sliding_window(data, windowsize, train=0.8, models=None, partitioners=
|
|||||||
partitions=[10], max_order=3, transformation=None, indexer=None, dump=False,
|
partitions=[10], max_order=3, transformation=None, indexer=None, dump=False,
|
||||||
save=False, file=None, sintetic=False,nodes=None, depends=None):
|
save=False, file=None, sintetic=False,nodes=None, depends=None):
|
||||||
|
|
||||||
# dependencies = [fts, Membership, benchmarks]
|
|
||||||
# if depends is not None: dependencies.extend(depends)
|
|
||||||
|
|
||||||
# if models is not None:
|
|
||||||
# dependencies.extend(models)
|
|
||||||
# else:
|
|
||||||
# dependencies.extend(benchmarks.get_point_methods())
|
|
||||||
|
|
||||||
# dependencies.extend(partitioners)
|
|
||||||
# if transformation is not None: dependencies.extend(transformation)
|
|
||||||
# if indexer is not None: dependencies.extend(indexer)
|
|
||||||
|
|
||||||
cluster = dispy.JobCluster(run_point, nodes=nodes) #, depends=dependencies)
|
cluster = dispy.JobCluster(run_point, nodes=nodes) #, depends=dependencies)
|
||||||
|
|
||||||
# import dispy's httpd module, create http server for this cluster
|
|
||||||
|
|
||||||
http_server = dispy.httpd.DispyHTTPServer(cluster)
|
http_server = dispy.httpd.DispyHTTPServer(cluster)
|
||||||
|
|
||||||
_process_start = time.time()
|
_process_start = time.time()
|
||||||
@ -157,23 +134,122 @@ def point_sliding_window(data, windowsize, train=0.8, models=None, partitioners=
|
|||||||
return bUtil.save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, times, u)
|
return bUtil.save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, times, u)
|
||||||
|
|
||||||
|
|
||||||
def compute(data):
|
def run_interval(mfts, partitioner, train_data, test_data, transformation=None, indexer=None):
|
||||||
import socket
|
import time
|
||||||
return (socket.gethostname(), data)
|
from pyFTS import hofts,ifts,pwfts
|
||||||
|
from pyFTS.partitioners import Grid, Entropy, FCM
|
||||||
|
from pyFTS.benchmarks import Measures
|
||||||
|
|
||||||
|
tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS]
|
||||||
|
|
||||||
|
tmp2 = [Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner]
|
||||||
|
|
||||||
|
tmp3 = [Measures.get_interval_statistics]
|
||||||
|
|
||||||
|
pttr = str(partitioner.__module__).split('.')[-1]
|
||||||
|
_key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
|
||||||
|
mfts.partitioner = partitioner
|
||||||
|
if transformation is not None:
|
||||||
|
mfts.appendTransformation(transformation)
|
||||||
|
|
||||||
|
_start = time.time()
|
||||||
|
mfts.train(train_data, partitioner.sets, order=mfts.order)
|
||||||
|
_end = time.time()
|
||||||
|
times = _end - _start
|
||||||
|
|
||||||
|
_start = time.time()
|
||||||
|
_sharp, _res, _cov = Measures.get_interval_statistics(test_data, mfts)
|
||||||
|
_end = time.time()
|
||||||
|
times += _end - _start
|
||||||
|
|
||||||
|
ret = {'key': _key, 'obj': mfts, 'sharpness': _sharp, 'resolution': _res, 'coverage': _cov, 'time': times}
|
||||||
|
|
||||||
|
return ret
|
||||||
|
|
||||||
|
|
||||||
def teste(data,nodes):
|
def interval_sliding_window(data, windowsize, train=0.8, models=None, partitioners=[Grid.GridPartitioner],
|
||||||
cluster = dispy.JobCluster(compute, nodes=nodes)
|
partitions=[10], max_order=3, transformation=None, indexer=None, dump=False,
|
||||||
|
save=False, file=None, sintetic=False,nodes=None, depends=None):
|
||||||
|
|
||||||
|
cluster = dispy.JobCluster(run_point, nodes=nodes) #, depends=dependencies)
|
||||||
|
|
||||||
|
http_server = dispy.httpd.DispyHTTPServer(cluster)
|
||||||
|
|
||||||
|
_process_start = time.time()
|
||||||
|
|
||||||
|
print("Process Start: {0: %H:%M:%S}".format(datetime.datetime.now()))
|
||||||
|
|
||||||
|
pool = []
|
||||||
jobs = []
|
jobs = []
|
||||||
for ct, train, test in Util.sliding_window(data, 2000, 0.8):
|
objs = {}
|
||||||
job = cluster.submit(ct)
|
sharpness = {}
|
||||||
jobs.append(job)
|
resolution = {}
|
||||||
|
coverage = {}
|
||||||
|
times = {}
|
||||||
|
|
||||||
|
if models is None:
|
||||||
|
models = benchmarks.get_interval_methods()
|
||||||
|
|
||||||
|
for model in models:
|
||||||
|
mfts = model("")
|
||||||
|
|
||||||
|
if mfts.isHighOrder:
|
||||||
|
for order in np.arange(1, max_order + 1):
|
||||||
|
if order >= mfts.minOrder:
|
||||||
|
mfts = model("")
|
||||||
|
mfts.order = order
|
||||||
|
pool.append(mfts)
|
||||||
|
else:
|
||||||
|
pool.append(mfts)
|
||||||
|
|
||||||
|
experiments = 0
|
||||||
|
for ct, train, test in Util.sliding_window(data, windowsize, train):
|
||||||
|
experiments += 1
|
||||||
|
|
||||||
|
if dump: print('\nWindow: {0}\n'.format(ct))
|
||||||
|
|
||||||
|
for partition in partitions:
|
||||||
|
|
||||||
|
for partitioner in partitioners:
|
||||||
|
|
||||||
|
data_train_fs = partitioner(train, partition, transformation=transformation)
|
||||||
|
|
||||||
|
for id, m in enumerate(pool,start=0):
|
||||||
|
job = cluster.submit(m, data_train_fs, train, test, ct, transformation)
|
||||||
|
job.id = id # associate an ID to identify jobs (if needed later)
|
||||||
|
jobs.append(job)
|
||||||
|
|
||||||
for job in jobs:
|
for job in jobs:
|
||||||
x = job()
|
tmp = job()
|
||||||
print(x)
|
if job.status == dispy.DispyJob.Finished and tmp is not None:
|
||||||
|
if tmp['key'] not in objs:
|
||||||
|
objs[tmp['key']] = tmp['obj']
|
||||||
|
sharpness[tmp['key']] = []
|
||||||
|
resolution[tmp['key']] = []
|
||||||
|
coverage[tmp['key']] = []
|
||||||
|
times[tmp['key']] = []
|
||||||
|
|
||||||
cluster.wait()
|
sharpness[tmp['key']].append(tmp['sharpness'])
|
||||||
|
resolution[tmp['key']].append(tmp['resolution'])
|
||||||
|
coverage[tmp['key']].append(tmp['coverage'])
|
||||||
|
times[tmp['key']].append(tmp['time'])
|
||||||
|
print(tmp['key'])
|
||||||
|
else:
|
||||||
|
print(job.exception)
|
||||||
|
print(job.stdout)
|
||||||
|
|
||||||
|
_process_end = time.time()
|
||||||
|
|
||||||
|
print("Process End: {0: %H:%M:%S}".format(datetime.datetime.now()))
|
||||||
|
|
||||||
|
print("Process Duration: {0}".format(_process_end - _process_start))
|
||||||
|
|
||||||
|
cluster.wait() # wait for all jobs to finish
|
||||||
|
|
||||||
|
cluster.print_status()
|
||||||
|
|
||||||
|
http_server.shutdown() # this waits until browser gets all updates
|
||||||
cluster.close()
|
cluster.close()
|
||||||
|
|
||||||
|
return benchmarks.save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, sintetic,
|
||||||
|
times)
|
Loading…
Reference in New Issue
Block a user