SQLITE3 interface for benchmarks; Support for ProbabilityDistribution on Differentiation
This commit is contained in:
parent 6adf4710b6
commit f3c6eda2ec
@@ -288,7 +288,7 @@ def get_point_statistics(data, model, **kwargs):
     ret = list()
 
     if steps_ahead == 1:
-        forecasts = model.forecast(data, **kwargs)
+        forecasts = model.predict(data, **kwargs)
         if model.has_seasonality:
             nforecasts = np.array(forecasts)
         else:
@@ -304,7 +304,7 @@ def get_point_statistics(data, model, **kwargs):
             tmp = model.forecast_ahead(sample, steps_ahead, **kwargs)
             nforecasts.append(tmp[-1])
 
-        start = model.order + steps_ahead
+        start = model.order + steps_ahead -1
         ret.append(np.round(rmse(ndata[start:-1:steps_ahead_sampler], nforecasts), 2))
         ret.append(np.round(smape(ndata[start:-1:steps_ahead_sampler], nforecasts), 2))
         ret.append(np.round(UStatistic(ndata[start:-1:steps_ahead_sampler], nforecasts), 2))
@@ -327,7 +327,7 @@ def get_interval_statistics(data, model, **kwargs):
     ret = list()
 
     if steps_ahead == 1:
-        forecasts = model.forecast_interval(data, **kwargs)
+        forecasts = model.predict(data, **kwargs)
         ret.append(round(sharpness(forecasts), 2))
         ret.append(round(resolution(forecasts), 2))
         ret.append(round(coverage(data[model.order:], forecasts[:-1]), 2))
@@ -339,10 +339,10 @@ def get_interval_statistics(data, model, **kwargs):
         forecasts = []
         for k in np.arange(model.order, len(data) - steps_ahead):
             sample = data[k - model.order: k]
-            tmp = model.forecast_ahead_interval(sample, steps_ahead, **kwargs)
+            tmp = model.predict(sample, steps_ahead, **kwargs)
             forecasts.append(tmp[-1])
 
-        start = model.order + steps_ahead
+        start = model.order + steps_ahead -1
         ret.append(round(sharpness(forecasts), 2))
         ret.append(round(resolution(forecasts), 2))
         ret.append(round(coverage(data[model.order:], forecasts), 2))
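A hedged usage sketch for the evaluation helpers touched above, assuming `model` is an already fitted pyFTS model with a partitioner and `test` is a numeric test series (names are illustrative); get_point_statistics is expected to return [rmse, smape, U]:

from pyFTS.benchmarks import Measures

# One-step-ahead point accuracy; the helper now routes through model.predict()
# instead of the removed forecast()/forecast_interval() calls.
rmse, smape, u = Measures.get_point_statistics(test, model)

# Multi-step evaluation: rolling forecasts are compared against the data
# starting at model.order + steps_ahead - 1, sampled every steps_ahead_sampler points.
stats = Measures.get_point_statistics(test, model, steps_ahead=4, steps_ahead_sampler=10)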
@@ -8,6 +8,7 @@ import matplotlib.colors as pltcolors
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
+import sqlite3
 #from mpl_toolkits.mplot3d import Axes3D
 
 
@@ -15,6 +16,57 @@ from copy import deepcopy
 from pyFTS.common import Util
 
 
+def open_benchmark_db(name):
+    conn = sqlite3.connect(name)
+
+    create_benchmark_tables(conn)
+
+    return conn
+
+
+def create_benchmark_tables(conn):
+    c = conn.cursor()
+
+    c.execute('''CREATE TABLE if not exists benchmarks(
+                 ID integer primary key, Date int, Dataset text, Tag text,
+                 Type text, Model text, Transformation text, 'Order' int,
+                 Scheme text, Partitions int,
+                 Size int, Steps int, Method text, Measure text, Value real)''')
+
+    # Save (commit) the changes
+    conn.commit()
+
+
+def insert_benchmark(data, conn):
+    c = conn.cursor()
+
+    c.execute("INSERT INTO benchmarks(Date, Dataset, Tag, Type, Model, "
+              + "Transformation, 'Order', Scheme, Partitions, "
+              + "Size, Steps, Method, Measure, Value) "
+              + "VALUES(datetime('now'),?,?,?,?,?,?,?,?,?,?,?,?,?)", data)
+    conn.commit()
+
+
+def process_common_data(dataset, tag, type, job):
+    model = job["obj"]
+    if not model.benchmark_only:
+        data = [dataset, tag, type, model.shortname,
+                str(model.partitioner.transformation) if model.partitioner.transformation is not None else None,
+                model.order, model.partitioner.name, str(model.partitioner.partitions),
+                len(model), job['steps'], job['method']]
+    else:
+        data = [tag, type, model.shortname, None, model.order, None, None,
+                None, job['steps'], job['method']]
+    return data
+
+
+def get_dataframe_from_bd(file, filter):
+    con = sqlite3.connect(file)
+    sql = "SELECT * from benchmarks"
+    if filter is not None:
+        sql += " WHERE " + filter
+    return pd.read_sql_query(sql, con)
+
+
 def extract_measure(dataframe, measure, data_columns):
     if not dataframe.empty:
         df = dataframe[(dataframe.Measure == measure)][data_columns]
@@ -45,6 +97,7 @@ def find_best(dataframe, criteria, ascending):
 
     return ret
 
+
 
 def analytic_tabular_dataframe(dataframe):
     experiments = len(dataframe.columns) - len(base_dataframe_columns()) - 1
     models = dataframe.Model.unique()
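A hedged usage sketch for the new SQLite helpers above; they live in pyFTS.benchmarks.Util (imported as bUtil elsewhere in this commit), and the example row is purely illustrative, following the column order of the INSERT statement:

from pyFTS.benchmarks import Util as bUtil

conn = bUtil.open_benchmark_db("benchmarks.db")   # creates the benchmarks table on first use

# Dataset, Tag, Type, Model, Transformation, Order, Scheme, Partitions,
# Size, Steps, Method, Measure, Value -> 13 values bound after datetime('now')
row = ["TAIEX", "example", "point", "PWFTS", None, 1,
       "Grid", "10", 100, 1, None, "rmse", 0.5]
bUtil.insert_benchmark(row, conn)
conn.close()

# Read results back as a pandas DataFrame, optionally filtered by a WHERE clause.
df = bUtil.get_dataframe_from_bd("benchmarks.db", "Measure = 'rmse'")
print(df.head())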
@@ -90,10 +90,14 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
 
     :return: DataFrame with the benchmark results
     """
+
+    tag = __pop('tag', None, kwargs)
+    dataset = __pop('dataset', None, kwargs)
+
     distributed = __pop('distributed', False, kwargs)
     save = __pop('save', False, kwargs)
 
-    transformation = kwargs.get('transformation', None)
+    transformations = kwargs.get('transformations', [None])
     progress = kwargs.get('progress', None)
     type = kwargs.get("type", 'point')
 
@@ -192,13 +196,15 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
 
     if partitioners_models is None:
 
-        for partition in partitions:
+        for transformation in transformations:
 
-            for partitioner in partitioners_methods:
+            for partition in partitions:
 
-                data_train_fs = partitioner(data=train, npart=partition, transformation=transformation)
+                for partitioner in partitioners_methods:
 
-                partitioners_pool.append(data_train_fs)
+                    data_train_fs = partitioner(data=train, npart=partition, transformation=transformation)
+
+                    partitioners_pool.append(data_train_fs)
     else:
         partitioners_pool = partitioners_models
 
@@ -206,6 +212,10 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
     if progress:
         rng1 = tqdm(steps_ahead, desc="Steps")
 
+    file = kwargs.get('file', "benchmarks.db")
+
+    conn = bUtil.open_benchmark_db(file)
+
     for step in rng1:
         rng2 = partitioners_pool
 
@@ -225,7 +235,7 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
 
                 if not distributed:
                     job = experiment_method(deepcopy(model), deepcopy(partitioner), train, test, **kwargs)
-                    jobs.append(job)
+                    synthesis_method(dataset, tag, job, conn)
                 else:
                     job = cluster.submit(deepcopy(model), deepcopy(partitioner), train, test, **kwargs)
                     job.id = id # associate an ID to identify jobs (if needed later)
@@ -239,29 +249,29 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
 
         rng = jobs
 
-        cluster.wait() # wait for all jobs to finish
 
         if progress:
            rng = tqdm(jobs)
 
        for job in rng:
+            job()
            if job.status == dispy.DispyJob.Finished and job is not None:
-                tmp = job()
-                jobs2.append(tmp)
+                tmp = job.result
+                synthesis_method(dataset, tag, tmp, conn)
            else:
                print("status",job.status)
                print("result",job.result)
                print("stdout",job.stdout)
                print("stderr",job.exception)
 
-        jobs = deepcopy(jobs2)
+        cluster.wait() # wait for all jobs to finish
 
        cUtil.stop_dispy_cluster(cluster, http_server)
 
-    file = kwargs.get('file', None)
+    conn.close()
 
    sintetic = kwargs.get('sintetic', False)
 
-    return synthesis_method(jobs, experiments, save, file, sintetic)
+    #return synthesis_method(jobs, experiments, save, file, sintetic)
 
 
 def get_benchmark_point_methods():
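With the loop above now persisting each finished job straight to SQLite via synthesis_method, a minimal non-distributed run could look like the sketch below; the data variable and parameter choices are illustrative only (the distributed variant appears in the example script later in this commit):

from pyFTS.benchmarks import benchmarks as bchmk
from pyFTS.models import pwfts

# `series` is assumed to be a 1-D numeric time series (e.g. TAIEX closing values).
bchmk.sliding_window_benchmarks(series, 1000, train=0.8, inc=0.2,
                                methods=[pwfts.ProbabilisticWeightedFTS],
                                benchmark_models=False,
                                orders=[1], partitions=[10],
                                progress=False, type='point',
                                distributed=False,
                                file="benchmarks.db",
                                dataset="TAIEX", tag="example")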
@@ -326,7 +336,6 @@ def run_point(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
 
     tmp5 = [Transformations.Differential]
 
-    transformation = kwargs.get('transformation', None)
     indexer = kwargs.get('indexer', None)
 
     steps_ahead = kwargs.get('steps_ahead', 1)
@@ -338,13 +347,11 @@ def run_point(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
     pttr = str(partitioner.__module__).split('.')[-1]
     _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
     mfts.partitioner = partitioner
+    mfts.append_transformation(partitioner.transformation)
 
     _key += str(steps_ahead)
     _key += str(method) if method is not None else ""
 
-    if transformation is not None:
-        mfts.append_transformation(transformation)
-
     _start = time.time()
     mfts.fit(train_data, order=mfts.order, **kwargs)
     _end = time.time()
@@ -386,9 +393,6 @@ def run_interval(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
 
     tmp3 = [Measures.get_interval_statistics]
 
-    transformation = kwargs.get('transformation', None)
-    indexer = kwargs.get('indexer', None)
-
     steps_ahead = kwargs.get('steps_ahead', 1)
     method = kwargs.get('method', None)
 
@@ -398,9 +402,7 @@ def run_interval(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
     pttr = str(partitioner.__module__).split('.')[-1]
     _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
     mfts.partitioner = partitioner
-
-    if transformation is not None:
-        mfts.append_transformation(transformation)
+    mfts.append_transformation(partitioner.transformation)
 
     _key += str(steps_ahead)
     _key += str(method) if method is not None else ""
@@ -450,7 +452,6 @@ def run_probabilistic(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
 
     tmp3 = [Measures.get_distribution_statistics, SeasonalIndexer.SeasonalIndexer, SeasonalIndexer.LinearSeasonalIndexer]
 
-    transformation = kwargs.get('transformation', None)
     indexer = kwargs.get('indexer', None)
 
     steps_ahead = kwargs.get('steps_ahead', 1)
@@ -462,13 +463,11 @@ def run_probabilistic(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
     pttr = str(partitioner.__module__).split('.')[-1]
     _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
     mfts.partitioner = partitioner
+    mfts.append_transformation(partitioner.transformation)
 
     _key += str(steps_ahead)
     _key += str(method) if method is not None else ""
 
-    if transformation is not None:
-        mfts.append_transformation(transformation)
-
     if mfts.has_seasonality:
         mfts.indexer = indexer
 
@@ -491,126 +490,64 @@ def run_probabilistic(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
     return ret
 
 
-def build_model_pool_point(models, max_order, benchmark_models, benchmark_models_parameters):
-    pool = []
-    if models is None:
-        models = get_point_methods()
-    for model in models:
-        mfts = model("")
-
-        if mfts.is_high_order:
-            for order in np.arange(1, max_order + 1):
-                if order >= mfts.min_order:
-                    mfts = model("")
-                    mfts.order = order
-                    pool.append(mfts)
-        else:
-            mfts.order = 1
-            pool.append(mfts)
-
-    if benchmark_models is not None:
-        for count, model in enumerate(benchmark_models, start=0):
-            par = benchmark_models_parameters[count]
-            mfts = model(str(par if par is not None else ""))
-            mfts.order = par
-            pool.append(mfts)
-    return pool
-
-
-def process_point_jobs(jobs, experiments, save=False, file=None, sintetic=False):
-    objs = {}
-    rmse = {}
-    smape = {}
-    u = {}
-    times = {}
-    steps = {}
-    method = {}
-
-    for job in jobs:
-        _key = job['key']
-        if _key not in objs:
-            objs[_key] = job['obj']
-            rmse[_key] = []
-            smape[_key] = []
-            u[_key] = []
-            times[_key] = []
-            steps[_key] = []
-            method[_key] = []
-        steps[_key] = job['steps']
-        method[_key] = job['method']
-        rmse[_key].append(job['rmse'])
-        smape[_key].append(job['smape'])
-        u[_key].append(job['u'])
-        times[_key].append(job['time'])
-
-    return bUtil.save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, times, u, steps, method)
-
-
-def process_interval_jobs(jobs, experiments, save=False, file=None, sintetic=False):
-    objs = {}
-    sharpness = {}
-    resolution = {}
-    coverage = {}
-    q05 = {}
-    q25 = {}
-    q75 = {}
-    q95 = {}
-    times = {}
-    steps = {}
-    method = {}
-
-    for job in jobs:
-        _key = job['key']
-        if _key not in objs:
-            objs[_key] = job['obj']
-            sharpness[_key] = []
-            resolution[_key] = []
-            coverage[_key] = []
-            times[_key] = []
-            q05[_key] = []
-            q25[_key] = []
-            q75[_key] = []
-            q95[_key] = []
-            steps[_key] = []
-            method[_key] = []
-
-        sharpness[_key].append(job['sharpness'])
-        resolution[_key].append(job['resolution'])
-        coverage[_key].append(job['coverage'])
-        times[_key].append(job['time'])
-        q05[_key].append(job['Q05'])
-        q25[_key].append(job['Q25'])
-        q75[_key].append(job['Q75'])
-        q95[_key].append(job['Q95'])
-        steps[_key] = job['steps']
-        method[_key] = job['method']
-    return bUtil.save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, sintetic,
-                                         times, q05, q25, q75, q95, steps, method)
-
-
-def process_probabilistic_jobs(jobs, experiments, save=False, file=None, sintetic=False):
-    objs = {}
-    crps = {}
-    times = {}
-    steps = {}
-    method = {}
-
-    for job in jobs:
-        _key = job['key']
-        if _key not in objs:
-            objs[_key] = job['obj']
-            crps[_key] = []
-            times[_key] = []
-            steps[_key] = []
-            method[_key] = []
-
-        crps[_key].append(job['CRPS'])
-        times[_key].append(job['time'])
-        steps[_key] = job['steps']
-        method[_key] = job['method']
-
-    return bUtil.save_dataframe_probabilistic(experiments, file, objs, crps, times, save, sintetic, steps, method)
+def process_point_jobs(dataset, tag, job, conn):
+
+    data = bUtil.process_common_data(dataset, tag, 'point',job)
+
+    rmse = deepcopy(data)
+    rmse.extend(["rmse", job["rmse"]])
+    bUtil.insert_benchmark(rmse, conn)
+    smape = deepcopy(data)
+    smape.extend(["smape", job["smape"]])
+    bUtil.insert_benchmark(smape, conn)
+    u = deepcopy(data)
+    u.extend(["u", job["u"]])
+    bUtil.insert_benchmark(u, conn)
+    time = deepcopy(data)
+    time.extend(["time", job["time"]])
+    bUtil.insert_benchmark(time, conn)
+
+
+def process_interval_jobs(dataset, tag, job, conn):
+
+    data = bUtil.process_common_data(dataset, tag, 'interval', job)
+
+    sharpness = deepcopy(data)
+    sharpness.extend(["sharpness", job["sharpness"]])
+    bUtil.insert_benchmark(sharpness, conn)
+    resolution = deepcopy(data)
+    resolution.extend(["resolution", job["resolution"]])
+    bUtil.insert_benchmark(resolution, conn)
+    coverage = deepcopy(data)
+    coverage.extend(["coverage", job["coverage"]])
+    bUtil.insert_benchmark(coverage, conn)
+    time = deepcopy(data)
+    time.extend(["time", job["time"]])
+    bUtil.insert_benchmark(time, conn)
+    Q05 = deepcopy(data)
+    Q05.extend(["Q05", job["Q05"]])
+    bUtil.insert_benchmark(Q05, conn)
+    Q25 = deepcopy(data)
+    Q25.extend(["Q25", job["Q25"]])
+    bUtil.insert_benchmark(Q25, conn)
+    Q75 = deepcopy(data)
+    Q75.extend(["Q75", job["Q75"]])
+    bUtil.insert_benchmark(Q75, conn)
+    Q95 = deepcopy(data)
+    Q95.extend(["Q95", job["Q95"]])
+    bUtil.insert_benchmark(Q95, conn)
+
+
+def process_probabilistic_jobs(dataset, tag, job, conn):
+
+    data = bUtil.process_common_data(dataset, tag, 'density', job)
+
+    crps = deepcopy(data)
+    crps.extend(["CRPS",job["CRPS"]])
+    bUtil.insert_benchmark(crps, conn)
+    time = deepcopy(data)
+    time.extend(["time", job["time"]])
+    bUtil.insert_benchmark(time, conn)
 
 
 def print_point_statistics(data, models, externalmodels = None, externalforecasts = None, indexers=None):
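For reference, a hedged sketch of the job dictionary the rewritten persistence functions above expect (key names are taken from the code; the values and the tag/dataset strings are illustrative). It assumes `model` is a trained pyFTS model with its partitioner attached and `conn` was opened with bUtil.open_benchmark_db:

job = {'obj': model, 'steps': 1, 'method': None,
       'rmse': 25.3, 'smape': 1.2, 'u': 0.98, 'time': 0.07}

# Writes four rows into the benchmarks table: rmse, smape, u and time.
process_point_jobs("TAIEX", "partitioning", job, conn)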
@@ -636,7 +573,6 @@ def print_point_statistics(data, models, externalmodels = None, externalforecasts = None, indexers=None):
     print(ret)
 
 
-
 def print_interval_statistics(original, models):
     ret = "Model & Order & Sharpness & Resolution & Coverage & .05 & .25 & .75 & .95 \\\\ \n"
     for fts in models:
@@ -653,151 +589,6 @@ def print_interval_statistics(original, models):
     print(ret)
 
 
-
-
-
-
-
-def ahead_sliding_window(data, windowsize, train, steps, models=None, resolution = None, partitioners=[Grid.GridPartitioner],
-                         partitions=[10], max_order=3, transformation=None, indexer=None, dump=False,
-                         save=False, file=None, synthetic=False):
-    if models is None:
-        models = [pwfts.ProbabilisticWeightedFTS]
-
-    objs = {}
-    lcolors = {}
-    crps_interval = {}
-    crps_distr = {}
-    times1 = {}
-    times2 = {}
-
-    experiments = 0
-    for ct, train,test in cUtil.sliding_window(data, windowsize, train):
-        experiments += 1
-        for partition in partitions:
-            for partitioner in partitioners:
-                pttr = str(partitioner.__module__).split('.')[-1]
-                data_train_fs = partitioner(data=train, npart=partition, transformation=transformation)
-
-                for count, model in enumerate(models, start=0):
-
-                    mfts = model("")
-                    _key = mfts.shortname + " " + pttr+ " q = " +str(partition)
-
-                    mfts.partitioner = data_train_fs
-                    if not mfts.is_high_order:
-
-                        if dump: print(ct,_key)
-
-                        if _key not in objs:
-                            objs[_key] = mfts
-                            lcolors[_key] = colors[count % ncol]
-                            crps_interval[_key] = []
-                            crps_distr[_key] = []
-                            times1[_key] = []
-                            times2[_key] = []
-
-                        if transformation is not None:
-                            mfts.append_transformation(transformation)
-
-                        _start = time.time()
-                        mfts.train(train, sets=data_train_fs.sets)
-                        _end = time.time()
-
-                        _tdiff = _end - _start
-
-                        _crps1, _crps2, _t1, _t2 = Measures.get_distribution_statistics(test,mfts,steps=steps,resolution=resolution)
-
-                        crps_interval[_key].append_rhs(_crps1)
-                        crps_distr[_key].append_rhs(_crps2)
-                        times1[_key] = _tdiff + _t1
-                        times2[_key] = _tdiff + _t2
-
-                        if dump: print(_crps1, _crps2, _tdiff, _t1, _t2)
-
-                    else:
-                        for order in np.arange(1, max_order + 1):
-                            if order >= mfts.min_order:
-                                mfts = model("")
-                                _key = mfts.shortname + " n = " + str(order) + " " + pttr + " q = " + str(partition)
-                                mfts.partitioner = data_train_fs
-
-                                if dump: print(ct,_key)
-
-                                if _key not in objs:
-                                    objs[_key] = mfts
-                                    lcolors[_key] = colors[count % ncol]
-                                    crps_interval[_key] = []
-                                    crps_distr[_key] = []
-                                    times1[_key] = []
-                                    times2[_key] = []
-
-                                if transformation is not None:
-                                    mfts.append_transformation(transformation)
-
-                                _start = time.time()
-                                mfts.train(train, sets=data_train_fs.sets, order=order)
-                                _end = time.time()
-
-                                _tdiff = _end - _start
-
-                                _crps1, _crps2, _t1, _t2 = Measures.get_distribution_statistics(test, mfts, steps=steps,
-                                                                                                resolution=resolution)
-
-                                crps_interval[_key].append_rhs(_crps1)
-                                crps_distr[_key].append_rhs(_crps2)
-                                times1[_key] = _tdiff + _t1
-                                times2[_key] = _tdiff + _t2
-
-                                if dump: print(_crps1, _crps2, _tdiff, _t1, _t2)
-
-    return bUtil.save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, times1, times2, save, synthetic)
-
-
-def all_ahead_forecasters(data_train, data_test, partitions, start, steps, resolution = None, max_order=3,save=False, file=None, tam=[20, 5],
-                          models=None, transformation=None, option=2):
-    if models is None:
-        models = [pwfts.ProbabilisticWeightedFTS]
-
-    if resolution is None: resolution = (max(data_train) - min(data_train)) / 100
-
-    objs = []
-
-    data_train_fs = Grid.GridPartitioner(data=data_train, npart=partitions, transformation=transformation).sets
-    lcolors = []
-
-    for count, model in cUtil.enumerate2(models, start=0, step=2):
-        mfts = model("")
-        if not mfts.is_high_order:
-            if transformation is not None:
-                mfts.append_transformation(transformation)
-            mfts.train(data_train, sets=data_train_fs.sets)
-            objs.append(mfts)
-            lcolors.append( colors[count % ncol] )
-        else:
-            for order in np.arange(1,max_order+1):
-                if order >= mfts.min_order:
-                    mfts = model(" n = " + str(order))
-                    if transformation is not None:
-                        mfts.append_transformation(transformation)
-                    mfts.train(data_train, sets=data_train_fs.sets, order=order)
-                    objs.append(mfts)
-                    lcolors.append(colors[count % ncol])
-
-    distributions = [False for k in objs]
-
-    distributions[0] = True
-
-    print_distribution_statistics(data_test[start:], objs, steps, resolution)
-
-    plot_compared_intervals_ahead(data_test, objs, lcolors, distributions=distributions, time_from=start, time_to=steps,
-                                  interpol=False, save=save, file=file, tam=tam, resolution=resolution, option=option)
-
-
 def print_distribution_statistics(original, models, steps, resolution):
     ret = "Model & Order & Interval & Distribution \\\\ \n"
     for fts in models:
@@ -44,11 +44,15 @@ class Differential(Transformation):
     """
     Differentiation data transform
     """
-    def __init__(self, parameters):
+    def __init__(self, lag):
         super(Differential, self).__init__()
-        self.lag = parameters
+        self.lag = lag
         self.minimal_length = 2
 
+    @property
+    def parameters(self):
+        return self.lag
+
     def apply(self, data, param=None, **kwargs):
         if param is not None:
             self.lag = param
@@ -66,7 +70,7 @@ class Differential(Transformation):
 
     def inverse(self, data, param, **kwargs):
 
-        interval = kwargs.get("point_to_interval",False)
+        type = kwargs.get("type","point")
 
         if isinstance(data, (np.ndarray, np.generic)):
             data = data.tolist()
@@ -79,10 +83,14 @@ class Differential(Transformation):
         # print(n)
         # print(len(param))
 
-        if not interval:
+        if type == "point":
             inc = [data[t] + param[t] for t in np.arange(0, n)]
-        else:
+        elif type == "interval":
             inc = [[data[t][0] + param[t], data[t][1] + param[t]] for t in np.arange(0, n)]
+        elif type == "distribution":
+            for t in np.arange(0, n):
+                data[t].differential_offset(param[t])
+            inc = data
 
         if n == 1:
             return inc[0]
@@ -103,6 +111,10 @@ class Scale(Transformation):
         self.transf_max = max
         self.transf_min = min
 
+    @property
+    def parameters(self):
+        return [self.transf_max, self.transf_min]
+
     def apply(self, data, param=None,**kwargs):
         if self.data_max is None:
             self.data_max = np.nanmax(data)
@@ -138,6 +150,10 @@ class AdaptiveExpectation(Transformation):
         super(AdaptiveExpectation, self).__init__(parameters)
         self.h = parameters
 
+    @property
+    def parameters(self):
+        return self.parameters
+
     def apply(self, data, param=None,**kwargs):
         return data
 
@@ -160,6 +176,10 @@ class BoxCox(Transformation):
         super(BoxCox, self).__init__()
         self.plambda = plambda
 
+    @property
+    def parameters(self):
+        return self.plambda
+
     def apply(self, data, param=None, **kwargs):
         if self.plambda != 0:
             modified = [(dat ** self.plambda - 1) / self.plambda for dat in data]
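A small hedged sketch of the Differential changes above: the constructor now takes the lag directly and exposes it through the new parameters property, and inverse() dispatches on a type keyword instead of the old point_to_interval flag. The calls below assume inverse() is handed one offset per forecast value, as in the code shown:

from pyFTS.common import Transformations

tdiff = Transformations.Differential(1)
print(tdiff.parameters)                                        # -> 1, via the new property

# point: each de-differenced value is data[t] + param[t]
print(tdiff.inverse([3.0], [15.0], type="point"))              # -> 18.0

# interval: both bounds are shifted by the same offset
print(tdiff.inverse([[2.0, 4.0]], [15.0], type="interval"))    # -> [17.0, 19.0]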
@@ -95,6 +95,25 @@ class ProbabilityDistribution(object):
 
         return ret
 
+    def differential_offset(self, value):
+        nbins = []
+        dist = {}
+
+        for k in self.bins:
+            nk = k+value
+            nbins.append(nk)
+            dist[nk] = self.distribution[k]
+
+        self.bins = nbins
+        self.distribution = dist
+        self.labels = [str(k) for k in self.bins]
+
+        self.bin_index = SortedCollection.SortedCollection(iterable=sorted(self.bins))
+        self.quantile_index = None
+        self.cdf = None
+        self.qtl = None
+
+
     def expected_value(self):
         return np.nansum([v * self.distribution[v] for v in self.bins])
 
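A hedged sketch of how the new method is used: this is what Differential.inverse now does for each forecast when type='distribution'. It assumes `dist` is an already built ProbabilityDistribution (for instance, one returned by a probabilistic forecast) and that 15.0 is the last known value used as the de-differencing offset:

last_value = 15.0

# Shift every bin of the forecast distribution by the offset, de-differencing it
# in place; the cached cdf/quantile indexes are reset by the method.
dist.differential_offset(last_value)
print(dist.expected_value())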
@@ -19,19 +19,34 @@ from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil
 
 from pyFTS.models import pwfts
 
+from pyFTS.partitioners import Grid, Util as pUtil
+partitioner = Grid.GridPartitioner(data=dataset[:800], npart=10, transformation=tdiff)
+
+model = pwfts.ProbabilisticWeightedFTS('',partitioner=partitioner)
+#model.append_transformation(tdiff)
+model.fit(dataset[:800])
+print(model.predict(dataset[800:1000], type='interval'))
+
+
 '''
 bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2, methods=[pwfts.ProbabilisticWeightedFTS],
-                                benchmark_models=False, orders=[1,2,3], partitions=np.arange(10,100,5),
-                                progress=False, type='point',
-                                #steps_ahead=[1,4,7,10], steps_ahead_sampler=10,
-                                distributed=True, nodes=['192.168.0.102','192.168.0.106','192.168.0.110'],
-                                save=True, file="pwfts_taiex_partitioning.csv")
-'''
+                                benchmark_models=False,
+                                #transformations=[tdiff],
+                                orders=[1, 2, 3],
+                                partitions=np.arange(10, 100, 5),
+                                progress=False, type='distribution',
+                                #steps_ahead=[1,4,7,10], #steps_ahead=[1]
+                                distributed=True, nodes=['192.168.0.110', '192.168.0.100','192.168.0.106'],
+                                file="benchmarks.db", dataset="TAIEX", tag="partitioning")
+                                #save=True, file="tmp.db")
 
 
+'''
+'''
 dat = pd.read_csv('pwfts_taiex_partitioning.csv', sep=';')
 print(bUtil.analytic_tabular_dataframe(dat))
 #print(dat["Size"].values[0])
+'''
 '''
 train_split = 2000
 test_length = 200
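Since benchmark results now land in SQLite rather than the CSV read in the commented block above, a hedged way to pull this run back for analysis (the filter string is illustrative; columns follow the benchmarks table defined in this commit):

df = bUtil.get_dataframe_from_bd("benchmarks.db",
                                 "Dataset = 'TAIEX' AND Tag = 'partitioning' AND Measure = 'rmse'")
print(df.sort_values('Value').head(10))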