SQLITE3 interface for benchmarks; Support for ProbabilityDistribution on Differentiation

Petrônio Cândido 2018-04-22 16:59:17 -03:00
parent 6adf4710b6
commit f3c6eda2ec
6 changed files with 198 additions and 300 deletions

View File

@@ -288,7 +288,7 @@ def get_point_statistics(data, model, **kwargs):
     ret = list()
     if steps_ahead == 1:
-        forecasts = model.forecast(data, **kwargs)
+        forecasts = model.predict(data, **kwargs)
         if model.has_seasonality:
             nforecasts = np.array(forecasts)
         else:
@@ -304,7 +304,7 @@ def get_point_statistics(data, model, **kwargs):
             tmp = model.forecast_ahead(sample, steps_ahead, **kwargs)
             nforecasts.append(tmp[-1])
 
-        start = model.order + steps_ahead
+        start = model.order + steps_ahead - 1
         ret.append(np.round(rmse(ndata[start:-1:steps_ahead_sampler], nforecasts), 2))
         ret.append(np.round(smape(ndata[start:-1:steps_ahead_sampler], nforecasts), 2))
         ret.append(np.round(UStatistic(ndata[start:-1:steps_ahead_sampler], nforecasts), 2))
@@ -327,7 +327,7 @@ def get_interval_statistics(data, model, **kwargs):
     ret = list()
     if steps_ahead == 1:
-        forecasts = model.forecast_interval(data, **kwargs)
+        forecasts = model.predict(data, **kwargs)
         ret.append(round(sharpness(forecasts), 2))
         ret.append(round(resolution(forecasts), 2))
         ret.append(round(coverage(data[model.order:], forecasts[:-1]), 2))
@@ -339,10 +339,10 @@ def get_interval_statistics(data, model, **kwargs):
         forecasts = []
         for k in np.arange(model.order, len(data) - steps_ahead):
             sample = data[k - model.order: k]
-            tmp = model.forecast_ahead_interval(sample, steps_ahead, **kwargs)
+            tmp = model.predict(sample, steps_ahead, **kwargs)
             forecasts.append(tmp[-1])
 
-        start = model.order + steps_ahead
+        start = model.order + steps_ahead - 1
         ret.append(round(sharpness(forecasts), 2))
         ret.append(round(resolution(forecasts), 2))
         ret.append(round(coverage(data[model.order:], forecasts), 2))

View File

@@ -8,6 +8,7 @@ import matplotlib.colors as pltcolors
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
+import sqlite3
 
 #from mpl_toolkits.mplot3d import Axes3D
@@ -15,6 +16,57 @@ from copy import deepcopy
 from pyFTS.common import Util
 
 
+def open_benchmark_db(name):
+    conn = sqlite3.connect(name)
+    create_benchmark_tables(conn)
+    return conn
+
+
+def create_benchmark_tables(conn):
+    c = conn.cursor()
+
+    c.execute('''CREATE TABLE if not exists benchmarks(
+                 ID integer primary key, Date int, Dataset text, Tag text,
+                 Type text, Model text, Transformation text, 'Order' int,
+                 Scheme text, Partitions int,
+                 Size int, Steps int, Method text, Measure text, Value real)''')
+
+    # Save (commit) the changes
+    conn.commit()
+
+
+def insert_benchmark(data, conn):
+    c = conn.cursor()
+
+    c.execute("INSERT INTO benchmarks(Date, Dataset, Tag, Type, Model, "
+              + "Transformation, 'Order', Scheme, Partitions, "
+              + "Size, Steps, Method, Measure, Value) "
+              + "VALUES(datetime('now'),?,?,?,?,?,?,?,?,?,?,?,?,?)", data)
+    conn.commit()
+
+
+def process_common_data(dataset, tag, type, job):
+    model = job["obj"]
+    if not model.benchmark_only:
+        data = [dataset, tag, type, model.shortname,
+                str(model.partitioner.transformation) if model.partitioner.transformation is not None else None,
+                model.order, model.partitioner.name, str(model.partitioner.partitions),
+                len(model), job['steps'], job['method']]
+    else:
+        data = [dataset, tag, type, model.shortname, None, model.order, None, None,
+                None, job['steps'], job['method']]
+    return data
+
+
+def get_dataframe_from_bd(file, filter):
+    con = sqlite3.connect(file)
+    sql = "SELECT * from benchmarks"
+    if filter is not None:
+        sql += " WHERE " + filter
+    return pd.read_sql_query(sql, con)
+
+
 def extract_measure(dataframe, measure, data_columns):
     if not dataframe.empty:
         df = dataframe[(dataframe.Measure == measure)][data_columns]
@@ -45,6 +97,7 @@ def find_best(dataframe, criteria, ascending):
     return ret
 
 
 def analytic_tabular_dataframe(dataframe):
     experiments = len(dataframe.columns) - len(base_dataframe_columns()) - 1
     models = dataframe.Model.unique()

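Taken together, the new helpers in this file form a small append-only results store: open_benchmark_db() creates the benchmarks table on first use, insert_benchmark() appends one row per measure, and get_dataframe_from_bd() pulls results back into pandas. A minimal usage sketch (not part of the commit; the file name and the row values below are made up for illustration):

from pyFTS.benchmarks import Util as bUtil

conn = bUtil.open_benchmark_db("benchmarks.db")      # creates the 'benchmarks' table if it does not exist

# one value per column except Date, which insert_benchmark() fills with datetime('now')
row = ["TAIEX", "partitioning", "point", "PWFTS", None, 1, "Grid", "10", 102, 1, None,
       "rmse", 142.37]
bUtil.insert_benchmark(row, conn)
conn.close()

# results come back as a pandas DataFrame; the filter argument is a plain SQL predicate
df = bUtil.get_dataframe_from_bd("benchmarks.db", "Dataset = 'TAIEX' AND Measure = 'rmse'")
print(df[["Model", "Order", "Partitions", "Value"]])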
View File

@@ -90,10 +90,14 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
     :return: DataFrame with the benchmark results
     """
+    tag = __pop('tag', None, kwargs)
+    dataset = __pop('dataset', None, kwargs)
+
     distributed = __pop('distributed', False, kwargs)
     save = __pop('save', False, kwargs)
 
-    transformation = kwargs.get('transformation', None)
+    transformations = kwargs.get('transformations', [None])
     progress = kwargs.get('progress', None)
     type = kwargs.get("type", 'point')
@@ -192,13 +196,15 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
         if partitioners_models is None:
 
-            for partition in partitions:
-                for partitioner in partitioners_methods:
-                    data_train_fs = partitioner(data=train, npart=partition, transformation=transformation)
-                    partitioners_pool.append(data_train_fs)
+            for transformation in transformations:
+                for partition in partitions:
+                    for partitioner in partitioners_methods:
+                        data_train_fs = partitioner(data=train, npart=partition, transformation=transformation)
+                        partitioners_pool.append(data_train_fs)
         else:
             partitioners_pool = partitioners_models
@@ -206,6 +212,10 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
         if progress:
             rng1 = tqdm(steps_ahead, desc="Steps")
 
+        file = kwargs.get('file', "benchmarks.db")
+
+        conn = bUtil.open_benchmark_db(file)
+
         for step in rng1:
             rng2 = partitioners_pool
@@ -225,7 +235,7 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
                     if not distributed:
                         job = experiment_method(deepcopy(model), deepcopy(partitioner), train, test, **kwargs)
-                        jobs.append(job)
+                        synthesis_method(dataset, tag, job, conn)
                     else:
                         job = cluster.submit(deepcopy(model), deepcopy(partitioner), train, test, **kwargs)
                         job.id = id  # associate an ID to identify jobs (if needed later)
@@ -239,29 +249,29 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
        rng = jobs
 
-        cluster.wait()  # wait for all jobs to finish
-
         if progress:
             rng = tqdm(jobs)
 
         for job in rng:
+            job()
             if job.status == dispy.DispyJob.Finished and job is not None:
-                tmp = job()
-                jobs2.append(tmp)
+                tmp = job.result
+                synthesis_method(dataset, tag, tmp, conn)
             else:
                 print("status",job.status)
                 print("result",job.result)
                 print("stdout",job.stdout)
                 print("stderr",job.exception)
 
-        jobs = deepcopy(jobs2)
+        cluster.wait()  # wait for all jobs to finish
 
         cUtil.stop_dispy_cluster(cluster, http_server)
 
-    file = kwargs.get('file', None)
+    conn.close()
 
     sintetic = kwargs.get('sintetic', False)
 
-    return synthesis_method(jobs, experiments, save, file, sintetic)
+    #return synthesis_method(jobs, experiments, save, file, sintetic)
 
 
 def get_benchmark_point_methods():
@@ -326,7 +336,6 @@ def run_point(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
     tmp5 = [Transformations.Differential]
 
-    transformation = kwargs.get('transformation', None)
     indexer = kwargs.get('indexer', None)
 
     steps_ahead = kwargs.get('steps_ahead', 1)
@@ -338,13 +347,11 @@ def run_point(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
     pttr = str(partitioner.__module__).split('.')[-1]
     _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
     mfts.partitioner = partitioner
+    mfts.append_transformation(partitioner.transformation)
 
     _key += str(steps_ahead)
     _key += str(method) if method is not None else ""
 
-    if transformation is not None:
-        mfts.append_transformation(transformation)
-
     _start = time.time()
     mfts.fit(train_data, order=mfts.order, **kwargs)
     _end = time.time()
@@ -386,9 +393,6 @@ def run_interval(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
     tmp3 = [Measures.get_interval_statistics]
 
-    transformation = kwargs.get('transformation', None)
-
-    indexer = kwargs.get('indexer', None)
 
     steps_ahead = kwargs.get('steps_ahead', 1)
     method = kwargs.get('method', None)
@@ -398,9 +402,7 @@ def run_interval(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
     pttr = str(partitioner.__module__).split('.')[-1]
     _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
     mfts.partitioner = partitioner
+    mfts.append_transformation(partitioner.transformation)
 
-    if transformation is not None:
-        mfts.append_transformation(transformation)
-
     _key += str(steps_ahead)
     _key += str(method) if method is not None else ""
@@ -450,7 +452,6 @@ def run_probabilistic(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
     tmp3 = [Measures.get_distribution_statistics, SeasonalIndexer.SeasonalIndexer, SeasonalIndexer.LinearSeasonalIndexer]
 
-    transformation = kwargs.get('transformation', None)
     indexer = kwargs.get('indexer', None)
 
     steps_ahead = kwargs.get('steps_ahead', 1)
@@ -462,13 +463,11 @@ def run_probabilistic(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
     pttr = str(partitioner.__module__).split('.')[-1]
     _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions)
     mfts.partitioner = partitioner
+    mfts.append_transformation(partitioner.transformation)
 
     _key += str(steps_ahead)
     _key += str(method) if method is not None else ""
 
-    if transformation is not None:
-        mfts.append_transformation(transformation)
-
     if mfts.has_seasonality:
         mfts.indexer = indexer
@@ -491,126 +490,64 @@ def run_probabilistic(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
     return ret
 
 
-def build_model_pool_point(models, max_order, benchmark_models, benchmark_models_parameters):
-    pool = []
-    if models is None:
-        models = get_point_methods()
-    for model in models:
-        mfts = model("")
-
-        if mfts.is_high_order:
-            for order in np.arange(1, max_order + 1):
-                if order >= mfts.min_order:
-                    mfts = model("")
-                    mfts.order = order
-                    pool.append(mfts)
-        else:
-            mfts.order = 1
-            pool.append(mfts)
-
-    if benchmark_models is not None:
-        for count, model in enumerate(benchmark_models, start=0):
-            par = benchmark_models_parameters[count]
-            mfts = model(str(par if par is not None else ""))
-            mfts.order = par
-            pool.append(mfts)
-    return pool
-
-
-def process_point_jobs(jobs, experiments, save=False, file=None, sintetic=False):
-    objs = {}
-    rmse = {}
-    smape = {}
-    u = {}
-    times = {}
-    steps = {}
-    method = {}
-
-    for job in jobs:
-        _key = job['key']
-        if _key not in objs:
-            objs[_key] = job['obj']
-            rmse[_key] = []
-            smape[_key] = []
-            u[_key] = []
-            times[_key] = []
-            steps[_key] = []
-            method[_key] = []
-        steps[_key] = job['steps']
-        method[_key] = job['method']
-        rmse[_key].append(job['rmse'])
-        smape[_key].append(job['smape'])
-        u[_key].append(job['u'])
-        times[_key].append(job['time'])
-
-    return bUtil.save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, times, u, steps, method)
-
-
-def process_interval_jobs(jobs, experiments, save=False, file=None, sintetic=False):
-    objs = {}
-    sharpness = {}
-    resolution = {}
-    coverage = {}
-    q05 = {}
-    q25 = {}
-    q75 = {}
-    q95 = {}
-    times = {}
-    steps = {}
-    method = {}
-
-    for job in jobs:
-        _key = job['key']
-        if _key not in objs:
-            objs[_key] = job['obj']
-            sharpness[_key] = []
-            resolution[_key] = []
-            coverage[_key] = []
-            times[_key] = []
-            q05[_key] = []
-            q25[_key] = []
-            q75[_key] = []
-            q95[_key] = []
-            steps[_key] = []
-            method[_key] = []
-
-        sharpness[_key].append(job['sharpness'])
-        resolution[_key].append(job['resolution'])
-        coverage[_key].append(job['coverage'])
-        times[_key].append(job['time'])
-        q05[_key].append(job['Q05'])
-        q25[_key].append(job['Q25'])
-        q75[_key].append(job['Q75'])
-        q95[_key].append(job['Q95'])
-        steps[_key] = job['steps']
-        method[_key] = job['method']
-
-    return bUtil.save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, sintetic,
-                                         times, q05, q25, q75, q95, steps, method)
-
-
-def process_probabilistic_jobs(jobs, experiments, save=False, file=None, sintetic=False):
-    objs = {}
-    crps = {}
-    times = {}
-    steps = {}
-    method = {}
-
-    for job in jobs:
-        _key = job['key']
-        if _key not in objs:
-            objs[_key] = job['obj']
-            crps[_key] = []
-            times[_key] = []
-            steps[_key] = []
-            method[_key] = []
-        crps[_key].append(job['CRPS'])
-        times[_key].append(job['time'])
-        steps[_key] = job['steps']
-        method[_key] = job['method']
-
-    return bUtil.save_dataframe_probabilistic(experiments, file, objs, crps, times, save, sintetic, steps, method)
+def process_point_jobs(dataset, tag, job, conn):
+
+    data = bUtil.process_common_data(dataset, tag, 'point', job)
+
+    rmse = deepcopy(data)
+    rmse.extend(["rmse", job["rmse"]])
+    bUtil.insert_benchmark(rmse, conn)
+    smape = deepcopy(data)
+    smape.extend(["smape", job["smape"]])
+    bUtil.insert_benchmark(smape, conn)
+    u = deepcopy(data)
+    u.extend(["u", job["u"]])
+    bUtil.insert_benchmark(u, conn)
+    time = deepcopy(data)
+    time.extend(["time", job["time"]])
+    bUtil.insert_benchmark(time, conn)
+
+
+def process_interval_jobs(dataset, tag, job, conn):
+
+    data = bUtil.process_common_data(dataset, tag, 'interval', job)
+
+    sharpness = deepcopy(data)
+    sharpness.extend(["sharpness", job["sharpness"]])
+    bUtil.insert_benchmark(sharpness, conn)
+    resolution = deepcopy(data)
+    resolution.extend(["resolution", job["resolution"]])
+    bUtil.insert_benchmark(resolution, conn)
+    coverage = deepcopy(data)
+    coverage.extend(["coverage", job["coverage"]])
+    bUtil.insert_benchmark(coverage, conn)
+    time = deepcopy(data)
+    time.extend(["time", job["time"]])
+    bUtil.insert_benchmark(time, conn)
+    Q05 = deepcopy(data)
+    Q05.extend(["Q05", job["Q05"]])
+    bUtil.insert_benchmark(Q05, conn)
+    Q25 = deepcopy(data)
+    Q25.extend(["Q25", job["Q25"]])
+    bUtil.insert_benchmark(Q25, conn)
+    Q75 = deepcopy(data)
+    Q75.extend(["Q75", job["Q75"]])
+    bUtil.insert_benchmark(Q75, conn)
+    Q95 = deepcopy(data)
+    Q95.extend(["Q95", job["Q95"]])
+    bUtil.insert_benchmark(Q95, conn)
+
+
+def process_probabilistic_jobs(dataset, tag, job, conn):
+
+    data = bUtil.process_common_data(dataset, tag, 'density', job)
+
+    crps = deepcopy(data)
+    crps.extend(["CRPS", job["CRPS"]])
+    bUtil.insert_benchmark(crps, conn)
+    time = deepcopy(data)
+    time.extend(["time", job["time"]])
+    bUtil.insert_benchmark(time, conn)
 
 
 def print_point_statistics(data, models, externalmodels = None, externalforecasts = None, indexers=None):
@@ -636,7 +573,6 @@ def print_point_statistics(data, models, externalmodels = None, externalforecasts = None, indexers=None):
     print(ret)
 
 
 def print_interval_statistics(original, models):
     ret = "Model & Order & Sharpness & Resolution & Coverage & .05 & .25 & .75 & .95 \\\\ \n"
     for fts in models:
@@ -653,151 +589,6 @@ def print_interval_statistics(original, models):
     print(ret)
 
 
-def ahead_sliding_window(data, windowsize, train, steps, models=None, resolution = None, partitioners=[Grid.GridPartitioner],
-                         partitions=[10], max_order=3, transformation=None, indexer=None, dump=False,
-                         save=False, file=None, synthetic=False):
-    if models is None:
-        models = [pwfts.ProbabilisticWeightedFTS]
-
-    objs = {}
-    lcolors = {}
-    crps_interval = {}
-    crps_distr = {}
-    times1 = {}
-    times2 = {}
-
-    experiments = 0
-    for ct, train, test in cUtil.sliding_window(data, windowsize, train):
-        experiments += 1
-        for partition in partitions:
-            for partitioner in partitioners:
-                pttr = str(partitioner.__module__).split('.')[-1]
-                data_train_fs = partitioner(data=train, npart=partition, transformation=transformation)
-
-                for count, model in enumerate(models, start=0):
-                    mfts = model("")
-                    _key = mfts.shortname + " " + pttr + " q = " + str(partition)
-                    mfts.partitioner = data_train_fs
-
-                    if not mfts.is_high_order:
-                        if dump: print(ct, _key)
-
-                        if _key not in objs:
-                            objs[_key] = mfts
-                            lcolors[_key] = colors[count % ncol]
-                            crps_interval[_key] = []
-                            crps_distr[_key] = []
-                            times1[_key] = []
-                            times2[_key] = []
-
-                        if transformation is not None:
-                            mfts.append_transformation(transformation)
-
-                        _start = time.time()
-                        mfts.train(train, sets=data_train_fs.sets)
-                        _end = time.time()
-                        _tdiff = _end - _start
-
-                        _crps1, _crps2, _t1, _t2 = Measures.get_distribution_statistics(test, mfts, steps=steps, resolution=resolution)
-
-                        crps_interval[_key].append_rhs(_crps1)
-                        crps_distr[_key].append_rhs(_crps2)
-                        times1[_key] = _tdiff + _t1
-                        times2[_key] = _tdiff + _t2
-
-                        if dump: print(_crps1, _crps2, _tdiff, _t1, _t2)
-
-                    else:
-                        for order in np.arange(1, max_order + 1):
-                            if order >= mfts.min_order:
-                                mfts = model("")
-                                _key = mfts.shortname + " n = " + str(order) + " " + pttr + " q = " + str(partition)
-                                mfts.partitioner = data_train_fs
-
-                                if dump: print(ct, _key)
-
-                                if _key not in objs:
-                                    objs[_key] = mfts
-                                    lcolors[_key] = colors[count % ncol]
-                                    crps_interval[_key] = []
-                                    crps_distr[_key] = []
-                                    times1[_key] = []
-                                    times2[_key] = []
-
-                                if transformation is not None:
-                                    mfts.append_transformation(transformation)
-
-                                _start = time.time()
-                                mfts.train(train, sets=data_train_fs.sets, order=order)
-                                _end = time.time()
-                                _tdiff = _end - _start
-
-                                _crps1, _crps2, _t1, _t2 = Measures.get_distribution_statistics(test, mfts, steps=steps,
-                                                                                                resolution=resolution)
-
-                                crps_interval[_key].append_rhs(_crps1)
-                                crps_distr[_key].append_rhs(_crps2)
-                                times1[_key] = _tdiff + _t1
-                                times2[_key] = _tdiff + _t2
-
-                                if dump: print(_crps1, _crps2, _tdiff, _t1, _t2)
-
-    return bUtil.save_dataframe_ahead(experiments, file, objs, crps_interval, crps_distr, times1, times2, save, synthetic)
-
-
-def all_ahead_forecasters(data_train, data_test, partitions, start, steps, resolution = None, max_order=3, save=False, file=None, tam=[20, 5],
-                          models=None, transformation=None, option=2):
-    if models is None:
-        models = [pwfts.ProbabilisticWeightedFTS]
-
-    if resolution is None: resolution = (max(data_train) - min(data_train)) / 100
-
-    objs = []
-
-    data_train_fs = Grid.GridPartitioner(data=data_train, npart=partitions, transformation=transformation).sets
-
-    lcolors = []
-
-    for count, model in cUtil.enumerate2(models, start=0, step=2):
-        mfts = model("")
-        if not mfts.is_high_order:
-            if transformation is not None:
-                mfts.append_transformation(transformation)
-            mfts.train(data_train, sets=data_train_fs.sets)
-            objs.append(mfts)
-            lcolors.append(colors[count % ncol])
-        else:
-            for order in np.arange(1, max_order + 1):
-                if order >= mfts.min_order:
-                    mfts = model(" n = " + str(order))
-                    if transformation is not None:
-                        mfts.append_transformation(transformation)
-                    mfts.train(data_train, sets=data_train_fs.sets, order=order)
-                    objs.append(mfts)
-                    lcolors.append(colors[count % ncol])
-
-    distributions = [False for k in objs]
-
-    distributions[0] = True
-
-    print_distribution_statistics(data_test[start:], objs, steps, resolution)
-
-    plot_compared_intervals_ahead(data_test, objs, lcolors, distributions=distributions, time_from=start, time_to=steps,
-                                  interpol=False, save=save, file=file, tam=tam, resolution=resolution, option=option)
-
-
 def print_distribution_statistics(original, models, steps, resolution):
     ret = "Model & Order & Interval & Distribution \\\\ \n"
     for fts in models:

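For a single experiment outside sliding_window_benchmarks(), the same persistence path can be exercised by hand: run_point() returns the job dict that the new process_point_jobs() expects (keys such as 'obj', 'steps', 'method', 'rmse', 'smape', 'u' and 'time'). A rough sketch, not from the commit, using a synthetic series and invented dataset/tag names:

import numpy as np
from pyFTS.partitioners import Grid
from pyFTS.models import pwfts
from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil

data = np.sin(np.arange(0, 100, 0.1)) * 100 + 1000          # synthetic series, illustration only
train, test = data[:800], data[800:]

part = Grid.GridPartitioner(data=train, npart=10)
model = pwfts.ProbabilisticWeightedFTS('', partitioner=part)

conn = bUtil.open_benchmark_db("benchmarks.db")
job = bchmk.run_point(model, part, train, test)              # fits the model and measures rmse/smape/u/time
bchmk.process_point_jobs("synthetic-sine", "example", job, conn)   # one benchmarks row per measure
conn.close()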
View File

@@ -44,11 +44,15 @@ class Differential(Transformation):
     """
     Differentiation data transform
     """
-    def __init__(self, parameters):
+    def __init__(self, lag):
         super(Differential, self).__init__()
-        self.lag = parameters
+        self.lag = lag
         self.minimal_length = 2
 
+    @property
+    def parameters(self):
+        return self.lag
+
     def apply(self, data, param=None, **kwargs):
         if param is not None:
             self.lag = param
@@ -66,7 +70,7 @@ class Differential(Transformation):
     def inverse(self, data, param, **kwargs):
 
-        interval = kwargs.get("point_to_interval",False)
+        type = kwargs.get("type","point")
 
         if isinstance(data, (np.ndarray, np.generic)):
             data = data.tolist()
@@ -79,10 +83,14 @@ class Differential(Transformation):
         # print(n)
         # print(len(param))
 
-        if not interval:
+        if type == "point":
             inc = [data[t] + param[t] for t in np.arange(0, n)]
-        else:
+        elif type == "interval":
             inc = [[data[t][0] + param[t], data[t][1] + param[t]] for t in np.arange(0, n)]
+        elif type == "distribution":
+            for t in np.arange(0, n):
+                data[t].differential_offset(param[t])
+            inc = data
 
         if n == 1:
             return inc[0]
@@ -103,6 +111,10 @@ class Scale(Transformation):
         self.transf_max = max
         self.transf_min = min
 
+    @property
+    def parameters(self):
+        return [self.transf_max, self.transf_min]
+
     def apply(self, data, param=None, **kwargs):
         if self.data_max is None:
             self.data_max = np.nanmax(data)
@@ -138,6 +150,10 @@ class AdaptiveExpectation(Transformation):
         super(AdaptiveExpectation, self).__init__(parameters)
         self.h = parameters
 
+    @property
+    def parameters(self):
+        return self.h
+
     def apply(self, data, param=None, **kwargs):
         return data
@@ -160,6 +176,10 @@ class BoxCox(Transformation):
         super(BoxCox, self).__init__()
         self.plambda = plambda
 
+    @property
+    def parameters(self):
+        return self.plambda
+
     def apply(self, data, param=None, **kwargs):
         if self.plambda != 0:
             modified = [(dat ** self.plambda - 1) / self.plambda for dat in data]

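With the reworked Differential above, inverse() now branches on a 'type' keyword instead of the old point_to_interval flag. A minimal sketch of the round trip (not from the commit; the values are illustrative, and the point/interval results follow directly from the code above):

from pyFTS.common import Transformations

tdiff = Transformations.Differential(1)        # the lag is now the constructor argument

data = [10.0, 12.0, 15.0, 14.0]
diffs = tdiff.apply(data)                      # lag-1 differences of the series

# inverse() receives the original values the differences were taken against (param),
# and 'type' says what kind of forecast is being un-differenced
print(tdiff.inverse([2.0, 3.0, -1.0], [10.0, 12.0, 15.0], type="point"))    # [12.0, 15.0, 14.0]
print(tdiff.inverse([[1.5, 2.5]], [10.0], type="interval"))                 # [11.5, 12.5]
# type="distribution" expects ProbabilityDistribution objects and shifts their bins
# in place through differential_offset(), added in the next file of this commit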
View File

@@ -95,6 +95,25 @@ class ProbabilityDistribution(object):
         return ret
 
+    def differential_offset(self, value):
+        nbins = []
+        dist = {}
+
+        for k in self.bins:
+            nk = k + value
+            nbins.append(nk)
+            dist[nk] = self.distribution[k]
+
+        self.bins = nbins
+        self.distribution = dist
+        self.labels = [str(k) for k in self.bins]
+
+        self.bin_index = SortedCollection.SortedCollection(iterable=sorted(self.bins))
+        self.quantile_index = None
+        self.cdf = None
+        self.qtl = None
+
     def expected_value(self):
         return np.nansum([v * self.distribution[v] for v in self.bins])

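differential_offset() is what makes the type="distribution" branch of Differential.inverse() work: every bin of the forecast distribution of differences is translated by the given value, its probability mass is kept, and the cached indexes/CDF/quantiles are reset so they are rebuilt lazily. A stand-alone toy class (deliberately not the pyFTS class) that mirrors just that shift:

class TinyDistribution:
    """Minimal stand-in for ProbabilityDistribution, used only to illustrate the shift."""
    def __init__(self, bins, probabilities):
        self.bins = list(bins)
        self.distribution = dict(zip(self.bins, probabilities))

    def differential_offset(self, value):
        nbins, dist = [], {}
        for k in self.bins:              # same loop as in the real method
            nk = k + value
            nbins.append(nk)
            dist[nk] = self.distribution[k]
        self.bins = nbins
        self.distribution = dist

d = TinyDistribution([-1.0, 0.0, 1.0], [0.2, 0.5, 0.3])   # a forecast of first differences
d.differential_offset(1000.0)                             # re-centre it on the last observed value
print(d.bins)                                             # [999.0, 1000.0, 1001.0]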
View File

@@ -19,19 +19,34 @@ from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil
 from pyFTS.models import pwfts
+from pyFTS.partitioners import Grid, Util as pUtil
+
+partitioner = Grid.GridPartitioner(data=dataset[:800], npart=10, transformation=tdiff)
+
+model = pwfts.ProbabilisticWeightedFTS('',partitioner=partitioner)
+#model.append_transformation(tdiff)
+model.fit(dataset[:800])
+
+print(model.predict(dataset[800:1000], type='interval'))
 
 '''
 bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2, methods=[pwfts.ProbabilisticWeightedFTS],
-                                benchmark_models=False, orders=[1,2,3], partitions=np.arange(10,100,5),
-                                progress=False, type='point',
-                                #steps_ahead=[1,4,7,10], steps_ahead_sampler=10,
-                                distributed=True, nodes=['192.168.0.102','192.168.0.106','192.168.0.110'],
-                                save=True, file="pwfts_taiex_partitioning.csv")
+                                benchmark_models=False,
+                                #transformations=[tdiff],
+                                orders=[1, 2, 3],
+                                partitions=np.arange(10, 100, 5),
+                                progress=False, type='distribution',
+                                #steps_ahead=[1,4,7,10], #steps_ahead=[1]
+                                distributed=True, nodes=['192.168.0.110', '192.168.0.100','192.168.0.106'],
+                                file="benchmarks.db", dataset="TAIEX", tag="partitioning")
+                                #save=True, file="tmp.db")
 '''
 
+'''
 dat = pd.read_csv('pwfts_taiex_partitioning.csv', sep=';')
 print(bUtil.analytic_tabular_dataframe(dat))
 #print(dat["Size"].values[0])
+'''
 
 '''
 train_split = 2000
 test_length = 200