diff --git a/img/logo_medium.png b/img/logo_medium.png new file mode 100644 index 0000000..d2209c9 Binary files /dev/null and b/img/logo_medium.png differ diff --git a/img/minds_logo.jpeg b/img/minds_logo.jpeg new file mode 100644 index 0000000..03fa0d0 Binary files /dev/null and b/img/minds_logo.jpeg differ diff --git a/img/minds_logo_medium.jpeg b/img/minds_logo_medium.jpeg new file mode 100644 index 0000000..3cb22a5 Binary files /dev/null and b/img/minds_logo_medium.jpeg differ diff --git a/pyFTS/benchmarks/Measures.py b/pyFTS/benchmarks/Measures.py index 8bd1a41..84c45cd 100644 --- a/pyFTS/benchmarks/Measures.py +++ b/pyFTS/benchmarks/Measures.py @@ -271,6 +271,8 @@ def crps(targets, densities): def get_point_statistics(data, model, **kwargs): """Condensate all measures for point forecasters""" + steps_ahead = kwargs.get('steps_ahead',1) + indexer = kwargs.get('indexer', None) if indexer is not None: @@ -278,46 +280,89 @@ def get_point_statistics(data, model, **kwargs): else: ndata = np.array(data[model.order:]) - forecasts = model.predict(data, **kwargs) + ret = list() - try: + if steps_ahead == 1: + forecasts = model.forecast(data, **kwargs) if model.has_seasonality: nforecasts = np.array(forecasts) else: nforecasts = np.array(forecasts[:-1]) - except Exception as ex: - print(ex) - return [np.nan,np.nan,np.nan] - ret = list() + ret.append(np.round(rmse(ndata, nforecasts), 2)) + ret.append(np.round(smape(ndata, nforecasts), 2)) + ret.append(np.round(UStatistic(ndata, nforecasts), 2)) + else: + nforecasts = [] + for k in np.arange(model.order, len(ndata)-steps_ahead): + sample = ndata[k - model.order: k] + tmp = model.forecast_ahead(sample, steps_ahead, **kwargs) + nforecasts.append(tmp[-1]) - ret.append(np.round(rmse(ndata, nforecasts), 2)) - ret.append(np.round(smape(ndata, nforecasts), 2)) - ret.append(np.round(UStatistic(ndata, nforecasts), 2)) + start = model.order + steps_ahead + ret.append(np.round(rmse(ndata[start:], nforecasts), 2)) + ret.append(np.round(smape(ndata[start:], nforecasts), 2)) + ret.append(np.round(UStatistic(ndata[start:], nforecasts), 2)) return ret -def get_interval_statistics(original, model, **kwargs): +def get_interval_statistics(data, model, **kwargs): """Condensate all measures for point_to_interval forecasters""" + + steps_ahead = kwargs.get('steps_ahead', 1) + ret = list() - forecasts = model.predict(original, **kwargs) - ret.append(round(sharpness(forecasts), 2)) - ret.append(round(resolution(forecasts), 2)) - ret.append(round(coverage(original[model.order:], forecasts[:-1]), 2)) - ret.append(round(pinball_mean(0.05, original[model.order:], forecasts[:-1]), 2)) - ret.append(round(pinball_mean(0.25, original[model.order:], forecasts[:-1]), 2)) - ret.append(round(pinball_mean(0.75, original[model.order:], forecasts[:-1]), 2)) - ret.append(round(pinball_mean(0.95, original[model.order:], forecasts[:-1]), 2)) + + if steps_ahead == 1: + forecasts = model.forecast_interval(data, **kwargs) + ret.append(round(sharpness(forecasts), 2)) + ret.append(round(resolution(forecasts), 2)) + ret.append(round(coverage(data[model.order:], forecasts[:-1]), 2)) + ret.append(round(pinball_mean(0.05, data[model.order:], forecasts[:-1]), 2)) + ret.append(round(pinball_mean(0.25, data[model.order:], forecasts[:-1]), 2)) + ret.append(round(pinball_mean(0.75, data[model.order:], forecasts[:-1]), 2)) + ret.append(round(pinball_mean(0.95, data[model.order:], forecasts[:-1]), 2)) + else: + forecasts = [] + for k in np.arange(model.order, len(data) - steps_ahead): + sample = data[k 
- model.order: k] + tmp = model.forecast_ahead_interval(sample, steps_ahead, **kwargs) + forecasts.append(tmp[-1]) + + start = model.order + steps_ahead + ret.append(round(sharpness(forecasts), 2)) + ret.append(round(resolution(forecasts), 2)) + ret.append(round(coverage(data[model.order:], forecasts), 2)) + ret.append(round(pinball_mean(0.05, data[start:], forecasts), 2)) + ret.append(round(pinball_mean(0.25, data[start:], forecasts), 2)) + ret.append(round(pinball_mean(0.75, data[start:], forecasts), 2)) + ret.append(round(pinball_mean(0.95, data[start:], forecasts), 2)) return ret -def get_distribution_statistics(original, model, **kwargs): +def get_distribution_statistics(data, model, **kwargs): + steps_ahead = kwargs.get('steps_ahead', 1) + ret = list() - _s1 = time.time() - densities1 = model.predict(original, **kwargs) - _e1 = time.time() - ret.append(round(crps(original, densities1), 3)) - ret.append(round(_e1 - _s1, 3)) + + if steps_ahead == 1: + _s1 = time.time() + forecasts = model.forecast_distribution(data, **kwargs) + _e1 = time.time() + ret.append(round(crps(data, forecasts), 3)) + ret.append(round(_e1 - _s1, 3)) + else: + forecasts = [] + _s1 = time.time() + for k in np.arange(model.order, len(data) - steps_ahead): + sample = data[k - model.order: k] + tmp = model.forecast_ahead_distribution(sample, steps_ahead, **kwargs) + forecasts.append(tmp[-1]) + _e1 = time.time() + + start = model.order + steps_ahead + ret.append(round(crps(data[start:], forecasts), 3)) + ret.append(round(_e1 - _s1, 3)) return ret diff --git a/pyFTS/benchmarks/Util.py b/pyFTS/benchmarks/Util.py index 1dc76f5..d5c71d1 100644 --- a/pyFTS/benchmarks/Util.py +++ b/pyFTS/benchmarks/Util.py @@ -15,7 +15,7 @@ from copy import deepcopy from pyFTS.common import Util -def extract_measure(dataframe,measure,data_columns): +def extract_measure(dataframe, measure, data_columns): if not dataframe.empty: df = dataframe[(dataframe.Measure == measure)][data_columns] tmp = df.to_dict(orient="records")[0] @@ -92,12 +92,12 @@ def save_dataframe_point(experiments, file, objs, rmse, save, synthetic, smape, mod.append(mfts.partitioner.name) mod.append(mfts.partitioner.partitions) mod.append(len(mfts)) - mod.append(steps) - mod.append(method) else: mod.append('-') mod.append('-') mod.append('-') + mod.append(steps[k]) + mod.append(method[k]) mod.append(np.round(np.nanmean(rmse[k]), 2)) mod.append(np.round(np.nanstd(rmse[k]), 2)) mod.append(np.round(np.nanmean(smape[k]), 2)) @@ -126,17 +126,18 @@ def save_dataframe_point(experiments, file, objs, rmse, save, synthetic, smape, s = '-' p = '-' l = '-' - print([n, o, s, p, l, steps, method]) - tmp = [n, o, s, p, l, steps, method, 'RMSE'] + st = steps[k] + mt = method[k] + tmp = [n, o, s, p, l, st, mt, 'RMSE'] tmp.extend(rmse[k]) ret.append(deepcopy(tmp)) - tmp = [n, o, s, p, l, steps, method, 'SMAPE'] + tmp = [n, o, s, p, l, st, mt, 'SMAPE'] tmp.extend(smape[k]) ret.append(deepcopy(tmp)) - tmp = [n, o, s, p, l, steps, method, 'U'] + tmp = [n, o, s, p, l, st, mt, 'U'] tmp.extend(u[k]) ret.append(deepcopy(tmp)) - tmp = [n, o, s, p, l, steps, method, 'TIME'] + tmp = [n, o, s, p, l, st, mt, 'TIME'] tmp.extend(times[k]) ret.append(deepcopy(tmp)) except Exception as ex: @@ -154,13 +155,30 @@ def save_dataframe_point(experiments, file, objs, rmse, save, synthetic, smape, print(ret) -def cast_dataframe_to_synthetic_point(infile, outfile, experiments): - columns = point_dataframe_analytic_columns(experiments) +def cast_dataframe_to_synthetic(infile, outfile, experiments, type): + if type 
== 'point': + analytic_columns = point_dataframe_analytic_columns + synthetic_columns = point_dataframe_synthetic_columns + synthetize_measures = cast_dataframe_to_synthetic_point + elif type == 'interval': + analytic_columns = interval_dataframe_analytic_columns + synthetic_columns = interval_dataframe_synthetic_columns + synthetize_measures = cast_dataframe_to_synthetic_interval + elif type == 'distribution': + analytic_columns = probabilistic_dataframe_analytic_columns + synthetic_columns = probabilistic_dataframe_synthetic_columns + synthetize_measures = cast_dataframe_to_synthetic_probabilistic + else: + raise ValueError("Type parameter has an unknown value!") + + columns = analytic_columns(experiments) dat = pd.read_csv(infile, sep=";", usecols=columns) models = dat.Model.unique() orders = dat.Order.unique() schemes = dat.Scheme.unique() partitions = dat.Partitions.unique() + steps = dat.Steps.unique() + methods = dat.Method.unique() data_columns = analytical_data_columns(experiments) @@ -170,39 +188,48 @@ def cast_dataframe_to_synthetic_point(infile, outfile, experiments): for o in orders: for s in schemes: for p in partitions: - mod = [] - df = dat[(dat.Model == m) & (dat.Order == o) & (dat.Scheme == s) & (dat.Partitions == p)] - if not df.empty: - rmse = extract_measure(df, 'RMSE', data_columns) - smape = extract_measure(df, 'SMAPE', data_columns) - u = extract_measure(df, 'U', data_columns) - times = extract_measure(df, 'TIME', data_columns) - mod.append(m) - mod.append(o) - mod.append(s) - mod.append(p) - mod.append(extract_measure(df, 'RMSE', ['Size'])[0]) - mod.append(np.round(np.nanmean(rmse), 2)) - mod.append(np.round(np.nanstd(rmse), 2)) - mod.append(np.round(np.nanmean(smape), 2)) - mod.append(np.round(np.nanstd(smape), 2)) - mod.append(np.round(np.nanmean(u), 2)) - mod.append(np.round(np.nanstd(u), 2)) - mod.append(np.round(np.nanmean(times), 4)) - mod.append(np.round(np.nanstd(times), 4)) - ret.append(mod) + for st in steps: + for mt in methods: + df = dat[(dat.Model == m) & (dat.Order == o) & (dat.Scheme == s) & + (dat.Partitions == p) & (dat.Steps == st) & (dat.Method == mt)] + if not df.empty: + mod = synthetize_measures(df, data_columns) + mod.insert(0, m) + mod.insert(1, o) + mod.insert(2, s) + mod.insert(3, p) + mod.insert(4, df.iat[0,5]) + mod.insert(5, st) + mod.insert(6, mt) + ret.append(mod) - dat = pd.DataFrame(ret, columns=point_dataframe_synthetic_columns()) + dat = pd.DataFrame(ret, columns=synthetic_columns()) dat.to_csv(outfile, sep=";", index=False) +def cast_dataframe_to_synthetic_point(df, data_columns): + ret = [] + rmse = extract_measure(df, 'RMSE', data_columns) + smape = extract_measure(df, 'SMAPE', data_columns) + u = extract_measure(df, 'U', data_columns) + times = extract_measure(df, 'TIME', data_columns) + ret.append(np.round(np.nanmean(rmse), 2)) + ret.append(np.round(np.nanstd(rmse), 2)) + ret.append(np.round(np.nanmean(smape), 2)) + ret.append(np.round(np.nanstd(smape), 2)) + ret.append(np.round(np.nanmean(u), 2)) + ret.append(np.round(np.nanstd(u), 2)) + ret.append(np.round(np.nanmean(times), 4)) + ret.append(np.round(np.nanstd(times), 4)) + + return ret + def analytical_data_columns(experiments): data_columns = [str(k) for k in np.arange(0, experiments)] return data_columns - def scale_params(data): vmin = np.nanmin(data) vlen = np.nanmax(data) - vmin @@ -215,12 +242,10 @@ def scale(data, params): return ndata - def stats(measure, data): print(measure, np.nanmean(data), np.nanstd(data)) - def unified_scaled_point(experiments, tam, 
save=False, file=None, sort_columns=['UAVG', 'RMSEAVG', 'USTD', 'RMSESTD'], sort_ascend=[1, 1, 1, 1],save_best=False, @@ -330,7 +355,6 @@ def unified_scaled_point(experiments, tam, save=False, file=None, Util.show_and_save_image(fig, file, save) - def plot_dataframe_point(file_synthetic, file_analytic, experiments, tam, save=False, file=None, sort_columns=['UAVG', 'RMSEAVG', 'USTD', 'RMSESTD'], sort_ascend=[1, 1, 1, 1],save_best=False, @@ -419,14 +443,12 @@ def save_dataframe_interval(coverage, experiments, file, objs, resolution, save, mod.append(mfts.partitioner.name) mod.append(mfts.partitioner.partitions) mod.append(l) - mod.append(steps) - mod.append(method) else: mod.append('-') mod.append('-') mod.append('-') - mod.append(steps) - mod.append(method) + mod.append(steps[k]) + mod.append(method[k]) mod.append(round(np.nanmean(sharpness[k]), 2)) mod.append(round(np.nanstd(sharpness[k]), 2)) mod.append(round(np.nanmean(resolution[k]), 2)) @@ -461,29 +483,30 @@ def save_dataframe_interval(coverage, experiments, file, objs, resolution, save, s = '-' p = '-' l = '-' - - tmp = [n, o, s, p, l, steps, method, 'Sharpness'] + st = steps[k] + mt = method[k] + tmp = [n, o, s, p, l, st, mt, 'Sharpness'] tmp.extend(sharpness[k]) ret.append(deepcopy(tmp)) - tmp = [n, o, s, p, l, steps, method, 'Resolution'] + tmp = [n, o, s, p, l, st, mt, 'Resolution'] tmp.extend(resolution[k]) ret.append(deepcopy(tmp)) - tmp = [n, o, s, p, l, steps, method, 'Coverage'] + tmp = [n, o, s, p, l, st, mt, 'Coverage'] tmp.extend(coverage[k]) ret.append(deepcopy(tmp)) - tmp = [n, o, s, p, l, steps, method, 'TIME'] + tmp = [n, o, s, p, l, st, mt, 'TIME'] tmp.extend(times[k]) ret.append(deepcopy(tmp)) - tmp = [n, o, s, p, l, steps, method, 'Q05'] + tmp = [n, o, s, p, l, st, mt, 'Q05'] tmp.extend(q05[k]) ret.append(deepcopy(tmp)) - tmp = [n, o, s, p, l, steps, method, 'Q25'] + tmp = [n, o, s, p, l, st, mt, 'Q25'] tmp.extend(q25[k]) ret.append(deepcopy(tmp)) - tmp = [n, o, s, p, l, steps, method, 'Q75'] + tmp = [n, o, s, p, l, st, mt, 'Q75'] tmp.extend(q75[k]) ret.append(deepcopy(tmp)) - tmp = [n, o, s, p, l, steps, method, 'Q95'] + tmp = [n, o, s, p, l, st, mt, 'Q95'] tmp.extend(q95[k]) ret.append(deepcopy(tmp)) except Exception as ex: @@ -515,57 +538,34 @@ def interval_dataframe_synthetic_columns(): return columns -def cast_dataframe_to_synthetic_interval(infile, outfile, experiments): - columns = interval_dataframe_analytic_columns(experiments) - dat = pd.read_csv(infile, sep=";", usecols=columns) - models = dat.Model.unique() - orders = dat.Order.unique() - schemes = dat.Scheme.unique() - partitions = dat.Partitions.unique() - - data_columns = analytical_data_columns(experiments) - +def cast_dataframe_to_synthetic_interval(df, data_columns): + sharpness = extract_measure(df, 'Sharpness', data_columns) + resolution = extract_measure(df, 'Resolution', data_columns) + coverage = extract_measure(df, 'Coverage', data_columns) + times = extract_measure(df, 'TIME', data_columns) + q05 = extract_measure(df, 'Q05', data_columns) + q25 = extract_measure(df, 'Q25', data_columns) + q75 = extract_measure(df, 'Q75', data_columns) + q95 = extract_measure(df, 'Q95', data_columns) ret = [] + ret.append(np.round(np.nanmean(sharpness), 2)) + ret.append(np.round(np.nanstd(sharpness), 2)) + ret.append(np.round(np.nanmean(resolution), 2)) + ret.append(np.round(np.nanstd(resolution), 2)) + ret.append(np.round(np.nanmean(coverage), 2)) + ret.append(np.round(np.nanstd(coverage), 2)) + ret.append(np.round(np.nanmean(times), 4)) + 
ret.append(np.round(np.nanstd(times), 4)) + ret.append(np.round(np.nanmean(q05), 4)) + ret.append(np.round(np.nanstd(q05), 4)) + ret.append(np.round(np.nanmean(q25), 4)) + ret.append(np.round(np.nanstd(q25), 4)) + ret.append(np.round(np.nanmean(q75), 4)) + ret.append(np.round(np.nanstd(q75), 4)) + ret.append(np.round(np.nanmean(q95), 4)) + ret.append(np.round(np.nanstd(q95), 4)) + return ret - for m in models: - for o in orders: - for s in schemes: - for p in partitions: - mod = [] - df = dat[(dat.Model == m) & (dat.Order == o) & (dat.Scheme == s) & (dat.Partitions == p)] - if not df.empty: - sharpness = extract_measure(df, 'Sharpness', data_columns) - resolution = extract_measure(df, 'Resolution', data_columns) - coverage = extract_measure(df, 'Coverage', data_columns) - times = extract_measure(df, 'TIME', data_columns) - q05 = extract_measure(df, 'Q05', data_columns) - q25 = extract_measure(df, 'Q25', data_columns) - q75 = extract_measure(df, 'Q75', data_columns) - q95 = extract_measure(df, 'Q95', data_columns) - mod.append(m) - mod.append(o) - mod.append(s) - mod.append(p) - mod.append(np.round(np.nanmean(sharpness), 2)) - mod.append(np.round(np.nanstd(sharpness), 2)) - mod.append(np.round(np.nanmean(resolution), 2)) - mod.append(np.round(np.nanstd(resolution), 2)) - mod.append(np.round(np.nanmean(coverage), 2)) - mod.append(np.round(np.nanstd(coverage), 2)) - mod.append(np.round(np.nanmean(times), 4)) - mod.append(np.round(np.nanstd(times), 4)) - mod.append(np.round(np.nanmean(q05), 4)) - mod.append(np.round(np.nanstd(q05), 4)) - mod.append(np.round(np.nanmean(q25), 4)) - mod.append(np.round(np.nanstd(q25), 4)) - mod.append(np.round(np.nanmean(q75), 4)) - mod.append(np.round(np.nanstd(q75), 4)) - mod.append(np.round(np.nanmean(q95), 4)) - mod.append(np.round(np.nanstd(q95), 4)) - ret.append(mod) - - dat = pd.DataFrame(ret, columns=interval_dataframe_synthetic_columns()) - dat.to_csv(outfile, sep=";", index=False) @@ -905,17 +905,14 @@ def save_dataframe_probabilistic(experiments, file, objs, crps, times, save, syn mod.append(mfts.partitioner.name) mod.append(mfts.partitioner.partitions) mod.append(len(mfts)) - mod.append(steps) - mod.append(method) else: mod.append('-') mod.append('-') mod.append('-') - mod.append(steps) - mod.append(method) + mod.append(steps[k]) + mod.append(method[k]) mod.append(np.round(np.nanmean(crps[k]), 2)) mod.append(np.round(np.nanstd(crps[k]), 2)) - mod.append(l) mod.append(np.round(np.nanmean(times[k]), 4)) mod.append(np.round(np.nanstd(times[k]), 4)) ret.append(mod) @@ -940,10 +937,12 @@ def save_dataframe_probabilistic(experiments, file, objs, crps, times, save, syn s = '-' p = '-' l = '-' - tmp = [n, o, s, p, l, steps, method, 'CRPS'] + st = steps[k] + mt = method[k] + tmp = [n, o, s, p, l, st, mt, 'CRPS'] tmp.extend(crps[k]) ret.append(deepcopy(tmp)) - tmp = [n, o, s, p, l, steps, method, 'TIME'] + tmp = [n, o, s, p, l, st, mt, 'TIME'] tmp.extend(times[k]) ret.append(deepcopy(tmp)) except Exception as ex: @@ -974,40 +973,15 @@ def probabilistic_dataframe_synthetic_columns(): return columns -def cast_dataframe_to_synthetic_probabilistic(infile, outfile, experiments): - columns = probabilistic_dataframe_analytic_columns(experiments) - dat = pd.read_csv(infile, sep=";", usecols=columns) - models = dat.Model.unique() - orders = dat.Order.unique() - schemes = dat.Scheme.unique() - partitions = dat.Partitions.unique() - - data_columns = analytical_data_columns(experiments) - +def cast_dataframe_to_synthetic_probabilistic(df, data_columns): + crps1 = 
extract_measure(df, 'CRPS', data_columns) + times1 = extract_measure(df, 'TIME', data_columns) ret = [] - - for m in models: - for o in orders: - for s in schemes: - for p in partitions: - mod = [] - df = dat[(dat.Model == m) & (dat.Order == o) & (dat.Scheme == s) & (dat.Partitions == p)] - if not df.empty: - crps1 = extract_measure(df, 'CRPS', data_columns) - times1 = extract_measure(df, 'TIME', data_columns) - mod.append(m) - mod.append(o) - mod.append(s) - mod.append(p) - mod.append(np.round(np.nanmean(crps1), 2)) - mod.append(np.round(np.nanstd(crps1), 2)) - mod.append(np.round(np.nanmean(times1), 2)) - mod.append(np.round(np.nanstd(times1), 2)) - ret.append(mod) - - dat = pd.DataFrame(ret, columns=probabilistic_dataframe_synthetic_columns()) - dat.to_csv(outfile, sep=";", index=False) - + ret.append(np.round(np.nanmean(crps1), 2)) + ret.append(np.round(np.nanstd(crps1), 2)) + ret.append(np.round(np.nanmean(times1), 2)) + ret.append(np.round(np.nanstd(times1), 2)) + return ret def unified_scaled_probabilistic(experiments, tam, save=False, file=None, diff --git a/pyFTS/benchmarks/benchmarks.py b/pyFTS/benchmarks/benchmarks.py index 409503f..3836756 100644 --- a/pyFTS/benchmarks/benchmarks.py +++ b/pyFTS/benchmarks/benchmarks.py @@ -81,6 +81,8 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs): partitioners_methods = __pop("partitioners_methods", [Grid.GridPartitioner], kwargs) partitions = __pop("partitions", [10], kwargs) + steps_ahead = __pop('steps_ahead', [1], kwargs) + methods = __pop('methods', None, kwargs) models = __pop('models', None, kwargs) @@ -178,27 +180,34 @@ def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs): else: partitioners_pool = partitioners_models - rng1 = partitioners_pool - + rng1 = steps_ahead if progress: - rng1 = tqdm(partitioners_pool, desc="Partitioners") + rng1 = tqdm(steps_ahead, desc="Steps") - for partitioner in rng1: - - rng2 = enumerate(pool,start=0) + for step in rng1: + rng2 = partitioners_pool if progress: - rng2 = enumerate(tqdm(pool, desc="Models"),start=0) + rng2 = tqdm(partitioners_pool, desc="Partitioners") - for _id, model in rng2: + for partitioner in rng2: - if not distributed: - job = experiment_method(deepcopy(model), deepcopy(partitioner), train, test, **kwargs) - jobs.append(job) - else: - job = cluster.submit(deepcopy(model), deepcopy(partitioner), train, test, **kwargs) - job.id = id # associate an ID to identify jobs (if needed later) - jobs.append(job) + rng3 = enumerate(pool,start=0) + + if progress: + rng3 = enumerate(tqdm(pool, desc="Models"),start=0) + + for _id, model in rng3: + + kwargs['steps_ahead'] = step + + if not distributed: + job = experiment_method(deepcopy(model), deepcopy(partitioner), train, test, **kwargs) + jobs.append(job) + else: + job = cluster.submit(deepcopy(model), deepcopy(partitioner), train, test, **kwargs) + job.id = id # associate an ID to identify jobs (if needed later) + jobs.append(job) if progress: progressbar.close() @@ -303,6 +312,9 @@ def run_point(mfts, partitioner, train_data, test_data, window_key=None, **kwarg _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions) mfts.partitioner = partitioner + _key += str(steps_ahead) + _key += str(method) if method is not None else "" + if transformation is not None: mfts.append_transformation(transformation) @@ -363,6 +375,9 @@ def run_interval(mfts, partitioner, train_data, test_data, window_key=None, **kw if transformation is not None: 
mfts.append_transformation(transformation) + _key += str(steps_ahead) + _key += str(method) if method is not None else "" + _start = time.time() mfts.fit(train_data, order=mfts.order, **kwargs) _end = time.time() @@ -421,6 +436,9 @@ def run_probabilistic(mfts, partitioner, train_data, test_data, window_key=None, _key = mfts.shortname + " n = " + str(mfts.order) + " " + pttr + " q = " + str(partitioner.partitions) mfts.partitioner = partitioner + _key += str(steps_ahead) + _key += str(method) if method is not None else "" + if transformation is not None: mfts.append_transformation(transformation) @@ -478,8 +496,8 @@ def process_point_jobs(jobs, experiments, save=False, file=None, sintetic=False) smape = {} u = {} times = {} - steps = None - method = None + steps = {} + method = {} for job in jobs: _key = job['key'] @@ -489,14 +507,16 @@ def process_point_jobs(jobs, experiments, save=False, file=None, sintetic=False) smape[_key] = [] u[_key] = [] times[_key] = [] - steps[_key] = job['steps'] - method[_key] = job['method'] + steps[_key] = [] + method[_key] = [] + steps[_key] = job['steps'] + method[_key] = job['method'] rmse[_key].append(job['rmse']) smape[_key].append(job['smape']) u[_key].append(job['u']) times[_key].append(job['time']) - return bUtil.save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, times, u) + return bUtil.save_dataframe_point(experiments, file, objs, rmse, save, sintetic, smape, times, u, steps, method) def process_interval_jobs(jobs, experiments, save=False, file=None, sintetic=False): @@ -509,6 +529,8 @@ def process_interval_jobs(jobs, experiments, save=False, file=None, sintetic=Fal q75 = {} q95 = {} times = {} + steps = {} + method = {} for job in jobs: _key = job['key'] @@ -522,6 +544,8 @@ def process_interval_jobs(jobs, experiments, save=False, file=None, sintetic=Fal q25[_key] = [] q75[_key] = [] q95[_key] = [] + steps[_key] = [] + method[_key] = [] sharpness[_key].append(job['sharpness']) resolution[_key].append(job['resolution']) @@ -531,16 +555,18 @@ def process_interval_jobs(jobs, experiments, save=False, file=None, sintetic=Fal q25[_key].append(job['Q25']) q75[_key].append(job['Q75']) q95[_key].append(job['Q95']) - - + steps[_key] = job['steps'] + method[_key] = job['method'] return bUtil.save_dataframe_interval(coverage, experiments, file, objs, resolution, save, sharpness, sintetic, - times, q05, q25, q75, q95) + times, q05, q25, q75, q95, steps, method) def process_probabilistic_jobs(jobs, experiments, save=False, file=None, sintetic=False): objs = {} crps = {} times = {} + steps = {} + method = {} for job in jobs: _key = job['key'] @@ -548,11 +574,15 @@ def process_probabilistic_jobs(jobs, experiments, save=False, file=None, sinteti objs[_key] = job['obj'] crps[_key] = [] times[_key] = [] + steps[_key] = [] + method[_key] = [] crps[_key].append(job['CRPS']) times[_key].append(job['time']) + steps[_key] = job['steps'] + method[_key] = job['method'] - return bUtil.save_dataframe_probabilistic(experiments, file, objs, crps, times, save, sintetic) + return bUtil.save_dataframe_probabilistic(experiments, file, objs, crps, times, save, sintetic, steps, method) diff --git a/pyFTS/common/fts.py b/pyFTS/common/fts.py index 13aec21..31f56ea 100644 --- a/pyFTS/common/fts.py +++ b/pyFTS/common/fts.py @@ -89,19 +89,22 @@ class FTS(object): steps_ahead = kwargs.get("steps_ahead", None) - if type == 'point' and (steps_ahead == None or steps_ahead == 1): - ret = self.forecast(ndata, **kwargs) - elif type == 'point' and steps_ahead > 1: - ret = 
self.forecast_ahead(ndata, steps_ahead, **kwargs) - elif type == 'interval' and (steps_ahead == None or steps_ahead == 1): - ret = self.forecast_interval(ndata, **kwargs) - elif type == 'interval' and steps_ahead > 1: - ret = self.forecast_ahead_interval(ndata, steps_ahead, **kwargs) - elif type == 'distribution' and (steps_ahead == None or steps_ahead == 1): - ret = self.forecast_distribution(ndata, **kwargs) - elif type == 'distribution' and steps_ahead > 1: - ret = self.forecast_ahead_distribution(ndata, steps_ahead, **kwargs) - else: + if steps_ahead == None or steps_ahead == 1: + if type == 'point': + ret = self.forecast(ndata, **kwargs) + elif type == 'interval': + ret = self.forecast_interval(ndata, **kwargs) + elif type == 'distribution': + ret = self.forecast_distribution(ndata, **kwargs) + elif steps_ahead > 1: + if type == 'point': + ret = self.forecast_ahead(ndata, steps_ahead, **kwargs) + elif type == 'interval': + ret = self.forecast_ahead_interval(ndata, steps_ahead, **kwargs) + elif type == 'distribution': + ret = self.forecast_ahead_distribution(ndata, steps_ahead, **kwargs) + + if not ['point', 'interval', 'distribution'].__contains__(type): raise ValueError('The argument \'type\' has an unknown value.') else: diff --git a/pyFTS/models/pwfts.py b/pyFTS/models/pwfts.py index 3980122..3fd3a09 100644 --- a/pyFTS/models/pwfts.py +++ b/pyFTS/models/pwfts.py @@ -372,6 +372,9 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): flrgs = self.generate_lhs_flrg(sample) + if 'type' in kwargs: + kwargs.pop('type') + dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs) for bin in _bins: @@ -409,7 +412,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): ret.append(ret[-1]) else: mp = self.forecast([ret[x] for x in np.arange(k - self.order, k)], **kwargs) - ret.append(mp) + ret.append(mp[0]) return ret[self.order:] @@ -427,13 +430,13 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): start = kwargs.get('start', self.order) - sample = data[start - (self.order - 1): start + 1] + sample = data[start - self.order: start] ret = [[k, k] for k in sample] for k in np.arange(self.order, steps+self.order): - if self.__check_interval_bounds(ret[-1]): + if len(ret) > 0 and self.__check_interval_bounds(ret[-1]): ret.append(ret[-1]) else: lower = self.forecast_interval([ret[x][0] for x in np.arange(k - self.order, k)], **kwargs) @@ -460,9 +463,11 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): start = kwargs.get('start', self.order) - sample = ndata[start - (self.order - 1): start + 1] + sample = ndata[start - self.order: start] for dat in sample: + if 'type' in kwargs: + kwargs.pop('type') tmp = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs) tmp.set(dat, 1.0) ret.append(tmp) diff --git a/pyFTS/probabilistic/ProbabilityDistribution.py b/pyFTS/probabilistic/ProbabilityDistribution.py index ea50ab9..ce9b78d 100644 --- a/pyFTS/probabilistic/ProbabilityDistribution.py +++ b/pyFTS/probabilistic/ProbabilityDistribution.py @@ -11,7 +11,7 @@ class ProbabilityDistribution(object): If type is histogram, the PDF is discrete If type is KDE the PDF is continuous """ - def __init__(self,type = "KDE", **kwargs): + def __init__(self, type = "KDE", **kwargs): self.uod = kwargs.get("uod", None) self.type = type diff --git a/pyFTS/tests/general.py b/pyFTS/tests/general.py index 0d2f916..38b03f9 100644 --- a/pyFTS/tests/general.py +++ b/pyFTS/tests/general.py @@ -19,14 +19,15 @@ from pyFTS.benchmarks import benchmarks as bchmk 
from pyFTS.models import pwfts -''' +#''' bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2, methods=[pwfts.ProbabilisticWeightedFTS], benchmark_models=False, orders=[1], partitions=[10], #np.arange(10,100,2), - progress=False, type='distribution', - distributed=False, nodes=['192.168.0.106', '192.168.0.105', '192.168.0.110'], - save=True, file="pwfts_taiex_interval.csv") -''' + progress=False, type='distribution', steps_ahead=[1,4,7,10], + #distributed=False, nodes=['192.168.0.106', '192.168.0.105', '192.168.0.110'], + save=True, file="pwfts_taiex_distribution.csv") +#''' +''' train_split = 2000 test_length = 200 @@ -55,7 +56,7 @@ tmp = pfts1_taiex.predict(dataset[train_split:train_split+200], type='point', print(tmp) - +''' ''' tmp = pfts1_taiex.predict(dataset[train_split:train_split+200], type='distribution',
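For reference, a minimal usage sketch (not part of this diff) of how the new multi-horizon support could be driven end to end for point forecasts. The TAIEX loader and the output file name are illustrative assumptions; the remaining arguments mirror the call in pyFTS/tests/general.py above:

from pyFTS.data import TAIEX  # assumed dataset helper, not part of this change
from pyFTS.models import pwfts
from pyFTS.benchmarks import benchmarks as bchmk

dataset = TAIEX.get_data()

# One benchmark job is generated per (steps_ahead, partitioner, model) combination,
# so this run evaluates PWFTS at horizons 1, 4, 7 and 10 on every sliding window.
bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2,
                                methods=[pwfts.ProbabilisticWeightedFTS],
                                benchmark_models=False,
                                orders=[1], partitions=[10],
                                type='point',
                                steps_ahead=[1, 4, 7, 10],
                                save=True, file="pwfts_taiex_point.csv")

The resulting analytic CSV can then be condensed per (model, order, scheme, partitions, steps, method) group with the new cast_dataframe_to_synthetic(infile, outfile, experiments, type='point') helper introduced in pyFTS/benchmarks/Util.py.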