From 35898f338a887e29337cafa7ab6dba484cf86618 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido?=
Date: Sat, 22 Jun 2019 09:20:53 -0300
Subject: [PATCH] Improvements for numerical stability

---
 pyFTS/benchmarks/Measures.py                  |  26 +-
 pyFTS/benchmarks/benchmarks.py                | 274 ++++++++++++++++++
 pyFTS/common/Util.py                          |  15 +-
 pyFTS/models/ensemble/ensemble.py             |   2 +-
 pyFTS/models/ifts.py                          |  12 +-
 pyFTS/models/multivariate/grid.py             |   4 +-
 pyFTS/models/multivariate/mvfts.py            |   2 +-
 pyFTS/models/pwfts.py                         |  34 +--
 pyFTS/partitioners/partitioner.py             |   4 +-
 .../probabilistic/ProbabilityDistribution.py  |  34 ++-
 pyFTS/tests/multivariate.py                   |  78 +++--
 pyFTS/tests/pwfts.py                          |  95 +++---
 12 files changed, 432 insertions(+), 148 deletions(-)

diff --git a/pyFTS/benchmarks/Measures.py b/pyFTS/benchmarks/Measures.py
index 26c2260..a4b2d12 100644
--- a/pyFTS/benchmarks/Measures.py
+++ b/pyFTS/benchmarks/Measures.py
@@ -89,7 +89,7 @@ def smape(targets, forecasts, type=2):
     elif type == 2:
         return np.nanmean(np.abs(forecasts - targets) / (np.abs(forecasts) + abs(targets))) * 100
     else:
-        return np.sum(np.abs(forecasts - targets)) / np.sum(forecasts + targets)
+        return np.nansum(np.abs(forecasts - targets)) / np.nansum(forecasts + targets)
 
 
 def mape_interval(targets, forecasts):
@@ -124,7 +124,7 @@ def UStatistic(targets, forecasts):
     for k in np.arange(0, l - 1):
         y.append(np.subtract(forecasts[k], targets[k]) ** 2)
         naive.append(np.subtract(targets[k + 1], targets[k]) ** 2)
-    return np.sqrt(np.divide(np.sum(y), np.sum(naive)))
+    return np.sqrt(np.divide(np.nansum(y), np.nansum(naive)))
 
 
 def TheilsInequality(targets, forecasts):
@@ -137,9 +137,9 @@ def TheilsInequality(targets, forecasts):
     """
     res = targets - forecasts
     t = len(res)
-    us = np.sqrt(sum([u ** 2 for u in res]))
-    ys = np.sqrt(sum([y ** 2 for y in targets]))
-    fs = np.sqrt(sum([f ** 2 for f in forecasts]))
+    us = np.sqrt(np.nansum([u ** 2 for u in res]))
+    ys = np.sqrt(np.nansum([y ** 2 for y in targets]))
+    fs = np.sqrt(np.nansum([f ** 2 for f in forecasts]))
     return us / (ys + fs)
 
 
@@ -255,12 +255,12 @@ def brier_score(targets, densities):
         try:
             v = d.bin_index.find_le(targets[ct])
-            score = sum([d.density(k) ** 2 for k in d.bins if k != v])
+            score = np.nansum([d.density(k) ** 2 for k in d.bins if k != v])
             score += (d.density(v) - 1) ** 2
             ret.append(score)
         except ValueError as ex:
-            ret.append(sum([d.density(k) ** 2 for k in d.bins]))
-    return sum(ret) / len(ret)
+            ret.append(np.nansum([d.density(k) ** 2 for k in d.bins]))
+    return np.nansum(ret) / len(ret)
 
 
 def logarithm_score(targets, densities):
@@ -301,7 +301,7 @@ def crps(targets, densities):
     n = len(densities)
 
     for ct, df in enumerate(densities):
-        _crps += sum([(df.cumulative(bin) - (1 if bin >= targets[ct] else 0)) ** 2 for bin in df.bins])
+        _crps += np.nansum([(df.cumulative(bin) - (1 if bin >= targets[ct] else 0)) ** 2 for bin in df.bins])
 
     return _crps / n
 
@@ -419,6 +419,8 @@ def get_interval_statistics(data, model, **kwargs):
         forecasts = model.predict(data, **kwargs)
         ret.append(round(sharpness(forecasts), 2))
         ret.append(round(resolution(forecasts), 2))
+        if model.is_multivariate:
+            data = data[model.target_variable.data_label].values
         ret.append(round(coverage(data[model.max_lag:], forecasts[:-1]), 2))
         ret.append(round(pinball_mean(0.05, data[model.max_lag:], forecasts[:-1]), 2))
         ret.append(round(pinball_mean(0.25, data[model.max_lag:], forecasts[:-1]), 2))
@@ -436,6 +438,8 @@ def get_interval_statistics(data, model, **kwargs):
         start = model.max_lag + steps_ahead - 1
         ret.append(round(sharpness(forecasts), 2))
         ret.append(round(resolution(forecasts), 2))
+        if model.is_multivariate:
+            data = data[model.target_variable.data_label].values
         ret.append(round(coverage(data[model.max_lag:], forecasts), 2))
         ret.append(round(pinball_mean(0.05, data[start:], forecasts), 2))
         ret.append(round(pinball_mean(0.25, data[start:], forecasts), 2))
@@ -502,6 +506,8 @@ def get_distribution_statistics(data, model, **kwargs):
         _s1 = time.time()
         forecasts = model.predict(data, **kwargs)
         _e1 = time.time()
+        if model.is_multivariate:
+            data = data[model.target_variable.data_label].values
         ret.append(round(crps(data[model.max_lag:], forecasts[:-1]), 3))
         ret.append(round(_e1 - _s1, 3))
         ret.append(round(brier_score(data[model.max_lag:], forecasts[:-1]), 3))
@@ -516,6 +522,8 @@ def get_distribution_statistics(data, model, **kwargs):
         _e1 = time.time()
 
         start = model.max_lag + steps_ahead
+        if model.is_multivariate:
+            data = data[model.target_variable.data_label].values
         ret.append(round(crps(data[start:-1:skip], forecasts), 3))
         ret.append(round(_e1 - _s1, 3))
         ret.append(round(brier_score(data[start:-1:skip], forecasts), 3))
diff --git a/pyFTS/benchmarks/benchmarks.py b/pyFTS/benchmarks/benchmarks.py
index 8e6459e..00990a0 100644
--- a/pyFTS/benchmarks/benchmarks.py
+++ b/pyFTS/benchmarks/benchmarks.py
@@ -81,6 +81,105 @@ def get_benchmark_probabilistic_methods():
     return [arima.ARIMA, quantreg.QuantileRegression, knn.KNearestNeighbors]
 
 
+def multivariate_sliding_window_benchmarks2(data, windowsize, train=0.8, **kwargs):
+    from pyFTS.models.multivariate import common, variable, mvfts
+
+    tag = __pop('tag', None, kwargs)
+    dataset = __pop('dataset', None, kwargs)
+
+    distributed = __pop('distributed', False, kwargs)
+
+    variables = __pop('variables', {}, kwargs)
+
+    target_variable = __pop('target_variable', '', kwargs)
+
+    type = kwargs.get("type", 'point')
+
+    steps_ahead = __pop('steps_ahead', [1], kwargs)
+
+    steps_ahead = [k for k in steps_ahead]
+
+    fts_methods = __pop('methods', [], kwargs)
+
+    if fts_methods is not None:
+        methods_parameters = __pop('methods_parameters', None, kwargs)
+
+    if type == 'point':
+        experiment_method = mv_run_point2
+        synthesis_method = process_point_jobs2
+    elif type == 'interval':
+        experiment_method = mv_run_interval2
+        synthesis_method = process_interval_jobs2
+    elif type == 'distribution':
+        experiment_method = mv_run_probabilistic2
+        synthesis_method = process_probabilistic_jobs2
+    else:
+        raise ValueError("Type parameter has an unknown value!")
+
+    if distributed:
+        import pyFTS.distributed.dispy as dispy
+
+        nodes = kwargs.get("nodes", ['127.0.0.1'])
+        cluster, http_server = dispy.start_dispy_cluster(experiment_method, nodes)
+
+    inc = __pop("inc", 0.1, kwargs)
+
+    file = kwargs.get('file', "benchmarks.db")
+
+    conn = bUtil.open_benchmark_db(file)
+
+    jobs = []
+    for ct, train, test in cUtil.sliding_window(data, windowsize, train, inc=inc, **kwargs):
+        for id, fts_method in enumerate(fts_methods):
+
+            kwargs['steps_ahead'] = max(steps_ahead)
+            parameters = {}
+            if methods_parameters is not None:
+                parameters = methods_parameters[id]
+
+            vars = []
+            tvar = None
+
+            for key, value in variables.items():
+                var = variable.Variable(key, data=train, **value)
+                vars.append(var)
+                if key == target_variable:
+                    tvar = var
+
+            model = fts_method(explanatory_variables=vars, target_variable=tvar,
+                               **parameters)
+
+            if not distributed:
+                try:
+                    job = experiment_method(model, train, test, ct, **kwargs)
+                    synthesis_method(dataset, tag, job, conn)
+                except Exception as ex:
+                    print('EXCEPTION! ', fts_method)
+                    traceback.print_exc()
+            else:
+                job = cluster.submit(model, train, test, ct, **kwargs)
+                job.id = id
+                jobs.append(job)
+
+    if distributed:
+        for job in jobs:
+            job()
+            if job.status == dispy.dispy.DispyJob.Finished and job is not None:
+                tmp = job.result
+                synthesis_method(dataset, tag, tmp, conn)
+            else:
+                print("status", job.status)
+                print("result", job.result)
+                print("stdout", job.stdout)
+                print("stderr", job.exception)
+
+        cluster.wait()  # wait for all jobs to finish
+
+        dispy.stop_dispy_cluster(cluster, http_server)
+
+    conn.close()
+
+
 def sliding_window_benchmarks2(data, windowsize, train=0.8, **kwargs):
     tag = __pop('tag', None, kwargs)
     dataset = __pop('dataset', None, kwargs)
@@ -630,6 +729,63 @@ def __build_model(fts_method, order, parameters, partitioner_method, partitions,
     return mfts, pttr
 
 
+def mv_run_point2(mfts, train_data, test_data, window_key=None, **kwargs):
+    import time
+    from pyFTS.models import hofts, ifts, pwfts
+    from pyFTS.models.multivariate import mvfts, wmvfts, granular
+    from pyFTS.partitioners import Grid, Entropy, FCM
+    from pyFTS.benchmarks import Measures, arima, quantreg, BSTS, benchmarks
+
+    tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, ifts.WeightedIntervalFTS, pwfts.ProbabilisticWeightedFTS]
+
+    tmp2 = [Grid.GridPartitioner, mvfts.MVFTS, wmvfts.WeightedMVFTS, granular.GranularWMVFTS]
+
+    tmp4 = [arima.ARIMA, quantreg.QuantileRegression, BSTS.ARIMA]
+
+    tmp3 = [Measures.get_interval_statistics]
+
+    steps_ahead = kwargs.get('steps_ahead', 1)
+    method = kwargs.get('method', None)
+
+    _start = time.time()
+    mfts.fit(train_data, **kwargs)
+    _end = time.time()
+    times = _end - _start
+
+    if steps_ahead == 1:
+
+        _start = time.time()
+        _rmse, _smape, _u = Measures.get_point_statistics(test_data, mfts, **kwargs)
+        _end = time.time()
+        times += _end - _start
+
+        ret = {'model': mfts.shortname, 'partitioner': None, 'order': mfts.order, 'partitions': None,
+               'transformation': None,
+               'size': len(mfts), 'time': times,
+               'rmse': _rmse, 'smape': _smape, 'u': _u, 'window': window_key,
+               'steps': steps_ahead, 'method': method}
+    else:
+        _start = time.time()
+        forecasts = mfts.predict(test_data, **kwargs)
+        _end = time.time()
+        times += _end - _start
+
+        eval = Measures.get_point_ahead_statistics(test_data[mfts.order:mfts.order + steps_ahead], forecasts)
+
+        for key in eval.keys():
+            eval[key]["time"] = times
+            eval[key]["method"] = method
+
+        ret = {'model': mfts.shortname, 'partitioner': None, 'order': mfts.order, 'partitions': None,
+               'transformation': None,
+               'size': len(mfts), 'time': times,
+               'window': window_key, 'steps': steps_ahead, 'method': method,
+               'ahead_results': eval
+               }
+
+    return ret
+
+
 def run_point2(fts_method, order, partitioner_method, partitions, transformation, train_data, test_data, window_key=None, **kwargs):
     import time
 
@@ -698,6 +854,67 @@ def run_point2(fts_method, order, partitioner_method, partitions, transformation
     return ret
 
 
+def mv_run_interval2(mfts, train_data, test_data, window_key=None, **kwargs):
+    import time
+    from pyFTS.models import hofts, ifts, pwfts
+    from pyFTS.models.multivariate import mvfts, wmvfts, granular
+    from pyFTS.partitioners import Grid, Entropy, FCM
+    from pyFTS.benchmarks import Measures, arima, quantreg, BSTS, benchmarks
+
+    tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, ifts.WeightedIntervalFTS, pwfts.ProbabilisticWeightedFTS]
+
+    tmp2 = [Grid.GridPartitioner, mvfts.MVFTS, wmvfts.WeightedMVFTS, granular.GranularWMVFTS]
+
+    tmp4 = [arima.ARIMA, quantreg.QuantileRegression, BSTS.ARIMA]
+
+    tmp3 = [Measures.get_interval_statistics]
+
+    steps_ahead = kwargs.get('steps_ahead', 1)
+    method = kwargs.get('method', None)
+    parameters = kwargs.get('parameters', {})
+
+    _start = time.time()
+    mfts.fit(train_data, **kwargs)
+    _end = time.time()
+    times = _end - _start
+
+    if steps_ahead == 1:
+
+        _start = time.time()
+        metrics = Measures.get_interval_statistics(test_data, mfts, **kwargs)
+        _end = time.time()
+        times += _end - _start
+
+        ret = {'model': mfts.shortname, 'partitioner': None, 'order': mfts.order, 'partitions': None,
+               'transformation': None,
+               'size': len(mfts), 'time': times,
+               'sharpness': metrics[0], 'resolution': metrics[1], 'coverage': metrics[2],
+               'pinball05': metrics[3], 'pinball25': metrics[4], 'pinball75': metrics[5], 'pinball95': metrics[6],
+               'winkler05': metrics[7], 'winkler25': metrics[8],
+               'window': window_key, 'steps': steps_ahead, 'method': method}
+    else:
+        _start = time.time()
+        intervals = mfts.predict(test_data, **kwargs)
+        _end = time.time()
+        times += _end - _start
+
+        eval = Measures.get_interval_ahead_statistics(test_data[mfts.order:mfts.order+steps_ahead], intervals)
+
+        for key in eval.keys():
+            eval[key]["time"] = times
+            eval[key]["method"] = method
+
+        ret = {'model': mfts.shortname, 'partitioner': None, 'order': mfts.order, 'partitions': None,
+               'transformation': None,
+               'size': len(mfts), 'time': times,
+               'window': window_key, 'steps': steps_ahead, 'method': method,
+               'ahead_results': eval
+               }
+
+    return ret
+
+
 def run_interval2(fts_method, order, partitioner_method, partitions, transformation, train_data, test_data, window_key=None, **kwargs):
     import time
     from pyFTS.models import hofts,ifts,pwfts
@@ -760,6 +977,63 @@ def run_interval2(fts_method, order, partitioner_method, partitions, transformat
     return ret
 
 
+def mv_run_probabilistic2(mfts, train_data, test_data, window_key=None, **kwargs):
+    import time
+    from pyFTS.models import hofts, ifts, pwfts
+    from pyFTS.models.multivariate import mvfts, wmvfts, granular
+    from pyFTS.partitioners import Grid, Entropy, FCM
+    from pyFTS.benchmarks import Measures, arima, quantreg, BSTS, benchmarks
+
+    tmp = [hofts.HighOrderFTS, ifts.IntervalFTS, ifts.WeightedIntervalFTS, pwfts.ProbabilisticWeightedFTS]
+
+    tmp2 = [Grid.GridPartitioner, mvfts.MVFTS, wmvfts.WeightedMVFTS, granular.GranularWMVFTS]
+
+    tmp4 = [arima.ARIMA, quantreg.QuantileRegression, BSTS.ARIMA]
+
+    tmp3 = [Measures.get_interval_statistics]
+
+    steps_ahead = kwargs.get('steps_ahead', 1)
+    method = kwargs.get('method', None)
+    parameters = kwargs.get('parameters', {})
+
+    _start = time.time()
+    mfts.fit(train_data, **kwargs)
+    _end = time.time()
+    times = _end - _start
+
+    if steps_ahead == 1:
+
+        _crps1, _t1, _brier = Measures.get_distribution_statistics(test_data, mfts, **kwargs)
+        times += _t1
+
+        ret = {'model': mfts.shortname, 'partitioner': None, 'order': mfts.order, 'partitions': None,
+               'transformation': None,
+               'size': len(mfts), 'time': times,
+               'crps': _crps1, 'brier': _brier, 'window': window_key,
+               'steps': steps_ahead, 'method': method}
+    else:
+        _start = time.time()
+        distributions = mfts.predict(test_data, **kwargs)
+        _end = time.time()
+        times += _end - _start
+
+        eval = Measures.get_distribution_ahead_statistics(test_data[mfts.order:mfts.order+steps_ahead], distributions)
+
+        for key in eval.keys():
+            eval[key]["time"] = times
+            eval[key]["method"] = method
+
+        ret = {'model': mfts.shortname, 'partitioner': None, 'order': mfts.order, 'partitions': None,
+               'transformation': None,
+               'size': len(mfts), 'time': times,
+               'window': window_key, 'steps': steps_ahead, 'method': method,
+               'ahead_results': eval
+               }
+
+    return ret
+
+
 def run_probabilistic2(fts_method, order, partitioner_method, partitions, transformation, train_data, test_data, window_key=None, **kwargs):
     import time
     import numpy as np
diff --git a/pyFTS/common/Util.py b/pyFTS/common/Util.py
index 174c009..484db71 100644
--- a/pyFTS/common/Util.py
+++ b/pyFTS/common/Util.py
@@ -202,13 +202,16 @@ def plot_distribution2(probabilitydist, data, **kwargs):
     scalarMap = cm.ScalarMappable(norm=normal, cmap=cmap)
 
     for ct in np.arange(1, int(lq / 2) + 1):
-        y = [[data[start_at], data[start_at]]]
-        for pd in probabilitydist:
-            qts = pd.quantile([qt[ct - 1], qt[-ct]])
-            y.append(qts)
+        try:
+            y = [[data[start_at], data[start_at]]]
+            for pd in probabilitydist:
+                qts = pd.quantile([qt[ct - 1], qt[-ct]])
+                y.append(qts)
 
-        ax.fill_between(x, [k[0] for k in y], [k[1] for k in y],
-                        facecolor=scalarMap.to_rgba(ct / lq))
+            ax.fill_between(x, [k[0] for k in y], [k[1] for k in y],
+                            facecolor=scalarMap.to_rgba(ct / lq))
+        except Exception:
+            pass
 
     if kwargs.get('median',True):
         y = [data[start_at]]
diff --git a/pyFTS/models/ensemble/ensemble.py b/pyFTS/models/ensemble/ensemble.py
index 729b111..7bd7670 100644
--- a/pyFTS/models/ensemble/ensemble.py
+++ b/pyFTS/models/ensemble/ensemble.py
@@ -110,7 +110,7 @@ class EnsembleFTS(fts.FTS):
             ret = np.nanpercentile(forecasts, 50)
         elif self.point_method == 'quantile':
             alpha = kwargs.get("alpha",0.05)
-            ret = np.percentile(forecasts, alpha*100)
+            ret = np.nanpercentile(forecasts, alpha*100)
         elif self.point_method == 'exponential':
             l = len(self.models)
             if l == 1:
diff --git a/pyFTS/models/ifts.py b/pyFTS/models/ifts.py
index 20855ed..f0dd21a 100644
--- a/pyFTS/models/ifts.py
+++ b/pyFTS/models/ifts.py
@@ -80,9 +80,9 @@ class IntervalFTS(hofts.HighOrderFTS):
                 affected_flrgs_memberships.append(mv)
 
             # gerar o intervalo
-            norm = sum(affected_flrgs_memberships)
-            lo_ = sum(lo) / norm
-            up_ = sum(up) / norm
+            norm = np.nansum(affected_flrgs_memberships)
+            lo_ = np.nansum(lo) / norm
+            up_ = np.nansum(up) / norm
             ret.append([lo_, up_])
 
         return ret
@@ -165,9 +165,9 @@ class WeightedIntervalFTS(hofts.WeightedHighOrderFTS):
                 affected_flrgs_memberships.append(mv)
 
             # gerar o intervalo
-            norm = sum(affected_flrgs_memberships)
-            lo_ = sum(lo) / norm
-            up_ = sum(up) / norm
+            norm = np.nansum(affected_flrgs_memberships)
+            lo_ = np.nansum(lo) / norm
+            up_ = np.nansum(up) / norm
             ret.append([lo_, up_])
 
         return ret
diff --git a/pyFTS/models/multivariate/grid.py b/pyFTS/models/multivariate/grid.py
index 078520c..a9b9a04 100644
--- a/pyFTS/models/multivariate/grid.py
+++ b/pyFTS/models/multivariate/grid.py
@@ -43,7 +43,7 @@ class GridCluster(partitioner.MultivariatePartitioner):
             for fset, mv in val:
                 num.append(self.sets[fset].centroid * mv)
                 den.append(mv)
-            ret.append(np.sum(num) / np.sum(den))
+            ret.append(np.nansum(num) / np.nansum(den))
         elif mode == 'both':
             num = np.mean([self.sets[fset].centroid for fset in val])
             ret.append(num)
@@ -53,7 +53,7 @@ class GridCluster(partitioner.MultivariatePartitioner):
             for fset, mv in enumerate(val):
                 num.append(self.sets[self.ordered_sets[fset]].centroid * mv)
                 den.append(mv)
-            ret.append(np.sum(num) / np.sum(den))
+            ret.append(np.nansum(num) / np.nansum(den))
         else:
             raise Exception('Unknown deffuzyfication mode')
diff --git a/pyFTS/models/multivariate/mvfts.py b/pyFTS/models/multivariate/mvfts.py
index 1d2e627..f688638 100644
--- a/pyFTS/models/multivariate/mvfts.py
+++ b/pyFTS/models/multivariate/mvfts.py
@@ -152,7 +152,7 @@ class MVFTS(fts.FTS):
             mv = np.array(mvs)
             mp = np.array(mps)
 
-            ret.append(np.dot(mv,mp.T)/np.sum(mv))
+            ret.append(np.dot(mv,mp.T)/np.nansum(mv))
 
         ret = self.target_variable.apply_inverse_transformations(ret,
                                                                  params=data[self.target_variable.data_label].values)
diff --git a/pyFTS/models/pwfts.py b/pyFTS/models/pwfts.py
index ea92c88..8523027 100644
--- a/pyFTS/models/pwfts.py
+++ b/pyFTS/models/pwfts.py
@@ -73,21 +73,21 @@ class ProbabilisticWeightedFLRG(hofts.HighOrderFLRG):
     def get_midpoint(self, sets):
         '''Return the expectation of the PWFLRG, the weighted sum'''
         if self.midpoint is None:
-            self.midpoint = np.sum(np.array([self.rhs_unconditional_probability(s) * sets[s].centroid
+            self.midpoint = np.nansum(np.array([self.rhs_unconditional_probability(s) * sets[s].centroid
                                              for s in self.RHS.keys()]))
 
         return self.midpoint
 
     def get_upper(self, sets):
         if self.upper is None:
-            self.upper = np.sum(np.array([self.rhs_unconditional_probability(s) * sets[s].upper
+            self.upper = np.nansum(np.array([self.rhs_unconditional_probability(s) * sets[s].upper
                                           for s in self.RHS.keys()]))
 
         return self.upper
 
     def get_lower(self, sets):
         if self.lower is None:
-            self.lower = np.sum(np.array([self.rhs_unconditional_probability(s) * sets[s].lower
+            self.lower = np.nansum(np.array([self.rhs_unconditional_probability(s) * sets[s].lower
                                           for s in self.RHS.keys()]))
 
         return self.lower
@@ -159,7 +159,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
                     self.flrgs[flrg.get_key()].append_rhs(set, count=lhs_mv * mv)
                     mvs.append(mv)
 
-                tmp_fq = sum([lhs_mv * kk for kk in mvs if kk > 0])
+                tmp_fq = np.nansum([lhs_mv * kk for kk in mvs if kk > 0])
 
                 self.global_frequency_count += tmp_fq
 
@@ -226,7 +226,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
                 self.flrgs[flrg.get_key()].append_rhs(set, count=lhs_mv * mv)
                 mvs.append(mv)
 
-            tmp_fq = sum([lhs_mv*kk for kk in mvs if kk > 0])
+            tmp_fq = np.nansum([lhs_mv*kk for kk in mvs if kk > 0])
 
             self.global_frequency_count += tmp_fq
 
@@ -268,7 +268,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
         else:
             if len(flrg.LHS) > 0:
                 pi = 1 / len(flrg.LHS)
-                ret = sum(np.array([pi * self.partitioner.sets[s].centroid for s in flrg.LHS]))
+                ret = np.nansum(np.array([pi * self.partitioner.sets[s].centroid for s in flrg.LHS]))
             else:
                 ret = np.nan
         return ret
@@ -282,10 +282,10 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
                 _set = self.partitioner.sets[s]
                 tmp = _flrg.rhs_unconditional_probability(s) * (_set.membership(x) / _set.partition_function(uod=self.get_UoD()))
                 cond.append(tmp)
-            ret = sum(np.array(cond))
+            ret = np.nansum(np.array(cond))
         else:
             pi = 1 / len(flrg.LHS)
-            ret = sum(np.array([pi * self.partitioner.sets[s].membership(x) for s in flrg.LHS]))
+            ret = np.nansum(np.array([pi * self.partitioner.sets[s].membership(x) for s in flrg.LHS]))
         return ret
 
     def get_upper(self, flrg):
@@ -307,12 +307,12 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
     def forecast(self, data, **kwargs):
         method = kwargs.get('method','heuristic')
 
-        l = len(data)
+        l = len(data)+1
 
         ret = []
 
-        for k in np.arange(self.max_lag - 1, l):
-            sample = data[k - (self.max_lag - 1): k + 1]
+        for k in np.arange(self.max_lag, l):
+            sample = data[k - self.max_lag: k]
 
             if method == 'heuristic':
                 ret.append(self.point_heuristic(sample, **kwargs))
@@ -360,9 +360,9 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
                 mp.append(norm * self.get_midpoint(flrg))
                 norms.append(norm)
 
-            norm = sum(norms)
+            norm = np.nansum(norms)
 
-            final = sum(mp) / norm if norm != 0 else 0
+            final = np.nansum(mp) / norm if norm != 0 else 0
 
             if explain:
                 print("Deffuzyfied value: {} \n".format(final))
@@ -431,12 +431,12 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
             norms.append(norm)
 
         # gerar o intervalo
-        norm = sum(norms)
+        norm = np.nansum(norms)
         if norm == 0:
             return [0, 0]
         else:
-            lo_ = sum(lo) / norm
-            up_ = sum(up) / norm
+            lo_ = np.nansum(lo) / norm
+            up_ = np.nansum(up) / norm
             return [lo_, up_]
 
     def forecast_distribution(self, ndata, **kwargs):
@@ -496,7 +496,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
                 else:
                     num.append(0.0)
                     den.append(0.000000001)
-            pf = sum(num) / sum(den)
+            pf = np.nansum(num) / np.nansum(den)
 
             dist.set(bin, pf)
 
diff --git a/pyFTS/partitioners/partitioner.py b/pyFTS/partitioners/partitioner.py
index 8621f63..dfb159e 100644
--- a/pyFTS/partitioners/partitioner.py
+++ b/pyFTS/partitioners/partitioner.py
@@ -202,9 +202,9 @@ class Partitioner(object):
             raise Exception('Unknown deffuzyfication mode')
 
         if mode in ('both','vector'):
-            return np.sum(num) / np.sum(den)
+            return np.nansum(num) / np.nansum(den)
         else:
-            return np.mean(num)
+            return np.nanmean(num)
 
     def check_bounds(self, data):
         """
diff --git a/pyFTS/probabilistic/ProbabilityDistribution.py b/pyFTS/probabilistic/ProbabilityDistribution.py
index cbea1cd..4634ed6 100644
--- a/pyFTS/probabilistic/ProbabilityDistribution.py
+++ b/pyFTS/probabilistic/ProbabilityDistribution.py
@@ -209,11 +209,17 @@ class ProbabilityDistribution(object):
         if isinstance(values, list):
             ret = []
             for val in values:
-                k = self.bin_index.find_ge(val)
-                ret.append(self.cdf[k])
+                try:
+                    k = self.bin_index.find_ge(val)
+                    ret.append(self.cdf[k])
+                except Exception:
+                    ret.append(np.nan)
         else:
-            k = self.bin_index.find_ge(values)
-            return self.cdf[values]
+            try:
+                k = self.bin_index.find_ge(values)
+                return self.cdf[k]
+            except Exception:
+                return np.nan
 
     def quantile(self, values):
         """
@@ -229,11 +235,17 @@ class ProbabilityDistribution(object):
         if isinstance(values, list):
             ret = []
             for val in values:
-                k = self.quantile_index.find_ge(val)
-                ret.append(self.qtl[str(k)][0])
+                try:
+                    k = self.quantile_index.find_ge(val)
+                    ret.append(self.qtl[str(k)][0])
+                except Exception:
+                    ret.append(np.nan)
         else:
-            k = self.quantile_index.find_ge(values)
-            ret = self.qtl[str(k)]
+            try:
+                k = self.quantile_index.find_ge(values)
+                ret = self.qtl[str(k)]
+            except Exception:
+                return np.nan
 
         return ret
 
@@ -243,7 +255,7 @@ class ProbabilityDistribution(object):
 
         :return:the entropy of the probability distribution
         """
-        h = -sum([self.distribution[k] * np.log(self.distribution[k]) if self.distribution[k] > 0 else 0
+        h = -np.nansum([self.distribution[k] * np.log(self.distribution[k]) if self.distribution[k] > 0 else 0
                   for k in self.bins])
         return h
 
@@ -255,7 +267,7 @@ class ProbabilityDistribution(object):
         :param q: a probabilistic.ProbabilityDistribution object
         :return: Cross entropy between this probability distribution and the given distribution
         """
-        h = -sum([self.distribution[k] * np.log(q.distribution[k]) if self.distribution[k] > 0 else 0
+        h = -np.nansum([self.distribution[k] * np.log(q.distribution[k]) if self.distribution[k] > 0 else 0
                   for k in self.bins])
         return h
 
@@ -267,7 +279,7 @@ class ProbabilityDistribution(object):
         :param q: a probabilistic.ProbabilityDistribution object
         :return: Kullback-Leibler divergence
         """
-        h = sum([self.distribution[k] * np.log(self.distribution[k]/q.distribution[k]) if self.distribution[k] > 0 else 0
+        h = np.nansum([self.distribution[k] * np.log(self.distribution[k]/q.distribution[k]) if self.distribution[k] > 0 else 0
                  for k in self.bins])
         return h
 
diff --git a/pyFTS/tests/multivariate.py b/pyFTS/tests/multivariate.py
index a3ba783..ad11d25 100644
--- a/pyFTS/tests/multivariate.py
+++ b/pyFTS/tests/multivariate.py
@@ -7,13 +7,12 @@ from pyFTS.partitioners import Grid, Simple, Entropy
 from pyFTS.common import Util
 
 from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid, granular
-from pyFTS.benchmarks import Measures
+from pyFTS.benchmarks import benchmarks as bchmk, Measures
 from pyFTS.common import Util as cUtil
-
+from pyFTS.models import pwfts
 from pyFTS.models.seasonal import partitioner as seasonal
 from pyFTS.models.seasonal.common import DateTime
-
 from pyFTS.models.multivariate import common, variable, mvfts
 from pyFTS.partitioners import Grid
 from pyFTS.common import Membership
 
@@ -22,54 +21,45 @@ from pyFTS.common import Membership
 
 import os
 
-dataset = pd.read_csv('https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj', sep=';')
-
-dataset['data'] = pd.to_datetime(dataset["data"], format='%Y-%m-%d %H:%M:%S')
-
-train_uv = dataset['glo_avg'].values[:24505]
-test_uv = dataset['glo_avg'].values[24505:]
-
-train_mv = dataset.iloc[:24505]
-test_mv = dataset.iloc[24505:]
-
-from pyFTS.models.multivariate import common, variable, mvfts
-from pyFTS.models.seasonal import partitioner as seasonal
-from pyFTS.models.seasonal.common import DateTime
-
-from itertools import product
-
-levels = ['VL', 'L', 'M', 'H', 'VH']
-sublevels = [str(k) for k in np.arange(0, 7)]
-names = []
-for combination in product(*[levels, sublevels]):
-    names.append(combination[0] + combination[1])
-
-sp = {'seasonality': DateTime.day_of_year , 'names': ['Jan','Feb','Mar','Apr','May',
-                                                      'Jun','Jul', 'Aug','Sep','Oct',
-                                                      'Nov','Dec']}
-
-vmonth = variable.Variable("Month", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=12,
-                           data=train_mv, partitioner_specific=sp)
+from pyFTS.data import SONDA, Malaysia
 
+df = Malaysia.get_dataframe()
+df['time'] = pd.to_datetime(df["time"], format='%m/%d/%y %I:%M %p')
 
 sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k)+'hs' for k in range(0,24)]}
 
-vhour = variable.Variable("Hour", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=24,
-                          data=train_mv, partitioner_specific=sp)
+variables = {
+    "Hour": dict(data_label="time", partitioner=seasonal.TimeGridPartitioner, npart=24,
+                 partitioner_specific=sp, alpha_cut=.3),
+    "Temperature": dict(data_label="temperature", alias='temp',
+                        partitioner=Grid.GridPartitioner, npart=10, func=Membership.gaussmf,
+                        alpha_cut=.25),
+    "Load": dict(data_label="load", alias='load',
+                 partitioner=Grid.GridPartitioner, npart=10, func=Membership.gaussmf,
+                 alpha_cut=.25)
+}
 
-vavg = variable.Variable("Radiation", data_label="glo_avg", alias='rad',
-                         partitioner=Grid.GridPartitioner, npart=35, partitioner_specific={'names': names},
-                         data=train_mv)
-
-fs = grid.GridCluster(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg)
-model = cmvfts.ClusteredMVFTS(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg, partitioner=fs,
-                              order=2, knn=1)
-#model = wmvfts.WeightedMVFTS(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg)
+methods = [mvfts.MVFTS, wmvfts.WeightedMVFTS, granular.GranularWMVFTS]
+#methods = [granular.GranularWMVFTS]
 
-model.fit(train_mv)
-
-generator = lambda x : x + pd.to_timedelta(1, unit='h')
-
-forecasts = model.predict(test_mv.iloc[:3], steps_ahead=48, generators={'data': generator} )
+parameters = [
+    {},{},
+    dict(fts_method=pwfts.ProbabilisticWeightedFTS, fuzzyfy_mode='both',
+         order=1, knn=1)
+]
 
-print(forecasts)
+bchmk.multivariate_sliding_window_benchmarks2(df, 10000, train=0.9, inc=0.25,
+                                              methods=methods,
+                                              methods_parameters=parameters,
+                                              variables=variables,
+                                              target_variable='Load',
+                                              type='interval',
+                                              steps_ahead=[1],
+                                              distributed=False,
+                                              nodes=['192.168.0.110', '192.168.0.107', '192.168.0.106'],
+                                              file="experiments.db", dataset='Malaysia',
+                                              tag="experiments"
+                                              )
 
 '''
diff --git a/pyFTS/tests/pwfts.py b/pyFTS/tests/pwfts.py
index a912c17..39cf863 100644
--- a/pyFTS/tests/pwfts.py
+++ b/pyFTS/tests/pwfts.py
@@ -14,71 +14,68 @@ from pyFTS.models import pwfts,hofts,ifts
 from pyFTS.models.multivariate import granular, grid
 from pyFTS.partitioners import Grid, Util as pUtil
 
-from pyFTS.models.multivariate import common, variable, mvfts
+from pyFTS.models.multivariate import common, variable, mvfts, wmvfts
 from pyFTS.models.seasonal import partitioner as seasonal
 from pyFTS.models.seasonal.common import DateTime
 from pyFTS.common import Membership
 
-from pyFTS.data import SONDA, Malaysia
+def sample_by_hour(data):
+    return [np.nanmean(data[k:k+60]) for k in np.arange(0,len(data),60)]
 
-df = Malaysia.get_dataframe()
-df['time'] = pd.to_datetime(df["time"], format='%m/%d/%y %I:%M %p')
+def sample_date_by_hour(data):
+    return [data[k] for k in np.arange(0,len(data),60)]
 
-train_mv = df.iloc[:8000]
-test_mv = df.iloc[8000:10000]
+from pyFTS.data import SONDA
 
-sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k)+'hs' for k in range(0,24)]}
+sonda = SONDA.get_dataframe()[['datahora','glo_avg','ws_10m']]
 
-vhour = variable.Variable("Hour", data_label="time", partitioner=seasonal.TimeGridPartitioner, npart=24,
+sonda = sonda.drop(sonda.index[np.where(sonda["ws_10m"] <= 0.01)])
+sonda = sonda.drop(sonda.index[np.where(sonda["glo_avg"] <= 0.01)])
+sonda = sonda.dropna()
+sonda['datahora'] = pd.to_datetime(sonda["datahora"], format='%Y-%m-%d %H:%M:%S')
+
+
+var = {
+    'datahora': sample_date_by_hour(sonda['datahora'].values),
+    'glo_avg': sample_by_hour(sonda['glo_avg'].values),
+    'ws_10m': sample_by_hour(sonda['ws_10m'].values),
+}
+
+df = pd.DataFrame(var)
+
+train_mv = df.iloc[:9000]
+test_mv = df.iloc[9000:10000]
+
+fig, ax = plt.subplots(nrows=2, ncols=1, figsize=[10,3])
+
+sp = {'seasonality': DateTime.month, 'names': ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']}
+
+vmonth = variable.Variable("Month", data_label="datahora", partitioner=seasonal.TimeGridPartitioner, npart=12,
                            data=train_mv, partitioner_specific=sp, alpha_cut=.3)
 
-vtemp = variable.Variable("Temperature", data_label="temperature", alias='temp',
-                          partitioner=Grid.GridPartitioner, npart=5, func=Membership.gaussmf,
-                          data=train_mv, alpha_cut=.3)
+vmonth.partitioner.plot(ax[0])
 
-vload = variable.Variable("Load", data_label="load", alias='load',
-                          partitioner=Grid.GridPartitioner, npart=5, func=Membership.gaussmf,
-                          data=train_mv, alpha_cut=.3)
+vwin = variable.Variable("Wind", data_label="ws_10m", alias='wind',
+                         partitioner=Grid.GridPartitioner, npart=15, func=Membership.gaussmf,
+                         data=train_mv, alpha_cut=.25)
 
-order = 1
-knn = 1
+vwin.partitioner.plot(ax[1])
+
+plt.tight_layout()
+
+order = 3
+knn = 2
 
-model = granular.GranularWMVFTS(explanatory_variables=[vhour, vtemp, vload], target_variable=vload,
+model = granular.GranularWMVFTS(explanatory_variables=[vmonth, vwin], target_variable=vwin,
                                 fts_method=pwfts.ProbabilisticWeightedFTS, fuzzyfy_mode='both',
                                 order=order, knn=knn)
 
 model.fit(train_mv)
 
+fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[15,3])
+ax.plot(test_mv['ws_10m'].values[:100], label='original')
 
-temp_generator = pwfts.ProbabilisticWeightedFTS(partitioner=vtemp.partitioner, order=1)
-temp_generator.fit(train_mv['temperature'].values)
+forecasts = model.predict(test_mv.iloc[:100], type='distribution')
 
-#print(model)
+Util.plot_distribution2(forecasts, test_mv['ws_10m'].values[:100], start_at=model.order-1, ax=ax)
 
-time_generator = lambda x : pd.to_datetime(x) + pd.to_timedelta(1, unit='h')
-#temp_generator = lambda x : x
-
-generators = {'time': time_generator, 'temperature': temp_generator}
-
-#print(model.predict(test_mv.iloc[:10], type='point', steps_ahead=10, generators=generators))
-#print(model.predict(test_mv.iloc[:10], type='interval', steps_ahead=10, generators=generators))
-print(model.predict(test_mv.iloc[:10], type='distribution', steps_ahead=10, generators=generators))
-
-
-#
-
-#forecasts1 = model.predict(test_mv, type='multivariate')
-#forecasts2 = model.predict(test, type='multivariate', generators={'date': time_generator},
-#                           steps_ahead=200)
-
-
-'''
-from pyFTS.data import Enrollments
-train = Enrollments.get_data()
-
-fs = Grid.GridPartitioner(data=train, npart=10) #, transformation=tdiff)
-
-model = pwfts.ProbabilisticWeightedFTS(partitioner=fs, order=2)
-model.fit(train)
-print(model)
-
-print(model.predict(train))
-'''
\ No newline at end of file
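
The unifying change in this patch is swapping Python's built-in sum and np.sum (and np.mean/np.percentile) for their NaN-aware counterparts. The plain aggregations return NaN as soon as a single NaN enters them, so one failed fuzzyfication or one missing sample used to poison every midpoint, interval bound, and probability computed downstream; np.nansum simply skips NaN entries. A minimal illustrative sketch of the difference (not part of the patch itself):

import numpy as np

memberships = np.array([0.7, np.nan, 0.3])  # one membership failed to compute

print(np.sum(memberships))      # nan -- propagates into every later division
print(np.nansum(memberships))   # 1.0 -- NaN entries are treated as absent

# np.nansum over an all-NaN input yields 0.0 rather than NaN, which is why
# the pwfts.py hunks above still guard their divisions with a norm != 0 test:
norms = np.array([np.nan, np.nan])
norm = np.nansum(norms)                          # 0.0
final = np.nansum(norms) / norm if norm != 0 else 0
print(final)                                     # 0

The same reasoning applies to the try/except fallbacks added in ProbabilityDistribution.cumulative and .quantile: an out-of-range lookup now degrades to np.nan, which the nan-aware measures in Measures.py can then tolerate instead of crashing the whole benchmark run.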