From f3cf757e961bc1068d9e4dbb100c7be2957960fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido?= Date: Sat, 22 Jun 2019 18:29:40 -0300 Subject: [PATCH] Bugfixes and improvements on pwfts and multivariate.granular --- pyFTS/benchmarks/Measures.py | 3 + pyFTS/common/Util.py | 2 +- pyFTS/hyperparam/Evolutionary.py | 31 ++++++---- pyFTS/models/multivariate/granular.py | 4 +- pyFTS/models/pwfts.py | 4 +- .../probabilistic/ProbabilityDistribution.py | 2 +- pyFTS/tests/multivariate.py | 41 ++++++++++--- pyFTS/tests/pwfts.py | 61 +++---------------- 8 files changed, 69 insertions(+), 79 deletions(-) diff --git a/pyFTS/benchmarks/Measures.py b/pyFTS/benchmarks/Measures.py index a4b2d12..bdc5000 100644 --- a/pyFTS/benchmarks/Measures.py +++ b/pyFTS/benchmarks/Measures.py @@ -300,6 +300,9 @@ def crps(targets, densities): targets = [targets] n = len(densities) + if n == 0: + return np.nan + for ct, df in enumerate(densities): _crps += np.nansum([(df.cumulative(bin) - (1 if bin >= targets[ct] else 0)) ** 2 for bin in df.bins]) diff --git a/pyFTS/common/Util.py b/pyFTS/common/Util.py index 484db71..8b44364 100644 --- a/pyFTS/common/Util.py +++ b/pyFTS/common/Util.py @@ -216,7 +216,7 @@ def plot_distribution2(probabilitydist, data, **kwargs): if kwargs.get('median',True): y = [data[start_at]] for pd in probabilitydist: - qts = pd.quantile(.5) + qts = pd.quantile([.5]) y.append(qts[0]) ax.plot(x, y, color='red', label='Median') diff --git a/pyFTS/hyperparam/Evolutionary.py b/pyFTS/hyperparam/Evolutionary.py index 7f43ce8..6ec7dd0 100644 --- a/pyFTS/hyperparam/Evolutionary.py +++ b/pyFTS/hyperparam/Evolutionary.py @@ -139,24 +139,33 @@ def evaluate(dataset, individual, **kwargs): for count, train, test in Util.sliding_window(dataset, window_size, train=train_rate, inc=increment_rate): - model = phenotype(individual, train, fts_method=fts_method, parameters=parameters) + try: - forecasts = model.predict(test) + model = phenotype(individual, train, fts_method=fts_method, parameters=parameters) - rmse = Measures.rmse(test[model.max_lag:], forecasts[:-1]) - lengths.append(len(model)) + forecasts = model.predict(test) - errors.append(rmse) + rmse = Measures.rmse(test[model.max_lag:], forecasts[:-1]) + lengths.append(len(model)) - _lags = sum(model.lags) * 100 + errors.append(rmse) - _rmse = np.nanmean(errors) - _len = np.nanmean(lengths) + except: + lengths.append(np.nan) + errors.append(np.nan) - f1 = np.nansum([.6 * _rmse, .4 * np.nanstd(errors)]) - f2 = np.nansum([.4 * _len, .6 * _lags]) + try: + _lags = sum(model.lags) * 100 - return {'f1': f1, 'f2': f2, 'rmse': _rmse, 'size': _len } + _rmse = np.nanmean(errors) + _len = np.nanmean(lengths) + + f1 = np.nansum([.6 * _rmse, .4 * np.nanstd(errors)]) + f2 = np.nansum([.4 * _len, .6 * _lags]) + + return {'f1': f1, 'f2': f2, 'rmse': _rmse, 'size': _len } + except: + return {'f1': np.inf, 'f2': np.inf, 'rmse': np.inf, 'size': np.inf} def tournament(population, objective, **kwargs): diff --git a/pyFTS/models/multivariate/granular.py b/pyFTS/models/multivariate/granular.py index d6db190..b60b851 100644 --- a/pyFTS/models/multivariate/granular.py +++ b/pyFTS/models/multivariate/granular.py @@ -15,8 +15,8 @@ class GranularWMVFTS(cmvfts.ClusteredMVFTS): """The most recent trained model""" self.knn = kwargs.get('knn', 2) self.order = kwargs.get("order", 2) - self.shortname = "GranularWMVFTS" - self.name = "Granular Weighted Multivariate FTS" + self.shortname = "FIG-FTS" + self.name = "Fuzzy Information Granular FTS" self.mode = kwargs.get('mode','sets') def train(self, data, **kwargs): diff --git a/pyFTS/models/pwfts.py b/pyFTS/models/pwfts.py index 8523027..7fdff2a 100644 --- a/pyFTS/models/pwfts.py +++ b/pyFTS/models/pwfts.py @@ -165,8 +165,8 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): def pwflrg_lhs_memberhip_fuzzyfied(self, flrg, sample): vals = [] - for ct, fuzz in enumerate(sample): - vals.append([mv for fset, mv in fuzz if fset == flrg.LHS[ct]]) + for ct in range(len(flrg.LHS)): # fuzz in enumerate(sample): + vals.append([mv for fset, mv in sample[ct] if fset == flrg.LHS[ct]]) return np.nanprod(vals) diff --git a/pyFTS/probabilistic/ProbabilityDistribution.py b/pyFTS/probabilistic/ProbabilityDistribution.py index 4634ed6..51e0854 100644 --- a/pyFTS/probabilistic/ProbabilityDistribution.py +++ b/pyFTS/probabilistic/ProbabilityDistribution.py @@ -217,7 +217,7 @@ class ProbabilityDistribution(object): else: try: k = self.bin_index.find_ge(values) - return self.cdf[values] + return self.cdf[k] except: return np.nan diff --git a/pyFTS/tests/multivariate.py b/pyFTS/tests/multivariate.py index ad11d25..340dab8 100644 --- a/pyFTS/tests/multivariate.py +++ b/pyFTS/tests/multivariate.py @@ -19,9 +19,33 @@ from pyFTS.common import Membership import os +''' +def sample_by_hour(data): + return [np.nanmean(data[k:k+60]) for k in np.arange(0,len(data),60)] + +def sample_date_by_hour(data): + return [data[k] for k in np.arange(0,len(data),60)] + +from pyFTS.data import SONDA + +sonda = SONDA.get_dataframe()[['datahora','glo_avg','ws_10m']] + +sonda = sonda.drop(sonda.index[np.where(sonda["ws_10m"] <= 0.01)]) +sonda = sonda.drop(sonda.index[np.where(sonda["glo_avg"] <= 0.01)]) +sonda = sonda.dropna() +sonda['datahora'] = pd.to_datetime(sonda["datahora"], format='%Y-%m-%d %H:%M:%S') -from pyFTS.data import SONDA, Malaysia +var = { + 'datahora': sample_date_by_hour(sonda['datahora'].values), + 'glo_avg': sample_by_hour(sonda['glo_avg'].values), + 'ws_10m': sample_by_hour(sonda['ws_10m'].values) +} + +df = pd.DataFrame(var) +''' + +from pyFTS.data import Malaysia df = Malaysia.get_dataframe() df['time'] = pd.to_datetime(df["time"], format='%m/%d/%y %I:%M %p') @@ -39,11 +63,10 @@ variables = { alpha_cut=.25) } -methods = [mvfts.MVFTS, wmvfts.WeightedMVFTS, granular.GranularWMVFTS] -#methods = [granular.GranularWMVFTS] + +methods = [granular.GranularWMVFTS] parameters = [ - {},{}, dict(fts_method=pwfts.ProbabilisticWeightedFTS, fuzzyfy_mode='both', order=1, knn=1) ] @@ -52,16 +75,16 @@ bchmk.multivariate_sliding_window_benchmarks2(df, 10000, train=0.9, inc=0.25, methods=methods, methods_parameters=parameters, variables=variables, - target_variable='Load', - type='interval', + target_variable='Temperature', + type='distribution', steps_ahead=[1], - distributed=False, - nodes=['192.168.0.110', '192.168.0.107', '192.168.0.106'], - file="experiments.db", dataset='Malaysia', + file="experiments.db", dataset='Malaysia.temperature', tag="experiments" ) + + ''' from pyFTS.data import lorentz df = lorentz.get_dataframe(iterations=5000) diff --git a/pyFTS/tests/pwfts.py b/pyFTS/tests/pwfts.py index 39cf863..42e14f6 100644 --- a/pyFTS/tests/pwfts.py +++ b/pyFTS/tests/pwfts.py @@ -19,63 +19,18 @@ from pyFTS.models.seasonal import partitioner as seasonal from pyFTS.models.seasonal.common import DateTime from pyFTS.common import Membership -def sample_by_hour(data): - return [np.nanmean(data[k:k+60]) for k in np.arange(0,len(data),60)] - -def sample_date_by_hour(data): - return [data[k] for k in np.arange(0,len(data),60)] - from pyFTS.data import SONDA -sonda = SONDA.get_dataframe()[['datahora','glo_avg','ws_10m']] +data = [k for k in SONDA.get_data('ws_10m') if k > 0.1 and k != np.nan and k is not None] +data = [np.nanmean(data[k:k+60]) for k in np.arange(0,len(data),60)] -sonda = sonda.drop(sonda.index[np.where(sonda["ws_10m"] <= 0.01)]) -sonda = sonda.drop(sonda.index[np.where(sonda["glo_avg"] <= 0.01)]) -sonda = sonda.dropna() -sonda['datahora'] = pd.to_datetime(sonda["datahora"], format='%Y-%m-%d %H:%M:%S') +train = data[:9000] +test = data[9000:10000] +fs = Grid.GridPartitioner(data=train, npart=95) -var = { - 'datahora': sample_date_by_hour(sonda['datahora'].values), - 'glo_avg': sample_by_hour(sonda['glo_avg'].values), - 'ws_10m': sample_by_hour(sonda['ws_10m'].values), -} +model = pwfts.ProbabilisticWeightedFTS(partitioner=fs, order=3) -df = pd.DataFrame(var) - -train_mv = df.iloc[:9000] -test_mv = df.iloc[9000:10000] - -fig, ax = plt.subplots(nrows=2, ncols=1, figsize=[10,3]) - -sp = {'seasonality': DateTime.month, 'names': ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']} - -vmonth = variable.Variable("Month", data_label="datahora", partitioner=seasonal.TimeGridPartitioner, npart=12, - data=train_mv, partitioner_specific=sp, alpha_cut=.3) - -vmonth.partitioner.plot(ax[0]) - -vwin = variable.Variable("Wind", data_label="ws_10m", alias='wind', - partitioner=Grid.GridPartitioner, npart=15, func=Membership.gaussmf, - data=train_mv, alpha_cut=.25) - -vwin.partitioner.plot(ax[1]) - -plt.tight_layout() - -order = 3 -knn = 2 - -model = granular.GranularWMVFTS(explanatory_variables=[vmonth, vwin], target_variable=vwin, - fts_method=pwfts.ProbabilisticWeightedFTS, fuzzyfy_mode='both', - order=order, knn=knn) - -model.fit(train_mv) - -fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[15,3]) -ax.plot(test_mv['ws_10m'].values[:100], label='original') - -forecasts = model.predict(test_mv.iloc[:100], type='distribution') - -Util.plot_distribution2(forecasts, test_mv['ws_10m'].values[:100], start_at=model.order-1, ax=ax) +model.fit(train) +model.predict(test) \ No newline at end of file