From a6d9d164e4f67f8a25d6e74d946c9e992b3a99f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido?= Date: Fri, 21 Jun 2019 15:10:19 -0300 Subject: [PATCH] Improvements for forecasting ahead in ClusteredMVFTS --- pyFTS/models/multivariate/cmvfts.py | 55 +++++-- pyFTS/models/multivariate/mvfts.py | 10 +- pyFTS/models/pwfts.py | 153 ++++++++++-------- pyFTS/partitioners/partitioner.py | 31 ++-- .../probabilistic/ProbabilityDistribution.py | 13 ++ pyFTS/tests/pwfts.py | 24 ++- 6 files changed, 180 insertions(+), 106 deletions(-) diff --git a/pyFTS/models/multivariate/cmvfts.py b/pyFTS/models/multivariate/cmvfts.py index 4591869..3d31f52 100644 --- a/pyFTS/models/multivariate/cmvfts.py +++ b/pyFTS/models/multivariate/cmvfts.py @@ -38,7 +38,7 @@ class ClusteredMVFTS(mvfts.MVFTS): def fuzzyfy(self,data): ndata = [] - for index, row in data.iterrows(): + for index, row in data.iterrows() if isinstance(data, pd.DataFrame) else enumerate(data): data_point = self.format_data(row) ndata.append(self.partitioner.fuzzyfy(data_point, mode=self.fuzzyfy_mode)) @@ -83,16 +83,7 @@ class ClusteredMVFTS(mvfts.MVFTS): return self.model.forecast_interval(data, fuzzyfied=pre_fuzz, **kwargs) - def forecast_ahead_interval(self, data, steps, **kwargs): - if not self.model.has_interval_forecasting: - raise Exception("The internal method does not support interval forecasting!") - - data = self.check_data(data) - - pre_fuzz = kwargs.get('pre_fuzzyfy', self.pre_fuzzyfy) - - return self.model.forecast_ahead_interval(data, steps, fuzzyfied=pre_fuzz, **kwargs) def forecast_distribution(self, data, **kwargs): @@ -107,14 +98,48 @@ class ClusteredMVFTS(mvfts.MVFTS): def forecast_ahead_distribution(self, data, steps, **kwargs): - if not self.model.has_probability_forecasting: - raise Exception("The internal method does not support probabilistic forecasting!") + generators = kwargs.get('generators', None) - data = self.check_data(data) + if generators is None: + raise Exception('You must provide parameter \'generators\'! generators is a dict where the keys' + + ' are the dataframe column names (except the target_variable) and the values are ' + + 'lambda functions that accept one value (the actual value of the variable) ' + ' and return the next value or trained FTS models that accept the actual values and ' + 'forecast new ones.') - pre_fuzz = kwargs.get('pre_fuzzyfy', self.pre_fuzzyfy) + ndata = self.apply_transformations(data) - return self.model.forecast_ahead_distribution(data, steps, fuzzyfied=pre_fuzz, **kwargs) + start = kwargs.get('start_at', self.order) + + ret = [] + sample = ndata.iloc[start - self.max_lag:] + for k in np.arange(0, steps): + tmp = self.forecast_distribution(sample.iloc[-self.max_lag:], **kwargs)[0] + + ret.append(tmp) + + new_data_point = {} + + for data_label in generators.keys(): + if data_label != self.target_variable.data_label: + if isinstance(generators[data_label], LambdaType): + last_data_point = sample.iloc[-1] + new_data_point[data_label] = generators[data_label](last_data_point[data_label]) + + elif isinstance(generators[data_label], fts.FTS): + gen_model = generators[data_label] + last_data_point = sample.iloc[-gen_model.order:] + + if not gen_model.is_multivariate: + last_data_point = last_data_point[data_label].values + + new_data_point[data_label] = gen_model.forecast(last_data_point)[0] + + new_data_point[self.target_variable.data_label] = tmp.expected_value() + + sample = sample.append(new_data_point, ignore_index=True) + + return ret[-steps:] def forecast_multivariate(self, data, **kwargs): diff --git a/pyFTS/models/multivariate/mvfts.py b/pyFTS/models/multivariate/mvfts.py index 58602b3..993984e 100644 --- a/pyFTS/models/multivariate/mvfts.py +++ b/pyFTS/models/multivariate/mvfts.py @@ -264,11 +264,11 @@ class MVFTS(fts.FTS): ret = [] ix = ndata.index[start - self.max_lag:] - lo = [ndata.loc[k] for k in ix] - up = [ndata.loc[k] for k in ix] + lo = ndata.loc[ix] #[ndata.loc[k] for k in ix] + up = ndata.loc[ix] #[ndata.loc[k] for k in ix] for k in np.arange(0, steps): - tmp_lo = self.forecast_interval(lo[-self.max_lag:], **kwargs) - tmp_up = self.forecast_interval(up[-self.max_lag:], **kwargs) + tmp_lo = self.forecast_interval(lo[-self.max_lag:], **kwargs)[0] + tmp_up = self.forecast_interval(up[-self.max_lag:], **kwargs)[0] ret.append([min(tmp_lo), max(tmp_up)]) @@ -300,7 +300,7 @@ class MVFTS(fts.FTS): lo = lo.append(new_data_point_lo, ignore_index=True) up = up.append(new_data_point_up, ignore_index=True) - return ret[-steps] + return ret[-steps:] def clone_parameters(self, model): super(MVFTS, self).clone_parameters(model) diff --git a/pyFTS/models/pwfts.py b/pyFTS/models/pwfts.py index 4e2bac8..ea92c88 100644 --- a/pyFTS/models/pwfts.py +++ b/pyFTS/models/pwfts.py @@ -171,6 +171,9 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): return np.nanprod(vals) def generate_lhs_flrg(self, sample, explain=False): + if not isinstance(sample, (list, np.ndarray)): + sample = [sample] + nsample = [self.partitioner.fuzzyfy(k, mode="sets", alpha_cut=self.alpha_cut) for k in sample] @@ -440,6 +443,8 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): smooth = kwargs.get("smooth", "none") + from_distribution = kwargs.get('from_distribution', False) + fuzzyfied = kwargs.get('fuzzyfied', False) l = len(ndata) @@ -457,39 +462,43 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): for k in np.arange(self.max_lag - 1, l): sample = ndata[k - (self.max_lag - 1): k + 1] - if not fuzzyfied: - flrgs = self.generate_lhs_flrg(sample) + if from_distribution: + dist = self.forecast_distribution_from_distribution(sample,smooth,uod,_bins) else: - fsets = self.get_sets_from_both_fuzzyfication(sample) - flrgs = self.generate_lhs_flrg_fuzzyfied(fsets) - if 'type' in kwargs: - kwargs.pop('type') + if not fuzzyfied: + flrgs = self.generate_lhs_flrg(sample) + else: + fsets = self.get_sets_from_both_fuzzyfication(sample) + flrgs = self.generate_lhs_flrg_fuzzyfied(fsets) - dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs) + if 'type' in kwargs: + kwargs.pop('type') - for bin in _bins: - num = [] - den = [] - for s in flrgs: - if s.get_key() in self.flrgs: - flrg = self.flrgs[s.get_key()] - wi = flrg.rhs_conditional_probability(bin, self.partitioner.sets, uod, nbins) - if not fuzzyfied: - pk = flrg.lhs_conditional_probability(sample, self.partitioner.sets, self.global_frequency_count, uod, nbins) + dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs) + + for bin in _bins: + num = [] + den = [] + for s in flrgs: + if s.get_key() in self.flrgs: + flrg = self.flrgs[s.get_key()] + wi = flrg.rhs_conditional_probability(bin, self.partitioner.sets, uod, nbins) + if not fuzzyfied: + pk = flrg.lhs_conditional_probability(sample, self.partitioner.sets, self.global_frequency_count, uod, nbins) + else: + lhs_mv = self.pwflrg_lhs_memberhip_fuzzyfied(flrg, sample) + pk = flrg.lhs_conditional_probability_fuzzyfied(lhs_mv, self.partitioner.sets, + self.global_frequency_count, uod, nbins) + + num.append(wi * pk) + den.append(pk) else: - lhs_mv = self.pwflrg_lhs_memberhip_fuzzyfied(flrg, sample) - pk = flrg.lhs_conditional_probability_fuzzyfied(lhs_mv, self.partitioner.sets, - self.global_frequency_count, uod, nbins) + num.append(0.0) + den.append(0.000000001) + pf = sum(num) / sum(den) - num.append(wi * pk) - den.append(pk) - else: - num.append(0.0) - den.append(0.000000001) - pf = sum(num) / sum(den) - - dist.set(bin, pf) + dist.set(bin, pf) ret.append(dist) @@ -532,7 +541,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): start = kwargs.get('start_at', 0) - fuzzyfied = kwargs.get('fuzzyfied', False) + fuzzyfied = kwargs.pop('fuzzyfied') sample = data[start: start + self.max_lag] @@ -541,12 +550,12 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): else: ret = [] for k in sample: - kv = self.partitioner.deffuzyfy(k,mode='both') - ret.append([kv,kv]) + kv = self.partitioner.defuzzyfy(k, mode='both') + ret.append([kv, kv]) ret.append(self.forecast_interval(sample, **kwargs)[0]) - for k in np.arange(self.max_lag+1, steps+self.max_lag): + for k in np.arange(start + self.max_lag, steps + start + self.max_lag): if len(ret) > 0 and self.__check_interval_bounds(ret[-1]): ret.append(ret[-1]) @@ -562,6 +571,9 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): ret = [] + if 'type' in kwargs: + kwargs.pop('type') + smooth = kwargs.get("smooth", "none") uod = self.get_UoD() @@ -575,50 +587,61 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS): start = kwargs.get('start_at', 0) - sample = ndata[start: start + self.max_lag] + fuzzyfied = kwargs.pop('fuzzyfied') + + if not fuzzyfied: + sample = ndata[start: start + self.max_lag] + else: + sample = [] + for k in ndata[start: start + self.max_lag]: + kv = self.partitioner.defuzzyfy(k, mode='both') + sample.append(kv) for dat in sample: - if 'type' in kwargs: - kwargs.pop('type') - tmp = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs) - tmp.set(dat, 1.0) - ret.append(tmp) + if not isinstance(dat, ProbabilityDistribution.ProbabilityDistribution): + tmp = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs) + tmp.set(dat, 1.0) + ret.append(tmp) + else: + ret.append(dat) - dist = self.forecast_distribution(sample, bins=_bins, **kwargs)[0] + dist = self.forecast_distribution_from_distribution(ret, smooth,uod,_bins,**kwargs) ret.append(dist) - for k in np.arange(self.max_lag+1, steps+self.max_lag+1): - dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs) - - lags = [] - - # Find all bins of past distributions with probability greater than zero - - for ct, lag in enumerate(self.lags): - dd = ret[k - lag] - vals = [float(v) for v in dd.bins if np.round(dd.density(v), 4) > 0.0] - lags.append( sorted(vals) ) - - - # Trace all possible combinations between the bins of past distributions - - for path in product(*lags): - - # get the combined probabilities for this path - pk = np.prod([ret[k - (self.max_lag + lag)].density(path[ct]) - for ct, lag in enumerate(self.lags)]) - - - d = self.forecast_distribution(path)[0] - - for bin in _bins: - dist.set(bin, dist.density(bin) + pk * d.density(bin)) - + for k in np.arange(start + self.max_lag, steps + start + self.max_lag): + dist = self.forescast_distribution_from_distribution(ret[k-self.max_lag:], smooth, uod, _bins, **kwargs) ret.append(dist) return ret[-steps:] + def forecast_distribution_from_distribution(self, previous_dist, smooth, uod, bins, **kwargs): + dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=bins, **kwargs) + + lags = [] + + # Find all bins of past distributions with probability greater than zero + + for ct, lag in enumerate(self.lags): + dd = previous_dist[-lag] + vals = [float(v) for v in dd.bins if np.round(dd.density(v), 4) > 0.0] + lags.append(sorted(vals)) + + # Trace all possible combinations between the bins of past distributions + + for path in product(*lags): + + # get the combined probabilities for this path + pk = np.prod([previous_dist[-lag].density(path[ct]) + for ct, lag in enumerate(self.lags)]) + + d = self.forecast_distribution(path)[0] + + for bin in bins: + dist.set(bin, dist.density(bin) + pk * d.density(bin)) + + return dist + def __str__(self): tmp = self.name + ":\n" for r in sorted(self.flrgs.keys()): diff --git a/pyFTS/partitioners/partitioner.py b/pyFTS/partitioners/partitioner.py index 2dd81ea..8621f63 100644 --- a/pyFTS/partitioners/partitioner.py +++ b/pyFTS/partitioners/partitioner.py @@ -185,29 +185,26 @@ class Partitioner(object): if not isinstance(values, list): values = [values] - ret = [] + num = [] + den = [] for val in values: + fset = val[0] + mv = val[1] if mode == 'both': - num = [] - den = [] - for fset, mv in val: - num.append( self.sets[fset].centroid * mv ) - den.append(mv) - ret.append(np.sum(num)/np.sum(den)) - elif mode == 'both': - num = np.mean([self.sets[fset].centroid for fset in val ]) - ret.append(num) + num.append( self.sets[fset].centroid * mv ) + den.append(mv) + elif mode == 'sets': + num.append(self.sets[fset].centroid) elif mode == 'vector': - num = [] - den = [] - for fset, mv in enumerate(val): - num.append(self.sets[self.ordered_sets[fset]].centroid * mv) - den.append(mv) - ret.append(np.sum(num) / np.sum(den)) + num.append(self.sets[self.ordered_sets[fset]].centroid * mv) + den.append(mv) else: raise Exception('Unknown deffuzyfication mode') - return ret + if mode in ('both','vector'): + return np.sum(num) / np.sum(den) + else: + return np.mean(num) def check_bounds(self, data): """ diff --git a/pyFTS/probabilistic/ProbabilityDistribution.py b/pyFTS/probabilistic/ProbabilityDistribution.py index 0c35b7e..cbea1cd 100644 --- a/pyFTS/probabilistic/ProbabilityDistribution.py +++ b/pyFTS/probabilistic/ProbabilityDistribution.py @@ -5,6 +5,19 @@ from pyFTS.common import FuzzySet,SortedCollection,tree from pyFTS.probabilistic import kde +def from_point(x,**kwargs): + """ + Create a probability distribution from a scalar value + + :param x: scalar value + :param kwargs: common parameters of the distribution + :return: the ProbabilityDistribution object + """ + tmp = ProbabilityDistribution(**kwargs) + tmp.set(x, 1.0) + return tmp + + class ProbabilityDistribution(object): """ Represents a discrete or continous probability distribution diff --git a/pyFTS/tests/pwfts.py b/pyFTS/tests/pwfts.py index 7c9f93f..bafd961 100644 --- a/pyFTS/tests/pwfts.py +++ b/pyFTS/tests/pwfts.py @@ -47,12 +47,28 @@ model = granular.GranularWMVFTS(explanatory_variables=[vhour, vtemp, vload], tar model.fit(train_mv) -print(model) + +temp_generator = pwfts.ProbabilisticWeightedFTS(partitioner=vtemp.partitioner, order=2) +temp_generator.fit(train_mv['temperature'].values) + +#print(model) + +time_generator = lambda x : pd.to_datetime(x) + pd.to_timedelta(1, unit='h') +#temp_generator = lambda x : x + +generators = {'time': time_generator, 'temperature': temp_generator} + +#print(model.predict(test_mv.iloc[:10], type='point', steps_ahead=10, generators=generators)) +#print(model.predict(test_mv.iloc[:10], type='interval', steps_ahead=10, generators=generators)) +print(model.predict(test_mv.iloc[:10], type='distribution', steps_ahead=10, generators=generators)) -print(model.predict(test_mv.iloc[:10], type='point')) -print(model.predict(test_mv.iloc[:10], type='interval')) -print(model.predict(test_mv.iloc[:10], type='distribution')) +# + +#forecasts1 = model.predict(test_mv, type='multivariate') +#forecasts2 = model.predict(test, type='multivariate', generators={'date': time_generator}, +# steps_ahead=200) + ''' from pyFTS.data import Enrollments