Improvements for forecasting ahead in ClusteredMVFTS

This commit is contained in:
Petrônio Cândido 2019-06-21 15:10:19 -03:00
parent 812b99bcea
commit a6d9d164e4
6 changed files with 180 additions and 106 deletions

View File

@ -38,7 +38,7 @@ class ClusteredMVFTS(mvfts.MVFTS):
def fuzzyfy(self,data):
ndata = []
for index, row in data.iterrows():
for index, row in data.iterrows() if isinstance(data, pd.DataFrame) else enumerate(data):
data_point = self.format_data(row)
ndata.append(self.partitioner.fuzzyfy(data_point, mode=self.fuzzyfy_mode))
@ -83,16 +83,7 @@ class ClusteredMVFTS(mvfts.MVFTS):
return self.model.forecast_interval(data, fuzzyfied=pre_fuzz, **kwargs)
def forecast_ahead_interval(self, data, steps, **kwargs):
if not self.model.has_interval_forecasting:
raise Exception("The internal method does not support interval forecasting!")
data = self.check_data(data)
pre_fuzz = kwargs.get('pre_fuzzyfy', self.pre_fuzzyfy)
return self.model.forecast_ahead_interval(data, steps, fuzzyfied=pre_fuzz, **kwargs)
def forecast_distribution(self, data, **kwargs):
@ -107,14 +98,48 @@ class ClusteredMVFTS(mvfts.MVFTS):
def forecast_ahead_distribution(self, data, steps, **kwargs):
if not self.model.has_probability_forecasting:
raise Exception("The internal method does not support probabilistic forecasting!")
generators = kwargs.get('generators', None)
data = self.check_data(data)
if generators is None:
raise Exception('You must provide parameter \'generators\'! generators is a dict where the keys' +
' are the dataframe column names (except the target_variable) and the values are ' +
'lambda functions that accept one value (the actual value of the variable) '
' and return the next value or trained FTS models that accept the actual values and '
'forecast new ones.')
pre_fuzz = kwargs.get('pre_fuzzyfy', self.pre_fuzzyfy)
ndata = self.apply_transformations(data)
return self.model.forecast_ahead_distribution(data, steps, fuzzyfied=pre_fuzz, **kwargs)
start = kwargs.get('start_at', self.order)
ret = []
sample = ndata.iloc[start - self.max_lag:]
for k in np.arange(0, steps):
tmp = self.forecast_distribution(sample.iloc[-self.max_lag:], **kwargs)[0]
ret.append(tmp)
new_data_point = {}
for data_label in generators.keys():
if data_label != self.target_variable.data_label:
if isinstance(generators[data_label], LambdaType):
last_data_point = sample.iloc[-1]
new_data_point[data_label] = generators[data_label](last_data_point[data_label])
elif isinstance(generators[data_label], fts.FTS):
gen_model = generators[data_label]
last_data_point = sample.iloc[-gen_model.order:]
if not gen_model.is_multivariate:
last_data_point = last_data_point[data_label].values
new_data_point[data_label] = gen_model.forecast(last_data_point)[0]
new_data_point[self.target_variable.data_label] = tmp.expected_value()
sample = sample.append(new_data_point, ignore_index=True)
return ret[-steps:]
def forecast_multivariate(self, data, **kwargs):

View File

@ -264,11 +264,11 @@ class MVFTS(fts.FTS):
ret = []
ix = ndata.index[start - self.max_lag:]
lo = [ndata.loc[k] for k in ix]
up = [ndata.loc[k] for k in ix]
lo = ndata.loc[ix] #[ndata.loc[k] for k in ix]
up = ndata.loc[ix] #[ndata.loc[k] for k in ix]
for k in np.arange(0, steps):
tmp_lo = self.forecast_interval(lo[-self.max_lag:], **kwargs)
tmp_up = self.forecast_interval(up[-self.max_lag:], **kwargs)
tmp_lo = self.forecast_interval(lo[-self.max_lag:], **kwargs)[0]
tmp_up = self.forecast_interval(up[-self.max_lag:], **kwargs)[0]
ret.append([min(tmp_lo), max(tmp_up)])
@ -300,7 +300,7 @@ class MVFTS(fts.FTS):
lo = lo.append(new_data_point_lo, ignore_index=True)
up = up.append(new_data_point_up, ignore_index=True)
return ret[-steps]
return ret[-steps:]
def clone_parameters(self, model):
super(MVFTS, self).clone_parameters(model)

View File

@ -171,6 +171,9 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
return np.nanprod(vals)
def generate_lhs_flrg(self, sample, explain=False):
if not isinstance(sample, (list, np.ndarray)):
sample = [sample]
nsample = [self.partitioner.fuzzyfy(k, mode="sets", alpha_cut=self.alpha_cut)
for k in sample]
@ -440,6 +443,8 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
smooth = kwargs.get("smooth", "none")
from_distribution = kwargs.get('from_distribution', False)
fuzzyfied = kwargs.get('fuzzyfied', False)
l = len(ndata)
@ -457,39 +462,43 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
for k in np.arange(self.max_lag - 1, l):
sample = ndata[k - (self.max_lag - 1): k + 1]
if not fuzzyfied:
flrgs = self.generate_lhs_flrg(sample)
if from_distribution:
dist = self.forecast_distribution_from_distribution(sample,smooth,uod,_bins)
else:
fsets = self.get_sets_from_both_fuzzyfication(sample)
flrgs = self.generate_lhs_flrg_fuzzyfied(fsets)
if 'type' in kwargs:
kwargs.pop('type')
if not fuzzyfied:
flrgs = self.generate_lhs_flrg(sample)
else:
fsets = self.get_sets_from_both_fuzzyfication(sample)
flrgs = self.generate_lhs_flrg_fuzzyfied(fsets)
dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs)
if 'type' in kwargs:
kwargs.pop('type')
for bin in _bins:
num = []
den = []
for s in flrgs:
if s.get_key() in self.flrgs:
flrg = self.flrgs[s.get_key()]
wi = flrg.rhs_conditional_probability(bin, self.partitioner.sets, uod, nbins)
if not fuzzyfied:
pk = flrg.lhs_conditional_probability(sample, self.partitioner.sets, self.global_frequency_count, uod, nbins)
dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs)
for bin in _bins:
num = []
den = []
for s in flrgs:
if s.get_key() in self.flrgs:
flrg = self.flrgs[s.get_key()]
wi = flrg.rhs_conditional_probability(bin, self.partitioner.sets, uod, nbins)
if not fuzzyfied:
pk = flrg.lhs_conditional_probability(sample, self.partitioner.sets, self.global_frequency_count, uod, nbins)
else:
lhs_mv = self.pwflrg_lhs_memberhip_fuzzyfied(flrg, sample)
pk = flrg.lhs_conditional_probability_fuzzyfied(lhs_mv, self.partitioner.sets,
self.global_frequency_count, uod, nbins)
num.append(wi * pk)
den.append(pk)
else:
lhs_mv = self.pwflrg_lhs_memberhip_fuzzyfied(flrg, sample)
pk = flrg.lhs_conditional_probability_fuzzyfied(lhs_mv, self.partitioner.sets,
self.global_frequency_count, uod, nbins)
num.append(0.0)
den.append(0.000000001)
pf = sum(num) / sum(den)
num.append(wi * pk)
den.append(pk)
else:
num.append(0.0)
den.append(0.000000001)
pf = sum(num) / sum(den)
dist.set(bin, pf)
dist.set(bin, pf)
ret.append(dist)
@ -532,7 +541,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
start = kwargs.get('start_at', 0)
fuzzyfied = kwargs.get('fuzzyfied', False)
fuzzyfied = kwargs.pop('fuzzyfied')
sample = data[start: start + self.max_lag]
@ -541,12 +550,12 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
else:
ret = []
for k in sample:
kv = self.partitioner.deffuzyfy(k,mode='both')
ret.append([kv,kv])
kv = self.partitioner.defuzzyfy(k, mode='both')
ret.append([kv, kv])
ret.append(self.forecast_interval(sample, **kwargs)[0])
for k in np.arange(self.max_lag+1, steps+self.max_lag):
for k in np.arange(start + self.max_lag, steps + start + self.max_lag):
if len(ret) > 0 and self.__check_interval_bounds(ret[-1]):
ret.append(ret[-1])
@ -562,6 +571,9 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
ret = []
if 'type' in kwargs:
kwargs.pop('type')
smooth = kwargs.get("smooth", "none")
uod = self.get_UoD()
@ -575,50 +587,61 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
start = kwargs.get('start_at', 0)
sample = ndata[start: start + self.max_lag]
fuzzyfied = kwargs.pop('fuzzyfied')
if not fuzzyfied:
sample = ndata[start: start + self.max_lag]
else:
sample = []
for k in ndata[start: start + self.max_lag]:
kv = self.partitioner.defuzzyfy(k, mode='both')
sample.append(kv)
for dat in sample:
if 'type' in kwargs:
kwargs.pop('type')
tmp = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs)
tmp.set(dat, 1.0)
ret.append(tmp)
if not isinstance(dat, ProbabilityDistribution.ProbabilityDistribution):
tmp = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs)
tmp.set(dat, 1.0)
ret.append(tmp)
else:
ret.append(dat)
dist = self.forecast_distribution(sample, bins=_bins, **kwargs)[0]
dist = self.forecast_distribution_from_distribution(ret, smooth,uod,_bins,**kwargs)
ret.append(dist)
for k in np.arange(self.max_lag+1, steps+self.max_lag+1):
dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs)
lags = []
# Find all bins of past distributions with probability greater than zero
for ct, lag in enumerate(self.lags):
dd = ret[k - lag]
vals = [float(v) for v in dd.bins if np.round(dd.density(v), 4) > 0.0]
lags.append( sorted(vals) )
# Trace all possible combinations between the bins of past distributions
for path in product(*lags):
# get the combined probabilities for this path
pk = np.prod([ret[k - (self.max_lag + lag)].density(path[ct])
for ct, lag in enumerate(self.lags)])
d = self.forecast_distribution(path)[0]
for bin in _bins:
dist.set(bin, dist.density(bin) + pk * d.density(bin))
for k in np.arange(start + self.max_lag, steps + start + self.max_lag):
    # Each step ahead is forecast from the tail of ``ret`` starting at index
    # ``k - max_lag``; the resulting distribution is appended so that later
    # iterations can use it as a lagged input.
    dist = self.forecast_distribution_from_distribution(ret[k - self.max_lag:], smooth, uod, _bins, **kwargs)
    ret.append(dist)
return ret[-steps:]
def forecast_distribution_from_distribution(self, previous_dist, smooth, uod, bins, **kwargs):
    """
    Forecast one probability distribution from a sequence of past distributions

    :param previous_dist: indexable sequence of past distributions, read at position -lag for each
           lag in self.lags (presumably ProbabilityDistribution objects; only .bins and .density are used)
    :param smooth: forwarded to the ProbabilityDistribution constructor
    :param uod: forwarded to the ProbabilityDistribution constructor
    :param bins: bins of the resulting distribution
    :param kwargs: extra parameters forwarded to the ProbabilityDistribution constructor
    :return: the accumulated ProbabilityDistribution object
    """
    result = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=bins, **kwargs)

    # For each lag keep only the bins whose density, rounded to 4 decimals, is greater than zero
    candidates = []
    for lag in self.lags:
        lagged = previous_dist[-lag]
        candidates.append(sorted(float(v) for v in lagged.bins
                                 if np.round(lagged.density(v), 4) > 0.0))

    # Walk every combination of one candidate bin per lag
    for path in product(*candidates):
        # joint weight of this path: product of the lagged densities at the chosen bins
        weights = [previous_dist[-lag].density(value)
                   for lag, value in zip(self.lags, path)]
        pk = np.prod(weights)

        conditional = self.forecast_distribution(path)[0]

        # accumulate the path forecast, scaled by the path weight, into the result
        for b in bins:
            result.set(b, result.density(b) + pk * conditional.density(b))

    return result
def __str__(self):
tmp = self.name + ":\n"
for r in sorted(self.flrgs.keys()):

View File

@ -185,29 +185,26 @@ class Partitioner(object):
if not isinstance(values, list):
values = [values]
ret = []
num = []
den = []
for val in values:
fset = val[0]
mv = val[1]
if mode == 'both':
num = []
den = []
for fset, mv in val:
num.append( self.sets[fset].centroid * mv )
den.append(mv)
ret.append(np.sum(num)/np.sum(den))
elif mode == 'both':
num = np.mean([self.sets[fset].centroid for fset in val ])
ret.append(num)
num.append( self.sets[fset].centroid * mv )
den.append(mv)
elif mode == 'sets':
num.append(self.sets[fset].centroid)
elif mode == 'vector':
num = []
den = []
for fset, mv in enumerate(val):
num.append(self.sets[self.ordered_sets[fset]].centroid * mv)
den.append(mv)
ret.append(np.sum(num) / np.sum(den))
num.append(self.sets[self.ordered_sets[fset]].centroid * mv)
den.append(mv)
else:
raise Exception('Unknown deffuzyfication mode')
return ret
if mode in ('both','vector'):
return np.sum(num) / np.sum(den)
else:
return np.mean(num)
def check_bounds(self, data):
"""

View File

@ -5,6 +5,19 @@ from pyFTS.common import FuzzySet,SortedCollection,tree
from pyFTS.probabilistic import kde
def from_point(x, **kwargs):
    """
    Build a probability distribution out of a single scalar value

    :param x: scalar value, set in the distribution with the value 1.0
    :param kwargs: common parameters of the distribution, forwarded to the constructor
    :return: the ProbabilityDistribution object
    """
    point_distribution = ProbabilityDistribution(**kwargs)
    point_distribution.set(x, 1.0)
    return point_distribution
class ProbabilityDistribution(object):
"""
Represents a discrete or continuous probability distribution

View File

@ -47,12 +47,28 @@ model = granular.GranularWMVFTS(explanatory_variables=[vhour, vtemp, vload], tar
model.fit(train_mv)
print(model)
temp_generator = pwfts.ProbabilisticWeightedFTS(partitioner=vtemp.partitioner, order=2)
temp_generator.fit(train_mv['temperature'].values)
#print(model)
time_generator = lambda x : pd.to_datetime(x) + pd.to_timedelta(1, unit='h')
#temp_generator = lambda x : x
generators = {'time': time_generator, 'temperature': temp_generator}
#print(model.predict(test_mv.iloc[:10], type='point', steps_ahead=10, generators=generators))
#print(model.predict(test_mv.iloc[:10], type='interval', steps_ahead=10, generators=generators))
print(model.predict(test_mv.iloc[:10], type='distribution', steps_ahead=10, generators=generators))
print(model.predict(test_mv.iloc[:10], type='point'))
print(model.predict(test_mv.iloc[:10], type='interval'))
print(model.predict(test_mv.iloc[:10], type='distribution'))
#
#forecasts1 = model.predict(test_mv, type='multivariate')
#forecasts2 = model.predict(test, type='multivariate', generators={'date': time_generator},
# steps_ahead=200)
'''
from pyFTS.data import Enrollments