Improvements for forecasting ahead in ClusteredMVFTS

This commit is contained in:
Petrônio Cândido 2019-06-21 15:10:19 -03:00
parent 812b99bcea
commit a6d9d164e4
6 changed files with 180 additions and 106 deletions

View File

@ -38,7 +38,7 @@ class ClusteredMVFTS(mvfts.MVFTS):
def fuzzyfy(self,data): def fuzzyfy(self,data):
ndata = [] ndata = []
for index, row in data.iterrows(): for index, row in data.iterrows() if isinstance(data, pd.DataFrame) else enumerate(data):
data_point = self.format_data(row) data_point = self.format_data(row)
ndata.append(self.partitioner.fuzzyfy(data_point, mode=self.fuzzyfy_mode)) ndata.append(self.partitioner.fuzzyfy(data_point, mode=self.fuzzyfy_mode))
@ -83,16 +83,7 @@ class ClusteredMVFTS(mvfts.MVFTS):
return self.model.forecast_interval(data, fuzzyfied=pre_fuzz, **kwargs) return self.model.forecast_interval(data, fuzzyfied=pre_fuzz, **kwargs)
def forecast_ahead_interval(self, data, steps, **kwargs):
    """
    Delegate a multi-step-ahead interval forecast to the internal model.

    :param data: input data; it is passed through ``self.check_data`` before being forwarded
    :param steps: number of steps ahead, forwarded unchanged to the internal model
    :param kwargs: extra keyword arguments forwarded to the internal model; ``pre_fuzzyfy``,
                   when present, overrides ``self.pre_fuzzyfy`` for this call
    :return: whatever ``self.model.forecast_ahead_interval`` returns
    :raises Exception: if the internal model does not support interval forecasting
    """
    supports_intervals = self.model.has_interval_forecasting
    if not supports_intervals:
        raise Exception("The internal method does not support interval forecasting!")

    checked = self.check_data(data)

    # The per-call 'pre_fuzzyfy' keyword wins over the instance-level setting.
    already_fuzzyfied = kwargs.get('pre_fuzzyfy', self.pre_fuzzyfy)

    forecaster = self.model.forecast_ahead_interval
    return forecaster(checked, steps, fuzzyfied=already_fuzzyfied, **kwargs)
def forecast_distribution(self, data, **kwargs): def forecast_distribution(self, data, **kwargs):
@ -107,14 +98,48 @@ class ClusteredMVFTS(mvfts.MVFTS):
def forecast_ahead_distribution(self, data, steps, **kwargs): def forecast_ahead_distribution(self, data, steps, **kwargs):
if not self.model.has_probability_forecasting: generators = kwargs.get('generators', None)
raise Exception("The internal method does not support probabilistic forecasting!")
data = self.check_data(data) if generators is None:
raise Exception('You must provide parameter \'generators\'! generators is a dict where the keys' +
' are the dataframe column names (except the target_variable) and the values are ' +
'lambda functions that accept one value (the actual value of the variable) '
' and return the next value or trained FTS models that accept the actual values and '
'forecast new ones.')
pre_fuzz = kwargs.get('pre_fuzzyfy', self.pre_fuzzyfy) ndata = self.apply_transformations(data)
return self.model.forecast_ahead_distribution(data, steps, fuzzyfied=pre_fuzz, **kwargs) start = kwargs.get('start_at', self.order)
ret = []
sample = ndata.iloc[start - self.max_lag:]
for k in np.arange(0, steps):
tmp = self.forecast_distribution(sample.iloc[-self.max_lag:], **kwargs)[0]
ret.append(tmp)
new_data_point = {}
for data_label in generators.keys():
if data_label != self.target_variable.data_label:
if isinstance(generators[data_label], LambdaType):
last_data_point = sample.iloc[-1]
new_data_point[data_label] = generators[data_label](last_data_point[data_label])
elif isinstance(generators[data_label], fts.FTS):
gen_model = generators[data_label]
last_data_point = sample.iloc[-gen_model.order:]
if not gen_model.is_multivariate:
last_data_point = last_data_point[data_label].values
new_data_point[data_label] = gen_model.forecast(last_data_point)[0]
new_data_point[self.target_variable.data_label] = tmp.expected_value()
sample = sample.append(new_data_point, ignore_index=True)
return ret[-steps:]
def forecast_multivariate(self, data, **kwargs): def forecast_multivariate(self, data, **kwargs):

View File

@ -264,11 +264,11 @@ class MVFTS(fts.FTS):
ret = [] ret = []
ix = ndata.index[start - self.max_lag:] ix = ndata.index[start - self.max_lag:]
lo = [ndata.loc[k] for k in ix] lo = ndata.loc[ix] #[ndata.loc[k] for k in ix]
up = [ndata.loc[k] for k in ix] up = ndata.loc[ix] #[ndata.loc[k] for k in ix]
for k in np.arange(0, steps): for k in np.arange(0, steps):
tmp_lo = self.forecast_interval(lo[-self.max_lag:], **kwargs) tmp_lo = self.forecast_interval(lo[-self.max_lag:], **kwargs)[0]
tmp_up = self.forecast_interval(up[-self.max_lag:], **kwargs) tmp_up = self.forecast_interval(up[-self.max_lag:], **kwargs)[0]
ret.append([min(tmp_lo), max(tmp_up)]) ret.append([min(tmp_lo), max(tmp_up)])
@ -300,7 +300,7 @@ class MVFTS(fts.FTS):
lo = lo.append(new_data_point_lo, ignore_index=True) lo = lo.append(new_data_point_lo, ignore_index=True)
up = up.append(new_data_point_up, ignore_index=True) up = up.append(new_data_point_up, ignore_index=True)
return ret[-steps] return ret[-steps:]
def clone_parameters(self, model): def clone_parameters(self, model):
super(MVFTS, self).clone_parameters(model) super(MVFTS, self).clone_parameters(model)

View File

@ -171,6 +171,9 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
return np.nanprod(vals) return np.nanprod(vals)
def generate_lhs_flrg(self, sample, explain=False): def generate_lhs_flrg(self, sample, explain=False):
if not isinstance(sample, (list, np.ndarray)):
sample = [sample]
nsample = [self.partitioner.fuzzyfy(k, mode="sets", alpha_cut=self.alpha_cut) nsample = [self.partitioner.fuzzyfy(k, mode="sets", alpha_cut=self.alpha_cut)
for k in sample] for k in sample]
@ -440,6 +443,8 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
smooth = kwargs.get("smooth", "none") smooth = kwargs.get("smooth", "none")
from_distribution = kwargs.get('from_distribution', False)
fuzzyfied = kwargs.get('fuzzyfied', False) fuzzyfied = kwargs.get('fuzzyfied', False)
l = len(ndata) l = len(ndata)
@ -457,6 +462,10 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
for k in np.arange(self.max_lag - 1, l): for k in np.arange(self.max_lag - 1, l):
sample = ndata[k - (self.max_lag - 1): k + 1] sample = ndata[k - (self.max_lag - 1): k + 1]
if from_distribution:
dist = self.forecast_distribution_from_distribution(sample,smooth,uod,_bins)
else:
if not fuzzyfied: if not fuzzyfied:
flrgs = self.generate_lhs_flrg(sample) flrgs = self.generate_lhs_flrg(sample)
else: else:
@ -532,7 +541,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
start = kwargs.get('start_at', 0) start = kwargs.get('start_at', 0)
fuzzyfied = kwargs.get('fuzzyfied', False) fuzzyfied = kwargs.pop('fuzzyfied')
sample = data[start: start + self.max_lag] sample = data[start: start + self.max_lag]
@ -541,12 +550,12 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
else: else:
ret = [] ret = []
for k in sample: for k in sample:
kv = self.partitioner.deffuzyfy(k,mode='both') kv = self.partitioner.defuzzyfy(k, mode='both')
ret.append([kv,kv]) ret.append([kv, kv])
ret.append(self.forecast_interval(sample, **kwargs)[0]) ret.append(self.forecast_interval(sample, **kwargs)[0])
for k in np.arange(self.max_lag+1, steps+self.max_lag): for k in np.arange(start + self.max_lag, steps + start + self.max_lag):
if len(ret) > 0 and self.__check_interval_bounds(ret[-1]): if len(ret) > 0 and self.__check_interval_bounds(ret[-1]):
ret.append(ret[-1]) ret.append(ret[-1])
@ -562,6 +571,9 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
ret = [] ret = []
if 'type' in kwargs:
kwargs.pop('type')
smooth = kwargs.get("smooth", "none") smooth = kwargs.get("smooth", "none")
uod = self.get_UoD() uod = self.get_UoD()
@ -575,49 +587,60 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
start = kwargs.get('start_at', 0) start = kwargs.get('start_at', 0)
fuzzyfied = kwargs.pop('fuzzyfied')
if not fuzzyfied:
sample = ndata[start: start + self.max_lag] sample = ndata[start: start + self.max_lag]
else:
sample = []
for k in ndata[start: start + self.max_lag]:
kv = self.partitioner.defuzzyfy(k, mode='both')
sample.append(kv)
for dat in sample: for dat in sample:
if 'type' in kwargs: if not isinstance(dat, ProbabilityDistribution.ProbabilityDistribution):
kwargs.pop('type')
tmp = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs) tmp = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs)
tmp.set(dat, 1.0) tmp.set(dat, 1.0)
ret.append(tmp) ret.append(tmp)
else:
ret.append(dat)
dist = self.forecast_distribution(sample, bins=_bins, **kwargs)[0] dist = self.forecast_distribution_from_distribution(ret, smooth,uod,_bins,**kwargs)
ret.append(dist) ret.append(dist)
for k in np.arange(self.max_lag+1, steps+self.max_lag+1): for k in np.arange(start + self.max_lag, steps + start + self.max_lag):
dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs) dist = self.forescast_distribution_from_distribution(ret[k-self.max_lag:], smooth, uod, _bins, **kwargs)
ret.append(dist)
return ret[-steps:]
def forecast_distribution_from_distribution(self, previous_dist, smooth, uod, bins, **kwargs):
dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=bins, **kwargs)
lags = [] lags = []
# Find all bins of past distributions with probability greater than zero # Find all bins of past distributions with probability greater than zero
for ct, lag in enumerate(self.lags): for ct, lag in enumerate(self.lags):
dd = ret[k - lag] dd = previous_dist[-lag]
vals = [float(v) for v in dd.bins if np.round(dd.density(v), 4) > 0.0] vals = [float(v) for v in dd.bins if np.round(dd.density(v), 4) > 0.0]
lags.append( sorted(vals) ) lags.append(sorted(vals))
# Trace all possible combinations between the bins of past distributions # Trace all possible combinations between the bins of past distributions
for path in product(*lags): for path in product(*lags):
# get the combined probabilities for this path # get the combined probabilities for this path
pk = np.prod([ret[k - (self.max_lag + lag)].density(path[ct]) pk = np.prod([previous_dist[-lag].density(path[ct])
for ct, lag in enumerate(self.lags)]) for ct, lag in enumerate(self.lags)])
d = self.forecast_distribution(path)[0] d = self.forecast_distribution(path)[0]
for bin in _bins: for bin in bins:
dist.set(bin, dist.density(bin) + pk * d.density(bin)) dist.set(bin, dist.density(bin) + pk * d.density(bin))
ret.append(dist) return dist
return ret[-steps:]
def __str__(self): def __str__(self):
tmp = self.name + ":\n" tmp = self.name + ":\n"

View File

@ -185,29 +185,26 @@ class Partitioner(object):
if not isinstance(values, list): if not isinstance(values, list):
values = [values] values = [values]
ret = []
for val in values:
if mode == 'both':
num = [] num = []
den = [] den = []
for fset, mv in val: for val in values:
fset = val[0]
mv = val[1]
if mode == 'both':
num.append( self.sets[fset].centroid * mv ) num.append( self.sets[fset].centroid * mv )
den.append(mv) den.append(mv)
ret.append(np.sum(num)/np.sum(den)) elif mode == 'sets':
elif mode == 'both': num.append(self.sets[fset].centroid)
num = np.mean([self.sets[fset].centroid for fset in val ])
ret.append(num)
elif mode == 'vector': elif mode == 'vector':
num = []
den = []
for fset, mv in enumerate(val):
num.append(self.sets[self.ordered_sets[fset]].centroid * mv) num.append(self.sets[self.ordered_sets[fset]].centroid * mv)
den.append(mv) den.append(mv)
ret.append(np.sum(num) / np.sum(den))
else: else:
raise Exception('Unknown deffuzyfication mode') raise Exception('Unknown deffuzyfication mode')
return ret if mode in ('both','vector'):
return np.sum(num) / np.sum(den)
else:
return np.mean(num)
def check_bounds(self, data): def check_bounds(self, data):
""" """

View File

@ -5,6 +5,19 @@ from pyFTS.common import FuzzySet,SortedCollection,tree
from pyFTS.probabilistic import kde from pyFTS.probabilistic import kde
def from_point(x, **kwargs):
    """
    Create a probability distribution from a single scalar value.

    The distribution is constructed with the given keyword arguments and then
    ``set(x, 1.0)`` is called on it.

    :param x: scalar value
    :param kwargs: common parameters of the distribution, forwarded unchanged
                   to the ProbabilityDistribution constructor
    :return: the ProbabilityDistribution object
    """
    point_dist = ProbabilityDistribution(**kwargs)
    point_dist.set(x, 1.0)
    return point_dist
class ProbabilityDistribution(object): class ProbabilityDistribution(object):
""" """
Represents a discrete or continuous probability distribution Represents a discrete or continuous probability distribution

View File

@ -47,12 +47,28 @@ model = granular.GranularWMVFTS(explanatory_variables=[vhour, vtemp, vload], tar
model.fit(train_mv) model.fit(train_mv)
print(model)
temp_generator = pwfts.ProbabilisticWeightedFTS(partitioner=vtemp.partitioner, order=2)
temp_generator.fit(train_mv['temperature'].values)
#print(model)
time_generator = lambda x : pd.to_datetime(x) + pd.to_timedelta(1, unit='h')
#temp_generator = lambda x : x
generators = {'time': time_generator, 'temperature': temp_generator}
#print(model.predict(test_mv.iloc[:10], type='point', steps_ahead=10, generators=generators))
#print(model.predict(test_mv.iloc[:10], type='interval', steps_ahead=10, generators=generators))
print(model.predict(test_mv.iloc[:10], type='distribution', steps_ahead=10, generators=generators))
print(model.predict(test_mv.iloc[:10], type='point')) #
print(model.predict(test_mv.iloc[:10], type='interval'))
print(model.predict(test_mv.iloc[:10], type='distribution')) #forecasts1 = model.predict(test_mv, type='multivariate')
#forecasts2 = model.predict(test, type='multivariate', generators={'date': time_generator},
# steps_ahead=200)
''' '''
from pyFTS.data import Enrollments from pyFTS.data import Enrollments