Improvements for forecasting ahead in ClusteredMVFTS
This commit is contained in:
parent
812b99bcea
commit
a6d9d164e4
@ -38,7 +38,7 @@ class ClusteredMVFTS(mvfts.MVFTS):
|
||||
|
||||
def fuzzyfy(self,data):
|
||||
ndata = []
|
||||
for index, row in data.iterrows():
|
||||
for index, row in data.iterrows() if isinstance(data, pd.DataFrame) else enumerate(data):
|
||||
data_point = self.format_data(row)
|
||||
ndata.append(self.partitioner.fuzzyfy(data_point, mode=self.fuzzyfy_mode))
|
||||
|
||||
@ -83,16 +83,7 @@ class ClusteredMVFTS(mvfts.MVFTS):
|
||||
|
||||
return self.model.forecast_interval(data, fuzzyfied=pre_fuzz, **kwargs)
|
||||
|
||||
def forecast_ahead_interval(self, data, steps, **kwargs):
    """
    Forecast intervals several steps ahead by delegating to the internal model

    :param data: input data; it is passed through ``self.check_data`` before being handed to the internal model
    :param steps: number of steps ahead, forwarded unchanged to the internal model
    :param kwargs: extra keyword arguments forwarded to the internal model; ``pre_fuzzyfy``, when given,
                   replaces ``self.pre_fuzzyfy`` as the value of the ``fuzzyfied`` flag
    :return: whatever ``self.model.forecast_ahead_interval`` returns
    """
    if self.model.has_interval_forecasting:
        checked = self.check_data(data)

        # A per-call 'pre_fuzzyfy' takes precedence over the instance-level setting
        fuzzyfied_flag = kwargs.get('pre_fuzzyfy', self.pre_fuzzyfy)

        return self.model.forecast_ahead_interval(checked, steps, fuzzyfied=fuzzyfied_flag, **kwargs)

    raise Exception("The internal method does not support interval forecasting!")
|
||||
|
||||
def forecast_distribution(self, data, **kwargs):
|
||||
|
||||
@ -107,14 +98,48 @@ class ClusteredMVFTS(mvfts.MVFTS):
|
||||
|
||||
def forecast_ahead_distribution(self, data, steps, **kwargs):
|
||||
|
||||
if not self.model.has_probability_forecasting:
|
||||
raise Exception("The internal method does not support probabilistic forecasting!")
|
||||
generators = kwargs.get('generators', None)
|
||||
|
||||
data = self.check_data(data)
|
||||
if generators is None:
|
||||
raise Exception('You must provide parameter \'generators\'! generators is a dict where the keys' +
|
||||
' are the dataframe column names (except the target_variable) and the values are ' +
|
||||
'lambda functions that accept one value (the actual value of the variable) '
|
||||
' and return the next value or trained FTS models that accept the actual values and '
|
||||
'forecast new ones.')
|
||||
|
||||
pre_fuzz = kwargs.get('pre_fuzzyfy', self.pre_fuzzyfy)
|
||||
ndata = self.apply_transformations(data)
|
||||
|
||||
return self.model.forecast_ahead_distribution(data, steps, fuzzyfied=pre_fuzz, **kwargs)
|
||||
start = kwargs.get('start_at', self.order)
|
||||
|
||||
ret = []
|
||||
sample = ndata.iloc[start - self.max_lag:]
|
||||
for k in np.arange(0, steps):
|
||||
tmp = self.forecast_distribution(sample.iloc[-self.max_lag:], **kwargs)[0]
|
||||
|
||||
ret.append(tmp)
|
||||
|
||||
new_data_point = {}
|
||||
|
||||
for data_label in generators.keys():
|
||||
if data_label != self.target_variable.data_label:
|
||||
if isinstance(generators[data_label], LambdaType):
|
||||
last_data_point = sample.iloc[-1]
|
||||
new_data_point[data_label] = generators[data_label](last_data_point[data_label])
|
||||
|
||||
elif isinstance(generators[data_label], fts.FTS):
|
||||
gen_model = generators[data_label]
|
||||
last_data_point = sample.iloc[-gen_model.order:]
|
||||
|
||||
if not gen_model.is_multivariate:
|
||||
last_data_point = last_data_point[data_label].values
|
||||
|
||||
new_data_point[data_label] = gen_model.forecast(last_data_point)[0]
|
||||
|
||||
new_data_point[self.target_variable.data_label] = tmp.expected_value()
|
||||
|
||||
sample = sample.append(new_data_point, ignore_index=True)
|
||||
|
||||
return ret[-steps:]
|
||||
|
||||
def forecast_multivariate(self, data, **kwargs):
|
||||
|
||||
|
@ -264,11 +264,11 @@ class MVFTS(fts.FTS):
|
||||
|
||||
ret = []
|
||||
ix = ndata.index[start - self.max_lag:]
|
||||
lo = [ndata.loc[k] for k in ix]
|
||||
up = [ndata.loc[k] for k in ix]
|
||||
lo = ndata.loc[ix] #[ndata.loc[k] for k in ix]
|
||||
up = ndata.loc[ix] #[ndata.loc[k] for k in ix]
|
||||
for k in np.arange(0, steps):
|
||||
tmp_lo = self.forecast_interval(lo[-self.max_lag:], **kwargs)
|
||||
tmp_up = self.forecast_interval(up[-self.max_lag:], **kwargs)
|
||||
tmp_lo = self.forecast_interval(lo[-self.max_lag:], **kwargs)[0]
|
||||
tmp_up = self.forecast_interval(up[-self.max_lag:], **kwargs)[0]
|
||||
|
||||
ret.append([min(tmp_lo), max(tmp_up)])
|
||||
|
||||
@ -300,7 +300,7 @@ class MVFTS(fts.FTS):
|
||||
lo = lo.append(new_data_point_lo, ignore_index=True)
|
||||
up = up.append(new_data_point_up, ignore_index=True)
|
||||
|
||||
return ret[-steps]
|
||||
return ret[-steps:]
|
||||
|
||||
def clone_parameters(self, model):
|
||||
super(MVFTS, self).clone_parameters(model)
|
||||
|
@ -171,6 +171,9 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
|
||||
return np.nanprod(vals)
|
||||
|
||||
def generate_lhs_flrg(self, sample, explain=False):
|
||||
if not isinstance(sample, (list, np.ndarray)):
|
||||
sample = [sample]
|
||||
|
||||
nsample = [self.partitioner.fuzzyfy(k, mode="sets", alpha_cut=self.alpha_cut)
|
||||
for k in sample]
|
||||
|
||||
@ -440,6 +443,8 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
|
||||
|
||||
smooth = kwargs.get("smooth", "none")
|
||||
|
||||
from_distribution = kwargs.get('from_distribution', False)
|
||||
|
||||
fuzzyfied = kwargs.get('fuzzyfied', False)
|
||||
|
||||
l = len(ndata)
|
||||
@ -457,39 +462,43 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
|
||||
for k in np.arange(self.max_lag - 1, l):
|
||||
sample = ndata[k - (self.max_lag - 1): k + 1]
|
||||
|
||||
if not fuzzyfied:
|
||||
flrgs = self.generate_lhs_flrg(sample)
|
||||
if from_distribution:
|
||||
dist = self.forecast_distribution_from_distribution(sample,smooth,uod,_bins)
|
||||
else:
|
||||
fsets = self.get_sets_from_both_fuzzyfication(sample)
|
||||
flrgs = self.generate_lhs_flrg_fuzzyfied(fsets)
|
||||
|
||||
if 'type' in kwargs:
|
||||
kwargs.pop('type')
|
||||
if not fuzzyfied:
|
||||
flrgs = self.generate_lhs_flrg(sample)
|
||||
else:
|
||||
fsets = self.get_sets_from_both_fuzzyfication(sample)
|
||||
flrgs = self.generate_lhs_flrg_fuzzyfied(fsets)
|
||||
|
||||
dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs)
|
||||
if 'type' in kwargs:
|
||||
kwargs.pop('type')
|
||||
|
||||
for bin in _bins:
|
||||
num = []
|
||||
den = []
|
||||
for s in flrgs:
|
||||
if s.get_key() in self.flrgs:
|
||||
flrg = self.flrgs[s.get_key()]
|
||||
wi = flrg.rhs_conditional_probability(bin, self.partitioner.sets, uod, nbins)
|
||||
if not fuzzyfied:
|
||||
pk = flrg.lhs_conditional_probability(sample, self.partitioner.sets, self.global_frequency_count, uod, nbins)
|
||||
dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs)
|
||||
|
||||
for bin in _bins:
|
||||
num = []
|
||||
den = []
|
||||
for s in flrgs:
|
||||
if s.get_key() in self.flrgs:
|
||||
flrg = self.flrgs[s.get_key()]
|
||||
wi = flrg.rhs_conditional_probability(bin, self.partitioner.sets, uod, nbins)
|
||||
if not fuzzyfied:
|
||||
pk = flrg.lhs_conditional_probability(sample, self.partitioner.sets, self.global_frequency_count, uod, nbins)
|
||||
else:
|
||||
lhs_mv = self.pwflrg_lhs_memberhip_fuzzyfied(flrg, sample)
|
||||
pk = flrg.lhs_conditional_probability_fuzzyfied(lhs_mv, self.partitioner.sets,
|
||||
self.global_frequency_count, uod, nbins)
|
||||
|
||||
num.append(wi * pk)
|
||||
den.append(pk)
|
||||
else:
|
||||
lhs_mv = self.pwflrg_lhs_memberhip_fuzzyfied(flrg, sample)
|
||||
pk = flrg.lhs_conditional_probability_fuzzyfied(lhs_mv, self.partitioner.sets,
|
||||
self.global_frequency_count, uod, nbins)
|
||||
num.append(0.0)
|
||||
den.append(0.000000001)
|
||||
pf = sum(num) / sum(den)
|
||||
|
||||
num.append(wi * pk)
|
||||
den.append(pk)
|
||||
else:
|
||||
num.append(0.0)
|
||||
den.append(0.000000001)
|
||||
pf = sum(num) / sum(den)
|
||||
|
||||
dist.set(bin, pf)
|
||||
dist.set(bin, pf)
|
||||
|
||||
ret.append(dist)
|
||||
|
||||
@ -532,7 +541,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
|
||||
|
||||
start = kwargs.get('start_at', 0)
|
||||
|
||||
fuzzyfied = kwargs.get('fuzzyfied', False)
|
||||
fuzzyfied = kwargs.pop('fuzzyfied')
|
||||
|
||||
sample = data[start: start + self.max_lag]
|
||||
|
||||
@ -541,12 +550,12 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
|
||||
else:
|
||||
ret = []
|
||||
for k in sample:
|
||||
kv = self.partitioner.deffuzyfy(k,mode='both')
|
||||
ret.append([kv,kv])
|
||||
kv = self.partitioner.defuzzyfy(k, mode='both')
|
||||
ret.append([kv, kv])
|
||||
|
||||
ret.append(self.forecast_interval(sample, **kwargs)[0])
|
||||
|
||||
for k in np.arange(self.max_lag+1, steps+self.max_lag):
|
||||
for k in np.arange(start + self.max_lag, steps + start + self.max_lag):
|
||||
|
||||
if len(ret) > 0 and self.__check_interval_bounds(ret[-1]):
|
||||
ret.append(ret[-1])
|
||||
@ -562,6 +571,9 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
|
||||
|
||||
ret = []
|
||||
|
||||
if 'type' in kwargs:
|
||||
kwargs.pop('type')
|
||||
|
||||
smooth = kwargs.get("smooth", "none")
|
||||
|
||||
uod = self.get_UoD()
|
||||
@ -575,50 +587,61 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
|
||||
|
||||
start = kwargs.get('start_at', 0)
|
||||
|
||||
sample = ndata[start: start + self.max_lag]
|
||||
fuzzyfied = kwargs.pop('fuzzyfied')
|
||||
|
||||
if not fuzzyfied:
|
||||
sample = ndata[start: start + self.max_lag]
|
||||
else:
|
||||
sample = []
|
||||
for k in ndata[start: start + self.max_lag]:
|
||||
kv = self.partitioner.defuzzyfy(k, mode='both')
|
||||
sample.append(kv)
|
||||
|
||||
for dat in sample:
|
||||
if 'type' in kwargs:
|
||||
kwargs.pop('type')
|
||||
tmp = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs)
|
||||
tmp.set(dat, 1.0)
|
||||
ret.append(tmp)
|
||||
if not isinstance(dat, ProbabilityDistribution.ProbabilityDistribution):
|
||||
tmp = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs)
|
||||
tmp.set(dat, 1.0)
|
||||
ret.append(tmp)
|
||||
else:
|
||||
ret.append(dat)
|
||||
|
||||
dist = self.forecast_distribution(sample, bins=_bins, **kwargs)[0]
|
||||
dist = self.forecast_distribution_from_distribution(ret, smooth,uod,_bins,**kwargs)
|
||||
|
||||
ret.append(dist)
|
||||
|
||||
for k in np.arange(self.max_lag+1, steps+self.max_lag+1):
|
||||
dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs)
|
||||
|
||||
lags = []
|
||||
|
||||
# Find all bins of past distributions with probability greater than zero
|
||||
|
||||
for ct, lag in enumerate(self.lags):
|
||||
dd = ret[k - lag]
|
||||
vals = [float(v) for v in dd.bins if np.round(dd.density(v), 4) > 0.0]
|
||||
lags.append( sorted(vals) )
|
||||
|
||||
|
||||
# Trace all possible combinations between the bins of past distributions
|
||||
|
||||
for path in product(*lags):
|
||||
|
||||
# get the combined probabilities for this path
|
||||
pk = np.prod([ret[k - (self.max_lag + lag)].density(path[ct])
|
||||
for ct, lag in enumerate(self.lags)])
|
||||
|
||||
|
||||
d = self.forecast_distribution(path)[0]
|
||||
|
||||
for bin in _bins:
|
||||
dist.set(bin, dist.density(bin) + pk * d.density(bin))
|
||||
|
||||
for k in np.arange(start + self.max_lag, steps + start + self.max_lag):
|
||||
dist = self.forescast_distribution_from_distribution(ret[k-self.max_lag:], smooth, uod, _bins, **kwargs)
|
||||
ret.append(dist)
|
||||
|
||||
return ret[-steps:]
|
||||
|
||||
def forecast_distribution_from_distribution(self, previous_dist, smooth, uod, bins, **kwargs):
    """
    Build the next probability distribution from a sequence of past distributions

    :param previous_dist: indexable sequence of past distributions; element ``-lag`` is read for each lag in ``self.lags``
    :param smooth: first positional argument of the new ProbabilityDistribution
    :param uod: passed as ``uod`` to the new ProbabilityDistribution
    :param bins: passed as ``bins`` to the new ProbabilityDistribution and iterated when accumulating densities
    :param kwargs: extra keyword arguments forwarded to the ProbabilityDistribution constructor
    :return: the accumulated ProbabilityDistribution object
    """
    result = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=bins, **kwargs)

    # One past distribution per configured lag, in the order of self.lags
    lagged = [previous_dist[-lag] for lag in self.lags]

    # For each lag, keep only the bins whose density (rounded to 4 places) is above zero
    support = [sorted(float(v) for v in past.bins if np.round(past.density(v), 4) > 0.0)
               for past in lagged]

    # Walk every combination of one retained bin per lag
    for path in product(*support):

        # Product of the densities each past distribution assigns to its value on this path
        weight = np.prod([past.density(value) for past, value in zip(lagged, path)])

        forecast = self.forecast_distribution(path)[0]

        # Add this path's forecast, scaled by the path weight, to the running result
        for point in bins:
            result.set(point, result.density(point) + weight * forecast.density(point))

    return result
|
||||
|
||||
def __str__(self):
|
||||
tmp = self.name + ":\n"
|
||||
for r in sorted(self.flrgs.keys()):
|
||||
|
@ -185,29 +185,26 @@ class Partitioner(object):
|
||||
if not isinstance(values, list):
|
||||
values = [values]
|
||||
|
||||
ret = []
|
||||
num = []
|
||||
den = []
|
||||
for val in values:
|
||||
fset = val[0]
|
||||
mv = val[1]
|
||||
if mode == 'both':
|
||||
num = []
|
||||
den = []
|
||||
for fset, mv in val:
|
||||
num.append( self.sets[fset].centroid * mv )
|
||||
den.append(mv)
|
||||
ret.append(np.sum(num)/np.sum(den))
|
||||
elif mode == 'both':
|
||||
num = np.mean([self.sets[fset].centroid for fset in val ])
|
||||
ret.append(num)
|
||||
num.append( self.sets[fset].centroid * mv )
|
||||
den.append(mv)
|
||||
elif mode == 'sets':
|
||||
num.append(self.sets[fset].centroid)
|
||||
elif mode == 'vector':
|
||||
num = []
|
||||
den = []
|
||||
for fset, mv in enumerate(val):
|
||||
num.append(self.sets[self.ordered_sets[fset]].centroid * mv)
|
||||
den.append(mv)
|
||||
ret.append(np.sum(num) / np.sum(den))
|
||||
num.append(self.sets[self.ordered_sets[fset]].centroid * mv)
|
||||
den.append(mv)
|
||||
else:
|
||||
raise Exception('Unknown deffuzyfication mode')
|
||||
|
||||
return ret
|
||||
if mode in ('both','vector'):
|
||||
return np.sum(num) / np.sum(den)
|
||||
else:
|
||||
return np.mean(num)
|
||||
|
||||
def check_bounds(self, data):
|
||||
"""
|
||||
|
@ -5,6 +5,19 @@ from pyFTS.common import FuzzySet,SortedCollection,tree
|
||||
from pyFTS.probabilistic import kde
|
||||
|
||||
|
||||
def from_point(x, **kwargs):
    """
    Build a probability distribution out of a single scalar value

    :param x: scalar value; it is set on the new distribution with the value 1.0
    :param kwargs: parameters forwarded unchanged to the ProbabilityDistribution constructor
    :return: the ProbabilityDistribution object
    """
    point_dist = ProbabilityDistribution(**kwargs)
    point_dist.set(x, 1.0)
    return point_dist
|
||||
|
||||
|
||||
class ProbabilityDistribution(object):
|
||||
"""
|
||||
Represents a discrete or continuous probability distribution
|
||||
|
@ -47,12 +47,28 @@ model = granular.GranularWMVFTS(explanatory_variables=[vhour, vtemp, vload], tar
|
||||
|
||||
model.fit(train_mv)
|
||||
|
||||
print(model)
|
||||
|
||||
temp_generator = pwfts.ProbabilisticWeightedFTS(partitioner=vtemp.partitioner, order=2)
|
||||
temp_generator.fit(train_mv['temperature'].values)
|
||||
|
||||
#print(model)
|
||||
|
||||
time_generator = lambda x : pd.to_datetime(x) + pd.to_timedelta(1, unit='h')
|
||||
#temp_generator = lambda x : x
|
||||
|
||||
generators = {'time': time_generator, 'temperature': temp_generator}
|
||||
|
||||
#print(model.predict(test_mv.iloc[:10], type='point', steps_ahead=10, generators=generators))
|
||||
#print(model.predict(test_mv.iloc[:10], type='interval', steps_ahead=10, generators=generators))
|
||||
print(model.predict(test_mv.iloc[:10], type='distribution', steps_ahead=10, generators=generators))
|
||||
|
||||
|
||||
print(model.predict(test_mv.iloc[:10], type='point'))
|
||||
print(model.predict(test_mv.iloc[:10], type='interval'))
|
||||
print(model.predict(test_mv.iloc[:10], type='distribution'))
|
||||
#
|
||||
|
||||
#forecasts1 = model.predict(test_mv, type='multivariate')
|
||||
#forecasts2 = model.predict(test, type='multivariate', generators={'date': time_generator},
|
||||
# steps_ahead=200)
|
||||
|
||||
|
||||
'''
|
||||
from pyFTS.data import Enrollments
|
||||
|
Loading…
Reference in New Issue
Block a user