Improvements for forecasting ahead in ClusteredMVFTS

parent 812b99bcea
commit a6d9d164e4
@@ -38,7 +38,7 @@ class ClusteredMVFTS(mvfts.MVFTS):

     def fuzzyfy(self,data):
         ndata = []
-        for index, row in data.iterrows():
+        for index, row in data.iterrows() if isinstance(data, pd.DataFrame) else enumerate(data):
             data_point = self.format_data(row)
             ndata.append(self.partitioner.fuzzyfy(data_point, mode=self.fuzzyfy_mode))
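Note: with this change `fuzzyfy` accepts either a pandas DataFrame or a plain sequence of rows. A minimal standalone sketch of the dual-iteration pattern, assuming only pandas (`iter_rows` is a hypothetical name):

    import pandas as pd

    def iter_rows(data):
        # DataFrames yield (index, row) pairs; any other sequence yields
        # (position, item) pairs, so the loop body stays the same
        return data.iterrows() if isinstance(data, pd.DataFrame) else enumerate(data)

    df = pd.DataFrame({'x': [1, 2]})
    assert [i for i, _ in iter_rows(df)] == [0, 1]
    assert [i for i, _ in iter_rows([10, 20])] == [0, 1]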
@@ -83,16 +83,7 @@ class ClusteredMVFTS(mvfts.MVFTS):

         return self.model.forecast_interval(data, fuzzyfied=pre_fuzz, **kwargs)

-    def forecast_ahead_interval(self, data, steps, **kwargs):
-
-        if not self.model.has_interval_forecasting:
-            raise Exception("The internal method does not support interval forecasting!")
-
-        data = self.check_data(data)
-
-        pre_fuzz = kwargs.get('pre_fuzzyfy', self.pre_fuzzyfy)
-
-        return self.model.forecast_ahead_interval(data, steps, fuzzyfied=pre_fuzz, **kwargs)

     def forecast_distribution(self, data, **kwargs):
@@ -107,14 +98,48 @@ class ClusteredMVFTS(mvfts.MVFTS):

     def forecast_ahead_distribution(self, data, steps, **kwargs):

-        if not self.model.has_probability_forecasting:
-            raise Exception("The internal method does not support probabilistic forecasting!")
-
-        data = self.check_data(data)
-
-        pre_fuzz = kwargs.get('pre_fuzzyfy', self.pre_fuzzyfy)
-
-        return self.model.forecast_ahead_distribution(data, steps, fuzzyfied=pre_fuzz, **kwargs)
+        generators = kwargs.get('generators', None)
+
+        if generators is None:
+            raise Exception('You must provide parameter \'generators\'! generators is a dict where the keys' +
+                            ' are the dataframe column names (except the target_variable) and the values are ' +
+                            'lambda functions that accept one value (the actual value of the variable) '
+                            ' and return the next value or trained FTS models that accept the actual values and '
+                            'forecast new ones.')
+
+        ndata = self.apply_transformations(data)
+
+        start = kwargs.get('start_at', self.order)
+
+        ret = []
+        sample = ndata.iloc[start - self.max_lag:]
+        for k in np.arange(0, steps):
+            tmp = self.forecast_distribution(sample.iloc[-self.max_lag:], **kwargs)[0]
+
+            ret.append(tmp)
+
+            new_data_point = {}
+
+            for data_label in generators.keys():
+                if data_label != self.target_variable.data_label:
+                    if isinstance(generators[data_label], LambdaType):
+                        last_data_point = sample.iloc[-1]
+                        new_data_point[data_label] = generators[data_label](last_data_point[data_label])
+                    elif isinstance(generators[data_label], fts.FTS):
+                        gen_model = generators[data_label]
+                        last_data_point = sample.iloc[-gen_model.order:]
+
+                        if not gen_model.is_multivariate:
+                            last_data_point = last_data_point[data_label].values
+
+                        new_data_point[data_label] = gen_model.forecast(last_data_point)[0]
+
+            new_data_point[self.target_variable.data_label] = tmp.expected_value()
+
+            sample = sample.append(new_data_point, ignore_index=True)
+
+        return ret[-steps:]

     def forecast_multivariate(self, data, **kwargs):
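Note: the rewritten method steps the whole multivariate sample forward: the target column advances with the forecast distribution's expected value, while every other column must be advanced by a user-supplied generator. A hedged sketch of the expected `generators` shape, mirroring the example script at the end of this commit (the column names are that example's):

    import pandas as pd

    # values are one-argument lambdas returning the next exogenous value,
    # or fitted FTS models (forecast() is called on the last 'order' values)
    time_generator = lambda x: pd.to_datetime(x) + pd.to_timedelta(1, unit='h')
    temp_generator = lambda x: x  # naive persistence stand-in for a trained model

    generators = {'time': time_generator, 'temperature': temp_generator}

    # assumed call shape, as in the example script below:
    # model.predict(test_mv.iloc[:10], type='distribution', steps_ahead=10, generators=generators)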
@@ -264,11 +264,11 @@ class MVFTS(fts.FTS):

         ret = []
         ix = ndata.index[start - self.max_lag:]
-        lo = [ndata.loc[k] for k in ix]
-        up = [ndata.loc[k] for k in ix]
+        lo = ndata.loc[ix] #[ndata.loc[k] for k in ix]
+        up = ndata.loc[ix] #[ndata.loc[k] for k in ix]
         for k in np.arange(0, steps):
-            tmp_lo = self.forecast_interval(lo[-self.max_lag:], **kwargs)
-            tmp_up = self.forecast_interval(up[-self.max_lag:], **kwargs)
+            tmp_lo = self.forecast_interval(lo[-self.max_lag:], **kwargs)[0]
+            tmp_up = self.forecast_interval(up[-self.max_lag:], **kwargs)[0]

             ret.append([min(tmp_lo), max(tmp_up)])
@@ -300,7 +300,7 @@ class MVFTS(fts.FTS):

             lo = lo.append(new_data_point_lo, ignore_index=True)
             up = up.append(new_data_point_up, ignore_index=True)

-        return ret[-steps]
+        return ret[-steps:]

     def clone_parameters(self, model):
         super(MVFTS, self).clone_parameters(model)
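Note: the `return ret[-steps]` fix is easy to miss; without the colon it returns a single interval instead of the last `steps` of them. A two-line illustration:

    ret, steps = [[1, 2], [3, 4], [5, 6]], 2
    assert ret[-steps] == [3, 4]             # old: one interval
    assert ret[-steps:] == [[3, 4], [5, 6]]  # new: the last 'steps' intervals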
@@ -171,6 +171,9 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):

         return np.nanprod(vals)

     def generate_lhs_flrg(self, sample, explain=False):
+        if not isinstance(sample, (list, np.ndarray)):
+            sample = [sample]
+
         nsample = [self.partitioner.fuzzyfy(k, mode="sets", alpha_cut=self.alpha_cut)
                    for k in sample]
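Note: the added guard lets callers pass a single scalar lag value; the fuzzyfication comprehension below it always sees an iterable. A standalone sketch of the guard (function name hypothetical):

    import numpy as np

    def as_sample(sample):
        if not isinstance(sample, (list, np.ndarray)):
            sample = [sample]
        return sample

    assert as_sample(3.2) == [3.2]
    assert as_sample([1.0, 2.0]) == [1.0, 2.0]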
@@ -440,6 +443,8 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):

         smooth = kwargs.get("smooth", "none")

+        from_distribution = kwargs.get('from_distribution', False)
+
         fuzzyfied = kwargs.get('fuzzyfied', False)

         l = len(ndata)
@@ -457,6 +462,10 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):

         for k in np.arange(self.max_lag - 1, l):
             sample = ndata[k - (self.max_lag - 1): k + 1]

+            if from_distribution:
+                dist = self.forecast_distribution_from_distribution(sample,smooth,uod,_bins)
+            else:
                 if not fuzzyfied:
                     flrgs = self.generate_lhs_flrg(sample)
                 else:
@@ -532,7 +541,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):

         start = kwargs.get('start_at', 0)

-        fuzzyfied = kwargs.get('fuzzyfied', False)
+        fuzzyfied = kwargs.pop('fuzzyfied')

         sample = data[start: start + self.max_lag]
@@ -541,12 +550,12 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):

         else:
             ret = []
             for k in sample:
-                kv = self.partitioner.deffuzyfy(k,mode='both')
-                ret.append([kv,kv])
+                kv = self.partitioner.defuzzyfy(k, mode='both')
+                ret.append([kv, kv])

         ret.append(self.forecast_interval(sample, **kwargs)[0])

-        for k in np.arange(self.max_lag+1, steps+self.max_lag):
+        for k in np.arange(start + self.max_lag, steps + start + self.max_lag):

             if len(ret) > 0 and self.__check_interval_bounds(ret[-1]):
                 ret.append(ret[-1])
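Note: the new loop range both honors `start_at` and fixes an off-by-one, since the old range produced `steps - 1` iterations. A quick check of the arithmetic:

    import numpy as np

    max_lag, steps, start = 3, 5, 0
    old = np.arange(max_lag + 1, steps + max_lag)
    new = np.arange(start + max_lag, steps + start + max_lag)
    assert len(old) == steps - 1 and len(new) == steps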
@@ -562,6 +571,9 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):

         ret = []

+        if 'type' in kwargs:
+            kwargs.pop('type')
+
         smooth = kwargs.get("smooth", "none")

         uod = self.get_UoD()
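Note: popping `'type'` up front is likely defensive. `predict(..., type='distribution')` leaves `type` in `kwargs`, and `ProbabilityDistribution` takes the distribution type as its first positional argument (see the `ProbabilityDistribution(smooth, ...)` calls below), so forwarding `**kwargs` unfiltered would raise. A minimal sketch of the collision:

    def ctor(type="KDE", **kwargs):  # stand-in for ProbabilityDistribution.__init__
        return type

    ctor("histogram")                    # fine
    try:
        ctor("histogram", type="distribution")
    except TypeError:
        pass                             # multiple values for argument 'type'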
@@ -575,49 +587,60 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):

         start = kwargs.get('start_at', 0)

+        fuzzyfied = kwargs.pop('fuzzyfied')
+
+        if not fuzzyfied:
             sample = ndata[start: start + self.max_lag]
+        else:
+            sample = []
+            for k in ndata[start: start + self.max_lag]:
+                kv = self.partitioner.defuzzyfy(k, mode='both')
+                sample.append(kv)

         for dat in sample:
-            if 'type' in kwargs:
-                kwargs.pop('type')
+            if not isinstance(dat, ProbabilityDistribution.ProbabilityDistribution):
                 tmp = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs)
                 tmp.set(dat, 1.0)
                 ret.append(tmp)
+            else:
+                ret.append(dat)

-        dist = self.forecast_distribution(sample, bins=_bins, **kwargs)[0]
+        dist = self.forecast_distribution_from_distribution(ret, smooth, uod, _bins, **kwargs)

         ret.append(dist)

-        for k in np.arange(self.max_lag+1, steps+self.max_lag+1):
-            dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs)
+        for k in np.arange(start + self.max_lag, steps + start + self.max_lag):
+            dist = self.forecast_distribution_from_distribution(ret[k - self.max_lag:], smooth, uod, _bins, **kwargs)
+            ret.append(dist)
+
+        return ret[-steps:]
+
+    def forecast_distribution_from_distribution(self, previous_dist, smooth, uod, bins, **kwargs):
+        dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=bins, **kwargs)
+
         lags = []

         # Find all bins of past distributions with probability greater than zero
         for ct, lag in enumerate(self.lags):
-            dd = ret[k - lag]
+            dd = previous_dist[-lag]
             vals = [float(v) for v in dd.bins if np.round(dd.density(v), 4) > 0.0]
-            lags.append( sorted(vals) )
+            lags.append(sorted(vals))

         # Trace all possible combinations between the bins of past distributions
         for path in product(*lags):
             # get the combined probabilities for this path
-            pk = np.prod([ret[k - (self.max_lag + lag)].density(path[ct])
+            pk = np.prod([previous_dist[-lag].density(path[ct])
                           for ct, lag in enumerate(self.lags)])

             d = self.forecast_distribution(path)[0]

-            for bin in _bins:
+            for bin in bins:
                 dist.set(bin, dist.density(bin) + pk * d.density(bin))

-        ret.append(dist)
-
-        return ret[-steps:]
+        return dist

     def __str__(self):
         tmp = self.name + ":\n"
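Note: the extracted `forecast_distribution_from_distribution` applies the law of total probability over the lagged distributions: every combination of non-empty bins is a path, weighted by the product of its lag densities, and each path's one-step forecast is accumulated into the output bins. A standalone sketch with plain `{bin: probability}` dicts in place of `ProbabilityDistribution` objects (all names hypothetical):

    import numpy as np
    from itertools import product

    def mix(previous, forecast, bins):
        # keep only bins with non-negligible mass, as the method above does
        lags = [[v for v, p in d.items() if round(p, 4) > 0.0] for d in previous]
        out = {b: 0.0 for b in bins}
        for path in product(*lags):
            # joint probability of this combination of lagged values
            pk = np.prod([previous[ct][v] for ct, v in enumerate(path)])
            d = forecast(path)  # one-step conditional distribution
            for b in bins:
                out[b] += pk * d.get(b, 0.0)
        return out

    prev = [{0: 0.5, 1: 0.5}, {1: 1.0}]
    result = mix(prev, lambda path: {path[-1]: 1.0}, bins=[0, 1])
    assert abs(sum(result.values()) - 1.0) < 1e-9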
@@ -185,29 +185,26 @@ class Partitioner(object):

         if not isinstance(values, list):
             values = [values]

-        ret = []
-        for val in values:
-            if mode == 'both':
         num = []
         den = []
-                for fset, mv in val:
+        for val in values:
+            fset = val[0]
+            mv = val[1]
+            if mode == 'both':
                 num.append( self.sets[fset].centroid * mv )
                 den.append(mv)
-                ret.append(np.sum(num)/np.sum(den))
-            elif mode == 'both':
-                num = np.mean([self.sets[fset].centroid for fset in val ])
-                ret.append(num)
+            elif mode == 'sets':
+                num.append(self.sets[fset].centroid)
             elif mode == 'vector':
-                num = []
-                den = []
-                for fset, mv in enumerate(val):
                 num.append(self.sets[self.ordered_sets[fset]].centroid * mv)
                 den.append(mv)
-                ret.append(np.sum(num) / np.sum(den))
             else:
                 raise Exception('Unknown deffuzyfication mode')

-        return ret
+        if mode in ('both','vector'):
+            return np.sum(num) / np.sum(den)
+        else:
+            return np.mean(num)

     def check_bounds(self, data):
         """
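Note: in `'both'` mode the defuzzyfied value is the membership-weighted mean of the fuzzy-set centroids, now accumulated across all values before a single division. Worked with hypothetical centroids and memberships:

    import numpy as np

    centroids = {'A0': 1.0, 'A1': 2.0}
    fuzzyfied = [('A0', 0.25), ('A1', 0.75)]  # (set, membership) pairs

    num = [centroids[fset] * mv for fset, mv in fuzzyfied]
    den = [mv for _, mv in fuzzyfied]
    assert np.sum(num) / np.sum(den) == 1.75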
@@ -5,6 +5,19 @@ from pyFTS.common import FuzzySet,SortedCollection,tree

 from pyFTS.probabilistic import kde

+
+def from_point(x,**kwargs):
+    """
+    Create a probability distribution from a scalar value
+
+    :param x: scalar value
+    :param kwargs: common parameters of the distribution
+    :return: the ProbabilityDistribution object
+    """
+    tmp = ProbabilityDistribution(**kwargs)
+    tmp.set(x, 1.0)
+    return tmp
+
+
 class ProbabilityDistribution(object):
     """
     Represents a discrete or continous probability distribution
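Note: `from_point` builds a degenerate distribution, with all probability mass placed on `x` via `set(x, 1.0)`. A toy dict-based sketch of the same contract (the real helper returns a `ProbabilityDistribution` configured through `**kwargs`):

    def from_point_sketch(x):
        return {x: 1.0}  # density 1.0 at x, 0 elsewhere

    assert from_point_sketch(3.2) == {3.2: 1.0}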
@@ -47,12 +47,28 @@ model = granular.GranularWMVFTS(explanatory_variables=[vhour, vtemp, vload], tar

 model.fit(train_mv)

-print(model)
+temp_generator = pwfts.ProbabilisticWeightedFTS(partitioner=vtemp.partitioner, order=2)
+temp_generator.fit(train_mv['temperature'].values)
+
+#print(model)
+
+time_generator = lambda x : pd.to_datetime(x) + pd.to_timedelta(1, unit='h')
+#temp_generator = lambda x : x
+
+generators = {'time': time_generator, 'temperature': temp_generator}
+
+#print(model.predict(test_mv.iloc[:10], type='point', steps_ahead=10, generators=generators))
+#print(model.predict(test_mv.iloc[:10], type='interval', steps_ahead=10, generators=generators))
+print(model.predict(test_mv.iloc[:10], type='distribution', steps_ahead=10, generators=generators))

-print(model.predict(test_mv.iloc[:10], type='point'))
-print(model.predict(test_mv.iloc[:10], type='interval'))
-print(model.predict(test_mv.iloc[:10], type='distribution'))
+#
+#forecasts1 = model.predict(test_mv, type='multivariate')
+#forecasts2 = model.predict(test, type='multivariate', generators={'date': time_generator},
+#                           steps_ahead=200)

 '''
 from pyFTS.data import Enrollments