#!/usr/bin/python
# -*- coding: utf8 -*-
import numpy as np
from itertools import product

from pyFTS.models import hofts, ifts
from pyFTS.probabilistic import ProbabilityDistribution


class ProbabilisticWeightedFLRG(hofts.HighOrderFLRG):
"""High Order Probabilistic Weighted Fuzzy Logical Relationship Group"""
def __init__(self, order):
super(ProbabilisticWeightedFLRG, self).__init__(order)
        self.RHS = {}                  # RHS fuzzy set -> accumulated membership count
        self.frequency_count = 0.0     # total membership count accumulated by this rule
        self.Z = None                  # cached partition function (normalizer)
    def get_membership(self, data, sets):
        """Product t-norm of the memberships of each lag value in its LHS fuzzy set."""
        if isinstance(data, (np.ndarray, list, tuple, set)):
            return np.nanprod([sets[key].membership(data[count])
                               for count, key in enumerate(self.LHS)])
        else:
            return sets[self.LHS[0]].membership(data)
    def append_rhs(self, c, **kwargs):
        """Accumulate `count` both for the RHS fuzzy set `c` and for the whole rule."""
count = kwargs.get('count', 1.0)
self.frequency_count += count
if c in self.RHS:
self.RHS[c] += count
else:
self.RHS[c] = count
def lhs_conditional_probability(self, x, sets, norm, uod, nbins):
pk = self.frequency_count / norm
tmp = pk * (self.get_membership(x, sets) / self.partition_function(sets, uod, nbins=nbins))
return tmp
def lhs_conditional_probability_fuzzyfied(self, lhs_mv, sets, norm, uod, nbins):
pk = self.frequency_count / norm
tmp = pk * (lhs_mv / self.partition_function(sets, uod, nbins=nbins))
return tmp
def rhs_unconditional_probability(self, c):
return self.RHS[c] / self.frequency_count
def rhs_conditional_probability(self, x, sets, uod, nbins):
total = 0.0
        for rhs in self.RHS.keys():
            fset = sets[rhs]
            wi = self.rhs_unconditional_probability(rhs)
            mv = fset.membership(x) / fset.partition_function(uod, nbins=nbins)
            total += wi * mv
return total
def partition_function(self, sets, uod, nbins=100):
if self.Z is None:
self.Z = 0.0
for k in np.linspace(uod[0], uod[1], nbins):
for key in self.LHS:
self.Z += sets[key].membership(k)
return self.Z
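    # Illustrative note (added; not in the original source): on a grid of
    # `nbins` points k over the UoD, the partition function approximates
    #     Z = sum_k sum_{A in LHS} mu_A(k),
    # so lhs_conditional_probability(x) = (frequency_count / norm) * mu_LHS(x) / Z,
    # i.e. the rule frequency weighted by the share of the rule's total
    # membership mass that falls on x.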
    def get_midpoint(self, sets):
        """Return the expectation of the PWFLRG: the probability-weighted sum of the RHS centroids."""
if self.midpoint is None:
self.midpoint = np.nansum(np.array([self.rhs_unconditional_probability(s) * sets[s].centroid
for s in self.RHS.keys()]))
return self.midpoint
def get_upper(self, sets):
if self.upper is None:
self.upper = np.nansum(np.array([self.rhs_unconditional_probability(s) * sets[s].upper
for s in self.RHS.keys()]))
return self.upper
def get_lower(self, sets):
if self.lower is None:
self.lower = np.nansum(np.array([self.rhs_unconditional_probability(s) * sets[s].lower
for s in self.RHS.keys()]))
return self.lower
def __str__(self):
tmp2 = ""
for c in sorted(self.RHS.keys()):
if len(tmp2) > 0:
tmp2 = tmp2 + ", "
tmp2 = tmp2 + "(" + str(round(self.RHS[c] / self.frequency_count, 3)) + ")" + c
return self.get_key() + " -> " + tmp2
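

# Minimal usage sketch of ProbabilisticWeightedFLRG (an illustrative addition;
# the set names and counts below are hypothetical -- in practice the model
# populates these objects during training):
#
#   flrg = ProbabilisticWeightedFLRG(order=1)
#   flrg.append_lhs('A1')
#   flrg.append_rhs('A2', count=0.7)
#   flrg.append_rhs('A3', count=0.3)
#   flrg.rhs_unconditional_probability('A2')   # -> 0.7
#   print(flrg)                                # A1 -> (0.7)A2, (0.3)A3
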
class ProbabilisticWeightedFTS(ifts.IntervalFTS):
"""High Order Probabilistic Weighted Fuzzy Time Series"""
def __init__(self, **kwargs):
super(ProbabilisticWeightedFTS, self).__init__(**kwargs)
self.shortname = "PWFTS"
self.name = "Probabilistic FTS"
self.detail = "Silva, P.; Guimarães, F.; Sadaei, H."
self.flrgs = {}
self.global_frequency_count = 0
self.has_point_forecasting = True
self.has_interval_forecasting = True
self.has_probability_forecasting = True
self.is_high_order = True
self.min_order = 1
        self.auto_update = kwargs.get('update', False)
self.configure_lags(**kwargs)
    def train(self, data, **kwargs):
        """Train the model from crisp data, or from already fuzzyfied data when fuzzyfied=True."""
        self.configure_lags(**kwargs)
        if not kwargs.get('fuzzyfied', False):
            self.generate_flrg2(data)
        else:
            self.generate_flrg_fuzzyfied(data)
    def generate_flrg2(self, data):
        """Fuzzyfy crisp data and delegate rule generation to generate_flrg_fuzzyfied."""
        fuzz = []
        l = len(data)
        for k in np.arange(0, l):
            fuzz.append(self.partitioner.fuzzyfy(data[k], mode='both', method='fuzzy',
                                                 alpha_cut=self.alpha_cut))
        self.generate_flrg_fuzzyfied(fuzz)
    def generate_flrg_fuzzyfied(self, data):
        """Generate the PWFLRG rules from data that was already fuzzyfied in 'both' mode."""
        l = len(data)
        for k in np.arange(self.max_lag, l):
            sample = data[k - self.max_lag: k]
            set_sample = []
            for instance in sample:
                set_sample.append([fset for fset, mv in instance])
            flrgs = self.generate_lhs_flrg_fuzzyfied(set_sample)
            for flrg in flrgs:
                if flrg.get_key() not in self.flrgs:
                    self.flrgs[flrg.get_key()] = flrg
                lhs_mv = self.pwflrg_lhs_membership_fuzzyfied(flrg, sample)
                mvs = []
                inst = data[k]
                for fset, mv in inst:
                    self.flrgs[flrg.get_key()].append_rhs(fset, count=lhs_mv * mv)
                    mvs.append(mv)
                tmp_fq = np.nansum([lhs_mv * kk for kk in mvs if kk > 0])
                self.global_frequency_count += tmp_fq
    def pwflrg_lhs_membership_fuzzyfied(self, flrg, sample):
        """Membership (product t-norm) of a fuzzyfied sample in the LHS of a PWFLRG."""
        vals = []
        for ct in range(len(flrg.LHS)):
            vals.append([mv for fset, mv in sample[ct] if fset == flrg.LHS[ct]])
        return np.nanprod(vals)
def generate_lhs_flrg(self, sample, explain=False):
if not isinstance(sample, (list, np.ndarray)):
sample = [sample]
nsample = [self.partitioner.fuzzyfy(k, mode="sets", alpha_cut=self.alpha_cut)
for k in sample]
return self.generate_lhs_flrg_fuzzyfied(nsample, explain)
    def generate_lhs_flrg_fuzzyfied(self, sample, explain=False):
        """Build all candidate LHS rules from the fuzzy sets activated at each lag."""
        lags = []
        flrgs = []
        for o in self.lags:
            lhs = sample[o - 1]
            lags.append(lhs)
            if explain:
                print("\t (Lag {}) {} \n".format(o, lhs))
# Trace the possible paths
for path in product(*lags):
flrg = ProbabilisticWeightedFLRG(self.order)
for lhs in path:
flrg.append_lhs(lhs)
flrgs.append(flrg)
return flrgs
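    # Illustrative note (added): product(*lags) expands multiple activated sets
    # per lag into all candidate rules.  E.g., for order 2, if lag 1 activates
    # ['A1', 'A2'] and lag 2 activates ['A3'] (hypothetical names):
    #
    #   flrgs = model.generate_lhs_flrg_fuzzyfied([['A1', 'A2'], ['A3']])
    #   # yields two candidate rules, with LHS (A1, A3) and (A2, A3)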
def generate_flrg(self, data):
l = len(data)
for k in np.arange(self.max_lag, l):
if self.dump: print("FLR: " + str(k))
sample = data[k - self.max_lag: k]
flrgs = self.generate_lhs_flrg(sample)
for flrg in flrgs:
lhs_mv = flrg.get_membership(sample, self.partitioner.sets)
                if flrg.get_key() not in self.flrgs:
                    self.flrgs[flrg.get_key()] = flrg
                fuzzyfied = self.partitioner.fuzzyfy(data[k], mode='both', method='fuzzy',
                                                     alpha_cut=self.alpha_cut)
                mvs = []
                for fset, mv in fuzzyfied:
                    self.flrgs[flrg.get_key()].append_rhs(fset, count=lhs_mv * mv)
                    mvs.append(mv)
tmp_fq = np.nansum([lhs_mv*kk for kk in mvs if kk > 0])
self.global_frequency_count += tmp_fq
    def update_model(self, data):
        """Placeholder for online model updating; currently a no-op."""
        pass
    def add_new_PWFLRG(self, flrg):
        if flrg.get_key() not in self.flrgs:
            tmp = ProbabilisticWeightedFLRG(self.order)
            for fs in flrg.LHS:
                tmp.append_lhs(fs)
            tmp.append_rhs(flrg.LHS[-1])
            self.flrgs[tmp.get_key()] = tmp
            self.global_frequency_count += 1
def flrg_lhs_unconditional_probability(self, flrg):
if flrg.get_key() in self.flrgs:
return self.flrgs[flrg.get_key()].frequency_count / self.global_frequency_count
else:
return 0.0
def flrg_lhs_conditional_probability(self, x, flrg):
mv = flrg.get_membership(x, self.partitioner.sets)
pb = self.flrg_lhs_unconditional_probability(flrg)
return mv * pb
def flrg_lhs_conditional_probability_fuzzyfied(self, x, flrg):
        mv = self.pwflrg_lhs_membership_fuzzyfied(flrg, x)
pb = self.flrg_lhs_unconditional_probability(flrg)
return mv * pb
    def get_midpoint(self, flrg):
        """Expected midpoint of a rule; unseen rules fall back to the mean of the LHS centroids."""
        if flrg.get_key() in self.flrgs:
            tmp = self.flrgs[flrg.get_key()]
            ret = tmp.get_midpoint(self.partitioner.sets)
else:
if len(flrg.LHS) > 0:
pi = 1 / len(flrg.LHS)
ret = np.nansum(np.array([pi * self.partitioner.sets[s].centroid for s in flrg.LHS]))
else:
ret = np.nan
return ret
def flrg_rhs_conditional_probability(self, x, flrg):
if flrg.get_key() in self.flrgs:
_flrg = self.flrgs[flrg.get_key()]
cond = []
for s in _flrg.RHS.keys():
_set = self.partitioner.sets[s]
tmp = _flrg.rhs_unconditional_probability(s) * (_set.membership(x) / _set.partition_function(uod=self.get_UoD()))
cond.append(tmp)
ret = np.nansum(np.array(cond))
else:
pi = 1 / len(flrg.LHS)
ret = np.nansum(np.array([pi * self.partitioner.sets[s].membership(x) for s in flrg.LHS]))
return ret
def get_upper(self, flrg):
if flrg.get_key() in self.flrgs:
tmp = self.flrgs[flrg.get_key()]
ret = tmp.get_upper(self.partitioner.sets)
else:
ret = 0
return ret
def get_lower(self, flrg):
if flrg.get_key() in self.flrgs:
tmp = self.flrgs[flrg.get_key()]
ret = tmp.get_lower(self.partitioner.sets)
else:
ret = 0
return ret
def forecast(self, data, **kwargs):
method = kwargs.get('method','heuristic')
l = len(data)+1
ret = []
for k in np.arange(self.max_lag, l):
sample = data[k - self.max_lag: k]
if method == 'heuristic':
ret.append(self.point_heuristic(sample, **kwargs))
elif method == 'expected_value':
ret.append(self.point_expected_value(sample, **kwargs))
else:
raise ValueError("Unknown point forecasting method!")
if self.auto_update and k > self.order+1: self.update_model(data[k - self.order - 1 : k])
return ret
def point_heuristic(self, sample, **kwargs):
explain = kwargs.get('explain', False)
fuzzyfied = kwargs.get('fuzzyfied', False)
if explain:
print("Fuzzyfication \n")
if not fuzzyfied:
flrgs = self.generate_lhs_flrg(sample, explain)
else:
fsets = self.get_sets_from_both_fuzzyfication(sample)
flrgs = self.generate_lhs_flrg_fuzzyfied(fsets, explain)
mp = []
norms = []
if explain:
print("Rules:\n")
for flrg in flrgs:
if not fuzzyfied:
norm = self.flrg_lhs_conditional_probability(sample, flrg)
else:
norm = self.flrg_lhs_conditional_probability_fuzzyfied(sample, flrg)
if norm == 0:
norm = self.flrg_lhs_unconditional_probability(flrg)
if explain:
print("\t {} \t Midpoint: {}\t Norm: {}\n".format(str(self.flrgs[flrg.get_key()]),
self.get_midpoint(flrg), norm))
mp.append(norm * self.get_midpoint(flrg))
norms.append(norm)
norm = np.nansum(norms)
final = np.nansum(mp) / norm if norm != 0 else 0
if explain:
print("Deffuzyfied value: {} \n".format(final))
return final
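    # Illustrative note (added): point_heuristic is a weighted defuzzyfication,
    #
    #   forecast = sum_i(norm_i * midpoint_i) / sum_i(norm_i),
    #
    # where norm_i is the LHS probability of rule i given the sample, falling
    # back to the rule's unconditional probability when the conditional one is zero.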
    def get_sets_from_both_fuzzyfication(self, sample):
        """Extract only the fuzzy set names from a sample fuzzyfied in 'both' mode."""
        return [[fset for fset, mv in inst] for inst in sample]
    def point_expected_value(self, sample, **kwargs):
        """Point forecast as the expected value of the forecasted probability distribution."""
        dist = self.forecast_distribution(sample, **kwargs)[0]
        return dist.expected_value()
def forecast_interval(self, ndata, **kwargs):
method = kwargs.get('method','heuristic')
alpha = kwargs.get('alpha', 0.05)
l = len(ndata)
ret = []
for k in np.arange(self.max_lag - 1, l):
sample = ndata[k - (self.max_lag - 1): k + 1]
if method == 'heuristic':
ret.append(self.interval_heuristic(sample, **kwargs))
elif method == 'quantile':
ret.append(self.interval_quantile(sample, alpha, **kwargs))
else:
raise ValueError("Unknown interval forecasting method!")
return ret
def interval_quantile(self, ndata, alpha, **kwargs):
dist = self.forecast_distribution(ndata, **kwargs)
itvl = dist[0].quantile([alpha, 1.0 - alpha])
return itvl
def interval_heuristic(self, sample, **kwargs):
fuzzyfied = kwargs.get('fuzzyfied', False)
if not fuzzyfied:
flrgs = self.generate_lhs_flrg(sample)
else:
fsets = self.get_sets_from_both_fuzzyfication(sample)
flrgs = self.generate_lhs_flrg_fuzzyfied(fsets)
up = []
lo = []
norms = []
for flrg in flrgs:
if not fuzzyfied:
norm = self.flrg_lhs_conditional_probability(sample, flrg)
else:
norm = self.flrg_lhs_conditional_probability_fuzzyfied(sample, flrg)
if norm == 0:
norm = self.flrg_lhs_unconditional_probability(flrg)
up.append(norm * self.get_upper(flrg))
lo.append(norm * self.get_lower(flrg))
norms.append(norm)
        # build the interval from the normalized weighted bounds
norm = np.nansum(norms)
if norm == 0:
return [0, 0]
else:
lo_ = np.nansum(lo) / norm
up_ = np.nansum(up) / norm
return [lo_, up_]
def forecast_distribution(self, ndata, **kwargs):
smooth = kwargs.get("smooth", "none")
from_distribution = kwargs.get('from_distribution', False)
fuzzyfied = kwargs.get('fuzzyfied', False)
l = len(ndata)
uod = self.get_UoD()
if 'bins' in kwargs:
_bins = kwargs.pop('bins')
nbins = len(_bins)
else:
nbins = kwargs.get("num_bins", 100)
_bins = np.linspace(uod[0], uod[1], nbins)
ret = []
for k in np.arange(self.max_lag - 1, l):
sample = ndata[k - (self.max_lag - 1): k + 1]
if from_distribution:
                dist = self.forecast_distribution_from_distribution(sample, smooth, uod, _bins)
else:
if not fuzzyfied:
flrgs = self.generate_lhs_flrg(sample)
else:
fsets = self.get_sets_from_both_fuzzyfication(sample)
flrgs = self.generate_lhs_flrg_fuzzyfied(fsets)
if 'type' in kwargs:
kwargs.pop('type')
dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs)
                for _bin in _bins:
                    num = []
                    den = []
                    for s in flrgs:
                        if s.get_key() in self.flrgs:
                            flrg = self.flrgs[s.get_key()]
                            wi = flrg.rhs_conditional_probability(_bin, self.partitioner.sets, uod, nbins)
                            if not fuzzyfied:
                                pk = flrg.lhs_conditional_probability(sample, self.partitioner.sets,
                                                                      self.global_frequency_count, uod, nbins)
                            else:
                                lhs_mv = self.pwflrg_lhs_membership_fuzzyfied(flrg, sample)
                                pk = flrg.lhs_conditional_probability_fuzzyfied(lhs_mv, self.partitioner.sets,
                                                                                self.global_frequency_count, uod, nbins)
                            num.append(wi * pk)
                            den.append(pk)
                        else:
                            num.append(0.0)
                            den.append(1e-9)
                    pf = np.nansum(num) / np.nansum(den)
                    dist.set(_bin, pf)
ret.append(dist)
return ret
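    # Illustrative note (added): for each bin y, the forecasted density is the
    # rule mixture
    #
    #   P(y | sample) ~ sum_i P(y | rule_i) * P(rule_i | sample) / sum_i P(rule_i | sample),
    #
    # with P(y | rule_i) given by rhs_conditional_probability and
    # P(rule_i | sample) by lhs_conditional_probability.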
def __check_point_bounds(self, point):
lower_set = self.partitioner.lower_set()
upper_set = self.partitioner.upper_set()
return point <= lower_set.lower or point >= upper_set.upper
    def forecast_ahead(self, data, steps, **kwargs):
        fuzzyfied = kwargs.get('fuzzyfied', False)
        start = kwargs.get('start_at', 0)
        ret = list(data[start: start + self.max_lag])
for k in np.arange(self.max_lag, steps+self.max_lag):
if self.__check_point_bounds(ret[-1]) and not fuzzyfied:
ret.append(ret[-1])
else:
mp = self.forecast(ret[k - self.max_lag: k], **kwargs)
ret.append(mp[0])
return ret[-steps:]
def __check_interval_bounds(self, interval):
if len(self.transformations) > 0:
lower_set = self.partitioner.lower_set()
upper_set = self.partitioner.upper_set()
return interval[0] <= lower_set.lower and interval[1] >= upper_set.upper
        else:
return interval[0] <= self.original_min and interval[1] >= self.original_max
def forecast_ahead_interval(self, data, steps, **kwargs):
start = kwargs.get('start_at', 0)
        fuzzyfied = kwargs.pop('fuzzyfied', False)
sample = data[start: start + self.max_lag]
if not fuzzyfied:
ret = [[k, k] for k in sample]
else:
ret = []
for k in sample:
kv = self.partitioner.defuzzyfy(k, mode='both')
ret.append([kv, kv])
ret.append(self.forecast_interval(sample, **kwargs)[0])
for k in np.arange(start + self.max_lag, steps + start + self.max_lag):
if len(ret) > 0 and self.__check_interval_bounds(ret[-1]):
ret.append(ret[-1])
else:
lower = self.forecast_interval([ret[x][0] for x in np.arange(k - self.max_lag, k)], **kwargs)
upper = self.forecast_interval([ret[x][1] for x in np.arange(k - self.max_lag, k)], **kwargs)
ret.append([np.min(lower), np.max(upper)])
return ret[-steps:]
def forecast_ahead_distribution(self, ndata, steps, **kwargs):
ret = []
if 'type' in kwargs:
kwargs.pop('type')
smooth = kwargs.get("smooth", "none")
uod = self.get_UoD()
if 'bins' in kwargs:
_bins = kwargs.pop('bins')
nbins = len(_bins)
else:
nbins = kwargs.get("num_bins", 100)
_bins = np.linspace(uod[0], uod[1], nbins)
start = kwargs.get('start_at', 0)
        fuzzyfied = kwargs.pop('fuzzyfied', False)
if not fuzzyfied:
sample = ndata[start: start + self.max_lag]
else:
sample = []
for k in ndata[start: start + self.max_lag]:
kv = self.partitioner.defuzzyfy(k, mode='both')
sample.append(kv)
for dat in sample:
if not isinstance(dat, ProbabilityDistribution.ProbabilityDistribution):
tmp = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs)
tmp.set(dat, 1.0)
ret.append(tmp)
else:
ret.append(dat)
        dist = self.forecast_distribution_from_distribution(ret, smooth, uod, _bins, **kwargs)
ret.append(dist)
for k in np.arange(start + self.max_lag, steps + start + self.max_lag):
            dist = self.forecast_distribution_from_distribution(ret[k - self.max_lag:], smooth, uod, _bins, **kwargs)
ret.append(dist)
return ret[-steps:]
def forecast_distribution_from_distribution(self, previous_dist, smooth, uod, bins, **kwargs):
dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=bins, **kwargs)
lags = []
# Find all bins of past distributions with probability greater than zero
for ct, lag in enumerate(self.lags):
dd = previous_dist[-lag]
vals = [float(v) for v in dd.bins if np.round(dd.density(v), 4) > 0.0]
lags.append(sorted(vals))
# Trace all possible combinations between the bins of past distributions
for path in product(*lags):
# get the combined probabilities for this path
pk = np.prod([previous_dist[-lag].density(path[ct])
for ct, lag in enumerate(self.lags)])
d = self.forecast_distribution(path)[0]
            for _bin in bins:
                dist.set(_bin, dist.density(_bin) + pk * d.density(_bin))
return dist
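    # Illustrative note (added): multi-step densities are propagated by
    # enumerating paths through the non-zero bins of the previous distributions,
    #
    #   p_{t+h}(y) = sum_{paths x} [ prod_{lag} p_{t+h-lag}(x_lag) ] * p(y | x),
    #
    # where p(y | x) is the one-step forecast_distribution conditioned on path x.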
def __str__(self):
tmp = self.name + ":\n"
for r in sorted(self.flrgs.keys()):
p = round(self.flrgs[r].frequency_count / self.global_frequency_count, 3)
tmp = tmp + "(" + str(p) + ") " + str(self.flrgs[r]) + "\n"
return tmp


def visualize_distributions(model, **kwargs):
    """Plot the LHS rule probabilities and the RHS probability matrix of a trained PWFTS model."""
import matplotlib.pyplot as plt
from matplotlib import gridspec
import seaborn as sns
ordered_sets = model.partitioner.ordered_sets
    ftpg_keys = sorted(model.flrgs.keys(), key=lambda x: model.flrgs[x].get_midpoint(model.partitioner.sets))
lhs_probs = [model.flrg_lhs_unconditional_probability(model.flrgs[k])
for k in ftpg_keys]
mat = np.zeros((len(ftpg_keys), len(ordered_sets)))
for row, w in enumerate(ftpg_keys):
for col, k in enumerate(ordered_sets):
if k in model.flrgs[w].RHS:
mat[row, col] = model.flrgs[w].rhs_unconditional_probability(k)
size = kwargs.get('size', (5,10))
fig = plt.figure(figsize=size)
gs = gridspec.GridSpec(1, 2, width_ratios=[1, 4])
ax1 = plt.subplot(gs[0])
sns.barplot(x='y', y='x', color='darkblue', data={'x': ftpg_keys, 'y': lhs_probs}, ax=ax1)
    ax1.set_xlabel("LHS Probabilities")
ind_sets = range(len(ordered_sets))
ax = plt.subplot(gs[1])
sns.heatmap(mat, cmap='Blues', ax=ax, yticklabels=False)
ax.set_title("RHS probabilities")
ax.set_xticks(ind_sets)
ax.set_xticklabels(ordered_sets)
ax.grid(True)
ax.xaxis.set_tick_params(rotation=90)
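

if __name__ == '__main__':
    # Minimal end-to-end sketch (an illustrative addition, not part of the
    # original module).  It assumes the standard pyFTS Grid partitioner API;
    # the synthetic series and all parameter values are arbitrary.
    from pyFTS.partitioners import Grid

    np.random.seed(0)
    y = [10 + 5 * np.sin(k / 10.0) + np.random.normal(0, 0.5) for k in range(200)]
    train, test = y[:150], y[150:]

    part = Grid.GridPartitioner(data=train, npart=15)
    model = ProbabilisticWeightedFTS(partitioner=part, order=2)
    model.fit(train)

    print(model.forecast(test, method='heuristic'))   # point forecasts
    print(model.forecast_interval(test))              # [lower, upper] intervals
    dists = model.forecast_distribution(test)         # ProbabilityDistribution objects
    print(dists[0].expected_value())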