Source code for pyFTS.models.pwfts

#!/usr/bin/python
# -*- coding: utf8 -*-

import numpy as np
import pandas as pd
import math
from operator import itemgetter
from pyFTS.common import FLR, FuzzySet, tree
from pyFTS.models import hofts, ifts
from pyFTS.probabilistic import ProbabilityDistribution


class ProbabilisticWeightedFLRG(hofts.HighOrderFLRG):
    """High Order Probabilistic Weighted Fuzzy Logical Relationship Group"""

    def __init__(self, order):
        super(ProbabilisticWeightedFLRG, self).__init__(order)
        self.RHS = {}
        self.rhs_count = {}
        self.frequency_count = 0.0
        self.Z = None

    def get_membership(self, data, sets):
        if isinstance(data, (np.ndarray, list)):
            return np.nanprod([sets[key].membership(data[count])
                               for count, key in enumerate(self.LHS)])
        else:
            return sets[self.LHS[0]].membership(data)

    def append_rhs(self, c, **kwargs):
        mv = kwargs.get('mv', 1.0)
        self.frequency_count += mv
        if c in self.RHS:
            self.rhs_count[c] += mv
        else:
            self.RHS[c] = c
            self.rhs_count[c] = mv

    def lhs_conditional_probability(self, x, sets, norm, uod, nbins):
        pk = self.frequency_count / norm
        tmp = pk * (self.get_membership(x, sets) / self.partition_function(sets, uod, nbins=nbins))
        return tmp

    def rhs_unconditional_probability(self, c):
        return self.rhs_count[c] / self.frequency_count

    def rhs_conditional_probability(self, x, sets, uod, nbins):
        total = 0.0
        for rhs in self.RHS:
            fset = sets[rhs]
            wi = self.rhs_unconditional_probability(rhs)
            mv = fset.membership(x) / fset.partition_function(uod, nbins=nbins)
            total += wi * mv
        return total

    def partition_function(self, sets, uod, nbins=100):
        if self.Z is None:
            self.Z = 0.0
            for k in np.linspace(uod[0], uod[1], nbins):
                for key in self.LHS:
                    self.Z += sets[key].membership(k)
        return self.Z

    def get_midpoint(self, sets):
        """Return the expectation of the PWFLRG: the probability-weighted sum of the centroids of its RHS fuzzy sets"""
        if self.midpoint is None:
            self.midpoint = np.sum(np.array([self.rhs_unconditional_probability(s) * sets[s].centroid
                                             for s in self.RHS]))
        return self.midpoint

    def get_upper(self, sets):
        if self.upper is None:
            self.upper = np.sum(np.array([self.rhs_unconditional_probability(s) * sets[s].upper
                                          for s in self.RHS]))
        return self.upper

    def get_lower(self, sets):
        if self.lower is None:
            self.lower = np.sum(np.array([self.rhs_unconditional_probability(s) * sets[s].lower
                                          for s in self.RHS]))
        return self.lower

    def __str__(self):
        tmp2 = ""
        for c in sorted(self.RHS):
            if len(tmp2) > 0:
                tmp2 = tmp2 + ", "
            tmp2 = tmp2 + "(" + str(round(self.rhs_count[c] / self.frequency_count, 3)) + ")" + c
        return self.get_key() + " -> " + tmp2
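
# Illustrative sketch (not part of the original module): how a ProbabilisticWeightedFLRG
# accumulates membership-weighted counts and turns them into RHS probabilities.
# The fuzzy-set labels 'A1' and 'A2' are hypothetical.
#
#     flrg = ProbabilisticWeightedFLRG(order=1)
#     flrg.append_lhs('A1')
#     flrg.append_rhs('A1', mv=0.7)
#     flrg.append_rhs('A2', mv=0.3)
#     flrg.rhs_unconditional_probability('A1')   # 0.7 / (0.7 + 0.3) = 0.7
#     flrg.rhs_unconditional_probability('A2')   # 0.3 / (0.7 + 0.3) = 0.3
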

class ProbabilisticWeightedFTS(ifts.IntervalFTS):
    """High Order Probabilistic Weighted Fuzzy Time Series"""

    def __init__(self, **kwargs):
        super(ProbabilisticWeightedFTS, self).__init__(**kwargs)
        self.shortname = "PWFTS"
        self.name = "Probabilistic FTS"
        self.detail = "Silva, P.; Guimarães, F.; Sadaei, H."
        self.flrgs = {}
        self.global_frequency_count = 0
        self.has_point_forecasting = True
        self.has_interval_forecasting = True
        self.has_probability_forecasting = True
        self.is_high_order = True
        self.min_order = 1
        self.auto_update = kwargs.get('update', False)
        self.configure_lags(**kwargs)

    def train(self, data, **kwargs):
        self.configure_lags(**kwargs)
        parameters = kwargs.get('parameters', 'fuzzy')
        if parameters == 'monotonic':
            tmpdata = FuzzySet.fuzzyfy_series_old(data, self.sets)
            flrs = FLR.generate_recurrent_flrs(tmpdata)
            self.generateFLRG(flrs)
        else:
            self.generate_flrg(data)

    def generate_lhs_flrg(self, sample, explain=False):
        lags = {}
        flrgs = []

        for ct, o in enumerate(self.lags):
            lhs = FuzzySet.fuzzyfy(sample[o - 1], partitioner=self.partitioner,
                                   mode="sets", alpha_cut=self.alpha_cut)
            lags[ct] = lhs

            if explain:
                print("\t (Lag {}) {} -> {} \n".format(o, sample[o - 1], lhs))

        root = tree.FLRGTreeNode(None)
        tree.build_tree_without_order(root, lags, 0)

        # Trace the possible paths
        for p in root.paths():
            flrg = ProbabilisticWeightedFLRG(self.order)
            path = list(reversed(list(filter(None.__ne__, p))))

            for lhs in path:
                flrg.append_lhs(lhs)

            flrgs.append(flrg)

        return flrgs

    def generate_flrg(self, data):
        l = len(data)
        for k in np.arange(self.max_lag, l):
            if self.dump:
                print("FLR: " + str(k))

            sample = data[k - self.max_lag: k]

            flrgs = self.generate_lhs_flrg(sample)

            for flrg in flrgs:
                lhs_mv = flrg.get_membership(sample, self.sets)

                if flrg.get_key() not in self.flrgs:
                    self.flrgs[flrg.get_key()] = flrg

                fuzzyfied = [(s, self.sets[s].membership(data[k]))
                             for s in self.sets.keys()
                             if self.sets[s].membership(data[k]) > self.alpha_cut]

                mvs = []
                for fset, mv in fuzzyfied:
                    self.flrgs[flrg.get_key()].append_rhs(fset, mv=lhs_mv * mv)
                    mvs.append(mv)

                tmp_fq = sum([lhs_mv * kk for kk in mvs if kk > 0])

                self.global_frequency_count += tmp_fq
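
    # Worked illustration (assumed numbers, not from the original source): if the LHS
    # membership of a sample is lhs_mv = 0.6 and the next value belongs to the sets
    # {'A2': 0.8, 'A3': 0.2} above the alpha-cut, the loop above issues
    # append_rhs('A2', mv=0.6 * 0.8) and append_rhs('A3', mv=0.6 * 0.2), and
    # global_frequency_count grows by 0.6 * (0.8 + 0.2) = 0.6 for this FLRG.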

    def update_model(self, data):
        pass

    def add_new_PWFLGR(self, flrg):
        if flrg.get_key() not in self.flrgs:
            tmp = ProbabilisticWeightedFLRG(self.order)
            for fs in flrg.LHS:
                tmp.append_lhs(fs)
            tmp.append_rhs(flrg.LHS[-1])
            self.flrgs[tmp.get_key()] = tmp
            self.global_frequency_count += 1

    def flrg_lhs_unconditional_probability(self, flrg):
        if flrg.get_key() in self.flrgs:
            return self.flrgs[flrg.get_key()].frequency_count / self.global_frequency_count
        else:
            return 0.0
            #self.add_new_PWFLGR(flrg)
            #return self.flrg_lhs_unconditional_probability(flrg)

    def flrg_lhs_conditional_probability(self, x, flrg):
        mv = flrg.get_membership(x, self.sets)
        pb = self.flrg_lhs_unconditional_probability(flrg)
        return mv * pb

    def get_midpoint(self, flrg):
        if flrg.get_key() in self.flrgs:
            tmp = self.flrgs[flrg.get_key()]
            ret = tmp.get_midpoint(self.sets)
            #sum(np.array([tmp.rhs_unconditional_probability(s) * self.setsDict[s].centroid for s in tmp.RHS]))
        else:
            if len(flrg.LHS) > 0:
                pi = 1 / len(flrg.LHS)
                ret = sum(np.array([pi * self.sets[s].centroid for s in flrg.LHS]))
            else:
                ret = np.nan
        return ret

    def flrg_rhs_conditional_probability(self, x, flrg):
        if flrg.get_key() in self.flrgs:
            _flrg = self.flrgs[flrg.get_key()]
            cond = []
            for s in _flrg.RHS.keys():
                _set = self.sets[s]
                tmp = _flrg.rhs_unconditional_probability(s) * (_set.membership(x) / _set.partition_function(uod=self.get_UoD()))
                cond.append(tmp)
            ret = sum(np.array(cond))
        else:
            pi = 1 / len(flrg.LHS)
            ret = sum(np.array([pi * self.sets[s].membership(x) for s in flrg.LHS]))
        return ret

    def get_upper(self, flrg):
        if flrg.get_key() in self.flrgs:
            tmp = self.flrgs[flrg.get_key()]
            ret = tmp.get_upper(self.sets)
        else:
            ret = 0
        return ret

    def get_lower(self, flrg):
        if flrg.get_key() in self.flrgs:
            tmp = self.flrgs[flrg.get_key()]
            ret = tmp.get_lower(self.sets)
        else:
            ret = 0
        return ret

    def forecast(self, data, **kwargs):
        method = kwargs.get('method', 'heuristic')

        l = len(data)

        ret = []

        for k in np.arange(self.max_lag - 1, l):
            sample = data[k - (self.max_lag - 1): k + 1]

            if method == 'heuristic':
                ret.append(self.point_heuristic(sample, **kwargs))
            elif method == 'expected_value':
                ret.append(self.point_expected_value(sample, **kwargs))
            else:
                raise ValueError("Unknown point forecasting method!")

            if self.auto_update and k > self.order + 1:
                self.update_model(data[k - self.order - 1: k])

        return ret

    def point_heuristic(self, sample, **kwargs):

        explain = kwargs.get('explain', False)

        if explain:
            print("Fuzzyfication \n")

        flrgs = self.generate_lhs_flrg(sample, explain)

        mp = []
        norms = []

        if explain:
            print("Rules:\n")

        for flrg in flrgs:
            norm = self.flrg_lhs_conditional_probability(sample, flrg)

            if norm == 0:
                norm = self.flrg_lhs_unconditional_probability(flrg)

            if explain:
                print("\t {} \t Midpoint: {}\t Norm: {}\n".format(str(self.flrgs[flrg.get_key()]),
                                                                  self.get_midpoint(flrg), norm))

            mp.append(norm * self.get_midpoint(flrg))
            norms.append(norm)

        norm = sum(norms)

        final = sum(mp) / norm if norm != 0 else 0

        if explain:
            print("Defuzzyfied value: {} \n".format(final))

        return final
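
    # In point_heuristic above, the defuzzyfied forecast is a weighted average of the
    # matched rules' midpoints (notation introduced here only to restate the code):
    #
    #     y_hat = sum_j norm_j * midpoint_j / sum_j norm_j
    #
    # with norm_j falling back to the rule's unconditional LHS probability whenever the
    # conditional one is zero.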

    def point_expected_value(self, sample, **kwargs):
        explain = kwargs.get('explain', False)

        dist = self.forecast_distribution(sample)[0]

        final = dist.expected_value()

        return final

    def forecast_interval(self, ndata, **kwargs):

        method = kwargs.get('method', 'heuristic')
        alpha = kwargs.get('alpha', 0.05)

        l = len(ndata)

        ret = []

        for k in np.arange(self.max_lag - 1, l):
            sample = ndata[k - (self.max_lag - 1): k + 1]

            if method == 'heuristic':
                ret.append(self.interval_heuristic(sample))
            elif method == 'quantile':
                ret.append(self.interval_quantile(sample, alpha))
            else:
                raise ValueError("Unknown interval forecasting method!")

        return ret

    def interval_quantile(self, ndata, alpha):
        dist = self.forecast_distribution(ndata)
        itvl = dist[0].quantile([alpha, 1.0 - alpha])
        return itvl

    def interval_heuristic(self, sample):

        flrgs = self.generate_lhs_flrg(sample)

        up = []
        lo = []
        norms = []
        for flrg in flrgs:
            norm = self.flrg_lhs_conditional_probability(sample, flrg)
            if norm == 0:
                norm = self.flrg_lhs_unconditional_probability(flrg)
            up.append(norm * self.get_upper(flrg))
            lo.append(norm * self.get_lower(flrg))
            norms.append(norm)

        # build the interval
        norm = sum(norms)
        if norm == 0:
            return [0, 0]
        else:
            lo_ = sum(lo) / norm
            up_ = sum(up) / norm
            return [lo_, up_]

    def forecast_distribution(self, ndata, **kwargs):

        smooth = kwargs.get("smooth", "none")

        l = len(ndata)
        uod = self.get_UoD()

        if 'bins' in kwargs:
            _bins = kwargs.pop('bins')
            nbins = len(_bins)
        else:
            nbins = kwargs.get("num_bins", 100)
            _bins = np.linspace(uod[0], uod[1], nbins)

        ret = []

        for k in np.arange(self.max_lag - 1, l):
            sample = ndata[k - (self.max_lag - 1): k + 1]

            flrgs = self.generate_lhs_flrg(sample)

            if 'type' in kwargs:
                kwargs.pop('type')

            dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs)

            for bin in _bins:
                num = []
                den = []
                for s in flrgs:
                    if s.get_key() in self.flrgs:
                        flrg = self.flrgs[s.get_key()]
                        pk = flrg.lhs_conditional_probability(sample, self.sets, self.global_frequency_count, uod, nbins)
                        wi = flrg.rhs_conditional_probability(bin, self.sets, uod, nbins)
                        num.append(wi * pk)
                        den.append(pk)
                    else:
                        num.append(0.0)
                        den.append(0.000000001)
                pf = sum(num) / sum(den)

                dist.set(bin, pf)

            ret.append(dist)

        return ret
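
    # In the loop above, each bin value y of the distribution is scored as a mixture
    # (notation introduced here for explanation only):
    #
    #     P(y | sample) = sum_j pk_j * w_j(y) / sum_j pk_j
    #
    # where pk_j = flrg_j.lhs_conditional_probability(sample, ...) weighs how well the
    # sample matches the j-th rule's LHS, and w_j(y) = flrg_j.rhs_conditional_probability(y, ...)
    # spreads that weight over the rule's RHS fuzzy sets.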

    def __check_point_bounds(self, point):
        lower_set = self.partitioner.lower_set()
        upper_set = self.partitioner.upper_set()
        return point <= lower_set.lower or point >= upper_set.upper

    def forecast_ahead(self, data, steps, **kwargs):

        l = len(data)

        start = kwargs.get('start', self.max_lag)

        ret = data[start - self.max_lag: start].tolist()

        for k in np.arange(self.max_lag, steps + self.max_lag):
            if self.__check_point_bounds(ret[-1]):
                ret.append(ret[-1])
            else:
                mp = self.forecast(ret[k - self.max_lag: k], **kwargs)
                ret.append(mp[0])

        return ret[self.max_lag:]

    def __check_interval_bounds(self, interval):
        if len(self.transformations) > 0:
            lower_set = self.partitioner.lower_set()
            upper_set = self.partitioner.upper_set()
            return interval[0] <= lower_set.lower and interval[1] >= upper_set.upper
        elif len(self.transformations) == 0:
            return interval[0] <= self.original_min and interval[1] >= self.original_max

    def forecast_ahead_interval(self, data, steps, **kwargs):

        l = len(data)

        start = kwargs.get('start', self.max_lag)

        sample = data[start - self.max_lag: start]

        ret = [[k, k] for k in sample]

        ret.append(self.forecast_interval(sample)[0])

        for k in np.arange(self.max_lag + 1, steps + self.max_lag):
            if len(ret) > 0 and self.__check_interval_bounds(ret[-1]):
                ret.append(ret[-1])
            else:
                lower = self.forecast_interval([ret[x][0] for x in np.arange(k - self.max_lag, k)], **kwargs)
                upper = self.forecast_interval([ret[x][1] for x in np.arange(k - self.max_lag, k)], **kwargs)

                ret.append([np.min(lower), np.max(upper)])

        return ret[self.order:]

    def forecast_ahead_distribution(self, ndata, steps, **kwargs):

        ret = []

        smooth = kwargs.get("smooth", "none")

        uod = self.get_UoD()

        if 'bins' in kwargs:
            _bins = kwargs.pop('bins')
            nbins = len(_bins)
        else:
            nbins = kwargs.get("num_bins", 100)
            _bins = np.linspace(uod[0], uod[1], nbins)

        start = kwargs.get('start', self.max_lag)

        sample = ndata[start - self.max_lag: start]

        for dat in sample:
            if 'type' in kwargs:
                kwargs.pop('type')
            tmp = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs)
            tmp.set(dat, 1.0)
            ret.append(tmp)

        dist = self.forecast_distribution(sample, bins=_bins)[0]

        ret.append(dist)

        for k in np.arange(self.max_lag + 1, steps + self.max_lag + 1):
            dist = ProbabilityDistribution.ProbabilityDistribution(smooth, uod=uod, bins=_bins, **kwargs)

            lags = {}

            # Find all bins of past distributions with probability greater than zero
            for ct, d in enumerate(self.lags):
                dd = ret[k - d]

                vals = [float(v) for v in dd.bins if round(dd.density(v), 4) > 0]

                lags[ct] = sorted(vals)

            root = tree.FLRGTreeNode(None)

            tree.build_tree_without_order(root, lags, 0)

            # Trace all possible combinations between the bins of past distributions
            for p in root.paths():
                path = list(reversed(list(filter(None.__ne__, p))))

                # Get the combined probability for this path
                pk = np.prod([ret[k - self.max_lag + o].density(path[ct])
                              for ct, o in enumerate(self.lags)])

                d = self.forecast_distribution(path)[0]

                for bin in _bins:
                    dist.set(bin, dist.density(bin) + pk * d.density(bin))

            ret.append(dist)

        return ret[self.order:]
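
    # Restating the loop above (no new assumptions): the k-step-ahead distribution is
    # built by total probability over lag paths. For every combination of bins drawn
    # from the previous lag distributions, the one-step forecast distribution of that
    # path is added to the k-th distribution, weighted by pk, the product of the path's
    # bin densities.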

    def __str__(self):
        tmp = self.name + ":\n"
        for r in sorted(self.flrgs):
            p = round(self.flrgs[r].frequency_count / self.global_frequency_count, 3)
            tmp = tmp + "(" + str(p) + ") " + str(self.flrgs[r]) + "\n"
        return tmp
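
# Usage sketch (an assumption for illustration, not part of the original module):
# training and forecasting with PWFTS through the generic pyFTS fit/predict
# front-end and a Grid partitioner. The series and hyperparameters are arbitrary.
#
#     from pyFTS.partitioners import Grid
#     from pyFTS.models import pwfts
#
#     y = np.sin(np.linspace(0.0, 6.0 * np.pi, 200))        # any numeric series
#     part = Grid.GridPartitioner(data=y, npart=10)
#     model = pwfts.ProbabilisticWeightedFTS(partitioner=part, order=2)
#     model.fit(y)
#     point = model.predict(y[-model.max_lag:], type='point')
#     interval = model.predict(y[-model.max_lag:], type='interval')
#     dist = model.predict(y[-model.max_lag:], type='distribution')
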

def visualize_distributions(model, **kwargs):
    import matplotlib.pyplot as plt
    from matplotlib import gridspec
    import seaborn as sns

    ordered_sets = model.partitioner.ordered_sets

    ftpg_keys = sorted(model.flrgs.keys(), key=lambda x: model.flrgs[x].get_midpoint(model.sets))

    lhs_probs = [model.flrg_lhs_unconditional_probability(model.flrgs[k]) for k in ftpg_keys]

    mat = np.zeros((len(ftpg_keys), len(ordered_sets)))
    for row, w in enumerate(ftpg_keys):
        for col, k in enumerate(ordered_sets):
            if k in model.flrgs[w].RHS:
                mat[row, col] = model.flrgs[w].rhs_unconditional_probability(k)

    size = kwargs.get('size', (5, 10))

    fig = plt.figure(figsize=size)

    gs = gridspec.GridSpec(1, 2, width_ratios=[1, 4])

    ax1 = plt.subplot(gs[0])
    sns.barplot(x='y', y='x', color='darkblue', data={'x': ftpg_keys, 'y': lhs_probs}, ax=ax1)
    ax1.set_ylabel("LHS Probabilities")

    ind_sets = range(len(ordered_sets))
    ax = plt.subplot(gs[1])
    sns.heatmap(mat, cmap='Blues', ax=ax, yticklabels=False)
    ax.set_title("RHS probabilities")
    ax.set_xticks(ind_sets)
    ax.set_xticklabels(ordered_sets)
    ax.grid(True)
    ax.xaxis.set_tick_params(rotation=90)
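
# Usage sketch (assumption, not part of the original module): rendering the LHS/RHS
# probability panels for a trained PWFTS model, e.g. the `model` built in the sketch above.
#
#     visualize_distributions(model, size=(7, 12))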