- new sliding window benchmarks
- statsmodels ARIMA wrapper for benchmarks
- method refactoring in PWFTS
- auto_update in PWFTS
- method refactoring in ResidualAnalysis
Petrônio Cândido de Lima e Silva 2017-03-03 08:53:55 -03:00
parent 7f13a24402
commit 5c8c80cd8d
11 changed files with 881 additions and 739 deletions

View File

@@ -2,6 +2,7 @@
 import numpy as np
 import pandas as pd
+from pyFTS.common import FuzzySet,SortedCollection
 
 # Autocorrelation function estimative
@@ -32,7 +33,6 @@ def mape(targets, forecasts):
 def smape(targets, forecasts, type=2):
-    return mape(targets, forecasts)
     if type == 1:
         return np.mean(np.abs(forecasts - targets) / ((forecasts + targets)/2))
     elif type == 2:
@@ -52,10 +52,8 @@ def UStatistic(targets, forecasts):
     naive = []
     y = []
     for k in np.arange(0,l-1):
-        #y.append((forecasts[k ] - targets[k ]) ** 2)
-        y.append(((forecasts[k + 1] - targets[k + 1]) / targets[k]) ** 2)
-        #naive.append((targets[k + 1] - targets[k]) ** 2)
-        naive.append(((targets[k + 1] - targets[k]) / targets[k]) ** 2)
+        y.append((forecasts[k ] - targets[k ]) ** 2)
+        naive.append((targets[k + 1] - targets[k]) ** 2)
     return np.sqrt(sum(y) / sum(naive))
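With this change UStatistic drops the relative form of both sums: the numerator becomes the plain squared forecast error and the denominator the squared step of the persistence forecast, i.e. U = sqrt( sum_k (forecasts[k] - targets[k])^2 / sum_k (targets[k+1] - targets[k])^2 ), so values below 1 still mean the model beats the naive forecast.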
@@ -111,3 +109,39 @@ def coverage(targets, forecasts):
         else:
             preds.append(0)
     return np.mean(preds)
+
+
+def pmf_to_cdf(density):
+    ret = []
+    for row in density.index:
+        tmp = []
+        prev = 0
+        for col in density.columns:
+            prev += density[col][row]
+            tmp.append( prev )
+        ret.append(tmp)
+    df = pd.DataFrame(ret, columns=density.columns)
+    return df
+
+
+def heavyside_cdf(bins, targets):
+    ret = []
+    for t in targets:
+        result = [1 if b >= t else 0 for b in bins]
+        ret.append(result)
+    df = pd.DataFrame(ret, columns=bins)
+    return df
+
+
+# Continuous Ranked Probability Score
+def crps(targets, densities):
+    l = len(densities.columns)
+    n = len(densities.index)
+    Ff = pmf_to_cdf(densities)
+    Fa = heavyside_cdf(densities.columns, targets)
+
+    _crps = float(0.0)
+    for k in densities.index:
+        _crps += sum([ (Ff[col][k]-Fa[col][k])**2 for col in densities.columns])
+
+    return _crps / float(l * n)
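A small, self-contained sketch of how the new CRPS helpers fit together. The toy DataFrame and bin values are illustrative, not repository data, and it assumes these functions live in the benchmarks Measures module imported in the test script further below:

    import pandas as pd
    from pyFTS.benchmarks import Measures

    bins = [10.0, 20.0, 30.0]                       # illustrative bin centres
    densities = pd.DataFrame([[0.2, 0.5, 0.3],      # one forecast distribution per row
                              [0.1, 0.6, 0.3]], columns=bins)
    targets = [18.0, 25.0]                          # one observation per row

    cdf = Measures.pmf_to_cdf(densities)            # cumulative forecast distributions
    obs = Measures.heavyside_cdf(bins, targets)     # step-function CDFs of the observations
    print(Measures.crps(targets, densities))        # mean squared CDF difference, lower is better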

View File

@@ -38,6 +38,12 @@ class ProbabilityDistribution(object):
             ret.append(self.distribution[v] / self.count)
         return ret
 
+    def cummulative(self, values):
+        pass
+
+    def quantile(self, qt):
+        pass
+
     def entropy(self):
         h = -sum([self.distribution[k] * np.log(self.distribution[k]) if self.distribution[k] > 0 else 0
                   for k in self.bins])
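cummulative and quantile are left as stubs in this commit. Purely as an illustration of intent, not the author's implementation, they could be filled in from the same distribution dictionary that density() and entropy() already use:

    def cummulative(self, values):
        # illustrative sketch: empirical CDF over the bins, normalized by the sample count
        return [sum(self.distribution[b] for b in self.bins if b <= v) / self.count
                for v in values]

    def quantile(self, qt):
        # illustrative sketch: smallest bin whose cumulative mass reaches qt
        acc = 0.0
        for b in sorted(self.bins):
            acc += self.distribution[b] / self.count
            if acc >= qt:
                return b
        return self.bins[-1]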

View File

@@ -71,7 +71,7 @@ def plotResiduals(targets, models, tam=[8, 8], save=False, file=None):
     Util.showAndSaveImage(fig, file, save)
 
-def plotResiduals2(targets, models, tam=[8, 8], save=False, file=None):
+def plot_residuals(targets, models, tam=[8, 8], save=False, file=None):
     fig, axes = plt.subplots(nrows=len(models), ncols=3, figsize=tam)
 
     for c, mfts in enumerate(models, start=0):

52
benchmarks/arima.py Normal file
View File

@@ -0,0 +1,52 @@
#!/usr/bin/python
# -*- coding: utf8 -*-

import numpy as np
from statsmodels.tsa.arima_model import ARIMA as stats_arima
from pyFTS import fts


class ARIMA(fts.FTS):
    def __init__(self, name):
        super(ARIMA, self).__init__(1, "ARIMA")
        self.name = "ARIMA"
        self.detail = "Auto Regressive Integrated Moving Average"
        self.isHighOrder = True
        self.model = None
        self.model_fit = None
        self.trained_data = None
        self.p = 1
        self.d = 0
        self.q = 0
        self.benchmark_only = True
        self.minOrder = 1

    def train(self, data, sets, order=1, parameters=None):
        if parameters is not None:
            self.p = parameters[0]
            self.d = parameters[1]
            self.q = parameters[2]
            self.order = max([self.p, self.d, self.q])
            self.shortname = "ARIMA(" + str(self.p) + "," + str(self.d) + "," + str(self.q) + ")"

        old_fit = self.model_fit
        self.model = stats_arima(data, order=(self.p, self.d, self.q))
        try:
            self.model_fit = self.model.fit(disp=0)
        except:
            try:
                self.model = stats_arima(data, order=(self.p, self.d, self.q))
                self.model_fit = self.model.fit(disp=1)
            except:
                self.model_fit = old_fit

        self.trained_data = data #.tolist()

    def forecast(self, data):
        ret = []
        for t in data:
            output = self.model_fit.forecast()
            ret.append( output[0] )
            self.trained_data = np.append(self.trained_data, t) #.append(t)
            self.train(self.trained_data,None,order=self.order, parameters=(self.p, self.d, self.q))
        return ret
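A minimal sketch of how this wrapper might be driven directly. The synthetic series and the (1,0,0) order are illustrative, and it relies on the pre-0.13 statsmodels.tsa.arima_model API imported above:

    import numpy as np
    from pyFTS.benchmarks import arima

    np.random.seed(0)
    series = np.cumsum(np.random.normal(0, 1, 300))   # synthetic random-walk data

    model = arima.ARIMA("")
    model.train(series[:250], None, parameters=(1, 0, 0))   # fit ARIMA(1,0,0)
    forecasts = model.forecast(series[250:260])             # one-step forecasts, refit at each step
    print(forecasts[:3])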

File diff suppressed because it is too large

View File

@@ -6,9 +6,11 @@ from pyFTS import fts
 class Naive(fts.FTS):
     def __init__(self, name):
-        super(Naive, self).__init__(1, "Naïve " + name)
+        super(Naive, self).__init__(1, "Naive " + name)
         self.name = "Naïve Model"
         self.detail = "Naïve Model"
+        self.benchmark_only = True
+        self.isHighOrder = False
 
     def forecast(self, data):
         return [k for k in data]

2
fts.py
View File

@@ -26,6 +26,8 @@ class FTS(object):
         self.original_max = 0
         self.original_min = 0
         self.partitioner = None
+        self.auto_update = False
+        self.benchmark_only = False
 
     def fuzzy(self, data):
         best = {"fuzzyset": "", "membership": 0.0}
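The two new flags are consumed elsewhere: benchmark_only marks models (Naive, ARIMA) that need no fuzzy partitioning, and auto_update lets PWFTS refit its rule base while forecasting. A hypothetical check in a benchmark loop, shown only as an illustration (the names model, partitioner, training, npart and params are placeholders, not the suppressed benchmarks.py code):

    # Illustrative sketch of how benchmark_only could short-circuit partitioning.
    if model.benchmark_only:
        model.train(training, None, order=model.order, parameters=params)
    else:
        sets = partitioner(training, npart).sets
        model.train(training, sets, order=model.order)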

View File

@@ -7,7 +7,7 @@ from pyFTS.partitioners import partitioner
 class GridPartitioner(partitioner.Partitioner):
-    def __init__(self, data,npart,func = Membership.trimf, transformation=None):
+    def __init__(self, data, npart, func = Membership.trimf, transformation=None):
         super(GridPartitioner, self).__init__("Grid", data, npart, func=func, transformation=transformation)
 
     def build(self, data):

487
pfts.py
View File

@@ -1,487 +0,0 @@
#!/usr/bin/python
# -*- coding: utf8 -*-

import numpy as np
import pandas as pd
import math
from operator import itemgetter

from pyFTS.common import FuzzySet, SortedCollection
from pyFTS import hofts, ifts, tree


class ProbabilisticFLRG(hofts.HighOrderFLRG):
    def __init__(self, order):
        super(ProbabilisticFLRG, self).__init__(order)
        self.RHS = {}
        self.frequencyCount = 0.0

    def appendRHS(self, c):
        self.frequencyCount += 1.0
        if c.name in self.RHS:
            self.RHS[c.name] += 1.0
        else:
            self.RHS[c.name] = 1.0

    def getProbability(self, c):
        return self.RHS[c] / self.frequencyCount

    def __str__(self):
        tmp2 = ""
        for c in sorted(self.RHS):
            if len(tmp2) > 0:
                tmp2 = tmp2 + ", "
            tmp2 = tmp2 + "(" + str(round(self.RHS[c] / self.frequencyCount, 3)) + ")" + c
        return self.strLHS() + " -> " + tmp2


class ProbabilisticFTS(ifts.IntervalFTS):
    def __init__(self, name):
        super(ProbabilisticFTS, self).__init__("PFTS")
        self.shortname = "PFTS " + name
        self.name = "Probabilistic FTS"
        self.detail = "Silva, P.; Guimarães, F.; Sadaei, H."
        self.flrgs = {}
        self.globalFrequency = 0
        self.hasPointForecasting = True
        self.hasIntervalForecasting = True
        self.hasDistributionForecasting = True
        self.isHighOrder = True

    def generateFLRG(self, flrs):
        flrgs = {}
        l = len(flrs)
        for k in np.arange(self.order, l+1):
            if self.dump: print("FLR: " + str(k))
            flrg = ProbabilisticFLRG(self.order)

            for kk in np.arange(k - self.order, k):
                flrg.appendLHS(flrs[kk].LHS)
                if self.dump: print("LHS: " + str(flrs[kk]))

            if flrg.strLHS() in flrgs:
                flrgs[flrg.strLHS()].appendRHS(flrs[k-1].RHS)
            else:
                flrgs[flrg.strLHS()] = flrg;
                flrgs[flrg.strLHS()].appendRHS(flrs[k-1].RHS)
            if self.dump: print("RHS: " + str(flrs[k-1]))

            self.globalFrequency += 1
        return (flrgs)

    def addNewPFLGR(self,flrg):
        if flrg.strLHS() not in self.flrgs:
            tmp = ProbabilisticFLRG(self.order)
            for fs in flrg.LHS: tmp.appendLHS(fs)
            tmp.appendRHS(flrg.LHS[-1])
            self.flrgs[tmp.strLHS()] = tmp;
            self.globalFrequency += 1

    def getProbability(self, flrg):
        if flrg.strLHS() in self.flrgs:
            return self.flrgs[flrg.strLHS()].frequencyCount / self.globalFrequency
        else:
            self.addNewPFLGR(flrg)
            return self.getProbability(flrg)

    def getMidpoints(self, flrg):
        if flrg.strLHS() in self.flrgs:
            tmp = self.flrgs[flrg.strLHS()]
            ret = sum(np.array([tmp.getProbability(s) * self.setsDict[s].centroid for s in tmp.RHS]))
        else:
            pi = 1 / len(flrg.LHS)
            ret = sum(np.array([pi * s.centroid for s in flrg.LHS]))
        return ret
    def getUpper(self, flrg):
        if flrg.strLHS() in self.flrgs:
            tmp = self.flrgs[flrg.strLHS()]
            ret = sum(np.array([tmp.getProbability(s) * self.setsDict[s].upper for s in tmp.RHS]))
        else:
            pi = 1 / len(flrg.LHS)
            ret = sum(np.array([pi * s.upper for s in flrg.LHS]))
        return ret

    def getLower(self, flrg):
        if flrg.strLHS() in self.flrgs:
            tmp = self.flrgs[flrg.strLHS()]
            ret = sum(np.array([tmp.getProbability(s) * self.setsDict[s].lower for s in tmp.RHS]))
        else:
            pi = 1 / len(flrg.LHS)
            ret = sum(np.array([pi * s.lower for s in flrg.LHS]))
        return ret

    def forecast(self, data):

        ndata = np.array(self.doTransformations(data))

        l = len(ndata)

        ret = []

        for k in np.arange(self.order - 1, l):

            # print(k)

            affected_flrgs = []
            affected_flrgs_memberships = []
            norms = []

            mp = []

            # Find the sets which membership > 0 for each lag
            count = 0
            lags = {}
            if self.order > 1:
                subset = ndata[k - (self.order - 1): k + 1]

                for instance in subset:
                    mb = FuzzySet.fuzzyInstance(instance, self.sets)
                    tmp = np.argwhere(mb)
                    idx = np.ravel(tmp)  # flatten the array

                    if idx.size == 0:  # the element is out of the bounds of the Universe of Discourse
                        if instance <= self.sets[0].lower:
                            idx = [0]
                        elif instance >= self.sets[-1].upper:
                            idx = [len(self.sets) - 1]
                        else:
                            raise Exception(instance)

                    lags[count] = idx
                    count = count + 1

                # Build the tree with all possible paths
                root = tree.FLRGTreeNode(None)

                self.buildTree(root, lags, 0)

                # Trace the possible paths and build the PFLRG's
                for p in root.paths():
                    path = list(reversed(list(filter(None.__ne__, p))))
                    flrg = hofts.HighOrderFLRG(self.order)
                    for kk in path: flrg.appendLHS(self.sets[kk])

                    assert len(flrg.LHS) == subset.size, str(subset) + " -> " + str([s.name for s in flrg.LHS])

                    ##
                    affected_flrgs.append(flrg)

                    # Find the general membership of FLRG
                    affected_flrgs_memberships.append(min(self.getSequenceMembership(subset, flrg.LHS)))
            else:
                mv = FuzzySet.fuzzyInstance(ndata[k], self.sets)  # get all membership values
                tmp = np.argwhere(mv)  # get the indices of values > 0
                idx = np.ravel(tmp)  # flatten the array

                if idx.size == 0:  # the element is out of the bounds of the Universe of Discourse
                    if ndata[k] <= self.sets[0].lower:
                        idx = [0]
                    elif ndata[k] >= self.sets[-1].upper:
                        idx = [len(self.sets) - 1]
                    else:
                        raise Exception(ndata[k])

                for kk in idx:
                    flrg = hofts.HighOrderFLRG(self.order)
                    flrg.appendLHS(self.sets[kk])
                    affected_flrgs.append(flrg)
                    affected_flrgs_memberships.append(mv[kk])

            count = 0
            for flrg in affected_flrgs:
                # find the bounds of each FLRG, weighted by probability and membership
                norm = self.getProbability(flrg) * affected_flrgs_memberships[count]
                if norm == 0:
                    norm = self.getProbability(flrg)  # * 0.001
                mp.append(norm * self.getMidpoints(flrg))
                norms.append(norm)
                count = count + 1

            # generate the interval
            norm = sum(norms)

            if norm == 0:
                ret.append(0)
            else:
                ret.append(sum(mp) / norm)

        ret = self.doInverseTransformations(ret, params=[data[self.order - 1:]])

        return ret
    def forecastInterval(self, data):

        ndata = np.array(self.doTransformations(data))

        l = len(ndata)

        ret = []

        for k in np.arange(self.order - 1, l):

            # print(k)

            affected_flrgs = []
            affected_flrgs_memberships = []
            norms = []

            up = []
            lo = []

            # Find the sets which membership > 0 for each lag
            count = 0
            lags = {}
            if self.order > 1:
                subset = ndata[k - (self.order - 1): k + 1]

                for instance in subset:
                    mb = FuzzySet.fuzzyInstance(instance, self.sets)
                    tmp = np.argwhere(mb)
                    idx = np.ravel(tmp)  # flatten the array

                    if idx.size == 0:  # the element is out of the bounds of the Universe of Discourse
                        if instance <= self.sets[0].lower:
                            idx = [0]
                        elif instance >= self.sets[-1].upper:
                            idx = [len(self.sets) - 1]
                        else:
                            raise Exception(instance)

                    lags[count] = idx
                    count = count + 1

                # Build the tree with all possible paths
                root = tree.FLRGTreeNode(None)

                self.buildTree(root, lags, 0)

                # Trace the possible paths and build the PFLRG's
                for p in root.paths():
                    path = list(reversed(list(filter(None.__ne__, p))))
                    flrg = hofts.HighOrderFLRG(self.order)
                    for kk in path: flrg.appendLHS(self.sets[kk])

                    assert len(flrg.LHS) == subset.size, str(subset) + " -> " + str([s.name for s in flrg.LHS])

                    ##
                    affected_flrgs.append(flrg)

                    # Find the general membership of FLRG
                    affected_flrgs_memberships.append(min(self.getSequenceMembership(subset, flrg.LHS)))
            else:
                mv = FuzzySet.fuzzyInstance(ndata[k], self.sets)  # get all membership values
                tmp = np.argwhere(mv)  # get the indices of values > 0
                idx = np.ravel(tmp)  # flatten the array

                if idx.size == 0:  # the element is out of the bounds of the Universe of Discourse
                    if ndata[k] <= self.sets[0].lower:
                        idx = [0]
                    elif ndata[k] >= self.sets[-1].upper:
                        idx = [len(self.sets) - 1]
                    else:
                        raise Exception(ndata[k])

                for kk in idx:
                    flrg = hofts.HighOrderFLRG(self.order)
                    flrg.appendLHS(self.sets[kk])
                    affected_flrgs.append(flrg)
                    affected_flrgs_memberships.append(mv[kk])

            count = 0
            for flrg in affected_flrgs:
                # find the bounds of each FLRG, weighted by probability and membership
                norm = self.getProbability(flrg) * affected_flrgs_memberships[count]
                if norm == 0:
                    norm = self.getProbability(flrg)  # * 0.001
                up.append(norm * self.getUpper(flrg))
                lo.append(norm * self.getLower(flrg))
                norms.append(norm)
                count = count + 1

            # generate the interval
            norm = sum(norms)

            if norm == 0:
                ret.append([0, 0])
            else:
                lo_ = self.doInverseTransformations(sum(lo) / norm, params=[data[k - (self.order - 1): k + 1]])
                up_ = self.doInverseTransformations(sum(up) / norm, params=[data[k - (self.order - 1): k + 1]])
                ret.append([lo_, up_])

        return ret
    def forecastAhead(self, data, steps):
        ret = [data[k] for k in np.arange(len(data) - self.order, len(data))]

        for k in np.arange(self.order - 1, steps):

            if ret[-1] <= self.sets[0].lower or ret[-1] >= self.sets[-1].upper:
                ret.append(ret[-1])
            else:
                mp = self.forecast([ret[x] for x in np.arange(k - self.order, k)])

                ret.append(mp)

        return ret

    def forecastAheadInterval(self, data, steps):
        ret = [[data[k], data[k]] for k in np.arange(len(data) - self.order, len(data))]

        for k in np.arange(self.order, steps+self.order):

            if ret[-1][0] <= self.sets[0].lower and ret[-1][1] >= self.sets[-1].upper:
                ret.append(ret[-1])
            else:
                lower = self.forecastInterval([ret[x][0] for x in np.arange(k - self.order, k)])
                upper = self.forecastInterval([ret[x][1] for x in np.arange(k - self.order, k)])

                ret.append([np.min(lower), np.max(upper)])

        return ret

    def getGridClean(self, resolution):
        grid = {}

        if len(self.transformations) == 0:
            _min = self.sets[0].lower
            _max = self.sets[-1].upper
        else:
            _min = self.original_min
            _max = self.original_max

        for sbin in np.arange(_min,_max, resolution):
            grid[sbin] = 0

        return grid

    def gridCount(self, grid, resolution, index, interval):
        #print(interval)
        for k in index.inside(interval[0],interval[1]):
            #print(k)
            grid[k] += 1
        return grid

    def gridCountPoint(self, grid, resolution, index, point):
        k = index.find_ge(point)
        # print(k)
        grid[k] += 1
        return grid

    def buildTreeWithoutOrder(self, node, lags, level):

        if level not in lags:
            return

        for s in lags[level]:
            node.appendChild(tree.FLRGTreeNode(s))

        for child in node.getChildren():
            self.buildTreeWithoutOrder(child, lags, level + 1)
    def forecastAheadDistribution(self, data, steps, resolution, parameters=None):

        ret = []

        intervals = self.forecastAheadInterval(data, steps)

        grid = self.getGridClean(resolution)

        index = SortedCollection.SortedCollection(iterable=grid.keys())

        if parameters is None:

            grids = []
            for k in np.arange(0, steps):
                grids.append(self.getGridClean(resolution))

            for k in np.arange(self.order, steps + self.order):

                lags = {}

                cc = 0

                for i in intervals[k - self.order : k]:

                    quantiles = []

                    for qt in np.arange(0, 50, 2):
                        quantiles.append(i[0] + qt * ((i[1] - i[0]) / 100))
                        quantiles.append(i[1] - qt * ((i[1] - i[0]) / 100))
                    quantiles.append(i[0] + ((i[1] - i[0]) / 2))

                    quantiles = list(set(quantiles))

                    quantiles.sort()

                    lags[cc] = quantiles

                    cc += 1

                # Build the tree with all possible paths
                root = tree.FLRGTreeNode(None)

                self.buildTreeWithoutOrder(root, lags, 0)

                # Trace the possible paths
                for p in root.paths():
                    path = list(reversed(list(filter(None.__ne__, p))))

                    qtle = self.forecastInterval(path)

                    grids[k - self.order] = self.gridCount(grids[k - self.order], resolution, index, np.ravel(qtle))

            for k in np.arange(0, steps):
                tmp = np.array([grids[k][q] for q in sorted(grids[k])])
                ret.append(tmp / sum(tmp))

            grid = self.getGridClean(resolution)
            df = pd.DataFrame(ret, columns=sorted(grid))
            return df
        else:

            print("novo")

            ret = []

            for k in np.arange(self.order, steps + self.order):

                grid = self.getGridClean(resolution)
                grid = self.gridCount(grid, resolution, index, intervals[k])

                for qt in np.arange(0, 50, 1):
                    # print(qt)
                    qtle_lower = self.forecastInterval(
                        [intervals[x][0] + qt * ((intervals[x][1] - intervals[x][0]) / 100) for x in
                         np.arange(k - self.order, k)])
                    grid = self.gridCount(grid, resolution, index, np.ravel(qtle_lower))
                    qtle_upper = self.forecastInterval(
                        [intervals[x][1] - qt * ((intervals[x][1] - intervals[x][0]) / 100) for x in
                         np.arange(k - self.order, k)])
                    grid = self.gridCount(grid, resolution, index, np.ravel(qtle_upper))
                qtle_mid = self.forecastInterval(
                    [intervals[x][0] + (intervals[x][1] - intervals[x][0]) / 2 for x in np.arange(k - self.order, k)])
                grid = self.gridCount(grid, resolution, index, np.ravel(qtle_mid))

                tmp = np.array([grid[k] for k in sorted(grid)])

                ret.append(tmp / sum(tmp))

            grid = self.getGridClean(resolution)
            df = pd.DataFrame(ret, columns=sorted(grid))
            return df

    def __str__(self):
        tmp = self.name + ":\n"
        for r in sorted(self.flrgs):
            p = round(self.flrgs[r].frequencyCount / self.globalFrequency, 3)
            tmp = tmp + "(" + str(p) + ") " + str(self.flrgs[r]) + "\n"
        return tmp

View File

@@ -29,7 +29,7 @@ class ProbabilisticWeightedFLRG(hofts.HighOrderFLRG):
         else:
             self.RHS[c.name] = mv
 
-    def getProbability(self, c):
+    def get_probability(self, c):
         return self.RHS[c] / self.frequencyCount
 
     def __str__(self):
@@ -42,7 +42,7 @@ class ProbabilisticWeightedFLRG(hofts.HighOrderFLRG):
 
 class ProbabilisticWeightedFTS(ifts.IntervalFTS):
-    def __init__(self, name):
+    def __init__(self, name, update=True):
         super(ProbabilisticWeightedFTS, self).__init__("PWFTS")
         self.shortname = "PWFTS " + name
         self.name = "Probabilistic FTS"
@@ -53,6 +53,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
         self.hasIntervalForecasting = True
         self.hasDistributionForecasting = True
         self.isHighOrder = True
+        self.auto_update = update
 
     def train(self, data, sets, order=1,parameters=None):
@@ -125,14 +126,30 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
             if flrg.strLHS() in flrgs:
                 flrgs[flrg.strLHS()].appendRHS(flrs[k-1].RHS)
             else:
-                flrgs[flrg.strLHS()] = flrg;
+                flrgs[flrg.strLHS()] = flrg
                 flrgs[flrg.strLHS()].appendRHS(flrs[k-1].RHS)
             if self.dump: print("RHS: " + str(flrs[k-1]))
 
             self.globalFrequency += 1
         return (flrgs)
 
-    def addNewPFLGR(self,flrg):
+    def update_model(self,data):
+
+        fzzy = FuzzySet.fuzzySeries(data, self.sets)
+
+        flrg = ProbabilisticWeightedFLRG(self.order)
+
+        for k in np.arange(0, self.order): flrg.appendLHS(fzzy[k])
+
+        if flrg.strLHS() in self.flrgs:
+            self.flrgs[flrg.strLHS()].appendRHS(fzzy[self.order])
+        else:
+            self.flrgs[flrg.strLHS()] = flrg
+            self.flrgs[flrg.strLHS()].appendRHS(fzzy[self.order])
+
+        self.globalFrequency += 1
+
+    def add_new_PWFLGR(self, flrg):
         if flrg.strLHS() not in self.flrgs:
             tmp = ProbabilisticWeightedFLRG(self.order)
             for fs in flrg.LHS: tmp.appendLHS(fs)
@@ -140,17 +157,17 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
             self.flrgs[tmp.strLHS()] = tmp;
             self.globalFrequency += 1
 
-    def getProbability(self, flrg):
+    def get_probability(self, flrg):
         if flrg.strLHS() in self.flrgs:
             return self.flrgs[flrg.strLHS()].frequencyCount / self.globalFrequency
         else:
-            self.addNewPFLGR(flrg)
-            return self.getProbability(flrg)
+            self.add_new_PWFLGR(flrg)
+            return self.get_probability(flrg)
 
     def getMidpoints(self, flrg):
         if flrg.strLHS() in self.flrgs:
             tmp = self.flrgs[flrg.strLHS()]
-            ret = sum(np.array([tmp.getProbability(s) * self.setsDict[s].centroid for s in tmp.RHS]))
+            ret = sum(np.array([tmp.get_probability(s) * self.setsDict[s].centroid for s in tmp.RHS]))
         else:
             pi = 1 / len(flrg.LHS)
             ret = sum(np.array([pi * s.centroid for s in flrg.LHS]))
@@ -159,7 +176,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
     def getUpper(self, flrg):
         if flrg.strLHS() in self.flrgs:
             tmp = self.flrgs[flrg.strLHS()]
-            ret = sum(np.array([tmp.getProbability(s) * self.setsDict[s].upper for s in tmp.RHS]))
+            ret = sum(np.array([tmp.get_probability(s) * self.setsDict[s].upper for s in tmp.RHS]))
         else:
             pi = 1 / len(flrg.LHS)
             ret = sum(np.array([pi * s.upper for s in flrg.LHS]))
@@ -168,7 +185,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
     def getLower(self, flrg):
         if flrg.strLHS() in self.flrgs:
             tmp = self.flrgs[flrg.strLHS()]
-            ret = sum(np.array([tmp.getProbability(s) * self.setsDict[s].lower for s in tmp.RHS]))
+            ret = sum(np.array([tmp.get_probability(s) * self.setsDict[s].lower for s in tmp.RHS]))
         else:
             pi = 1 / len(flrg.LHS)
             ret = sum(np.array([pi * s.lower for s in flrg.LHS]))
@@ -187,6 +204,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
             # print(k)
 
             affected_flrgs = []
+            affected_rhs = []
             affected_flrgs_memberships = []
             norms = []
@@ -258,9 +276,9 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
             count = 0
             for flrg in affected_flrgs:
                 # find the bounds of each FLRG, weighted by probability and membership
-                norm = self.getProbability(flrg) * affected_flrgs_memberships[count]
+                norm = self.get_probability(flrg) * affected_flrgs_memberships[count]
                 if norm == 0:
-                    norm = self.getProbability(flrg)  # * 0.001
+                    norm = self.get_probability(flrg)  # * 0.001
                 mp.append(norm * self.getMidpoints(flrg))
                 norms.append(norm)
                 count = count + 1
@@ -272,6 +290,8 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
             else:
                 ret.append(sum(mp) / norm)
 
+            if self.auto_update and k > self.order+1: self.update_model(ndata[k - self.order - 1 : k])
+
         ret = self.doInverseTransformations(ret, params=[data[self.order - 1:]])
 
         return ret
@@ -361,9 +381,9 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
             count = 0
             for flrg in affected_flrgs:
                 # find the bounds of each FLRG, weighted by probability and membership
-                norm = self.getProbability(flrg) * affected_flrgs_memberships[count]
+                norm = self.get_probability(flrg) * affected_flrgs_memberships[count]
                 if norm == 0:
-                    norm = self.getProbability(flrg)  # * 0.001
+                    norm = self.get_probability(flrg)  # * 0.001
                 up.append(norm * self.getUpper(flrg))
                 lo.append(norm * self.getLower(flrg))
                 norms.append(norm)
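A short usage sketch of the new auto_update behaviour. The partition size, order and training split are illustrative, and it assumes the nasdaq array loaded in the test script below:

    from pyFTS.partitioners import Grid
    from pyFTS import pwfts

    sets = Grid.GridPartitioner(nasdaq[:2000], 35).sets       # illustrative partitioning
    model = pwfts.ProbabilisticWeightedFTS("", update=True)   # auto_update enabled
    model.train(nasdaq[:2000], sets, order=2)

    # While forecasting, each consumed window is re-fuzzified and its FLRG is
    # appended via update_model(), so the rule base keeps adapting online.
    forecasts = model.forecast(nasdaq[2000:2500])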

View File

@@ -11,15 +11,15 @@ from mpl_toolkits.mplot3d import Axes3D
 import pandas as pd
 from pyFTS.partitioners import Grid, Entropy, FCM, Huarng
 from pyFTS.common import FLR,FuzzySet,Membership,Transformations
-from pyFTS import fts,hofts,ifts,pwfts,tree, chen, pfts
+from pyFTS import fts,hofts,ifts,pwfts,tree, chen
 from pyFTS.benchmarks import benchmarks as bchmk
-from pyFTS.benchmarks import naive
+from pyFTS.benchmarks import naive, arima
 from pyFTS.benchmarks import Measures
 from numpy import random
 
 #print(FCM.FCMPartitionerTrimf.__module__)
 
-#gauss = random.normal(0,1.0,2000)
+#gauss = random.normal(0,1.0,5000)
 #gauss_teste = random.normal(0,1.0,400)
@@ -28,9 +28,12 @@ os.chdir("/home/petronio/dados/Dropbox/Doutorado/Disciplinas/AdvancedFuzzyTimeSe
 #taiexpd = pd.read_csv("DataSets/TAIEX.csv", sep=",")
 #taiex = np.array(taiexpd["avg"][:5000])
 
-taiex = pd.read_csv("DataSets/TAIEX.csv", sep=",")
-taiex_treino = np.array(taiex["avg"][2500:3900])
-taiex_teste = np.array(taiex["avg"][3901:4500])
+nasdaqpd = pd.read_csv("DataSets/NASDAQ_IXIC.csv", sep=",")
+nasdaq = np.array(nasdaqpd["avg"][:5000])
+
+#taiex = pd.read_csv("DataSets/TAIEX.csv", sep=",")
+#taiex_treino = np.array(taiex["avg"][2500:3900])
+#taiex_teste = np.array(taiex["avg"][3901:4500])
 
 #print(len(taiex))
@@ -38,30 +41,38 @@ taiex_teste = np.array(taiex["avg"][3901:4500])
 #, ,
 
-diff = Transformations.Differential(1)
+#diff = Transformations.Differential(1)
 
-#bchmk.sliding_window(taiex,2000,train=0.8, #transformation=diff, #models=[pwfts.ProbabilisticWeightedFTS],
-# partitioners=[Grid.GridPartitioner, FCM.FCMPartitioner, Entropy.EntropyPartitioner],
-# partitions=[10, 15, 20, 25, 30, 35, 40], dump=True, save=True, file="experiments/points.csv")
-
-bchmk.allPointForecasters(taiex_treino, taiex_treino, 7, transformation=diff,
-                          models=[ naive.Naive, pfts.ProbabilisticFTS, pwfts.ProbabilisticWeightedFTS],
-                          statistics=True, residuals=False, series=False)
-
-data_train_fs = Grid.GridPartitioner(taiex_treino, 10, transformation=diff).sets
-
-fts1 = pfts.ProbabilisticFTS("")
-fts1.appendTransformation(diff)
-fts1.train(taiex_treino, data_train_fs, order=1)
-
-print(fts1.forecast([5000, 5000]))
-
-fts2 = pwfts.ProbabilisticWeightedFTS("")
-fts2.appendTransformation(diff)
-fts2.train(taiex_treino, data_train_fs, order=1)
-
-print(fts2.forecast([5000, 5000]))
+bchmk.external_point_sliding_window([naive.Naive, arima.ARIMA, arima.ARIMA, arima.ARIMA, arima.ARIMA, arima.ARIMA, arima.ARIMA],
+                                    [None, (1,0,0),(1,1,0),(2,0,0), (2,1,0), (1,1,1), (1,0,1)],
+                                    nasdaq,2000,train=0.8, #transformation=diff, #models=[pwfts.ProbabilisticWeightedFTS], # #
+                                    dump=True, save=True, file="experiments/arima_nasdaq.csv")
+
+#bchmk.point_sliding_window(taiex,2000,train=0.8, #transformation=diff, #models=[pwfts.ProbabilisticWeightedFTS], # #
+# partitioners=[Grid.GridPartitioner], #Entropy.EntropyPartitioner], # FCM.FCMPartitioner, ],
+# partitions= [45,55, 65, 75, 85, 95,105,115,125,135, 150], #np.arange(5,150,step=10), #
+# dump=True, save=True, file="experiments/taiex_point_new.csv")
+
+#bchmk.allPointForecasters(taiex_treino, taiex_treino, 95, #transformation=diff,
+# models=[ naive.Naive, pfts.ProbabilisticFTS, pwfts.ProbabilisticWeightedFTS],
+# statistics=True, residuals=False, series=False)
+
+#data_train_fs = Grid.GridPartitioner(taiex_treino, 10, transformation=diff).sets
+
+#fts1 = pfts.ProbabilisticFTS("")
+#fts1.appendTransformation(diff)
+#fts1.train(taiex_treino, data_train_fs, order=1)
+
+#print(fts1.forecast([5000, 5000]))
+
+#fts2 = pwfts.ProbabilisticWeightedFTS("")
+#fts2.appendTransformation(diff)
+#fts2.train(taiex_treino, data_train_fs, order=1)
+
+#print(fts2.forecast([5000, 5000]))
 
 #tmp = Grid.GridPartitioner(taiex_treino,7,transformation=diff)