A new method for fuzzy frequency counting in PFTS was added, but in benchmarks the previous method proved to be more efficient. The new method was kept for future investigation.

This commit is contained in:
Petrônio Cândido de Lima e Silva 2017-02-16 11:54:37 -02:00
parent cb12810e0a
commit 47b1cb57c9
4 changed files with 101 additions and 11 deletions

View File

@ -71,3 +71,12 @@ class ProbabilityDistribution(object):
axis.set_xlabel('Universe of Discourse')
axis.set_ylabel('Probability')
def __str__(self):
    """Render the distribution as a two-row, tab-separated text table.

    The first row lists the keys of ``self.distribution`` in ascending
    order, rounded to 2 decimals; the second row lists the matching
    values divided by ``self.count``, rounded to 3 decimals. Every cell
    is terminated by a tab followed by ``|``.
    """
    ordered_keys = sorted(self.distribution.keys())
    header_cells = [str(round(key, 2)) + '\t|' for key in ordered_keys]
    value_cells = [
        str(round(self.distribution[key] / self.count, 3)) + '\t|'
        for key in ordered_keys
    ]
    return '|' + ''.join(header_cells) + '\n' + '|' + ''.join(value_cells)

View File

@ -28,6 +28,14 @@ def fuzzyInstance(inst, fuzzySets):
return mv
def fuzzyInstances(data, fuzzySets):
    """Compute the membership vector of every value in ``data``.

    For each value, ``membership`` is called on every element of
    ``fuzzySets`` (in order) and the results are packed into a NumPy
    array. Returns a list with one such array per value, in input order.
    """
    return [
        np.array([fuzzy_set.membership(value) for fuzzy_set in fuzzySets])
        for value in data
    ]
def getMaxMembershipFuzzySet(inst, fuzzySets):
    """Return the fuzzy set in which ``inst`` has the highest membership.

    The membership vector is obtained from ``fuzzyInstance`` (defined
    elsewhere in this module). When several sets tie for the maximum,
    the first one in ``fuzzySets`` order is returned.
    """
    mv = fuzzyInstance(inst, fuzzySets)
    # np.argmax yields the index of the first maximum directly, replacing
    # the Python-level max() scan plus boolean mask plus argwhere lookup.
    return fuzzySets[np.argmax(mv)]

66
pfts.py
View File

@ -5,7 +5,7 @@ import numpy as np
import pandas as pd
import math
from operator import itemgetter
from pyFTS.common import FuzzySet, SortedCollection
from pyFTS.common import FLR, FuzzySet, SortedCollection
from pyFTS import hofts, ifts, tree
@ -22,6 +22,13 @@ class ProbabilisticFLRG(hofts.HighOrderFLRG):
else:
self.RHS[c.name] = 1.0
def appendRHSFuzzy(self, c, mv):
    """Accumulate the weight ``mv`` for the consequent ``c``.

    ``mv`` is added to ``self.frequencyCount`` and to the entry of
    ``self.RHS`` keyed by ``c.name``; the entry is created with value
    ``mv`` when the key is not present yet.
    """
    self.frequencyCount += mv
    key = c.name
    if key not in self.RHS:
        # First occurrence of this consequent: start its tally at mv.
        self.RHS[key] = mv
        return
    self.RHS[key] += mv
def getProbability(self, c):
    """Return the share of the total frequency held by RHS key ``c``.

    ``c`` is used directly as a key of ``self.RHS``; the stored value is
    divided by ``self.frequencyCount``.
    """
    weight = self.RHS[c]
    total = self.frequencyCount
    return weight / total
@ -47,6 +54,63 @@ class ProbabilisticFTS(ifts.IntervalFTS):
self.hasDistributionForecasting = True
self.isHighOrder = True
def train(self, data, sets, order=1, parameters=None):
    """Fit the model's FLRGs from ``data`` using the fuzzy sets ``sets``.

    Steps: apply ``self.doTransformations`` (with ``updateUoD=True``),
    store ``order`` and ``sets``, index every set by name in
    ``self.setsDict``, fuzzify the series with ``FuzzySet.fuzzySeries``,
    derive recurrent FLRs and build ``self.flrgs`` with
    ``self.generateFLRG``. ``parameters`` is not used by this method.
    """
    data = self.doTransformations(data, updateUoD=True)

    self.order = order
    self.sets = sets
    for fuzzy_set in self.sets:
        self.setsDict[fuzzy_set.name] = fuzzy_set

    fuzzified = FuzzySet.fuzzySeries(data, sets)
    recurrent_flrs = FLR.generateRecurrentFLRs(fuzzified)
    self.flrgs = self.generateFLRG(recurrent_flrs)
    # Alternative builder, currently disabled:
    # self.flrgs = self.generateFLRG2(data)
def generateFLRG2(self, data):
# Alternative FLRG builder that works on the raw series `data` and accumulates
# membership-weighted frequencies through ProbabilisticFLRG.appendRHSFuzzy.
# Returns a dict of ProbabilisticFLRG objects keyed by their strLHS() string.
# NOTE(review): the only visible call site (in train) is commented out.
# NOTE(review): indentation is not visible in this view; confirm the nesting of
# the statements below against the repository before modifying them.
flrgs = {}
l = len(data)
# k indexes the target value; the `order` values before it form the LHS sample.
for k in np.arange(self.order, l):
if self.dump: print("FLR: " + str(k))
flrg = ProbabilisticFLRG(self.order)
sample = data[k - self.order: k]
# One membership vector per lagged value in the sample.
mvs = FuzzySet.fuzzyInstances(sample, self.sets)
lags = {}
# For each lag, keep only the fuzzy sets with membership > 0.
for o in np.arange(0, self.order):
_sets = [self.sets[kk] for kk in np.arange(0, len(self.sets)) if mvs[o][kk] > 0]
lags[o] = _sets
# Build a tree from the per-lag candidate sets (buildTreeWithoutOrder is defined elsewhere).
root = tree.FLRGTreeNode(None)
self.buildTreeWithoutOrder(root, lags, 0)
# Trace the possible paths
for p in root.paths():
# Drop None entries from the path and reverse its order.
path = list(reversed(list(filter(None.__ne__, p))))
# Membership of each lagged sample value in the set at the same path position.
lhs_mv = []
for c, e in enumerate(path, start=0):
lhs_mv.append( e.membership( sample[c] ) )
flrg.appendLHS(e)
# Register the group under its LHS string the first time it is seen.
if flrg.strLHS() not in flrgs:
flrgs[flrg.strLHS()] = flrg;
# Memberships of the target value data[k]; keep only the positive ones
# (rhs_mv) and the fuzzy sets they belong to (_sets), in matching order.
mv = FuzzySet.fuzzyInstance(data[k], self.sets)
rhs_mv = [mv[kk] for kk in np.arange(0, len(self.sets)) if mv[kk] > 0]
_sets = [self.sets[kk] for kk in np.arange(0, len(self.sets)) if mv[kk] > 0]
# Each consequent is weighted by its own membership times the largest LHS membership.
for c, e in enumerate(_sets, start=0):
flrgs[flrg.strLHS()].appendRHSFuzzy(e,rhs_mv[c]*max(lhs_mv))
# The global counter grows by the largest LHS membership.
self.globalFrequency += max(lhs_mv)
return (flrgs)
def generateFLRG(self, flrs):
flrgs = {}
l = len(flrs)

View File

@ -16,8 +16,8 @@ from pyFTS.benchmarks import benchmarks as bchmk
from pyFTS.benchmarks import Measures
from numpy import random
gauss_treino = random.normal(0,1.0,1600)
gauss_teste = random.normal(0,1.0,400)
#gauss_treino = random.normal(0,1.0,1600)
#gauss_teste = random.normal(0,1.0,400)
os.chdir("/home/petronio/dados/Dropbox/Doutorado/Disciplinas/AdvancedFuzzyTimeSeriesModels/")
@ -25,9 +25,9 @@ os.chdir("/home/petronio/dados/Dropbox/Doutorado/Disciplinas/AdvancedFuzzyTimeSe
#enrollments = pd.read_csv("DataSets/Enrollments.csv", sep=";")
#enrollments = np.array(enrollments["Enrollments"])
#taiex = pd.read_csv("DataSets/TAIEX.csv", sep=",")
#taiex_treino = np.array(taiex["avg"][2500:3900])
#taiex_teste = np.array(taiex["avg"][3901:4500])
taiex = pd.read_csv("DataSets/TAIEX.csv", sep=",")
taiex_treino = np.array(taiex["avg"][2500:3900])
taiex_teste = np.array(taiex["avg"][3901:4500])
#nasdaq = pd.read_csv("DataSets/NASDAQ_IXIC.csv", sep=",")
#nasdaq_treino = np.array(nasdaq["avg"][0:1600])
@ -35,16 +35,25 @@ os.chdir("/home/petronio/dados/Dropbox/Doutorado/Disciplinas/AdvancedFuzzyTimeSe
diff = Transformations.Differential(1)
fs = Grid.GridPartitionerTrimf(gauss_treino,10)
fs = Grid.GridPartitionerTrimf(taiex_treino,10)
#tmp = chen.ConventionalFTS("")
#pfts1 = pfts.ProbabilisticFTS("1")
pfts1 = pfts.ProbabilisticFTS("1")
#pfts1.appendTransformation(diff)
pfts1.train(gauss_treino,fs,1)
pfts2 = pfts.ProbabilisticFTS("n = 2")
pfts1.train(taiex_treino,fs,1)
from pyFTS.benchmarks import ProbabilityDistribution as dist
forecasts = pfts1.forecast(taiex_treino)
pmf1 = dist.ProbabilityDistribution("Original",10,[min(taiex_treino),max(taiex_treino)],data=forecasts)
print(pmf1)
#pfts2 = pfts.ProbabilisticFTS("n = 2")
#pfts2.appendTransformation(diff)
pfts2.train(gauss_treino,fs,2)
#pfts2.train(gauss_treino,fs,2)
#pfts3 = pfts.ProbabilisticFTS("n = 3")
#pfts3.appendTransformation(diff)