diff --git a/benchmarks/ProbabilityDistribution.py b/benchmarks/ProbabilityDistribution.py
index ce7625f..199e8f1 100644
--- a/benchmarks/ProbabilityDistribution.py
+++ b/benchmarks/ProbabilityDistribution.py
@@ -71,3 +71,17 @@ class ProbabilityDistribution(object):
 
         axis.set_xlabel('Universe of Discourse')
         axis.set_ylabel('Probability')
+
+
+    def __str__(self):
+        """Render the distribution as a two-row, pipe-delimited text table.
+
+        The first row holds the sorted keys of ``self.distribution`` rounded
+        to 2 decimals; the second holds each key's value divided by
+        ``self.count``, rounded to 3 decimals.
+        """
+        keys = sorted(self.distribution.keys())
+        head = ''.join(str(round(k, 2)) + '\t|' for k in keys)
+        body = ''.join(str(round(self.distribution[k] / self.count, 3)) + '\t|'
+                       for k in keys)
+        return '|' + head + '\n' + '|' + body
diff --git a/common/FuzzySet.py b/common/FuzzySet.py
index b749e83..ee5cb15 100644
--- a/common/FuzzySet.py
+++ b/common/FuzzySet.py
@@ -28,6 +28,16 @@ def fuzzyInstance(inst, fuzzySets):
     return mv
 
 
+def fuzzyInstances(data, fuzzySets):
+    """Fuzzify every instance in ``data``.
+
+    Returns a list with one numpy array per instance, holding the
+    membership of that instance in each set of ``fuzzySets``, in order.
+    """
+    return [np.array([fs.membership(inst) for fs in fuzzySets])
+            for inst in data]
+
+
 def getMaxMembershipFuzzySet(inst, fuzzySets):
     mv = fuzzyInstance(inst, fuzzySets)
     return fuzzySets[np.argwhere(mv == max(mv))[0, 0]]
diff --git a/pfts.py b/pfts.py
index 87fef27..5e08d3c 100644
--- a/pfts.py
+++ b/pfts.py
@@ -5,7 +5,7 @@ import numpy as np
 import pandas as pd
 import math
 from operator import itemgetter
-from pyFTS.common import FuzzySet, SortedCollection
+from pyFTS.common import FLR, FuzzySet, SortedCollection
 from pyFTS import hofts, ifts, tree
 
 
@@ -22,6 +22,18 @@ class ProbabilisticFLRG(hofts.HighOrderFLRG):
         else:
             self.RHS[c.name] = 1.0
 
+    def appendRHSFuzzy(self, c, mv):
+        """Add the weight ``mv`` to the RHS entry of the set ``c``.
+
+        ``mv`` is accumulated both in ``self.frequencyCount`` and in the
+        entry of ``self.RHS`` keyed by ``c.name`` (created on first use).
+        """
+        self.frequencyCount += mv
+        if c.name in self.RHS:
+            self.RHS[c.name] += mv
+        else:
+            self.RHS[c.name] = mv
+
     def getProbability(self, c):
         return self.RHS[c] / self.frequencyCount
 
@@ -47,6 +59,81 @@ class ProbabilisticFTS(ifts.IntervalFTS):
         self.hasDistributionForecasting = True
         self.isHighOrder = True
 
+    def train(self, data, sets, order=1,parameters=None):
+        """Fit the model: fuzzify ``data`` and build the FLRGs.
+
+        ``data`` is first passed through ``doTransformations``; ``order`` and
+        ``sets`` are stored on the instance and ``setsDict`` is filled with
+        the sets keyed by name. ``parameters`` is not used here.
+        """
+        data = self.doTransformations(data, updateUoD=True)
+
+        self.order = order
+        self.sets = sets
+        for s in self.sets:
+            self.setsDict[s.name] = s
+
+        tmpdata = FuzzySet.fuzzySeries(data, sets)
+        flrs = FLR.generateRecurrentFLRs(tmpdata)
+        self.flrgs = self.generateFLRG(flrs)
+        #self.flrgs = self.generateFLRG2(data)
+
+    def generateFLRG2(self, data):
+        """Build the FLRGs directly from ``data`` using fuzzy memberships.
+
+        For every index ``k`` from ``self.order`` on, the sets with non-zero
+        membership at each of the ``self.order`` preceding values are handed
+        to ``buildTreeWithoutOrder``; each resulting path becomes an LHS.
+        Every set with non-zero membership at ``data[k]`` is then added to
+        that LHS's RHS, weighted by its membership times the largest LHS
+        membership.
+
+        Returns a dict mapping each ``strLHS()`` to its ProbabilisticFLRG.
+        """
+        flrgs = {}
+        for k in range(self.order, len(data)):
+            if self.dump: print("FLR: " + str(k))
+
+            sample = data[k - self.order: k]
+            mvs = FuzzySet.fuzzyInstances(sample, self.sets)
+
+            # Candidate sets per lag: those the lagged value belongs to
+            lags = {}
+            for o in range(self.order):
+                lags[o] = [s for s, m in zip(self.sets, mvs[o]) if m > 0]
+
+            root = tree.FLRGTreeNode(None)
+            self.buildTreeWithoutOrder(root, lags, 0)
+
+            # The RHS does not depend on the path, so compute it only once
+            mv = FuzzySet.fuzzyInstance(data[k], self.sets)
+            rhs = [(s, m) for s, m in zip(self.sets, mv) if m > 0]
+
+            # Trace the possible paths
+            for p in root.paths():
+                path = [e for e in p if e is not None]
+                path.reverse()
+
+                # A fresh FLRG per path: reusing one object would let the LHS
+                # of earlier paths leak into later ones
+                flrg = ProbabilisticFLRG(self.order)
+                lhs_mv = []
+                for c, e in enumerate(path):
+                    lhs_mv.append(e.membership(sample[c]))
+                    flrg.appendLHS(e)
+
+                key = flrg.strLHS()
+                if key not in flrgs:
+                    flrgs[key] = flrg
+
+                weight = max(lhs_mv)
+                for e, m in rhs:
+                    flrgs[key].appendRHSFuzzy(e, m * weight)
+
+                self.globalFrequency += weight
+
+        return flrgs
+
     def generateFLRG(self, flrs):
         flrgs = {}
         l = len(flrs)
diff --git a/tests/pfts.py b/tests/pfts.py
index ce6be1f..05e5807 100644
--- a/tests/pfts.py
+++ b/tests/pfts.py
@@ -16,8 +16,8 @@ from pyFTS.benchmarks import benchmarks as bchmk
 from pyFTS.benchmarks import Measures
 from numpy import random
 
-gauss_treino = random.normal(0,1.0,1600)
-gauss_teste = random.normal(0,1.0,400)
+#gauss_treino = random.normal(0,1.0,1600)
+#gauss_teste = random.normal(0,1.0,400)
 
 os.chdir("/home/petronio/dados/Dropbox/Doutorado/Disciplinas/AdvancedFuzzyTimeSeriesModels/")
 
@@ -25,9 +25,9 @@ os.chdir("/home/petronio/dados/Dropbox/Doutorado/Disciplinas/AdvancedFuzzyTimeSe
 #enrollments = pd.read_csv("DataSets/Enrollments.csv", sep=";")
 #enrollments = np.array(enrollments["Enrollments"])
 
-#taiex = pd.read_csv("DataSets/TAIEX.csv", sep=",")
-#taiex_treino = np.array(taiex["avg"][2500:3900])
-#taiex_teste = np.array(taiex["avg"][3901:4500])
+taiex = pd.read_csv("DataSets/TAIEX.csv", sep=",")
+taiex_treino = np.array(taiex["avg"][2500:3900])
+taiex_teste = np.array(taiex["avg"][3901:4500])
 
 #nasdaq = pd.read_csv("DataSets/NASDAQ_IXIC.csv", sep=",")
 #nasdaq_treino = np.array(nasdaq["avg"][0:1600])
@@ -35,16 +35,25 @@ os.chdir("/home/petronio/dados/Dropbox/Doutorado/Disciplinas/AdvancedFuzzyTimeSe
 
 diff = Transformations.Differential(1)
 
-fs = Grid.GridPartitionerTrimf(gauss_treino,10)
+fs = Grid.GridPartitionerTrimf(taiex_treino,10)
 
 #tmp = chen.ConventionalFTS("")
 
-#pfts1 = pfts.ProbabilisticFTS("1")
+pfts1 = pfts.ProbabilisticFTS("1")
 #pfts1.appendTransformation(diff)
-pfts1.train(gauss_treino,fs,1)
-pfts2 = pfts.ProbabilisticFTS("n = 2")
+pfts1.train(taiex_treino,fs,1)
+
+from pyFTS.benchmarks import ProbabilityDistribution as dist
+
+forecasts = pfts1.forecast(taiex_treino)
+
+pmf1 = dist.ProbabilityDistribution("Original",10,[min(taiex_treino),max(taiex_treino)],data=forecasts)
+
+print(pmf1)
+
+#pfts2 = pfts.ProbabilisticFTS("n = 2")
 #pfts2.appendTransformation(diff)
-pfts2.train(gauss_treino,fs,2)
+#pfts2.train(gauss_treino,fs,2)
 
 #pfts3 = pfts.ProbabilisticFTS("n = 3")
 #pfts3.appendTransformation(diff)