From 7c1e79b30d030499569b9c1ca2b077fb84113fcf Mon Sep 17 00:00:00 2001
From: Petrînio Cùndido de Lima e Silva
Date: Tue, 8 Nov 2016 14:08:06 -0200
Subject: [PATCH] Corrections and optimizations in IFTS and PIFTS codes

---
 benchmarks.py  | 34 ++++++++++++------
 common.py      |  9 ++---
 fts.py         |  1 +
 ifts.py        |  2 ++
 partitioner.py |  7 ++--
 pifts.py       | 96 +++++++++++++++++++++++++++++++++++++++++++++-----
 6 files changed, 123 insertions(+), 26 deletions(-)

diff --git a/benchmarks.py b/benchmarks.py
index 289f0a5..5276fe7 100644
--- a/benchmarks.py
+++ b/benchmarks.py
@@ -1,6 +1,7 @@
 import numpy as np
 import pandas as pd
 import matplotlib as plt
+import matplotlib.colors as pltcolors
 import matplotlib.pyplot as plt
 from mpl_toolkits.mplot3d import Axes3D
 from sklearn.cross_validation import KFold
@@ -66,7 +67,7 @@ def plotDistribution(dist):
         alpha = np.array([dist[x][k] for x in dist])*100
         x = [k for x in np.arange(0,len(alpha))]
         y = dist.columns
-        plt.scatter(x,y,c=alpha,marker='s',linewidths=0,cmap='Reds',edgecolors=None)
+        plt.scatter(x,y,c=alpha,marker='s',linewidths=0,cmap='Oranges',norm=pltcolors.Normalize(vmin=0,vmax=1),vmin=0,vmax=1,edgecolors=None)
 
 def plotComparedSeries(original,models, colors):
     fig = plt.figure(figsize=[25,10])
@@ -79,6 +80,7 @@ def plotComparedSeries(original,models, colors):
     count = 0
     for fts in models:
         forecasted = fts.forecast(original)
+
         if fts.isInterval:
             lower = [kk[0] for kk in forecasted]
             upper = [kk[1] for kk in forecasted]
@@ -106,20 +108,28 @@ def plotComparedSeries(original,models, colors):
 
     ax.set_xlim([0,len(original)])
 
-def plotComparedIntervalsAhead(original,models, colors, time_from, time_to):
+def plotComparedIntervalsAhead(original,models, colors, distributions, time_from, time_to):
     fig = plt.figure(figsize=[25,10])
     ax = fig.add_subplot(111)
     mi = []
     ma = []
-    ax.plot(original,color='black',label="Original")
     count = 0
     for fts in models:
+        if fts.isDensity and distributions[count]:
+            density = fts.forecastDistributionAhead(original[:time_from],time_to,25)
+            for k in density.index:
+                alpha = np.array([density[x][k] for x in density])*100
+                x = [time_from + fts.order + k for x in np.arange(0,len(alpha))]
+                y = density.columns
+                ax.scatter(x,y,c=alpha,marker='s',linewidths=0,cmap='Oranges',
+                           norm=pltcolors.Normalize(vmin=0,vmax=1),vmin=0,vmax=1,edgecolors=None)
+
         if fts.isInterval:
-            forecasted = fts.forecastAhead(original[:time_from],time_to)
-            lower = [kk[0] for kk in forecasted]
-            upper = [kk[1] for kk in forecasted]
+            forecasts = fts.forecastAhead(original[:time_from],time_to)
+            lower = [kk[0] for kk in forecasts]
+            upper = [kk[1] for kk in forecasts]
             mi.append(min(lower))
             ma.append(max(upper))
             for k in np.arange(0,time_from):
@@ -129,15 +139,17 @@ def plotComparedIntervalsAhead(original,models, colors, time_from, time_to):
             ax.plot(upper,color=colors[count])
 
         else:
-            forecasted = fts.forecast(original)
-            mi.append(min(forecasted))
-            ma.append(max(forecasted))
-            forecasted.insert(0,None)
-            ax.plot(forecasted,color=colors[count],label=fts.shortname)
+            forecasts = fts.forecast(original)
+            mi.append(min(forecasts))
+            ma.append(max(forecasts))
+            for k in np.arange(0,time_from):
+                forecasts.insert(0,None)
+            ax.plot(forecasts,color=colors[count],label=fts.shortname)
 
         handles0, labels0 = ax.get_legend_handles_labels()
         ax.legend(handles0,labels0)
         count = count + 1
+    ax.plot(original,color='black',label="Original")
     #ax.set_title(fts.name)
     ax.set_ylim([min(mi),max(ma)])
     ax.set_ylabel('F(T)')
diff --git a/common.py b/common.py
index 0dc6e0c..edb78d3 100644
--- a/common.py
+++ b/common.py
@@ -8,12 +8,13 @@ def differential(original):
     return np.array(diff)
 
 def trimf(x,parameters):
-    if(x < parameters[0]):
+    xx = round(x,3)
+    if(xx < parameters[0]):
         return 0
-    elif(x >= parameters[0] and x < parameters[1]):
-        return (x-parameters[0])/(parameters[1]-parameters[0])
-    elif(x >= parameters[1] and x <= parameters[2]):
-        return (parameters[2]-x)/(parameters[2]-parameters[1])
+    elif(xx >= parameters[0] and xx < parameters[1]):
+        return (xx-parameters[0])/(parameters[1]-parameters[0])
+    elif(xx >= parameters[1] and xx <= parameters[2]):
+        return (parameters[2]-xx)/(parameters[2]-parameters[1])
     else:
         return 0
 
diff --git a/fts.py b/fts.py
index 22233ee..e1cb1ff 100644
--- a/fts.py
+++ b/fts.py
@@ -11,6 +11,7 @@ class FTS:
         self.detail = name
         self.isSeasonal = False
         self.isInterval = False
+        self.isDensity = False
 
     def fuzzy(self,data):
         best = {"fuzzyset":"", "membership":0.0}
diff --git a/ifts.py b/ifts.py
index ca90e4b..e81c898 100644
--- a/ifts.py
+++ b/ifts.py
@@ -27,6 +27,8 @@ class IntervalFTS(hofts.HighOrderFTS):
         return ret
 
     def getSequenceMembership(self, data, fuzzySets):
+        #print(data)
+        #print(fuzzySets)
         mb = [ fuzzySets[k].membership( data[k] ) for k in np.arange(0,len(data)) ]
         return mb
 
diff --git a/partitioner.py b/partitioner.py
index f99e69b..3a904b4 100644
--- a/partitioner.py
+++ b/partitioner.py
@@ -1,4 +1,5 @@
 import numpy as np
+import math
 from pyFTS import *
 
 #print(common.__dict__)
@@ -10,10 +11,10 @@ def GridPartitionerTrimf(data,npart,names = None,prefix = "A"):
     dmin = min(data)
     dmin = dmin - dmin*0.10
     dlen = dmax - dmin
-    partlen = dlen / npart
-    partition = dmin
+    partlen = math.ceil(dlen / npart)
+    partition = math.ceil(dmin)
     for c in range(npart):
-        sets.append(common.FuzzySet(prefix+str(c),common.trimf,[partition-partlen, partition, partition+partlen], partition ) )
+        sets.append(common.FuzzySet(prefix+str(c),common.trimf,[round(partition-partlen,3), partition, partition+partlen], partition ) )
         partition = partition + partlen
 
     return sets
diff --git a/pifts.py b/pifts.py
index 47fc432..b15d0b5 100644
--- a/pifts.py
+++ b/pifts.py
@@ -1,5 +1,6 @@
 import numpy as np
 import pandas as pd
+import math
 from pyFTS import *
 
 class ProbabilisticFLRG(hofts.HighOrderFLRG):
@@ -35,6 +36,7 @@ class ProbabilisticIntervalFTS(ifts.IntervalFTS):
         self.flrgs = {}
         self.globalFrequency = 0
         self.isInterval = True
+        self.isDensity = True
 
     def generateFLRG(self, flrs):
         flrgs = {}
@@ -58,14 +60,16 @@ class ProbabilisticIntervalFTS(ifts.IntervalFTS):
         if flrg.strLHS() in self.flrgs:
             return self.flrgs[ flrg.strLHS() ].frequencyCount / self.globalFrequency
         else:
-            return 1/ self.globalFrequency
+            return 1.0 / self.globalFrequency
 
     def getUpper(self,flrg):
         if flrg.strLHS() in self.flrgs:
             tmp = self.flrgs[ flrg.strLHS() ]
             ret = sum(np.array([ tmp.getProbability(s) * self.setsDict[s].upper for s in tmp.RHS]))
         else:
-            ret = flrg.LHS[-1].upper
+            #print("hit" + flrg.strLHS())
+            #ret = flrg.LHS[-1].upper
+            ret = sum(np.array([ 0.33 * s.upper for s in flrg.LHS]))  # unseen pattern: weight each LHS set equally (0.33 assumes order 3)
         return ret
 
     def getLower(self,flrg):
@@ -73,7 +77,9 @@ class ProbabilisticIntervalFTS(ifts.IntervalFTS):
             tmp = self.flrgs[ flrg.strLHS() ]
             ret = sum(np.array([ tmp.getProbability(s) * self.setsDict[s].lower for s in tmp.RHS]))
         else:
-            ret = flrg.LHS[-1].lower
+            #print("hit" + flrg.strLHS())
+            #ret = flrg.LHS[-1].lower
+            ret = sum(np.array([ 0.33 * s.lower for s in flrg.LHS]))  # unseen pattern: weight each LHS set equally (0.33 assumes order 3)
         return ret
 
     def forecast(self,data):
@@ -88,6 +94,8 @@ class ProbabilisticIntervalFTS(ifts.IntervalFTS):
 
         for k in np.arange(self.order-1,l):
 
+            #print(k)
+
             affected_flrgs = []
             affected_flrgs_memberships = []
             norms = []
@@ -107,15 +115,18 @@ class ProbabilisticIntervalFTS(ifts.IntervalFTS):
                     idx = np.ravel(tmp)  #flatten the array
 
                     if idx.size == 0:  # the element is out of the bounds of the Universe of Discourse
-                        if instance <= self.sets[0].lower:
+                        #print("high order - idx.size == 0 - " + str(instance))
+                        if math.ceil(instance) <= self.sets[0].lower:
                             idx = [0]
-                        if instance >= self.sets[-1].upper:
+                        elif math.ceil(instance) >= self.sets[-1].upper:
                             idx = [len(self.sets)-1]
-
+                            #print(idx)
+                        else:
+                            raise Exception("value outside the universe of discourse: " + str(instance))
+                        #print(idx)
                     lags[count] = idx
                     count = count + 1
-
         # Build the tree with all possible paths
 
         root = tree.FLRGTreeNode(None)
@@ -129,26 +140,43 @@ class ProbabilisticIntervalFTS(ifts.IntervalFTS):
                     flrg = hofts.HighOrderFLRG(self.order)
                     for kk in path: flrg.appendLHS(self.sets[ kk ])
+
+                    assert len(flrg.LHS) == subset.size, str(subset) + " -> " + str([s.name for s in flrg.LHS])
+
+                    ##
                     affected_flrgs.append( flrg )
 
                     # Find the general membership of FLRG
                     affected_flrgs_memberships.append(min(self.getSequenceMembership(subset, flrg.LHS)))
+                    #print(self.getSequenceMembership(subset, flrg.LHS))
 
             else:
 
                 mv = common.fuzzyInstance(ndata[k],self.sets)  # get all membership values
                 tmp = np.argwhere( mv )  # get the indices of values > 0
                 idx = np.ravel(tmp)  # flatten the array
+
+                if idx.size == 0:  # the element is out of the bounds of the Universe of Discourse
+                    #print("idx.size == 0")
+                    if math.ceil(ndata[k]) <= self.sets[0].lower:
+                        idx = [0]
+                    elif math.ceil(ndata[k]) >= self.sets[-1].upper:
+                        idx = [len(self.sets)-1]
+                        #print(idx)
+                    else:
+                        raise Exception("value outside the universe of discourse: " + str(ndata[k]))
+                    #print(idx)
 
                 for kk in idx:
                     flrg = hofts.HighOrderFLRG(self.order)
                     flrg.appendLHS(self.sets[ kk ])
                     affected_flrgs.append( flrg )
+                    #print(mv[kk])
                     affected_flrgs_memberships.append(mv[kk])
 
             count = 0
             for flrg in affected_flrgs:
-                # achar o os bounds de cada FLRG, ponderados pela probabilidade e pertinĂȘncia
+                # find the bounds of each FLRG, weighted by its probability and membership
                 norm = self.getProbability(flrg) * affected_flrgs_memberships[count]
+                if norm == 0:
+                    norm = self.getProbability(flrg)  # * 0.001 -- membership is zero, fall back to the bare probability
                 up.append( norm * self.getUpper(flrg) )
                 lo.append( norm * self.getLower(flrg) )
                 norms.append(norm)
@@ -158,6 +186,7 @@ class ProbabilisticIntervalFTS(ifts.IntervalFTS):
             norm = sum(norms)
             if norm == 0:
                 ret.append( [ 0, 0 ] )
+                print("warning: no rule fired for this sample")
             else:
                 ret.append( [ sum(lo)/norm, sum(up)/norm ] )
 
@@ -165,12 +194,16 @@ class ProbabilisticIntervalFTS(ifts.IntervalFTS):
     def forecastAhead(self,data,steps):
         ret = [[data[k],data[k]] for k in np.arange(len(data)-self.order,len(data))]
 
-        for k in np.arange(self.order,steps):
+        #print(ret)
+        for k in np.arange(self.order-1,steps):
+
             if ret[-1][0] <= self.sets[0].lower and ret[-1][1] >= self.sets[-1].upper:
                 ret.append(ret[-1])
+                #print("the interval already spans the universe of discourse")
             else:
                 lower = self.forecast( [ret[x][0] for x in np.arange(k-self.order,k)] )
                 upper = self.forecast( [ret[x][1] for x in np.arange(k-self.order,k)] )
+
                 ret.append([np.min(lower),np.max(upper)])
 
         return ret
@@ -187,6 +220,53 @@ class ProbabilisticIntervalFTS(ifts.IntervalFTS):
             if sbin >= interval[0] and (sbin + resolution) <= interval[1]:
                 grid[sbin] = grid[sbin] + 1
         return grid
+
+    def forecastDistributionAhead2(self,data,steps,resolution):
+
+        ret = []
+
+        intervals = self.forecastAhead(data,steps)
+
+        for k in np.arange(self.order,steps):
+
+            grid = self.getGridClean(resolution)
+            grid = self.gridCount(grid,resolution, intervals[k])
+
+            lags = {}
+
+            cc = 0
+            for x in np.arange(k-self.order,k):
+                tmp = []
+                for qt in np.arange(0,100,5):  # sample each lagged interval at 5% quantile steps
+                    tmp.append(intervals[x][0] + qt*(intervals[x][1]-intervals[x][0])/100)
+                    tmp.append(intervals[x][1] - qt*(intervals[x][1]-intervals[x][0])/100)
+                tmp.append(intervals[x][0] + (intervals[x][1]-intervals[x][0])/2)  # interval midpoint
+
+                lags[cc] = tmp
+
+                cc = cc + 1
+
+            # Build the tree with all possible paths
+
+            root = tree.FLRGTreeNode(None)
+
+            self.buildTree(root,lags,0)
+
+            # Trace the possible paths and build the PFLRG's
+
+            for p in root.paths():
+                path = list(reversed(list(filter(None.__ne__, p))))
+
+                subset = [kk for kk in path]
+
+                qtle = self.forecast(subset)
+                grid = self.gridCount(grid,resolution, np.ravel(qtle))
+
+            tmp = np.array([ grid[k] for k in sorted(grid) ])
+            ret.append( tmp/sum(tmp) )  # normalize the bin counts into a probability distribution
+
+        grid = self.getGridClean(resolution)
+        df = pd.DataFrame(ret, columns=sorted(grid))
+        return df
 
     def forecastDistributionAhead(self,data,steps,resolution):