From 0cf938c2a6b4dd50a177af6a87e97f265c3dac74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido=20de=20Lima=20e=20Silva?= Date: Sun, 29 Jan 2017 21:59:50 -0200 Subject: [PATCH] Cascaded transformations in all fts models --- benchmarks/benchmarks.py | 63 ++++++++++++---------- common/Transformations.py | 23 +++++++- common/Util.py | 8 ++- fts.py | 16 +++++- hofts.py | 2 +- ifts.py | 8 ++- partitioners/Grid.py | 18 ++++--- pfts.py | 111 +++++++++++++++++++++++++------------- tests/pfts.py | 23 ++++---- 9 files changed, 178 insertions(+), 94 deletions(-) diff --git a/benchmarks/benchmarks.py b/benchmarks/benchmarks.py index 2f163e7..36c528f 100644 --- a/benchmarks/benchmarks.py +++ b/benchmarks/benchmarks.py @@ -108,7 +108,7 @@ def allIntervalForecasters(data_train, data_test, partitions, max_order=3,save=F lcolors = [] - for count, model in enumerate(models, start=0): + for count, model in Util.enumerate2(models, start=0, step=2): mfts = model("") if not mfts.isHighOrder: if transformation is not None: @@ -126,21 +126,20 @@ def allIntervalForecasters(data_train, data_test, partitions, max_order=3,save=F objs.append(mfts) lcolors.append(colors[count % ncol]) - print(getIntervalStatistics(data_test, objs)) plotComparedSeries(data_test, objs, lcolors, typeonlegend=False, save=save, file=file, tam=tam, intervals=True) def getIntervalStatistics(original, models): - ret = "Model & Order & Sharpness & Resolution & Coverage \\ \n" + ret = "Model & Order & Sharpness & Resolution & Coverage \\\\ \n" for fts in models: forecasts = fts.forecastInterval(original) ret += fts.shortname + " & " ret += str(fts.order) + " & " ret += str(round(Measures.sharpness(forecasts), 2)) + " & " ret += str(round(Measures.resolution(forecasts), 2)) + " & " - ret += str(round(Measures.coverage(original[fts.order:], forecasts[:-1]), 2)) + " \\ \n" + ret += str(round(Measures.coverage(original[fts.order:], forecasts[:-1]), 2)) + " \\\\ \n" return ret @@ -213,11 +212,10 @@ def plotComparedIntervalsAhead(original, models, colors, distributions, time_fro mi = [] ma = [] - count = 0 - for fts in models: + for count, fts in enumerate(models, start=0): if fts.hasDistributionForecasting and distributions[count]: - density = fts.forecastAheadDistribution(original[time_from - fts.order:time_from], time_to, resolution, - parameters=None) + density = fts.forecastAheadDistribution(original[time_from - fts.order:time_from], + time_to, resolution, parameters=True) y = density.columns t = len(y) @@ -258,12 +256,22 @@ def plotComparedIntervalsAhead(original, models, colors, distributions, time_fro forecasts.insert(0, None) ax.plot(forecasts, color=colors[count], label=fts.shortname) - count = count + 1 ax.plot(original, color='black', label="Original") handles0, labels0 = ax.get_legend_handles_labels() ax.legend(handles0, labels0, loc=2) # ax.set_title(fts.name) - ax.set_ylim([min(mi), max(ma)]) + _mi = min(mi) + if _mi < 0: + _mi *= 1.1 + else: + _mi *= 0.9 + _ma = max(ma) + if _ma < 0: + _ma *= 0.9 + else: + _ma *= 1.1 + + ax.set_ylim([_mi, _ma]) ax.set_ylabel('F(T)') ax.set_xlabel('T') ax.set_xlim([0, len(original)]) @@ -552,8 +560,8 @@ def compareModelsTable(original, models_fo, models_ho): return sup + header + body + "\\end{tabular}" -def simpleSearch_RMSE(train, test, model, partitions, orders, save=False, file=None, tam=[10, 15], plotforecasts=False, - elev=30, azim=144): +def simpleSearch_RMSE(train, test, model, partitions, orders, save=False, file=None, tam=[10, 15], + plotforecasts=False, elev=30, azim=144, intervals=False): ret = [] errors = np.array([[0 for k in range(len(partitions))] for kk in range(len(orders))]) forecasted_best = [] @@ -568,29 +576,28 @@ def simpleSearch_RMSE(train, test, model, partitions, orders, save=False, file=N ax0.set_xlabel('T') min_rmse = 1000000.0 best = None - pc = 0 - for p in partitions: - oc = 0 - for o in orders: - sets = Grid.GridPartitionerTrimf(train, p) + + for pc, p in enumerate(partitions, start=0): + + sets = Grid.GridPartitionerTrimf(train, p) + for oc, o in enumerate(orders, start=0): fts = model("q = " + str(p) + " n = " + str(o)) fts.train(train, sets, o) - forecasted = fts.forecast(test) - error = Measures.rmse(np.array(test[o:]), np.array(forecasted[:-1])) - mape = Measures.mape(np.array(test[o:]), np.array(forecasted[:-1])) - # print(train[o:]) - # print(forecasted[-1]) - for kk in range(o): - forecasted.insert(0, None) - if plotforecasts: ax0.plot(forecasted, label=fts.name) - # print(o, p, mape) + if not intervals: + forecasted = fts.forecast(test) + error = Measures.rmse(np.array(test[o:]), np.array(forecasted[:-1])) + for kk in range(o): + forecasted.insert(0, None) + if plotforecasts: ax0.plot(forecasted, label=fts.name) + else: + forecasted = fts.forecastInterval(test) + error = 1.0 - Measures.rmse_interval(np.array(test[o:]), np.array(forecasted[:-1])) errors[oc, pc] = error if error < min_rmse: min_rmse = error best = fts forecasted_best = forecasted - oc += 1 - pc += 1 + # print(min_rmse) if plotforecasts: # handles0, labels0 = ax0.get_legend_handles_labels() diff --git a/common/Transformations.py b/common/Transformations.py index 9bc1a89..8bbcf66 100644 --- a/common/Transformations.py +++ b/common/Transformations.py @@ -30,15 +30,34 @@ class Differential(Transformation): def apply(self, data, param=None): if param is not None: self.lag = param + + if not isinstance(data, (list, np.ndarray, np.generic)): + data = [data] + + if isinstance(data, (np.ndarray, np.generic)): + data = data.tolist() + n = len(data) diff = [data[t - self.lag] - data[t] for t in np.arange(self.lag, n)] for t in np.arange(0, self.lag): diff.insert(0, 0) return diff def inverse(self,data, param): + + if isinstance(data, (np.ndarray, np.generic)): + data = data.tolist() + + if not isinstance(data, list): + data = [data] + n = len(data) - inc = [data[t] + param[t] for t in np.arange(1, n)] - return inc + + inc = [data[t] + param[t] for t in np.arange(0, n)] + + if n == 1: + return inc[0] + else: + return inc def boxcox(original, plambda): diff --git a/common/Util.py b/common/Util.py index 0cc0fa0..8291b17 100644 --- a/common/Util.py +++ b/common/Util.py @@ -20,4 +20,10 @@ def showAndSaveImage(fig,file,flag,lgd=None): fig.savefig(uniquefilename(file), additional_artists=lgd,bbox_inches='tight') #bbox_extra_artists=(lgd,), ) else: fig.savefig(uniquefilename(file)) - plt.close(fig) \ No newline at end of file + plt.close(fig) + + +def enumerate2(xs, start=0, step=1): + for x in xs: + yield (start, x) + start += step \ No newline at end of file diff --git a/fts.py b/fts.py index 4462a74..2aa282a 100644 --- a/fts.py +++ b/fts.py @@ -19,6 +19,8 @@ class FTS(object): self.dump = False self.transformations = [] self.transformations_param = [] + self.original_max = 0 + self.original_min = 0 def fuzzy(self, data): best = {"fuzzyset": "", "membership": 0.0} @@ -59,8 +61,20 @@ class FTS(object): def appendTransformation(self, transformation): self.transformations.append(transformation) - def doTransformations(self,data,params=None): + def doTransformations(self,data,params=None,updateUoD=False): ndata = data + + if updateUoD: + if min(data) < 0: + self.original_min = min(data) * 1.1 + else: + self.original_min = min(data) * 0.9 + + if max(data) > 0: + self.original_max = max(data) * 1.1 + else: + self.original_max = max(data) * 0.9 + if len(self.transformations) > 0: if params is None: params = [ None for k in self.transformations] diff --git a/hofts.py b/hofts.py index 1ff9bab..30064e3 100644 --- a/hofts.py +++ b/hofts.py @@ -62,7 +62,7 @@ class HighOrderFTS(fts.FTS): def train(self, data, sets, order=1,parameters=None): - data = self.doTransformations(data) + data = self.doTransformations(data, updateUoD=True) self.order = order self.sets = sets diff --git a/ifts.py b/ifts.py index 1506f22..cfa5a1c 100644 --- a/ifts.py +++ b/ifts.py @@ -49,9 +49,7 @@ class IntervalFTS(hofts.HighOrderFTS): def forecastInterval(self, data): - data = np.array(data) - - ndata = self.doTransformations(data) + ndata = np.array(self.doTransformations(data)) l = len(ndata) @@ -115,8 +113,8 @@ class IntervalFTS(hofts.HighOrderFTS): # gerar o intervalo norm = sum(affected_flrgs_memberships) - lo_ = self.doInverseTransformations(sum(lo) / norm, param=[data[k - (self.order - 1): k + 1]]) - up_ = self.doInverseTransformations(sum(up) / norm, param=[data[k - (self.order - 1): k + 1]]) + lo_ = self.doInverseTransformations(sum(lo) / norm, params=[data[k - (self.order - 1): k + 1]]) + up_ = self.doInverseTransformations(sum(up) / norm, params=[data[k - (self.order - 1): k + 1]]) ret.append([lo_, up_]) return ret diff --git a/partitioners/Grid.py b/partitioners/Grid.py index 70361b9..5adebec 100644 --- a/partitioners/Grid.py +++ b/partitioners/Grid.py @@ -9,20 +9,24 @@ from pyFTS.common import FuzzySet, Membership def GridPartitionerTrimf(data, npart, names=None, prefix="A"): sets = [] - dmax = max(data) - dmax += dmax * 0.1 - dmin = min(data) - dmin -= dmin * 0.1 + if min(data) < 0: + dmin = min(data) * 1.1 + else: + dmin = min(data) * 0.9 + + if max(data) > 0: + dmax = max(data) * 1.1 + else: + dmax = max(data) * 0.9 + dlen = dmax - dmin partlen = math.ceil(dlen / npart) - #p2 = partlen / 2 - #partition = dmin #+ partlen + count = 0 for c in np.arange(dmin, dmax, partlen): sets.append( FuzzySet.FuzzySet(prefix + str(count), Membership.trimf, [c - partlen, c, c + partlen],c)) count += 1 - #partition += partlen return sets diff --git a/pfts.py b/pfts.py index 80a7646..96e771b 100644 --- a/pfts.py +++ b/pfts.py @@ -214,9 +214,7 @@ class ProbabilisticFTS(ifts.IntervalFTS): def forecastInterval(self, data): - data = np.array(data) - - ndata = self.doTransformations(data) + ndata = np.array(self.doTransformations(data)) l = len(ndata) @@ -349,7 +347,15 @@ class ProbabilisticFTS(ifts.IntervalFTS): def getGridClean(self, resolution): grid = {} - for sbin in np.arange(self.sets[0].lower, self.sets[-1].upper, resolution): + + if len(self.transformations) == 0: + _min = self.sets[0].lower + _max = self.sets[-1].upper + else: + _min = self.original_min + _max = self.original_max + + for sbin in np.arange(_min,_max, resolution): grid[sbin] = 0 return grid @@ -378,7 +384,7 @@ class ProbabilisticFTS(ifts.IntervalFTS): for child in node.getChildren(): self.buildTreeWithoutOrder(child, lags, level + 1) - def forecastAheadDistribution(self, data, steps, resolution,parameters=None): + def forecastAheadDistribution(self, data, steps, resolution, parameters=None): ret = [] @@ -388,58 +394,89 @@ class ProbabilisticFTS(ifts.IntervalFTS): index = SortedCollection.SortedCollection(iterable=grid.keys()) - grids = [] - for k in np.arange(0, steps): - grids.append(self.getGridClean(resolution)) + if parameters is None: - for k in np.arange(self.order, steps + self.order): + grids = [] + for k in np.arange(0, steps): + grids.append(self.getGridClean(resolution)) - lags = {} + for k in np.arange(self.order, steps + self.order): - cc = 0 + lags = {} - for i in intervals[k - self.order : k]: + cc = 0 - quantiles = [] + for i in intervals[k - self.order : k]: - for qt in np.arange(0, 50, 2): - quantiles.append(i[0] + qt * ((i[1] - i[0]) / 100)) - quantiles.append(i[1] - qt * ((i[1] - i[0]) / 100)) - quantiles.append(i[0] + ((i[1] - i[0]) / 2)) + quantiles = [] - quantiles = list(set(quantiles)) + for qt in np.arange(0, 50, 2): + quantiles.append(i[0] + qt * ((i[1] - i[0]) / 100)) + quantiles.append(i[1] - qt * ((i[1] - i[0]) / 100)) + quantiles.append(i[0] + ((i[1] - i[0]) / 2)) - quantiles.sort() + quantiles = list(set(quantiles)) - lags[cc] = quantiles + quantiles.sort() - cc += 1 + lags[cc] = quantiles - # Build the tree with all possible paths + cc += 1 - root = tree.FLRGTreeNode(None) + # Build the tree with all possible paths - self.buildTreeWithoutOrder(root, lags, 0) + root = tree.FLRGTreeNode(None) - # Trace the possible paths + self.buildTreeWithoutOrder(root, lags, 0) - for p in root.paths(): - path = list(reversed(list(filter(None.__ne__, p)))) + # Trace the possible paths + + for p in root.paths(): + path = list(reversed(list(filter(None.__ne__, p)))) - if parameters is None: qtle = self.forecastInterval(path) + grids[k - self.order] = self.gridCount(grids[k - self.order], resolution, index, np.ravel(qtle)) - else: - qtle = self.forecast(path) - grids[k - self.order] = self.gridCountPoint(grids[k - self.order], resolution, index, np.ravel(qtle)) - for k in np.arange(0, steps): - tmp = np.array([grids[k][q] for q in sorted(grids[k])]) - ret.append(tmp / sum(tmp)) + for k in np.arange(0, steps): + tmp = np.array([grids[k][q] for q in sorted(grids[k])]) + ret.append(tmp / sum(tmp)) - grid = self.getGridClean(resolution) - df = pd.DataFrame(ret, columns=sorted(grid)) - return df + grid = self.getGridClean(resolution) + df = pd.DataFrame(ret, columns=sorted(grid)) + return df + else: + + print("novo") + + ret = [] + + for k in np.arange(self.order, steps + self.order): + + grid = self.getGridClean(resolution) + grid = self.gridCount(grid, resolution, index, intervals[k]) + + for qt in np.arange(0, 50, 1): + # print(qt) + qtle_lower = self.forecastInterval( + [intervals[x][0] + qt * ((intervals[x][1] - intervals[x][0]) / 100) for x in + np.arange(k - self.order, k)]) + grid = self.gridCount(grid, resolution, index, np.ravel(qtle_lower)) + qtle_upper = self.forecastInterval( + [intervals[x][1] - qt * ((intervals[x][1] - intervals[x][0]) / 100) for x in + np.arange(k - self.order, k)]) + grid = self.gridCount(grid, resolution, index, np.ravel(qtle_upper)) + qtle_mid = self.forecastInterval( + [intervals[x][0] + (intervals[x][1] - intervals[x][0]) / 2 for x in np.arange(k - self.order, k)]) + grid = self.gridCount(grid, resolution, index, np.ravel(qtle_mid)) + + tmp = np.array([grid[k] for k in sorted(grid)]) + + ret.append(tmp / sum(tmp)) + + grid = self.getGridClean(resolution) + df = pd.DataFrame(ret, columns=sorted(grid)) + return df def __str__(self): diff --git a/tests/pfts.py b/tests/pfts.py index 69c50d7..ce6bd15 100644 --- a/tests/pfts.py +++ b/tests/pfts.py @@ -10,27 +10,26 @@ from mpl_toolkits.mplot3d import Axes3D import pandas as pd from pyFTS.partitioners import Grid -from pyFTS.common import FLR,FuzzySet,Membership +from pyFTS.common import FLR,FuzzySet,Membership,Transformations from pyFTS import fts,hofts,ifts,pfts,tree, chen from pyFTS.benchmarks import benchmarks as bchmk os.chdir("/home/petronio/dados/Dropbox/Doutorado/Disciplinas/AdvancedFuzzyTimeSeriesModels/") -#enrollments = pd.read_csv("DataSets/Enrollments.csv", sep=";") -#enrollments = np.array(enrollments["Enrollments"]) +enrollments = pd.read_csv("DataSets/Enrollments.csv", sep=";") +enrollments = np.array(enrollments["Enrollments"]) -#enrollments_fs1 = Grid.GridPartitionerTrimf(enrollments,6) +#diff = Transformations.Differential(1) + +fs = Grid.GridPartitionerTrimf(enrollments,6) #tmp = chen.ConventionalFTS("") -pfts1_enrollments = pfts.ProbabilisticFTS("1") -#pfts1_enrollments.train(enrollments,enrollments_fs1,1) -#pfts1_enrollments.shortname = "1st Order" -#pfts2_enrollments = pfts.ProbabilisticFTS("2") -#pfts2_enrollments.dump = False -#pfts2_enrollments.shortname = "2nd Order" -#pfts2_enrollments.train(enrollments,enrollments_fs1,2) +pfts1 = pfts.ProbabilisticFTS("1") +#pfts1.appendTransformation(diff) +pfts1.train(enrollments,fs,1) +#bchmk.plotComparedIntervalsAhead(enrollments,[pfts1], ["blue"],[True],5,10) -#pfts1_enrollments.forecastAheadDistribution2(enrollments[:15],5,100) +pfts1.forecastAheadDistribution(enrollments,5,1, parameters=True)