diff --git a/benchmarks/Measures.py b/benchmarks/Measures.py
index cff52d0..5d6f4e4 100644
--- a/benchmarks/Measures.py
+++ b/benchmarks/Measures.py
@@ -109,3 +109,38 @@ def coverage(targets, forecasts):
             preds.append(0)
     return np.mean(preds)
+
+def pmf_to_cdf(density):
+    ret = []
+    for row in density.index:
+        tmp = []
+        prev = 0
+        for col in density.columns:
+            prev += density[col][row]
+            tmp.append( prev )
+        ret.append(tmp)
+    df = pd.DataFrame(ret, columns=density.columns)
+    return df
+
+
+def heavyside_cdf(bins, targets):
+    ret = []
+    for t in targets:
+        result = [1 if b >= t else 0 for b in bins]
+        ret.append(result)
+    df = pd.DataFrame(ret, columns=bins)
+    return df
+
+
+# Continuous Ranked Probability Score
+def crps(targets, densities):
+    l = len(densities.columns)
+    n = len(densities.index)
+    Ff = pmf_to_cdf(densities)
+    Fa = heavyside_cdf(densities.columns, targets)
+
+    _crps = float(0.0)
+    for k in densities.index:
+        _crps += sum([ (Ff[col][k]-Fa[col][k])**2 for col in densities.columns])
+
+    return _crps / float(l * n)
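Not part of the patch: a minimal usage sketch of the new CRPS helpers above, with made-up numbers. It assumes benchmarks/Measures.py already imports numpy as np and pandas as pd (the hunk itself does not add a pandas import), and that the density frame has one row per forecast step and one column per value bin, as forecastAheadDistribution produces. heavyside_cdf builds the step (Heaviside) CDF of each observed target.

import pandas as pd
from pyFTS.benchmarks import Measures

bins = [0.0, 0.5, 1.0, 1.5, 2.0]                    # value grid (columns of the density frame)
density = pd.DataFrame([[0.1, 0.2, 0.4, 0.2, 0.1],  # one row per forecast step,
                        [0.0, 0.3, 0.4, 0.2, 0.1],  # each row a discrete PMF over the bins
                        [0.2, 0.2, 0.2, 0.2, 0.2]], columns=bins)
targets = [1.0, 0.5, 1.5]                           # observed values for the same steps

cdf = Measures.pmf_to_cdf(density)            # cumulative version of each row
obs = Measures.heavyside_cdf(bins, targets)   # 0/1 step CDF of each observation
print(Measures.crps(targets, density))        # mean squared CDF difference; lower is better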
diff --git a/benchmarks/benchmarks.py b/benchmarks/benchmarks.py
index 36c528f..da37924 100644
--- a/benchmarks/benchmarks.py
+++ b/benchmarks/benchmarks.py
@@ -5,6 +5,7 @@ import numpy as np
 import pandas as pd
 import matplotlib as plt
 import matplotlib.colors as pltcolors
+import matplotlib.cm as cmx
 import matplotlib.pyplot as plt
 from mpl_toolkits.mplot3d import Axes3D
 # from sklearn.cross_validation import KFold
@@ -201,12 +202,71 @@ def plotComparedSeries(original, models, colors, typeonlegend=False, save=False,
     Util.showAndSaveImage(fig, file, save, lgd=legends)
 
 
+def allAheadForecasters(data_train, data_test, partitions, start, steps, resolution = None, max_order=3,save=False, file=None, tam=[20, 5],
+                           models=None, transformation=None, option=2):
+    if models is None:
+        models = [pfts.ProbabilisticFTS]
+
+    if resolution is None: resolution = (max(data_train) - min(data_train)) / 100
+
+    objs = []
+
+    if transformation is not None:
+        data_train_fs = Grid.GridPartitionerTrimf(transformation.apply(data_train),partitions)
+    else:
+        data_train_fs = Grid.GridPartitionerTrimf(data_train, partitions)
+
+    lcolors = []
+
+    for count, model in Util.enumerate2(models, start=0, step=2):
+        mfts = model("")
+        if not mfts.isHighOrder:
+            if transformation is not None:
+                mfts.appendTransformation(transformation)
+            mfts.train(data_train, data_train_fs)
+            objs.append(mfts)
+            lcolors.append( colors[count % ncol] )
+        else:
+            for order in np.arange(1,max_order+1):
+                if order >= mfts.minOrder:
+                    mfts = model(" n = " + str(order))
+                    if transformation is not None:
+                        mfts.appendTransformation(transformation)
+                    mfts.train(data_train, data_train_fs, order=order)
+                    objs.append(mfts)
+                    lcolors.append(colors[count % ncol])
+
+    distributions = [False for k in objs]
+
+    distributions[0] = True
+
+    print(getDistributionStatistics(data_test[start:], objs, steps, resolution))
+
+    #plotComparedIntervalsAhead(data_test, objs, lcolors, distributions=, save=save, file=file, tam=tam, intervals=True)
+
+
+def getDistributionStatistics(original, models, steps, resolution):
+    ret = "Model & Order & Interval & Distribution \\\\ \n"
+    for fts in models:
+        densities1 = fts.forecastAheadDistribution(original,steps,resolution, parameters=3)
+        densities2 = fts.forecastAheadDistribution(original, steps, resolution, parameters=2)
+        ret += fts.shortname + " & "
+        ret += str(fts.order) + " & "
+        ret += str(round(Measures.crps(original, densities1), 3)) + " & "
+        ret += str(round(Measures.crps(original, densities2), 3)) + " \\\\ \n"
+    return ret
+
+
 def plotComparedIntervalsAhead(original, models, colors, distributions, time_from, time_to,
-                               interpol=False, save=False, file=None, tam=[20, 5], resolution=None):
+                               interpol=False, save=False, file=None, tam=[20, 5], resolution=None,
+                               cmap='Blues',option=2):
     fig = plt.figure(figsize=tam)
     ax = fig.add_subplot(111)
 
+    cm = plt.get_cmap(cmap)
+    cNorm = pltcolors.Normalize(vmin=0, vmax=1)
+    scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=cm)
+
     if resolution is None: resolution = (max(original) - min(original)) / 100
 
     mi = []
@@ -215,26 +275,44 @@ def plotComparedIntervalsAhead(original, models, colors, distributions, time_fro
     for count, fts in enumerate(models, start=0):
         if fts.hasDistributionForecasting and distributions[count]:
             density = fts.forecastAheadDistribution(original[time_from - fts.order:time_from],
-                                                    time_to, resolution, parameters=True)
+                                                    time_to, resolution, parameters=option)
+
+            Y = []
+            X = []
+            C = []
+            S = []
 
             y = density.columns
             t = len(y)
+
+            ss = time_to ** 2
 
             for k in density.index:
-                alpha = np.array([density[q][k] for q in density]) * 100
+                #alpha = [scalarMap.to_rgba(density[col][k]) for col in density.columns]
+                col = [density[col][k]*5 for col in density.columns]
 
                 x = [time_from + k for x in np.arange(0, t)]
 
-                for cc in np.arange(0, resolution, 5):
-                    ax.scatter(x, y + cc, c=alpha, marker='s', linewidths=0, cmap='Oranges', edgecolors=None)
-                if interpol and k < max(density.index):
-                    diffs = [(density[q][k + 1] - density[q][k]) / 50 for q in density]
-                    for p in np.arange(0, 50):
-                        xx = [time_from + k + 0.02 * p for q in np.arange(0, t)]
-                        alpha2 = np.array(
-                            [density[density.columns[q]][k] + diffs[q] * p for q in np.arange(0, t)]) * 100
-                        ax.scatter(xx, y, c=alpha2, marker='s', linewidths=0, cmap='Oranges',
-                                   norm=pltcolors.Normalize(vmin=0, vmax=1), vmin=0, vmax=1, edgecolors=None)
+                s = [ss for x in np.arange(0, t)]
+
+                ic = resolution/10
+
+                for cc in np.arange(0, resolution, ic):
+                    Y.append(y + cc)
+                    X.append(x)
+                    C.append(col)
+                    S.append(s)
+
+            Y = np.hstack(Y)
+            X = np.hstack(X)
+            C = np.hstack(C)
+            S = np.hstack(S)
+
+            s = ax.scatter(X, Y, c=C, marker='s',s=S, linewidths=0, edgecolors=None, cmap=cmap)
+            s.set_clim([0, 1])
+            cb = fig.colorbar(s)
+
+            cb.set_label('Density')
+
         if fts.hasIntervalForecasting:
             forecasts = fts.forecastAheadInterval(original[time_from - fts.order:time_from], time_to)
@@ -276,6 +354,8 @@ def plotComparedIntervalsAhead(original, models, colors, distributions, time_fro
     ax.set_xlabel('T')
     ax.set_xlim([0, len(original)])
 
+    #plt.colorbar()
+
     Util.showAndSaveImage(fig, file, save)
diff --git a/fts.py b/fts.py
index 2aa282a..c0db302 100644
--- a/fts.py
+++ b/fts.py
@@ -63,7 +63,6 @@ class FTS(object):
 
     def doTransformations(self,data,params=None,updateUoD=False):
         ndata = data
-
         if updateUoD:
             if min(data) < 0:
                 self.original_min = min(data) * 1.1
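Not part of the patch: a self-contained sketch of the rendering strategy plotComparedIntervalsAhead switches to above, namely building flat X/Y/C arrays and issuing a single scatter call colored by density, clamping the color scale to [0, 1] and attaching a colorbar, instead of one scatter call per time step. The data, grid, and figure size below are made up.

import numpy as np
import matplotlib.pyplot as plt

steps, nbins = 10, 40
y = np.linspace(-3, 3, nbins)                         # value grid (one marker row per bin)
density = np.random.dirichlet(np.ones(nbins), steps)  # toy PMF for each forecast step

X, Y, C = [], [], []
for k in range(steps):
    X.append(np.full(nbins, k))   # same time index for every bin of this step
    Y.append(y)
    C.append(density[k])          # color encodes the probability mass

fig, ax = plt.subplots(figsize=(10, 4))
sc = ax.scatter(np.hstack(X), np.hstack(Y), c=np.hstack(C),
                marker='s', s=60, linewidths=0, cmap='Blues')
sc.set_clim([0, 1])               # keep the color scale comparable across models
fig.colorbar(sc).set_label('Density')
plt.show()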
diff --git a/partitioners/Grid.py b/partitioners/Grid.py
index 5adebec..2b8ceb8 100644
--- a/partitioners/Grid.py
+++ b/partitioners/Grid.py
@@ -9,18 +9,20 @@ from pyFTS.common import FuzzySet, Membership
 
 def GridPartitionerTrimf(data, npart, names=None, prefix="A"):
     sets = []
-    if min(data) < 0:
-        dmin = min(data) * 1.1
+    _min = min(data)
+    if _min < 0:
+        dmin = _min * 1.1
     else:
-        dmin = min(data) * 0.9
+        dmin = _min * 0.9
 
-    if max(data) > 0:
-        dmax = max(data) * 1.1
+    _max = max(data)
+    if _max > 0:
+        dmax = _max * 1.1
     else:
-        dmax = max(data) * 0.9
+        dmax = _max * 0.9
 
     dlen = dmax - dmin
-    partlen = math.ceil(dlen / npart)
+    partlen = dlen / npart
 
     count = 0
     for c in np.arange(dmin, dmax, partlen):
diff --git a/pfts.py b/pfts.py
index 96e771b..0a91fbd 100644
--- a/pfts.py
+++ b/pfts.py
@@ -243,15 +243,15 @@ class ProbabilisticFTS(ifts.IntervalFTS):
                 idx = np.ravel(tmp)  # flatten the array
 
                 if idx.size == 0:  # the element is out of the bounds of the Universe of Discourse
-                    if instance <= self.sets[0].lower:
+                    if instance <= np.ceil(self.sets[0].lower):
                         idx = [0]
-                    elif instance >= self.sets[-1].upper:
+                    elif instance >= np.floor(self.sets[-1].upper):
                         idx = [len(self.sets) - 1]
                     else:
                         raise Exception(instance)
 
                 lags[count] = idx
-                count = count + 1
+                count += 1
 
         # Build the tree with all possible paths
@@ -331,11 +331,16 @@ class ProbabilisticFTS(ifts.IntervalFTS):
         return ret
 
     def forecastAheadInterval(self, data, steps):
-        ret = [[data[k], data[k]] for k in np.arange(len(data) - self.order, len(data))]
+
+        l = len(data)
+
+        ret = [[data[k], data[k]] for k in np.arange(l - self.order, l)]
 
         for k in np.arange(self.order, steps+self.order):
 
-            if ret[-1][0] <= self.sets[0].lower and ret[-1][1] >= self.sets[-1].upper:
+            if (len(self.transformations) > 0 and ret[-1][0] <= self.sets[0].lower and ret[-1][1] >= self.sets[
+                -1].upper) or (len(self.transformations) == 0 and ret[-1][0] <= self.original_min and ret[-1][
+                1] >= self.original_max):
                 ret.append(ret[-1])
             else:
                 lower = self.forecastInterval([ret[x][0] for x in np.arange(k - self.order, k)])
@@ -384,7 +389,7 @@ class ProbabilisticFTS(ifts.IntervalFTS):
         for child in node.getChildren():
             self.buildTreeWithoutOrder(child, lags, level + 1)
 
-    def forecastAheadDistribution(self, data, steps, resolution, parameters=None):
+    def forecastAheadDistribution(self, data, steps, resolution, parameters=2):
 
         ret = []
@@ -394,7 +399,7 @@ class ProbabilisticFTS(ifts.IntervalFTS):
 
         index = SortedCollection.SortedCollection(iterable=grid.keys())
 
-        if parameters is None:
+        if parameters == 1:
 
             grids = []
             for k in np.arange(0, steps):
@@ -442,12 +447,7 @@ class ProbabilisticFTS(ifts.IntervalFTS):
                 tmp = np.array([grids[k][q] for q in sorted(grids[k])])
                 ret.append(tmp / sum(tmp))
 
-            grid = self.getGridClean(resolution)
-            df = pd.DataFrame(ret, columns=sorted(grid))
-            return df
-        else:
-
-            print("novo")
+        elif parameters == 2:
 
             ret = []
@@ -474,9 +474,20 @@ class ProbabilisticFTS(ifts.IntervalFTS):
 
                 ret.append(tmp / sum(tmp))
 
-        grid = self.getGridClean(resolution)
-        df = pd.DataFrame(ret, columns=sorted(grid))
-        return df
+        else:
+            ret = []
+
+            for k in np.arange(self.order, steps + self.order):
+                grid = self.getGridClean(resolution)
+                grid = self.gridCount(grid, resolution, index, intervals[k])
+
+                tmp = np.array([grid[k] for k in sorted(grid)])
+
+                ret.append(tmp / sum(tmp))
+
+        grid = self.getGridClean(resolution)
+        df = pd.DataFrame(ret, columns=sorted(grid))
+        return df
 
     def __str__(self):
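Not part of the patch: illustrative numbers (not taken from the repository) showing why Grid.py drops math.ceil. For series with a small universe of discourse, such as a differenced series, ceiling the partition length collapses the grid to a couple of fuzzy sets, while plain float division keeps the requested npart partitions.

import math
import numpy as np

dmin, dmax, npart = -1.0, 1.0, 8
dlen = dmax - dmin

old_partlen = math.ceil(dlen / npart)   # ceil(0.25) == 1
new_partlen = dlen / npart              # 0.25

print(len(np.arange(dmin, dmax, old_partlen)))  # 2 partitions instead of the requested 8
print(len(np.arange(dmin, dmax, new_partlen)))  # 8 partitions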
diff --git a/tests/pfts.py b/tests/pfts.py
index ce6bd15..03c2f22 100644
--- a/tests/pfts.py
+++ b/tests/pfts.py
@@ -13,23 +13,46 @@ from pyFTS.partitioners import Grid
 from pyFTS.common import FLR,FuzzySet,Membership,Transformations
 from pyFTS import fts,hofts,ifts,pfts,tree, chen
 from pyFTS.benchmarks import benchmarks as bchmk
+from pyFTS.benchmarks import Measures
+from numpy import random
+
+gauss_treino = random.normal(0,1.0,1600)
+gauss_teste = random.normal(0,1.0,400)
 
 os.chdir("/home/petronio/dados/Dropbox/Doutorado/Disciplinas/AdvancedFuzzyTimeSeriesModels/")
 
-enrollments = pd.read_csv("DataSets/Enrollments.csv", sep=";")
-enrollments = np.array(enrollments["Enrollments"])
+#enrollments = pd.read_csv("DataSets/Enrollments.csv", sep=";")
+#enrollments = np.array(enrollments["Enrollments"])
 
-#diff = Transformations.Differential(1)
+#taiex = pd.read_csv("DataSets/TAIEX.csv", sep=",")
+#taiex_treino = np.array(taiex["avg"][2500:3900])
+#taiex_teste = np.array(taiex["avg"][3901:4500])
 
-fs = Grid.GridPartitionerTrimf(enrollments,6)
+#nasdaq = pd.read_csv("DataSets/NASDAQ_IXIC.csv", sep=",")
+#nasdaq_treino = np.array(nasdaq["avg"][0:1600])
+#nasdaq_teste = np.array(nasdaq["avg"][1601:2000])
+
+diff = Transformations.Differential(1)
+
+fs = Grid.GridPartitionerTrimf(gauss_treino,7)
 
 #tmp = chen.ConventionalFTS("")
 
 pfts1 = pfts.ProbabilisticFTS("1")
 #pfts1.appendTransformation(diff)
-pfts1.train(enrollments,fs,1)
+pfts1.train(gauss_treino,fs,1)
+pfts2 = pfts.ProbabilisticFTS("n = 2")
+#pfts2.appendTransformation(diff)
+pfts2.train(gauss_treino,fs,2)
+
+pfts3 = pfts.ProbabilisticFTS("n = 3")
+#pfts3.appendTransformation(diff)
+pfts3.train(gauss_treino,fs,3)
+
+densities1 = pfts1.forecastAheadDistribution(gauss_teste[:50],2,1.50, parameters=2)
+
+#print(bchmk.getDistributionStatistics(gauss_teste[:50], [pfts1,pfts2,pfts3], 20, 1.50))
+
 
-#bchmk.plotComparedIntervalsAhead(enrollments,[pfts1], ["blue"],[True],5,10)
-pfts1.forecastAheadDistribution(enrollments,5,1, parameters=True)
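Not part of the patch: a sketch of how the commented-out evaluation at the end of tests/pfts.py would be invoked once the three models above are trained. The step count and resolution are copied from that commented line and are not validated here; the call simply prints the LaTeX-style table rows returned by getDistributionStatistics.

# Assumes gauss_teste, pfts1, pfts2, pfts3 and bchmk from the script above.
rows = bchmk.getDistributionStatistics(gauss_teste[:50], [pfts1, pfts2, pfts3],
                                       steps=20, resolution=1.50)
print(rows)   # "Model & Order & Interval & Distribution \\ ..." rows for a LaTeX table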