Several bugfixes and optimizations

This commit is contained in:
Petrônio Cândido de Lima e Silva 2017-02-01 14:54:37 -02:00
parent 0cf938c2a6
commit 71c71ca9d9
6 changed files with 194 additions and 44 deletions

View File

@ -109,3 +109,38 @@ def coverage(targets, forecasts):
preds.append(0) preds.append(0)
return np.mean(preds) return np.mean(preds)
def pmf_to_cdf(density):
ret = []
for row in density.index:
tmp = []
prev = 0
for col in density.columns:
prev += density[col][row]
tmp.append( prev )
ret.append(tmp)
df = pd.DataFrame(ret, columns=density.columns)
return df
def heavyside_cdf(bins, targets):
ret = []
for t in targets:
result = [1 if b >= t else 0 for b in bins]
ret.append(result)
df = pd.DataFrame(ret, columns=bins)
return df
# Continuous Ranked Probability Score
def crps(targets, densities):
l = len(densities.columns)
n = len(densities.index)
Ff = pmf_to_cdf(densities)
Fa = heavyside_cdf(densities.columns, targets)
_crps = float(0.0)
for k in densities.index:
_crps += sum([ (Ff[col][k]-Fa[col][k])**2 for col in densities.columns])
return _crps / float(l * n)

View File

@ -5,6 +5,7 @@ import numpy as np
import pandas as pd import pandas as pd
import matplotlib as plt import matplotlib as plt
import matplotlib.colors as pltcolors import matplotlib.colors as pltcolors
import matplotlib.cm as cmx
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D from mpl_toolkits.mplot3d import Axes3D
# from sklearn.cross_validation import KFold # from sklearn.cross_validation import KFold
@ -201,12 +202,71 @@ def plotComparedSeries(original, models, colors, typeonlegend=False, save=False,
Util.showAndSaveImage(fig, file, save, lgd=legends) Util.showAndSaveImage(fig, file, save, lgd=legends)
def allAheadForecasters(data_train, data_test, partitions, start, steps, resolution = None, max_order=3,save=False, file=None, tam=[20, 5],
models=None, transformation=None, option=2):
if models is None:
models = [pfts.ProbabilisticFTS]
if resolution is None: resolution = (max(data_train) - min(data_train)) / 100
objs = []
if transformation is not None:
data_train_fs = Grid.GridPartitionerTrimf(transformation.apply(data_train),partitions)
else:
data_train_fs = Grid.GridPartitionerTrimf(data_train, partitions)
lcolors = []
for count, model in Util.enumerate2(models, start=0, step=2):
mfts = model("")
if not mfts.isHighOrder:
if transformation is not None:
mfts.appendTransformation(transformation)
mfts.train(data_train, data_train_fs)
objs.append(mfts)
lcolors.append( colors[count % ncol] )
else:
for order in np.arange(1,max_order+1):
if order >= mfts.minOrder:
mfts = model(" n = " + str(order))
if transformation is not None:
mfts.appendTransformation(transformation)
mfts.train(data_train, data_train_fs, order=order)
objs.append(mfts)
lcolors.append(colors[count % ncol])
distributions = [False for k in objs]
distributions[0] = True
print(getDistributionStatistics(data_test[start:], objs, steps, resolution))
#plotComparedIntervalsAhead(data_test, objs, lcolors, distributions=, save=save, file=file, tam=tam, intervals=True)
def getDistributionStatistics(original, models, steps, resolution):
ret = "Model & Order & Interval & Distribution \\\\ \n"
for fts in models:
densities1 = fts.forecastAheadDistribution(original,steps,resolution, parameters=3)
densities2 = fts.forecastAheadDistribution(original, steps, resolution, parameters=2)
ret += fts.shortname + " & "
ret += str(fts.order) + " & "
ret += str(round(Measures.crps(original, densities1), 3)) + " & "
ret += str(round(Measures.crps(original, densities2), 3)) + " \\\\ \n"
return ret
def plotComparedIntervalsAhead(original, models, colors, distributions, time_from, time_to, def plotComparedIntervalsAhead(original, models, colors, distributions, time_from, time_to,
interpol=False, save=False, file=None, tam=[20, 5], resolution=None): interpol=False, save=False, file=None, tam=[20, 5], resolution=None,
cmap='Blues',option=2):
fig = plt.figure(figsize=tam) fig = plt.figure(figsize=tam)
ax = fig.add_subplot(111) ax = fig.add_subplot(111)
cm = plt.get_cmap(cmap)
cNorm = pltcolors.Normalize(vmin=0, vmax=1)
scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=cm)
if resolution is None: resolution = (max(original) - min(original)) / 100 if resolution is None: resolution = (max(original) - min(original)) / 100
mi = [] mi = []
@ -215,26 +275,44 @@ def plotComparedIntervalsAhead(original, models, colors, distributions, time_fro
for count, fts in enumerate(models, start=0): for count, fts in enumerate(models, start=0):
if fts.hasDistributionForecasting and distributions[count]: if fts.hasDistributionForecasting and distributions[count]:
density = fts.forecastAheadDistribution(original[time_from - fts.order:time_from], density = fts.forecastAheadDistribution(original[time_from - fts.order:time_from],
time_to, resolution, parameters=True) time_to, resolution, parameters=option)
Y = []
X = []
C = []
S = []
y = density.columns y = density.columns
t = len(y) t = len(y)
ss = time_to ** 2
for k in density.index: for k in density.index:
alpha = np.array([density[q][k] for q in density]) * 100 #alpha = [scalarMap.to_rgba(density[col][k]) for col in density.columns]
col = [density[col][k]*5 for col in density.columns]
x = [time_from + k for x in np.arange(0, t)] x = [time_from + k for x in np.arange(0, t)]
for cc in np.arange(0, resolution, 5): s = [ss for x in np.arange(0, t)]
ax.scatter(x, y + cc, c=alpha, marker='s', linewidths=0, cmap='Oranges', edgecolors=None)
if interpol and k < max(density.index): ic = resolution/10
diffs = [(density[q][k + 1] - density[q][k]) / 50 for q in density]
for p in np.arange(0, 50): for cc in np.arange(0, resolution, ic):
xx = [time_from + k + 0.02 * p for q in np.arange(0, t)] Y.append(y + cc)
alpha2 = np.array( X.append(x)
[density[density.columns[q]][k] + diffs[q] * p for q in np.arange(0, t)]) * 100 C.append(col)
ax.scatter(xx, y, c=alpha2, marker='s', linewidths=0, cmap='Oranges', S.append(s)
norm=pltcolors.Normalize(vmin=0, vmax=1), vmin=0, vmax=1, edgecolors=None)
Y = np.hstack(Y)
X = np.hstack(X)
C = np.hstack(C)
S = np.hstack(S)
s = ax.scatter(X, Y, c=C, marker='s',s=S, linewidths=0, edgecolors=None, cmap=cmap)
s.set_clim([0, 1])
cb = fig.colorbar(s)
cb.set_label('Density')
if fts.hasIntervalForecasting: if fts.hasIntervalForecasting:
forecasts = fts.forecastAheadInterval(original[time_from - fts.order:time_from], time_to) forecasts = fts.forecastAheadInterval(original[time_from - fts.order:time_from], time_to)
@ -276,6 +354,8 @@ def plotComparedIntervalsAhead(original, models, colors, distributions, time_fro
ax.set_xlabel('T') ax.set_xlabel('T')
ax.set_xlim([0, len(original)]) ax.set_xlim([0, len(original)])
#plt.colorbar()
Util.showAndSaveImage(fig, file, save) Util.showAndSaveImage(fig, file, save)

1
fts.py
View File

@ -63,7 +63,6 @@ class FTS(object):
def doTransformations(self,data,params=None,updateUoD=False): def doTransformations(self,data,params=None,updateUoD=False):
ndata = data ndata = data
if updateUoD: if updateUoD:
if min(data) < 0: if min(data) < 0:
self.original_min = min(data) * 1.1 self.original_min = min(data) * 1.1

View File

@ -9,18 +9,20 @@ from pyFTS.common import FuzzySet, Membership
def GridPartitionerTrimf(data, npart, names=None, prefix="A"): def GridPartitionerTrimf(data, npart, names=None, prefix="A"):
sets = [] sets = []
if min(data) < 0: _min = min(data)
dmin = min(data) * 1.1 if _min < 0:
dmin = _min * 1.1
else: else:
dmin = min(data) * 0.9 dmin = _min * 0.9
if max(data) > 0: _max = max(data)
dmax = max(data) * 1.1 if _max > 0:
dmax = _max * 1.1
else: else:
dmax = max(data) * 0.9 dmax = _max * 0.9
dlen = dmax - dmin dlen = dmax - dmin
partlen = math.ceil(dlen / npart) partlen = dlen / npart
count = 0 count = 0
for c in np.arange(dmin, dmax, partlen): for c in np.arange(dmin, dmax, partlen):

43
pfts.py
View File

@ -243,15 +243,15 @@ class ProbabilisticFTS(ifts.IntervalFTS):
idx = np.ravel(tmp) # flatten the array idx = np.ravel(tmp) # flatten the array
if idx.size == 0: # the element is out of the bounds of the Universe of Discourse if idx.size == 0: # the element is out of the bounds of the Universe of Discourse
if instance <= self.sets[0].lower: if instance <= np.ceil(self.sets[0].lower):
idx = [0] idx = [0]
elif instance >= self.sets[-1].upper: elif instance >= np.floor(self.sets[-1].upper):
idx = [len(self.sets) - 1] idx = [len(self.sets) - 1]
else: else:
raise Exception(instance) raise Exception(instance)
lags[count] = idx lags[count] = idx
count = count + 1 count += 1
# Build the tree with all possible paths # Build the tree with all possible paths
@ -331,11 +331,16 @@ class ProbabilisticFTS(ifts.IntervalFTS):
return ret return ret
def forecastAheadInterval(self, data, steps): def forecastAheadInterval(self, data, steps):
ret = [[data[k], data[k]] for k in np.arange(len(data) - self.order, len(data))]
l = len(data)
ret = [[data[k], data[k]] for k in np.arange(l - self.order, l)]
for k in np.arange(self.order, steps+self.order): for k in np.arange(self.order, steps+self.order):
if ret[-1][0] <= self.sets[0].lower and ret[-1][1] >= self.sets[-1].upper: if (len(self.transformations) > 0 and ret[-1][0] <= self.sets[0].lower and ret[-1][1] >= self.sets[
-1].upper) or (len(self.transformations) == 0 and ret[-1][0] <= self.original_min and ret[-1][
1] >= self.original_max):
ret.append(ret[-1]) ret.append(ret[-1])
else: else:
lower = self.forecastInterval([ret[x][0] for x in np.arange(k - self.order, k)]) lower = self.forecastInterval([ret[x][0] for x in np.arange(k - self.order, k)])
@ -384,7 +389,7 @@ class ProbabilisticFTS(ifts.IntervalFTS):
for child in node.getChildren(): for child in node.getChildren():
self.buildTreeWithoutOrder(child, lags, level + 1) self.buildTreeWithoutOrder(child, lags, level + 1)
def forecastAheadDistribution(self, data, steps, resolution, parameters=None): def forecastAheadDistribution(self, data, steps, resolution, parameters=2):
ret = [] ret = []
@ -394,7 +399,7 @@ class ProbabilisticFTS(ifts.IntervalFTS):
index = SortedCollection.SortedCollection(iterable=grid.keys()) index = SortedCollection.SortedCollection(iterable=grid.keys())
if parameters is None: if parameters == 1:
grids = [] grids = []
for k in np.arange(0, steps): for k in np.arange(0, steps):
@ -442,12 +447,7 @@ class ProbabilisticFTS(ifts.IntervalFTS):
tmp = np.array([grids[k][q] for q in sorted(grids[k])]) tmp = np.array([grids[k][q] for q in sorted(grids[k])])
ret.append(tmp / sum(tmp)) ret.append(tmp / sum(tmp))
grid = self.getGridClean(resolution) elif parameters == 2:
df = pd.DataFrame(ret, columns=sorted(grid))
return df
else:
print("novo")
ret = [] ret = []
@ -474,9 +474,20 @@ class ProbabilisticFTS(ifts.IntervalFTS):
ret.append(tmp / sum(tmp)) ret.append(tmp / sum(tmp))
grid = self.getGridClean(resolution) else:
df = pd.DataFrame(ret, columns=sorted(grid)) ret = []
return df
for k in np.arange(self.order, steps + self.order):
grid = self.getGridClean(resolution)
grid = self.gridCount(grid, resolution, index, intervals[k])
tmp = np.array([grid[k] for k in sorted(grid)])
ret.append(tmp / sum(tmp))
grid = self.getGridClean(resolution)
df = pd.DataFrame(ret, columns=sorted(grid))
return df
def __str__(self): def __str__(self):

View File

@ -13,23 +13,46 @@ from pyFTS.partitioners import Grid
from pyFTS.common import FLR,FuzzySet,Membership,Transformations from pyFTS.common import FLR,FuzzySet,Membership,Transformations
from pyFTS import fts,hofts,ifts,pfts,tree, chen from pyFTS import fts,hofts,ifts,pfts,tree, chen
from pyFTS.benchmarks import benchmarks as bchmk from pyFTS.benchmarks import benchmarks as bchmk
from pyFTS.benchmarks import Measures
from numpy import random
gauss_treino = random.normal(0,1.0,1600)
gauss_teste = random.normal(0,1.0,400)
os.chdir("/home/petronio/dados/Dropbox/Doutorado/Disciplinas/AdvancedFuzzyTimeSeriesModels/") os.chdir("/home/petronio/dados/Dropbox/Doutorado/Disciplinas/AdvancedFuzzyTimeSeriesModels/")
enrollments = pd.read_csv("DataSets/Enrollments.csv", sep=";") #enrollments = pd.read_csv("DataSets/Enrollments.csv", sep=";")
enrollments = np.array(enrollments["Enrollments"]) #enrollments = np.array(enrollments["Enrollments"])
#diff = Transformations.Differential(1) #taiex = pd.read_csv("DataSets/TAIEX.csv", sep=",")
#taiex_treino = np.array(taiex["avg"][2500:3900])
#taiex_teste = np.array(taiex["avg"][3901:4500])
fs = Grid.GridPartitionerTrimf(enrollments,6) #nasdaq = pd.read_csv("DataSets/NASDAQ_IXIC.csv", sep=",")
#nasdaq_treino = np.array(nasdaq["avg"][0:1600])
#nasdaq_teste = np.array(nasdaq["avg"][1601:2000])
diff = Transformations.Differential(1)
fs = Grid.GridPartitionerTrimf(gauss_treino,7)
#tmp = chen.ConventionalFTS("") #tmp = chen.ConventionalFTS("")
pfts1 = pfts.ProbabilisticFTS("1") pfts1 = pfts.ProbabilisticFTS("1")
#pfts1.appendTransformation(diff) #pfts1.appendTransformation(diff)
pfts1.train(enrollments,fs,1) pfts1.train(gauss_treino,fs,1)
pfts2 = pfts.ProbabilisticFTS("n = 2")
#pfts2.appendTransformation(diff)
pfts2.train(gauss_treino,fs,2)
pfts3 = pfts.ProbabilisticFTS("n = 3")
#pfts3.appendTransformation(diff)
pfts3.train(gauss_treino,fs,3)
densities1 = pfts1.forecastAheadDistribution(gauss_teste[:50],2,1.50, parameters=2)
#print(bchmk.getDistributionStatistics(gauss_teste[:50], [pfts1,pfts2,pfts3], 20, 1.50))
#bchmk.plotComparedIntervalsAhead(enrollments,[pfts1], ["blue"],[True],5,10)
pfts1.forecastAheadDistribution(enrollments,5,1, parameters=True)