Cascaded transformations in all fts models

This commit is contained in:
Petrônio Cândido de Lima e Silva 2017-01-29 21:59:50 -02:00
parent 8b3aceed58
commit 0cf938c2a6
9 changed files with 178 additions and 94 deletions

View File

@ -108,7 +108,7 @@ def allIntervalForecasters(data_train, data_test, partitions, max_order=3,save=F
lcolors = [] lcolors = []
for count, model in enumerate(models, start=0): for count, model in Util.enumerate2(models, start=0, step=2):
mfts = model("") mfts = model("")
if not mfts.isHighOrder: if not mfts.isHighOrder:
if transformation is not None: if transformation is not None:
@ -126,21 +126,20 @@ def allIntervalForecasters(data_train, data_test, partitions, max_order=3,save=F
objs.append(mfts) objs.append(mfts)
lcolors.append(colors[count % ncol]) lcolors.append(colors[count % ncol])
print(getIntervalStatistics(data_test, objs)) print(getIntervalStatistics(data_test, objs))
plotComparedSeries(data_test, objs, lcolors, typeonlegend=False, save=save, file=file, tam=tam, intervals=True) plotComparedSeries(data_test, objs, lcolors, typeonlegend=False, save=save, file=file, tam=tam, intervals=True)
def getIntervalStatistics(original, models): def getIntervalStatistics(original, models):
ret = "Model & Order & Sharpness & Resolution & Coverage \\ \n" ret = "Model & Order & Sharpness & Resolution & Coverage \\\\ \n"
for fts in models: for fts in models:
forecasts = fts.forecastInterval(original) forecasts = fts.forecastInterval(original)
ret += fts.shortname + " & " ret += fts.shortname + " & "
ret += str(fts.order) + " & " ret += str(fts.order) + " & "
ret += str(round(Measures.sharpness(forecasts), 2)) + " & " ret += str(round(Measures.sharpness(forecasts), 2)) + " & "
ret += str(round(Measures.resolution(forecasts), 2)) + " & " ret += str(round(Measures.resolution(forecasts), 2)) + " & "
ret += str(round(Measures.coverage(original[fts.order:], forecasts[:-1]), 2)) + " \\ \n" ret += str(round(Measures.coverage(original[fts.order:], forecasts[:-1]), 2)) + " \\\\ \n"
return ret return ret
@ -213,11 +212,10 @@ def plotComparedIntervalsAhead(original, models, colors, distributions, time_fro
mi = [] mi = []
ma = [] ma = []
count = 0 for count, fts in enumerate(models, start=0):
for fts in models:
if fts.hasDistributionForecasting and distributions[count]: if fts.hasDistributionForecasting and distributions[count]:
density = fts.forecastAheadDistribution(original[time_from - fts.order:time_from], time_to, resolution, density = fts.forecastAheadDistribution(original[time_from - fts.order:time_from],
parameters=None) time_to, resolution, parameters=True)
y = density.columns y = density.columns
t = len(y) t = len(y)
@ -258,12 +256,22 @@ def plotComparedIntervalsAhead(original, models, colors, distributions, time_fro
forecasts.insert(0, None) forecasts.insert(0, None)
ax.plot(forecasts, color=colors[count], label=fts.shortname) ax.plot(forecasts, color=colors[count], label=fts.shortname)
count = count + 1
ax.plot(original, color='black', label="Original") ax.plot(original, color='black', label="Original")
handles0, labels0 = ax.get_legend_handles_labels() handles0, labels0 = ax.get_legend_handles_labels()
ax.legend(handles0, labels0, loc=2) ax.legend(handles0, labels0, loc=2)
# ax.set_title(fts.name) # ax.set_title(fts.name)
ax.set_ylim([min(mi), max(ma)]) _mi = min(mi)
if _mi < 0:
_mi *= 1.1
else:
_mi *= 0.9
_ma = max(ma)
if _ma < 0:
_ma *= 0.9
else:
_ma *= 1.1
ax.set_ylim([_mi, _ma])
ax.set_ylabel('F(T)') ax.set_ylabel('F(T)')
ax.set_xlabel('T') ax.set_xlabel('T')
ax.set_xlim([0, len(original)]) ax.set_xlim([0, len(original)])
@ -552,8 +560,8 @@ def compareModelsTable(original, models_fo, models_ho):
return sup + header + body + "\\end{tabular}" return sup + header + body + "\\end{tabular}"
def simpleSearch_RMSE(train, test, model, partitions, orders, save=False, file=None, tam=[10, 15], plotforecasts=False, def simpleSearch_RMSE(train, test, model, partitions, orders, save=False, file=None, tam=[10, 15],
elev=30, azim=144): plotforecasts=False, elev=30, azim=144, intervals=False):
ret = [] ret = []
errors = np.array([[0 for k in range(len(partitions))] for kk in range(len(orders))]) errors = np.array([[0 for k in range(len(partitions))] for kk in range(len(orders))])
forecasted_best = [] forecasted_best = []
@ -568,29 +576,28 @@ def simpleSearch_RMSE(train, test, model, partitions, orders, save=False, file=N
ax0.set_xlabel('T') ax0.set_xlabel('T')
min_rmse = 1000000.0 min_rmse = 1000000.0
best = None best = None
pc = 0
for p in partitions: for pc, p in enumerate(partitions, start=0):
oc = 0
for o in orders: sets = Grid.GridPartitionerTrimf(train, p)
sets = Grid.GridPartitionerTrimf(train, p) for oc, o in enumerate(orders, start=0):
fts = model("q = " + str(p) + " n = " + str(o)) fts = model("q = " + str(p) + " n = " + str(o))
fts.train(train, sets, o) fts.train(train, sets, o)
forecasted = fts.forecast(test) if not intervals:
error = Measures.rmse(np.array(test[o:]), np.array(forecasted[:-1])) forecasted = fts.forecast(test)
mape = Measures.mape(np.array(test[o:]), np.array(forecasted[:-1])) error = Measures.rmse(np.array(test[o:]), np.array(forecasted[:-1]))
# print(train[o:]) for kk in range(o):
# print(forecasted[-1]) forecasted.insert(0, None)
for kk in range(o): if plotforecasts: ax0.plot(forecasted, label=fts.name)
forecasted.insert(0, None) else:
if plotforecasts: ax0.plot(forecasted, label=fts.name) forecasted = fts.forecastInterval(test)
# print(o, p, mape) error = 1.0 - Measures.rmse_interval(np.array(test[o:]), np.array(forecasted[:-1]))
errors[oc, pc] = error errors[oc, pc] = error
if error < min_rmse: if error < min_rmse:
min_rmse = error min_rmse = error
best = fts best = fts
forecasted_best = forecasted forecasted_best = forecasted
oc += 1
pc += 1
# print(min_rmse) # print(min_rmse)
if plotforecasts: if plotforecasts:
# handles0, labels0 = ax0.get_legend_handles_labels() # handles0, labels0 = ax0.get_legend_handles_labels()

View File

@ -30,15 +30,34 @@ class Differential(Transformation):
def apply(self, data, param=None): def apply(self, data, param=None):
if param is not None: if param is not None:
self.lag = param self.lag = param
if not isinstance(data, (list, np.ndarray, np.generic)):
data = [data]
if isinstance(data, (np.ndarray, np.generic)):
data = data.tolist()
n = len(data) n = len(data)
diff = [data[t - self.lag] - data[t] for t in np.arange(self.lag, n)] diff = [data[t - self.lag] - data[t] for t in np.arange(self.lag, n)]
for t in np.arange(0, self.lag): diff.insert(0, 0) for t in np.arange(0, self.lag): diff.insert(0, 0)
return diff return diff
def inverse(self,data, param): def inverse(self,data, param):
if isinstance(data, (np.ndarray, np.generic)):
data = data.tolist()
if not isinstance(data, list):
data = [data]
n = len(data) n = len(data)
inc = [data[t] + param[t] for t in np.arange(1, n)]
return inc inc = [data[t] + param[t] for t in np.arange(0, n)]
if n == 1:
return inc[0]
else:
return inc
def boxcox(original, plambda): def boxcox(original, plambda):

View File

@ -21,3 +21,9 @@ def showAndSaveImage(fig,file,flag,lgd=None):
else: else:
fig.savefig(uniquefilename(file)) fig.savefig(uniquefilename(file))
plt.close(fig) plt.close(fig)
def enumerate2(xs, start=0, step=1):
    """Enumerate *xs* with a configurable index increment.

    Behaves like the builtin ``enumerate`` except that the index advances
    by *step* after every item instead of always advancing by 1.

    Args:
        xs: Any iterable.
        start: Index paired with the first item.
        step: Amount added to the index after each item is yielded.

    Yields:
        ``(index, item)`` tuples, in the iteration order of *xs*.
    """
    index = start
    for item in xs:
        yield (index, item)
        # Accumulate rather than multiply so the index sequence is exactly
        # start, start + step, start + step + step, ...
        index += step

16
fts.py
View File

@ -19,6 +19,8 @@ class FTS(object):
self.dump = False self.dump = False
self.transformations = [] self.transformations = []
self.transformations_param = [] self.transformations_param = []
self.original_max = 0
self.original_min = 0
def fuzzy(self, data): def fuzzy(self, data):
best = {"fuzzyset": "", "membership": 0.0} best = {"fuzzyset": "", "membership": 0.0}
@ -59,8 +61,20 @@ class FTS(object):
def appendTransformation(self, transformation): def appendTransformation(self, transformation):
self.transformations.append(transformation) self.transformations.append(transformation)
def doTransformations(self,data,params=None): def doTransformations(self,data,params=None,updateUoD=False):
ndata = data ndata = data
if updateUoD:
if min(data) < 0:
self.original_min = min(data) * 1.1
else:
self.original_min = min(data) * 0.9
if max(data) > 0:
self.original_max = max(data) * 1.1
else:
self.original_max = max(data) * 0.9
if len(self.transformations) > 0: if len(self.transformations) > 0:
if params is None: if params is None:
params = [ None for k in self.transformations] params = [ None for k in self.transformations]

View File

@ -62,7 +62,7 @@ class HighOrderFTS(fts.FTS):
def train(self, data, sets, order=1,parameters=None): def train(self, data, sets, order=1,parameters=None):
data = self.doTransformations(data) data = self.doTransformations(data, updateUoD=True)
self.order = order self.order = order
self.sets = sets self.sets = sets

View File

@ -49,9 +49,7 @@ class IntervalFTS(hofts.HighOrderFTS):
def forecastInterval(self, data): def forecastInterval(self, data):
data = np.array(data) ndata = np.array(self.doTransformations(data))
ndata = self.doTransformations(data)
l = len(ndata) l = len(ndata)
@ -115,8 +113,8 @@ class IntervalFTS(hofts.HighOrderFTS):
# gerar o intervalo # gerar o intervalo
norm = sum(affected_flrgs_memberships) norm = sum(affected_flrgs_memberships)
lo_ = self.doInverseTransformations(sum(lo) / norm, param=[data[k - (self.order - 1): k + 1]]) lo_ = self.doInverseTransformations(sum(lo) / norm, params=[data[k - (self.order - 1): k + 1]])
up_ = self.doInverseTransformations(sum(up) / norm, param=[data[k - (self.order - 1): k + 1]]) up_ = self.doInverseTransformations(sum(up) / norm, params=[data[k - (self.order - 1): k + 1]])
ret.append([lo_, up_]) ret.append([lo_, up_])
return ret return ret

View File

@ -9,20 +9,24 @@ from pyFTS.common import FuzzySet, Membership
def GridPartitionerTrimf(data, npart, names=None, prefix="A"): def GridPartitionerTrimf(data, npart, names=None, prefix="A"):
sets = [] sets = []
dmax = max(data) if min(data) < 0:
dmax += dmax * 0.1 dmin = min(data) * 1.1
dmin = min(data) else:
dmin -= dmin * 0.1 dmin = min(data) * 0.9
if max(data) > 0:
dmax = max(data) * 1.1
else:
dmax = max(data) * 0.9
dlen = dmax - dmin dlen = dmax - dmin
partlen = math.ceil(dlen / npart) partlen = math.ceil(dlen / npart)
#p2 = partlen / 2
#partition = dmin #+ partlen
count = 0 count = 0
for c in np.arange(dmin, dmax, partlen): for c in np.arange(dmin, dmax, partlen):
sets.append( sets.append(
FuzzySet.FuzzySet(prefix + str(count), Membership.trimf, [c - partlen, c, c + partlen],c)) FuzzySet.FuzzySet(prefix + str(count), Membership.trimf, [c - partlen, c, c + partlen],c))
count += 1 count += 1
#partition += partlen
return sets return sets

111
pfts.py
View File

@ -214,9 +214,7 @@ class ProbabilisticFTS(ifts.IntervalFTS):
def forecastInterval(self, data): def forecastInterval(self, data):
data = np.array(data) ndata = np.array(self.doTransformations(data))
ndata = self.doTransformations(data)
l = len(ndata) l = len(ndata)
@ -349,7 +347,15 @@ class ProbabilisticFTS(ifts.IntervalFTS):
def getGridClean(self, resolution): def getGridClean(self, resolution):
grid = {} grid = {}
for sbin in np.arange(self.sets[0].lower, self.sets[-1].upper, resolution):
if len(self.transformations) == 0:
_min = self.sets[0].lower
_max = self.sets[-1].upper
else:
_min = self.original_min
_max = self.original_max
for sbin in np.arange(_min,_max, resolution):
grid[sbin] = 0 grid[sbin] = 0
return grid return grid
@ -378,7 +384,7 @@ class ProbabilisticFTS(ifts.IntervalFTS):
for child in node.getChildren(): for child in node.getChildren():
self.buildTreeWithoutOrder(child, lags, level + 1) self.buildTreeWithoutOrder(child, lags, level + 1)
def forecastAheadDistribution(self, data, steps, resolution,parameters=None): def forecastAheadDistribution(self, data, steps, resolution, parameters=None):
ret = [] ret = []
@ -388,58 +394,89 @@ class ProbabilisticFTS(ifts.IntervalFTS):
index = SortedCollection.SortedCollection(iterable=grid.keys()) index = SortedCollection.SortedCollection(iterable=grid.keys())
grids = [] if parameters is None:
for k in np.arange(0, steps):
grids.append(self.getGridClean(resolution))
for k in np.arange(self.order, steps + self.order): grids = []
for k in np.arange(0, steps):
grids.append(self.getGridClean(resolution))
lags = {} for k in np.arange(self.order, steps + self.order):
cc = 0 lags = {}
for i in intervals[k - self.order : k]: cc = 0
quantiles = [] for i in intervals[k - self.order : k]:
for qt in np.arange(0, 50, 2): quantiles = []
quantiles.append(i[0] + qt * ((i[1] - i[0]) / 100))
quantiles.append(i[1] - qt * ((i[1] - i[0]) / 100))
quantiles.append(i[0] + ((i[1] - i[0]) / 2))
quantiles = list(set(quantiles)) for qt in np.arange(0, 50, 2):
quantiles.append(i[0] + qt * ((i[1] - i[0]) / 100))
quantiles.append(i[1] - qt * ((i[1] - i[0]) / 100))
quantiles.append(i[0] + ((i[1] - i[0]) / 2))
quantiles.sort() quantiles = list(set(quantiles))
lags[cc] = quantiles quantiles.sort()
cc += 1 lags[cc] = quantiles
# Build the tree with all possible paths cc += 1
root = tree.FLRGTreeNode(None) # Build the tree with all possible paths
self.buildTreeWithoutOrder(root, lags, 0) root = tree.FLRGTreeNode(None)
# Trace the possible paths self.buildTreeWithoutOrder(root, lags, 0)
for p in root.paths(): # Trace the possible paths
path = list(reversed(list(filter(None.__ne__, p))))
for p in root.paths():
path = list(reversed(list(filter(None.__ne__, p))))
if parameters is None:
qtle = self.forecastInterval(path) qtle = self.forecastInterval(path)
grids[k - self.order] = self.gridCount(grids[k - self.order], resolution, index, np.ravel(qtle)) grids[k - self.order] = self.gridCount(grids[k - self.order], resolution, index, np.ravel(qtle))
else:
qtle = self.forecast(path)
grids[k - self.order] = self.gridCountPoint(grids[k - self.order], resolution, index, np.ravel(qtle))
for k in np.arange(0, steps): for k in np.arange(0, steps):
tmp = np.array([grids[k][q] for q in sorted(grids[k])]) tmp = np.array([grids[k][q] for q in sorted(grids[k])])
ret.append(tmp / sum(tmp)) ret.append(tmp / sum(tmp))
grid = self.getGridClean(resolution) grid = self.getGridClean(resolution)
df = pd.DataFrame(ret, columns=sorted(grid)) df = pd.DataFrame(ret, columns=sorted(grid))
return df return df
else:
print("novo")
ret = []
for k in np.arange(self.order, steps + self.order):
grid = self.getGridClean(resolution)
grid = self.gridCount(grid, resolution, index, intervals[k])
for qt in np.arange(0, 50, 1):
# print(qt)
qtle_lower = self.forecastInterval(
[intervals[x][0] + qt * ((intervals[x][1] - intervals[x][0]) / 100) for x in
np.arange(k - self.order, k)])
grid = self.gridCount(grid, resolution, index, np.ravel(qtle_lower))
qtle_upper = self.forecastInterval(
[intervals[x][1] - qt * ((intervals[x][1] - intervals[x][0]) / 100) for x in
np.arange(k - self.order, k)])
grid = self.gridCount(grid, resolution, index, np.ravel(qtle_upper))
qtle_mid = self.forecastInterval(
[intervals[x][0] + (intervals[x][1] - intervals[x][0]) / 2 for x in np.arange(k - self.order, k)])
grid = self.gridCount(grid, resolution, index, np.ravel(qtle_mid))
tmp = np.array([grid[k] for k in sorted(grid)])
ret.append(tmp / sum(tmp))
grid = self.getGridClean(resolution)
df = pd.DataFrame(ret, columns=sorted(grid))
return df
def __str__(self): def __str__(self):

View File

@ -10,27 +10,26 @@ from mpl_toolkits.mplot3d import Axes3D
import pandas as pd import pandas as pd
from pyFTS.partitioners import Grid from pyFTS.partitioners import Grid
from pyFTS.common import FLR,FuzzySet,Membership from pyFTS.common import FLR,FuzzySet,Membership,Transformations
from pyFTS import fts,hofts,ifts,pfts,tree, chen from pyFTS import fts,hofts,ifts,pfts,tree, chen
from pyFTS.benchmarks import benchmarks as bchmk from pyFTS.benchmarks import benchmarks as bchmk
os.chdir("/home/petronio/dados/Dropbox/Doutorado/Disciplinas/AdvancedFuzzyTimeSeriesModels/") os.chdir("/home/petronio/dados/Dropbox/Doutorado/Disciplinas/AdvancedFuzzyTimeSeriesModels/")
#enrollments = pd.read_csv("DataSets/Enrollments.csv", sep=";") enrollments = pd.read_csv("DataSets/Enrollments.csv", sep=";")
#enrollments = np.array(enrollments["Enrollments"]) enrollments = np.array(enrollments["Enrollments"])
#enrollments_fs1 = Grid.GridPartitionerTrimf(enrollments,6) #diff = Transformations.Differential(1)
fs = Grid.GridPartitionerTrimf(enrollments,6)
#tmp = chen.ConventionalFTS("") #tmp = chen.ConventionalFTS("")
pfts1_enrollments = pfts.ProbabilisticFTS("1") pfts1 = pfts.ProbabilisticFTS("1")
#pfts1_enrollments.train(enrollments,enrollments_fs1,1) #pfts1.appendTransformation(diff)
#pfts1_enrollments.shortname = "1st Order" pfts1.train(enrollments,fs,1)
#pfts2_enrollments = pfts.ProbabilisticFTS("2")
#pfts2_enrollments.dump = False
#pfts2_enrollments.shortname = "2nd Order"
#pfts2_enrollments.train(enrollments,enrollments_fs1,2)
#bchmk.plotComparedIntervalsAhead(enrollments,[pfts1], ["blue"],[True],5,10)
#pfts1_enrollments.forecastAheadDistribution2(enrollments[:15],5,100) pfts1.forecastAheadDistribution(enrollments,5,1, parameters=True)