- Bugfixes due to refactorings

This commit is contained in:
Petrônio Cândido 2018-03-05 15:07:02 -03:00
parent 9718f48b39
commit 3d64c5065e
18 changed files with 1292 additions and 258 deletions

View File

@ -488,14 +488,18 @@ def all_interval_forecasters(data_train, data_test, partitions, max_order=3,save
def print_interval_statistics(original, models):
ret = "Model & Order & Sharpness & Resolution & Coverage \\\\ \n"
ret = "Model & Order & Sharpness & Resolution & Coverage & .05 & .25 & .75 & .95 \\\\ \n"
for fts in models:
_sharp, _res, _cov = Measures.get_interval_statistics(original, fts)
_sharp, _res, _cov, _q5, _q25, _q75, _q95 = Measures.get_interval_statistics(original, fts)
ret += fts.shortname + " & "
ret += str(fts.order) + " & "
ret += str(_sharp) + " & "
ret += str(_res) + " & "
ret += str(_cov) + " \\\\ \n"
ret += str(_cov) + " &"
ret += str(_q5) + " &"
ret += str(_q25) + " &"
ret += str(_q75) + " &"
ret += str(_q95) + "\\\\ \n"
print(ret)

View File

@ -78,6 +78,7 @@ def load_obj(file):
obj = dill.load(_file)
return obj
def persist_env(file):
"""
Persist an entire environment on file. This function depends on Dill package
@ -85,6 +86,7 @@ def persist_env(file):
"""
dill.dump_session(file)
def load_env(file):
dill.load_session(file)
@ -94,11 +96,13 @@ def simple_model_train(model, data, parameters):
return model
def distributed_train(model, train_method, nodes, fts_method, data, num_batches,
train_parameters, **kwargs):
def distributed_train(model, train_method, nodes, fts_method, data, num_batches=10,
train_parameters={}, **kwargs):
import dispy, dispy.httpd, datetime
batch_save = kwargs.get('batch_save', True) # save model between batches
batch_save = kwargs.get('batch_save', False) # save model between batches
batch_save_interval = kwargs.get('batch_save_interval', 1)
file_path = kwargs.get('file_path', None)
@ -118,8 +122,6 @@ def distributed_train(model, train_method, nodes, fts_method, data, num_batches,
else:
ndata = data[ct - model.order: ct + batch_size]
#self.train(ndata, **kwargs)
tmp_model = fts_method(str(bcount))
tmp_model.clone_parameters(model)
@ -136,7 +138,7 @@ def distributed_train(model, train_method, nodes, fts_method, data, num_batches,
if job.status == dispy.DispyJob.Finished and tmp is not None:
model.merge(tmp)
if batch_save:
if batch_save and (job.id % batch_save_interval) == 0:
persist_obj(model, file_path)
else:
@ -155,3 +157,53 @@ def distributed_train(model, train_method, nodes, fts_method, data, num_batches,
cluster.close()
return model
def simple_model_predict(model, data, parameters):
    """
    Call ``model.predict`` on one batch of data.

    Used as the worker function submitted to the dispy cluster by
    ``distributed_predict``.

    :param model: a fitted FTS model
    :param data: the data batch to forecast over
    :param parameters: dict of keyword arguments forwarded to ``predict``
    :return: whatever ``model.predict`` returns for this batch
    """
    return model.predict(data, **parameters)
def distributed_predict(model, parameters, nodes, data, num_batches):
    """
    Split ``data`` into batches and forecast them in parallel on a dispy cluster.

    :param model: a fitted FTS model (must expose ``order`` and ``is_multivariate``)
    :param parameters: dict of keyword arguments forwarded to ``model.predict``
    :param nodes: list of cluster node addresses (IP/hostname strings)
    :param data: the time series to forecast (DataFrame when multivariate)
    :param num_batches: number of batches to split ``data`` into
    :return: list with the forecasts of all finished batches, concatenated in
        submission order
    """
    import dispy, dispy.httpd

    cluster = dispy.JobCluster(simple_model_predict, nodes=nodes)  # , depends=dependencies)
    http_server = dispy.httpd.DispyHTTPServer(cluster)  # web UI for monitoring the jobs

    jobs = []
    n = len(data)
    batch_size = int(n / num_batches)
    bcount = 1
    # Each batch starts model.order points before its slice so the first
    # forecast of the batch has the lagged values it needs.
    for ct in range(model.order, n, batch_size):
        if model.is_multivariate:
            ndata = data.iloc[ct - model.order:ct + batch_size]
        else:
            ndata = data[ct - model.order: ct + batch_size]

        job = cluster.submit(model, ndata, parameters)
        job.id = bcount  # associate an ID to identify jobs (if needed later)
        jobs.append(job)

        bcount += 1

    ret = []

    for job in jobs:
        tmp = job()  # blocks until this job finishes
        if job.status == dispy.DispyJob.Finished and tmp is not None:
            # NOTE(review): comparing the job id against batch_size looks
            # wrong — presumably num_batches was intended, so that every
            # batch except the last drops its overlapping final forecast;
            # confirm against the batching scheme above.
            if job.id < batch_size:
                ret.extend(tmp[:-1])
            else:
                ret.extend(tmp)
        else:
            # Failed jobs are only reported, not retried; their slice of the
            # series is silently missing from the returned forecasts.
            print(job.exception)
            print(job.stdout)

    cluster.wait()  # wait for all jobs to finish
    cluster.print_status()
    http_server.shutdown()  # this waits until browser gets all updates
    cluster.close()

    return ret

View File

@ -47,13 +47,13 @@ class FLRG(object):
self.midpoint = np.nanmean(self.get_midpoints(sets))
return self.midpoint
def get_midpoints(self,sets):
def get_midpoints(self, sets):
if isinstance(self.RHS, (list, set)):
return np.array([sets[s].centroid for s in self.RHS])
elif isinstance(self.RHS, dict):
return np.array([sets[self.RHS[s]].centroid for s in self.RHS.keys()])
def get_lower(self,sets):
def get_lower(self, sets):
if self.lower is None:
if isinstance(self.RHS, list):
self.lower = min([sets[rhs].lower for rhs in self.RHS])
@ -61,7 +61,7 @@ class FLRG(object):
self.lower = min([sets[self.RHS[s]].lower for s in self.RHS.keys()])
return self.lower
def get_upper(self, t,sets):
def get_upper(self, sets):
if self.upper is None:
if isinstance(self.RHS, list):
self.upper = max([sets[rhs].upper for rhs in self.RHS])

View File

@ -2,11 +2,6 @@ import numpy as np
import pandas as pd
from pyFTS.common import FuzzySet, SortedCollection, tree, Util
def parallel_train(data, method, **kwargs):
model = method(**kwargs)
model.train(data)
return model
class FTS(object):
"""
@ -67,7 +62,19 @@ class FTS(object):
:param kwargs:
:return:
"""
type = kwargs.get("type", 'point')
if 'distributed' in kwargs:
distributed = kwargs.pop('distributed')
else:
distributed = False
if distributed is None or distributed == False:
if 'type' in kwargs:
type = kwargs.pop("type")
else:
type = 'point'
steps_ahead = kwargs.get("steps_ahead", None)
if type == 'point' and steps_ahead == None:
@ -85,6 +92,13 @@ class FTS(object):
else:
raise ValueError('The argument \'type\' has an unknown value.')
else:
nodes = kwargs.get("nodes", ['127.0.0.1'])
num_batches = kwargs.get('num_batches', 10)
return Util.distributed_predict(self, kwargs, nodes, data, num_batches)
def forecast(self, data, **kwargs):
"""
@ -180,21 +194,24 @@ class FTS(object):
import datetime
num_batches = kwargs.get('num_batches', None)
num_batches = kwargs.get('num_batches', 10)
save = kwargs.get('save_model', False) # save model on disk
batch_save = kwargs.get('batch_save', True) #save model between batches
batch_save = kwargs.get('batch_save', False) #save model between batches
file_path = kwargs.get('file_path', None)
distributed = kwargs.get('distributed', False)
batch_save_interval = kwargs.get('batch_save_interval', 10)
if distributed:
nodes = kwargs.get('nodes', False)
train_method = kwargs.get('train_method', Util.simple_model_train)
Util.distributed_train(self, train_method, nodes, type(self), data, num_batches, {},
batch_save=batch_save, file_path=file_path)
batch_save=batch_save, file_path=file_path,
batch_save_interval=batch_save_interval)
else:
print("[{0: %H:%M:%S}] Start training".format(datetime.datetime.now()))
@ -303,6 +320,9 @@ class FTS(object):
else:
return data
def get_UoD(self):
return [self.original_min, self.original_max]
def __str__(self):
tmp = self.name + ":\n"
for r in sorted(self.flrgs):

View File

@ -1,8 +1,8 @@
"""
Simple High Order extension of Conventional FTS by Chen (1996)
High Order FTS
[1] S.-M. Chen, Forecasting enrollments based on fuzzy time series,
Fuzzy Sets Syst., vol. 81, no. 3, pp. 311–319, 1996.
Severiano, S. A. Jr; Silva, P. C. L.; Sadaei, H. J.; Guimarães, F. G. Very Short-term Solar Forecasting
using Fuzzy Time Series. 2017 IEEE International Conference on Fuzzy Systems. DOI: 10.1109/FUZZ-IEEE.2017.8015732
"""
import numpy as np

View File

@ -26,105 +26,51 @@ class IntervalFTS(hofts.HighOrderFTS):
def get_upper(self, flrg):
if flrg.get_key() in self.flrgs:
tmp = self.flrgs[flrg.get_key()]
ret = tmp.get_upper()
ret = tmp.get_upper(self.sets)
else:
ret = flrg.LHS[-1].upper
ret = self.sets[flrg.LHS[-1]].upper
return ret
def get_lower(self, flrg):
if flrg.get_key() in self.flrgs:
tmp = self.flrgs[flrg.get_key()]
ret = tmp.get_lower()
ret = tmp.get_lower(self.sets)
else:
ret = flrg.LHS[-1].lower
ret = self.sets[flrg.LHS[-1]].lower
return ret
def get_sequence_membership(self, data, fuzzySets):
mb = [fuzzySets[k].membership(data[k]) for k in np.arange(0, len(data))]
return mb
def forecast_interval(self, data, **kwargs):
ndata = np.array(self.apply_transformations(data))
l = len(ndata)
ret = []
for k in np.arange(self.order - 1, l):
l = len(data)
affected_flrgs = []
affected_flrgs_memberships = []
if l <= self.order:
return data
ndata = self.apply_transformations(data)
for k in np.arange(self.order, l+1):
sample = ndata[k - self.order: k]
flrgs = self.generate_lhs_flrg(sample)
up = []
lo = []
affected_flrgs_memberships = []
# Achar os conjuntos que tem pert > 0 para cada lag
count = 0
lags = {}
if self.order > 1:
subset = ndata[k - (self.order - 1): k + 1]
for instance in subset:
mb = FuzzySet.fuzzyfy_instance(instance, self.sets)
tmp = np.argwhere(mb)
idx = np.ravel(tmp) # flat the array
if idx.size == 0: # the element is out of the bounds of the Universe of Discourse
if instance <= self.sets[0].lower:
idx = [0]
elif instance >= self.sets[-1].upper:
idx = [len(self.sets) - 1]
else:
raise Exception(instance)
lags[count] = idx
count = count + 1
# Constrói uma árvore com todos os caminhos possíveis
root = tree.FLRGTreeNode(None)
self.build_tree(root, lags, 0)
# Traça os possíveis caminhos e costrói as HOFLRG's
for p in root.paths():
path = list(reversed(list(filter(None.__ne__, p))))
flrg = hofts.HighOrderFLRG(self.order)
for kk in path: flrg.append_lhs(self.sets[kk])
affected_flrgs.append(flrg)
# Acha a pertinência geral de cada FLRG
affected_flrgs_memberships.append(min(self.getSequenceMembership(subset, flrg.LHS)))
else:
mv = FuzzySet.fuzzyfy_instance(ndata[k], self.sets)
tmp = np.argwhere(mv)
idx = np.ravel(tmp)
if idx.size == 0: # the element is out of the bounds of the Universe of Discourse
if ndata[k] <= self.sets[0].lower:
idx = [0]
elif ndata[k] >= self.sets[-1].upper:
idx = [len(self.sets) - 1]
else:
raise Exception(ndata[k])
for kk in idx:
flrg = hofts.HighOrderFLRG(self.order)
flrg.append_lhs(self.sets[kk])
affected_flrgs.append(flrg)
affected_flrgs_memberships.append(mv[kk])
count = 0
for flrg in affected_flrgs:
for flrg in flrgs:
# achar o os bounds de cada FLRG, ponderados pela pertinência
up.append(affected_flrgs_memberships[count] * self.get_upper(flrg))
lo.append(affected_flrgs_memberships[count] * self.get_lower(flrg))
count = count + 1
mv = flrg.get_membership(sample, self.sets)
up.append(mv * self.get_upper(flrg))
lo.append(mv * self.get_lower(flrg))
affected_flrgs_memberships.append(mv)
# gerar o intervalo
norm = sum(affected_flrgs_memberships)

View File

@ -3,9 +3,9 @@ import pandas as pd
def fuzzyfy_instance(data_point, var):
mv = np.array([fs.membership(data_point) for fs in var.partitioner.sets])
mv = np.array([var.partitioner.sets[key].membership(data_point) for key in var.partitioner.ordered_sets])
ix = np.ravel(np.argwhere(mv > 0.0))
sets = [var.partitioner.sets[i] for i in ix]
sets = [(var.name, var.partitioner.ordered_sets[i]) for i in ix]
return sets

View File

@ -19,8 +19,13 @@ class FLRG(flg.FLRG):
def append_rhs(self, set, **kwargs):
self.RHS.add(set)
def get_membership(self, data, sets):
return np.nanmin([sets[self.LHS[k]].membership(data[k]) for k in self.LHS.keys()])
def get_membership(self, data, variables):
mvs = []
for var in variables:
s = self.LHS[var.name]
mvs.append(var.partitioner.sets[s].membership(data[var.name]))
return np.nanmin(mvs)
def __str__(self):
_str = ""

View File

@ -50,9 +50,10 @@ class MVFTS(fts.FTS):
flr = MVFLR.FLR()
for c, e in enumerate(path, start=0):
flr.set_lhs(e.variable, e.name)
for v, s in path:
flr.set_lhs(v, s)
if len(flr.LHS.keys()) == len(self.explanatory_variables):
flrs.append(flr)
return flrs
@ -70,8 +71,8 @@ class MVFTS(fts.FTS):
target = common.fuzzyfy_instance(target_point, self.target_variable)
for flr in tmp_flrs:
for t in target:
flr.set_rhs(t.name)
for v, s in target:
flr.set_rhs(s)
flrs.append(flr)
return flrs
@ -108,8 +109,8 @@ class MVFTS(fts.FTS):
mvs.append(0.)
mps.append(0.)
else:
mvs.append(self.flrgs[flrg.get_key()].get_membership(self.format_data(data_point)))
mps.append(self.flrgs[flrg.get_key()].get_midpoint())
mvs.append(self.flrgs[flrg.get_key()].get_membership(self.format_data(data_point), self.explanatory_variables))
mps.append(self.flrgs[flrg.get_key()].get_midpoint(self.target_variable.partitioner.sets))
#print('mv', mvs)
#print('mp', mps)

View File

@ -20,7 +20,10 @@ class ProbabilisticWeightedFLRG(hofts.HighOrderFLRG):
self.Z = None
def get_membership(self, data, sets):
return np.nanprod([sets[k].membership(data[k]) for k in self.LHS])
if isinstance(data, (np.ndarray, list)):
return np.nanprod([sets[key].membership(data[count]) for count, key in enumerate(self.LHS)])
else:
return sets[self.LHS[0]].membership(data)
def append_rhs(self, c, **kwargs):
mv = kwargs.get('mv', 1.0)
@ -34,7 +37,7 @@ class ProbabilisticWeightedFLRG(hofts.HighOrderFLRG):
def lhs_conditional_probability(self, x, sets, norm, uod, nbins):
pk = self.frequency_count / norm
tmp = pk * (self.get_membership(x, sets) / self.partition_function(uod, nbins=nbins))
tmp = pk * (self.get_membership(x, sets) / self.partition_function(sets, uod, nbins=nbins))
return tmp
@ -110,17 +113,18 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
data = self.apply_transformations(data, updateUoD=True)
parameters = kwargs.get('parameters','Fuzzy')
parameters = kwargs.get('parameters','fuzzy')
self.order = kwargs.get('order',1)
if kwargs.get('sets',None) is None and self.partitioner is not None:
if kwargs.get('sets', None) is None and self.partitioner is not None:
self.sets = self.partitioner.sets
self.original_min = self.partitioner.min
self.original_max = self.partitioner.max
else:
self.sets = kwargs.get('sets',None)
for s in self.sets: self.setsDict[s.name] = s
if parameters == 'Monotonic':
if parameters == 'monotonic':
tmpdata = FuzzySet.fuzzyfy_series_old(data, self.sets)
flrs = FLR.generate_recurrent_flrs(tmpdata)
self.generateFLRG(flrs)
@ -433,7 +437,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
den = []
for s in flrgs:
flrg = self.flrgs[s.get_key()]
pk = flrg.lhs_conditional_probability(sample, self.global_frequency_count, uod, nbins)
pk = flrg.lhs_conditional_probability(sample, self.sets, self.global_frequency_count, uod, nbins)
wi = flrg.rhs_conditional_probability(bin, self.sets, uod, nbins)
num.append(wi * pk)
den.append(pk)

View File

@ -1,4 +1,4 @@
from pyFTS.common import Membership
from pyFTS.common import Membership, FuzzySet as FS
from pyFTS.common.Composite import FuzzySet as Composite
from pyFTS.partitioners import partitioner, Grid
from pyFTS.models.seasonal.common import DateTime, FuzzySet, strip_datepart
@ -32,6 +32,11 @@ class TimeGridPartitioner(partitioner.Partitioner):
self.sets = self.build(None)
if self.ordered_sets is None and self.setnames is not None:
self.ordered_sets = self.setnames
else:
self.ordered_sets = FS.set_ordered(self.sets)
def build(self, data):
sets = {}

View File

@ -4,7 +4,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# High Order Fuzzy Time Series \n"
"# High Order Fuzzy Time Series \n",
"\n",
"Severiano, S. A. Jr; Silva, P. C. L.; Sadaei, H. J.; Guimarães, F. G. Very Short-term Solar Forecasting using Fuzzy Time Series. 2017 IEEE International Conference on Fuzzy Systems. DOI: 10.1109/FUZZ-IEEE.2017.8015732"
]
},
{

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -107,7 +107,7 @@ class ProbabilityDistribution(object):
if str(ret) not in self.qtl:
self.qtl[str(ret)] = []
self.qtl[str(ret)].append_rhs(k)
self.qtl[str(ret)].append(k)
_keys = [float(k) for k in sorted(self.qtl.keys())]

View File

@ -6,12 +6,12 @@ from pyFTS.common import Transformations
from pyFTS.data import SONDA
df = SONDA.get_dataframe()
train = df.iloc[0:1572480] #three years
test = df.iloc[1572480:2096640] #ears
train = df.iloc[0:1578241] #three years
#test = df.iloc[1572480:2096640] #ears
del df
from pyFTS.partitioners import Grid, Util as pUtil
from pyFTS.common import Transformations
from pyFTS.common import Transformations, Util
from pyFTS.models.multivariate import common, variable, mvfts
from pyFTS.models.seasonal import partitioner as seasonal
from pyFTS.models.seasonal.common import DateTime
@ -21,6 +21,7 @@ diff = Transformations.Differential(1)
np = 10
model = mvfts.MVFTS("")
fig, axes = plt.subplots(nrows=5, ncols=1,figsize=[15,10])
@ -48,5 +49,11 @@ model.append_variable(vrain)
model.target_variable = vrain
model.fit(train, num_batches=20, save=True, batch_save=True, file_path='mvfts_sonda3', distributed=True,
nodes=['192.168.0.110','192.168.0.106'])
#model.fit(train, num_batches=60, save=True, batch_save=True, file_path='mvfts_sonda')
model.fit(train, num_batches=200, save=True, batch_save=True, file_path='mvfts_sonda', distributed=True,
nodes=['192.168.0.110','192.168.0.106'], batch_save_interval=10)
#model = Util.load_obj('mvfts_sonda')

View File

@ -1,111 +1,45 @@
#!/usr/bin/python
# -*- coding: utf8 -*-
import os
import numpy as np
import pandas as pd
import matplotlib as plt
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import pandas as pd
from pyFTS.common import FLR, FuzzySet, Membership, Transformations, fts
from pyFTS.models import chen
from pyFTS.benchmarks import benchmarks as bchmk
from numpy import random
#gauss_treino = random.normal(0,1.0,1600)
#gauss_teste = random.normal(0,1.0,400)
os.chdir("/home/petronio/dados/Dropbox/Doutorado/Codigos/")
'''
enrollments = pd.read_csv("DataSets/Enrollments.csv", sep=";")
enrollments = np.array(enrollments["Enrollments"])
'''
taiexpd = pd.read_csv("DataSets/TAIEX.csv", sep=",")
data = np.array(taiexpd["avg"][:5000])
del(taiexpd)
#from mpl_toolkits.mplot3d import Axes3D
import importlib
import pandas as pd
from pyFTS.partitioners import Grid
from pyFTS.common import FLR, FuzzySet, Membership, SortedCollection
from pyFTS import fts
from pyFTS import hofts
from pyFTS import pwfts
from pyFTS import tree
from pyFTS.benchmarks import benchmarks as bchmk
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
#uod = [10162, 21271]
from pyFTS.common import Util
fs1 = Grid.GridPartitioner(data[:3000], 30)
#for s in enrollments_fs1.sets:
# print(s) #.partition_function(uod, 100))
from pyFTS.data import TAIEX
pfts1 = pwfts.ProbabilisticWeightedFTS("1", partitioner=fs1)
pfts1.train(data, None, 1)
pfts1.shortname = "1st Order"
#print(pfts1_enrollments)
#tmp = pfts1.forecast(data[3000:3020])
#tmp = pfts1.forecast_interval(data[3000:3020])
tmp = pfts1.forecast_distribution(data[3500])
p = 0
for b in tmp[0].bins:
p += tmp[0].density(b)
print(p)
#tmp = pfts1.forecast_ahead_interval(data[3000:3020],20)
#tmp = pfts1.forecast_ahead_distribution(data[3000:3020],20, method=3, h=0.45, kernel="gaussian")
#print(tmp[0])
#print(tmp[0].quantile([0.05, 0.95]))
#pfts1_enrollments.AprioriPDF
#norm = pfts1_enrollments.global_frequency_count
#uod = pfts1.get_UoD()
#for k in sorted(pfts1_enrollments.flrgs.keys())
# flrg = pfts1_enrollments.flrgs[k]
# tmp = flrg.get_LHSprobability(15000, norm, uod, 100)
# print(tmp) #flrg.partition_function(uod,100))
#print("MARGINAL VERIFICATION")
#for s in sorted(pfts1_enrollments.flrgs.keys()):
# flrg = pfts1_enrollments.flrgs[s]
#print(flrg.get_LHSprobability(15000, norm, uod, 100))
# print(sum([flrg.get_LHSprobability(k, norm, uod, 100) for k in np.linspace(uod[0],uod[1],100)]))
taiex = TAIEX.get_data()
train = taiex[:3000]
test = taiex[3000:3200]
from pyFTS.common import Transformations
tdiff = Transformations.Differential(1)
'''
pfts2_enrollments = pwfts.ProbabilisticWeightedFTS("2")
pfts2_enrollments.dump = False
pfts2_enrollments.shortname = "2nd Order"
pfts2_enrollments.train(enrollments, enrollments_fs1, 2)
pfts3_enrollments = pwfts.ProbabilisticWeightedFTS("3")
pfts3_enrollments.dump = False
pfts3_enrollments.shortname = "3rd Order"
pfts3_enrollments.train(enrollments, enrollments_fs1, 3)
from pyFTS.partitioners import Grid, Util as pUtil
from pyFTS.common import FLR,FuzzySet,Membership,SortedCollection
taiex_fs1 = Grid.GridPartitioner(data=train, npart=30)
taiex_fs2 = Grid.GridPartitioner(data=train, npart=10, transformation=tdiff)
#pUtil.plot_partitioners(train, [taiex_fs1,taiex_fs2], tam=[15,7])
from pyFTS.common import fts,tree
from pyFTS.models import hofts, pwfts
pfts1_taiex = pwfts.ProbabilisticWeightedFTS("1", partitioner=taiex_fs1)
#pfts1_taiex.appendTransformation(diff)
pfts1_taiex.fit(train, save_model=True, file_path='pwfts')
pfts1_taiex.shortname = "1st Order"
print(pfts1_taiex)
bchmk.plot_compared_series(enrollments,[pfts1_enrollments,pfts2_enrollments, pfts3_enrollments],
["red","blue","green"], linewidth=2,
typeonlegend=True,save=False,file="pictures/pwfts_enrollments_interval.png",
tam=[20,7],points=False, intervals=True)
'''
model = Util.load_obj('pwfts')
model.predict(test, type='distribution')
#'''