- Bugfixes due to refactorings

Petrônio Cândido 2018-03-05 15:07:02 -03:00
parent 9718f48b39
commit 3d64c5065e
18 changed files with 1292 additions and 258 deletions

View File

@@ -488,14 +488,18 @@ def all_interval_forecasters(data_train, data_test, partitions, max_order=3,save
 def print_interval_statistics(original, models):
-    ret = "Model & Order & Sharpness & Resolution & Coverage \\\\ \n"
+    ret = "Model & Order & Sharpness & Resolution & Coverage & .05 & .25 & .75 & .95 \\\\ \n"
     for fts in models:
-        _sharp, _res, _cov = Measures.get_interval_statistics(original, fts)
+        _sharp, _res, _cov, _q5, _q25, _q75, _q95 = Measures.get_interval_statistics(original, fts)
         ret += fts.shortname + " & "
         ret += str(fts.order) + " & "
         ret += str(_sharp) + " & "
         ret += str(_res) + " & "
-        ret += str(_cov) + " \\\\ \n"
+        ret += str(_cov) + " &"
+        ret += str(_q5) + " &"
+        ret += str(_q25) + " &"
+        ret += str(_q75) + " &"
+        ret += str(_q95) + "\\\\ \n"
     print(ret)
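
Measures.get_interval_statistics now returns seven values, appending the 0.05, 0.25, 0.75 and 0.95 quantile scores to sharpness, resolution and coverage. A minimal usage sketch (model and test_data are placeholder names, not part of this commit):

from pyFTS.benchmarks import Measures

# unpack the enlarged statistics tuple for a single trained model
_sharp, _res, _cov, _q5, _q25, _q75, _q95 = Measures.get_interval_statistics(test_data, model)
print("sharpness={}, resolution={}, coverage={}".format(_sharp, _res, _cov))
print("quantile scores: {} {} {} {}".format(_q5, _q25, _q75, _q95))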

View File

@@ -78,6 +78,7 @@ def load_obj(file):
         obj = dill.load(_file)
     return obj

+
 def persist_env(file):
     """
     Persist an entire environment on file. This function depends on Dill package
@@ -85,6 +86,7 @@ def persist_env(file):
     """
     dill.dump_session(file)

+
 def load_env(file):
     dill.load_session(file)
@@ -94,11 +96,13 @@ def simple_model_train(model, data, parameters):
     return model

-def distributed_train(model, train_method, nodes, fts_method, data, num_batches,
-                      train_parameters, **kwargs):
+def distributed_train(model, train_method, nodes, fts_method, data, num_batches=10,
+                      train_parameters={}, **kwargs):
     import dispy, dispy.httpd, datetime

-    batch_save = kwargs.get('batch_save', True)  # save model between batches
+    batch_save = kwargs.get('batch_save', False)  # save model between batches
+    batch_save_interval = kwargs.get('batch_save_interval', 1)

     file_path = kwargs.get('file_path', None)
@@ -118,8 +122,6 @@ def distributed_train(model, train_method, nodes, fts_method, data, num_batches,
         else:
             ndata = data[ct - model.order: ct + batch_size]

-        #self.train(ndata, **kwargs)
-
         tmp_model = fts_method(str(bcount))

         tmp_model.clone_parameters(model)
@@ -136,7 +138,7 @@ def distributed_train(model, train_method, nodes, fts_method, data, num_batches,
         if job.status == dispy.DispyJob.Finished and tmp is not None:
             model.merge(tmp)

-            if batch_save:
+            if batch_save and (job.id % batch_save_interval) == 0:
                 persist_obj(model, file_path)

         else:
@@ -155,3 +157,53 @@ def distributed_train(model, train_method, nodes, fts_method, data, num_batches,
     cluster.close()

     return model
+
+
+def simple_model_predict(model, data, parameters):
+    return model.predict(data, **parameters)
+
+
+def distributed_predict(model, parameters, nodes, data, num_batches):
+    import dispy, dispy.httpd
+
+    cluster = dispy.JobCluster(simple_model_predict, nodes=nodes)  # , depends=dependencies)
+    http_server = dispy.httpd.DispyHTTPServer(cluster)
+
+    jobs = []
+    n = len(data)
+    batch_size = int(n / num_batches)
+    bcount = 1
+    for ct in range(model.order, n, batch_size):
+        if model.is_multivariate:
+            ndata = data.iloc[ct - model.order:ct + batch_size]
+        else:
+            ndata = data[ct - model.order: ct + batch_size]
+
+        job = cluster.submit(model, ndata, parameters)
+        job.id = bcount  # associate an ID to identify jobs (if needed later)
+        jobs.append(job)
+
+        bcount += 1
+
+    ret = []
+
+    for job in jobs:
+        tmp = job()
+        if job.status == dispy.DispyJob.Finished and tmp is not None:
+            if job.id < batch_size:
+                ret.extend(tmp[:-1])
+            else:
+                ret.extend(tmp)
+        else:
+            print(job.exception)
+            print(job.stdout)
+
+    cluster.wait()  # wait for all jobs to finish
+
+    cluster.print_status()
+
+    http_server.shutdown()  # this waits until browser gets all updates
+    cluster.close()
+
+    return ret
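
The new distributed_predict mirrors distributed_train: the series is cut into num_batches slices, each slice keeps the previous model.order points as lag context, every slice is submitted as a dispy job, and finished jobs are concatenated in id order. A minimal driver sketch, assuming a dispy node is reachable at the placeholder address and that train_data/test_data exist:

from pyFTS.common import Util

nodes = ['127.0.0.1']  # placeholder; any host running dispynode

# train remotely, checkpointing the merged model every 5 finished batches
model = Util.distributed_train(model, Util.simple_model_train, nodes,
                               type(model), train_data, 10, {},
                               batch_save=True, batch_save_interval=5,
                               file_path='model.pkl')

# predict remotely; the parameters dict is forwarded to model.predict on each node
forecasts = Util.distributed_predict(model, {'type': 'point'}, nodes, test_data, 10)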

View File

@@ -47,13 +47,13 @@ class FLRG(object):
             self.midpoint = np.nanmean(self.get_midpoints(sets))
         return self.midpoint

-    def get_midpoints(self,sets):
+    def get_midpoints(self, sets):
         if isinstance(self.RHS, (list, set)):
             return np.array([sets[s].centroid for s in self.RHS])
         elif isinstance(self.RHS, dict):
             return np.array([sets[self.RHS[s]].centroid for s in self.RHS.keys()])

-    def get_lower(self,sets):
+    def get_lower(self, sets):
         if self.lower is None:
             if isinstance(self.RHS, list):
                 self.lower = min([sets[rhs].lower for rhs in self.RHS])
@@ -61,7 +61,7 @@ class FLRG(object):
                 self.lower = min([sets[self.RHS[s]].lower for s in self.RHS.keys()])
         return self.lower

-    def get_upper(self, t,sets):
+    def get_upper(self, sets):
         if self.upper is None:
             if isinstance(self.RHS, list):
                 self.upper = max([sets[rhs].upper for rhs in self.RHS])
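
These signature fixes follow the refactored convention that an FLRG stores only fuzzy set names, so every bound getter receives the model's name-to-set dict. Calling convention sketch (flrg and model are placeholder names):

# before this commit: flrg.get_upper(t, sets); now the dict is the only argument
upper = flrg.get_upper(model.sets)
lower = flrg.get_lower(model.sets)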

View File

@@ -2,11 +2,6 @@ import numpy as np
 import pandas as pd
 from pyFTS.common import FuzzySet, SortedCollection, tree, Util

-def parallel_train(data, method, **kwargs):
-    model = method(**kwargs)
-    model.train(data)
-    return model
-
 class FTS(object):
     """
@@ -67,7 +62,19 @@ class FTS(object):
         :param kwargs:
         :return:
         """
-        type = kwargs.get("type", 'point')
+        if 'distributed' in kwargs:
+            distributed = kwargs.pop('distributed')
+        else:
+            distributed = False
+
+        if distributed is None or distributed == False:
+
+            if 'type' in kwargs:
+                type = kwargs.pop("type")
+            else:
+                type = 'point'
+
             steps_ahead = kwargs.get("steps_ahead", None)

             if type == 'point' and steps_ahead == None:
@@ -85,6 +92,13 @@ class FTS(object):
             else:
                 raise ValueError('The argument \'type\' has an unknown value.')

+        else:
+
+            nodes = kwargs.get("nodes", ['127.0.0.1'])
+            num_batches = kwargs.get('num_batches', 10)
+
+            return Util.distributed_predict(self, kwargs, nodes, data, num_batches)
+
     def forecast(self, data, **kwargs):
         """
@@ -180,21 +194,24 @@ class FTS(object):
         import datetime

-        num_batches = kwargs.get('num_batches', None)
+        num_batches = kwargs.get('num_batches', 10)

         save = kwargs.get('save_model', False)  # save model on disk

-        batch_save = kwargs.get('batch_save', True)  # save model between batches
+        batch_save = kwargs.get('batch_save', False)  # save model between batches

         file_path = kwargs.get('file_path', None)

         distributed = kwargs.get('distributed', False)

+        batch_save_interval = kwargs.get('batch_save_interval', 10)
+
         if distributed:
             nodes = kwargs.get('nodes', False)
             train_method = kwargs.get('train_method', Util.simple_model_train)
             Util.distributed_train(self, train_method, nodes, type(self), data, num_batches, {},
-                                   batch_save=batch_save, file_path=file_path)
+                                   batch_save=batch_save, file_path=file_path,
+                                   batch_save_interval=batch_save_interval)
         else:
             print("[{0: %H:%M:%S}] Start training".format(datetime.datetime.now()))
@@ -303,6 +320,9 @@ class FTS(object):
         else:
             return data

+    def get_UoD(self):
+        return [self.original_min, self.original_max]
+
     def __str__(self):
         tmp = self.name + ":\n"
         for r in sorted(self.flrgs):
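
Taken together, predict now dispatches on both type and distributed, and fit forwards the new batch_save_interval to Util.distributed_train. A sketch of the resulting user-facing calls (node address and file path are placeholders):

model.fit(train_data, distributed=True, nodes=['127.0.0.1'],
          num_batches=10, batch_save=True, batch_save_interval=5,
          file_path='model.pkl')

forecasts = model.predict(test_data, type='point',
                          distributed=True, nodes=['127.0.0.1'], num_batches=10)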

View File

@@ -1,8 +1,8 @@
 """
-Simple High Order extension of Conventional FTS by Chen (1996)
+High Order FTS

-[1] S.-M. Chen, Forecasting enrollments based on fuzzy time series,
-Fuzzy Sets Syst., vol. 81, no. 3, pp. 311-319, 1996.
+Severiano, S. A. Jr; Silva, P. C. L.; Sadaei, H. J.; Guimarães, F. G. Very Short-term Solar Forecasting
+using Fuzzy Time Series. 2017 IEEE International Conference on Fuzzy Systems. DOI: 10.1109/FUZZ-IEEE.2017.8015732
 """

 import numpy as np

View File

@@ -26,105 +26,51 @@ class IntervalFTS(hofts.HighOrderFTS):

     def get_upper(self, flrg):
         if flrg.get_key() in self.flrgs:
             tmp = self.flrgs[flrg.get_key()]
-            ret = tmp.get_upper()
+            ret = tmp.get_upper(self.sets)
         else:
-            ret = flrg.LHS[-1].upper
+            ret = self.sets[flrg.LHS[-1]].upper
         return ret

     def get_lower(self, flrg):
         if flrg.get_key() in self.flrgs:
             tmp = self.flrgs[flrg.get_key()]
-            ret = tmp.get_lower()
+            ret = tmp.get_lower(self.sets)
         else:
-            ret = flrg.LHS[-1].lower
+            ret = self.sets[flrg.LHS[-1]].lower
         return ret

     def get_sequence_membership(self, data, fuzzySets):
         mb = [fuzzySets[k].membership(data[k]) for k in np.arange(0, len(data))]
         return mb

     def forecast_interval(self, data, **kwargs):
-        ndata = np.array(self.apply_transformations(data))
-        l = len(ndata)
         ret = []
-        for k in np.arange(self.order - 1, l):
-            affected_flrgs = []
-            affected_flrgs_memberships = []
+        l = len(data)
+        if l <= self.order:
+            return data
+        ndata = self.apply_transformations(data)
+        for k in np.arange(self.order, l+1):
+            sample = ndata[k - self.order: k]
+            flrgs = self.generate_lhs_flrg(sample)
             up = []
             lo = []
-            # find the sets with membership > 0 for each lag
-            count = 0
-            lags = {}
-            if self.order > 1:
-                subset = ndata[k - (self.order - 1): k + 1]
-                for instance in subset:
-                    mb = FuzzySet.fuzzyfy_instance(instance, self.sets)
-                    tmp = np.argwhere(mb)
-                    idx = np.ravel(tmp)  # flatten the array
-                    if idx.size == 0:  # the element is out of the bounds of the Universe of Discourse
-                        if instance <= self.sets[0].lower:
-                            idx = [0]
-                        elif instance >= self.sets[-1].upper:
-                            idx = [len(self.sets) - 1]
-                        else:
-                            raise Exception(instance)
-                    lags[count] = idx
-                    count = count + 1
-                # build a tree with all possible paths
-                root = tree.FLRGTreeNode(None)
-                self.build_tree(root, lags, 0)
-                # trace the possible paths and build the HOFLRGs
-                for p in root.paths():
-                    path = list(reversed(list(filter(None.__ne__, p))))
-                    flrg = hofts.HighOrderFLRG(self.order)
-                    for kk in path: flrg.append_lhs(self.sets[kk])
-                    affected_flrgs.append(flrg)
-                    # find the overall membership of each FLRG
-                    affected_flrgs_memberships.append(min(self.getSequenceMembership(subset, flrg.LHS)))
-            else:
-                mv = FuzzySet.fuzzyfy_instance(ndata[k], self.sets)
-                tmp = np.argwhere(mv)
-                idx = np.ravel(tmp)
-                if idx.size == 0:  # the element is out of the bounds of the Universe of Discourse
-                    if ndata[k] <= self.sets[0].lower:
-                        idx = [0]
-                    elif ndata[k] >= self.sets[-1].upper:
-                        idx = [len(self.sets) - 1]
-                    else:
-                        raise Exception(ndata[k])
-                for kk in idx:
-                    flrg = hofts.HighOrderFLRG(self.order)
-                    flrg.append_lhs(self.sets[kk])
-                    affected_flrgs.append(flrg)
-                    affected_flrgs_memberships.append(mv[kk])
-            count = 0
-            for flrg in affected_flrgs:
-                # find the bounds of each FLRG, weighted by membership
-                up.append(affected_flrgs_memberships[count] * self.get_upper(flrg))
-                lo.append(affected_flrgs_memberships[count] * self.get_lower(flrg))
-                count = count + 1
+            affected_flrgs_memberships = []
+            for flrg in flrgs:
+                # find the bounds of each FLRG, weighted by membership
+                mv = flrg.get_membership(sample, self.sets)
+                up.append(mv * self.get_upper(flrg))
+                lo.append(mv * self.get_lower(flrg))
+                affected_flrgs_memberships.append(mv)
             # generate the interval
             norm = sum(affected_flrgs_memberships)
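
The rewrite delegates lag fuzzification to generate_lhs_flrg, inherited from hofts, so forecast_interval reduces to weighting each matched FLRG's bounds by its membership. The hunk is truncated here, but the closing step is presumably the same normalization used elsewhere in pyFTS:

# assumption: the interval is the membership-weighted, normalized bound sum
norm = sum(affected_flrgs_memberships)
interval = [sum(lo) / norm, sum(up) / norm]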

View File

@@ -3,9 +3,9 @@ import pandas as pd

 def fuzzyfy_instance(data_point, var):
-    mv = np.array([fs.membership(data_point) for fs in var.partitioner.sets])
+    mv = np.array([var.partitioner.sets[key].membership(data_point) for key in var.partitioner.ordered_sets])
     ix = np.ravel(np.argwhere(mv > 0.0))
-    sets = [var.partitioner.sets[i] for i in ix]
+    sets = [(var.name, var.partitioner.ordered_sets[i]) for i in ix]
     return sets
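
fuzzyfy_instance now returns (variable name, set name) tuples rather than FuzzySet objects, matching the name-keyed partitioner.sets dict. A lookup sketch (var and data_point are placeholder names):

sets = fuzzyfy_instance(data_point, var)
for var_name, set_name in sets:
    fs = var.partitioner.sets[set_name]  # resolve the set object by name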

View File

@@ -19,8 +19,13 @@ class FLRG(flg.FLRG):
     def append_rhs(self, set, **kwargs):
         self.RHS.add(set)

-    def get_membership(self, data, sets):
-        return np.nanmin([sets[self.LHS[k]].membership(data[k]) for k in self.LHS.keys()])
+    def get_membership(self, data, variables):
+        mvs = []
+        for var in variables:
+            s = self.LHS[var.name]
+            mvs.append(var.partitioner.sets[s].membership(data[var.name]))
+        return np.nanmin(mvs)

     def __str__(self):
         _str = ""

View File

@@ -50,9 +50,10 @@ class MVFTS(fts.FTS):
             flr = MVFLR.FLR()

-            for c, e in enumerate(path, start=0):
-                flr.set_lhs(e.variable, e.name)
+            for v, s in path:
+                flr.set_lhs(v, s)

-            flrs.append(flr)
+            if len(flr.LHS.keys()) == len(self.explanatory_variables):
+                flrs.append(flr)

         return flrs
@@ -70,8 +71,8 @@ class MVFTS(fts.FTS):
             target = common.fuzzyfy_instance(target_point, self.target_variable)

             for flr in tmp_flrs:
-                for t in target:
-                    flr.set_rhs(t.name)
+                for v, s in target:
+                    flr.set_rhs(s)
                     flrs.append(flr)

         return flrs
@@ -108,8 +109,8 @@ class MVFTS(fts.FTS):
                 mvs.append(0.)
                 mps.append(0.)
             else:
-                mvs.append(self.flrgs[flrg.get_key()].get_membership(self.format_data(data_point)))
-                mps.append(self.flrgs[flrg.get_key()].get_midpoint())
+                mvs.append(self.flrgs[flrg.get_key()].get_membership(self.format_data(data_point), self.explanatory_variables))
+                mps.append(self.flrgs[flrg.get_key()].get_midpoint(self.target_variable.partitioner.sets))

             #print('mv', mvs)
             #print('mp', mps)

View File

@@ -20,7 +20,10 @@ class ProbabilisticWeightedFLRG(hofts.HighOrderFLRG):
         self.Z = None

     def get_membership(self, data, sets):
-        return np.nanprod([sets[k].membership(data[k]) for k in self.LHS])
+        if isinstance(data, (np.ndarray, list)):
+            return np.nanprod([sets[key].membership(data[count]) for count, key in enumerate(self.LHS)])
+        else:
+            return sets[self.LHS[0]].membership(data)

     def append_rhs(self, c, **kwargs):
         mv = kwargs.get('mv', 1.0)
@@ -34,7 +37,7 @@ class ProbabilisticWeightedFLRG(hofts.HighOrderFLRG):
     def lhs_conditional_probability(self, x, sets, norm, uod, nbins):
         pk = self.frequency_count / norm
-        tmp = pk * (self.get_membership(x, sets) / self.partition_function(uod, nbins=nbins))
+        tmp = pk * (self.get_membership(x, sets) / self.partition_function(sets, uod, nbins=nbins))
         return tmp
@@ -110,17 +113,18 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
         data = self.apply_transformations(data, updateUoD=True)

-        parameters = kwargs.get('parameters','Fuzzy')
+        parameters = kwargs.get('parameters','fuzzy')

         self.order = kwargs.get('order',1)
-        if kwargs.get('sets',None) is None and self.partitioner is not None:
+
+        if kwargs.get('sets', None) is None and self.partitioner is not None:
             self.sets = self.partitioner.sets
             self.original_min = self.partitioner.min
             self.original_max = self.partitioner.max
         else:
             self.sets = kwargs.get('sets',None)
-            for s in self.sets:    self.setsDict[s.name] = s
-        if parameters == 'Monotonic':
+
+        if parameters == 'monotonic':
             tmpdata = FuzzySet.fuzzyfy_series_old(data, self.sets)
             flrs = FLR.generate_recurrent_flrs(tmpdata)
             self.generateFLRG(flrs)
@@ -433,7 +437,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
         den = []
         for s in flrgs:
             flrg = self.flrgs[s.get_key()]
-            pk = flrg.lhs_conditional_probability(sample, self.global_frequency_count, uod, nbins)
+            pk = flrg.lhs_conditional_probability(sample, self.sets, self.global_frequency_count, uod, nbins)
             wi = flrg.rhs_conditional_probability(bin, self.sets, uod, nbins)
             num.append(wi * pk)
             den.append(pk)
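
ProbabilisticWeightedFLRG.get_membership now accepts either a full lag sample (list or ndarray) or a single scalar, which order-1 models pass directly, and lhs_conditional_probability threads the sets dict through to partition_function accordingly. A call-shape sketch (placeholder names):

mv_sample = flrg.get_membership(sample, model.sets)  # sample holds the last 'order' points
mv_scalar = flrg.get_membership(x, model.sets)       # scalar point, order-1 case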

View File

@@ -1,4 +1,4 @@
-from pyFTS.common import Membership
+from pyFTS.common import Membership, FuzzySet as FS
 from pyFTS.common.Composite import FuzzySet as Composite
 from pyFTS.partitioners import partitioner, Grid
 from pyFTS.models.seasonal.common import DateTime, FuzzySet, strip_datepart
@@ -32,6 +32,11 @@ class TimeGridPartitioner(partitioner.Partitioner):
             self.sets = self.build(None)

+        if self.ordered_sets is None and self.setnames is not None:
+            self.ordered_sets = self.setnames
+        else:
+            self.ordered_sets = FS.set_ordered(self.sets)
+
     def build(self, data):
         sets = {}

View File

@@ -4,7 +4,9 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# High Order Fuzzy Time Series \n"
+    "# High Order Fuzzy Time Series \n",
+    "\n",
+    "Severiano, S. A. Jr; Silva, P. C. L.; Sadaei, H. J.; Guimarães, F. G. Very Short-term Solar Forecasting using Fuzzy Time Series. 2017 IEEE International Conference on Fuzzy Systems. DOI: 10.1109/FUZZ-IEEE.2017.8015732"
    ]
   },
   {

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -107,7 +107,7 @@ class ProbabilityDistribution(object):
             if str(ret) not in self.qtl:
                 self.qtl[str(ret)] = []

-            self.qtl[str(ret)].append_rhs(k)
+            self.qtl[str(ret)].append(k)

         _keys = [float(k) for k in sorted(self.qtl.keys())]
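
The old call used append_rhs, an FLRG method, on a plain Python list, so building the quantile index raised AttributeError; plain append fixes it. A sketch of the downstream use (placeholder names; predict with type='distribution' appears in the test script at the end of this commit):

dists = model.predict(test_data, type='distribution')
print(dists[0].quantile([0.05, 0.25, 0.75, 0.95]))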

View File

@@ -6,12 +6,12 @@ from pyFTS.common import Transformations
 from pyFTS.data import SONDA

 df = SONDA.get_dataframe()
-train = df.iloc[0:1572480] #three years
-test = df.iloc[1572480:2096640] #ears
+train = df.iloc[0:1578241] #three years
+#test = df.iloc[1572480:2096640] #ears
 del df

 from pyFTS.partitioners import Grid, Util as pUtil
-from pyFTS.common import Transformations
+from pyFTS.common import Transformations, Util
 from pyFTS.models.multivariate import common, variable, mvfts
 from pyFTS.models.seasonal import partitioner as seasonal
 from pyFTS.models.seasonal.common import DateTime
@@ -21,6 +21,7 @@ diff = Transformations.Differential(1)
 np = 10

 model = mvfts.MVFTS("")
+
 fig, axes = plt.subplots(nrows=5, ncols=1,figsize=[15,10])
@@ -48,5 +49,11 @@ model.append_variable(vrain)
 model.target_variable = vrain

-model.fit(train, num_batches=20, save=True, batch_save=True, file_path='mvfts_sonda3', distributed=True,
-          nodes=['192.168.0.110','192.168.0.106'])
+#model.fit(train, num_batches=60, save=True, batch_save=True, file_path='mvfts_sonda')
+
+model.fit(train, num_batches=200, save=True, batch_save=True, file_path='mvfts_sonda', distributed=True,
+          nodes=['192.168.0.110','192.168.0.106'], batch_save_interval=10)
+
+#model = Util.load_obj('mvfts_sonda')

View File

@ -1,111 +1,45 @@
#!/usr/bin/python
# -*- coding: utf8 -*-
import os import os
import numpy as np import numpy as np
import pandas as pd import pandas as pd
import matplotlib as plt import matplotlib as plt
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D #from mpl_toolkits.mplot3d import Axes3D
import pandas as pd
from pyFTS.common import FLR, FuzzySet, Membership, Transformations, fts
from pyFTS.models import chen
from pyFTS.benchmarks import benchmarks as bchmk
from numpy import random
#gauss_treino = random.normal(0,1.0,1600)
#gauss_teste = random.normal(0,1.0,400)
os.chdir("/home/petronio/dados/Dropbox/Doutorado/Codigos/")
'''
enrollments = pd.read_csv("DataSets/Enrollments.csv", sep=";")
enrollments = np.array(enrollments["Enrollments"])
'''
taiexpd = pd.read_csv("DataSets/TAIEX.csv", sep=",")
data = np.array(taiexpd["avg"][:5000])
del(taiexpd)
import importlib import importlib
import pandas as pd from statsmodels.tsa.stattools import adfuller
from pyFTS.partitioners import Grid from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from pyFTS.common import FLR, FuzzySet, Membership, SortedCollection
from pyFTS import fts
from pyFTS import hofts
from pyFTS import pwfts
from pyFTS import tree
from pyFTS.benchmarks import benchmarks as bchmk
#uod = [10162, 21271] from pyFTS.common import Util
fs1 = Grid.GridPartitioner(data[:3000], 30) from pyFTS.data import TAIEX
#for s in enrollments_fs1.sets:
# print(s) #.partition_function(uod, 100))
pfts1 = pwfts.ProbabilisticWeightedFTS("1", partitioner=fs1) taiex = TAIEX.get_data()
pfts1.train(data, None, 1)
pfts1.shortname = "1st Order"
#print(pfts1_enrollments)
#tmp = pfts1.forecast(data[3000:3020])
#tmp = pfts1.forecast_interval(data[3000:3020])
tmp = pfts1.forecast_distribution(data[3500])
p = 0
for b in tmp[0].bins:
p += tmp[0].density(b)
print(p)
#tmp = pfts1.forecast_ahead_interval(data[3000:3020],20)
#tmp = pfts1.forecast_ahead_distribution(data[3000:3020],20, method=3, h=0.45, kernel="gaussian")
#print(tmp[0])
#print(tmp[0].quantile([0.05, 0.95]))
#pfts1_enrollments.AprioriPDF
#norm = pfts1_enrollments.global_frequency_count
#uod = pfts1.get_UoD()
#for k in sorted(pfts1_enrollments.flrgs.keys())
# flrg = pfts1_enrollments.flrgs[k]
# tmp = flrg.get_LHSprobability(15000, norm, uod, 100)
# print(tmp) #flrg.partition_function(uod,100))
#print("MARGINAL VERIFICATION")
#for s in sorted(pfts1_enrollments.flrgs.keys()):
# flrg = pfts1_enrollments.flrgs[s]
#print(flrg.get_LHSprobability(15000, norm, uod, 100))
# print(sum([flrg.get_LHSprobability(k, norm, uod, 100) for k in np.linspace(uod[0],uod[1],100)]))
train = taiex[:3000]
test = taiex[3000:3200]
from pyFTS.common import Transformations
tdiff = Transformations.Differential(1)
''' '''
pfts2_enrollments = pwfts.ProbabilisticWeightedFTS("2") from pyFTS.partitioners import Grid, Util as pUtil
pfts2_enrollments.dump = False from pyFTS.common import FLR,FuzzySet,Membership,SortedCollection
pfts2_enrollments.shortname = "2nd Order" taiex_fs1 = Grid.GridPartitioner(data=train, npart=30)
pfts2_enrollments.train(enrollments, enrollments_fs1, 2) taiex_fs2 = Grid.GridPartitioner(data=train, npart=10, transformation=tdiff)
pfts3_enrollments = pwfts.ProbabilisticWeightedFTS("3")
pfts3_enrollments.dump = False #pUtil.plot_partitioners(train, [taiex_fs1,taiex_fs2], tam=[15,7])
pfts3_enrollments.shortname = "3rd Order"
pfts3_enrollments.train(enrollments, enrollments_fs1, 3) from pyFTS.common import fts,tree
from pyFTS.models import hofts, pwfts
pfts1_taiex = pwfts.ProbabilisticWeightedFTS("1", partitioner=taiex_fs1)
#pfts1_taiex.appendTransformation(diff)
pfts1_taiex.fit(train, save_model=True, file_path='pwfts')
pfts1_taiex.shortname = "1st Order"
print(pfts1_taiex)
bchmk.plot_compared_series(enrollments,[pfts1_enrollments,pfts2_enrollments, pfts3_enrollments],
["red","blue","green"], linewidth=2,
typeonlegend=True,save=False,file="pictures/pwfts_enrollments_interval.png",
tam=[20,7],points=False, intervals=True)
''' '''
model = Util.load_obj('pwfts')
model.predict(test, type='distribution')
#'''