- Compacting datasets with bz2

- Refactoring generate_flrg and train methods
  - Introducing batches and model saving in the fit method
Petrônio Cândido 2018-03-02 19:20:21 -03:00
parent cbc0974a3b
commit 00db6a30ad
40 changed files with 409 additions and 26313 deletions

View File

@ -1,6 +1,8 @@
include data/Enrollments.csv include data/Enrollments.csv
include data/AirPassengers.csv include data/AirPassengers.csv
include data/NASDAQ.csv include data/NASDAQ.csv.bz2
include data/SP500.csv include data/SP500.csv.bz2
include data/sunspots.csv include data/sunspots.csv
include data/TAIEX.csv include data/TAIEX.csv.bz2
include data/INMET.csv.bz2
include data/SONDA_BSB.csv.bz2

View File

@ -16,7 +16,7 @@ class FuzzySet(FuzzySet.FuzzySet):
Create an empty composite fuzzy set Create an empty composite fuzzy set
:param name: fuzzy set name :param name: fuzzy set name
""" """
super(FuzzySet, self).__init__(self, name, None, None, None, type='composite') super(FuzzySet, self).__init__(name, None, None, None, type='composite')
self.superset = superset self.superset = superset
if self.superset: if self.superset:
self.sets = [] self.sets = []

View File

@ -1,6 +1,6 @@
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from pyFTS.common import FuzzySet, SortedCollection, tree from pyFTS.common import FuzzySet, SortedCollection, tree, Util
class FTS(object): class FTS(object):
@ -164,9 +164,42 @@ class FTS(object):
:param data: :param data:
:param kwargs: :param kwargs:
:keyword
num_batches: split the training data into num_batches batches to save memory during the training process
save_model: save the final model to disk
batch_save: save the model after each batch
file_path: path to save the model
:return: :return:
""" """
self.train(data, **kwargs)
num_batches = kwargs.get('num_batches', None)
save = kwargs.get('save_model', False) # save model on disk
batch_save = kwargs.get('batch_save', True) #save model between batches
file_path = kwargs.get('file_path', None)
if num_batches is not None:
n = len(data)
batch_size = int(round(n / num_batches, 0))
for ct in range(self.order, n, batch_size):
if self.is_multivariate:
ndata = data.iloc[ct - self.order:ct + batch_size]
else:
ndata = data[ct - self.order : ct + batch_size]
self.train(ndata, **kwargs)
if batch_save:
Util.persist_obj(self,file_path)
else:
self.train(data, **kwargs)
if save:
Util.persist_obj(self, file_path)
def append_transformation(self, transformation): def append_transformation(self, transformation):
if transformation is not None: if transformation is not None:

BIN
pyFTS/data/INMET.csv.bz2 Normal file

Binary file not shown.

26
pyFTS/data/INMET.py Normal file
View File

@ -0,0 +1,26 @@
#--------------------
#BDMEP - INMET
#--------------------
#Station: BELO HORIZONTE - MG (OMM: 83587)
#Latitude (degrees): -19.93
#Longitude (degrees): -43.93
#Altitude (meters): 915.00
#Station in operation
#Start of operation: 03/03/1910
#Requested data period: 01/01/2000 to 31/12/2012
#The data listed below are those recorded in the BDMEP
#Time in UTC
# http://www.inmet.gov.br
import pandas as pd
import numpy as np
import os
import pkg_resources
def get_dataframe():
filename = pkg_resources.resource_filename('pyFTS', 'data/INMET.csv.bz2')
dat = pd.read_csv(filename, sep=";", compression='bz2')
dat["DataHora"] = pd.to_datetime(dat["DataHora"], format='%d/%m/%Y %H:%M')
return dat

File diff suppressed because it is too large

BIN
pyFTS/data/NASDAQ.csv.bz2 Normal file

Binary file not shown.

View File

@ -5,7 +5,7 @@ import pkg_resources
def get_data(): def get_data():
filename = pkg_resources.resource_filename('pyFTS', 'data/NASDAQ.csv') filename = pkg_resources.resource_filename('pyFTS', 'data/NASDAQ.csv.bz2')
dat = pd.read_csv(filename, sep=";") dat = pd.read_csv(filename, sep=";", compression='bz2')
dat = np.array(dat["avg"]) dat = np.array(dat["avg"])
return dat return dat
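
Since the datasets are now shipped as bz2-compressed CSV files, pandas decompresses them transparently. A hedged illustration: read_csv accepts compression='bz2' explicitly, and its default compression='infer' also recognizes the .bz2 extension of an on-disk path, so both calls below are equivalent for a local file.

import pandas as pd

dat = pd.read_csv('NASDAQ.csv.bz2', sep=';', compression='bz2')  # explicit
dat = pd.read_csv('NASDAQ.csv.bz2', sep=';')                     # compression inferred from the extension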

18
pyFTS/data/SONDA.py Normal file
View File

@ -0,0 +1,18 @@
import pandas as pd
import numpy as np
import os
import pkg_resources
def get_data(field):
filename = pkg_resources.resource_filename('pyFTS', 'data/SONDA_BSB.csv.bz2')
dat = pd.read_csv(filename, sep=";", compression='bz2')
dat = np.array(dat[field])
return dat
def get_dataframe():
filename = pkg_resources.resource_filename('pyFTS', 'data/SONDA_BSB.csv.bz2')
dat = pd.read_csv(filename, sep=";", compression='bz2')
dat["datahora"] = pd.to_datetime(dat["datahora"], format='%Y-%m-%d %H:%M:%S')
return dat
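
A short, hedged usage sketch of the new SONDA loader defined above; 'glo_avg' is a hypothetical column name used only for illustration and is not taken from this commit.

from pyFTS.data import SONDA

df = SONDA.get_dataframe()           # full dataframe with the parsed 'datahora' column
series = SONDA.get_data('glo_avg')   # one column as a numpy array (hypothetical field name)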

Binary file not shown.

File diff suppressed because it is too large

BIN
pyFTS/data/SP500.csv.bz2 Normal file

Binary file not shown.

View File

@ -5,7 +5,7 @@ import pkg_resources
def get_data(): def get_data():
filename = pkg_resources.resource_filename('pyFTS', 'data/SP500.csv') filename = pkg_resources.resource_filename('pyFTS', 'data/SP500.csv.bz2')
dat = pd.read_csv(filename, sep=",") dat = pd.read_csv(filename, sep=",", compression='bz2')
dat = np.array(dat["Avg"]) dat = np.array(dat["Avg"])
return dat return dat

File diff suppressed because it is too large

BIN
pyFTS/data/TAIEX.csv.bz2 Normal file

Binary file not shown.

View File

@ -5,14 +5,14 @@ import pkg_resources
def get_data(): def get_data():
filename = pkg_resources.resource_filename('pyFTS', 'data/TAIEX.csv') filename = pkg_resources.resource_filename('pyFTS', 'data/TAIEX.csv.bz2')
dat = pd.read_csv(filename, sep=",") dat = pd.read_csv(filename, sep=",", compression='bz2')
dat = np.array(dat["avg"]) dat = np.array(dat["avg"])
return dat return dat
def get_dataframe(): def get_dataframe():
filename = pkg_resources.resource_filename('pyFTS', 'data/TAIEX.csv') filename = pkg_resources.resource_filename('pyFTS', 'data/TAIEX.csv.bz2')
dat = pd.read_csv(filename, sep=",") dat = pd.read_csv(filename, sep=",", compression='bz2')
dat["Date"] = pd.to_datetime(dat["Date"]) dat["Date"] = pd.to_datetime(dat["Date"])
return dat return dat

View File

@ -37,14 +37,12 @@ class ConventionalFTS(fts.FTS):
self.flrgs = {} self.flrgs = {}
def generate_flrg(self, flrs): def generate_flrg(self, flrs):
flrgs = {}
for flr in flrs: for flr in flrs:
if flr.LHS.name in flrgs: if flr.LHS.name in self.flrgs:
flrgs[flr.LHS.name].append(flr.RHS) self.flrgs[flr.LHS.name].append(flr.RHS)
else: else:
flrgs[flr.LHS.name] = ConventionalFLRG(flr.LHS) self.flrgs[flr.LHS.name] = ConventionalFLRG(flr.LHS)
flrgs[flr.LHS.name].append(flr.RHS) self.flrgs[flr.LHS.name].append(flr.RHS)
return (flrgs)
def train(self, data, **kwargs): def train(self, data, **kwargs):
if kwargs.get('sets', None) is not None: if kwargs.get('sets', None) is not None:
@ -52,7 +50,7 @@ class ConventionalFTS(fts.FTS):
ndata = self.apply_transformations(data) ndata = self.apply_transformations(data)
tmpdata = FuzzySet.fuzzyfy_series_old(ndata, self.sets) tmpdata = FuzzySet.fuzzyfy_series_old(ndata, self.sets)
flrs = FLR.generate_non_recurrent_flrs(tmpdata) flrs = FLR.generate_non_recurrent_flrs(tmpdata)
self.flrgs = self.generate_flrg(flrs) self.generate_flrg(flrs)
def forecast(self, data, **kwargs): def forecast(self, data, **kwargs):
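
Across the refactored models, generate_flrg no longer builds and returns a local dictionary: it writes straight into self.flrgs, so the repeated train() calls issued by the batched fit() extend the same rule base instead of replacing it. A condensed sketch of the new pattern using the Chen model names from the hunk above (a method body shown out of its class for brevity, not the literal library code):

from pyFTS.models import chen

def generate_flrg(self, flrs):
    # accumulate rules into the persistent self.flrgs dict instead of returning a new one
    for flr in flrs:
        if flr.LHS.name not in self.flrgs:
            self.flrgs[flr.LHS.name] = chen.ConventionalFLRG(flr.LHS)
        self.flrgs[flr.LHS.name].append(flr.RHS)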

View File

@ -53,11 +53,9 @@ class TrendWeightedFTS(yu.WeightedFTS):
self.is_high_order = False self.is_high_order = False
def generate_FLRG(self, flrs): def generate_FLRG(self, flrs):
flrgs = {}
for flr in flrs: for flr in flrs:
if flr.LHS.name in flrgs: if flr.LHS.name in self.flrgs:
flrgs[flr.LHS.name].append(flr.RHS) self.flrgs[flr.LHS.name].append(flr.RHS)
else: else:
flrgs[flr.LHS.name] = TrendWeightedFLRG(flr.LHS) self.flrgs[flr.LHS.name] = TrendWeightedFLRG(flr.LHS)
flrgs[flr.LHS.name].append(flr.RHS) self.flrgs[flr.LHS.name].append(flr.RHS)
return (flrgs)

View File

@ -32,12 +32,12 @@ class EnsembleFTS(fts.FTS):
self.point_method = kwargs.get('point_method', 'mean') self.point_method = kwargs.get('point_method', 'mean')
self.interval_method = kwargs.get('interval_method', 'quantile') self.interval_method = kwargs.get('interval_method', 'quantile')
def appendModel(self, model): def append_model(self, model):
self.models.append(model) self.models.append(model)
if model.order > self.order: if model.order > self.order:
self.order = model.order self.order = model.order
def train(self, data, sets, order=1,parameters=None): def train(self, data, **kwargs):
self.original_max = max(data) self.original_max = max(data)
self.original_min = min(data) self.original_min = min(data)
@ -228,10 +228,12 @@ class AllMethodEnsembleFTS(EnsembleFTS):
for t in self.transformations: for t in self.transformations:
model.append_transformation(t) model.append_transformation(t)
def train(self, data, sets, order=1, parameters=None): def train(self, data, **kwargs):
self.original_max = max(data) self.original_max = max(data)
self.original_min = min(data) self.original_min = min(data)
order = kwargs.get('order',2)
fo_methods = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, cheng.TrendWeightedFTS, fo_methods = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, cheng.TrendWeightedFTS,
sadaei.ExponentialyWeightedFTS, ismailefendi.ImprovedWeightedFTS] sadaei.ExponentialyWeightedFTS, ismailefendi.ImprovedWeightedFTS]
@ -240,16 +242,16 @@ class AllMethodEnsembleFTS(EnsembleFTS):
for method in fo_methods: for method in fo_methods:
model = method("") model = method("")
self.set_transformations(model) self.set_transformations(model)
model.train(data, sets) model.train(data, **kwargs)
self.appendModel(model) self.append_model(model)
for method in ho_methods: for method in ho_methods:
for o in np.arange(1, order+1): for o in np.arange(1, order+1):
model = method("") model = method("")
if model.min_order >= o: if model.min_order >= o:
self.set_transformations(model) self.set_transformations(model)
model.train(data, sets, order=o) model.train(data, **kwargs)
self.appendModel(model) self.append_model(model)
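
A hedged sketch of the refactored ensemble API: member models are attached with append_model() and the training order now travels through **kwargs (defaulting to 2). The module path and the constructor argument are assumptions that mirror the pyFTS.models layout and the method("") pattern used elsewhere in this commit.

from pyFTS.data import TAIEX
from pyFTS.partitioners import Grid
from pyFTS.models.ensemble import ensemble

data = TAIEX.get_data()
fs = Grid.GridPartitioner(data=data, npart=10)

e = ensemble.AllMethodEnsembleFTS("")   # constructor argument assumed
e.train(data, sets=fs.sets, order=3)    # 'order' is now read via kwargs.get('order', 2)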

View File

@ -41,7 +41,7 @@ class SeasonalEnsembleFTS(ensemble.EnsembleFTS):
self.original_max = max(self.indexer.get_data(data)) self.original_max = max(self.indexer.get_data(data))
self.original_min = min(self.indexer.get_data(data)) self.original_min = min(self.indexer.get_data(data))
def train(self, data, sets, order=1, parameters=None): def train(self, data, **kwargs):
self.original_max = max(self.indexer.get_data(data)) self.original_max = max(self.indexer.get_data(data))
self.original_min = min(self.indexer.get_data(data)) self.original_min = min(self.indexer.get_data(data))
@ -59,7 +59,7 @@ class SeasonalEnsembleFTS(ensemble.EnsembleFTS):
for m in pool.keys()) for m in pool.keys())
for tmp in results: for tmp in results:
self.appendModel(tmp) self.append_model(tmp)
cUtil.persist_obj(self, "models/"+self.name+".pkl") cUtil.persist_obj(self, "models/"+self.name+".pkl")

View File

@ -77,7 +77,6 @@ class HighOrderFTS(fts.FTS):
self.build_tree_without_order(child, lags, level + 1) self.build_tree_without_order(child, lags, level + 1)
def generateFLRG(self, flrs): def generateFLRG(self, flrs):
flrgs = {}
l = len(flrs) l = len(flrs)
for k in np.arange(self.order + 1, l): for k in np.arange(self.order + 1, l):
flrg = HighOrderFLRG(self.order) flrg = HighOrderFLRG(self.order)
@ -85,15 +84,13 @@ class HighOrderFTS(fts.FTS):
for kk in np.arange(k - self.order, k): for kk in np.arange(k - self.order, k):
flrg.append_lhs(flrs[kk].LHS) flrg.append_lhs(flrs[kk].LHS)
if flrg.str_lhs() in flrgs: if flrg.str_lhs() in self.flrgs:
flrgs[flrg.str_lhs()].append_rhs(flrs[k].RHS) self.flrgs[flrg.str_lhs()].append_rhs(flrs[k].RHS)
else: else:
flrgs[flrg.str_lhs()] = flrg; self.flrgs[flrg.str_lhs()] = flrg;
flrgs[flrg.str_lhs()].append_rhs(flrs[k].RHS) self.flrgs[flrg.str_lhs()].append_rhs(flrs[k].RHS)
return (flrgs)
def generate_flrg(self, data): def generate_flrg(self, data):
flrgs = {}
l = len(data) l = len(data)
for k in np.arange(self.order, l): for k in np.arange(self.order, l):
if self.dump: print("FLR: " + str(k)) if self.dump: print("FLR: " + str(k))
@ -121,13 +118,12 @@ class HighOrderFTS(fts.FTS):
for lhs in path: for lhs in path:
flrg.append_lhs(lhs) flrg.append_lhs(lhs)
if flrg.str_lhs() not in flrgs: if flrg.str_lhs() not in self.flrgs:
flrgs[flrg.str_lhs()] = flrg; self.flrgs[flrg.str_lhs()] = flrg;
for st in rhs: for st in rhs:
flrgs[flrg.str_lhs()].append_rhs(st) self.flrgs[flrg.str_lhs()].append_rhs(st)
return flrgs
def train(self, data, **kwargs): def train(self, data, **kwargs):
@ -138,7 +134,7 @@ class HighOrderFTS(fts.FTS):
if kwargs.get('sets', None) is not None: if kwargs.get('sets', None) is not None:
self.sets = kwargs.get('sets', None) self.sets = kwargs.get('sets', None)
for s in self.sets: self.setsDict[s.name] = s for s in self.sets: self.setsDict[s.name] = s
self.flrgs = self.generate_flrg(data) self.generate_flrg(data)
def forecast(self, data, **kwargs): def forecast(self, data, **kwargs):

View File

@ -54,14 +54,12 @@ class ImprovedWeightedFTS(fts.FTS):
self.setsDict = {} self.setsDict = {}
def generate_flrg(self, flrs): def generate_flrg(self, flrs):
flrgs = {}
for flr in flrs: for flr in flrs:
if flr.LHS.name in flrgs: if flr.LHS.name in self.flrgs:
flrgs[flr.LHS.name].append(flr.RHS) self.flrgs[flr.LHS.name].append(flr.RHS)
else: else:
flrgs[flr.LHS.name] = ImprovedWeightedFLRG(flr.LHS); self.flrgs[flr.LHS.name] = ImprovedWeightedFLRG(flr.LHS);
flrgs[flr.LHS.name].append(flr.RHS) self.flrgs[flr.LHS.name].append(flr.RHS)
return (flrgs)
def train(self, data, **kwargs): def train(self, data, **kwargs):
if kwargs.get('sets', None) is not None: if kwargs.get('sets', None) is not None:
@ -73,7 +71,7 @@ class ImprovedWeightedFTS(fts.FTS):
tmpdata = FuzzySet.fuzzyfy_series_old(ndata, self.sets) tmpdata = FuzzySet.fuzzyfy_series_old(ndata, self.sets)
flrs = FLR.generate_recurrent_flrs(tmpdata) flrs = FLR.generate_recurrent_flrs(tmpdata)
self.flrgs = self.generate_flrg(flrs) self.generate_flrg(flrs)
def forecast(self, data, **kwargs): def forecast(self, data, **kwargs):
l = 1 l = 1

View File

@ -15,6 +15,7 @@ class MVFTS(fts.FTS):
self.explanatory_variables = [] self.explanatory_variables = []
self.target_variable = None self.target_variable = None
self.flrgs = {} self.flrgs = {}
self.is_multivariate = True
def append_variable(self, var): def append_variable(self, var):
self.explanatory_variables.append(var) self.explanatory_variables.append(var)
@ -76,24 +77,21 @@ class MVFTS(fts.FTS):
return flrs return flrs
def generate_flrg(self, flrs): def generate_flrg(self, flrs):
flrgs = {}
for flr in flrs: for flr in flrs:
flrg = mvflrg.FLRG(lhs=flr.LHS) flrg = mvflrg.FLRG(lhs=flr.LHS)
if flrg.get_key() not in flrgs: if flrg.get_key() not in self.flrgs:
flrgs[flrg.get_key()] = flrg self.flrgs[flrg.get_key()] = flrg
flrgs[flrg.get_key()].append_rhs(flr.RHS) self.flrgs[flrg.get_key()].append_rhs(flr.RHS)
return flrgs
def train(self, data, **kwargs): def train(self, data, **kwargs):
ndata = self.apply_transformations(data) ndata = self.apply_transformations(data)
flrs = self.generate_flrs(ndata) flrs = self.generate_flrs(ndata)
self.flrgs = self.generate_flrg(flrs) self.generate_flrg(flrs)
def forecast(self, data, **kwargs): def forecast(self, data, **kwargs):
ret = [] ret = []

View File

@ -33,9 +33,10 @@ class Variable:
mf = kwargs.get('func', Membership.trimf) mf = kwargs.get('func', Membership.trimf)
np = kwargs.get('npart', 10) np = kwargs.get('npart', 10)
data = kwargs.get('data', None) data = kwargs.get('data', None)
kw = kwargs.get('partitioner_specific', {})
self.partitioner = fs(data=data[self.data_label].values, npart=np, func=mf, self.partitioner = fs(data=data[self.data_label].values, npart=np, func=mf,
transformation=self.transformation, prefix=self.alias, transformation=self.transformation, prefix=self.alias,
variable=self.name) variable=self.name, **kw)
self.partitioner.name = self.name + " " + self.partitioner.name self.partitioner.name = self.name + " " + self.partitioner.name

View File

@ -31,7 +31,7 @@ class FuzzySet(FS.FuzzySet):
- noise: Perturbation function that adds noise to the membership function - noise: Perturbation function that adds noise to the membership function
- noise_params: Parameters for noise perturbation function - noise_params: Parameters for noise perturbation function
""" """
super(FuzzySet, self).__init__(name=name, mf=mf, parameters=parameters, centroid=None) super(FuzzySet, self).__init__(name=name, mf=mf, parameters=parameters, centroid=None, alpha=1.0, **kwargs)
self.location = kwargs.get("location", None) self.location = kwargs.get("location", None)
self.location_params = kwargs.get("location_params", None) self.location_params = kwargs.get("location_params", None)
@ -42,6 +42,7 @@ class FuzzySet(FS.FuzzySet):
self.noise = kwargs.get("noise", None) self.noise = kwargs.get("noise", None)
self.noise_params = kwargs.get("noise_params", None) self.noise_params = kwargs.get("noise_params", None)
self.perturbated_parameters = {} self.perturbated_parameters = {}
self.type = 'nonstationary'
if self.location is not None and not isinstance(self.location, (list, set)): if self.location is not None and not isinstance(self.location, (list, set)):
self.location = [self.location] self.location = [self.location]

View File

@ -22,11 +22,9 @@ class ConditionalVarianceFTS(chen.ConventionalFTS):
self.min_stack = [0,0,0] self.min_stack = [0,0,0]
self.max_stack = [0,0,0] self.max_stack = [0,0,0]
def train(self, data, sets = None, order=1,parameters=None): def train(self, data, **kwargs):
if sets is not None: if kwargs.get('sets', None) is not None:
self.sets = sets self.sets = kwargs.get('sets', None)
else:
self.sets = self.partitioner.sets
ndata = self.apply_transformations(data) ndata = self.apply_transformations(data)
@ -35,17 +33,15 @@ class ConditionalVarianceFTS(chen.ConventionalFTS):
tmpdata = common.fuzzySeries(ndata, self.sets, method='fuzzy', const_t=0) tmpdata = common.fuzzySeries(ndata, self.sets, method='fuzzy', const_t=0)
flrs = FLR.generate_non_recurrent_flrs(tmpdata) flrs = FLR.generate_non_recurrent_flrs(tmpdata)
self.flrgs = self.generate_flrg(flrs) self.generate_flrg(flrs)
def generate_flrg(self, flrs, **kwargs): def generate_flrg(self, flrs, **kwargs):
flrgs = {}
for flr in flrs: for flr in flrs:
if flr.LHS.name in flrgs: if flr.LHS.name in self.flrgs:
flrgs[flr.LHS.name].append(flr.RHS) self.flrgs[flr.LHS.name].append(flr.RHS)
else: else:
flrgs[flr.LHS.name] = nsfts.ConventionalNonStationaryFLRG(flr.LHS) self.flrgs[flr.LHS.name] = nsfts.ConventionalNonStationaryFLRG(flr.LHS)
flrgs[flr.LHS.name].append(flr.RHS) self.flrgs[flr.LHS.name].append(flr.RHS)
return flrgs
def _smooth(self, a): def _smooth(self, a):
return .1 * a[0] + .3 * a[1] + .6 * a[2] return .1 * a[0] + .3 * a[1] + .6 * a[2]

View File

@ -46,7 +46,6 @@ class HighOrderNonStationaryFTS(hofts.HighOrderFTS):
self.flrgs = {} self.flrgs = {}
def generate_flrg(self, data, **kwargs): def generate_flrg(self, data, **kwargs):
flrgs = {}
l = len(data) l = len(data)
window_size = kwargs.get("window_size", 1) window_size = kwargs.get("window_size", 1)
for k in np.arange(self.order, l): for k in np.arange(self.order, l):
@ -84,30 +83,27 @@ class HighOrderNonStationaryFTS(hofts.HighOrderFTS):
for c, e in enumerate(path, start=0): for c, e in enumerate(path, start=0):
flrg.appendLHS(e) flrg.appendLHS(e)
if flrg.strLHS() not in flrgs: if flrg.strLHS() not in self.flrgs:
flrgs[flrg.strLHS()] = flrg; self.flrgs[flrg.strLHS()] = flrg;
for st in rhs: for st in rhs:
flrgs[flrg.strLHS()].append_rhs(st) self.flrgs[flrg.strLHS()].append_rhs(st)
# flrgs = sorted(flrgs, key=lambda flrg: flrg.get_midpoint(0, window_size=1)) # flrgs = sorted(flrgs, key=lambda flrg: flrg.get_midpoint(0, window_size=1))
return flrgs def train(self, data, **kwargs):
def train(self, data, sets=None, order=2, parameters=None): if kwargs.get('order', None) is not None:
self.order = kwargs.get('order', 1)
self.order = order if kwargs.get('sets', None) is not None:
self.sets = kwargs.get('sets', None)
if sets is not None:
self.sets = sets
else:
self.sets = self.partitioner.sets
ndata = self.apply_transformations(data) ndata = self.apply_transformations(data)
#tmpdata = common.fuzzyfy_series_old(ndata, self.sets) #tmpdata = common.fuzzyfy_series_old(ndata, self.sets)
#flrs = FLR.generate_recurrent_flrs(ndata) #flrs = FLR.generate_recurrent_flrs(ndata)
window_size = parameters if parameters is not None else 1 window_size = kwargs.get('parameters', 1)
self.flrgs = self.generate_flrg(ndata, window_size=window_size) self.generate_flrg(ndata, window_size=window_size)
def _affected_flrgs(self, sample, k, time_displacement, window_size): def _affected_flrgs(self, sample, k, time_displacement, window_size):
# print("input: " + str(ndata[k])) # print("input: " + str(ndata[k]))

View File

@ -1,6 +1,6 @@
import numpy as np import numpy as np
from pyFTS.common import FLR, fts from pyFTS.common import FLR, fts
from pyFTS.nonstationary import common, flrg from pyFTS.models.nonstationary import common, flrg
class ConventionalNonStationaryFLRG(flrg.NonStationaryFLRG): class ConventionalNonStationaryFLRG(flrg.NonStationaryFLRG):
@ -34,29 +34,25 @@ class NonStationaryFTS(fts.FTS):
self.method = kwargs.get("method",'fuzzy') self.method = kwargs.get("method",'fuzzy')
def generate_flrg(self, flrs, **kwargs): def generate_flrg(self, flrs, **kwargs):
flrgs = {}
for flr in flrs: for flr in flrs:
if flr.LHS.name in flrgs: if flr.LHS.name in self.flrgs:
flrgs[flr.LHS.name].append(flr.RHS) self.flrgs[flr.LHS.name].append(flr.RHS)
else: else:
flrgs[flr.LHS.name] = ConventionalNonStationaryFLRG(flr.LHS) self.flrgs[flr.LHS.name] = ConventionalNonStationaryFLRG(flr.LHS)
flrgs[flr.LHS.name].append(flr.RHS) self.flrgs[flr.LHS.name].append(flr.RHS)
return flrgs
def train(self, data, sets=None, order=1, parameters=None): def train(self, data, **kwargs):
if sets is not None: if kwargs.get('sets', None) is not None:
self.sets = sets self.sets = kwargs.get('sets', None)
else:
self.sets = self.partitioner.sets
ndata = self.apply_transformations(data) ndata = self.apply_transformations(data)
window_size = parameters if parameters is not None else 1 window_size = kwargs.get('parameters', 1)
tmpdata = common.fuzzySeries(ndata, self.sets, window_size, method=self.method) tmpdata = common.fuzzySeries(ndata, self.sets, window_size, method=self.method)
#print([k[0].name for k in tmpdata]) #print([k[0].name for k in tmpdata])
flrs = FLR.generate_recurrent_flrs(tmpdata) flrs = FLR.generate_recurrent_flrs(tmpdata)
#print([str(k) for k in flrs]) #print([str(k) for k in flrs])
self.flrgs = self.generate_flrg(flrs) self.generate_flrg(flrs)
def forecast(self, data, **kwargs): def forecast(self, data, **kwargs):

View File

@ -128,12 +128,11 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
if parameters == 'Monotonic': if parameters == 'Monotonic':
tmpdata = FuzzySet.fuzzyfy_series_old(data, self.sets) tmpdata = FuzzySet.fuzzyfy_series_old(data, self.sets)
flrs = FLR.generate_recurrent_flrs(tmpdata) flrs = FLR.generate_recurrent_flrs(tmpdata)
self.flrgs = self.generateFLRG(flrs) self.generateFLRG(flrs)
else: else:
self.flrgs = self.generate_flrg(data) self.generate_flrg(data)
def generate_flrg(self, data): def generate_flrg(self, data):
flrgs = {}
l = len(data) l = len(data)
for k in np.arange(self.order, l): for k in np.arange(self.order, l):
if self.dump: print("FLR: " + str(k)) if self.dump: print("FLR: " + str(k))
@ -168,20 +167,17 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
lhs_mv = np.prod(tmp_path) lhs_mv = np.prod(tmp_path)
if flrg.str_lhs() not in flrgs: if flrg.str_lhs() not in self.flrgs:
flrgs[flrg.str_lhs()] = flrg; self.flrgs[flrg.str_lhs()] = flrg;
for st in idx: for st in idx:
flrgs[flrg.str_lhs()].appendRHSFuzzy(self.sets[st], lhs_mv * mv[st]) self.flrgs[flrg.str_lhs()].appendRHSFuzzy(self.sets[st], lhs_mv * mv[st])
tmp_fq = sum([lhs_mv*kk for kk in mv if kk > 0]) tmp_fq = sum([lhs_mv*kk for kk in mv if kk > 0])
self.global_frequency_count = self.global_frequency_count + tmp_fq self.global_frequency_count = self.global_frequency_count + tmp_fq
return flrgs
def generateFLRG(self, flrs): def generateFLRG(self, flrs):
flrgs = {}
l = len(flrs) l = len(flrs)
for k in np.arange(self.order, l+1): for k in np.arange(self.order, l+1):
if self.dump: print("FLR: " + str(k)) if self.dump: print("FLR: " + str(k))
@ -191,15 +187,14 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
flrg.append_lhs(flrs[kk].LHS) flrg.append_lhs(flrs[kk].LHS)
if self.dump: print("LHS: " + str(flrs[kk])) if self.dump: print("LHS: " + str(flrs[kk]))
if flrg.str_lhs() in flrgs: if flrg.str_lhs() in self.flrgs:
flrgs[flrg.str_lhs()].append_rhs(flrs[k - 1].RHS) self.flrgs[flrg.str_lhs()].append_rhs(flrs[k - 1].RHS)
else: else:
flrgs[flrg.str_lhs()] = flrg self.flrgs[flrg.str_lhs()] = flrg
flrgs[flrg.str_lhs()].append_rhs(flrs[k - 1].RHS) self.flrgs[flrg.str_lhs()].append_rhs(flrs[k - 1].RHS)
if self.dump: print("RHS: " + str(flrs[k-1])) if self.dump: print("RHS: " + str(flrs[k-1]))
self.global_frequency_count += 1 self.global_frequency_count += 1
return (flrgs)
def update_model(self,data): def update_model(self,data):

View File

@ -58,14 +58,12 @@ class ExponentialyWeightedFTS(fts.FTS):
self.c = kwargs.get('c', default_c) self.c = kwargs.get('c', default_c)
def generate_flrg(self, flrs, c): def generate_flrg(self, flrs, c):
flrgs = {}
for flr in flrs: for flr in flrs:
if flr.LHS.name in flrgs: if flr.LHS.name in self.flrgs:
flrgs[flr.LHS.name].append(flr.RHS) self.flrgs[flr.LHS.name].append(flr.RHS)
else: else:
flrgs[flr.LHS.name] = ExponentialyWeightedFLRG(flr.LHS, c=c); self.flrgs[flr.LHS.name] = ExponentialyWeightedFLRG(flr.LHS, c=c);
flrgs[flr.LHS.name].append(flr.RHS) self.flrgs[flr.LHS.name].append(flr.RHS)
return (flrgs)
def train(self, data, **kwargs): def train(self, data, **kwargs):
self.c = kwargs.get('parameters', default_c) self.c = kwargs.get('parameters', default_c)
@ -74,7 +72,7 @@ class ExponentialyWeightedFTS(fts.FTS):
ndata = self.apply_transformations(data) ndata = self.apply_transformations(data)
tmpdata = FuzzySet.fuzzyfy_series_old(ndata, self.sets) tmpdata = FuzzySet.fuzzyfy_series_old(ndata, self.sets)
flrs = FLR.generate_recurrent_flrs(tmpdata) flrs = FLR.generate_recurrent_flrs(tmpdata)
self.flrgs = self.generate_flrg(flrs, self.c) self.generate_flrg(flrs, self.c)
def forecast(self, data, **kwargs): def forecast(self, data, **kwargs):
l = 1 l = 1

View File

@ -44,25 +44,23 @@ class ContextualMultiSeasonalFTS(sfts.SeasonalFTS):
self.indexer = indexer self.indexer = indexer
self.flrgs = {} self.flrgs = {}
def generateFLRG(self, flrs): def generate_flrg(self, flrs):
flrgs = {}
for flr in flrs: for flr in flrs:
if str(flr.index) not in flrgs: if str(flr.index) not in self.flrgs:
flrgs[str(flr.index)] = ContextualSeasonalFLRG(flr.index) self.flrgs[str(flr.index)] = ContextualSeasonalFLRG(flr.index)
flrgs[str(flr.index)].append(flr) self.flrgs[str(flr.index)].append(flr)
return (flrgs) def train(self, data, **kwargs):
if kwargs.get('sets', None) is not None:
def train(self, data, sets, order=1, parameters=None): self.sets = kwargs.get('sets', None)
self.sets = sets if kwargs.get('parameters', None) is not None:
self.seasonality = parameters self.seasonality = kwargs.get('parameters', None)
flrs = FLR.generate_indexed_flrs(self.sets, self.indexer, data) flrs = FLR.generate_indexed_flrs(self.sets, self.indexer, data)
self.flrgs = self.generateFLRG(flrs) self.generate_flrg(flrs)
def getMidpoints(self, flrg, data): def get_midpoints(self, flrg, data):
if data.name in flrg.flrgs: if data.name in flrg.flrgs:
ret = np.array([s.centroid for s in flrg.flrgs[data.name].RHS]) ret = np.array([s.centroid for s in flrg.flrgs[data.name].RHS])
return ret return ret
@ -82,7 +80,7 @@ class ContextualMultiSeasonalFTS(sfts.SeasonalFTS):
d = FuzzySet.get_maximum_membership_fuzzyset(ndata[k], self.sets) d = FuzzySet.get_maximum_membership_fuzzyset(ndata[k], self.sets)
mp = self.getMidpoints(flrg, d) mp = self.get_midpoints(flrg, d)
ret.append(sum(mp) / len(mp)) ret.append(sum(mp) / len(mp))
@ -90,12 +88,12 @@ class ContextualMultiSeasonalFTS(sfts.SeasonalFTS):
return ret return ret
def forecastAhead(self, data, steps, **kwargs): def forecast_ahead(self, data, steps, **kwargs):
ret = [] ret = []
for i in steps: for i in steps:
flrg = self.flrgs[str(i)] flrg = self.flrgs[str(i)]
mp = self.getMidpoints(flrg) mp = self.get_midpoints(flrg)
ret.append(sum(mp) / len(mp)) ret.append(sum(mp) / len(mp))

View File

@ -28,7 +28,7 @@ class DateTime(Enum):
second_of_day = 86400 second_of_day = 86400
def strip_datepart(self, date, date_part): def strip_datepart(date, date_part):
if date_part == DateTime.year: if date_part == DateTime.year:
tmp = date.year tmp = date.year
elif date_part == DateTime.month: elif date_part == DateTime.month:
@ -90,7 +90,8 @@ class FuzzySet(FuzzySet.FuzzySet):
def __init__(self, datepart, name, mf, parameters, centroid, alpha=1.0, **kwargs): def __init__(self, datepart, name, mf, parameters, centroid, alpha=1.0, **kwargs):
super(FuzzySet, self).__init__(name, mf, parameters, centroid, alpha, type = 'datetime', **kwargs) super(FuzzySet, self).__init__(name, mf, parameters, centroid, alpha, type = 'datetime', **kwargs)
self.datepart = datepart self.datepart = datepart
self.type = 'seasonal'
def membership(self, x): def membership(self, x):
dp = strip_datepart(x, self.datepart) dp = strip_datepart(x, self.datepart)
return self.mf.membership(dp) return self.mf(dp, self.parameters) * self.alpha

View File

@ -20,24 +20,22 @@ class MultiSeasonalFTS(sfts.SeasonalFTS):
self.indexer = indexer self.indexer = indexer
self.flrgs = {} self.flrgs = {}
def generateFLRG(self, flrs): def generate_flrg(self, flrs):
flrgs = {}
for flr in flrs: for flr in flrs:
if str(flr.index) not in self.flrgs: if str(flr.index) not in self.flrgs:
flrgs[str(flr.index)] = sfts.SeasonalFLRG(flr.index) self.flrgs[str(flr.index)] = sfts.SeasonalFLRG(flr.index)
flrgs[str(flr.index)].append(flr.RHS) self.flrgs[str(flr.index)].append(flr.RHS)
return (flrgs) def train(self, data, **kwargs):
if kwargs.get('sets', None) is not None:
def train(self, data, sets, order=1, parameters=None): self.sets = kwargs.get('sets', None)
self.sets = sets if kwargs.get('parameters', None) is not None:
self.seasonality = parameters self.seasonality = kwargs.get('parameters', None)
#ndata = self.indexer.set_data(data,self.doTransformations(self.indexer.get_data(data))) #ndata = self.indexer.set_data(data,self.doTransformations(self.indexer.get_data(data)))
flrs = FLR.generate_indexed_flrs(self.sets, self.indexer, data) flrs = FLR.generate_indexed_flrs(self.sets, self.indexer, data)
self.flrgs = self.generateFLRG(flrs) self.generate_flrg(flrs)
def forecast(self, data, **kwargs): def forecast(self, data, **kwargs):

View File

@ -17,7 +17,7 @@ class TimeGridPartitioner(partitioner.Partitioner):
:param npart: The number of universe of discourse partitions, i.e., the number of fuzzy sets that will be created :param npart: The number of universe of discourse partitions, i.e., the number of fuzzy sets that will be created
:param func: Fuzzy membership function (pyFTS.common.Membership) :param func: Fuzzy membership function (pyFTS.common.Membership)
""" """
super(TimeGridPartitioner, self).__init__(name="TimeGrid", **kwargs) super(TimeGridPartitioner, self).__init__(name="TimeGrid", preprocess=False, **kwargs)
self.season = kwargs.get('seasonality', DateTime.day_of_year) self.season = kwargs.get('seasonality', DateTime.day_of_year)
data = kwargs.get('data', None) data = kwargs.get('data', None)
@ -101,11 +101,12 @@ class TimeGridPartitioner(partitioner.Partitioner):
ticks = [] ticks = []
x = [] x = []
for s in self.sets: for s in self.sets:
if s.type == 'common': if s.type == 'composite':
self.plot_set(ax, s)
elif s.type == 'composite':
for ss in s.sets: for ss in s.sets:
self.plot_set(ax, ss) self.plot_set(ax, ss)
# ticks.append(str(round(s.centroid, 0)) + '\n' + s.name) else:
# x.append(s.centroid) self.plot_set(ax, s)
# plt.xticks(x, ticks) ticks.append(str(round(s.centroid, 0)) + '\n' + s.name)
x.append(s.centroid)
ax.xaxis.set_ticklabels(ticks)
ax.xaxis.set_ticks(x)

View File

@ -43,29 +43,29 @@ class SeasonalFTS(fts.FTS):
self.has_seasonality = True self.has_seasonality = True
self.has_point_forecasting = True self.has_point_forecasting = True
self.is_high_order = False self.is_high_order = False
self.flrgs = {}
def generate_flrg(self, flrs):
def generateFLRG(self, flrs):
flrgs = {}
for ct, flr in enumerate(flrs, start=1): for ct, flr in enumerate(flrs, start=1):
season = self.indexer.get_season_by_index(ct)[0] season = self.indexer.get_season_by_index(ct)[0]
ss = str(season) ss = str(season)
if ss not in flrgs: if ss not in self.flrgs:
flrgs[ss] = SeasonalFLRG(season) self.flrgs[ss] = SeasonalFLRG(season)
#print(season) #print(season)
flrgs[ss].append(flr.RHS) self.flrgs[ss].append(flr.RHS)
return (flrgs) def train(self, data, **kwargs):
if kwargs.get('sets', None) is not None:
def train(self, data, sets, order=1, parameters=None): self.sets = kwargs.get('sets', None)
self.sets = sets
ndata = self.apply_transformations(data) ndata = self.apply_transformations(data)
tmpdata = FuzzySet.fuzzyfy_series_old(ndata, sets) tmpdata = FuzzySet.fuzzyfy_series_old(ndata, self.sets)
flrs = FLR.generate_recurrent_flrs(tmpdata) flrs = FLR.generate_recurrent_flrs(tmpdata)
self.flrgs = self.generateFLRG(flrs) self.generate_flrg(flrs)
def forecast(self, data, **kwargs): def forecast(self, data, **kwargs):

View File

@ -14,8 +14,15 @@ class ConventionalFTS(fts.FTS):
super(ConventionalFTS, self).__init__(1, "FTS " + name, **kwargs) super(ConventionalFTS, self).__init__(1, "FTS " + name, **kwargs)
self.name = "Traditional FTS" self.name = "Traditional FTS"
self.detail = "Song & Chissom" self.detail = "Song & Chissom"
if self.sets is not None and self.partitioner is not None:
self.sets = self.partitioner.sets
self.R = None self.R = None
if self.sets is not None:
self.R = np.zeros((len(self.sets),len(self.sets)))
def flr_membership_matrix(self, flr): def flr_membership_matrix(self, flr):
lm = [flr.LHS.membership(k.centroid) for k in self.sets] lm = [flr.LHS.membership(k.centroid) for k in self.sets]
rm = [flr.RHS.membership(k.centroid) for k in self.sets] rm = [flr.RHS.membership(k.centroid) for k in self.sets]
@ -28,14 +35,14 @@ class ConventionalFTS(fts.FTS):
return r return r
def operation_matrix(self, flrs): def operation_matrix(self, flrs):
r = np.zeros((len(self.sets),len(self.sets))) if self.R is None:
self.R = np.zeros((len(self.sets), len(self.sets)))
for k in flrs: for k in flrs:
mm = self.flr_membership_matrix(k) mm = self.flr_membership_matrix(k)
for k in range(0, len(self.sets)): for k in range(0, len(self.sets)):
for l in range(0, len(self.sets)): for l in range(0, len(self.sets)):
r[k][l] = max(r[k][l], mm[k][l]) self.R[k][l] = max(self.R[k][l], mm[k][l])
return r
def train(self, data, **kwargs): def train(self, data, **kwargs):
if kwargs.get('sets', None) is not None: if kwargs.get('sets', None) is not None:
@ -43,7 +50,7 @@ class ConventionalFTS(fts.FTS):
ndata = self.apply_transformations(data) ndata = self.apply_transformations(data)
tmpdata = FuzzySet.fuzzyfy_series_old(ndata, self.sets) tmpdata = FuzzySet.fuzzyfy_series_old(ndata, self.sets)
flrs = FLR.generate_non_recurrent_flrs(tmpdata) flrs = FLR.generate_non_recurrent_flrs(tmpdata)
self.R = self.operation_matrix(flrs) self.operation_matrix(flrs)
def forecast(self, data, **kwargs): def forecast(self, data, **kwargs):
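
With the relation matrix R now kept as model state, operation_matrix refines the same matrix on every train() call (for example across fit() batches) instead of rebuilding it from scratch. As an aside, the nested k,l loops above amount to an element-wise maximum; a small numpy sketch with an illustrative helper name:

import numpy as np

def accumulate_relation(R, mm):
    # element-wise maximum of the relation matrix and one FLR membership matrix,
    # equivalent to the nested loops in operation_matrix above
    return np.maximum(R, mm)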

View File

@ -47,14 +47,12 @@ class WeightedFTS(fts.FTS):
self.detail = "Yu" self.detail = "Yu"
def generate_FLRG(self, flrs): def generate_FLRG(self, flrs):
flrgs = {}
for flr in flrs: for flr in flrs:
if flr.LHS.name in flrgs: if flr.LHS.name in self.flrgs:
flrgs[flr.LHS.name].append(flr.RHS) self.flrgs[flr.LHS.name].append(flr.RHS)
else: else:
flrgs[flr.LHS.name] = WeightedFLRG(flr.LHS); self.flrgs[flr.LHS.name] = WeightedFLRG(flr.LHS);
flrgs[flr.LHS.name].append(flr.RHS) self.flrgs[flr.LHS.name].append(flr.RHS)
return (flrgs)
def train(self, data, **kwargs): def train(self, data, **kwargs):
if kwargs.get('sets', None) is not None: if kwargs.get('sets', None) is not None:
@ -62,7 +60,7 @@ class WeightedFTS(fts.FTS):
ndata = self.apply_transformations(data) ndata = self.apply_transformations(data)
tmpdata = FuzzySet.fuzzyfy_series_old(ndata, self.sets) tmpdata = FuzzySet.fuzzyfy_series_old(ndata, self.sets)
flrs = FLR.generate_recurrent_flrs(tmpdata) flrs = FLR.generate_recurrent_flrs(tmpdata)
self.flrgs = self.generate_FLRG(flrs) self.generate_FLRG(flrs)
def forecast(self, data, **kwargs): def forecast(self, data, **kwargs):
l = 1 l = 1

File diff suppressed because one or more lines are too long

View File

@ -37,7 +37,7 @@ for method in fo_methods:
model = method("") model = method("")
model.append_transformation(diff) model.append_transformation(diff)
model.train(passengers, fs.sets) model.train(passengers, fs.sets)
e.appendModel(model) e.append_model(model)
for method in ho_methods: for method in ho_methods:
@ -45,7 +45,7 @@ for method in ho_methods:
model = method("") model = method("")
model.append_transformation(diff) model.append_transformation(diff)
model.train(passengers, fs.sets, order=order) model.train(passengers, fs.sets, order=order)
e.appendModel(model) e.append_model(model)
arima100 = arima.ARIMA("", alpha=0.25) arima100 = arima.ARIMA("", alpha=0.25)
@ -65,10 +65,10 @@ arima201 = arima.ARIMA("", alpha=0.25)
arima201.train(passengers, None, order=(2,0,1)) arima201.train(passengers, None, order=(2,0,1))
e.appendModel(arima100) e.append_model(arima100)
e.appendModel(arima101) e.append_model(arima101)
e.appendModel(arima200) e.append_model(arima200)
e.appendModel(arima201) e.append_model(arima201)
e.train(passengers, None) e.train(passengers, None)

View File

@ -4,16 +4,33 @@ from pyFTS.partitioners import Util
from pyFTS.common import Membership from pyFTS.common import Membership
#fs = partitioner.TimeGridPartitioner(None, 12, common.DateTime.day_of_year, func=Membership.trapmf, #fs = partitioner.TimeGridPartitioner(data=None, npart=12, seasonality=common.DateTime.day_of_year,
# func=Membership.trapmf,
# names=['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']) # names=['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'])
#fs = partitioner.TimeGridPartitioner(None, 24, common.DateTime.minute_of_day, func=Membership.trapmf) #fs = partitioner.TimeGridPartitioner(None, 24, common.DateTime.minute_of_day, func=Membership.trapmf)
fs = partitioner.TimeGridPartitioner(None, 7, common.DateTime.hour_of_week, func=Membership.trapmf) #fs = partitioner.TimeGridPartitioner(None, 7, common.DateTime.hour_of_week, func=Membership.trapmf)
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[6, 8]) #fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[6, 8])
fs.plot(ax) #fs.plot(ax)
plt.show() #plt.show()
"""
import pandas as pd
from pyFTS.data import SONDA
df = SONDA.get_dataframe()
df = df.drop(df[df.rain.values > 100].index)
df = df.drop(df[df.press.values < 800].index)
df = df.drop(df[df.humid.values < 15].index)
df.to_csv("SONDA_BSB_MOD.csv", sep=";", index=False)
"""
import os
print(os.getcwd())