- Compacting datasets with bz2
- Refactoring generate_flrg and train methods
- Introducing batches and model saving in the fit method
This commit is contained in:
parent
cbc0974a3b
commit
00db6a30ad
8
MANIFEST
8
MANIFEST
@ -1,6 +1,8 @@
|
||||
include data/Enrollments.csv
|
||||
include data/AirPassengers.csv
|
||||
include data/NASDAQ.csv
|
||||
include data/SP500.csv
|
||||
include data/NASDAQ.csv.bz2
|
||||
include data/SP500.csv.bz2
|
||||
include data/sunspots.csv
|
||||
include data/TAIEX.csv
|
||||
include data/TAIEX.csv.bz2
|
||||
include data/INMET.csv.bz2
|
||||
include data/SONDA_BSB.csv.bz2
|
@ -16,7 +16,7 @@ class FuzzySet(FuzzySet.FuzzySet):
|
||||
Create an empty composite fuzzy set
|
||||
:param name: fuzzy set name
|
||||
"""
|
||||
super(FuzzySet, self).__init__(self, name, None, None, None, type='composite')
|
||||
super(FuzzySet, self).__init__(name, None, None, None, type='composite')
|
||||
self.superset = superset
|
||||
if self.superset:
|
||||
self.sets = []
|
||||
|
@ -1,6 +1,6 @@
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from pyFTS.common import FuzzySet, SortedCollection, tree
|
||||
from pyFTS.common import FuzzySet, SortedCollection, tree, Util
|
||||
|
||||
|
||||
class FTS(object):
|
||||
@ -164,10 +164,43 @@ class FTS(object):
|
||||
|
||||
:param data:
|
||||
:param kwargs:
|
||||
|
||||
:keyword
|
||||
num_batches: split the training data into num_batches batches to save memory during the training process
|
||||
save_model: save final model on disk
|
||||
batch_save: save the model between each batch
|
||||
file_path: path to save the model
|
||||
:return:
|
||||
"""
|
||||
|
||||
num_batches = kwargs.get('num_batches', None)
|
||||
|
||||
save = kwargs.get('save_model', False) # save model on disk
|
||||
|
||||
batch_save = kwargs.get('batch_save', True) #save model between batches
|
||||
|
||||
file_path = kwargs.get('file_path', None)
|
||||
|
||||
if num_batches is not None:
|
||||
n = len(data)
|
||||
batch_size = round(n / num_batches, 0)
|
||||
for ct in range(self.order, n, batch_size):
|
||||
if self.is_multivariate:
|
||||
ndata = data.iloc[ct - self.order:ct + batch_size]
|
||||
else:
|
||||
ndata = data[ct - self.order : ct + batch_size]
|
||||
|
||||
self.train(ndata, **kwargs)
|
||||
|
||||
if batch_save:
|
||||
Util.persist_obj(self,file_path)
|
||||
|
||||
else:
|
||||
self.train(data, **kwargs)
|
||||
|
||||
if save:
|
||||
Util.persist_obj(self, file_path)
|
||||
|
||||
def append_transformation(self, transformation):
|
||||
if transformation is not None:
|
||||
self.transformations.append(transformation)
|
||||
|
BIN
pyFTS/data/INMET.csv.bz2
Normal file
BIN
pyFTS/data/INMET.csv.bz2
Normal file
Binary file not shown.
26
pyFTS/data/INMET.py
Normal file
26
pyFTS/data/INMET.py
Normal file
@ -0,0 +1,26 @@
|
||||
#--------------------
|
||||
#BDMEP - INMET
|
||||
#--------------------
|
||||
#Estação : BELO HORIZONTE - MG (OMM: 83587)
|
||||
#Latitude (graus) : -19.93
|
||||
#Longitude (graus) : -43.93
|
||||
#Altitude (metros): 915.00
|
||||
#Estação Operante
|
||||
#Inicio de operação: 03/03/1910
|
||||
#Periodo solicitado dos dados: 01/01/2000 a 31/12/2012
|
||||
#Os dados listados abaixo são os que encontram-se digitados no BDMEP
|
||||
#Hora em UTC
|
||||
|
||||
# http://www.inmet.gov.br
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import os
|
||||
import pkg_resources
|
||||
|
||||
|
||||
def get_dataframe():
    """
    Load the INMET dataset shipped with pyFTS as a pandas DataFrame.

    The bz2-compressed, semicolon-separated CSV is located through
    pkg_resources, and its "DataHora" column is parsed using the
    day/month/year hour:minute layout.

    :return: DataFrame whose "DataHora" column holds datetime values
    """
    csv_path = pkg_resources.resource_filename('pyFTS', 'data/INMET.csv.bz2')
    frame = pd.read_csv(csv_path, sep=";", compression='bz2')
    timestamps = pd.to_datetime(frame["DataHora"], format='%d/%m/%Y %H:%M')
    frame["DataHora"] = timestamps
    return frame
|
File diff suppressed because it is too large
Load Diff
BIN
pyFTS/data/NASDAQ.csv.bz2
Normal file
BIN
pyFTS/data/NASDAQ.csv.bz2
Normal file
Binary file not shown.
@ -5,7 +5,7 @@ import pkg_resources
|
||||
|
||||
|
||||
def get_data():
|
||||
filename = pkg_resources.resource_filename('pyFTS', 'data/NASDAQ.csv')
|
||||
dat = pd.read_csv(filename, sep=";")
|
||||
filename = pkg_resources.resource_filename('pyFTS', 'data/NASDAQ.csv.bz2')
|
||||
dat = pd.read_csv(filename, sep=";", compression='bz2')
|
||||
dat = np.array(dat["avg"])
|
||||
return dat
|
||||
|
18
pyFTS/data/SONDA.py
Normal file
18
pyFTS/data/SONDA.py
Normal file
@ -0,0 +1,18 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import os
|
||||
import pkg_resources
|
||||
|
||||
|
||||
def get_data(field):
    """
    Load one column of the SONDA_BSB dataset shipped with pyFTS.

    :param field: name of the CSV column to extract
    :return: numpy array built from the requested column
    """
    csv_path = pkg_resources.resource_filename('pyFTS', 'data/SONDA_BSB.csv.bz2')
    frame = pd.read_csv(csv_path, sep=";", compression='bz2')
    return np.array(frame[field])
|
||||
|
||||
|
||||
def get_dataframe():
    """
    Load the SONDA_BSB dataset shipped with pyFTS as a pandas DataFrame.

    The bz2-compressed, semicolon-separated CSV is located through
    pkg_resources, and its "datahora" column is parsed using the
    year-month-day hour:minute:second layout.

    :return: DataFrame whose "datahora" column holds datetime values
    """
    csv_path = pkg_resources.resource_filename('pyFTS', 'data/SONDA_BSB.csv.bz2')
    frame = pd.read_csv(csv_path, sep=";", compression='bz2')
    timestamps = pd.to_datetime(frame["datahora"], format='%Y-%m-%d %H:%M:%S')
    frame["datahora"] = timestamps
    return frame
|
BIN
pyFTS/data/SONDA_BSB.csv.bz2
Normal file
BIN
pyFTS/data/SONDA_BSB.csv.bz2
Normal file
Binary file not shown.
16924
pyFTS/data/SP500.csv
16924
pyFTS/data/SP500.csv
File diff suppressed because it is too large
Load Diff
BIN
pyFTS/data/SP500.csv.bz2
Normal file
BIN
pyFTS/data/SP500.csv.bz2
Normal file
Binary file not shown.
@ -5,7 +5,7 @@ import pkg_resources
|
||||
|
||||
|
||||
def get_data():
|
||||
filename = pkg_resources.resource_filename('pyFTS', 'data/SP500.csv')
|
||||
dat = pd.read_csv(filename, sep=",")
|
||||
filename = pkg_resources.resource_filename('pyFTS', 'data/SP500.csv.bz2')
|
||||
dat = pd.read_csv(filename, sep=",", compression='bz2')
|
||||
dat = np.array(dat["Avg"])
|
||||
return dat
|
||||
|
5261
pyFTS/data/TAIEX.csv
5261
pyFTS/data/TAIEX.csv
File diff suppressed because it is too large
Load Diff
BIN
pyFTS/data/TAIEX.csv.bz2
Normal file
BIN
pyFTS/data/TAIEX.csv.bz2
Normal file
Binary file not shown.
@ -5,14 +5,14 @@ import pkg_resources
|
||||
|
||||
|
||||
def get_data():
|
||||
filename = pkg_resources.resource_filename('pyFTS', 'data/TAIEX.csv')
|
||||
dat = pd.read_csv(filename, sep=",")
|
||||
filename = pkg_resources.resource_filename('pyFTS', 'data/TAIEX.csv.bz2')
|
||||
dat = pd.read_csv(filename, sep=",", compression='bz2')
|
||||
dat = np.array(dat["avg"])
|
||||
return dat
|
||||
|
||||
|
||||
def get_dataframe():
|
||||
filename = pkg_resources.resource_filename('pyFTS', 'data/TAIEX.csv')
|
||||
dat = pd.read_csv(filename, sep=",")
|
||||
filename = pkg_resources.resource_filename('pyFTS', 'data/TAIEX.csv.bz2')
|
||||
dat = pd.read_csv(filename, sep=",", compression='bz2')
|
||||
dat["Date"] = pd.to_datetime(dat["Date"])
|
||||
return dat
|
||||
|
@ -37,14 +37,12 @@ class ConventionalFTS(fts.FTS):
|
||||
self.flrgs = {}
|
||||
|
||||
def generate_flrg(self, flrs):
|
||||
flrgs = {}
|
||||
for flr in flrs:
|
||||
if flr.LHS.name in flrgs:
|
||||
flrgs[flr.LHS.name].append(flr.RHS)
|
||||
if flr.LHS.name in self.flrgs:
|
||||
self.flrgs[flr.LHS.name].append(flr.RHS)
|
||||
else:
|
||||
flrgs[flr.LHS.name] = ConventionalFLRG(flr.LHS)
|
||||
flrgs[flr.LHS.name].append(flr.RHS)
|
||||
return (flrgs)
|
||||
self.flrgs[flr.LHS.name] = ConventionalFLRG(flr.LHS)
|
||||
self.flrgs[flr.LHS.name].append(flr.RHS)
|
||||
|
||||
def train(self, data, **kwargs):
|
||||
if kwargs.get('sets', None) is not None:
|
||||
@ -52,7 +50,7 @@ class ConventionalFTS(fts.FTS):
|
||||
ndata = self.apply_transformations(data)
|
||||
tmpdata = FuzzySet.fuzzyfy_series_old(ndata, self.sets)
|
||||
flrs = FLR.generate_non_recurrent_flrs(tmpdata)
|
||||
self.flrgs = self.generate_flrg(flrs)
|
||||
self.generate_flrg(flrs)
|
||||
|
||||
def forecast(self, data, **kwargs):
|
||||
|
||||
|
@ -53,11 +53,9 @@ class TrendWeightedFTS(yu.WeightedFTS):
|
||||
self.is_high_order = False
|
||||
|
||||
def generate_FLRG(self, flrs):
|
||||
flrgs = {}
|
||||
for flr in flrs:
|
||||
if flr.LHS.name in flrgs:
|
||||
flrgs[flr.LHS.name].append(flr.RHS)
|
||||
if flr.LHS.name in self.flrgs:
|
||||
self.flrgs[flr.LHS.name].append(flr.RHS)
|
||||
else:
|
||||
flrgs[flr.LHS.name] = TrendWeightedFLRG(flr.LHS)
|
||||
flrgs[flr.LHS.name].append(flr.RHS)
|
||||
return (flrgs)
|
||||
self.flrgs[flr.LHS.name] = TrendWeightedFLRG(flr.LHS)
|
||||
self.flrgs[flr.LHS.name].append(flr.RHS)
|
||||
|
@ -32,12 +32,12 @@ class EnsembleFTS(fts.FTS):
|
||||
self.point_method = kwargs.get('point_method', 'mean')
|
||||
self.interval_method = kwargs.get('interval_method', 'quantile')
|
||||
|
||||
def appendModel(self, model):
|
||||
def append_model(self, model):
|
||||
self.models.append(model)
|
||||
if model.order > self.order:
|
||||
self.order = model.order
|
||||
|
||||
def train(self, data, sets, order=1,parameters=None):
|
||||
def train(self, data, **kwargs):
|
||||
self.original_max = max(data)
|
||||
self.original_min = min(data)
|
||||
|
||||
@ -228,10 +228,12 @@ class AllMethodEnsembleFTS(EnsembleFTS):
|
||||
for t in self.transformations:
|
||||
model.append_transformation(t)
|
||||
|
||||
def train(self, data, sets, order=1, parameters=None):
|
||||
def train(self, data, **kwargs):
|
||||
self.original_max = max(data)
|
||||
self.original_min = min(data)
|
||||
|
||||
order = kwargs.get('order',2)
|
||||
|
||||
fo_methods = [song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS, cheng.TrendWeightedFTS,
|
||||
sadaei.ExponentialyWeightedFTS, ismailefendi.ImprovedWeightedFTS]
|
||||
|
||||
@ -240,16 +242,16 @@ class AllMethodEnsembleFTS(EnsembleFTS):
|
||||
for method in fo_methods:
|
||||
model = method("")
|
||||
self.set_transformations(model)
|
||||
model.train(data, sets)
|
||||
self.appendModel(model)
|
||||
model.train(data, **kwargs)
|
||||
self.append_model(model)
|
||||
|
||||
for method in ho_methods:
|
||||
for o in np.arange(1, order+1):
|
||||
model = method("")
|
||||
if model.min_order >= o:
|
||||
self.set_transformations(model)
|
||||
model.train(data, sets, order=o)
|
||||
self.appendModel(model)
|
||||
model.train(data, **kwargs)
|
||||
self.append_model(model)
|
||||
|
||||
|
||||
|
||||
|
@ -41,7 +41,7 @@ class SeasonalEnsembleFTS(ensemble.EnsembleFTS):
|
||||
self.original_max = max(self.indexer.get_data(data))
|
||||
self.original_min = min(self.indexer.get_data(data))
|
||||
|
||||
def train(self, data, sets, order=1, parameters=None):
|
||||
def train(self, data, **kwargs):
|
||||
self.original_max = max(self.indexer.get_data(data))
|
||||
self.original_min = min(self.indexer.get_data(data))
|
||||
|
||||
@ -59,7 +59,7 @@ class SeasonalEnsembleFTS(ensemble.EnsembleFTS):
|
||||
for m in pool.keys())
|
||||
|
||||
for tmp in results:
|
||||
self.appendModel(tmp)
|
||||
self.append_model(tmp)
|
||||
|
||||
cUtil.persist_obj(self, "models/"+self.name+".pkl")
|
||||
|
||||
|
@ -77,7 +77,6 @@ class HighOrderFTS(fts.FTS):
|
||||
self.build_tree_without_order(child, lags, level + 1)
|
||||
|
||||
def generateFLRG(self, flrs):
|
||||
flrgs = {}
|
||||
l = len(flrs)
|
||||
for k in np.arange(self.order + 1, l):
|
||||
flrg = HighOrderFLRG(self.order)
|
||||
@ -85,15 +84,13 @@ class HighOrderFTS(fts.FTS):
|
||||
for kk in np.arange(k - self.order, k):
|
||||
flrg.append_lhs(flrs[kk].LHS)
|
||||
|
||||
if flrg.str_lhs() in flrgs:
|
||||
flrgs[flrg.str_lhs()].append_rhs(flrs[k].RHS)
|
||||
if flrg.str_lhs() in self.flrgs:
|
||||
self.flrgs[flrg.str_lhs()].append_rhs(flrs[k].RHS)
|
||||
else:
|
||||
flrgs[flrg.str_lhs()] = flrg;
|
||||
flrgs[flrg.str_lhs()].append_rhs(flrs[k].RHS)
|
||||
return (flrgs)
|
||||
self.flrgs[flrg.str_lhs()] = flrg;
|
||||
self.flrgs[flrg.str_lhs()].append_rhs(flrs[k].RHS)
|
||||
|
||||
def generate_flrg(self, data):
|
||||
flrgs = {}
|
||||
l = len(data)
|
||||
for k in np.arange(self.order, l):
|
||||
if self.dump: print("FLR: " + str(k))
|
||||
@ -121,13 +118,12 @@ class HighOrderFTS(fts.FTS):
|
||||
for lhs in path:
|
||||
flrg.append_lhs(lhs)
|
||||
|
||||
if flrg.str_lhs() not in flrgs:
|
||||
flrgs[flrg.str_lhs()] = flrg;
|
||||
if flrg.str_lhs() not in self.flrgs:
|
||||
self.flrgs[flrg.str_lhs()] = flrg;
|
||||
|
||||
for st in rhs:
|
||||
flrgs[flrg.str_lhs()].append_rhs(st)
|
||||
self.flrgs[flrg.str_lhs()].append_rhs(st)
|
||||
|
||||
return flrgs
|
||||
|
||||
def train(self, data, **kwargs):
|
||||
|
||||
@ -138,7 +134,7 @@ class HighOrderFTS(fts.FTS):
|
||||
if kwargs.get('sets', None) is not None:
|
||||
self.sets = kwargs.get('sets', None)
|
||||
for s in self.sets: self.setsDict[s.name] = s
|
||||
self.flrgs = self.generate_flrg(data)
|
||||
self.generate_flrg(data)
|
||||
|
||||
def forecast(self, data, **kwargs):
|
||||
|
||||
|
@ -54,14 +54,12 @@ class ImprovedWeightedFTS(fts.FTS):
|
||||
self.setsDict = {}
|
||||
|
||||
def generate_flrg(self, flrs):
|
||||
flrgs = {}
|
||||
for flr in flrs:
|
||||
if flr.LHS.name in flrgs:
|
||||
flrgs[flr.LHS.name].append(flr.RHS)
|
||||
if flr.LHS.name in self.flrgs:
|
||||
self.flrgs[flr.LHS.name].append(flr.RHS)
|
||||
else:
|
||||
flrgs[flr.LHS.name] = ImprovedWeightedFLRG(flr.LHS);
|
||||
flrgs[flr.LHS.name].append(flr.RHS)
|
||||
return (flrgs)
|
||||
self.flrgs[flr.LHS.name] = ImprovedWeightedFLRG(flr.LHS);
|
||||
self.flrgs[flr.LHS.name].append(flr.RHS)
|
||||
|
||||
def train(self, data, **kwargs):
|
||||
if kwargs.get('sets', None) is not None:
|
||||
@ -73,7 +71,7 @@ class ImprovedWeightedFTS(fts.FTS):
|
||||
|
||||
tmpdata = FuzzySet.fuzzyfy_series_old(ndata, self.sets)
|
||||
flrs = FLR.generate_recurrent_flrs(tmpdata)
|
||||
self.flrgs = self.generate_flrg(flrs)
|
||||
self.generate_flrg(flrs)
|
||||
|
||||
def forecast(self, data, **kwargs):
|
||||
l = 1
|
||||
|
@ -15,6 +15,7 @@ class MVFTS(fts.FTS):
|
||||
self.explanatory_variables = []
|
||||
self.target_variable = None
|
||||
self.flrgs = {}
|
||||
self.is_multivariate = True
|
||||
|
||||
def append_variable(self, var):
|
||||
self.explanatory_variables.append(var)
|
||||
@ -76,24 +77,21 @@ class MVFTS(fts.FTS):
|
||||
return flrs
|
||||
|
||||
def generate_flrg(self, flrs):
|
||||
flrgs = {}
|
||||
|
||||
for flr in flrs:
|
||||
flrg = mvflrg.FLRG(lhs=flr.LHS)
|
||||
|
||||
if flrg.get_key() not in flrgs:
|
||||
flrgs[flrg.get_key()] = flrg
|
||||
if flrg.get_key() not in self.flrgs:
|
||||
self.flrgs[flrg.get_key()] = flrg
|
||||
|
||||
flrgs[flrg.get_key()].append_rhs(flr.RHS)
|
||||
self.flrgs[flrg.get_key()].append_rhs(flr.RHS)
|
||||
|
||||
return flrgs
|
||||
|
||||
def train(self, data, **kwargs):
|
||||
|
||||
ndata = self.apply_transformations(data)
|
||||
|
||||
flrs = self.generate_flrs(ndata)
|
||||
self.flrgs = self.generate_flrg(flrs)
|
||||
self.generate_flrg(flrs)
|
||||
|
||||
def forecast(self, data, **kwargs):
|
||||
ret = []
|
||||
|
@ -33,9 +33,10 @@ class Variable:
|
||||
mf = kwargs.get('func', Membership.trimf)
|
||||
np = kwargs.get('npart', 10)
|
||||
data = kwargs.get('data', None)
|
||||
kw = kwargs.get('partitioner_specific', {})
|
||||
self.partitioner = fs(data=data[self.data_label].values, npart=np, func=mf,
|
||||
transformation=self.transformation, prefix=self.alias,
|
||||
variable=self.name)
|
||||
variable=self.name, **kw)
|
||||
|
||||
self.partitioner.name = self.name + " " + self.partitioner.name
|
||||
|
||||
|
@ -31,7 +31,7 @@ class FuzzySet(FS.FuzzySet):
|
||||
- noise: Perturbation function that adds noise to the membership function
|
||||
- noise_params: Parameters for the noise perturbation function
|
||||
"""
|
||||
super(FuzzySet, self).__init__(name=name, mf=mf, parameters=parameters, centroid=None)
|
||||
super(FuzzySet, self).__init__(name=name, mf=mf, parameters=parameters, centroid=None, alpha=1.0, **kwargs)
|
||||
|
||||
self.location = kwargs.get("location", None)
|
||||
self.location_params = kwargs.get("location_params", None)
|
||||
@ -42,6 +42,7 @@ class FuzzySet(FS.FuzzySet):
|
||||
self.noise = kwargs.get("noise", None)
|
||||
self.noise_params = kwargs.get("noise_params", None)
|
||||
self.perturbated_parameters = {}
|
||||
self.type = 'nonstationary'
|
||||
|
||||
if self.location is not None and not isinstance(self.location, (list, set)):
|
||||
self.location = [self.location]
|
||||
|
@ -22,11 +22,9 @@ class ConditionalVarianceFTS(chen.ConventionalFTS):
|
||||
self.min_stack = [0,0,0]
|
||||
self.max_stack = [0,0,0]
|
||||
|
||||
def train(self, data, sets = None, order=1,parameters=None):
|
||||
if sets is not None:
|
||||
self.sets = sets
|
||||
else:
|
||||
self.sets = self.partitioner.sets
|
||||
def train(self, data, **kwargs):
|
||||
if kwargs.get('sets', None) is not None:
|
||||
self.sets = kwargs.get('sets', None)
|
||||
|
||||
ndata = self.apply_transformations(data)
|
||||
|
||||
@ -35,17 +33,15 @@ class ConditionalVarianceFTS(chen.ConventionalFTS):
|
||||
|
||||
tmpdata = common.fuzzySeries(ndata, self.sets, method='fuzzy', const_t=0)
|
||||
flrs = FLR.generate_non_recurrent_flrs(tmpdata)
|
||||
self.flrgs = self.generate_flrg(flrs)
|
||||
self.generate_flrg(flrs)
|
||||
|
||||
def generate_flrg(self, flrs, **kwargs):
|
||||
flrgs = {}
|
||||
for flr in flrs:
|
||||
if flr.LHS.name in flrgs:
|
||||
flrgs[flr.LHS.name].append(flr.RHS)
|
||||
if flr.LHS.name in self.flrgs:
|
||||
self.flrgs[flr.LHS.name].append(flr.RHS)
|
||||
else:
|
||||
flrgs[flr.LHS.name] = nsfts.ConventionalNonStationaryFLRG(flr.LHS)
|
||||
flrgs[flr.LHS.name].append(flr.RHS)
|
||||
return flrgs
|
||||
self.flrgs[flr.LHS.name] = nsfts.ConventionalNonStationaryFLRG(flr.LHS)
|
||||
self.flrgs[flr.LHS.name].append(flr.RHS)
|
||||
|
||||
def _smooth(self, a):
|
||||
return .1 * a[0] + .3 * a[1] + .6 * a[2]
|
||||
|
@ -46,7 +46,6 @@ class HighOrderNonStationaryFTS(hofts.HighOrderFTS):
|
||||
self.flrgs = {}
|
||||
|
||||
def generate_flrg(self, data, **kwargs):
|
||||
flrgs = {}
|
||||
l = len(data)
|
||||
window_size = kwargs.get("window_size", 1)
|
||||
for k in np.arange(self.order, l):
|
||||
@ -84,30 +83,27 @@ class HighOrderNonStationaryFTS(hofts.HighOrderFTS):
|
||||
for c, e in enumerate(path, start=0):
|
||||
flrg.appendLHS(e)
|
||||
|
||||
if flrg.strLHS() not in flrgs:
|
||||
flrgs[flrg.strLHS()] = flrg;
|
||||
if flrg.strLHS() not in self.flrgs:
|
||||
self.flrgs[flrg.strLHS()] = flrg;
|
||||
|
||||
for st in rhs:
|
||||
flrgs[flrg.strLHS()].append_rhs(st)
|
||||
self.flrgs[flrg.strLHS()].append_rhs(st)
|
||||
|
||||
# flrgs = sorted(flrgs, key=lambda flrg: flrg.get_midpoint(0, window_size=1))
|
||||
|
||||
return flrgs
|
||||
def train(self, data, **kwargs):
|
||||
|
||||
def train(self, data, sets=None, order=2, parameters=None):
|
||||
if kwargs.get('order', None) is not None:
|
||||
self.order = kwargs.get('order', 1)
|
||||
|
||||
self.order = order
|
||||
|
||||
if sets is not None:
|
||||
self.sets = sets
|
||||
else:
|
||||
self.sets = self.partitioner.sets
|
||||
if kwargs.get('sets', None) is not None:
|
||||
self.sets = kwargs.get('sets', None)
|
||||
|
||||
ndata = self.apply_transformations(data)
|
||||
#tmpdata = common.fuzzyfy_series_old(ndata, self.sets)
|
||||
#flrs = FLR.generate_recurrent_flrs(ndata)
|
||||
window_size = parameters if parameters is not None else 1
|
||||
self.flrgs = self.generate_flrg(ndata, window_size=window_size)
|
||||
window_size = kwargs.get('parameters', 1)
|
||||
self.generate_flrg(ndata, window_size=window_size)
|
||||
|
||||
def _affected_flrgs(self, sample, k, time_displacement, window_size):
|
||||
# print("input: " + str(ndata[k]))
|
||||
|
@ -1,6 +1,6 @@
|
||||
import numpy as np
|
||||
from pyFTS.common import FLR, fts
|
||||
from pyFTS.nonstationary import common, flrg
|
||||
from pyFTS.models.nonstationary import common, flrg
|
||||
|
||||
|
||||
class ConventionalNonStationaryFLRG(flrg.NonStationaryFLRG):
|
||||
@ -34,29 +34,25 @@ class NonStationaryFTS(fts.FTS):
|
||||
self.method = kwargs.get("method",'fuzzy')
|
||||
|
||||
def generate_flrg(self, flrs, **kwargs):
|
||||
flrgs = {}
|
||||
for flr in flrs:
|
||||
if flr.LHS.name in flrgs:
|
||||
flrgs[flr.LHS.name].append(flr.RHS)
|
||||
if flr.LHS.name in self.flrgs:
|
||||
self.flrgs[flr.LHS.name].append(flr.RHS)
|
||||
else:
|
||||
flrgs[flr.LHS.name] = ConventionalNonStationaryFLRG(flr.LHS)
|
||||
flrgs[flr.LHS.name].append(flr.RHS)
|
||||
return flrgs
|
||||
self.flrgs[flr.LHS.name] = ConventionalNonStationaryFLRG(flr.LHS)
|
||||
self.flrgs[flr.LHS.name].append(flr.RHS)
|
||||
|
||||
def train(self, data, sets=None, order=1, parameters=None):
|
||||
def train(self, data, **kwargs):
|
||||
|
||||
if sets is not None:
|
||||
self.sets = sets
|
||||
else:
|
||||
self.sets = self.partitioner.sets
|
||||
if kwargs.get('sets', None) is not None:
|
||||
self.sets = kwargs.get('sets', None)
|
||||
|
||||
ndata = self.apply_transformations(data)
|
||||
window_size = parameters if parameters is not None else 1
|
||||
window_size = kwargs.get('parameters', 1)
|
||||
tmpdata = common.fuzzySeries(ndata, self.sets, window_size, method=self.method)
|
||||
#print([k[0].name for k in tmpdata])
|
||||
flrs = FLR.generate_recurrent_flrs(tmpdata)
|
||||
#print([str(k) for k in flrs])
|
||||
self.flrgs = self.generate_flrg(flrs)
|
||||
self.generate_flrg(flrs)
|
||||
|
||||
def forecast(self, data, **kwargs):
|
||||
|
||||
|
@ -128,12 +128,11 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
|
||||
if parameters == 'Monotonic':
|
||||
tmpdata = FuzzySet.fuzzyfy_series_old(data, self.sets)
|
||||
flrs = FLR.generate_recurrent_flrs(tmpdata)
|
||||
self.flrgs = self.generateFLRG(flrs)
|
||||
self.generateFLRG(flrs)
|
||||
else:
|
||||
self.flrgs = self.generate_flrg(data)
|
||||
self.generate_flrg(data)
|
||||
|
||||
def generate_flrg(self, data):
|
||||
flrgs = {}
|
||||
l = len(data)
|
||||
for k in np.arange(self.order, l):
|
||||
if self.dump: print("FLR: " + str(k))
|
||||
@ -168,20 +167,17 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
|
||||
|
||||
lhs_mv = np.prod(tmp_path)
|
||||
|
||||
if flrg.str_lhs() not in flrgs:
|
||||
flrgs[flrg.str_lhs()] = flrg;
|
||||
if flrg.str_lhs() not in self.flrgs:
|
||||
self.flrgs[flrg.str_lhs()] = flrg;
|
||||
|
||||
for st in idx:
|
||||
flrgs[flrg.str_lhs()].appendRHSFuzzy(self.sets[st], lhs_mv * mv[st])
|
||||
self.flrgs[flrg.str_lhs()].appendRHSFuzzy(self.sets[st], lhs_mv * mv[st])
|
||||
|
||||
tmp_fq = sum([lhs_mv*kk for kk in mv if kk > 0])
|
||||
|
||||
self.global_frequency_count = self.global_frequency_count + tmp_fq
|
||||
|
||||
return flrgs
|
||||
|
||||
def generateFLRG(self, flrs):
|
||||
flrgs = {}
|
||||
l = len(flrs)
|
||||
for k in np.arange(self.order, l+1):
|
||||
if self.dump: print("FLR: " + str(k))
|
||||
@ -191,15 +187,14 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
|
||||
flrg.append_lhs(flrs[kk].LHS)
|
||||
if self.dump: print("LHS: " + str(flrs[kk]))
|
||||
|
||||
if flrg.str_lhs() in flrgs:
|
||||
flrgs[flrg.str_lhs()].append_rhs(flrs[k - 1].RHS)
|
||||
if flrg.str_lhs() in self.flrgs:
|
||||
self.flrgs[flrg.str_lhs()].append_rhs(flrs[k - 1].RHS)
|
||||
else:
|
||||
flrgs[flrg.str_lhs()] = flrg
|
||||
flrgs[flrg.str_lhs()].append_rhs(flrs[k - 1].RHS)
|
||||
self.flrgs[flrg.str_lhs()] = flrg
|
||||
self.flrgs[flrg.str_lhs()].append_rhs(flrs[k - 1].RHS)
|
||||
if self.dump: print("RHS: " + str(flrs[k-1]))
|
||||
|
||||
self.global_frequency_count += 1
|
||||
return (flrgs)
|
||||
|
||||
def update_model(self,data):
|
||||
|
||||
|
@ -58,14 +58,12 @@ class ExponentialyWeightedFTS(fts.FTS):
|
||||
self.c = kwargs.get('c', default_c)
|
||||
|
||||
def generate_flrg(self, flrs, c):
|
||||
flrgs = {}
|
||||
for flr in flrs:
|
||||
if flr.LHS.name in flrgs:
|
||||
flrgs[flr.LHS.name].append(flr.RHS)
|
||||
if flr.LHS.name in self.flrgs:
|
||||
self.flrgs[flr.LHS.name].append(flr.RHS)
|
||||
else:
|
||||
flrgs[flr.LHS.name] = ExponentialyWeightedFLRG(flr.LHS, c=c);
|
||||
flrgs[flr.LHS.name].append(flr.RHS)
|
||||
return (flrgs)
|
||||
self.flrgs[flr.LHS.name] = ExponentialyWeightedFLRG(flr.LHS, c=c);
|
||||
self.flrgs[flr.LHS.name].append(flr.RHS)
|
||||
|
||||
def train(self, data, **kwargs):
|
||||
self.c = kwargs.get('parameters', default_c)
|
||||
@ -74,7 +72,7 @@ class ExponentialyWeightedFTS(fts.FTS):
|
||||
ndata = self.apply_transformations(data)
|
||||
tmpdata = FuzzySet.fuzzyfy_series_old(ndata, self.sets)
|
||||
flrs = FLR.generate_recurrent_flrs(tmpdata)
|
||||
self.flrgs = self.generate_flrg(flrs, self.c)
|
||||
self.generate_flrg(flrs, self.c)
|
||||
|
||||
def forecast(self, data, **kwargs):
|
||||
l = 1
|
||||
|
@ -44,25 +44,23 @@ class ContextualMultiSeasonalFTS(sfts.SeasonalFTS):
|
||||
self.indexer = indexer
|
||||
self.flrgs = {}
|
||||
|
||||
def generateFLRG(self, flrs):
|
||||
flrgs = {}
|
||||
|
||||
def generate_flrg(self, flrs):
|
||||
for flr in flrs:
|
||||
|
||||
if str(flr.index) not in flrgs:
|
||||
flrgs[str(flr.index)] = ContextualSeasonalFLRG(flr.index)
|
||||
if str(flr.index) not in self.flrgs:
|
||||
self.flrgs[str(flr.index)] = ContextualSeasonalFLRG(flr.index)
|
||||
|
||||
flrgs[str(flr.index)].append(flr)
|
||||
self.flrgs[str(flr.index)].append(flr)
|
||||
|
||||
return (flrgs)
|
||||
|
||||
def train(self, data, sets, order=1, parameters=None):
|
||||
self.sets = sets
|
||||
self.seasonality = parameters
|
||||
def train(self, data, **kwargs):
|
||||
if kwargs.get('sets', None) is not None:
|
||||
self.sets = kwargs.get('sets', None)
|
||||
if kwargs.get('parameters', None) is not None:
|
||||
self.seasonality = kwargs.get('parameters', None)
|
||||
flrs = FLR.generate_indexed_flrs(self.sets, self.indexer, data)
|
||||
self.flrgs = self.generateFLRG(flrs)
|
||||
self.generate_flrg(flrs)
|
||||
|
||||
def getMidpoints(self, flrg, data):
|
||||
def get_midpoints(self, flrg, data):
|
||||
if data.name in flrg.flrgs:
|
||||
ret = np.array([s.centroid for s in flrg.flrgs[data.name].RHS])
|
||||
return ret
|
||||
@ -82,7 +80,7 @@ class ContextualMultiSeasonalFTS(sfts.SeasonalFTS):
|
||||
|
||||
d = FuzzySet.get_maximum_membership_fuzzyset(ndata[k], self.sets)
|
||||
|
||||
mp = self.getMidpoints(flrg, d)
|
||||
mp = self.get_midpoints(flrg, d)
|
||||
|
||||
ret.append(sum(mp) / len(mp))
|
||||
|
||||
@ -90,12 +88,12 @@ class ContextualMultiSeasonalFTS(sfts.SeasonalFTS):
|
||||
|
||||
return ret
|
||||
|
||||
def forecastAhead(self, data, steps, **kwargs):
|
||||
def forecast_ahead(self, data, steps, **kwargs):
|
||||
ret = []
|
||||
for i in steps:
|
||||
flrg = self.flrgs[str(i)]
|
||||
|
||||
mp = self.getMidpoints(flrg)
|
||||
mp = self.get_midpoints(flrg)
|
||||
|
||||
ret.append(sum(mp) / len(mp))
|
||||
|
||||
|
@ -28,7 +28,7 @@ class DateTime(Enum):
|
||||
second_of_day = 86400
|
||||
|
||||
|
||||
def strip_datepart(self, date, date_part):
|
||||
def strip_datepart(date, date_part):
|
||||
if date_part == DateTime.year:
|
||||
tmp = date.year
|
||||
elif date_part == DateTime.month:
|
||||
@ -90,7 +90,8 @@ class FuzzySet(FuzzySet.FuzzySet):
|
||||
def __init__(self, datepart, name, mf, parameters, centroid, alpha=1.0, **kwargs):
|
||||
super(FuzzySet, self).__init__(name, mf, parameters, centroid, alpha, type = 'datetime', **kwargs)
|
||||
self.datepart = datepart
|
||||
self.type = 'seasonal'
|
||||
|
||||
def membership(self, x):
|
||||
dp = strip_datepart(x, self.datepart)
|
||||
return self.mf.membership(dp)
|
||||
return self.mf(dp, self.parameters) * self.alpha
|
@ -20,24 +20,22 @@ class MultiSeasonalFTS(sfts.SeasonalFTS):
|
||||
self.indexer = indexer
|
||||
self.flrgs = {}
|
||||
|
||||
def generateFLRG(self, flrs):
|
||||
flrgs = {}
|
||||
|
||||
def generate_flrg(self, flrs):
|
||||
for flr in flrs:
|
||||
|
||||
if str(flr.index) not in self.flrgs:
|
||||
flrgs[str(flr.index)] = sfts.SeasonalFLRG(flr.index)
|
||||
self.flrgs[str(flr.index)] = sfts.SeasonalFLRG(flr.index)
|
||||
|
||||
flrgs[str(flr.index)].append(flr.RHS)
|
||||
self.flrgs[str(flr.index)].append(flr.RHS)
|
||||
|
||||
return (flrgs)
|
||||
|
||||
def train(self, data, sets, order=1, parameters=None):
|
||||
self.sets = sets
|
||||
self.seasonality = parameters
|
||||
def train(self, data, **kwargs):
|
||||
if kwargs.get('sets', None) is not None:
|
||||
self.sets = kwargs.get('sets', None)
|
||||
if kwargs.get('parameters', None) is not None:
|
||||
self.seasonality = kwargs.get('parameters', None)
|
||||
#ndata = self.indexer.set_data(data,self.doTransformations(self.indexer.get_data(data)))
|
||||
flrs = FLR.generate_indexed_flrs(self.sets, self.indexer, data)
|
||||
self.flrgs = self.generateFLRG(flrs)
|
||||
self.generate_flrg(flrs)
|
||||
|
||||
def forecast(self, data, **kwargs):
|
||||
|
||||
|
@ -17,7 +17,7 @@ class TimeGridPartitioner(partitioner.Partitioner):
|
||||
:param npart: The number of universe of discourse partitions, i.e., the number of fuzzy sets that will be created
|
||||
:param func: Fuzzy membership function (pyFTS.common.Membership)
|
||||
"""
|
||||
super(TimeGridPartitioner, self).__init__(name="TimeGrid", **kwargs)
|
||||
super(TimeGridPartitioner, self).__init__(name="TimeGrid", preprocess=False, **kwargs)
|
||||
|
||||
self.season = kwargs.get('seasonality', DateTime.day_of_year)
|
||||
data = kwargs.get('data', None)
|
||||
@ -101,11 +101,12 @@ class TimeGridPartitioner(partitioner.Partitioner):
|
||||
ticks = []
|
||||
x = []
|
||||
for s in self.sets:
|
||||
if s.type == 'common':
|
||||
self.plot_set(ax, s)
|
||||
elif s.type == 'composite':
|
||||
if s.type == 'composite':
|
||||
for ss in s.sets:
|
||||
self.plot_set(ax, ss)
|
||||
# ticks.append(str(round(s.centroid, 0)) + '\n' + s.name)
|
||||
# x.append(s.centroid)
|
||||
# plt.xticks(x, ticks)
|
||||
else:
|
||||
self.plot_set(ax, s)
|
||||
ticks.append(str(round(s.centroid, 0)) + '\n' + s.name)
|
||||
x.append(s.centroid)
|
||||
ax.xaxis.set_ticklabels(ticks)
|
||||
ax.xaxis.set_ticks(x)
|
||||
|
@ -43,29 +43,29 @@ class SeasonalFTS(fts.FTS):
|
||||
self.has_seasonality = True
|
||||
self.has_point_forecasting = True
|
||||
self.is_high_order = False
|
||||
self.flrgs = {}
|
||||
|
||||
def generate_flrg(self, flrs):
|
||||
|
||||
def generateFLRG(self, flrs):
|
||||
flrgs = {}
|
||||
for ct, flr in enumerate(flrs, start=1):
|
||||
|
||||
season = self.indexer.get_season_by_index(ct)[0]
|
||||
|
||||
ss = str(season)
|
||||
|
||||
if ss not in flrgs:
|
||||
flrgs[ss] = SeasonalFLRG(season)
|
||||
if ss not in self.flrgs:
|
||||
self.flrgs[ss] = SeasonalFLRG(season)
|
||||
|
||||
#print(season)
|
||||
flrgs[ss].append(flr.RHS)
|
||||
self.flrgs[ss].append(flr.RHS)
|
||||
|
||||
return (flrgs)
|
||||
|
||||
def train(self, data, sets, order=1, parameters=None):
|
||||
self.sets = sets
|
||||
def train(self, data, **kwargs):
|
||||
if kwargs.get('sets', None) is not None:
|
||||
self.sets = kwargs.get('sets', None)
|
||||
ndata = self.apply_transformations(data)
|
||||
tmpdata = FuzzySet.fuzzyfy_series_old(ndata, sets)
|
||||
tmpdata = FuzzySet.fuzzyfy_series_old(ndata, self.sets)
|
||||
flrs = FLR.generate_recurrent_flrs(tmpdata)
|
||||
self.flrgs = self.generateFLRG(flrs)
|
||||
self.generate_flrg(flrs)
|
||||
|
||||
def forecast(self, data, **kwargs):
|
||||
|
||||
|
@ -14,8 +14,15 @@ class ConventionalFTS(fts.FTS):
|
||||
super(ConventionalFTS, self).__init__(1, "FTS " + name, **kwargs)
|
||||
self.name = "Traditional FTS"
|
||||
self.detail = "Song & Chissom"
|
||||
if self.sets is not None and self.partitioner is not None:
|
||||
self.sets = self.partitioner.sets
|
||||
|
||||
self.R = None
|
||||
|
||||
if self.sets is not None:
|
||||
self.R = np.zeros((len(self.sets),len(self.sets)))
|
||||
|
||||
|
||||
def flr_membership_matrix(self, flr):
|
||||
lm = [flr.LHS.membership(k.centroid) for k in self.sets]
|
||||
rm = [flr.RHS.membership(k.centroid) for k in self.sets]
|
||||
@ -28,14 +35,14 @@ class ConventionalFTS(fts.FTS):
|
||||
return r
|
||||
|
||||
def operation_matrix(self, flrs):
    """
    Fold the membership matrices of the given FLRs into self.R.

    self.R is created as a zero matrix of size len(self.sets) x
    len(self.sets) when it does not exist yet; otherwise the existing
    matrix is kept, so repeated calls keep accumulating into the same
    relationship matrix. Each cell ends up holding the largest value seen
    for that cell across all membership matrices.

    :param flrs: iterable of fuzzy logical relationships, each accepted by
                 self.flr_membership_matrix
    :return: the updated matrix self.R (same object that is stored on self)
    """
    n = len(self.sets)

    if self.R is None:
        self.R = np.zeros((n, n))

    for flr in flrs:
        mm = self.flr_membership_matrix(flr)
        for i in range(n):
            for j in range(n):
                # Compare against the accumulated matrix itself: the old
                # local accumulator `r` is gone, so reading it would raise
                # NameError.
                self.R[i][j] = max(self.R[i][j], mm[i][j])

    return self.R
|
||||
|
||||
def train(self, data, **kwargs):
|
||||
if kwargs.get('sets', None) is not None:
|
||||
@ -43,7 +50,7 @@ class ConventionalFTS(fts.FTS):
|
||||
ndata = self.apply_transformations(data)
|
||||
tmpdata = FuzzySet.fuzzyfy_series_old(ndata, self.sets)
|
||||
flrs = FLR.generate_non_recurrent_flrs(tmpdata)
|
||||
self.R = self.operation_matrix(flrs)
|
||||
self.operation_matrix(flrs)
|
||||
|
||||
def forecast(self, data, **kwargs):
|
||||
|
||||
|
@ -47,14 +47,12 @@ class WeightedFTS(fts.FTS):
|
||||
self.detail = "Yu"
|
||||
|
||||
def generate_FLRG(self, flrs):
    """
    Group the given FLRs by the name of their left-hand side inside ``self.flrgs``.

    A ``WeightedFLRG`` is created the first time a left-hand side is seen;
    every FLR then appends its right-hand side to the group of its LHS.
    Groups already present in ``self.flrgs`` are extended, not replaced.

    :param flrs: iterable of fuzzy logical relationships exposing ``LHS`` (with
                 a ``name`` attribute) and ``RHS``
    :return: ``self.flrgs``, so that callers written as
             ``self.flrgs = self.generate_FLRG(flrs)`` keep working
    """
    for flr in flrs:
        key = flr.LHS.name
        if key not in self.flrgs:
            self.flrgs[key] = WeightedFLRG(flr.LHS)
        # Single append shared by both the new-group and existing-group paths.
        self.flrgs[key].append(flr.RHS)

    return self.flrgs
|
||||
|
||||
def train(self, data, **kwargs):
|
||||
if kwargs.get('sets', None) is not None:
|
||||
@ -62,7 +60,7 @@ class WeightedFTS(fts.FTS):
|
||||
ndata = self.apply_transformations(data)
|
||||
tmpdata = FuzzySet.fuzzyfy_series_old(ndata, self.sets)
|
||||
flrs = FLR.generate_recurrent_flrs(tmpdata)
|
||||
self.flrgs = self.generate_FLRG(flrs)
|
||||
self.generate_FLRG(flrs)
|
||||
|
||||
def forecast(self, data, **kwargs):
|
||||
l = 1
|
||||
|
File diff suppressed because one or more lines are too long
@ -37,7 +37,7 @@ for method in fo_methods:
|
||||
model = method("")
|
||||
model.append_transformation(diff)
|
||||
model.train(passengers, fs.sets)
|
||||
e.appendModel(model)
|
||||
e.append_model(model)
|
||||
|
||||
|
||||
for method in ho_methods:
|
||||
@ -45,7 +45,7 @@ for method in ho_methods:
|
||||
model = method("")
|
||||
model.append_transformation(diff)
|
||||
model.train(passengers, fs.sets, order=order)
|
||||
e.appendModel(model)
|
||||
e.append_model(model)
|
||||
|
||||
|
||||
arima100 = arima.ARIMA("", alpha=0.25)
|
||||
@ -65,10 +65,10 @@ arima201 = arima.ARIMA("", alpha=0.25)
|
||||
arima201.train(passengers, None, order=(2,0,1))
|
||||
|
||||
|
||||
e.appendModel(arima100)
|
||||
e.appendModel(arima101)
|
||||
e.appendModel(arima200)
|
||||
e.appendModel(arima201)
|
||||
e.append_model(arima100)
|
||||
e.append_model(arima101)
|
||||
e.append_model(arima200)
|
||||
e.append_model(arima201)
|
||||
|
||||
e.train(passengers, None)
|
||||
|
||||
|
@ -4,16 +4,33 @@ from pyFTS.partitioners import Util
|
||||
from pyFTS.common import Membership
|
||||
|
||||
|
||||
#fs = partitioner.TimeGridPartitioner(None, 12, common.DateTime.day_of_year, func=Membership.trapmf,
|
||||
#fs = partitioner.TimeGridPartitioner(data=None, npart=12, seasonality=common.DateTime.day_of_year,
|
||||
# func=Membership.trapmf,
|
||||
# names=['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'])
|
||||
|
||||
|
||||
#fs = partitioner.TimeGridPartitioner(None, 24, common.DateTime.minute_of_day, func=Membership.trapmf)
|
||||
|
||||
fs = partitioner.TimeGridPartitioner(None, 7, common.DateTime.hour_of_week, func=Membership.trapmf)
|
||||
#fs = partitioner.TimeGridPartitioner(None, 7, common.DateTime.hour_of_week, func=Membership.trapmf)
|
||||
|
||||
|
||||
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[6, 8])
|
||||
#fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[6, 8])
|
||||
|
||||
fs.plot(ax)
|
||||
plt.show()
|
||||
#fs.plot(ax)
|
||||
#plt.show()
|
||||
|
||||
"""
|
||||
import pandas as pd
|
||||
|
||||
from pyFTS.data import SONDA
|
||||
df = SONDA.get_dataframe()
|
||||
|
||||
df = df.drop(df[df.rain.values > 100].index)
|
||||
df = df.drop(df[df.press.values < 800].index)
|
||||
df = df.drop(df[df.humid.values < 15].index)
|
||||
|
||||
df.to_csv("SONDA_BSB_MOD.csv", sep=";", index=False)
|
||||
"""
|
||||
|
||||
import os
|
||||
print(os.getcwd())
|
Loading…
Reference in New Issue
Block a user