From 8efd7c38d8970ccde2cc7b85e5e56ee43e7fb40d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido=20de=20Lima=20e=20Silva?= Date: Sun, 21 May 2017 18:04:10 -0300 Subject: [PATCH] - SeasonalIndexer on sfts --- benchmarks/distributed_benchmarks.py | 5 ++- fts.py | 1 + models/seasonal/SeasonalIndexer.py | 29 +++++++------- sfts.py | 29 ++++++-------- tests/general.py | 58 +++++++++++++++++----------- 5 files changed, 68 insertions(+), 54 deletions(-) diff --git a/benchmarks/distributed_benchmarks.py b/benchmarks/distributed_benchmarks.py index bbf9e82..1f923b7 100644 --- a/benchmarks/distributed_benchmarks.py +++ b/benchmarks/distributed_benchmarks.py @@ -438,6 +438,9 @@ def run_ahead(mfts, partitioner, train_data, test_data, steps, resolution, windo if transformation is not None: mfts.appendTransformation(transformation) + if mfts.has_seasonality: + mfts.indexer = indexer + try: _start = time.time() mfts.train(train_data, partitioner.sets, order=mfts.order) @@ -553,7 +556,7 @@ def ahead_sliding_window(data, windowsize, steps, resolution, train=0.8, inc=0.1 continue else: benchmarks_only[m.shortname] = m - job = cluster.submit(m, data_train_fs, train, test, steps, resolution, ct, transformation) + job = cluster.submit(m, data_train_fs, train, test, steps, resolution, ct, transformation, indexer) job.id = id # associate an ID to identify jobs (if needed later) jobs.append(job) diff --git a/fts.py b/fts.py index 9108afa..018794a 100644 --- a/fts.py +++ b/fts.py @@ -36,6 +36,7 @@ class FTS(object): self.partitioner = None self.auto_update = False self.benchmark_only = False + self.indexer = None def fuzzy(self, data): """ diff --git a/models/seasonal/SeasonalIndexer.py b/models/seasonal/SeasonalIndexer.py index d9d9b11..c99813a 100644 --- a/models/seasonal/SeasonalIndexer.py +++ b/models/seasonal/SeasonalIndexer.py @@ -30,23 +30,26 @@ class LinearSeasonalIndexer(SeasonalIndexer): self.seasons = seasons def get_season_of_data(self,data): - return self.get_season_by_index(np.arange(0,len(data))) + return self.get_season_by_index(np.arange(0, len(data)).tolist()) def get_season_by_index(self,index): ret = [] - for ix in index: - if self.num_seasons == 1: - season = ix % self.seasons - else: - season = [] - for seasonality in self.seasons: - print("S ", seasonality) - tmp = ix // seasonality - print("T ", tmp) - season.append(tmp) - #season.append(rest) + if not isinstance(index, (list, np.ndarray)): + season = (index % self.seasons[0]) + 1 + else: + for ix in index: + if self.num_seasons == 1: + season = (ix % self.seasons[0]) + else: + season = [] + for seasonality in self.seasons: + #print("S ", seasonality) + tmp = ix // seasonality + #print("T ", tmp) + season.append(tmp) + #season.append(rest) - ret.append(season) + ret.append(season) return ret diff --git a/sfts.py b/sfts.py index a475e5a..930f378 100644 --- a/sfts.py +++ b/sfts.py @@ -46,28 +46,24 @@ class SeasonalFTS(fts.FTS): self.is_high_order = False def generateFLRG(self, flrs): - flrgs = [] - season = 1 - for flr in flrs: + flrgs = {} + for ct, flr in enumerate(flrs, start=1): - if len(flrgs) < self.seasonality: - flrgs.append(SeasonalFLRG(season)) + season = self.indexer.get_season_by_index(ct)[0] + + if season not in flrgs: + flrgs[season] = SeasonalFLRG(season) #print(season) - flrgs[season-1].append(flr.RHS) - - season = (season + 1) % (self.seasonality + 1) - - if season == 0: season = 1 + flrgs[season].append(flr.RHS) return (flrgs) - def train(self, data, sets, order=1,parameters=12): + def train(self, data, sets, order=1, parameters=None): self.sets = sets - self.seasonality = parameters ndata = self.doTransformations(data) tmpdata = FuzzySet.fuzzySeries(ndata, sets) - flrs = FLR.generateRecurrentFLRs(tmpdata) + flrs = FLR.generateNonRecurrentFLRs(tmpdata) self.flrgs = self.generateFLRG(flrs) def forecast(self, data, **kwargs): @@ -79,13 +75,10 @@ class SeasonalFTS(fts.FTS): ret = [] for k in np.arange(1, l): - #flrg = self.flrgs[ndata[k]] - season = (k + 1) % (self.seasonality + 1) + season = self.indexer.get_season_by_index(k)[0] - #print(season) - - flrg = self.flrgs[season-1] + flrg = self.flrgs[season] mp = self.getMidpoints(flrg) diff --git a/tests/general.py b/tests/general.py index 8e40825..b31619a 100644 --- a/tests/general.py +++ b/tests/general.py @@ -19,15 +19,15 @@ from numpy import random os.chdir("/home/petronio/dados/Dropbox/Doutorado/Codigos/") -enrollments = pd.read_csv("DataSets/Enrollments.csv", sep=";") -enrollments = np.array(enrollments["Enrollments"]) - diff = Transformations.Differential(1) """ DATASETS """ +#enrollments = pd.read_csv("DataSets/Enrollments.csv", sep=";") +#enrollments = np.array(enrollments["Enrollments"]) + #passengers = pd.read_csv("DataSets/AirPassengers.csv", sep=",") #passengers = np.array(passengers["Passengers"]) @@ -37,8 +37,8 @@ DATASETS #gauss = random.normal(0,1.0,5000) #gauss_teste = random.normal(0,1.0,400) -taiexpd = pd.read_csv("DataSets/TAIEX.csv", sep=",") -taiex = np.array(taiexpd["avg"][:5000]) +#taiexpd = pd.read_csv("DataSets/TAIEX.csv", sep=",") +#taiex = np.array(taiexpd["avg"][:5000]) #nasdaqpd = pd.read_csv("DataSets/NASDAQ_IXIC.csv", sep=",") #nasdaq = np.array(nasdaqpd["avg"][0:5000]) @@ -52,9 +52,9 @@ taiex = np.array(taiexpd["avg"][:5000]) #sonda = np.array(sondapd["glo_avg"]) #del(sondapd) -#bestpd = pd.read_csv("DataSets/BEST_TAVG.csv", sep=";") -#best = np.array(bestpd["Anomaly"]) -#del(bestpd) +bestpd = pd.read_csv("DataSets/BEST_TAVG.csv", sep=";") +best = np.array(bestpd["Anomaly"]) +del(bestpd) #print(lag) #print(a) @@ -135,36 +135,50 @@ bchmk.interval_sliding_window(sp500, 2000, train=0.8, inc=0.2, #models=[yu.Weigh dump=True, save=True, file="experiments/sp500_analytic_diff.csv", nodes=['192.168.0.103', '192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts]) -#""" +""" -bchmk.ahead_sliding_window(taiex, 2000, steps=10, resolution=100, train=0.8, inc=0.1, +""" + +bchmk.ahead_sliding_window(best, 4000, steps=10, resolution=100, train=0.8, inc=0.5, partitioners=[Grid.GridPartitioner], partitions= np.arange(10,200,step=10), - dump=True, save=True, file="experiments/taiex_ahead_analytic.csv", - nodes=['192.168.0.105', '192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts]) + dump=True, save=True, file="experiments/best_ahead_analytic.csv", + nodes=['192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts]) -bchmk.ahead_sliding_window(taiex, 2000, steps=10, resolution=100, train=0.8, inc=0.1, + +bchmk.ahead_sliding_window(best, 4000, steps=10, resolution=100, train=0.8, inc=0.5, partitioners=[Grid.GridPartitioner], partitions= np.arange(3,20,step=2), transformation=diff, - dump=True, save=True, file="experiments/taiex_ahead_analytic_diff.csv", - nodes=['192.168.0.105', '192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts]) + dump=True, save=True, file="experiments/best_ahead_analytic_diff.csv", + nodes=['192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts]) """ from pyFTS.partitioners import Grid -from pyFTS import pwfts +from pyFTS.models.seasonal import SeasonalIndexer +from pyFTS import sfts + +ix = SeasonalIndexer.LinearSeasonalIndexer([10]) + +#print(ix.get_season_of_data(best[:2000])) + +#print(ix.get_season_by_index(45)) diff = Transformations.Differential(1) -fs = Grid.GridPartitioner(taiex[:2000], 10, transformation=diff) - -tmp = pwfts.ProbabilisticWeightedFTS("") +fs = Grid.GridPartitioner(best[:2000], 10, transformation=diff) +tmp = sfts.SeasonalFTS("") +tmp.indexer = ix tmp.appendTransformation(diff) -tmp.train(taiex[:1600], fs.sets, order=1) +#tmp = pwfts.ProbabilisticWeightedFTS("") -x = tmp.forecastInterval(taiex[1600:1610]) +#tmp.appendTransformation(diff) -print(taiex[1600:1610]) +tmp.train(best[:1600], fs.sets, order=1) + +x = tmp.forecast(best[1600:1610]) + +#print(taiex[1600:1610]) print(x) #""" \ No newline at end of file