From 55d3deadfc3e20d6dcdee04534a92d1d531d1c93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=C3=B4nio=20C=C3=A2ndido=20de=20Lima=20e=20Silva?= Date: Wed, 8 Feb 2017 13:23:41 -0200 Subject: [PATCH] - DateTimeSeasonalIndexer - persist_obj, load_obj, persist_env, load_env --- common/FLR.py | 6 +-- common/Util.py | 19 +++++++- models/msfts.py | 43 ++++++++++------- models/seasonal/SeasonalIndexer.py | 77 ++++++++++++++++++++++++++++-- tests/sfts.py | 67 ++++++++++++++++++++++---- 5 files changed, 178 insertions(+), 34 deletions(-) diff --git a/common/FLR.py b/common/FLR.py index eebcf50..bb546c8 100644 --- a/common/FLR.py +++ b/common/FLR.py @@ -40,9 +40,9 @@ def generateIndexedFLRs(sets, indexer, data): flrs = [] index = indexer.get_season_of_data(data) ndata = indexer.get_data(data) - for k in np.arange(0,len(data)-1): - lhs = FuzzySet.getMaxMembershipFuzzySet(ndata[k],sets) - rhs = FuzzySet.getMaxMembershipFuzzySet(ndata[k+1], sets) + for k in np.arange(1,len(data)): + lhs = FuzzySet.getMaxMembershipFuzzySet(ndata[k-1],sets) + rhs = FuzzySet.getMaxMembershipFuzzySet(ndata[k], sets) season = index[k] flr = IndexedFLR(season,lhs,rhs) flrs.append(flr) diff --git a/common/Util.py b/common/Util.py index 8291b17..62ac6bc 100644 --- a/common/Util.py +++ b/common/Util.py @@ -1,5 +1,6 @@ import time import matplotlib.pyplot as plt +import dill current_milli_time = lambda: int(round(time.time() * 1000)) @@ -26,4 +27,20 @@ def showAndSaveImage(fig,file,flag,lgd=None): def enumerate2(xs, start=0, step=1): for x in xs: yield (start, x) - start += step \ No newline at end of file + start += step + + +def persist_obj(obj, file): + with open(file, 'wb') as _file: + dill.dump(obj, _file) + +def load_obj(file): + with open(file, 'rb') as _file: + obj = dill.load(_file) + return obj + +def persist_env(file): + dill.dump_session(file) + +def load_env(file): + dill.load_session(file) \ No newline at end of file diff --git a/models/msfts.py b/models/msfts.py index d3681d2..1d3c6a1 100644 --- a/models/msfts.py +++ b/models/msfts.py @@ -2,6 +2,7 @@ import numpy as np from pyFTS.common import FuzzySet,FLR from pyFTS import fts, sfts + class MultiSeasonalFTS(sfts.SeasonalFTS): def __init__(self, name, indexer): super(MultiSeasonalFTS, self).__init__("MSFTS") @@ -18,44 +19,50 @@ class MultiSeasonalFTS(sfts.SeasonalFTS): def generateFLRG(self, flrs): flrgs = {} - for index, season in enumerate(self.indexer.get_season_of_data(flrs),start=0): + for flr in flrs: - print(index) - print(season) + if str(flr.index) not in self.flrgs: + flrgs[str(flr.index)] = sfts.SeasonalFLRG(flr.index) - if str(season) not in self.flrgs: - flrgs[str(season)] = sfts.SeasonalFLRG(season) - - flrgs[str(season)].append(flrs[index].RHS) + flrgs[str(flr.index)].append(flr.RHS) return (flrgs) def train(self, data, sets, order=1, parameters=None): self.sets = sets self.seasonality = parameters - ndata = self.indexer.set_data(data,self.doTransformations(self.indexer.get_data(data))) - tmpdata = FuzzySet.fuzzySeries(ndata, sets) - flrs = FLR.generateRecurrentFLRs(tmpdata) + #ndata = self.indexer.set_data(data,self.doTransformations(self.indexer.get_data(data))) + flrs = FLR.generateIndexedFLRs(self.sets, self.indexer, data) self.flrgs = self.generateFLRG(flrs) def forecast(self, data): - ndata = np.array(self.doTransformations(self.indexer.get_data(data))) - - l = len(ndata) - ret = [] - for k in np.arange(1, l): + index = self.indexer.get_season_of_data(data) + ndata = self.indexer.get_data(data) - season = self.indexer.get_season_index(k) + for k in np.arange(1, len(data)): - flrg = self.flrgs[str(season)] + flrg = self.flrgs[str(index[k])] mp = self.getMidpoints(flrg) ret.append(sum(mp) / len(mp)) - ret = self.doInverseTransformations(ret, params=[data[self.order - 1:]]) + ret = self.doInverseTransformations(ret, params=[ndata[self.order - 1:]]) + + return ret + + def forecastAhead(self, data, steps): + ret = [] + for i in steps: + flrg = self.flrgs[str(i)] + + mp = self.getMidpoints(flrg) + + ret.append(sum(mp) / len(mp)) + + ret = self.doInverseTransformations(ret, params=data) return ret diff --git a/models/seasonal/SeasonalIndexer.py b/models/seasonal/SeasonalIndexer.py index 729652c..2a0d679 100644 --- a/models/seasonal/SeasonalIndexer.py +++ b/models/seasonal/SeasonalIndexer.py @@ -1,4 +1,5 @@ import numpy as np +from enum import Enum class SeasonalIndexer(object): def __init__(self,num_seasons): @@ -68,6 +69,7 @@ class DataFrameSeasonalIndexer(SeasonalIndexer): self.data_fields = data_fields def get_season_of_data(self,data): + #data = data.copy() ret = [] for ix in data.index: season = [] @@ -75,7 +77,8 @@ class DataFrameSeasonalIndexer(SeasonalIndexer): if self.seasons[c] is None: season.append(data[f][ix]) else: - season.append(data[f][ix] // self.seasons[c]) + a = data[f][ix] + season.append(a // self.seasons[c]) ret.append(season) return ret @@ -98,5 +101,73 @@ class DataFrameSeasonalIndexer(SeasonalIndexer): return data[self.data_fields].tolist() def set_data(self, data, value): - data[self.data_fields] = value - return data \ No newline at end of file + data.loc[:,self.data_fields] = value + return data + +class DateTime(Enum): + year = 1 + month = 2 + day_of_month = 3 + day_of_year = 4 + day_of_week = 5 + hour = 6 + minute = 7 + second = 8 + + +class DateTimeSeasonalIndexer(SeasonalIndexer): + def __init__(self,date_field, index_fields, index_seasons, data_fields): + super(DateTimeSeasonalIndexer, self).__init__(len(index_seasons)) + self.fields = index_fields + self.seasons = index_seasons + self.data_fields = data_fields + self.date_field = date_field + + def strip_datepart(self, date, date_part, resolution): + if date_part == DateTime.year: + tmp = date.year + elif date_part == DateTime.month: + tmp = date.month + elif date_part == DateTime.day_of_year: + tmp = date.timetuple().tm_yday + elif date_part == DateTime.day_of_month: + tmp = date.day + elif date_part == DateTime.day_of_week: + tmp = date.weekday() + elif date_part == DateTime.hour: + tmp = date.hour + elif date_part == DateTime.minute: + tmp = date.minute + elif date_part == DateTime.second: + tmp = date.second + + if resolution is None: + return tmp + else: + return tmp // resolution + + def get_season_of_data(self, data): + # data = data.copy() + ret = [] + for ix in data.index: + date = data[self.date_field][ix] + season = [] + for c, f in enumerate(self.fields, start=0): + season.append( self.strip_datepart(date, f, self.seasons[c]) ) + ret.append(season) + return ret + + def get_season_by_index(self, index): + raise Exception("Operation not available!") + + def get_data_by_season(self, data, indexes): + raise Exception("Operation not available!") + + def get_index_by_season(self, indexes): + raise Exception("Operation not available!") + + def get_data(self, data): + return data[self.data_fields].tolist() + + def set_data(self, data, value): + raise Exception("Operation not available!") \ No newline at end of file diff --git a/tests/sfts.py b/tests/sfts.py index 1e62ea8..44716dd 100644 --- a/tests/sfts.py +++ b/tests/sfts.py @@ -8,9 +8,11 @@ import matplotlib as plt import matplotlib.pyplot as plt from mpl_toolkits.mplot3d import Axes3D +import datetime + import pandas as pd -from pyFTS.partitioners import Grid -from pyFTS.common import FLR,FuzzySet,Membership,Transformations +from pyFTS.partitioners import Grid, CMeans, FCM, Entropy +from pyFTS.common import FLR,FuzzySet,Membership,Transformations,Util from pyFTS import fts,sfts from pyFTS.models import msfts from pyFTS.benchmarks import benchmarks as bchmk @@ -18,12 +20,36 @@ from pyFTS.benchmarks import Measures os.chdir("/home/petronio/dados/Dropbox/Doutorado/Disciplinas/AdvancedFuzzyTimeSeriesModels/") -sonda = pd.read_csv("DataSets/SONDA_BSB_CLEAN.csv", sep=";") +sonda = pd.read_csv("DataSets/SONDA_BSB_MOD.csv", sep=";") + +sonda['data'] = pd.to_datetime(sonda['data']) sonda = sonda[:][527041:] sonda.index = np.arange(0,len(sonda.index)) +#data = [] + +#for i in sonda.index: + + #inst = [] + + #year = int( sonda["year"][i] ) + #day_of_year = int( sonda["day"][i] ) + #minute = int (sonda["min"][i] ) + + #glo_avg = sonda["glo_avg"][i] + + #inst.append( datetime.datetime(year, 1, 1) + datetime.timedelta(day_of_year - 1, minutes=minute) ) + + #inst.append( glo_avg ) + + #data.append(inst) + +#nov = pd.DataFrame(data,columns=["data","glo_avg"]) + +#nov.to_csv("DataSets/SONDA_BSB_MOD.csv", sep=";") + sonda_treino = sonda[:1051200] sonda_teste = sonda[1051201:] @@ -37,19 +63,42 @@ from pyFTS.models.seasonal import SeasonalIndexer from pyFTS.models import msfts from pyFTS.common import FLR -ix = SeasonalIndexer.DataFrameSeasonalIndexer(['day','min'],[30, 60],'glo_avg') +ix = SeasonalIndexer.DateTimeSeasonalIndexer('data',[SeasonalIndexer.DateTime.month, + SeasonalIndexer.DateTime.hour, SeasonalIndexer.DateTime.minute], + [None, None,15],'glo_avg') -fs = Grid.GridPartitionerTrimf(ix.get_data(sonda_treino),20) +tmp = ix.get_data(sonda_treino) +for max_part in [10, 20, 30, 40, 50]: + + fs1 = Grid.GridPartitionerTrimf(tmp,max_part) + + Util.persist_obj(fs1,"models/sonda_fs_grid_" + str(max_part) + ".pkl") + + fs2 = FCM.FCMPartitionerTrimf(tmp, max_part) + + Util.persist_obj(fs2, "models/sonda_fs_fcm_" + str(max_part) + ".pkl") + + fs3 = Entropy.EntropyPartitionerTrimf(tmp, max_part) + + Util.persist_obj(fs3, "models/sonda_fs_entropy_" + str(max_part) + ".pkl") + + +#fs = Util.load_obj("models/sonda_fs_grid_50.pkl") + +#for f in fs: +# print(f) #mfts = msfts.MultiSeasonalFTS("",ix) -#mfts.train(sonda_teste,fs) +#mfts.train(sonda_treino,fs) #print(str(mfts)) +#plt.plot(mfts.forecast(sonda_teste)) + #[10, 508] -flrs = FLR.generateIndexedFLRs(fs, ix, sonda_treino[110000:111450]) +#flrs = FLR.generateIndexedFLRs(fs, ix, sonda_treino[110000:111450]) -for i in flrs: #ix.get_data(sonda_treino[111430:111450]): - print(i) \ No newline at end of file +#for i in mfts.forecast(sonda_teste): +# print(i) \ No newline at end of file