- DateTimeSeasonalIndexer

-  persist_obj, load_obj, persist_env, load_env
This commit is contained in:
Petrônio Cândido de Lima e Silva 2017-02-08 13:23:41 -02:00
parent bb42a6be07
commit 55d3deadfc
5 changed files with 178 additions and 34 deletions

View File

@ -40,9 +40,9 @@ def generateIndexedFLRs(sets, indexer, data):
flrs = []
index = indexer.get_season_of_data(data)
ndata = indexer.get_data(data)
for k in np.arange(0,len(data)-1):
lhs = FuzzySet.getMaxMembershipFuzzySet(ndata[k],sets)
rhs = FuzzySet.getMaxMembershipFuzzySet(ndata[k+1], sets)
for k in np.arange(1,len(data)):
lhs = FuzzySet.getMaxMembershipFuzzySet(ndata[k-1],sets)
rhs = FuzzySet.getMaxMembershipFuzzySet(ndata[k], sets)
season = index[k]
flr = IndexedFLR(season,lhs,rhs)
flrs.append(flr)

View File

@ -1,5 +1,6 @@
import time
import matplotlib.pyplot as plt
import dill
current_milli_time = lambda: int(round(time.time() * 1000))
@ -27,3 +28,19 @@ def enumerate2(xs, start=0, step=1):
for x in xs:
yield (start, x)
start += step
def persist_obj(obj, file):
    """Serialize ``obj`` with dill and write it to the path ``file``.

    The target is opened in binary write mode, so any existing content
    at that path is replaced.
    """
    with open(file, 'wb') as handle:
        dill.dump(obj, handle)
def load_obj(file):
    """Return the object that dill deserializes from the path ``file``.

    The file is opened in binary read mode and closed before returning.
    """
    # NOTE: dill, like pickle, can run arbitrary code while loading;
    # only open files that come from a trusted source.
    with open(file, 'rb') as handle:
        return dill.load(handle)
def persist_env(file):
    """Save the current interpreter session to ``file``.

    Thin wrapper that forwards ``file`` to ``dill.dump_session``.
    """
    dill.dump_session(file)
def load_env(file):
    """Restore an interpreter session previously saved to ``file``.

    Thin wrapper that forwards ``file`` to ``dill.load_session``.
    """
    # NOTE: as with any dill/pickle load, only restore trusted files.
    dill.load_session(file)

View File

@ -2,6 +2,7 @@ import numpy as np
from pyFTS.common import FuzzySet,FLR
from pyFTS import fts, sfts
class MultiSeasonalFTS(sfts.SeasonalFTS):
def __init__(self, name, indexer):
super(MultiSeasonalFTS, self).__init__("MSFTS")
@ -18,44 +19,50 @@ class MultiSeasonalFTS(sfts.SeasonalFTS):
def generateFLRG(self, flrs):
    """Group the right-hand sides of indexed FLRs by seasonal index.

    NOTE(review): this hunk was shown with its diff markers stripped, so
    the pre-commit loop over ``self.indexer.get_season_of_data(flrs)``
    (with its debug ``print`` calls) and the post-commit loop over
    ``flrs`` were interleaved. This body keeps the post-commit variant;
    confirm against the repository.

    :param flrs: iterable of rules exposing ``index`` (the season) and
        ``RHS`` attributes
    :return: dict mapping ``str(flr.index)`` to the
        ``sfts.SeasonalFLRG`` that collected every ``RHS`` seen for
        that season
    """
    flrgs = {}
    for flr in flrs:
        key = str(flr.index)
        # Test membership against the dict being built. The previous
        # check looked at self.flrgs while inserting into the local
        # dict, so whenever self.flrgs lacked the key the group was
        # re-created on every rule and only the last RHS per season
        # was kept.
        if key not in flrgs:
            flrgs[key] = sfts.SeasonalFLRG(flr.index)
        flrgs[key].append(flr.RHS)
    return flrgs
def train(self, data, sets, order=1, parameters=None):
# Stores the fuzzy sets and the seasonality parameters, builds FLRs from
# `data`, and stores the groups returned by generateFLRG in self.flrgs.
# NOTE(review): this text comes from a rendered diff whose +/- markers and
# indentation were lost; removed and added lines appear interleaved below.
# `order` is not read anywhere in the lines shown here.
self.sets = sets
self.seasonality = parameters
# NOTE(review): the next three statements look like the pre-commit path
# (transform -> fuzzySeries -> generateRecurrentFLRs) -- confirm.
ndata = self.indexer.set_data(data,self.doTransformations(self.indexer.get_data(data)))
tmpdata = FuzzySet.fuzzySeries(ndata, sets)
flrs = FLR.generateRecurrentFLRs(tmpdata)
#ndata = self.indexer.set_data(data,self.doTransformations(self.indexer.get_data(data)))
# NOTE(review): presumably the post-commit path: FLRs are built directly
# from `data` through the indexer, without calling doTransformations.
flrs = FLR.generateIndexedFLRs(self.sets, self.indexer, data)
self.flrgs = self.generateFLRG(flrs)
def forecast(self, data):
# For each position k >= 1, looks up the FLRG stored for that position's
# season, averages the midpoints returned by getMidpoints, and finally
# passes the collected averages through doInverseTransformations.
# NOTE(review): this text comes from a rendered diff whose +/- markers and
# indentation were lost; several statements below appear as old/new pairs.
# NOTE(review): looks like the pre-commit setup (transformed array and its
# length) -- confirm.
ndata = np.array(self.doTransformations(self.indexer.get_data(data)))
l = len(ndata)
ret = []
for k in np.arange(1, l):
# NOTE(review): presumably the post-commit setup: seasons and raw values are
# read once from the indexer for the whole of `data`.
index = self.indexer.get_season_of_data(data)
ndata = self.indexer.get_data(data)
season = self.indexer.get_season_index(k)
for k in np.arange(1, len(data)):
# Two lookups: by `season` (apparently old) and by index[k] (apparently new).
# A season key absent from self.flrgs raises KeyError in either form.
flrg = self.flrgs[str(season)]
flrg = self.flrgs[str(index[k])]
mp = self.getMidpoints(flrg)
ret.append(sum(mp) / len(mp))
# Two inverse-transform calls: params sliced from `data` (apparently old)
# and from `ndata` (apparently new).
ret = self.doInverseTransformations(ret, params=[data[self.order - 1:]])
ret = self.doInverseTransformations(ret, params=[ndata[self.order - 1:]])
return ret
def forecastAhead(self, data, steps):
    """Forecast one value per season key listed in ``steps``.

    ``steps`` is iterated directly, so it must be an iterable of season
    keys (each is looked up as ``self.flrgs[str(key)]``), not a count.

    :param data: passed unchanged as ``params`` to
        ``doInverseTransformations``
    :param steps: iterable of season keys to forecast
    :return: the result of ``doInverseTransformations`` applied to the
        per-season midpoint averages
    """
    averages = []
    for season in steps:
        midpoints = self.getMidpoints(self.flrgs[str(season)])
        averages.append(sum(midpoints) / len(midpoints))

    return self.doInverseTransformations(averages, params=data)

View File

@ -1,4 +1,5 @@
import numpy as np
from enum import Enum
class SeasonalIndexer(object):
def __init__(self,num_seasons):
@ -68,6 +69,7 @@ class DataFrameSeasonalIndexer(SeasonalIndexer):
self.data_fields = data_fields
def get_season_of_data(self,data):
#data = data.copy()
ret = []
for ix in data.index:
season = []
@ -75,7 +77,8 @@ class DataFrameSeasonalIndexer(SeasonalIndexer):
if self.seasons[c] is None:
season.append(data[f][ix])
else:
season.append(data[f][ix] // self.seasons[c])
a = data[f][ix]
season.append(a // self.seasons[c])
ret.append(season)
return ret
@ -98,5 +101,73 @@ class DataFrameSeasonalIndexer(SeasonalIndexer):
return data[self.data_fields].tolist()
def set_data(self, data, value):
# Writes `value` into the column(s) named by self.data_fields and returns
# the same `data` object.
# NOTE(review): this text comes from a rendered diff whose +/- markers were
# lost; the plain item assignment looks like the removed line and the
# `.loc` assignment like its replacement -- confirm.
data[self.data_fields] = value
data.loc[:,self.data_fields] = value
return data
class DateTime(Enum):
    """Components of a date/time value that can be used as a season key."""

    # Calendar components
    year = 1
    month = 2
    day_of_month = 3
    day_of_year = 4
    day_of_week = 5

    # Clock components
    hour = 6
    minute = 7
    second = 8
class DateTimeSeasonalIndexer(SeasonalIndexer):
    """Seasonal indexer that derives season keys from a date column.

    Each row's season is a list with one entry per configured
    ``DateTime`` component, optionally floor-divided by a resolution.
    """

    def __init__(self, date_field, index_fields, index_seasons, data_fields):
        """
        :param date_field: key used to fetch the date values
            (``data[date_field][ix]``)
        :param index_fields: sequence of ``DateTime`` members to extract
        :param index_seasons: sequence parallel to ``index_fields``;
            each entry is ``None`` (use the component as is) or a
            divisor applied with ``//``
        :param data_fields: key used by ``get_data`` to fetch the values
        """
        super(DateTimeSeasonalIndexer, self).__init__(len(index_seasons))
        self.fields = index_fields
        self.seasons = index_seasons
        self.data_fields = data_fields
        self.date_field = date_field

    def strip_datepart(self, date, date_part, resolution):
        """Extract one component of ``date``, optionally coarsened.

        :param date: object exposing ``year``, ``month``, ``day``,
            ``hour``, ``minute``, ``second``, ``weekday()`` and
            ``timetuple()``
        :param date_part: the ``DateTime`` member to extract
        :param resolution: ``None`` or a divisor; when given the
            component is floor-divided by it
        :raises ValueError: if ``date_part`` is not a supported
            ``DateTime`` member
        """
        if date_part == DateTime.year:
            tmp = date.year
        elif date_part == DateTime.month:
            tmp = date.month
        elif date_part == DateTime.day_of_year:
            tmp = date.timetuple().tm_yday
        elif date_part == DateTime.day_of_month:
            tmp = date.day
        elif date_part == DateTime.day_of_week:
            tmp = date.weekday()
        elif date_part == DateTime.hour:
            tmp = date.hour
        elif date_part == DateTime.minute:
            tmp = date.minute
        elif date_part == DateTime.second:
            tmp = date.second
        else:
            # Previously fell through with `tmp` unbound, surfacing as an
            # opaque UnboundLocalError further down.
            raise ValueError("Unsupported date part: " + repr(date_part))

        if resolution is None:
            return tmp
        return tmp // resolution

    def get_season_of_data(self, data):
        """Return one season key (a list of components) per row of ``data``.

        Rows are visited in the order of ``data.index``.
        """
        # Fetch the date column once instead of once per row.
        dates = data[self.date_field]
        ret = []
        for ix in data.index:
            date = dates[ix]
            ret.append([
                self.strip_datepart(date, part, self.seasons[pos])
                for pos, part in enumerate(self.fields)
            ])
        return ret

    # The operations below are not supported by this indexer.
    # NotImplementedError is an Exception subclass, so callers that catch
    # Exception keep working and the message is unchanged.

    def get_season_by_index(self, index):
        """Not supported; always raises ``NotImplementedError``."""
        raise NotImplementedError("Operation not available!")

    def get_data_by_season(self, data, indexes):
        """Not supported; always raises ``NotImplementedError``."""
        raise NotImplementedError("Operation not available!")

    def get_index_by_season(self, indexes):
        """Not supported; always raises ``NotImplementedError``."""
        raise NotImplementedError("Operation not available!")

    def get_data(self, data):
        """Return ``data[self.data_fields]`` converted with ``tolist()``."""
        return data[self.data_fields].tolist()

    def set_data(self, data, value):
        """Not supported; always raises ``NotImplementedError``."""
        raise NotImplementedError("Operation not available!")

View File

@ -8,9 +8,11 @@ import matplotlib as plt
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import datetime
import pandas as pd
from pyFTS.partitioners import Grid
from pyFTS.common import FLR,FuzzySet,Membership,Transformations
from pyFTS.partitioners import Grid, CMeans, FCM, Entropy
from pyFTS.common import FLR,FuzzySet,Membership,Transformations,Util
from pyFTS import fts,sfts
from pyFTS.models import msfts
from pyFTS.benchmarks import benchmarks as bchmk
@ -18,12 +20,36 @@ from pyFTS.benchmarks import Measures
os.chdir("/home/petronio/dados/Dropbox/Doutorado/Disciplinas/AdvancedFuzzyTimeSeriesModels/")
sonda = pd.read_csv("DataSets/SONDA_BSB_CLEAN.csv", sep=";")
sonda = pd.read_csv("DataSets/SONDA_BSB_MOD.csv", sep=";")
sonda['data'] = pd.to_datetime(sonda['data'])
sonda = sonda[:][527041:]
sonda.index = np.arange(0,len(sonda.index))
#data = []
#for i in sonda.index:
#inst = []
#year = int( sonda["year"][i] )
#day_of_year = int( sonda["day"][i] )
#minute = int (sonda["min"][i] )
#glo_avg = sonda["glo_avg"][i]
#inst.append( datetime.datetime(year, 1, 1) + datetime.timedelta(day_of_year - 1, minutes=minute) )
#inst.append( glo_avg )
#data.append(inst)
#nov = pd.DataFrame(data,columns=["data","glo_avg"])
#nov.to_csv("DataSets/SONDA_BSB_MOD.csv", sep=";")
sonda_treino = sonda[:1051200]
sonda_teste = sonda[1051201:]
@ -37,19 +63,42 @@ from pyFTS.models.seasonal import SeasonalIndexer
from pyFTS.models import msfts
from pyFTS.common import FLR
ix = SeasonalIndexer.DataFrameSeasonalIndexer(['day','min'],[30, 60],'glo_avg')
ix = SeasonalIndexer.DateTimeSeasonalIndexer('data',[SeasonalIndexer.DateTime.month,
SeasonalIndexer.DateTime.hour, SeasonalIndexer.DateTime.minute],
[None, None,15],'glo_avg')
fs = Grid.GridPartitionerTrimf(ix.get_data(sonda_treino),20)
tmp = ix.get_data(sonda_treino)
for max_part in [10, 20, 30, 40, 50]:
fs1 = Grid.GridPartitionerTrimf(tmp,max_part)
Util.persist_obj(fs1,"models/sonda_fs_grid_" + str(max_part) + ".pkl")
fs2 = FCM.FCMPartitionerTrimf(tmp, max_part)
Util.persist_obj(fs2, "models/sonda_fs_fcm_" + str(max_part) + ".pkl")
fs3 = Entropy.EntropyPartitionerTrimf(tmp, max_part)
Util.persist_obj(fs3, "models/sonda_fs_entropy_" + str(max_part) + ".pkl")
#fs = Util.load_obj("models/sonda_fs_grid_50.pkl")
#for f in fs:
# print(f)
#mfts = msfts.MultiSeasonalFTS("",ix)
#mfts.train(sonda_teste,fs)
#mfts.train(sonda_treino,fs)
#print(str(mfts))
#plt.plot(mfts.forecast(sonda_teste))
#[10, 508]
flrs = FLR.generateIndexedFLRs(fs, ix, sonda_treino[110000:111450])
#flrs = FLR.generateIndexedFLRs(fs, ix, sonda_treino[110000:111450])
for i in flrs: #ix.get_data(sonda_treino[111430:111450]):
print(i)
#for i in mfts.forecast(sonda_teste):
# print(i)