data.Malaysia added; Several improvements on pyFTS.data modules

This commit is contained in:
Petrônio Cândido 2018-11-07 10:37:00 -02:00
parent 9bbb5d4c4d
commit 34f609d770
8 changed files with 70 additions and 47 deletions

View File

@ -28,7 +28,8 @@ def get_dataframe():
:return: Pandas DataFrame
"""
df = pd.read_csv('https://query.data.world/s/72gews5w3c7oaf7by5vp7evsasluia')
df = common.get_dataframe("BTCUSD.csv", "https://query.data.world/s/72gews5w3c7oaf7by5vp7evsasluia",
sep=",")
return df

View File

@ -29,7 +29,8 @@ def get_dataframe():
:return: Pandas DataFrame
"""
df = pd.read_csv('https://query.data.world/s/d4hfir3xrelkx33o3bfs5dbhyiztml')
df = common.get_dataframe("DowJones.csv", "https://query.data.world/s/d4hfir3xrelkx33o3bfs5dbhyiztml",
sep=",")
return df

View File

@ -27,7 +27,8 @@ def get_dataframe():
:return: Pandas DataFrame
"""
df = pd.read_csv('https://query.data.world/s/gvsaeruthnxjkwzl7z4ki7u5rduah3')
df = common.get_dataframe("EURGBP.csv", "https://query.data.world/s/gvsaeruthnxjkwzl7z4ki7u5rduah3",
sep=",")
return df

View File

@ -27,7 +27,8 @@ def get_dataframe():
:return: Pandas DataFrame
"""
df = pd.read_csv('https://query.data.world/s/od4eojioz4w6o5bbwxjfn6j5zoqtos')
df = common.get_dataframe("EURUSD.csv", "https://query.data.world/s/od4eojioz4w6o5bbwxjfn6j5zoqtos",
sep=",")
return df

View File

@ -29,7 +29,8 @@ def get_dataframe():
:return: Pandas DataFrame
"""
df = pd.read_csv('https://query.data.world/s/qj4ly7o4rl7oq527xzy4v76wkr3hws')
df = common.get_dataframe("ETHUSD.csv", "https://query.data.world/s/qj4ly7o4rl7oq527xzy4v76wkr3hws",
sep=",")
return df

View File

@ -27,7 +27,8 @@ def get_dataframe():
:return: Pandas DataFrame
"""
df = pd.read_csv('https://query.data.world/s/sw4mijpowb3mqv6bsat7cdj54hyxix')
df = common.get_dataframe("GBPUSD.csv", "https://query.data.world/s/sw4mijpowb3mqv6bsat7cdj54hyxix",
sep=",")
return df

34
pyFTS/data/Malaysia.py Normal file
View File

@ -0,0 +1,34 @@
"""
Hourly Malaysia electric load and temperature
"""
from pyFTS.data import common
import pandas as pd
import numpy as np
def get_data(field='load'):
    """
    Load a single column of the dataset as a univariate time series.

    :param field: name of the dataframe column to extract
    :return: numpy array built from the selected column
    """
    frame = get_dataframe()
    column = frame[field]
    return np.array(column)
def get_dataframe():
    """
    Get the complete multivariate time series data.

    The frame is obtained through ``common.get_dataframe``, passing the file
    name "malaysia.csv", the data.world URL of the dataset and ";" as the
    field separator.

    :return: Pandas DataFrame
    """
    df = common.get_dataframe(
        "malaysia.csv",
        "https://query.data.world/s/e5arbthdytod3m7wfcg7gmtluh3wa5",
        sep=";")
    # Single exit point: the second, unreachable ``return df`` was removed.
    return df

View File

@ -4,11 +4,13 @@ from pyFTS.data import TAIEX as tx
from pyFTS.common import Transformations
from pyFTS.data import SONDA
df = SONDA.get_dataframe()
train = df.iloc[0:578241] #three years
test = df.iloc[1572480:2096640] #one year
del df
from pyFTS.data import Malaysia
dataset = Malaysia.get_dataframe()
print(dataset.head())
dataset["date"] = pd.to_datetime(dataset["time"], format='%m/%d/%y %I:%M %p')
from pyFTS.partitioners import Grid, Util as pUtil
from pyFTS.common import Transformations, Util
@ -19,49 +21,30 @@ from pyFTS.models.seasonal.common import DateTime
bc = Transformations.BoxCox(0)
tdiff = Transformations.Differential(1)
np = 10
from pyFTS.models.multivariate import common, variable, mvfts
from pyFTS.models.seasonal import partitioner as seasonal
from pyFTS.models.seasonal.common import DateTime
mv_train = dataset.iloc[:100000]
model = mvfts.MVFTS("")
sp = {'seasonality': DateTime.month , 'names': ['Jan','Feb','Mar','Apr','May','Jun','Jul', 'Aug','Sep','Oct','Nov','Dec']}
fig, axes = plt.subplots(nrows=5, ncols=1,figsize=[15,10])
vmonth = variable.Variable("Month", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=12,
data=mv_train, partitioner_specific=sp)
sp = {'seasonality': DateTime.day_of_week, 'names': ['Mon','Tue','Wed','Thu','Fri','Sat','Sun']}
sp = {'seasonality': DateTime.day_of_year , 'names': ['Jan','Feb','Mar','Apr','May','Jun','Jul', 'Aug','Sep','Oct','Nov','Dec']}
vday = variable.Variable("Weekday", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=7,
data=mv_train, partitioner_specific=sp)
vmonth = variable.Variable("Month", data_label="datahora", partitioner=seasonal.TimeGridPartitioner, npart=12,
data=train, partitioner_specific=sp)
vmonth.partitioner.plot(axes[0])
sp = {'seasonality': DateTime.hour_of_day}
sp = {'seasonality': DateTime.minute_of_day}
vhour = variable.Variable("Hour", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=24,
data=mv_train, partitioner_specific=sp)
vhour = variable.Variable("Hour", data_label="datahora", partitioner=seasonal.TimeGridPartitioner, npart=24,
data=train, partitioner_specific=sp)
vload = variable.Variable("load", data_label="load", partitioner=Grid.GridPartitioner, npart=10,
data=mv_train)
vhour.partitioner.plot(axes[1])
vtemperature = variable.Variable("temperature", data_label="temperature", partitioner=Grid.GridPartitioner, npart=10,
data=mv_train)
vavg = variable.Variable("Radiance", data_label="glo_avg", partitioner=Grid.GridPartitioner, npart=30,
data=train)
model1 = mvfts.MVFTS("")
model1.append_variable(vmonth)
model1.append_variable(vhour)
model1.append_variable(vavg)
model1.target_variable = vavg
#model1.fit(train, num_batches=60, save=True, batch_save=True, file_path='mvfts_sonda')
#model.fit(train, num_batches=60, save=True, batch_save=True, file_path='mvfts_sonda')
#model1.fit(train, num_batches=200, save=True, batch_save=True, file_path='mvfts_sonda', distributed=False,
# nodes=['192.168.0.110'], batch_save_interval=10)
model = Util.load_obj('mvfts_sonda')
forecasts = model.predict(test)