MVFTS bugfixes

This commit is contained in:
Petrônio Cândido 2018-11-19 09:35:23 -02:00
parent 8c1fec482d
commit 32d15cd25c
2 changed files with 87 additions and 160 deletions

View File

@@ -39,6 +39,7 @@ class TimeGridPartitioner(partitioner.Partitioner):
else: else:
self.ordered_sets = FS.set_ordered(self.sets) self.ordered_sets = FS.set_ordered(self.sets)
if self.type == 'seasonal':
self.extractor = lambda x: strip_datepart(x, self.season) self.extractor = lambda x: strip_datepart(x, self.season)
def build(self, data): def build(self, data):

View File

@@ -17,119 +17,16 @@ from pyFTS.models.multivariate import common, variable, mvfts, cmvfts
from pyFTS.models.seasonal import partitioner as seasonal from pyFTS.models.seasonal import partitioner as seasonal
from pyFTS.models.seasonal.common import DateTime from pyFTS.models.seasonal.common import DateTime
# Multivariate time series
from pyFTS.data import Malaysia train_mv = {}
test_mv = {}
dataset = Malaysia.get_dataframe() models = {}
dataset["time"] = pd.to_datetime(dataset["time"], format='%m/%d/%y %I:%M %p') for key in ['price', 'solar', 'load']:
models[key] = []
data = dataset['load'].values
train_split = 8760
train_mv = dataset.iloc[:train_split]
test_mv = dataset.iloc[train_split:]
sp = {'seasonality': DateTime.month , #'type': 'common',
'names': ['Jan','Feb','Mar','Apr','May','Jun','Jul', 'Aug','Sep','Oct','Nov','Dec']}
vmonth = variable.Variable("Month", data_label="time", partitioner=seasonal.TimeGridPartitioner, npart=12,
data=train_mv, partitioner_specific=sp)
sp = {'seasonality': DateTime.day_of_week, #'type': 'common',
'names': ['Mon','Tue','Wed','Thu','Fri','Sat','Sun']}
vday = variable.Variable("Weekday", data_label="time", partitioner=seasonal.TimeGridPartitioner, npart=7,
data=train_mv, partitioner_specific=sp)
sp = {'seasonality': DateTime.hour_of_day} #, 'type': 'common'}
vhour = variable.Variable("Hour", data_label="time", partitioner=seasonal.TimeGridPartitioner, npart=24,
data=train_mv, partitioner_specific=sp)
vload = variable.Variable("load", data_label="load", partitioner=Grid.GridPartitioner, npart=10,
data=train_mv)
"""
model = cmvfts.ClusteredMVFTS(order=2, knn=3, cluster_params={'optmize': True})
model.append_variable(vmonthp)
model.append_variable(vdayp)
model.append_variable(vhourp)
model.append_variable(vload)
model.target_variable = vload
model.fit(train_mv)
print(len(model.cluster.sets.keys()))
model.cluster.prune()
print(len(model.cluster.sets.keys()))
model.predict(test_mv)
"""
'''
from pyFTS.data import Malaysia
dataset = Malaysia.get_dataframe()
dataset["date"] = pd.to_datetime(dataset["time"], format='%m/%d/%y %I:%M %p')
train_mv = dataset.iloc[:10000]
test_mv = dataset.iloc[10000:]
sp = {'seasonality': DateTime.month , 'names': ['Jan','Feb','Mar','Apr','May','Jun','Jul', 'Aug','Sep','Oct','Nov','Dec']}
vmonth = variable.Variable("Month", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=12,
data=train_mv, partitioner_specific=sp)
sp = {'seasonality': DateTime.day_of_week, 'names': ['Mon','Tue','Wed','Thu','Fri','Sat','Sun']}
vday = variable.Variable("Weekday", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=7,
data=train_mv, partitioner_specific=sp)
sp = {'seasonality': DateTime.hour_of_day}
vhour = variable.Variable("Hour", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=24,
data=train_mv, partitioner_specific=sp)
vload = variable.Variable("load", data_label="load", partitioner=Grid.GridPartitioner, npart=10,
data=train_mv)
vtemperature = variable.Variable("temperature", data_label="temperature", partitioner=Grid.GridPartitioner, npart=10,
data=train_mv)
"""
variables = {
'month': vmonth,
'day': vday,
'hour': vhour,
'temperature': vtemperature,
'load': vload
}
var_list = [k for k in variables.keys()]
models = []
import itertools
for k in [itertools.combinations(var_list, r) for r in range(2,len(var_list))]:
for x in k:
model = mvfts.MVFTS()
print(x)
for w in x:
model.append_variable(variables[w])
model.shortname += ' ' + w
model.target_variable = vload
model.fit(mv_train)
models.append(model)
"""
"""
dataset = pd.read_csv('/home/petronio/Downloads/priceHong') dataset = pd.read_csv('/home/petronio/Downloads/priceHong')
dataset['hour'] = dataset.index.values % 24 dataset['hour'] = dataset.index.values % 24
@@ -137,71 +34,100 @@ data = dataset['price'].values.flatten()
train_split = 24 * 800 train_split = 24 * 800
# Multivariate time series
train_mv = dataset.iloc[:train_split] train_mv['price'] = dataset.iloc[:train_split]
test_mv = dataset.iloc[train_split:] test_mv['price'] = dataset.iloc[train_split:]
#model = Util.load_obj('/home/petronio/Downloads/ClusteredMVFTS4') dataset = pd.read_csv('https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj', sep=';')
dataset['data'] = pd.to_datetime(dataset["data"], format='%Y-%m-%d %H:%M:%S')
train_mv['solar'] = dataset.iloc[:24505]
test_mv['solar'] = dataset.iloc[24505:]
from pyFTS.data import Malaysia
dataset = Malaysia.get_dataframe()
dataset["time"] = pd.to_datetime(dataset["time"], format='%m/%d/%y %I:%M %p')
train_mv['load'] = dataset.iloc[:train_split]
test_mv['load'] = dataset.iloc[train_split:]
exogenous = {}
endogenous = {}
for key in models.keys():
exogenous[key] = {}
vhour = variable.Variable("Hour", data_label="hour", partitioner=seasonal.TimeGridPartitioner, npart=24, vhour = variable.Variable("Hour", data_label="hour", partitioner=seasonal.TimeGridPartitioner, npart=24,
data=dataset, data=train_mv['price'],
partitioner_specific={'seasonality': DateTime.hour_of_day, 'type': 'common'}) partitioner_specific={'seasonality': DateTime.hour_of_day, 'type': 'common'})
exogenous['price']['Hour'] = vhour
vprice = variable.Variable("Price", data_label="price", partitioner=Grid.GridPartitioner, npart=55, vprice = variable.Variable("Price", data_label="price", partitioner=Grid.GridPartitioner, npart=55,
data=train_mv) data=train_mv['price'])
model = cmvfts.ClusteredMVFTS(order=2, knn=3) endogenous['price'] = vprice
model.append_variable(vhour)
model.append_variable(vprice)
model.target_variable = vprice
model.fit(train_mv)
data = [[1, 1.0], [2, 2.0]]
df = pd.DataFrame(data, columns=['hour','price'])
forecasts = model.predict(df, steps_ahead=24, generators={'Hour': lambda x : (x+1)%24 }) sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0,24)]}
"""
'''
params = [ vhour = variable.Variable("Hour", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=24,
{}, data=train_mv['solar'], partitioner_specific=sp)
{},
{'order': 2, 'knn': 3, 'cluster_params': {'optmize': True}}, exogenous['solar']['Hour'] = vhour
{'order': 2, 'knn': 2, 'cluster_params': {'optmize': True}},
{'order': 2, 'knn': 1, 'cluster_params': {'optmize': True}} vavg = variable.Variable("Radiation", data_label="glo_avg", alias='rad',
partitioner=Grid.GridPartitioner, npart=30, alpha_cut=.3,
data=train_mv['solar'])
endogenous['solar'] = vavg
sp = {'seasonality': DateTime.hour_of_day}
vhourp = variable.Variable("Hour", data_label="time", partitioner=seasonal.TimeGridPartitioner, npart=24,
data=train_mv['load'], partitioner_specific=sp)
exogenous['load']['Hour'] = vhourp
vload = variable.Variable("load", data_label="load", partitioner=Grid.GridPartitioner, npart=10,
data=train_mv['load'])
endogenous['load'] = vload
from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts
fig, ax = plt.subplots(nrows=3, ncols=1, figsize=[15,15])
parameters = [
{},{},
{'order':2, 'knn': 1},
{'order':2, 'knn': 2},
{'order':2, 'knn': 3},
] ]
from pyFTS.models.multivariate import grid for ct, key in enumerate(models.keys()):
cluster = None for ct2, method in enumerate([mvfts.MVFTS, wmvfts.WeightedMVFTS,
cmvfts.ClusteredMVFTS,cmvfts.ClusteredMVFTS,cmvfts.ClusteredMVFTS]):
print(key, method, parameters[ct2])
model = method(**parameters[ct2])
_key2 = ""
for k in parameters[ct2].keys():
_key2 += k + str(parameters[ct2][k])
model.shortname += str(ct) + key + _key2
for var in exogenous[key].values():
model.append_variable(var)
model.append_variable(endogenous[key])
model.target_variable = endogenous[key]
model.fit(train_mv[key])
models[key].append(model.shortname)
for ct, method in enumerate([mvfts.MVFTS, wmvfts.WeightedMVFTS, cmvfts.ClusteredMVFTS, cmvfts.ClusteredMVFTS, cmvfts.ClusteredMVFTS]): Util.persist_obj(model, model.shortname)
model = method(**params[ct]) del(model)
model.append_variable(vmonth)
model.append_variable(vday)
model.append_variable(vhour)
model.append_variable(vload)
model.target_variable = vload
model.fit(train_mv)
if method == cmvfts.ClusteredMVFTS:
model.cluster.prune()
try:
print(model.shortname, params[ct], Measures.get_point_statistics(test_mv, model))
except Exception as ex:
print(model.shortname, params[ct])
print(ex)
print("\n\n==============================================\n\n")
#print(model1)
#print(model1.predict(test_mv, steps_ahead=24, generators={'Hour': lambda x : (x+1)%24 }))
#'''