Improvement on FCM GA: include both the average and the standard deviation of the errors in the learning optimization objective

This commit is contained in:
Petrônio Cândido 2019-05-07 14:06:12 -03:00
parent f28fcf0a66
commit 048bb64927
3 changed files with 128 additions and 169 deletions

View File

@ -6,6 +6,7 @@ import time
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import dill import dill
import numpy as np import numpy as np
import pandas as pd
import matplotlib.cm as cmx import matplotlib.cm as cmx
import matplotlib.colors as pltcolors import matplotlib.colors as pltcolors
from pyFTS.probabilistic import ProbabilityDistribution from pyFTS.probabilistic import ProbabilityDistribution
@ -340,7 +341,10 @@ def sliding_window(data, windowsize, train=0.8, inc=0.1, **kwargs):
:param inc: percentual of data used for slide the window :param inc: percentual of data used for slide the window
:return: window count, training set, test set :return: window count, training set, test set
""" """
l = len(data)
multivariate = True if isinstance(data, pd.DataFrame) else False
l = len(data) if not multivariate else len(data.index)
ttrain = int(round(windowsize * train, 0)) ttrain = int(round(windowsize * train, 0))
ic = int(round(windowsize * inc, 0)) ic = int(round(windowsize * inc, 0))
@ -357,7 +361,10 @@ def sliding_window(data, windowsize, train=0.8, inc=0.1, **kwargs):
_end = l _end = l
else: else:
_end = count + windowsize _end = count + windowsize
yield (count, data[count : count + ttrain], data[count + ttrain : _end] ) if multivariate:
yield (count, data.iloc[count: count + ttrain], data.iloc[count + ttrain: _end])
else:
yield (count, data[count : count + ttrain], data[count + ttrain : _end] )
def persist_obj(obj, file): def persist_obj(obj, file):

View File

@ -125,9 +125,10 @@ def evaluate(dataset, individual, **kwargs):
errors.append(rmse) errors.append(rmse)
_rmse = np.nanmean(errors) _rmse = np.nanmean(errors)
_std = np.nanstd(errors)
#print("EVALUATION {}".format(individual)) #print("EVALUATION {}".format(individual))
return {'rmse': _rmse} return {'rmse': .6 * _rmse + .4 * _std}

View File

@ -6,193 +6,144 @@ from pyFTS.data import Enrollments, TAIEX, SONDA
from pyFTS.partitioners import Grid, Simple, Entropy from pyFTS.partitioners import Grid, Simple, Entropy
from pyFTS.common import Util from pyFTS.common import Util
from pyspark import SparkConf from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid, granular
from pyspark import SparkContext
import os
# make sure pyspark tells workers to use python3 not 2 if both are installed
os.environ['PYSPARK_PYTHON'] = '/usr/bin/python3'
os.environ['PYSPARK_DRIVER_PYTHON'] = '/usr/bin/python3'
#'''
from pyFTS.models.multivariate import common, variable, wmvfts
from pyFTS.models.seasonal import partitioner as seasonal
from pyFTS.models.seasonal.common import DateTime
from pyFTS.partitioners import Grid
import matplotlib.pyplot as plt
'''
#fig, ax = plt.subplots(nrows=3, ncols=1, figsize=[15,5])
sp = {'seasonality': DateTime.day_of_year , 'names': ['Jan','Feb','Mar','Apr','May','Jun','Jul', 'Aug','Sep','Oct','Nov','Dec']}
vmonth = variable.Variable("Month", data_label="datahora", partitioner=seasonal.TimeGridPartitioner, npart=12, alpha_cut=.25,
data=train, partitioner_specific=sp)
#vmonth.partitioner.plot(ax[0])
sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0,24)]}
vhour = variable.Variable("Hour", data_label="datahora", partitioner=seasonal.TimeGridPartitioner, npart=24, alpha_cut=.2,
data=train, partitioner_specific=sp)
#vhour.partitioner.plot(ax[1])
vavg = variable.Variable("Radiation", data_label="glo_avg", alias='R',
partitioner=Grid.GridPartitioner, npart=35, alpha_cut=.3,
data=train)
#vavg.partitioner.plot(ax[2])
#plt.tight_layout()
#Util.show_and_save_image(fig, 'variables', True)
model = wmvfts.WeightedMVFTS(explanatory_variables=[vmonth,vhour,vavg], target_variable=vavg)
_s1 = time.time()
model.fit(train)
#model.fit(data, distributed='spark', url='spark://192.168.0.106:7077', num_batches=4)
_s2 = time.time()
print(_s2-_s1)
Util.persist_obj(model, 'sonda_wmvfts')
'''
#model = Util.load_obj('sonda_wmvfts')
'''
from pyFTS.benchmarks import Measures from pyFTS.benchmarks import Measures
from pyFTS.common import Util as cUtil
_s1 = time.time()
print(Measures.get_point_statistics(test, model))
_s2 = time.time()
print(_s2-_s1)
'''
#print(len(model))
#
#model.fit(data, distributed='dispy', nodes=['192.168.0.110'])
#'''
'''
from pyFTS.models.multivariate import common, variable, mvfts, wmvfts, cmvfts, grid
from pyFTS.models.seasonal import partitioner as seasonal from pyFTS.models.seasonal import partitioner as seasonal
from pyFTS.models.seasonal.common import DateTime from pyFTS.models.seasonal.common import DateTime
dataset = pd.read_csv('/home/petronio/Downloads/gefcom12.csv')
dataset = dataset.dropna()
train_mv = dataset.iloc[:15000]
test_mv = dataset.iloc[15000:]
from pyFTS.models.multivariate import common, variable, mvfts from pyFTS.models.multivariate import common, variable, mvfts
from pyFTS.models.seasonal import partitioner as seasonal from pyFTS.partitioners import Grid
from pyFTS.models.seasonal.common import DateTime from pyFTS.common import Membership
sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0,24)]} import os
'''
from pyFTS.data import lorentz
df = lorentz.get_dataframe(iterations=5000)
train = df.iloc[:4000]
#test = df.iloc[4000:]
npart=120
import sys
vx = variable.Variable("x", data_label="x", alias='x', partitioner=Grid.GridPartitioner,
partitioner_specific={'mf': Membership.gaussmf}, npart=npart, data=train)
vy = variable.Variable("y", data_label="y", alias='y', partitioner=Grid.GridPartitioner,
partitioner_specific={'mf': Membership.gaussmf}, npart=int(npart*1.5), data=train)
vz = variable.Variable("z", data_label="z", alias='z', partitioner=Grid.GridPartitioner,
partitioner_specific={'mf': Membership.gaussmf}, npart=int(npart*1.2), data=train)
rows = []
for ct, train, test in cUtil.sliding_window(df, windowsize=4100, train=.97, inc=.05):
print('Window {}'.format(ct))
for order in [1, 2, 3]:
for knn in [1, 2, 3]:
model = granular.GranularWMVFTS(explanatory_variables=[vx, vy, vz], target_variable=vx, order=order,
knn=knn)
model.fit(train)
forecasts1 = model.predict(test, type='multivariate')
forecasts2 = model.predict(test, type='multivariate', steps_ahead=100)
for var in ['x', 'y', 'z']:
row = [order, knn, var, len(model)]
for horizon in [1, 25, 50, 75, 100]:
if horizon == 1:
row.append( Measures.mape(test[var].values[model.order:model.order+10],
forecasts1[var].values[:10]))
else:
row.append( Measures.mape(test[var].values[:horizon],
forecasts2[var].values[:horizon]))
print(row)
rows.append(row)
columns = ['Order', 'knn', 'var', 'Rules']
for horizon in [1, 25, 50, 75, 100]:
columns.append('h{}'.format(horizon))
final = pd.DataFrame(rows, columns=columns)
final.to_csv('gmvfts_lorentz1.csv',sep=';',index=False)
'''
import pandas as pd
df = pd.read_csv('https://query.data.world/s/ftb7bzgobr6bsg6bsuxuqowja6ew4r')
#df.dropna()
mload = np.nanmean(df["load"].values)
df['load'] = np.where(pd.isna(df["load"]), mload, df["load"])
mtemp = np.nanmean(df["temperature"].values)
df['temperature'] = np.where(pd.isna(df["temperature"]), mtemp, df["temperature"])
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d %H:%M:%S')
df['hour'] = np.float64(df['date'].apply(lambda x: x.strftime('%H')))
df['weekday'] = np.float64(df['date'].apply(lambda x: x.strftime('%w')))
df['month'] = np.float64(df['date'].apply(lambda x: x.strftime('%m')))
train_mv = df.iloc[:31000]
test_mv = df.iloc[:31000:]
sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k)+'hs' for k in range(0,24)]}
vhour = variable.Variable("Hour", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=24, vhour = variable.Variable("Hour", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=24,
data=train_mv, partitioner_specific=sp) data=train_mv, partitioner_specific=sp, alpha_cut=.3)
sp = {'seasonality': DateTime.day_of_week, 'names': ['mon','tue','wed','tur','fri','sat','sun']}
vday = variable.Variable("DayOfWeek", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=7,
data=train_mv, partitioner_specific=sp)
#sp = {'seasonality': DateTime.day_of_month, 'names': ['jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec']}
sp = {'seasonality': DateTime.quarter}
vmonth = variable.Variable("Month", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=4,
data=train_mv, partitioner_specific=sp)
vtemp = variable.Variable("Temperature", data_label="temperature", alias='temp',
partitioner=Grid.GridPartitioner, npart=15, func=Membership.gaussmf,
data=train_mv, alpha_cut=.3)
vload = variable.Variable("Load", data_label="load", alias='load', vload = variable.Variable("Load", data_label="load", alias='load',
partitioner=Grid.GridPartitioner, npart=20, partitioner=Grid.GridPartitioner, npart=20, func=Membership.gaussmf,
data=train_mv) data=train_mv, alpha_cut=.3)
vtemp = variable.Variable("Temperature", data_label="temperature", alias='temperature',
partitioner=Grid.GridPartitioner, npart=20,
data=train_mv)
from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid
from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid
mtemp = wmvfts.WeightedMVFTS(explanatory_variables=[vhour, vmonth, vtemp], target_variable=vtemp)
mtemp.fit(train_mv)
Util.persist_obj(mtemp, 'mtemp')
from pyFTS.models import hofts
#mtemp = hofts.WeightedHighOrderFTS(order=2, partitioner=vtemp.partitioner)
#mtemp.fit(train_mv['temperature'].values)
from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid
mload = wmvfts.WeightedMVFTS(explanatory_variables=[vtemp, vload], target_variable=vload)
mload.fit(train_mv)
Util.persist_obj(mload, 'mload')
rows = []
time_generator = lambda x : pd.to_datetime(x) + pd.to_timedelta(1, unit='h') time_generator = lambda x : pd.to_datetime(x) + pd.to_timedelta(1, unit='h')
for ct, train, test in cUtil.sliding_window(df, windowsize=32000, train=.98, inc=.05):
print('Window {}'.format(ct))
for order in [1, 2, 3]:
for knn in [1, 2, 3]:
model = granular.GranularWMVFTS(explanatory_variables=[vhour, vtemp, vload], target_variable=vload,
order=order, knn=knn)
forecasts = mload.predict(test_mv.iloc[:1], steps_ahead=48, generators={'date': time_generator, model.fit(train)
'temperature': mtemp})
''' forecasts1 = model.predict(test, type='multivariate')
forecasts2 = model.predict(test, type='multivariate', generators={'date': time_generator},
steps_ahead=100)
for var in ['temperature','load']:
row = [order, knn, var, len(model)]
for horizon in [1, 25, 50, 75, 100]:
if horizon == 1:
row.append(Measures.mape(test[var].values[model.order:model.order + 10],
forecasts1[var].values[:10]))
else:
row.append(Measures.mape(test[var].values[:horizon],
forecasts2[var].values[:horizon]))
data = pd.read_csv('https://query.data.world/s/6xfb5useuotbbgpsnm5b2l3wzhvw2i', sep=';') print(row)
rows.append(row)
train = data.iloc[:9000] columns = ['Order', 'knn', 'var', 'Rules']
test = data.iloc[9000:9200] for horizon in [1, 25, 50, 75, 100]:
columns.append('h{}'.format(horizon))
final = pd.DataFrame(rows, columns=columns)
from pyFTS.models.multivariate import common, variable, mvfts final.to_csv('gmvfts_gefcom12.csv', sep=';', index=False)
from pyFTS.models.seasonal import partitioner as seasonal
from pyFTS.models.seasonal.common import DateTime
from pyFTS.partitioners import Grid
sp = {'seasonality': DateTime.day_of_year , 'names': ['Jan','Fev','Mar','Abr','Mai','Jun','Jul', 'Ago','Set','Out','Nov','Dez']}
vmonth = variable.Variable("Month", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=12,
data=train, partitioner_specific=sp, alpha_cut=.5)
sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0,24)]}
vhour = variable.Variable("Hour", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=24,
data=train, partitioner_specific=sp, alpha_cut=.5)
#print(vhour.partitioner)
#print(vmonth.partitioner.fuzzyfy(180))
vavg = variable.Variable("Radiation", data_label="glo_avg", alias='rad',
partitioner=Grid.GridPartitioner, npart=25, alpha_cut=.3,
data=train)
from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid, granular
model = granular.GranularWMVFTS(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg,
order=2, knn=7)
model.fit(train)
print(model)
#model.predict(test)