Improve the FCM GA by including both the average and the standard deviation of the errors in the learning optimization objective
This commit is contained in:
parent
f28fcf0a66
commit
048bb64927
@ -6,6 +6,7 @@ import time
|
||||
import matplotlib.pyplot as plt
|
||||
import dill
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import matplotlib.cm as cmx
|
||||
import matplotlib.colors as pltcolors
|
||||
from pyFTS.probabilistic import ProbabilityDistribution
|
||||
@ -340,7 +341,10 @@ def sliding_window(data, windowsize, train=0.8, inc=0.1, **kwargs):
|
||||
:param inc: percentual of data used for slide the window
|
||||
:return: window count, training set, test set
|
||||
"""
|
||||
l = len(data)
|
||||
|
||||
multivariate = True if isinstance(data, pd.DataFrame) else False
|
||||
|
||||
l = len(data) if not multivariate else len(data.index)
|
||||
ttrain = int(round(windowsize * train, 0))
|
||||
ic = int(round(windowsize * inc, 0))
|
||||
|
||||
@ -357,6 +361,9 @@ def sliding_window(data, windowsize, train=0.8, inc=0.1, **kwargs):
|
||||
_end = l
|
||||
else:
|
||||
_end = count + windowsize
|
||||
if multivariate:
|
||||
yield (count, data.iloc[count: count + ttrain], data.iloc[count + ttrain: _end])
|
||||
else:
|
||||
yield (count, data[count : count + ttrain], data[count + ttrain : _end] )
|
||||
|
||||
|
||||
|
@ -125,9 +125,10 @@ def evaluate(dataset, individual, **kwargs):
|
||||
errors.append(rmse)
|
||||
|
||||
_rmse = np.nanmean(errors)
|
||||
_std = np.nanstd(errors)
|
||||
|
||||
#print("EVALUATION {}".format(individual))
|
||||
return {'rmse': _rmse}
|
||||
return {'rmse': .6 * _rmse + .4 * _std}
|
||||
|
||||
|
||||
|
||||
|
@ -6,193 +6,144 @@ from pyFTS.data import Enrollments, TAIEX, SONDA
|
||||
from pyFTS.partitioners import Grid, Simple, Entropy
|
||||
from pyFTS.common import Util
|
||||
|
||||
from pyspark import SparkConf
|
||||
from pyspark import SparkContext
|
||||
|
||||
import os
|
||||
# make sure pyspark tells workers to use python3 not 2 if both are installed
|
||||
os.environ['PYSPARK_PYTHON'] = '/usr/bin/python3'
|
||||
os.environ['PYSPARK_DRIVER_PYTHON'] = '/usr/bin/python3'
|
||||
#'''
|
||||
|
||||
|
||||
from pyFTS.models.multivariate import common, variable, wmvfts
|
||||
from pyFTS.models.seasonal import partitioner as seasonal
|
||||
from pyFTS.models.seasonal.common import DateTime
|
||||
from pyFTS.partitioners import Grid
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
'''
|
||||
#fig, ax = plt.subplots(nrows=3, ncols=1, figsize=[15,5])
|
||||
|
||||
|
||||
sp = {'seasonality': DateTime.day_of_year , 'names': ['Jan','Feb','Mar','Apr','May','Jun','Jul', 'Aug','Sep','Oct','Nov','Dec']}
|
||||
|
||||
vmonth = variable.Variable("Month", data_label="datahora", partitioner=seasonal.TimeGridPartitioner, npart=12, alpha_cut=.25,
|
||||
data=train, partitioner_specific=sp)
|
||||
|
||||
#vmonth.partitioner.plot(ax[0])
|
||||
|
||||
sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0,24)]}
|
||||
|
||||
vhour = variable.Variable("Hour", data_label="datahora", partitioner=seasonal.TimeGridPartitioner, npart=24, alpha_cut=.2,
|
||||
data=train, partitioner_specific=sp)
|
||||
|
||||
#vhour.partitioner.plot(ax[1])
|
||||
|
||||
|
||||
vavg = variable.Variable("Radiation", data_label="glo_avg", alias='R',
|
||||
partitioner=Grid.GridPartitioner, npart=35, alpha_cut=.3,
|
||||
data=train)
|
||||
|
||||
#vavg.partitioner.plot(ax[2])
|
||||
|
||||
#plt.tight_layout()
|
||||
|
||||
#Util.show_and_save_image(fig, 'variables', True)
|
||||
|
||||
model = wmvfts.WeightedMVFTS(explanatory_variables=[vmonth,vhour,vavg], target_variable=vavg)
|
||||
|
||||
|
||||
_s1 = time.time()
|
||||
model.fit(train)
|
||||
#model.fit(data, distributed='spark', url='spark://192.168.0.106:7077', num_batches=4)
|
||||
_s2 = time.time()
|
||||
|
||||
print(_s2-_s1)
|
||||
|
||||
Util.persist_obj(model, 'sonda_wmvfts')
|
||||
'''
|
||||
|
||||
#model = Util.load_obj('sonda_wmvfts')
|
||||
|
||||
'''
|
||||
from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid, granular
|
||||
from pyFTS.benchmarks import Measures
|
||||
from pyFTS.common import Util as cUtil
|
||||
|
||||
_s1 = time.time()
|
||||
print(Measures.get_point_statistics(test, model))
|
||||
_s2 = time.time()
|
||||
|
||||
print(_s2-_s1)
|
||||
'''
|
||||
|
||||
#print(len(model))
|
||||
|
||||
|
||||
#
|
||||
|
||||
#model.fit(data, distributed='dispy', nodes=['192.168.0.110'])
|
||||
#'''
|
||||
|
||||
'''
|
||||
from pyFTS.models.multivariate import common, variable, mvfts, wmvfts, cmvfts, grid
|
||||
from pyFTS.models.seasonal import partitioner as seasonal
|
||||
from pyFTS.models.seasonal.common import DateTime
|
||||
|
||||
dataset = pd.read_csv('/home/petronio/Downloads/gefcom12.csv')
|
||||
dataset = dataset.dropna()
|
||||
|
||||
train_mv = dataset.iloc[:15000]
|
||||
test_mv = dataset.iloc[15000:]
|
||||
|
||||
from pyFTS.models.multivariate import common, variable, mvfts
|
||||
from pyFTS.models.seasonal import partitioner as seasonal
|
||||
from pyFTS.models.seasonal.common import DateTime
|
||||
from pyFTS.partitioners import Grid
|
||||
from pyFTS.common import Membership
|
||||
|
||||
|
||||
sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0,24)]}
|
||||
import os
|
||||
|
||||
'''
|
||||
from pyFTS.data import lorentz
|
||||
df = lorentz.get_dataframe(iterations=5000)
|
||||
|
||||
train = df.iloc[:4000]
|
||||
#test = df.iloc[4000:]
|
||||
|
||||
npart=120
|
||||
|
||||
|
||||
import sys
|
||||
|
||||
|
||||
vx = variable.Variable("x", data_label="x", alias='x', partitioner=Grid.GridPartitioner,
|
||||
partitioner_specific={'mf': Membership.gaussmf}, npart=npart, data=train)
|
||||
vy = variable.Variable("y", data_label="y", alias='y', partitioner=Grid.GridPartitioner,
|
||||
partitioner_specific={'mf': Membership.gaussmf}, npart=int(npart*1.5), data=train)
|
||||
vz = variable.Variable("z", data_label="z", alias='z', partitioner=Grid.GridPartitioner,
|
||||
partitioner_specific={'mf': Membership.gaussmf}, npart=int(npart*1.2), data=train)
|
||||
|
||||
|
||||
|
||||
rows = []
|
||||
|
||||
for ct, train, test in cUtil.sliding_window(df, windowsize=4100, train=.97, inc=.05):
|
||||
print('Window {}'.format(ct))
|
||||
for order in [1, 2, 3]:
|
||||
for knn in [1, 2, 3]:
|
||||
model = granular.GranularWMVFTS(explanatory_variables=[vx, vy, vz], target_variable=vx, order=order,
|
||||
knn=knn)
|
||||
|
||||
model.fit(train)
|
||||
|
||||
forecasts1 = model.predict(test, type='multivariate')
|
||||
forecasts2 = model.predict(test, type='multivariate', steps_ahead=100)
|
||||
|
||||
for var in ['x', 'y', 'z']:
|
||||
row = [order, knn, var, len(model)]
|
||||
for horizon in [1, 25, 50, 75, 100]:
|
||||
if horizon == 1:
|
||||
row.append( Measures.mape(test[var].values[model.order:model.order+10],
|
||||
forecasts1[var].values[:10]))
|
||||
else:
|
||||
row.append( Measures.mape(test[var].values[:horizon],
|
||||
forecasts2[var].values[:horizon]))
|
||||
|
||||
print(row)
|
||||
rows.append(row)
|
||||
|
||||
columns = ['Order', 'knn', 'var', 'Rules']
|
||||
for horizon in [1, 25, 50, 75, 100]:
|
||||
columns.append('h{}'.format(horizon))
|
||||
final = pd.DataFrame(rows, columns=columns)
|
||||
|
||||
final.to_csv('gmvfts_lorentz1.csv',sep=';',index=False)
|
||||
'''
|
||||
|
||||
import pandas as pd
|
||||
df = pd.read_csv('https://query.data.world/s/ftb7bzgobr6bsg6bsuxuqowja6ew4r')
|
||||
|
||||
#df.dropna()
|
||||
|
||||
mload = np.nanmean(df["load"].values)
|
||||
df['load'] = np.where(pd.isna(df["load"]), mload, df["load"])
|
||||
|
||||
mtemp = np.nanmean(df["temperature"].values)
|
||||
df['temperature'] = np.where(pd.isna(df["temperature"]), mtemp, df["temperature"])
|
||||
|
||||
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d %H:%M:%S')
|
||||
|
||||
df['hour'] = np.float64(df['date'].apply(lambda x: x.strftime('%H')))
|
||||
df['weekday'] = np.float64(df['date'].apply(lambda x: x.strftime('%w')))
|
||||
df['month'] = np.float64(df['date'].apply(lambda x: x.strftime('%m')))
|
||||
|
||||
train_mv = df.iloc[:31000]
|
||||
test_mv = df.iloc[:31000:]
|
||||
|
||||
sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k)+'hs' for k in range(0,24)]}
|
||||
|
||||
vhour = variable.Variable("Hour", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=24,
|
||||
data=train_mv, partitioner_specific=sp)
|
||||
|
||||
sp = {'seasonality': DateTime.day_of_week, 'names': ['mon','tue','wed','tur','fri','sat','sun']}
|
||||
|
||||
vday = variable.Variable("DayOfWeek", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=7,
|
||||
data=train_mv, partitioner_specific=sp)
|
||||
|
||||
#sp = {'seasonality': DateTime.day_of_month, 'names': ['jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec']}
|
||||
|
||||
sp = {'seasonality': DateTime.quarter}
|
||||
|
||||
vmonth = variable.Variable("Month", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=4,
|
||||
data=train_mv, partitioner_specific=sp)
|
||||
data=train_mv, partitioner_specific=sp, alpha_cut=.3)
|
||||
|
||||
vtemp = variable.Variable("Temperature", data_label="temperature", alias='temp',
|
||||
partitioner=Grid.GridPartitioner, npart=15, func=Membership.gaussmf,
|
||||
data=train_mv, alpha_cut=.3)
|
||||
|
||||
vload = variable.Variable("Load", data_label="load", alias='load',
|
||||
partitioner=Grid.GridPartitioner, npart=20,
|
||||
data=train_mv)
|
||||
|
||||
vtemp = variable.Variable("Temperature", data_label="temperature", alias='temperature',
|
||||
partitioner=Grid.GridPartitioner, npart=20,
|
||||
data=train_mv)
|
||||
|
||||
from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid
|
||||
|
||||
from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid
|
||||
|
||||
mtemp = wmvfts.WeightedMVFTS(explanatory_variables=[vhour, vmonth, vtemp], target_variable=vtemp)
|
||||
mtemp.fit(train_mv)
|
||||
|
||||
Util.persist_obj(mtemp, 'mtemp')
|
||||
|
||||
from pyFTS.models import hofts
|
||||
|
||||
#mtemp = hofts.WeightedHighOrderFTS(order=2, partitioner=vtemp.partitioner)
|
||||
#mtemp.fit(train_mv['temperature'].values)
|
||||
|
||||
from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid
|
||||
|
||||
mload = wmvfts.WeightedMVFTS(explanatory_variables=[vtemp, vload], target_variable=vload)
|
||||
mload.fit(train_mv)
|
||||
|
||||
Util.persist_obj(mload, 'mload')
|
||||
partitioner=Grid.GridPartitioner, npart=20, func=Membership.gaussmf,
|
||||
data=train_mv, alpha_cut=.3)
|
||||
|
||||
rows = []
|
||||
|
||||
time_generator = lambda x : pd.to_datetime(x) + pd.to_timedelta(1, unit='h')
|
||||
|
||||
for ct, train, test in cUtil.sliding_window(df, windowsize=32000, train=.98, inc=.05):
|
||||
print('Window {}'.format(ct))
|
||||
for order in [1, 2, 3]:
|
||||
for knn in [1, 2, 3]:
|
||||
model = granular.GranularWMVFTS(explanatory_variables=[vhour, vtemp, vload], target_variable=vload,
|
||||
order=order, knn=knn)
|
||||
|
||||
forecasts = mload.predict(test_mv.iloc[:1], steps_ahead=48, generators={'date': time_generator,
|
||||
'temperature': mtemp})
|
||||
model.fit(train)
|
||||
|
||||
'''
|
||||
forecasts1 = model.predict(test, type='multivariate')
|
||||
forecasts2 = model.predict(test, type='multivariate', generators={'date': time_generator},
|
||||
steps_ahead=100)
|
||||
|
||||
for var in ['temperature','load']:
|
||||
row = [order, knn, var, len(model)]
|
||||
for horizon in [1, 25, 50, 75, 100]:
|
||||
if horizon == 1:
|
||||
row.append(Measures.mape(test[var].values[model.order:model.order + 10],
|
||||
forecasts1[var].values[:10]))
|
||||
else:
|
||||
row.append(Measures.mape(test[var].values[:horizon],
|
||||
forecasts2[var].values[:horizon]))
|
||||
|
||||
data = pd.read_csv('https://query.data.world/s/6xfb5useuotbbgpsnm5b2l3wzhvw2i', sep=';')
|
||||
print(row)
|
||||
rows.append(row)
|
||||
|
||||
train = data.iloc[:9000]
|
||||
test = data.iloc[9000:9200]
|
||||
columns = ['Order', 'knn', 'var', 'Rules']
|
||||
for horizon in [1, 25, 50, 75, 100]:
|
||||
columns.append('h{}'.format(horizon))
|
||||
final = pd.DataFrame(rows, columns=columns)
|
||||
|
||||
from pyFTS.models.multivariate import common, variable, mvfts
|
||||
from pyFTS.models.seasonal import partitioner as seasonal
|
||||
from pyFTS.models.seasonal.common import DateTime
|
||||
from pyFTS.partitioners import Grid
|
||||
|
||||
sp = {'seasonality': DateTime.day_of_year , 'names': ['Jan','Fev','Mar','Abr','Mai','Jun','Jul', 'Ago','Set','Out','Nov','Dez']}
|
||||
|
||||
vmonth = variable.Variable("Month", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=12,
|
||||
data=train, partitioner_specific=sp, alpha_cut=.5)
|
||||
|
||||
sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0,24)]}
|
||||
|
||||
vhour = variable.Variable("Hour", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=24,
|
||||
data=train, partitioner_specific=sp, alpha_cut=.5)
|
||||
|
||||
#print(vhour.partitioner)
|
||||
|
||||
#print(vmonth.partitioner.fuzzyfy(180))
|
||||
|
||||
vavg = variable.Variable("Radiation", data_label="glo_avg", alias='rad',
|
||||
partitioner=Grid.GridPartitioner, npart=25, alpha_cut=.3,
|
||||
data=train)
|
||||
|
||||
from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts, grid, granular
|
||||
|
||||
model = granular.GranularWMVFTS(explanatory_variables=[vmonth, vhour, vavg], target_variable=vavg,
|
||||
order=2, knn=7)
|
||||
|
||||
model.fit(train)
|
||||
|
||||
print(model)
|
||||
|
||||
#model.predict(test)
|
||||
final.to_csv('gmvfts_gefcom12.csv', sep=';', index=False)
|
Loading…
Reference in New Issue
Block a user