- Improvements on probability distributions and KDE
- Seasonal Ensemble
parent ba71e08e76
commit 9bfd931e45
@@ -209,6 +209,8 @@ def scale(data, params):
     ndata = [(k-params[0])/params[1] for k in data]
     return ndata
 
+def stats(measure, data):
+    print(measure, np.nanmean(data), np.nanstd(data))
 
 def unified_scaled_point(experiments, tam, save=False, file=None,
                          sort_columns=['UAVG', 'RMSEAVG', 'USTD', 'RMSESTD'],
@@ -259,7 +261,6 @@ def unified_scaled_point(experiments, tam, save=False, file=None,
             mdl[b]['times'] = []
 
             best = bests[b]
-            print(best)
 
             tmp = dat_ana[(dat_ana.Model == best["Model"]) & (dat_ana.Order == best["Order"])
                           & (dat_ana.Scheme == best["Scheme"]) & (dat_ana.Partitions == best["Partitions"])]
             tmpl = extract_measure(tmp,'RMSE',data_columns)
@@ -277,10 +278,13 @@ def unified_scaled_point(experiments, tam, save=False, file=None,
             models[b]['label'] = check_replace_list(best["Model"] + " " + str(best["Order"]), replace)
 
 
     print("GLOBAL")
     rmse_param = scale_params(rmse)
+    stats("rmse", rmse)
     smape_param = scale_params(smape)
+    stats("smape", smape)
     u_param = scale_params(u)
+    stats("u", u)
     times_param = scale_params(times)
 
     for key in sorted(models.keys()):
@@ -295,9 +299,13 @@ def unified_scaled_point(experiments, tam, save=False, file=None,
     times = []
     labels = []
     for key in sorted(models.keys()):
+        print(key)
         rmse.append(models[key]['rmse'])
+        stats("rmse", models[key]['rmse'])
         smape.append(models[key]['smape'])
+        stats("smape", models[key]['smape'])
         u.append(models[key]['u'])
+        stats("u", models[key]['u'])
         times.append(models[key]['times'])
         labels.append(models[key]['label'])
 
@@ -995,6 +1003,8 @@ def unified_scaled_ahead(experiments, tam, save=False, file=None,
 
     for experiment in experiments:
 
+        print(experiment)
+
         mdl = {}
 
         dat_syn = pd.read_csv(experiment[0], sep=";", usecols=ahead_dataframe_synthetic_columns())
@@ -1023,6 +1033,9 @@ def unified_scaled_ahead(experiments, tam, save=False, file=None,
             mdl[b]['crps2'] = []
 
             best = bests[b]
+
+            print(best)
+
             tmp = dat_ana[(dat_ana.Model == best["Model"]) & (dat_ana.Order == best["Order"])
                           & (dat_ana.Scheme == best["Scheme"]) & (dat_ana.Partitions == best["Partitions"])]
             tmpl = extract_measure(tmp, 'CRPS_Interval', data_columns)
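The new stats helper prints NaN-tolerant summary statistics for each scaled measure. A minimal sketch of how it pairs with scale (scale_params is not shown in this diff; min/range scaling is an assumption here):

    import numpy as np

    def stats(measure, data):
        # mean and standard deviation, ignoring NaNs from failed runs
        print(measure, np.nanmean(data), np.nanstd(data))

    rmse = [1.2, 0.9, np.nan, 1.5]
    params = [np.nanmin(rmse), np.nanmax(rmse) - np.nanmin(rmse)]  # assumed scale_params behavior
    stats("rmse", [(k - params[0]) / params[1] for k in rmse])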
26 ensemble.py
@@ -11,6 +11,7 @@ from pyFTS.benchmarks import arima, quantreg
 from pyFTS.common import Transformations
 import scipy.stats as st
 from pyFTS import tree
+from pyFTS.models import msfts
 
 def sampler(data, quantiles):
     ret = []
@@ -241,3 +242,28 @@ class AllMethodEnsembleFTS(EnsembleFTS):
             self.appendModel(model)
 
 
+class SeasonalEnsembleFTS(EnsembleFTS):
+    def __init__(self, name, **kwargs):
+        super(SeasonalEnsembleFTS, self).__init__(name="Seasonal Ensemble FTS", **kwargs)
+        self.min_order = 1
+        self.indexers = []
+        self.partitioners = []
+        self.is_multivariate = True
+        self.has_seasonality = True
+        self.has_probability_forecasting = True
+
+    def train(self, data, sets, order=1, parameters=None):
+        self.original_max = max(data)
+        self.original_min = min(data)
+
+        for ix in self.indexers:
+            for pt in self.partitioners:
+
+                model = msfts.MultiSeasonalFTS()
+                model.indexer = ix
+                model.appendTransformation(pt.transformation)
+                model.train(data, pt.sets, order=1)
+
+                self.appendModel(model)
+
+
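A hedged usage sketch for the new class (the module path follows ensemble.py's own imports; ix_m15, ix_Mh, fs1 and fs2 stand for the indexers and partitioners persisted in the test script further below):

    from pyFTS.benchmarks import ensemble

    model = ensemble.SeasonalEnsembleFTS("sonda")
    model.indexers = [ix_m15, ix_Mh]      # DateTimeSeasonalIndexer instances
    model.partitioners = [fs1, fs2]       # e.g. Grid and FCM partitioners
    # train() fits one msfts.MultiSeasonalFTS per (indexer, partitioner) pair
    model.train(sonda_treino, sets=None)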
@@ -1,6 +1,7 @@
 import numpy as np
+from enum import Enum
 
 
 class SeasonalIndexer(object):
     """
     Seasonal Indexer. Responsible to find the seasonal index of a data point inside its data set
@@ -117,6 +118,7 @@ class DataFrameSeasonalIndexer(SeasonalIndexer):
         data.loc[:,self.data_fields] = value
         return data
 
+
 class DateTime(Enum):
     year = 1
     month = 2
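With DateTime now an Enum, seasonal fields become symbolic members instead of bare integers. A minimal sketch (only the two members visible in the hunk; the real class defines more):

    from enum import Enum

    class DateTime(Enum):
        year = 1
        month = 2

    fields = [DateTime.month, DateTime.year]
    print([f.name for f in fields])   # ['month', 'year']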
@@ -3,7 +3,7 @@ import pandas as pd
 import matplotlib as plt
 import matplotlib.colors as pltcolors
 import matplotlib.pyplot as plt
-from mpl_toolkits.mplot3d import Axes3D
+#from mpl_toolkits.mplot3d import Axes3D
 from pyFTS.common import Membership, Util
 from pyFTS.partitioners import Grid,Huarng,FCM,Entropy
 
@@ -7,7 +7,7 @@ class Partitioner(object):
     Universe of Discourse partitioner. Split data on several fuzzy sets
     """
 
-    def __init__(self, name, data, npart, func=Membership.trimf, names=None, prefix="A", transformation=None):
+    def __init__(self, name, data, npart, func=Membership.trimf, names=None, prefix="A", transformation=None, indexer=None):
         """
         Universe of Discourse partitioner scheme. Split data on several fuzzy sets
         :param name: partitioner name
@@ -25,9 +25,15 @@ class Partitioner(object):
         self.setnames = names
         self.prefix = prefix
         self.transformation = transformation
+        self.indexer = indexer
+
+        if self.indexer is not None:
+            ndata = self.indexer.get_data(data)
+        else:
+            ndata = data
 
         if transformation is not None:
-            ndata = transformation.apply(data)
+            ndata = transformation.apply(ndata)
         else:
             ndata = data
 
@@ -42,8 +48,11 @@ class Partitioner(object):
             self.max = _max * 1.1
         else:
             self.max = _max * 0.9
 
         self.sets = self.build(ndata)
+
+        del(ndata)
+
     def build(self, data):
         """
         Perform the partitioning of the Universe of Discourse
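The new indexer parameter lets a partitioner pull its target series out of a multivariate structure before the transformation runs and the sets are built. A sketch of the contract (FieldIndexer is a hypothetical stand-in; only the get_data() method assumed by the diff matters):

    import pandas as pd

    class FieldIndexer(object):
        def __init__(self, field):
            self.field = field

        def get_data(self, df):
            # extract the target column, as Partitioner.__init__ now expects
            return df[self.field].tolist()

    df = pd.DataFrame({"data": pd.date_range("2017-01-01", periods=3, freq="H"),
                       "glo_avg": [100.0, 250.0, 400.0]})
    ndata = FieldIndexer("glo_avg").get_data(df)   # what build() will receive
    print(ndata)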
@@ -5,38 +5,55 @@ from pyFTS.common import FuzzySet,SortedCollection
 
 
 class ProbabilityDistribution(object):
-    def __init__(self,name,nbins,uod,bins=None,labels=None, data=None):
-        self.name = name
-        self.nbins = nbins
-        self.uod = uod
-        if bins is None:
-            #range = (uod[1] - uod[0])/nbins
-            #self.bins = np.arange(uod[0],uod[1],range).tolist()
-            self.bins = np.linspace(uod[0], uod[1], nbins).tolist()
-            self.labels = [str(k) for k in self.bins]
+    """
+    Represents a discrete or continous probability distribution
+    If type is histogram, the PDF is discrete
+    If type is KDE the PDF is continuous
+    """
+    def __init__(self,type, **kwargs):
+        if type is None:
+            self.type = "KDE"
         else:
-            self.bins = bins
-            self.labels = labels
+            self.type = type
+        self.description = kwargs.get("description", None)
 
-        self.index = SortedCollection.SortedCollection(iterable=sorted(self.bins))
-        self.distribution = {}
-        self.count = 0
-        for k in self.bins: self.distribution[k] = 0
+        self.uod = kwargs.get("uod", None)
 
-        if data is not None: self.append(data)
+        if self.type == "histogram":
+            self.nbins = kwargs.get("num_bins", None)
+            self.bins = kwargs.get("bins", None)
+            self.labels = kwargs.get("bins_labels", None)
+
+            if self.bins is None:
+                self.bins = np.linspace(self.uod[0], self.uod[1], self.nbins).tolist()
+                self.labels = [str(k) for k in self.bins]
+
+            self.index = SortedCollection.SortedCollection(iterable=sorted(self.bins))
+            self.distribution = {}
+            self.count = 0
+            for k in self.bins: self.distribution[k] = 0
+
+        self.data = kwargs.get("data",None)
 
     def append(self, values):
-        for k in values:
-            v = self.index.find_ge(k)
-            self.distribution[v] += 1
-            self.count += 1
+        if self.type == "histogram":
+            for k in values:
+                v = self.index.find_ge(k)
+                self.distribution[v] += 1
+                self.count += 1
+        else:
+            self.data.extend(values)
 
     def density(self, values):
-        ret = []
-        for k in values:
-            v = self.index.find_ge(k)
-            ret.append(self.distribution[v] / self.count)
-        return ret
+        if self.type == "histogram":
+            ret = []
+            for k in values:
+                v = self.index.find_ge(k)
+                ret.append(self.distribution[v] / self.count)
+            return ret
+        else:
+            pass
 
 
     def cummulative(self, values):
         pass
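A usage sketch of the reworked constructor (kwarg names are taken from the diff; note that density() returns relative bin frequencies, and the KDE branch of density() is still a stub at this commit):

    dist = ProbabilityDistribution("histogram", uod=[0, 100], num_bins=11)
    dist.append([5.0, 27.0, 33.0, 33.0, 78.0])
    print(dist.density([33.0]))   # [0.4]: two of five points land in the bin at 40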
@@ -5,21 +5,20 @@ Kernel Density Estimation
 
 
 class KernelSmoothing(object):
     """Kernel Density Estimation"""
-    def __init__(self,h, data, method="epanechnikov"):
+    def __init__(self,h, method="epanechnikov"):
         self.h = h
-        self.data = data
         self.method = method
 
     def kernel(self, u):
         if self.method == "epanechnikov":
             return (3/4) * (1 - u**2)
-        elif self.method == "uniform":
+        elif self.method == "gaussian":
             return 0.5
+        elif self.method == "uniform":
+            return 0.5
 
-    def probability(self, x):
-        l = len(self.data)
-        p = sum([self.kernel((x - k)/self.h) for k in self.data]) / l*self.h
+    def probability(self, x, data):
+        l = len(data)
+        p = sum([self.kernel((x - k)/self.h) for k in data]) / l*self.h
 
         return p
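For reference, the kernel density estimate is f(x) = (1/(n*h)) * sum_i K((x - x_i)/h). Two details in the diff are worth flagging: "/ l*self.h" parses as "(... / l) * self.h" under Python precedence, and the Epanechnikov kernel is only nonzero for |u| <= 1. A corrected sketch, not the commit's code:

    def epanechnikov(u):
        # Epanechnikov kernel with its compact support [-1, 1]
        return 0.75 * (1 - u**2) if abs(u) <= 1 else 0.0

    def kde_probability(x, data, h):
        # f(x) = (1/(n*h)) * sum of kernel-weighted distances to each sample
        n = len(data)
        return sum(epanechnikov((x - k) / h) for k in data) / (n * h)

    print(kde_probability(0.5, [0.1, 0.4, 0.6, 0.9], h=0.5))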
@@ -10,7 +10,7 @@ import matplotlib.pyplot as plt
 
 import pandas as pd
 from pyFTS.partitioners import Grid, Entropy, FCM, Huarng
-from pyFTS.common import FLR,FuzzySet,Membership,Transformations
+from pyFTS.common import FLR,FuzzySet,Membership,Transformations, Util as cUtil
 from pyFTS import fts,hofts,ifts,pwfts,tree, chen
 #from pyFTS.benchmarks import benchmarks as bchmk
 from pyFTS.benchmarks import naive, arima
@@ -20,8 +20,8 @@ from pyFTS.models.seasonal import SeasonalIndexer
 
 os.chdir("/home/petronio/dados/Dropbox/Doutorado/Codigos/")
 
-diff = Transformations.Differential(1)
-ix = SeasonalIndexer.LinearSeasonalIndexer([12, 24], [720, 1],[False, False])
+#diff = Transformations.Differential(1)
+#ix = SeasonalIndexer.LinearSeasonalIndexer([12, 24], [720, 1],[False, False])
 
 """
 DATASETS
@@ -63,6 +63,52 @@ DATASETS
 #print(lag)
 #print(a)
 
+sonda = pd.read_csv("DataSets/SONDA_BSB_MOD.csv", sep=";")
+
+sonda['data'] = pd.to_datetime(sonda['data'])
+
+sonda = sonda[:][527041:]
+
+sonda.index = np.arange(0,len(sonda.index))
+
+sonda_treino = sonda[:1051200]
+sonda_teste = sonda[1051201:]
+
+from pyFTS.models.seasonal import SeasonalIndexer
+
+ix_m15 = SeasonalIndexer.DateTimeSeasonalIndexer('data',[SeasonalIndexer.DateTime.minute],[15],'glo_avg')
+
+cUtil.persist_obj(ix_m15, "models/sonda_ix_m15.pkl")
+
+
+ix_Mh = SeasonalIndexer.DateTimeSeasonalIndexer('data',[SeasonalIndexer.DateTime.month,SeasonalIndexer.DateTime.hour],
+                                                [None, None],'glo_avg')
+
+cUtil.persist_obj(ix_Mh, "models/sonda_ix_Mh.pkl")
+
+ix_Mhm15 = SeasonalIndexer.DateTimeSeasonalIndexer('data',[SeasonalIndexer.DateTime.month,
+                                                   SeasonalIndexer.DateTime.hour, SeasonalIndexer.DateTime.minute],
+                                                   [None, None,15],'glo_avg')
+
+cUtil.persist_obj(ix_Mhm15, "models/sonda_ix_Mhm15.pkl")
+
+
+tmp = ix_Mh.get_data(sonda_treino)
+for max_part in [10, 20, 30, 40, 50]:
+
+    fs1 = Grid.GridPartitionerTrimf(tmp,max_part)
+
+    cUtil.persist_obj(fs1,"models/sonda_fs_grid_" + str(max_part) + ".pkl")
+
+    fs2 = FCM.FCMPartitionerTrimf(tmp, max_part)
+
+    cUtil.persist_obj(fs2, "models/sonda_fs_fcm_" + str(max_part) + ".pkl")
+
+    fs3 = Entropy.EntropyPartitionerTrimf(tmp, max_part)
+
+    cUtil.persist_obj(fs3, "models/sonda_fs_entropy_" + str(max_part) + ".pkl")
+
+
 from pyFTS.benchmarks import benchmarks as bchmk
 #from pyFTS.benchmarks import distributed_benchmarks as bchmk
 #from pyFTS.benchmarks import parallel_benchmarks as bchmk
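The persisted artifacts can presumably be restored later with the matching loader (pyFTS.common.Util exposes load_obj alongside persist_obj; this usage is an assumption, not part of the commit):

    ix_Mh = cUtil.load_obj("models/sonda_ix_Mh.pkl")
    fs1 = cUtil.load_obj("models/sonda_fs_grid_10.pkl")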
@@ -189,7 +235,6 @@ experiments = [
 Util.unified_scaled_point(experiments,tam=[15,8],save=True,file="pictures/unified_experiments_point.png",
                           ignore=['ARIMA(1,0,0)','ARIMA(2,0,0)','ARIMA(2,0,1)','ARIMA(2,0,2)','QAR(2)'],
                           replace=[['ARIMA','ARIMA'],['QAR','QAR']])
-
 '''
 
 '''
@@ -215,13 +260,14 @@ Util.unified_scaled_interval_pinball(experiments,tam=[15,8],save=True,file="pict
 
 '''
 
 '''
 experiments = [
-    ["experiments/taiex_ahead_synthetic.csv","experiments/taiex_ahead_analytic.csv",16],
-    ["experiments/nasdaq_ahead_synthetic.csv","experiments/nasdaq_ahead_analytic.csv",11],
-    ["experiments/sp500_ahead_synthetic.csv","experiments/sp500_ahead_analytic.csv", 21],
-    ["experiments/best_ahead_synthetic.csv","experiments/best_ahead_analytic.csv", 24],
-    ["experiments/sondasun_ahead_synthetic.csv","experiments/sondasun_ahead_analytic.csv",13],
-    ["experiments/sondawind_ahead_synthetic.csv","experiments/sondawind_ahead_analytic.csv", 13],
+    ["experiments/taiex_ahead_synthetic_diff.csv","experiments/taiex_ahead_analytic_diff.csv",16],
+    ["experiments/nasdaq_ahead_synthetic_diff.csv","experiments/nasdaq_ahead_analytic_diff.csv",11],
+    ["experiments/sp500_ahead_synthetic_diff.csv","experiments/sp500_ahead_analytic_diff.csv", 21],
+    ["experiments/best_ahead_synthetic_diff.csv","experiments/best_ahead_analytic_diff.csv", 24],
+    ["experiments/sondasun_ahead_synthetic_diff.csv","experiments/sondasun_ahead_analytic_diff.csv",13],
+    ["experiments/sondawind_ahead_synthetic_diff.csv","experiments/sondawind_ahead_analytic_diff.csv", 13],
+    ["experiments/gauss_ahead_synthetic_diff.csv","experiments/gauss_ahead_analytic_diff.csv",16]
 ]
 
@@ -233,7 +279,9 @@ Util.unified_scaled_ahead(experiments,tam=[15,8],save=True,file="pictures/unifie
 
 
 
-"""
+'''
+
+'''
 from pyFTS.partitioners import Grid
 
 from pyFTS import sfts
@@ -268,4 +316,4 @@ x = tmp.forecast(sonda[:1610])
 
 #print(taiex[1600:1610])
 print(x)
-#"""
+'''