- Bugfixes and improvements to ProbabilityDistribution, KDE and msfts
- Scale Transformation
This commit is contained in:
parent
962ef89bcf
commit
011e0ee4ce
@ -234,7 +234,7 @@ def get_point_statistics(data, model, indexer=None):
|
||||
"""Condensate all measures for point forecasters"""
|
||||
|
||||
if indexer is not None:
|
||||
ndata = np.array(indexer.get_data(data[model.order:]))
|
||||
ndata = np.array(indexer.get_data(data))
|
||||
else:
|
||||
ndata = np.array(data[model.order:])
|
||||
|
||||
|
@ -62,7 +62,7 @@ def generateRecurrentFLRs(fuzzyData):
|
||||
return flrs
|
||||
|
||||
|
||||
def generateIndexedFLRs(sets, indexer, data):
|
||||
def generateIndexedFLRs(sets, indexer, data, transformation=None):
|
||||
"""
|
||||
Create a season-indexed ordered FLR set from a list of fuzzy sets with recurrence
|
||||
:param sets: fuzzy sets
|
||||
@ -73,7 +73,9 @@ def generateIndexedFLRs(sets, indexer, data):
|
||||
flrs = []
|
||||
index = indexer.get_season_of_data(data)
|
||||
ndata = indexer.get_data(data)
|
||||
for k in np.arange(1,len(data)):
|
||||
if transformation is not None:
|
||||
ndata = transformation.apply(ndata)
|
||||
for k in np.arange(1,len(ndata)):
|
||||
lhs = FuzzySet.getMaxMembershipFuzzySet(ndata[k-1],sets)
|
||||
rhs = FuzzySet.getMaxMembershipFuzzySet(ndata[k], sets)
|
||||
season = index[k]
|
||||
|
@ -73,6 +73,41 @@ class Differential(Transformation):
|
||||
return inc
|
||||
|
||||
|
||||
class Scale(Transformation):
    """
    Min-max scaling of data onto the interval [min, max]

    The data bounds are taken from the first data passed to ``apply`` and are
    reused by every later ``apply`` / ``inverse`` call on the same instance.
    """

    def __init__(self, min=0, max=1):
        """
        :param min: lower bound of the transformed interval
        :param max: upper bound of the transformed interval
        """
        # The parameter names shadow the builtins; they are kept unchanged
        # because callers pass them as keywords (e.g. Scale(min=0, max=1)).
        super(Scale, self).__init__([min, max])
        self.data_max = None
        self.data_min = None
        self.transf_max = max
        self.transf_min = min

    def _data_range(self):
        """Width of the fitted data interval, or 1 when the fitted data is constant."""
        data_range = self.data_max - self.data_min
        # A zero width would make apply() divide by zero; a unit width keeps
        # apply() and inverse() finite and mutually inverse.
        return data_range if data_range != 0 else 1

    def apply(self, data, param=None, **kwargs):
        """
        Scale data from the fitted data interval onto [min, max]

        :param data: a list, or any value supporting arithmetic with scalars
        :param param: not used by this transformation
        :return: the scaled data; a list when ``data`` is a list
        """
        # Bounds are learned only once, NaN values being ignored
        if self.data_max is None:
            self.data_max = np.nanmax(data)
            self.data_min = np.nanmin(data)
        data_range = self._data_range()
        transf_range = self.transf_max - self.transf_min
        if isinstance(data, list):
            return [((k - self.data_min) / data_range) * transf_range + self.transf_min
                    for k in data]
        return ((data - self.data_min) / data_range) * transf_range + self.transf_min

    def inverse(self, data, param=None, **kwargs):
        """
        Map scaled data from [min, max] back onto the fitted data interval

        :param data: a list, or any value supporting arithmetic with scalars
        :param param: not used by this transformation
        :return: the data on its original scale; a list when ``data`` is a list
        """
        data_range = self._data_range()
        transf_range = self.transf_max - self.transf_min
        if isinstance(data, list):
            return [((k - self.transf_min) / transf_range) * data_range + self.data_min
                    for k in data]
        return ((data - self.transf_min) / transf_range) * data_range + self.data_min
|
||||
|
||||
|
||||
class AdaptiveExpectation(Transformation):
|
||||
"""
|
||||
Adaptive Expectation post processing
|
||||
|
@ -82,7 +82,8 @@ class SeasonalEnsembleFTS(ensemble.EnsembleFTS):
|
||||
|
||||
tmp = np.ravel(tmp).tolist()
|
||||
|
||||
dist = ProbabilityDistribution.ProbabilityDistribution("KDE",h=h,uod=[self.original_min, self.original_max], data=tmp)
|
||||
dist = ProbabilityDistribution.ProbabilityDistribution("KDE", uod=[self.original_min, self.original_max], data=tmp,
|
||||
**kwargs)
|
||||
|
||||
ret.append(dist)
|
||||
|
||||
|
@ -13,10 +13,10 @@ class MultiSeasonalFTS(sfts.SeasonalFTS):
|
||||
self.shortname = "MSFTS " + name
|
||||
self.detail = ""
|
||||
self.seasonality = 1
|
||||
self.hasSeasonality = True
|
||||
self.hasPointForecasting = True
|
||||
self.isHighOrder = True
|
||||
self.isMultivariate = True
|
||||
self.has_seasonality = True
|
||||
self.has_point_forecasting = True
|
||||
self.is_high_order = False
|
||||
self.is_multivariate = True
|
||||
self.indexer = indexer
|
||||
self.flrgs = {}
|
||||
|
||||
|
@ -123,8 +123,11 @@ class ProbabilityDistribution(object):
|
||||
ys = [self.distribution[k]/self.count for k in self.bins]
|
||||
else:
|
||||
ys = [self.distribution[k] for k in self.bins]
|
||||
yp = [0 for k in self.data]
|
||||
axis.plot(self.data, yp, c="red")
|
||||
|
||||
axis.plot(self.bins, ys,c=color, label=self.name)
|
||||
|
||||
axis.plot(self.bins, ys, c=color, label=self.name)
|
||||
|
||||
axis.set_xlabel('Universe of Discourse')
|
||||
axis.set_ylabel('Probability')
|
||||
@ -134,5 +137,8 @@ class ProbabilityDistribution(object):
|
||||
body = '|'
|
||||
for k in sorted(self.distribution.keys()):
|
||||
head += str(round(k,2)) + '\t|'
|
||||
body += str(round(self.distribution[k] / self.count,3)) + '\t|'
|
||||
if self.type == "histogram":
|
||||
body += str(round(self.distribution[k] / self.count,3)) + '\t|'
|
||||
else:
|
||||
body += str(round(self.distribution[k], 3)) + '\t|'
|
||||
return head + '\n' + body
|
||||
|
@ -1,3 +1,6 @@
|
||||
from pyFTS.common import Transformations
|
||||
import numpy as np
|
||||
|
||||
"""
|
||||
Kernel Density Estimation
|
||||
"""
|
||||
@ -8,17 +11,21 @@ class KernelSmoothing(object):
|
||||
def __init__(self, h, method="epanechnikov"):
    """
    Set up the estimator

    :param h: bandwidth; ``probability`` divides the kernel arguments by it
    :param method: kernel name tested by ``kernel``: "epanechnikov", "gaussian" or "uniform"
    """
    # Scaler onto [0, 1], applied to the data and the query point in ``probability``
    self.transf = Transformations.Scale(min=0, max=1)
    self.method = method
    self.h = h
|
||||
|
||||
def kernel(self, u):
|
||||
if self.method == "epanechnikov":
|
||||
return (3/4) * (1 - u**2)
|
||||
return (3/4)*(1 - u**2)
|
||||
elif self.method == "gaussian":
|
||||
return 0.5
|
||||
return (1/np.sqrt(2*np.pi))*np.exp(-0.5*u**2)
|
||||
elif self.method == "uniform":
|
||||
return 0.5
|
||||
|
||||
def probability(self, x, data):
    """
    Kernel density estimate at ``x`` given the sample ``data``

    :param x: point where the density is evaluated
    :param data: sample used to build the estimate
    :return: sum of the kernel values divided by ``len(data) * h``
    """
    n = len(data)

    # Both the sample and the query point go through the same scaler.
    # NOTE(review): the scaler is created once in __init__; confirm whether it
    # is meant to be shared across calls made with different samples.
    scaled_data = self.transf.apply(data)
    scaled_x = self.transf.apply(x)

    kernel_sum = sum(self.kernel((scaled_x - k) / self.h) for k in scaled_data)

    # The divisor is the product n * h; without the parentheses the expression
    # evaluated as (kernel_sum / n) * h.
    return kernel_sum / (n * self.h)
|
@ -62,22 +62,43 @@ DATASETS
|
||||
|
||||
#print(lag)
|
||||
#print(a)
|
||||
|
||||
sonda = pd.read_csv("DataSets/SONDA_BSB_MOD.csv", sep=";")
|
||||
#'''
|
||||
sonda = pd.read_csv("DataSets/SONDA_BSB_15MIN_AVG.csv", sep=";")
|
||||
|
||||
sonda['data'] = pd.to_datetime(sonda['data'])
|
||||
|
||||
sonda = sonda[:][527041:]
|
||||
#sonda = sonda[:][527041:].dropna()
|
||||
|
||||
sonda.index = np.arange(0,len(sonda.index))
|
||||
|
||||
sonda_treino = sonda[:1051200]
|
||||
sonda_teste = sonda[1051901:1051910]
|
||||
sonda_treino = sonda[:105313].dropna()
|
||||
sonda_teste = sonda[105314:].dropna()
|
||||
|
||||
ix_m15 = SeasonalIndexer.DateTimeSeasonalIndexer('data',[SeasonalIndexer.DateTime.minute],[15],'glo_avg', name='m15')
|
||||
#ix_m15 = SeasonalIndexer.DateTimeSeasonalIndexer('data',[SeasonalIndexer.DateTime.minute],[15],'glo_avg', name='m15')
|
||||
|
||||
fs1 = Grid.GridPartitioner(sonda_treino,50,transformation=diff, indexer=ix_m15)
|
||||
#fs1 = Grid.GridPartitioner(sonda_treino, 50, transformation=diff, indexer=ix_m15)
|
||||
|
||||
#ix = cUtil.load_obj("models/sonda_ix_Mhm15.pkl")
|
||||
|
||||
#fs = cUtil.load_obj("models/sonda_fs_Entropy40_diff.pkl")
|
||||
|
||||
#from pyFTS.models import msfts
|
||||
|
||||
#obj = msfts.MultiSeasonalFTS("sonda_msfts_Entropy40_Mhm15", indexer=ix)
|
||||
|
||||
#obj.appendTransformation(diff)
|
||||
|
||||
#obj.train(sonda_treino, fs.sets)
|
||||
|
||||
#cUtil.persist_obj(obj, "models/sonda_msfts_Entropy40_Mhm15.pkl")
|
||||
|
||||
ftse = cUtil.load_obj("models/sonda_ensemble_msfts.pkl")
|
||||
|
||||
tmp = ftse.forecastDistribution(sonda_teste[850:860], h=0.5, method="gaussian")
|
||||
|
||||
print(tmp[0])
|
||||
|
||||
#'''
|
||||
|
||||
'''
|
||||
from pyFTS.models.seasonal import SeasonalIndexer
|
||||
@ -223,7 +244,7 @@ bchmk.ahead_sliding_window(sonda, 10000, steps=10, resolution=10, train=0.2, inc
|
||||
dump=True, save=True, file="experiments/sondawind_ahead_analytic_diff.csv",
|
||||
nodes=['192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts])
|
||||
|
||||
"""
|
||||
|
||||
|
||||
from pyFTS import pwfts
|
||||
from pyFTS.common import Transformations
|
||||
@ -240,7 +261,7 @@ from pyFTS.partitioners import Grid
|
||||
#bchmk.plot_compared_intervals_ahead(best[1600:1700],[model], ['blue','red'],
|
||||
# distributions=[True], save=True, file="pictures/best_ahead_forecasts",
|
||||
# time_from=40, time_to=60, resolution=100)
|
||||
'''
|
||||
|
||||
experiments = [
|
||||
["experiments/taiex_point_synthetic_diff.csv","experiments/taiex_point_analytic_diff.csv",16],
|
||||
["experiments/nasdaq_point_synthetic_diff.csv","experiments/nasdaq_point_analytic_diff.csv", 11],
|
||||
@ -320,7 +341,6 @@ diff = Transformations.Differential(1)
|
||||
fs = Grid.GridPartitioner(sonda[:9000], 10, transformation=diff)
|
||||
|
||||
|
||||
'''
|
||||
tmp = sfts.SeasonalFTS("")
|
||||
tmp.indexer = ix
|
||||
tmp.appendTransformation(diff)
|
||||
|
Loading…
Reference in New Issue
Block a user