- Bugfixes and improvements on ProbabilityDistribution and KDE, msfts

- Scale Transformation
This commit is contained in:
Petrônio Cândido de Lima e Silva 2017-07-05 15:35:22 -03:00
parent 962ef89bcf
commit 011e0ee4ce
8 changed files with 94 additions and 23 deletions

View File

@ -234,7 +234,7 @@ def get_point_statistics(data, model, indexer=None):
"""Condensate all measures for point forecasters""" """Condensate all measures for point forecasters"""
if indexer is not None: if indexer is not None:
ndata = np.array(indexer.get_data(data[model.order:])) ndata = np.array(indexer.get_data(data))
else: else:
ndata = np.array(data[model.order:]) ndata = np.array(data[model.order:])

View File

@ -62,7 +62,7 @@ def generateRecurrentFLRs(fuzzyData):
return flrs return flrs
def generateIndexedFLRs(sets, indexer, data): def generateIndexedFLRs(sets, indexer, data, transformation=None):
""" """
Create a season-indexed ordered FLR set from a list of fuzzy sets with recurrence Create a season-indexed ordered FLR set from a list of fuzzy sets with recurrence
:param sets: fuzzy sets :param sets: fuzzy sets
@ -73,7 +73,9 @@ def generateIndexedFLRs(sets, indexer, data):
flrs = [] flrs = []
index = indexer.get_season_of_data(data) index = indexer.get_season_of_data(data)
ndata = indexer.get_data(data) ndata = indexer.get_data(data)
for k in np.arange(1,len(data)): if transformation is not None:
ndata = transformation.apply(ndata)
for k in np.arange(1,len(ndata)):
lhs = FuzzySet.getMaxMembershipFuzzySet(ndata[k-1],sets) lhs = FuzzySet.getMaxMembershipFuzzySet(ndata[k-1],sets)
rhs = FuzzySet.getMaxMembershipFuzzySet(ndata[k], sets) rhs = FuzzySet.getMaxMembershipFuzzySet(ndata[k], sets)
season = index[k] season = index[k]

View File

@ -73,6 +73,41 @@ class Differential(Transformation):
return inc return inc
class Scale(Transformation):
    """
    Min-max scaling of data into the interval [min, max].

    The data minimum and maximum are learned on the first call of apply()
    and reused, unchanged, by every later apply() and inverse() call.
    """

    # NOTE: the parameter names `min`/`max` shadow the builtins, but they are
    # kept because callers pass them as keywords (e.g. Scale(min=0, max=1)).
    def __init__(self, min=0, max=1):
        """
        :param min: lower bound of the target interval
        :param max: upper bound of the target interval
        """
        super(Scale, self).__init__([min, max])
        self.data_max = None
        self.data_min = None
        self.transf_max = max
        self.transf_min = min

    @staticmethod
    def _rescale(data, src_min, src_range, dst_min, dst_range):
        """Map data linearly from [src_min, src_min + src_range] to [dst_min, dst_min + dst_range]."""
        if isinstance(data, list):
            # lists are processed element-wise so that a list is returned
            return [((k - src_min) / src_range) * dst_range + dst_min for k in data]
        return ((data - src_min) / src_range) * dst_range + dst_min

    def apply(self, data, param=None, **kwargs):
        """
        Scale data into [transf_min, transf_max].

        :param data: a list, a scalar or an array-like supporting arithmetic
        :param param: unused by this transformation
        :return: the scaled data (a list when data is a list)
        """
        if self.data_max is None:
            # first call: learn the data bounds, ignoring NaN values
            self.data_max = np.nanmax(data)
            self.data_min = np.nanmin(data)
        data_range = self.data_max - self.data_min
        if data_range == 0:
            # constant data: divide by 1 so every value maps to transf_min
            # instead of producing NaN/inf from a division by zero
            data_range = 1
        transf_range = self.transf_max - self.transf_min
        return self._rescale(data, self.data_min, data_range,
                             self.transf_min, transf_range)

    def inverse(self, data, param=None, **kwargs):
        """
        Map scaled data back using the bounds learned by apply().

        :param data: a list, a scalar or an array-like supporting arithmetic
        :param param: unused by this transformation
        :return: the data in the original scale (a list when data is a list)
        """
        data_range = self.data_max - self.data_min
        transf_range = self.transf_max - self.transf_min
        return self._rescale(data, self.transf_min, transf_range,
                             self.data_min, data_range)
class AdaptiveExpectation(Transformation): class AdaptiveExpectation(Transformation):
""" """
Adaptive Expectation post processing Adaptive Expectation post processing

View File

@ -82,7 +82,8 @@ class SeasonalEnsembleFTS(ensemble.EnsembleFTS):
tmp = np.ravel(tmp).tolist() tmp = np.ravel(tmp).tolist()
dist = ProbabilityDistribution.ProbabilityDistribution("KDE",h=h,uod=[self.original_min, self.original_max], data=tmp) dist = ProbabilityDistribution.ProbabilityDistribution("KDE", uod=[self.original_min, self.original_max], data=tmp,
**kwargs)
ret.append(dist) ret.append(dist)

View File

@ -13,10 +13,10 @@ class MultiSeasonalFTS(sfts.SeasonalFTS):
self.shortname = "MSFTS " + name self.shortname = "MSFTS " + name
self.detail = "" self.detail = ""
self.seasonality = 1 self.seasonality = 1
self.hasSeasonality = True self.has_seasonality = True
self.hasPointForecasting = True self.has_point_forecasting = True
self.isHighOrder = True self.is_high_order = False
self.isMultivariate = True self.is_multivariate = True
self.indexer = indexer self.indexer = indexer
self.flrgs = {} self.flrgs = {}

View File

@ -123,6 +123,9 @@ class ProbabilityDistribution(object):
ys = [self.distribution[k]/self.count for k in self.bins] ys = [self.distribution[k]/self.count for k in self.bins]
else: else:
ys = [self.distribution[k] for k in self.bins] ys = [self.distribution[k] for k in self.bins]
yp = [0 for k in self.data]
axis.plot(self.data, yp, c="red")
axis.plot(self.bins, ys, c=color, label=self.name) axis.plot(self.bins, ys, c=color, label=self.name)
@ -134,5 +137,8 @@ class ProbabilityDistribution(object):
body = '|' body = '|'
for k in sorted(self.distribution.keys()): for k in sorted(self.distribution.keys()):
head += str(round(k,2)) + '\t|' head += str(round(k,2)) + '\t|'
if self.type == "histogram":
body += str(round(self.distribution[k] / self.count,3)) + '\t|' body += str(round(self.distribution[k] / self.count,3)) + '\t|'
else:
body += str(round(self.distribution[k], 3)) + '\t|'
return head + '\n' + body return head + '\n' + body

View File

@ -1,3 +1,6 @@
from pyFTS.common import Transformations
import numpy as np
""" """
Kernel Density Estimation Kernel Density Estimation
""" """
@ -8,17 +11,21 @@ class KernelSmoothing(object):
def __init__(self, h, method="epanechnikov"):
    """
    Set up the kernel smoother.

    :param h: bandwidth; probability() divides the distances by it
    :param method: kernel name tested by kernel(): "epanechnikov",
                   "gaussian" or "uniform"
    """
    # scaler with a [0, 1] target interval, applied by probability()
    # to both the sample and the query point
    self.transf = Transformations.Scale(min=0, max=1)
    self.method = method
    self.h = h
def kernel(self, u):
    """
    Evaluate the configured kernel function at u.

    :param u: scaled distance(s), scalar or numpy array
    :return: kernel value(s); a float for scalar input, an array otherwise
    :raises ValueError: if self.method is not a supported kernel name
    """
    u = np.asarray(u, dtype=float)
    # Epanechnikov and uniform kernels have compact support:
    # they are zero outside of [-1, 1]
    inside = np.abs(u) <= 1
    if self.method == "epanechnikov":
        value = np.where(inside, 0.75 * (1 - u**2), 0.0)
    elif self.method == "gaussian":
        value = (1 / np.sqrt(2 * np.pi)) * np.exp(-0.5 * u**2)
    elif self.method == "uniform":
        value = np.where(inside, 0.5, 0.0)
    else:
        raise ValueError("Unknown kernel method: " + str(self.method))
    value = np.asarray(value)
    # keep scalar in -> scalar out
    return float(value) if value.ndim == 0 else value
def probability(self, x, data):
    """
    Kernel density estimate at x given the sample data.

    Both the sample and the query point are passed through self.transf
    before the kernel is evaluated.

    :param x: point where the density is estimated
    :param data: sample used to build the estimate
    :return: sum of kernel((x - x_i) / h) over the sample, divided by (n * h)
    """
    n = len(data)
    # the sample is transformed first, the query point second
    # (same order as the calls always had)
    ndata = self.transf.apply(data)
    nx = self.transf.apply(x)
    total = sum(self.kernel((nx - k) / self.h) for k in ndata)
    # normalise by n*h; the former "/ l*self.h" multiplied by h instead
    return total / (n * self.h)

View File

@ -62,22 +62,43 @@ DATASETS
#print(lag) #print(lag)
#print(a) #print(a)
#'''
sonda = pd.read_csv("DataSets/SONDA_BSB_MOD.csv", sep=";") sonda = pd.read_csv("DataSets/SONDA_BSB_15MIN_AVG.csv", sep=";")
sonda['data'] = pd.to_datetime(sonda['data']) sonda['data'] = pd.to_datetime(sonda['data'])
sonda = sonda[:][527041:] #sonda = sonda[:][527041:].dropna()
sonda.index = np.arange(0,len(sonda.index)) sonda.index = np.arange(0,len(sonda.index))
sonda_treino = sonda[:1051200] sonda_treino = sonda[:105313].dropna()
sonda_teste = sonda[1051901:1051910] sonda_teste = sonda[105314:].dropna()
ix_m15 = SeasonalIndexer.DateTimeSeasonalIndexer('data',[SeasonalIndexer.DateTime.minute],[15],'glo_avg', name='m15') #ix_m15 = SeasonalIndexer.DateTimeSeasonalIndexer('data',[SeasonalIndexer.DateTime.minute],[15],'glo_avg', name='m15')
fs1 = Grid.GridPartitioner(sonda_treino,50,transformation=diff, indexer=ix_m15) #fs1 = Grid.GridPartitioner(sonda_treino, 50, transformation=diff, indexer=ix_m15)
#ix = cUtil.load_obj("models/sonda_ix_Mhm15.pkl")
#fs = cUtil.load_obj("models/sonda_fs_Entropy40_diff.pkl")
#from pyFTS.models import msfts
#obj = msfts.MultiSeasonalFTS("sonda_msfts_Entropy40_Mhm15", indexer=ix)
#obj.appendTransformation(diff)
#obj.train(sonda_treino, fs.sets)
#cUtil.persist_obj(obj, "models/sonda_msfts_Entropy40_Mhm15.pkl")
ftse = cUtil.load_obj("models/sonda_ensemble_msfts.pkl")
tmp = ftse.forecastDistribution(sonda_teste[850:860], h=0.5, method="gaussian")
print(tmp[0])
#'''
''' '''
from pyFTS.models.seasonal import SeasonalIndexer from pyFTS.models.seasonal import SeasonalIndexer
@ -223,7 +244,7 @@ bchmk.ahead_sliding_window(sonda, 10000, steps=10, resolution=10, train=0.2, inc
dump=True, save=True, file="experiments/sondawind_ahead_analytic_diff.csv", dump=True, save=True, file="experiments/sondawind_ahead_analytic_diff.csv",
nodes=['192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts]) nodes=['192.168.0.106', '192.168.0.108', '192.168.0.109']) #, depends=[hofts, ifts])
"""
from pyFTS import pwfts from pyFTS import pwfts
from pyFTS.common import Transformations from pyFTS.common import Transformations
@ -240,7 +261,7 @@ from pyFTS.partitioners import Grid
#bchmk.plot_compared_intervals_ahead(best[1600:1700],[model], ['blue','red'], #bchmk.plot_compared_intervals_ahead(best[1600:1700],[model], ['blue','red'],
# distributions=[True], save=True, file="pictures/best_ahead_forecasts", # distributions=[True], save=True, file="pictures/best_ahead_forecasts",
# time_from=40, time_to=60, resolution=100) # time_from=40, time_to=60, resolution=100)
'''
experiments = [ experiments = [
["experiments/taiex_point_synthetic_diff.csv","experiments/taiex_point_analytic_diff.csv",16], ["experiments/taiex_point_synthetic_diff.csv","experiments/taiex_point_analytic_diff.csv",16],
["experiments/nasdaq_point_synthetic_diff.csv","experiments/nasdaq_point_analytic_diff.csv", 11], ["experiments/nasdaq_point_synthetic_diff.csv","experiments/nasdaq_point_analytic_diff.csv", 11],
@ -320,7 +341,6 @@ diff = Transformations.Differential(1)
fs = Grid.GridPartitioner(sonda[:9000], 10, transformation=diff) fs = Grid.GridPartitioner(sonda[:9000], 10, transformation=diff)
'''
tmp = sfts.SeasonalFTS("") tmp = sfts.SeasonalFTS("")
tmp.indexer = ix tmp.indexer = ix
tmp.appendTransformation(diff) tmp.appendTransformation(diff)