Bugfixes and improvements on pwfts and multivariate.granular
This commit is contained in:
parent
35898f338a
commit
f3cf757e96
@ -300,6 +300,9 @@ def crps(targets, densities):
|
|||||||
targets = [targets]
|
targets = [targets]
|
||||||
|
|
||||||
n = len(densities)
|
n = len(densities)
|
||||||
|
if n == 0:
|
||||||
|
return np.nan
|
||||||
|
|
||||||
for ct, df in enumerate(densities):
|
for ct, df in enumerate(densities):
|
||||||
_crps += np.nansum([(df.cumulative(bin) - (1 if bin >= targets[ct] else 0)) ** 2 for bin in df.bins])
|
_crps += np.nansum([(df.cumulative(bin) - (1 if bin >= targets[ct] else 0)) ** 2 for bin in df.bins])
|
||||||
|
|
||||||
|
@ -216,7 +216,7 @@ def plot_distribution2(probabilitydist, data, **kwargs):
|
|||||||
if kwargs.get('median',True):
|
if kwargs.get('median',True):
|
||||||
y = [data[start_at]]
|
y = [data[start_at]]
|
||||||
for pd in probabilitydist:
|
for pd in probabilitydist:
|
||||||
qts = pd.quantile(.5)
|
qts = pd.quantile([.5])
|
||||||
y.append(qts[0])
|
y.append(qts[0])
|
||||||
|
|
||||||
ax.plot(x, y, color='red', label='Median')
|
ax.plot(x, y, color='red', label='Median')
|
||||||
|
@ -139,24 +139,33 @@ def evaluate(dataset, individual, **kwargs):
|
|||||||
|
|
||||||
for count, train, test in Util.sliding_window(dataset, window_size, train=train_rate, inc=increment_rate):
|
for count, train, test in Util.sliding_window(dataset, window_size, train=train_rate, inc=increment_rate):
|
||||||
|
|
||||||
model = phenotype(individual, train, fts_method=fts_method, parameters=parameters)
|
try:
|
||||||
|
|
||||||
forecasts = model.predict(test)
|
model = phenotype(individual, train, fts_method=fts_method, parameters=parameters)
|
||||||
|
|
||||||
rmse = Measures.rmse(test[model.max_lag:], forecasts[:-1])
|
forecasts = model.predict(test)
|
||||||
lengths.append(len(model))
|
|
||||||
|
|
||||||
errors.append(rmse)
|
rmse = Measures.rmse(test[model.max_lag:], forecasts[:-1])
|
||||||
|
lengths.append(len(model))
|
||||||
|
|
||||||
_lags = sum(model.lags) * 100
|
errors.append(rmse)
|
||||||
|
|
||||||
_rmse = np.nanmean(errors)
|
except:
|
||||||
_len = np.nanmean(lengths)
|
lengths.append(np.nan)
|
||||||
|
errors.append(np.nan)
|
||||||
|
|
||||||
f1 = np.nansum([.6 * _rmse, .4 * np.nanstd(errors)])
|
try:
|
||||||
f2 = np.nansum([.4 * _len, .6 * _lags])
|
_lags = sum(model.lags) * 100
|
||||||
|
|
||||||
return {'f1': f1, 'f2': f2, 'rmse': _rmse, 'size': _len }
|
_rmse = np.nanmean(errors)
|
||||||
|
_len = np.nanmean(lengths)
|
||||||
|
|
||||||
|
f1 = np.nansum([.6 * _rmse, .4 * np.nanstd(errors)])
|
||||||
|
f2 = np.nansum([.4 * _len, .6 * _lags])
|
||||||
|
|
||||||
|
return {'f1': f1, 'f2': f2, 'rmse': _rmse, 'size': _len }
|
||||||
|
except:
|
||||||
|
return {'f1': np.inf, 'f2': np.inf, 'rmse': np.inf, 'size': np.inf}
|
||||||
|
|
||||||
|
|
||||||
def tournament(population, objective, **kwargs):
|
def tournament(population, objective, **kwargs):
|
||||||
|
@ -15,8 +15,8 @@ class GranularWMVFTS(cmvfts.ClusteredMVFTS):
|
|||||||
"""The most recent trained model"""
|
"""The most recent trained model"""
|
||||||
self.knn = kwargs.get('knn', 2)
|
self.knn = kwargs.get('knn', 2)
|
||||||
self.order = kwargs.get("order", 2)
|
self.order = kwargs.get("order", 2)
|
||||||
self.shortname = "GranularWMVFTS"
|
self.shortname = "FIG-FTS"
|
||||||
self.name = "Granular Weighted Multivariate FTS"
|
self.name = "Fuzzy Information Granular FTS"
|
||||||
self.mode = kwargs.get('mode','sets')
|
self.mode = kwargs.get('mode','sets')
|
||||||
|
|
||||||
def train(self, data, **kwargs):
|
def train(self, data, **kwargs):
|
||||||
|
@ -165,8 +165,8 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
|
|||||||
|
|
||||||
def pwflrg_lhs_memberhip_fuzzyfied(self, flrg, sample):
|
def pwflrg_lhs_memberhip_fuzzyfied(self, flrg, sample):
|
||||||
vals = []
|
vals = []
|
||||||
for ct, fuzz in enumerate(sample):
|
for ct in range(len(flrg.LHS)): # fuzz in enumerate(sample):
|
||||||
vals.append([mv for fset, mv in fuzz if fset == flrg.LHS[ct]])
|
vals.append([mv for fset, mv in sample[ct] if fset == flrg.LHS[ct]])
|
||||||
|
|
||||||
return np.nanprod(vals)
|
return np.nanprod(vals)
|
||||||
|
|
||||||
|
@ -217,7 +217,7 @@ class ProbabilityDistribution(object):
|
|||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
k = self.bin_index.find_ge(values)
|
k = self.bin_index.find_ge(values)
|
||||||
return self.cdf[values]
|
return self.cdf[k]
|
||||||
except:
|
except:
|
||||||
return np.nan
|
return np.nan
|
||||||
|
|
||||||
|
@ -19,9 +19,33 @@ from pyFTS.common import Membership
|
|||||||
|
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
'''
|
||||||
|
def sample_by_hour(data):
|
||||||
|
return [np.nanmean(data[k:k+60]) for k in np.arange(0,len(data),60)]
|
||||||
|
|
||||||
|
def sample_date_by_hour(data):
|
||||||
|
return [data[k] for k in np.arange(0,len(data),60)]
|
||||||
|
|
||||||
|
from pyFTS.data import SONDA
|
||||||
|
|
||||||
|
sonda = SONDA.get_dataframe()[['datahora','glo_avg','ws_10m']]
|
||||||
|
|
||||||
|
sonda = sonda.drop(sonda.index[np.where(sonda["ws_10m"] <= 0.01)])
|
||||||
|
sonda = sonda.drop(sonda.index[np.where(sonda["glo_avg"] <= 0.01)])
|
||||||
|
sonda = sonda.dropna()
|
||||||
|
sonda['datahora'] = pd.to_datetime(sonda["datahora"], format='%Y-%m-%d %H:%M:%S')
|
||||||
|
|
||||||
|
|
||||||
from pyFTS.data import SONDA, Malaysia
|
var = {
|
||||||
|
'datahora': sample_date_by_hour(sonda['datahora'].values),
|
||||||
|
'glo_avg': sample_by_hour(sonda['glo_avg'].values),
|
||||||
|
'ws_10m': sample_by_hour(sonda['ws_10m'].values)
|
||||||
|
}
|
||||||
|
|
||||||
|
df = pd.DataFrame(var)
|
||||||
|
'''
|
||||||
|
|
||||||
|
from pyFTS.data import Malaysia
|
||||||
|
|
||||||
df = Malaysia.get_dataframe()
|
df = Malaysia.get_dataframe()
|
||||||
df['time'] = pd.to_datetime(df["time"], format='%m/%d/%y %I:%M %p')
|
df['time'] = pd.to_datetime(df["time"], format='%m/%d/%y %I:%M %p')
|
||||||
@ -39,11 +63,10 @@ variables = {
|
|||||||
alpha_cut=.25)
|
alpha_cut=.25)
|
||||||
}
|
}
|
||||||
|
|
||||||
methods = [mvfts.MVFTS, wmvfts.WeightedMVFTS, granular.GranularWMVFTS]
|
|
||||||
#methods = [granular.GranularWMVFTS]
|
methods = [granular.GranularWMVFTS]
|
||||||
|
|
||||||
parameters = [
|
parameters = [
|
||||||
{},{},
|
|
||||||
dict(fts_method=pwfts.ProbabilisticWeightedFTS, fuzzyfy_mode='both',
|
dict(fts_method=pwfts.ProbabilisticWeightedFTS, fuzzyfy_mode='both',
|
||||||
order=1, knn=1)
|
order=1, knn=1)
|
||||||
]
|
]
|
||||||
@ -52,16 +75,16 @@ bchmk.multivariate_sliding_window_benchmarks2(df, 10000, train=0.9, inc=0.25,
|
|||||||
methods=methods,
|
methods=methods,
|
||||||
methods_parameters=parameters,
|
methods_parameters=parameters,
|
||||||
variables=variables,
|
variables=variables,
|
||||||
target_variable='Load',
|
target_variable='Temperature',
|
||||||
type='interval',
|
type='distribution',
|
||||||
steps_ahead=[1],
|
steps_ahead=[1],
|
||||||
distributed=False,
|
file="experiments.db", dataset='Malaysia.temperature',
|
||||||
nodes=['192.168.0.110', '192.168.0.107', '192.168.0.106'],
|
|
||||||
file="experiments.db", dataset='Malaysia',
|
|
||||||
tag="experiments"
|
tag="experiments"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
'''
|
'''
|
||||||
from pyFTS.data import lorentz
|
from pyFTS.data import lorentz
|
||||||
df = lorentz.get_dataframe(iterations=5000)
|
df = lorentz.get_dataframe(iterations=5000)
|
||||||
|
@ -19,63 +19,18 @@ from pyFTS.models.seasonal import partitioner as seasonal
|
|||||||
from pyFTS.models.seasonal.common import DateTime
|
from pyFTS.models.seasonal.common import DateTime
|
||||||
from pyFTS.common import Membership
|
from pyFTS.common import Membership
|
||||||
|
|
||||||
def sample_by_hour(data):
|
|
||||||
return [np.nanmean(data[k:k+60]) for k in np.arange(0,len(data),60)]
|
|
||||||
|
|
||||||
def sample_date_by_hour(data):
|
|
||||||
return [data[k] for k in np.arange(0,len(data),60)]
|
|
||||||
|
|
||||||
from pyFTS.data import SONDA
|
from pyFTS.data import SONDA
|
||||||
|
|
||||||
sonda = SONDA.get_dataframe()[['datahora','glo_avg','ws_10m']]
|
data = [k for k in SONDA.get_data('ws_10m') if k > 0.1 and k != np.nan and k is not None]
|
||||||
|
data = [np.nanmean(data[k:k+60]) for k in np.arange(0,len(data),60)]
|
||||||
|
|
||||||
sonda = sonda.drop(sonda.index[np.where(sonda["ws_10m"] <= 0.01)])
|
train = data[:9000]
|
||||||
sonda = sonda.drop(sonda.index[np.where(sonda["glo_avg"] <= 0.01)])
|
test = data[9000:10000]
|
||||||
sonda = sonda.dropna()
|
|
||||||
sonda['datahora'] = pd.to_datetime(sonda["datahora"], format='%Y-%m-%d %H:%M:%S')
|
|
||||||
|
|
||||||
|
fs = Grid.GridPartitioner(data=train, npart=95)
|
||||||
|
|
||||||
var = {
|
model = pwfts.ProbabilisticWeightedFTS(partitioner=fs, order=3)
|
||||||
'datahora': sample_date_by_hour(sonda['datahora'].values),
|
|
||||||
'glo_avg': sample_by_hour(sonda['glo_avg'].values),
|
|
||||||
'ws_10m': sample_by_hour(sonda['ws_10m'].values),
|
|
||||||
}
|
|
||||||
|
|
||||||
df = pd.DataFrame(var)
|
model.fit(train)
|
||||||
|
|
||||||
train_mv = df.iloc[:9000]
|
|
||||||
test_mv = df.iloc[9000:10000]
|
|
||||||
|
|
||||||
fig, ax = plt.subplots(nrows=2, ncols=1, figsize=[10,3])
|
|
||||||
|
|
||||||
sp = {'seasonality': DateTime.month, 'names': ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']}
|
|
||||||
|
|
||||||
vmonth = variable.Variable("Month", data_label="datahora", partitioner=seasonal.TimeGridPartitioner, npart=12,
|
|
||||||
data=train_mv, partitioner_specific=sp, alpha_cut=.3)
|
|
||||||
|
|
||||||
vmonth.partitioner.plot(ax[0])
|
|
||||||
|
|
||||||
vwin = variable.Variable("Wind", data_label="ws_10m", alias='wind',
|
|
||||||
partitioner=Grid.GridPartitioner, npart=15, func=Membership.gaussmf,
|
|
||||||
data=train_mv, alpha_cut=.25)
|
|
||||||
|
|
||||||
vwin.partitioner.plot(ax[1])
|
|
||||||
|
|
||||||
plt.tight_layout()
|
|
||||||
|
|
||||||
order = 3
|
|
||||||
knn = 2
|
|
||||||
|
|
||||||
model = granular.GranularWMVFTS(explanatory_variables=[vmonth, vwin], target_variable=vwin,
|
|
||||||
fts_method=pwfts.ProbabilisticWeightedFTS, fuzzyfy_mode='both',
|
|
||||||
order=order, knn=knn)
|
|
||||||
|
|
||||||
model.fit(train_mv)
|
|
||||||
|
|
||||||
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[15,3])
|
|
||||||
ax.plot(test_mv['ws_10m'].values[:100], label='original')
|
|
||||||
|
|
||||||
forecasts = model.predict(test_mv.iloc[:100], type='distribution')
|
|
||||||
|
|
||||||
Util.plot_distribution2(forecasts, test_mv['ws_10m'].values[:100], start_at=model.order-1, ax=ax)
|
|
||||||
|
|
||||||
|
model.predict(test)
|
Loading…
Reference in New Issue
Block a user