Clustered MVFTS improvements
This commit is contained in:
parent
a8c05563d4
commit
8c1fec482d
@ -16,8 +16,8 @@ class ClusteredMVFTS(mvfts.MVFTS):
|
||||
"""The cluster method to be called when a new model is built"""
|
||||
self.cluster_params = kwargs.get('cluster_params', {})
|
||||
"""The cluster method parameters"""
|
||||
self.cluster = None
|
||||
"""The most recent trained clusterer"""
|
||||
self.cluster = kwargs.get('cluster', None)
|
||||
"""The trained clusterer"""
|
||||
|
||||
self.fts_method = kwargs.get('fts_method', hofts.WeightedHighOrderFTS)
|
||||
"""The FTS method to be called when a new model is built"""
|
||||
@ -38,17 +38,16 @@ class ClusteredMVFTS(mvfts.MVFTS):
|
||||
|
||||
def fuzzyfy(self,data):
|
||||
ndata = []
|
||||
for ct in range(1, len(data.index)+1):
|
||||
ix = data.index[ct - 1]
|
||||
data_point = self.format_data(data.loc[ix])
|
||||
for index, row in data.iterrows():
|
||||
data_point = self.format_data(row)
|
||||
ndata.append(common.fuzzyfy_instance_clustered(data_point, self.cluster, self.alpha_cut))
|
||||
|
||||
return ndata
|
||||
|
||||
|
||||
def train(self, data, **kwargs):
|
||||
|
||||
self.cluster = self.cluster_method(data=data, mvfts=self, neighbors=self.knn)
|
||||
if self.cluster is None:
|
||||
self.cluster = self.cluster_method(data=data, mvfts=self, neighbors=self.knn, **self.cluster_params)
|
||||
|
||||
self.model = self.fts_method(partitioner=self.cluster, **self.fts_params)
|
||||
if self.model.is_high_order:
|
||||
@ -59,6 +58,8 @@ class ClusteredMVFTS(mvfts.MVFTS):
|
||||
|
||||
self.model.train(ndata, fuzzyfied=True)
|
||||
|
||||
self.cluster.prune()
|
||||
|
||||
def forecast(self, ndata, **kwargs):
|
||||
|
||||
ndata = self.fuzzyfy(ndata)
|
||||
|
@ -5,6 +5,7 @@ from scipy.spatial import KDTree
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
|
||||
class GridCluster(partitioner.Partitioner):
|
||||
"""
|
||||
A cartesian product of all fuzzy sets of all variables
|
||||
@ -17,8 +18,12 @@ class GridCluster(partitioner.Partitioner):
|
||||
self.sets = {}
|
||||
self.kdtree = None
|
||||
self.index = {}
|
||||
self.build(None)
|
||||
self.neighbors = kwargs.get('neighbors', 2)
|
||||
self.optmize = kwargs.get('optmize', False)
|
||||
if self.optmize:
|
||||
self.count = {}
|
||||
data = kwargs.get('data', [None])
|
||||
self.build(data)
|
||||
|
||||
def build(self, data):
|
||||
|
||||
@ -26,7 +31,6 @@ class GridCluster(partitioner.Partitioner):
|
||||
for k in self.mvfts.explanatory_variables]
|
||||
|
||||
midpoints = []
|
||||
index = {}
|
||||
|
||||
c = 0
|
||||
for k in product(*fsets):
|
||||
@ -44,14 +48,59 @@ class GridCluster(partitioner.Partitioner):
|
||||
self.index[c] = _key
|
||||
c += 1
|
||||
|
||||
import sys
|
||||
sys.setrecursionlimit(100000)
|
||||
|
||||
self.kdtree = KDTree(midpoints)
|
||||
|
||||
sys.setrecursionlimit(1000)
|
||||
|
||||
def prune(self):
    """Discard the fuzzy sets that were never counted and rebuild the KD-tree.

    Does nothing unless ``self.optmize`` is true. Otherwise every entry of
    ``self.sets`` whose key is absent from ``self.count`` is removed, and
    ``self.index`` and ``self.kdtree`` are rebuilt from the centroids of the
    remaining sets, one coordinate per explanatory variable of ``self.mvfts``.
    """
    if not self.optmize:
        return

    # Kept as a function-local import, as in the original code.
    import sys

    # Snapshot the keys first: the dictionary is mutated while we walk it.
    for key in list(self.sets):
        if key not in self.count:
            del self.sets[key]

    var_names = [k.name for k in self.mvfts.explanatory_variables]

    midpoints = []
    self.index = {}
    for position, fset in enumerate(self.sets.values()):
        midpoints.append([fset.sets[name].centroid for name in var_names])
        # Maps the KD-tree row number back to the name of the fuzzy set.
        self.index[position] = fset.name

    # The limit is raised around the KDTree construction, as the original
    # code did. Restore whatever limit was active before, instead of
    # forcing 1000, and never lower a limit that is already higher.
    previous_limit = sys.getrecursionlimit()
    sys.setrecursionlimit(max(previous_limit, 100000))
    try:
        self.kdtree = KDTree(midpoints)
    finally:
        sys.setrecursionlimit(previous_limit)
|
||||
|
||||
|
||||
def knn(self, data):
|
||||
tmp = [data[k.name] for k in self.mvfts.explanatory_variables]
|
||||
tmp, ix = self.kdtree.query(tmp, self.neighbors )
|
||||
tmp = [data[k.name]
|
||||
for k in self.mvfts.explanatory_variables]
|
||||
tmp, ix = self.kdtree.query(tmp, self.neighbors)
|
||||
|
||||
if not isinstance(ix, (list, np.ndarray)):
|
||||
ix = [ix]
|
||||
|
||||
return [self.index[k] for k in ix]
|
||||
if self.optmize:
|
||||
tmp = []
|
||||
for k in ix:
|
||||
tmp.append(self.index[k])
|
||||
self.count[self.index[k]] = 1
|
||||
return tmp
|
||||
else:
|
||||
return [self.index[k] for k in ix]
|
||||
|
||||
|
@ -31,7 +31,8 @@ class MVFTS(fts.FTS):
|
||||
def format_data(self, data):
|
||||
ndata = {}
|
||||
for var in self.explanatory_variables:
|
||||
ndata[var.name] = data[var.data_label]
|
||||
#ndata[var.name] = data[var.data_label]
|
||||
ndata[var.name] = var.partitioner.extractor(data[var.data_label])
|
||||
|
||||
return ndata
|
||||
|
||||
@ -109,9 +110,8 @@ class MVFTS(fts.FTS):
|
||||
def forecast(self, data, **kwargs):
|
||||
ret = []
|
||||
ndata = self.apply_transformations(data)
|
||||
for ix in ndata.index:
|
||||
data_point = ndata.loc[ix]
|
||||
flrs = self.generate_lhs_flrs(data_point)
|
||||
for index, row in ndata.iterrows():
|
||||
flrs = self.generate_lhs_flrs(row)
|
||||
mvs = []
|
||||
mps = []
|
||||
for flr in flrs:
|
||||
@ -120,7 +120,7 @@ class MVFTS(fts.FTS):
|
||||
mvs.append(0.)
|
||||
mps.append(0.)
|
||||
else:
|
||||
mvs.append(self.flrgs[flrg.get_key()].get_membership(self.format_data(data_point), self.explanatory_variables))
|
||||
mvs.append(self.flrgs[flrg.get_key()].get_membership(self.format_data(row), self.explanatory_variables))
|
||||
mps.append(self.flrgs[flrg.get_key()].get_midpoint(self.target_variable.partitioner.sets))
|
||||
|
||||
mv = np.array(mvs)
|
||||
|
@ -3,6 +3,8 @@ import pandas as pd
|
||||
from enum import Enum
|
||||
from pyFTS.common import FuzzySet, Membership
|
||||
from pyFTS.partitioners import partitioner, Grid
|
||||
from datetime import date as dt
|
||||
|
||||
|
||||
|
||||
class DateTime(Enum):
|
||||
@ -94,7 +96,7 @@ class FuzzySet(FuzzySet.FuzzySet):
|
||||
self.type = kwargs.get('type', 'seasonal')
|
||||
|
||||
def transform(self, x):
|
||||
if self.type == 'seasonal':
|
||||
if self.type == 'seasonal' and isinstance(x, (dt, pd.Timestamp)):
|
||||
dp = strip_datepart(x, self.datepart)
|
||||
else:
|
||||
dp = x
|
||||
|
@ -39,6 +39,8 @@ class TimeGridPartitioner(partitioner.Partitioner):
|
||||
else:
|
||||
self.ordered_sets = FS.set_ordered(self.sets)
|
||||
|
||||
self.extractor = lambda x: strip_datepart(x, self.season)
|
||||
|
||||
def build(self, data):
|
||||
sets = {}
|
||||
|
||||
|
@ -30,6 +30,8 @@ class Partitioner(object):
|
||||
"""In a multivariate context, the variable that contains this partitioner"""
|
||||
self.type = kwargs.get('type', 'common')
|
||||
"""The type of fuzzy sets that are generated by this partitioner"""
|
||||
self.extractor = kwargs.get('extractor', lambda x: x)
|
||||
"""Anonymous function used to extract a single primitive type from an object instance"""
|
||||
self.ordered_sets = None
|
||||
|
||||
if kwargs.get('preprocess',True):
|
||||
|
@ -17,37 +17,92 @@ from pyFTS.models.multivariate import common, variable, mvfts, cmvfts
|
||||
from pyFTS.models.seasonal import partitioner as seasonal
|
||||
from pyFTS.models.seasonal.common import DateTime
|
||||
|
||||
|
||||
from pyFTS.data import Malaysia
|
||||
|
||||
dataset = Malaysia.get_dataframe()
|
||||
|
||||
dataset["time"] = pd.to_datetime(dataset["time"], format='%m/%d/%y %I:%M %p')
|
||||
|
||||
|
||||
data = dataset['load'].values
|
||||
|
||||
train_split = 8760
|
||||
|
||||
|
||||
train_mv = dataset.iloc[:train_split]
|
||||
test_mv = dataset.iloc[train_split:]
|
||||
|
||||
sp = {'seasonality': DateTime.month , #'type': 'common',
|
||||
'names': ['Jan','Feb','Mar','Apr','May','Jun','Jul', 'Aug','Sep','Oct','Nov','Dec']}
|
||||
|
||||
vmonth = variable.Variable("Month", data_label="time", partitioner=seasonal.TimeGridPartitioner, npart=12,
|
||||
data=train_mv, partitioner_specific=sp)
|
||||
|
||||
sp = {'seasonality': DateTime.day_of_week, #'type': 'common',
|
||||
'names': ['Mon','Tue','Wed','Thu','Fri','Sat','Sun']}
|
||||
|
||||
vday = variable.Variable("Weekday", data_label="time", partitioner=seasonal.TimeGridPartitioner, npart=7,
|
||||
data=train_mv, partitioner_specific=sp)
|
||||
|
||||
sp = {'seasonality': DateTime.hour_of_day} #, 'type': 'common'}
|
||||
|
||||
vhour = variable.Variable("Hour", data_label="time", partitioner=seasonal.TimeGridPartitioner, npart=24,
|
||||
data=train_mv, partitioner_specific=sp)
|
||||
|
||||
vload = variable.Variable("load", data_label="load", partitioner=Grid.GridPartitioner, npart=10,
|
||||
data=train_mv)
|
||||
|
||||
"""
|
||||
model = cmvfts.ClusteredMVFTS(order=2, knn=3, cluster_params={'optmize': True})
|
||||
model.append_variable(vmonthp)
|
||||
model.append_variable(vdayp)
|
||||
model.append_variable(vhourp)
|
||||
model.append_variable(vload)
|
||||
model.target_variable = vload
|
||||
model.fit(train_mv)
|
||||
|
||||
print(len(model.cluster.sets.keys()))
|
||||
|
||||
model.cluster.prune()
|
||||
|
||||
print(len(model.cluster.sets.keys()))
|
||||
|
||||
model.predict(test_mv)
|
||||
"""
|
||||
|
||||
'''
|
||||
from pyFTS.data import Malaysia
|
||||
|
||||
dataset = Malaysia.get_dataframe()
|
||||
|
||||
dataset["date"] = pd.to_datetime(dataset["time"], format='%m/%d/%y %I:%M %p')
|
||||
|
||||
mv_train = dataset.iloc[:100000]
|
||||
train_mv = dataset.iloc[:10000]
|
||||
test_mv = dataset.iloc[10000:]
|
||||
|
||||
sp = {'seasonality': DateTime.month , 'names': ['Jan','Feb','Mar','Apr','May','Jun','Jul', 'Aug','Sep','Oct','Nov','Dec']}
|
||||
|
||||
vmonth = variable.Variable("Month", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=12,
|
||||
data=mv_train, partitioner_specific=sp)
|
||||
data=train_mv, partitioner_specific=sp)
|
||||
|
||||
sp = {'seasonality': DateTime.day_of_week, 'names': ['Mon','Tue','Wed','Thu','Fri','Sat','Sun']}
|
||||
|
||||
vday = variable.Variable("Weekday", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=7,
|
||||
data=mv_train, partitioner_specific=sp)
|
||||
data=train_mv, partitioner_specific=sp)
|
||||
|
||||
sp = {'seasonality': DateTime.hour_of_day}
|
||||
|
||||
vhour = variable.Variable("Hour", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=24,
|
||||
data=mv_train, partitioner_specific=sp)
|
||||
data=train_mv, partitioner_specific=sp)
|
||||
|
||||
vload = variable.Variable("load", data_label="load", partitioner=Grid.GridPartitioner, npart=10,
|
||||
data=mv_train)
|
||||
data=train_mv)
|
||||
|
||||
vtemperature = variable.Variable("temperature", data_label="temperature", partitioner=Grid.GridPartitioner, npart=10,
|
||||
data=mv_train)
|
||||
|
||||
data=train_mv)
|
||||
|
||||
"""
|
||||
variables = {
|
||||
'month': vmonth,
|
||||
'day': vday,
|
||||
@ -74,7 +129,7 @@ for k in [itertools.combinations(var_list, r) for r in range(2,len(var_list))]:
|
||||
models.append(model)
|
||||
"""
|
||||
|
||||
#"""
|
||||
"""
|
||||
dataset = pd.read_csv('/home/petronio/Downloads/priceHong')
|
||||
dataset['hour'] = dataset.index.values % 24
|
||||
|
||||
@ -107,27 +162,46 @@ data = [[1, 1.0], [2, 2.0]]
|
||||
df = pd.DataFrame(data, columns=['hour','price'])
|
||||
|
||||
forecasts = model.predict(df, steps_ahead=24, generators={'Hour': lambda x : (x+1)%24 })
|
||||
|
||||
|
||||
"""
|
||||
'''
|
||||
|
||||
params = [
|
||||
{},
|
||||
{},
|
||||
{'order': 2, 'knn': 1},
|
||||
{'order': 2, 'knn': 2},
|
||||
{'order': 2, 'knn': 3}
|
||||
{'order': 2, 'knn': 3, 'cluster_params': {'optmize': True}},
|
||||
{'order': 2, 'knn': 2, 'cluster_params': {'optmize': True}},
|
||||
{'order': 2, 'knn': 1, 'cluster_params': {'optmize': True}}
|
||||
]
|
||||
|
||||
from pyFTS.models.multivariate import grid
|
||||
|
||||
cluster = None
|
||||
|
||||
|
||||
for ct, method in enumerate([mvfts.MVFTS, wmvfts.WeightedMVFTS, cmvfts.ClusteredMVFTS, cmvfts.ClusteredMVFTS, cmvfts.ClusteredMVFTS]):
|
||||
|
||||
model = method(**params[ct])
|
||||
model.append_variable(vmonth)
|
||||
model.append_variable(vday)
|
||||
model.append_variable(vhour)
|
||||
model.append_variable(vprice)
|
||||
model.target_variable = vprice
|
||||
model.append_variable(vload)
|
||||
model.target_variable = vload
|
||||
model.fit(train_mv)
|
||||
print(model.shortname, params[ct], Measures.get_point_statistics(test_mv, model))
|
||||
|
||||
if method == cmvfts.ClusteredMVFTS:
|
||||
model.cluster.prune()
|
||||
|
||||
try:
|
||||
|
||||
print(model.shortname, params[ct], Measures.get_point_statistics(test_mv, model))
|
||||
|
||||
except Exception as ex:
|
||||
print(model.shortname, params[ct])
|
||||
print(ex)
|
||||
print("\n\n==============================================\n\n")
|
||||
|
||||
#print(model1)
|
||||
|
||||
#print(model1.predict(test_mv, steps_ahead=24, generators={'Hour': lambda x : (x+1)%24 }))
|
||||
|
||||
'''
|
||||
#'''
|
Loading…
Reference in New Issue
Block a user