Clustered MVFTS improvements
parent a8c05563d4
commit 8c1fec482d
@@ -16,8 +16,8 @@ class ClusteredMVFTS(mvfts.MVFTS):
         """The cluster method to be called when a new model is build"""
         self.cluster_params = kwargs.get('cluster_params', {})
         """The cluster method parameters"""
-        self.cluster = None
-        """The most recent trained clusterer"""
+        self.cluster = kwargs.get('cluster', None)
+        """The trained clusterer"""
 
         self.fts_method = kwargs.get('fts_method', hofts.WeightedHighOrderFTS)
         """The FTS method to be called when a new model is build"""
@@ -38,17 +38,16 @@ class ClusteredMVFTS(mvfts.MVFTS):
 
     def fuzzyfy(self,data):
         ndata = []
-        for ct in range(1, len(data.index)+1):
-            ix = data.index[ct - 1]
-            data_point = self.format_data(data.loc[ix])
+        for index, row in data.iterrows():
+            data_point = self.format_data(row)
             ndata.append(common.fuzzyfy_instance_clustered(data_point, self.cluster, self.alpha_cut))
 
         return ndata
 
 
     def train(self, data, **kwargs):
 
-        self.cluster = self.cluster_method(data=data, mvfts=self, neighbors=self.knn)
+        if self.cluster is None:
+            self.cluster = self.cluster_method(data=data, mvfts=self, neighbors=self.knn, **self.cluster_params)
 
         self.model = self.fts_method(partitioner=self.cluster, **self.fts_params)
         if self.model.is_high_order:
@@ -59,6 +58,8 @@ class ClusteredMVFTS(mvfts.MVFTS):
 
         self.model.train(ndata, fuzzyfied=True)
 
+        self.cluster.prune()
+
     def forecast(self, ndata, **kwargs):
 
         ndata = self.fuzzyfy(ndata)
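For reference, a minimal usage sketch of the two new keyword arguments touched above: `cluster_params` is forwarded to the cluster constructor, and `cluster` lets a previously trained clusterer be injected so that `train` only builds one when none is given. The synthetic DataFrame and the variable names below are illustrative assumptions, not part of this commit.

# --- usage sketch, not part of this commit ---
import numpy as np
import pandas as pd
from pyFTS.partitioners import Grid
from pyFTS.models.multivariate import variable, cmvfts

# Illustrative two-variable dataset
df = pd.DataFrame({'x': np.sin(np.arange(300) / 10.0),
                   'y': np.cos(np.arange(300) / 10.0)})

vx = variable.Variable("x", data_label="x", partitioner=Grid.GridPartitioner, npart=10, data=df)
vy = variable.Variable("y", data_label="y", partitioner=Grid.GridPartitioner, npart=10, data=df)

# cluster_params is forwarded to the GridCluster constructor ('optmize' spelling as in the commit)
model = cmvfts.ClusteredMVFTS(order=2, knn=2, cluster_params={'optmize': True})
model.append_variable(vx)
model.append_variable(vy)
model.target_variable = vy
model.fit(df)

# A second model can reuse the already trained clusterer: train() now only
# builds a new one when self.cluster is None
model2 = cmvfts.ClusteredMVFTS(order=2, knn=2, cluster=model.cluster)
model2.append_variable(vx)
model2.append_variable(vy)
model2.target_variable = vy
model2.fit(df)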
@@ -5,6 +5,7 @@ from scipy.spatial import KDTree
 import numpy as np
 import pandas as pd
 
 
 class GridCluster(partitioner.Partitioner):
     """
     A cartesian product of all fuzzy sets of all variables
@@ -17,8 +18,12 @@ class GridCluster(partitioner.Partitioner):
         self.sets = {}
         self.kdtree = None
         self.index = {}
-        self.build(None)
         self.neighbors = kwargs.get('neighbors', 2)
+        self.optmize = kwargs.get('optmize', False)
+        if self.optmize:
+            self.count = {}
+        data = kwargs.get('data', [None])
+        self.build(data)
 
     def build(self, data):
 
@@ -26,7 +31,6 @@ class GridCluster(partitioner.Partitioner):
                  for k in self.mvfts.explanatory_variables]
 
         midpoints = []
-        index = {}
 
         c = 0
         for k in product(*fsets):
@@ -44,14 +48,59 @@ class GridCluster(partitioner.Partitioner):
             self.index[c] = _key
             c += 1
 
+        import sys
+        sys.setrecursionlimit(100000)
+
         self.kdtree = KDTree(midpoints)
 
+        sys.setrecursionlimit(1000)
+
+    def prune(self):
+
+        if not self.optmize:
+            return
+
+        for fset in [fs for fs in self.sets.keys()]:
+            if fset not in self.count:
+                fs = self.sets.pop(fset)
+                del (fs)
+
+        vars = [k.name for k in self.mvfts.explanatory_variables]
+
+        midpoints = []
+
+        self.index = {}
+
+        for ct, fset in enumerate(self.sets.values()):
+            mp = []
+            for vr in vars:
+                mp.append(fset.sets[vr].centroid)
+            midpoints.append(mp)
+            self.index[ct] = fset.name
+
+        import sys
+        sys.setrecursionlimit(100000)
+
+        self.kdtree = KDTree(midpoints)
+
+        sys.setrecursionlimit(1000)
+
     def knn(self, data):
-        tmp = [data[k.name] for k in self.mvfts.explanatory_variables]
-        tmp, ix = self.kdtree.query(tmp, self.neighbors )
+        tmp = [data[k.name]
+               for k in self.mvfts.explanatory_variables]
+        tmp, ix = self.kdtree.query(tmp, self.neighbors)
 
         if not isinstance(ix, (list, np.ndarray)):
             ix = [ix]
 
-        return [self.index[k] for k in ix]
+        if self.optmize:
+            tmp = []
+            for k in ix:
+                tmp.append(self.index[k])
+                self.count[self.index[k]] = 1
+            return tmp
+        else:
+            return [self.index[k] for k in ix]
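A sketch of how the new `optmize`/`prune` pair is meant to behave (the flag spelling is the commit's): with `optmize=True`, `knn` records every product set it actually returns in `self.count`, and `prune` later discards the sets that were never hit and rebuilds the KD-tree over the surviving midpoints. This continues from the hypothetical `vx`/`vy`/`df` setup sketched after the ClusteredMVFTS hunks above.

# --- usage sketch, not part of this commit ---
pruned = cmvfts.ClusteredMVFTS(order=2, knn=2, cluster_params={'optmize': True})
full = cmvfts.ClusteredMVFTS(order=2, knn=2)

for m in (pruned, full):
    m.append_variable(vx)
    m.append_variable(vy)
    m.target_variable = vy
    m.fit(df)  # train() now ends with cluster.prune(), a no-op unless optmize=True

# The pruned grid keeps only the cartesian-product sets visited during training
print(len(full.cluster.sets), len(pruned.cluster.sets))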
@@ -31,7 +31,8 @@ class MVFTS(fts.FTS):
     def format_data(self, data):
         ndata = {}
         for var in self.explanatory_variables:
-            ndata[var.name] = data[var.data_label]
+            #ndata[var.name] = data[var.data_label]
+            ndata[var.name] = var.partitioner.extractor(data[var.data_label])
 
         return ndata
 
@@ -109,9 +110,8 @@ class MVFTS(fts.FTS):
     def forecast(self, data, **kwargs):
        ret = []
        ndata = self.apply_transformations(data)
-       for ix in ndata.index:
-           data_point = ndata.loc[ix]
-           flrs = self.generate_lhs_flrs(data_point)
+       for index, row in ndata.iterrows():
+           flrs = self.generate_lhs_flrs(row)
            mvs = []
            mps = []
            for flr in flrs:
@@ -120,7 +120,7 @@ class MVFTS(fts.FTS):
                    mvs.append(0.)
                    mps.append(0.)
                else:
-                   mvs.append(self.flrgs[flrg.get_key()].get_membership(self.format_data(data_point), self.explanatory_variables))
+                   mvs.append(self.flrgs[flrg.get_key()].get_membership(self.format_data(row), self.explanatory_variables))
                    mps.append(self.flrgs[flrg.get_key()].get_midpoint(self.target_variable.partitioner.sets))
 
            mv = np.array(mvs)
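The `format_data` change above routes every raw column value through its variable's `partitioner.extractor` before fuzzification: seasonal variables get a date-part extractor, while plain numeric variables keep the identity extractor added to the `Partitioner` base class below. A rough illustration of what a formatted row then looks like, assuming `vmonth` and `vload` are defined as in the test script at the end of this diff.

# --- usage sketch, not part of this commit ---
import pandas as pd

row = {'time': pd.Timestamp('2019-05-20 14:00'), 'load': 1250.0}

# Seasonal variable: the extractor strips the month out of the timestamp
month = vmonth.partitioner.extractor(row[vmonth.data_label])  # roughly 5, for May

# Plain numeric variable: the default extractor is the identity function
load = vload.partitioner.extractor(row[vload.data_label])     # 1250.0, unchanged

# This is essentially the dict that format_data() now builds for each row
ndata = {vmonth.name: month, vload.name: load}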
@@ -3,6 +3,8 @@ import pandas as pd
 from enum import Enum
 from pyFTS.common import FuzzySet, Membership
 from pyFTS.partitioners import partitioner, Grid
+from datetime import date as dt
+
 
 
 class DateTime(Enum):
@@ -94,7 +96,7 @@ class FuzzySet(FuzzySet.FuzzySet):
         self.type = kwargs.get('type', 'seasonal')
 
     def transform(self, x):
-        if self.type == 'seasonal':
+        if self.type == 'seasonal' and isinstance(x, (dt, pd.Timestamp)):
            dp = strip_datepart(x, self.datepart)
        else:
            dp = x
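The added `isinstance(x, (dt, pd.Timestamp))` guard makes `transform` strip a date part only when it actually receives a date-like value; values that were already reduced to a primitive (for instance by the new extractors) pass through unchanged. A self-contained restatement of that logic with made-up values, not the library code itself:

# --- illustrative restatement, not part of this commit ---
from datetime import date
import pandas as pd

def transform_like(set_type, strip, x):
    # mirrors the guarded branch introduced above
    if set_type == 'seasonal' and isinstance(x, (date, pd.Timestamp)):
        return strip(x)
    return x

strip_month = lambda ts: ts.month
print(transform_like('seasonal', strip_month, pd.Timestamp('2019-05-20')))  # 5
print(transform_like('seasonal', strip_month, 5))                           # 5, passed through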
@@ -39,6 +39,8 @@ class TimeGridPartitioner(partitioner.Partitioner):
         else:
             self.ordered_sets = FS.set_ordered(self.sets)
 
+        self.extractor = lambda x: strip_datepart(x, self.season)
+
     def build(self, data):
         sets = {}
 
@@ -30,6 +30,8 @@ class Partitioner(object):
         """In a multivariate context, the variable that contains this partitioner"""
         self.type = kwargs.get('type', 'common')
         """The type of fuzzy sets that are generated by this partitioner"""
+        self.extractor = kwargs.get('extractor', lambda x: x)
+        """Anonymous function used to extract a single primitive type from an object instance"""
         self.ordered_sets = None
 
         if kwargs.get('preprocess',True):
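The base `Partitioner` now accepts an optional `extractor` keyword, defaulting to the identity function; subclasses such as the `TimeGridPartitioner` above override it. A hedged sketch of supplying a custom extractor directly (the data and the lambda are made up; the keyword is simply forwarded through `**kwargs` to the base class):

# --- usage sketch, not part of this commit ---
import numpy as np
from pyFTS.partitioners import Grid

values = np.random.normal(10, 2, 200)

# Custom extractor: coerce whatever sits in the raw column to a float
part = Grid.GridPartitioner(data=values, npart=7, extractor=lambda x: float(x))

print(part.extractor("12.5"))  # 12.5 -- used by MVFTS.format_data before fuzzification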
@@ -17,37 +17,92 @@ from pyFTS.models.multivariate import common, variable, mvfts, cmvfts
 from pyFTS.models.seasonal import partitioner as seasonal
 from pyFTS.models.seasonal.common import DateTime
 
 
+from pyFTS.data import Malaysia
+
+dataset = Malaysia.get_dataframe()
+
+dataset["time"] = pd.to_datetime(dataset["time"], format='%m/%d/%y %I:%M %p')
+
+data = dataset['load'].values
+
+train_split = 8760
+
+train_mv = dataset.iloc[:train_split]
+test_mv = dataset.iloc[train_split:]
+
+sp = {'seasonality': DateTime.month , #'type': 'common',
+      'names': ['Jan','Feb','Mar','Apr','May','Jun','Jul', 'Aug','Sep','Oct','Nov','Dec']}
+
+vmonth = variable.Variable("Month", data_label="time", partitioner=seasonal.TimeGridPartitioner, npart=12,
+                           data=train_mv, partitioner_specific=sp)
+
+sp = {'seasonality': DateTime.day_of_week, #'type': 'common',
+      'names': ['Mon','Tue','Wed','Thu','Fri','Sat','Sun']}
+
+vday = variable.Variable("Weekday", data_label="time", partitioner=seasonal.TimeGridPartitioner, npart=7,
+                         data=train_mv, partitioner_specific=sp)
+
+sp = {'seasonality': DateTime.hour_of_day} #, 'type': 'common'}
+
+vhour = variable.Variable("Hour", data_label="time", partitioner=seasonal.TimeGridPartitioner, npart=24,
+                          data=train_mv, partitioner_specific=sp)
+
+vload = variable.Variable("load", data_label="load", partitioner=Grid.GridPartitioner, npart=10,
+                          data=train_mv)
+
 """
+model = cmvfts.ClusteredMVFTS(order=2, knn=3, cluster_params={'optmize': True})
+model.append_variable(vmonthp)
+model.append_variable(vdayp)
+model.append_variable(vhourp)
+model.append_variable(vload)
+model.target_variable = vload
+model.fit(train_mv)
+
+print(len(model.cluster.sets.keys()))
+
+model.cluster.prune()
+
+print(len(model.cluster.sets.keys()))
+
+model.predict(test_mv)
+"""
+
+'''
 from pyFTS.data import Malaysia
 
 dataset = Malaysia.get_dataframe()
 
 dataset["date"] = pd.to_datetime(dataset["time"], format='%m/%d/%y %I:%M %p')
 
-mv_train = dataset.iloc[:100000]
+train_mv = dataset.iloc[:10000]
+test_mv = dataset.iloc[10000:]
 
 sp = {'seasonality': DateTime.month , 'names': ['Jan','Feb','Mar','Apr','May','Jun','Jul', 'Aug','Sep','Oct','Nov','Dec']}
 
 vmonth = variable.Variable("Month", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=12,
-                           data=mv_train, partitioner_specific=sp)
+                           data=train_mv, partitioner_specific=sp)
 
 sp = {'seasonality': DateTime.day_of_week, 'names': ['Mon','Tue','Wed','Thu','Fri','Sat','Sun']}
 
 vday = variable.Variable("Weekday", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=7,
-                         data=mv_train, partitioner_specific=sp)
+                         data=train_mv, partitioner_specific=sp)
 
 sp = {'seasonality': DateTime.hour_of_day}
 
 vhour = variable.Variable("Hour", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=24,
-                          data=mv_train, partitioner_specific=sp)
+                          data=train_mv, partitioner_specific=sp)
 
 vload = variable.Variable("load", data_label="load", partitioner=Grid.GridPartitioner, npart=10,
-                          data=mv_train)
+                          data=train_mv)
 
 vtemperature = variable.Variable("temperature", data_label="temperature", partitioner=Grid.GridPartitioner, npart=10,
-                                 data=mv_train)
+                                 data=train_mv)
 
 
+"""
 variables = {
     'month': vmonth,
     'day': vday,
@@ -74,7 +129,7 @@ for k in [itertools.combinations(var_list, r) for r in range(2,len(var_list))]:
     models.append(model)
 """
 
-#"""
+"""
 dataset = pd.read_csv('/home/petronio/Downloads/priceHong')
 dataset['hour'] = dataset.index.values % 24
 
@@ -107,27 +162,46 @@ data = [[1, 1.0], [2, 2.0]]
 df = pd.DataFrame(data, columns=['hour','price'])
 
 forecasts = model.predict(df, steps_ahead=24, generators={'Hour': lambda x : (x+1)%24 })
+"""
 
 '''
 
 params = [
     {},
     {},
-    {'order': 2, 'knn': 1},
-    {'order': 2, 'knn': 2},
-    {'order': 2, 'knn': 3}
+    {'order': 2, 'knn': 3, 'cluster_params': {'optmize': True}},
+    {'order': 2, 'knn': 2, 'cluster_params': {'optmize': True}},
+    {'order': 2, 'knn': 1, 'cluster_params': {'optmize': True}}
 ]
 
+from pyFTS.models.multivariate import grid
+
+cluster = None
+
 
 for ct, method in enumerate([mvfts.MVFTS, wmvfts.WeightedMVFTS, cmvfts.ClusteredMVFTS, cmvfts.ClusteredMVFTS, cmvfts.ClusteredMVFTS]):
 
     model = method(**params[ct])
+    model.append_variable(vmonth)
+    model.append_variable(vday)
     model.append_variable(vhour)
-    model.append_variable(vprice)
-    model.target_variable = vprice
+    model.append_variable(vload)
+    model.target_variable = vload
     model.fit(train_mv)
 
+    if method == cmvfts.ClusteredMVFTS:
+        model.cluster.prune()
+
+    try:
 
         print(model.shortname, params[ct], Measures.get_point_statistics(test_mv, model))
 
+    except Exception as ex:
+        print(model.shortname, params[ct])
+        print(ex)
+
+    print("\n\n==============================================\n\n")
 
 #print(model1)
 
 #print(model1.predict(test_mv, steps_ahead=24, generators={'Hour': lambda x : (x+1)%24 }))
 
-'''
+#'''