diff --git a/pyFTS/models/seasonal/partitioner.py b/pyFTS/models/seasonal/partitioner.py index a20d2f2..b894909 100644 --- a/pyFTS/models/seasonal/partitioner.py +++ b/pyFTS/models/seasonal/partitioner.py @@ -39,7 +39,8 @@ class TimeGridPartitioner(partitioner.Partitioner): else: self.ordered_sets = FS.set_ordered(self.sets) - self.extractor = lambda x: strip_datepart(x, self.season) + if self.type == 'seasonal': + self.extractor = lambda x: strip_datepart(x, self.season) def build(self, data): sets = {} diff --git a/pyFTS/tests/multivariate.py b/pyFTS/tests/multivariate.py index 5123ff8..1f73595 100644 --- a/pyFTS/tests/multivariate.py +++ b/pyFTS/tests/multivariate.py @@ -17,119 +17,16 @@ from pyFTS.models.multivariate import common, variable, mvfts, cmvfts from pyFTS.models.seasonal import partitioner as seasonal from pyFTS.models.seasonal.common import DateTime +# Multivariate time series -from pyFTS.data import Malaysia +train_mv = {} +test_mv = {} -dataset = Malaysia.get_dataframe() +models = {} -dataset["time"] = pd.to_datetime(dataset["time"], format='%m/%d/%y %I:%M %p') +for key in ['price', 'solar', 'load']: + models[key] = [] - -data = dataset['load'].values - -train_split = 8760 - - -train_mv = dataset.iloc[:train_split] -test_mv = dataset.iloc[train_split:] - -sp = {'seasonality': DateTime.month , #'type': 'common', - 'names': ['Jan','Feb','Mar','Apr','May','Jun','Jul', 'Aug','Sep','Oct','Nov','Dec']} - -vmonth = variable.Variable("Month", data_label="time", partitioner=seasonal.TimeGridPartitioner, npart=12, - data=train_mv, partitioner_specific=sp) - -sp = {'seasonality': DateTime.day_of_week, #'type': 'common', - 'names': ['Mon','Tue','Wed','Thu','Fri','Sat','Sun']} - -vday = variable.Variable("Weekday", data_label="time", partitioner=seasonal.TimeGridPartitioner, npart=7, - data=train_mv, partitioner_specific=sp) - -sp = {'seasonality': DateTime.hour_of_day} #, 'type': 'common'} - -vhour = variable.Variable("Hour", data_label="time", partitioner=seasonal.TimeGridPartitioner, npart=24, - data=train_mv, partitioner_specific=sp) - -vload = variable.Variable("load", data_label="load", partitioner=Grid.GridPartitioner, npart=10, - data=train_mv) - -""" -model = cmvfts.ClusteredMVFTS(order=2, knn=3, cluster_params={'optmize': True}) -model.append_variable(vmonthp) -model.append_variable(vdayp) -model.append_variable(vhourp) -model.append_variable(vload) -model.target_variable = vload -model.fit(train_mv) - -print(len(model.cluster.sets.keys())) - -model.cluster.prune() - -print(len(model.cluster.sets.keys())) - -model.predict(test_mv) -""" - -''' -from pyFTS.data import Malaysia - -dataset = Malaysia.get_dataframe() - -dataset["date"] = pd.to_datetime(dataset["time"], format='%m/%d/%y %I:%M %p') - -train_mv = dataset.iloc[:10000] -test_mv = dataset.iloc[10000:] - -sp = {'seasonality': DateTime.month , 'names': ['Jan','Feb','Mar','Apr','May','Jun','Jul', 'Aug','Sep','Oct','Nov','Dec']} - -vmonth = variable.Variable("Month", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=12, - data=train_mv, partitioner_specific=sp) - -sp = {'seasonality': DateTime.day_of_week, 'names': ['Mon','Tue','Wed','Thu','Fri','Sat','Sun']} - -vday = variable.Variable("Weekday", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=7, - data=train_mv, partitioner_specific=sp) - -sp = {'seasonality': DateTime.hour_of_day} - -vhour = variable.Variable("Hour", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=24, - data=train_mv, partitioner_specific=sp) - -vload = variable.Variable("load", data_label="load", partitioner=Grid.GridPartitioner, npart=10, - data=train_mv) - -vtemperature = variable.Variable("temperature", data_label="temperature", partitioner=Grid.GridPartitioner, npart=10, - data=train_mv) - -""" -variables = { - 'month': vmonth, - 'day': vday, - 'hour': vhour, - 'temperature': vtemperature, - 'load': vload -} - -var_list = [k for k in variables.keys()] - -models = [] - -import itertools - -for k in [itertools.combinations(var_list, r) for r in range(2,len(var_list))]: - for x in k: - model = mvfts.MVFTS() - print(x) - for w in x: - model.append_variable(variables[w]) - model.shortname += ' ' + w - model.target_variable = vload - model.fit(mv_train) - models.append(model) -""" - -""" dataset = pd.read_csv('/home/petronio/Downloads/priceHong') dataset['hour'] = dataset.index.values % 24 @@ -137,71 +34,100 @@ data = dataset['price'].values.flatten() train_split = 24 * 800 -# Multivariate time series -train_mv = dataset.iloc[:train_split] -test_mv = dataset.iloc[train_split:] +train_mv['price'] = dataset.iloc[:train_split] +test_mv['price'] = dataset.iloc[train_split:] -#model = Util.load_obj('/home/petronio/Downloads/ClusteredMVFTS4') +dataset = pd.read_csv('https://query.data.world/s/2bgegjggydd3venttp3zlosh3wpjqj', sep=';') + +dataset['data'] = pd.to_datetime(dataset["data"], format='%Y-%m-%d %H:%M:%S') + +train_mv['solar'] = dataset.iloc[:24505] +test_mv['solar'] = dataset.iloc[24505:] + +from pyFTS.data import Malaysia + +dataset = Malaysia.get_dataframe() + +dataset["time"] = pd.to_datetime(dataset["time"], format='%m/%d/%y %I:%M %p') + +train_mv['load'] = dataset.iloc[:train_split] +test_mv['load'] = dataset.iloc[train_split:] +exogenous = {} +endogenous = {} + +for key in models.keys(): + exogenous[key] = {} vhour = variable.Variable("Hour", data_label="hour", partitioner=seasonal.TimeGridPartitioner, npart=24, - data=dataset, + data=train_mv['price'], partitioner_specific={'seasonality': DateTime.hour_of_day, 'type': 'common'}) +exogenous['price']['Hour'] = vhour + vprice = variable.Variable("Price", data_label="price", partitioner=Grid.GridPartitioner, npart=55, - data=train_mv) -model = cmvfts.ClusteredMVFTS(order=2, knn=3) -model.append_variable(vhour) -model.append_variable(vprice) -model.target_variable = vprice -model.fit(train_mv) + data=train_mv['price']) +endogenous['price'] = vprice -data = [[1, 1.0], [2, 2.0]] -df = pd.DataFrame(data, columns=['hour','price']) -forecasts = model.predict(df, steps_ahead=24, generators={'Hour': lambda x : (x+1)%24 }) -""" -''' +sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k) for k in range(0,24)]} -params = [ - {}, - {}, - {'order': 2, 'knn': 3, 'cluster_params': {'optmize': True}}, - {'order': 2, 'knn': 2, 'cluster_params': {'optmize': True}}, - {'order': 2, 'knn': 1, 'cluster_params': {'optmize': True}} +vhour = variable.Variable("Hour", data_label="data", partitioner=seasonal.TimeGridPartitioner, npart=24, + data=train_mv['solar'], partitioner_specific=sp) + +exogenous['solar']['Hour'] = vhour + +vavg = variable.Variable("Radiation", data_label="glo_avg", alias='rad', + partitioner=Grid.GridPartitioner, npart=30, alpha_cut=.3, + data=train_mv['solar']) + +endogenous['solar'] = vavg + + +sp = {'seasonality': DateTime.hour_of_day} + +vhourp = variable.Variable("Hour", data_label="time", partitioner=seasonal.TimeGridPartitioner, npart=24, + data=train_mv['load'], partitioner_specific=sp) + +exogenous['load']['Hour'] = vhourp + +vload = variable.Variable("load", data_label="load", partitioner=Grid.GridPartitioner, npart=10, + data=train_mv['load']) + +endogenous['load'] = vload + + +from pyFTS.models.multivariate import mvfts, wmvfts, cmvfts + +fig, ax = plt.subplots(nrows=3, ncols=1, figsize=[15,15]) + +parameters = [ + {},{}, + {'order':2, 'knn': 1}, + {'order':2, 'knn': 2}, + {'order':2, 'knn': 3}, ] -from pyFTS.models.multivariate import grid +for ct, key in enumerate(models.keys()): -cluster = None + for ct2, method in enumerate([mvfts.MVFTS, wmvfts.WeightedMVFTS, + cmvfts.ClusteredMVFTS,cmvfts.ClusteredMVFTS,cmvfts.ClusteredMVFTS]): + print(key, method, parameters[ct2]) + model = method(**parameters[ct2]) + _key2 = "" + for k in parameters[ct2].keys(): + _key2 += k + str(parameters[ct2][k]) + model.shortname += str(ct) + key + _key2 + for var in exogenous[key].values(): + model.append_variable(var) + model.append_variable(endogenous[key]) + model.target_variable = endogenous[key] + model.fit(train_mv[key]) + models[key].append(model.shortname) -for ct, method in enumerate([mvfts.MVFTS, wmvfts.WeightedMVFTS, cmvfts.ClusteredMVFTS, cmvfts.ClusteredMVFTS, cmvfts.ClusteredMVFTS]): + Util.persist_obj(model, model.shortname) - model = method(**params[ct]) - model.append_variable(vmonth) - model.append_variable(vday) - model.append_variable(vhour) - model.append_variable(vload) - model.target_variable = vload - model.fit(train_mv) - - if method == cmvfts.ClusteredMVFTS: - model.cluster.prune() - - try: - - print(model.shortname, params[ct], Measures.get_point_statistics(test_mv, model)) - - except Exception as ex: - print(model.shortname, params[ct]) - print(ex) - print("\n\n==============================================\n\n") - -#print(model1) - -#print(model1.predict(test_mv, steps_ahead=24, generators={'Hour': lambda x : (x+1)%24 })) - -#''' \ No newline at end of file + del(model)