748 KiB
748 KiB
First Order Improved Weighted Fuzzy Time Series by Efendi, Ismail and Deris (2013)¶
R. Efendi, Z. Ismail, and M. M. Deris, “Improved weight Fuzzy Time Series as used in the exchange rates forecasting of US Dollar to Ringgit Malaysia,” Int. J. Comput. Intell. Appl., vol. 12, no. 1, p. 1350005, 2013.
Environment Setup¶
Library install/update¶
In [1]:
!pip3 install -U git+https://github.com/PYFTS/pyFTS
External libraries import¶
In [2]:
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
import seaborn as sns
%pylab inline
Common pyFTS imports¶
In [0]:
from pyFTS.common import Util as cUtil
from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil
from pyFTS.partitioners import Util as pUtil
from pyFTS.models import ismailefendi
Common data transformations¶
In [0]:
from pyFTS.common import Transformations
# Shared transformation instances for the cells below.
# tdiff is the transformation behind every "differentiated"/"transformed"
# variant in this notebook (plots, benchmarks, partitioner and model2).
# NOTE(review): the argument 1 is presumably the differencing lag — confirm.
tdiff = Transformations.Differential(1)
# NOTE(review): boxcox is not referenced by any cell visible in this notebook;
# the meaning of the 0 argument should be checked against the pyFTS docs.
boxcox = Transformations.BoxCox(0)
Datasets¶
Data Loading¶
In [0]:
from pyFTS.data import TAIEX, NASDAQ, SP500
# Names of the benchmark series. The cells below iterate over this list and
# pass each entry to get_dataset().
dataset_names = ["TAIEX", "SP500","NASDAQ"]
def get_dataset(name):
    """Return the time series registered under ``name``.

    Parameters
    ----------
    name : str
        One of ``"TAIEX"``, ``"SP500"`` or ``"NASDAQ"``.

    Returns
    -------
    The data returned by the matching pyFTS loader, or ``None`` when
    ``name`` is not one of the recognised dataset names. For ``"SP500"``
    only the slice ``[11500:16000]`` of the loaded series is returned.
    """
    # Compare against the argument itself. The previous body tested the
    # global ``dataset_name``, so the parameter was silently ignored and the
    # function only worked inside loops that happened to define that global.
    if name == "TAIEX":
        return TAIEX.get_data()
    if name == "SP500":
        return SP500.get_data()[11500:16000]
    if name == "NASDAQ":
        return NASDAQ.get_data()
    # Unknown name: keep the original fall-through result.
    return None
# Index at which each series is cut: the fitting cells train on
# dataset[:train_split] and the evaluation cells start reading at train_split.
train_split = 2000
# Presumably the length of the out-of-sample window that follows train_split
# (the evaluation cells look at 200 observations) — TODO confirm.
test_length = 200
Visualization¶
In [0]:
# Overview figure: one column per dataset, with the raw series on the top row
# and the tdiff-transformed series on the bottom row.
fig, ax = plt.subplots(nrows=2, ncols=3, figsize=[10,5])
top_row, bottom_row = ax[0], ax[1]
for column, dataset_name in enumerate(dataset_names):
    dataset = get_dataset(dataset_name)
    dataset_diff = tdiff.apply(dataset)
    raw_axis = top_row[column]
    raw_axis.plot(dataset)
    bottom_row[column].plot(dataset_diff)
    # Only the top panel gets a title; the panel below shares its column.
    raw_axis.set_title(dataset_name)
Statistics¶
In [0]:
from statsmodels.tsa.stattools import adfuller
# Augmented Dickey-Fuller summary: one table row per dataset holding the test
# statistic, the p-value and the critical values reported by adfuller().
adf_columns = ['Dataset','ADF Statistic','p-value','Cr. Val. 1%','Cr. Val. 5%','Cr. Val. 10%']
rows = []
for dataset_name in dataset_names:
    dataset = get_dataset(dataset_name)
    result = adfuller(dataset)
    statistic, p_value = result[0], result[1]
    # result[4] is the critical-value mapping; its values are appended in the
    # mapping's own iteration order.
    critical_values = list(result[4].values())
    row = [dataset_name, statistic, p_value] + critical_values
    rows.append(row)
# Keep the DataFrame as the last expression so the notebook renders it.
pd.DataFrame(rows, columns=adf_columns)
Out[0]:
Partitioning¶
Finding the best number of partitions of the Universe of Discourse is an optimization problem. To learn more about partitioning schemes, please see the Partitioners notebook. To learn more about benchmarking, see the Benchmarks notebook.
In [0]:
from pyFTS.partitioners import Grid, Util as pUtil
from pyFTS.benchmarks import benchmarks as bchmk
from pyFTS.models import chen
# Sliding-window benchmark of chen.ConventionalFTS over a range of partition
# counts. Results are written to benchmarks.db under `tag`.
tag = 'chen_partitioning'
_type = 'point'

# The two runs per dataset differ only in the transformation applied and in
# the np.arange() bounds of the partition counts that are tried.
benchmark_runs = [
    (None, (10, 100, 2)),   # original data
    (tdiff, (3, 30, 1)),    # differenced data
]

for dataset_name in dataset_names:
    dataset = get_dataset(dataset_name)
    for transformation, partition_bounds in benchmark_runs:
        bchmk.sliding_window_benchmarks(
            dataset, 1000, train=0.8, inc=0.2,
            methods=[chen.ConventionalFTS],
            benchmark_models=False,
            transformations=[transformation],
            partitions=np.arange(*partition_bounds),
            progress=False, type=_type,
            file="benchmarks.db", dataset=dataset_name, tag=tag)
In [0]:
from pyFTS.benchmarks import Util as bUtil
def _rmse_boxplot(axis, frame, title, xlabel):
    """Draw one RMSE-per-partition-count boxplot on ``axis`` and return it."""
    plot = sns.boxplot(x='Partitions', y='Value', hue='Dataset', data=frame,
                       showfliers=False, ax=axis, palette="Set3")
    # Shrink the axes to 85% of their width so the legend fits on the right.
    bounds = plot.get_position()
    plot.set_position([bounds.x0, bounds.y0, bounds.width * 0.85, bounds.height])
    plot.legend(loc='right', bbox_to_anchor=(1.15, 0.5), ncol=1)
    axis.set_title(title)
    axis.set_ylabel("RMSE")
    axis.set_xlabel(xlabel)
    return plot


# RMSE results of the 'chen_partitioning' benchmark, split by whether a
# transformation was recorded for the run.
df1 = bUtil.get_dataframe_from_bd(
    "benchmarks.db",
    "tag = 'chen_partitioning' and measure = 'rmse'and transformation is null")
df2 = bUtil.get_dataframe_from_bd(
    "benchmarks.db",
    "tag = 'chen_partitioning' and measure = 'rmse' and transformation is not null")

fig, ax = plt.subplots(nrows=2, ncols=1, figsize=[15,7])
g1 = _rmse_boxplot(ax[0], df1, "Original data", "")
g2 = _rmse_boxplot(ax[1], df2, "Differentiated data", "Number of partitions of the UoD")
Comparing the partitioning schemes¶
In [6]:
from pyFTS.partitioners import Grid, Util as pUtil
# Build two grid partitioners per dataset (30 partitions on the original
# data, 10 partitions with the tdiff transformation), keep them for the
# model-fitting cells and plot their fuzzy sets, one dataset per column.
fig, ax = plt.subplots(nrows=2, ncols=3, figsize=[20,5])

partitioners = {}
partitioners_diff = {}

for column, dataset_name in enumerate(dataset_names):
    dataset = get_dataset(dataset_name)

    partitioner = Grid.GridPartitioner(data=dataset, npart=30)
    partitioners[dataset_name] = partitioner

    partitioner_diff = Grid.GridPartitioner(data=dataset, npart=10, transformation=tdiff)
    partitioners_diff[dataset_name] = partitioner_diff

    # Top row: sets for the original data; bottom row: sets for the
    # differenced data (left untitled).
    top_axis, bottom_axis = ax[0][column], ax[1][column]
    pUtil.plot_sets(dataset, [partitioner.sets], titles=[dataset_name], axis=top_axis)
    pUtil.plot_sets(dataset, [partitioner_diff.sets], titles=[''], axis=bottom_axis)
Fitting models¶
With original data¶
In [7]:
# Fit one first-order ImprovedWeightedFTS per dataset on the original data
# and save each fitted model to a file named 'model1<dataset name>'.
for dataset_name in dataset_names:
    dataset = get_dataset(dataset_name)
    training_data = dataset[:train_split]
    model_path = 'model1' + dataset_name

    model1 = ismailefendi.ImprovedWeightedFTS(partitioner=partitioners[dataset_name])
    model1.name = dataset_name
    model1.fit(training_data, save_model=True, file_path=model_path, order=1)

    print(model1)
With transformed data¶
In [8]:
# Fit one first-order ImprovedWeightedFTS per dataset using the tdiff
# transformation and the matching partitioner, and save each fitted model to
# a file named 'model2<dataset name>'.
for dataset_name in dataset_names:
    dataset = get_dataset(dataset_name)
    training_data = dataset[:train_split]
    model_path = 'model2' + dataset_name

    model2 = ismailefendi.ImprovedWeightedFTS(partitioner=partitioners_diff[dataset_name])
    model2.name = dataset_name
    # Register the transformation on the model before fitting.
    model2.append_transformation(tdiff)
    model2.fit(training_data, save_model=True, file_path=model_path, order=1)

    print(model2)
Predicting with the models¶
In [9]:
# Plot the out-of-sample window of each dataset together with the forecasts
# of the saved original-data model ('model1<dataset name>').
fig, ax = plt.subplots(nrows=3, ncols=1, figsize=[20,10])

for count, dataset_name in enumerate(dataset_names):
    dataset = get_dataset(dataset_name)

    # Use the notebook-level test_length instead of a hard-coded 200, and
    # slice once so the plotted data and the model input cannot drift apart.
    test = dataset[train_split:train_split + test_length]

    ax[count].plot(test)

    model1 = cUtil.load_obj('model1' + dataset_name)
    forecasts = model1.predict(test)

    ax[count].plot(forecasts)
    ax[count].set_title(dataset_name)

# Applied once after all panels are drawn.
plt.tight_layout()
In [10]:
from pyFTS.benchmarks import Measures
# Accuracy table for the original-data models: one row per dataset with the
# values returned by Measures.get_point_statistics on the test window.
rows = []

for dataset_name in dataset_names:
    dataset = get_dataset(dataset_name)

    # Window length comes from the notebook-level test_length setting rather
    # than a hard-coded 200.
    test = dataset[train_split:train_split + test_length]

    model1 = cUtil.load_obj('model1' + dataset_name)

    row = [dataset_name]
    row.extend(Measures.get_point_statistics(test, model1))
    rows.append(row)

# Keep the DataFrame as the last expression so the notebook renders it.
pd.DataFrame(rows, columns=["Dataset","RMSE","SMAPE","Theil's U"])
Out[10]:
In [11]:
# Plot the out-of-sample window of each dataset together with the forecasts
# of the saved transformed-data model ('model2<dataset name>').
fig, ax = plt.subplots(nrows=3, ncols=1, figsize=[20,10])

for count, dataset_name in enumerate(dataset_names):
    dataset = get_dataset(dataset_name)

    # Use the notebook-level test_length instead of a hard-coded 200, and
    # slice once so the plotted data and the model input cannot drift apart.
    test = dataset[train_split:train_split + test_length]

    ax[count].plot(test)

    # Bound to model2: this is the pickle written by the transformed-data
    # fitting cell, so it should not be stored under the name model1.
    model2 = cUtil.load_obj('model2' + dataset_name)
    forecasts = model2.predict(test)

    ax[count].plot(forecasts)
    ax[count].set_title(dataset_name)

# Applied once after all panels are drawn.
plt.tight_layout()
In [12]:
from pyFTS.benchmarks import Measures
# Accuracy table for the transformed-data models: one row per dataset with
# the values returned by Measures.get_point_statistics on the test window.
rows = []

for dataset_name in dataset_names:
    dataset = get_dataset(dataset_name)

    # Window length comes from the notebook-level test_length setting rather
    # than a hard-coded 200.
    test = dataset[train_split:train_split + test_length]

    # Bound to model2: this is the pickle written by the transformed-data
    # fitting cell, so it should not be stored under the name model1.
    model2 = cUtil.load_obj('model2' + dataset_name)

    row = [dataset_name]
    row.extend(Measures.get_point_statistics(test, model2))
    rows.append(row)

# Keep the DataFrame as the last expression so the notebook renders it.
pd.DataFrame(rows, columns=["Dataset","RMSE","SMAPE","Theil's U"])
Out[12]:
Residual Analysis¶
In [14]:
from pyFTS.benchmarks import ResidualAnalysis as ra
# Residual plots comparing, for each dataset, the saved original-data model
# with the saved transformed-data model.
for dataset_name in dataset_names:
    dataset = get_dataset(dataset_name)

    model1 = cUtil.load_obj('model1' + dataset_name)
    # Previously this second load was also assigned to model1, which threw
    # away the original-data model and left model2 pointing at whatever the
    # fitting cell had bound last (always the final dataset's model).
    model2 = cUtil.load_obj('model2' + dataset_name)

    ra.plot_residuals_by_model(dataset, [model1, model2])
In [0]:
In [0]: