786 KiB
786 KiB
First Order Weighted Fuzzy Time Series by Yu (2005)¶
H.-K. Yu, “Weighted fuzzy time series models for TAIEX forecasting,” Physica A: Statistical Mechanics and its Applications, vol. 349, no. 3, pp. 609–624, 2005.
Environment Setup¶
Library install/update¶
In [1]:
# Install (or upgrade, because of -U) pyFTS directly from its GitHub repository.
!pip3 install -U git+https://github.com/PYFTS/pyFTS
# Extra package left disabled; no cell visible in this notebook imports it.
# NOTE(review): presumably only needed for pyFTS distributed execution — confirm before enabling.
#!pip3 install dispy
External libraries import¶
In [2]:
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
import seaborn as sns
%pylab inline
Common pyFTS imports¶
In [ ]:
from pyFTS.common import Util as cUtil
from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil
from pyFTS.partitioners import Util as pUtil
from pyFTS.models import chen
Common data transformations¶
In [ ]:
from pyFTS.common import Transformations
# Differencing transformation shared by the cells that work on differenced data.
# NOTE(review): the argument 1 is presumably the lag (first-order differencing) — confirm against pyFTS.
tdiff = Transformations.Differential(1)
# Box-Cox transformation; not referenced by any other cell visible in this notebook.
# NOTE(review): the argument 0 is presumably the lambda parameter — confirm against pyFTS.
boxcox = Transformations.BoxCox(0)
Datasets¶
Data Loading¶
In [ ]:
from pyFTS.data import TAIEX, NASDAQ, SP500
# Names of the series analysed throughout the notebook; each is a valid
# argument for get_dataset().
dataset_names = ["TAIEX", "SP500","NASDAQ"]


def get_dataset(name):
    """Return the time series registered under ``name``.

    Parameters
    ----------
    name : str
        One of the entries of ``dataset_names``.

    Returns
    -------
    The value returned by the matching pyFTS loader's ``get_data()``.
    For "SP500" only the slice ``[11500:16000]`` of the series is returned.

    Raises
    ------
    ValueError
        If ``name`` is not one of the known dataset names.
    """
    # Dispatch on the argument itself, never on a notebook-level variable, so
    # the result does not depend on whatever loop last ran in the kernel.
    if name == "TAIEX":
        return TAIEX.get_data()
    elif name == "SP500":
        return SP500.get_data()[11500:16000]
    elif name == "NASDAQ":
        return NASDAQ.get_data()
    # Fail loudly instead of silently returning None for a misspelled name.
    raise ValueError("Unknown dataset %r; expected one of %s" % (name, dataset_names))


# Number of leading observations of each series used to fit the models.
train_split = 2000
# Size of the evaluation window taken right after the training prefix.
test_length = 200
Visualization¶
In [6]:
# One column per dataset: the series as loaded on the top row and the same
# series after applying `tdiff` on the bottom row.
fig, ax = plt.subplots(nrows=2, ncols=3, figsize=[10,5])
raw_row, diff_row = ax[0], ax[1]
for column, dataset_name in enumerate(dataset_names):
    dataset = get_dataset(dataset_name)
    dataset_diff = tdiff.apply(dataset)
    raw_row[column].set_title(dataset_name)
    raw_row[column].plot(dataset)
    diff_row[column].plot(dataset_diff)
Statistics¶
In [7]:
from statsmodels.tsa.stattools import adfuller
# Augmented Dickey-Fuller test on every dataset, one table row per dataset.
# The critical values are looked up by explicit key so each value is
# guaranteed to land under the column header of the same significance level,
# independently of the iteration order of the dictionary returned by adfuller.
critical_levels = ['1%', '5%', '10%']
rows = []
for dataset_name in dataset_names:
    dataset = get_dataset(dataset_name)
    result = adfuller(dataset)
    # result[0]: test statistic, result[1]: p-value, result[4]: critical values by level.
    adf_statistic, p_value, critical_values = result[0], result[1], result[4]
    row = [dataset_name, adf_statistic, p_value]
    row.extend(critical_values[level] for level in critical_levels)
    rows.append(row)

pd.DataFrame(rows, columns=['Dataset', 'ADF Statistic', 'p-value'] + ['Cr. Val. ' + level for level in critical_levels])
Out[7]:
Partitioning¶
Finding the best number of partitions of the Universe of Discourse is an optimization problem. To learn more about partitioning schemes, see the Partitioners notebook. To learn more about benchmarking, see the Benchmarks notebook.
In [ ]:
from pyFTS.partitioners import Grid, Util as pUtil
from pyFTS.benchmarks import benchmarks as bchmk
from pyFTS.models import chen
# Label stored with every benchmark result so it can be queried back later.
tag = 'chen_partitioning'
# Forecast type handed to the benchmark routine.
_type = 'point'

for dataset_name in dataset_names:
    dataset = get_dataset(dataset_name)
    # (transformation, candidate partition counts): the untransformed series
    # is searched over 10..98 in steps of 2, the differenced one over 3..29.
    search_grid = (
        (None, np.arange(10,100,2)),
        (tdiff, np.arange(3,30,1)),
    )
    for transformation, partition_counts in search_grid:
        # Results are written to the "benchmarks.db" file under `tag`.
        bchmk.sliding_window_benchmarks(
            dataset, 1000, train=0.8, inc=0.2,
            methods=[chen.ConventionalFTS],
            benchmark_models=False,
            transformations=[transformation],
            partitions=partition_counts,
            progress=False, type=_type,
            file="benchmarks.db", dataset=dataset_name, tag=tag)
In [9]:
from pyFTS.benchmarks import Util as bUtil
def _rmse_boxplot(frame, axis, title):
    """Draw the RMSE-by-partition-count boxplot of ``frame`` on ``axis``.

    The axes are narrowed to 85% of their width so the legend, anchored
    past the right edge, has room next to the plot.
    """
    panel = sns.boxplot(x='Partitions', y='Value', hue='Dataset', data=frame,
                        showfliers=False, ax=axis, palette="Set3")
    bounds = panel.get_position()
    panel.set_position([bounds.x0, bounds.y0, bounds.width * 0.85, bounds.height])
    panel.legend(loc='right', bbox_to_anchor=(1.15, 0.5), ncol=1)
    axis.set_title(title)
    axis.set_ylabel("RMSE")


# RMSE results of the partitioning benchmark: runs without a transformation
# (df1) and runs with one (df2).
df1 = bUtil.get_dataframe_from_bd(
    "benchmarks.db",
    "tag = 'chen_partitioning' and measure = 'rmse'and transformation is null")
df2 = bUtil.get_dataframe_from_bd(
    "benchmarks.db",
    "tag = 'chen_partitioning' and measure = 'rmse' and transformation is not null")

fig, ax = plt.subplots(nrows=2, ncols=1, figsize=[15,7])

_rmse_boxplot(df1, ax[0], "Original data")
ax[0].set_xlabel("")

_rmse_boxplot(df2, ax[1], "Differentiated data")
ax[1].set_xlabel("Number of partitions of the UoD")
Out[9]:
Comparing the partitioning schemas¶
In [10]:
from pyFTS.partitioners import Grid, Util as pUtil
# Build one grid partitioner per dataset for the original series (30
# partitions) and one for the differenced series (10 partitions), keep them
# for the fitting cells, and plot both schemes in the column of that dataset.
fig, ax = plt.subplots(nrows=2, ncols=3, figsize=[20,5])

partitioners, partitioners_diff = {}, {}

for column, dataset_name in enumerate(dataset_names):
    dataset = get_dataset(dataset_name)

    partitioners[dataset_name] = Grid.GridPartitioner(data=dataset, npart=30)
    partitioners_diff[dataset_name] = Grid.GridPartitioner(data=dataset, npart=10, transformation=tdiff)

    # (registry, panel title, target axes): top row first, then bottom row.
    panels = (
        (partitioners, dataset_name, ax[0][column]),
        (partitioners_diff, '', ax[1][column]),
    )
    for registry, panel_title, axis in panels:
        pUtil.plot_sets(dataset, [registry[dataset_name].sets], titles=[panel_title], axis=axis)
Fitting models¶
With original data¶
In [ ]:
# Fit one model per dataset on the first `train_split` observations of the
# untransformed series and save it to a file named 'model1<dataset>'.
# NOTE(review): the notebook title cites Yu (2005) weighted FTS, but this cell
# fits chen.ConventionalFTS — confirm which model is intended.
for dataset_name in dataset_names:
    dataset = get_dataset(dataset_name)
    training_data = dataset[:train_split]
    model1 = chen.ConventionalFTS(partitioner=partitioners[dataset_name])
    model1.name = dataset_name
    model1.fit(training_data, save_model=True, file_path='model1' + dataset_name, order=1)
With transformed data¶
In [ ]:
# Same fit as for the original data, but using the partitioner built for the
# differenced series and with `tdiff` appended to the model, saved to a file
# named 'model2<dataset>'.
for dataset_name in dataset_names:
    dataset = get_dataset(dataset_name)
    training_data = dataset[:train_split]
    model2 = chen.ConventionalFTS(partitioner=partitioners_diff[dataset_name])
    model2.name = dataset_name
    model2.append_transformation(tdiff)
    model2.fit(training_data, save_model=True, file_path='model2' + dataset_name, order=1)
Predicting with the models¶
In [13]:
# Plot, for every dataset, the evaluation window against the forecasts of the
# model fitted on the original data (loaded from the 'model1<dataset>' file).
# The window length comes from `test_length` instead of a repeated literal,
# and the slice is computed once and reused for plotting and prediction.
fig, ax = plt.subplots(nrows=3, ncols=1, figsize=[20,10])
test_end = train_split + test_length
for count, dataset_name in enumerate(dataset_names):
    dataset = get_dataset(dataset_name)
    test = dataset[train_split:test_end]

    model1 = cUtil.load_obj('model1' + dataset_name)
    forecasts = model1.predict(test)

    ax[count].plot(test, label='Observed')
    ax[count].plot(forecasts, label='Forecast')
    ax[count].set_title(dataset_name)
    ax[count].legend()

plt.tight_layout()
In [14]:
from pyFTS.benchmarks import Measures
# Accuracy of the models fitted on the original data, measured on the
# `test_length` observations that follow the training prefix; one row per
# dataset.
rows = []
test_end = train_split + test_length
for dataset_name in dataset_names:
    dataset = get_dataset(dataset_name)
    test = dataset[train_split:test_end]
    model1 = cUtil.load_obj('model1' + dataset_name)
    rows.append([dataset_name] + list(Measures.get_point_statistics(test, model1)))

pd.DataFrame(rows, columns=["Dataset","RMSE","SMAPE","Theil's U"])
Out[14]:
In [15]:
# Plot, for every dataset, the evaluation window against the forecasts of the
# model fitted with the differencing transformation (loaded from the
# 'model2<dataset>' file). The loaded object is bound to `model2` so the
# variable name matches the file it comes from, and the window length comes
# from `test_length` instead of a repeated literal.
fig, ax = plt.subplots(nrows=3, ncols=1, figsize=[20,10])
test_end = train_split + test_length
for count, dataset_name in enumerate(dataset_names):
    dataset = get_dataset(dataset_name)
    test = dataset[train_split:test_end]

    model2 = cUtil.load_obj('model2' + dataset_name)
    forecasts = model2.predict(test)

    ax[count].plot(test, label='Observed')
    ax[count].plot(forecasts, label='Forecast')
    ax[count].set_title(dataset_name)
    ax[count].legend()

plt.tight_layout()
In [16]:
from pyFTS.benchmarks import Measures
# Accuracy of the models fitted with the differencing transformation,
# measured on the `test_length` observations that follow the training
# prefix; one row per dataset. The loaded object is bound to `model2` so the
# variable name matches the 'model2<dataset>' file it comes from.
rows = []
test_end = train_split + test_length
for dataset_name in dataset_names:
    dataset = get_dataset(dataset_name)
    test = dataset[train_split:test_end]
    model2 = cUtil.load_obj('model2' + dataset_name)
    rows.append([dataset_name] + list(Measures.get_point_statistics(test, model2)))

pd.DataFrame(rows, columns=["Dataset","RMSE","SMAPE","Theil's U"])
Out[16]:
Residual Analysis¶
In [17]:
from pyFTS.benchmarks import ResidualAnalysis as ra
# Compare the residuals of both saved models for each dataset.
# Each model is loaded into its own variable: binding both loads to the same
# name would discard the first model and leave the comparison relying on
# whatever `model2` object happened to remain in the kernel from an earlier
# cell, which belongs to a single dataset only.
for dataset_name in dataset_names:
    dataset = get_dataset(dataset_name)
    model1 = cUtil.load_obj('model1' + dataset_name)
    model2 = cUtil.load_obj('model2' + dataset_name)
    ra.plot_residuals_by_model(dataset, [model1, model2])
In [ ]:
In [ ]: