259 lines
9.2 KiB
Python
259 lines
9.2 KiB
Python
#!/usr/bin/python
|
|
# -*- coding: utf8 -*-
|
|
|
|
import os
|
|
import numpy as np
|
|
import matplotlib.pylab as plt
|
|
#from mpl_toolkits.mplot3d import Axes3D
|
|
|
|
import pandas as pd
|
|
|
|
from pyFTS.common import Util as cUtil, FuzzySet
|
|
from pyFTS.partitioners import Grid, Entropy, Util as pUtil, Simple
|
|
from pyFTS.benchmarks import benchmarks as bchmk, Measures
|
|
from pyFTS.models import chen, yu, cheng, ismailefendi, hofts, pwfts
|
|
from pyFTS.common import Transformations, Membership
|
|
|
|
tdiff = Transformations.Differential(1)
|
|
|
|
|
|
from pyFTS.data import TAIEX, SP500, NASDAQ, Malaysia, Enrollments
|
|
|
|
#from pyFTS.data import mackey_glass
|
|
#y = mackey_glass.get_data()
|
|
|
|
from pyFTS.partitioners import Grid
|
|
from pyFTS.models import pwfts, tsaur
|
|
|
|
dataset = pd.read_csv('/home/petronio/Downloads/Klang-daily Max.csv', sep=',')
|
|
|
|
dataset['date'] = pd.to_datetime(dataset["Day/Month/Year"], format='%m/%d/%Y')
|
|
dataset['value'] = dataset['Daily-Max API']
|
|
|
|
|
|
train_uv = dataset['value'].values[:732]
|
|
test_uv = dataset['value'].values[732:]
|
|
|
|
from itertools import product
|
|
|
|
levels = ['VeryLow', 'Low', 'Medium', 'High', 'VeryHigh']
|
|
sublevels = [str(k) for k in np.arange(0, 7)]
|
|
names = []
|
|
for combination in product(*[levels, sublevels]):
|
|
names.append(combination[0] + combination[1])
|
|
|
|
print(names)
|
|
|
|
#partitioner = Grid.GridPartitioner(data=train_uv, npart=35, names=names)
|
|
partitioner = Entropy.EntropyPartitioner(data=train_uv,npart=35, names=names)
|
|
|
|
print(partitioner)
|
|
|
|
model = pwfts.ProbabilisticWeightedFTS(partitioner=partitioner) #, order=2, lags=[3,4])
|
|
#model = tsaur.MarkovWeightedFTS(partitioner=partitioner)
|
|
model.fit(train_uv)
|
|
|
|
from pyFTS.benchmarks import benchmarks as bchmk
|
|
|
|
print(model)
|
|
|
|
print(model.forecast(test_uv))
|
|
|
|
#distributions = model.predict(y[800:820])
|
|
|
|
#print(distributions)
|
|
|
|
|
|
'''
|
|
#dataset = SP500.get_data()[11500:16000]
|
|
#dataset = NASDAQ.get_data()
|
|
#print(len(dataset))
|
|
|
|
|
|
bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2,
|
|
methods=[chen.ConventionalFTS], #[pwfts.ProbabilisticWeightedFTS],
|
|
benchmark_models=False,
|
|
transformations=[None],
|
|
#orders=[1, 2, 3],
|
|
partitions=np.arange(10, 100, 2),
|
|
progress=False, type="point",
|
|
#steps_ahead=[1,2,4,6,8,10],
|
|
distributed=False, nodes=['192.168.0.110', '192.168.0.107', '192.168.0.106'],
|
|
file="benchmarks.db", dataset="TAIEX", tag="comparisons")
|
|
|
|
|
|
|
|
bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2,
|
|
methods=[chen.ConventionalFTS], # [pwfts.ProbabilisticWeightedFTS],
|
|
benchmark_models=False,
|
|
transformations=[tdiff],
|
|
#orders=[1, 2, 3],
|
|
partitions=np.arange(3, 30, 1),
|
|
progress=False, type="point",
|
|
#steps_ahead=[1,2,4,6,8,10],
|
|
distributed=False, nodes=['192.168.0.110', '192.168.0.107', '192.168.0.106'],
|
|
file="benchmarks.db", dataset="NASDAQ", tag="comparisons")
|
|
|
|
'''
|
|
'''
|
|
from pyFTS.partitioners import Grid, Util as pUtil
|
|
partitioner = Grid.GridPartitioner(data=dataset[:800], npart=10, transformation=tdiff)
|
|
|
|
model = pwfts.ProbabilisticWeightedFTS('',partitioner=partitioner)
|
|
model.append_transformation(tdiff)
|
|
model.fit(dataset[:800])
|
|
print(Measures.get_distribution_statistics(dataset[800:1000], model, steps_ahead=7))
|
|
#tmp = model.predict(dataset[800:1000], type='distribution', steps_ahead=7)
|
|
#for tmp2 in tmp:
|
|
# print(tmp2)
|
|
'''
|
|
'''
|
|
|
|
types = ['point','interval','distribution']
|
|
benchmark_methods=[[arima.ARIMA for k in range(8)] + [quantreg.QuantileRegression for k in range(4)]]
|
|
benchmark_methods=[
|
|
[arima.ARIMA for k in range(4)] + [naive.Naive],
|
|
[arima.ARIMA for k in range(8)] + [quantreg.QuantileRegression for k in range(4)],
|
|
[arima.ARIMA for k in range(4)] + [quantreg.QuantileRegression for k in range(2)]
|
|
+ [knn.KNearestNeighbors for k in range(3)]
|
|
]
|
|
benchmark_methods_parameters= [
|
|
[
|
|
{'order': (1, 0, 0), 'alpha': .05},
|
|
{'order': (1, 0, 0), 'alpha': .25},
|
|
{'order': (1, 0, 1), 'alpha': .05},
|
|
{'order': (1, 0, 1), 'alpha': .25},
|
|
{'order': (2, 0, 1), 'alpha': .05},
|
|
{'order': (2, 0, 1), 'alpha': .25},
|
|
{'order': (2, 0, 2), 'alpha': .05},
|
|
{'order': (2, 0, 2), 'alpha': .25},
|
|
{'order': 1, 'alpha': .05},
|
|
{'order': 1, 'alpha': .25},
|
|
{'order': 2, 'alpha': .05},
|
|
{'order': 2, 'alpha': .25}
|
|
]
|
|
]
|
|
benchmark_methods_parameters= [
|
|
[
|
|
{'order': (1, 0, 0)},
|
|
{'order': (1, 0, 1)},
|
|
{'order': (2, 0, 1)},
|
|
{'order': (2, 0, 2)},
|
|
{},
|
|
],[
|
|
{'order': (1, 0, 0), 'alpha': .05},
|
|
{'order': (1, 0, 0), 'alpha': .25},
|
|
{'order': (1, 0, 1), 'alpha': .05},
|
|
{'order': (1, 0, 1), 'alpha': .25},
|
|
{'order': (2, 0, 1), 'alpha': .05},
|
|
{'order': (2, 0, 1), 'alpha': .25},
|
|
{'order': (2, 0, 2), 'alpha': .05},
|
|
{'order': (2, 0, 2), 'alpha': .25},
|
|
{'order': 1, 'alpha': .05},
|
|
{'order': 1, 'alpha': .25},
|
|
{'order': 2, 'alpha': .05},
|
|
{'order': 2, 'alpha': .25}
|
|
],[
|
|
{'order': (1, 0, 0)},
|
|
{'order': (1, 0, 1)},
|
|
{'order': (2, 0, 1)},
|
|
{'order': (2, 0, 2)},
|
|
{'order': 1, 'dist': True},
|
|
{'order': 2, 'dist': True},
|
|
{'order': 1}, {'order': 2}, {'order': 3},
|
|
]
|
|
]
|
|
dataset_name = "SP500"
|
|
tag = "ahead2"
|
|
|
|
from pyFTS.benchmarks import arima, naive, quantreg
|
|
|
|
for ct, type in enumerate(types):
|
|
|
|
|
|
bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2,
|
|
methods=[pwfts.ProbabilisticWeightedFTS],
|
|
benchmark_models=False,
|
|
#benchmark_methods=benchmark_methods[ct],
|
|
#benchmark_methods_parameters=benchmark_methods_parameters[ct],
|
|
transformations=[tdiff],
|
|
orders=[1], #, 2, 3],
|
|
partitions=[5], #np.arange(3, 35, 2),
|
|
progress=False, type=type,
|
|
steps_ahead=[2, 4, 6, 8, 10],
|
|
distributed=True, nodes=['192.168.0.110', '192.168.0.107', '192.168.0.106'],
|
|
file="benchmarks.db", dataset=dataset_name, tag=tag)
|
|
bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2,
|
|
methods=[pwfts.ProbabilisticWeightedFTS],
|
|
benchmark_models=False,
|
|
#benchmark_methods=benchmark_methods[ct],
|
|
#benchmark_methods_parameters=benchmark_methods_parameters[ct],
|
|
transformations=[None],
|
|
orders=[1], #,2,3],
|
|
partitions=[30], #np.arange(15, 85, 5),
|
|
progress=False, type=type,
|
|
steps_ahead=[2, 4, 6, 8, 10],
|
|
distributed=True, nodes=['192.168.0.110', '192.168.0.107','192.168.0.106'],
|
|
file="benchmarks.db", dataset=dataset_name, tag=tag)
|
|
|
|
|
|
'''
|
|
'''
|
|
dat = pd.read_csv('pwfts_taiex_partitioning.csv', sep=';')
|
|
print(bUtil.analytic_tabular_dataframe(dat))
|
|
#print(dat["Size"].values[0])
|
|
'''
|
|
'''
|
|
train_split = 2000
|
|
test_length = 200
|
|
|
|
from pyFTS.partitioners import Grid, Util as pUtil
|
|
partitioner = Grid.GridPartitioner(data=dataset[:train_split], npart=30)
|
|
#partitioner = Grid.GridPartitioner(data=dataset[:train_split], npart=10, transformation=tdiff)
|
|
|
|
from pyFTS.common import fts,tree
|
|
from pyFTS.models import hofts, pwfts
|
|
|
|
pfts1_taiex = pwfts.ProbabilisticWeightedFTS("1", partitioner=partitioner)
|
|
#pfts1_taiex.append_transformation(tdiff)
|
|
pfts1_taiex.fit(dataset[:train_split], save_model=True, file_path='pwfts')
|
|
pfts1_taiex.shortname = "1st Order"
|
|
|
|
print(pfts1_taiex)
|
|
|
|
tmp = pfts1_taiex.predict(dataset[train_split:train_split+200], type='point',
|
|
method='heuristic')
|
|
|
|
|
|
print(tmp)
|
|
|
|
tmp = pfts1_taiex.predict(dataset[train_split:train_split+200], type='point',
|
|
method='expected_value')
|
|
|
|
|
|
print(tmp)
|
|
'''
|
|
|
|
'''
|
|
tmp = pfts1_taiex.predict(dataset[train_split:train_split+200], type='diPedro Pazzini
|
|
stribution', steps_ahead=20)
|
|
|
|
|
|
f, ax = plt.subplots(3, 4, figsize=[20,15])
|
|
tmp[0].plot(ax[0][0], title='t=1')
|
|
tmp[2].plot(ax[0][1], title='t=20')
|
|
tmp[4].plot(ax[0][2], title='t=40')
|
|
tmp[6].plot(ax[0][3], title='t=60')
|
|
tmp[8].plot(ax[1][0], title='t=80')
|
|
tmp[10].plot(ax[1][1], title='t=100')
|
|
tmp[12].plot(ax[1][2], title='t=120')
|
|
tmp[14].plot(ax[1][3], title='t=140')
|
|
tmp[16].plot(ax[2][0], title='t=160')
|
|
tmp[18].plot(ax[2][1], title='t=180')
|
|
tmp[20].plot(ax[2][2], title='t=200')
|
|
|
|
|
|
f, ax = plt.subplots(1, 1, figsize=[20,15])
|
|
bchmk.plot_distribution(ax, 'blue', tmp, f, 0, reference_data=dataset[train_split:train_split+200])
|
|
|
|
''' |