HOFTS bugfix

This commit is contained in:
Petrônio Cândido 2019-02-14 16:44:28 -02:00
parent 0017d471b5
commit cef00bd71a
2 changed files with 15 additions and 231 deletions

View File

@@ -187,6 +187,8 @@ class HighOrderFTS(fts.FTS):
         fuzzyfied = kwargs.get('fuzzyfied', False)
 
+        mode = kwargs.get('mode', 'mean')
+
         ret = []
 
         l = len(ndata) if not explain else self.max_lag + 1
@@ -234,7 +236,11 @@ class HighOrderFTS(fts.FTS):
                     print("\t {} \t Midpoint: {}\n".format(str(flrg), mp))
                     print("\t {} \t Membership: {}\n".format(str(flrg), mv))
 
-            final = np.dot(midpoints, memberships) if not fuzzyfied else np.nanmean(midpoints)
+            if mode == "mean" or fuzzyfied:
+                final = np.nanmean(midpoints)
+            else:
+                final = np.dot(midpoints, memberships)
 
             ret.append(final)
 
             if explain:
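
In short: after this fix, predict() averages the matched rule midpoints by default (mode='mean'), and uses the membership-weighted dot product only when some other mode is requested on crisp (non-fuzzyfied) input. A minimal sketch exercising both paths, assuming the usual pyFTS fit/predict workflow; the value 'weighted' below is illustrative, since any mode other than 'mean' selects the dot-product branch:

import numpy as np
from pyFTS.partitioners import Grid
from pyFTS.models import hofts

# toy series, same spirit as the rewritten test below
y = [np.sin(k) for k in np.arange(-2 * np.pi, 2 * np.pi, 0.5)]

part = Grid.GridPartitioner(data=y, npart=35)
model = hofts.HighOrderFTS(order=2, partitioner=part)
model.fit(y)

mean_fc = model.predict(y)                    # default: np.nanmean(midpoints)
weighted_fc = model.predict(y, mode='weighted')  # non-'mean': np.dot(midpoints, memberships)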

View File

@@ -25,235 +25,13 @@ from pyFTS.data import TAIEX, SP500, NASDAQ, Malaysia, Enrollments
 from pyFTS.partitioners import Grid
 from pyFTS.models import pwfts, tsaur
 
-dataset = pd.read_csv('/home/petronio/Downloads/Klang-daily Max.csv', sep=',')
-
-dataset['date'] = pd.to_datetime(dataset["Day/Month/Year"], format='%m/%d/%Y')
-dataset['value'] = dataset['Daily-Max API']
-
-train_uv = dataset['value'].values[:732]
-test_uv = dataset['value'].values[732:]
-
-from itertools import product
-
-levels = ['VeryLow', 'Low', 'Medium', 'High', 'VeryHigh']
-sublevels = [str(k) for k in np.arange(0, 7)]
-names = []
-for combination in product(*[levels, sublevels]):
-    names.append(combination[0] + combination[1])
-
-print(names)
-
-#partitioner = Grid.GridPartitioner(data=train_uv, npart=35, names=names)
-partitioner = Entropy.EntropyPartitioner(data=train_uv,npart=35, names=names)
-
-print(partitioner)
-
-model = pwfts.ProbabilisticWeightedFTS(partitioner=partitioner) #, order=2, lags=[3,4])
-#model = tsaur.MarkovWeightedFTS(partitioner=partitioner)
-model.fit(train_uv)
-
-from pyFTS.benchmarks import benchmarks as bchmk
-
-print(model)
-print(model.forecast(test_uv))
-
-#distributions = model.predict(y[800:820])
-#print(distributions)
-
-'''
-#dataset = SP500.get_data()[11500:16000]
-#dataset = NASDAQ.get_data()
-#print(len(dataset))
-
-bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2,
-                                methods=[chen.ConventionalFTS], #[pwfts.ProbabilisticWeightedFTS],
-                                benchmark_models=False,
-                                transformations=[None],
-                                #orders=[1, 2, 3],
-                                partitions=np.arange(10, 100, 2),
-                                progress=False, type="point",
-                                #steps_ahead=[1,2,4,6,8,10],
-                                distributed=False, nodes=['192.168.0.110', '192.168.0.107', '192.168.0.106'],
-                                file="benchmarks.db", dataset="TAIEX", tag="comparisons")
-
-bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2,
-                                methods=[chen.ConventionalFTS], # [pwfts.ProbabilisticWeightedFTS],
-                                benchmark_models=False,
-                                transformations=[tdiff],
-                                #orders=[1, 2, 3],
-                                partitions=np.arange(3, 30, 1),
-                                progress=False, type="point",
-                                #steps_ahead=[1,2,4,6,8,10],
-                                distributed=False, nodes=['192.168.0.110', '192.168.0.107', '192.168.0.106'],
-                                file="benchmarks.db", dataset="NASDAQ", tag="comparisons")
-'''
-'''
-from pyFTS.partitioners import Grid, Util as pUtil
-
-partitioner = Grid.GridPartitioner(data=dataset[:800], npart=10, transformation=tdiff)
-
-model = pwfts.ProbabilisticWeightedFTS('',partitioner=partitioner)
-model.append_transformation(tdiff)
-model.fit(dataset[:800])
-
-print(Measures.get_distribution_statistics(dataset[800:1000], model, steps_ahead=7))
-
-#tmp = model.predict(dataset[800:1000], type='distribution', steps_ahead=7)
-#for tmp2 in tmp:
-#    print(tmp2)
-'''
-'''
-types = ['point','interval','distribution']
-
-benchmark_methods=[[arima.ARIMA for k in range(8)] + [quantreg.QuantileRegression for k in range(4)]]
-benchmark_methods=[
-    [arima.ARIMA for k in range(4)] + [naive.Naive],
-    [arima.ARIMA for k in range(8)] + [quantreg.QuantileRegression for k in range(4)],
-    [arima.ARIMA for k in range(4)] + [quantreg.QuantileRegression for k in range(2)]
-    + [knn.KNearestNeighbors for k in range(3)]
-]
-
-benchmark_methods_parameters= [
-    [
-        {'order': (1, 0, 0), 'alpha': .05},
-        {'order': (1, 0, 0), 'alpha': .25},
-        {'order': (1, 0, 1), 'alpha': .05},
-        {'order': (1, 0, 1), 'alpha': .25},
-        {'order': (2, 0, 1), 'alpha': .05},
-        {'order': (2, 0, 1), 'alpha': .25},
-        {'order': (2, 0, 2), 'alpha': .05},
-        {'order': (2, 0, 2), 'alpha': .25},
-        {'order': 1, 'alpha': .05},
-        {'order': 1, 'alpha': .25},
-        {'order': 2, 'alpha': .05},
-        {'order': 2, 'alpha': .25}
-    ]
-]
-
-benchmark_methods_parameters= [
-    [
-        {'order': (1, 0, 0)},
-        {'order': (1, 0, 1)},
-        {'order': (2, 0, 1)},
-        {'order': (2, 0, 2)},
-        {},
-    ],[
-        {'order': (1, 0, 0), 'alpha': .05},
-        {'order': (1, 0, 0), 'alpha': .25},
-        {'order': (1, 0, 1), 'alpha': .05},
-        {'order': (1, 0, 1), 'alpha': .25},
-        {'order': (2, 0, 1), 'alpha': .05},
-        {'order': (2, 0, 1), 'alpha': .25},
-        {'order': (2, 0, 2), 'alpha': .05},
-        {'order': (2, 0, 2), 'alpha': .25},
-        {'order': 1, 'alpha': .05},
-        {'order': 1, 'alpha': .25},
-        {'order': 2, 'alpha': .05},
-        {'order': 2, 'alpha': .25}
-    ],[
-        {'order': (1, 0, 0)},
-        {'order': (1, 0, 1)},
-        {'order': (2, 0, 1)},
-        {'order': (2, 0, 2)},
-        {'order': 1, 'dist': True},
-        {'order': 2, 'dist': True},
-        {'order': 1}, {'order': 2}, {'order': 3},
-    ]
-]
-
-dataset_name = "SP500"
-tag = "ahead2"
-
-from pyFTS.benchmarks import arima, naive, quantreg
-
-for ct, type in enumerate(types):
-    bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2,
-                                    methods=[pwfts.ProbabilisticWeightedFTS],
-                                    benchmark_models=False,
-                                    #benchmark_methods=benchmark_methods[ct],
-                                    #benchmark_methods_parameters=benchmark_methods_parameters[ct],
-                                    transformations=[tdiff],
-                                    orders=[1], #, 2, 3],
-                                    partitions=[5], #np.arange(3, 35, 2),
-                                    progress=False, type=type,
-                                    steps_ahead=[2, 4, 6, 8, 10],
-                                    distributed=True, nodes=['192.168.0.110', '192.168.0.107', '192.168.0.106'],
-                                    file="benchmarks.db", dataset=dataset_name, tag=tag)
-
-    bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2,
-                                    methods=[pwfts.ProbabilisticWeightedFTS],
-                                    benchmark_models=False,
-                                    #benchmark_methods=benchmark_methods[ct],
-                                    #benchmark_methods_parameters=benchmark_methods_parameters[ct],
-                                    transformations=[None],
-                                    orders=[1], #,2,3],
-                                    partitions=[30], #np.arange(15, 85, 5),
-                                    progress=False, type=type,
-                                    steps_ahead=[2, 4, 6, 8, 10],
-                                    distributed=True, nodes=['192.168.0.110', '192.168.0.107','192.168.0.106'],
-                                    file="benchmarks.db", dataset=dataset_name, tag=tag)
-'''
-'''
-dat = pd.read_csv('pwfts_taiex_partitioning.csv', sep=';')
-print(bUtil.analytic_tabular_dataframe(dat))
-#print(dat["Size"].values[0])
-'''
-'''
-train_split = 2000
-test_length = 200
-
-from pyFTS.partitioners import Grid, Util as pUtil
-
-partitioner = Grid.GridPartitioner(data=dataset[:train_split], npart=30)
-#partitioner = Grid.GridPartitioner(data=dataset[:train_split], npart=10, transformation=tdiff)
-
-from pyFTS.common import fts,tree
-from pyFTS.models import hofts, pwfts
-
-pfts1_taiex = pwfts.ProbabilisticWeightedFTS("1", partitioner=partitioner)
-#pfts1_taiex.append_transformation(tdiff)
-pfts1_taiex.fit(dataset[:train_split], save_model=True, file_path='pwfts')
-pfts1_taiex.shortname = "1st Order"
-
-print(pfts1_taiex)
-
-tmp = pfts1_taiex.predict(dataset[train_split:train_split+200], type='point',
-                          method='heuristic')
-print(tmp)
-
-tmp = pfts1_taiex.predict(dataset[train_split:train_split+200], type='point',
-                          method='expected_value')
-print(tmp)
-'''
-'''
-tmp = pfts1_taiex.predict(dataset[train_split:train_split+200], type='distribution', steps_ahead=20)
-
-f, ax = plt.subplots(3, 4, figsize=[20,15])
-
-tmp[0].plot(ax[0][0], title='t=1')
-tmp[2].plot(ax[0][1], title='t=20')
-tmp[4].plot(ax[0][2], title='t=40')
-tmp[6].plot(ax[0][3], title='t=60')
-tmp[8].plot(ax[1][0], title='t=80')
-tmp[10].plot(ax[1][1], title='t=100')
-tmp[12].plot(ax[1][2], title='t=120')
-tmp[14].plot(ax[1][3], title='t=140')
-tmp[16].plot(ax[2][0], title='t=160')
-tmp[18].plot(ax[2][1], title='t=180')
-tmp[20].plot(ax[2][2], title='t=200')
-
-f, ax = plt.subplots(1, 1, figsize=[20,15])
-
-bchmk.plot_distribution(ax, 'blue', tmp, f, 0, reference_data=dataset[train_split:train_split+200])
-'''
+x = [k for k in np.arange(-2*np.pi, 2*np.pi, 0.5)]
+y = [np.sin(k) for k in x]
+
+part = Grid.GridPartitioner(data=y, npart=35)
+
+model = hofts.HighOrderFTS(order=2, partitioner=part)
+model.fit(y)
+
+forecasts = model.predict(y)
+
+print([round(k,2) for k in y[2:]])
+print([round(k,2) for k in forecasts[:-1]])
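
The rewritten test prints y[2:] against forecasts[:-1] because a second-order model consumes two lags before emitting its first forecast, so forecast k targets y[k+2] and the final forecast points one step past the end of the series. A hedged sanity check in the same spirit (the helper name is made up for illustration, not part of the repo):

import numpy as np

def max_alignment_error(y, forecasts, order=2):
    # mirror the test's pairing: forecast k predicts y[k + order],
    # and the last forecast has no observed target, so it is dropped
    return float(np.max(np.abs(np.array(y[order:]) - np.array(forecasts[:-1]))))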