Bugfixes in pwfts, distributed and composite

parent e6247b0779
commit 87686e5ff0
@@ -28,6 +28,7 @@ class FuzzySet(object):
         self.Z = None
         """Partition function in respect to the membership function"""
 
+        if parameters is not None:
             if self.mf == Membership.gaussmf:
                 self.lower = parameters[0] - parameters[1]*3
                 self.upper = parameters[0] + parameters[1]*3
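Note: the new guard presumably protects fuzzy sets created without explicit parameters (e.g. composite sets), where indexing parameters would raise a TypeError. A minimal illustration with hypothetical values, not code from the commit:

    parameters = None          # e.g. a composite fuzzy set built without parameters
    if parameters is not None:
        lower = parameters[0] - parameters[1]*3   # skipped, so no TypeError is raised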
@@ -322,7 +322,7 @@ class FTS(object):
 
         dump = kwargs.get('dump', None)
 
-        num_batches = kwargs.get('num_batches', None)
+        num_batches = kwargs.get('num_batches', 1)
 
         save = kwargs.get('save_model', False) # save model on disk
 
@@ -334,7 +334,7 @@ class FTS(object):
 
         batch_save_interval = kwargs.get('batch_save_interval', 10)
 
-        if distributed is not None:
+        if distributed is not None and distributed:
 
             if distributed == 'dispy':
                 from pyFTS.distributed import dispy
@@ -345,9 +345,10 @@ class FTS(object):
                                         batch_save_interval=batch_save_interval)
             elif distributed == 'spark':
                 from pyFTS.distributed import spark
+                url = kwargs.get('url', 'spark://192.168.0.110:7077')
+                app = kwargs.get('app', 'pyFTS')
 
-                spark.distributed_train(self, data, self.partitioner,
-                                        url='spark://192.168.0.110:7077', app='pyFTS')
+                spark.distributed_train(self, data, url=url, app=app)
         else:
 
             if dump == 'time':
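Note: a usage sketch of the reworked fit() dispatch; the master URL and application name below are illustrative and simply mirror the defaults hard-coded in the hunk above (the Spark test script later in this commit does the same thing):

    from pyFTS.data import TAIEX
    from pyFTS.partitioners import Grid
    from pyFTS.models import hofts

    data = TAIEX.get_data()
    fs = Grid.GridPartitioner(data=data, npart=50)
    model = hofts.WeightedHighOrderFTS(partitioner=fs, order=2)

    # num_batches now defaults to 1 instead of None, and the Spark backend reads
    # its master URL and application name from kwargs instead of hard-coding them
    model.fit(data, distributed='spark',
              url='spark://192.168.0.110:7077', app='pyFTS')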
@@ -20,7 +20,6 @@ def stop_dispy_cluster(cluster, http_server):
     cluster.close()
 
 
-
 def simple_model_train(model, data, parameters):
     model.train(data, **parameters)
     return model
@@ -50,7 +49,7 @@ def distributed_train(model, train_method, nodes, fts_method, data, num_batches=
             else:
                 ndata = data[ct - model.order: ct + batch_size]
 
-            tmp_model = fts_method(str(bcount))
+            tmp_model = fts_method()
 
             tmp_model.clone_parameters(model)
 
@@ -13,23 +13,6 @@ import os
 os.environ['PYSPARK_PYTHON'] = '/usr/bin/python3'
 os.environ['PYSPARK_DRIVER_PYTHON'] = '/usr/bin/python3'
 
-conf = None
-
-
-def get_conf(url, app):
-    """
-
-
-    :param url:
-    :param app:
-    :return:
-    """
-    if conf is None:
-        conf = SparkConf()
-        conf.setMaster(url)
-        conf.setAppName(app)
-
-    return conf
 
 
 def get_partitioner(shared_partitioner):
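Note: one reason the module-level conf/get_conf helper was likely dropped is that it could never work as written: assigning to conf inside the function makes the name local, so the `if conf is None` check raises UnboundLocalError instead of reusing the module-level object. A minimal reproduction (SparkConf swapped for a plain object and a hypothetical URL, so it runs anywhere):

    conf = None

    def get_conf(url, app):
        if conf is None:        # raises UnboundLocalError at call time:
            conf = object()     # this assignment makes `conf` local to get_conf
        return conf

    try:
        get_conf('spark://localhost:7077', 'pyFTS')
    except UnboundLocalError as exc:
        print('reproduced:', exc)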
@@ -47,7 +30,7 @@ def get_partitioner(shared_partitioner):
     return fs_tmp
 
 
-def slave_train(data):
+def slave_train(data, shared_method, shared_partitioner, shared_order):
     """
 
     :param data:
@@ -64,13 +47,29 @@ def slave_train(data):
     return [(k, model.flrgs[k]) for k in model.flrgs]
 
 
-def distributed_train(model, data, partitioner, url='spark://192.168.0.110:7077', app='pyFTS'):
-    with SparkContext(conf=get_conf(url=url, app=app)) as context:
-        shared_partitioner = context.broadcast(partitioner.sets)
-
-        flrgs = context.parallelize(data).mapPartitions(slave_train)
-
-        model = hofts.WeightedHighOrderFTS(partitioner=partitioner, order=shared_order.value)
+def distributed_train(model, data, url='spark://192.168.0.110:7077', app='pyFTS'):
+    """
+
+
+    :param model:
+    :param data:
+    :param url:
+    :param app:
+    :return:
+    """
+
+    conf = SparkConf()
+    conf.setMaster(url)
+    conf.setAppName(app)
+
+    with SparkContext(conf=conf) as context:
+        shared_partitioner = context.broadcast(model.partitioner.sets)
+        shared_order = context.broadcast(model.order)
+        shared_method = context.broadcast(type(model))
+
+        func = lambda x: slave_train(x, shared_method, shared_partitioner, shared_order)
+
+        flrgs = context.parallelize(data).mapPartitions(func)
 
         for k in flrgs.collect():
             model.append_rule(k[1])
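Note: the driver now broadcasts the partitioner sets, the model order and the concrete FTS class; each partition is expected to rebuild a local model from them, train it on its slice of the data, and ship back only its FLRGs, which the driver merges with append_rule(). A condensed sketch of the worker side under those assumptions (not the code from the commit; broadcast handles are read with .value as usual in PySpark, and get_partitioner() is the helper defined earlier in this module):

    def slave_train(data, shared_method, shared_partitioner, shared_order):
        # Rebuild a local model of the broadcast FTS class for this partition only
        model = shared_method.value(partitioner=get_partitioner(shared_partitioner),
                                    order=shared_order.value)
        model.train(list(data))
        # Return only the learned rules; the driver appends them with append_rule()
        return [(k, model.flrgs[k]) for k in model.flrgs]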
@@ -16,7 +16,6 @@ class ProbabilisticWeightedFLRG(hofts.HighOrderFLRG):
     def __init__(self, order):
         super(ProbabilisticWeightedFLRG, self).__init__(order)
         self.RHS = {}
-        self.rhs_count = {}
         self.frequency_count = 0.0
         self.Z = None
 
@@ -43,11 +42,11 @@ class ProbabilisticWeightedFLRG(hofts.HighOrderFLRG):
         return tmp
 
     def rhs_unconditional_probability(self, c):
-        return self.rhs_count[c] / self.frequency_count
+        return self.RHS[c] / self.frequency_count
 
     def rhs_conditional_probability(self, x, sets, uod, nbins):
         total = 0.0
-        for rhs in self.RHS:
+        for rhs in self.RHS.keys():
             set = sets[rhs]
             wi = self.rhs_unconditional_probability(rhs)
             mv = set.membership(x) / set.partition_function(uod, nbins=nbins)
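Note: with rhs_count removed, RHS itself maps each right-hand-side set to its occurrence count, so the unconditional probability of a successor is simply its count over the FLRG's total frequency. A toy illustration with hypothetical counts:

    rhs = {'A1': 3.0, 'A2': 1.0}         # stand-in for ProbabilisticWeightedFLRG.RHS
    frequency_count = sum(rhs.values())  # 4.0

    def rhs_unconditional_probability(c):
        return rhs[c] / frequency_count

    assert rhs_unconditional_probability('A1') == 0.75
    assert rhs_unconditional_probability('A2') == 0.25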
@@ -68,28 +67,30 @@ class ProbabilisticWeightedFLRG(hofts.HighOrderFLRG):
         '''Return the expectation of the PWFLRG, the weighted sum'''
         if self.midpoint is None:
             self.midpoint = np.sum(np.array([self.rhs_unconditional_probability(s) * sets[s].centroid
-                                             for s in self.RHS]))
+                                             for s in self.RHS.keys()]))
 
         return self.midpoint
 
     def get_upper(self, sets):
         if self.upper is None:
-            self.upper = np.sum(np.array([self.rhs_unconditional_probability(s) * sets[s].upper for s in self.RHS]))
+            self.upper = np.sum(np.array([self.rhs_unconditional_probability(s) * sets[s].upper
+                                          for s in self.RHS.keys()]))
 
         return self.upper
 
     def get_lower(self, sets):
         if self.lower is None:
-            self.lower = np.sum(np.array([self.rhs_unconditional_probability(s) * sets[s].lower for s in self.RHS]))
+            self.lower = np.sum(np.array([self.rhs_unconditional_probability(s) * sets[s].lower
+                                          for s in self.RHS.keys()]))
 
         return self.lower
 
     def __str__(self):
         tmp2 = ""
-        for c in sorted(self.RHS):
+        for c in sorted(self.RHS.keys()):
             if len(tmp2) > 0:
                 tmp2 = tmp2 + ", "
-            tmp2 = tmp2 + "(" + str(round(self.rhs_count[c] / self.frequency_count, 3)) + ")" + c
+            tmp2 = tmp2 + "(" + str(round(self.RHS[c] / self.frequency_count, 3)) + ")" + c
         return self.get_key() + " -> " + tmp2
 
 
@@ -530,7 +531,7 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
 
     def __str__(self):
         tmp = self.name + ":\n"
-        for r in sorted(self.flrgs):
+        for r in sorted(self.flrgs.keys()):
             p = round(self.flrgs[r].frequency_count / self.global_frequency_count, 3)
             tmp = tmp + "(" + str(p) + ") " + str(self.flrgs[r]) + "\n"
         return tmp
@@ -25,20 +25,24 @@ from pyFTS.data import TAIEX, SP500, NASDAQ, Malaysia, Enrollments
 from pyFTS.partitioners import Grid
 from pyFTS.models import pwfts, tsaur
 
-train = TAIEX.get_data()[:1000]
-test = TAIEX.get_data()[1000:1200]
+dataset = pd.read_csv('/home/petronio/Downloads/kalang.csv', sep=',')
 
-partitioner = Grid.GridPartitioner(data=train, npart=35)
+dataset['date'] = pd.to_datetime(dataset["date"], format='%Y-%m-%d %H:%M:%S')
 
-#model = pwfts.ProbabilisticWeightedFTS(partitioner=partitioner) #, order=2, lags=[3,4])
-model = tsaur.MarkovWeightedFTS(partitioner=partitioner)
-model.fit(train)
+train_uv = dataset['value'].values[:24505]
+test_uv = dataset['value'].values[24505:]
+
+partitioner = Grid.GridPartitioner(data=train_uv, npart=35)
+
+model = pwfts.ProbabilisticWeightedFTS(partitioner=partitioner) #, order=2, lags=[3,4])
+#model = tsaur.MarkovWeightedFTS(partitioner=partitioner)
+model.fit(train_uv)
 
 from pyFTS.benchmarks import benchmarks as bchmk
 
 print(model)
 
-print(model.forecast(test))
+print(model.forecast(test_uv))
 
 #distributions = model.predict(y[800:820])
 
@@ -18,6 +18,53 @@ from pyFTS.models.multivariate import common, variable, mvfts, cmvfts
 from pyFTS.models.seasonal import partitioner as seasonal
 from pyFTS.models.seasonal.common import DateTime
 
+dataset = pd.read_csv('/home/petronio/Downloads/kalang.csv', sep=',')
+
+dataset['date'] = pd.to_datetime(dataset["date"], format='%Y-%m-%d %H:%M:%S')
+
+train_uv = dataset['value'].values[:24505]
+test_uv = dataset['value'].values[24505:]
+
+train_mv = dataset.iloc[:24505]
+test_mv = dataset.iloc[24505:]
+
+print(train_mv)
+
+sp = {'seasonality': DateTime.minute_of_day, 'names': [str(k)+'hs' for k in range(0,24)]}
+
+vhour = variable.Variable("Hour", data_label="date", partitioner=seasonal.TimeGridPartitioner, npart=24,
+                          data=train_mv, partitioner_specific=sp)
+
+vvalue = variable.Variable("Pollution", data_label="value", alias='value',
+                           partitioner=Grid.GridPartitioner, npart=35,
+                           data=train_mv)
+
+parameters = [
+    {},{},
+    {'order':2, 'knn': 1},
+    {'order':2, 'knn': 2},
+    {'order':2, 'knn': 3},
+]
+
+for ct, method in enumerate([mvfts.MVFTS, wmvfts.WeightedMVFTS,
+                             cmvfts.ClusteredMVFTS,cmvfts.ClusteredMVFTS,cmvfts.ClusteredMVFTS]):
+    print(method)
+    model = method(**parameters[ct])
+    model.shortname += str(ct)
+    model.append_variable(vhour)
+    model.append_variable(vvalue)
+    model.target_variable = vvalue
+    model.fit(train_mv)
+
+    Util.persist_obj(model, model.shortname)
+
+    forecasts = model.predict(test_mv.iloc[:100])
+
+    print(model)
+
+
+
+'''
 from pyFTS.data import henon
 df = henon.get_dataframe(iterations=1000)
 
@@ -48,3 +95,4 @@ ax[1][1].plot(test['y'].values)
 ax[1][1].plot(forecasts['y'].values)
 
 print(forecasts)
+'''
@@ -13,15 +13,20 @@ import os
 os.environ['PYSPARK_PYTHON'] = '/usr/bin/python3'
 os.environ['PYSPARK_DRIVER_PYTHON'] = '/usr/bin/python3'
 
-conf = SparkConf()
-conf.setMaster('spark://192.168.0.110:7077')
-conf.setAppName('pyFTS')
-
 data = TAIEX.get_data()
 
 fs = Grid.GridPartitioner(data=data, npart=50)
 
+model = hofts.WeightedHighOrderFTS(partitioner=fs, order=2)
+
+model.fit(data, distributed='spark', url='spark://192.168.0.110:7077')
+#model.fit(data, distributed='dispy', nodes=['192.168.0.110'])
+
+print(model)
+
+
+'''
 
 def fun(x):
     return (x, x % 2)
 
@@ -76,7 +81,7 @@ with SparkContext(conf=conf) as sc:
 
     print(model)
 
-
+'''
 
 
 