pwfts.visualize_distributions

This commit is contained in:
Petrônio Cândido 2018-04-19 16:21:59 -03:00
parent 654ddec218
commit 6adf4710b6
4 changed files with 67 additions and 18 deletions

View File

@ -10,14 +10,21 @@ class FLRG(flg.FLRG):
def __init__(self, **kwargs): def __init__(self, **kwargs):
super(FLRG,self).__init__(0,**kwargs) super(FLRG,self).__init__(0,**kwargs)
self.order = kwargs.get('order', 1)
self.LHS = kwargs.get('lhs', {}) self.LHS = kwargs.get('lhs', {})
self.RHS = set() self.RHS = set()
def set_lhs(self, var, set): def set_lhs(self, var, fset):
self.LHS[var] = set if self.order == 1:
self.LHS[var] = fset
else:
if var not in self.LHS:
self.LHS[var] = []
self.LHS[var].append(fset)
def append_rhs(self, set, **kwargs):
self.RHS.add(set) def append_rhs(self, fset, **kwargs):
self.RHS.add(fset)
def get_membership(self, data, variables): def get_membership(self, data, variables):
mvs = [] mvs = []

View File

@ -91,6 +91,8 @@ class MVFTS(fts.FTS):
ndata = self.apply_transformations(data) ndata = self.apply_transformations(data)
self.order = kwargs.get('order',1)
flrs = self.generate_flrs(ndata) flrs = self.generate_flrs(ndata)
self.generate_flrg(flrs) self.generate_flrg(flrs)

View File

@ -517,3 +517,39 @@ class ProbabilisticWeightedFTS(ifts.IntervalFTS):
p = round(self.flrgs[r].frequency_count / self.global_frequency_count, 3) p = round(self.flrgs[r].frequency_count / self.global_frequency_count, 3)
tmp = tmp + "(" + str(p) + ") " + str(self.flrgs[r]) + "\n" tmp = tmp + "(" + str(p) + ") " + str(self.flrgs[r]) + "\n"
return tmp return tmp
def visualize_distributions(model, **kwargs):
    """Plot the LHS and RHS probability distributions of a fitted model.

    Draws one figure with two side-by-side panels (width ratio 1:4):

    * left: a horizontal bar plot with one bar per FLRG key, whose length is
      ``model.flrg_lhs_unconditional_probability`` for that FLRG;
    * right: a heat map with one row per FLRG key and one column per entry of
      ``model.partitioner.ordered_sets``. A cell holds the FLRG's
      ``rhs_unconditional_probability`` for that set when the set is in the
      FLRG's ``RHS``, and 0 otherwise.

    Rows are ordered by ``get_midpoint(model.sets)`` of each FLRG.

    :param model: object exposing ``partitioner.ordered_sets``, ``flrgs``,
        ``sets`` and ``flrg_lhs_unconditional_probability``
        (presumably a ProbabilisticWeightedFTS instance -- confirm with callers)
    :param kwargs: ``size`` -- figure size handed to ``plt.figure`` as
        ``figsize``; defaults to ``(5,10)``
    :return: None; the figure is created through pyplot's global state
    """
    import matplotlib.pyplot as plt
    from matplotlib import gridspec
    import seaborn as sns

    set_names = model.partitioner.ordered_sets

    def _midpoint(key):
        # Sort criterion: midpoint of the FLRG stored under this key.
        return model.flrgs[key].get_midpoint(model.sets)

    row_keys = sorted(model.flrgs.keys(), key=_midpoint)

    lhs_probabilities = [
        model.flrg_lhs_unconditional_probability(model.flrgs[key])
        for key in row_keys
    ]

    # Rows follow the sorted FLRG keys, columns follow the ordered fuzzy sets.
    rhs_matrix = np.zeros((len(row_keys), len(set_names)))
    for row, key in enumerate(row_keys):
        flrg = model.flrgs[key]
        for col, set_name in enumerate(set_names):
            if set_name in flrg.RHS:
                rhs_matrix[row, col] = flrg.rhs_unconditional_probability(set_name)

    plt.figure(figsize=kwargs.get('size', (5,10)))
    layout = gridspec.GridSpec(1, 2, width_ratios=[1, 4])

    # Left panel: unconditional probability of each FLRG's LHS.
    lhs_axis = plt.subplot(layout[0])
    sns.barplot(
        x='y',
        y='x',
        color='darkblue',
        data={'x': row_keys, 'y': lhs_probabilities},
        ax=lhs_axis,
    )
    lhs_axis.set_ylabel("LHS Probabilities")

    # Right panel: RHS probabilities, one column per fuzzy set.
    rhs_axis = plt.subplot(layout[1])
    sns.heatmap(rhs_matrix, cmap='Blues', ax=rhs_axis, yticklabels=False)
    rhs_axis.set_title("RHS probabilities")
    rhs_axis.set_xticks(range(len(set_names)))
    rhs_axis.set_xticklabels(set_names)
    rhs_axis.grid(True)
    rhs_axis.xaxis.set_tick_params(rotation=90)

View File

@ -6,7 +6,7 @@ from pyFTS.common import Transformations
from pyFTS.data import SONDA from pyFTS.data import SONDA
df = SONDA.get_dataframe() df = SONDA.get_dataframe()
train = df.iloc[0:1578241] #three years train = df.iloc[0:578241] #three years
#test = df.iloc[1572480:2096640] #ears #test = df.iloc[1572480:2096640] #ears
del df del df
@ -26,36 +26,40 @@ model = mvfts.MVFTS("")
fig, axes = plt.subplots(nrows=5, ncols=1,figsize=[15,10]) fig, axes = plt.subplots(nrows=5, ncols=1,figsize=[15,10])
sp = {'seasonality': DateTime.day_of_year , 'names': ['Jan','Feb','Mar','Apr','May','Jun','Jul', 'Aug','Sep','Oct','Nov','Dec']} sp = {'seasonality': DateTime.day_of_year , 'names': ['Jan','Feb','Mar','Apr','May','Jun','Jul', 'Aug','Sep','Oct','Nov','Dec']}
vmonth = variable.Variable("Month", data_label="datahora", partitioner=seasonal.TimeGridPartitioner, npart=12, vmonth = variable.Variable("Month", data_label="datahora", partitioner=seasonal.TimeGridPartitioner, npart=12,
data=train, partitioner_specific=sp) data=train, partitioner_specific=sp)
model.append_variable(vmonth) vmonth.partitioner.plot(axes[0])
sp = {'seasonality': DateTime.minute_of_day} sp = {'seasonality': DateTime.minute_of_day}
vhour = variable.Variable("Hour", data_label="datahora", partitioner=seasonal.TimeGridPartitioner, npart=24, vhour = variable.Variable("Hour", data_label="datahora", partitioner=seasonal.TimeGridPartitioner, npart=24,
data=train, partitioner_specific=sp) data=train, partitioner_specific=sp)
model.append_variable(vhour)
vhumid = variable.Variable("Humidity", data_label="humid", partitioner=Grid.GridPartitioner, npart=np, data=train, vhour.partitioner.plot(axes[1])
transformation=tdiff)
model.append_variable(vhumid)
vpress = variable.Variable("AtmPress", data_label="press", partitioner=Grid.GridPartitioner, npart=np, data=train, vavg = variable.Variable("Radiance", data_label="glo_avg", partitioner=Grid.GridPartitioner, npart=30,
transformation=tdiff) data=train)
model.append_variable(vpress)
vrain = variable.Variable("Rain", data_label="rain", partitioner=Grid.GridPartitioner, npart=20, data=train)#train) model1 = mvfts.MVFTS("")
model.append_variable(vrain)
model.target_variable = vrain model1.append_variable(vmonth)
model1.append_variable(vhour)
model1.append_variable(vavg)
model1.target_variable = vavg
#model1.fit(train, num_batches=60, save=True, batch_save=True, file_path='mvfts_sonda')
#model.fit(train, num_batches=60, save=True, batch_save=True, file_path='mvfts_sonda') #model.fit(train, num_batches=60, save=True, batch_save=True, file_path='mvfts_sonda')
model.fit(train, num_batches=200, save=True, batch_save=True, file_path='mvfts_sonda', distributed=True, model1.fit(train, num_batches=200, save=True, batch_save=True, file_path='mvfts_sonda', distributed=True,
nodes=['192.168.1.22'], batch_save_interval=10) nodes=['192.168.1.35'], batch_save_interval=10)
#model = Util.load_obj('mvfts_sonda') #model = Util.load_obj('mvfts_sonda')