
I have been trying to implement a simple version of normalizing flows with Keras, as explained in this paper: https://arxiv.org/pdf/1505.05770.pdf (Variational Inference with Normalizing Flows).

My problem is that the loss is always negative infinity, and I cannot figure out what I am doing wrong. Could anyone help?

Here is the procedure:

  1. The encoder generates vectors of size latent_dim = 100. These are z_mean, z_log_var, u, b, w.

  2. From z_mean and z_log_var, using the reparameterization trick, I can sample z_0 from N(z_mean, z_log_var).

  3. Then I can compute log(abs(1 + u.T.dot(psi(z_0)))) (the planar-flow formulas are sketched right after this list).

  4. Then I can compute z_1.
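
For reference, a minimal NumPy sketch of the planar-flow step these four points describe, with h = tanh (the function name planar_flow_step is mine, not from the paper):

import numpy as np

def planar_flow_step(z0, w, u, b):
    # f(z) = z + u * tanh(w.z + b)
    a = np.dot(w, z0) + b
    z1 = z0 + u * np.tanh(a)
    # psi(z) = (1 - tanh(a)^2) * w  and  log|det df/dz| = log|1 + u.psi(z)|
    psi = (1.0 - np.tanh(a) ** 2) * w
    logdet = np.log(np.abs(1.0 + np.dot(u, psi)))
    return z1, logdet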

Here is the code for these four steps:

# imports (latent_dim, batch_size and epsilon_std are assumed to be defined elsewhere)
import tensorflow as tf
from keras import backend as K
from keras import objectives

def sampling(args): 
    z_mean, z_log_var = args 

    # sample epsilon according to N(0, I) 
    epsilon = K.random_normal(shape=(batch_size, latent_dim), mean=0., 
           std=epsilon_std) 

    # generate z0 according to N(z_mean, z_log_var) 
    z0 = z_mean + K.exp(z_log_var/2) * epsilon 
    print('z0', z0) 
    return z0 

def logdet_loss(args): 
    z0, w, u, b = args 
    b2 = K.squeeze(b, 1) 
    beta = K.sum(tf.multiply(w, z0), 1) # <w|z0> 
    linear_trans = beta + b2 # <w|z0> + b 

    # change u2 so that the transformation z0->z1 is invertible 
    alpha = K.sum(tf.multiply(w, u), 1) # <w|u> 
    diag1 = tf.diag(K.softplus(alpha) - 1 - alpha) 
    u2 = u + K.dot(diag1, w)/K.sum(K.square(w)+1e-7) 
    gamma = K.sum(tf.multiply(w,u2), 1) 

    logdet = K.log(K.abs(1 + (1 - K.square(K.tanh(linear_trans)))*gamma) + 1e-6) 

    return logdet 

def transform_z0(args): 
    z0, w, u, b = args 
    b2 = K.squeeze(b, 1) 
    beta = K.sum(tf.multiply(w, z0), 1) 

    # change u2 so that the transformation z0->z1 is invertible 
    alpha = K.sum(tf.multiply(w, u), 1) 
    diag1 = tf.diag(K.softplus(alpha) - 1 - alpha) 
    u2 = u + K.dot(diag1, w)/K.sum(K.square(w)+1e-7) 
    diag2 = tf.diag(K.tanh(beta + b2)) 

    # generate z1 
    z1 = z0 + K.dot(diag2,u2) 
    return z1 
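
Not shown above is how these functions are plugged into the model; a minimal sketch using Keras Lambda layers (assuming the encoder outputs the tensors z_mean, z_log_var, u, w, b):

from keras.layers import Lambda

z0 = Lambda(sampling, name='z0')([z_mean, z_log_var])
logdet = Lambda(logdet_loss, name='logdet')([z0, w, u, b])
z1 = Lambda(transform_z0, name='z1')([z0, w, u, b])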

And here is the loss (where logdet is defined as above):

def vae_loss(x, x_decoded_mean): 
    xent_loss = K.mean(objectives.categorical_crossentropy(x, x_decoded_mean), -1) 
    ln_q0z0 = K.sum(log_normal2(z0, z_mean, z_log_var, eps=1e-6), -1) 
    ln_pz1 = K.sum(log_stdnormal(z1), -1) 
    result = K.mean(logdet + ln_pz1 + xent_loss - ln_q0z0) 
    return result 
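
log_normal2 and log_stdnormal are elementwise Gaussian log-density helpers (not shown here; a rough sketch of what they compute, using the Keras backend):

import math
from keras import backend as K

def log_stdnormal(x):
    # elementwise log N(x; 0, 1)
    c = -0.5 * math.log(2 * math.pi)
    return c - K.square(x) / 2

def log_normal2(x, mean, log_var, eps=0.0):
    # elementwise log N(x; mean, exp(log_var))
    c = -0.5 * math.log(2 * math.pi)
    return c - log_var / 2 - K.square(x - mean) / (2 * K.exp(log_var) + eps)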

The latent variables seem to have a rapidly growing norm; after the first epoch it is already above 1e6 – sbaur

Answer


Since I could not make it work, I tried instead to implement the normalizing flow described in this paper: Improved Variational Inference with Inverse Autoregressive Flow.

However, I still run into the same problem: the loss diverges to -infinity, which makes no sense. There must be something wrong with my implementation.
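
For reference, the gated update this flow applies to z0 (a minimal NumPy sketch of one step; the function name iaf_step is mine):

import numpy as np

def iaf_step(z0, m, s, log_q0):
    # z = sigmoid(s) * z0 + (1 - sigmoid(s)) * m
    gate = 1.0 / (1.0 + np.exp(-s))
    z = gate * z0 + (1.0 - gate) * m
    # the log-density picks up -sum(log sigmoid(s)) from the Jacobian
    log_q = log_q0 - np.sum(np.log(gate))
    return z, log_q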

Here are the important parts:

# imports (Keras 1.x API; x, vae_input, encoder_block, decoder_block, latent_dim, batch_size, filter_length and epsilon_std are assumed to be defined elsewhere)
import math
from keras import backend as K
from keras import objectives
from keras.layers import Input, Dense, Lambda, Reshape, Convolution1D, merge
from keras.models import Model

# the encoder 
h = encoder_block(x) # a convnet taking proteins as input (matrices of size 400x22); not described here since it isn't essential 
z_log_var = Dense(latent_dim)(h) 
z_mean = Dense(latent_dim)(h) 
h_ = Dense(latent_dim)(h) 
encoder = Model(x, [z_mean,z_log_var, h_]) 

# the latent variables (only one transformation to keep it simple) 
latent_input = Input(shape=(latent_dim, 2), batch_shape=(batch_size, latent_dim, 2)) 
hl = Convolution1D(1, filter_length, activation="relu", border_mode="same")(latent_input) 
hl = Reshape((latent_dim,))(hl) 
mean_1 = Dense(latent_dim)(hl) 
std_1 = Dense(latent_dim)(hl) 
latent_model = Model(latent_input, [mean_1, std_1]) 

# the decoder 
decoder_input = Input((latent_dim,), batch_shape=(batch_size, latent_dim)) 
decoder=decoder_block() # a convnet that I don't describe 
x_decoded_mean = decoder(decoder_input) 
generator = Model(decoder_input, x_decoded_mean) 

# the VAE 
z_mean, z_log_var, other = encoder(vae_input) 
eps = Lambda(sample_eps, name='sample_eps')([z_mean, z_log_var, other]) 
z0 = Lambda(sample_z0, name='sample_z0')([z_mean, z_log_var, eps]) 
l = Lambda(sample_l, name='sample_l')([eps, z_log_var]) 
mean, std = latent_model(merge([Reshape((latent_dim,1))(z0), Reshape((latent_dim,1))(other)], mode="concat", concat_axis=-1)) 
z = Lambda(transform_z0)([z0, mean, std]) 
l = Lambda(transform_l)([l, std]) 
x_decoded_mean = generator(z) 
vae = Model(vae_input, x_decoded_mean) 

# and here is the loss 
def vae_loss(x, x_decoded_mean): 
    xent_loss = K.mean(objectives.categorical_crossentropy(x, x_decoded_mean), -1) 
    ln_q0z0 = K.sum(log_normal2(z0, z_mean, z_log_var), -1) 
    ln_pz1 = K.sum(log_stdnormal(z), -1) 
    result = K.mean(l + ln_pz1 + xent_loss - ln_q0z0) 
    return result 

Here are the utility functions I use in the Lambda layers above:

def sample_eps(args): 

    # sample epsilon according to N(0, I) 
    epsilon = K.random_normal(shape=(batch_size, latent_dim), mean=0., 
           std=epsilon_std) 

    return epsilon 

def sample_z0(args): 
    z_mean, z_log_var, epsilon = args 
    # generate z0 according to N(z_mean, z_log_var) 
    z0 = z_mean + K.exp(z_log_var/2) * epsilon 
    return z0 

def sample_l(args): 
    epsilon, z_log_var = args 
    l = -0.5*K.sum(z_log_var + epsilon**2 + K.log(2*math.pi), -1) 
    return l 

def transform_z0(args): 
    z0, mean, std = args 
    z = z0 
    sig_std = K.sigmoid(std) 
    z *= sig_std 
    z += (1-sig_std)*mean 
    return z 

def transform_l(args): 
    l, std = args 
    sig_std = K.sigmoid(std) 
    l -= K.sum(K.log(sig_std+1e-8), -1) 
    return l