
The TensorFlow gradients are always zero for any conv layer that comes after the first conv layer. I have tried different ways of checking this, but the gradients are always zero! Here is a small reproducible example you can run to check.

from __future__ import absolute_import 
from __future__ import division 
from __future__ import print_function 

import tensorflow as tf 
import numpy as np 
import math 
import os 
import random 
import tflearn 
batch_size = 100 
start = 0 
end = batch_size 
learning_rate = 0.000001 
num_classes = 4 
time_steps = 4 
embedding = 2 
step = 1 
_units = 500 
num_of_filters = 1000 

train_set_x = [[[1,2],[3,4],[5,6],[7,8]],[[1,2],[3,4],[5,6],[7,8]]] 
train_set_y = [0,1] 

X = tf.placeholder(tf.float32, [None,time_steps,embedding]) 
Y = tf.placeholder(tf.int32, [None]) 


x = tf.expand_dims(X,3) 

filter_shape = [1, embedding, 1, num_of_filters] 
conv_weights = tf.get_variable("conv_weights1" , filter_shape, tf.float32, tf.contrib.layers.xavier_initializer()) 
conv_biases = tf.Variable(tf.constant(0.1, shape=[num_of_filters])) 
conv = tf.nn.conv2d(x, conv_weights, strides=[1,1,1,1], padding = "VALID") 
normalize = conv + conv_biases 
tf_normalize = tflearn.layers.normalization.batch_normalization(normalize) 
relu = tf.nn.elu(tf_normalize) 
pooling = tf.reduce_max(relu, reduction_indices = 3, keep_dims = True) 
outputs_fed_lstm = pooling 

filter_shape2 = [1, 1, 1, num_of_filters] 
conv_weights2 = tf.get_variable("conv_weights2" , filter_shape2, tf.float32, tf.contrib.layers.xavier_initializer()) 
conv_biases2 = tf.Variable(tf.constant(0.1, shape=[num_of_filters])) 
conv2 = tf.nn.conv2d(outputs_fed_lstm, conv_weights2, strides=[1,1,1,1], padding = "VALID") 
normalize2 = conv2 + conv_biases2 
tf_normalize2 = tflearn.layers.normalization.batch_normalization(normalize2) 
relu2 = tf.nn.elu(tf_normalize2) 
pooling2 = tf.reduce_max(relu2, reduction_indices = 3, keep_dims = True) 
outputs_fed_lstm2 = pooling2 

x = tf.squeeze(outputs_fed_lstm2, [2])  
x = tf.transpose(x, [1, 0, 2]) 
x = tf.reshape(x, [-1, 1]) 
x = tf.split(0, time_steps, x) 

lstm = tf.nn.rnn_cell.LSTMCell(num_units = _units) 

# multi_lstm = tf.nn.rnn_cell.MultiRNNCell([lstm] * lstm_layers, state_is_tuple = True) 

outputs , state = tf.nn.rnn(lstm,x, dtype = tf.float32)  

weights = tf.Variable(tf.random_normal([_units,num_classes])) 
biases = tf.Variable(tf.random_normal([num_classes])) 

logits = tf.matmul(outputs[-1], weights) + biases 



c_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits,Y) 
loss = tf.reduce_mean(c_loss) 


global_step = tf.Variable(0, name="global_step", trainable=False) 
# decayed_learning_rate = tf.train.exponential_decay(learning_rate,0,10000,0.9) 
optimizer= tf.train.AdamOptimizer(learning_rate) 
minimize_loss = optimizer.minimize(loss, global_step=global_step) 
grads_and_vars = optimizer.compute_gradients(loss,[conv_weights2]) 
correct_predict = tf.nn.in_top_k(logits, Y, 1) 
accuracy = tf.reduce_mean(tf.cast(correct_predict, tf.float32)) 


init = tf.initialize_all_variables() 

with tf.Session() as sess: 
    sess.run(init) 
    for i in range(1):
        for j in range(1):
            x = train_set_x
            y = train_set_y
            sess.run(minimize_loss, feed_dict={X: x, Y: y})
            step += 1
            gr_print = sess.run([grad for grad, _ in grads_and_vars], feed_dict={X: x, Y: y})
            print(gr_print)
            cost = sess.run(loss, feed_dict={X: x, Y: y})
            accu = sess.run(accuracy, feed_dict={X: x, Y: y})
            print("Loss after one Epoch(Training) = " + "{:.6f}".format(cost) + ", Training Accuracy= " + "{:.5f}".format(accu))

Here is the output:

[array([[[[ 0.        ,  0.        ,  0.        , ...,  5.21326828,
        ...,  0.        ,  0.        ,  0.        ]]]], dtype=float32)]

(The gradient with respect to conv_weights2 is a [1, 1, 1, 1000] array that is all zeros except for a single entry equal to 5.21326828.)

Answer


What you are computing is rather strange. Let's look at the shapes in your model (a quick way to check these shapes yourself is sketched right after the list):

  • input x: [batch_size, 4, 2, 1]
  • 1st convolution conv: [batch_size, 4, 1, 1000]
  • 1st max pooling pooling: [batch_size, 4, 1, 1]
  • 2nd convolution conv2: [batch_size, 4, 1, 1000]
  • 2nd max pooling pooling2: [batch_size, 4, 1, 1]
  • input to the LSTM: [4, batch_size, 1]
  • output of the LSTM: [batch_size, 500]
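
A minimal sketch of how to verify these shapes yourself (my own illustration, not part of the original post): it reuses the tensor names x, conv, pooling, conv2 and pooling2 from the question's code, relies on the static shapes TensorFlow infers at graph-construction time, and assumes it runs right after the second pooling layer is defined (before x is reassigned for the LSTM).

for name, tensor in [("x", x), ("conv", conv), ("pooling", pooling),
                     ("conv2", conv2), ("pooling2", pooling2)]:
    # get_shape() returns the statically inferred shape, e.g. (?, 4, 1, 1000)
    print(name, tensor.get_shape())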

As far as I can tell, you are trying to apply two 1D convolutions followed by an LSTM. However, the first convolution is applied over the third dimension, which has size embedding = 2.

After that, you apply max pooling over the whole embedding of size 1000, collapsing it to 1. You should probably apply max pooling over the second dimension of size 4 instead:

pooling = tf.nn.max_pool(conv, [1, 2, 1, 1], [1, 2, 1, 1], "VALID") 
# pooling has shape [batch_size, 2, 1, 1000] 

Regarding your gradient problem, it comes from the two max poolings. Only 1 input out of 1000 gets passed through, so the gradient is zero for the other 999 inputs.
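
Here is a minimal sketch of this effect (my own illustration, not from the original answer; it assumes the TF 1.x-style graph API used in the question): the gradient of a max over several values is non-zero only at the position of the maximum.

import tensorflow as tf

x = tf.constant([[1.0, 5.0, 3.0, 2.0]])   # pretend these are 4 "channels"
m = tf.reduce_max(x, axis=1)              # max over the channels, like the pooling above
g = tf.gradients(m, x)[0]                 # d(max)/dx

with tf.Session() as sess:
    print(sess.run(g))                    # [[0. 1. 0. 0.]] -- only the argmax receives gradient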

That is why your first conv weights have only 2 non-zero gradients and your second conv weights have only 1 non-zero gradient. All in all, the real issue here is your architecture; you should first rewrite it on a piece of paper.


Thank you so much! Now I know where the problem is. I would definitely upvote, but I don't have enough reputation! I was trying to come up with a better NLP model, which is why it looks weird. – shader


No problem! Analyzing the shapes is often useful when debugging a model. A good rule of thumb is to keep the overall dimensionality decreasing smoothly (instead of going from 4000 down to 4 and then back up to 4000). –


Thanks! I have one more question. My network looks like this: the input goes through 5 conv layers and is then fed into an RNN. My question is how to fit my network using only the conv layers (the rest of the network stays the same). Should I make my conv layers wider, or should I add more conv layers to make it deeper? I think making it deeper would just mean fewer parameters, whereas making the conv layers wider would lead to overfitting? What do you think? By making the conv layers wider I mean adding more filters. – shader