0
我尝试训练一个网络,但得到的梯度始终为零。我真的很困惑,完全没有头绪。梯度为零
我的输入数据形状为 (batch_size, 120, 10, 3),经过六层(conv1 - pool1 - conv2 - pool2 - fc1 - fc2)之后,我期望得到大小为 1x1 的输出(0 或 1)。到这一步一切都运行正常。
但是,当我尝试训练网络时,我遇到了困难。我得到的梯度总是零。我做错了什么?
import tensorflow as tf
import data_collection as dc
# Geometry of one input example: height x width x depth.
INPUT_HEIGHT, INPUT_WIDTH, INPUT_DEPTH = 120, 10, 3

# Convolution kernel size (square kernel).
KERNEL_HEIGHT = KERNEL_WIDTH = 5

# Channel counts for the convolution kernels.
KERNEL_1_IN_CHANNEL = 3
KERNEL_1_OUT_CHANNEL, KERNEL_2_OUT_CHANNEL = 32, 64

# Output widths of the two fully connected layers.
FULLY_CONNECTED_1_OUTPUTS, FULLY_CONNECTED_2_OUTPUTS = 1024, 1
def weight_variable(shape):
    """Return a tf.Variable of the given shape for use as a weight tensor.

    Initial values are drawn from a truncated normal distribution with a
    standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
def bias_variable(shape):
    """Return a tf.Variable of the given shape with every entry set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))
def conv2d(x, W):
    """Convolve x with kernel W using stride 1 in every dimension.

    'SAME' padding is used for the convolution.
    """
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
def max_pool_2x2(x):
    """Max-pool x with a 2x2 window and a matching 2x2 stride ('SAME' padding)."""
    window = [1, 2, 2, 1]
    stride = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=stride, padding='SAME')
def max_pool_2x1(x):
    """Max-pool x with a 2x1 window and a matching 2x1 stride ('SAME' padding).

    Only the first spatial dimension is pooled; the second uses a window
    and stride of 1.
    """
    window = [1, 2, 1, 1]
    stride = [1, 2, 1, 1]
    return tf.nn.max_pool(x, ksize=window, strides=stride, padding='SAME')
if __name__ == '__main__':
    # Build a conv1 - pool1 - conv2 - pool2 - fc1 - fc2 network with a single
    # sigmoid output, train it with plain gradient descent, and print the
    # accuracy and raw (pre-sigmoid) outputs every 100 steps.

    # Placeholders for the input batch and its 0/1 labels.
    x = tf.placeholder(tf.float32, [None, INPUT_HEIGHT, INPUT_WIDTH, INPUT_DEPTH])
    y_ = tf.placeholder(tf.float32, [None, 1])

    # First layer - convolution
    W_conv1 = weight_variable([KERNEL_HEIGHT, KERNEL_WIDTH, KERNEL_1_IN_CHANNEL, KERNEL_1_OUT_CHANNEL])
    b_conv1 = bias_variable([KERNEL_1_OUT_CHANNEL])
    h_conv1 = tf.nn.relu(conv2d(x, W_conv1) + b_conv1)

    # Second layer - 2x2 pooling
    h_pool1 = max_pool_2x2(h_conv1)

    # Third layer - convolution
    W_conv2 = weight_variable([KERNEL_HEIGHT, KERNEL_WIDTH, KERNEL_1_OUT_CHANNEL, KERNEL_2_OUT_CHANNEL])
    b_conv2 = bias_variable([KERNEL_2_OUT_CHANNEL])
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)

    # Fourth layer - 2x1 pooling
    h_pool2 = max_pool_2x1(h_conv2)

    # Fifth layer - fully connected layer (30*5*64) -> (1024)
    # The convolutions use 'SAME' padding with stride 1, so only the pooling
    # layers shrink the feature map: 120x10 -> 60x5 (2x2 pool) -> 30x5 (2x1 pool).
    # The flattened size is defined once so the weight shape and the reshape
    # cannot drift apart.
    flat_size = 30 * 5 * KERNEL_2_OUT_CHANNEL
    W_fc1 = weight_variable([flat_size, FULLY_CONNECTED_1_OUTPUTS])
    b_fc1 = bias_variable([FULLY_CONNECTED_1_OUTPUTS])
    h_pool2_flat = tf.reshape(h_pool2, [-1, flat_size])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    # Sixth layer - fully connected layer (1024) -> (1)
    W_fc2 = weight_variable([FULLY_CONNECTED_1_OUTPUTS, FULLY_CONNECTED_2_OUTPUTS])
    b_fc2 = bias_variable([FULLY_CONNECTED_2_OUTPUTS])
    # Keep the raw logits separate from the sigmoid probabilities: the loss
    # needs the former, the accuracy the latter.
    logits = tf.matmul(h_fc1, W_fc2) + b_fc2
    y_conv = tf.nn.sigmoid(logits)

    # Training
    # sigmoid_cross_entropy_with_logits applies the sigmoid itself, so it must
    # receive the raw logits. Feeding it y_conv (already a sigmoid output)
    # squashes the values twice; with large-magnitude logits the inner sigmoid
    # saturates and the gradient becomes exactly zero.
    # NOTE(review): the `targets=` keyword matches the API version this script
    # was written for; newer TensorFlow 1.x releases name it `labels=` - confirm
    # before upgrading.
    cross_entropy = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(targets=y_, logits=logits))
    # NOTE(review): 1e-8 is a very small step size for plain gradient descent;
    # it is left unchanged here - revisit once gradients are non-zero.
    optimizer = tf.train.GradientDescentOptimizer(1e-8)
    gvs = optimizer.compute_gradients(cross_entropy)
    train_step = optimizer.apply_gradients(gvs)

    correct_prediction = tf.equal(tf.round(y_conv), y_)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    init = tf.initialize_all_variables()

    # The context manager releases the session's resources when training ends.
    with tf.Session() as sess:
        sess.run(init)
        for i in range(200):
            # NOTE(review): raw outputs in the hundreds suggest the inputs are
            # not normalized; consider scaling the data returned by `dc` -
            # confirm against the data source.
            batch_xs, batch_ys = dc.get_train_data(), dc.get_train_labels()
            train_feed = {x: batch_xs, y_: batch_ys}

            if i % 100 == 0:
                # Evaluate accuracy and logits in one run, reusing the `logits`
                # tensor instead of adding new matmul ops to the graph on
                # every evaluation.
                train_accuracy, train_logits = sess.run([accuracy, logits], feed_dict=train_feed)
                print("step %d, training accuracy %.3f" % (i, train_accuracy))
                print("Y_conv_train is " + str(train_logits))

                # Fetch the test set once so the printed logits belong to the
                # same data the accuracy was computed on.
                test_feed = {x: dc.get_test_data(), y_: dc.get_test_labels()}
                test_accuracy, test_logits = sess.run([accuracy, logits], feed_dict=test_feed)
                print("step %d, test accuracy %.3f" % (i, test_accuracy))
                print("Y_conv_test is " + str(test_logits))

            sess.run(train_step, feed_dict=train_feed)
结果,我每次得到的输出都完全相同。
step 0, training accuracy 0.500
Y_conv_train is [[ -35.52193451]
[-252.8659668 ]]
step 0, test accuracy 0.000
Y_conv_test is [[ 139.66842651]]
step 100, training accuracy 0.500
Y_conv_train is [[ -35.52193451]
[-252.8659668 ]]
step 100, test accuracy 0.000
Y_conv_test is [[ 139.66842651]]
更新:问题已解决。我忘了对数据进行归一化。
我尝试过改变学习率,但没有帮助。我得到的梯度仍然是零。 – Vladimir
下面这段的输出是什么?先定义 var_grad = tf.gradients(cross_entropy, [W_fc2])[0],然后运行 sess.run(var_grad),它会告诉你该变量的梯度。 – Steven
我调试过了,梯度为零。我特意没有粘贴梯度变量的输出列表,因为它们的尺寸非常大。上面的输出显示的是未经过 sigmoid 函数的 y_conv。如果权重被更新,y_conv 也会随之变化。但是,这并没有发生。 – Vladimir