0
我正在尝试训练一个LSTM,使其表现得像一个控制器。这基本上是一个多对多的问题。我有7个输入特征,每个特征都是长度为40的序列。我的输出有两个特征,同样是长度为40的序列。(标题:TensorFlow中的多对多LSTM:训练误差不下降)
我有2层。第一层有四个LSTM单元,第二层有两个LSTM单元。代码如下。
该代码可以运行并产生预期的输出,但我无法降低训练误差(均方误差)。误差在前1000个epoch(训练轮次)之后就不再下降。
我试过使用不同的批量大小。但即使批量大小为1,误差仍然很高。我用简单的正弦函数测试了同一个网络,它可以正常工作,即误差在不断下降。这是不是因为我的序列长度太大,从而出现了梯度消失问题?我能做些什么来降低训练误差?
# Specify input and output features
Xfeatures = 7  # Number of input features
Yfeatures = 2  # Number of output features
num_steps = 40  # Time steps per sequence (second placeholder dimension)

# reset everything to rerun in jupyter
tf.reset_default_graph()

# Placeholders for the inputs and targets in a given iteration.
# The input placeholder must be called u_opt: the normalization op below
# and every feed_dict in the training session refer to it by that name.
u_opt = tf.placeholder(tf.float32, [train_batch_size, num_steps, Xfeatures])
u_NN = tf.placeholder(tf.float32, [train_batch_size, num_steps, Yfeatures])

with tf.name_scope('Normalization'):
    # L2 normalization for input data.
    # NOTE(review): axis 0 is the batch axis of u_opt, so every value is
    # divided by a norm taken across the batch. With a batch size of 1 this
    # collapses each non-zero input to +/-1 and discards its magnitude --
    # confirm this is intended; normalizing with dataset-wide statistics
    # before feeding may be what is wanted.
    Xnorm = tf.nn.l2_normalize(u_opt, 0, epsilon=1e-12, name='Normalize')

# Two stacked LSTM layers: lstm1 feeds lstm2, and the output of lstm2 is
# used directly as the network prediction.
# NOTE(review): the loss below compares LSTM_outputs with u_NN element-wise,
# so lstm2_size presumably equals Yfeatures -- TODO confirm.
# NOTE(review): with BasicLSTMCell's default activation the cell output is
# presumably bounded to (-1, 1); targets outside that range could not be
# matched without a linear output layer -- verify against the target data.
lstm1 = tf.contrib.rnn.BasicLSTMCell(lstm1_size)
lstm2 = tf.contrib.rnn.BasicLSTMCell(lstm2_size)
stacked_lstm = tf.contrib.rnn.MultiRNNCell([lstm1, lstm2])
print(lstm1.output_size)
print(stacked_lstm.output_size)

# Unroll the stacked cell over the time axis of the normalized input.
LSTM_outputs, states = tf.nn.dynamic_rnn(stacked_lstm, Xnorm, dtype=tf.float32)

# Loss: mean squared error between targets (labels) and LSTM predictions.
mean_square_error = tf.losses.mean_squared_error(u_NN, LSTM_outputs)
train_step = tf.train.AdamOptimizer(learning_rate).minimize(mean_square_error)
# Initialization and training session
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    # The same arrays are fed on every step, so build the feed dicts once.
    train_feed = {u_opt: InputX1, u_NN: InputY1}
    predict_feed = {u_opt: InputX1}

    # Loss before any training step, as a baseline.
    print(sess.run([mean_square_error], feed_dict=train_feed))

    for i in range(training_epochs):
        sess.run([train_step], feed_dict=train_feed)
        # Report the loss every display_epoch iterations (including i == 0).
        if i % display_epoch == 0:
            print("Training loss is:", sess.run([mean_square_error], feed_dict=train_feed), "at itertion:", i)

    # Final loss and the network's predictions for the training inputs.
    print(sess.run([mean_square_error], feed_dict=train_feed))
    print(sess.run([LSTM_outputs], feed_dict=predict_feed))