2017-09-02 158 views
0

我想使用 TensorFlow 1.3.0 实现一个用于文本摘要的 seq2seq 模型，但遇到了 ValueError 异常：Dimensions must be equal, but are 512 and 256（维度必须相等，但实际分别为 512 和 256）

我试图在编码层中同时使用 MultiRNNCell 和 bidirectional_dynamic_rnn。我肯定遗漏了什么，但一直找不到问题所在。错误的堆栈跟踪并不直观，这使得问题更难理解。

在构建计算图时，我得到了以下错误。

--------------------------------------------------------------------------- 
InvalidArgumentError      Traceback (most recent call last) 
~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/common_shapes.py in _call_cpp_shape_fn_impl(op, input_tensors_needed, input_tensors_as_shapes_needed, require_shape_fn) 
    653   graph_def_version, node_def_str, input_shapes, input_tensors, 
--> 654   input_tensors_as_shapes, status) 
    655 except errors.InvalidArgumentError as err: 

~/anaconda2/envs/tensorflow/lib/python3.5/contextlib.py in __exit__(self, type, value, traceback) 
    65    try: 
---> 66     next(self.gen) 
    67    except StopIteration: 

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/errors_impl.py in raise_exception_on_not_ok_status() 
    465   compat.as_text(pywrap_tensorflow.TF_Message(status)), 
--> 466   pywrap_tensorflow.TF_GetCode(status)) 
    467 finally: 

InvalidArgumentError: Dimensions must be equal, but are 512 and 256 for 'decoding/decoder/while/BasicDecoderStep/decoder/multi_rnn_cell/cell_0/cell_0/basic_lstm_cell/mul' (op: 'Mul') with input shapes: [?,512], [?,256]. 

During handling of the above exception, another exception occurred: 

ValueError        Traceback (most recent call last) 
<ipython-input-119-85ee67bc88e5> in <module>() 
     9  # Create the training and inference logits 
    10  training_logits, inference_logits = seq2seq_model(input_,target,embeding_matrix,vocab_to_int,source_seq_length,target_seq_length, 
---> 11     max_target_seq_length,rnn_size,keep_probability,num_layers,batch_size) 
    12 
    13  # Create tensors for the training logits and inference logits 

<ipython-input-114-5ad1bf459bd7> in seq2seq_model(source_input, target_input, embeding_matrix, vocab_to_int, source_sequence_length, target_sequence_length, max_target_length, rnn_size, keep_prob, num_layers, batch_size) 
    15  training_logits, inference_logits = decoding_layer(target_input,encoder_states,embedings, 
    16                 vocab_to_int,rnn_size,target_sequence_length, 
---> 17                 max_target_length,batch_size,num_layers) 
    18 
    19  return training_logits, inference_logits 

<ipython-input-113-c2b4542605d2> in decoding_layer(target_inputs, encoder_state, embedding, vocab_to_int, rnn_size, target_sequence_length, max_target_length, batch_size, num_layers) 
    12 
    13   training_logits = training_decoder(embed,decoder_cell,encoder_state,output_layer, 
---> 14           target_sequence_length,max_target_length) 
    15 
    16 

<ipython-input-117-012bbcdcf997> in training_decoder(dec_embed_input, decoder_cell, encoder_state, output_layer, target_sequence_length, max_target_length) 
    17 
    18  final_outputs, final_state = tf.contrib.seq2seq.dynamic_decode(decoder=decoder,impute_finished=True, 
---> 19              maximum_iterations=max_target_length) 
    20 
    21  return final_outputs 

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/decoder.py in dynamic_decode(decoder, output_time_major, impute_finished, maximum_iterations, parallel_iterations, swap_memory, scope) 
    284   ], 
    285   parallel_iterations=parallel_iterations, 
--> 286   swap_memory=swap_memory) 
    287 
    288  final_outputs_ta = res[1] 

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py in while_loop(cond, body, loop_vars, shape_invariants, parallel_iterations, back_prop, swap_memory, name) 
    2773  context = WhileContext(parallel_iterations, back_prop, swap_memory, name) 
    2774  ops.add_to_collection(ops.GraphKeys.WHILE_CONTEXT, context) 
-> 2775  result = context.BuildLoop(cond, body, loop_vars, shape_invariants) 
    2776  return result 
    2777 

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py in BuildLoop(self, pred, body, loop_vars, shape_invariants) 
    2602  self.Enter() 
    2603  original_body_result, exit_vars = self._BuildLoop(
-> 2604   pred, body, original_loop_vars, loop_vars, shape_invariants) 
    2605  finally: 
    2606  self.Exit() 

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py in _BuildLoop(self, pred, body, original_loop_vars, loop_vars, shape_invariants) 
    2552   structure=original_loop_vars, 
    2553   flat_sequence=vars_for_body_with_tensor_arrays) 
-> 2554  body_result = body(*packed_vars_for_body) 
    2555  if not nest.is_sequence(body_result): 
    2556  body_result = [body_result] 

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/decoder.py in body(time, outputs_ta, state, inputs, finished, sequence_lengths) 
    232  """ 
    233  (next_outputs, decoder_state, next_inputs, 
--> 234  decoder_finished) = decoder.step(time, inputs, state) 
    235  next_finished = math_ops.logical_or(decoder_finished, finished) 
    236  if maximum_iterations is not None: 

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/basic_decoder.py in step(self, time, inputs, state, name) 
    137  """ 
    138  with ops.name_scope(name, "BasicDecoderStep", (time, inputs, state)): 
--> 139  cell_outputs, cell_state = self._cell(inputs, state) 
    140  if self._output_layer is not None: 
    141   cell_outputs = self._output_layer(cell_outputs) 

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py in __call__(self, inputs, state, scope) 
    178  with vs.variable_scope(vs.get_variable_scope(), 
    179        custom_getter=self._rnn_get_variable): 
--> 180   return super(RNNCell, self).__call__(inputs, state) 
    181 
    182 def _rnn_get_variable(self, getter, *args, **kwargs): 

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/layers/base.py in __call__(self, inputs, *args, **kwargs) 
    448   # Check input assumptions set after layer building, e.g. input shape. 
    449   self._assert_input_compatibility(inputs) 
--> 450   outputs = self.call(inputs, *args, **kwargs) 
    451 
    452   # Apply activity regularization. 

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py in call(self, inputs, state) 
    936          [-1, cell.state_size]) 
    937   cur_state_pos += cell.state_size 
--> 938   cur_inp, new_state = cell(cur_inp, cur_state) 
    939   new_states.append(new_state) 
    940 

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py in __call__(self, inputs, state, scope) 
    772        self._recurrent_input_noise, 
    773        self._input_keep_prob) 
--> 774  output, new_state = self._cell(inputs, state, scope) 
    775  if _should_dropout(self._state_keep_prob): 
    776  new_state = self._dropout(new_state, "state", 

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py in __call__(self, inputs, state, scope) 
    178  with vs.variable_scope(vs.get_variable_scope(), 
    179        custom_getter=self._rnn_get_variable): 
--> 180   return super(RNNCell, self).__call__(inputs, state) 
    181 
    182 def _rnn_get_variable(self, getter, *args, **kwargs): 

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/layers/base.py in __call__(self, inputs, *args, **kwargs) 
    448   # Check input assumptions set after layer building, e.g. input shape. 
    449   self._assert_input_compatibility(inputs) 
--> 450   outputs = self.call(inputs, *args, **kwargs) 
    451 
    452   # Apply activity regularization. 

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py in call(self, inputs, state) 
    405 
    406  new_c = (
--> 407   c * sigmoid(f + self._forget_bias) + sigmoid(i) * self._activation(j)) 
    408  new_h = self._activation(new_c) * sigmoid(o) 
    409 

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/math_ops.py in binary_op_wrapper(x, y) 
    863   else: 
    864    raise 
--> 865  return func(x, y, name=name) 
    866 
    867 def binary_op_wrapper_sparse(sp_x, y): 

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/math_ops.py in _mul_dispatch(x, y, name) 
    1086 is_tensor_y = isinstance(y, ops.Tensor) 
    1087 if is_tensor_y: 
-> 1088  return gen_math_ops._mul(x, y, name=name) 
    1089 else: 
    1090  assert isinstance(y, sparse_tensor.SparseTensor) # Case: Dense * Sparse. 

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/gen_math_ops.py in _mul(x, y, name) 
    1447  A `Tensor`. Has the same type as `x`. 
    1448 """ 
-> 1449 result = _op_def_lib.apply_op("Mul", x=x, y=y, name=name) 
    1450 return result 
    1451 

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py in apply_op(self, op_type_name, name, **keywords) 
    765   op = g.create_op(op_type_name, inputs, output_types, name=scope, 
    766       input_types=input_types, attrs=attr_protos, 
--> 767       op_def=op_def) 
    768   if output_structure: 
    769   outputs = op.outputs 

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/ops.py in create_op(self, op_type, inputs, dtypes, input_types, name, attrs, op_def, compute_shapes, compute_device) 
    2630      original_op=self._default_original_op, op_def=op_def) 
    2631  if compute_shapes: 
-> 2632  set_shapes_for_outputs(ret) 
    2633  self._add_op(ret) 
    2634  self._record_op_seen_by_control_dependencies(ret) 

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/ops.py in set_shapes_for_outputs(op) 
    1909  shape_func = _call_cpp_shape_fn_and_require_op 
    1910 
-> 1911 shapes = shape_func(op) 
    1912 if shapes is None: 
    1913  raise RuntimeError(

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/ops.py in call_with_requiring(op) 
    1859 
    1860 def call_with_requiring(op): 
-> 1861  return call_cpp_shape_fn(op, require_shape_fn=True) 
    1862 
    1863 _call_cpp_shape_fn_and_require_op = call_with_requiring 

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/common_shapes.py in call_cpp_shape_fn(op, require_shape_fn) 
    593  res = _call_cpp_shape_fn_impl(op, input_tensors_needed, 
    594         input_tensors_as_shapes_needed, 
--> 595         require_shape_fn) 
    596  if not isinstance(res, dict): 
    597  # Handles the case where _call_cpp_shape_fn_impl calls unknown_shape(op). 

~/anaconda2/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/common_shapes.py in _call_cpp_shape_fn_impl(op, input_tensors_needed, input_tensors_as_shapes_needed, require_shape_fn) 
    657  missing_shape_fn = True 
    658  else: 
--> 659  raise ValueError(err.message) 
    660 
    661 if missing_shape_fn: 

ValueError: Dimensions must be equal, but are 512 and 256 for 'decoding/decoder/while/BasicDecoderStep/decoder/multi_rnn_cell/cell_0/cell_0/basic_lstm_cell/mul' (op: 'Mul') with input shapes: [?,512], [?,256]. 

我无法理解这个错误。它指的是哪个矩阵？请帮帮我，我刚接触 TensorFlow 不久。

回答

1

错误指出，在解码器的 LSTM（decoding/decoder/while/BasicDecoderStep/decoder/multi_rnn_cell/cell_0/cell_0/basic_lstm_cell/mul）内部执行乘法（Mul）运算时出现了维度不匹配。

我的猜测是：由于你使用了双向编码器，在你的实现中，解码器 LSTM 的单元数需要是编码器 LSTM 的两倍。如果双向编码器中的 LSTM 有 256 个单元，那么编码器的结果将有 512 个单元（因为前向和后向 LSTM 的输出会被拼接在一起）。而目前解码器似乎期望的输入只有 256 个单元。

+0

你的意思是像这样做吗：tf.contrib.rnn.BasicLSTMCell(2 * rnn_size)？ –

+0

tf.contrib.rnn.BasicLSTMCell(2 * rnn_size)解决了这个问题。 –

相关问题