Keras 训练部分模型的问题（关于 GAN 模型）

在使用 Keras 实现 GAN 模型时，我遇到了一个奇怪的问题。

实现 GAN 时，我们需要先分别构建 G 和 D，然后再新建一个 Sequential 模型（GAN），并依次执行 add(G)、add(D)。

当我调用 D.train_on_batch 时，得到了 InvalidArgumentError: You must feed a value for placeholder tensor 'dense_input_1' with dtype float。看起来 Keras 在训练 D 时仍然（经由 GAN 模型）牵连到了 G。

如果我去掉 GAN 模型（即最后把 G 和 D 依次堆叠起来的那个 Sequential 模型），d_loss 就能被正确计算。

我的环境是：

Ubuntu 16.04
keras 1.2.2
tensorflow-GPU 1.0.0
keras配置：{ "backend": "tensorflow", "image_dim_ordering": "tf", "epsilon": 1e-07, "floatx": "float32" }

我知道有不少人已经用 Keras 成功实现了 GAN，所以我想知道自己错在哪里。

import numpy as np 
import keras.layers as kl 
import keras.models as km 
import keras.optimizers as ko 
from keras.datasets import mnist 

batch_size = 16 
lr = 0.0001 

def noise_gen(batch_size, z_dim):
    """Return a batch of latent noise vectors for the generator.

    Args:
        batch_size: Number of noise vectors (rows) to draw.
        z_dim: Length of each noise vector (columns).

    Returns:
        A float32 array of shape (batch_size, z_dim) whose entries are drawn
        from np.random.uniform(-1, 1).
    """
    # A single vectorized draw consumes the global NumPy random stream in the
    # same row-major order as filling the array one row at a time, so seeded
    # results are unchanged; the cast keeps the float32 dtype of the output.
    return np.random.uniform(-1, 1, size=(batch_size, z_dim)).astype(np.float32)

# --------------------Generator Model-------------------- 

# Generator built as a Sequential stack: 100-dim input -> Dense(1024) ->
# Dense(7*7*128) -> reshape to (7, 7, 128) -> two stride-2 deconvolutions.
model = km.Sequential() 

# First layer declares input_dim=100. According to the traceback quoted later
# in this post, this add() call is where the 'dense_input_1' placeholder is
# defined.
model.add(kl.Dense(input_dim=100, output_dim=1024)) 
model.add(kl.Activation('relu')) 

# Project to 7*7*128 units, normalise, apply ReLU, then reshape to a
# (7, 7, 128) feature map.
model.add(kl.Dense(7*7*128)) 
model.add(kl.BatchNormalization()) 
model.add(kl.Activation('relu')) 
model.add(kl.Reshape((7, 7, 128), input_shape=(7*7*128,))) 

# Fourth positional argument is the declared output shape
# (None, 14, 14, 64) -- Keras 1.x Deconvolution2D signature, TODO confirm.
model.add(kl.Deconvolution2D(64, 5, 5, (None, 14, 14, 64), subsample=(2, 2), 
    input_shape=(7, 7, 128), border_mode='same')) 
model.add(kl.BatchNormalization()) 
model.add(kl.Activation('relu')) 

# Final deconvolution declares output shape (None, 28, 28, 1).
# NOTE(review): no activation is passed here, so the generated image is
# presumably unbounded (linear output) -- confirm this is intended.
model.add(kl.Deconvolution2D(1, 5, 5, (None, 28, 28, 1), subsample=(2, 2), 
    input_shape=(14, 14, 64), border_mode='same')) 

# G is compiled on its own; in this script it is only called via G.predict().
G = model 
G.compile( loss='binary_crossentropy', optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True)) 

# --------------------Discriminator Model-------------------- 

# Discriminator built as a Sequential stack over (28, 28, 1) inputs: two
# stride-2 5x5 convolutions with LeakyReLU, then a single sigmoid unit.
model = km.Sequential() 

model.add(kl.Convolution2D(64, 5, 5, subsample=(2, 2), input_shape=(28, 28, 1))) 
model.add(kl.LeakyReLU(alpha=0.2)) 

model.add(kl.Convolution2D(128, 5, 5, subsample=(2, 2))) 
# NOTE: a follow-up answer in this post reports that this BatchNormalization
# layer is what triggers the InvalidArgumentError once D is also stacked
# inside the GAN model; commenting it out makes the script run.
model.add(kl.BatchNormalization()) 
model.add(kl.LeakyReLU(alpha=0.2)) 

# Flatten the feature maps and reduce them to one sigmoid output per sample.
model.add(kl.Flatten()) 
model.add(kl.Dense(1)) 
model.add(kl.Activation('sigmoid')) 

# D is compiled standalone so it can be trained directly with train_on_batch.
D = model 
D.compile( loss='binary_crossentropy', optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True)) 

# --------------------GAN Model-------------------- 

# Combined model: G followed by D in a new Sequential container, so that
# training GAN on noise pushes gradients through D into G.
model = km.Sequential() 
model.add(G) 
# The flag is set on the D model object only, before D is added and before
# GAN is compiled. The original author was unsure whether this is required.
D.trainable = False # Is this necessary? 
model.add(D) 
GAN = model 
GAN.compile(loss='binary_crossentropy', optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True)) 

# --------------------Main Code-------------------- 
# Load only the MNIST training images; labels and the test split are discarded.
(X, _), _ = mnist.load_data() 
# Scale by 1/255 (presumably maps 8-bit pixel values into [0, 1] -- confirm).
X = X/255. 
# Append a trailing channel axis to match D's input_shape of (28, 28, 1).
X = X[:, :, :, np.newaxis] 

# One batch of real images and two independent noise batches: Z1 feeds the
# discriminator step, Z2 feeds the generator step.
X_batch = X[0:batch_size, :] 
Z1_batch = noise_gen(batch_size, 100) 
Z2_batch = noise_gen(batch_size, 100) 

fake_batch = G.predict(Z1_batch) 
real_batch = X_batch 
print('--------------------Fake Image Generated!--------------------') 

# Real images first, then fakes; targets are 1 for real and 0 for fake.
combined_X_batch = np.concatenate((real_batch, fake_batch)) 
combined_y_batch = np.concatenate((np.ones((batch_size, 1)), np.zeros((batch_size, 1)))) 
print('real_batch={}, fake_batch={}'.format(real_batch.shape, fake_batch.shape)) 

# Discriminator step. Per the traceback quoted in this post, this
# train_on_batch call is the one that raises InvalidArgumentError.
D.trainable = True 
d_loss = D.train_on_batch(combined_X_batch, combined_y_batch) 
print('--------------------Discriminator trained!--------------------') 
print(d_loss) 

# Generator step: train the stacked model on fresh noise with all-ones
# targets, i.e. ask D to label the generated images as real.
D.trainable = False 
g_loss = GAN.train_on_batch(Z2_batch, np.ones((batch_size, 1))) 
print('--------------------GAN trained!--------------------') 
print(g_loss)

错误消息：

W tensorflow/core/framework/op_kernel.cc:993] Invalid argument: You must feed a value for placeholder tensor 'dense_input_1' with dtype float 
    [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]] 
W tensorflow/core/framework/op_kernel.cc:993] Invalid argument: You must feed a value for placeholder tensor 'dense_input_1' with dtype float 
    [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]] 
W tensorflow/core/framework/op_kernel.cc:993] Invalid argument: You must feed a value for placeholder tensor 'dense_input_1' with dtype float 
    [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]] 
W tensorflow/core/framework/op_kernel.cc:993] Invalid argument: You must feed a value for placeholder tensor 'dense_input_1' with dtype float 
    [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]] 
W tensorflow/core/framework/op_kernel.cc:993] Invalid argument: You must feed a value for placeholder tensor 'dense_input_1' with dtype float 
    [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]] 
W tensorflow/core/framework/op_kernel.cc:993] Invalid argument: You must feed a value for placeholder tensor 'dense_input_1' with dtype float 
    [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]] 
Traceback (most recent call last): 
    File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1022, in _do_call 
    return fn(*args) 
    File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1004, in _run_fn 
    status, run_metadata) 
    File "/usr/lib/python3.5/contextlib.py", line 66, in __exit__ 
    next(self.gen) 
    File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/errors_impl.py", line 469, in raise_exception_on_not_ok_status 
    pywrap_tensorflow.TF_GetCode(status)) 
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'dense_input_1' with dtype float 
    [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]] 
    [[Node: mul_5/_77 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_1018_mul_5", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]] 

During handling of the above exception, another exception occurred: 

Traceback (most recent call last): 
    File "./gen.py", line 84, in <module> 
    d_loss = D.train_on_batch(combined_X_batch, combined_y_batch) 
    File "/usr/local/lib/python3.5/dist-packages/keras/models.py", line 766, in train_on_batch 
    class_weight=class_weight) 
    File "/usr/local/lib/python3.5/dist-packages/keras/engine/training.py", line 1320, in train_on_batch 
    outputs = self.train_function(ins) 
    File "/usr/local/lib/python3.5/dist-packages/keras/backend/tensorflow_backend.py", line 1943, in __call__ 
    feed_dict=feed_dict) 
    File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 767, in run 
    run_metadata_ptr) 
    File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 965, in _run 
    feed_dict_string, options, run_metadata) 
    File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1015, in _do_run 
    target_list, options, run_metadata) 
    File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1035, in _do_call 
    raise type(e)(node_def, op, message) 
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'dense_input_1' with dtype float 
    [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]] 
    [[Node: mul_5/_77 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_1018_mul_5", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]] 

Caused by op 'dense_input_1', defined at: 
    File "./gen.py", line 20, in <module> 
    model.add(kl.Dense(input_dim=100, output_dim=1024)) 
    File "/usr/local/lib/python3.5/dist-packages/keras/models.py", line 299, in add 
    layer.create_input_layer(batch_input_shape, input_dtype) 
    File "/usr/local/lib/python3.5/dist-packages/keras/engine/topology.py", line 397, in create_input_layer 
    dtype=input_dtype, name=name) 
    File "/usr/local/lib/python3.5/dist-packages/keras/engine/topology.py", line 1198, in Input 
    input_tensor=tensor) 
    File "/usr/local/lib/python3.5/dist-packages/keras/engine/topology.py", line 1116, in __init__ 
    name=self.name) 
    File "/usr/local/lib/python3.5/dist-packages/keras/backend/tensorflow_backend.py", line 321, in placeholder 
    x = tf.placeholder(dtype, shape=shape, name=name) 
    File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/array_ops.py", line 1520, in placeholder 
    name=name) 
    File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 2149, in _placeholder 
    name=name) 
    File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py", line 763, in apply_op 
    op_def=op_def) 
    File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 2395, in create_op 
    original_op=self._default_original_op, op_def=op_def) 
    File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 1264, in __init__ 
    self._traceback = _extract_stack() 

InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'dense_input_1' with dtype float 
    [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]] 
    [[Node: mul_5/_77 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_1018_mul_5", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]

来源

2017-02-23 Adam

对这个回答有什么反馈吗？它有帮助吗？ –

@NassimBen对不起，回复晚了。我试图弄清楚问题到底是什么，我终于明白了。 – Adam

很酷，你能和我们分享吗？总是感兴趣:) –

首先，我建议你改用函数式 API（functional API）来构建模型。这类混合模型用函数式 API 处理起来更容易。

老实说，我不知道你的方案为什么行不通。看起来当你的 D 模型被连接到一个新的输入时，它似乎被“破坏”了，并且被绑定到了那个输入上。我解决这个问题的方法是：先定义各个层，然后在判别器（Discriminator）和 GAN 模型中共用这些层。代码如下：

import numpy as np 
from keras.layers import * 
import keras.models as km 
import keras.optimizers as ko 
from keras.datasets import mnist 

batch_size = 16 
lr = 0.0001 

def noise_gen(batch_size, z_dim):
    """Return a batch of latent noise vectors for the generator.

    Args:
        batch_size: Number of noise vectors (rows) to draw.
        z_dim: Length of each noise vector (columns).

    Returns:
        A float32 array of shape (batch_size, z_dim) whose entries are drawn
        from np.random.uniform(-1, 1).
    """
    # A single vectorized draw consumes the global NumPy random stream in the
    # same row-major order as filling the array one row at a time, so seeded
    # results are unchanged; the cast keeps the float32 dtype of the output.
    return np.random.uniform(-1, 1, size=(batch_size, z_dim)).astype(np.float32)

def make_trainable(model, trainable):
    """Set the ``trainable`` flag on ``model`` and on every layer it holds.

    Args:
        model: Object exposing a ``trainable`` attribute and an iterable
            ``layers`` attribute whose items also accept ``trainable``.
        trainable: Boolean value assigned to the model and to each layer.
    """
    # The model-level flag is written first, then each layer in order.
    model.trainable = trainable
    for layer in model.layers:
        layer.trainable = trainable

# --------------------Generator Model-------------------- 

# Generator expressed with the functional API: each layer is called on the
# previous tensor, starting from an explicit 100-dim Input.
g_input = Input(shape=(100,)) 

# NOTE: unlike the Sequential version in the question (Dense -> BatchNorm ->
# ReLU), the ReLU here is applied inside the Dense layer, i.e. before
# BatchNormalization.
g_hidden = Dense(1024, activation='relu')(g_input) 
g_hidden = Dense(7*7*128, activation='relu')(g_hidden) 
g_hidden = BatchNormalization()(g_hidden) 
g_hidden = Reshape((7,7,128))(g_hidden) 

# Fourth positional argument is the declared output shape
# (Keras 1.x Deconvolution2D signature -- TODO confirm).
g_hidden = Deconvolution2D(64,5,5, (None, 14, 14, 64), subsample=(2,2), 
     border_mode='same', activation='relu')(g_hidden) 
g_hidden = BatchNormalization()(g_hidden) 
# Output layer has no activation argument and declares shape (None, 28, 28, 1).
g_output = Deconvolution2D(1,5,5, (None, 28, 28, 1), subsample=(2,2), 
     border_mode='same')(g_hidden) 

# Wrap the tensor graph in a Model, compile it and print its layer summary.
G = km.Model(input=g_input,output=g_output) 
G.compile(loss='binary_crossentropy', optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True)) 
G.summary() 

# --------------------Discriminator Model-------------------- 

# Discriminator expressed with the functional API. Every layer object is kept
# in its own variable (d_l1 .. d_l7) so the same layer instances can be
# re-applied to the generator output when the GAN model is built further down.
d_input = Input(shape=(28,28,1)) 

# First stride-2 5x5 convolution followed by LeakyReLU.
d_l1 = Convolution2D(64,5,5, subsample=(2,2)) 
d_hidden_1 = d_l1(d_input) 
d_l2 = LeakyReLU(alpha=0.2) 
d_hidden_2 = d_l2(d_hidden_1) 

# Second stride-2 5x5 convolution, batch normalisation, LeakyReLU.
d_l3 = Convolution2D(128,5,5, subsample=(2,2)) 
d_hidden_3 = d_l3(d_hidden_2) 
d_l4 = BatchNormalization() 
d_hidden_4 = d_l4(d_hidden_3) 
d_l5 = LeakyReLU(alpha=0.2) 
d_hidden_5 = d_l5(d_hidden_4) 

# Flatten and reduce to a single sigmoid output per sample.
d_l6 = Flatten() 
d_hidden_6 = d_l6(d_hidden_5) 
d_l7 = Dense(1, activation='sigmoid') 
d_output = d_l7(d_hidden_6) 

# Standalone discriminator model, compiled so it can be trained directly.
D = km.Model(input=d_input,output=d_output) 
D.compile(loss='binary_crossentropy',optimizer=ko.SGD(lr=lr,momentum=0.9, nesterov=True)) 
D.summary() 

# --------------------GAN Model-------------------- 
# Mark D and all of its layers as non-trainable before the combined model is
# built and compiled.
make_trainable(D,False) 

# The combined model gets its own 100-dim Input. G is applied as a whole
# model, but D is NOT called as a model: its individual layer objects
# (d_l1 .. d_l7) are re-applied one by one to the generator output, so the
# GAN graph shares D's layers without going through the D model itself.
gan_input = Input(shape=(100,)) 
gan_hidden = G(gan_input) 
gan_hidden = d_l1(gan_hidden) 
gan_hidden = d_l2(gan_hidden) 
gan_hidden = d_l3(gan_hidden) 
gan_hidden = d_l4(gan_hidden) 
gan_hidden = d_l5(gan_hidden) 
gan_hidden = d_l6(gan_hidden) 
gan_output = d_l7(gan_hidden) 

# Noise in, discriminator score out.
GAN = km.Model(input=gan_input,output=gan_output) 
GAN.compile(loss='binary_crossentropy',optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True)) 
GAN.summary() 

# --------------------Main Code-------------------- 
# Load only the MNIST training images; labels and the test split are discarded.
(X, _), _ = mnist.load_data() 
# Scale by 1/255 (presumably maps 8-bit pixel values into [0, 1] -- confirm).
X = X/255. 
# Append a trailing channel axis to match the (28, 28, 1) discriminator input.
X = X[:, :, :, np.newaxis] 

# One batch of real images and two independent noise batches: Z1 feeds the
# discriminator step, Z2 feeds the generator step.
X_batch = X[0:batch_size, :] 
Z1_batch = noise_gen(batch_size, 100) 
Z2_batch = noise_gen(batch_size, 100) 

# Debug output: container type and shape of the inputs.
print(type(X_batch),X_batch.shape) 
print(type(Z1_batch),Z1_batch.shape) 

fake_batch = G.predict(Z1_batch) 
real_batch = X_batch 
print('--------------------Fake Image Generated!--------------------') 

# Real images first, then fakes; targets are 1 for real and 0 for fake.
combined_X_batch = np.concatenate((real_batch, fake_batch)) 
combined_y_batch = np.concatenate((np.ones((batch_size, 1)), np.zeros((batch_size, 1)))) 
print('real_batch={}, fake_batch={}'.format(real_batch.shape, fake_batch.shape)) 
print(type(combined_X_batch),combined_X_batch.dtype,combined_X_batch.shape) 
print(type(combined_y_batch),combined_y_batch.dtype,combined_y_batch.shape) 
# Discriminator step: re-enable training on D and its layers, then train on
# the mixed real/fake batch.
make_trainable(D,True) 
d_loss = D.train_on_batch(combined_X_batch, combined_y_batch) 
print('--------------------Discriminator trained!--------------------') 
print(d_loss) 

# Generator step: freeze D again and train the combined model on fresh noise
# with all-ones targets, i.e. ask D to label the generated images as real.
make_trainable(D,False) 
g_loss = GAN.train_on_batch(Z2_batch, np.ones((batch_size, 1))) 
print('--------------------GAN trained!--------------------') 
print(g_loss)

这有帮助吗？

来源

2017-02-26 16:00:28

折腾了相当长一段时间之后，我终于弄明白了：问题是由判别器（D）中的 BatchNormalization 层引起的。

只要把判别器中的 model.add(kl.BatchNormalization()) 这一行注释掉，代码就能正常运行。

不过，正如 @NassimBen 所展示的，使用函数式 API 不会出现这个问题。

import numpy as np 
import keras.layers as kl 
import keras.models as km 
import keras.optimizers as ko 
from keras.datasets import mnist 

batch_size = 16 
lr = 0.0001 

def noise_gen(batch_size, z_dim):
    """Return a batch of latent noise vectors for the generator.

    Args:
        batch_size: Number of noise vectors (rows) to draw.
        z_dim: Length of each noise vector (columns).

    Returns:
        A float32 array of shape (batch_size, z_dim) whose entries are drawn
        from np.random.uniform(-1, 1).
    """
    # A single vectorized draw consumes the global NumPy random stream in the
    # same row-major order as filling the array one row at a time, so seeded
    # results are unchanged; the cast keeps the float32 dtype of the output.
    return np.random.uniform(-1, 1, size=(batch_size, z_dim)).astype(np.float32)

# --------------------Generator Model-------------------- 

# Generator built as a Sequential stack: 100-dim input -> Dense(1024) ->
# Dense(7*7*128) -> reshape to (7, 7, 128) -> two stride-2 deconvolutions.
model = km.Sequential() 

model.add(kl.Dense(input_dim=100, output_dim=1024)) 
model.add(kl.Activation('relu')) 

# Project to 7*7*128 units, normalise, apply ReLU, then reshape to a
# (7, 7, 128) feature map.
model.add(kl.Dense(7*7*128)) 
model.add(kl.BatchNormalization()) 
model.add(kl.Activation('relu')) 
model.add(kl.Reshape((7, 7, 128), input_shape=(7*7*128,))) 

# Fourth positional argument is the declared output shape
# (None, 14, 14, 64) -- Keras 1.x Deconvolution2D signature, TODO confirm.
model.add(kl.Deconvolution2D(64, 5, 5, (None, 14, 14, 64), subsample=(2, 2), 
          input_shape=(7, 7, 128), border_mode='same')) 
model.add(kl.BatchNormalization()) 
model.add(kl.Activation('relu')) 

# Final deconvolution declares output shape (None, 28, 28, 1); no activation
# argument is passed.
model.add(kl.Deconvolution2D(1, 5, 5, (None, 28, 28, 1), subsample=(2, 2), 
          input_shape=(14, 14, 64), border_mode='same')) 

# G is compiled on its own; in this script it is only called via G.predict().
G = model 
G.compile( loss='binary_crossentropy', optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True)) 

# --------------------Discriminator Model-------------------- 

# Discriminator built as a Sequential stack over (28, 28, 1) inputs: two
# stride-2 5x5 convolutions with LeakyReLU, then a single sigmoid unit.
model = km.Sequential() 

model.add(kl.Convolution2D(64, 5, 5, subsample=(2, 2), input_shape=(28, 28, 1))) 
model.add(kl.LeakyReLU(alpha=0.2)) 

model.add(kl.Convolution2D(128, 5, 5, subsample=(2, 2))) 
# Workaround kept on purpose: the author reports that with this
# BatchNormalization layer disabled, D.train_on_batch no longer raises the
# InvalidArgumentError seen with the Sequential G -> D stack.
# model.add(kl.BatchNormalization()) 
model.add(kl.LeakyReLU(alpha=0.2)) 

# Flatten the feature maps and reduce them to one sigmoid output per sample.
model.add(kl.Flatten()) 
model.add(kl.Dense(1)) 
model.add(kl.Activation('sigmoid')) 

# D is compiled standalone so it can be trained directly with train_on_batch.
D = model 
D.compile( loss='binary_crossentropy', optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True)) 

# --------------------GAN Model-------------------- 

# Combined model: G followed by D in a new Sequential container, so that
# training GAN on noise pushes gradients through D into G.
model = km.Sequential() 
model.add(G) 
# The flag is set on the D model object only, before D is added and before
# GAN is compiled. The original author was unsure whether this is required.
D.trainable = False # Is this necessary? 
model.add(D) 
GAN = model 
GAN.compile(loss='binary_crossentropy', optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True)) 

# --------------------Main Code-------------------- 
# Load only the MNIST training images; labels and the test split are discarded.
(X, _), _ = mnist.load_data() 
# Scale by 1/255 (presumably maps 8-bit pixel values into [0, 1] -- confirm).
X = X/255. 
# Append a trailing channel axis to match D's input_shape of (28, 28, 1).
X = X[:, :, :, np.newaxis] 

# One batch of real images and two independent noise batches: Z1 feeds the
# discriminator step, Z2 feeds the generator step.
X_batch = X[0:batch_size, :] 
Z1_batch = noise_gen(batch_size, 100) 
Z2_batch = noise_gen(batch_size, 100) 

fake_batch = G.predict(Z1_batch) 
real_batch = X_batch 
print('--------------------Fake Image Generated!--------------------') 

# Real images first, then fakes; targets are 1 for real and 0 for fake.
combined_X_batch = np.concatenate((real_batch, fake_batch)) 
combined_y_batch = np.concatenate((np.ones((batch_size, 1)), np.zeros((batch_size, 1)))) 
print('real_batch={}, fake_batch={}'.format(real_batch.shape, fake_batch.shape)) 

# Discriminator step on the mixed real/fake batch.
D.trainable = True 
d_loss = D.train_on_batch(combined_X_batch, combined_y_batch) 
print('--------------------Discriminator trained!--------------------') 
print(d_loss) 

# Generator step: train the stacked model on fresh noise with all-ones
# targets, i.e. ask D to label the generated images as real.
D.trainable = False 
g_loss = GAN.train_on_batch(Z2_batch, np.ones((batch_size, 1))) 
print('--------------------GAN trained!--------------------') 
print(g_loss)

来源

2017-02-28 06:19:03 Adam

很酷，你追踪它！感谢分享，这是一个有趣的现象，我会调查 –

Thx。如果您有任何进展，请让我知道（并可能更新答案）。我将首先使用功能性API。 – Adam

一定会做:) –

Keras 训练部分模型的问题（关于 GAN 模型）

回答

相关问题