2017-02-23 118 views
2

当使用 Keras 实现 GAN 模型时,我遇到了一个奇怪的问题:Keras 训练部分模型的问题(关于 GAN 模型)。

对于 GAN,我们需要先构建 G 和 D,然后再新建一个 Sequential 模型(GAN),并依次执行 add(G)、add(D)。

当我调用 D.train_on_batch 时,得到了如下错误:InvalidArgumentError: You must feed a value for placeholder tensor 'dense_input_1' with dtype float。看起来 Keras(通过 GAN 模型)把 G 也牵涉了进来。

如果我删除GAN model(最后堆叠的G然后是D顺序模型),它会正确计算d_loss

我的环境是:

  • Ubuntu 16.04
  • keras 1.2.2
  • tensorflow-GPU 1.0.0
  • keras配置:{ "backend": "tensorflow", "image_dim_ordering": "tf", "epsilon": 1e-07, "floatx": "float32" }

我知道相当多的人们用keras成功地实施了GAN,所以我想知道我错在哪里。

import numpy as np 
import keras.layers as kl 
import keras.models as km 
import keras.optimizers as ko 
from keras.datasets import mnist 

batch_size = 16 
lr = 0.0001 

def noise_gen(batch_size, z_dim):
    """Sample a batch of latent noise vectors.

    Args:
        batch_size: Number of noise vectors (rows) to draw.
        z_dim: Length of each noise vector (columns).

    Returns:
        A ``float32`` array of shape ``(batch_size, z_dim)`` whose entries
        are drawn with ``np.random.uniform(-1, 1)``.
    """
    # One vectorized draw replaces the per-row Python loop. Values are
    # sampled as float64 and then cast to float32, which is what assigning
    # each row into a float32 array did.
    return np.random.uniform(-1, 1, size=(batch_size, z_dim)).astype(np.float32)

# --------------------Generator Model-------------------- 
# Sequential generator: takes a 100-dim input vector and ends with a
# deconvolution whose declared output shape is (None, 28, 28, 1).

model = km.Sequential() 

# Fully connected layer from the 100-dim input to 1024 units, then relu.
model.add(kl.Dense(input_dim=100, output_dim=1024)) 
model.add(kl.Activation('relu')) 

# Expand to 7*7*128 units, batch-normalize, apply relu, and reshape the
# flat vector into a (7, 7, 128) feature map.
model.add(kl.Dense(7*7*128)) 
model.add(kl.BatchNormalization()) 
model.add(kl.Activation('relu')) 
model.add(kl.Reshape((7, 7, 128), input_shape=(7*7*128,))) 

# First 5x5 deconvolution with subsample=(2, 2); declared output shape is
# (None, 14, 14, 64). Followed by batch normalization and relu.
model.add(kl.Deconvolution2D(64, 5, 5, (None, 14, 14, 64), subsample=(2, 2), 
    input_shape=(7, 7, 128), border_mode='same')) 
model.add(kl.BatchNormalization()) 
model.add(kl.Activation('relu')) 

# Second 5x5 deconvolution with subsample=(2, 2); declared output shape is
# (None, 28, 28, 1). No activation argument is passed and no Activation
# layer follows it.
model.add(kl.Deconvolution2D(1, 5, 5, (None, 28, 28, 1), subsample=(2, 2), 
    input_shape=(14, 14, 64), border_mode='same')) 

# Keep the generator under the name G and compile it with SGD
# (Nesterov momentum 0.9, learning rate `lr`) and binary cross-entropy.
G = model 
G.compile( loss='binary_crossentropy', optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True)) 

# --------------------Discriminator Model-------------------- 
# Sequential discriminator: takes a (28, 28, 1) input and ends in a single
# sigmoid unit.

model = km.Sequential() 

# 5x5 convolution with 64 filters and subsample=(2, 2), then LeakyReLU(0.2).
model.add(kl.Convolution2D(64, 5, 5, subsample=(2, 2), input_shape=(28, 28, 1))) 
model.add(kl.LeakyReLU(alpha=0.2)) 

# 5x5 convolution with 128 filters and subsample=(2, 2), batch
# normalization, then LeakyReLU(0.2).
model.add(kl.Convolution2D(128, 5, 5, subsample=(2, 2))) 
model.add(kl.BatchNormalization()) 
model.add(kl.LeakyReLU(alpha=0.2)) 

# Flatten the feature maps and map them to one sigmoid output.
model.add(kl.Flatten()) 
model.add(kl.Dense(1)) 
model.add(kl.Activation('sigmoid')) 

# Keep the discriminator under the name D and compile it with the same
# optimizer settings and loss as the generator.
D = model 
D.compile( loss='binary_crossentropy', optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True)) 

# --------------------GAN Model-------------------- 
# Stack the already-compiled G and D into one Sequential model, so the
# combined model takes G's input and produces D's output.

model = km.Sequential() 
model.add(G) 
# D.trainable is switched off before D is added to the stacked model.
D.trainable = False # Is this necessary? 
model.add(D) 
GAN = model 
GAN.compile(loss='binary_crossentropy', optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True)) 

# --------------------Main Code-------------------- 
# Runs a single discriminator update followed by a single GAN update.

# Keep only the first array of the first tuple returned by
# mnist.load_data(), divide by 255 and append a trailing axis of size 1.
(X, _), _ = mnist.load_data() 
X = X/255. 
X = X[:, :, :, np.newaxis] 

# First `batch_size` samples, plus two independent noise batches of
# 100-dim vectors (one for the D step, one for the GAN step).
X_batch = X[0:batch_size, :] 
Z1_batch = noise_gen(batch_size, 100) 
Z2_batch = noise_gen(batch_size, 100) 

# Generate samples from the first noise batch with G.
fake_batch = G.predict(Z1_batch) 
real_batch = X_batch 
print('--------------------Fake Image Generated!--------------------') 

# Real samples first (label 1), generated samples second (label 0).
combined_X_batch = np.concatenate((real_batch, fake_batch)) 
combined_y_batch = np.concatenate((np.ones((batch_size, 1)), np.zeros((batch_size, 1)))) 
print('real_batch={}, fake_batch={}'.format(real_batch.shape, fake_batch.shape)) 

# One training step of D on the combined batch. The traceback reported
# with this script points at this train_on_batch call.
D.trainable = True 
d_loss = D.train_on_batch(combined_X_batch, combined_y_batch) 
print('--------------------Discriminator trained!--------------------') 
print(d_loss) 

# One training step of the stacked model on the second noise batch, with
# every target set to 1; D.trainable is switched off first.
D.trainable = False 
g_loss = GAN.train_on_batch(Z2_batch, np.ones((batch_size, 1))) 
print('--------------------GAN trained!--------------------') 
print(g_loss) 

错误消息:

W tensorflow/core/framework/op_kernel.cc:993] Invalid argument: You must feed a value for placeholder tensor 'dense_input_1' with dtype float 
    [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]] 
W tensorflow/core/framework/op_kernel.cc:993] Invalid argument: You must feed a value for placeholder tensor 'dense_input_1' with dtype float 
    [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]] 
W tensorflow/core/framework/op_kernel.cc:993] Invalid argument: You must feed a value for placeholder tensor 'dense_input_1' with dtype float 
    [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]] 
W tensorflow/core/framework/op_kernel.cc:993] Invalid argument: You must feed a value for placeholder tensor 'dense_input_1' with dtype float 
    [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]] 
W tensorflow/core/framework/op_kernel.cc:993] Invalid argument: You must feed a value for placeholder tensor 'dense_input_1' with dtype float 
    [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]] 
W tensorflow/core/framework/op_kernel.cc:993] Invalid argument: You must feed a value for placeholder tensor 'dense_input_1' with dtype float 
    [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]] 
Traceback (most recent call last): 
    File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1022, in _do_call 
    return fn(*args) 
    File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1004, in _run_fn 
    status, run_metadata) 
    File "/usr/lib/python3.5/contextlib.py", line 66, in __exit__ 
    next(self.gen) 
    File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/errors_impl.py", line 469, in raise_exception_on_not_ok_status 
    pywrap_tensorflow.TF_GetCode(status)) 
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'dense_input_1' with dtype float 
    [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]] 
    [[Node: mul_5/_77 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_1018_mul_5", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]] 

During handling of the above exception, another exception occurred: 

Traceback (most recent call last): 
    File "./gen.py", line 84, in <module> 
    d_loss = D.train_on_batch(combined_X_batch, combined_y_batch) 
    File "/usr/local/lib/python3.5/dist-packages/keras/models.py", line 766, in train_on_batch 
    class_weight=class_weight) 
    File "/usr/local/lib/python3.5/dist-packages/keras/engine/training.py", line 1320, in train_on_batch 
    outputs = self.train_function(ins) 
    File "/usr/local/lib/python3.5/dist-packages/keras/backend/tensorflow_backend.py", line 1943, in __call__ 
    feed_dict=feed_dict) 
    File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 767, in run 
    run_metadata_ptr) 
    File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 965, in _run 
    feed_dict_string, options, run_metadata) 
    File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1015, in _do_run 
    target_list, options, run_metadata) 
    File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py", line 1035, in _do_call 
    raise type(e)(node_def, op, message) 
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'dense_input_1' with dtype float 
    [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]] 
    [[Node: mul_5/_77 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_1018_mul_5", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]] 

Caused by op 'dense_input_1', defined at: 
    File "./gen.py", line 20, in <module> 
    model.add(kl.Dense(input_dim=100, output_dim=1024)) 
    File "/usr/local/lib/python3.5/dist-packages/keras/models.py", line 299, in add 
    layer.create_input_layer(batch_input_shape, input_dtype) 
    File "/usr/local/lib/python3.5/dist-packages/keras/engine/topology.py", line 397, in create_input_layer 
    dtype=input_dtype, name=name) 
    File "/usr/local/lib/python3.5/dist-packages/keras/engine/topology.py", line 1198, in Input 
    input_tensor=tensor) 
    File "/usr/local/lib/python3.5/dist-packages/keras/engine/topology.py", line 1116, in __init__ 
    name=self.name) 
    File "/usr/local/lib/python3.5/dist-packages/keras/backend/tensorflow_backend.py", line 321, in placeholder 
    x = tf.placeholder(dtype, shape=shape, name=name) 
    File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/array_ops.py", line 1520, in placeholder 
    name=name) 
    File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 2149, in _placeholder 
    name=name) 
    File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py", line 763, in apply_op 
    op_def=op_def) 
    File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 2395, in create_op 
    original_op=self._default_original_op, op_def=op_def) 
    File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 1264, in __init__ 
    self._traceback = _extract_stack() 

InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'dense_input_1' with dtype float 
    [[Node: dense_input_1 = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/gpu:0"]()]] 
    [[Node: mul_5/_77 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_1018_mul_5", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]] 
+0

任何反馈的答案吗?这有帮助吗? –

+0

@NassimBen对不起,回复晚了。我试图弄清楚问题到底是什么,我终于明白了。 – Adam

+0

很酷,你能和我们分享吗?总是感兴趣:) –

回答

1

首先,我建议你改用函数式 API(functional API)来构建模型。这类组合模型用函数式 API 处理起来更容易。

老实说,我不确定你的方案为什么不起作用。看起来当你把 D 模型连接到一个新的输入时,它在某种程度上被“破坏”了,并且被绑定到了那个输入上。我绕过这个问题的办法是先定义各个层,然后在判别器(Discriminator)和 GAN 模型中共用这些层。代码如下:

import numpy as np 
from keras.layers import * 
import keras.models as km 
import keras.optimizers as ko 
from keras.datasets import mnist 

batch_size = 16 
lr = 0.0001 

def noise_gen(batch_size, z_dim):
    """Sample a batch of latent noise vectors.

    Args:
        batch_size: Number of noise vectors (rows) to draw.
        z_dim: Length of each noise vector (columns).

    Returns:
        A ``float32`` array of shape ``(batch_size, z_dim)`` whose entries
        are drawn with ``np.random.uniform(-1, 1)``.
    """
    # One vectorized draw replaces the per-row Python loop. Values are
    # sampled as float64 and then cast to float32, which is what assigning
    # each row into a float32 array did.
    return np.random.uniform(-1, 1, size=(batch_size, z_dim)).astype(np.float32)

def make_trainable(model, trainable):
    """Set the ``trainable`` flag on ``model`` and on each of its layers.

    Args:
        model: Object exposing a ``trainable`` attribute and an iterable
            ``layers`` attribute.
        trainable: Boolean written to ``model.trainable`` and to the
            ``trainable`` attribute of every entry in ``model.layers``.
    """
    # The model-level flag is written first, then each layer in order.
    setattr(model, 'trainable', trainable)
    for layer in model.layers:
        setattr(layer, 'trainable', trainable)

# --------------------Generator Model-------------------- 
# Functional-API generator: takes a 100-dim input tensor and ends with a
# deconvolution whose declared output shape is (None, 28, 28, 1).

g_input = Input(shape=(100,)) 

# Two relu Dense layers (1024 and 7*7*128 units); batch normalization is
# applied after the second layer's relu, then the vector is reshaped to
# (7, 7, 128).
g_hidden = Dense(1024, activation='relu')(g_input) 
g_hidden = Dense(7*7*128, activation='relu')(g_hidden) 
g_hidden = BatchNormalization()(g_hidden) 
g_hidden = Reshape((7,7,128))(g_hidden) 

# 5x5 deconvolution with subsample=(2,2) and relu; declared output shape is
# (None, 14, 14, 64). Followed by batch normalization.
g_hidden = Deconvolution2D(64,5,5, (None, 14, 14, 64), subsample=(2,2), 
     border_mode='same', activation='relu')(g_hidden) 
g_hidden = BatchNormalization()(g_hidden) 
# Final 5x5 deconvolution with subsample=(2,2); declared output shape is
# (None, 28, 28, 1). No activation argument is passed.
g_output = Deconvolution2D(1,5,5, (None, 28, 28, 1), subsample=(2,2), 
     border_mode='same')(g_hidden) 

# Wrap the graph in a Model, compile it and print its summary.
G = km.Model(input=g_input,output=g_output) 
G.compile(loss='binary_crossentropy', optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True)) 
G.summary() 

# --------------------Discriminator Model-------------------- 
# Functional-API discriminator on a (28, 28, 1) input. Each layer object is
# bound to its own name (d_l1 .. d_l7) separately from the tensor it
# produces, so the same layer instances can be applied to another input.

d_input = Input(shape=(28,28,1)) 

# 5x5 convolution with 64 filters and subsample=(2,2), then LeakyReLU(0.2).
d_l1 = Convolution2D(64,5,5, subsample=(2,2)) 
d_hidden_1 = d_l1(d_input) 
d_l2 = LeakyReLU(alpha=0.2) 
d_hidden_2 = d_l2(d_hidden_1) 

# 5x5 convolution with 128 filters and subsample=(2,2), batch
# normalization, then LeakyReLU(0.2).
d_l3 = Convolution2D(128,5,5, subsample=(2,2)) 
d_hidden_3 = d_l3(d_hidden_2) 
d_l4 = BatchNormalization() 
d_hidden_4 = d_l4(d_hidden_3) 
d_l5 = LeakyReLU(alpha=0.2) 
d_hidden_5 = d_l5(d_hidden_4) 

# Flatten and map to a single sigmoid output.
d_l6 = Flatten() 
d_hidden_6 = d_l6(d_hidden_5) 
d_l7 = Dense(1, activation='sigmoid') 
d_output = d_l7(d_hidden_6) 

# Wrap the graph in a Model, compile it and print its summary.
D = km.Model(input=d_input,output=d_output) 
D.compile(loss='binary_crossentropy',optimizer=ko.SGD(lr=lr,momentum=0.9, nesterov=True)) 
D.summary() 

# --------------------GAN Model-------------------- 
# Mark D and all of its layers as non-trainable before building the
# combined graph.
make_trainable(D,False) 

# New 100-dim input fed through G, then through the discriminator's layer
# objects d_l1 .. d_l7 in order. The layer instances are reused directly
# rather than calling the D model on G's output.
gan_input = Input(shape=(100,)) 
gan_hidden = G(gan_input) 
gan_hidden = d_l1(gan_hidden) 
gan_hidden = d_l2(gan_hidden) 
gan_hidden = d_l3(gan_hidden) 
gan_hidden = d_l4(gan_hidden) 
gan_hidden = d_l5(gan_hidden) 
gan_hidden = d_l6(gan_hidden) 
gan_output = d_l7(gan_hidden) 

# Wrap the combined graph in a Model, compile it and print its summary.
GAN = km.Model(input=gan_input,output=gan_output) 
GAN.compile(loss='binary_crossentropy',optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True)) 
GAN.summary() 

# --------------------Main Code-------------------- 
# Runs a single discriminator update followed by a single GAN update.

# Keep only the first array of the first tuple returned by
# mnist.load_data(), divide by 255 and append a trailing axis of size 1.
(X, _), _ = mnist.load_data() 
X = X/255. 
X = X[:, :, :, np.newaxis] 

# First `batch_size` samples, plus two independent noise batches of
# 100-dim vectors (one for the D step, one for the GAN step).
X_batch = X[0:batch_size, :] 
Z1_batch = noise_gen(batch_size, 100) 
Z2_batch = noise_gen(batch_size, 100) 

# Debug output: types and shapes of the data and noise batches.
print(type(X_batch),X_batch.shape) 
print(type(Z1_batch),Z1_batch.shape) 

# Generate samples from the first noise batch with G.
fake_batch = G.predict(Z1_batch) 
real_batch = X_batch 
print('--------------------Fake Image Generated!--------------------') 

# Real samples first (label 1), generated samples second (label 0).
combined_X_batch = np.concatenate((real_batch, fake_batch)) 
combined_y_batch = np.concatenate((np.ones((batch_size, 1)), np.zeros((batch_size, 1)))) 
print('real_batch={}, fake_batch={}'.format(real_batch.shape, fake_batch.shape)) 
print(type(combined_X_batch),combined_X_batch.dtype,combined_X_batch.shape) 
print(type(combined_y_batch),combined_y_batch.dtype,combined_y_batch.shape) 
# Re-enable training on D and its layers, then run one training step of D
# on the combined batch.
make_trainable(D,True) 
d_loss = D.train_on_batch(combined_X_batch, combined_y_batch) 
print('--------------------Discriminator trained!--------------------') 
print(d_loss) 

# Switch D and its layers back to non-trainable, then run one training step
# of the combined model on the second noise batch with every target set to 1.
make_trainable(D,False) 
g_loss = GAN.train_on_batch(Z2_batch, np.ones((batch_size, 1))) 
print('--------------------GAN trained!--------------------') 
print(g_loss) 

这有帮助吗?

1

折腾了相当长一段时间后,我终于弄明白了:问题是由判别器中的 BatchNormalization 层引起的。

只要把判别器中的 model.add(kl.BatchNormalization()) 注释掉,它就能正常工作。

不过,正如 @NassimBen 所示,使用函数式 API 不会出现任何问题。

import numpy as np 
import keras.layers as kl 
import keras.models as km 
import keras.optimizers as ko 
from keras.datasets import mnist 

batch_size = 16 
lr = 0.0001 

def noise_gen(batch_size, z_dim):
    """Sample a batch of latent noise vectors.

    Args:
        batch_size: Number of noise vectors (rows) to draw.
        z_dim: Length of each noise vector (columns).

    Returns:
        A ``float32`` array of shape ``(batch_size, z_dim)`` whose entries
        are drawn with ``np.random.uniform(-1, 1)``.
    """
    # One vectorized draw replaces the per-row Python loop. Values are
    # sampled as float64 and then cast to float32, which is what assigning
    # each row into a float32 array did.
    return np.random.uniform(-1, 1, size=(batch_size, z_dim)).astype(np.float32)

# --------------------Generator Model-------------------- 
# Sequential generator: takes a 100-dim input vector and ends with a
# deconvolution whose declared output shape is (None, 28, 28, 1).

model = km.Sequential() 

# Fully connected layer from the 100-dim input to 1024 units, then relu.
model.add(kl.Dense(input_dim=100, output_dim=1024)) 
model.add(kl.Activation('relu')) 

# Expand to 7*7*128 units, batch-normalize, apply relu, and reshape the
# flat vector into a (7, 7, 128) feature map.
model.add(kl.Dense(7*7*128)) 
model.add(kl.BatchNormalization()) 
model.add(kl.Activation('relu')) 
model.add(kl.Reshape((7, 7, 128), input_shape=(7*7*128,))) 

# First 5x5 deconvolution with subsample=(2, 2); declared output shape is
# (None, 14, 14, 64). Followed by batch normalization and relu.
model.add(kl.Deconvolution2D(64, 5, 5, (None, 14, 14, 64), subsample=(2, 2), 
          input_shape=(7, 7, 128), border_mode='same')) 
model.add(kl.BatchNormalization()) 
model.add(kl.Activation('relu')) 

# Second 5x5 deconvolution with subsample=(2, 2); declared output shape is
# (None, 28, 28, 1). No activation argument is passed and no Activation
# layer follows it.
model.add(kl.Deconvolution2D(1, 5, 5, (None, 28, 28, 1), subsample=(2, 2), 
          input_shape=(14, 14, 64), border_mode='same')) 

# Keep the generator under the name G and compile it with SGD
# (Nesterov momentum 0.9, learning rate `lr`) and binary cross-entropy.
G = model 
G.compile( loss='binary_crossentropy', optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True)) 

# --------------------Discriminator Model-------------------- 
# Sequential discriminator: takes a (28, 28, 1) input and ends in a single
# sigmoid unit. This variant has no active BatchNormalization layer.

model = km.Sequential() 

# 5x5 convolution with 64 filters and subsample=(2, 2), then LeakyReLU(0.2).
model.add(kl.Convolution2D(64, 5, 5, subsample=(2, 2), input_shape=(28, 28, 1))) 
model.add(kl.LeakyReLU(alpha=0.2)) 

# 5x5 convolution with 128 filters and subsample=(2, 2), then
# LeakyReLU(0.2). The BatchNormalization layer between them is
# deliberately left commented out.
model.add(kl.Convolution2D(128, 5, 5, subsample=(2, 2))) 
# model.add(kl.BatchNormalization()) 
model.add(kl.LeakyReLU(alpha=0.2)) 

# Flatten the feature maps and map them to one sigmoid output.
model.add(kl.Flatten()) 
model.add(kl.Dense(1)) 
model.add(kl.Activation('sigmoid')) 

# Keep the discriminator under the name D and compile it with the same
# optimizer settings and loss as the generator.
D = model 
D.compile( loss='binary_crossentropy', optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True)) 

# --------------------GAN Model-------------------- 
# Stack the already-compiled G and D into one Sequential model, so the
# combined model takes G's input and produces D's output.

model = km.Sequential() 
model.add(G) 
# D.trainable is switched off before D is added to the stacked model.
D.trainable = False # Is this necessary? 
model.add(D) 
GAN = model 
GAN.compile(loss='binary_crossentropy', optimizer=ko.SGD(lr=lr, momentum=0.9, nesterov=True)) 

# --------------------Main Code-------------------- 
# Runs a single discriminator update followed by a single GAN update.

# Keep only the first array of the first tuple returned by
# mnist.load_data(), divide by 255 and append a trailing axis of size 1.
(X, _), _ = mnist.load_data() 
X = X/255. 
X = X[:, :, :, np.newaxis] 

# First `batch_size` samples, plus two independent noise batches of
# 100-dim vectors (one for the D step, one for the GAN step).
X_batch = X[0:batch_size, :] 
Z1_batch = noise_gen(batch_size, 100) 
Z2_batch = noise_gen(batch_size, 100) 

# Generate samples from the first noise batch with G.
fake_batch = G.predict(Z1_batch) 
real_batch = X_batch 
print('--------------------Fake Image Generated!--------------------') 

# Real samples first (label 1), generated samples second (label 0).
combined_X_batch = np.concatenate((real_batch, fake_batch)) 
combined_y_batch = np.concatenate((np.ones((batch_size, 1)), np.zeros((batch_size, 1)))) 
print('real_batch={}, fake_batch={}'.format(real_batch.shape, fake_batch.shape)) 

# One training step of D on the combined batch.
D.trainable = True 
d_loss = D.train_on_batch(combined_X_batch, combined_y_batch) 
print('--------------------Discriminator trained!--------------------') 
print(d_loss) 

# One training step of the stacked model on the second noise batch, with
# every target set to 1; D.trainable is switched off first.
D.trainable = False 
g_loss = GAN.train_on_batch(Z2_batch, np.ones((batch_size, 1))) 
print('--------------------GAN trained!--------------------') 
print(g_loss) 
+0

很酷,你追踪它!感谢分享,这是一个有趣的现象,我会调查 –

+0

Thx。如果您有任何进展,请告诉我(也可以更新一下答案)。我会先使用函数式 API。 – Adam

+0

一定会做:) –