2017-04-21 406 views
1

我想用 Keras 建立一个非线性回归模型,来预测取正值(+ve)的连续变量。对于下面的模型,我该如何选择以下超参数?如何使用 hyperopt 对 Keras 深度学习网络进行超参数优化?

  1. 隐藏层的数量以及每层的神经元数
  2. Dropout 比率
  3. 是否使用 BatchNormalization
  4. 激活函数:从 linear、relu、tanh、sigmoid 中选择
  5. 最佳优化器:从 adam、rmsprop、sgd 中选择

代码

def dnn_reg():
    """Build and compile the feed-forward regression network.

    The network takes 13 input features and stacks three hidden Dense
    layers (40, 30 and 5 units) followed by a single-unit output layer.
    Every Dense layer uses the 'normal' kernel initializer.

    Returns:
        The compiled Sequential model (mean-squared-error loss, Adam
        optimizer).
    """
    init = 'normal'
    stack = [
        # Hidden layer 1: tanh activation, 20% dropout.
        Dense(40, input_dim=13, kernel_initializer=init),
        Activation('tanh'),
        Dropout(0.2),
        # Hidden layer 2: batch-norm before the ReLU, 40% dropout.
        Dense(30, kernel_initializer=init),
        BatchNormalization(),
        Activation('relu'),
        Dropout(0.4),
        # Hidden layer 3: same pattern as layer 2, narrowed to 5 units.
        Dense(5, kernel_initializer=init),
        BatchNormalization(),
        Activation('relu'),
        Dropout(0.4),
        # Output: one unit passed through ReLU, so predictions are >= 0.
        Dense(1, kernel_initializer=init),
        Activation('relu'),
    ]

    net = Sequential()
    for layer in stack:
        net.add(layer)

    net.compile(loss='mean_squared_error', optimizer='adam')
    return net

我考虑过随机网格搜索,但更想使用 hyperopt,因为我相信它会更快。我最初使用 https://github.com/maxpumperla/hyperas 来进行调参,但 Hyperas 无法与最新版本的 Keras 配合使用。我猜是因为 Keras 发展很快,维护者难以保持兼容,所以我认为直接使用 hyperopt 会是更好的选择。PS:我对用于超参数调优的贝叶斯优化以及 hyperopt 都不熟悉。

回答

6

我使用 Hyperas 取得了不少成功。以下是我总结出的让它正常工作的要点。

1)把它作为 Python 脚本从终端运行(而不是在 IPython notebook 中运行);2)确保代码中没有任何注释(Hyperas 不喜欢注释!);3)按照 hyperas README 中的说明,把数据和模型分别封装在函数中。

下面是一个适用于我的Hyperas脚本示例(按照上述说明操作)。

from __future__ import print_function 

from hyperopt import Trials, STATUS_OK, tpe 
from keras.datasets import mnist 
from keras.layers.core import Dense, Dropout, Activation 
from keras.models import Sequential 
from keras.utils import np_utils 
import numpy as np 
from hyperas import optim 
from keras.models import model_from_json 
from keras.models import Sequential 
from keras.layers.core import Dense, Dropout, Activation, Flatten 
from keras.layers.convolutional import Convolution2D, MaxPooling2D 
from keras.optimizers import SGD , Adam 
import tensorflow as tf 
from hyperas.distributions import choice, uniform, conditional 
__author__ = 'JOnathan Hilgart' 



def data():
    """
    Data providing function:

    Load the feature array from 'training_x.npy' and the target array from
    'training_y.npy' (paths are relative to the working directory) and split
    both by row: the first 15000 rows form the training set, every remaining
    row forms the test set.

    This function is separated from model() so that hyperopt
    won't reload data for each evaluation run.

    Both arrays are indexed as [rows, :], so each needs at least two
    dimensions. numpy is imported inside the function body, so the function
    does not depend on a module-level import.

    Returns:
        The tuple (x_train, y_train, x_test, y_test).
    """
    import numpy as np
    x = np.load('training_x.npy')
    y = np.load('training_y.npy')
    x_train = x[:15000,:]
    y_train = y[:15000,:]
    x_test = x[15000:,:]
    y_test = y[15000:,:]
    return x_train, y_train, x_test, y_test


def model(x_train, y_train, x_test, y_test):
    """
    Model providing function:

    Create Keras model with double curly brackets dropped-in as needed.
    Each double-curly-bracket expression marks a hyper-parameter to search
    over (a choice among listed values or a uniform range).

    The network is a Sequential MLP over 2-feature inputs with four hidden
    Dense layers, each followed by Dropout with a searched rate in [0, 0.5].
    The first hidden layer always uses relu; the other three pick relu or
    sigmoid. The output layer has 9 units with a searched softmax/linear
    activation. The loss (categorical_crossentropy or mse), the optimizer
    (rmsprop, adam or sgd) and the batch size are searched as well.

    The model is trained for 50 epochs with (x_test, y_test) as validation
    data and then evaluated on the same test set.

    NOTE(review): 126 in the unit choices looks like a typo for 128 - confirm.

    Return value has to be a valid python dictionary with two customary keys:
     - loss: Specify a numeric evaluation metric to be minimized
             (here the negated test accuracy, so higher accuracy is better)
     - status: Just use STATUS_OK and see hyperopt documentation if not feasible
    The last one is optional, though recommended, namely:
     - model: specify the model just created so that we can later use it again.
    """
    model_mlp = Sequential()
    model_mlp.add(Dense({{choice([32, 64,126, 256, 512, 1024])}},
         activation='relu', input_shape= (2,)))
    model_mlp.add(Dropout({{uniform(0, .5)}}))
    model_mlp.add(Dense({{choice([32, 64, 126, 256, 512, 1024])}}))
    model_mlp.add(Activation({{choice(['relu', 'sigmoid'])}}))
    model_mlp.add(Dropout({{uniform(0, .5)}}))
    model_mlp.add(Dense({{choice([32, 64, 126, 256, 512, 1024])}}))
    model_mlp.add(Activation({{choice(['relu', 'sigmoid'])}}))
    model_mlp.add(Dropout({{uniform(0, .5)}}))
    model_mlp.add(Dense({{choice([32, 64, 126, 256, 512, 1024])}}))
    model_mlp.add(Activation({{choice(['relu', 'sigmoid'])}}))
    model_mlp.add(Dropout({{uniform(0, .5)}}))
    model_mlp.add(Dense(9))
    model_mlp.add(Activation({{choice(['softmax','linear'])}}))
    model_mlp.compile(loss={{choice(['categorical_crossentropy','mse'])}}, metrics=['accuracy'],
        optimizer={{choice(['rmsprop', 'adam', 'sgd'])}})



    model_mlp.fit(x_train, y_train,
       batch_size={{choice([16, 32, 64, 128])}},
       epochs=50,
       verbose=2,
       validation_data=(x_test, y_test))
    score, acc = model_mlp.evaluate(x_test, y_test, verbose=0)
    print('Test accuracy:', acc)
    return {'loss': -acc, 'status': STATUS_OK, 'model': model_mlp}

    enter code here 

if __name__ == '__main__':
    import gc
    # `K` was used below without ever being imported, which raised NameError.
    from keras import backend as K

    gc.collect()

    with K.get_session():  # TF session
        # Search the templated model() over the data() split; max_evals is
        # the number of hyper-parameter combinations that get tried.
        best_run, best_model = optim.minimize(model=model,
                                              data=data,
                                              algo=tpe.suggest,
                                              max_evals=2,
                                              trials=Trials())
        X_train, Y_train, X_test, Y_test = data()
        print("Evalutation of best performing model:")
        print(best_model.evaluate(X_test, Y_test))
        print("Best performing model chosen hyper-parameters:")
        print(best_run)

这是由不同的垃圾回收(GC)顺序引起的:如果 Python 先回收 session,程序会正常退出;如果 Python 先回收 SWIG 内存(tf_session),程序退出时就会失败。

你可以通过以下方式强制 Python 删除(del)session:

del session 

或者,如果你使用的是 Keras 而无法获得 session 实例,可以在代码的末尾运行下面的代码:

import gc; gc.collect() 
+0

你分享的代码可以运行,但在若干个 epoch 之后,我收到了以下错误。 AssertionError:在以下位置忽略的异常:<位于0x000665667的 >> –

+0

这个解决方案很好,但缺少提问者(OP)使用的 kernel_initializer。在 hyperas 中仍然可以设置它吗? – StatsSorceress

+0

hyperas 不允许注释,这一点非常奇怪,我对此不太确定。你有参考资料吗? – StatsSorceress

0

这也可以是另一种方法:

from hyperopt import fmin, tpe, hp, STATUS_OK, Trials 
from sklearn.metrics import roc_auc_score 
import sys 

# Placeholders for the training and validation data; assign the real arrays
# here before running the search.
X, y, X_val, y_val = [], [], [], []

# Hyperopt search space. 'choice' selects between a two- and a three-hidden-
# layer network; the width and dropout of the third layer are only sampled
# in the three-layer branch. 'nb_epochs' and 'activation' are fixed values.
space = dict(
    choice=hp.choice(
        'num_layers',
        [
            {'layers': 'two'},
            {
                'layers': 'three',
                'units3': hp.uniform('units3', 64, 1024),
                'dropout3': hp.uniform('dropout3', .25, .75),
            },
        ],
    ),
    units1=hp.uniform('units1', 64, 1024),
    units2=hp.uniform('units2', 64, 1024),
    dropout1=hp.uniform('dropout1', .25, .75),
    dropout2=hp.uniform('dropout2', .25, .75),
    batch_size=hp.uniform('batch_size', 28, 128),
    nb_epochs=100,
    optimizer=hp.choice('optimizer', ['adadelta', 'adam', 'rmsprop']),
    activation='relu',
)

def f_nn(params):
    """Hyperopt objective: train one binary classifier and score it by AUC.

    Builds a Sequential network with two hidden Dense layers, plus a third
    when params['choice']['layers'] == 'three', trains it on the module-level
    X / y and scores it on X_val / y_val with ROC AUC.

    Args:
        params: One sample from the search space. Keys read here: 'units1',
            'units2', 'dropout1', 'dropout2', 'activation', 'optimizer',
            'batch_size', 'nb_epochs' and 'choice' (a dict with 'layers' and,
            for the three-layer variant, 'units3' and 'dropout3').

    Returns:
        dict with 'loss' set to the negated AUC (hyperopt minimizes, so a
        higher AUC is better) and 'status' set to STATUS_OK.
    """
    # The unused optimizer-class imports were dropped: the optimizer is passed
    # to compile() by name, and the lowercase `rmsprop` alias does not exist
    # in newer Keras releases.
    from keras.models import Sequential
    from keras.layers.core import Dense, Dropout, Activation

    print('Params testing: ', params)

    # hp.uniform samples floats; layer widths and the batch size must be
    # integers, so truncate them before handing them to Keras.
    units1 = int(params['units1'])
    units2 = int(params['units2'])
    batch_size = int(params['batch_size'])

    model = Sequential()
    # X must expose .shape (e.g. a NumPy array); the placeholder list does not.
    model.add(Dense(units1, input_dim=X.shape[1]))
    model.add(Activation(params['activation']))
    model.add(Dropout(params['dropout1']))

    model.add(Dense(units2, kernel_initializer="glorot_uniform"))
    model.add(Activation(params['activation']))
    model.add(Dropout(params['dropout2']))

    if params['choice']['layers'] == 'three':
        units3 = int(params['choice']['units3'])
        model.add(Dense(units3, kernel_initializer="glorot_uniform"))
        model.add(Activation(params['activation']))
        model.add(Dropout(params['choice']['dropout3']))

    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer=params['optimizer'])

    model.fit(X, y, epochs=params['nb_epochs'], batch_size=batch_size, verbose=0)

    # The sigmoid output is already a probability, so predict() yields the
    # scores that roc_auc_score needs.
    pred_auc = model.predict(X_val, batch_size=128, verbose=0)
    auc = roc_auc_score(y_val, pred_auc)
    print('AUC:', auc)
    sys.stdout.flush()
    return {'loss': -auc, 'status': STATUS_OK}


# Run 50 TPE-guided evaluations of f_nn over `space`; every evaluation is
# recorded in `trials`.
trials = Trials()
best = fmin(f_nn, space, algo=tpe.suggest, max_evals=50, trials=trials)
# print() calls rather than Python 2 print statements: with a single argument
# they produce the same output on Python 2 and also run on Python 3.
print('best: ')
print(best)

Source

相关问题