
I am using keras + tensorflow for the first time, and I would like to specify the correlation coefficient as the loss function. Squaring it makes sense, so that it is a number between 0 and 1 where 0 is bad and 1 is good. How can I specify the correlation coefficient as a loss function in keras?

My basic code currently looks like:

def baseline_model():
    model = Sequential()
    model.add(Dense(4000, input_dim=n**2, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

estimators = [] 
estimators.append(('standardize', StandardScaler())) 
estimators.append(('mlp', KerasRegressor(build_fn=baseline_model, epochs=100, batch_size=32, verbose=2))) 
pipeline = Pipeline(estimators) 
kfold = KFold(n_splits=10, random_state=0) 
results = cross_val_score(pipeline, X, Y, cv=kfold) 
print("Standardized: %.2f (%.2f) MSE" % (results.mean(), results.std())) 

How can I change this so that it optimizes the squared correlation coefficient instead of minimizing the mean squared error?


I have tried the following:

def correlation_coefficient(y_true, y_pred): 
    pearson_r, _ = tf.contrib.metrics.streaming_pearson_correlation(y_pred, y_true) 
    return 1-pearson_r**2 

def baseline_model():
    # create model
    model = Sequential()
    model.add(Dense(4000, input_dim=n**2, kernel_initializer='normal', activation='relu'))
    # model.add(Dense(2000, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model
    model.compile(loss=correlation_coefficient, optimizer='adam')
    return model

But this crashes with:

Traceback (most recent call last):
  File "deeplearning-det.py", line 67, in <module>
    results = cross_val_score(pipeline, X, Y, cv=kfold)
  File "/home/user/.local/lib/python3.5/site-packages/sklearn/model_selection/_validation.py", line 321, in cross_val_score
    pre_dispatch=pre_dispatch)
  File "/home/user/.local/lib/python3.5/site-packages/sklearn/model_selection/_validation.py", line 195, in cross_validate
    for train, test in cv.split(X, y, groups))
  File "/home/user/.local/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 779, in __call__
    while self.dispatch_one_batch(iterator):
  File "/home/user/.local/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 625, in dispatch_one_batch
    self._dispatch(tasks)
  File "/home/user/.local/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 588, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "/home/user/.local/lib/python3.5/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 111, in apply_async
    result = ImmediateResult(func)
  File "/home/user/.local/lib/python3.5/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 332, in __init__
    self.results = batch()
  File "/home/user/.local/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 131, in __call__
    return [func(*args, **kwargs) for func, args, kwargs in self.items]
  File "/home/user/.local/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 131, in <listcomp>
    return [func(*args, **kwargs) for func, args, kwargs in self.items]
  File "/home/user/.local/lib/python3.5/site-packages/sklearn/model_selection/_validation.py", line 437, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/user/.local/lib/python3.5/site-packages/sklearn/pipeline.py", line 259, in fit
    self._final_estimator.fit(Xt, y, **fit_params)
  File "/home/user/.local/lib/python3.5/site-packages/keras/wrappers/scikit_learn.py", line 147, in fit
    history = self.model.fit(x, y, **fit_args)
  File "/home/user/.local/lib/python3.5/site-packages/keras/models.py", line 867, in fit
    initial_epoch=initial_epoch)
  File "/home/user/.local/lib/python3.5/site-packages/keras/engine/training.py", line 1575, in fit
    self._make_train_function()
  File "/home/user/.local/lib/python3.5/site-packages/keras/engine/training.py", line 960, in _make_train_function
    loss=self.total_loss)
  File "/home/user/.local/lib/python3.5/site-packages/keras/legacy/interfaces.py", line 87, in wrapper
    return func(*args, **kwargs)
  File "/home/user/.local/lib/python3.5/site-packages/keras/optimizers.py", line 432, in get_updates
    m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
  File "/home/user/.local/lib/python3.5/site-packages/tensorflow/python/ops/math_ops.py", line 856, in binary_op_wrapper
    y = ops.convert_to_tensor(y, dtype=x.dtype.base_dtype, name="y")
  File "/home/user/.local/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 611, in convert_to_tensor
    as_ref=False)
  File "/home/user/.local/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 676, in internal_convert_to_tensor
    ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
  File "/home/user/.local/lib/python3.5/site-packages/tensorflow/python/framework/constant_op.py", line 121, in _constant_tensor_conversion_function
    return constant(v, dtype=dtype, name=name)
  File "/home/user/.local/lib/python3.5/site-packages/tensorflow/python/framework/constant_op.py", line 102, in constant
    tensor_util.make_tensor_proto(value, dtype=dtype, shape=shape, verify_shape=verify_shape))
  File "/home/user/.local/lib/python3.5/site-packages/tensorflow/python/framework/tensor_util.py", line 364, in make_tensor_proto
    raise ValueError("None values not supported.")
ValueError: None values not supported.

Update 1

Following the answer below, the code now runs. Unfortunately, the correlation_coefficient and correlation_coefficient_loss functions give values that differ from each other, and I am not sure either of them matches the value you would get from 1 - scipy.stats.pearsonr()[0]**2.

Why are the loss functions giving the wrong outputs, and how can they be corrected to give the same values as 1 - scipy.stats.pearsonr()[0]**2 would give?
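For reference, the target value the loss is supposed to match can be computed directly with scipy; a standalone sketch (the numbers here are made-up illustration only):

import numpy as np
from scipy.stats import pearsonr

y_true = np.array([1.0, 2.0, 3.0, 4.0])
y_pred = np.array([1.1, 1.9, 3.2, 3.8])
# 1 - r**2, the quantity the keras loss should reproduce
print(1 - pearsonr(y_true, y_pred)[0]**2)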

Here is fully self-contained code that should just run:

I have given up on the correlation_coefficient function and now only use correlation_coefficient_loss.

import numpy as np 
import sys 
import math 
from scipy.stats import ortho_group 
from scipy.stats import pearsonr 
import matplotlib.pyplot as plt 
from keras.models import Sequential 
from keras.layers import Dense 
from keras.wrappers.scikit_learn import KerasRegressor 
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import StandardScaler 
from sklearn.pipeline import Pipeline 
import tensorflow as tf 
from keras import backend as K 


def permanent(M):
    # computes the permanent of M (Glynn-style expansion with Gray-code ordering)
    n = M.shape[0]
    d = np.ones(n)
    j = 0
    s = 1
    f = np.arange(n)
    v = M.sum(axis=0)
    p = np.prod(v)
    while (j < n-1):
        v -= 2*d[j]*M[j]
        d[j] = -d[j]
        s = -s
        prod = np.prod(v)
        p += s*prod
        f[0] = 0
        f[j] = f[j+1]
        f[j+1] = j+1
        j = f[0]
    return p/2**(n-1)
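
# Optional sanity check, not in the original post: permanent_bruteforce is a
# hypothetical helper that cross-checks permanent() against the permutation
# definition of the permanent (only feasible for small n).
def permanent_bruteforce(M):
    import itertools
    n = M.shape[0]
    return sum(np.prod([M[i, p[i]] for i in range(n)])
               for p in itertools.permutations(range(n)))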


def correlation_coefficient_loss(y_true, y_pred): 
    x = y_true 
    y = y_pred 
    mx = K.mean(x) 
    my = K.mean(y) 
    xm, ym = x-mx, y-my 
    r_num = K.sum(xm * ym) 
    r_den = K.sum(K.sum(K.square(xm)) * K.sum(K.square(ym))) 
    r = r_num/r_den 
    return 1 - r**2 


def correlation_coefficient(y_true, y_pred):
    pearson_r, update_op = tf.contrib.metrics.streaming_pearson_correlation(y_pred, y_true)
    # find all variables created for this metric
    metric_vars = [i for i in tf.local_variables() if 'correlation_coefficient' in i.name.split('/')[1]]

    # Add metric variables to GLOBAL_VARIABLES collection.
    # They will be initialized for new session.
    for v in metric_vars:
        tf.add_to_collection(tf.GraphKeys.GLOBAL_VARIABLES, v)

    # force to update metric values
    with tf.control_dependencies([update_op]):
        pearson_r = tf.identity(pearson_r)
        return 1 - pearson_r**2


def baseline_model():
    # create model
    model = Sequential()
    model.add(Dense(4000, input_dim=no_rows**2, kernel_initializer='normal', activation='relu'))
    # model.add(Dense(2000, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model
    model.compile(loss=correlation_coefficient_loss, optimizer='adam', metrics=[correlation_coefficient])
    return model


no_rows = 8 

print("Making the input data using seed 7", file=sys.stderr) 
np.random.seed(7) 
U = ortho_group.rvs(no_rows**2) 
U = U[:, :no_rows] 
# U is a random orthogonal matrix 
X = [] 
Y = [] 
print(U) 
for i in range(40000):
    I = np.random.choice(no_rows**2, size=no_rows)
    A = U[I][np.lexsort(np.rot90(U[I]))]
    X.append(A.ravel())
    Y.append(-math.log(permanent(A)**2, 2))

X = np.array(X) 
Y = np.array(Y) 

estimators = [] 
estimators.append(('standardize', StandardScaler())) 
estimators.append(('mlp', KerasRegressor(build_fn=baseline_model, epochs=100, batch_size=32, verbose=2))) 
pipeline = Pipeline(estimators) 
X_train, X_test, y_train, y_test = train_test_split(X, Y, 
                train_size=0.75, test_size=0.25) 
pipeline.fit(X_train, y_train) 
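
# An assumed evaluation step (not in the original post) showing how the
# test-set loss quoted below could be obtained, using the pearsonr import above:
y_est = pipeline.predict(X_test)
print(1 - pearsonr(y_test, y_est)[0]**2)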

Update following the answer from JulioDanielReyes below. However, something is still wrong, or keras is overfitting significantly. Even when I have:

def baseline_model():
    model = Sequential()
    model.add(Dense(40, input_dim=no_rows**2, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    model.compile(loss=correlation_coefficient_loss, optimizer='adam', metrics=[correlation_coefficient_loss])
    return model

I get a loss of, for example, 0.6653 after 100 epochs, but 0.857 when I test the trained model.

How can it be overfitting with such a tiny number of nodes in the hidden layer?
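One possible explanation (an observation added here, not from the original thread): the training loss keras prints is an average of per-batch losses, and Pearson correlation does not aggregate linearly across batches, so a batch-averaged 1 - r**2 is not directly comparable to 1 - r**2 computed over the whole test set. A small numpy sketch of the gap:

import numpy as np
from scipy.stats import pearsonr

rng = np.random.RandomState(0)
y_true = rng.randn(992)
y_pred = 0.5 * y_true + rng.randn(992)  # noisy predictions

# loss over the full set vs. mean of per-batch losses (batch_size=32)
full = 1 - pearsonr(y_true, y_pred)[0]**2
per_batch = [1 - pearsonr(y_true[i:i+32], y_pred[i:i+32])[0]**2
             for i in range(0, 992, 32)]
print(full, np.mean(per_batch))  # the two numbers generally differ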


Have you tried 1-K.square(pearson_r)? –


@DanielMöller No, I haven't. Could you explain what you have in mind? – eleanora


Exactly, instead of '1 - pearson_r ** 2'. –

Answer


According to the keras documentation, you should pass the squared correlation coefficient as a function instead of the string 'mean_squared_error'.

The function needs to receive two tensors (y_true, y_pred). You can look at the keras source code for inspiration.
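For example, a minimal custom-loss skeleton with the required signature could look like this (a generic sketch, not the correlation loss itself):

from keras import backend as K

def my_loss(y_true, y_pred):
    # any differentiable expression built from backend ops works as a loss
    return K.mean(K.square(y_pred - y_true), axis=-1)

# model.compile(loss=my_loss, optimizer='adam')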

There is also a function, tf.contrib.metrics.streaming_pearson_correlation, implemented in tensorflow. Just be careful with the order of the parameters; it should be something like this:

Update 1: initializing the local variables, according to this issue:

import tensorflow as tf

def correlation_coefficient(y_true, y_pred):
    pearson_r, update_op = tf.contrib.metrics.streaming_pearson_correlation(y_pred, y_true, name='pearson_r')
    # find all variables created for this metric
    metric_vars = [i for i in tf.local_variables() if 'pearson_r' in i.name.split('/')]

    # Add metric variables to GLOBAL_VARIABLES collection.
    # They will be initialized for new session.
    for v in metric_vars:
        tf.add_to_collection(tf.GraphKeys.GLOBAL_VARIABLES, v)

    # force to update metric values
    with tf.control_dependencies([update_op]):
        pearson_r = tf.identity(pearson_r)
        return 1 - pearson_r**2

... 

model.compile(loss=correlation_coefficient, optimizer='adam') 

Update 2: even though you cannot use the scipy function directly, you can look at its implementation and port it to your code using the keras backend.

Update 3: since the tensorflow function above may not be differentiable, your loss function needs to be something like this (please check the math):

import tensorflow as tf
from keras import backend as K

def correlation_coefficient_loss(y_true, y_pred):
    x = y_true
    y = y_pred
    mx = K.mean(x)
    my = K.mean(y)
    xm, ym = x - mx, y - my
    r_num = K.sum(tf.multiply(xm, ym))
    r_den = K.sqrt(tf.multiply(K.sum(K.square(xm)), K.sum(K.square(ym))))
    r = r_num / r_den

    r = K.maximum(K.minimum(r, 1.0), -1.0)
    return 1 - K.square(r)

Update 4: it turns out the two functions above give different results, but correlation_coefficient_loss gives the same result as scipy.stats.pearsonr. Here is the code to test it:

import tensorflow as tf 
from keras import backend as K 
import numpy as np 
import scipy.stats 

inputa = np.array([[3, 1, 2, 3, 4, 5],
                   [1, 2, 3, 4, 5, 6],
                   [1, 2, 3, 4, 5, 6]])
inputb = np.array([[3, 1, 2, 3, 4, 5],
                   [3, 1, 2, 3, 4, 5],
                   [6, 5, 4, 3, 2, 1]])

with tf.Session() as sess:
    a = tf.placeholder(tf.float32, shape=[None])
    b = tf.placeholder(tf.float32, shape=[None])
    f1 = correlation_coefficient(a, b)
    f2 = correlation_coefficient_loss(a, b)

    sess.run(tf.global_variables_initializer())

    for i in range(inputa.shape[0]):
        f1_result, f2_result = sess.run([f1, f2], feed_dict={a: inputa[i], b: inputb[i]})
        scipy_result = 1 - scipy.stats.pearsonr(inputa[i], inputb[i])[0]**2
        print("a: " + str(inputa[i]) + " b: " + str(inputb[i]))
        print("correlation_coefficient: " + str(f1_result))
        print("correlation_coefficient_loss: " + str(f2_result))
        print("scipy.stats.pearsonr:" + str(scipy_result))

Results:

a: [3 1 2 3 4 5] b: [3 1 2 3 4 5] 
correlation_coefficient: -2.38419e-07 
correlation_coefficient_loss: 0.0 
scipy.stats.pearsonr:0.0 
a: [1 2 3 4 5 6] b: [3 1 2 3 4 5] 
correlation_coefficient: 0.292036 
correlation_coefficient_loss: 0.428571 
scipy.stats.pearsonr:0.428571428571 
a: [1 2 3 4 5 6] b: [6 5 4 3 2 1] 
correlation_coefficient: 0.994918 
correlation_coefficient_loss: 0.0 
scipy.stats.pearsonr:0.0 

Another option is to implement the function from primitives using 'keras.backend'. –


Thanks. The correlation is a number between -1 and 1, so using it directly for minimization would be a mistake. Does keras always minimize? – eleanora


Maybe 'return -pearson_r' would be enough. –
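Following up on that suggestion, a sign-aware variant could look like this; a hedged sketch built from the same backend ops as Update 3, not tested in this thread:

from keras import backend as K

def negative_correlation_loss(y_true, y_pred):
    # minimizing this drives the Pearson correlation toward +1,
    # unlike 1 - r**2, which also rewards r = -1
    mx, my = K.mean(y_true), K.mean(y_pred)
    xm, ym = y_true - mx, y_pred - my
    r = K.sum(xm * ym) / K.sqrt(K.sum(K.square(xm)) * K.sum(K.square(ym)))
    r = K.maximum(K.minimum(r, 1.0), -1.0)
    return -r  # or 1 - r, to keep the loss non-negative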