2015-12-11 28 views
3

我想使用 GridSearchCV 查找最佳参数,并将 f1 用作评分指标。但以 f1 作为评分函数进行网格搜索时,会出现长达数页的错误消息。

如果我去掉 scoring 参数,一切运行正常,不会出现任何错误。

这里是我的代码:

from sklearn import grid_search 
parameters = {'n_neighbors':(1,3,5,10,15),'weights':('uniform','distance'),'algorithm':('ball_tree','kd_tree','brute'),'leaf_size':(5,10,20,30,50)} 
reg = grid_search.GridSearchCV(estimator=neigh,param_grid=parameters,scoring="f1") 
train_classifier(reg, X_train, y_train) 
train_f1_score = predict_labels(reg, X_train, y_train) 
print reg.best_params_ 
print "F1 score for training set: {}".format(train_f1_score) 
print "F1 score for test set: {}".format(predict_labels(reg, X_test, y_test)) 

当我执行这段代码时,会得到一页又一页的错误信息,我完全看不懂是怎么回事 :(

ValueError        Traceback (most recent call last) 
<ipython-input-17-3083ff8a20ea> in <module>() 
     3 parameters = {'n_neighbors':(1,3,5,10,15),'weights':('uniform','distance'),'algorithm':('ball_tree','kd_tree','brute'),'leaf_size':(5,10,20,30,50)} 
     4 reg = grid_search.GridSearchCV(estimator=neigh,param_grid=parameters,scoring="f1") 
----> 5 train_classifier(reg, X_train, y_train) 
     6 train_f1_score = predict_labels(reg, X_train, y_train) 
     7 print reg.best_params_ 

<ipython-input-9-b56ce25fd90b> in train_classifier(clf, X_train, y_train) 
     5  print "Training {}...".format(clf.__class__.__name__) 
     6  start = time.time() 
----> 7  clf.fit(X_train, y_train) 
     8  end = time.time() 
     9  print "Done!\nTraining time (secs): {:.3f}".format(end - start) 

//anaconda/lib/python2.7/site-packages/sklearn/grid_search.pyc in fit(self, X, y) 
    802 
    803   """ 
--> 804   return self._fit(X, y, ParameterGrid(self.param_grid)) 
    805 
    806 

//anaconda/lib/python2.7/site-packages/sklearn/grid_search.pyc in _fit(self, X, y, parameter_iterable) 
    551          self.fit_params, return_parameters=True, 
    552          error_score=self.error_score) 
--> 553     for parameters in parameter_iterable 
    554     for train, test in cv) 
    555 

//anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc in __call__(self, iterable) 
    802    self._iterating = True 
    803 
--> 804    while self.dispatch_one_batch(iterator): 
    805     pass 
    806 

//anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc in dispatch_one_batch(self, iterator) 
    660     return False 
    661    else: 
--> 662     self._dispatch(tasks) 
    663     return True 
    664 

//anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc in _dispatch(self, batch) 
    568 
    569   if self._pool is None: 
--> 570    job = ImmediateComputeBatch(batch) 
    571    self._jobs.append(job) 
    572    self.n_dispatched_batches += 1 

//anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc in __init__(self, batch) 
    181   # Don't delay the application, to avoid keeping the input 
    182   # arguments in memory 
--> 183   self.results = batch() 
    184 
    185  def get(self): 

//anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc in __call__(self) 
    70 
    71  def __call__(self): 
---> 72   return [func(*args, **kwargs) for func, args, kwargs in self.items] 
    73 
    74  def __len__(self): 

//anaconda/lib/python2.7/site-packages/sklearn/cross_validation.pyc in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, error_score) 
    1548 
    1549  else: 
-> 1550   test_score = _score(estimator, X_test, y_test, scorer) 
    1551   if return_train_score: 
    1552    train_score = _score(estimator, X_train, y_train, scorer) 

//anaconda/lib/python2.7/site-packages/sklearn/cross_validation.pyc in _score(estimator, X_test, y_test, scorer) 
    1604   score = scorer(estimator, X_test) 
    1605  else: 
-> 1606   score = scorer(estimator, X_test, y_test) 
    1607  if not isinstance(score, numbers.Number): 
    1608   raise ValueError("scoring must return a number, got %s (%s) instead." 

//anaconda/lib/python2.7/site-packages/sklearn/metrics/scorer.pyc in __call__(self, estimator, X, y_true, sample_weight) 
    88   else: 
    89    return self._sign * self._score_func(y_true, y_pred, 
---> 90             **self._kwargs) 
    91 
    92 

//anaconda/lib/python2.7/site-packages/sklearn/metrics/classification.pyc in f1_score(y_true, y_pred, labels, pos_label, average, sample_weight) 
    637  return fbeta_score(y_true, y_pred, 1, labels=labels, 
    638      pos_label=pos_label, average=average, 
--> 639      sample_weight=sample_weight) 
    640 
    641 

//anaconda/lib/python2.7/site-packages/sklearn/metrics/classification.pyc in fbeta_score(y_true, y_pred, beta, labels, pos_label, average, sample_weight) 
    754             average=average, 
    755             warn_for=('f-score',), 
--> 756             sample_weight=sample_weight) 
    757  return f 
    758 

//anaconda/lib/python2.7/site-packages/sklearn/metrics/classification.pyc in precision_recall_fscore_support(y_true, y_pred, beta, labels, pos_label, average, warn_for, sample_weight) 
    982     else: 
    983      raise ValueError("pos_label=%r is not a valid label: %r" % 
--> 984          (pos_label, present_labels)) 
    985    labels = [pos_label] 
    986  if labels is None: 

ValueError: pos_label=1 is not a valid label: array(['no', 'yes'], 
     dtype='|S3') 

回答

9

看起来你的标签数组的取值是 'no' 和 'yes'。你应该将它们转换为 0/1 的二进制数字表示,因为错误信息表明:评分函数默认以 pos_label=1 作为正类标签,而你的标签数组中并不存在这个值。

另一种无需修改标签数组的解决方法:

from sklearn.metrics import f1_score 
from sklearn.metrics import make_scorer 

f1_scorer = make_scorer(f1_score, pos_label="yes") 
reg = grid_search.GridSearchCV(estimator=neigh,param_grid=parameters,scoring=f1_scorer) 
+0

我理解你的思路,也明白了我的错误。不幸的是,使用你提出的解决方案后,我仍然得到这些错误:------------ ValueError Traceback (most recent call last) in () 5 parameters = {'n_neighbors':(1,3,5,10,15),'weights':('uniform','distance'),'algorithm':('ball_tree','kd_tree','brute'),'leaf_size':(5,10,20,30,50)} ........ ValueError: pos_label='1' is not a valid label: array(['no', 'yes'], dtype='|S3') – hmmmbob

+0

@hmmmbob,哎呀,我忘了你的标签是 'yes' 和 'no'。我已经更新了代码,请现在再试一次。 –

相关问题