2

我正在尝试用 dlib 的默认数据集(/dlib-19.0/examples/faces/training_with_face_landmarks.xml)和默认训练示例(train_shape_predictor_ex.cpp)来训练形状预测器。但是,用默认数据集训练出来的形状预测器准确度较低。

我想训练出一个与默认形状预测器(shape_predictor_68_face_landmarks.dat)完全相同的形状预测器,因为我使用的是相同的数据集和相同的训练代码。但我遇到了一些问题。

训练结束后,我得到的 .dat 文件只有 16.6mb(而 dlib 默认的预测器 shape_predictor_68_face_landmarks.dat 有 99.7mb)。测试我自己的 .dat 文件(16.6mb)时,得到的准确度较低;而测试默认的 .dat 文件(shape_predictor_68_face_landmarks.dat,99.7mb)时,得到的准确度很高。

我的形状预测器的结果:(图片:My shape predictor);默认预测器 shape_predictor_68_face_landmarks.dat 的结果:(图片:shape_predictor_68_face_landmarks.dat)

训练:

#include <QCoreApplication> 

#include <dlib/image_processing.h> 
#include <dlib/data_io.h> 
#include <iostream> 

using namespace dlib; 
using namespace std; 

// Forward declaration; the definition follows main().
// For every entry of `objects` (one inner vector of face annotations per
// image) it returns a parallel inner vector holding one interocular distance
// per face, in the same order.
std::vector<std::vector<double> > get_interocular_distances (
     const std::vector<std::vector<full_object_detection> >& objects 
     ); 

/*
    Training program.

    Loads the annotated face dataset, trains a shape_predictor with the
    settings below, prints the mean error measured on the training data itself
    and serializes the resulting model into the dataset directory.

    Returns 0 when training and serialization completed, 1 when an exception
    was reported.
*/
int main(int argc, char *argv[])
{
    // The application object is still constructed as before, but its event
    // loop is never entered (see the return statements).
    QCoreApplication a(argc, argv);

    try
    {
        const std::string faces_directory = "/home/user/Documents/dlib-19.0/examples/faces/";

        // Images and, per image, the landmark annotations of every face.
        dlib::array<array2d<unsigned char> > images_train;
        std::vector<std::vector<full_object_detection> > faces_train;
        load_image_dataset(images_train, faces_train, faces_directory+"training_with_face_landmarks.xml");

        // Trainer configuration used for this run.
        shape_predictor_trainer trainer;
        trainer.set_oversampling_amount(300);
        trainer.set_nu(0.05);
        trainer.set_tree_depth(2);
        trainer.be_verbose();

        shape_predictor sp = trainer.train(images_train, faces_train);

        // The error is evaluated on the very data the model was fitted to and
        // is scaled per face by the interocular distances passed as last argument.
        cout << "mean training error: "<<
            test_shape_predictor(sp, images_train, faces_train, get_interocular_distances(faces_train)) << endl;

        serialize(faces_directory+"sp_default_settings.dat") << sp;
    }
    catch (const std::exception& e)
    {
        cout << "\nexception thrown!" << endl;
        cout << e.what() << endl;
        // Report the failure through the exit status instead of idling.
        return 1;
    }

    // Nothing in this program posts events or calls quit(), so a.exec() would
    // block forever after the work is done; terminate directly instead.
    return 0;
}

double interocular_distance (
     const full_object_detection& det 
     ) 
{ 
    dlib::vector<double,2> l, r; 
    double cnt = 0; 
    // Find the center of the left eye by averaging the points around 
    // the eye. 
    for (unsigned long i = 36; i <= 41; ++i) 
    { 
     l += det.part(i); 
     ++cnt; 
    } 
    l /= cnt; 

    // Find the center of the right eye by averaging the points around 
    // the eye. 
    cnt = 0; 
    for (unsigned long i = 42; i <= 47; ++i) 
    { 
     r += det.part(i); 
     ++cnt; 
    } 
    r /= cnt; 

    // Now return the distance between the centers of the eyes 
    return length(l-r); 
} 

std::vector<std::vector<double> > get_interocular_distances (
     const std::vector<std::vector<full_object_detection> >& objects 
     ) 
{ 
    std::vector<std::vector<double> > temp(objects.size()); 
    for (unsigned long i = 0; i < objects.size(); ++i) 
    { 
     for (unsigned long j = 0; j < objects[i].size(); ++j) 
     { 
      temp[i].push_back(interocular_distance(objects[i][j])); 
     } 
    } 
    return temp; 
} 

测试:

#include <QCoreApplication> 
#include <dlib/image_processing/frontal_face_detector.h> 
#include <dlib/image_processing/render_face_detections.h> 
#include <dlib/image_processing.h> 
#include <dlib/gui_widgets.h> 
#include <dlib/image_io.h> 
#include <dlib/data_io.h> 
#include <iostream> 

using namespace dlib; 
using namespace std; 

int main(int argc, char *argv[]) 
{ 
    QCoreApplication a(argc, argv); 

    try 
     { 

      // We need a face detector. We will use this to get bounding boxes for 
      // each face in an image. 
      frontal_face_detector detector = get_frontal_face_detector(); 
      // And we also need a shape_predictor. This is the tool that will predict face 
      // landmark positions given an image and face bounding box. Here we are just 
      // loading the model from the shape_predictor_68_face_landmarks.dat file you gave 
      // as a command line argument. 
      shape_predictor sp; 
      deserialize("/home/user/Downloads/muct-master/samples/sp_default_settings.dat") >> sp; 

      string srcDir = "/home/user/Downloads/muct-master/samples/selection/"; 
      string dstDir = "/home/user/Downloads/muct-master/samples/my_results_default/"; 

      std::vector<string> vecOfImg; 

      vecOfImg.push_back("i001qa-mn.jpg"); 
      vecOfImg.push_back("i002ra-mn.jpg"); 
      vecOfImg.push_back("i003ra-fn.jpg"); 
      vecOfImg.push_back("i003sa-fn.jpg"); 
      vecOfImg.push_back("i004qa-mn.jpg"); 
      vecOfImg.push_back("i004ra-mn.jpg"); 
      vecOfImg.push_back("i005ra-fn.jpg"); 
      vecOfImg.push_back("i006ra-mn.jpg"); 
      vecOfImg.push_back("i007qa-fn.jpg"); 
      vecOfImg.push_back("i008ra-mn.jpg"); 
      vecOfImg.push_back("i009qa-mn.jpg"); 
      vecOfImg.push_back("i009ra-mn.jpg"); 
      vecOfImg.push_back("i009sa-mn.jpg"); 
      vecOfImg.push_back("i010qa-mn.jpg"); 
      vecOfImg.push_back("i010sa-mn.jpg"); 
      vecOfImg.push_back("i011qa-mn.jpg"); 
      vecOfImg.push_back("i011ra-mn.jpg"); 
      vecOfImg.push_back("i012ra-mn.jpg"); 
      vecOfImg.push_back("i012sa-mn.jpg"); 
      vecOfImg.push_back("i014qa-fn.jpg"); 

      for(int imgC = 0; imgC < vecOfImg.size(); imgC++){ 

       array2d<rgb_pixel> img; 
       load_image(img, srcDir + vecOfImg.at(imgC)); 
       // Make the image larger so we can detect small faces. 
       pyramid_up(img); 

       // Now tell the face detector to give us a list of bounding boxes 
       // around all the faces in the image. 
       std::vector<rectangle> dets = detector(img); 
       cout << "Number of faces detected: " << dets.size() << endl; 

       // Now we will go ask the shape_predictor to tell us the pose of 
       // each face we detected. 
       std::vector<full_object_detection> shapes; 
       for (unsigned long j = 0; j < dets.size(); ++j) 
       { 
        full_object_detection shape = sp(img, dets[j]); 
        cout << "number of parts: "<< shape.num_parts() << endl; 
        cout << "pixel position of first part: " << shape.part(0) << endl; 
        cout << "pixel position of second part: " << shape.part(1) << endl; 

        for(unsigned long i = 0; i < shape.num_parts(); i++){ 
         draw_solid_circle(img, shape.part(i), 2, rgb_pixel(100,255,100)); 
        } 

        save_jpeg(img, dstDir + vecOfImg.at(imgC)); 
        // You get the idea, you can get all the face part locations if 
        // you want them. Here we just store them in shapes so we can 
        // put them on the screen. 
        shapes.push_back(shape); 
       } 

      } 

     } 
     catch (exception& e) 
     { 
      cout << "\nexception thrown!" << endl; 
      cout << e.what() << endl; 
     } 
    return a.exec(); 
} 

既然我使用的是默认数据集和示例代码,那么我的训练和测试与默认的有什么区别?我怎样才能训练出与 shape_predictor_68_face_landmarks.dat 一样的形状预测器?

+1

即使你在sourceforge页面上提出了一个问题(但没有得到答案),那里仍然有很多信息,很确定这个问题已经被讨论过:) –

回答

0

它之所以生成一个 16.6MB 的 DAT 文件,是因为您要么只用了很少的图像进行训练,要么没有使用正确的设置。

根据这个 GitHub issue,您在训练过程中没有使用最佳/默认设置。

在您的设置中,训练器(trainer)的过采样量非常高(300),而默认值是 20。您还通过增强正则化(把 nu 参数调小)以及使用深度更小的树,降低了模型的容量。

您的nu参数:0.05。默认值是0.1

你的树深度:2,默认值为4

通过反复试验来调整这些参数并重新训练,您可以在更小的文件大小下找到最佳精度。

请记住,每次训练过程大约需要 45 分钟,而且您至少需要一台拥有 16GB 内存的计算机。

1

示例数据集(/dlib-19.0/examples/faces/training_with_face_landmarks.xml)太小,无法训练出高质量的模型。dlib 自带的模型并不是用这个数据集训练出来的。

这些示例使用小数据集,是为了让示例运行得更快。所有示例的目的都是讲解 dlib API,而不是成为实用的程序。它们只是文档。用 dlib API 做出有趣的东西,要靠你自己。

相关问题