Segmentation fault (core dumped) error with the cifar10 TensorFlow example

I am trying to adapt the cifar10 example code, and I don't know why I get a "Segmentation fault (core dumped)" error when I run my modified cifar10_eval.py. The code actually seems to work on a Mac, and I don't know why it doesn't work on Linux.

Thanks for your help.

----------------------- Code below -----------------------

# Copyright 2015 Google Inc. All Rights Reserved. 
# 
# Licensed under the Apache License, Version 2.0 (the "License"); 
# you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at 
# 
#  http://www.apache.org/licenses/LICENSE-2.0 
# 
# Unless required by applicable law or agreed to in writing, software 
# distributed under the License is distributed on an "AS IS" BASIS, 
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and 
# limitations under the License. 
# ============================================================================== 

"""Evaluation for CIFAR-10 
Accuracy: 
cifar10_train.py achieves 83.0% accuracy after 100K steps (256 epochs 
of data) as judged by cifar10_eval.py. 
Speed: 
On a single Tesla K40, cifar10_train.py processes a single batch of 128 images 
in 0.25-0.35 sec (i.e. 350 - 600 images /sec). The model reaches ~86% 
accuracy after 100K steps in 8 hours of training time. 
Usage: 
Please see the tutorial and website for how to download the CIFAR-10 
data set, compile the program and train the model. 
http://tensorflow.org/tutorials/deep_cnn/ 
""" 
from __future__ import absolute_import 
from __future__ import division 
from __future__ import print_function 

from datetime import datetime 
import math 
import time 

import numpy as np 
import tensorflow as tf 
import os 
import StringIO 
import cv 
import cv2 
import urllib 


from PIL import Image 

import matplotlib 

import glob 

import cifar10 

cur_dir = os.getcwd() 

FLAGS = tf.app.flags.FLAGS 

tf.app.flags.DEFINE_string('eval_dir', '/tmp/cifar10_eval', 
          """Directory where to write event logs.""") 
tf.app.flags.DEFINE_string('eval_data', 'test', 
          """Either 'test' or 'train_eval'.""") 
tf.app.flags.DEFINE_string('checkpoint_dir', '/tmp/cifar10_train', 
          """Directory where to read model checkpoints.""") 
tf.app.flags.DEFINE_integer('eval_interval_secs', 60 * 5, 
          """How often to run the eval.""") 
tf.app.flags.DEFINE_integer('num_examples', 128, 
          """Number of examples to run.""") 
tf.app.flags.DEFINE_boolean('run_once', False, 
         """Whether to run eval only once.""") 


def eval_once(saver, summary_writer, top_k_op, summary_op, images, labels, logits):
  """Run Eval once.
  Args:
    saver: Saver.
    summary_writer: Summary writer.
    top_k_op: Top K op.
    summary_op: Summary op.
    images: Images tensor from cifar10.inputs().
    labels: Labels tensor from cifar10.inputs().
    logits: Logits tensor from cifar10.inference().
  """
  with tf.Session() as sess:
    ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
    if ckpt and ckpt.model_checkpoint_path:
      # Restores from checkpoint
      saver.restore(sess, ckpt.model_checkpoint_path)
      # Assuming model_checkpoint_path looks something like:
      #   /my-favorite-path/cifar10_train/model.ckpt-0,
      # extract global_step from it.
      global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
    else:
      print('No checkpoint file found')
      return

    # Start the queue runners.
    coord = tf.train.Coordinator()
    try:
      threads = []
      for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
        threads.extend(qr.create_threads(sess, coord=coord, daemon=True,
                                         start=True))

      num_iter = int(math.ceil(FLAGS.num_examples / FLAGS.batch_size))
      true_count = 0  # Counts the number of correct predictions.
      total_sample_count = num_iter * FLAGS.batch_size
      step = 0

      while step < num_iter and not coord.should_stop():
        predictions = sess.run([top_k_op])
        true_count += np.sum(predictions)
        step += 1

        # Compute precision @ 1.
        precision = true_count / total_sample_count
        print('%s: precision @ 1 = %.3f' % (datetime.now(), precision))

        # Per-image predicted class and its softmax score.
        e = tf.nn.softmax(logits)
        log = sess.run(e)
        # print(log)
        predict = np.zeros([FLAGS.batch_size])
        max_logi = np.zeros([FLAGS.batch_size])
        for i in xrange(FLAGS.batch_size):
          predict[i] = np.argmax(log[i, :])
          max_logi[i] = log[i, :].max()

        lab = sess.run(labels)
        top = sess.run([top_k_op])
        predictions = sess.run([top_k_op])
        true_count = 0
        true_count += np.sum(predictions)
        # chk = sess.run(images)
        # print(top)

        # Save each image of the batch under result/Correct or result/Incorrect,
        # grouped by predicted class.
        for i in xrange(FLAGS.batch_size):
          # tf.cast(images, tf.uint8)
          img = sess.run(images)
          save_img = img[i, :]
          # Rescale to [0, 255] before writing as JPEG.
          save_img = ((save_img - save_img.min()) /
                      (save_img.max() - save_img.min()) * 255)
          # save_img2 = Image.fromarray(save_img, "RGB")

          path = cur_dir + "/result/"
          if not os.path.exists(path):
            os.mkdir(path, 0755)
          if predictions[0][i]:
            path = path + "Correct/"
          else:
            path = path + "Incorrect/"
          if not os.path.exists(path):
            os.mkdir(path, 0755)

          class_fold = path + str(predict[i]) + "/"
          # class_fold = path + str(max_logi[i]) + "/"
          if not os.path.exists(class_fold):
            os.mkdir(class_fold, 0755)

          cv2.imwrite(os.path.join(class_fold, str(i) + ".jpeg"), save_img)

      summary = tf.Summary()
      summary.ParseFromString(sess.run(summary_op))
      summary.value.add(tag='Precision @ 1', simple_value=precision)
      summary_writer.add_summary(summary, global_step)
    except Exception as e:  # pylint: disable=broad-except
      coord.request_stop(e)

    coord.request_stop()
    coord.join(threads, stop_grace_period_secs=10)


def evaluate():
  """Eval CIFAR-10 for a number of steps."""
  with tf.Graph().as_default() as g:
    # Get images and labels for CIFAR-10.
    eval_data = FLAGS.eval_data == 'test'
    images, labels = cifar10.inputs(eval_data=eval_data)

    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits = cifar10.inference(images)
    true_count = 0
    # Calculate predictions.
    top_k_op = tf.nn.in_top_k(logits, labels, 1)

    # Restore the moving average version of the learned variables for eval.
    variable_averages = tf.train.ExponentialMovingAverage(
        cifar10.MOVING_AVERAGE_DECAY)
    variables_to_restore = variable_averages.variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)

    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.merge_all_summaries()

    summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir, g)

    # while True:
    eval_once(saver, summary_writer, top_k_op, summary_op, images, labels, logits)
    #   if False:
    #     break
    #   time.sleep(FLAGS.eval_interval_secs)


def main(argv=None):  # pylint: disable=unused-argument
  cifar10.maybe_download_and_extract()
  if tf.gfile.Exists(FLAGS.eval_dir):
    tf.gfile.DeleteRecursively(FLAGS.eval_dir)
  tf.gfile.MakeDirs(FLAGS.eval_dir)
  evaluate()


if __name__ == '__main__':
  tf.app.run()

Answer


This looks like a recurring issue in which the TensorFlow Python module conflicts with the OpenCV and/or PIL libraries. The root cause is usually an incompatible version of libjpeg or libpng bundled with those libraries.
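
One way to check this on Linux is to see which libjpeg/libpng each compiled extension actually links against. The following is a diagnostic sketch, not part of the original answer; it assumes cv2.__file__ points at the compiled cv2.so, as it does with the old OpenCV 2.x Python bindings:

import subprocess

# Diagnostic sketch (assumption: cv2.__file__ is the compiled cv2.so).
# Print the libjpeg/libpng entries that the OpenCV bindings link against;
# the same check can be repeated for PIL's and TensorFlow's shared objects.
import cv2

out = subprocess.check_output(['ldd', cv2.__file__]).decode()
for line in out.splitlines():
    if 'libjpeg' in line or 'libpng' in line:
        print(line.strip())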

This should be fixed in the latest nightly builds of TensorFlow. As an alternative workaround, you can try moving the line:

import tensorflow as tf 

...below the import statements for cv, cv2, and PIL.
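
For example, the top of the modified cifar10_eval.py could be reordered like this (a sketch that only reuses imports already present in the question; the rest of the import list stays unchanged):

# Load the image libraries (cv, cv2, PIL) before TensorFlow, as suggested
# above, so that their libjpeg/libpng are resolved first.
import cv
import cv2
from PIL import Image

import numpy as np
import tensorflow as tf  # now imported after cv/cv2/PIL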


There seems to be no error message anymore. Thank you for your help! – RSBS