
How do I split my own dataset into training and validation sets for a CNN in TensorFlow? I am working from the TensorFlow CNN tutorial code -> https://www.tensorflow.org/tutorials/layers and trying to run it on my own data instead of the MNIST dataset. Since I am new to this, I am struggling with the code and getting errors.

I made a file.txt that contains, on each line, the path to an image on my computer and its label. I have 400 grayscale images of size 16×16.
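
Each line of the file holds an image path followed by its integer label; a made-up example of the format (the real paths are on my machine):

C:/Users/.../Images/image001.png 0
C:/Users/.../Images/image002.png 1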

Here is the code:

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf

...
from PIL import Image
import PIL.Image
#import imageflow
import os
import cv2
#import glob
import __main__ as _main_module
import matplotlib.pyplot as plt
from tensorflow.python.framework import ops
from tensorflow.python.framework import dtypes
from sklearn.model_selection import train_test_split
...

from tensorflow.contrib import learn
from tensorflow.contrib.learn.python.learn.estimators import model_fn as model_fn_lib

#tf.logging.set_verbosity(tf.logging.INFO)

#%%%%%%%%%%%%%%%%%%%%%% MY DATA %%%%%%%%%%%%%%%%%%%%%%%

def main(unused_argv):

    path = 'C:/Users/.../ImageDir-Lables-01.txt'
    filenames = []
    labels = []

    #Reading file and extracting paths and labels
    with open(path, 'r') as File:
        infoFile = File.readlines() #Reading all the lines from File
        for line in infoFile: #Reading line-by-line
            words = line.split() #Splitting lines in words using space character as separator
            filenames.append(words[0])
            labels.append(int(words[1]))

    NumFiles = len(filenames)
    print(NumFiles)

    #Converting filenames and labels into tensors
    tfilenames = ops.convert_to_tensor(filenames, dtype=dtypes.string)
    tlabels = ops.convert_to_tensor(labels, dtype=dtypes.int32)

    #Creating a queue which contains the list of files to read and the value of the labels
    filename_queue = tf.train.slice_input_producer([tfilenames, tlabels],
                                                   num_epochs=10,
                                                   shuffle=True,
                                                   capacity=NumFiles)

    #Reading the image files and decoding them
    rawIm = tf.read_file(filename_queue[0])
    decodedIm = tf.image.decode_image(rawIm) # png or jpg decoder

    #Extracting the labels queue
    label_queue = filename_queue[1]

    #Initializing Global and Local Variables so we avoid warnings and errors
    init_op = tf.group(tf.local_variables_initializer(), tf.global_variables_initializer())

    #Creating an InteractiveSession so we can run in iPython
    sess = tf.InteractiveSession()
    with sess.as_default():
        sess.run(init_op)
        # Start populating the filename queue.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        for i in range(NumFiles): #length of your filenames list
            nm, image, lb = sess.run([filename_queue[0], decodedIm, label_queue])

            print(image.shape)
            print(nm)
            print(lb)

        #Showing the current image
        jpgfile = Image.open(nm)
        jpgfile.show()

        coord.request_stop()
        coord.join(threads)

    train_data, train_labels, eval_data, eval_labels = \
        tf.train_split([filename_queue[0], filename_queue[1]], frac=.1)
#    train_data, eval_data, train_labels, eval_labels = \
#        train_test_split([filename_queue[0], filename_queue[1]], frac=0.2)
#    train_data, train_labels, eval_data, eval_labels = \
#        tf.split(tf.random_shuffle(filename_queue[0], filename_queue[1],
#                                   frac=0.25))

    return train_data, train_labels, eval_data, eval_labels
    print(train_data.shape)

###########################################

    # Create the Estimator
    Xray_classifier = learn.Estimator(model_fn=cnn_model_fn, model_dir="/tmp/Xray_convnet_model")

###########################################
    # Set up logging for predictions
    # Log the values in the "Softmax" tensor with label "probabilities"
    tensors_to_log = {"probabilities": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook(
        tensors=tensors_to_log, every_n_iter=50)

    # Train the model
    Xray_classifier.fit(
        x=train_data,
        y=train_labels,
        batch_size=10,
        steps=20000,
        monitors=[logging_hook])

    # Configure the accuracy metric for evaluation
    metrics = {
        "accuracy":
            learn.MetricSpec(
                metric_fn=tf.metrics.accuracy, prediction_key="classes"),
    }

    # Evaluate the model and print results
    eval_results = Xray_classifier.evaluate(
        x=eval_data, y=eval_labels, metrics=metrics)
    print(eval_results)

# Our application logic will be added here
if __name__ == "__main__":
    tf.app.run()

I tried 3 different snippets of code to split my dataset. When I use -> train_data, train_labels, eval_data, eval_labels = tf.train_split(image, lb, frac=0.1) it gives this error -> AttributeError: module 'tensorflow' has no attribute 'train_split'

When I use -> train_data, eval_data, train_labels, eval_labels = train_test_split([filename_queue[0], filename_queue[1]], frac=0.2) it gives this error -> TypeError: Invalid parameters passed: {'frac': 0.2}

When I use -> train_data, train_labels, eval_data, eval_labels = tf.split(tf.random_shuffle(filename_queue[0], filename_queue[1], frac=0.25)) it gives this error -> TypeError: random_shuffle() got an unexpected keyword argument 'frac'

Does anyone have an idea of what I should write to do the split? Any help would be appreciated. Thanks.
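
None of the three calls accepts a frac keyword: tf.train_split does not exist, tf.random_shuffle has no frac argument, and sklearn's train_test_split takes the split ratio as test_size and works on plain Python lists or NumPy arrays rather than on queue tensors. A minimal sketch of the sklearn call under that assumption (0.2 is an example ratio; filenames and labels are the lists read from the text file above):

from sklearn.model_selection import train_test_split

# Split the plain Python lists read from the label file, not the queue tensors.
train_data, eval_data, train_labels, eval_labels = train_test_split(
    filenames, labels, test_size=0.2)  # hold out 20% of the images for evaluation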


I was able to solve my problem a different way. I split my dataset before preparing it for TensorFlow, and then went through all of the steps separately for the training set and the test set. – Shittel
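
A rough sketch of that approach: split the raw filename/label lists first (for example with the train_test_split call sketched above) and then build a separate input pipeline for each subset. The helper name and the 10-epoch setting below are illustrative, not from the question:

from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.python.framework import ops, dtypes

def make_input_pipeline(file_list, label_list, num_epochs=10):
    # One slice_input_producer / decode pipeline per subset.
    tfiles = ops.convert_to_tensor(file_list, dtype=dtypes.string)
    tlabels = ops.convert_to_tensor(label_list, dtype=dtypes.int32)
    queue = tf.train.slice_input_producer([tfiles, tlabels],
                                          num_epochs=num_epochs,
                                          shuffle=True)
    image = tf.image.decode_image(tf.read_file(queue[0]))  # png or jpg decoder
    return image, queue[1]

# filenames and labels are the plain Python lists read from the label file.
train_files, eval_files, train_labels, eval_labels = train_test_split(
    filenames, labels, test_size=0.2)
train_image, train_label = make_input_pipeline(train_files, train_labels)
eval_image, eval_label = make_input_pipeline(eval_files, eval_labels)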
