TFRecords
2018, Jul 10

Creating a TFRecord from the cats vs. dogs data:

```python
from random import shuffle
import glob
import sys

import cv2
import numpy as np
import tensorflow as tf
from tqdm import tqdm

train_filename = 'test1.tfrecords'  # address to save the TFRecords file
shuffle_data = True                 # shuffle the addresses before saving
cat_dog_train_path = "./all/test1/test1/*jpg"

# read addresses and labels from the 'train' folder
addrs = glob.glob(cat_dog_train_path)
labels = [0 if 'cat' in addr else 1 for addr in addrs]  # 0 = Cat, 1 = Dog

# shuffle the data
if shuffle_data:
    c = list(zip(addrs, labels))
    shuffle(c)
    addrs, labels = zip(*c)

# Divide the data into 60% train, 20% validation, and 20% test
train_addrs = addrs[0:int(0.6 * len(addrs))]
train_labels = labels[0:int(0.6 * len(labels))]
val_addrs = addrs[int(0.6 * len(addrs)):int(0.8 * len(addrs))]
val_labels = labels[int(0.6 * len(labels)):int(0.8 * len(labels))]
test_addrs = addrs[int(0.8 * len(addrs)):]
test_labels = labels[int(0.8 * len(labels)):]


def load_image(addr):
    # read the image in grayscale and resize it to (224, 224);
    # cv2 loads color images as BGR, so if you read in color instead,
    # convert with cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.imread(addr, 0)
    img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_CUBIC)
    img = img.astype(np.uint8)
    return img


def _int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))


def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


# open the TFRecords file
writer = tf.python_io.TFRecordWriter(train_filename)

for i in tqdm(range(len(train_addrs))):
    # print how many images have been saved every 1000 images
    if not i % 1000:
        print('Train data: {}/{}'.format(i, len(train_addrs)))
        sys.stdout.flush()

    # Load the image
    img = load_image(train_addrs[i])
    label = train_labels[i]

    # Create a feature
    feature = {'train/label': _int64_feature(label),
               'train/image': _bytes_feature(tf.compat.as_bytes(img.tostring()))}

    # Create an example protocol buffer
    example = tf.train.Example(features=tf.train.Features(feature=feature))

    # Serialize to string and write it to the file
    writer.write(example.SerializeToString())

writer.close()
sys.stdout.flush()
```
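The loop above only serializes the training split, even though the validation and test addresses were already computed. The same pattern can be wrapped in a small helper (reusing `load_image`, `_int64_feature`, and `_bytes_feature` from above) and reused for the other two files. This is a minimal sketch; the helper name `create_data_record`, the `prefix` argument, and the filenames `val.tfrecords` / `test.tfrecords` are assumptions rather than part of the original post, and if you change the prefix you must adjust the feature keys in the reader below to match.

```python
def create_data_record(out_filename, addrs, labels, prefix='train'):
    # hypothetical helper: writes one TFRecords file with the same
    # feature layout ('<prefix>/label', '<prefix>/image') used above
    writer = tf.python_io.TFRecordWriter(out_filename)
    for addr, label in zip(addrs, labels):
        img = load_image(addr)
        feature = {prefix + '/label': _int64_feature(label),
                   prefix + '/image': _bytes_feature(tf.compat.as_bytes(img.tostring()))}
        example = tf.train.Example(features=tf.train.Features(feature=feature))
        writer.write(example.SerializeToString())
    writer.close()


# assumed output filenames; the splits were computed earlier
create_data_record('val.tfrecords', val_addrs, val_labels, prefix='val')
create_data_record('test.tfrecords', test_addrs, test_labels, prefix='test')
```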
Loading and testing the TFRecord images:

```python
import cv2
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

data_path = 'test1.tfrecords'  # path to the TFRecords file created above

with tf.Session() as sess:
    feature = {'train/image': tf.FixedLenFeature([], tf.string),
               'train/label': tf.FixedLenFeature([], tf.int64)}

    # Create a list of filenames and pass it to a queue
    filename_queue = tf.train.string_input_producer([data_path], num_epochs=1)

    # Define a reader and read the next record
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)

    # Decode the record read by the reader
    features = tf.parse_single_example(serialized_example, features=feature)

    # Convert the image data from string back to numbers
    image = tf.decode_raw(features['train/image'], tf.uint8)

    # Cast the label data into int32
    label = tf.cast(features['train/label'], tf.int32)

    # Reshape the image data into its original shape
    image = tf.reshape(image, [224, 224])

    # Any preprocessing here ...

    # Create batches by randomly shuffling tensors
    images, labels = tf.train.shuffle_batch([image, label], batch_size=10,
                                            capacity=30, num_threads=1,
                                            min_after_dequeue=10)

    # Initialize all global and local variables
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    sess.run(init_op)

    # Create a coordinator and run all QueueRunner objects
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    for batch_index in range(5):
        img, lbl = sess.run([images, labels])

        # show the first six images of the batch with matplotlib
        for j in range(6):
            plt.subplot(2, 3, j + 1)
            plt.imshow(img[j, ...], cmap='gray')
            plt.title('cat' if lbl[j] == 0 else 'dog')
        plt.show()

        # and again with OpenCV (press any key to advance)
        for j in range(6):
            print(img[j].shape)
            cv2.imshow('cat' if lbl[j] == 0 else 'dog', img[j])
            cv2.waitKey(0)

    # Stop the threads
    coord.request_stop()
    # Wait for the threads to stop
    coord.join(threads)
```
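Before wiring the queue-based pipeline into a model, it can be handy to verify the file contents without a session at all. This is a minimal sketch, not part of the original post, that walks the serialized records with `tf.python_io.tf_record_iterator` and rebuilds the grayscale image from the raw bytes, assuming the [224, 224] uint8 layout written above:

```python
import numpy as np
import tensorflow as tf

# iterate over the serialized tf.train.Example records in the file
for string_record in tf.python_io.tf_record_iterator(path='test1.tfrecords'):
    example = tf.train.Example()
    example.ParseFromString(string_record)

    label = example.features.feature['train/label'].int64_list.value[0]
    img_bytes = example.features.feature['train/image'].bytes_list.value[0]

    # rebuild the grayscale image that load_image serialized above
    img = np.frombuffer(img_bytes, dtype=np.uint8).reshape(224, 224)
    print(label, img.shape, img.dtype)
    break  # inspect only the first record
```

Seeing the expected label and a (224, 224) uint8 array here confirms that the writer and the reader agree on the serialization format.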