经过一系列计算机视觉公开课视频的学习,我们回到本课程最初的应用:对十类图像进行分类。之前的 Assignment 已经用 KNN 和 SVM 做过基础分析,这里我们改用卷积神经网络来实现。首先下载 CS231n 提供的训练集和测试集。
下载地址:http://www.cs.toronto.edu/~kriz/cifar.html
训练集共十类,每一类5000张,均为32×32像素的图像;测试集同样是十类,每一类1000张同等大小的图像。为了最后判断图像分类的准确性,图像文件名中已用0~9标注其所属的类别。
项目结构如下(无拓展名的为文件夹):
>assignment
>>data(测试集和训练集的图像)
>>>test
>>>train
>>c.py (训练文件)
>>model (存放训练后的模型)
train-test.py
import os

import numpy as np
import tensorflow as tf
from PIL import Image

# When True, restore the saved model and run inference over test_dir.
test = True
test_dir = "./data/test"
model_path = "./model"


def read_data(test_dir):
    """Load every image in *test_dir* together with its label.

    The label is parsed from the file name: the integer before the first
    underscore (e.g. ``3_0001.jpg`` -> label 3).

    Args:
        test_dir: Directory containing the image files.

    Returns:
        A tuple ``(fpaths, datas, labels)`` where ``fpaths`` is a list of file
        paths, ``datas`` is a numpy array of the images scaled by 1/255, and
        ``labels`` is a numpy array of the integer labels, all in the same
        order.

    Raises:
        ValueError: If a file name does not start with an integer followed by
            an underscore.
    """
    datas = []
    labels = []
    fpaths = []
    # Sort the listing so the order of results is reproducible across runs;
    # os.listdir makes no ordering guarantee.
    for fname in sorted(os.listdir(test_dir)):
        fpath = os.path.join(test_dir, fname)
        fpaths.append(fpath)
        # Use a context manager so the underlying file handle is released as
        # soon as the pixel data has been copied into the numpy array.
        with Image.open(fpath) as image:
            data = np.array(image) / 255.0
        label = int(fname.split("_")[0])
        datas.append(data)
        labels.append(label)

    datas = np.array(datas)
    labels = np.array(labels)

    print("shape of datas: {}\tshape of labels: {}".format(datas.shape, labels.shape))
    return fpaths, datas, labels


fpaths, datas, labels = read_data(test_dir)

# Number of classes is inferred from the labels present in test_dir. The
# output layer is sized from it, so it presumably has to match the class count
# the checkpoint was trained with -- verify against the training script.
num_classes = len(set(labels))

# Placeholders for the input images and their labels.
datas_placeholder = tf.placeholder(tf.float32, [None, 32, 32, 3])
labels_placeholder = tf.placeholder(tf.int32, [None])

# Placeholder for the dropout rate.
dropout_placeholdr = tf.placeholder(tf.float32)

# Convolution layer: 20 filters, 5x5 kernel, ReLU.
conv0 = tf.layers.conv2d(datas_placeholder, 20, 5, activation=tf.nn.relu)
# Max pooling: 2x2 window, stride 2.
pool0 = tf.layers.max_pooling2d(conv0, [2, 2], [2, 2])

# Convolution layer: 40 filters, 4x4 kernel, ReLU.
conv1 = tf.layers.conv2d(pool0, 40, 4, activation=tf.nn.relu)
# Max pooling: 2x2 window, stride 2.
pool1 = tf.layers.max_pooling2d(conv1, [2, 2], [2, 2])

# Flatten the 3-D feature maps into a 1-D vector per example.
flatten = tf.layers.flatten(pool1)

# Fully connected layer.
fc = tf.layers.dense(flatten, 400, activation=tf.nn.relu)

# Dropout layer.
# NOTE(review): tf.layers.dropout defaults to training=False, so as written
# this layer passes its input through unchanged regardless of the rate fed in.
dropout_fc = tf.layers.dropout(fc, dropout_placeholdr)

# Output layer producing one logit per class.
logits = tf.layers.dense(dropout_fc, num_classes)

# tf.argmax replaces the deprecated tf.arg_max alias.
predicted_labels = tf.argmax(logits, 1)

# Per-example softmax cross-entropy loss.
losses = tf.nn.softmax_cross_entropy_with_logits(
    labels=tf.one_hot(labels_placeholder, num_classes),
    logits=logits
)
# Mean loss over the batch.
mean_loss = tf.reduce_mean(losses)

# Optimizer. Minimise the scalar mean loss rather than the unreduced loss
# vector, so the gradient scale does not grow with the batch size. The op is
# not run in the test branch below.
optimizer = tf.train.AdamOptimizer(learning_rate=1e-2).minimize(mean_loss)

# Used to save and restore the model.
saver = tf.train.Saver()

with tf.Session() as sess:
    if test:
        print("测试")
        saver.restore(sess, model_path)
        print("从{}载入模型".format(model_path))
        # Mapping from label id to human-readable class name.
        label_name_dict = {
            0: "飞机",
            1: "汽车",
            2: "鸟",
            3: "猫",
            4: "鹿",
            5: "狗",
            6: "青蛙",
            7: "马",
            8: "船",
            9: "卡车"
        }
        # Feed the whole test set at once, with a dropout rate of 0.
        test_feed_dict = {
            datas_placeholder: datas,
            labels_placeholder: labels,
            dropout_placeholdr: 0
        }
        predicted_labels_val = sess.run(predicted_labels, feed_dict=test_feed_dict)
        # Print the true label next to the model's prediction for every file.
        for fpath, real_label, predicted_label in zip(fpaths, labels, predicted_labels_val):
            # Convert label ids to label names.
            real_label_name = label_name_dict[real_label]
            predicted_label_name = label_name_dict[predicted_label]
            print("{}\t{} => {}".format(fpath, real_label_name, predicted_label_name))