反卷积是指,通过测量输出和已知输入重构未知输入的过程。在神经网络中,反卷积过程并不具备学习的能力,仅仅是用于可视化一个已经训练好的卷积网络模型,没有学习训练的过程。对于一个复杂的深度卷积网络,通过每层若干个卷积核的变换,我们无法知道每个卷积核关注的是什么,变换后的特征是什么样子。通过反卷积的还原,可以对这些问题有个清晰的可视化,以各层得到的特征图作为输入,进行反卷积,得到反卷积结果,用以验证显示各层提取到的特征图。
反卷积网络的特性使它有许多特别的应用,一般可以用于信道均衡、图像恢复、语音识别等未知输入估计和过程辨识方面的问题。
反卷积并不能复原卷积操作的输入值,仅仅是将卷积变换过程中的步骤反向变换一次而已,通过将卷积核转置,与卷积后的结果再做一遍卷积,所以它还有个名字叫“转置卷积”。虽然它不能还原出原来卷积的样子,但是在作用上具有类似的效果,可以将带有小部分缺失的信息最大化地恢复,也可以用来近似恢复被卷积生成后的原始输入。
def conv2d_transpose(value, filter, output_shape, strides, padding="SAME", data_format="NHWC", name=None)
value:代表通过卷积操纵之后的张量,一般用NHWC类型。filter:代表卷积核。output_shape:代表输出的张量形状也是个四维张量。strides:代表步长。padding:代表原数据生成value时使用的补0的方式,是用来检查输入形状和输出形状是否合规的。return:按照output_shape指定的形状。 # 反卷积 import numpy as np import tensorflow as tf img = tf.Variable(tf.constant(1.0, shape=[1, 4, 4, 1])) filter = tf.Variable(tf.constant([1.0, 0, -1, -2], shape=[2, 2, 1, 1])) conv = tf.nn.conv2d(img, filter, strides=[1, 2, 2, 1], padding="VALID") cons = tf.nn.conv2d(img, filter, strides=[1, 2, 2, 1], padding="SAME") print(conv.shape) print(cons.shape) contv = tf.nn.conv2d_transpose(conv, filter, [1, 4, 4, 1], strides=[1, 2, 2, 1], padding="VALID") conts = tf.nn.conv2d_transpose(cons, filter, [1, 4, 4, 1], strides=[1, 2, 2, 1], padding="SAME") with tf.Session() as sess: sess.run(tf.global_variables_initializer()) print("conv:\n", sess.run([conv, filter])) print("cons:\n", sess.run([cons])) print("contv:\n", sess.run([contv])) print("conts:\n", sess.run([conts]))反卷积的结果与原来的全1矩阵不等,说明转置卷积只能恢复部分特征,无法百分百地恢复原始数据。
反池化属于池化的逆操作,但无法通过池化的结果还原出全部的原始数据。因为池化的过程就是只保留主要信息,舍去部分信息。如想从池化后的这些主要信息恢复出全部信息,则存在着信息缺失,这时只能通过补位来实现最大程度的信息完整。
tf.gradients()求梯度
tf.stop_gradient()梯度停止
实例:用反卷积技术复原卷积网络各层图像,通过tensorboard观察其结果。
import cifar10_input import tensorflow as tf import numpy as np batch_size = 128 print("begin") images_train, labels_train = cifar10_input.inputs(eval_data = False, batch_size = batch_size) images_test, labels_test = cifar10_input.inputs(eval_data = True, batch_size = batch_size) print("begin data") #最大池化 def max_pool_with_argmax(net, stride): _, mask = tf.nn.max_pool_with_argmax( net,ksize=[1, stride, stride, 1], strides=[1, stride, stride, 1],padding='SAME') mask = tf.stop_gradient(mask) net = tf.nn.max_pool(net, ksize=[1, stride, stride, 1],strides=[1, stride, stride, 1], padding='SAME') return net, mask #4*4----2*2--=2*2 【6,8,12,16】 #反池化 def unpool(net, mask, stride): ksize = [1, stride, stride, 1] input_shape = net.get_shape().as_list() output_shape = (input_shape[0], input_shape[1] * ksize[1], input_shape[2] * ksize[2], input_shape[3]) one_like_mask = tf.ones_like(mask) batch_range = tf.reshape(tf.range(output_shape[0], dtype=tf.int64), shape=[input_shape[0], 1, 1, 1]) b = one_like_mask * batch_range y = mask // (output_shape[2] * output_shape[3]) x = mask % (output_shape[2] * output_shape[3]) // output_shape[3] feature_range = tf.range(output_shape[3], dtype=tf.int64) f = one_like_mask * feature_range updates_size = tf.size(net) indices = tf.transpose(tf.reshape(tf.stack([b, y, x, f]), [4, updates_size])) values = tf.reshape(net, [updates_size]) ret = tf.scatter_nd(indices, values, output_shape) return ret def weight_variable(shape): initial = tf.truncated_normal(shape, stddev=0.1) return tf.Variable(initial) def bias_variable(shape): initial = tf.constant(0.1, shape=shape) return tf.Variable(initial) def conv2d(x, W): return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding="SAME") def max_pool_2x2(x): return tf.nn.max_pool(x, ksize=[1,2,2,1],strides=[1,6,6,1], padding="SAME") def avg_pool_6x6(x): return tf.nn.avg_pool(x, ksize=[1, 6, 6, 1], strides=[1, 6, 6, 1], padding='SAME') x = tf.placeholder(tf.float32, [batch_size,24,24,3]) y = tf.placeholder(tf.float32, 
[batch_size, 10]) # 第一层 W_conv1 = weight_variable([5, 5, 3, 64]) b_conv1 = bias_variable([64]) x_image = tf.reshape(x, [-1, 24, 24, 3]) h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1) # h_pool1 = max_pool_2x2(h_conv1) h_pool1, mask1 = max_pool_with_argmax(h_conv1, 2) # 第二层 W_conv2 = weight_variable([5, 5, 64, 64]) b_conv2 = bias_variable([64]) h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2) # h_pool2 = max_pool_2x2(h_conv2) ###################################################### h_pool2, mask = max_pool_with_argmax(h_conv2, 2) print(h_pool2.shape) # (128, 6, 6, 64) t_conv2 = unpool(h_pool2, mask, 2) t_pool1 = tf.nn.conv2d_transpose(t_conv2-b_conv2, W_conv2, h_pool1.shape, [1,1,1,1]) print(t_conv2.shape,h_pool1.shape,t_pool1.shape) t_conv1 = unpool(t_pool1, mask1, 2) t_x_image = tf.nn.conv2d_transpose(t_conv1-b_conv1, W_conv1, x_image.shape, [1,1,1,1]) # 第一层卷积还原 t1_conv1 = unpool(h_pool1, mask1, 2) t1_x_image = tf.nn.conv2d_transpose(t1_conv1-b_conv1, W_conv1, x_image.shape, [1,1,1,1]) # 生成最终图像 stitched_decodings = tf.concat((x_image, t1_x_image, t_x_image), axis=2) decoding_summary_op = tf.summary.image("source/cifar", stitched_decodings) ###################################################### # 第三层 W_conv3 = weight_variable([5, 5, 64, 10]) b_conv3 = bias_variable([10]) h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3) + b_conv3) nt_hpool3=avg_pool_6x6(h_conv3)#10 nt_hpool3_flat = tf.reshape(nt_hpool3, [-1, 10]) # 分类 y_conv=tf.nn.softmax(nt_hpool3_flat) # 损失计算 cross_entropy = -tf.reduce_sum(y*tf.log(y_conv)) +(tf.nn.l2_loss(W_conv1)+tf.nn.l2_loss(W_conv2)+tf.nn.l2_loss(W_conv3)) train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy) correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y,1)) accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float")) # 训练 sess = tf.Session() sess.run(tf.global_variables_initializer()) summary_writer = tf.summary.FileWriter("./log/", sess.graph) tf.train.start_queue_runners(sess=sess) for i in 
range(15000):#20000 image_batch, label_batch = sess.run([images_train, labels_train]) label_b = np.eye(10,dtype=float)[label_batch] #one hot train_step.run(feed_dict={x:image_batch, y: label_b},session=sess) if i 0 == 0: train_accuracy = accuracy.eval(feed_dict={ x:image_batch, y: label_b},session=sess) print( "step %d, training accuracy %g"%(i, train_accuracy)) print("cross_entropy",cross_entropy.eval(feed_dict={x:image_batch, y: label_b},session=sess)) # 测试集 # 测试集 image_batch, label_batch = sess.run([images_test, labels_test]) label_b = np.eye(10,dtype=float)[label_batch]#one hot print ("finished! test accuracy %g"