import tensorflow as tf
import numpy as np
from tensorflow import keras

# Convert integer class labels into one-hot vectors of length num.
def to_onehot(y, num):
    labels = np.zeros([num, len(y)])
    for i in range(len(y)):
        labels[int(y[i]), i] = 1
    return labels.T
# Load Fashion-MNIST and normalize pixel values to [0, 1].
mnist = keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images = train_images / 255.0
test_images = test_images / 255.0
train_labels = to_onehot(train_labels, 10)
test_labels = to_onehot(test_labels, 10)

# Add an explicit channel dimension: (N, 28, 28) -> (N, 28, 28, 1).
train_images_exp = np.expand_dims(train_images, axis=3)
test_images_exp = np.expand_dims(test_images, axis=3)
input_nodes = 784
output_nodes = 10
image_size = 28
channels = 1
labels = 10
conv1_deep = 32
conv1_size = 5
conv2_deep = 64
conv2_size = 5
fc_size = 512
# Forward pass: two conv + max-pool blocks followed by two fully connected layers.
def inference(input_tensor, train, regularizer):
    with tf.variable_scope('layer1_conv1', reuse=tf.AUTO_REUSE):
        conv1_weights = tf.get_variable('weight', [conv1_size, conv1_size, channels, conv1_deep],
                                        initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv1_bias = tf.get_variable('bias', [conv1_deep], initializer=tf.constant_initializer(0.0))
        conv1 = tf.nn.conv2d(input_tensor, conv1_weights, strides=[1, 1, 1, 1], padding="SAME")
        relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_bias))
    with tf.variable_scope('layer2_pool1', reuse=tf.AUTO_REUSE):
        pool1 = tf.nn.max_pool(relu1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
    with tf.variable_scope('layer3_conv2', reuse=tf.AUTO_REUSE):
        conv2_weights = tf.get_variable("weights", [conv2_size, conv2_size, conv1_deep, conv2_deep],
                                        initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv2_bias = tf.get_variable("bias", [conv2_deep], initializer=tf.constant_initializer(0.0))
        conv2 = tf.nn.conv2d(pool1, conv2_weights, strides=[1, 1, 1, 1], padding="SAME")
        relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_bias))
    with tf.variable_scope('layer4_pool2', reuse=tf.AUTO_REUSE):
        pool2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")

    # Flatten the last pooled feature map before the fully connected layers.
    pool2_shape = pool2.get_shape().as_list()
    nodes = pool2_shape[1] * pool2_shape[2] * pool2_shape[3]
    reshaped = tf.reshape(pool2, [-1, nodes])

    with tf.variable_scope('layer5_fc1', reuse=tf.AUTO_REUSE):
        fc1_weights = tf.get_variable("weights", [nodes, fc_size],
                                      initializer=tf.truncated_normal_initializer(stddev=0.1))
        if regularizer is not None:
            tf.add_to_collection("losses", regularizer(fc1_weights))
        fc1_bias = tf.get_variable("bias", [fc_size], initializer=tf.constant_initializer(0.0))
        fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_weights) + fc1_bias)
        if train:
            # Note: in TF1 the second positional argument of tf.nn.dropout is keep_prob,
            # so 0.2 keeps only 20% of the activations during training.
            fc1 = tf.nn.dropout(fc1, 0.2)
    with tf.variable_scope('layer6_fc2', reuse=tf.AUTO_REUSE):
        fc2_weights = tf.get_variable("weight", [fc_size, labels],
                                      initializer=tf.truncated_normal_initializer(stddev=0.1))
        if regularizer is not None:
            tf.add_to_collection('losses', regularizer(fc2_weights))
        fc2_bias = tf.get_variable("bias", [labels], initializer=tf.constant_initializer(0.0))
        logit = tf.matmul(fc1, fc2_weights) + fc2_bias
    return logit
epochs = 1000
learning_rate = 0.001
batch_size = 128
model_save_path = "model_conv_1/model.ckpt"
def train_model(data, train=True):
    trX, trY, teX, teY = data
    X = tf.placeholder(tf.float32, [None, image_size, image_size, channels], name="x_input")
    Y_ = tf.placeholder(tf.float32, [None, labels], name='y_input')
    regularizer = tf.contrib.layers.l2_regularizer(0.0000001)
    y_hat = inference(X, train, regularizer)
    if train:
        # Cross-entropy loss plus the L2 terms collected in "losses".
        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(logits=y_hat, labels=Y_))
        loss = cross_entropy + tf.add_n(tf.get_collection("losses"))
        train_step = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(loss)
        acc = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(Y_, 1), tf.argmax(y_hat, 1)), tf.float32))
        saver = tf.train.Saver()
        with tf.Session() as sess:
            tf.global_variables_initializer().run()
            for i in range(epochs):
                total_loss = 0
                # Iterate over the training set in fixed, consecutive mini-batches.
                for j in range(int(trX.shape[0] / batch_size + 1)):
                    x_batch = trX[j * batch_size:min(trX.shape[0], (j + 1) * batch_size), :]
                    y_batch = trY[j * batch_size:min(trY.shape[0], (j + 1) * batch_size), :]
                    batch_loss, _ = sess.run([cross_entropy, train_step],
                                             feed_dict={X: x_batch, Y_: y_batch})
                    total_loss += batch_loss
                total_loss /= int(trX.shape[0] / batch_size + 1)
                test_acc = sess.run(acc, feed_dict={X: teX, Y_: teY})
                print("test acc:", test_acc)
                if i % 100 == 0:
                    saver.save(sess, model_save_path)
                    print("Model saved successfully!")
train_model((train_images_exp, train_labels, test_images_exp, test_labels))
While writing this code along with the book, I ran into the following four problems that I have not yet resolved. I am leaving them here below the code for now, and will update the answers after further study; readers are welcome to share their thoughts in the comments:

1. In strides, the values at positions 1 and 4 are fixed. Why are they 1 here? The same question applies to other parameters such as the pooling layer's ksize.
2. What is padding="SAME" the "same" as, and how exactly is padding applied in the convolutional and pooling layers?
3. Why is the regularization term only added to the fully connected layers? Could the other parameters be regularized as well?
4. How can the data be shuffled within each batch and across batches to introduce randomness?
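For the fourth question, a minimal sketch of one common approach (my own assumption, not the book's code) is to draw a fresh random permutation of the training indices at the start of every epoch and slice the mini-batches from that permuted order; the helper below reuses the np import and the trX/trY names from train_model above:

# Sketch only: per-epoch shuffling with NumPy.
def shuffled_batches(trX, trY, batch_size):
    # A new permutation each call changes both the contents of every batch
    # and the order in which samples are visited.
    perm = np.random.permutation(trX.shape[0])
    for start in range(0, trX.shape[0], batch_size):
        idx = perm[start:start + batch_size]
        yield trX[idx], trY[idx]

# Inside the epoch loop, the fixed consecutive slicing could then be replaced with:
#     for x_batch, y_batch in shuffled_batches(trX, trY, batch_size):
#         ...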