# 【Task5】Implementing L1/L2 Regularization and Dropout in PyTorch (reference code included)
Goals: understand how Dropout works, and implement regularization (L1, L2, Dropout) in code. References: a NumPy implementation of Dropout; implementing dropout in PyTorch.
## Dropout Principle
As Wikipedia notes, a fully connected network has a very large number of parameters and therefore overfits easily; Dropout is an effective way to prevent this. The Deep Learning book (Goodfellow et al.) classifies Dropout as a bagging-style method: each time hidden units are randomly dropped, the original fully connected network behaves as one of its many subnetworks. Training across these subnetworks increases model diversity and keeps any single model from overfitting the training data.
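Concretely, the standard inverted-dropout rule (this summary is added here; it matches the NumPy implementation at the end of this section) keeps each hidden activation with probability $p$ and rescales it by $1/p$, so the expected activation is unchanged and test time needs no correction:

$$\tilde{h}_i = \frac{m_i}{p}\,h_i,\qquad m_i \sim \mathrm{Bernoulli}(p),\qquad \mathbb{E}[\tilde{h}_i] = \left(p\cdot\tfrac{1}{p} + (1-p)\cdot 0\right) h_i = h_i$$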
## Implementing Regularization in Code
### L1 Regularization
PyTorch optimizers have no built-in L1 option, so the penalty is accumulated by hand and added to the classification loss:

```python
import torch

# model, criterion, pred, target and the coefficient `lamda` are assumed
# to be defined elsewhere in the training script

# accumulate the L1 penalty: sum of absolute values of all parameters
regularization_loss = 0
for param in model.parameters():
    regularization_loss += torch.sum(torch.abs(param))

classify_loss = criterion(pred, target)             # task loss
loss = classify_loss + lamda * regularization_loss  # total loss with L1 penalty

optimizer.zero_grad()
loss.backward()
optimizer.step()
```
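A common variant, shown here as a sketch rather than part of the original recipe: penalize only the weight tensors and leave biases unregularized, which `named_parameters()` makes straightforward (the `'bias'` name filter is an assumption about the usual PyTorch naming convention):

```python
# Sketch: L1 penalty over weights only, skipping bias terms
# (assumes parameters follow the standard 'weight'/'bias' naming)
l1_penalty = sum(param.abs().sum()
                 for name, param in model.named_parameters()
                 if 'bias' not in name)
loss = classify_loss + lamda * l1_penalty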
### L2 Regularization
L2 regularization is built into PyTorch optimizers via the `weight_decay` argument:

```python
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, weight_decay=0.001)
```
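For symmetry with the L1 snippet above, here is a sketch of the same penalty written out by hand, reusing the `model` and `classify_loss` names from the L1 example. Note that SGD's `weight_decay=λ` adds `λ·w` to each gradient, which corresponds to a loss term of `(λ/2)·‖w‖²`:

```python
# Manual L2 penalty, equivalent in gradient terms to weight_decay=0.001
l2_reg = torch.tensor(0.)
for param in model.parameters():
    l2_reg = l2_reg + param.pow(2).sum()
loss = classify_loss + (0.001 / 2) * l2_reg
```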
### Dropout Implementation (PyTorch)
```python
import torch
import matplotlib.pyplot as plt

torch.manual_seed(1)    # for reproducibility

N_SAMPLES = 20
N_HIDDEN = 300

# training set: y = x plus Gaussian noise
x = torch.unsqueeze(torch.linspace(-1, 1, N_SAMPLES), 1)
print('x.size()', x.size())
y = x + 0.3 * torch.normal(torch.zeros(N_SAMPLES, 1), torch.ones(N_SAMPLES, 1))

# test set drawn from the same distribution
test_x = torch.unsqueeze(torch.linspace(-1, 1, N_SAMPLES), 1)
test_y = test_x + 0.3 * torch.normal(torch.zeros(N_SAMPLES, 1), torch.ones(N_SAMPLES, 1))

plt.scatter(x.data.numpy(), y.data.numpy(), c='magenta', s=50, alpha=0.5, label='train')
plt.scatter(test_x.data.numpy(), test_y.data.numpy(), c='cyan', s=50, alpha=0.5, label='test')
plt.legend(loc='upper left')
plt.ylim((-2.5, 2.5))
plt.show()
```

Output: `x.size() torch.Size([20, 1])`
```python
# baseline network, prone to overfitting: no dropout
net_overfitting = torch.nn.Sequential(
    torch.nn.Linear(1, N_HIDDEN),
    torch.nn.ReLU(),
    torch.nn.Linear(N_HIDDEN, N_HIDDEN),
    torch.nn.ReLU(),
    torch.nn.Linear(N_HIDDEN, 1),
)

# same architecture with Dropout(p=0.5) after each hidden Linear layer
net_dropped = torch.nn.Sequential(
    torch.nn.Linear(1, N_HIDDEN),
    torch.nn.Dropout(0.5),
    torch.nn.ReLU(),
    torch.nn.Linear(N_HIDDEN, N_HIDDEN),
    torch.nn.Dropout(0.5),
    torch.nn.ReLU(),
    torch.nn.Linear(N_HIDDEN, 1),
)
```
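The original stops after defining the two networks. The sketch below (hyperparameters are illustrative assumptions: Adam, `lr=0.01`, 500 steps, MSE loss) shows how they would be trained and compared; the essential detail is calling `eval()` before testing, since `nn.Dropout` only drops units in training mode:

```python
# Training/evaluation sketch (illustrative hyperparameters, not from the original)
optimizer_ofit = torch.optim.Adam(net_overfitting.parameters(), lr=0.01)
optimizer_drop = torch.optim.Adam(net_dropped.parameters(), lr=0.01)
loss_func = torch.nn.MSELoss()

for t in range(500):
    # both nets are in train mode by default, so Dropout is active here
    loss_ofit = loss_func(net_overfitting(x), y)
    loss_drop = loss_func(net_dropped(x), y)
    for opt, loss in ((optimizer_ofit, loss_ofit), (optimizer_drop, loss_drop)):
        opt.zero_grad()
        loss.backward()
        opt.step()

# eval() disables Dropout: the full network runs deterministically at test time
net_overfitting.eval()
net_dropped.eval()
with torch.no_grad():
    print('test loss (no dropout):', loss_func(net_overfitting(test_x), test_y).item())
    print('test loss (dropout):   ', loss_func(net_dropped(test_x), test_y).item())
net_dropped.train()  # switch back to train mode if training continues
```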
## NumPy Implementation of Dropout
"""
inverted dropout(反向随机失活): 推荐实现方式.
在训练的时候drop和调整数值范围,测试时不用任何改变.
"""
p
= 0.5
def train_step(X
):
H1
= np
.maximum
(0, np
.dot
(W1
, X
) + b1
)
U1
= (np
.random
.rand
(*H1
.shape
) < p
) / p
H1
*= U1
H2
= np
.maximum
(0, np
.dot
(W2
, H1
) + b2
)
U2
= (np
.random
.rand
(*H2
.shape
) < p
) / p
H2
*= U2
out
= np
.dot
(W3
, H2
) + b3
def predict(X
):
H1
= np
.maximum
(0, np
.dot
(W1
, X
) + b1
)
H2
= np
.maximum
(0, np
.dot
(W2
, H1
) + b2
)
out
= np
.dot
(W3
, H2
) + b3
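For contrast (an addition, following the same sketch conventions as above): "vanilla" dropout omits the `1/p` scaling during training, which then forces test-time activations to be scaled by `p` instead; this extra test-time change is exactly what inverted dropout avoids:

```python
def train_step_vanilla(X):
    # same as train_step, but without the 1/p rescaling
    H1 = np.maximum(0, np.dot(W1, X) + b1)
    H1 *= np.random.rand(*H1.shape) < p
    H2 = np.maximum(0, np.dot(W2, H1) + b2)
    H2 *= np.random.rand(*H2.shape) < p
    out = np.dot(W3, H2) + b3

def predict_vanilla(X):
    # test time must now scale activations by the keep probability p
    H1 = np.maximum(0, np.dot(W1, X) + b1) * p
    H2 = np.maximum(0, np.dot(W2, H1) + b2) * p
    out = np.dot(W3, H2) + b3
```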