PyTorch Learning Notes 6: Dissecting the nn Module Step by Step



    Source: https://pytorch.org/tutorials/beginner/nn_tutorial.html#refactor-using-optim

    Dataset

    The dataset is MNIST (handwritten digits). There are several ways to obtain it, including the requests-based download used in the tutorial; that part is not repeated in detail here, but converting the data into images is shown below.
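    For reference, here is a minimal sketch of the requests-based download and load step along the lines of the official tutorial; the URL, file name, and local paths are assumptions and may need adjusting:

        from pathlib import Path
        import gzip
        import pickle
        import requests

        DATA_PATH = Path("data")
        PATH = DATA_PATH / "mnist"
        PATH.mkdir(parents=True, exist_ok=True)

        URL = "https://github.com/pytorch/tutorials/raw/main/_static/"  # assumed mirror
        FILENAME = "mnist.pkl.gz"

        if not (PATH / FILENAME).exists():
            content = requests.get(URL + FILENAME).content
            (PATH / FILENAME).write_bytes(content)

        # the pickle holds (train, valid, test) splits of flattened 28x28 grayscale digits
        with gzip.open((PATH / FILENAME).as_posix(), "rb") as f:
            ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding="latin-1")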

    Display an image with matplotlib:
        pyplot.imshow(x_train[0].reshape((28, 28)), cmap="gray")
        print(x_train.shape)
    Convert the data to torch.tensor:
        x_train, y_train, x_valid, y_valid = map(
            torch.tensor, (x_train, y_train, x_valid, y_valid)
        )
    Set up tensors with automatic gradient computation:
        import math

        weights = torch.randn(784, 10) / math.sqrt(784)
        weights.requires_grad_()
        bias = torch.zeros(10, requires_grad=True)
    Define your own loss function

    Although PyTorch provides many ready-made loss functions and activation functions, we can implement our own versions. PyTorch will automatically generate fast GPU or vectorized CPU code for them.

        def log_softmax(x):
            return x - x.exp().sum(-1).log().unsqueeze(-1)

        def model(xb):
            return log_softmax(xb @ weights + bias)

    Here @ denotes matrix multiplication.
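    A quick sanity check (shapes chosen arbitrarily here) that @ behaves like torch.matmul rather than an elementwise product:

        import torch

        a = torch.randn(2, 784)
        b = torch.randn(784, 10)

        # (2, 784) @ (784, 10) -> (2, 10), i.e. matrix multiplication
        print((a @ b).shape)                              # torch.Size([2, 10])
        print(torch.allclose(a @ b, torch.matmul(a, b)))  # True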

    Mini-batches
        bs = 64  # batch size

        xb = x_train[0:bs]  # a mini-batch from x
        preds = model(xb)   # predictions
        preds[0], preds.shape
        print(preds[0], preds.shape)

        Out:
        tensor([-2.9153, -2.6091, -2.9060, -2.2049, -2.4748, -2.4453, -2.1476, -2.1597,
                -2.7930, -1.4146], grad_fn=<SelectBackward>) torch.Size([64, 10])
    Define the negative log-likelihood as loss_func:
        def nll(input, target):
            return -input[range(target.shape[0]), target].mean()

        loss_func = nll
    Test it:
        yb = y_train[0:bs]
        print(loss_func(preds, yb))

        Out:
        tensor(2.3504, grad_fn=<NegBackward>)
    Define a function to compute accuracy:
        def accuracy(out, yb):
            preds = torch.argmax(out, dim=1)
            return (preds == yb).float().mean()

        print(accuracy(preds, yb))

        Out:
        tensor(0.1562)
    Update the weights and bias inside torch.no_grad()

    Calling weights.grad.zero_() and bias.grad.zero_() prepares for the next gradient computation and prevents gradient accumulation: each backward pass adds to the existing gradients rather than overwriting them.
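    A tiny standalone demo (the tensor here is made up purely for illustration) of why gradients must be zeroed between iterations:

        import torch

        w = torch.ones(3, requires_grad=True)

        (2 * w).sum().backward()
        print(w.grad)   # tensor([2., 2., 2.])

        (2 * w).sum().backward()
        print(w.grad)   # tensor([4., 4., 4.]): accumulated onto the previous gradient

        w.grad.zero_()  # reset before the next backward pass
        print(w.grad)   # tensor([0., 0., 0.])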

    set_trace() lets you step through the loop and inspect the value of each variable.
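    The training loop below also relies on n, which is never defined in these notes; presumably it is the number of training examples, e.g.:

        n = x_train.shape[0]  # assumed: total number of training samples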

        from IPython.core.debugger import set_trace

        lr = 0.5  # learning rate
        epochs = 2  # how many epochs to train for

        for epoch in range(epochs):
            for i in range((n - 1) // bs + 1):
                # set_trace()
                start_i = i * bs
                end_i = start_i + bs
                xb = x_train[start_i:end_i]
                yb = y_train[start_i:end_i]
                pred = model(xb)
                loss = loss_func(pred, yb)

                loss.backward()
                with torch.no_grad():
                    weights -= weights.grad * lr
                    bias -= bias.grad * lr
                    weights.grad.zero_()
                    bias.grad.zero_()

        # test again
        print(loss_func(model(xb), yb), accuracy(model(xb), yb))

        Out:
        tensor(0.0829, grad_fn=<NegBackward>) tensor(1.)
    Refactor using torch.nn.functional
        import torch.nn.functional as F

        loss_func = F.cross_entropy

        def model(xb):
            return xb @ weights + bias

        print(loss_func(model(xb), yb), accuracy(model(xb), yb))

        Out:
        tensor(0.0829, grad_fn=<NllLossBackward>) tensor(1.)
    Refactor using nn.Module

    Note that Module here has a capital M. A lowercase-m module is a completely different Python concept (a file of Python code that can be imported), not the class we subclass here.

        from torch import nn

        class Mnist_Logistic(nn.Module):
            def __init__(self):
                super().__init__()
                self.weights = nn.Parameter(torch.randn(784, 10) / math.sqrt(784))
                self.bias = nn.Parameter(torch.zeros(10))

            def forward(self, xb):
                return xb @ self.weights + self.bias

        model = Mnist_Logistic()
        print(loss_func(model(xb), yb))

        Out:
        tensor(2.3437, grad_fn=<NllLossBackward>)
    Refactor using model.parameters() and model.zero_grad()

    Original version:

        with torch.no_grad():
            weights -= weights.grad * lr
            bias -= bias.grad * lr
            weights.grad.zero_()
            bias.grad.zero_()

    Refactored version:

        with torch.no_grad():
            for p in model.parameters():
                p -= p.grad * lr
            model.zero_grad()

    Wrapped into fit():

        def fit():
            for epoch in range(epochs):
                for i in range((n - 1) // bs + 1):
                    start_i = i * bs
                    end_i = start_i + bs
                    xb = x_train[start_i:end_i]
                    yb = y_train[start_i:end_i]
                    pred = model(xb)
                    loss = loss_func(pred, yb)

                    loss.backward()
                    with torch.no_grad():
                        for p in model.parameters():
                            p -= p.grad * lr
                        model.zero_grad()

        fit()
        print(loss_func(model(xb), yb))

        Out:
        tensor(0.0812, grad_fn=<NllLossBackward>)
    Refactor using nn.Linear

    nn.Linear takes care of initializing the parameters and performing the forward computation for us.

    Original version:

        class Mnist_Logistic(nn.Module):
            def __init__(self):
                super().__init__()
                self.weights = nn.Parameter(torch.randn(784, 10) / math.sqrt(784))
                self.bias = nn.Parameter(torch.zeros(10))

            def forward(self, xb):
                return xb @ self.weights + self.bias

    Refactored version:

        class Mnist_Logistic(nn.Module):
            def __init__(self):
                super().__init__()
                self.lin = nn.Linear(784, 10)

            def forward(self, xb):
                return self.lin(xb)

        model = Mnist_Logistic()
        print(loss_func(model(xb), yb))

        Out:
        tensor(2.3360, grad_fn=<NllLossBackward>)

        fit()
        print(loss_func(model(xb), yb))

        Out:
        tensor(0.0798, grad_fn=<NllLossBackward>)
    Refactor using optim

    Original version:

        with torch.no_grad():
            for p in model.parameters():
                p -= p.grad * lr
            model.zero_grad()

    Refactored version:

        from torch import optim

        # opt.step()
        # opt.zero_grad()

        def get_model():
            model = Mnist_Logistic()
            return model, optim.SGD(model.parameters(), lr=lr)

        model, opt = get_model()
        print(loss_func(model(xb), yb))

        for epoch in range(epochs):
            for i in range((n - 1) // bs + 1):
                start_i = i * bs
                end_i = start_i + bs
                xb = x_train[start_i:end_i]
                yb = y_train[start_i:end_i]
                pred = model(xb)
                loss = loss_func(pred, yb)

                loss.backward()
                opt.step()
                opt.zero_grad()

        print(loss_func(model(xb), yb))

        Out:
        tensor(2.3593, grad_fn=<NllLossBackward>)
        tensor(0.0821, grad_fn=<NllLossBackward>)
    Refactor using fit()

    If no opt is passed in, loss_batch only computes the loss; it does not run the backward pass or update the parameters (this is how it is used for validation).

        def loss_batch(model, loss_func, xb, yb, opt=None):
            loss = loss_func(model(xb), yb)

            if opt is not None:
                loss.backward()
                opt.step()
                opt.zero_grad()

            return loss.item(), len(xb)

        import numpy as np

        def fit(epochs, model, loss_func, opt, train_dl, valid_dl):
            for epoch in range(epochs):
                model.train()
                for xb, yb in train_dl:
                    loss_batch(model, loss_func, xb, yb, opt)

                model.eval()
                with torch.no_grad():
                    losses, nums = zip(
                        *[loss_batch(model, loss_func, xb, yb) for xb, yb in valid_dl]
                    )
                val_loss = np.sum(np.multiply(losses, nums)) / np.sum(nums)

                print(epoch, val_loss)
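    Note that fit() iterates over train_dl and valid_dl, which these notes never construct. Here is a minimal sketch following the tutorial's TensorDataset/DataLoader setup (the helper name get_data matches the tutorial; treat the details as an assumption):

        from torch.utils.data import TensorDataset, DataLoader

        def get_data(train_ds, valid_ds, bs):
            # shuffle the training data every epoch; validation can use a larger batch
            return (
                DataLoader(train_ds, batch_size=bs, shuffle=True),
                DataLoader(valid_ds, batch_size=bs * 2),
            )

        train_ds = TensorDataset(x_train, y_train)
        valid_ds = TensorDataset(x_valid, y_valid)
        train_dl, valid_dl = get_data(train_ds, valid_ds, bs)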
    Define your own CNN
        class Mnist_CNN(nn.Module):
            def __init__(self):
                super().__init__()
                self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1)
                self.conv2 = nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1)
                self.conv3 = nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1)

            def forward(self, xb):
                xb = xb.view(-1, 1, 28, 28)
                xb = F.relu(self.conv1(xb))
                xb = F.relu(self.conv2(xb))
                xb = F.relu(self.conv3(xb))
                xb = F.avg_pool2d(xb, 4)
                return xb.view(-1, xb.size(1))

        lr = 0.1

        model = Mnist_CNN()
        opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

        fit(epochs, model, loss_func, opt, train_dl, valid_dl)

        Out:
        0 0.346197331571579
        1 0.26924657912254335
    Using nn.Sequential

    This is essentially a more concise way of defining a network.

    Paired with a Lambda helper: Lambda will create a layer that we can then use when defining a network with Sequential. Its definition and usage:

        class Lambda(nn.Module):
            def __init__(self, func):
                super().__init__()
                self.func = func

            def forward(self, x):
                return self.func(x)

        def preprocess(x):
            return x.view(-1, 1, 28, 28)

        model = nn.Sequential(
            Lambda(preprocess),
            nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.AvgPool2d(4),
            Lambda(lambda x: x.view(x.size(0), -1)),
        )

        opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
        fit(epochs, model, loss_func, opt, train_dl, valid_dl)

        Out:
        0 0.38947487244606016
        1 0.279226434135437
    Using a GPU

    If a CUDA-capable GPU is available, it can be used to speed up the code.

    Check whether a GPU can be used:

        print(torch.cuda.is_available())

        Out:
        True

    Then create a device object:

        dev = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

    Move the data to the device:

        def preprocess(x, y):
            return x.view(-1, 1, 28, 28).to(dev), y.to(dev)

        train_dl, valid_dl = get_data(train_ds, valid_ds, bs)
        train_dl = WrappedDataLoader(train_dl, preprocess)
        valid_dl = WrappedDataLoader(valid_dl, preprocess)

    Move the model to the GPU:

        model.to(dev)
        opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

        fit(epochs, model, loss_func, opt, train_dl, valid_dl)
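    The WrappedDataLoader used above is never defined in these notes. A minimal sketch of what it does, modeled on the tutorial (wrap an existing DataLoader and apply the preprocess function to every batch); treat the exact implementation as an assumption:

        class WrappedDataLoader:
            def __init__(self, dl, func):
                self.dl = dl      # the underlying DataLoader
                self.func = func  # per-batch preprocessing function

            def __len__(self):
                return len(self.dl)

            def __iter__(self):
                for b in self.dl:
                    # apply preprocessing to each (xb, yb) batch as it is yielded
                    yield self.func(*b)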