Source: https://pytorch.org/tutorials/beginner/nn_tutorial.html#refactor-using-optim
The dataset used is MNIST (handwritten digits). It can be obtained in several ways, including the requests approach shown in the tutorial; that is not covered in detail here, but converting the data back into images is, as sketched below.
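A minimal sketch of turning one sample back into an image, assuming x_train has already been loaded as in the tutorial (each row is a flattened 28x28 grayscale image):

import matplotlib.pyplot as plt

# reshape the flat 784-value row back into a 28x28 image and display it
plt.imshow(x_train[0].reshape((28, 28)), cmap="gray")
plt.show()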
Although PyTorch provides many ready-made loss functions and activation functions, we can also implement our own versions. PyTorch will automatically generate fast GPU or vectorized CPU code for them.
def log_softmax(x):
    return x - x.exp().sum(-1).log().unsqueeze(-1)

def model(xb):
    return log_softmax(xb @ weights + bias)

Here @ stands for the matrix multiplication operation.
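Later snippets call loss_func and accuracy, which these notes never define. Following the tutorial, loss_func starts out as the negative log likelihood (the tutorial later swaps in F.cross_entropy, which combines log_softmax and nll), and accuracy is plain classification accuracy:

def nll(input, target):
    # negative log likelihood: take the log-probability assigned to the correct class
    return -input[range(target.shape[0]), target].mean()

loss_func = nll

def accuracy(out, yb):
    # the predicted class is the index of the largest output value
    preds = torch.argmax(out, dim=1)
    return (preds == yb).float().mean()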
Calling weights.grad.zero_() and bias.grad.zero_() prepares for the next gradient computation and prevents gradients from piling up: each backward pass adds to the stored gradients rather than overwriting them.
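A tiny standalone demonstration of this accumulation behaviour (the tensor w here is hypothetical, not part of the tutorial):

import torch

w = torch.ones(3, requires_grad=True)

(w * 2).sum().backward()
print(w.grad)   # tensor([2., 2., 2.])

(w * 2).sum().backward()
print(w.grad)   # tensor([4., 4., 4.]) -- the second backward added to the first

w.grad.zero_()  # reset so the next backward starts from zero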
set_trace() lets us step through the loop and inspect the value of every variable.
from IPython.core.debugger import set_trace

lr = 0.5  # learning rate
epochs = 2  # how many epochs to train for

for epoch in range(epochs):
    for i in range((n - 1) // bs + 1):
        # set_trace()
        start_i = i * bs
        end_i = start_i + bs
        xb = x_train[start_i:end_i]
        yb = y_train[start_i:end_i]
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        with torch.no_grad():
            weights -= weights.grad * lr
            bias -= bias.grad * lr
            weights.grad.zero_()
            bias.grad.zero_()

# check the loss and accuracy again
print(loss_func(model(xb), yb), accuracy(model(xb), yb))

Out: tensor(0.0829, grad_fn=<NegBackward>) tensor(1.)

Note that nn.Module below is spelled with a capital M; a lowercase Python module is a completely different concept (a file of code that can be imported).
import math  # needed for math.sqrt below
from torch import nn

class Mnist_Logistic(nn.Module):
    def __init__(self):
        super().__init__()
        self.weights = nn.Parameter(torch.randn(784, 10) / math.sqrt(784))
        self.bias = nn.Parameter(torch.zeros(10))

    def forward(self, xb):
        return xb @ self.weights + self.bias

model = Mnist_Logistic()
print(loss_func(model(xb), yb))

Out: tensor(2.3437, grad_fn=<NllLossBackward>)

Original version:
with torch.no_grad():
    weights -= weights.grad * lr
    bias -= bias.grad * lr
    weights.grad.zero_()
    bias.grad.zero_()

Improved version:
with torch.no_grad():
    for p in model.parameters():
        p -= p.grad * lr
    model.zero_grad()

Wrapped into fit():
def fit():
    for epoch in range(epochs):
        for i in range((n - 1) // bs + 1):
            start_i = i * bs
            end_i = start_i + bs
            xb = x_train[start_i:end_i]
            yb = y_train[start_i:end_i]
            pred = model(xb)
            loss = loss_func(pred, yb)

            loss.backward()
            with torch.no_grad():
                for p in model.parameters():
                    p -= p.grad * lr
                model.zero_grad()

fit()
print(loss_func(model(xb), yb))

Out: tensor(0.0812, grad_fn=<NllLossBackward>)

nn.Linear can handle the initialization and the forward computation for us.
Original version:
class Mnist_Logistic(nn.Module):
    def __init__(self):
        super().__init__()
        self.weights = nn.Parameter(torch.randn(784, 10) / math.sqrt(784))
        self.bias = nn.Parameter(torch.zeros(10))

    def forward(self, xb):
        return xb @ self.weights + self.bias

Improved version:
class Mnist_Logistic(nn.Module):
    def __init__(self):
        super().__init__()
        self.lin = nn.Linear(784, 10)

    def forward(self, xb):
        return self.lin(xb)

model = Mnist_Logistic()
print(loss_func(model(xb), yb))

Out: tensor(2.3360, grad_fn=<NllLossBackward>)

fit()
print(loss_func(model(xb), yb))

Out: tensor(0.0798, grad_fn=<NllLossBackward>)

Next, torch.optim replaces the manual parameter update. Original version:
with torch.no_grad():
    for p in model.parameters():
        p -= p.grad * lr
    model.zero_grad()

Improved version:
from torch import optim

# the optimizer provides the two calls that replace the manual update:
# opt.step()
# opt.zero_grad()

def get_model():
    model = Mnist_Logistic()
    return model, optim.SGD(model.parameters(), lr=lr)

model, opt = get_model()
print(loss_func(model(xb), yb))

for epoch in range(epochs):
    for i in range((n - 1) // bs + 1):
        start_i = i * bs
        end_i = start_i + bs
        xb = x_train[start_i:end_i]
        yb = y_train[start_i:end_i]
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        opt.step()
        opt.zero_grad()

print(loss_func(model(xb), yb))

Out: tensor(2.3593, grad_fn=<NllLossBackward>)
tensor(0.0821, grad_fn=<NllLossBackward>)

In loss_batch below, if no opt is passed in, no backward pass or parameter update is performed (this is how the validation set is handled).
def loss_batch(model, loss_func, xb, yb, opt=None):
    loss = loss_func(model(xb), yb)

    if opt is not None:
        loss.backward()
        opt.step()
        opt.zero_grad()

    return loss.item(), len(xb)

import numpy as np

def fit(epochs, model, loss_func, opt, train_dl, valid_dl):
    for epoch in range(epochs):
        model.train()
        for xb, yb in train_dl:
            loss_batch(model, loss_func, xb, yb, opt)

        model.eval()
        with torch.no_grad():
            losses, nums = zip(
                *[loss_batch(model, loss_func, xb, yb) for xb, yb in valid_dl]
            )
        val_loss = np.sum(np.multiply(losses, nums)) / np.sum(nums)

        print(epoch, val_loss)
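fit above expects DataLoader objects train_dl and valid_dl, which these notes never construct. A minimal sketch following the tutorial, assuming the x_train, y_train, x_valid, y_valid tensors are already loaded:

from torch.utils.data import TensorDataset, DataLoader

train_ds = TensorDataset(x_train, y_train)
valid_ds = TensorDataset(x_valid, y_valid)

def get_data(train_ds, valid_ds, bs):
    # shuffle the training set every epoch; the validation set can use a larger batch
    return (
        DataLoader(train_ds, batch_size=bs, shuffle=True),
        DataLoader(valid_ds, batch_size=bs * 2),
    )

train_dl, valid_dl = get_data(train_ds, valid_ds, bs)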
nn.Sequential offers an even simpler way to define a neural network: a Sequential object runs the modules it contains in order. Paired with it, Lambda creates a layer that we can then use when defining a network with Sequential. Its definition is as follows:
class Lambda(nn.Module):
    def __init__(self, func):
        super().__init__()
        self.func = func

    def forward(self, x):
        return self.func(x)

def preprocess(x):
    return x.view(-1, 1, 28, 28)

model = nn.Sequential(
    Lambda(preprocess),
    nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.AvgPool2d(4),
    Lambda(lambda x: x.view(x.size(0), -1)),
)

opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
fit(epochs, model, loss_func, opt, train_dl, valid_dl)

Out: 0 0.38947487244606016
1 0.279226434135437
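The GPU code below wraps each DataLoader in a WrappedDataLoader that applies a preprocess function to every batch; that class is not defined in these notes. A sketch following the tutorial:

class WrappedDataLoader:
    def __init__(self, dl, func):
        self.dl = dl
        self.func = func

    def __len__(self):
        return len(self.dl)

    def __iter__(self):
        # apply the preprocessing function to each batch as it is yielded
        for b in self.dl:
            yield (self.func(*b))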
If a CUDA-capable GPU is available, we can use it to speed up the code.

First check whether a GPU is available:

print(torch.cuda.is_available())

Out: True

Then create a device object:

dev = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

Move the data onto the device by adding .to(dev) in the preprocessing step:

def preprocess(x, y):
    return x.view(-1, 1, 28, 28).to(dev), y.to(dev)

train_dl, valid_dl = get_data(train_ds, valid_ds, bs)
train_dl = WrappedDataLoader(train_dl, preprocess)
valid_dl = WrappedDataLoader(valid_dl, preprocess)

Move the model onto the GPU as well, then train:

model.to(dev)
opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

fit(epochs, model, loss_func, opt, train_dl, valid_dl)
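As a quick sanity check (not from the tutorial), we can confirm that the parameters actually moved to the device:

# the first parameter's device tells us where the whole model lives
print(next(model.parameters()).device)  # expected: cuda:0 when a GPU is available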