PyTorch - 練習 kaggle - Dogs vs. Cats - 使用 Transfer Learning - Fine tune 預訓練模型
在PyTorch 中使用較常見的預訓練模型也非常方便,現在 AlexNet, VGG, ResNet, Inception v3…etc. 都可以直接從 TORCHVISION.MODELS 中直接套用下載預訓練好的權重,然後參考先前練習 Keras 使用預訓練模型的文章。
只需要在原本的代碼中,將建構模型的地方稍作修改,即可丟入訓練,比自己建構 CNN 還要簡潔方便。
修改 model 代碼:
建構模型 - 使用VGG16
vgg16 = models.vgg16(pretrained=True)
vgg16
VGG(
(features): Sequential(
(0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): ReLU(inplace)
(2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(3): ReLU(inplace)
(4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(6): ReLU(inplace)
(7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(8): ReLU(inplace)
(9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(11): ReLU(inplace)
(12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(13): ReLU(inplace)
(14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(15): ReLU(inplace)
(16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(18): ReLU(inplace)
(19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(20): ReLU(inplace)
(21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(22): ReLU(inplace)
(23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(25): ReLU(inplace)
(26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(27): ReLU(inplace)
(28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(29): ReLU(inplace)
(30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(avgpool): AdaptiveAvgPool2d(output_size=(7, 7))
(classifier): Sequential(
(0): Linear(in_features=25088, out_features=4096, bias=True)
(1): ReLU(inplace)
(2): Dropout(p=0.5)
(3): Linear(in_features=4096, out_features=4096, bias=True)
(4): ReLU(inplace)
(5): Dropout(p=0.5)
(6): Linear(in_features=4096, out_features=1000, bias=True)
)
)
可以看到最後一層,(6): Linear(in_features=4096, out_features=1000, bias=True),out_features=1000 ,因為是在 ImageNet 1000 分類比賽所使用的模型,所以最後一層 output 是 1000。
我們要凍結預訓練好的卷積層,然後修改 classifier layer。
# Freeze parameters so we don't backprop through them
for param in vgg16.parameters():
param.requires_grad = False
fine tune 最後一層分類數目從 1000 修改成 2。
#fine tune the last classifier layer from 1000 to 2 dim(num. of classifier).
vgg16.classifier[6] = nn.Linear(4096,2)
vgg16 = vgg16.cuda() #use GPU
將 VGG16 model 打印出來:
from torchsummary import summary
summary(vgg16.cuda(), (3, 224, 224))
選擇優化器 & loss function
optimizer = torch.optim.Adam(vgg16.parameters(), lr=LR) # optimize all cnn parameters
criterion = nn.CrossEntropyLoss() # the target label is not one-hotted
訓練模型
接著就可以開始訓練,因為是使用預訓練過的模型,這邊只訓練 3 個 epochs。
if train_on_gpu:
model.cuda()
# number of epochs to train the model
n_epochs = 3
valid_loss_min = np.Inf # track change in validation loss
#train_losses,valid_losses=[],[]
for epoch in range(1, n_epochs+1):
# keep track of training and validation loss
train_loss = 0.0
valid_loss = 0.0
print('running epoch: {}'.format(epoch))
###################
# train the model #
###################
model.train()
for data, target in tqdm(train_loader):
# move tensors to GPU if CUDA is available
if train_on_gpu:
data, target = data.cuda(), target.cuda()
# clear the gradients of all optimized variables
optimizer.zero_grad()
# forward pass: compute predicted outputs by passing inputs to the model
output = model(data)
# calculate the batch loss
loss = criterion(output, target)
# backward pass: compute gradient of the loss with respect to model parameters
loss.backward()
# perform a single optimization step (parameter update)
optimizer.step()
# update training loss
train_loss += loss.item()*data.size(0)
######################
# validate the model #
######################
model.eval()
for data, target in tqdm(valid_loader):
# move tensors to GPU if CUDA is available
if train_on_gpu:
data, target = data.cuda(), target.cuda()
# forward pass: compute predicted outputs by passing inputs to the model
output = model(data)
# calculate the batch loss
loss = criterion(output, target)
# update average validation loss
valid_loss += loss.item()*data.size(0)
# calculate average losses
#train_losses.append(train_loss/len(train_loader.dataset))
#valid_losses.append(valid_loss.item()/len(valid_loader.dataset)
train_loss = train_loss/len(train_loader.dataset)
valid_loss = valid_loss/len(valid_loader.dataset)
# print training/validation statistics
print('\tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
train_loss, valid_loss))
# save model if validation loss has decreased
if valid_loss <= valid_loss_min:
print('Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...'.format(
valid_loss_min,
valid_loss))
torch.save(model.state_dict(), 'VGG16.pth')
valid_loss_min = valid_loss
評估模型
def test(loaders, model, criterion, use_cuda):
# monitor test loss and accuracy
test_loss = 0.
correct = 0.
total = 0.
model.eval()
for batch_idx, (data, target) in enumerate(loaders):
# move to GPU
if use_cuda:
data, target = data.cuda(), target.cuda()
# forward pass: compute predicted outputs by passing inputs to the model
output = model(data)
# calculate the loss
loss = criterion(output, target)
# update average test loss
test_loss = test_loss + ((1 / (batch_idx + 1)) * (loss.data - test_loss))
# convert output probabilities to predicted class
pred = output.data.max(1, keepdim=True)[1]
# compare predictions to true label
correct += np.sum(np.squeeze(pred.eq(target.data.view_as(pred))).cpu().numpy())
total += data.size(0)
print('Test Loss: {:.6f}'.format(test_loss))
print('Test Accuracy: %2d%% (%2d/%2d)' % (
100. * correct / total, correct, total))
use_cuda = torch.cuda.is_available()
model.cuda()
test(test_loader, model, criterion, use_cuda)
Test Loss: 0.036374
Test Accuracy: 98% (987/1000)