前文我们实现了googlenet。想必大家已经有感觉,实现一个神经网络并不是很难。但是实现一个神经网络只是深度学习的入门和开始,如何训练一个网络,并将它应用到实际的工作和需求中,才是我们学习神经网络的初衷。所以今天我们就开始看看怎么使用前文实现的这个神经网络。
googlenet在pytorch里面已经有实现了,同时pytorch还提供了一个使用imagenet 1000分类训练过的权重。所以我们这里直接使用这个预训练模型,然后调整网络结构,将输出变为10分类,使用cifar10数据集重新训练(finetuning)一个属于我们自己的模型。pytorch的googlenet可以从 https://pytorch.org/hub/pytorch_vision_googlenet/ [1] 根据示例代码直接下载使用,也可以从 https://github.com/pytorch/vision/blob/master/torchvision/models/googlenet.py [2] 找到源代码。
我们这里使用[2]提供的googlenet实现,[2]中同时提供了加载imagenet预训练权重的函数,我们直接执行对应的代码进行加载即可,代码如下:
import warnings
from collections import namedtuple
from typing import Optional, Tuple, List, Callable, Any

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor

try:
    from torch.hub import load_state_dict_from_url
except ImportError:
    from torch.utils.model_zoo import load_url as load_state_dict_from_url
#from .utils import load_state_dict_from_url
# Names exported by a star-import of this module.
__all__ = ['GoogLeNet', 'googlenet', "GoogLeNetOutputs", "_GoogLeNetOutputs"]
# Download location of the published checkpoint, keyed by architecture name.
model_urls = dict(
    # GoogLeNet ported from TensorFlow
    googlenet='https://download.pytorch.org/models/googlenet-1378be20.pth',
)

# Result container: the main logits followed by the two auxiliary outputs.
GoogLeNetOutputs = namedtuple('GoogLeNetOutputs', ['logits', 'aux_logits2', 'aux_logits1'])
GoogLeNetOutputs.__annotations__ = dict(
    logits=Tensor,
    aux_logits2=Optional[Tensor],
    aux_logits1=Optional[Tensor],
)

# Script annotations failed with _GoogleNetOutputs = namedtuple ...
# _GoogLeNetOutputs set here for backwards compat
_GoogLeNetOutputs = GoogLeNetOutputs
def googlenet(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> "GoogLeNet":
    r"""Build a GoogLeNet (Inception v1) model, the architecture from
    `"Going Deeper with Convolutions" <http://arxiv.org/abs/1409.4842>`_.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
        aux_logits (bool): If True, adds two auxiliary branches that can improve training.
            Default: *False* when pretrained is True otherwise *True*
        transform_input (bool): If True, preprocesses the input according to the method with which it
            was trained on ImageNet. Default: *True* when pretrained is True otherwise *False*
        **kwargs: Any other keyword argument is forwarded to the ``GoogLeNet`` constructor.

    Returns:
        The constructed ``GoogLeNet``; with ``pretrained=True`` the downloaded
        checkpoint has been loaded into it.

    Note:
        With ``pretrained=True`` the checkpoint is loaded with ``load_state_dict``
        using its default arguments, so a ``num_classes`` that differs from the
        checkpoint will presumably fail to load. Replace ``model.fc`` after this
        call instead.
    """
    if not pretrained:
        return GoogLeNet(**kwargs)

    # Defaults that only apply to the pretrained model; explicit caller values win.
    kwargs.setdefault('transform_input', True)
    kwargs.setdefault('aux_logits', False)
    if kwargs['aux_logits']:
        warnings.warn('auxiliary heads in the pretrained googlenet model are NOT pretrained, '
                      'so make sure to train them')
    original_aux_logits = kwargs['aux_logits']

    # The network is always built WITH the auxiliary heads before loading the
    # checkpoint; they are removed again below if the caller did not want them.
    kwargs['aux_logits'] = True
    # Random initialisation would be overwritten by the checkpoint anyway.
    kwargs['init_weights'] = False
    model = GoogLeNet(**kwargs)
    state_dict = load_state_dict_from_url(model_urls['googlenet'],
                                          progress=progress)
    model.load_state_dict(state_dict)

    if not original_aux_logits:
        model.aux_logits = False
        model.aux1 = None  # type: ignore[assignment]
        model.aux2 = None  # type: ignore[assignment]
    return model
class GoogLeNet(nn.Module):
    """GoogLeNet (Inception v1) image classifier.

    The network is a convolutional stem, nine inception blocks interleaved with
    max-pooling, global average pooling, dropout and a linear classifier. Two
    optional auxiliary classifiers branch off after ``inception4a`` and
    ``inception4d``; they are only evaluated in training mode.

    Args:
        num_classes: Size of the output layer(s).
        aux_logits: Whether to create the two auxiliary classifiers.
        transform_input: Whether ``forward`` first re-normalises the input
            (see ``_transform_input``).
        init_weights: Whether to run ``_initialize_weights``. ``None`` emits a
            ``FutureWarning`` and behaves like ``True``.
        blocks: Three callables ``[conv_block, inception_block, inception_aux_block]``
            used to build the layers. Defaults to
            ``[BasicConv2d, Inception, InceptionAux]``.
    """

    __constants__ = ['aux_logits', 'transform_input']

    def __init__(
        self,
        num_classes: int = 1000,
        aux_logits: bool = True,
        transform_input: bool = False,
        init_weights: Optional[bool] = None,
        blocks: Optional[List[Callable[..., nn.Module]]] = None
    ) -> None:
        super(GoogLeNet, self).__init__()
        if blocks is None:
            blocks = [BasicConv2d, Inception, InceptionAux]
        if init_weights is None:
            warnings.warn('The default weight initialization of GoogleNet will be changed in future releases of '
                          'torchvision. If you wish to keep the old behavior (which leads to long initialization times'
                          ' due to scipy/scipy#11299), please set init_weights=True.', FutureWarning)
            init_weights = True
        assert len(blocks) == 3
        conv_block = blocks[0]
        inception_block = blocks[1]
        inception_aux_block = blocks[2]

        self.aux_logits = aux_logits
        self.transform_input = transform_input

        # Stem.
        self.conv1 = conv_block(3, 64, kernel_size=7, stride=2, padding=3)
        self.maxpool1 = nn.MaxPool2d(3, stride=2, ceil_mode=True)
        self.conv2 = conv_block(64, 64, kernel_size=1)
        self.conv3 = conv_block(64, 192, kernel_size=3, padding=1)
        self.maxpool2 = nn.MaxPool2d(3, stride=2, ceil_mode=True)

        # Inception stages; the first argument of each block is the channel
        # count produced by the previous one.
        self.inception3a = inception_block(192, 64, 96, 128, 16, 32, 32)
        self.inception3b = inception_block(256, 128, 128, 192, 32, 96, 64)
        self.maxpool3 = nn.MaxPool2d(3, stride=2, ceil_mode=True)

        self.inception4a = inception_block(480, 192, 96, 208, 16, 48, 64)
        self.inception4b = inception_block(512, 160, 112, 224, 24, 64, 64)
        self.inception4c = inception_block(512, 128, 128, 256, 24, 64, 64)
        self.inception4d = inception_block(512, 112, 144, 288, 32, 64, 64)
        self.inception4e = inception_block(528, 256, 160, 320, 32, 128, 128)
        self.maxpool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)

        self.inception5a = inception_block(832, 256, 160, 320, 32, 128, 128)
        self.inception5b = inception_block(832, 384, 192, 384, 48, 128, 128)

        if aux_logits:
            self.aux1 = inception_aux_block(512, num_classes)
            self.aux2 = inception_aux_block(528, num_classes)
        else:
            self.aux1 = None  # type: ignore[assignment]
            self.aux2 = None  # type: ignore[assignment]

        # Classification head.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(0.2)
        self.fc = nn.Linear(1024, num_classes)

        if init_weights:
            self._initialize_weights()

    def _initialize_weights(self) -> None:
        """Initialise conv/linear weights from a truncated normal and reset batch norm.

        Conv2d and Linear weights are drawn from a normal distribution with
        scale 0.01 truncated at two standard deviations; BatchNorm2d weights
        are set to 1 and biases to 0.
        """
        # Imported here so scipy is only needed when this initialisation runs.
        import scipy.stats as stats

        # The frozen distribution does not depend on the module, so build it once.
        truncated_normal = stats.truncnorm(-2, 2, scale=0.01)
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                values = torch.as_tensor(truncated_normal.rvs(m.weight.numel()), dtype=m.weight.dtype)
                values = values.view(m.weight.size())
                with torch.no_grad():
                    m.weight.copy_(values)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _transform_input(self, x: Tensor) -> Tensor:
        """Re-normalise ``x`` per channel when ``self.transform_input`` is set.

        The arithmetic undoes a normalisation with mean (0.485, 0.456, 0.406) /
        std (0.229, 0.224, 0.225) and applies ``(value - 0.5) / 0.5`` instead.
        Returns ``x`` unchanged when the flag is off.
        """
        if self.transform_input:
            x_ch0 = torch.unsqueeze(x[:, 0], 1) * (0.229 / 0.5) + (0.485 - 0.5) / 0.5
            x_ch1 = torch.unsqueeze(x[:, 1], 1) * (0.224 / 0.5) + (0.456 - 0.5) / 0.5
            x_ch2 = torch.unsqueeze(x[:, 2], 1) * (0.225 / 0.5) + (0.406 - 0.5) / 0.5
            x = torch.cat((x_ch0, x_ch1, x_ch2), 1)
        return x

    def _forward(self, x: Tensor) -> Tuple[Tensor, Optional[Tensor], Optional[Tensor]]:
        """Run the network and return ``(logits, aux2, aux1)``.

        ``aux1`` / ``aux2`` are ``None`` unless the corresponding head exists
        and the module is in training mode. The shape comments assume a
        224 x 224 input.
        """
        # N x 3 x 224 x 224
        x = self.conv1(x)
        # N x 64 x 112 x 112
        x = self.maxpool1(x)
        # N x 64 x 56 x 56
        x = self.conv2(x)
        # N x 64 x 56 x 56
        x = self.conv3(x)
        # N x 192 x 56 x 56
        x = self.maxpool2(x)

        # N x 192 x 28 x 28
        x = self.inception3a(x)
        # N x 256 x 28 x 28
        x = self.inception3b(x)
        # N x 480 x 28 x 28
        x = self.maxpool3(x)
        # N x 480 x 14 x 14
        x = self.inception4a(x)
        # N x 512 x 14 x 14
        aux1 = torch.jit.annotate(Optional[Tensor], None)
        if self.aux1 is not None:
            if self.training:
                aux1 = self.aux1(x)

        x = self.inception4b(x)
        # N x 512 x 14 x 14
        x = self.inception4c(x)
        # N x 512 x 14 x 14
        x = self.inception4d(x)
        # N x 528 x 14 x 14
        aux2 = torch.jit.annotate(Optional[Tensor], None)
        if self.aux2 is not None:
            if self.training:
                aux2 = self.aux2(x)

        x = self.inception4e(x)
        # N x 832 x 14 x 14
        x = self.maxpool4(x)
        # N x 832 x 7 x 7
        x = self.inception5a(x)
        # N x 832 x 7 x 7
        x = self.inception5b(x)
        # N x 1024 x 7 x 7

        x = self.avgpool(x)
        # N x 1024 x 1 x 1
        x = torch.flatten(x, 1)
        # N x 1024
        x = self.dropout(x)
        x = self.fc(x)
        # N x 1000 (num_classes)
        return x, aux2, aux1

    @torch.jit.unused
    def eager_outputs(self, x: Tensor, aux2: Optional[Tensor], aux1: Optional[Tensor]) -> GoogLeNetOutputs:
        """Choose the eager-mode return value.

        Returns the named tuple ``(logits, aux_logits2, aux_logits1)`` while
        training with auxiliary heads enabled, otherwise just the logits tensor.
        """
        if self.training and self.aux_logits:
            return _GoogLeNetOutputs(x, aux2, aux1)
        else:
            return x  # type: ignore[return-value]

    def forward(self, x: Tensor) -> GoogLeNetOutputs:
        """Classify a batch of images.

        In eager mode the result is described by ``eager_outputs``; under
        TorchScript a ``GoogLeNetOutputs`` tuple is always returned.
        """
        x = self._transform_input(x)
        # _forward returns (logits, aux2, aux1); unpack in that same order so
        # each auxiliary output ends up in the matching named field.
        x, aux2, aux1 = self._forward(x)
        aux_defined = self.training and self.aux_logits
        if torch.jit.is_scripting():
            if not aux_defined:
                warnings.warn("Scripted GoogleNet always returns GoogleNetOutputs Tuple")
            return GoogLeNetOutputs(x, aux2, aux1)
        else:
            return self.eager_outputs(x, aux2, aux1)
class Inception(nn.Module):
    """Inception block: four parallel branches concatenated along the channel axis.

    Args:
        in_channels: Channels of the incoming feature map.
        ch1x1: Output channels of the 1x1 branch.
        ch3x3red: Channels after the 1x1 reduction in the second branch.
        ch3x3: Output channels of the second branch.
        ch5x5red: Channels after the 1x1 reduction in the third branch.
        ch5x5: Output channels of the third branch.
        pool_proj: Output channels of the pooling branch.
        conv_block: Callable used to build every convolution; defaults to
            ``BasicConv2d``.

    The output has ``ch1x1 + ch3x3 + ch5x5 + pool_proj`` channels.
    """

    def __init__(
        self,
        in_channels: int,
        ch1x1: int,
        ch3x3red: int,
        ch3x3: int,
        ch5x5red: int,
        ch5x5: int,
        pool_proj: int,
        conv_block: Optional[Callable[..., nn.Module]] = None
    ) -> None:
        super(Inception, self).__init__()
        if conv_block is None:
            conv_block = BasicConv2d

        self.branch1 = conv_block(in_channels, ch1x1, kernel_size=1)

        self.branch2 = nn.Sequential(
            conv_block(in_channels, ch3x3red, kernel_size=1),
            conv_block(ch3x3red, ch3x3, kernel_size=3, padding=1)
        )

        self.branch3 = nn.Sequential(
            conv_block(in_channels, ch5x5red, kernel_size=1),
            # Here, kernel_size=3 instead of kernel_size=5 is a known bug.
            # Please see https://github.com/pytorch/vision/issues/906 for details.
            conv_block(ch5x5red, ch5x5, kernel_size=3, padding=1)
        )

        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1, ceil_mode=True),
            conv_block(in_channels, pool_proj, kernel_size=1)
        )

    def _forward(self, x: Tensor) -> List[Tensor]:
        """Apply every branch to ``x`` and return the four results in branch order."""
        branch1 = self.branch1(x)
        branch2 = self.branch2(x)
        branch3 = self.branch3(x)
        branch4 = self.branch4(x)

        outputs = [branch1, branch2, branch3, branch4]
        return outputs

    def forward(self, x: Tensor) -> Tensor:
        """Return the branch outputs concatenated along dimension 1."""
        outputs = self._forward(x)
        return torch.cat(outputs, 1)
class InceptionAux(nn.Module):
    """Auxiliary classifier head attached to an intermediate feature map.

    Args:
        in_channels: Channels of the incoming feature map.
        num_classes: Size of the output layer.
        conv_block: Callable used to build the 1x1 convolution; defaults to
            ``BasicConv2d``.
        dropout: Drop probability applied between the two linear layers while
            training. Defaults to 0.7.
    """

    def __init__(
        self,
        in_channels: int,
        num_classes: int,
        conv_block: Optional[Callable[..., nn.Module]] = None,
        dropout: float = 0.7
    ) -> None:
        super(InceptionAux, self).__init__()
        if conv_block is None:
            conv_block = BasicConv2d
        self.conv = conv_block(in_channels, 128, kernel_size=1)

        # 2048 = 128 channels * 4 * 4 positions left after the adaptive pooling.
        self.fc1 = nn.Linear(2048, 1024)
        self.fc2 = nn.Linear(1024, num_classes)
        # Kept as a plain float so the module's state_dict is unaffected.
        self.dropout_p = dropout

    def forward(self, x: Tensor) -> Tensor:
        """Return ``N x num_classes`` logits for the feature map ``x``."""
        # aux1: N x 512 x 14 x 14, aux2: N x 528 x 14 x 14
        x = F.adaptive_avg_pool2d(x, (4, 4))
        # aux1: N x 512 x 4 x 4, aux2: N x 528 x 4 x 4
        x = self.conv(x)
        # N x 128 x 4 x 4
        x = torch.flatten(x, 1)
        # N x 2048
        x = F.relu(self.fc1(x), inplace=True)
        # N x 1024
        x = F.dropout(x, self.dropout_p, training=self.training)
        # N x 1024
        x = self.fc2(x)
        # N x 1000 (num_classes)

        return x
class BasicConv2d(nn.Module):
    """Bias-free convolution followed by batch normalisation and ReLU.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels produced by the convolution.
        **kwargs: Forwarded to ``nn.Conv2d`` (``kernel_size``, ``stride``,
            ``padding``, ...).
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        **kwargs: Any
    ) -> None:
        super(BasicConv2d, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels, eps=0.001)

    def forward(self, x: Tensor) -> Tensor:
        """Apply convolution, batch norm and an in-place ReLU to ``x``."""
        x = self.conv(x)
        x = self.bn(x)
        return F.relu(x, inplace=True)
# Build GoogLeNet and load the ImageNet checkpoint into it.
model = googlenet(pretrained=True)
p1 = next(iter(model.parameters()))
# Replace the classifier with a fresh 10-way layer to fit the cifar10 dataset.
model.fc = nn.Linear(in_features=1024, out_features=10)
注意到原始代码中的 load_state_dict_from_url 是通过utils.py间接import进来的,我们这里直接将其import到当前代码中。另外对于InceptionAux模块以及对应的aux head,在进行finetuning的时候需要重新训练(pytorch提供的预训练权重中,aux head部分并没有经过预训练;我们这里的实现并没有finetuning aux head,aux head和分类head的finetuning原理是一样的)。
我们使用googlenet函数加载预训练模型之后,直接替换model的fc层(见上面代码最后一行对 model.fc 的重新赋值)。这样模型中其他层都是预训练的,可以用来做特征抽取(暂且称为特征抽取层)。我们这里的finetuning主要就是要更新fc这一层的权重(这里称为分类层或者分类head)。
值得注意的是,我们可以在使用自己的数据集重新训练的过程中,不更新特征抽取层权重。这种情况下,训练只会调整分类头。这对于数据集与pretrain数据集类似的情况(或者数据集很小不容易训练)比较合适。相反,我们可以选择训练过程中更新特征抽取层权重(极端情况就是完全重新训练),这对于数据集较大且和pretrain数据集区别较大的情况比较合适。
我们这里实现的是更新特征抽取层的finetuning(pytorch中,要禁止某层的权重参数更新,只需要将该层参数的requires_grad属性置为False即可:param.requires_grad = False)。
有了加载了pretrain权重并且修改了分类head的模型,我们这里准备下训练数据。这里我们选择cifar10 数据集:http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
这个数据集解压之后是一些用pickle序列化的二进制文件,所以需要处理一下才能使用,如下面代码所示:
import pickle
import numpy as np
from torchvision import transforms
from PIL import Image
def unpickle(file):
    """Load one pickled CIFAR-10 file and return the object stored in it.

    Args:
        file: Path of the pickle file to read.

    Returns:
        The unpickled object. Strings pickled by Python 2 come back as
        ``bytes`` because of ``encoding='bytes'``, which is why callers index
        the result with keys such as ``b'data'``.
    """
    # NOTE: pickle can run arbitrary code while loading; only use this on
    # files from a trusted source.
    with open(file, 'rb') as fo:
        data = pickle.load(fo, encoding='bytes')
    return data
def _cifar_rows_to_images(rows, negatives=False):
    """Reshape flat CIFAR rows into channel-last images of shape (N, 32, 32, 3).

    Each row is read as three 32 x 32 planes; the channel axis is then moved
    to the end. With ``negatives=True`` the result is also cast to float32.
    """
    images = rows.reshape((len(rows), 3, 32, 32)).transpose(0, 2, 3, 1)
    if negatives:
        images = images.astype(np.float32)
    return images


def load_cifar_10_data(data_dir, negatives=False):
    """Load the extracted CIFAR-10 python batches from ``data_dir``.

    Args:
        data_dir: Directory containing ``batches.meta``, ``data_batch_1`` ..
            ``data_batch_5`` and ``test_batch``.
        negatives: If True the image arrays are cast to float32; otherwise
            they keep the dtype stored in the files.

    Returns:
        A 7-tuple ``(train_data, train_filenames, train_labels, test_data,
        test_filenames, test_labels, label_names)``. The data arrays have
        shape (N, 32, 32, 3); everything else is a numpy array built from the
        lists stored in the files.
    """
    # batches.meta holds the human-readable class names under b'label_names'.
    meta_data_dict = unpickle(data_dir + "/batches.meta")
    cifar_label_names = np.array(meta_data_dict[b'label_names'])

    # Training data: five batch files, each a dict with b'data' (ndarray),
    # b'filenames' (list) and b'labels' (list).
    train_chunks = []
    cifar_train_filenames = []
    cifar_train_labels = []
    for i in range(1, 6):
        cifar_train_data_dict = unpickle(data_dir + "/data_batch_{}".format(i))
        train_chunks.append(cifar_train_data_dict[b'data'])
        cifar_train_filenames += cifar_train_data_dict[b'filenames']
        cifar_train_labels += cifar_train_data_dict[b'labels']

    # Stack once at the end instead of re-copying the growing array per batch.
    cifar_train_data = _cifar_rows_to_images(np.vstack(train_chunks), negatives)
    cifar_train_filenames = np.array(cifar_train_filenames)
    cifar_train_labels = np.array(cifar_train_labels)

    # Test data: a single file with the same keys as a training batch.
    cifar_test_data_dict = unpickle(data_dir + "/test_batch")
    cifar_test_data = _cifar_rows_to_images(cifar_test_data_dict[b'data'], negatives)
    cifar_test_filenames = np.array(cifar_test_data_dict[b'filenames'])
    cifar_test_labels = np.array(cifar_test_data_dict[b'labels'])

    return cifar_train_data, cifar_train_filenames, cifar_train_labels, \
        cifar_test_data, cifar_test_filenames, cifar_test_labels, cifar_label_names
# Directory created by extracting cifar-10-python.tar.gz; adjust it to wherever
# the archive was unpacked.
fpath = "./cifar-10-batches-py"
(train_data, train_filenames, train_labels,
 test_data, test_filenames, test_labels, label_names) = load_cifar_10_data(fpath)
def make_data_loader(data, labels, max_samples=100, train_factor=0.8):
    """Turn raw images and labels into train/val lists of single-sample batches.

    Args:
        data: Array of images indexed along its first axis; each item is
            passed to ``Image.fromarray``.
        labels: Class ids aligned with ``data``.
        max_samples: Only the first ``max_samples`` items are used. ``None``
            uses everything. Defaults to 100 to keep the demo fast.
        train_factor: Fraction of the used samples that goes to the training
            split; the rest becomes the validation split.

    Returns:
        ``{"train": [...], "val": [...]}`` where each list element is a tuple
        ``(image, label)``: ``image`` is the preprocessed tensor with a leading
        batch dimension of 1, ``label`` an int64 tensor of shape ``(1,)``.
    """
    print(data.shape)
    print(type(data))

    # The pipeline is the same for every image, so build it once.
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    def make_tensor(image_array):
        # unsqueeze(0) adds the batch dimension the model expects.
        return preprocess(Image.fromarray(image_array)).unsqueeze(0)

    def make_label(class_id):
        # Class-index target; forced to int64 regardless of the numpy
        # integer type of the incoming labels.
        return torch.tensor([int(class_id)], dtype=torch.long)

    def build_split(images, targets):
        return [(make_tensor(img), make_label(target)) for img, target in zip(images, targets)]

    # Truncate first so the split point refers to the samples actually used.
    if max_samples is not None:
        data = data[:max_samples]
        labels = labels[:max_samples]
    train_num = int(len(data) * train_factor)

    # Disjoint splits: everything before train_num trains, the rest validates.
    return {
        "train": build_split(data[:train_num], labels[:train_num]),
        "val": build_split(data[train_num:], labels[train_num:]),
    }
# Build the {"train": ..., "val": ...} sample lists from the CIFAR-10 training set.
loader = make_data_loader(data=train_data, labels=train_labels)
load_cifar_10_data 函数首先对解压后的目录进行处理,区分出训练数据,测试数据,以及对应的标签数据。
make_data_loader 将训练数据和标签转换为训练使用的格式,并返回一个包含train和val两部分的loader字典。
下面是对加载了pretrain权重的googlenet进行finetune训练的核心逻辑:
import time
import copy
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, is_inception=False):
    """Fine-tune ``model`` and keep the weights with the best validation accuracy.

    Every epoch runs a ``'train'`` phase (forward, backward, optimizer step)
    followed by a ``'val'`` phase (forward only, model in eval mode).

    Args:
        model: Network to train. It is moved to the module-level ``device``.
        dataloaders: Mapping with the keys ``'train'`` and ``'val'``; each value
            is an iterable of ``(inputs, labels)`` batches.
        criterion: Loss taking ``(outputs, labels)``.
        optimizer: Optimizer over the parameters that should be updated.
        num_epochs: Number of epochs to run.
        is_inception: If True and the model returns a tuple while training,
            every auxiliary output that is not None adds ``0.4 * criterion(aux,
            labels)`` to the loss. Otherwise only the first output is used.

    Returns:
        ``(model, val_acc_history)``: the model with the best validation
        weights loaded, and the validation accuracy of every epoch.
    """
    since = time.time()
    val_acc_history = []

    # Batches are moved to `device` below, so the model has to be there too.
    # Module.to() moves the parameters in place, so an optimizer created
    # beforehand keeps referring to the same parameters.
    model.to(device)

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    print("begin to train")
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            running_loss = 0.0
            # A tensor from the start so an empty phase still yields a tensor accuracy.
            running_corrects = torch.zeros((), dtype=torch.long, device=device)
            num_samples = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(is_train):
                    result = model(inputs)
                    # Models with auxiliary heads return a tuple whose first
                    # element is the main output; plain models return a tensor.
                    if isinstance(result, tuple):
                        outputs, *aux_outputs = result
                    else:
                        outputs, aux_outputs = result, []

                    loss = criterion(outputs, labels)
                    if is_inception and is_train:
                        # In train mode the auxiliary losses are added to the
                        # main loss; in testing only the final output counts.
                        for aux in aux_outputs:
                            if aux is not None:
                                loss = loss + 0.4 * criterion(aux, labels)

                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # statistics
                batch_size = inputs.size(0)
                running_loss += loss.item() * batch_size
                running_corrects += torch.sum(preds == labels.data)
                num_samples += batch_size

            # Average over samples seen (not over the number of batches) and
            # guard against an empty phase.
            denominator = max(num_samples, 1)
            epoch_loss = running_loss / denominator
            epoch_acc = running_corrects.double() / denominator

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'val':
                val_acc_history.append(epoch_acc)

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, val_acc_history
# Cross-entropy over the class logits.
criterion = nn.CrossEntropyLoss()
# All parameters are handed to the optimizer, so the whole network is updated.
params_to_update = model.parameters()
print(type(params_to_update))
optimizer_ft = torch.optim.SGD(params_to_update, lr=1e-3, momentum=0.9)
train_model(model=model, dataloaders=loader, criterion=criterion, optimizer=optimizer_ft)
train_model函数主要逻辑也比较简单:
数据分为train和val两块
train阶段模型可以更新权重(进行反向传播loss.backward和权重更新optimizer.step)
val阶段(模型处于eval模式)只进行前向传播,并计算损失和准确率
整体就是pytorch训练的一般套路:
1 数据拷入device ; inputs = inputs.to(device)
2 重置梯度; optimizer.zero_grad()
3 前向传播; outputs = model(inputs)
4 计算损失;loss = criterion(outputs, labels)
5 损失反向传播; loss.backward()
6 更新权重 optimizer.step()
损失函数我们使用交叉熵损失。优化器使用SGD,动量为0.9
我们这里只使用了训练数据集中前100个数据进行训练。可以看到,到了第18个epoch,train loss下降到了1.45左右,val loss却下降不明显,有可能是过拟合,也有可能是数据太少。我们这里只是示意训练,后续再详细调优。
<class 'generator'>
begin to train
Epoch 0/24
----------
train Loss: 2.6431 Acc: 0.0900
val Loss: 2.3888 Acc: 0.1200
Epoch 1/24
----------
train Loss: 2.6619 Acc: 0.1100
val Loss: 2.3855 Acc: 0.1300
Epoch 2/24
----------
train Loss: 2.5731 Acc: 0.1100
val Loss: 2.4109 Acc: 0.0900
Epoch 3/24
----------
train Loss: 2.5192 Acc: 0.1000
val Loss: 2.3500 Acc: 0.1200
Epoch 4/24
----------
train Loss: 2.4641 Acc: 0.0900
val Loss: 2.3360 Acc: 0.1000
.
.
.
Epoch 17/24
----------
train Loss: 1.4424 Acc: 0.4700
val Loss: 2.3126 Acc: 0.2100
Epoch 18/24
----------
train Loss: 1.4533 Acc: 0.5100
val Loss: 2.3580 Acc: 0.2300
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。