作者 | n0obcoder
来源 | Medium
编辑 | 代码医生团队
import matplotlib.pyplot as plt%matplotlib inlineimport os, glob, sys def q(text = ''): # Just an exit function print(text) sys.exit() # Input data files are available in the "/kaggle/input/" directory. data_dir = 'mobile-gallery-image-classification-data/mobile_gallery_image_classification/train' for class_dir in glob.glob(data_dir + os.sep + '*'): print(class_dir) # Any results you write to the current directory ('/kaggle/working') are saved as output.
以下是从 Mobile Image Gallery 数据集的训练数据中选取的几张样本图像,它们的类别分别是:Memes(左上)、汽车(右上)、树木(右下)和山脉(左下)。
数据集准备好之后,下一步是进行数据预处理。这里执行一些简单的图像处理操作:调整图像大小;沿水平方向随机翻转图像;将图像(像素为0到255之间的整数值)转换为张量(像素为0.0到1.0之间的浮点值);最后但同样重要的一点,使用ImageNet统计信息对张量进行归一化(均值 = [0.485, 0.456, 0.406],std = [0.229, 0.224, 0.225])。请注意,这里处理的是三通道的RGB(彩色)图像,而不是灰度(黑白)图像。
# Let's start by loading in the image data
import torch
from torchvision import datasets, transforms

# Defining the transforms that we want to apply to the data:
#   - resizing the image to (224, 224),
#   - randomly flipping the image horizontally (with the default probability of 0.5),
#   - converting the image to a Tensor (converting the pixel values to between 0 and 1),
#   - normalizing the 3-channel data using the 'Imagenet' stats.
data_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
print('data_transforms: ', data_transforms)

# data_dir is the training directory defined earlier in the file.
dataset = datasets.ImageFolder(data_dir, transform = data_transforms)
print('dataset: ', dataset)

# We need to split the dataset between train and val datasets.
# NOTE: both subsets wrap the same `dataset` object, so the validation images
# go through the same data_transforms (random flip included).
train_percentage = 0.8
train_size = int(len(dataset)*train_percentage)
val_size = len(dataset) - train_size  # remainder, so the two sizes always sum to len(dataset)
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

print('\nnumber of examples in train_dataset: ', len(train_dataset))
print('number of examples in val_dataset : ', len(val_dataset))
data_transforms: Compose(
    Resize(size=(224, 224), interpolation=PIL.Image.BILINEAR)
    RandomHorizontalFlip(p=0.5)
    ToTensor()
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
)
dataset: Dataset ImageFolder
    Number of datapoints: 1266
    Root Location: mobile-gallery-image-classification-data/mobile_gallery_image_classification/train
    Transforms (if any): Compose(
        Resize(size=(224, 224), interpolation=PIL.Image.BILINEAR)
        RandomHorizontalFlip(p=0.5)
        ToTensor()
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    )
    Target Transforms (if any): None

number of examples in train_dataset: 1012
number of examples in val_dataset : 254
# Defining dataloaders which would return data in batches
batch_size = 64

# Training batches are reshuffled on every pass; validation order is kept fixed.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size = batch_size, shuffle = True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size = batch_size, shuffle = False)

print('number of batches in train_loader with a batch_size of {}: {}'.format(batch_size, len(train_loader)))
print('number of batches in val_loader with a batch_size of {}: {}'.format(batch_size, len(val_loader)))
number of batches in train_loader with a batch_size of 64: 16
number of batches in val_loader with a batch_size of 64: 4
import torch
import torch.nn as nn
from torchvision import models

# Defining the model
model = models.resnet34(pretrained = True)

# The original architecture of resnet34 has 1000 neurons (corresponding to the 1000 classes
# on which it was originally trained) in the final layer.
# So we need to change the final layer according to the number of classes that we have in our dataset
print('model.fc before: ', model.fc)
model_fc_in_features = model.fc.in_features  # keep the input width of the existing head
model.fc = nn.Linear(model_fc_in_features, len(dataset.classes))
print('model.fc after : ', model.fc)
model.fc before: Linear(in_features=512, out_features=1000, bias=True)
model.fc after : Linear(in_features=512, out_features=6, bias=True)
# Now let's have a look at the requires_grad attributes for all the parameters
for name, param in model.named_parameters():
    print('name: {} has requires_grad: {}'.format(name, param.requires_grad))
可以看到,所有参数在一开始都是可训练的(requires_grad = True 表示该参数是可学习的)。
# List the names of the model's direct child modules.
for name, module in model.named_children():
    print('name: ', name)
name: conv1
name: bn1
name: relu
name: maxpool
name: layer1
name: layer2
name: layer3
name: layer4
name: avgpool
name: fc
# We would freeze all but the last few layers (layer4 and fc)
for name, param in model.named_parameters():
    # Trainable only when the parameter name contains 'layer4' or 'fc'; frozen otherwise.
    param.requires_grad = ('layer4' in name) or ('fc' in name)
# Show the requires_grad flag of every parameter once more.
for name, param in model.named_parameters():
    print('name: {} has requires_grad: {}'.format(name, param.requires_grad))
name: conv1.weight has requires_grad: False
name: bn1.weight has requires_grad: False
name: bn1.bias has requires_grad: False
name: layer1.0.conv1.weight has requires_grad: False
name: layer1.0.bn1.weight has requires_grad: False
name: layer1.0.bn1.bias has requires_grad: False
name: layer1.0.conv2.weight has requires_grad: False
name: layer1.0.bn2.weight has requires_grad: False
name: layer1.0.bn2.bias has requires_grad: False
name: layer1.1.conv1.weight has requires_grad: False
name: layer1.1.bn1.weight has requires_grad: False
name: layer1.1.bn1.bias has requires_grad: False
name: layer1.1.conv2.weight has requires_grad: False
name: layer1.1.bn2.weight has requires_grad: False
name: layer1.1.bn2.bias has requires_grad: False
name: layer1.2.conv1.weight has requires_grad: False
name: layer1.2.bn1.weight has requires_grad: False
name: layer1.2.bn1.bias has requires_grad: False
name: layer1.2.conv2.weight has requires_grad: False
name: layer1.2.bn2.weight has requires_grad: False
name: layer1.2.bn2.bias has requires_grad: False
name: layer2.0.conv1.weight has requires_grad: False
name: layer2.0.bn1.weight has requires_grad: False
name: layer2.0.bn1.bias has requires_grad: False
name: layer2.0.conv2.weight has requires_grad: False
name: layer2.0.bn2.weight has requires_grad: False
name: layer2.0.bn2.bias has requires_grad: False
name: layer2.0.downsample.0.weight has requires_grad: False
name: layer2.0.downsample.1.weight has requires_grad: False
name: layer2.0.downsample.1.bias has requires_grad: False
name: layer2.1.conv1.weight has requires_grad: False
name: layer2.1.bn1.weight has requires_grad: False
name: layer2.1.bn1.bias has requires_grad: False
name: layer2.1.conv2.weight has requires_grad: False
name: layer2.1.bn2.weight has requires_grad: False
name: layer2.1.bn2.bias has requires_grad: False
name: layer2.2.conv1.weight has requires_grad: False
name: layer2.2.bn1.weight has requires_grad: False
name: layer2.2.bn1.bias has requires_grad: False
name: layer2.2.conv2.weight has requires_grad: False
name: layer2.2.bn2.weight has requires_grad: False
name: layer2.2.bn2.bias has requires_grad: False
name: layer2.3.conv1.weight has requires_grad: False
name: layer2.3.bn1.weight has requires_grad: False
name: layer2.3.bn1.bias has requires_grad: False
name: layer2.3.conv2.weight has requires_grad: False
name: layer2.3.bn2.weight has requires_grad: False
name: layer2.3.bn2.bias has requires_grad: False
name: layer3.0.conv1.weight has requires_grad: False
name: layer3.0.bn1.weight has requires_grad: False
name: layer3.0.bn1.bias has requires_grad: False
name: layer3.0.conv2.weight has requires_grad: False
name: layer3.0.bn2.weight has requires_grad: False
name: layer3.0.bn2.bias has requires_grad: False
name: layer3.0.downsample.0.weight has requires_grad: False
name: layer3.0.downsample.1.weight has requires_grad: False
name: layer3.0.downsample.1.bias has requires_grad: False
name: layer3.1.conv1.weight has requires_grad: False
name: layer3.1.bn1.weight has requires_grad: False
name: layer3.1.bn1.bias has requires_grad: False
name: layer3.1.conv2.weight has requires_grad: False
name: layer3.1.bn2.weight has requires_grad: False
name: layer3.1.bn2.bias has requires_grad: False
name: layer3.2.conv1.weight has requires_grad: False
name: layer3.2.bn1.weight has requires_grad: False
name: layer3.2.bn1.bias has requires_grad: False
name: layer3.2.conv2.weight has requires_grad: False
name: layer3.2.bn2.weight has requires_grad: False
name: layer3.2.bn2.bias has requires_grad: False
name: layer3.3.conv1.weight has requires_grad: False
name: layer3.3.bn1.weight has requires_grad: False
name: layer3.3.bn1.bias has requires_grad: False
name: layer3.3.conv2.weight has requires_grad: False
name: layer3.3.bn2.weight has requires_grad: False
name: layer3.3.bn2.bias has requires_grad: False
name: layer3.4.conv1.weight has requires_grad: False
name: layer3.4.bn1.weight has requires_grad: False
name: layer3.4.bn1.bias has requires_grad: False
name: layer3.4.conv2.weight has requires_grad: False
name: layer3.4.bn2.weight has requires_grad: False
name: layer3.4.bn2.bias has requires_grad: False
name: layer3.5.conv1.weight has requires_grad: False
name: layer3.5.bn1.weight has requires_grad: False
name: layer3.5.bn1.bias has requires_grad: False
name: layer3.5.conv2.weight has requires_grad: False
name: layer3.5.bn2.weight has requires_grad: False
name: layer3.5.bn2.bias has requires_grad: False
name: layer4.0.conv1.weight has requires_grad: True
name: layer4.0.bn1.weight has requires_grad: True
name: layer4.0.bn1.bias has requires_grad: True
name: layer4.0.conv2.weight has requires_grad: True
name: layer4.0.bn2.weight has requires_grad: True
name: layer4.0.bn2.bias has requires_grad: True
name: layer4.0.downsample.0.weight has requires_grad: True
name: layer4.0.downsample.1.weight has requires_grad: True
name: layer4.0.downsample.1.bias has requires_grad: True
name: layer4.1.conv1.weight has requires_grad: True
name: layer4.1.bn1.weight has requires_grad: True
name: layer4.1.bn1.bias has requires_grad: True
name: layer4.1.conv2.weight has requires_grad: True
name: layer4.1.bn2.weight has requires_grad: True
name: layer4.1.bn2.bias has requires_grad: True
name: layer4.2.conv1.weight has requires_grad: True
name: layer4.2.bn1.weight has requires_grad: True
name: layer4.2.bn1.bias has requires_grad: True
name: layer4.2.conv2.weight has requires_grad: True
name: layer4.2.bn2.weight has requires_grad: True
name: layer4.2.bn2.bias has requires_grad: True
name: fc.weight has requires_grad: True
name: fc.bias has requires_grad: True
是的,更改已经生效!(可以看到,“layer4”和“fc”中的参数具有 requires_grad = True,其余所有参数具有 requires_grad = False)
import torch.optim as optim
from torch.optim import lr_scheduler

# Now we define the Loss Function
loss_fn = nn.CrossEntropyLoss()

# Define the optimizer.
# Only parameters that still have requires_grad = True are handed to Adam.
lr = 0.00001
optimizer = optim.Adam((p for p in model.parameters() if p.requires_grad), lr = lr)
loader = {'train': train_loader, 'val': val_loader}

epochs = 5
log_interval = 2

# Let's train the model for 5 epochs !
# NOTE(review): `trainer` is not defined in the code shown in this file; it must be
# defined before this cell runs. The call below unpacks four return values from it.
train_losses, val_losses, batch_train_losses, batch_val_losses = trainer(loader, model, loss_fn, optimizer, epochs = epochs, log_interval = log_interval)

# Plotting the epoch losses
plt.plot(train_losses)
plt.plot(val_losses)
plt.legend(['train losses', 'val_losses'])
plt.title('Loss vs Epoch')

# Batch-level losses, one new figure each
plt.figure()
plt.plot(batch_train_losses)
plt.title('batch_train_losses')

plt.figure()
plt.plot(batch_val_losses)
plt.title('batch_val_losses')

# Saving the model (architecture and weights)
torch.save(model, 'stage1.pth')
# We will now freeze the 'layer4' and train just the 'fc' layer of the model for 3 more epochs
for name, param in model.named_parameters():
    if 'layer4' in name:
        param.requires_grad = False  # layer4 parameters would not get trained now

# Define the new learning rate and the new optimizer which would contain only
# the parameters with requires_grad = True
lr = 0.0003
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr = lr)

epochs = 3
log_interval = 2

# Let's train the model for 3 more epochs !
# NOTE(review): `trainer` is not defined in the code shown in this file; it must be
# defined before this cell runs. The call below unpacks four return values from it.
train_losses, val_losses, batch_train_losses, batch_val_losses = trainer(loader, model, loss_fn, optimizer, epochs = epochs, log_interval = log_interval)

# Plotting the epoch losses
plt.plot(train_losses)
plt.plot(val_losses)
plt.legend(['train losses', 'val_losses'])
plt.title('Loss vs Epoch')

# Batch-level losses, one new figure each
plt.figure()
plt.plot(batch_train_losses)
plt.title('batch_train_losses')

plt.figure()
plt.plot(batch_val_losses)
plt.title('batch_val_losses')

# Saving the model (architecture and weights)
torch.save(model, 'stage2.pth')
Training started...
epoch >>> 1/3
___TRAINING___
batch_loss at batch_idx 01/16: 0.20289725065231323
batch_loss at batch_idx 03/16: 0.2349197268486023
batch_loss at batch_idx 05/16: 0.2194989025592804
batch_loss at batch_idx 07/16: 0.20219461619853973
batch_loss at batch_idx 09/16: 0.27012479305267334
batch_loss at batch_idx 11/16: 0.20639048516750336
batch_loss at batch_idx 13/16: 0.1523684412240982
batch_loss at batch_idx 15/16: 0.14577656984329224
>>> train loss at epoch 1/3: 0.2009116342887577
___VALIDATION___
batch_loss at batch_idx 01/16: 0.20299889147281647
batch_loss at batch_idx 03/16: 0.19083364307880402
>>> val loss at epoch 1/3: 0.20429044950196124
=========================
epoch >>> 2/3
___TRAINING___
batch_loss at batch_idx 01/16: 0.14590243995189667
batch_loss at batch_idx 03/16: 0.10861243307590485
batch_loss at batch_idx 05/16: 0.14622969925403595
batch_loss at batch_idx 07/16: 0.1130327433347702
batch_loss at batch_idx 09/16: 0.1342758983373642
batch_loss at batch_idx 11/16: 0.13757610321044922
batch_loss at batch_idx 13/16: 0.15501776337623596
batch_loss at batch_idx 15/16: 0.11977922171354294
>>> train loss at epoch 2/3: 0.14645167593310474
___VALIDATION___
batch_loss at batch_idx 01/16: 0.16367006301879883
batch_loss at batch_idx 03/16: 0.16462600231170654
>>> val loss at epoch 2/3: 0.17527046447663797
=========================
epoch >>> 3/3
___TRAINING___
batch_loss at batch_idx 01/16: 0.1762229949235916
batch_loss at batch_idx 03/16: 0.10568083077669144
batch_loss at batch_idx 05/16: 0.14333905279636383
batch_loss at batch_idx 07/16: 0.08794888854026794
batch_loss at batch_idx 09/16: 0.1599852591753006
batch_loss at batch_idx 11/16: 0.15842339396476746
batch_loss at batch_idx 13/16: 0.08625025302171707
batch_loss at batch_idx 15/16: 0.12491285800933838
>>> train loss at epoch 3/3: 0.13451774695174026
___VALIDATION___
batch_loss at batch_idx 01/16: 0.15565256774425507
batch_loss at batch_idx 03/16: 0.13937778770923615
>>> val loss at epoch 3/3: 0.15460531577819914
=========================
import cv2
import torch.nn.functional as F


def predict(model, test_img_path):
    """Predict and print the class of the image stored at ``test_img_path``.

    The image is read with OpenCV, displayed with matplotlib, preprocessed with
    the deterministic part of ``data_transforms`` and passed through ``model``
    in eval mode. The predicted class name (looked up in ``dataset.classes``)
    and its softmax confidence are printed.

    Args:
        model: The network used for inference; it is switched to eval mode.
        test_img_path: Path of the image file to classify.

    Raises:
        ValueError: if OpenCV cannot read an image from ``test_img_path``.
    """
    img_bgr = cv2.imread(test_img_path)
    if img_bgr is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError('could not read an image from: {}'.format(test_img_path))

    # OpenCV loads images in BGR channel order, whereas the training data was loaded
    # through ImageFolder (RGB). Convert once and use the RGB array for both the
    # visualisation and the model input so inference matches training.
    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

    # Visualizing the test image
    plt.imshow(img_rgb)

    # Reuse the training pipeline minus the random flip, so that the prediction
    # for a given image is deterministic.
    inference_transforms = transforms.Compose([
        t for t in data_transforms.transforms
        if not isinstance(t, transforms.RandomHorizontalFlip)
    ])

    img = transforms.ToPILImage()(img_rgb)
    img = inference_transforms(img)
    img = img.unsqueeze(0)  # Expanding dimension: add the batch axis in front

    model.eval()
    with torch.no_grad():
        logits = model(img)

    probs = F.softmax(logits, dim = 1)
    max_prob, ind = torch.max(probs, 1)

    print('This Neural Network thinks that the given image belongs to >>> {} <<< class with confidence of {}%'.format(dataset.classes[ind.item()], round(max_prob.item()*100, 2)))


# NOTE(review): the training `data_dir` uses hyphens in its top-level folder name
# ('mobile-gallery-image-classification-data') while this path uses underscores;
# confirm which spelling matches the actual directory.
test_data_dir = 'mobile_gallery_image_classification_data/mobile_gallery_image_classification/test'

# Sorted so that a given test_img_index always refers to the same file.
test_img_list = sorted(glob.glob(test_data_dir + os.sep + '*'))

# Loading the trained model (architecture as well as the weights) for making inferences.
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
model = torch.load('stage2.pth')

# Select the test image index (choose a number from 0 to 6)
test_img_index = 3
predict(model, test_img_list[test_img_index])

# Output recorded in the original run:
# This Neural Network thinks that the given image belongs to >>> Memes <<< class with confidence of 95.21%