YOLOv3 in PyTorch (EfficientNet backbone)
# -*- coding: utf-8 -*-
"""
Created on Sat Jun 17 23:33:13 2023
@author: ysw
"""
#-------------------------------------#
#   Train on the dataset
#-------------------------------------#
import math
import os
from collections import OrderedDict

import cv2
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
from PIL import Image
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset

# The original project imports below are inlined in this file, except for the
# EfficientNet backbone, which still has to come from reference [1]
# (nets/efficientnet.py in that repository):
from nets.efficientnet import EfficientNet
# from nets.yolo import YoloBody
# from nets.yolo_training import YOLOLoss
# from utils.dataloader import YoloDataset, yolo_dataset_collate
# from utils.utils import get_anchors, get_classes
def conv2d(filter_in, filter_out, kernel_size):
    pad = (kernel_size - 1) // 2 if kernel_size else 0
    return nn.Sequential(OrderedDict([
        ("conv", nn.Conv2d(filter_in, filter_out, kernel_size=kernel_size, stride=1, padding=pad, bias=False)),
        ("bn", nn.BatchNorm2d(filter_out)),
        ("relu", nn.LeakyReLU(0.1)),
    ]))
#------------------------------------------------------------------------#
#   make_last_layers contains seven convolutions in total: the first five
#   extract features, the last two produce the raw YOLO prediction.
#------------------------------------------------------------------------#
def make_last_layers(filters_list, in_filters, out_filter):
    m = nn.Sequential(
        conv2d(in_filters, filters_list[0], 1),
        conv2d(filters_list[0], filters_list[1], 3),
        conv2d(filters_list[1], filters_list[0], 1),
        conv2d(filters_list[0], filters_list[1], 3),
        conv2d(filters_list[1], filters_list[0], 1),
        conv2d(filters_list[0], filters_list[1], 3),
        nn.Conv2d(filters_list[1], out_filter, kernel_size=1, stride=1, padding=0, bias=True)
    )
    return m
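
# A quick sanity check of the 5+2 split that YoloBody.forward relies on
# (illustrative values, not part of the original script -- uncomment to try):
# head = make_last_layers([320, 640], 320, 3 * (5 + 1))   # phi=0, one class
# feat = torch.randn(1, 320, 13, 13)
# branch = head[:5](feat)          # first five convs: feature extraction
# out = head[5:](branch)           # last two convs: raw prediction
# print(branch.shape, out.shape)   # [1, 320, 13, 13] and [1, 18, 13, 13]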
class YoloBody(nn.Module):
    def __init__(self, anchors_mask, num_classes, phi=0, load_weights=False):
        super(YoloBody, self).__init__()
        #---------------------------------------------------#
        #   Build the EfficientNet backbone (EfficientNet-B0 when phi=0).
        #   It returns three effective feature layers with shapes:
        #   52, 52, 40
        #   26, 26, 112
        #   13, 13, 320
        #---------------------------------------------------#
        self.backbone = EfficientNet(phi, load_weights=load_weights)
        out_filters = {
            0: [40, 112, 320],
            1: [40, 112, 320],
            2: [48, 120, 352],
            3: [48, 136, 384],
            4: [56, 160, 448],
            5: [64, 176, 512],
            6: [72, 200, 576],
            7: [80, 224, 640],
        }[phi]
        self.last_layer0 = make_last_layers([out_filters[-1], int(out_filters[-1]*2)], out_filters[-1], len(anchors_mask[0]) * (num_classes + 5))
        self.last_layer1_conv = conv2d(out_filters[-1], out_filters[-2], 1)
        self.last_layer1_upsample = nn.Upsample(scale_factor=2, mode='nearest')
        self.last_layer1 = make_last_layers([out_filters[-2], int(out_filters[-2]*2)], out_filters[-2] + out_filters[-2], len(anchors_mask[1]) * (num_classes + 5))
        self.last_layer2_conv = conv2d(out_filters[-2], out_filters[-3], 1)
        self.last_layer2_upsample = nn.Upsample(scale_factor=2, mode='nearest')
        self.last_layer2 = make_last_layers([out_filters[-3], int(out_filters[-3]*2)], out_filters[-3] + out_filters[-3], len(anchors_mask[2]) * (num_classes + 5))
    def forward(self, x):
        #---------------------------------------------------#
        #   The backbone returns three effective feature layers:
        #   x2: 52, 52, 40   x1: 26, 26, 112   x0: 13, 13, 320
        #---------------------------------------------------#
        x2, x1, x0 = self.backbone(x)
        #---------------------------------------------------#
        #   First head: out0 = (batch_size, 3*(5+num_classes), 13, 13)
        #---------------------------------------------------#
        out0_branch = self.last_layer0[:5](x0)
        out0 = self.last_layer0[5:](out0_branch)
        x1_in = self.last_layer1_conv(out0_branch)
        x1_in = self.last_layer1_upsample(x1_in)
        x1_in = torch.cat([x1_in, x1], 1)
        #---------------------------------------------------#
        #   Second head: out1 = (batch_size, 3*(5+num_classes), 26, 26)
        #---------------------------------------------------#
        out1_branch = self.last_layer1[:5](x1_in)
        out1 = self.last_layer1[5:](out1_branch)
        x2_in = self.last_layer2_conv(out1_branch)
        x2_in = self.last_layer2_upsample(x2_in)
        x2_in = torch.cat([x2_in, x2], 1)
        #---------------------------------------------------#
        #   Third head: out2 = (batch_size, 3*(5+num_classes), 52, 52)
        #---------------------------------------------------#
        out2 = self.last_layer2(x2_in)
        return out0, out1, out2
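
# Shape check for the three heads (illustrative; assumes the EfficientNet
# backbone from reference [1] is importable, with phi=0 and one class):
# net = YoloBody([[6, 7, 8], [3, 4, 5], [0, 1, 2]], num_classes=1, phi=0)
# o0, o1, o2 = net(torch.randn(1, 3, 416, 416))
# print(o0.shape, o1.shape, o2.shape)   # [1,18,13,13] [1,18,26,26] [1,18,52,52]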
class YOLOLoss(nn.Module):
    def __init__(self, anchors, num_classes, input_shape, cuda, anchors_mask=[[6, 7, 8], [3, 4, 5], [0, 1, 2]]):
        super(YOLOLoss, self).__init__()
        #-----------------------------------------------------------#
        #   Anchors assigned to the 13x13 layer: [116,90],[156,198],[373,326]
        #   Anchors assigned to the 26x26 layer: [30,61],[62,45],[59,119]
        #   Anchors assigned to the 52x52 layer: [10,13],[16,30],[33,23]
        #-----------------------------------------------------------#
        self.anchors = anchors
        self.num_classes = num_classes
        self.bbox_attrs = 5 + num_classes
        self.input_shape = input_shape
        self.anchors_mask = anchors_mask
        self.giou = True
        self.balance = [0.4, 1.0, 4]
        self.box_ratio = 0.05
        self.obj_ratio = 5 * (input_shape[0] * input_shape[1]) / (416 ** 2)
        self.cls_ratio = 1 * (num_classes / 80)
        self.ignore_threshold = 0.5
        self.cuda = cuda

    def clip_by_tensor(self, t, t_min, t_max):
        t = t.float()
        result = (t >= t_min).float() * t + (t < t_min).float() * t_min
        result = (result <= t_max).float() * result + (result > t_max).float() * t_max
        return result

    def MSELoss(self, pred, target):
        return torch.pow(pred - target, 2)

    def BCELoss(self, pred, target):
        epsilon = 1e-7
        pred = self.clip_by_tensor(pred, epsilon, 1.0 - epsilon)
        output = -target * torch.log(pred) - (1.0 - target) * torch.log(1.0 - pred)
        return output
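
    # Both helpers are element-wise (no reduction). BCELoss should agree with
    # F.binary_cross_entropy(..., reduction='none') up to the clipping epsilon;
    # a quick check (illustrative, not part of the original script):
    # import torch.nn.functional as F
    # crit = YOLOLoss(np.zeros((9, 2)), 1, [416, 416], False)
    # p, t = torch.rand(8), torch.randint(0, 2, (8,)).float()
    # print(torch.allclose(crit.BCELoss(p, t),
    #                      F.binary_cross_entropy(p, t, reduction='none'), atol=1e-5))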
    def box_giou(self, b1, b2):
        """
        Inputs:
        ----------
        b1: tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh
        b2: tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh
        Returns:
        -------
        giou: tensor, shape=(batch, feat_w, feat_h, anchor_num, 1)
        """
        #----------------------------------------------------#
        #   Top-left and bottom-right corners of the predicted boxes
        #----------------------------------------------------#
        b1_xy = b1[..., :2]
        b1_wh = b1[..., 2:4]
        b1_wh_half = b1_wh / 2.
        b1_mins = b1_xy - b1_wh_half
        b1_maxes = b1_xy + b1_wh_half
        #----------------------------------------------------#
        #   Top-left and bottom-right corners of the ground-truth boxes
        #----------------------------------------------------#
        b2_xy = b2[..., :2]
        b2_wh = b2[..., 2:4]
        b2_wh_half = b2_wh / 2.
        b2_mins = b2_xy - b2_wh_half
        b2_maxes = b2_xy + b2_wh_half
        #----------------------------------------------------#
        #   IoU between all ground-truth and predicted boxes
        #----------------------------------------------------#
        intersect_mins = torch.max(b1_mins, b2_mins)
        intersect_maxes = torch.min(b1_maxes, b2_maxes)
        intersect_wh = torch.max(intersect_maxes - intersect_mins, torch.zeros_like(intersect_maxes))
        intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
        b1_area = b1_wh[..., 0] * b1_wh[..., 1]
        b2_area = b2_wh[..., 0] * b2_wh[..., 1]
        union_area = b1_area + b2_area - intersect_area
        iou = intersect_area / union_area
        #----------------------------------------------------#
        #   Smallest box enclosing both boxes
        #----------------------------------------------------#
        enclose_mins = torch.min(b1_mins, b2_mins)
        enclose_maxes = torch.max(b1_maxes, b2_maxes)
        enclose_wh = torch.max(enclose_maxes - enclose_mins, torch.zeros_like(intersect_maxes))
        #----------------------------------------------------#
        #   GIoU = IoU - (enclosing area - union area) / enclosing area
        #----------------------------------------------------#
        enclose_area = enclose_wh[..., 0] * enclose_wh[..., 1]
        giou = iou - (enclose_area - union_area) / enclose_area

        return giou
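
    # Worked example (illustrative): two unit squares whose centers sit half a
    # width apart. Intersection 0.5, union 1.5, so IoU = 1/3; the enclosing box
    # area equals the union (1.5), so GIoU = IoU here:
    # crit = YOLOLoss(np.zeros((9, 2)), 1, [416, 416], False)
    # b1 = torch.tensor([0.5, 0.5, 1.0, 1.0]).view(1, 1, 1, 1, 4)
    # b2 = torch.tensor([1.0, 0.5, 1.0, 1.0]).view(1, 1, 1, 1, 4)
    # print(crit.box_giou(b1, b2))   # tensor([[[[0.3333]]]])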

    def forward(self, l, input, targets=None):
        #----------------------------------------------------#
        #   l is the index of the effective feature layer being processed.
        #   input has shape  bs, 3*(5+num_classes), 13, 13
        #                    bs, 3*(5+num_classes), 26, 26
        #                    bs, 3*(5+num_classes), 52, 52
        #   targets are the ground-truth boxes.
        #----------------------------------------------------#
        #--------------------------------#
        #   Batch size and feature-map height/width (e.g. 13 and 13)
        #--------------------------------#
        bs = input.size(0)
        in_h = input.size(2)
        in_w = input.size(3)
        #-----------------------------------------------------------------------#
        #   Compute the stride: how many input pixels each feature point covers.
        #   13x13 feature layer -> one point covers 32 pixels of the input
        #   26x26 feature layer -> 16 pixels
        #   52x52 feature layer -> 8 pixels
        #   so stride_h = stride_w = 32, 16 or 8.
        #-----------------------------------------------------------------------#
        stride_h = self.input_shape[0] / in_h
        stride_w = self.input_shape[1] / in_w
        #-------------------------------------------------#
        #   scaled_anchors are the anchors in feature-map units
        #-------------------------------------------------#
        scaled_anchors = [(a_w / stride_w, a_h / stride_h) for a_w, a_h in self.anchors]
        #-----------------------------------------------#
        #   Reshape the raw prediction:
        #   bs, 3*(5+num_classes), 13, 13 => bs, 3, 13, 13, 5+num_classes
        #   (and likewise for the 26x26 and 52x52 layers)
        #-----------------------------------------------#
        prediction = input.view(bs, len(self.anchors_mask[l]), self.bbox_attrs, in_h, in_w).permute(0, 1, 3, 4, 2).contiguous()

        #-----------------------------------------------#
        #   Center-offset adjustments for the anchors
        #-----------------------------------------------#
        x = torch.sigmoid(prediction[..., 0])
        y = torch.sigmoid(prediction[..., 1])
        #-----------------------------------------------#
        #   Width/height adjustments for the anchors
        #-----------------------------------------------#
        w = prediction[..., 2]
        h = prediction[..., 3]
        #-----------------------------------------------#
        #   Objectness confidence (is there an object?)
        #-----------------------------------------------#
        conf = torch.sigmoid(prediction[..., 4])
        #-----------------------------------------------#
        #   Class confidences
        #-----------------------------------------------#
        pred_cls = torch.sigmoid(prediction[..., 5:])
        #-----------------------------------------------#
        #   The targets the network should have predicted
        #-----------------------------------------------#
        y_true, noobj_mask, box_loss_scale = self.get_target(l, targets, scaled_anchors, in_h, in_w)
        #---------------------------------------------------------------#
        #   Decode the predictions and measure their overlap with the
        #   ground truth. Predictions that overlap too much are ignored,
        #   since such accurate feature points make poor negative samples.
        #----------------------------------------------------------------#
        noobj_mask, pred_boxes = self.get_ignore(l, x, y, h, w, targets, scaled_anchors, in_h, in_w, noobj_mask)
        if self.cuda:
            y_true = y_true.type_as(x)
            noobj_mask = noobj_mask.type_as(x)
            box_loss_scale = box_loss_scale.type_as(x)
        #--------------------------------------------------------------------------#
        #   box_loss_scale is the product of the ground-truth width and height,
        #   both in 0-1, so the product is also in 0-1.
        #   2 - product grows as the box shrinks, giving small boxes more weight.
        #--------------------------------------------------------------------------#
        box_loss_scale = 2 - box_loss_scale

        loss = 0
        obj_mask = y_true[..., 4] == 1
        n = torch.sum(obj_mask)
        if n != 0:
            if self.giou:
                #---------------------------------------------------------------#
                #   GIoU between predictions and ground truth
                #----------------------------------------------------------------#
                giou = self.box_giou(pred_boxes, y_true[..., :4]).type_as(x)
                loss_loc = torch.mean((1 - giou)[obj_mask])
            else:
                #-----------------------------------------------------------#
                #   Center-offset loss (BCELoss works a little better here)
                #-----------------------------------------------------------#
                loss_x = torch.mean(self.BCELoss(x[obj_mask], y_true[..., 0][obj_mask]) * box_loss_scale[obj_mask])
                loss_y = torch.mean(self.BCELoss(y[obj_mask], y_true[..., 1][obj_mask]) * box_loss_scale[obj_mask])
                #-----------------------------------------------------------#
                #   Width/height adjustment loss
                #-----------------------------------------------------------#
                loss_w = torch.mean(self.MSELoss(w[obj_mask], y_true[..., 2][obj_mask]) * box_loss_scale[obj_mask])
                loss_h = torch.mean(self.MSELoss(h[obj_mask], y_true[..., 3][obj_mask]) * box_loss_scale[obj_mask])
                loss_loc = (loss_x + loss_y + loss_h + loss_w) * 0.1
            loss_cls = torch.mean(self.BCELoss(pred_cls[obj_mask], y_true[..., 5:][obj_mask]))
            loss += loss_loc * self.box_ratio + loss_cls * self.cls_ratio
        loss_conf = torch.mean(self.BCELoss(conf, obj_mask.type_as(conf))[noobj_mask.bool() | obj_mask])
        loss += loss_conf * self.balance[l] * self.obj_ratio
        # if n != 0:
        #     print(loss_loc * self.box_ratio, loss_cls * self.cls_ratio, loss_conf * self.balance[l] * self.obj_ratio)
        return loss
    def calculate_iou(self, _box_a, _box_b):
        #-----------------------------------------------------------#
        #   Top-left and bottom-right corners of the ground-truth boxes
        #-----------------------------------------------------------#
        b1_x1, b1_x2 = _box_a[:, 0] - _box_a[:, 2] / 2, _box_a[:, 0] + _box_a[:, 2] / 2
        b1_y1, b1_y2 = _box_a[:, 1] - _box_a[:, 3] / 2, _box_a[:, 1] + _box_a[:, 3] / 2
        #-----------------------------------------------------------#
        #   Top-left and bottom-right corners of the anchor-derived boxes
        #-----------------------------------------------------------#
        b2_x1, b2_x2 = _box_b[:, 0] - _box_b[:, 2] / 2, _box_b[:, 0] + _box_b[:, 2] / 2
        b2_y1, b2_y2 = _box_b[:, 1] - _box_b[:, 3] / 2, _box_b[:, 1] + _box_b[:, 3] / 2
        #-----------------------------------------------------------#
        #   Convert both sets of boxes to corner form
        #-----------------------------------------------------------#
        box_a = torch.zeros_like(_box_a)
        box_b = torch.zeros_like(_box_b)
        box_a[:, 0], box_a[:, 1], box_a[:, 2], box_a[:, 3] = b1_x1, b1_y1, b1_x2, b1_y2
        box_b[:, 0], box_b[:, 1], box_b[:, 2], box_b[:, 3] = b2_x1, b2_y1, b2_x2, b2_y2
        #-----------------------------------------------------------#
        #   A is the number of ground-truth boxes, B the number of anchors
        #-----------------------------------------------------------#
        A = box_a.size(0)
        B = box_b.size(0)
        #-----------------------------------------------------------#
        #   Intersection area
        #-----------------------------------------------------------#
        max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2), box_b[:, 2:].unsqueeze(0).expand(A, B, 2))
        min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2), box_b[:, :2].unsqueeze(0).expand(A, B, 2))
        inter = torch.clamp((max_xy - min_xy), min=0)
        inter = inter[:, :, 0] * inter[:, :, 1]
        #-----------------------------------------------------------#
        #   Areas of the predicted and ground-truth boxes
        #-----------------------------------------------------------#
        area_a = ((box_a[:, 2] - box_a[:, 0]) * (box_a[:, 3] - box_a[:, 1])).unsqueeze(1).expand_as(inter)  # [A,B]
        area_b = ((box_b[:, 2] - box_b[:, 0]) * (box_b[:, 3] - box_b[:, 1])).unsqueeze(0).expand_as(inter)  # [A,B]
        #-----------------------------------------------------------#
        #   IoU
        #-----------------------------------------------------------#
        union = area_a + area_b - inter
        return inter / union  # [A,B]
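
    # Quick check (illustrative): an identical box gives IoU 1, a disjoint one 0.
    # crit = YOLOLoss(np.zeros((9, 2)), 1, [416, 416], False)
    # a = torch.tensor([[5.0, 5.0, 2.0, 2.0]])                         # cx, cy, w, h
    # b = torch.tensor([[5.0, 5.0, 2.0, 2.0], [20.0, 20.0, 2.0, 2.0]])
    # print(crit.calculate_iou(a, b))                                  # tensor([[1., 0.]])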

    def get_target(self, l, targets, anchors, in_h, in_w):
        #-----------------------------------------------------#
        #   Number of images in the batch
        #-----------------------------------------------------#
        bs = len(targets)
        #-----------------------------------------------------#
        #   Marks which anchors contain no object
        #-----------------------------------------------------#
        noobj_mask = torch.ones(bs, len(self.anchors_mask[l]), in_h, in_w, requires_grad=False)
        #-----------------------------------------------------#
        #   Makes the network pay more attention to small objects
        #-----------------------------------------------------#
        box_loss_scale = torch.zeros(bs, len(self.anchors_mask[l]), in_h, in_w, requires_grad=False)
        #-----------------------------------------------------#
        #   batch_size, 3, 13, 13, 5 + num_classes
        #-----------------------------------------------------#
        y_true = torch.zeros(bs, len(self.anchors_mask[l]), in_h, in_w, self.bbox_attrs, requires_grad=False)
        for b in range(bs):
            if len(targets[b]) == 0:
                continue
            batch_target = torch.zeros_like(targets[b])
            #-------------------------------------------------------#
            #   Ground-truth centers and sizes in feature-map units
            #-------------------------------------------------------#
            batch_target[:, [0, 2]] = targets[b][:, [0, 2]] * in_w
            batch_target[:, [1, 3]] = targets[b][:, [1, 3]] * in_h
            batch_target[:, 4] = targets[b][:, 4]
            batch_target = batch_target.cpu()

            #-------------------------------------------------------#
            #   Ground-truth boxes as (0, 0, w, h): num_true_box, 4
            #-------------------------------------------------------#
            gt_box = torch.FloatTensor(torch.cat((torch.zeros((batch_target.size(0), 2)), batch_target[:, 2:4]), 1))
            #-------------------------------------------------------#
            #   Anchors as (0, 0, w, h): 9, 4
            #-------------------------------------------------------#
            anchor_shapes = torch.FloatTensor(torch.cat((torch.zeros((len(anchors), 2)), torch.FloatTensor(anchors)), 1))
            #-------------------------------------------------------#
            #   self.calculate_iou(gt_box, anchor_shapes) has shape
            #   [num_true_box, 9]: the overlap of each ground-truth box
            #   with all nine anchors. best_ns holds, for each ground-truth
            #   box, the index of its best-matching anchor.
            #-------------------------------------------------------#
            best_ns = torch.argmax(self.calculate_iou(gt_box, anchor_shapes), dim=-1)
            for t, best_n in enumerate(best_ns):
                if best_n not in self.anchors_mask[l]:
                    continue
                #----------------------------------------#
                #   Which of this layer's anchors the match is
                #----------------------------------------#
                k = self.anchors_mask[l].index(best_n)
                #----------------------------------------#
                #   Grid cell the ground-truth box falls into
                #----------------------------------------#
                i = torch.floor(batch_target[t, 0]).long()
                j = torch.floor(batch_target[t, 1]).long()
                #----------------------------------------#
                #   Class of the ground-truth box
                #----------------------------------------#
                c = batch_target[t, 4].long()
                #----------------------------------------#
                #   noobj_mask marks feature points without an object
                #----------------------------------------#
                noobj_mask[b, k, j, i] = 0
                if not self.giou:
                    #----------------------------------------#
                    #   tx, ty: ground-truth center offsets;
                    #   tw, th: log-space width/height targets
                    #----------------------------------------#
                    y_true[b, k, j, i, 0] = batch_target[t, 0] - i.float()
                    y_true[b, k, j, i, 1] = batch_target[t, 1] - j.float()
                    y_true[b, k, j, i, 2] = math.log(batch_target[t, 2] / anchors[best_n][0])
                    y_true[b, k, j, i, 3] = math.log(batch_target[t, 3] / anchors[best_n][1])
                    y_true[b, k, j, i, 4] = 1
                    y_true[b, k, j, i, c + 5] = 1
                else:
                    #----------------------------------------#
                    #   For GIoU, store the box itself in feature-map units
                    #----------------------------------------#
                    y_true[b, k, j, i, 0] = batch_target[t, 0]
                    y_true[b, k, j, i, 1] = batch_target[t, 1]
                    y_true[b, k, j, i, 2] = batch_target[t, 2]
                    y_true[b, k, j, i, 3] = batch_target[t, 3]
                    y_true[b, k, j, i, 4] = 1
                    y_true[b, k, j, i, c + 5] = 1
                #----------------------------------------#
                #   xywh scale: large objects get a small loss weight,
                #   small objects a large one
                #----------------------------------------#
                box_loss_scale[b, k, j, i] = batch_target[t, 2] * batch_target[t, 3] / in_w / in_h
        return y_true, noobj_mask, box_loss_scale
    def get_ignore(self, l, x, y, h, w, targets, scaled_anchors, in_h, in_w, noobj_mask):
        #-----------------------------------------------------#
        #   Number of images in the batch
        #-----------------------------------------------------#
        bs = len(targets)
        #-----------------------------------------------------#
        #   Generate the grid of anchor centers (cell top-left corners)
        #-----------------------------------------------------#
        grid_x = torch.linspace(0, in_w - 1, in_w).repeat(in_h, 1).repeat(
            int(bs * len(self.anchors_mask[l])), 1, 1).view(x.shape).type_as(x)
        grid_y = torch.linspace(0, in_h - 1, in_h).repeat(in_w, 1).t().repeat(
            int(bs * len(self.anchors_mask[l])), 1, 1).view(y.shape).type_as(x)
        #   Anchor widths and heights for this layer
        scaled_anchors_l = np.array(scaled_anchors)[self.anchors_mask[l]]
        anchor_w = torch.Tensor(scaled_anchors_l).index_select(1, torch.LongTensor([0])).type_as(x)
        anchor_h = torch.Tensor(scaled_anchors_l).index_select(1, torch.LongTensor([1])).type_as(x)

        anchor_w = anchor_w.repeat(bs, 1).repeat(1, 1, in_h * in_w).view(w.shape)
        anchor_h = anchor_h.repeat(bs, 1).repeat(1, 1, in_h * in_w).view(h.shape)
        #-------------------------------------------------------#
        #   Decode the adjusted anchor centers and sizes
        #-------------------------------------------------------#
        pred_boxes_x = torch.unsqueeze(x + grid_x, -1)
        pred_boxes_y = torch.unsqueeze(y + grid_y, -1)
        pred_boxes_w = torch.unsqueeze(torch.exp(w) * anchor_w, -1)
        pred_boxes_h = torch.unsqueeze(torch.exp(h) * anchor_h, -1)
        pred_boxes = torch.cat([pred_boxes_x, pred_boxes_y, pred_boxes_w, pred_boxes_h], dim=-1)

        for b in range(bs):
            #-------------------------------------------------------#
            #   Flatten the predictions: pred_boxes_for_ignore  num_anchors, 4
            #-------------------------------------------------------#
            pred_boxes_for_ignore = pred_boxes[b].view(-1, 4)
            #-------------------------------------------------------#
            #   Convert the ground-truth boxes to feature-map units
            #   gt_box  num_true_box, 4
            #-------------------------------------------------------#
            if len(targets[b]) > 0:
                batch_target = torch.zeros_like(targets[b])
                #-------------------------------------------------------#
                #   Ground-truth centers in feature-map units
                #-------------------------------------------------------#
                batch_target[:, [0, 2]] = targets[b][:, [0, 2]] * in_w
                batch_target[:, [1, 3]] = targets[b][:, [1, 3]] * in_h
                batch_target = batch_target[:, :4].type_as(x)
                #-------------------------------------------------------#
                #   IoU: anch_ious  num_true_box, num_anchors
                #-------------------------------------------------------#
                anch_ious = self.calculate_iou(batch_target, pred_boxes_for_ignore)
                #-------------------------------------------------------#
                #   Best overlap of each anchor with any ground-truth box:
                #   anch_ious_max  num_anchors
                #-------------------------------------------------------#
                anch_ious_max, _ = torch.max(anch_ious, dim=0)
                anch_ious_max = anch_ious_max.view(pred_boxes[b].size()[:3])
                noobj_mask[b][anch_ious_max > self.ignore_threshold] = 0
        return noobj_mask, pred_boxes

#---------------------------------------------------------#
#   Convert images to RGB to avoid errors on grayscale input.
#   The code only supports RGB prediction; every other type is converted.
#---------------------------------------------------------#
def cvtColor(image):
    if len(np.shape(image)) == 3 and np.shape(image)[2] == 3:
        return image
    else:
        image = image.convert('RGB')
        return image

#---------------------------------------------------#
#   Resize the input image
#---------------------------------------------------#
def resize_image(image, size, letterbox_image):
    iw, ih = image.size
    w, h = size
    if letterbox_image:
        scale = min(w / iw, h / ih)
        nw = int(iw * scale)
        nh = int(ih * scale)
        image = image.resize((nw, nh), Image.BICUBIC)
        new_image = Image.new('RGB', size, (128, 128, 128))
        new_image.paste(image, ((w - nw) // 2, (h - nh) // 2))
    else:
        new_image = image.resize((w, h), Image.BICUBIC)
    return new_image
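
# Example (illustrative): letterboxing a 640x480 image to (416, 416) scales by
# min(416/640, 416/480) = 0.65 -> a 416x312 image pasted at (0, 52) on gray:
# print(resize_image(Image.new('RGB', (640, 480)), (416, 416), True).size)  # (416, 416)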
#---------------------------------------------------#
#   Load the class names
#---------------------------------------------------#
def get_classes(classes_path):
    with open(classes_path, encoding='utf-8') as f:
        class_names = f.readlines()
    class_names = [c.strip() for c in class_names]
    return class_names, len(class_names)
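
# The classes file is plain text with one class name per line; e.g. a
# single-class file containing just "Pos" yields (["Pos"], 1).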
#---------------------------------------------------#
#   Load the anchors
#---------------------------------------------------#
def get_anchors(anchors_path):
    '''loads the anchors from a file'''
    with open(anchors_path, encoding='utf-8') as f:
        anchors = f.readline()
    anchors = [float(x) for x in anchors.split(',')]
    anchors = np.array(anchors).reshape(-1, 2)
    return anchors, len(anchors)
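
# The anchors file is expected to be a single comma-separated line of w,h
# pairs (the same values are hard-coded as a fallback in __main__ below), e.g.:
# 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
# which get_anchors reshapes into a (9, 2) array.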
#---------------------------------------------------#
#   Read the current learning rate from the optimizer
#---------------------------------------------------#
def get_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']

def preprocess_input(image):
    image /= 255.0
    return image
class YoloDataset(Dataset):
    def __init__(self, annotation_lines, input_shape, num_classes, train):
        super(YoloDataset, self).__init__()
        self.annotation_lines = annotation_lines
        self.input_shape = input_shape
        self.num_classes = num_classes
        self.length = len(self.annotation_lines)
        self.train = train

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        index = index % self.length
        #---------------------------------------------------#
        #   Random augmentation during training,
        #   no random augmentation during validation
        #---------------------------------------------------#
        image, box = self.get_random_data(self.annotation_lines[index], self.input_shape[0:2], random=self.train)
        image = np.transpose(preprocess_input(np.array(image, dtype=np.float32)), (2, 0, 1))
        box = np.array(box, dtype=np.float32)
        if len(box) != 0:
            box[:, [0, 2]] = box[:, [0, 2]] / self.input_shape[1]
            box[:, [1, 3]] = box[:, [1, 3]] / self.input_shape[0]
            box[:, 2:4] = box[:, 2:4] - box[:, 0:2]
            box[:, 0:2] = box[:, 0:2] + box[:, 2:4] / 2
        return image, box
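
    # After __getitem__, boxes are normalized to [0, 1] and converted from
    # corner form (x1, y1, x2, y2, class) to center form (cx, cy, w, h, class),
    # which is the layout YOLOLoss.get_target expects.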
    def rand(self, a=0, b=1):
        return np.random.rand() * (b - a) + a

    def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=0.7, val=0.4, random=True):
        line = annotation_line.split()
        #------------------------------#
        #   Read the image and convert it to RGB
        #------------------------------#
        image = Image.open(line[0])
        image = cvtColor(image)
        #------------------------------#
        #   Image size and target size
        #------------------------------#
        iw, ih = image.size
        h, w = input_shape
        #------------------------------#
        #   Ground-truth boxes
        #------------------------------#
        box = np.array([np.array(list(map(int, box.split(',')))) for box in line[1:]])
        if not random:
            scale = min(w / iw, h / ih)
            nw = int(iw * scale)
            nh = int(ih * scale)
            dx = (w - nw) // 2
            dy = (h - nh) // 2
            #---------------------------------#
            #   Pad the leftover area with gray bars
            #---------------------------------#
            image = image.resize((nw, nh), Image.BICUBIC)
            new_image = Image.new('RGB', (w, h), (128, 128, 128))
            new_image.paste(image, (dx, dy))
            image_data = np.array(new_image, np.float32)
            #---------------------------------#
            #   Adjust the ground-truth boxes accordingly
            #---------------------------------#
            if len(box) > 0:
                np.random.shuffle(box)
                box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
                box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
                box[:, 0:2][box[:, 0:2] < 0] = 0
                box[:, 2][box[:, 2] > w] = w
                box[:, 3][box[:, 3] > h] = h
                box_w = box[:, 2] - box[:, 0]
                box_h = box[:, 3] - box[:, 1]
                box = box[np.logical_and(box_w > 1, box_h > 1)]  # discard invalid boxes
            return image_data, box

        #------------------------------------------#
        #   Scale the image and distort its aspect ratio
        #------------------------------------------#
        new_ar = iw / ih * self.rand(1 - jitter, 1 + jitter) / self.rand(1 - jitter, 1 + jitter)
        scale = self.rand(.25, 2)
        if new_ar < 1:
            nh = int(scale * h)
            nw = int(nh * new_ar)
        else:
            nw = int(scale * w)
            nh = int(nw / new_ar)
        image = image.resize((nw, nh), Image.BICUBIC)
        #------------------------------------------#
        #   Pad the leftover area with gray bars
        #------------------------------------------#
        dx = int(self.rand(0, w - nw))
        dy = int(self.rand(0, h - nh))
        new_image = Image.new('RGB', (w, h), (128, 128, 128))
        new_image.paste(image, (dx, dy))
        image = new_image
        #------------------------------------------#
        #   Random horizontal flip
        #------------------------------------------#
        flip = self.rand() < .5
        if flip:
            image = image.transpose(Image.FLIP_LEFT_RIGHT)
        image_data = np.array(image, np.uint8)
        #---------------------------------#
        #   Color-space (HSV) augmentation: draw the random gains
        #---------------------------------#
        r = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1
        #---------------------------------#
        #   Convert the image to HSV
        #---------------------------------#
        hue, sat, val = cv2.split(cv2.cvtColor(image_data, cv2.COLOR_RGB2HSV))
        dtype = image_data.dtype
        #---------------------------------#
        #   Apply the transform via lookup tables
        #---------------------------------#
        x = np.arange(0, 256, dtype=r.dtype)
        lut_hue = ((x * r[0]) % 180).astype(dtype)
        lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
        lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
        image_data = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
        image_data = cv2.cvtColor(image_data, cv2.COLOR_HSV2RGB)
        #---------------------------------#
        #   Adjust the ground-truth boxes accordingly
        #---------------------------------#
        if len(box) > 0:
            np.random.shuffle(box)
            box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
            box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
            if flip:
                box[:, [0, 2]] = w - box[:, [2, 0]]
            box[:, 0:2][box[:, 0:2] < 0] = 0
            box[:, 2][box[:, 2] > w] = w
            box[:, 3][box[:, 3] > h] = h
            box_w = box[:, 2] - box[:, 0]
            box_h = box[:, 3] - box[:, 1]
            box = box[np.logical_and(box_w > 1, box_h > 1)]

        return image_data, box

#   collate_fn used by the DataLoader
def yolo_dataset_collate(batch):
    images = []
    bboxes = []
    for img, box in batch:
        images.append(img)
        bboxes.append(box)
    images = torch.from_numpy(np.array(images)).type(torch.FloatTensor)
    bboxes = [torch.from_numpy(ann).type(torch.FloatTensor) for ann in bboxes]
    return images, bboxes
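
# With batch_size=2 and 416x416 inputs, the collate function returns a
# [2, 3, 416, 416] FloatTensor plus a list of two [num_boxes, 5] tensors;
# the boxes stay in a list because their counts differ per image.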
if __name__ == "__main__":
    #------------------------------------------------------#
    #   input_shape: input size, which must be a multiple of 32
    #------------------------------------------------------#
    input_shape = [416, 416]
    img_channel = 3            # the data pipeline always produces RGB images
    img_size = 416
    num_epochs = 300
    lr_init = 0.001
    num_workers = 0
    train_project_path = '.'   # project root used when saving traced models
    #----------------------------------------------------#
    #   Image paths and labels
    #----------------------------------------------------#
    train_annotation_path = '2007_train.txt'
    val_annotation_path = '2007_val.txt'

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    #----------------------------------------------------#
    #   Classes and anchors
    #----------------------------------------------------#
    # class_names, num_classes = get_classes(classes_path)
    num_classes = 1
    class_names = ["Pos"] * num_classes
    # anchors, num_anchors = get_anchors(anchors_path)
    anchors = np.array([[ 10.,  13.],
                        [ 16.,  30.],
                        [ 33.,  23.],
                        [ 30.,  61.],
                        [ 62.,  45.],
                        [ 59., 119.],
                        [116.,  90.],
                        [156., 198.],
                        [373., 326.]])
    num_anchors = len(anchors)
    anchors_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
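    # anchors_mask assigns anchor rows to detection scales: [6, 7, 8] (the
    # largest anchors) to the 13x13 head, [3, 4, 5] to 26x26, [0, 1, 2] to 52x52.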

    #------------------------------------------------------#
    #   Build the YOLO model and loss
    #------------------------------------------------------#
    model = YoloBody(anchors_mask, num_classes, phi=0, load_weights=False)
    # if not pretrained:
    #     weights_init(model)
    yolo_loss = YOLOLoss(anchors, num_classes, input_shape, torch.cuda.is_available(), anchors_mask)

    #------------------------------------------------------------------#
    #   torch 1.2 does not support amp; use torch 1.7.1 or later for fp16
    #   (under torch 1.2 the amp import shows "could not be resolved")
    #------------------------------------------------------------------#

    GPU_index = 0
    # model_train = torch.nn.DataParallel(model)
    cudnn.benchmark = True

    #---------------------------#
    #   Read the dataset txt files
    #---------------------------#
    with open(train_annotation_path) as f:
        train_lines = f.readlines()
    with open(val_annotation_path) as f:
        val_lines = f.readlines()
    num_train = len(train_lines)
    num_val = len(val_lines)

    #-------------------------------------------------------------------#
    #   Without freeze training, simply set batch_size to Unfreeze_batch_size
    #-------------------------------------------------------------------#
    batch_size = 8

    optimizer = torch.optim.Adam([dict(params=model.parameters(), lr=lr_init)])

    #---------------------------------------#
    #   Build the dataset loaders
    #---------------------------------------#
    train_dataset = YoloDataset(train_lines, input_shape, num_classes, train=True)
    val_dataset = YoloDataset(val_lines, input_shape, num_classes, train=False)

    train_dataloaders = DataLoader(train_dataset, shuffle=True, batch_size=batch_size, num_workers=num_workers, pin_memory=True,
                                   drop_last=True, collate_fn=yolo_dataset_collate, sampler=None)
    val_dataloaders = DataLoader(val_dataset, shuffle=True, batch_size=batch_size, num_workers=num_workers, pin_memory=True,
                                 drop_last=True, collate_fn=yolo_dataset_collate, sampler=None)

    model.to(device)
    model.train()
    train_sum_loss = np.inf
    val_sum_loss = np.inf
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        dt_size = len(train_dataloaders.dataset)
        step = 0
        epoch_loss = 0
        model.train()
        for x, y in train_dataloaders:
            step += 1
            inputs = x.to(device)
            #   targets are a ragged list of tensors, so each is moved separately
            labels = [ann.to(device) for ann in y]

            optimizer.zero_grad()
            outputs = model(inputs)

            #   Sum the loss over the three detection heads
            loss_value_all = 0
            for l in range(len(outputs)):
                loss_item = yolo_loss(l, outputs[l], labels)
                loss_value_all += loss_item
            loss = loss_value_all

            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

            print("%d/%d,train_loss:%0.3f" % (step, (dt_size - 1) // train_dataloaders.batch_size + 1, loss.item()))
            del x, y, inputs, labels, outputs
            torch.cuda.empty_cache()
        # print("epoch %d loss:%0.3f" % (epoch, epoch_loss))
        if epoch_loss < train_sum_loss:
            train_sum_loss = epoch_loss
            model.eval()
            example = torch.rand(1, img_channel, img_size, img_size).type(torch.FloatTensor).to(device)
            pt_model = torch.jit.trace(model, example)
            os.makedirs(os.path.join(train_project_path, "Model"), exist_ok=True)
            pt_model.save(os.path.join(train_project_path, "Model", str(epoch) + '-Loss.pt'))
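
    # The traced model can later be reloaded without any of the class
    # definitions above (path is illustrative):
    # model = torch.jit.load(os.path.join(train_project_path, "Model", "0-Loss.pt"))
    # with torch.no_grad():
    #     out0, out1, out2 = model(torch.rand(1, 3, 416, 416).to(device))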
References:
【1】https://github.com/bubbliiiing/efficientnet-yolo3-pytorch
【2】COCO-2014 download: https://blog.csdn.net/boy854456187/article/details/119277637
【3】Face mask detection: 使用YOLOv5实现人脸口罩佩戴检测 (Stick_2, CSDN blog)
【4】Face detection: 用yolov5做人脸检测 (noob_qing, CSDN blog)