VarifocalLoss在Yolov8中的应用

调用VFL Loss

在ultralytics/utils/loss.py可以发现v8实现了VarifocalLoss，但是好像和原论文有点不一样，这里有待考证
原文地址：论文
在cls损失处

 # Cls lossloss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum  # VFL way# loss[1] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum  # BCE

这里可以看到调用varifocal_loss的地方是注释的，同时里面的target_labels是找不到的

实现损失的计算

将_, target_bboxes, target_scores, fg_mask, _ = self.assigner替换为target_labels,…具体如下

target_labels, target_bboxes, target_scores, fg_mask, _ = self.assigner(pred_scores.detach().sigmoid(),(pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype),anchor_points * stride_tensor,gt_labels,gt_bboxes,mask_gt,)

本人找到了两种处理target_labels的方法，建议第二种，官方认证github issues
第一种：target_labels = torch.where(target_scores > 0 , 1, 0)
第二种：

target_labels = target_labels.unsqueeze(-1).expand(-1, -1, self.nc)  # self.nc: class num
one_hot = torch.zeros(target_labels.size(), device=self.device)
target_labels = one_hot.scatter_(-1, target_labels, 1)

完整代码

class v8DetectionLoss:"""Criterion class for computing training losses."""def __init__(self, model):  # model must be de-paralleled"""Initializes v8DetectionLoss with the model, defining model-related properties and BCE loss function."""device = next(model.parameters()).device  # get model deviceh = model.args  # hyperparameters# import ipdb;ipdb.set_trace()m = model.model[-1]  # Detect() moduleself.bce = nn.BCEWithLogitsLoss(reduction="none")self.hyp = hself.stride = m.stride  # model stridesself.nc = m.nc  # number of classesself.no = m.nc + m.reg_max * 4self.reg_max = m.reg_maxself.device = deviceself.varifocal_loss=VarifocalLoss().to(device)self.use_dfl = m.reg_max > 1self.assigner = TaskAlignedAssigner(topk=10, num_classes=self.nc, alpha=0.5, beta=6.0)self.bbox_loss = BboxLoss(m.reg_max - 1, use_dfl=self.use_dfl).to(device)self.proj = torch.arange(m.reg_max, dtype=torch.float, device=device)def preprocess(self, targets, batch_size, scale_tensor):"""Preprocesses the target counts and matches with the input batch size to output a tensor."""if targets.shape[0] == 0:out = torch.zeros(batch_size, 0, 5, device=self.device)else:i = targets[:, 0]  # image index_, counts = i.unique(return_counts=True)counts = counts.to(dtype=torch.int32)out = torch.zeros(batch_size, counts.max(), 5, device=self.device)for j in range(batch_size):matches = i == jn = matches.sum()if n:out[j, :n] = targets[matches, 1:]out[..., 1:5] = xywh2xyxy(out[..., 1:5].mul_(scale_tensor))return outdef bbox_decode(self, anchor_points, pred_dist):"""Decode predicted object bounding box coordinates from anchor points and distribution."""if self.use_dfl:b, a, c = pred_dist.shape  # batch, anchors, channelspred_dist = pred_dist.view(b, a, 4, c // 4).softmax(3).matmul(self.proj.type(pred_dist.dtype))# pred_dist = pred_dist.view(b, a, c // 4, 4).transpose(2,3).softmax(3).matmul(self.proj.type(pred_dist.dtype))# pred_dist = (pred_dist.view(b, a, c // 4, 4).softmax(2) * self.proj.type(pred_dist.dtype).view(1, 1, -1, 1)).sum(2)return dist2bbox(pred_dist, anchor_points, xywh=False)def __call__(self, preds, batch):"""Calculate the sum of the loss for box, cls and dfl multiplied by batch size."""loss = torch.zeros(3, device=self.device)  # box, cls, dflfeats = preds[1] if isinstance(preds, tuple) else predspred_distri, pred_scores = torch.cat([xi.view(feats[0].shape[0], self.no, -1) for xi in feats], 2).split((self.reg_max * 4, self.nc), 1)pred_scores = pred_scores.permute(0, 2, 1).contiguous()pred_distri = pred_distri.permute(0, 2, 1).contiguous()dtype = pred_scores.dtypebatch_size = pred_scores.shape[0]imgsz = torch.tensor(feats[0].shape[2:], device=self.device, dtype=dtype) * self.stride[0]  # image size (h,w)anchor_points, stride_tensor = make_anchors(feats, self.stride, 0.5)# Targetstargets = torch.cat((batch["batch_idx"].view(-1, 1), batch["cls"].view(-1, 1), batch["bboxes"]), 1)targets = self.preprocess(targets.to(self.device), batch_size, scale_tensor=imgsz[[1, 0, 1, 0]])gt_labels, gt_bboxes = targets.split((1, 4), 2)  # cls, xyxymask_gt = gt_bboxes.sum(2, keepdim=True).gt_(0)# Pboxespred_bboxes = self.bbox_decode(anchor_points, pred_distri)  # xyxy, (b, h*w, 4)target_labels, target_bboxes, target_scores, fg_mask, _ = self.assigner(pred_scores.detach().sigmoid(),(pred_bboxes.detach() * stride_tensor).type(gt_bboxes.dtype),anchor_points * stride_tensor,gt_labels,gt_bboxes,mask_gt,)target_scores_sum = max(target_scores.sum(), 1)# target_labels = torch.where(target_scores > 0 , 1, 0)target_labels = target_labels.unsqueeze(-1).expand(-1, -1, self.nc)  # self.nc: class numone_hot = torch.zeros(target_labels.size(), device=self.device)target_labels = one_hot.scatter_(-1, target_labels, 1)# Cls lossloss[1] = self.varifocal_loss(pred_scores, target_scores, target_labels) / target_scores_sum  # VFL way# loss[1] = self.bce(pred_scores, target_scores.to(dtype)).sum() / target_scores_sum  # BCE# Bbox lossif fg_mask.sum():target_bboxes /= stride_tensorloss[0], loss[2] = self.bbox_loss(pred_distri, pred_bboxes, anchor_points, target_bboxes, target_scores, target_scores_sum, fg_mask)loss[0] *= self.hyp.box  # box gainloss[1] *= self.hyp.cls  # cls gainloss[2] *= self.hyp.dfl  # dfl gainreturn loss.sum() * batch_size, loss.detach()  # loss(box, cls, dfl)

参考

参考1
参考2
参考3

VarifocalLoss在Yolov8中的应用

调用VFL Loss

实现损失的计算

参考

相关文章

XML基础学习

【蓝桥杯每日一题】扫雷——暴力搜索

C# Winform双色纸牌接龙小游戏源码

maven权威指南（读书笔记一）

Function 和 BiFunction 的使用例

git bash中文显示问题

C05S11-MySQL数据库索引

mybatisPlus使用步骤详解