深度学习之目标检测篇——残差网络与FPN结合

news/2024/12/20 13:52:28/
  • 特征金字塔
  • 多尺度融合
  • 特征金字塔的网络原理
    在这里插入图片描述
  • 这里是基于resnet网络与Fpn做的结合,主要把resnet中的特征层利用FPN的思想一起结合,实现resnet_fpn。增强目标检测backone的有效性。
  • 代码实现如下:
import torch
from torch import Tensor
from collections import OrderedDict
import torch.nn.functional as F
from torch import nn
from torch.jit.annotations import Tuple, List, Dictclass Bottleneck(nn.Module):expansion = 4def __init__(self, in_channel, out_channel, stride=1, downsample=None, norm_layer=None):super(Bottleneck, self).__init__()if norm_layer is None:norm_layer = nn.BatchNorm2dself.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel,kernel_size=(1,1), stride=(1,1), bias=False)  # squeeze channelsself.bn1 = norm_layer(out_channel)# -----------------------------------------self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel,kernel_size=(3,3), stride=(stride,stride), bias=False, padding=(1,1))self.bn2 = norm_layer(out_channel)# -----------------------------------------self.conv3 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel * self.expansion,kernel_size=(1,1), stride=(1,1), bias=False)  # unsqueeze channelsself.bn3 = norm_layer(out_channel * self.expansion)self.relu = nn.ReLU(inplace=True)self.downsample = downsampledef forward(self, x):identity = xif self.downsample is not None:identity = self.downsample(x)out = self.conv1(x)out = self.bn1(out)out = self.relu(out)out = self.conv2(out)out = self.bn2(out)out = self.relu(out)out = self.conv3(out)out = self.bn3(out)out += identityout = self.relu(out)return outclass ResNet(nn.Module):def  __init__(self, block, blocks_num, num_classes=1000, include_top=True, norm_layer=None):''':param block:块:param blocks_num:块数:param num_classes: 分类数:param include_top::param norm_layer: BN'''super(ResNet, self).__init__()if norm_layer is None:norm_layer = nn.BatchNorm2dself._norm_layer = norm_layerself.include_top = include_topself.in_channel = 64self.conv1 = nn.Conv2d(in_channels=3, out_channels=self.in_channel, kernel_size=(7,7), stride=(2,2),padding=(3,3), bias=False)self.bn1 = norm_layer(self.in_channel)self.relu = nn.ReLU(inplace=True)self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)self.layer1 = self._make_layer(block, 64, blocks_num[0])self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)if self.include_top:self.avgpool = nn.AdaptiveAvgPool2d((1, 1))  # output size = (1, 1)self.fc = nn.Linear(512 * block.expansion, num_classes)'''初始化'''for m in self.modules():if isinstance(m, nn.Conv2d):nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')def _make_layer(self, block, channel, block_num, stride=1):norm_layer = self._norm_layerdownsample = Noneif stride != 1 or self.in_channel != channel * block.expansion:downsample = nn.Sequential(nn.Conv2d(self.in_channel, channel * block.expansion, kernel_size=(1,1), stride=(stride,stride), bias=False),norm_layer(channel * block.expansion))layers = []layers.append(block(self.in_channel, channel, downsample=downsample,stride=stride, norm_layer=norm_layer))self.in_channel = channel * block.expansionfor _ in range(1, block_num):layers.append(block(self.in_channel, channel, norm_layer=norm_layer))return nn.Sequential(*layers)def forward(self, x):x = self.conv1(x)x = self.bn1(x)x = self.relu(x)x = self.maxpool(x)x = self.layer1(x)x = self.layer2(x)x = self.layer3(x)x = self.layer4(x)if self.include_top:x = self.avgpool(x)x = torch.flatten(x, 1)x = self.fc(x)return xclass IntermediateLayerGetter(nn.ModuleDict):"""Module wrapper that returns intermediate layers from a modelsIt has a strong assumption that the modules have been registeredinto the models in the same order as they are used.This means that one should **not** reuse the same nn.Moduletwice in the forward if you want this to work.Additionally, it is only able to query submodules that are directlyassigned to the models. So if `models` is passed, `models.feature1` canbe returned, but not `models.feature1.layer2`.Arguments:model (nn.Module): models on which we will extract the featuresreturn_layers (Dict[name, new_name]): a dict containing the namesof the modules for which the activations will be returned asthe key of the dict, and the value of the dict is the nameof the returned activation (which the user can specify)."""__annotations__ = {"return_layers": Dict[str, str],}def __init__(self, model, return_layers):if not set(return_layers).issubset([name for name, _ in model.named_children()]):raise ValueError("return_layers are not present in models")# {'layer1': '0', 'layer2': '1', 'layer3': '2', 'layer4': '3'}orig_return_layers = return_layersreturn_layers = {k: v for k, v in return_layers.items()}layers = OrderedDict()# 遍历模型子模块按顺序存入有序字典# 只保存layer4及其之前的结构,舍去之后不用的结构for name, module in model.named_children():layers[name] = moduleif name in return_layers:del return_layers[name]if not return_layers:breaksuper(IntermediateLayerGetter, self).__init__(layers)self.return_layers = orig_return_layersdef forward(self, x):out = OrderedDict()# 依次遍历模型的所有子模块,并进行正向传播,# 收集layer1, layer2, layer3, layer4的输出for name, module in self.named_children():x = module(x)if name in self.return_layers:out_name = self.return_layers[name]out[out_name] = xreturn outclass FeaturePyramidNetwork(nn.Module):"""Module that adds a FPN from on top of a set of feature maps. This is based on`"Feature Pyramid Network for Object Detection" <https://arxiv.org/abs/1612.03144>`_.The feature maps are currently supposed to be in increasing depthorder.The input to the models is expected to be an OrderedDict[Tensor], containingthe feature maps on top of which the FPN will be added.Arguments:in_channels_list (list[int]): number of channels for each feature map thatis passed to the moduleout_channels (int): number of channels of the FPN representationextra_blocks (ExtraFPNBlock or None): if provided, extra operations willbe performed. It is expected to take the fpn features, the originalfeatures and the names of the original features as input, and returnsa new list of feature maps and their corresponding names"""def __init__(self, in_channels_list, out_channels, extra_blocks=None):super(FeaturePyramidNetwork, self).__init__()# 用来调整resnet特征矩阵(layer1,2,3,4)的channel(kernel_size=1)self.inner_blocks = nn.ModuleList()# 对调整后的特征矩阵使用3x3的卷积核来得到对应的预测特征矩阵self.layer_blocks = nn.ModuleList()for in_channels in in_channels_list:if in_channels == 0:continueinner_block_module = nn.Conv2d(in_channels, out_channels, (1,1))layer_block_module = nn.Conv2d(out_channels, out_channels, (3,3), padding=(1,1))self.inner_blocks.append(inner_block_module)self.layer_blocks.append(layer_block_module)# initialize parameters now to avoid modifying the initialization of top_blocksfor m in self.children():if isinstance(m, nn.Conv2d):nn.init.kaiming_uniform_(m.weight, a=1)nn.init.constant_(m.bias, 0)self.extra_blocks = extra_blocksdef get_result_from_inner_blocks(self, x, idx):# type: (Tensor, int) -> Tensor"""This is equivalent to self.inner_blocks[idx](x),but torchscript doesn't support this yet"""num_blocks = len(self.inner_blocks)if idx < 0:idx += num_blocksi = 0out = xfor module in self.inner_blocks:if i == idx:out = module(x)i += 1return outdef get_result_from_layer_blocks(self, x, idx):# type: (Tensor, int) -> Tensor"""This is equivalent to self.layer_blocks[idx](x),but torchscript doesn't support this yet"""num_blocks = len(self.layer_blocks)if idx < 0:idx += num_blocksi = 0out = xfor module in self.layer_blocks:if i == idx:out = module(x)i += 1return outdef forward(self, x):# type: (Dict[str, Tensor]) -> Dict[str, Tensor]"""Computes the FPN for a set of feature maps.Arguments:x (OrderedDict[Tensor]): feature maps for each feature level.Returns:results (OrderedDict[Tensor]): feature maps after FPN layers.They are ordered from highest resolution first."""# unpack OrderedDict into two lists for easier handlingnames = list(x.keys())x = list(x.values())# 将resnet layer4的channel调整到指定的out_channels# last_inner = self.inner_blocks[-1](x[-1])last_inner = self.get_result_from_inner_blocks(x[-1], -1)# result中保存着每个预测特征层results = []# 将layer4调整channel后的特征矩阵,通过3x3卷积后得到对应的预测特征矩阵# results.append(self.layer_blocks[-1](last_inner))results.append(self.get_result_from_layer_blocks(last_inner, -1))# 倒序遍历resenet输出特征层,以及对应inner_block和layer_block# layer3 -> layer2 -> layer1 (layer4已经处理过了)# for feature, inner_block, layer_block in zip(#         x[:-1][::-1], self.inner_blocks[:-1][::-1], self.layer_blocks[:-1][::-1]# ):#     if not inner_block:#         continue#     inner_lateral = inner_block(feature)#     feat_shape = inner_lateral.shape[-2:]#     inner_top_down = F.interpolate(last_inner, size=feat_shape, mode="nearest")#     last_inner = inner_lateral + inner_top_down#     results.insert(0, layer_block(last_inner))for idx in range(len(x) - 2, -1, -1):inner_lateral = self.get_result_from_inner_blocks(x[idx], idx)feat_shape = inner_lateral.shape[-2:]inner_top_down = F.interpolate(last_inner, size=feat_shape, mode="nearest")last_inner = inner_lateral + inner_top_downresults.insert(0, self.get_result_from_layer_blocks(last_inner, idx))# 在layer4对应的预测特征层基础上生成预测特征矩阵5if self.extra_blocks is not None:results, names = self.extra_blocks(results, names)# make it back an OrderedDictout = OrderedDict([(k, v) for k, v in zip(names, results)])return outclass LastLevelMaxPool(torch.nn.Module):"""Applies a max_pool2d on top of the last feature map"""def forward(self, x, names):# type: (List[Tensor], List[str]) -> Tuple[List[Tensor], List[str]]names.append("pool")x.append(F.max_pool2d(x[-1], 1, 2, 0))return x, namesclass BackboneWithFPN(nn.Module):"""Adds a FPN on top of a models.Internally, it uses torchvision.models._utils.IntermediateLayerGetter toextract a submodel that returns the feature maps specified in return_layers.The same limitations of IntermediatLayerGetter apply here.Arguments:backbone (nn.Module)return_layers (Dict[name, new_name]): a dict containing the namesof the modules for which the activations will be returned asthe key of the dict, and the value of the dict is the nameof the returned activation (which the user can specify).in_channels_list (List[int]): number of channels for each feature mapthat is returned, in the order they are present in the OrderedDictout_channels (int): number of channels in the FPN.Attributes:out_channels (int): the number of channels in the FPN"""def __init__(self, backbone, return_layers, in_channels_list, out_channels):''':param backbone: 特征层:param return_layers: 返回的层数:param in_channels_list: 输入通道数:param out_channels: 输出通道数'''super(BackboneWithFPN, self).__init__()'返回有序字典模型'self.body = IntermediateLayerGetter(backbone, return_layers=return_layers)self.fpn = FeaturePyramidNetwork(in_channels_list=in_channels_list,out_channels=out_channels,extra_blocks=LastLevelMaxPool(),)# super(BackboneWithFPN, self).__init__(OrderedDict(#     [("body", body), ("fpn", fpn)]))self.out_channels = out_channelsdef forward(self, x):x = self.body(x)x = self.fpn(x)return xdef resnet50_fpn_backbone():# FrozenBatchNorm2d的功能与BatchNorm2d类似,但参数无法更新# norm_layer=misc.FrozenBatchNorm2dresnet_backbone = ResNet(Bottleneck, [3, 4, 6, 3],include_top=False)# freeze layers# 冻结layer1及其之前的所有底层权重(基础通用特征)for name, parameter in resnet_backbone.named_parameters():if 'layer2' not in name and 'layer3' not in name and 'layer4' not in name:'''冻结权重,不参与训练'''parameter.requires_grad_(False)# 字典名字return_layers = {'layer1': '0', 'layer2': '1', 'layer3': '2', 'layer4': '3'}# in_channel 为layer4的输出特征矩阵channel = 2048in_channels_stage2 = resnet_backbone.in_channel // 8in_channels_list = [in_channels_stage2,  # layer1 out_channel=256in_channels_stage2 * 2,  # layer2 out_channel=512in_channels_stage2 * 4,  # layer3 out_channel=1024in_channels_stage2 * 8,  # layer4 out_channel=2048]out_channels = 256return BackboneWithFPN(resnet_backbone, return_layers, in_channels_list, out_channels)if __name__ == '__main__':net = resnet50_fpn_backbone()x = torch.randn(1,3,224,224)for key,value in net(x).items():print(key,value.shape)
  • 测试结果
    在这里插入图片描述

http://www.ppmy.cn/news/1556654.html

相关文章

架构未来:深入探索最热门的编程语言开发框架

开发框架组件简介 开发框架是程序员用来加速开发并确保代码质量的基础组件之一。在不同的编程语言中&#xff0c;开发框架为开发人员提供了丰富的工具、库和最佳实践&#xff0c;帮助构建高效、可维护和安全的应用程序。本文将介绍几种常见编程语言的开发框架组件&#xff0c;…

web3跨链预言机协议-BandProtocol

项目简介 Band Protocol 项目最初于 2017年成立并建立在 ETH 之上。后于2020年转移到了 Cosmos 网络上&#xff0c;基于 Cosmos SDK 搭建了一条 Band Chain 。这是一条 oracle-specific chain&#xff0c;主要功能是提供跨链预言机服务。Cosmos生态上第一个&#xff0c;也是目…

「iOS」通过CoreLocation Framework深入了解MVC架构

「iOS」通过CoreLocation Framework重新了解多界面传值以及MVC架构 文章目录 「iOS」通过CoreLocation Framework重新了解多界面传值以及MVC架构前言CoreLocation了解根据需求建模设计属性方法设计协议传值Block传值KVONotification通知方式 总结参考文章 前言 在这个学期的前…

面试题整理1---正向代理和反向代理的含义及异同

面试题整理1---正向代理和反向代理的含义及异同 1. 正向代理 (Forward Proxy)1.1 正向代理定义&#xff1a;1.2 正向代理的工作流程&#xff1a;1.3 正向代理的应用场景&#xff1a; 2. 反向代理 (Reverse Proxy)2.1 反向代理的定义&#xff1a;2.2 反向代理的工作流程&#xf…

网络协议与网络安全学习记录

SSL(Secure Sockets Layer 安全套接层),及其继任者传输层安全&#xff08;Transport Layer Security&#xff0c;TLS&#xff09;是为网络通信提供安全及数据完整性的一种安全协议。TLS与SSL在传输层对网络连接进行加密 HTTPS,代表Hyper Text Transfer Protocol Secure,将SSL/T…

Dalsa线阵CCD相机使用开发手册

要使用Dalsa工业相机进行二次开发&#xff0c;看用户开发手册顺便做下笔记&#xff1a;&#xff08;欢迎加QQ讨论&#xff1a;77248031&#xff0c; 或QQ群&#xff1a;585068192&#xff09; 由于“本公主”用的.NET开发&#xff0c;软件支持只翻译了手册中.NET部分&#xff0…

CRC校验例题详解

CRC校验例题详解 示例题目 给定数据帧1101001和生成多项式G(x)x4x3x21&#xff0c;求该数据帧的CRC校验码&#xff0c;并验证传输过程中是否会出现错误。 解题步骤 第一步转换生成多项式&#xff1a; 接下来是对这一步骤的详细解答&#xff1a; 生成多项式的二进制表示 当我们…

《网络安全编程基础》之Socket编程

我的代码 server.c // server.cpp : Defines the entry point for the console application. //#include "stdafx.h" #include <Winsock2.h> #pragma comment(lib,"ws2_32.lib") //添加静态链接库文件 void main(int argc,char* argv[]) {WSADATA …