PyTorch训练关键点

news/2024/9/23 8:13:58/

1.背景

        在网上查找关键点训练的相关资料时,找到的大多是人脸或车牌关键点训练,或者是与检测任务联合训练的方案;基于轻量级网络、单独训练关键点模型的工程很少。本文介绍一种简单的实现方法,并给出相应代码。

2.代码模块

(1)网络结构

文件:model.py

import torch.nn as nn
import torch
import torch.nn.functional as F
import torch.nn.init as init

class Fire(nn.Module):
    """Fire block: a 1x1 "squeeze" conv followed by two parallel "expand" convs.

    The squeeze output goes through an in-place ReLU and is then fed to a 1x1
    and a 3x3 (padding=1) convolution whose results are concatenated along
    the channel axis. No activation is applied to the expand outputs inside
    this block.

    Args:
        inplanes: number of input channels.
        squeeze_planes: number of channels produced by the squeeze conv.
        expand1x1_planes: number of channels produced by the 1x1 expand conv.
        expand3x3_planes: number of channels produced by the 3x3 expand conv.
    """

    def __init__(self, inplanes, squeeze_planes,
                 expand1x1_planes, expand3x3_planes):
        super().__init__()
        self.inplanes = inplanes

        # Channel-reducing stage.
        self.squeeze = nn.Conv2d(in_channels=inplanes,
                                 out_channels=squeeze_planes,
                                 kernel_size=1)
        self.squeeze_activation = nn.ReLU(inplace=True)

        # Two expand branches; padding=1 keeps the 3x3 branch the same
        # spatial size as the 1x1 branch so they can be concatenated.
        self.expand1x1 = nn.Conv2d(in_channels=squeeze_planes,
                                   out_channels=expand1x1_planes,
                                   kernel_size=1)
        self.expand3x3 = nn.Conv2d(in_channels=squeeze_planes,
                                   out_channels=expand3x3_planes,
                                   kernel_size=3,
                                   padding=1)

    def forward(self, x):
        """Return the channel-wise concatenation of both expand branches."""
        squeezed = self.squeeze_activation(self.squeeze(x))
        branch_1x1 = self.expand1x1(squeezed)
        branch_3x3 = self.expand3x3(squeezed)
        return torch.cat((branch_1x1, branch_3x3), dim=1)

class RegressNet(nn.Module):
    """Small convolutional network that regresses 8 values per image.

    The 8 outputs are produced by a single linear layer on top of a stack of
    convolution / Fire / max-pool layers (``self.features``).

    Args:
        version: ``1.0`` selects the light feature stack (128 output
            channels); ``1.1`` selects the larger stack (512 output channels).
        export: stored on the instance as ``self.export``; it is not read
            anywhere else in this class.

    Raises:
        ValueError: if ``version`` is neither 1.0 nor 1.1.
    """

    def __init__(self, version=1.0, export=False):
        super(RegressNet, self).__init__()
        if version not in [1.0, 1.1]:
            raise ValueError("Unsupported RegressNet version {version}: "
                             "1.0 or 1.1 expected".format(version=version))
        self.export = export
        if version == 1.0:
            self.features = nn.Sequential(
                nn.Conv2d(3, 16, kernel_size=3, padding=(1, 1), stride=1),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True),
                Fire(16, 16, 32, 32),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True),
                Fire(64, 32, 32, 32),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True),
                Fire(64, 32, 64, 64),
                nn.ReLU(inplace=True),
                Fire(128, 32, 64, 64),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                nn.Conv2d(128, 128, kernel_size=3, padding=(0, 0), stride=2),
            )
            feature_channels = 128
        else:
            self.features = nn.Sequential(
                nn.Conv2d(3, 64, kernel_size=3, stride=2),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(64, 16, 64, 64),
                Fire(128, 16, 64, 64),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(128, 32, 128, 128),
                Fire(256, 32, 128, 128),
                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
                Fire(256, 48, 192, 192),
                Fire(384, 48, 192, 192),
                Fire(384, 64, 256, 256),
                Fire(512, 64, 256, 256),
            )
            feature_channels = 512

        # The regression head must match the channel count of the selected
        # feature stack (128 for version 1.0, 512 for version 1.1).
        self.fc = nn.Linear(feature_channels, 8)
        # L1 (mean absolute error) loss kept on the module for callers that
        # want it; forward() itself does not use it.
        self.loss = torch.nn.L1Loss()

        # Kaiming-uniform weights and zero biases for every convolution.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                init.kaiming_uniform_(m.weight)
                if m.bias is not None:
                    init.constant_(m.bias, 0)

    def forward(self, x):
        """Map a batch of images ``(N, 3, H, W)`` to ``(N, 8)`` predictions."""
        x = self.features(x)
        # Collapse whatever spatial extent is left to 1x1. For a 64x64 input
        # the version-1.0 stack already ends at 1x1, so this is a no-op there;
        # for other sizes it keeps the batch dimension intact instead of
        # folding spatial positions into it.
        x = F.adaptive_avg_pool2d(x, 1)
        x = torch.flatten(x, 1)
        return self.fc(x)

(2)训练工程

文件:train.py 以训练四个关键点为例

import numpy as np
from math import radians, cos, sin
import torchvision.transforms as transforms
import torchvision.transforms.functional as TF
#import imutils
import torch
from PIL import Image
import random
import cv2
import xml.etree.ElementTree as ET
from torch.utils.data import Dataset
import os

import torch.nn as nn
import torchvision.models as models
import torch.nn.functional as F
import torch.nn.init as init

import torch.optim as optim
import time
from tqdm import tqdm


from model import RegressNet

class Transforms():
    """Preprocessing applied jointly to an image and its landmark array.

    Calling an instance resizes the image to 64x64, applies colour jitter,
    converts it to a tensor and normalises it with mean 0.5 / std 0.5.
    ``rotate`` and ``crop_face`` are available as extra steps but are not
    part of the default ``__call__`` pipeline.
    """

    def __init__(self):
        # Built once and reused; the random jitter factors are still drawn
        # anew on every call of the transform object.
        self._color_jitter = transforms.ColorJitter(brightness=0.3,
                                                    contrast=0.3,
                                                    saturation=0.3,
                                                    hue=0.1)

    def rotate(self, image, landmarks, angle):
        """Rotate image and landmarks by a random angle in [-angle, +angle].

        Args:
            image: PIL image (anything ``np.array`` can convert to pixels).
            landmarks: landmark coordinates with x/y in the last dimension;
                assumed to be normalised to [0, 1] because the rotation is
                taken around (0.5, 0.5) -- TODO confirm against callers.
            angle: maximum absolute rotation in degrees.

        Returns:
            ``(PIL.Image, torch.Tensor)``: rotated image and landmarks.
        """
        # Draw the actual rotation angle (degrees).
        angle = random.uniform(-angle, +angle)
        theta = radians(angle)

        # 2-D rotation matrix, applied to row vectors (points @ matrix).
        transformation_matrix = torch.tensor([
            [+cos(theta), -sin(theta)],
            [+sin(theta), +cos(theta)]
        ])

        # Rotate the pixels about the image centre, keeping the original
        # size. These are the OpenCV calls that imutils.rotate wraps; cv2 is
        # used directly because imutils is not imported in this file.
        pixels = np.array(image)
        height, width = pixels.shape[:2]
        affine = cv2.getRotationMatrix2D((width // 2, height // 2), angle, 1.0)
        pixels = cv2.warpAffine(pixels, affine, (width, height))

        # Centre the landmarks, rotate, then shift back.
        landmarks = torch.as_tensor(landmarks, dtype=torch.float32)
        new_landmarks = torch.matmul(landmarks - 0.5, transformation_matrix) + 0.5

        return Image.fromarray(pixels), new_landmarks

    def resize(self, image, landmarks, img_size):
        """Resize the image to ``img_size``; landmarks are returned unchanged."""
        image = TF.resize(image, img_size)
        return image, landmarks

    def color_jitter(self, image, landmarks):
        """Randomly jitter brightness, contrast, saturation and hue."""
        image = self._color_jitter(image)
        return image, landmarks

    def crop_face(self, image, landmarks, crops):
        """Crop the image and re-express the landmarks relative to the crop.

        Args:
            image: PIL image to crop.
            landmarks: landmark coordinates in pixels of the uncropped image
                (presumably shape (N, 2) as x, y -- verify against caller).
            crops: mapping with ``'left'``, ``'top'``, ``'width'`` and
                ``'height'`` entries describing the crop box.

        Returns:
            ``(image, landmarks)`` where landmarks are shifted by the crop
            origin and divided by the cropped image's width and height.
        """
        left = int(crops['left'])
        top = int(crops['top'])
        width = int(crops['width'])
        height = int(crops['height'])

        image = TF.crop(image, top, left, height, width)

        # Shape of the cropped image as (rows, cols, ...).
        img_shape = np.array(image).shape
        # Move the origin to the crop's top-left corner ...
        landmarks = torch.as_tensor(landmarks) - torch.tensor([[left, top]])
        # ... and normalise x by the width, y by the height.
        landmarks = landmarks / torch.tensor([img_shape[1], img_shape[0]])
        return image, landmarks

    def __call__(self, image, landmarks):
        """Run the default pipeline on an image array and its landmarks.

        Args:
            image: image as a numpy array (converted with ``Image.fromarray``).
            landmarks: landmark coordinates; passed through unchanged.

        Returns:
            ``(tensor_image, landmarks)``.
        """
        # numpy array -> PIL image
        image = Image.fromarray(image)

        # Fixed network input size.
        image, landmarks = self.resize(image, landmarks, (64, 64))
        # Photometric augmentation only; it does not move the landmarks.
        image, landmarks = self.color_jitter(image, landmarks)
        # Random rotation is intentionally not applied here; call
        # self.rotate(image, landmarks, angle=...) to add it.

        # PIL image -> torch tensor
        image = TF.to_tensor(image)
        # Normalise every channel with mean 0.5 and std 0.5.
        image = TF.normalize(image, [0.5], [0.5])
        return image, landmarks
 

(3)dataset定义:每个样本的标签长度为8,依次为 x1,y1,x2,y2,x3,y3,x4,y4

#标签排列规则(path.txt 中每行对应一张图片,字段之间以空格分隔,坐标按图像宽、高归一化)

XXX.jpg x1/width y1/height x2/width y2/height x3/width y3/height x4/width y4/height

class FaceLandmarksDataset(Dataset):
    """Dataset of images with a fixed number of (x, y) keypoints each.

    Labels come from a text file with one sample per line::

        <image path> x1 y1 x2 y2 ... xN yN

    Fields are whitespace separated. Coordinates are parsed as floats, so
    both normalised values (x/width, y/height) and integer values are
    accepted. Blank lines are skipped.

    Args:
        transform: optional callable ``(image, landmarks) -> (image,
            landmarks)`` applied in ``__getitem__``.
        list_file: path of the label file.
        root_dir: directory that the image paths in ``list_file`` are
            relative to.
        num_points: number of keypoints per image.

    Raises:
        ValueError: if a non-blank line has fewer than ``1 + 2 * num_points``
            fields or a coordinate is not a number.
    """

    def __init__(self, transform=None,
                 list_file=r"C:\DL_Work\test_pics\path.txt",
                 root_dir=r'C:\DL_Work\test_pics/',
                 num_points=4):
        self.image_filenames = []
        self.landmarks = []
        self.crops = []
        self.transform = transform
        self.root_dir = root_dir

        fields_per_line = 1 + 2 * num_points
        parsed_landmarks = []
        # utf-8-sig reads plain UTF-8 as well and drops a leading BOM, which
        # would otherwise end up glued to the first image path.
        with open(list_file, 'r', encoding="utf-8-sig") as label_file:
            for line_no, line in enumerate(label_file, start=1):
                fields = line.split()
                if not fields:
                    continue  # blank line
                if len(fields) < fields_per_line:
                    raise ValueError(
                        "{}:{}: expected {} fields, got {}".format(
                            list_file, line_no, fields_per_line, len(fields)))

                self.image_filenames.append(
                    os.path.join(self.root_dir, fields[0]))

                # float() (not int()) so normalised coordinates parse.
                coords = [float(value) for value in fields[1:fields_per_line]]
                parsed_landmarks.append(
                    [coords[i:i + 2] for i in range(0, len(coords), 2)])

        # Shape (num_samples, num_points, 2), also for an empty label file.
        self.landmarks = np.array(
            parsed_landmarks, dtype='float32').reshape(-1, num_points, 2)

    def __len__(self):
        return len(self.image_filenames)

    def __getitem__(self, index):
        """Return ``(image, landmarks)`` for one sample.

        The landmarks have 0.5 subtracted after the optional transform.

        Raises:
            FileNotFoundError: if the image cannot be read.
        """
        path = self.image_filenames[index]
        # Colour read; cv2.imread signals failure by returning None.
        image = cv2.imread(path)
        if image is None:
            raise FileNotFoundError("cannot read image: {}".format(path))
        landmarks = self.landmarks[index]

        if self.transform:
            image, landmarks = self.transform(image, landmarks)

        # Centre the targets around zero.
        landmarks = landmarks - 0.5

        return image, landmarks


# Build the dataset once, with the preprocessing pipeline attached.
dataset = FaceLandmarksDataset(Transforms())

# Hold out 10% of the samples (rounded down) for validation; everything
# else is used for training.
len_valid_set = int(0.1 * len(dataset))
len_train_set = len(dataset) - len_valid_set

train_dataset, valid_dataset = torch.utils.data.random_split(
    dataset,
    [len_train_set, len_valid_set],
)

# Shuffling loaders: batches of 4 for training, single samples for validation.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=4, shuffle=True, num_workers=1)
valid_loader = torch.utils.data.DataLoader(
    dataset=valid_dataset, batch_size=1, shuffle=True, num_workers=1)

(4)train

def train(num_epochs=10, learning_rate=0.0001,
          checkpoint_path='plate_landmark.pth'):
    """Train a RegressNet on ``train_loader`` and validate on ``valid_loader``.

    Each epoch makes exactly one pass over each loader. The model weights are
    saved to ``checkpoint_path`` whenever the monitored loss improves; the
    monitored loss is the mean validation loss, or the mean training loss
    when the validation loader is empty.

    Args:
        num_epochs: number of training epochs.
        learning_rate: learning rate for the Adam optimizer.
        checkpoint_path: file the best ``state_dict`` is written to.

    Returns:
        ``(train_losses, valid_losses)``: per-epoch mean losses. Validation
        entries are ``nan`` when there is no validation data.
    """
    # Per-epoch mean losses.
    train_losses = []
    valid_losses = []

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    network = RegressNet().to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(network.parameters(), lr=learning_rate)

    loss_min = np.inf
    has_validation = len(valid_loader) > 0

    start_time = time.time()
    for epoch in range(1, num_epochs + 1):
        loss_train = 0.0
        loss_valid = 0.0

        network.train()
        # Iterate the loader itself: calling next(iter(loader)) per step
        # would build a fresh iterator every time and only ever yield the
        # first batch of a new shuffle.
        for images, landmarks in tqdm(train_loader):
            images = images.to(device)
            # Flatten (N, points, 2) targets to (N, points * 2) to match the
            # network output.
            landmarks = landmarks.view(landmarks.size(0), -1).to(
                device=device, dtype=torch.float32)

            optimizer.zero_grad()
            predictions = network(images)
            loss_train_step = criterion(predictions, landmarks)
            loss_train_step.backward()
            optimizer.step()

            loss_train += loss_train_step.item()

        network.eval()
        with torch.no_grad():
            for images, landmarks in valid_loader:
                images = images.to(device)
                landmarks = landmarks.view(landmarks.size(0), -1).to(
                    device=device, dtype=torch.float32)

                predictions = network(images)
                loss_valid_step = criterion(predictions, landmarks)

                loss_valid += loss_valid_step.item()

        # max(..., 1) avoids a ZeroDivisionError on an empty loader.
        loss_train /= max(len(train_loader), 1)
        if has_validation:
            loss_valid /= len(valid_loader)
            monitored_loss = loss_valid
        else:
            # Nothing to validate on: report nan and select on training loss.
            loss_valid = float('nan')
            monitored_loss = loss_train

        train_losses.append(loss_train)
        valid_losses.append(loss_valid)

        print('\n--------------------------------------------------')
        print('Epoch: {}  Train Loss: {:.4f}  Valid Loss: {:.4f}'.format(epoch, loss_train, loss_valid))
        print('--------------------------------------------------')

        if monitored_loss < loss_min:
            loss_min = monitored_loss
            torch.save(network.state_dict(), checkpoint_path)
            print("\nMinimum Validation Loss of {:.4f} at epoch {}/{}".format(loss_min, epoch, num_epochs))
            print('Model Saved\n')

    print('Training Complete')
    print("Total Elapsed Time: {} s".format(time.time() - start_time))

    return train_losses, valid_losses


if __name__ == '__main__':
    train()

3.导出onnx

#export.py

import torch
import torch.nn
import onnx
from onnxsim import simplify
from model import RegressNet
#device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Load the checkpoint and run the export on the CPU.
device = torch.device('cpu')

model = RegressNet()
model_statedict = torch.load(r'./plate_landmark.pth', map_location=device)
model.load_state_dict(model_statedict)
# Export the network in inference mode.
model.eval()

input_names = ['input0']
output_names = ['output0']

# Dummy input used for tracing: one 3-channel 64x64 image.
x = torch.randn(1, 3, 64, 64, device=device)

# Axis 0 of the input and the output is marked dynamic so the exported
# model accepts any batch size.
torch.onnx.export(model, x, 'plate_landmark.onnx', opset_version=11,
                  verbose=True,
                  input_names=input_names,
                  output_names=output_names,
                  dynamic_axes={'input0': {0: 'batch'},
                                'output0': {0: 'batch'}})

# Simplify the exported graph.
onnx_model = onnx.load("plate_landmark.onnx")
simplified_model, check = simplify(onnx_model)
# `check` reports whether the simplified model passed onnxsim's validation;
# do not save a model that failed it.
if not check:
    raise RuntimeError("Simplified ONNX model could not be validated")

# Save the simplified model.
onnx.save_model(simplified_model, "plate_landmark_sim.onnx")


http://www.ppmy.cn/news/1465581.html

相关文章

java通过解密身份证计算年龄(精确到日)

一、需求:对于系统中满55岁女性在职的党员和满60岁男性在职党员通过xxl-job进行定时提醒该人本级和上级管理员进行转出。 二、控制层 package com.cnpc.dj.party.controller;import com.cnpc.dj.common.JsonResult; import com.cnpc.dj.common.exception.BusiExc…

如何使用 Nginx 创建临时和永久重定向

前些天发现了一个巨牛的人工智能学习网站,通俗易懂,风趣幽默,忍不住分享一下给大家。点击跳转到网站。 简介 HTTP 重定向 是将一个域名或地址指向另一个的方法。有几种不同类型的重定向,每种对客户端浏览器意味着不同的事情。最常见的两种类型是临时重定向和永久重定向。…

知识见闻 - 人和动物的主要区别

人类和动物的主要区别之一确实在于理性,但这只是众多区别中的一个方面。以下是一些更全面的比较,突出人类和动物之间的主要区别: 理性和抽象思维: 人类:人类具有高度发展的理性能力,可以进行抽象思维、逻辑…

【第2章】SpringBoot实战篇之接口参数校验和全局异常处理

文章目录 前言一、参数校验1. 引入库2. 全局异常处理3. 控制器类4. 响应 二、对象校验1.实体类2.控制器类3. 全局异常处理4. 响应 总结 前言 对接口请求参数校验是每一个开发人员都必须熟知且善用的功能,是保证程序健壮性的基石。 除引入方式不同,使用…

vue3 中可缓存的方法

场景:在列表中,有这么一个属性,需要通过同行的其他属性,进行复杂的计算,才能得出,如果我们用方法,然后传参,得到这个属性,那么每次更改列表后,每行都会重新计…

机器学习-6-对随机梯度下降算法SGD的理解

参考一文带您了解随机梯度下降(Stochastic Gradient Descent):python代码示例 参考sklearn-SGDClassifier 1 梯度下降 在机器学习领域,梯度下降扮演着至关重要的角色。梯度下降是一种优化算法,通过迭代沿着由梯度定义的最陡下降方向,以最小化函数。类似于图中的场景,可以…

【蓝桥杯嵌入式】第十四届省赛 更新中

0 前言 刚刚做完第十四届的省赛,这届题目比我想象中的要简单;不过我去年参加的14届单片机的省赛都比往年的国赛还难,挺离谱的~ 1 展示 1.1 源码 1.2 演示视频 1.3 题目展示 2 CubeMX配置(第十四届省赛真题) 设置下载线 HSE时钟设置 时钟树…

C++容器之前向链表(std::forward_list)

目录 1 概述2 使用实例3 接口使用3.1 construct3.2 assigns3.3 iterators3.4 capacity3.5 access3.6 assign3.7 emplace_front3.8 push_front3.9 pop_front3.10 emplace_after3.11 insert_after