Running YOLOv11 Inference with ONNXRuntime in Python


This post walks through running a YOLOv11 model with ONNXRuntime in Python; the bulk of the work is the post-processing applied to the raw ONNXRuntime outputs.

1. Install dependencies

pip install opencv-python onnxruntime numpy
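To sanity-check the install, a quick sketch like the following prints the onnxruntime version and the execution providers available on this machine (CPU only, unless onnxruntime-gpu is installed):

import onnxruntime as ort

print(ort.__version__)                # installed onnxruntime version
print(ort.get_available_providers())  # e.g. ['CPUExecutionProvider']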

2. Export the ONNX model (optional)

from ultralytics import YOLO

# Load a model
model = YOLO("yolo11n.pt")  # load an official model
model = YOLO("best.pt")     # or load a custom trained model

# Export the model
model.export(format="onnx", simplify=True, half=True)
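The inference code below assumes the exported graph's input tensor is named "images". A minimal sketch to confirm the input/output names and shapes of your own export (the filename yolo11n.onnx is just the example from above):

import onnxruntime as ort

session = ort.InferenceSession("yolo11n.onnx")
for inp in session.get_inputs():
    print("input :", inp.name, inp.shape)   # typically: images [1, 3, 640, 640]
for out in session.get_outputs():
    print("output:", out.name, out.shape)   # typically: output0 [1, C, 8400]

If the names differ, adjust the feed dictionary in detect() below accordingly.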

3. Python inference code

import cv2
import numpy as np
import onnxruntime as ort
from math import exp

# Constant configuration
CLASSES = ['class1']  # model class names
np.random.seed(1)
COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))  # random color per class

meshgrid = []
class_num = len(CLASSES)
headNum = 3
strides = [8, 16, 32]
mapSize = [[80, 80], [40, 40], [20, 20]]
input_imgH = 640
input_imgW = 640


class DetectBox:
    """Detection box container."""

    def __init__(self, classId, score, xmin, ymin, xmax, ymax):
        self.classId = classId
        self.score = score
        self.xmin = xmin
        self.ymin = ymin
        self.xmax = xmax
        self.ymax = ymax


class YOLODetector:
    def __init__(self, model_path='./yolov11n.onnx', conf_thresh=0.5, iou_thresh=0.45):
        self.model_path = model_path
        self.conf_thresh = conf_thresh
        self.iou_thresh = iou_thresh
        self.ort_session = ort.InferenceSession(self.model_path)
        self.generate_meshgrid()

    @staticmethod
    def sigmoid(x):
        return 1 / (1 + exp(-x))

    @staticmethod
    def preprocess_image(img_src, resize_w, resize_h):
        image = cv2.resize(img_src, (resize_w, resize_h), interpolation=cv2.INTER_LINEAR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = image.astype(np.float32)
        image /= 255.0
        return image

    def generate_meshgrid(self):
        for index in range(headNum):
            for i in range(mapSize[index][0]):
                for j in range(mapSize[index][1]):
                    meshgrid.append(j + 0.5)
                    meshgrid.append(i + 0.5)

    def iou(self, xmin1, ymin1, xmax1, ymax1, xmin2, ymin2, xmax2, ymax2):
        xmin = max(xmin1, xmin2)
        ymin = max(ymin1, ymin2)
        xmax = min(xmax1, xmax2)
        ymax = min(ymax1, ymax2)
        innerWidth = max(0, xmax - xmin)
        innerHeight = max(0, ymax - ymin)
        innerArea = innerWidth * innerHeight
        area1 = (xmax1 - xmin1) * (ymax1 - ymin1)
        area2 = (xmax2 - xmin2) * (ymax2 - ymin2)
        total = area1 + area2 - innerArea
        return innerArea / total

    def nms(self, detectResult):
        predBoxs = []
        sort_detectboxs = sorted(detectResult, key=lambda x: x.score, reverse=True)
        for i in range(len(sort_detectboxs)):
            if sort_detectboxs[i].classId != -1:
                predBoxs.append(sort_detectboxs[i])
                for j in range(i + 1, len(sort_detectboxs)):
                    if sort_detectboxs[i].classId == sort_detectboxs[j].classId:
                        iou = self.iou(sort_detectboxs[i].xmin, sort_detectboxs[i].ymin,
                                       sort_detectboxs[i].xmax, sort_detectboxs[i].ymax,
                                       sort_detectboxs[j].xmin, sort_detectboxs[j].ymin,
                                       sort_detectboxs[j].xmax, sort_detectboxs[j].ymax)
                        if iou > self.iou_thresh:
                            sort_detectboxs[j].classId = -1  # suppress overlapping box
        return predBoxs

    def postprocess(self, out, img_h, img_w):
        detectResult = []
        output = out[0][0]  # drop the batch dimension -> (5 + num_classes, num_boxes)

        # Split the raw predictions
        reg = output[0:4, :]  # box regression: x, y, w, h
        conf = output[4, :]   # confidence

        # Check whether this is a multi-class model
        if output.shape[0] > 5:  # more than 5 rows: per-class probabilities follow
            class_probs = output[5:, :]  # class probabilities (num_classes, num_boxes)
            is_multiclass = True
        else:
            is_multiclass = False

        scale_h = img_h / input_imgH
        scale_w = img_w / input_imgW

        for i in range(reg.shape[1]):  # iterate over all predicted boxes
            x, y, w, h = reg[:, i]
            score = self.sigmoid(conf[i])  # squash confidence with sigmoid
            # score = float(conf[i])  # alternative: use the raw confidence as-is

            if is_multiclass:
                # softmax over the class logits
                class_prob = np.exp(class_probs[:, i]) / np.sum(np.exp(class_probs[:, i]))
                class_id = np.argmax(class_prob)    # index of the most likely class
                class_score = class_prob[class_id]  # its probability
                final_score = score * class_score   # combine confidence and class probability
            else:
                class_id = 0           # single-class model: class index is always 0
                final_score = score    # confidence is the final score

            if final_score > self.conf_thresh:  # filter low-confidence boxes
                xmin = max(0, (x - w / 2) * scale_w)
                ymin = max(0, (y - h / 2) * scale_h)
                xmax = min(img_w, (x + w / 2) * scale_w)
                ymax = min(img_h, (y + h / 2) * scale_h)
                box = DetectBox(classId=class_id, score=final_score,
                                xmin=xmin, ymin=ymin, xmax=xmax, ymax=ymax)
                detectResult.append(box)

        predBox = self.nms(detectResult)  # non-maximum suppression
        return predBox

    def detect(self, img_path):
        if isinstance(img_path, str):
            orig = cv2.imread(img_path)
        else:
            orig = img_path
        img_h, img_w = orig.shape[:2]
        image = self.preprocess_image(orig, input_imgW, input_imgH)
        image = image.transpose((2, 0, 1))     # HWC -> CHW
        image = np.expand_dims(image, axis=0)  # add batch dimension
        pred_results = self.ort_session.run(None, {'images': image})
        # Print the model output shape (for debugging)
        print(f"Model output shape: {pred_results[0].shape}")
        predbox = self.postprocess(pred_results, img_h, img_w)
        boxes = []
        scores = []
        class_ids = []
        for box in predbox:
            boxes.append([int(box.xmin), int(box.ymin), int(box.xmax), int(box.ymax)])
            scores.append(box.score)
            class_ids.append(box.classId)
        return boxes, scores, class_ids

    def draw_detections(self, image, boxes, scores, class_ids, mask_alpha=0.3):
        """Combines drawing masks, boxes, and text annotations on detected objects.

        Parameters:
        - image: Input image.
        - boxes: Array of bounding boxes.
        - scores: Confidence scores for each detected object.
        - class_ids: Detected object class IDs.
        - mask_alpha: Transparency of the mask overlay.
        """
        det_img = image.copy()
        img_height, img_width = image.shape[:2]
        font_size = min([img_height, img_width]) * 0.001
        text_thickness = max(1, int(min([img_height, img_width]) * 0.001))  # at least 1 px
        mask_img = image.copy()

        # Draw bounding boxes, masks, and text annotations
        for class_id, box, score in zip(class_ids, boxes, scores):
            color = tuple(int(c) for c in COLORS[class_id])  # plain int tuple for OpenCV
            x1, y1, x2, y2 = box[0], box[1], box[2], box[3]

            # Draw filled rectangle for the mask
            cv2.rectangle(mask_img, (x1, y1), (x2, y2), color, -1)
            # Draw bounding box
            cv2.rectangle(det_img, (x1, y1), (x2, y2), color, 2)

            # Prepare text (label and score)
            label = CLASSES[class_id]
            caption = f'{label} {int(score * 100)}%'

            # Calculate text size and position
            (tw, th), _ = cv2.getTextSize(text=caption, fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                                          fontScale=font_size, thickness=text_thickness)
            th = int(th * 1.2)

            # Draw filled rectangle for the text background
            cv2.rectangle(det_img, (x1, y1), (x1 + tw, y1 - th), color, -1)
            # Draw text over the filled rectangle
            cv2.putText(det_img, caption, (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, font_size,
                        (255, 255, 255), text_thickness, cv2.LINE_AA)

        # Blend the mask image with the original image
        det_img = cv2.addWeighted(mask_img, mask_alpha, det_img, 1 - mask_alpha, 0)
        return det_img


if __name__ == "__main__":
    model_path = './models/epoch_100_N.onnx'
    img_path = "/mnt/d/data/001/SD-001-00920/SI-001-01708.png"

    detector = YOLODetector(model_path=model_path, conf_thresh=0.55, iou_thresh=0.7)
    boxes, scores, class_ids = detector.detect(img_path)

    print("Detected boxes:", boxes)
    print("Detection scores:", scores)
    print("Detected classes:", class_ids)

    image = cv2.imread(img_path)
    result_img = detector.draw_detections(image, boxes, scores, class_ids)
    cv2.imwrite('result.jpg', result_img)
    cv2.imshow('Detection Results', result_img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
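The detector above builds its session with the default (CPU) provider. If a GPU is available and onnxruntime-gpu is installed, a minimal sketch of the change is to pass an explicit provider list when creating the session (the model path here is the same example path used above):

import onnxruntime as ort

# A sketch, assuming onnxruntime-gpu; with plain onnxruntime only the CPU provider loads.
session = ort.InferenceSession(
    './models/epoch_100_N.onnx',
    providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
)
print(session.get_providers())  # shows which providers were actually activated

The list is a preference order, so the script stays portable: ONNXRuntime uses the first provider that loads and falls back to CPU otherwise.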

4. YOLO11 architecture diagram

[Figure: YOLO11 architecture diagram]

