1. First, here is the YOLOv11 .rknn model I generated:
2. Comparing with the YOLOv5 model:
2.1 Post-processing of the YOLOv5 model:
outputs = rknn.inference(inputs=[img2], data_format=['nhwc'])
np.save('./onnx_yolov5_0.npy', outputs[0])
np.save('./onnx_yolov5_1.npy', outputs[1])
np.save('./onnx_yolov5_2.npy', outputs[2])
print('done')

# post process
input0_data = outputs[0]
input1_data = outputs[1]
input2_data = outputs[2]

input0_data = input0_data.reshape([3, -1] + list(input0_data.shape[-2:]))
input1_data = input1_data.reshape([3, -1] + list(input1_data.shape[-2:]))
input2_data = input2_data.reshape([3, -1] + list(input2_data.shape[-2:]))

input_data = list()
input_data.append(np.transpose(input0_data, (2, 3, 0, 1)))
input_data.append(np.transpose(input1_data, (2, 3, 0, 1)))
input_data.append(np.transpose(input2_data, (2, 3, 0, 1)))
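To make the reshape/transpose explicit: each YOLOv5 head emits (1, 255, H, W), where 255 = 3 anchors × (5 + 80 classes); the code above splits the anchor dimension out of the channels and moves the grid dimensions to the front. A minimal sketch with dummy data:

import numpy as np

# Dummy head output: 1 batch, 255 = 3 anchors * (5 + 80) channels, 80x80 grid.
out = np.zeros((1, 255, 80, 80), dtype=np.float32)

# Split channels into 3 anchors x 85 values per anchor.
out = out.reshape([3, -1] + list(out.shape[-2:]))   # (3, 85, 80, 80)

# Reorder to (grid_h, grid_w, anchor, channel).
out = np.transpose(out, (2, 3, 0, 1))               # (80, 80, 3, 85)
print(out.shape)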
Then:
def yolov5_post_process(input_data):
    masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
    anchors = [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
               [59, 119], [116, 90], [156, 198], [373, 326]]

    boxes, classes, scores = [], [], []
    for input, mask in zip(input_data, masks):
        b, c, s = process(input, mask, anchors)
        b, c, s = filter_boxes(b, c, s)
        boxes.append(b)
        classes.append(c)
        scores.append(s)
Then:
def process(input, mask, anchors):
    anchors = [anchors[i] for i in mask]
    grid_h, grid_w = map(int, input.shape[0:2])

    box_confidence = input[..., 4]
    box_confidence = np.expand_dims(box_confidence, axis=-1)

    box_class_probs = input[..., 5:]

    box_xy = input[..., :2] * 2 - 0.5

    col = np.tile(np.arange(0, grid_w), grid_w).reshape(-1, grid_w)
    row = np.tile(np.arange(0, grid_h).reshape(-1, 1), grid_h)
    col = col.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
    row = row.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
    grid = np.concatenate((col, row), axis=-1)
    box_xy += grid
    box_xy *= int(IMG_SIZE / grid_h)

    box_wh = pow(input[..., 2:4] * 2, 2)
    box_wh = box_wh * anchors

    box = np.concatenate((box_xy, box_wh), axis=-1)

    return box, box_confidence, box_class_probs
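The decode above follows the YOLOv5 convention, assuming (as the formulas imply) that the tensors are already post-sigmoid: xy = (2t − 0.5 + grid) · stride and wh = (2t)² · anchor. A quick numeric check with made-up values, for a cell at (10, 20) on the stride-8 head:

tx, ty = 0.6, 0.6          # post-sigmoid center offsets (made-up values)
tw, th = 0.7, 0.4          # post-sigmoid size terms
col, row = 10, 20          # grid cell
stride = 8                 # 640 / 80 for the first head
anchor_w, anchor_h = 10, 13

cx = (tx * 2 - 0.5 + col) * stride   # (0.7 + 10) * 8 = 85.6
cy = (ty * 2 - 0.5 + row) * stride   # (0.7 + 20) * 8 = 165.6
w = (tw * 2) ** 2 * anchor_w         # 1.96 * 10 = 19.6
h = (th * 2) ** 2 * anchor_h         # 0.64 * 13 = 8.32
print(cx, cy, w, h)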
3. Attempt 1 - based on the ONNX code that ran successfully on the host machine yesterday:
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
import os
import sys
from math import exp
import cv2
import numpy as np

ROOT = os.getcwd()
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))

RKNN_MODEL = r'/home/firefly/app/models/sim_moonpie-640-640_rk3588.rknn'
IMG_PATH = '/home/firefly/app/images/cake26.jpg'
QUANTIZE_ON = False

CLASSES = ['moonpie', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
           'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
           'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
           'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
           'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
           'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
           'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
           'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
           'hair drier', 'toothbrush', 'moonpie']

meshgrid = []

class_num = len(CLASSES)
headNum = 3
strides = [8, 16, 32]
mapSize = [[80, 80], [40, 40], [20, 20]]
nmsThresh = 0.45
objectThresh = 0.5

input_imgH = 640
input_imgW = 640

from rknn.api import RKNN
def rk3588_detect(model, pic, classes):
    rknn = RKNN(verbose=True)
    '''
    # model config
    rknn.config(mean_values=[[0, 0, 0]],
                std_values=[[255, 255, 255]],
                quant_img_RGB2BGR=False,
                target_platform='rk3588')
    '''
    rknn.load_rknn(path=model)
    rknn.init_runtime(target="rk3588", core_mask=RKNN.NPU_CORE_AUTO)
    outputs = rknn.inference(inputs=[pic], data_format=['nhwc'])
    return outputs


class DetectBox:
    def __init__(self, classId, score, xmin, ymin, xmax, ymax):
        self.classId = classId
        self.score = score
        self.xmin = xmin
        self.ymin = ymin
        self.xmax = xmax
        self.ymax = ymax


class YOLOV11DetectObj:
    def __init__(self):
        pass

    def GenerateMeshgrid(self):
        for index in range(headNum):
            for i in range(mapSize[index][0]):
                for j in range(mapSize[index][1]):
                    meshgrid.append(j + 0.5)
                    meshgrid.append(i + 0.5)

    def IOU(self, xmin1, ymin1, xmax1, ymax1, xmin2, ymin2, xmax2, ymax2):
        xmin = max(xmin1, xmin2)
        ymin = max(ymin1, ymin2)
        xmax = min(xmax1, xmax2)
        ymax = min(ymax1, ymax2)

        innerWidth = xmax - xmin
        innerHeight = ymax - ymin
        innerWidth = innerWidth if innerWidth > 0 else 0
        innerHeight = innerHeight if innerHeight > 0 else 0

        innerArea = innerWidth * innerHeight
        area1 = (xmax1 - xmin1) * (ymax1 - ymin1)
        area2 = (xmax2 - xmin2) * (ymax2 - ymin2)
        total = area1 + area2 - innerArea

        return innerArea / total

    def NMS(self, detectResult):
        predBoxs = []
        sort_detectboxs = sorted(detectResult, key=lambda x: x.score, reverse=True)
        for i in range(len(sort_detectboxs)):
            xmin1 = sort_detectboxs[i].xmin
            ymin1 = sort_detectboxs[i].ymin
            xmax1 = sort_detectboxs[i].xmax
            ymax1 = sort_detectboxs[i].ymax
            classId = sort_detectboxs[i].classId
            if sort_detectboxs[i].classId != -1:
                predBoxs.append(sort_detectboxs[i])
                for j in range(i + 1, len(sort_detectboxs), 1):
                    if classId == sort_detectboxs[j].classId:
                        xmin2 = sort_detectboxs[j].xmin
                        ymin2 = sort_detectboxs[j].ymin
                        xmax2 = sort_detectboxs[j].xmax
                        ymax2 = sort_detectboxs[j].ymax
                        iou = self.IOU(xmin1, ymin1, xmax1, ymax1, xmin2, ymin2, xmax2, ymax2)
                        if iou > nmsThresh:
                            sort_detectboxs[j].classId = -1
        return predBoxs

    def sigmoid(self, x):
        return 1 / (1 + exp(-x))

    def postprocess(self, out, img_h, img_w):
        print('postprocess ... ')

        detectResult = []
        output = []
        for i in range(len(out)):
            print(out[i].shape)
            output.append(out[i].reshape((-1)))

        scale_h = img_h / input_imgH
        scale_w = img_w / input_imgW

        gridIndex = -2
        cls_index = 0
        cls_max = 0

        for index in range(headNum):
            reg = output[index * 2 + 0]
            cls = output[index * 2 + 1]

            for h in range(mapSize[index][0]):
                for w in range(mapSize[index][1]):
                    gridIndex += 2

                    if 1 == class_num:
                        cls_max = self.sigmoid(cls[0 * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w])
                        cls_index = 0
                    else:
                        for cl in range(class_num):
                            cls_val = cls[cl * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w]
                            if 0 == cl:
                                cls_max = cls_val
                                cls_index = cl
                            else:
                                if cls_val > cls_max:
                                    cls_max = cls_val
                                    cls_index = cl
                        cls_max = self.sigmoid(cls_max)

                    if cls_max > objectThresh:
                        regdfl = []
                        for lc in range(4):
                            sfsum = 0
                            locval = 0
                            for df in range(16):
                                temp = exp(reg[((lc * 16) + df) * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w])
                                reg[((lc * 16) + df) * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w] = temp
                                sfsum += temp
                            for df in range(16):
                                sfval = reg[((lc * 16) + df) * mapSize[index][0] * mapSize[index][1] + h * mapSize[index][1] + w] / sfsum
                                locval += sfval * df
                            regdfl.append(locval)

                        x1 = (meshgrid[gridIndex + 0] - regdfl[0]) * strides[index]
                        y1 = (meshgrid[gridIndex + 1] - regdfl[1]) * strides[index]
                        x2 = (meshgrid[gridIndex + 0] + regdfl[2]) * strides[index]
                        y2 = (meshgrid[gridIndex + 1] + regdfl[3]) * strides[index]

                        xmin = x1 * scale_w
                        ymin = y1 * scale_h
                        xmax = x2 * scale_w
                        ymax = y2 * scale_h

                        xmin = xmin if xmin > 0 else 0
                        ymin = ymin if ymin > 0 else 0
                        xmax = xmax if xmax < img_w else img_w
                        ymax = ymax if ymax < img_h else img_h

                        box = DetectBox(cls_index, cls_max, xmin, ymin, xmax, ymax)
                        detectResult.append(box)

        # NMS
        print('detectResult:', len(detectResult))
        predBox = self.NMS(detectResult)
        return predBox

    def precess_image(self, img_src, resize_w, resize_h):
        print(f'{type(img_src)}')
        image = cv2.resize(img_src, (resize_w, resize_h), interpolation=cv2.INTER_LINEAR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = image.astype(np.float32)
        image /= 255.0
        return image

    def detect(self, img_path):
        self.GenerateMeshgrid()

        orig = cv2.imread(img_path)
        if orig is None:
            print(f"Failed to read image: {img_path}")
            return
        img_h, img_w = orig.shape[:2]

        image = self.precess_image(orig, input_imgW, input_imgH)
        image = image.transpose((2, 0, 1))
        image = np.expand_dims(image, axis=0)
        # image = np.ones((1, 3, 640, 640), dtype=np.uint8)
        # print(image.shape)

        # ort_session = ort.InferenceSession(ONNX_MODEL)
        # pred_results = (ort_session.run(None, {'data': image}))
        pred_results = rk3588_detect(RKNN_MODEL, image, CLASSES)

        out = []
        for i in range(len(pred_results)):
            out.append(pred_results[i])
        predbox = self.postprocess(out, img_h, img_w)

        print('obj num is :', len(predbox))

        for i in range(len(predbox)):
            xmin = int(predbox[i].xmin)
            ymin = int(predbox[i].ymin)
            xmax = int(predbox[i].xmax)
            ymax = int(predbox[i].ymax)
            classId = predbox[i].classId
            score = predbox[i].score

            cv2.rectangle(orig, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
            ptext = (xmin, ymin)
            title = CLASSES[classId] + "%.2f" % score
            cv2.putText(orig, title, ptext, cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2, cv2.LINE_AA)

        cv2.imwrite('./test_onnx_result.jpg', orig)


if __name__ == '__main__':
    print('This is main ....')
    img_path = IMG_PATH
    obj = YOLOV11DetectObj()
    obj.detect(img_path)
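As an aside, the inner DFL loop in postprocess above (a softmax over 16 bins per box side, followed by an expectation) can also be written in vectorized NumPy. A minimal sketch of the same computation; dfl_decode is a hypothetical helper, not part of the original script:

import numpy as np

def dfl_decode(reg, grid_h, grid_w):
    """reg: (64, grid_h, grid_w) raw DFL logits -> (4, grid_h, grid_w) distances."""
    reg = reg.reshape(4, 16, grid_h, grid_w)
    reg = np.exp(reg - reg.max(axis=1, keepdims=True))  # numerically stable softmax
    reg /= reg.sum(axis=1, keepdims=True)
    bins = np.arange(16, dtype=np.float32).reshape(1, 16, 1, 1)
    return (reg * bins).sum(axis=1)                      # expectation over the 16 bins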
The output is wrong:
firefly@firefly:~/app/test$ python3 ./detect_rk3588.py
This is main ....
<class 'numpy.ndarray'>
I rknn-toolkit2 version: 2.3.0
I target set by user is: rk3588
postprocess ...
(1, 64, 80, 80)
(1, 81, 80, 80)
(1, 64, 40, 40)
(1, 81, 40, 40)
(1, 64, 20, 20)
(1, 81, 20, 20)
detectResult: 0
obj num is : 0
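detectResult: 0 means no grid cell ever passed objectThresh. A quick way to localize that kind of failure is to print each output's raw value range right after inference, before suspecting the decode; a hypothetical debug snippet, not in the original script:

# Inside detect(), right after pred_results = rk3588_detect(...):
for i, o in enumerate(pred_results):
    print(i, o.shape, float(o.min()), float(o.max()))
# The odd-indexed outputs are the class heads here; if sigmoid of the max
# class logit never exceeds objectThresh (0.5), postprocess returns 0 boxes.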
4. Attempt 2 - based on the yolov5.rknn code
4.1 There seems to be the following mapping:
It seems: yolo11.output5 == yolo5.output2,
yolo11.output3 == yolo5.output1,
yolo11.output1 == yolo5.output0 (this can be verified with the shape check sketched below).
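The pairing can be read straight off the tensor shapes: the (1, 64, H, W) outputs are the DFL regression heads and the (1, num_classes, H, W) outputs are the class heads, one pair per stride. A minimal sketch, assuming a loaded and initialized RKNN model and a preprocessed img2 as in the scripts here:

outputs = rknn.inference(inputs=[img2], data_format=['nhwc'])
for i, o in enumerate(outputs):
    print(f'output[{i}]: {o.shape}')
# Observed for this model (matches the log in section 3):
# output[0]: (1, 64, 80, 80)   reg head, stride 8
# output[1]: (1, 81, 80, 80)   cls head, stride 8
# output[2]: (1, 64, 40, 40)   reg head, stride 16
# output[3]: (1, 81, 40, 40)   cls head, stride 16
# output[4]: (1, 64, 20, 20)   reg head, stride 32
# output[5]: (1, 81, 20, 20)   cls head, stride 32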
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
import os
import sys
import urllib
import traceback
import time
import numpy as np
import cv2
from rknn.api import RKNN

ROOT = os.getcwd()
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))

# Model from https://github.com/airockchip/rknn_model_zoo yolov11 -> self-trained. yolo11s?
ONNX_MODEL = r'/home/firefly/app/models/yolo11_selfgen.onnx'
RKNN_MODEL = r'/home/firefly/app/models/new_moonpie_yolo11_640x640.rknn'
IMG_PATH = r'/home/firefly/app/images/cake26.jpg'
QUANTIZE_ON = True
DATASET = r'./dataset.txt'

CLASSES = ['moonpie', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
           'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
           'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
           'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
           'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
           'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
           'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
           'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
           'hair drier', 'toothbrush']

meshgrid = []

class_num = len(CLASSES)
headNum = 3
strides = [8, 16, 32]
mapSize = [[80, 80], [40, 40], [20, 20]]
input_imgH = 640
input_imgW = 640
IMG_SIZE = input_imgH
OBJ_THRESH = 0.25
NMS_THRESH = 0.45


def xywh2xyxy(x):
    # Convert [x, y, w, h] to [x1, y1, x2, y2]
    y = np.copy(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
    return y


def process(input, mask, anchors):
    anchors = [anchors[i] for i in mask]
    grid_h, grid_w = map(int, input.shape[0:2])

    box_confidence = input[..., 4]
    box_confidence = np.expand_dims(box_confidence, axis=-1)

    box_class_probs = input[..., 5:]

    box_xy = input[..., :2] * 2 - 0.5

    col = np.tile(np.arange(0, grid_w), grid_w).reshape(-1, grid_w)
    row = np.tile(np.arange(0, grid_h).reshape(-1, 1), grid_h)
    col = col.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
    row = row.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
    grid = np.concatenate((col, row), axis=-1)
    box_xy += grid
    box_xy *= int(IMG_SIZE / grid_h)

    box_wh = pow(input[..., 2:4] * 2, 2)
    box_wh = box_wh * anchors

    box = np.concatenate((box_xy, box_wh), axis=-1)

    return box, box_confidence, box_class_probs


def filter_boxes(boxes, box_confidences, box_class_probs):
    """Filter boxes with box threshold. It's a bit different with origin yolov5 post process!

    # Arguments
        boxes: ndarray, boxes of objects.
        box_confidences: ndarray, confidences of objects.
        box_class_probs: ndarray, class_probs of objects.

    # Returns
        boxes: ndarray, filtered boxes.
        classes: ndarray, classes for boxes.
        scores: ndarray, scores for boxes.
    """
    boxes = boxes.reshape(-1, 4)
    box_confidences = box_confidences.reshape(-1)
    box_class_probs = box_class_probs.reshape(-1, box_class_probs.shape[-1])

    _box_pos = np.where(box_confidences >= OBJ_THRESH)
    boxes = boxes[_box_pos]
    box_confidences = box_confidences[_box_pos]
    box_class_probs = box_class_probs[_box_pos]

    class_max_score = np.max(box_class_probs, axis=-1)
    classes = np.argmax(box_class_probs, axis=-1)
    _class_pos = np.where(class_max_score >= OBJ_THRESH)

    boxes = boxes[_class_pos]
    classes = classes[_class_pos]
    scores = (class_max_score * box_confidences)[_class_pos]

    return boxes, classes, scores


def nms_boxes(boxes, scores):
    """Suppress non-maximal boxes.

    # Arguments
        boxes: ndarray, boxes of objects.
        scores: ndarray, scores of objects.

    # Returns
        keep: ndarray, index of effective boxes.
    """
    x = boxes[:, 0]
    y = boxes[:, 1]
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]

    areas = w * h
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)

        xx1 = np.maximum(x[i], x[order[1:]])
        yy1 = np.maximum(y[i], y[order[1:]])
        xx2 = np.minimum(x[i] + w[i], x[order[1:]] + w[order[1:]])
        yy2 = np.minimum(y[i] + h[i], y[order[1:]] + h[order[1:]])

        w1 = np.maximum(0.0, xx2 - xx1 + 0.00001)
        h1 = np.maximum(0.0, yy2 - yy1 + 0.00001)
        inter = w1 * h1

        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        inds = np.where(ovr <= NMS_THRESH)[0]
        order = order[inds + 1]
    keep = np.array(keep)
    return keep


def yolov5_post_process(input_data):
    masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
    anchors = [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
               [59, 119], [116, 90], [156, 198], [373, 326]]

    boxes, classes, scores = [], [], []
    for input, mask in zip(input_data, masks):
        b, c, s = process(input, mask, anchors)
        b, c, s = filter_boxes(b, c, s)
        boxes.append(b)
        classes.append(c)
        scores.append(s)

    boxes = np.concatenate(boxes)
    boxes = xywh2xyxy(boxes)
    classes = np.concatenate(classes)
    scores = np.concatenate(scores)

    nboxes, nclasses, nscores = [], [], []
    for c in set(classes):
        inds = np.where(classes == c)
        b = boxes[inds]
        c = classes[inds]
        s = scores[inds]

        keep = nms_boxes(b, s)

        nboxes.append(b[keep])
        nclasses.append(c[keep])
        nscores.append(s[keep])

    if not nclasses and not nscores:
        return None, None, None

    boxes = np.concatenate(nboxes)
    classes = np.concatenate(nclasses)
    scores = np.concatenate(nscores)

    return boxes, classes, scores


def draw(image, boxes, scores, classes):
    """Draw the boxes on the image.

    # Argument:
        image: original image.
        boxes: ndarray, boxes of objects.
        classes: ndarray, classes of objects.
        scores: ndarray, scores of objects.
        all_classes: all classes name.
    """
    print("{:^12} {:^12} {}".format('class', 'score', 'xmin, ymin, xmax, ymax'))
    print('-' * 50)
    for box, score, cl in zip(boxes, scores, classes):
        top, left, right, bottom = box
        top = int(top)
        left = int(left)
        right = int(right)
        bottom = int(bottom)

        cv2.rectangle(image, (top, left), (right, bottom), (255, 0, 0), 2)
        cv2.putText(image, '{0} {1:.2f}'.format(CLASSES[cl], score),
                    (top, left - 6),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.6, (0, 0, 255), 2)

        print("{:^12} {:^12.3f} [{:>4}, {:>4}, {:>4}, {:>4}]".format(CLASSES[cl], score, top, left, right, bottom))


def letterbox(im, new_shape=(640, 640), color=(0, 0, 0)):
    # Resize and pad image while meeting stride-multiple constraints
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im, ratio, (dw, dh)


def detect_on_ubuntu(onnx_model_path=ONNX_MODEL, rknn_path=RKNN_MODEL, image_path=IMG_PATH, classes=CLASSES):
    # Create RKNN object
    rknn = RKNN(verbose=True)

    # pre-process config
    print('--> Config model')
    rknn.config(mean_values=[[0, 0, 0]], std_values=[[255, 255, 255]], target_platform='rk3588')
    print('done')

    # Load ONNX model
    print('--> Loading model')
    ret = rknn.load_onnx(model=onnx_model_path)
    if ret != 0:
        print('Load model failed!')
        exit(ret)
    print('done')

    # Build model
    print('--> Building model')
    ret = rknn.build(do_quantization=QUANTIZE_ON, dataset=DATASET)
    if ret != 0:
        print('Build model failed!')
        exit(ret)
    print('done')

    # Export RKNN model
    print('--> Export rknn model')
    ret = rknn.export_rknn(rknn_path)
    if ret != 0:
        print('Export rknn model failed!')
        exit(ret)
    print('done')

    # Init runtime environment
    print('--> Init runtime environment')
    ret = rknn.init_runtime()
    if ret != 0:
        print('Init runtime environment failed!')
        exit(ret)
    print('done')

    # Set inputs
    img = cv2.imread(image_path)
    # img, ratio, (dw, dh) = letterbox(img, new_shape=(IMG_SIZE, IMG_SIZE))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))

    # Inference
    print('--> Running model')
    img2 = np.expand_dims(img, 0)
    np.save('./raw_yolov11_in.npy', [img2])
    outputs = rknn.inference(inputs=[img2], data_format=['nhwc'])
    np.save('./onnx_yolov11_0_raw.npy', outputs[1])
    np.save('./onnx_yolov11_1_raw.npy', outputs[3])
    np.save('./onnx_yolov11_2_raw.npy', outputs[5])
    print('done')

    # post process
    input0_data = outputs[1]
    input1_data = outputs[3]
    input2_data = outputs[5]

    # Create zero arrays to pad the class channels up to the yolov5 layout.
    # Compute the number of channels that need padding:
    pad_channels = 255 - len(classes)
    padding = np.zeros((1, pad_channels, 80, 80), dtype=np.float32)
    input0_data = np.concatenate((input0_data, padding), axis=1)
    padding = np.zeros((1, pad_channels, 40, 40), dtype=np.float32)
    input1_data = np.concatenate((input1_data, padding), axis=1)
    padding = np.zeros((1, pad_channels, 20, 20), dtype=np.float32)
    input2_data = np.concatenate((input2_data, padding), axis=1)

    np.save('./onnx_yolov11_0.npy', input0_data)
    np.save('./onnx_yolov11_1.npy', input1_data)
    np.save('./onnx_yolov11_2.npy', input2_data)

    input0_data = input0_data.reshape([3, -1] + list(input0_data.shape[-2:]))
    input1_data = input1_data.reshape([3, -1] + list(input1_data.shape[-2:]))
    input2_data = input2_data.reshape([3, -1] + list(input2_data.shape[-2:]))

    input_data = list()
    input_data.append(np.transpose(input0_data, (2, 3, 0, 1)))
    input_data.append(np.transpose(input1_data, (2, 3, 0, 1)))
    input_data.append(np.transpose(input2_data, (2, 3, 0, 1)))

    boxes, classes, scores = yolov5_post_process(input_data)

    img_1 = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    if boxes is not None:
        draw(img_1, boxes, scores, classes)
    cv2.imwrite('result.jpg', img_1)
    print('Save results to result.jpg!')

    rknn.release()


def precess_image(img_src, resize_w, resize_h):
    orig = cv2.imread(img_src)
    if orig is None:
        print(f"Failed to read image: {img_src}")
        return
    img_h, img_w = orig.shape[:2]
    print(f'{type(orig)}')
    image = cv2.resize(orig, (resize_w, resize_h), interpolation=cv2.INTER_LINEAR)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = image.astype(np.float32)
    image /= 255.0
    '''
    # Set inputs
    img = cv2.imread(IMG_PATH)
    # img, ratio, (dw, dh) = letterbox(img, new_shape=(IMG_SIZE, IMG_SIZE))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))

    # Inference
    print('--> Running model')
    img2 = np.expand_dims(img, 0)
    '''
    return image


def detect_on_rk3588(rknn_path=RKNN_MODEL, image_path=IMG_PATH, classes=CLASSES):
    # Create RKNN object
    rknn = RKNN(verbose=True)

    # pre-process config
    print('--> Config model')
    rknn.config(mean_values=[[0, 0, 0]], std_values=[[255, 255, 255]], target_platform='rk3588')
    print('done')

    # Load RKNN model
    ret = rknn.load_rknn(rknn_path)
    if ret != 0:
        print('Load rknn model failed!')
        exit(ret)
    print('done')

    # Init runtime environment
    print('--> Init runtime environment')
    ret = rknn.init_runtime(target="rk3588", core_mask=RKNN.NPU_CORE_AUTO)
    if ret != 0:
        print('Init runtime environment failed!')
        exit(ret)
    print('done')

    img2 = precess_image(image_path, input_imgW, input_imgH)
    np.save('./raw_yolov11_in.npy', [img2])
    outputs = rknn.inference(inputs=[img2], data_format=['nhwc'])
    np.save('./raw_yolov11_0.npy', outputs[1])
    np.save('./raw_yolov11_1.npy', outputs[3])
    np.save('./raw_yolov11_2.npy', outputs[5])
    print('done')

    input0_data = outputs[1]
    input1_data = outputs[3]
    input2_data = outputs[5]

    # Create zero arrays to pad the class channels up to the yolov5 layout.
    # Compute the number of channels that need padding:
    pad_channels = 255 - len(classes)
    padding = np.zeros((1, pad_channels, 80, 80), dtype=np.float32)
    input0_data = np.concatenate((input0_data, padding), axis=1)
    padding = np.zeros((1, pad_channels, 40, 40), dtype=np.float32)
    input1_data = np.concatenate((input1_data, padding), axis=1)
    padding = np.zeros((1, pad_channels, 20, 20), dtype=np.float32)
    input2_data = np.concatenate((input2_data, padding), axis=1)

    np.save('./modified_yolov11_0.npy', input0_data)
    np.save('./modified_yolov11_1.npy', input1_data)
    np.save('./modified_yolov11_2.npy', input2_data)

    input0_data = input0_data.reshape([3, -1] + list(input0_data.shape[-2:]))
    input1_data = input1_data.reshape([3, -1] + list(input1_data.shape[-2:]))
    input2_data = input2_data.reshape([3, -1] + list(input2_data.shape[-2:]))

    input_data = list()
    input_data.append(np.transpose(input0_data, (2, 3, 0, 1)))
    input_data.append(np.transpose(input1_data, (2, 3, 0, 1)))
    input_data.append(np.transpose(input2_data, (2, 3, 0, 1)))

    boxes, classes, scores = yolov5_post_process(input_data)

    img = cv2.imread(image_path)
    img_1 = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    if boxes is not None:
        draw(img_1, boxes, scores, classes)
        cv2.imwrite('rknn_detect_result.jpg', img_1)
        print('Save results to rknn_detect_result.jpg!')
    else:
        print('target can not be found!')

    rknn.release()


if __name__ == '__main__':
    detect_on_ubuntu(ONNX_MODEL, RKNN_MODEL, IMG_PATH, CLASSES)
    # detect_on_rk3588(RKNN_MODEL, IMG_PATH, CLASSES)
5. Conclusions:
Finally, after carefully cross-checking the input and output tensors of the yolov5, yolo11, and onnx models, I found the following:
- YOLOv11 has six output tensors; per the mapping above, outputs 1, 3, 5 most likely correspond to the original outputs 0, 1, 2.
- In the six YOLOv11 output tensors, the second dimension is no longer 255 (i.e. -1) but len(CLASSES).
- My problem was that I set classes to 81 during training but still exported the .rknn with 80. As a result, the ONNX path worked while the .rknn path gave wrong results (see the sketch after this list).
- The np.save calls in the yolo detect code are debug statements left behind by the Rockchip engineers while debugging the interface; they do not need to exist.
- The RKNN input/output tensors should preferably be converted to uint8; in the simulation environment they are passed as float32. Once that change is made, it should run noticeably faster.
- Also, the matrix pack-in/pack-out in the yolo detect code is inefficient: it still performs float conversion. I have not read that code closely, but in theory extracting the detection results from the ONNX outputs should be faster.
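To catch the 80-vs-81 class mismatch early, a sanity check can run right after inference, asserting that every class head carries exactly len(CLASSES) channels. A minimal sketch, assuming the output ordering observed above (outputs 1, 3, 5 are the class heads); check_class_heads is a hypothetical helper:

def check_class_heads(outputs, num_classes):
    """Assert that each class head carries exactly num_classes channels."""
    for idx in (1, 3, 5):
        ch = outputs[idx].shape[1]
        assert ch == num_classes, (
            f'output[{idx}] has {ch} class channels, expected {num_classes}: '
            'training classes and exported model disagree')

# Usage: check_class_heads(outputs, len(CLASSES))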
Still testing. If the conclusions of this final draft are confirmed, I will leave a note in this post. The two code listings above have no major problems; I will update them once testing succeeds, but as they stand now they are correct.