Linux测试处理fps为30、1920*1080、一分钟的视频性能

devtools/2025/1/22 21:02:38/

前置条件

模拟fps为30、1920*1080、一分钟的视频

项目CMakeLists.txt

cmake_minimum_required(VERSION 3.30)
project(testOpenGl)

set(CMAKE_CXX_STANDARD 11)

# One executable containing every benchmark variant.
add_executable(testOpenGl
        main.cpp
        testOpenCl.cpp
        testOpenCl.h
        TestCpp.cpp
        TestCpp.h
        TestCppThread.cpp
        TestCppThread.h
        TestSIMD.cpp
        TestSIMD.h)

# Locate the OpenCL headers and library.
find_package(OpenCL REQUIRED)

# Expose the OpenCL headers to the target and link the OpenCL library.
target_include_directories(testOpenGl PRIVATE ${OpenCL_INCLUDE_DIRS})
target_link_libraries(testOpenGl PRIVATE ${OpenCL_LIBRARIES})

# The multi-threaded benchmark uses std::thread, which needs the platform
# thread library on toolchains where it is not part of libc.
find_package(Threads REQUIRED)
target_link_libraries(testOpenGl PRIVATE Threads::Threads)

# Probe the compiler for SIMD flags and enable the widest one it accepts.
include(CheckCXXCompilerFlag)
check_cxx_compiler_flag("-mavx" COMPILER_SUPPORTS_AVX)
check_cxx_compiler_flag("-mavx2" COMPILER_SUPPORTS_AVX2)

if (COMPILER_SUPPORTS_AVX2)
    target_compile_options(testOpenGl PRIVATE -mavx2)
elseif (COMPILER_SUPPORTS_AVX)
    target_compile_options(testOpenGl PRIVATE -mavx)
else ()
    message(FATAL_ERROR "AVX or AVX2 is not supported by compiler")
endif ()

C++代码

//
// Created by lai on 2025/1/17.
//
#include "TestCpp.h"
#include <iostream>
#include <vector>
#include <random>
#include <chrono>// 灰度转换函数
/**
 * Converts an interleaved 8-bit RGB frame into an 8-bit grayscale frame.
 *
 * @param input  Source pixels laid out as R,G,B triples; must hold at least
 *               width * height * 3 bytes.
 * @param output Destination buffer; must hold at least width * height bytes.
 *               Element i receives the gray value of pixel i.
 * @param width  Frame width in pixels.
 * @param height Frame height in pixels.
 *
 * Non-positive dimensions leave output untouched.
 */
void to_gray(const std::vector<unsigned char>& input, std::vector<unsigned char>& output, int width, int height) {
    using Index = std::vector<unsigned char>::size_type;

    if (width <= 0 || height <= 0) {
        return;
    }

    // Compute the pixel count in the container's index type so that the
    // "* 3" byte offset below cannot overflow int on very large frames.
    const Index pixelCount = static_cast<Index>(width) * static_cast<Index>(height);

    for (Index i = 0; i < pixelCount; ++i) {
        const Index offset = i * 3;  // first byte of this pixel's RGB triple
        const unsigned char r = input[offset];
        const unsigned char g = input[offset + 1];
        const unsigned char b = input[offset + 2];

        // Weighted luma sum (ITU-R BT.601 coefficients), truncated to 8 bits.
        output[i] = static_cast<unsigned char>(0.299f * r + 0.587f * g + 0.114f * b);
    }
}
void TestCpp::runTest() {const int width = 1920;         // 视频宽度const int height = 1080;        // 视频高度const int fps = 30;             // 帧率const int duration = 60;        // 视频持续时间(秒)const int frameCount = fps * duration; // 总帧数// 模拟视频帧数据:随机生成每帧的 RGB 数据std::vector<unsigned char> inputFrame(width * height * 3);std::vector<unsigned char> outputFrame(width * height);std::random_device rd;std::mt19937 gen(rd());std::uniform_int_distribution<> dis(0, 255);// 开始处理auto startTime = std::chrono::high_resolution_clock::now();for (int frame = 0; frame < frameCount; ++frame) {// 随机生成模拟的 RGB 数据for (auto& pixel : inputFrame) {pixel = dis(gen);}// 调用灰度转换函数to_gray(inputFrame, outputFrame, width, height);// 打印进度if (frame % 30 == 0) {std::cout << "Processed frame: " << frame + 1 << "/" << frameCount << std::endl;}}auto endTime = std::chrono::high_resolution_clock::now();double elapsedTime = std::chrono::duration<double>(endTime - startTime).count();// 打印处理时间std::cout << "Processed " << frameCount << " frames in " << elapsedTime << " seconds." << std::endl;std::cout << "Average time per frame: " << (elapsedTime / frameCount) << " seconds." << std::endl;}

C++多线程

//
// Created by lai on 2025/1/17.
//
#include "TestCppThread.h"
#include <iostream>
#include <vector>
#include <random>
#include <chrono>
#include <thread>// 灰度转换函数,每个线程处理一部分图像
/**
 * Converts the pixel range [start, end) of an interleaved RGB frame to
 * grayscale. Intended to be run by one worker thread per range.
 *
 * @param input  Source pixels as R,G,B triples.
 * @param output Destination buffer, one byte per pixel; only indices in
 *               [start, end) are written.
 * @param width  Frame width in pixels (not used by the conversion itself).
 * @param height Frame height in pixels (not used by the conversion itself).
 * @param start  Index of the first pixel to convert.
 * @param end    Index one past the last pixel to convert.
 */
void to_gray_chunk(const std::vector<unsigned char>& input, std::vector<unsigned char>& output, int width, int height, int start, int end) {
    (void)width;
    (void)height;

    for (int i = start; i < end; ++i) {
        const int offset = i * 3;  // first byte of this pixel's RGB triple
        const unsigned char r = input[offset];
        const unsigned char g = input[offset + 1];
        const unsigned char b = input[offset + 2];

        // Weighted luma sum, truncated to 8 bits.
        output[i] = static_cast<unsigned char>(0.299f * r + 0.587f * g + 0.114f * b);
    }
}

/**
 * Multi-threaded benchmark.
 *
 * Simulates one minute of 1920x1080 video at 30 fps. Each frame is filled with
 * random RGB bytes, split into contiguous pixel ranges, and converted by one
 * thread per range. Progress is printed every 30 frames, followed by the total
 * and per-frame wall-clock time.
 */
void TestCppThread::runTest() {
    const int width = 1920;                 // frame width in pixels
    const int height = 1080;                // frame height in pixels
    const int fps = 30;                     // frames per second
    const int duration = 60;                // clip length in seconds
    const int frameCount = fps * duration;  // total number of frames

    // hardware_concurrency() may return 0 when the value cannot be determined;
    // fall back to a single worker so the chunk-size division is well defined.
    const unsigned int detectedThreads = std::thread::hardware_concurrency();
    const int numThreads = (detectedThreads == 0) ? 1 : static_cast<int>(detectedThreads);

    const int pixelCount = width * height;
    const int chunkSize = pixelCount / numThreads;  // pixels handled by each worker

    // One reusable RGB input frame and one grayscale output frame.
    std::vector<unsigned char> inputFrame(width * height * 3);
    std::vector<unsigned char> outputFrame(width * height);

    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_int_distribution<> dis(0, 255);

    // NOTE(review): the timed region also covers random frame generation and
    // per-frame thread creation, not just the grayscale conversion.
    const auto startTime = std::chrono::high_resolution_clock::now();

    for (int frame = 0; frame < frameCount; ++frame) {
        // Fill the frame with random RGB data.
        for (auto& pixel : inputFrame) {
            pixel = static_cast<unsigned char>(dis(gen));
        }

        // Launch one worker per pixel range.
        std::vector<std::thread> threads;
        threads.reserve(numThreads);
        for (int t = 0; t < numThreads; ++t) {
            const int start = t * chunkSize;
            // The last worker also takes the remainder left by the integer division.
            const int end = (t == numThreads - 1) ? pixelCount : (start + chunkSize);

            // Capturing the frames by reference is safe: every worker is
            // joined below before the buffers are touched again.
            threads.emplace_back([&inputFrame, &outputFrame, width, height, start, end]() {
                to_gray_chunk(inputFrame, outputFrame, width, height, start, end);
            });
        }

        // Wait for every worker before the next frame overwrites the input.
        for (auto& worker : threads) {
            worker.join();
        }

        // Report progress once per simulated second of video.
        if (frame % 30 == 0) {
            std::cout << "Processed frame: " << frame + 1 << "/" << frameCount << std::endl;
        }
    }

    const auto endTime = std::chrono::high_resolution_clock::now();
    const double elapsedTime = std::chrono::duration<double>(endTime - startTime).count();

    std::cout << "Processed " << frameCount << " frames in " << elapsedTime << " seconds." << std::endl;
    std::cout << "Average time per frame: " << (elapsedTime / frameCount) << " seconds." << std::endl;
}

CPU版本的OpenCL

cmake中添加

# Locate the OpenCL headers and library.
find_package(OpenCL REQUIRED)
# Expose the OpenCL headers to the target and link the OpenCL library.
target_include_directories(testOpenGl PRIVATE ${OpenCL_INCLUDE_DIRS})
target_link_libraries(testOpenGl PRIVATE ${OpenCL_LIBRARIES})

测试代码

//
// Created by lai on 2025/1/16.
//
#include "testOpenCl.h"
#include <chrono>
#include <CL/cl.h>
#include <iostream>
#include <vector>
#include <random>// OpenCL 内核代码
// OpenCL C source for the grayscale kernel. It is compiled at run time with
// clCreateProgramWithSource/clBuildProgram, so comments inside the string must
// end at a newline or they hide the statements that follow them.
const char* kernelSource = R"(
__kernel void to_gray(__global unsigned char* input,
                      __global unsigned char* output,
                      const int width,
                      const int height)
{
    // One work-item converts one pixel.
    int id = get_global_id(0);
    if (id < width * height) {
        int offset = id * 3;  // first byte of this pixel's RGB triple
        unsigned char r = input[offset];
        unsigned char g = input[offset + 1];
        unsigned char b = input[offset + 2];
        // Weighted luma sum, truncated to 8 bits.
        output[id] = (unsigned char)(0.299f * r + 0.587f * g + 0.114f * b);
    }
}
)";
void TestOpenCl::runTests() {const int width = 1920;         // 视频宽度const int height = 1080;        // 视频高度const int fps = 30;             // 帧率const int duration = 60;        // 视频持续时间(秒)const int frameCount = fps * duration; // 总帧数// 模拟视频帧数据:随机生成每帧的 RGB 数据std::vector<unsigned char> inputFrame(width * height * 3);std::vector<unsigned char> outputFrame(width * height);std::random_device rd;std::mt19937 gen(rd());std::uniform_int_distribution<> dis(0, 255);// 初始化 OpenCLcl_int err;cl_platform_id platform;clGetPlatformIDs(1, &platform, nullptr);cl_device_id device;clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &device, nullptr);cl_context context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &err);cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err);cl_program program = clCreateProgramWithSource(context, 1, &kernelSource, nullptr, &err);clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr);cl_kernel kernel = clCreateKernel(program, "to_gray", &err);// 创建 OpenCL 缓冲区cl_mem inputBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, inputFrame.size(), nullptr, &err);cl_mem outputBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, outputFrame.size(), nullptr, &err);// 开始处理auto startTime = std::chrono::high_resolution_clock::now();for (int frame = 0; frame < frameCount; ++frame) {// 随机生成模拟的 RGB 数据for (auto& pixel : inputFrame) {pixel = dis(gen);}// 写入数据到 OpenCL 缓冲区clEnqueueWriteBuffer(queue, inputBuffer, CL_TRUE, 0, inputFrame.size(), inputFrame.data(), 0, nullptr, nullptr);// 设置内核参数clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputBuffer);clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputBuffer);clSetKernelArg(kernel, 2, sizeof(int), &width);clSetKernelArg(kernel, 3, sizeof(int), &height);// 定义工作区大小size_t globalSize = width * height;// 执行内核clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, &globalSize, nullptr, 0, nullptr, nullptr);// 读取处理后的灰度数据clEnqueueReadBuffer(queue, outputBuffer, CL_TRUE, 0, outputFrame.size(), outputFrame.data(), 0, 
nullptr, nullptr);// 打印进度if (frame % 30 == 0) {std::cout << "Processed frame: " << frame + 1 << "/" << frameCount << std::endl;}}auto endTime = std::chrono::high_resolution_clock::now();double elapsedTime = std::chrono::duration<double>(endTime - startTime).count();// 打印处理时间std::cout << "Processed " << frameCount << " frames in " << elapsedTime << " seconds." << std::endl;std::cout << "Average time per frame: " << (elapsedTime / frameCount) << " seconds." << std::endl;// 释放 OpenCL 资源clReleaseMemObject(inputBuffer);clReleaseMemObject(outputBuffer);clReleaseKernel(kernel);clReleaseProgram(program);clReleaseCommandQueue(queue);clReleaseContext(context);
}

内存对齐的SIMD指令集

cmake添加

# Probe the compiler for SIMD flags and enable the widest one it accepts.
include(CheckCXXCompilerFlag)
check_cxx_compiler_flag("-mavx" COMPILER_SUPPORTS_AVX)
check_cxx_compiler_flag("-mavx2" COMPILER_SUPPORTS_AVX2)

if (COMPILER_SUPPORTS_AVX2)
    target_compile_options(testOpenGl PRIVATE -mavx2)
elseif (COMPILER_SUPPORTS_AVX)
    target_compile_options(testOpenGl PRIVATE -mavx)
else ()
    message(FATAL_ERROR "AVX or AVX2 is not supported by compiler")
endif ()
//
// Created by lai on 2025/1/17.
//
#include "TestSIMD.h"
#include <iostream>
#include <vector>
#include <random>
#include <chrono>
#include <immintrin.h> // SIMD 指令集
#include <cstdlib>  // posix_memalign, free

/**
 * Converts an interleaved 8-bit RGB frame to 8-bit grayscale, eight pixels per
 * iteration using AVX float arithmetic.
 *
 * @param input  Source pixels as R,G,B triples; at least width * height * 3 bytes.
 * @param output Destination buffer; at least width * height bytes.
 * @param width  Frame width in pixels.
 * @param height Frame height in pixels.
 *
 * Only AVX and SSE2 intrinsics are used, so the function builds with either
 * -mavx or -mavx2. Results are truncated toward zero in both the vector loop
 * and the scalar tail, so every pixel is treated the same way.
 */
void to_gray_simd(const unsigned char* input, unsigned char* output, int width, int height) {
    const int pixelCount = width * height;

    const __m256 scale_r = _mm256_set1_ps(0.299f);  // red weight
    const __m256 scale_g = _mm256_set1_ps(0.587f);  // green weight
    const __m256 scale_b = _mm256_set1_ps(0.114f);  // blue weight

    int i = 0;
    for (; i + 8 <= pixelCount; i += 8) {
        // The source is interleaved (RGBRGB...), so each channel has to be
        // gathered with a stride of 3 bytes and widened to one 32-bit lane
        // per pixel. A plain 32-byte vector load would mix the channels and
        // read past the end of the frame on the last iterations.
        const unsigned char* px = input + static_cast<size_t>(i) * 3;
        const __m256i pixel_r = _mm256_setr_epi32(px[0], px[3], px[6], px[9], px[12], px[15], px[18], px[21]);
        const __m256i pixel_g = _mm256_setr_epi32(px[1], px[4], px[7], px[10], px[13], px[16], px[19], px[22]);
        const __m256i pixel_b = _mm256_setr_epi32(px[2], px[5], px[8], px[11], px[14], px[17], px[20], px[23]);

        // Integer lanes -> float lanes.
        const __m256 r_f = _mm256_cvtepi32_ps(pixel_r);
        const __m256 g_f = _mm256_cvtepi32_ps(pixel_g);
        const __m256 b_f = _mm256_cvtepi32_ps(pixel_b);

        // gray = 0.299 * r + 0.587 * g + 0.114 * b
        const __m256 gray_f = _mm256_add_ps(
                _mm256_add_ps(_mm256_mul_ps(r_f, scale_r), _mm256_mul_ps(g_f, scale_g)),
                _mm256_mul_ps(b_f, scale_b));

        // Truncate toward zero, matching static_cast in the scalar tail.
        const __m256i gray_i = _mm256_cvttps_epi32(gray_f);

        // Narrow 8 x int32 -> 8 x int16 -> 8 x uint8. The values are already
        // within 0..255, so the saturating packs do not change them.
        const __m128i lowHalf = _mm256_castsi256_si128(gray_i);
        const __m128i highHalf = _mm256_extractf128_si256(gray_i, 1);
        const __m128i gray16 = _mm_packs_epi32(lowHalf, highHalf);
        const __m128i gray8 = _mm_packus_epi16(gray16, gray16);

        // Write exactly the 8 result bytes for these 8 pixels.
        _mm_storel_epi64(reinterpret_cast<__m128i*>(output + i), gray8);
    }

    // Scalar tail for the last (pixelCount % 8) pixels.
    for (; i < pixelCount; ++i) {
        const int offset = i * 3;
        const unsigned char r = input[offset];
        const unsigned char g = input[offset + 1];
        const unsigned char b = input[offset + 2];
        output[i] = static_cast<unsigned char>(0.299f * r + 0.587f * g + 0.114f * b);
    }
}

/**
 * SIMD benchmark.
 *
 * Simulates one minute of 1920x1080 video at 30 fps using 32-byte-aligned
 * frame buffers. Each frame is filled with random RGB bytes and converted with
 * to_gray_simd(). Progress is printed every 30 frames, followed by the total
 * and per-frame wall-clock time. Both buffers are freed before returning.
 */
void TestSIMD::runTest() {
    const int width = 1920;                 // frame width in pixels
    const int height = 1080;                // frame height in pixels
    const int fps = 30;                     // frames per second
    const int duration = 60;                // clip length in seconds
    const int frameCount = fps * duration;  // total number of frames

    const size_t pixelCount = static_cast<size_t>(width) * static_cast<size_t>(height);
    const size_t inputSize = pixelCount * 3;  // interleaved RGB bytes
    const size_t outputSize = pixelCount;     // one gray byte per pixel
    const size_t alignment = 32;              // buffer alignment in bytes

    // Allocate through void* so posix_memalign gets a real void** and no
    // pointer cast is needed on the out-parameter.
    void* rawInput = nullptr;
    if (posix_memalign(&rawInput, alignment, inputSize) != 0) {
        std::cerr << "memory allocation failed" << std::endl;
        return;
    }
    void* rawOutput = nullptr;
    if (posix_memalign(&rawOutput, alignment, outputSize) != 0) {
        std::free(rawInput);  // do not leak the buffer that was obtained
        std::cerr << "memory allocation failed" << std::endl;
        return;
    }
    unsigned char* inputFrame = static_cast<unsigned char*>(rawInput);
    unsigned char* outputFrame = static_cast<unsigned char*>(rawOutput);

    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_int_distribution<> dis(0, 255);

    // NOTE(review): the timed region also covers random frame generation, so
    // the reported time is not the cost of the grayscale conversion alone.
    const auto startTime = std::chrono::high_resolution_clock::now();

    for (int frame = 0; frame < frameCount; ++frame) {
        // Fill the frame with random RGB data.
        for (size_t i = 0; i < inputSize; ++i) {
            inputFrame[i] = static_cast<unsigned char>(dis(gen));
        }

        to_gray_simd(inputFrame, outputFrame, width, height);

        // Report progress once per simulated second of video.
        if (frame % 30 == 0) {
            std::cout << "Processed frame: " << frame + 1 << "/" << frameCount << std::endl;
        }
    }

    const auto endTime = std::chrono::high_resolution_clock::now();
    const double elapsedTime = std::chrono::duration<double>(endTime - startTime).count();

    std::cout << "Processed " << frameCount << " frames in " << elapsedTime << " seconds." << std::endl;
    std::cout << "Average time per frame: " << (elapsedTime / frameCount) << " seconds." << std::endl;

    std::free(inputFrame);
    std::free(outputFrame);
}

结论

C++
Processed 1800 frames in 251.789 seconds.
Average time per frame: 0.139883 seconds.

C++ thread
Processed 1800 frames in 229.571 seconds.
Average time per frame: 0.12754 seconds.

CPU版本POCL的OpenCL
Processed 1800 frames in 233.25 seconds.
Average time per frame: 0.129583 seconds.

SIMD 内存对齐以后
Processed 1800 frames in 191.015 seconds.
Average time per frame: 0.106119 seconds.

SIMD的性能明显优于其他几项,但是还需要测试GPU版本的OpenCL和多线程指令集优化对性能的提升


http://www.ppmy.cn/devtools/152689.html

相关文章

【Linux入门】2w字详解yum、vim、gcc/g++、gdb、makefile以及进度条小程序

文章目录 Ⅰ. Linux 软件包管理器 yum一、什么是软件包?二、查找软件包三、安装与卸载软件包 拓展:lrzsz简介拓:配置 yum 源路径的方法Ⅱ. Linux开发工具vim编辑器一、vim 的基本概念二、vim 的基本操作三、vim 命令模式的操作四、vim 底行模…

【大厂面试题】软件测试面试题整理(附答案)

以下面试题为最近大厂面试整理的内容,可供应届生参考。 目录 1. 实习期间用python写过哪些东西? 2. Opencv如何使? 3. 用Python写过什么,在大学期间是必修语言吗,当时考了多少分? 4. Python学下来比较困…

Visual Studio2019调试DLL

1、编写好DLL代码之后,对DLL项目的属性进行设置,选择待注入的DLL,如下图所示 2、生成DLL文件 3、将DLL设置为启动项目之后,按F5启动调试。弹出选择注入的exe的界面之后,使用代码注入器注入步骤2中生成的dll…

网络安全技术深度解析与实践案例

网络安全技术深度解析与实践案例 随着信息技术的飞速发展,网络已成为现代社会不可或缺的一部分。然而,网络空间的开放性也带来了各种安全隐患。为了保障网络和数据的安全,网络安全技术显得尤为重要。本文将深入探讨网络安全技术的核心要素,并通过Python代码示例展示其具体…

Text2Sql:开启自然语言与数据库交互新时代(30/30)

一、Text2Sql 简介 在当今数字化时代,数据处理和分析的需求日益增长。对于众多非技术专业人员而言,数据库操作的复杂性常常成为他们获取所需信息的障碍。而 Text2Sql 技术的出现,为这一问题提供了有效的解决方案。 Text2Sql,即文…

BUUCTF_Web([GYCTF2020]Ezsqli)

1.输入1 ,正常回显。 2.输入1 ,报错false,为字符型注入,单引号闭合。 原因: https://mp.csdn.net/mp_blog/creation/editor/145170456 3.尝试查询字段,回显位置,数据库,都是这个。…

Linux容器(初学了解)

目录 一、容器 1.1、容器技术 1.2、容器和虚拟机之间的差异 1.3、Rootless 和 Rootful 容器 1.4、设计基于容器的架构 1.5、容器管理工具 1.6、容器镜像和注册表 1.7、配置容器注册表 1.8、使用容器文件构建容器镜像 二、部署容器 2.1、Podman 实用程序 2.2、安装容…

如何解决 Apache Shutdown Unexpectedly 错误 ?

遇到 “Apache shutdown unexpectedly”错误可能会令人沮丧。当 Apache 由于冲突或配置问题而无法启动时,通常会发生此错误。此错误经常出现在像 XAMPP 控制面板这样的开发工具中。 Step 1: 检查端口冲突 Identify Port Usage Apache 通常在端口 80(H…