C# OnnxRuntime Gaze-LLE 凝视目标估计,通过利用冻结的DINOv2编码器的特征来简化注视目标估计,预测一个人在场景中看的位置。

server/2024/12/19 20:54:00/

目录

说明

效果

模型信息

det_face.onnx

gazelle_dinov2_vitl14_inout_1x3x448x448_1xNx4.onnx

项目

代码

下载

参考


说明

github地址:https://github.com/fkryan/gazelle

This is the official implementation for Gaze-LLE, a transformer approach for estimating gaze targets that leverages the power of pretrained visual foundation models. Gaze-LLE provides a streamlined gaze architecture that learns only a lightweight gaze decoder on top of a frozen, pretrained visual encoder (DINOv2). Gaze-LLE learns 1-2 orders of magnitude fewer parameters than prior works and doesn't require any extra input modalities like depth and pose!

效果

模型信息

det_face.onnx

Model Properties
-------------------------
---------------------------------------------------------------

Inputs
-------------------------
name:input.1
tensor:Float[1, 3, -1, -1]
---------------------------------------------------------------

Outputs
-------------------------
name:448
tensor:Float[12800, 1]
name:471
tensor:Float[3200, 1]
name:494
tensor:Float[800, 1]
name:451
tensor:Float[12800, 4]
name:474
tensor:Float[3200, 4]
name:497
tensor:Float[800, 4]
name:454
tensor:Float[12800, 10]
name:477
tensor:Float[3200, 10]
name:500
tensor:Float[800, 10]
---------------------------------------------------------------

gazelle_dinov2_vitl14_inout_1x3x448x448_1xNx4.onnx

Model Properties
-------------------------
---------------------------------------------------------------

Inputs
-------------------------
name:image_bgr
tensor:Float[1, 3, 448, 448]
name:bboxes_x1y1x2y2
tensor:Float[1, -1, 4]
---------------------------------------------------------------

Outputs
-------------------------
name:heatmap
tensor:Float[-1, 64, 64]
name:inout
tensor:Float[-1]
---------------------------------------------------------------

项目

代码

using OpenCvSharp;
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Imaging;
using System.Windows.Forms;

namespace Onnx_Demo
{
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        string fileFilter = "*.*|*.bmp;*.jpg;*.jpeg;*.tiff;*.tiff;*.png";
        string image_path = "";
        DateTime dt1 = DateTime.Now;
        DateTime dt2 = DateTime.Now;

        Mat image;
        Mat result_image;

        FaceDet face_det;
        GazeLLE gazelle;

        private void button1_Click(object sender, EventArgs e)
        {
            OpenFileDialog ofd = new OpenFileDialog();
            ofd.Filter = fileFilter;
            if (ofd.ShowDialog() != DialogResult.OK) return;
            pictureBox1.Image = null;
            image_path = ofd.FileName;
            pictureBox1.Image = new Bitmap(image_path);
            textBox1.Text = "";
            image = new Mat(image_path);
            pictureBox2.Image = null;
        }

        private void button2_Click(object sender, EventArgs e)
        {
            if (image_path == "")
            {
                return;
            }

            button2.Enabled = false;
            Application.DoEvents();

            image = new Mat(image_path);
            result_image = image.Clone();

            dt1 = DateTime.Now;
            List<Bbox> head_boxes = face_det.Detect(image);

            foreach (var item in head_boxes)
            {
                Rect rect = Rect.FromLTRB((int)item.xmin, (int)item.ymin, (int)item.xmax, (int)item.ymax);
                Cv2.Rectangle(result_image, rect, Scalar.Red);
            }

            List<Mat> resized_heatmaps = gazelle.Predict(image, head_boxes);
            dt2 = DateTime.Now;

            DrawGaze(result_image, head_boxes, resized_heatmaps);

            pictureBox2.Image = new Bitmap(result_image.ToMemoryStream());
            textBox1.Text = "推理耗时:" + (dt2 - dt1).TotalMilliseconds + "ms";

            button2.Enabled = true;
        }

        void DrawGaze(Mat frame, List<Bbox> head_boxes, List<Mat> heatmaps, float thr = 0.0f)
        {
            int num_box = head_boxes.Count;
            for (int i = 0; i < num_box; i++)
            {
                double max_score;
                OpenCvSharp.Point classIdPoint;
                double minVal;
                OpenCvSharp.Point minLoc;
                Cv2.MinMaxLoc(heatmaps[i], out minVal, out max_score, out minLoc, out classIdPoint);
                int cx = classIdPoint.X;
                int cy = classIdPoint.Y;
                if (max_score >= thr)
                {
                    int head_cx = (int)((head_boxes[i].xmin + head_boxes[i].xmax) * 0.5);
                    int head_cy = (int)((head_boxes[i].ymin + head_boxes[i].ymax) * 0.5);
                   
                    Cv2.ArrowedLine(frame, new OpenCvSharp.Point(head_cx, head_cy), new OpenCvSharp.Point(cx, cy), new Scalar(0, 255, 0), 2, LineTypes.AntiAlias);
                }
            }
        }

        private void Form1_Load(object sender, EventArgs e)
        {
            face_det = new FaceDet("model\\det_face.onnx");
            gazelle = new GazeLLE("model\\gazelle_dinov2_vitl14_inout_1x3x448x448_1xNx4.onnx");

            image_path = "test_img\\1.jpg";
            pictureBox1.Image = new Bitmap(image_path);
        }

        private void button3_Click(object sender, EventArgs e)
        {
            if (pictureBox2.Image == null)
            {
                return;
            }
            Bitmap output = new Bitmap(pictureBox2.Image);
            SaveFileDialog sdf = new SaveFileDialog();
            sdf.Title = "保存";
            sdf.Filter = "Images (*.jpg)|*.jpg|Images (*.png)|*.png|Images (*.bmp)|*.bmp|Images (*.emf)|*.emf|Images (*.exif)|*.exif|Images (*.gif)|*.gif|Images (*.ico)|*.ico|Images (*.tiff)|*.tiff|Images (*.wmf)|*.wmf";
            if (sdf.ShowDialog() == DialogResult.OK)
            {
                switch (sdf.FilterIndex)
                {
                    case 1:
                        {
                            output.Save(sdf.FileName, ImageFormat.Jpeg);
                            break;
                        }
                    case 2:
                        {
                            output.Save(sdf.FileName, ImageFormat.Png);
                            break;
                        }
                    case 3:
                        {
                            output.Save(sdf.FileName, ImageFormat.Bmp);
                            break;
                        }
                    case 4:
                        {
                            output.Save(sdf.FileName, ImageFormat.Emf);
                            break;
                        }
                    case 5:
                        {
                            output.Save(sdf.FileName, ImageFormat.Exif);
                            break;
                        }
                    case 6:
                        {
                            output.Save(sdf.FileName, ImageFormat.Gif);
                            break;
                        }
                    case 7:
                        {
                            output.Save(sdf.FileName, ImageFormat.Icon);
                            break;
                        }

                    case 8:
                        {
                            output.Save(sdf.FileName, ImageFormat.Tiff);
                            break;
                        }
                    case 9:
                        {
                            output.Save(sdf.FileName, ImageFormat.Wmf);
                            break;
                        }
                }
                MessageBox.Show("保存成功,位置:" + sdf.FileName);
            }
        }
    }
}

using OpenCvSharp;
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Imaging;
using System.Windows.Forms;

namespace Onnx_Demo
{
    /// <summary>
    /// Main window of the Gaze-LLE demo. Lets the user pick an image, runs the
    /// face detector followed by the gaze model on it, draws the results and
    /// optionally saves the annotated picture.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        // Filter shown by the "open image" dialog.
        string fileFilter = "*.*|*.bmp;*.jpg;*.jpeg;*.tiff;*.tiff;*.png";

        // Path of the image currently selected for inference ("" when none).
        string image_path = "";

        // Most recently loaded input image and its annotated copy.
        Mat image;
        Mat result_image;

        // Model wrappers, created once in Form1_Load.
        FaceDet face_det;
        GazeLLE gazelle;

        /// <summary>
        /// Reads an image file into a <see cref="Bitmap"/> that is independent of the file.
        /// </summary>
        /// <param name="path">Path of the image file to read.</param>
        /// <returns>An in-memory copy of the image; the caller owns it.</returns>
        private static Bitmap LoadBitmapCopy(string path)
        {
            // A Bitmap created straight from a path keeps the file locked until it is
            // disposed, so copy the pixels and release the file-backed instance.
            using (Bitmap fromFile = new Bitmap(path))
            {
                return new Bitmap(fromFile);
            }
        }

        /// <summary>
        /// Shows <paramref name="picture"/> in <paramref name="box"/> and disposes the
        /// image that was displayed before.
        /// </summary>
        /// <param name="box">Picture box to update.</param>
        /// <param name="picture">New image, or null to clear the box.</param>
        private static void SetPicture(PictureBox box, System.Drawing.Image picture)
        {
            System.Drawing.Image previous = box.Image;
            box.Image = picture;
            if (previous != null && !ReferenceEquals(previous, picture))
            {
                previous.Dispose();
            }
        }

        /// <summary>
        /// Stores <paramref name="replacement"/> in <paramref name="slot"/> and disposes
        /// the Mat that was stored there before.
        /// </summary>
        private static void ReplaceMat(ref Mat slot, Mat replacement)
        {
            Mat previous = slot;
            slot = replacement;
            if (previous != null && !ReferenceEquals(previous, replacement))
            {
                previous.Dispose();
            }
        }

        /// <summary>
        /// "Open image" handler: asks for a file, previews it and resets the previous result.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            string selected;
            using (OpenFileDialog ofd = new OpenFileDialog())
            {
                ofd.Filter = fileFilter;
                if (ofd.ShowDialog() != DialogResult.OK)
                {
                    return;
                }
                selected = ofd.FileName;
            }

            // Decode the preview first: if the file is not a readable image this throws
            // before any of the form's state has been changed.
            Bitmap preview = LoadBitmapCopy(selected);

            image_path = selected;
            SetPicture(pictureBox1, preview);
            SetPicture(pictureBox2, null);
            textBox1.Text = "";
            ReplaceMat(ref image, new Mat(image_path));
        }

        /// <summary>
        /// "Run" handler: detects faces, predicts one gaze heatmap per face, draws the
        /// annotated image and reports the elapsed inference time.
        /// </summary>
        private void button2_Click(object sender, EventArgs e)
        {
            if (string.IsNullOrEmpty(image_path))
            {
                return;
            }

            button2.Enabled = false;
            // Let the disabled state repaint before the UI thread is blocked by inference.
            Application.DoEvents();

            try
            {
                ReplaceMat(ref image, new Mat(image_path));
                if (image.Empty())
                {
                    textBox1.Text = "图片读取失败:" + image_path;
                    return;
                }
                ReplaceMat(ref result_image, image.Clone());

                // Stopwatch is monotonic and high resolution, unlike DateTime.Now.
                System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
                List<Bbox> head_boxes = face_det.Detect(image);

                foreach (var item in head_boxes)
                {
                    Rect rect = Rect.FromLTRB((int)item.xmin, (int)item.ymin, (int)item.xmax, (int)item.ymax);
                    Cv2.Rectangle(result_image, rect, Scalar.Red);
                }

                // Without detected faces there is nothing to predict, so the gaze model is skipped.
                List<Mat> resized_heatmaps = head_boxes.Count > 0
                    ? gazelle.Predict(image, head_boxes)
                    : new List<Mat>();
                timer.Stop();

                // NOTE(review): the heatmap Mats are not disposed here because it is not
                // visible whether GazeLLE keeps references to them - confirm ownership.
                DrawGaze(result_image, head_boxes, resized_heatmaps);

                // A Bitmap built on a stream needs that stream for its whole lifetime;
                // copying the decoded pixels lets the stream be released right away.
                using (var encoded = result_image.ToMemoryStream())
                using (Bitmap decoded = new Bitmap(encoded))
                {
                    SetPicture(pictureBox2, new Bitmap(decoded));
                }

                textBox1.Text = "推理耗时:" + timer.Elapsed.TotalMilliseconds + "ms";
            }
            finally
            {
                // Re-enable the button even when detection or inference throws.
                button2.Enabled = true;
            }
        }

        /// <summary>
        /// Draws, for every head box, an arrow from the box centre to the location of the
        /// maximum of the matching heatmap.
        /// </summary>
        /// <param name="frame">Image to draw on.</param>
        /// <param name="head_boxes">Detected head boxes.</param>
        /// <param name="heatmaps">Heatmaps, matched to <paramref name="head_boxes"/> by index.</param>
        /// <param name="thr">Minimum heatmap maximum required for an arrow to be drawn.</param>
        void DrawGaze(Mat frame, List<Bbox> head_boxes, List<Mat> heatmaps, float thr = 0.0f)
        {
            if (frame == null || head_boxes == null || heatmaps == null)
            {
                return;
            }

            // Only pair up entries that exist in both lists.
            int pairCount = Math.Min(head_boxes.Count, heatmaps.Count);
            for (int i = 0; i < pairCount; i++)
            {
                Mat heatmap = heatmaps[i];
                if (heatmap == null || heatmap.Empty())
                {
                    continue;
                }

                double minScore;
                double maxScore;
                OpenCvSharp.Point minLoc;
                OpenCvSharp.Point maxLoc;
                Cv2.MinMaxLoc(heatmap, out minScore, out maxScore, out minLoc, out maxLoc);
                if (maxScore < thr)
                {
                    continue;
                }

                int head_cx = (int)((head_boxes[i].xmin + head_boxes[i].xmax) * 0.5);
                int head_cy = (int)((head_boxes[i].ymin + head_boxes[i].ymax) * 0.5);

                // The peak location is used directly as a pixel position in the frame;
                // presumably the heatmaps are already resized to the frame size - confirm.
                Cv2.ArrowedLine(frame, new OpenCvSharp.Point(head_cx, head_cy), new OpenCvSharp.Point(maxLoc.X, maxLoc.Y), new Scalar(0, 255, 0), 2, LineTypes.AntiAlias);
            }
        }

        /// <summary>
        /// Creates both model wrappers and, when available, preselects the bundled sample image.
        /// </summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            face_det = new FaceDet("model\\det_face.onnx");
            gazelle = new GazeLLE("model\\gazelle_dinov2_vitl14_inout_1x3x448x448_1xNx4.onnx");

            // The sample image is optional: without it the form starts with no selection
            // instead of failing while loading.
            string sample_path = "test_img\\1.jpg";
            if (System.IO.File.Exists(sample_path))
            {
                Bitmap preview = LoadBitmapCopy(sample_path);
                image_path = sample_path;
                SetPicture(pictureBox1, preview);
            }
        }

        /// <summary>
        /// "Save" handler: writes the annotated image to a file in the chosen format.
        /// </summary>
        private void button3_Click(object sender, EventArgs e)
        {
            if (pictureBox2.Image == null)
            {
                return;
            }

            // Only formats GDI+ can actually encode are offered. For formats without an
            // encoder (emf, wmf, ico, exif) Image.Save silently writes PNG data, which
            // would produce files whose content does not match their extension.
            // The order must match the entries of the Filter string below.
            ImageFormat[] formats =
            {
                ImageFormat.Jpeg,
                ImageFormat.Png,
                ImageFormat.Bmp,
                ImageFormat.Gif,
                ImageFormat.Tiff
            };

            using (SaveFileDialog sdf = new SaveFileDialog())
            {
                sdf.Title = "保存";
                sdf.Filter = "Images (*.jpg)|*.jpg|Images (*.png)|*.png|Images (*.bmp)|*.bmp|Images (*.gif)|*.gif|Images (*.tiff)|*.tiff";
                if (sdf.ShowDialog() != DialogResult.OK)
                {
                    return;
                }

                // FilterIndex is 1-based.
                int formatIndex = sdf.FilterIndex - 1;
                if (formatIndex < 0 || formatIndex >= formats.Length)
                {
                    return;
                }

                using (Bitmap output = new Bitmap(pictureBox2.Image))
                {
                    output.Save(sdf.FileName, formats[formatIndex]);
                }

                MessageBox.Show("保存成功,位置:" + sdf.FileName);
            }
        }
    }
}

下载

源码下载

参考

https://github.com/hpc203/Gaze-LLE-onnxrun


http://www.ppmy.cn/server/151536.html

相关文章

C#核心(17)密封类

前言 我们先前已经学完了继承中的大部分知识，今天我们就来学习最后一个知识点，密封类。 其实我不喜欢叫他密封类，我比较喜欢叫他结扎。 在Unity中，C#的密封类（sealed class）是一个重要的概念，…

2019陕西ICPC-Grid with Arrows

Grid with Arrows 题意 一个总规模为 n × m 的矩阵，矩阵上的每个位置有其下一位置的信息，询问是否存在一种解法从某一点出发，使得整个矩阵的每个位置都被访问到，如果越界或者遇到重复访问位置的解法被认为失败。 解决思路 求是否…

射频测试入门学习(三)——程控仪器是怎样和电脑连接通信的

目录 一、程控仪器需要哪些条件 二、可程控仪器 三、专业的仪器通信软件、驱动 四、编程语言 五、电脑控制仪器条件汇总 六、仪器指令查询 七、结语 一、程控仪器需要哪些条件 1、需要具备硬件条件(可程控的仪器、个人计算机(PC)) 2、专业的仪器通信软件、驱动 3、…

Git-分支(branch)常用命令

分支 我们在做项目开发的时候，无论是软件项目还是其他机械工程项目，我们为了提高效率以及合理的节省时间等等原因，现在都不再是线性进行，而是将一个项目抽离出诸多进行线，每一条线在git中我们就叫做分支，bran…

CSS Grid 布局:属性及使用详解

CSS Grid 布局：属性及使用详解 一、CSS Grid 布局的基础概念二、主要的 CSS Grid 属性1、display: grid / display: inline-grid声明 Grid 容器2、grid-template-columns / grid-template-rowsGrid 容器中列和行的尺寸3、 grid-template-areas命名布局区域4、gap/ g…

自动驾驶控制与规划——Project 2: 车辆横向控制

目录 零、任务介绍一、环境配置二、算法三、代码实现四、效果展示 零、任务介绍 补全src/ros-bridge/carla_shenlan_projects/carla_shenlan_stanley_pid_controller/src/stanley_controller.cpp中的TODO部分。 一、环境配置 上一次作业中没有配置docker使用gpu，…

马尔可夫决策过程

目录标题 一、简单介绍什么是马尔可夫决策过程二、马尔可夫过程2.1 随机过程2.2 马尔可夫的性质2.2 马尔可夫过程 三、马尔可夫奖励过程3.1 回报3.2 价值函数 四、马尔可夫决策过程4.1 策略4.2 状态价值函数4.3 动作价值函数4.4 贝尔曼期望方程 五、蒙特卡洛方法六、占用度量七…

解决 Ubuntu 20.04 上编译 OpenCV 3.2 时的类型不匹配错误

解决 Ubuntu 20.04 上编译 OpenCV 3.2 时的类型不匹配错误 make[2]: *** [modules/python3/CMakeFiles/opencv_python3.dir/build.make:329：modules/python3/CMakeFiles/opencv_python3.dir/__/src2/cv2.cpp.o] 错误 1 make[1]: *** [CMakeFiles/Makefile2:11856&a…