PaddleOCR 截图自动文字识别

news/2025/2/4 19:43:18/

春节假期在家无聊,撸了三个小工具:PC截图+编辑/PC录屏(用于meeting录屏)/PC截屏文字识别。因为感觉这三个小工具是工作中常常需要用到的,github上也有很多开源的,不过总有点或多或少的小问题,不利于自己的使用。脚本的编写尽量减少对三方库的使用。

已全部完成,这是其中的一个,后续将三个集成在在一个工具中。

python">import tkinter as tk
from tkinter import ttk, messagebox, font, filedialog
from PIL import Image, ImageTk, ImageGrab
import sys
import tempfile
import threading
from pathlib import Path
import ctypes
import logging.handlers
from datetime import datetime# 最小化控制台窗口
def minimize_console():ctypes.windll.user32.ShowWindow(ctypes.windll.kernel32.GetConsoleWindow(), 6)minimize_console()  # 调用最小化函数# 获取脚本所在目录路径
def get_script_directory():return Path(__file__).parent# 配置日志文件路径和日志级别
log_file_path = get_script_directory() / 'ocr_errors.log'
logging.basicConfig(filename=log_file_path,level=logging.DEBUG,format='%(asctime)s - %(levelname)s - %(message)s'
)
# 添加日志轮转
handler = logging.handlers.RotatingFileHandler(log_file_path, maxBytes=1024*1024*5, backupCount=3)
logger = logging.getLogger()
logger.addHandler(handler)# 保存临时图片到磁盘
def save_temp_image(image, suffix='.png'):with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:image.save(temp_file.name)return Path(temp_file.name)class OCRApp:def __init__(self):try:self.root = tk.Tk()self.root.withdraw()# 禁用最大化按钮self.root.resizable(False, True)self.screenshot = Noneself.ocr_model = None  # 延迟初始化self.recognized_text = ""self.main_frame = Noneself.load_win = None  # 初始化 load_win 为 None# 启动后台线程加载OCR模型以优化性能,使run脚本后能马上进入截图状态threading.Thread(target=self.load_ocr_model, daemon=True).start()# 立即开始截图选择self.start_selection()except Exception as e:self.show_crash_message(f"程序启动失败: {str(e)}")sys.exit(1)def load_ocr_model(self):from paddleocr import PaddleOCRtry:self.ocr_model = PaddleOCR(use_angle_cls=True, show_log=False, lang='ch')except Exception as e:logger.error(f"OCR模型加载失败: {str(e)}")# 开始截图选择区域def start_selection(self):self.selection_win = tk.Toplevel()self.selection_win.attributes("-fullscreen", True)self.selection_win.attributes("-alpha", 0.3)# 绑定整个窗口的 ESC 键事件self.selection_win.bind("<Escape>", self.on_escape)self.canvas = tk.Canvas(self.selection_win,cursor="cross",bg="gray30",highlightthickness=0)self.canvas.pack(fill=tk.BOTH, expand=True)self.start_x = self.start_y = 0self.rect_id = Noneself.crosshair_ids = []self.canvas.bind("<Button-1>", self.on_mouse_down)self.canvas.bind("<B1-Motion>", self.on_mouse_drag)self.canvas.bind("<ButtonRelease-1>", self.on_mouse_up)self.canvas.bind("<Motion>", self.on_mouse_move)self.escape_label = tk.Label(self.selection_win,text="按ESC键退出截图",fg="yellow",bg="gray20",font=("Helvetica", 12, "bold"))self.escape_label.place(x=10, y=10)self.update_crosshair(0, 0)# 鼠标按下事件处理def on_mouse_down(self, event):self.start_x = event.xself.start_y = event.yself.clear_crosshair()if self.rect_id:self.canvas.delete(self.rect_id)self.rect_id = None# 鼠标拖动事件处理def on_mouse_drag(self, event):current_x = event.xcurrent_y = event.yif self.rect_id:self.canvas.coords(self.rect_id, self.start_x, self.start_y, current_x, current_y)else:self.rect_id = self.canvas.create_rectangle(self.start_x, self.start_y,current_x, current_y,outline="blue", width=2, fill="gray75", tags="rect")# 鼠标释放事件处理def on_mouse_up(self, event):try:x1 = min(self.start_x, event.x)y1 = min(self.start_y, event.y)x2 = max(self.start_x, event.x)y2 = max(self.start_y, event.y)if (x2 - x1) < 10 or (y2 - y1) < 10:raise ValueError("选区过小,请选择更大的区域")if (x2 - x1) > self.canvas.winfo_width() or (y2 - y1) > self.canvas.winfo_height():raise ValueError("选区过大,请选择更小的区域")self.screenshot = ImageGrab.grab(bbox=(x1, y1, x2, y2))self.selection_win.destroy()self.initialize_ocr_and_process()except Exception as e:logger.error(f"截图错误: {str(e)}")messagebox.showerror("截图错误", str(e))self.restart_selection()# 初始化OCR引擎并处理截图def initialize_ocr_and_process(self):try:if self.ocr_model is None:self.load_win = self.show_loading("OCR模型正在加载中,请稍后...")self.root.after(100, self.check_ocr_model)  # 每100毫秒检查一次else:self.process_ocr()self.setup_main_ui()self.root.deiconify()except Exception as e:logger.error(f"OCR初始化失败: {str(e)}")if self.load_win:self.load_win.destroy()self.handle_ocr_init_error(str(e))def check_ocr_model(self):if self.ocr_model is None:self.root.after(100, self.check_ocr_model)  # 每100毫秒检查一次else:if self.load_win:self.load_win.destroy()self.process_ocr()self.setup_main_ui()self.root.deiconify()# 执行OCR处理def process_ocr(self):try:temp_image_path = save_temp_image(self.screenshot)result = self.ocr_model.ocr(str(temp_image_path), cls=True)temp_image_path.unlink()  # 确保临时文件被删除# 后处理识别结果,合并同一行的文字merged_text = self.merge_lines(result[0])self.recognized_text = merged_textexcept Exception as e:logger.error(f"OCR处理失败: {str(e)}")messagebox.showerror("识别错误", f"OCR处理失败: {str(e)}")self.restart_selection()# 合并同一行的文字def merge_lines(self, ocr_result):merged_text = []current_line = []current_y1 = Nonecurrent_y2 = Noneline_threshold = 5  # 设置行间距阈值,可以根据需要调整for line in ocr_result:# 提取坐标点x1, y1 = line[0][0]  # 第一个坐标点x2, y2 = line[0][2]  # 第三个坐标点text = line[1][0]  # 提取文本if current_y1 is None or current_y2 is None:current_y1 = y1current_y2 = y2current_line.append(text)elif abs(y1 - current_y1) <= line_threshold and abs(y2 - current_y2) <= line_threshold:current_line.append(text)else:merged_text.append(" ".join(current_line))current_line = [text]current_y1 = y1current_y2 = y2if current_line:merged_text.append(" ".join(current_line))return "\n".join(merged_text)# 设置主界面UIdef setup_main_ui(self):if self.main_frame is None:self.main_frame = ttk.Frame(self.root, padding=20)self.main_frame.grid(row=0, column=0, sticky="nsew")self.root.grid_rowconfigure(0, weight=1)self.root.grid_columnconfigure(0, weight=1)# 使用 PanedWindow 来分割图片框和文本框self.paned_window = ttk.PanedWindow(self.main_frame, orient=tk.VERTICAL)self.paned_window.grid(row=0, column=0, sticky="nsew")# 创建一个 Frame 来包含图片和滚动条self.image_frame = ttk.Frame(self.paned_window)self.image_frame.pack(fill=tk.BOTH, expand=True)# 使用 Canvas 来显示图片并添加滚动条self.image_canvas = tk.Canvas(self.image_frame, highlightbackground=self.root.cget("bg"), highlightthickness=0)self.image_canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)self.image_scrollbar = ttk.Scrollbar(self.image_frame, orient=tk.VERTICAL, command=self.image_canvas.yview)self.image_scrollbar.pack(side=tk.RIGHT, fill=tk.Y)self.image_canvas.config(yscrollcommand=self.image_scrollbar.set)self.image_canvas.bind("<Configure>", self.on_canvas_configure)self.image_container = ttk.Frame(self.image_canvas)self.image_container_id = self.image_canvas.create_window((0, 0), window=self.image_container, anchor="nw")self.img_label = ttk.Label(self.image_container)self.img_label.pack(fill=tk.BOTH, expand=True)# 定义字体custom_font = font.Font(family="Microsoft YaHei", size=9)self.text_area = tk.Text(self.paned_window,wrap=tk.WORD,font=custom_font,  # 设置字体height=15  # 初始高度设置为15行)self.text_area.pack(fill=tk.BOTH, expand=True)self.paned_window.add(self.image_frame)self.paned_window.add(self.text_area)btn_frame = ttk.Frame(self.main_frame)btn_frame.grid(row=1, column=0, sticky="ew", pady=10)# 确保按钮行不会被压缩self.main_frame.grid_rowconfigure(0, weight=1)self.main_frame.grid_rowconfigure(1, weight=0)ttk.Button(btn_frame,text="重新选择",command=self.restart_selection).pack(side=tk.LEFT, padx=5)ttk.Button(btn_frame,text="复制文本",command=self.copy_result).pack(side=tk.LEFT, padx=5)ttk.Button(btn_frame,text="保存图片",command=self.save_image).pack(side=tk.LEFT, padx=5)ttk.Button(btn_frame,text="退出",command=self.safe_exit).pack(side=tk.RIGHT, padx=5)# 设置窗口标题self.root.title("文字识别@PDM3")self.update_image_display()self.text_area.delete(1.0, tk.END)self.text_area.insert(tk.END, self.recognized_text.strip())self.update_text_area_height()  # 更新文本框高度# 设置窗口总是最顶层self.root.attributes('-topmost', True)# 更新图片显示def update_image_display(self):if self.screenshot:photo = ImageTk.PhotoImage(self.screenshot)self.img_label.config(image=photo)self.img_label.image = photo# 获取图片的实际大小img_width, img_height = self.screenshot.size# 获取屏幕高度screen_height = self.root.winfo_screenheight()# 计算图片框的最大高度max_image_height = screen_height // 2# 设置 Canvas 的滚动区域self.image_canvas.config(scrollregion=(0, 0, img_width, img_height))# 调整 image_canvas 的高度if img_height > max_image_height:self.image_canvas.config(height=max_image_height)else:self.image_canvas.config(height=img_height)# 配置 Canvas 大小def on_canvas_configure(self, event):# 更新 Canvas 的滚动区域self.image_canvas.config(scrollregion=self.image_canvas.bbox("all"))# 显示加载中的窗口def show_loading(self, message):load_win = tk.Toplevel()load_win.title("请稍候")frame = ttk.Frame(load_win, padding=20)frame.pack()ttk.Label(frame, text=message).pack(pady=10)progress = ttk.Progressbar(frame, mode='indeterminate')progress.pack(pady=5)progress.start()return load_win# 处理OCR初始化错误def handle_ocr_init_error(self, error_msg):choice = messagebox.askretrycancel("OCR初始化失败",f"{error_msg}\n\n是否重试?",icon='error')if choice:threading.Thread(target=self.initialize_ocr_and_process).start()else:self.safe_exit()# 重新开始截图选择def restart_selection(self):if self.root.winfo_exists():self.root.withdraw()self.screenshot = Noneself.recognized_text = ""self.clear_ui()self.start_selection()# 清理UI界面def clear_ui(self):if hasattr(self, 'img_label'):self.img_label.config(image='')self.img_label.image = Noneif hasattr(self, 'text_area'):self.text_area.delete(1.0, tk.END)# 复制识别结果到剪贴板def copy_result(self):self.root.clipboard_clear()self.root.clipboard_append(self.recognized_text)messagebox.showinfo("成功", "已复制到剪贴板")# 安全退出程序def safe_exit(self):if self.root.winfo_exists():self.root.destroy()sys.exit(0)# 显示程序崩溃错误信息def show_crash_message(self, message):crash_win = tk.Tk()crash_win.withdraw()messagebox.showerror("致命错误", message)crash_win.destroy()# 按下ESC键时退出程序def on_escape(self, event):self.selection_win.destroy()self.safe_exit()# 鼠标移动事件处理def on_mouse_move(self, event):current_x = event.xcurrent_y = event.yself.update_crosshair(current_x, current_y)# 更新十字线位置def update_crosshair(self, x, y):self.clear_crosshair()self.crosshair_ids.append(self.canvas.create_line(0, y, self.canvas.winfo_width(), y,tags="crosshair", fill="yellow", width=2))self.crosshair_ids.append(self.canvas.create_line(x, 0, x, self.canvas.winfo_height(),tags="crosshair", fill="yellow", width=2))# 清除十字线def clear_crosshair(self):for crosshair_id in self.crosshair_ids:self.canvas.delete(crosshair_id)self.crosshair_ids = []# 保存图片def save_image(self):if self.screenshot:# 获取用户桌面路径desktop_path = Path.home() / 'Desktop'# 生成当前日期和时间的字符串current_datetime = datetime.now().strftime("%Y%m%d_%H%M%S")default_filename = f"screenshot_{current_datetime}.png"file_path = filedialog.asksaveasfilename(initialdir=desktop_path,  # 设置初始目录为用户桌面initialfile=default_filename,  # 设置默认文件名defaultextension=".png",filetypes=[("PNG files", "*.png"), ("JPEG files", "*.jpg"), ("All files", "*.*")])if file_path:self.screenshot.save(file_path)messagebox.showinfo("保存成功", f"图片已保存到 {file_path}")# 更新文本框高度def update_text_area_height(self):# 计算当前文本行数line_count = int(self.text_area.index('end-1c').split('.')[0])if line_count > 15:self.text_area.config(height=15)  # 如果行数超过15行,固定高度为15行else:self.text_area.config(height=line_count)  # 否则根据内容调整高度# 运行主循环def run(self):self.root.mainloop()if __name__ == "__main__":app = OCRApp()app.run()


http://www.ppmy.cn/news/1569309.html

相关文章

6 [新一代Github投毒针对网络安全人员钓鱼]

0x01 前言 在Github上APT组织“海莲花”发布存在后门的提权BOF&#xff0c;通过该项目针对网络安全从业人员进行钓鱼。不过其实早在几年前就已经有人对Visual Studio项目恶意利用进行过研究&#xff0c;所以投毒的手法也不算是新的技术。但这次国内有大量的安全从业者转发该钓…

Linux+Docer 容器化部署之 Shell 语法入门篇 【Shell 循环类型】

文章目录 一、Shell 循环类型二、Shell while 循环三、Shell for 循环四、Shell until 循环五、Shell select 循环六、总结 一、Shell 循环类型 循环是一个强大的编程工具&#xff0c;使您能够重复执行一组命令。在本教程中&#xff0c;您将学习以下类型的循环 Shell 程序&…

UE学习日志#19 C++笔记#5 基础复习5 引用1

C中的引用&#xff08;reference&#xff09;是另一个变量的别名。对引用的所有修改都会更改其引用的变量的值。可以将引用视为隐式指针&#xff0c;它省去了获取变量地址和解引用指针的麻烦。另外&#xff0c;可以将引用视为原始变量的另一个名称。可以创建独立的引用变量&…

使用mybatisPlus插件生成代码步骤及注意事项

使用mybatisPlus插件可以很方便的生成与数据库对应的PO对象&#xff0c;以及对应的controller、service、ImplService、mapper代码&#xff0c;生成这种代码的方式有很多&#xff0c;包括mybatis-plus提供的代码生成器&#xff0c;以及idea提供的代码生成器&#xff0c;无论哪一…

实践网络安全:常见威胁与应对策略详解

&#x1f4dd;个人主页&#x1f339;&#xff1a;一ge科研小菜鸡-CSDN博客 &#x1f339;&#x1f339;期待您的关注 &#x1f339;&#x1f339; 引言 在数字化转型的浪潮中&#xff0c;网络安全的重要性已达到前所未有的高度。无论是个人用户、企业&#xff0c;还是政府机构…

92,[8] 攻防世界 web Web_php_wrong_nginx_config

进入靶场 admin 123 还尝试了很多&#xff0c;都是建设中 进行目录扫描&#xff0c;扫描到了admin和robots.txt 这句话就应该想到BP 抓包 这样修改请求即可登录 BP加载不出来 F12修改 管理中心点击后会发生url的改变 变成这样 目录穿越&#xff0c;不断尝试 /admin/admi…

Git进阶之旅:分支管理策略

第一章&#xff1a; Git stash&#xff1a; 介绍&#xff1a; 当正在 dev 分支上开发某个项目&#xff0c;这时项目中出现一个 bug&#xff0c;需要修复&#xff0c;但是项目只完成一半&#xff0c;还不想提交&#xff0c;这时可以使用 git stash 命令将修改的内容保存至堆栈区…

汽车蓝牙钥匙定位仿真小程序

此需求来自于粉丝的真实需求,假期没事,牛刀小试。 一、项目背景 如今,智能车钥匙和移动端定位技术已经相当普及。为了探索蓝牙 Beacon 在短距离定位场景下的可行性,我们搭建了一个简易原型:利用 UniApp 在移动端采集蓝牙信标的 RSSI(信号强度),通过三边定位算法估算钥…