PaddleOCR 截图自动文字识别

devtools/2025/2/3 6:16:30/

春节假期在家无聊,撸了三个小工具:PC截图+编辑/PC录屏(用于meeting录屏)/PC截屏文字识别。因为感觉这三个小工具是工作中常常需要用到的,github上也有很多开源的,不过总有点或多或少的小问题,不利于自己的使用。脚本的编写尽量减少对三方库的使用。

已全部完成,这是其中的一个,后续将三个集成在在一个工具中。

python">import tkinter as tk
from tkinter import ttk, messagebox
from PIL import Image, ImageTk, ImageGrab
import sys
import logging
import tempfile
import threading
from pathlib import Path
import ctypes  # 导入 ctypes 库# 最小化控制台窗口
def minimize_console():ctypes.windll.user32.ShowWindow(ctypes.windll.kernel32.GetConsoleWindow(), 6)minimize_console()  # 调用最小化函数# 获取脚本所在目录路径
def get_script_directory():return Path(__file__).parent# 配置日志文件路径和日志级别
log_file_path = get_script_directory() / 'ocr_errors.log'
logging.basicConfig(filename=log_file_path,level=logging.DEBUG,format='%(asctime)s - %(levelname)s - %(message)s'
)# 保存临时图片到磁盘
def save_temp_image(image, suffix='.png'):with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:image.save(temp_file.name)return Path(temp_file.name)class OCRApp:def __init__(self):try:self.root = tk.Tk()self.root.withdraw()self.screenshot = Noneself.ocr_model = None  # 延迟初始化self.recognized_text = ""self.main_frame = None# 启动后台线程加载OCR模型以优化性能,使run脚本后能马上进入截图状态threading.Thread(target=self.load_ocr_model, daemon=True).start()# 立即开始截图选择self.start_selection()except Exception as e:self.show_crash_message(f"程序启动失败: {str(e)}")sys.exit(1)def load_ocr_model(self):from paddleocr import PaddleOCRtry:self.ocr_model = PaddleOCR(use_angle_cls=True, show_log=False, lang='ch')except Exception as e:logging.error(f"OCR模型加载失败: {str(e)}")# 开始截图选择区域def start_selection(self):self.selection_win = tk.Toplevel()self.selection_win.attributes("-fullscreen", True)self.selection_win.attributes("-alpha", 0.3)self.selection_win.bind("<Escape>", self.on_escape)self.canvas = tk.Canvas(self.selection_win,cursor="cross",bg="gray20",highlightthickness=0)self.canvas.pack(fill=tk.BOTH, expand=True)self.start_x = self.start_y = 0self.rect_id = Noneself.crosshair_ids = []self.canvas.bind("<Button-1>", self.on_mouse_down)self.canvas.bind("<B1-Motion>", self.on_mouse_drag)self.canvas.bind("<ButtonRelease-1>", self.on_mouse_up)self.canvas.bind("<Motion>", self.on_mouse_move)self.escape_label = tk.Label(self.selection_win,text="按ESC键退出截图",fg="yellow",bg="gray10",font=("Helvetica", 12, "bold"))self.escape_label.place(x=10, y=10)self.update_crosshair(0, 0)# 鼠标按下事件处理def on_mouse_down(self, event):self.start_x = event.xself.start_y = event.yself.clear_crosshair()self.canvas.delete("rect")# 鼠标拖动事件处理def on_mouse_drag(self, event):current_x = event.xcurrent_y = event.yself.canvas.delete("rect")self.update_crosshair(current_x, current_y)if self.rect_id:self.canvas.delete(self.rect_id)self.rect_id = self.canvas.create_rectangle(self.start_x, self.start_y,current_x, current_y,outline="yellow", width=2, fill="", tags="rect")# 鼠标释放事件处理def on_mouse_up(self, event):try:x1 = min(self.start_x, event.x)y1 = min(self.start_y, event.y)x2 = max(self.start_x, event.x)y2 = max(self.start_y, event.y)if (x2 - x1) < 10 or (y2 - y1) < 10:self.clear_crosshair()  # 清除十字线raise ValueError("选区过小,请选择更大的区域")if (x2 - x1) > self.canvas.winfo_width() or (y2 - y1) > self.canvas.winfo_height():self.clear_crosshair()  # 清除十字线raise ValueError("选区过大,请选择更小的区域")self.screenshot = ImageGrab.grab(bbox=(x1, y1, x2, y2))self.selection_win.destroy()self.initialize_ocr_and_process()except Exception as e:logging.error(f"截图错误: {str(e)}")messagebox.showerror("截图错误", str(e))self.restart_selection()# 初始化OCR引擎并处理截图def initialize_ocr_and_process(self):try:if self.ocr_model is None:self.load_win = self.show_loading("OCR模型正在加载中,请稍后...")self.root.after(100, self.check_ocr_model)  # 每100毫秒检查一次else:self.process_ocr()self.setup_main_ui()self.root.deiconify()except Exception as e:logging.error(f"OCR初始化失败: {str(e)}")self.load_win.destroy()self.handle_ocr_init_error(str(e))def check_ocr_model(self):if self.ocr_model is None:self.root.after(100, self.check_ocr_model)  # 每100毫秒检查一次else:self.load_win.destroy()self.process_ocr()self.setup_main_ui()self.root.deiconify()# 执行OCR处理def process_ocr(self):try:temp_image_path = save_temp_image(self.screenshot)result = self.ocr_model.ocr(str(temp_image_path), cls=True)self.recognized_text = "\n".join([line[1][0] for line in result[0]])temp_image_path.unlink()except Exception as e:logging.error(f"OCR处理失败: {str(e)}")messagebox.showerror("识别错误", f"OCR处理失败: {str(e)}")self.restart_selection()# 设置主界面UIdef setup_main_ui(self):if self.main_frame is None:self.main_frame = ttk.Frame(self.root, padding=20)self.main_frame.pack(fill=tk.BOTH, expand=True)self.img_label = ttk.Label(self.main_frame)self.img_label.pack(pady=10)self.text_area = tk.Text(self.main_frame,height=15,width=60,wrap=tk.WORD)self.text_area.pack(pady=10)btn_frame = ttk.Frame(self.main_frame)btn_frame.pack(pady=10)ttk.Button(btn_frame,text="重新选择",command=self.restart_selection).pack(side=tk.LEFT, padx=5)ttk.Button(btn_frame,text="复制结果",command=self.copy_result).pack(side=tk.LEFT, padx=5)ttk.Button(btn_frame,text="退出",command=self.safe_exit).pack(side=tk.RIGHT, padx=5)self.update_image_display()self.text_area.delete(1.0, tk.END)self.text_area.insert(tk.END, self.recognized_text)# 设置窗口不总是最顶层self.root.attributes('-topmost', False)# 更新图片显示def update_image_display(self):if self.screenshot:photo = ImageTk.PhotoImage(self.screenshot)self.img_label.config(image=photo)self.img_label.image = photo# 显示加载中的窗口def show_loading(self, message):load_win = tk.Toplevel()load_win.title("请稍候")frame = ttk.Frame(load_win, padding=20)frame.pack()ttk.Label(frame, text=message).pack(pady=10)progress = ttk.Progressbar(frame, mode='indeterminate')progress.pack(pady=5)progress.start()return load_win# 处理OCR初始化错误def handle_ocr_init_error(self, error_msg):choice = messagebox.askretrycancel("OCR初始化失败",f"{error_msg}\n\n是否重试?",icon='error')if choice:threading.Thread(target=self.initialize_ocr_and_process).start()else:self.safe_exit()# 重新开始截图选择def restart_selection(self):if self.root.winfo_exists():self.root.withdraw()self.screenshot = Noneself.recognized_text = ""self.clear_ui()self.start_selection()# 清理UI界面def clear_ui(self):if hasattr(self, 'img_label'):self.img_label.config(image='')self.img_label.image = Noneif hasattr(self, 'text_area'):self.text_area.delete(1.0, tk.END)# 复制识别结果到剪贴板def copy_result(self):self.root.clipboard_clear()self.root.clipboard_append(self.recognized_text)messagebox.showinfo("成功", "已复制到剪贴板")# 安全退出程序def safe_exit(self):if self.root.winfo_exists():self.root.destroy()sys.exit(0)# 显示程序崩溃错误信息def show_crash_message(self, message):crash_win = tk.Tk()crash_win.withdraw()messagebox.showerror("致命错误", message)crash_win.destroy()# 按下ESC键时退出程序def on_escape(self, event):self.selection_win.destroy()self.safe_exit()# 鼠标移动事件处理def on_mouse_move(self, event):current_x = event.xcurrent_y = event.yself.update_crosshair(current_x, current_y)# 更新十字线位置def update_crosshair(self, x, y):self.clear_crosshair()self.crosshair_ids.append(self.canvas.create_line(0, y, self.canvas.winfo_width(), y, dash=(4, 4), tags="crosshair", fill="yellow", width=4))self.crosshair_ids.append(self.canvas.create_line(x, 0, x, self.canvas.winfo_height(), dash=(4, 4), tags="crosshair", fill="yellow", width=4))# 清除十字线def clear_crosshair(self):for crosshair_id in self.crosshair_ids:self.canvas.delete(crosshair_id)self.crosshair_ids = []# 运行主循环def run(self):self.root.mainloop()if __name__ == "__main__":app = OCRApp()app.run()


http://www.ppmy.cn/devtools/155647.html

相关文章

计算机网络 性能指标相关

目录 吞吐量 时延 时延带宽积 往返时延RTT 利用率 吞吐量 时延 时延带宽积 往返时延RTT 利用率

如何实现滑动网格的功能

文章目录 1 概念介绍2 使用方法3 示例代码 我们在上一章回中介绍了SliverList组件相关的内容&#xff0c;本章回中将介绍SliverGrid组件.闲话休提&#xff0c;让我们一起Talk Flutter吧。 1 概念介绍 我们在本章回中介绍的SliverGrid组件是一种网格类组件&#xff0c;主要用来…

Mac上有哪些好用的开源粘贴板app

在Mac上&#xff0c;有几款开源且好用的粘贴板管理工具值得推荐&#xff1a; Maccy 特点&#xff1a;Maccy是一款开源、轻量级的剪贴板管理工具&#xff0c;支持多种功能&#xff0c;包括搜索、Pin单条记录、忽略格式粘贴等。它采用键盘优先设计&#xff0c;操作组合键可减少鼠…

React第二十七章(Suspense)

Suspense Suspense 是一种异步渲染机制&#xff0c;其核心理念是在组件加载或数据获取过程中&#xff0c;先展示一个占位符&#xff08;loading state&#xff09;&#xff0c;从而实现更自然流畅的用户界面更新体验。 应用场景 异步组件加载&#xff1a;通过代码分包实现组件…

游戏引擎分层架构与总体管线

资源层 管理游戏引擎生态的资源池分配 每个资产的实时生命周期 Resource 各种文件格式的资源 转换importing Asset 资产&#xff08;高效数据&#xff09; 引擎中最重要的是资产之间的关联 reference GUID &#xff1a;游戏资产的唯一识别号 运行中资产管理器 Runtime Asse…

Deepseek智能AI--国产之光

以下是以每个核心问题为独立章节的高质量技术博客整理&#xff0c;采用学术级论述框架并增强可视化呈现&#xff1a; 大型语言模型深度解密&#xff1a;从哲学思辨到系统工程 目录 当服务器关闭&#xff1a;AI的终极告解与技术隐喻情感计算&#xff1a;图灵测试未触及的认知深…

Java 分布式与微服务架构:现代企业应用开发的新范式

Java学习资料 Java学习资料 Java学习资料 一、引言 在当今数字化时代&#xff0c;企业应用面临着越来越高的性能、可扩展性和灵活性要求。传统的单体架构在应对大规模用户访问、复杂业务逻辑和频繁的功能迭代时&#xff0c;逐渐暴露出诸多问题。Java 分布式与微服务架构应运…

【Elasticsearch】中数据流需要配置索引模板吗?

是的&#xff0c;数据流需要配置索引模板。在Elasticsearch中&#xff0c;数据流&#xff08;Data Streams&#xff09;是一种用于处理时间序列数据的高级结构&#xff0c;它背后由多个隐藏的索引组成&#xff0c;这些索引被称为后备索引&#xff08;Backing Indices&#xff0…