PaddleOCR 截图自动文字识别

春节假期在家无聊，撸了三个小工具：PC截图+编辑/PC录屏(用于meeting录屏)/PC截屏文字识别。因为感觉这三个小工具是工作中常常需要用到的，github上也有很多开源的，不过总有点或多或少的小问题，不利于自己的使用。脚本的编写尽量减少对三方库的使用。
已全部完成，这是其中的一个，后续将三个集成在在一个工具中。
python">import tkinter as tk
from tkinter import ttk, messagebox
from PIL import Image, ImageTk, ImageGrab
import sys
import logging
import tempfile
import threading
from pathlib import Path
import ctypes  # 导入 ctypes 库# 最小化控制台窗口
def minimize_console():ctypes.windll.user32.ShowWindow(ctypes.windll.kernel32.GetConsoleWindow(), 6)minimize_console()  # 调用最小化函数# 获取脚本所在目录路径
def get_script_directory():return Path(__file__).parent# 配置日志文件路径和日志级别
log_file_path = get_script_directory() / 'ocr_errors.log'
logging.basicConfig(filename=log_file_path,level=logging.DEBUG,format='%(asctime)s - %(levelname)s - %(message)s'
)# 保存临时图片到磁盘
def save_temp_image(image, suffix='.png'):with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:image.save(temp_file.name)return Path(temp_file.name)class OCRApp:def __init__(self):try:self.root = tk.Tk()self.root.withdraw()self.screenshot = Noneself.ocr_model = None  # 延迟初始化self.recognized_text = ""self.main_frame = None# 启动后台线程加载OCR模型以优化性能，使run脚本后能马上进入截图状态threading.Thread(target=self.load_ocr_model, daemon=True).start()# 立即开始截图选择self.start_selection()except Exception as e:self.show_crash_message(f"程序启动失败: {str(e)}")sys.exit(1)def load_ocr_model(self):from paddleocr import PaddleOCRtry:self.ocr_model = PaddleOCR(use_angle_cls=True, show_log=False, lang='ch')except Exception as e:logging.error(f"OCR模型加载失败: {str(e)}")# 开始截图选择区域def start_selection(self):self.selection_win = tk.Toplevel()self.selection_win.attributes("-fullscreen", True)self.selection_win.attributes("-alpha", 0.3)self.selection_win.bind("<Escape>", self.on_escape)self.canvas = tk.Canvas(self.selection_win,cursor="cross",bg="gray20",highlightthickness=0)self.canvas.pack(fill=tk.BOTH, expand=True)self.start_x = self.start_y = 0self.rect_id = Noneself.crosshair_ids = []self.canvas.bind("<Button-1>", self.on_mouse_down)self.canvas.bind("<B1-Motion>", self.on_mouse_drag)self.canvas.bind("<ButtonRelease-1>", self.on_mouse_up)self.canvas.bind("<Motion>", self.on_mouse_move)self.escape_label = tk.Label(self.selection_win,text="按ESC键退出截图",fg="yellow",bg="gray10",font=("Helvetica", 12, "bold"))self.escape_label.place(x=10, y=10)self.update_crosshair(0, 0)# 鼠标按下事件处理def on_mouse_down(self, event):self.start_x = event.xself.start_y = event.yself.clear_crosshair()self.canvas.delete("rect")# 鼠标拖动事件处理def on_mouse_drag(self, event):current_x = event.xcurrent_y = event.yself.canvas.delete("rect")self.update_crosshair(current_x, current_y)if self.rect_id:self.canvas.delete(self.rect_id)self.rect_id = self.canvas.create_rectangle(self.start_x, self.start_y,current_x, current_y,outline="yellow", width=2, fill="", tags="rect")# 鼠标释放事件处理def on_mouse_up(self, event):try:x1 = min(self.start_x, event.x)y1 = min(self.start_y, event.y)x2 = max(self.start_x, event.x)y2 = max(self.start_y, event.y)if (x2 - x1) < 10 or (y2 - y1) < 10:self.clear_crosshair()  # 清除十字线raise ValueError("选区过小，请选择更大的区域")if (x2 - x1) > self.canvas.winfo_width() or (y2 - y1) > self.canvas.winfo_height():self.clear_crosshair()  # 清除十字线raise ValueError("选区过大，请选择更小的区域")self.screenshot = ImageGrab.grab(bbox=(x1, y1, x2, y2))self.selection_win.destroy()self.initialize_ocr_and_process()except Exception as e:logging.error(f"截图错误: {str(e)}")messagebox.showerror("截图错误", str(e))self.restart_selection()# 初始化OCR引擎并处理截图def initialize_ocr_and_process(self):try:if self.ocr_model is None:self.load_win = self.show_loading("OCR模型正在加载中，请稍后...")self.root.after(100, self.check_ocr_model)  # 每100毫秒检查一次else:self.process_ocr()self.setup_main_ui()self.root.deiconify()except Exception as e:logging.error(f"OCR初始化失败: {str(e)}")self.load_win.destroy()self.handle_ocr_init_error(str(e))def check_ocr_model(self):if self.ocr_model is None:self.root.after(100, self.check_ocr_model)  # 每100毫秒检查一次else:self.load_win.destroy()self.process_ocr()self.setup_main_ui()self.root.deiconify()# 执行OCR处理def process_ocr(self):try:temp_image_path = save_temp_image(self.screenshot)result = self.ocr_model.ocr(str(temp_image_path), cls=True)self.recognized_text = "\n".join([line[1][0] for line in result[0]])temp_image_path.unlink()except Exception as e:logging.error(f"OCR处理失败: {str(e)}")messagebox.showerror("识别错误", f"OCR处理失败: {str(e)}")self.restart_selection()# 设置主界面UIdef setup_main_ui(self):if self.main_frame is None:self.main_frame = ttk.Frame(self.root, padding=20)self.main_frame.pack(fill=tk.BOTH, expand=True)self.img_label = ttk.Label(self.main_frame)self.img_label.pack(pady=10)self.text_area = tk.Text(self.main_frame,height=15,width=60,wrap=tk.WORD)self.text_area.pack(pady=10)btn_frame = ttk.Frame(self.main_frame)btn_frame.pack(pady=10)ttk.Button(btn_frame,text="重新选择",command=self.restart_selection).pack(side=tk.LEFT, padx=5)ttk.Button(btn_frame,text="复制结果",command=self.copy_result).pack(side=tk.LEFT, padx=5)ttk.Button(btn_frame,text="退出",command=self.safe_exit).pack(side=tk.RIGHT, padx=5)self.update_image_display()self.text_area.delete(1.0, tk.END)self.text_area.insert(tk.END, self.recognized_text)# 设置窗口不总是最顶层self.root.attributes('-topmost', False)# 更新图片显示def update_image_display(self):if self.screenshot:photo = ImageTk.PhotoImage(self.screenshot)self.img_label.config(image=photo)self.img_label.image = photo# 显示加载中的窗口def show_loading(self, message):load_win = tk.Toplevel()load_win.title("请稍候")frame = ttk.Frame(load_win, padding=20)frame.pack()ttk.Label(frame, text=message).pack(pady=10)progress = ttk.Progressbar(frame, mode='indeterminate')progress.pack(pady=5)progress.start()return load_win# 处理OCR初始化错误def handle_ocr_init_error(self, error_msg):choice = messagebox.askretrycancel("OCR初始化失败",f"{error_msg}\n\n是否重试？",icon='error')if choice:threading.Thread(target=self.initialize_ocr_and_process).start()else:self.safe_exit()# 重新开始截图选择def restart_selection(self):if self.root.winfo_exists():self.root.withdraw()self.screenshot = Noneself.recognized_text = ""self.clear_ui()self.start_selection()# 清理UI界面def clear_ui(self):if hasattr(self, 'img_label'):self.img_label.config(image='')self.img_label.image = Noneif hasattr(self, 'text_area'):self.text_area.delete(1.0, tk.END)# 复制识别结果到剪贴板def copy_result(self):self.root.clipboard_clear()self.root.clipboard_append(self.recognized_text)messagebox.showinfo("成功", "已复制到剪贴板")# 安全退出程序def safe_exit(self):if self.root.winfo_exists():self.root.destroy()sys.exit(0)# 显示程序崩溃错误信息def show_crash_message(self, message):crash_win = tk.Tk()crash_win.withdraw()messagebox.showerror("致命错误", message)crash_win.destroy()# 按下ESC键时退出程序def on_escape(self, event):self.selection_win.destroy()self.safe_exit()# 鼠标移动事件处理def on_mouse_move(self, event):current_x = event.xcurrent_y = event.yself.update_crosshair(current_x, current_y)# 更新十字线位置def update_crosshair(self, x, y):self.clear_crosshair()self.crosshair_ids.append(self.canvas.create_line(0, y, self.canvas.winfo_width(), y, dash=(4, 4), tags="crosshair", fill="yellow", width=4))self.crosshair_ids.append(self.canvas.create_line(x, 0, x, self.canvas.winfo_height(), dash=(4, 4), tags="crosshair", fill="yellow", width=4))# 清除十字线def clear_crosshair(self):for crosshair_id in self.crosshair_ids:self.canvas.delete(crosshair_id)self.crosshair_ids = []# 运行主循环def run(self):self.root.mainloop()if __name__ == "__main__":app = OCRApp()app.run()
PaddleOCR 截图自动文字识别

相关文章

计算机网络性能指标相关

如何实现滑动网格的功能

Mac上有哪些好用的开源粘贴板app

React第二十七章(Suspense)

游戏引擎分层架构与总体管线

Deepseek智能AI--国产之光

Java 分布式与微服务架构：现代企业应用开发的新范式

【Elasticsearch】中数据流需要配置索引模板吗？