Running a GPT model locally with clean, simple output interfaces (command line, GUI, web page)


Contents

Screenshots

Command-line version

GUI version (tkinter)

Web version (Streamlit)


Screenshots

Command-line run (screenshot)

tkinter GUI run (screenshot)

Web page run (screenshot)

Command-line version

A plain script that streams the reply to stdout token by token and prints a token count and throughput figure at the end.

 

import os
import time
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

start_time = time.time()
model_path = os.path.join(os.path.dirname(__file__), "Qwen2.5")

# Load the local model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="cpu",   # run on the CPU (use "auto" to let transformers place layers on GPU/CPU)
    torch_dtype="auto"  # pick the precision automatically (e.g. FP16)
)

# Make sure a padding token exists
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Build the conversation
messages = [
    {"role": "user", "content": "你是?"}
]
input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# Subclass TextStreamer to control how the streamed text is printed
class Streamer(TextStreamer):
    def __init__(self, tokenizer, skip_prompt=True, skip_special_tokens=True):
        super().__init__(tokenizer, skip_prompt=skip_prompt, skip_special_tokens=skip_special_tokens)

    def on_finalized_text(self, text, stream_end=False):
        print(text, flush=True, end="" if not stream_end else None)

# Create the streaming handler
streamer = Streamer(tokenizer)

# Generate the reply
inputs = tokenizer(input_text, padding=True, truncation=True, return_tensors="pt").to(model.device)
outputs = model.generate(
    inputs.input_ids,
    attention_mask=inputs.attention_mask,  # silences the "attention mask is not set" warning
    max_new_tokens=5,                      # tiny limit, just for a quick demo
    do_sample=True,
    temperature=0.6,
    top_p=0.95,
    streamer=streamer
)

# Decode the full result (used only for the statistics below)
response = tokenizer.decode(outputs[0][len(inputs.input_ids[0]):], skip_special_tokens=True)

# Print statistics; the rate is tokens per second, so divide the token
# count (not the character count) by the elapsed time
response_tokens = tokenizer(response, return_tensors="pt")
token_count = len(response_tokens["input_ids"][0])
print(f"token 数量: {token_count}")
print(f"token: {token_count / (time.time() - start_time):.2f} /s")
python">import os
import time
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamerstart_time = time.time()model_path = os.path.join(os.path.dirname(__file__), "Qwen2.5")# 加载本地模型和分词器
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path,device_map="cpu",  # 自动分配GPU/CPUtorch_dtype="auto"  # 自动选择精度(如FP16)
)# 修改填充标记
if tokenizer.pad_token is None:tokenizer.pad_token = tokenizer.eos_token# 构建对话
messages = [{"role": "user", "content": "你是?"}
]
input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)# 继承方法放入一些处理方法
class Streamer(TextStreamer):def __init__(self, tokenizer, skip_prompt=True, skip_special_tokens=True):super().__init__(tokenizer, skip_prompt=skip_prompt, skip_special_tokens=skip_special_tokens)def on_finalized_text(self, text, stream_end=False):print(text, flush=True, end="" if not stream_end else None)# 创建文本流处理器
streamer = Streamer(tokenizer)
# 生成回复
inputs = tokenizer(input_text, padding=True, truncation=True, return_tensors="pt").to(model.device)
outputs = model.generate(inputs.input_ids,max_new_tokens=5,do_sample=True,temperature=0.6,top_p=0.95,streamer=streamer
)# 解码完整结果(用于统计)
response = tokenizer.decode(outputs[0][len(inputs.input_ids[0]):], skip_special_tokens=True)
# 输出统计信息
response_tokens = tokenizer(response, return_tensors="pt")
token_count = len(response_tokens["input_ids"][0])print(f"token 数量: {token_count}")
print(f"token: {len(response) / (time.time() - start_time):.2f} /s")

GUI version (tkinter)

A tkinter window with a scrolling output box, an input field and a send button; generation runs in a background thread so the window stays responsive while tokens stream in.

import os
import time
import threading
import tkinter as tk
from tkinter import scrolledtext
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

# Load the local model and tokenizer
model_path = os.path.join(os.path.dirname(__file__), "Qwen2.5")
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="cpu",   # run on the CPU
    torch_dtype="auto"  # pick the precision automatically (e.g. FP16)
)

# Make sure a padding token exists
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Subclass TextStreamer so streamed text goes into the Text widget
class Streamer(TextStreamer):
    def __init__(self, tokenizer, text_widget, skip_prompt=True, skip_special_tokens=True):
        super().__init__(tokenizer, skip_prompt=skip_prompt, skip_special_tokens=skip_special_tokens)
        self.text_widget = text_widget

    def on_finalized_text(self, text, stream_end=False):
        self.text_widget.insert(tk.END, text)
        self.text_widget.see(tk.END)

def generate_response(input_text_str, start_time):
    # Create the streaming handler
    streamer = Streamer(tokenizer, output_text)
    # Generate the reply
    inputs = tokenizer(input_text_str, padding=True, truncation=True, return_tensors="pt").to(model.device)
    outputs = model.generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=10,
        do_sample=True,
        temperature=0.6,
        top_p=0.95,
        streamer=streamer
    )
    # Decode the full result (used only for the statistics below)
    response = tokenizer.decode(outputs[0][len(inputs.input_ids[0]):], skip_special_tokens=True)
    # Show statistics (tokens per second, i.e. token count over elapsed time)
    response_tokens = tokenizer(response, return_tensors="pt")
    token_count = len(response_tokens["input_ids"][0])
    output_text.insert(tk.END, f"\ntoken 数量: {token_count}")
    output_text.insert(tk.END, f"\ntoken: {token_count / (time.time() - start_time):.2f} /s")

def send_message():
    # Clear the output box
    output_text.delete(1.0, tk.END)
    # Read the input box
    input_content = input_entry.get()
    # Build the conversation
    messages = [{"role": "user", "content": input_content}]
    input_text_str = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    start_time = time.time()
    # Run generation in a background thread so the UI stays responsive
    thread = threading.Thread(target=generate_response, args=(input_text_str, start_time))
    thread.start()
    # Clear the input box
    input_entry.delete(0, tk.END)

# Create the main window
root = tk.Tk()
root.title("聊天界面")

# Window settings
root.config(highlightthickness=0)  # no border highlight
bg_color = '#F0F0F0'
# root.overrideredirect(True)
root.wm_attributes('-transparentcolor', bg_color)  # Windows-only: render this color as transparent

# Output box
output_text = scrolledtext.ScrolledText(root, width=60, height=20, bg=bg_color,
                                        font=("黑体", 15), fg="#FFFFFF")
output_text.pack(pady=10)

# A Frame as the container for the input row
input_frame = tk.Frame(root)
input_frame.pack(side=tk.BOTTOM, fill=tk.X)

# Input box, parented to input_frame
input_entry = tk.Entry(input_frame, width=80)
input_entry.pack(side=tk.LEFT, padx=10, pady=10)

# Send button, parented to input_frame
send_button = tk.Button(input_frame, text="发送", command=send_message, bg="white")
send_button.pack(side=tk.RIGHT, padx=10, pady=10)

# Run the main loop
root.mainloop()
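One caveat: tkinter widgets are not thread-safe, and the Streamer above touches output_text from the generation thread. That often works in practice but can crash or freeze on some builds. A safer pattern is to have the worker thread push text into a queue and let the Tk main loop drain it with root.after. A sketch under that assumption (ui_queue and drain_queue are names introduced here, not part of the original script):

import queue

ui_queue = queue.Queue()  # the generation thread only ever puts text here

# In Streamer.on_finalized_text, replace the widget calls with:
#     ui_queue.put(text)

def drain_queue():
    # Runs on the Tk main loop, so touching the widget here is safe
    try:
        while True:
            output_text.insert(tk.END, ui_queue.get_nowait())
            output_text.see(tk.END)
    except queue.Empty:
        pass
    root.after(50, drain_queue)  # poll again in 50 ms

root.after(50, drain_queue)  # start polling before root.mainloop()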
python">import os
import time
import tkinter as tk
from tkinter import scrolledtext
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
import threading# 加载本地模型和分词器
model_path = os.path.join(os.path.dirname(__file__), "Qwen2.5")
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path,device_map="cpu",  # 自动分配GPU/CPUtorch_dtype="auto"  # 自动选择精度(如FP16)
)# 修改填充标记
if tokenizer.pad_token is None:tokenizer.pad_token = tokenizer.eos_token# 继承方法放入一些处理方法
class Streamer(TextStreamer):def __init__(self, tokenizer, text_widget, skip_prompt=True, skip_special_tokens=True):super().__init__(tokenizer, skip_prompt=skip_prompt, skip_special_tokens=skip_special_tokens)self.text_widget = text_widgetdef on_finalized_text(self, text, stream_end=False):self.text_widget.insert(tk.END, text)self.text_widget.see(tk.END)def generate_response(input_text_str, start_time):# 创建文本流处理器streamer = Streamer(tokenizer, output_text)# 生成回复inputs = tokenizer(input_text_str, padding=True, truncation=True, return_tensors="pt").to(model.device)outputs = model.generate(inputs.input_ids,max_new_tokens=10,do_sample=True,temperature=0.6,top_p=0.95,streamer=streamer)# 解码完整结果(用于统计)response = tokenizer.decode(outputs[0][len(inputs.input_ids[0]):], skip_special_tokens=True)# 输出统计信息response_tokens = tokenizer(response, return_tensors="pt")token_count = len(response_tokens["input_ids"][0])output_text.insert(tk.END, f"\ntoken 数量: {token_count}")output_text.insert(tk.END, f"\ntoken: {len(response) / (time.time() - start_time):.2f} /s")def send_message():# 清空显示框output_text.delete(1.0, tk.END)# 获取输入框的内容input_content = input_entry.get()# 构建对话messages = [{"role": "user", "content": input_content}]input_text_str = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)start_time = time.time()# 创建并启动线程thread = threading.Thread(target=generate_response, args=(input_text_str, start_time))thread.start()# 清空输入框input_entry.delete(0, tk.END)# 创建主窗口
root = tk.Tk()
root.title("聊天界面")# 设置
root.config(highlightthickness=0)  # 设置边框为0bg_color = '#F0F0F0'
# root.overrideredirect(True)
root.wm_attributes('-transparentcolor', bg_color)# 创建显示框
output_text = scrolledtext.ScrolledText(root, width=60, height=20, bg=bg_color, font=("黑体", 15),fg="#FFFFFF")
output_text.pack(pady=10)# 创建一个新的Frame作为输入区域的容器
input_frame = tk.Frame(root)
input_frame.pack(side=tk.BOTTOM, fill=tk.X)# 创建输入框,父容器为input_frame
input_entry = tk.Entry(input_frame, width=80)
input_entry.pack(side=tk.LEFT, padx=10, pady=10)# 创建发送按钮,父容器为input_frame
send_button = tk.Button(input_frame, text="发送", command=send_message, bg="white")
send_button.pack(side=tk.RIGHT, padx=10, pady=10)# 运行主循环
root.mainloop()

Web version (Streamlit)

A Streamlit page with sidebar controls for the device (CPU/GPU), output length, temperature and top-p. Start it with: streamlit run 网页运行.py

# streamlit run 网页运行.py
import os
import time
import streamlit as st
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

torch.manual_seed(0)
text_all = ""

# Load the local tokenizer
model_path = os.path.join(os.path.dirname(__file__), "Qwen2.5")
try:
    tokenizer = AutoTokenizer.from_pretrained(model_path)
except Exception as e:
    st.error(f"分词器加载失败: {e}")
    st.stop()

def load_model(option):
    try:
        if option == "cpu":
            model = AutoModelForCausalLM.from_pretrained(
                model_path, device_map="cpu", torch_dtype=torch.float32)
        elif option == "gpu":
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            if not torch.cuda.is_available():
                st.warning("GPU 不可用,将使用 CPU 运行。")
                model = AutoModelForCausalLM.from_pretrained(
                    model_path, device_map="cpu", torch_dtype=torch.float32)
            else:
                model = AutoModelForCausalLM.from_pretrained(
                    model_path, device_map=device, torch_dtype=torch.float16)
        else:
            model = AutoModelForCausalLM.from_pretrained(
                model_path, device_map="cpu", torch_dtype=torch.float32)
        return model
    except Exception as e:
        st.error(f"模型加载失败: {e}")
        st.stop()

# Subclass TextStreamer so streamed text is written into a Streamlit container
class Streamer(TextStreamer):
    def __init__(self, tokenizer, st_container, skip_prompt=True, skip_special_tokens=True):
        super().__init__(tokenizer, skip_prompt=skip_prompt, skip_special_tokens=skip_special_tokens)
        self.st_container = st_container

    def on_finalized_text(self, text, stream_end=False):
        global text_all
        text_all += text
        self.st_container.write(text_all, unsafe_allow_html=True)

def generate_response(text_number, input_content, text_temperature, text_top_p, place,
                      place_token, place_time, option, place_all):
    model = load_model(option)
    # Make sure a padding token exists
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    # Build the conversation
    messages = [{"role": "user", "content": input_content}]
    input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    start_time = time.time()
    # Create the streaming handler
    streamer = Streamer(tokenizer, place)
    # Encode the input and get the attention mask
    inputs = tokenizer(input_text, padding=True, truncation=True, return_tensors="pt").to(model.device)
    attention_mask = inputs.get('attention_mask')
    input_ids = inputs['input_ids']
    # Generate the whole reply in one call
    with torch.no_grad():
        outputs = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=text_number,
            temperature=text_temperature,
            top_p=text_top_p,
            repetition_penalty=1.02,
            do_sample=True,
            streamer=streamer
        )
    # Decode the full result (used only for the statistics below)
    response = tokenizer.decode(outputs[0][len(input_ids[0]):], skip_special_tokens=True)
    response_tokens = tokenizer(response, return_tensors="pt")
    token_count = len(response_tokens["input_ids"][0])
    # Elapsed time in seconds
    elapsed_time = time.time() - start_time
    # Compute and display the tokens-per-second rate
    if elapsed_time > 0:
        tokens_per_second = token_count / elapsed_time
        place_token.write(f"平均 token: {tokens_per_second:.2f}/s")
    place_time.write(f"耗时: {elapsed_time:.2f}s")
    place_all.write(f"token: {token_count} ")
    print("输出完成")

if __name__ == '__main__':
    st.header(':blue[Qwen2.5] :sunglasses:', divider='rainbow')
    with st.chat_message("assistant"):
        st.write("你好 👋 我是:blue[Qwen2.5]")
    option = st.sidebar.selectbox('选择使用 cpu 还是 gpu', ('cpu', 'gpu'))
    text_number = st.sidebar.slider('文本上限', 10, 500, 100, 10)
    text_temperature = st.sidebar.slider('随机性', 0.1, 1.0, 0.8, 0.1)
    text_top_p = st.sidebar.slider('多样性', 0.1, 1.0, 0.8, 0.1)
    st.sidebar.button(":blue[暂停]")
    prompt = st.chat_input("写些什么……")
    if prompt:
        st.write(f"输入文本为:{prompt}")
        st.write(f'共{len(prompt)} 字符.')
        st.divider()
        with st.spinner('稍等一会...'):
            with st.chat_message("assistant"):
                st.write('提问是:' + prompt)
                place = st.empty()
                place_token = st.empty()
                place_time = st.empty()
                place_all = st.empty()
                generate_response(text_number, prompt, text_temperature, text_top_p,
                                  place, place_token, place_time, option, place_all)
        st.success('完成!')
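Because Streamlit reruns the whole script on every interaction, generate_response above reloads the model for each prompt, which dominates the response time on CPU. Streamlit's st.cache_resource decorator keeps one loaded model per option value across reruns. A sketch that could replace the load_model call (load_model_cached is a name introduced here, not part of the original script):

@st.cache_resource  # cache the loaded model across Streamlit reruns, keyed by `option`
def load_model_cached(option):
    if option == "gpu" and torch.cuda.is_available():
        return AutoModelForCausalLM.from_pretrained(
            model_path, device_map="cuda", torch_dtype=torch.float16)
    return AutoModelForCausalLM.from_pretrained(
        model_path, device_map="cpu", torch_dtype=torch.float32)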
python"># streamlit run 网页运行.py
import os
import time
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
import torchtorch.manual_seed(0)text_all = ""# 加载本地模型和分词器
model_path = os.path.join(os.path.dirname(__file__), "Qwen2.5")
try:tokenizer = AutoTokenizer.from_pretrained(model_path)
except Exception as e:st.error(f"分词器加载失败: {e}")st.stop()def load_model(option):try:if option == "cpu":model = AutoModelForCausalLM.from_pretrained(model_path,device_map="cpu",torch_dtype=torch.float32)elif option == "gpu":device = torch.device("cuda" if torch.cuda.is_available() else "cpu")if not torch.cuda.is_available():st.warning("GPU 不可用,将使用 CPU 运行。")model = AutoModelForCausalLM.from_pretrained(model_path,device_map="cpu",torch_dtype=torch.float32)else:model = AutoModelForCausalLM.from_pretrained(model_path,device_map=device,torch_dtype=torch.float16)else:model = AutoModelForCausalLM.from_pretrained(model_path,device_map="cpu",torch_dtype=torch.float32)return modelexcept Exception as e:st.error(f"模型加载失败: {e}")st.stop()# 继承方法放入一些处理方法
class Streamer(TextStreamer):def __init__(self, tokenizer, st_container, skip_prompt=True, skip_special_tokens=True):super().__init__(tokenizer, skip_prompt=skip_prompt, skip_special_tokens=skip_special_tokens)self.st_container = st_containerdef on_finalized_text(self, text, stream_end=False):global text_alltext_all += textself.st_container.write(text_all, unsafe_allow_html=True)def generate_response(text_number, input_content, text_temperature, text_top_p, place, place_token,place_time, option, place_all):model = load_model(option)# 修改填充标记if tokenizer.pad_token is None:tokenizer.pad_token = tokenizer.eos_token# 构建对话messages = [{"role": "user", "content": input_content}]input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)start_time = time.time()# 创建文本流处理器streamer = Streamer(tokenizer, place)# 生成输入的编码及注意力掩码inputs = tokenizer(input_text, padding=True, truncation=True, return_tensors="pt").to(model.device)attention_mask = inputs.get('attention_mask')input_ids = inputs['input_ids']# 一次性生成所需文本with torch.no_grad():outputs = model.generate(input_ids=input_ids,attention_mask=attention_mask,max_new_tokens=text_number,temperature=text_temperature,top_p=text_top_p,repetition_penalty=1.02,do_sample=True,streamer=streamer)# 解码完整结果(用于统计)response = tokenizer.decode(outputs[0][len(input_ids[0]):], skip_special_tokens=True)response_tokens = tokenizer(response, return_tensors="pt")token_count = len(response_tokens["input_ids"][0])# 计算已经过去的时间(秒)elapsed_time = time.time() - start_time# 计算并打印每秒 token 速率if elapsed_time > 0:tokens_per_second = token_count / elapsed_timeplace_token.write(f"平均 token: {tokens_per_second:.2f}/s")place_time.write(f"耗时: {elapsed_time:.2f}s")place_all.write(f"token: {token_count} ")print("输出完成")if __name__ == '__main__':st.header(':blue[Qwen2.5] :sunglasses:', divider='rainbow')with st.chat_message("assistant"):st.write("你好 👋 我是:blue[Qwen2.5]")option = st.sidebar.selectbox('选择使用 cpu 还是 gpu',('cpu', 'gpu'))text_number = st.sidebar.slider('文本上限', 10, 500, 100, 10)text_temperature = st.sidebar.slider('随机性', 0.1, 1.0, 0.8, 0.1)text_top_p = st.sidebar.slider('多样性', 0.1, 1.0, 0.8, 0.1)st.sidebar.button(":blue[暂停]")prompt = st.chat_input("写些什么……")if prompt:st.write(f"输入文本为:{prompt}")st.write(f'共{len(prompt)} 字符.')st.divider()with st.spinner('稍等一会...'):with st.chat_message("assistant"):st.write('提问是:' + prompt)place = st.empty()place_token = st.empty()place_time = st.empty()place_all = st.empty()generate_response(text_number, prompt, text_temperature, text_top_p, place, place_token,place_time, option, place_all)st.success('完成!')

