https://huggingface.co/docs/transformers/installation#fetch-models-and-tokenizers-to-use-offline
以下代码下载模型到 ./123/chatglm3-6b
# Download the ChatGLM3-6B tokenizer and weights, then save them to
# ./123/chatglm3-6b so later runs can load fully offline.
import os

from huggingface_hub.hf_api import HfFolder
from transformers import AutoModel, AutoTokenizer

# SECURITY: never hard-code a Hugging Face access token in source control —
# the original file leaked a real token here. Read it from the environment;
# the leaked token must be revoked on huggingface.co.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    HfFolder.save_token(hf_token)

tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm3-6b", trust_remote_code=True)
model = AutoModel.from_pretrained("THUDM/chatglm3-6b", trust_remote_code=True, device='cpu')
model = model.eval()

# Persist both pieces; from_pretrained("./123/chatglm3-6b") then works offline.
tokenizer.save_pretrained("./123/chatglm3-6b")
model.save_pretrained("./123/chatglm3-6b")
使用已经下载好的模型:
Ubuntu虚拟机, 32G内存,运行需要15分钟左右。
# Load the previously downloaded model from the local directory and run one
# chat turn on CPU. No token is needed for a local load, so the (leaked)
# HfFolder.save_token call from the original was removed.
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./123/chatglm3-6b", trust_remote_code=True)
model = AutoModel.from_pretrained("./123/chatglm3-6b", trust_remote_code=True, device='cpu')
model = model.eval()  # inference mode: disable dropout etc.

# ChatGLM3's chat() returns a (response, history) tuple; the original assigned
# the whole tuple to `response` and printed it. Unpack so only the answer
# text is printed.
response, history = model.chat(
    tokenizer,
    "小明的书是妹妹的三倍,小明给妹妹9本后他们一样多,妹妹有多少本书",
    history=[],
)
print(response)