vLLM version
0.6.6
Request example
python">def send_call_json(prompt="You are a helpful assistant.",msg="",top_p=1.0,temperature=0.7):import requests, json, traceback, timeretry_count = 5data = {"model":"Qwen2__5-72B-Instruct","messages": [{"role": "system","content": prompt},{"role": "user","content": msg},],"stream":0,"stop": ["<|EOT|>", "<|im_end|>"],"top_p":top_p,"repetition_penalty":1.05,"temperature":temperature,# 此参数用以强制返回json"response_format": {"type": "json_object"}}for i in range(0, retry_count):try:url = "自己的地址"headers = {"Content-Type": "application/json"}print(f"请求\n{data}")response = requests.post(url,data=json.dumps(data, ensure_ascii=False).encode('utf8'),headers=headers,timeout=300)response.encoding = 'utf-8'print(f"响应\n{response.text}")response_json = response.json()content = response_json["choices"][0]["message"]["content"]return contentexcept Exception:'''模型不停输出导致超时时,修改下面两个参数为默认值'''data["top_p"] = 1.0data["temperature"] = 0.7print(f"报错再次尝试 {i} {traceback.format_exc()}")time.sleep(5)return ""