效果如下:
实现流程:
一、Pyppeteer打开网址
import asyncio
from pyppeteer import launch
import pdb
import random

# Start Pyppeteer with headless mode turned off.
# NOTE: this is an excerpt; the `await` statements run inside `async def main()`
# in the complete listing further down.
browser = await launch({'headless': False})
page = await browser.newPage()
# Open the login page.
await page.goto('http://localhost:8080/login.html')
二、调用后台 Spring Boot 接口,由 Spring Boot 调用验证码 OCR 识别接口,并返回识别结果
核心代码如下:
java">public static String getImgWord(String body) {// 【1】请求地址 支持http 和 https 及 WEBSOCKETString host = "https://imgurlocr.market.alicloudapi.com";// 【2】后缀String path = "/urlimages";// 【3】开通服务后 买家中心-查看AppCode,有100次免费String appcode = "xxxxx";// 【4】请求参数,详见文档描述String urlSend = host + path; // 【5】拼接请求链接try {URL url = new URL(urlSend);HttpURLConnection httpURLCon = (HttpURLConnection) url.openConnection();httpURLCon.setRequestMethod("POST");httpURLCon.setRequestProperty("Authorization", "APPCODE " + appcode);// 格式StringBuilder postData = new StringBuilder(body);byte[] postDataBytes = postData.toString().getBytes("UTF-8");httpURLCon.setDoOutput(true);OutputStream out = httpURLCon.getOutputStream();out.write(postDataBytes);out.close();int httpCode = httpURLCon.getResponseCode();if (httpCode == 200) {String json = read(httpURLCon.getInputStream());System.out.println("正常请求计费(其他均不计费)");System.out.println("获取返回的json:");System.out.print(json);return json.substring(json.indexOf("words\":\"")).replace("words\":\"", "").replace("\"}]}", "");} else {Map<String, List<String>> map = httpURLCon.getHeaderFields();String error = map.get("X-Ca-Error-Message").get(0);if (httpCode == 400 && error.equals("Invalid AppCode")) {System.out.println("AppCode错误 ");} else if (httpCode == 400 && error.equals("Invalid Url")) {System.out.println("请求的 Method、Path 或者环境错误");} else if (httpCode == 400 && error.equals("Invalid Param Location")) {System.out.println("参数错误");} else if (httpCode == 403 && error.equals("Unauthorized")) {System.out.println("服务未被授权(或URL和Path不正确)");} else if (httpCode == 403 && error.equals("Quota Exhausted")) {System.out.println("套餐包次数用完 ");} else if (httpCode == 403 && error.equals("Api Market Subscription quota exhausted")) {System.out.println("套餐包次数用完,请续购套餐");} else {System.out.println(httpCode);System.out.println("参数名错误 或 其他错误");System.out.println(error);}return error;}} catch (MalformedURLException e) {System.out.println("URL格式错误");return e.getMessage();} catch 
(UnknownHostException e) {System.out.println("URL地址错误");return e.getMessage();} catch (Exception e) {// 打开注释查看详细报错异常信息// e.printStackTrace();return e.getMessage();}}/** 读取返回结果*/private static String read(InputStream is) throws IOException {StringBuffer sb = new StringBuffer();BufferedReader br = new BufferedReader(new InputStreamReader(is));String line = null;while ((line = br.readLine()) != null) {line = new String(line.getBytes(), "utf-8");sb.append(line);}br.close();return sb.toString();}
三、将验证码识别结果自动填充到input组件
python"># 执行JavaScript函数并传递参数,等待结果
response_text = await page.evaluate(postFunction, data)
print(response_text)input_verify_code = await page.xpath("//input[@name='verifyCode']")
await input_verify_code[0].type(response_text, {'delay': random.randint(100, 151) - 50})input_username = await page.xpath("//input[@name='username']")
await input_username[0].type('admin', {'delay': random.randint(100, 151) - 50})input_password = await page.xpath("//input[@name='password']")
await input_password[0].type('123456', {'delay': random.randint(100, 151) - 50})
四、自动登录
# 自动点击"立即登录"按钮
# Look up the submit button by XPath (excerpt; runs inside `async def main()`
# in the complete listing further down).
button = await page.xpath('//button[@type="submit"]')
# If a button was found, press Enter on the first match.
if button:await button[0].press('Enter')
Python 完整代码:
import asyncio
from pyppeteer import launch
import pdb
import random


def _typing_delay():
    """Return a randomised per-keystroke delay in the range 50-101."""
    return random.randint(100, 151) - 50


async def _first_match(page, xpath):
    """Return the first element matching *xpath* on *page*.

    Raises:
        IndexError: if nothing matches (same exception type as indexing the
            empty result list, but with a message naming the XPath).
    """
    matches = await page.xpath(xpath)
    if not matches:
        raise IndexError(f"no element matches XPath {xpath!r}")
    return matches[0]


async def _type_into(page, xpath, text):
    """Type *text* into the first element matching *xpath*, key by key."""
    field = await _first_match(page, xpath)
    await field.type(text, {'delay': _typing_delay()})


async def main():
    """Open the login page, solve the captcha via the backend, and log in.

    Steps: launch a non-headless browser, open the login page, read the
    captcha image `src`, POST it from inside the page to the backend
    `/getImgWord` endpoint, type the returned text plus the credentials into
    the form, and press Enter on the submit button if one exists. The browser
    is always closed before returning.
    """
    # Start Pyppeteer.
    browser = await launch({'headless': False})
    try:
        page = await browser.newPage()
        # Open the login page.
        await page.goto('http://localhost:8080/login.html')
        # Fixed pause before reading the page (presumably to let the captcha
        # image finish loading).
        await asyncio.sleep(5)

        # Get the captcha image element and read its `src`.
        img = await _first_match(page, "//*[@id='vCode']")
        src = await (await img.getProperty('src')).jsonValue()

        # JavaScript run inside the page: POST `data` as JSON and resolve to
        # the response body as text.
        postFunction = """(data) => {
            // 这里使用fetch API发起POST请求
            return fetch('http://localhost:8080/getImgWord', {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                },
                body: JSON.stringify(data)
            }).then(response => response.text())
        }"""
        # Argument handed to the JavaScript function.
        data = {'img': src}
        # Run the JavaScript function with the argument and wait for the result.
        response_text = await page.evaluate(postFunction, data)
        print(response_text)

        await _type_into(page, "//input[@name='verifyCode']", response_text)
        await _type_into(page, "//input[@name='username']", 'admin')
        await _type_into(page, "//input[@name='password']", '123456')

        # Submit the form: press Enter on the submit button if one was found.
        button = await page.xpath('//button[@type="submit"]')
        if button:
            await button[0].press('Enter')
    finally:
        # Close explicitly so no browser cleanup is left for interpreter
        # exit, after asyncio.run() has already closed the event loop.
        await browser.close()


if __name__ == "__main__":
    # asyncio.run() replaces the deprecated
    # asyncio.get_event_loop().run_until_complete(...) pattern.
    asyncio.run(main())
完整资源包:
https://download.csdn.net/download/svygh123/89254844