人人都笑金角,人人都是金角
推荐文章:
1、https://playwright.dev/python/docs/api/class-playwright //官方文档
2、https://cuiqingcai.com/36045.html //崔庆才教程
3、https://github.com/qqq732004709/ //实战参考
4、https://www.cnblogs.com/carl-/p/15761861.html //实战参考
5、https://www.cnblogs.com/james-wangx/p/16106304.html //实战参考
案例一:tx滑块(playwright)
目标网站:aHR0cHM6Ly93d3cudXJidGl4LmhrL2xvZ2lu
1、创建Chromium实例(如果不设置为 False,默认是无头模式启动浏览器)
async with async_playwright() as p:browser = await p.chromium.launch(headless=False, args=['--start-maximized'])
2、最大化窗口
context = await browser.new_context(viewport={"width": 1920, "height": 1080}, no_viewport=True)
3、新建标签页
page = await context.new_page()
4、加载过检测js
await page.add_init_script(js) #stealth.min.js
5、监听response事件
async def on_response(response):if '/cap_union_new_getcapbysig' in response.url and response.status == 200:#对背景图以及滑块图进行拦截然后保存if 'img_index=1' in response.url:with open("bg_picture.jpg", "wb") as f:f.write(requests.get(response.url).content)elif 'img_index=0' in response.url:with open("cut_picture.png", "wb") as f:f.write(requests.get(response.url).content)print("response.url:", response.url)if 'cap_union_new_verify' in response.url and response.status == 200:#滑块通过后获取参数result = await response.text()print("response.url:", response.url,result)page.on('response',on_response)
6、打开网页、触发滑块
await page.goto('aHR0cHM6Ly93d3cudXJidGl4LmhrL2xvZ2lu')
await page.wait_for_timeout(1500)
await page.click('xpath=//*[@id="root"]/div/div[3]/div/div/div[5]/div/div')
await page.wait_for_timeout(500)
await page.click('xpath=//*[@id="root"]/div/div[3]/div/div/div[8]/div[2]/div')
7、识别坐标
def get_gap_offset():"""识别坐标,滑块的图片需要切割"""det = ddddocr.DdddOcr(det=False, ocr=False, show_ad=False)img = Image.open('cut_picture.png')region = img.crop((160, 508, 243, 595)) #region.save(f'cut_picture.png')with open('bg_picture.jpg', 'rb') as f:target_bytes = f.read()with open('cut_picture.png', 'rb') as f:background_bytes = f.read()res = det.slide_match(target_bytes, background_bytes, simple_target=True)print("识别到的坐标位置:", res)distance = int(res['target'][0])return distance
8、找到滑动起始点,并滑动
async def move_down(page):#定位iframenew_frame = page.frame_locator('iframe[id="tcaptcha_iframe_dy"]')#定位起始点move_tag = new_frame.locator('xpath=//*[@id="tcOperation"]/div[6]')#找到这个元素在当前页面的坐标box = await move_tag.bounding_box() print("目前点击的位置",box)# 讲鼠标移动到到其实元素的中心await page.mouse.move(box["x"] + box["width"] / 2, box["y"] + box["height"] / 2) # 按下鼠标await page.mouse.down() #延时1.2sawait page.wait_for_timeout(1200)# 这里获取到x坐标中心点位置x = box["x"] + box["width"] / 2 #识别到坐标后与网页上的比例distance = int(get_gap_offset()/1.97)-30#轨迹move_distance = get_track_list(distance)print("最终坐标:",distance,"轨迹:",move_distance)for i in move_distance:x += iawait page.mouse.move(x, box["y"])await page.mouse.up()
9、关闭窗口
await browser.close()
至此tx滑块的分析就结束了
然后我还写了一版selenium的,相比于playwright就会麻烦一些
案例一:tx滑块(selenium)
对于我们日常使用而言两者主要区别在于:
1、selenium只支持同步,playwright可以支持异步的
2、操作iframe,selenium来回切换iframe非常麻烦,而playwright只需要定位元素即可
2、在监听请求这一点上,playwright的page.on非常好用,而selenium一般是借助browsermobproxy通过代理的方式进行拦截使用方式:(1)https://github.com/lightbody/browsermob-proxy/releases,下载并解压(2)安装证书,参考链接https://www.bilibili.com/read/cv21263644/(3)调用方式server = Server('browsermob-proxy-2.1.4/bin/browsermob-proxy')server.start()proxy = server.create_proxy(params={'trustAllServers':'true'})option = ChromeOptions()option.add_argument('--proxy-server={0}'.format(self.proxy.proxy))driver = webdriver.Chrome(options=option)
这里就不细致讲解了,主要代码如下
class Tencent():def __init__(self):server = Server('browsermob-proxy-2.1.4/bin/browsermob-proxy')server.start()self.proxy = server.create_proxy(params={'trustAllServers':'true'})self.url = 'aHR0cHM6Ly93d3cudXJidGl4LmhrL2xvZ2lu'option = ChromeOptions()option.add_experimental_option('excludeSwitches', ['enable-automation'])option.add_experimental_option('useAutomationExtension', False)option.add_argument('--proxy-server={0}'.format(self.proxy.proxy))self.proxy.new_har(options={'captureContent': True,'captureHeaders': True})self.driver = webdriver.Chrome(options=option)self.driver.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {'source': 'Object.defineProperty(navigator,"webdriver",{get: () => undefined})'})with open('stealth.min.js') as f:js = f.read()self.driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {"source": js})self.driver.maximize_window()self.det = ddddocr.DdddOcr(det=False, ocr=False, show_ad=False)self.headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36'}def index(self):"""主流程"""self.driver.get(self.url)time.sleep(5)print("正在打开网页~~~")self.driver.find_element(by=By.XPATH, value=f'//*[@id="root"]/div/div[3]/div/div/div[5]/div/div').click()time.sleep(1)self.driver.find_element(by=By.XPATH, value=f'//*[@id="root"]/div/div[3]/div/div/div[8]/div[2]/div').click()time.sleep(5)self.driver.switch_to.frame('tcaptcha_iframe_dy')bg_style = self.driver.find_element('id','slideBg').get_attribute("style")cut_style = self.driver.find_element(by=By.XPATH, value=f'//*[@id="tcOperation"]/div[8]').get_attribute("style")bg_url = re.findall('url\("(.*?)"\)',str(bg_style))[0]cut_url = re.findall('url\("(.*?)"\)', str(cut_style))[0]print("获取到背景图片url:",bg_url)print("获取到滑块图片url:",cut_url)with open("bg_picture.jpg", "wb") as f:f.write(requests.get(bg_url).content)with open("cut_picture.png", "wb") as f:f.write(requests.get(cut_url).content)def get_gap_offset(self):"""识别坐标"""img = Image.open('cut_picture.png')region = img.crop((160, 508, 243, 595)) #region.save(f'cut_picture.png')with open('bg_picture.jpg', 'rb') as f:target_bytes = f.read()with open('cut_picture.png', 'rb') as f:background_bytes = f.read()res = self.det.slide_match(target_bytes, background_bytes, simple_target=True)print("识别到的坐标位置:",res)distance = int(res['target'][0])return distancedef get_track(self, offset):'''计算滑块的移动轨迹'''offset -= 30 # 滑块并不是从0开始移动,有一个初始值a = offset / 4track = [a, a, a, a]return trackdef shake_mouse(self):"""模拟人手释放鼠标抖动"""ActionChains(self.driver).move_by_offset(xoffset=-2, yoffset=0).perform()ActionChains(self.driver).move_by_offset(xoffset=2, yoffset=0).perform()def operate_slider(self, track):"""拖动滑块:param track: 运动轨迹"""# 定位到拖动按钮slider_bt = self.driver.find_element(by=By.XPATH,value ='//*[@id="tcOperation"]/div[6]')# 点击拖动按钮不放ActionChains(self.driver).click_and_hold(slider_bt).perform()# 按正向轨迹移动for i in track:ActionChains(self.driver).move_by_offset(xoffset=i, yoffset=0).perform()time.sleep(random.random() / 100) # 每移动一次随机停顿0-1/100秒之间骗过了极验,通过率很高time.sleep(random.random())# 按逆向轨迹移动back_tracks = [-1, -0.5, -1]for i in back_tracks:time.sleep(random.random() / 100)ActionChains(self.driver).move_by_offset(xoffset=i, yoffset=0).perform()# 模拟人手抖动self.shake_mouse()time.sleep(random.random())# 松开滑块按钮ActionChains(self.driver).release().perform()time.sleep(2)def login(self):'''实现主要的登陆逻辑'''self.index()distance = self.get_gap_offset()distance = int(distance/1.97)track = self.get_track(distance)self.operate_slider(track)result = self.proxy.harfor entry in result['log']['entries']:if entry['request']['url'] == 'https://t.captcha.qq.com/cap_union_new_verify':print(entry['request']['url'],entry['response']['content'])print(entry['response']['content']['text'])
案例二:阿里225(playwright)
目标网站:aHR0cHM6Ly9wYXNzcG9ydC5kYW1haS5jbi9sb2dpbg==
1、前面的初始化流程
async with async_playwright() as p:browser = await p.chromium.launch(headless=False, args=['--start-maximized'])context = await browser.new_context(viewport={"width": 1920, "height": 1080}, no_viewport=True)context.set_default_timeout(8000)page = await context.new_page()await page.add_init_script(js)print("打开网页~~~")await page.goto('aHR0cHM6Ly9wYXNzcG9ydC5kYW1haS5jbi9sb2dpbg==')await page.wait_for_timeout(1000)page.on('response', on_response)
2、输入账号密码
#这里需要注意这个iframe,前面的iframe和后面出滑块之后的iframe属于包含关系new_frame = page.frame_locator('iframe[id="alibaba-login-box"]')await page.wait_for_timeout(1000)await new_frame.locator('#fm-login-id').fill("正确的手机号码")await page.wait_for_timeout(1000)await new_frame.locator('#fm-login-password').fill("错误的密码") await page.wait_for_timeout(1000)await new_frame.get_by_role("button", name="登录").click()await page.wait_for_timeout(1000)
3、强制弹出滑块,并判断
#这里为了让它出滑块要先输出错误的密码,然后一直click,直到出滑块为止
while True:try:new_frame2 = new_frame.frame_locator('iframe[id="baxia-dialog-content"]')move_tag = new_frame2.locator('xpath=//*[@id="nc_1_n1z"]')number = await move_tag.count()if number>=1:box = await move_tag.bounding_box()print("目前点击的位置", box)breakelse:print(f"没出滑块,重新点击")await page.wait_for_timeout(1000)await new_frame.get_by_role("button", name="登录").click()except:await new_frame.get_by_role("button", name="登录").click()
4、定位以及滑动
async def move_down(page,box):await page.mouse.move(box["x"] + box["width"] / 2, box["y"] + box["height"] / 2)await page.mouse.down() # 按下鼠标await page.wait_for_timeout(1200)x = box["x"] + box["width"] / 2 # 这里获取到x坐标中心点位置move_distance = get_track_list(265)print("轨迹:",move_distance)for i in move_distance:x += iawait page.mouse.move(x, box["y"])await page.mouse.up()await page.wait_for_timeout(500)
至此ali滑块的分析就结束了
当脚下的路走起来比以前轻松了,是不是该问自己是否在走下坡路了,我也不知道呢