1.12306首页
url:https://www.12306.cn/index/index.html
用到的包类
from selenium import webdriver
from selenium.webdriver import ActionChains # 用于控制鼠标滑动
from chaojiying import Chaojiying_Client # 超级鹰验证码识别
import base64
import re
from xml import etree
import time
import config # 账号密码配置文件
from selenium.webdriver.common.keys import Keys # 模拟点击
import requests
import json
# Home-page URL; in the complete listing this is assigned in Login.__init__.
# self.url = 'https://www.12306.cn/index/index.html'
# Start a Chrome session and open the home page.
# NOTE(review): self.url is only assigned in the commented-out line above - confirm it is set before this runs.
self.driver = webdriver.Chrome()
self.driver.get(url = self.url)
2.进入登录页面
点击登录进入登录页面
# Click the first link inside the J-header-login element to open the login page.
self.driver.find_element_by_xpath('//*[@id="J-header-login"]/a[1]').click()
选择账号密码登录
# Click the second tab of the login box to choose account/password login.
self.driver.find_element_by_xpath('/html/body/div[2]/div[2]/ul/li[2]/a').click()
输入账号密码
# Type the 12306 account name and password into their input boxes.
self.driver.find_element_by_id('J-userName').send_keys(self.tk_user)
self.driver.find_element_by_id('J-password').send_keys(self.tk_pw)
3.处理图片验证码
超级鹰处理验证码
验证码的识别我采用的是超级鹰第三方软件识别
准备工作
进入超级鹰,完成账号密码注册
进入用户中心,并购买题分
(一般1-10块即可)
生成软件ID
并在开发文档下载相应语言的开发案例,这里我使用python,将文件解压到项目目录中
chaojiying.py
#!/usr/bin/env python
# coding:utf-8
from hashlib import md5

import requests


class Chaojiying_Client(object):
    """Small HTTP client for the Chaojiying captcha-recognition service.

    Args:
        username: Chaojiying account name.
        password: Plain-text account password; only its MD5 hex digest is
            stored and sent with requests.
        soft_id: Software ID generated in the Chaojiying user centre.
        timeout: Seconds to wait for each HTTP request before requests
            raises a timeout error (requests itself never times out by
            default, so a stalled upload would otherwise hang forever).
    """

    def __init__(self, username, password, soft_id, timeout=60):
        self.username = username
        password = password.encode('utf8')
        self.password = md5(password).hexdigest()
        self.soft_id = soft_id
        self.timeout = timeout
        # Parameters sent with every request.
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def PostPic(self, im, codetype):
        """Upload a captcha image and return the decoded JSON response.

        Args:
            im: Image bytes.
            codetype: Captcha type, see http://www.chaojiying.com/price.html

        Returns:
            The JSON body of the response as parsed by ``Response.json()``.
        """
        params = {
            'codetype': codetype,
        }
        params.update(self.base_params)
        files = {'userfile': ('ccc.jpg', im)}
        r = requests.post(
            'http://upload.chaojiying.net/Upload/Processing.php',
            data=params,
            files=files,
            headers=self.headers,
            timeout=self.timeout,
        )
        return r.json()

    def ReportError(self, im_id):
        """Report a wrongly recognised captcha and return the JSON response.

        Args:
            im_id: Picture ID of the captcha whose answer was wrong.
        """
        params = {
            'id': im_id,
        }
        params.update(self.base_params)
        r = requests.post(
            'http://upload.chaojiying.net/Upload/ReportError.php',
            data=params,
            headers=self.headers,
            timeout=self.timeout,
        )
        return r.json()


if __name__ == '__main__':
    # Generate a software ID under "User centre >> Software ID" and use it in place of 96001.
    chaojiying = Chaojiying_Client('超级鹰用户名', '超级鹰密码', '软件ID')
    # Replace a.jpg with the path of a local image file (on Windows the path sometimes needs //).
    with open('a.jpg', 'rb') as image_file:
        im = image_file.read()
    # 1902 is the captcha type (see the price list on the official site).
    # On Python 3.4+ print needs parentheses.
    print(chaojiying.PostPic(im, 1902)['pic_str'])
实现原理
超级鹰识别验证码类型
原理就是我们将验证码图片截取下来保存到指定的位置上,在保存之后,我们将保存的图片信息传递给第三方软件,在他识别之后会返回给我们x,y坐标,这些x,y坐标就是图片需要点击的x,y坐标,有一点需要注意,这些x,y是相对坐标,是相对于图片左上角(0,0)坐标而言的。
具体验证码坐标获取流程
首先对12306登录页面进行全屏截图,再定位到验证码图片,借助超级鹰进行识别。12306的验证码比较复杂,可以根据超级鹰的价格体系选取对应的验证码类型,本文选择9004。
# save_screenshot: capture the whole browser window
self.driver.save_screenshot('all.png')
# Only `import time` is in scope, so the call must be qualified.
time.sleep(1)
# Locate the captcha image and save a screenshot of just that element
img = self.driver.find_element_by_xpath('//*[@id="J-loginImg"]')
img.screenshot('./code.png')
# Submit the captcha image to Chaojiying for recognition
chaojiying = Chaojiying_Client('超级鹰用户名', '超级鹰密码', '软件ID')
# Read the image through a context manager so the file handle is closed.
with open('./code.png', 'rb') as image_file:
    im = image_file.read()
# id = chaojiying.PostPic(im, 9004)['pic_id']
# 9004 is the captcha type sent to Chaojiying. PostPic returns a dict whose
# 'pic_str' entry holds the coordinates that have to be clicked.
result = chaojiying.PostPic(im, 9004)['pic_str']
result返回值类似:240,68|121,84。下面我们需要对坐标进行分割并存储到列表中,类似:[[240,68],[121,84]]。同时注意到这个坐标是相对坐标,所以需要使用动作链ActionChains.move_to_element_with_offset进行相对位置的定位,再选中正确答案。
# Split "x1,y1|x2,y2" into [['x1', 'y1'], ['x2', 'y2']]. A single point
# "x1,y1" contains no '|' and simply yields a one-element list, so no
# separate branch is needed.
position_list = [point.split(',') for point in result.split('|')]
print(position_list)  # [['107', '140'], ['253', '138']]
# Click every answer point: move to the offset relative to the captcha
# image with ActionChains.move_to_element_with_offset, then click.
for point in position_list:
    # The coordinates arrive as strings and must be converted to int.
    x = int(point[0])
    y = int(point[-1])
    # perform() executes the queued actions.
    ActionChains(self.driver).move_to_element_with_offset(img, x, y).click().perform()
    time.sleep(1)
选择好验证码后,点击登录
# Click the login button (element with id J-login) after the captcha points have been selected.
self.driver.find_element_by_xpath('//*[@id="J-login"]').click()
但是在运行过程中,你会发现有时候这个第三方软件并不能特别准确的识别验证码,有时也会出错,或者在图片保存的时候,有时也会出现图片没有正确保存的情况,对于这种情况,我们就想设置一个循环,将这个操作放到循环里面,只有验证码正确识别之后,我们才给它进行下一步
while True:
    try:
        # Picture captcha
        # Screenshot the whole page
        self.driver.save_screenshot('./all_screen.png')
        # Screenshot only the captcha area
        img = self.driver.find_element_by_id('J-loginImgArea')
        img.screenshot('./code1.png')
        position = self.process_cjy('./code1.png')
        # print(position)  # 107,140|253,138
        # position holds the x,y of every answer point as "x1,y1|x2,y2|x3,y3"
        # (or just "x1,y1" for one point); turn it into [[x1, y1], [x2, y2], ...].
        # split('|') already returns a single element when there is no '|'.
        position_list = [point.split(',') for point in position.split('|')]
        print(position_list)  # [['107', '140'], ['253', '138']]
        # Move to each point relative to the captcha image and click it.
        for point in position_list:
            x = int(point[0])
            y = int(point[-1])
            # perform() executes the queued actions.
            ActionChains(self.driver).move_to_element_with_offset(img, x, y).click().perform()
            time.sleep(1)
        # Click the login button
        self.driver.find_element_by_xpath('//*[@id="J-login"]').click()
        time.sleep(1)
    except Exception:
        # Any failure in the steps above ends the retry loop and is reported
        # as success. Catching Exception rather than using a bare except
        # lets Ctrl-C / SystemExit propagate instead of being reported as
        # a successful verification.
        print('验证成功')
        break


def process_cjy(self, img_path):
    """Send the image at img_path to Chaojiying and return the click coordinates.

    Args:
        img_path: Path of the captcha screenshot to upload.

    Returns:
        The 'pic_str' entry of the PostPic response: the x,y of every point
        in the form "x1,y1|x2,y2|x3,y3".
    """
    # Log in to Chaojiying
    cjy = Chaojiying_Client(self.cjy_user, self.cjy_pw, self.cjy_id)
    with open(img_path, 'rb') as f:
        img = f.read()
    # 9004 is the captcha type sent to Chaojiying; the response is a dict
    # whose 'pic_str' entry holds the coordinates to click.
    return cjy.PostPic(img, 9004)['pic_str']
4.处理滑动验证码
图片验证码验证成功后,点击登录,出现滑动验证码
我们定位到滑块这里,然后我们会发现在验证码识别之前和识别之后,这个滑块代码其实一直都在html中,只不过没有正确识别之前,里面的属性display的值为none,而正确识别之后呢,display属性就没了。所以其实我们只需要判断这个里面的display属性值是不是none,我们就可以判断有没有正确识别成功了
正在识别
识别时候,滑块的相对位置移动300即可完成解锁
# Select the slider handle
span = self.driver.find_element_by_xpath('//*[contains(@class,"nc_iconfont btn_slid")]')
# Create the action chain
action = ActionChains(self.driver)
# perform() means "execute the queued actions".
# Press and hold the mouse button on the target element
action.click_and_hold(span).perform()
# Move the pointer to offset (400, 0) relative to the target element while the button is held
action.move_to_element_with_offset(span , 400, 0).perform()
# Release the mouse button that was held down above
action.release().perform()
验证失败
但是也存在验证失败的情况
可以发现两个提示信息的xpath并不相同
# Hint text when the slide failed
info = self.driver.find_element_by_xpath('//*[@id="J-slide-passcode"]/div/div/div/span').text
# Hint text when the slide did not fail
# NOTE(review): this XPath is identical to the one above although the surrounding text says the two differ - confirm the intended XPath.
info = self.driver.find_element_by_xpath('//*[@id="J-slide-passcode"]/div/div/div/span').text
但是12306会识别是否为爬虫,导致滑动正确却验证失败,解决办法如下:
self.driver = webdriver.Chrome()
# Avoid being detected as automation, which makes the slide fail:
# the registered script redefines navigator.webdriver with a getter that returns undefined.
self.driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {"source": """Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"""
})
实现代码
# 滑动验证码
while True:
    try:
        # Read the hint text shown in the slider widget.
        info = self.driver.find_element_by_xpath('//*[@id="J-slide-passcode"]/div/div/div/span').text
        print(info)
        if info == '哎呀,出错了,点击刷新再来一次':
            # The previous slide failed: click "refresh" before sliding again.
            self.driver.find_element_by_xpath('//*[@id="J-slide-passcode"]/div/span/a').click()
        time.sleep(1)
        # Select the slider handle
        span = self.driver.find_element_by_xpath('//*[contains(@class,"nc_iconfont btn_slid")]')
        action = ActionChains(self.driver)
        # Press and hold the slider, then drag it 400px to the right.
        action.click_and_hold(span).perform()
        # drag_and_drop_by_offset releases the button itself, so no separate
        # release() is queued (the old one was never performed anyway).
        action.drag_and_drop_by_offset(span, 400, 0).perform()
        time.sleep(7)
    except Exception:
        # Once the slider elements can no longer be found the page has
        # navigated away, which is treated as a successful login. Catching
        # Exception instead of a bare except keeps Ctrl-C working.
        print('登录成功')
        print(self.driver.current_url)  # https://kyfw.12306.cn/otn/view/index.html
        break
5.完整代码
config.py
# 12306 account name and password
TICKET_USER = ''
TICKET_PASSWORD = ''
# Chaojiying account name, password and software ID
CHAOJIYING_USER = ''
CHAOJIYING_PASSWORD = ''
CHAOJIYING_ID = ''
login.py
from selenium import webdriver
from selenium.webdriver import ActionChains # 用于控制鼠标滑动
from chaojiying import Chaojiying_Client # 超级鹰验证码识别
import base64
import re
from xml import etree
import time
import config  # account / password configuration file


class Login(object):
    """Drive a Chrome session through the 12306 account/password login.

    The flow is: open the login form, type the credentials, solve the
    click-the-picture captcha with Chaojiying, then drag the slider captcha.
    """

    def __init__(self):
        # 12306 and Chaojiying credentials are read from config.py.
        self.tk_user = config.TICKET_USER
        self.tk_pw = config.TICKET_PASSWORD
        self.cjy_user = config.CHAOJIYING_USER
        self.cjy_pw = config.CHAOJIYING_PASSWORD
        self.cjy_id = config.CHAOJIYING_ID  # software ID
        self.url = 'https://www.12306.cn/index/'
        self.driver = webdriver.Chrome()
        # Avoid being detected as automation, which makes the slide fail.
        self.driver.execute_cdp_cmd(
            "Page.addScriptToEvaluateOnNewDocument",
            {"source": """Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"""},
        )

    def login(self):
        """Fill in the login form, then pass the picture and slider captchas."""
        # Click the login button on the home page
        self.driver.find_element_by_xpath('//*[@id="J-header-login"]/a[1]').click()
        time.sleep(1)
        # After the redirect, switch to account/password login
        self.driver.find_element_by_xpath('/html/body/div[2]/div[2]/ul/li[2]/a').click()
        # Account name and password
        self.driver.find_element_by_id('J-userName').send_keys(self.tk_user)
        self.driver.find_element_by_id('J-password').send_keys(self.tk_pw)
        self._solve_picture_captcha()
        self._solve_slider_captcha()

    def _solve_picture_captcha(self):
        """Click the captcha answers and press login until a step fails."""
        while True:
            try:
                # Screenshot the whole page
                self.driver.save_screenshot('./all_screen.png')
                # Screenshot only the captcha area
                img = self.driver.find_element_by_id('J-loginImgArea')
                img.screenshot('./code1.png')
                position = self.process_cjy('./code1.png')
                # print(position)  # 107,140|253,138
                # "x1,y1|x2,y2" -> [[x1, y1], [x2, y2]]; a single point has
                # no '|' and split('|') then returns one element.
                position_list = [point.split(',') for point in position.split('|')]
                print(position_list)  # [['107', '140'], ['253', '138']]
                # Move to each point relative to the captcha image and click it.
                for point in position_list:
                    x = int(point[0])
                    y = int(point[-1])
                    # perform() executes the queued actions.
                    ActionChains(self.driver).move_to_element_with_offset(img, x, y).click().perform()
                    time.sleep(1)
                # Click the login button
                self.driver.find_element_by_xpath('//*[@id="J-login"]').click()
                time.sleep(1)
            except Exception:
                # A failure in the steps above ends the loop and is reported
                # as success. Exception (not a bare except) lets
                # Ctrl-C / SystemExit propagate.
                print('验证成功')
                break

    def _solve_slider_captcha(self):
        """Drag the slider (refreshing it after a failure) until a step fails."""
        while True:
            try:
                # Read the hint text shown in the slider widget.
                info = self.driver.find_element_by_xpath('//*[@id="J-slide-passcode"]/div/div/div/span').text
                print(info)
                if info == '哎呀,出错了,点击刷新再来一次':
                    # The previous slide failed: click "refresh" first.
                    self.driver.find_element_by_xpath('//*[@id="J-slide-passcode"]/div/span/a').click()
                time.sleep(1)
                # Select the slider handle
                span = self.driver.find_element_by_xpath('//*[contains(@class,"nc_iconfont btn_slid")]')
                action = ActionChains(self.driver)
                # Press and hold the slider, then drag it 400px to the right.
                action.click_and_hold(span).perform()
                # drag_and_drop_by_offset releases the button itself; the
                # former trailing release() was never performed.
                action.drag_and_drop_by_offset(span, 400, 0).perform()
                time.sleep(7)
            except Exception:
                # The slider can no longer be found once the page has
                # navigated away, which is treated as a successful login.
                print('登录成功')
                print(self.driver.current_url)  # https://kyfw.12306.cn/otn/view/index.html
                break

    def process_cjy(self, img_path):
        """Send the image at img_path to Chaojiying and return the click coordinates.

        Args:
            img_path: Path of the captcha screenshot to upload.

        Returns:
            The 'pic_str' entry of the PostPic response: the x,y of every
            point in the form "x1,y1|x2,y2|x3,y3".
        """
        # Log in to Chaojiying
        cjy = Chaojiying_Client(self.cjy_user, self.cjy_pw, self.cjy_id)
        with open(img_path, 'rb') as f:
            img = f.read()
        # 9004 is the captcha type sent to Chaojiying; the response is a
        # dict whose 'pic_str' entry holds the coordinates to click.
        return cjy.PostPic(img, 9004)['pic_str']

    def run(self):
        """Open the home page, log in, then always close the browser."""
        try:
            self.driver.get(url=self.url)
            time.sleep(1)
            self.login()
            time.sleep(3)
        finally:
            # Quit even when a step above raises, so no Chrome process is
            # left running.
            self.driver.quit()


if __name__ == '__main__':
    login = Login()
    login.run()
6.查询车票
登录成功后需要点击弹窗的确定,才可进行下一步
# Click the confirmation dialog shown after a successful login
self.driver.find_element_by_xpath('//*[@class="dzp-confirm"]/div/a').click()
当需要购票时候,点击导航窗格,隐藏菜单显示后,才可以通过selenium进行点击
参考
# Choose one-way ticket purchase
# The navigation-bar menu is hidden: hover over it with an action chain so the hidden entry appears and can be clicked
nav_bar = self.driver.find_element_by_xpath('//*[@id="J-chepiao"]')
ActionChains(self.driver).move_to_element(nav_bar).perform()
self.driver.find_element_by_xpath('//*[contains(text(), "单程")]').click()
进入单程购票页面,点击确定
# After arriving on the booking page, click "OK" in the warning dialog that pops up
self.driver.find_element_by_xpath('//*[@id="qd_closeDefaultWarningWindowDialog_id"]').click()
输入出发地、目的地、日期等进行查询
参考
# Start the ticket query (one-way by default) - departure city
# To type into the departure, destination and date boxes, click the input box first
start_city = input('请输入出发城市:')
self.driver.find_element_by_xpath('//*[@id="fromStationText"]').click()
self.driver.find_element_by_xpath('//*[@id="fromStationText"]').send_keys(start_city)
# After typing, send_keys(Keys.ENTER) simulates pressing Enter. Unlike the account/password boxes, which need no Enter, the text here does not take effect without it
self.driver.find_element_by_xpath('//*[@id="fromStationText"]').send_keys(Keys.ENTER)
# Destination
end_city = input('请输入到达城市:')
self.driver.find_element_by_xpath('//*[@id="toStationText"]').click()
self.driver.find_element_by_xpath('//*[@id="toStationText"]').send_keys(end_city)
self.driver.find_element_by_xpath('//*[@id="toStationText"]').send_keys(Keys.ENTER)
# Departure date
# Remove the readonly attribute of the date box so the date can be typed below
js = """document.getElementById('train_date').removeAttribute('readonly')"""
self.driver.execute_script(js)
self.driver.find_element_by_id("train_date").clear() # clear the default date value
train_date = input('请输入出发日期,如1999-07-09形式:')
self.driver.find_element_by_id("train_date").send_keys(train_date)
self.driver.find_element_by_id("train_date").send_keys(Keys.ENTER)
# Hide the detailed date list below the date box
js = """document.querySelector('body > div.cal-wrap').style.display='none'"""
self.driver.execute_script(js)
# Choose the train type to query - high-speed rail by default
self.driver.find_element_by_xpath('//*[@id="_ul_station_train_code"]/li[1]').click()
# Click "query"
self.driver.find_element_by_xpath('//*[@id="query_ticket"]').click()
time.sleep(2)
开始查询
参考
车票信息抓包
票价抓包
def search_ticket_info(self):
    """Query one-way tickets interactively and return one dict per train.

    Prompts on stdin for departure city, arrival city and date until
    self.check_info accepts them, runs the query on the booking page, then
    opens the leftTicket/query URL in a new window and parses its JSON.

    Returns:
        A list with one dict per entry of data.result in the query
        response. Seat fields are one-element lists holding the raw
        availability text.
    """
    driver = self.driver
    # Click the confirmation dialog shown after a successful login
    driver.find_element_by_xpath('//*[@class="dzp-confirm"]/div/a').click()
    # Choose one-way purchase. The navigation menu is hidden, so hover over
    # it with an action chain before clicking the entry.
    nav_bar = driver.find_element_by_xpath('//*[@id="J-chepiao"]')
    ActionChains(driver).move_to_element(nav_bar).perform()
    driver.find_element_by_xpath('//*[contains(text(), "单程")]').click()
    # On the booking page, click "OK" in the warning dialog that pops up
    driver.find_element_by_xpath('//*[@id="qd_closeDefaultWarningWindowDialog_id"]').click()
    # City names and their station codes; indexed in parallel below.
    city_name, city_eg_name = self.get_city_name_id()
    while True:
        start_city = input('请输入出发城市:')
        self._enter_station('fromStationText', start_city)
        end_city = input('请输入到达城市:')
        self._enter_station('toStationText', end_city)
        train_date = input('请输入出发日期,如1999-09-09形式:')
        check_result = self.check_info(city_name, start_city, end_city, train_date)
        if check_result:
            print('验证通过')
            break
        print('输入有误,请重新输入!')
    # Departure date: remove the readonly attribute so the date can be typed.
    js = """document.getElementById('train_date').removeAttribute('readonly')"""
    driver.execute_script(js)
    date_box = driver.find_element_by_id("train_date")
    date_box.clear()  # clear the default date value
    date_box.send_keys(train_date)
    date_box.send_keys(Keys.ENTER)
    # Hide the detailed date list below the date box
    js = """document.querySelector('body > div.cal-wrap').style.display='none'"""
    driver.execute_script(js)
    # Choose the train type to query - high-speed rail by default
    driver.find_element_by_xpath('//*[@id="_ul_station_train_code"]/li[1]').click()
    # Click "query"
    driver.find_element_by_xpath('//*[@id="query_ticket"]').click()
    time.sleep(2)

    # Summary text of the result list plus the number of result rows.
    result = []
    row_count = len(driver.find_elements_by_xpath('//*[@id="queryLeftTable"]/tr'))
    train_info = (
        driver.find_element_by_xpath('//*[@id="sear-result"]/p[1]/strong[1]').text
        + ' 共计' + str(row_count) + '车次'
    )
    # Station names as shown in the first result row; their codes are needed
    # to build a URL such as
    # https://kyfw.12306.cn/otn/leftTicket/query?leftTicketDTO.train_date=2021-07-09&leftTicketDTO.from_station=SHH&leftTicketDTO.to_station=HZH&purpose_codes=ADULT
    start_city = driver.find_element_by_xpath('//*[@id="queryLeftTable"]/tr[1]/td/div/div[2]/strong[1]').text
    end_city = driver.find_element_by_xpath('//*[@id="queryLeftTable"]/tr[1]/td/div/div[2]/strong[2]').text
    start_city_eg_name = city_eg_name[city_name.index(start_city)]
    end_city_eg_name = city_eg_name[city_name.index(end_city)]
    query_url = 'https://kyfw.12306.cn/otn/leftTicket/query?leftTicketDTO.train_date={}&leftTicketDTO.from_station={}&leftTicketDTO.to_station={}&purpose_codes=ADULT'.format(train_date, start_city_eg_name, end_city_eg_name)
    print(start_city_eg_name, end_city_eg_name)
    print(query_url)
    # Fetching this URL with requests gets redirected, so open it in a new
    # browser window instead. The URL must be wrapped in double quotes.
    js = 'window.open("{}");'.format(query_url)
    driver.execute_script(js)
    # Switch to the newly opened window (last handle) and read the JSON text.
    handles = driver.window_handles
    driver.switch_to.window(handles[-1])
    query_res = driver.find_element_by_xpath('/html/body/pre').text
    # Close it, then re-read the handles and switch back.
    driver.close()
    handles = driver.window_handles
    driver.switch_to.window(handles[-1])

    query_json = json.loads(query_res)
    station_map = query_json['data']['map']  # station code -> station name
    for info in query_json['data']['result']:
        temp = info.split('|')
        item = {}
        item['车次信息'] = train_info
        item['日期'] = train_date
        item['编号'] = temp[2]
        item['车次'] = temp[3]
        item['首发站'] = temp[4]
        item['终点站'] = temp[5]
        item['上车站'] = station_map[temp[6]]
        item['下车站'] = station_map[temp[7]]
        item['出发时间'] = temp[8]
        item['到达时间'] = temp[9]
        item['历时'] = temp[10]
        item['是否可预订'] = temp[11]
        item['上车站编号'] = temp[16]
        item['下车站编号'] = temp[17]
        item['高级软卧'] = [temp[21]]
        item['软卧一等卧'] = [temp[23]]
        item['软座'] = [temp[24]]
        item['无座'] = [temp[26]]
        item['硬卧二等卧'] = [temp[28]]
        item['硬座'] = [temp[29]]
        item['二等座'] = [temp[30]]
        item['一等座'] = [temp[31]]
        item['商务座特等座'] = [temp[32]]
        item['动卧'] = [temp[33]]
        item['其他'] = '--'
        # Values used later to build the price URL: train_no,
        # from_station_no, to_station_no, seat_types (plus the date above).
        item['train_no'] = temp[2]
        item['from_station_no'] = temp[16]
        item['to_station_no'] = temp[17]
        item['seat_types'] = temp[35]
        # No sleep here: the rows come from JSON that is already in memory.
        print(item)
        result.append(item)
    return result


def _enter_station(self, field_id, city):
    """Type city into the station input with the given element id and press Enter."""
    box = self.driver.find_element_by_xpath('//*[@id="{}"]'.format(field_id))
    # Drop text left over from a rejected earlier attempt; without this a
    # retry would append to the previous input. Clearing happens before the
    # click so everything after the click is unchanged.
    box.clear()
    # The input box has to be clicked before typing into it.
    box.click()
    box.send_keys(city)
    # Unlike the account/password boxes, the text here only takes effect
    # after pressing Enter.
    box.send_keys(Keys.ENTER)
票价获取
def get_price_info(self, result, switch_delay=1, request_delay=3):
    """Append the ticket price to every available seat type of each train.

    For each item a queryTicketPrice URL is opened in a new browser window,
    its JSON text is read, and the price is appended to the seat lists.

    Args:
        result: List of train dicts as built by search_ticket_info. Each
            needs 'train_no', 'from_station_no', 'to_station_no',
            'seat_types', '日期' and the seat lists. Modified in place.
        switch_delay: Seconds to wait after closing the price window.
        request_delay: Seconds to wait after each train before the next
            request.

    Returns:
        The same list, with prices appended where available.
    """
    # (seat field in the item, key in the price response). The hard seat is
    # 'A1' and the deluxe soft sleeper is 'A6'.
    seat_price_keys = (
        ('软卧一等卧', 'A4'),
        ('无座', 'WZ'),
        ('硬座', 'A1'),
        ('二等座', 'O'),
        ('一等座', 'M'),
        ('商务座特等座', 'A9'),
        ('动卧', 'F'),
        ('高级软卧', 'A6'),
        ('硬卧二等卧', 'A3'),
    )
    for item in result:
        # Build e.g. https://kyfw.12306.cn/otn/leftTicket/queryTicketPrice?train_no=5l000G178640&from_station_no=01&to_station_no=07&seat_types=OM9&train_date=2021-07-09
        price_url = 'https://kyfw.12306.cn/otn/leftTicket/queryTicketPrice?train_no={}&from_station_no={}&to_station_no={}&seat_types={}&train_date={}'.format(item['train_no'], item['from_station_no'], item['to_station_no'], item['seat_types'], item['日期'])
        print(price_url)
        # Open the URL in a new window, read its JSON text, close the window.
        js = 'window.open("{}")'.format(price_url)
        self.driver.execute_script(js)
        handles = self.driver.window_handles
        self.driver.switch_to.window(handles[-1])
        price_res = self.driver.find_element_by_xpath('/html/body/pre').text
        self.driver.close()
        time.sleep(switch_delay)
        # Re-read the handles and switch back to the remaining window.
        handles = self.driver.window_handles
        self.driver.switch_to.window(handles[-1])
        price_info = json.loads(price_res)['data']
        # Some trains lack a seat type: only add a price when the response
        # has it and the seat's availability text is not empty.
        for seat_name, price_key in seat_price_keys:
            if price_key in price_info and item[seat_name][0] != '':
                item[seat_name].append(price_info[price_key])
        time.sleep(request_delay)
        print(item)
    return result
座位类型对照
A9或P: 商务座;M: 一等座;O: 二等座;A6: 高级软卧;A4: 软卧一等卧; F: 动卧;A3: 硬卧二等卧;A2: 软座;A1: 硬座;WZ: 无座;
7.完整代码
增加了数据保存和输入验证的函数
from selenium import webdriver
from selenium.webdriver import ActionChains # 用于控制鼠标滑动
from chaojiying import Chaojiying_Client # 超级鹰验证码识别
import base64
import re
from xml import etree
import time
import config # 账号密码配置文件
from selenium.webdriver.common.keys import Keys # 模拟点击
import requests
import json


class Login(object):
    """Log in to 12306 with Selenium, query tickets and collect prices.

    The flow driven by ``run`` is: open the home page, log in (image captcha
    solved through Chaojiying, then a slide captcha), ask the user for a
    route and date, read the ticket list and the per-train prices through
    the logged-in browser, and save everything to ``./query_data.json``.
    """

    # Prompt element of the slide captcha and the text it shows after a
    # failed slide.
    _SLIDE_INFO_XPATH = '//*[@id="J-slide-passcode"]/div/div/div/span'
    _SLIDE_ERROR_TEXT = '哎呀,出错了,点击刷新再来一次'

    # (item key, index of the field inside a '|'-separated query result row)
    _SEAT_COLUMNS = (
        ('高级软卧', 21),
        ('软卧一等卧', 23),
        ('软座', 24),
        ('无座', 26),
        ('硬卧二等卧', 28),
        ('硬座', 29),
        ('二等座', 30),
        ('一等座', 31),
        ('商务座特等座', 32),
        ('动卧', 33),
    )

    # (code used in the price response, item key of the matching seat list)
    _PRICE_CODES = (
        ('A4', '软卧一等卧'),
        ('WZ', '无座'),
        ('A1', '硬座'),
        ('O', '二等座'),
        ('M', '一等座'),
        ('A9', '商务座特等座'),
        ('F', '动卧'),
        ('A6', '高级软卧'),
        ('A3', '硬卧二等卧'),
    )

    def __init__(self):
        """Read the credentials from ``config`` and start a Chrome session."""
        self.tk_user = config.TICKET_USER
        self.tk_pw = config.TICKET_PASSWORD
        self.cjy_user = config.CHAOJIYING_USER
        self.cjy_pw = config.CHAOJIYING_PASSWORD
        self.cjy_id = config.CHAOJIYING_ID  # Chaojiying software id
        self.url = 'https://www.12306.cn/index/'
        self.driver = webdriver.Chrome()
        # Hide navigator.webdriver on every new document; otherwise the site
        # detects automation and the slide captcha fails.
        self.driver.execute_cdp_cmd(
            "Page.addScriptToEvaluateOnNewDocument",
            {"source": """Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"""},
        )

    def login(self):
        """Fill in the credentials and work through both captchas.

        Both captcha loops are exited through an exception: once a captcha
        has been passed its elements can no longer be found or used, the
        Selenium call raises, and that is treated as success.
        """
        # Login button on the home page.
        self.driver.find_element_by_xpath('//*[@id="J-header-login"]/a[1]').click()
        time.sleep(1)
        # Switch to the account/password tab and type the credentials.
        self.driver.find_element_by_xpath('/html/body/div[2]/div[2]/ul/li[2]/a').click()
        self.driver.find_element_by_id('J-userName').send_keys(self.tk_user)
        self.driver.find_element_by_id('J-password').send_keys(self.tk_pw)

        # --- image captcha -------------------------------------------------
        while True:
            try:
                self.driver.save_screenshot('./all_screen.png')
                # Screenshot only the captcha area and send it to Chaojiying.
                img = self.driver.find_element_by_id('J-loginImgArea')
                img.screenshot('./code1.png')
                position = self.process_cjy('./code1.png')
                # 'x1,y1|x2,y2' (or a single 'x1,y1') -> [['x1','y1'], ...];
                # split('|') also covers the single-point case.
                position_list = [pair.split(',') for pair in position.split('|')]
                print(position_list)  # [['107', '140'], ['253', '138']]
                # Click every answer point, positioned relative to the image.
                for point in position_list:
                    x = int(point[0])
                    y = int(point[-1])
                    ActionChains(self.driver).move_to_element_with_offset(img, x, y).click().perform()
                    time.sleep(1)
                self.driver.find_element_by_xpath('//*[@id="J-login"]').click()
                time.sleep(1)
            except Exception:
                # Not a bare except: Ctrl-C / SystemExit must still propagate.
                print('验证成功')
                break

        # --- slide captcha -------------------------------------------------
        while True:
            try:
                info = self.driver.find_element_by_xpath(self._SLIDE_INFO_XPATH).text
                print(info)
                if info == self._SLIDE_ERROR_TEXT:
                    # The previous slide failed: click "refresh" first.
                    self.driver.find_element_by_xpath('//*[@id="J-slide-passcode"]/div/span/a').click()
                time.sleep(1)
                self._drag_slider()
                time.sleep(7)
            except Exception:
                # The slider is gone, i.e. the page has moved on after login.
                print('登录成功')
                print(self.driver.current_url)  # https://kyfw.12306.cn/otn/view/index.html
                break

    def _drag_slider(self):
        """Press the slide-captcha handle and drag it 400px to the right."""
        handle = self.driver.find_element_by_xpath('//*[contains(@class,"nc_iconfont btn_slid")]')
        action = ActionChains(self.driver)
        action.click_and_hold(handle).perform()
        # drag_and_drop_by_offset releases the button itself, so no separate
        # release() is queued (the original queued one but never performed it).
        action.drag_and_drop_by_offset(handle, 400, 0).perform()

    def process_cjy(self, img_path):
        """Send a captcha image to Chaojiying and return the click points.

        Args:
            img_path: path of the captcha screenshot.

        Returns:
            The ``pic_str`` field of the response: 'x1,y1|x2,y2|...'.
        """
        cjy = Chaojiying_Client(self.cjy_user, self.cjy_pw, self.cjy_id)
        with open(img_path, 'rb') as f:
            img = f.read()
        # 9004 is the captcha type code passed to Chaojiying for this image.
        return cjy.PostPic(img, 9004)['pic_str']

    def _fetch_json_in_new_tab(self, url):
        """Open ``url`` in a temporary window and return its parsed JSON.

        The 12306 endpoints are read through the logged-in browser because a
        plain ``requests`` call gets redirected. The JSON is taken from the
        ``<pre>`` element of the page; the window is closed and the driver is
        switched back to the window that was active before the call.
        """
        main_handle = self.driver.current_window_handle
        # The URL must be wrapped in double quotes inside the JS call.
        self.driver.execute_script('window.open("{}");'.format(url))
        self.driver.switch_to.window(self.driver.window_handles[-1])
        try:
            raw = self.driver.find_element_by_xpath('/html/body/pre').text
        finally:
            # Close the popup even when the lookup fails, then go back.
            self.driver.close()
            self.driver.switch_to.window(main_handle)
        return json.loads(raw)

    @classmethod
    def _parse_ticket_row(cls, row, station_map, train_info, train_date):
        """Turn one '|'-separated query result row into an item dict.

        Args:
            row: one entry of ``data.result`` from the query response.
            station_map: ``data.map`` from the response (code -> name).
            train_info: summary text stored under ``车次信息``.
            train_date: the queried date, stored under ``日期``.
        """
        fields = row.split('|')
        item = {
            '车次信息': train_info,
            '日期': train_date,
            '编号': fields[2],
            '车次': fields[3],
            '首发站': fields[4],
            '终点站': fields[5],
            '上车站': station_map[fields[6]],
            '下车站': station_map[fields[7]],
            '出发时间': fields[8],
            '到达时间': fields[9],
            '历时': fields[10],
            '是否可预订': fields[11],
            '上车站编号': fields[16],
            '下车站编号': fields[17],
        }
        # Each seat class is a list so the price can be appended later.
        for seat_key, index in cls._SEAT_COLUMNS:
            item[seat_key] = [fields[index]]
        item['其他'] = '--'
        # Parameters needed later by the price query.
        item['train_no'] = fields[2]
        item['from_station_no'] = fields[16]
        item['to_station_no'] = fields[17]
        item['seat_types'] = fields[35]
        return item

    def search_ticket_info(self):
        """Ask for a route and date, run the query and return the trains.

        Returns:
            A list with one dict per train in the query response.
        """
        # Confirm the dialog shown after a successful login.
        self.driver.find_element_by_xpath('//*[@class="dzp-confirm"]/div/a').click()
        # "单程" lives in a hidden nav menu: hover over the menu first.
        nav_bar = self.driver.find_element_by_xpath('//*[@id="J-chepiao"]')
        ActionChains(self.driver).move_to_element(nav_bar).perform()
        self.driver.find_element_by_xpath('//*[contains(text(), "单程")]').click()
        # Dismiss the notice shown on the booking page.
        self.driver.find_element_by_xpath('//*[@id="qd_closeDefaultWarningWindowDialog_id"]').click()

        city_name, city_eg_name = self.get_city_name_id()

        # Validate before touching the page, so a rejected attempt never
        # leaves invalid text in the station boxes.
        while True:
            start_city = input('请输入出发城市:')
            end_city = input('请输入到达城市:')
            train_date = input('请输入出发日期,如1999-09-09形式:')
            if self.check_info(city_name, start_city, end_city, train_date):
                print('验证通过')
                break
            print('输入有误,请重新输入!')

        # The station boxes need a click first and an ENTER afterwards;
        # without ENTER the typed value does not take effect.
        for box_xpath, city in (
            ('//*[@id="fromStationText"]', start_city),
            ('//*[@id="toStationText"]', end_city),
        ):
            self.driver.find_element_by_xpath(box_xpath).click()
            self.driver.find_element_by_xpath(box_xpath).send_keys(city)
            self.driver.find_element_by_xpath(box_xpath).send_keys(Keys.ENTER)

        # The date box is read-only: drop the attribute so it can be typed in.
        js = """document.getElementById('train_date').removeAttribute('readonly')"""
        self.driver.execute_script(js)
        self.driver.find_element_by_id("train_date").clear()
        self.driver.find_element_by_id("train_date").send_keys(train_date)
        self.driver.find_element_by_id("train_date").send_keys(Keys.ENTER)
        # Hide the calendar popup below the date box.
        js = """document.querySelector('body > div.cal-wrap').style.display='none'"""
        self.driver.execute_script(js)
        # Train type filter: first entry of the list.
        self.driver.find_element_by_xpath('//*[@id="_ul_station_train_code"]/li[1]').click()
        self.driver.find_element_by_xpath('//*[@id="query_ticket"]').click()
        time.sleep(2)

        row_count = len(self.driver.find_elements_by_xpath('//*[@id="queryLeftTable"]/tr'))
        train_info = (
            self.driver.find_element_by_xpath('//*[@id="sear-result"]/p[1]/strong[1]').text
            + ' 共计' + str(row_count) + '车次'
        )
        # The station names of the first result row select the station codes
        # used in the query URL.
        start_city = self.driver.find_element_by_xpath('//*[@id="queryLeftTable"]/tr[1]/td/div/div[2]/strong[1]').text
        end_city = self.driver.find_element_by_xpath('//*[@id="queryLeftTable"]/tr[1]/td/div/div[2]/strong[2]').text
        start_city_eg_name = city_eg_name[city_name.index(start_city)]
        end_city_eg_name = city_eg_name[city_name.index(end_city)]
        query_url = 'https://kyfw.12306.cn/otn/leftTicket/query?leftTicketDTO.train_date={}&leftTicketDTO.from_station={}&leftTicketDTO.to_station={}&purpose_codes=ADULT'.format(train_date, start_city_eg_name, end_city_eg_name)
        print(start_city_eg_name, end_city_eg_name)
        print(query_url)

        query_json = self._fetch_json_in_new_tab(query_url)
        station_map = query_json['data']['map']  # station code -> name

        result = []
        for row in query_json['data']['result']:
            item = self._parse_ticket_row(row, station_map, train_info, train_date)
            print(item)
            result.append(item)
        return result

    @classmethod
    def _attach_prices(cls, item, price_info):
        """Append the price to every seat list of ``item`` that has one.

        A price is added only when the response contains the seat's code and
        the seat's availability value (first list element) is not empty.
        """
        for code, seat_key in cls._PRICE_CODES:
            if code in price_info and item[seat_key][0] != '':
                item[seat_key].append(price_info[code])

    def get_price_info(self, result):
        """Query the price of every train and add it to the seat lists.

        Args:
            result: list of train dicts from ``search_ticket_info``.

        Returns:
            The same list object, with the items updated in place.
        """
        for item in result:
            # e.g. .../queryTicketPrice?train_no=5l000G178640&from_station_no=01
            #      &to_station_no=07&seat_types=OM9&train_date=2021-07-09
            price_url = 'https://kyfw.12306.cn/otn/leftTicket/queryTicketPrice?train_no={}&from_station_no={}&to_station_no={}&seat_types={}&train_date={}'.format(
                item['train_no'],
                item['from_station_no'],
                item['to_station_no'],
                item['seat_types'],
                item['日期'],
            )
            print(price_url)
            price_json = self._fetch_json_in_new_tab(price_url)
            time.sleep(1)
            self._attach_prices(item, price_json['data'])
            # Pause between trains so the endpoint is not hit too quickly.
            time.sleep(3)
            print(item)
        return result

    @staticmethod
    def _is_valid_date(train_date):
        """Return True for a real calendar date written as YYYY-MM-DD."""
        # fullmatch: trailing characters such as '2021-07-099' are rejected.
        if not re.fullmatch(r'(\d{4})-(\d{2})-(\d{2})', train_date):
            return False
        try:
            time.strptime(train_date, '%Y-%m-%d')
        except ValueError:
            # Right shape but not a date, e.g. month 13 or February 30.
            return False
        return True

    def check_info(self, city_name, start_city, end_city, train_date):
        """Validate the cities and the date typed by the user.

        Args:
            city_name: list of known station names.
            start_city: departure station name.
            end_city: arrival station name.
            train_date: departure date, expected as YYYY-MM-DD.

        Returns:
            True when both cities are known and the date is valid; otherwise
            False, after printing which input was wrong.
        """
        if start_city not in city_name:
            print('出发城市输入有误!')
            return False
        if end_city not in city_name:
            print('抵达城市输入有误!')
            return False
        if not self._is_valid_date(train_date):
            print('日期输入有误')
            return False
        return True

    def save_data(self, result):
        """Write every item to ``./query_data.json``, one per line.

        Each line is a JSON object followed by a comma, so the file is a
        list of records rather than one strict JSON document.
        """
        with open('./query_data.json', 'w', encoding='utf-8') as f:
            f.writelines(json.dumps(item, ensure_ascii=False) + ',\n' for item in result)

    def get_city_name_id(self):
        """Download the station list and return (names, codes).

        Returns:
            Two parallel lists: station names (e.g. 北京北) and their codes
            (e.g. VAP).
        """
        response = requests.get(
            'https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9198',
            timeout=10,  # do not hang forever on a stalled connection
        ).content.decode()
        # var station_names = '@bjb|北京北|VAP|beijingbei|bjb|0...'
        # Inside the quotes every station takes five '|'-separated fields.
        fields = response.split('\'')[1].split('|')
        city_name = fields[1::5]     # 北京北
        city_eg_name = fields[2::5]  # VAP
        return city_name, city_eg_name

    def run(self):
        """Run the whole flow: login, ticket query, price query, save."""
        self.driver.get(url=self.url)
        self.driver.maximize_window()
        time.sleep(1)
        self.login()
        time.sleep(1)
        result = self.search_ticket_info()
        print(result)
        result = self.get_price_info(result)
        print(result)
        self.save_data(result)


if __name__ == '__main__':
    login = Login()
    login.run()
买票参考
requests参考
票价信息获取及prettytable显示
8.结果对比
在页面上查询时,结果只显示 5 趟高铁;而爬取到的却是全部列车的信息。原因应该是获取列车信息时所用的查询 url 有点问题(url 中只有日期、出发站和到达站,没有车次类型的筛选条件),但这不影响整体逻辑。
高铁列表
全部列表
{"车次信息": "北京北 --> 成都东(7月9日 周五) 共计10车次", "日期": "2021-07-09", "编号": "2400000G890E", "车次": "G89", "首发站": "BXP", "终点站": "ICW", "上车站": "北京西", "下车站": "成都东", "出发时间": "06:53", "到达时间": "14:38", "历时": "07:45", "是否可预订": "Y", "上车站编号": "01", "下车站编号": "05", "高级软卧": [""], "软卧一等卧": [""], "软座": [""], "无座": [""], "硬卧二等卧": [""], "硬座": [""], "二等座": ["有", "¥778.5"], "一等座": ["有", "¥1246.0"], "商务座特等座": ["无", "¥2417.0"], "动卧": [""], "其他": "--", "train_no": "2400000G890E", "from_station_no": "01", "to_station_no": "05", "seat_types": "OM9"},
{"车次信息": "北京北 --> 成都东(7月9日 周五) 共计10车次", "日期": "2021-07-09", "编号": "240000K81718", "车次": "K817", "首发站": "BXP", "终点站": "CDW", "上车站": "北京西", "下车站": "成都", "出发时间": "08:01", "到达时间": "12:32", "历时": "28:31", "是否可预订": "Y", "上车站编号": "01", "下车站编号": "24", "高级软卧": [""], "软卧一等卧": ["16", "¥689.0"], "软座": [""], "无座": ["无", "¥252.0"], "硬卧二等卧": ["有", "¥437.0"], "硬座": ["有", "¥252.0"], "二等座": [""], "一等座": [""], "商务座特等座": [""], "动卧": [""], "其他": "--", "train_no": "240000K81718", "from_station_no": "01", "to_station_no": "24", "seat_types": "3141"},
{"车次信息": "北京北 --> 成都东(7月9日 周五) 共计10车次", "日期": "2021-07-09", "编号": "240000G3090K", "车次": "G309", "首发站": "BXP", "终点站": "CUW", "上车站": "北京西", "下车站": "成都东", "出发时间": "08:18", "到达时间": "17:56", "历时": "09:38", "是否可预订": "Y", "上车站编号": "01", "下车站编号": "16", "高级软卧": [""], "软卧一等卧": [""], "软座": [""], "无座": [""], "硬卧二等卧": [""], "硬座": [""], "二等座": ["有", "¥778.5"], "一等座": ["无", "¥1246.0"], "商务座特等座": ["无", "¥2417.0"], "动卧": [""], "其他": "--", "train_no": "240000G3090K", "from_station_no": "01", "to_station_no": "16", "seat_types": "OM9"},
{"车次信息": "北京北 --> 成都东(7月9日 周五) 共计10车次", "日期": "2021-07-09", "编号": "240000G57115", "车次": "G571", "首发站": "BXP", "终点站": "CXW", "上车站": "北京西", "下车站": "成都东", "出发时间": "09:22", "到达时间": "18:55", "历时": "09:33", "是否可预订": "Y", "上车站编号": "01", "下车站编号": "16", "高级软卧": [""], "软卧一等卧": [""], "软座": [""], "无座": [""], "硬卧二等卧": [""], "硬座": [""], "二等座": ["有", "¥778.5"], "一等座": ["5", "¥1246.0"], "商务座特等座": ["4", "¥2417.0"], "动卧": [""], "其他": "--", "train_no": "240000G57115", "from_station_no": "01", "to_station_no": "16", "seat_types": "OM9"},
{"车次信息": "北京北 --> 成都东(7月9日 周五) 共计10车次", "日期": "2021-07-09", "编号": "240000G3070Q", "车次": "G307", "首发站": "BXP", "终点站": "ICW", "上车站": "北京西", "下车站": "成都东", "出发时间": "09:38", "到达时间": "19:14", "历时": "09:36", "是否可预订": "Y", "上车站编号": "01", "下车站编号": "17", "高级软卧": [""], "软卧一等卧": [""], "软座": [""], "无座": [""], "硬卧二等卧": [""], "硬座": [""], "二等座": ["有", "¥778.5"], "一等座": ["4", "¥1246.0"], "商务座特等座": ["3", "¥2417.0"], "动卧": [""], "其他": "--", "train_no": "240000G3070Q", "from_station_no": "01", "to_station_no": "17", "seat_types": "OM9"},
{"车次信息": "北京北 --> 成都东(7月9日 周五) 共计10车次", "日期": "2021-07-09", "编号": "2400000Z490L", "车次": "Z49", "首发站": "BXP", "终点站": "CDW", "上车站": "北京西", "下车站": "成都", "出发时间": "11:28", "到达时间": "08:56", "历时": "21:28", "是否可预订": "Y", "上车站编号": "01", "下车站编号": "10", "高级软卧": [""], "软卧一等卧": ["有", "¥687.5"], "软座": [""], "无座": ["无", "¥254.5"], "硬卧二等卧": ["有", "¥434.5"], "硬座": ["有", "¥254.5"], "二等座": [""], "一等座": [""], "商务座特等座": [""], "动卧": [""], "其他": "--", "train_no": "2400000Z490L", "from_station_no": "01", "to_station_no": "10", "seat_types": "4311"},
{"车次信息": "北京北 --> 成都东(7月9日 周五) 共计10车次", "日期": "2021-07-09", "编号": "240000K11724", "车次": "K117", "首发站": "BXP", "终点站": "CDW", "上车站": "北京西", "下车站": "成都", "出发时间": "11:36", "到达时间": "16:21", "历时": "28:45", "是否可预订": "Y", "上车站编号": "01", "下车站编号": "20", "高级软卧": [""], "软卧一等卧": ["15", "¥702.0"], "软座": [""], "无座": ["无", "¥259.0"], "硬卧二等卧": ["有", "¥448.0"], "硬座": ["有", "¥259.0"], "二等座": [""], "一等座": [""], "商务座特等座": [""], "动卧": [""], "其他": "--", "train_no": "240000K11724", "from_station_no": "01", "to_station_no": "20", "seat_types": "3141"},
{"车次信息": "北京北 --> 成都东(7月9日 周五) 共计10车次", "日期": "2021-07-09", "编号": "240000G34907", "车次": "G349", "首发站": "BXP", "终点站": "ICW", "上车站": "北京西", "下车站": "成都东", "出发时间": "15:13", "到达时间": "22:58", "历时": "07:45", "是否可预订": "Y", "上车站编号": "01", "下车站编号": "05", "高级软卧": [""], "软卧一等卧": [""], "软座": [""], "无座": [""], "硬卧二等卧": [""], "硬座": [""], "二等座": ["有", "¥778.5"], "一等座": ["有", "¥1246.0"], "商务座特等座": ["8", "¥2417.0"], "动卧": [""], "其他": "--", "train_no": "240000G34907", "from_station_no": "01", "to_station_no": "05", "seat_types": "OM9"},
{"车次信息": "北京北 --> 成都东(7月9日 周五) 共计10车次", "日期": "2021-07-09", "编号": "24000000T71C", "车次": "T7", "首发站": "BXP", "终点站": "CDW", "上车站": "北京西", "下车站": "成都", "出发时间": "16:40", "到达时间": "20:44", "历时": "28:04", "是否可预订": "Y", "上车站编号": "01", "下车站编号": "16", "高级软卧": [""], "软卧一等卧": ["4", "¥629.0"], "软座": [""], "无座": ["无", "¥236.0"], "硬卧二等卧": ["有", "¥399.0"], "硬座": ["有", "¥236.0"], "二等座": [""], "一等座": [""], "商务座特等座": [""], "动卧": [""], "其他": "--", "train_no": "24000000T71C", "from_station_no": "01", "to_station_no": "16", "seat_types": "3411"},
{"车次信息": "北京北 --> 成都东(7月9日 周五) 共计10车次", "日期": "2021-07-09", "编号": "24000K436310", "车次": "K4363", "首发站": "BXP", "终点站": "CDW", "上车站": "北京西", "下车站": "成都", "出发时间": "22:06", "到达时间": "05:01", "历时": "30:55", "是否可预订": "Y", "上车站编号": "01", "下车站编号": "20", "高级软卧": [""], "软卧一等卧": ["无", "¥640.0"], "软座": [""], "无座": ["无", "¥240.0"], "硬卧二等卧": ["有", "¥408.0"], "硬座": ["有", "¥240.0"], "二等座": [""], "一等座": [""], "商务座特等座": [""], "动卧": [""], "其他": "--", "train_no": "24000K436310", "from_station_no": "01", "to_station_no": "20", "seat_types": "4311"},