说明
上一篇文章已经说明了爬虫的目的,并完成了脉脉的模拟登陆。
领英模拟登陆难度略大于脉脉
操作
1、登陆接口:POST https://www.linkedin.com/uas/login-submit?loginSubmitSource=GUEST_HOME
其中session_key是用户名,session_password是密码。
关键点
1、loginCsrfParam
2、cookie
3、csrf_token
寻找参数
1、loginCsrfParam
这个参数是你访问领英首页就会响应的一个结果,可以通过xpath定位
//input[@id="loginCsrfParam-login"]/@value
2、cookie
此时拿到的cookie是尚未登陆时的cookie,我们需要把它保存下来,因为模拟登陆的接口需要携带这份cookie。
3、csrf_token
这个参数是和loginCsrfParam成对出现的:访问首页时,页面中会返回loginCsrfParam,同时响应的cookie中会有JSESSIONID字段,JSESSIONID字段的值就是csrf_token的值(作为csrf-token请求头发送时需要去掉其中的双引号)。
当我们把所有的参数都找到之后就可以直接模拟登陆了。
完整代码
import json

import requests
from lxml import etree
import demjson


class Login:
    """Simulate a LinkedIn web login with ``requests``.

    The flow is split into three steps that communicate through JSON files
    in the current working directory:

    1. ``get_index_csrf_and_cookie()`` requests the home page and stores the
       anonymous cookies, the ``loginCsrfParam`` form value and the
       ``JSESSIONID`` cookie value in ``index_cookie.json``.
    2. ``login()`` posts the credentials together with the stored values and
       writes the resulting cookies to ``cookie.json``.
    3. ``test_login()`` requests the feed page with the stored cookies and
       prints the response body.
    """

    INDEX_COOKIE_FILE = "index_cookie.json"
    COOKIE_FILE = "cookie.json"

    # Seconds to wait for LinkedIn before giving up; without a timeout
    # ``requests`` may block forever on a stalled connection.
    REQUEST_TIMEOUT = 30

    USER_AGENT = (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/72.0.3626.109 Safari/537.36"
    )
    ACCEPT_HTML = (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/webp,image/apng,*/*;q=0.8"
    )
    ACCEPT_LANGUAGE = "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7"

    def __init__(self, username="", password=""):
        """Store the credentials used by ``login()``.

        Args:
            username: LinkedIn account name (sent as ``session_key``).
            password: LinkedIn password (sent as ``session_password``).
        """
        self.username = username
        self.password = password

    @staticmethod
    def _collect_cookies(jar):
        """Return ``(cookie_header, csrf_token)`` built from a cookie jar.

        ``cookie_header`` is every cookie rendered as ``name=value; `` and
        concatenated; ``csrf_token`` is the value of the ``JSESSIONID``
        cookie, or ``""`` when the jar has none.
        """
        parts = []
        csrf_token = ""
        for item in jar:
            if item.name == "JSESSIONID":
                csrf_token = item.value
            parts.append("{0}={1}; ".format(item.name, item.value))
        return "".join(parts), csrf_token

    @staticmethod
    def _save_json(path, data):
        """Write ``data`` to ``path`` as real JSON (not a Python repr)."""
        with open(path, "w") as f:
            json.dump(data, f)

    @staticmethod
    def _load_json(path):
        """Read ``path`` and decode its content.

        ``demjson`` is kept for decoding so that files written by earlier
        versions of this script (``str(dict)`` output) can still be read.
        """
        with open(path, "r") as f:
            return demjson.decode(f.read())

    def get_index_csrf_and_cookie(self):
        """Fetch the home page and persist the pre-login cookie and CSRF data.

        Writes ``index_cookie.json`` with the keys ``cookie``, ``csrf`` and
        ``csrf_token``.

        Raises:
            IndexError: the ``loginCsrfParam-login`` input is not present in
                the returned page.
        """
        session = requests.Session()
        url = "https://www.linkedin.com/"
        querystring = {"trk": "brandpage_baidu_pc-mainlink"}
        headers = {
            'authority': "www.linkedin.com",
            'cache-control': "max-age=0,no-cache",
            'upgrade-insecure-requests': "1",
            'user-agent': self.USER_AGENT,
            'accept': self.ACCEPT_HTML,
            'referer': "https://www.baidu.com/s?ie=UTF-8&wd=%E9%A2%86%E8%8B%B1",
            'accept-encoding': "gzip, deflate, br",
            'accept-language': self.ACCEPT_LANGUAGE,
        }
        response = session.request(
            "GET", url, headers=headers, params=querystring,
            timeout=self.REQUEST_TIMEOUT,
        )
        cookie, csrf_token = self._collect_cookies(session.cookies)

        html = etree.HTML(response.text)
        matches = []
        if html is not None:
            matches = html.xpath('//input[@id="loginCsrfParam-login"]/@value')
        if not matches:
            # IndexError is what the plain ``[0]`` lookup used to raise.
            raise IndexError(
                "loginCsrfParam-login input not found in the LinkedIn home page"
            )

        index_login_data = {
            "cookie": cookie,
            "csrf": matches[0],
            "csrf_token": csrf_token,
        }
        self._save_json(self.INDEX_COOKIE_FILE, index_login_data)

    def login(self):
        """Submit the login form and persist the post-login cookies.

        Reads ``index_cookie.json`` (see ``get_index_csrf_and_cookie``) and
        writes ``cookie.json`` with the keys ``cookie`` and ``csrf_token``.
        """
        index_data = self._load_json(self.INDEX_COOKIE_FILE)
        csrf = index_data.get("csrf")
        cookie = index_data.get("cookie")

        s = requests.session()
        url = "https://www.linkedin.com/uas/login-submit"
        querystring = {"loginSubmitSource": "GUEST_HOME"}
        # Passed as field pairs so that requests form-encodes the values;
        # credentials containing '&', '+', '=' or '%' are sent intact.
        payload = [
            ("session_key", self.username),
            ("session_password", self.password),
            ("isJsEnabled", "false"),
            ("loginCsrfParam", csrf),
            ("fp_data", "default"),
            ("undefined", ""),
        ]
        headers = {
            'authority': "www.linkedin.com",
            'cache-control': "max-age=0,no-cache",
            'origin': "https://www.linkedin.com",
            'upgrade-insecure-requests': "1",
            'content-type': "application/x-www-form-urlencoded",
            'user-agent': self.USER_AGENT,
            'accept': self.ACCEPT_HTML,
            'referer': "https://www.linkedin.com/",
            'accept-encoding': "gzip, deflate, br",
            'accept-language': self.ACCEPT_LANGUAGE,
            'cookie': cookie,
        }
        s.request(
            "POST", url, data=payload, headers=headers, params=querystring,
            timeout=self.REQUEST_TIMEOUT,
        )

        cookie, csrf_token = self._collect_cookies(s.cookies)
        cookie_data = {"cookie": cookie, "csrf_token": csrf_token}
        self._save_json(self.COOKIE_FILE, cookie_data)

    def test_login(self):
        """Request the feed page with the stored cookies and print the body.

        Reads ``cookie.json`` (see ``login``).
        """
        json_cookie = self._load_json(self.COOKIE_FILE)
        csrf_token = json_cookie['csrf_token']
        cookie = json_cookie['cookie']
        headers1 = {
            'x-li-track': '{"clientVersion":"1.2.7373","osName":"web","timezoneOffset":8,"deviceFormFactor":"DESKTOP","mpName":"voyager-web"}',
            'cookie': cookie,
            'accept-encoding': "gzip, deflate, br",
            'x-li-lang': "zh_CN",
            'accept-language': self.ACCEPT_LANGUAGE,
            'user-agent': self.USER_AGENT,
            'x-li-page-instance': "urn:li:page:d_flagship3_feed;LvtQ1iY2QeGSOhIoA8jaQQ==",
            'accept': "application/vnd.linkedin.normalized+json+2.1",
            # The stored JSESSIONID value may be wrapped in double quotes;
            # the header is sent without them.
            'csrf-token': csrf_token.replace('"', ""),
            'x-restli-protocol-version': "2.0.0",
            'authority': "www.linkedin.com",
            'referer': "https://www.linkedin.com/feed/",
            'cache-control': "no-cache",
        }
        url = "https://www.linkedin.com/feed/"
        response = requests.request(
            "GET", url, headers=headers1, timeout=self.REQUEST_TIMEOUT
        )
        response.encoding = "utf-8"
        print(response.text)
        # The same headers can be used against the JSON API, e.g.
        # GET https://www.linkedin.com/voyager/api/onboarding/launchpadCard
        # with params {"context": "FEED", "q": "context"}.


if __name__ == "__main__":
    login = Login()
    # On the first run, uncomment the two calls below: they create
    # index_cookie.json and cookie.json, which test_login() depends on.
    # login.get_index_csrf_and_cookie()
    # login.login()
    login.test_login()