爬取思路
主要用requests发起请求,用urllib保存图片
先请求得到数据,用str.find()方法和切片初步处理数据,再把处理好的数据转换成json数据,然后用字典取值的方式得到全部图片的url,最后逐个请求url并保存图片
代码实现
import requests
from urllib import request
import urllib.parse
import json#发起请求并获取响应的内容
# Work-list endpoint queried by this script. The query string is kept exactly
# as captured; the `jsoncallback` parameter suggests the server wraps the JSON
# in a jQuery callback (JSONP) -- TODO confirm against a live response.
WORKLIST_URL = (
    'https://apps.game.qq.com/cgi-bin/ams/module/ishow/V1.0/query/workList_inc.cgi?activityId=2735&sVerifyCode=ABCD'
    '&sDataType=JSON&iListNum=20&totalpage=0&page=0&iOrder=0&iSortNumClose=1&jsoncallback'
    '=jQuery17108066783266449418_1638085376881&iAMSActivityId=51991&_everyRead=true&iTypeId=2&iFlowId=267733&iActId'
    '=2735&iModuleId=2735&_=1638085377078'
)

# Keys of the eight image-URL fields read from every record.
IMAGE_FIELDS = tuple('sProdImgNo_' + str(number) for number in range(1, 9))

# Seconds to wait for the work-list request before giving up.
REQUEST_TIMEOUT = 30


def fetch_worklist_text(url=WORKLIST_URL):
    """Request *url* and return the response body decoded as UTF-8.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.content.decode('utf-8')


def extract_json_array(text):
    """Parse the JSON array embedded in *text* and return it as a list.

    The array is taken from the first '[' up to and including the first ']',
    which strips whatever wrapper surrounds it. Because the *first* ']' is
    used, a nested array or a ']' inside a string value would cut the slice
    short and make parsing fail.

    Raises:
        ValueError: if no '[' ... ']' span is found, or the span is not
            valid JSON (json.JSONDecodeError is a ValueError subclass).
    """
    start = text.find('[')
    end = text.find(']')
    if start == -1 or end < start:
        raise ValueError('no JSON array found in response text')
    return json.loads(text[start:end + 1])


def collect_image_urls(records):
    """Return the rewritten image URLs of every record, in record order.

    For each record the eight ``sProdImgNo_*`` values are percent-decoded,
    then their last three characters are replaced by '0'.

    Raises:
        KeyError: if a record lacks one of the ``sProdImgNo_*`` keys.
    """
    urls = []
    for record in records:
        # NOTE(review): the trailing three characters are presumably a
        # thumbnail-size suffix (e.g. "200") and "0" the full-size variant
        # -- confirm against real data.
        urls.extend(
            urllib.parse.unquote(record[field])[:-3] + '0'
            for field in IMAGE_FIELDS
        )
    return urls


def download_images(urls):
    """Save each URL to '<index>.png' in the working directory, reporting progress."""
    for index, image_url in enumerate(urls):
        request.urlretrieve(image_url, str(index) + '.png')
        print("正在下载第" + str(index) + "张")


def main():
    """Fetch the work list, print the parsed records, and download all images."""
    records = extract_json_array(fetch_worklist_text())
    print(records)
    download_images(collect_image_urls(records))


# Guarded so that importing this module does not trigger network traffic.
if __name__ == "__main__":
    main()