python爬取可爱女生图片

爬虫学的好，私货少不了

在这里插入图片描述

复习金融学实属无聊，思想开了点小差，巧了，最近正好在学爬虫，很快啊，漂亮小姐姐们嗖嗖嗖地跑到小的本地文件夹来啦!
爬取mz图片链接: https://sc.chinaz.com/tupian/xingganmeinvtupian.html
因为临近考试秃头夜，这里就不细说啦，具体看下面代码啦!

import urllib.request
import urllib.parse
from lxml import etree
import time
import os


def handle_request(url, page):
    """Build the HTTP request for one listing page.

    Args:
        url: URL template containing a single ``{}`` placeholder where the
            page suffix is inserted.
        page: Page number. Pages 0 and 1 both map to the first page, whose
            URL carries no suffix; any other page ``n`` gets the suffix
            ``_n``.

    Returns:
        A ``urllib.request.Request`` for the page URL carrying
        browser-like ``User-Agent`` and ``Cookie`` headers.
    """
    # The first page follows a different URL pattern from the later pages.
    if page in (0, 1):
        # First page, e.g.
        # https://sc.chinaz.com/tupian/xingganmeinvtupian.html
        url = url.format('')
    else:
        # Every other page, e.g.
        # https://sc.chinaz.com/tupian/xingganmeinvtupian_2.html
        url = url.format('_' + str(page))

    # Disguise the request as coming from a regular browser.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
        'Cookie': 'UM_distinctid=172bafc075930f-0b3214295ea2fd-f313f6d-1fa400-172bafc075a521; __gads=ID=7343205fec19267e:T=1592274993:S=ALNI_MZuCx78VBx2WBiIEBOXsKZoldvefg',
    }
    # Only build the request here; the caller is the one who sends it.
    return urllib.request.Request(url=url, headers=headers)


# Parse the page content and download the images
def parse_content(content):
    """Find the image URLs on one listing page and download each image.

    Args:
        content: HTML text of a listing page.
    """
    # Turn the HTML into a tree that XPath can query.
    tree = etree.HTML(content)
    # The page lazy-loads its images: the real path sits in the ``src2``
    # attribute and JavaScript copies it into ``src`` only once the image
    # scrolls into the visible area, so ``src2`` is the attribute to read.
    # Browser-copied XPath for reference: //*[@id="container"]/div[2]/div/a/img
    image_list = tree.xpath('//div[@id="container"]/div/div/a/img/@src2')
    for image_src in image_list:
        download_image(image_src)


def normalize_image_url(image_src):
    """Return an absolute URL for an image reference taken from the page.

    Protocol-relative references (``//host/path``) get the ``https`` scheme,
    site-relative paths are resolved against the listing site, and
    references that already carry a scheme are returned unchanged.

    Args:
        image_src: The image reference as found in the page.

    Returns:
        The absolute URL as a string.
    """
    return urllib.parse.urljoin('https://sc.chinaz.com/tupian/', image_src)


def download_image(image_src):
    """Download one image into the local output directory.

    The file is saved under the ``站长美女图片爬取`` directory (created when
    missing) using the last path component of ``image_src`` as its name.
    The function pauses for two seconds after each download.

    Args:
        image_src: Image reference taken from the listing page.
    """
    dirpath = '站长美女图片爬取'
    # Create the output directory; exist_ok avoids the check-then-create race.
    os.makedirs(dirpath, exist_ok=True)

    # Derive the file name and the target path from the image reference.
    filename = os.path.basename(image_src)
    print(filename)
    filepath = os.path.join(dirpath, filename)
    print(filepath)

    # Send the request with browser-like headers.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
        'Cookie': 'UM_distinctid=172bafc075930f-0b3214295ea2fd-f313f6d-1fa400-172bafc075a521; __gads=ID=7343205fec19267e:T=1592274993:S=ALNI_MZuCx78VBx2WBiIEBOXsKZoldvefg',
    }
    request = urllib.request.Request(
        url=normalize_image_url(image_src), headers=headers
    )

    print(f'正在爬取{filename}')
    # Close the HTTP response as soon as the body has been read.
    with urllib.request.urlopen(request) as response:
        data = response.read()
    # Save the image bytes unchanged.
    with open(filepath, 'wb') as fp:
        fp.write(data)
    print(f'{filename}爬取完毕')
    # Pause between downloads.
    time.sleep(2)


def main():
    """Prompt for a page range and scrape every listing page in it."""
    url = 'http://sc.chinaz.com/tupian/xingganmeinvtupian{}.html'
    start_page = int(input('请输入起始页码:'))
    end_page = int(input('请输入结束页码:'))
    for page in range(start_page, end_page + 1):
        request = handle_request(url, page)
        print('开始爬取第{}页'.format(page))
        # Close the HTTP response once the page body has been read.
        with urllib.request.urlopen(request) as response:
            content = response.read().decode()
        parse_content(content)
        print('第{}页爬取结束'.format(page))


if __name__ == '__main__':
    start = time.time()
    main()
    print('蜘蛛结网完毕，收工')
    end = time.time()
    print(f'爬取所有妹子图片用时: {end-start}s')