selenium爬取猫咪图片并转换为统一尺寸(内附已爬取并转换尺寸的图片下载链接)
朋友做机器学习需要几百到一千张左右的照片,于是简单写了个基于selenium的自动化爬虫,爬取猫咪的图片并进行尺寸转换
下面有已经爬取好的5种猫咪每个种类五百多张图片
爬虫代码
import requests
from lxml import etree
import time
from selenium.webdriver import Chrome
import os# 1.爬取网页
# Crawl Bing image search for each cat breed and save the thumbnails under
# "猫咪/<breed>/壁纸<n>.jpg" (relative to the current working directory).
maomi = ['德文', '布偶', '缅因', '蓝猫', '加菲', '暹罗']

# Script run in the page to scroll down so that more results get lazy-loaded.
js = "var q=document.documentElement.scrollTop=10000"

for page in maomi:
    # Create the output folder once per breed. makedirs also creates the
    # parent "猫咪" folder, which a plain mkdir cannot do.
    folder = "猫咪/" + page
    if not os.path.isdir(folder):
        try:
            os.makedirs(folder)
        except OSError as e:
            # Nothing can be saved for this breed; skip it instead of
            # launching a browser for nothing.
            print(e)
            continue
        print("创建成功!")

    # 1. Open the search result page in a fresh browser session.
    web = Chrome()
    try:
        n = 1
        web.get('https://cn.bing.com/images/search?q={}&qs=n&form=QBIR&sp=-1&pq=jia%27fei%27m&sc=8-9&cvid=36F2A5142F944DF1921F45FB416B9E46&first=1&tsc=ImageBasicHover'.format(page+"猫"))

        # 2. Let the page render, then scroll repeatedly to load more images.
        time.sleep(2)
        web.maximize_window()
        time.sleep(2)
        for _ in range(12):
            web.execute_script(js)
            time.sleep(2)
        lst = etree.HTML(web.page_source)

        # 3. Collect the image links and download them one by one.
        src = lst.xpath("//div[@class='img_cont hoff']/img/@src")
        for img_page in src:
            try:
                # Download first so that a failed request never leaves an
                # empty .jpg file on disk.
                resp = requests.get(img_page, timeout=10)
                resp.raise_for_status()
                with open('猫咪/{}/壁纸{}.jpg'.format(page, n), 'wb') as f:
                    f.write(resp.content)
                print('下载壁纸第{}张完成!'.format(n))
                n += 1
            except (requests.RequestException, OSError) as e:
                # Best effort: report the failure and move on to the next link.
                print(e)
                print("网页访问失败!")
    finally:
        # Always close the browser, even if crawling this breed failed.
        web.quit()
尺寸转换代码
import os
from PIL import Image
import glob


def convertImgSize(filename, outdir, width=128, height=128):
    """Resize one image to ``width`` x ``height`` and save it into ``outdir``.

    The output keeps the base name of ``filename``. Failures (unreadable or
    corrupt image, unwritable destination) are printed and skipped so that a
    single bad file does not abort a batch run.

    Args:
        filename: Path of the source image.
        outdir: Directory to write the resized image to; created if missing.
        width: Target width in pixels.
        height: Target height in pixels.
    """
    name = os.path.basename(filename)
    target = os.path.join(outdir, name)
    # Image modes the JPEG writer accepts; anything else (alpha channel,
    # palette, ...) has to be converted before saving as JPEG.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    try:
        os.makedirs(outdir, exist_ok=True)
        # Opening inside the try block keeps a corrupt/empty download from
        # crashing the caller; the with-statement closes the file handle.
        with Image.open(filename) as img:
            source = img
            if target.lower().endswith((".jpg", ".jpeg")) and img.mode not in jpeg_modes:
                source = img.convert("RGB")
            new = source.resize((width, height), Image.BILINEAR)
            print(name)
            new.save(target)
    except Exception as e:
        # Deliberate best-effort boundary: report and continue with the next file.
        print(e)


if __name__ == '__main__':
    # Find the crawled .jpg files of every breed and write resized copies
    # into a sibling "<breed>1" folder.
    maomi = ['德文', '布偶', '缅因', '蓝猫', '加菲', '暹罗']
    for breed in maomi:
        for filename in glob.glob(r'D:/pythonProject1/testurllib/猫咪/{}/*.jpg'.format(breed)):
            print(filename)
            convertImgSize(filename, 'D:/pythonProject1/testurllib/猫咪/{}1'.format(breed))
不过如果有需要的朋友记得修改爬虫的路径
下载链接
点进去后,下载github中的图片.zip