Wallpaper site: https://bing.ioliu.cn/ranking. The script scrapes the wallpapers listed on the ranking page.
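The ranking page is paginated through the p query parameter (?p=1, ?p=2, and so on). Before the full script, here is a minimal sketch that fetches one page and lists the wallpaper titles; it assumes each card keeps its title in an h3 inside a "description" block, the same markup the full script below relies on:

import bs4, requests

# Minimal sketch: fetch the first ranking page and print each wallpaper title.
resp = requests.get('https://bing.ioliu.cn/ranking?p=1',
                    headers={'User-Agent': 'Mozilla/5.0'})
soup = bs4.BeautifulSoup(resp.text, features='lxml')
for block in soup.find_all(class_='description'):
    print(block.h3.string)  # the h3 holds the wallpaper title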
Source code:
import bs4, re, os, requests

session = requests.session()
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0"}

def fetch_content(url):  # fetch any page and parse it with BeautifulSoup
    req = session.get(url, headers=headers)
    htmls = req.text
    soup = bs4.BeautifulSoup(htmls, features='lxml')
    return soup

print('This program downloads wallpapers from the Bing wallpaper site and saves them to the D:/必应壁纸 directory.')
s = input('Press any key to start, otherwise just close the program: ')
flag = False
if s is not None:
    flag = True
if flag:
    # create the target directory on the first run
    if os.path.exists('D:/必应壁纸'):
        print('Directory already exists')
    else:
        os.mkdir('D:/必应壁纸')
        print('Directory created')
    k = 1
    p = input('Enter the number of pages to download (12 per page, no more than 10 pages recommended): ')
    child_flag = False
    if p.isdigit():
        child_flag = True
    if child_flag:
        while k <= int(p):
            url = 'https://bing.ioliu.cn/ranking?p=' + str(k)  # p=6 fetches the sixth page
            soup = fetch_content(url)
            description = soup.find_all(class_='description')  # blocks holding the image titles
            pic_url = soup.find_all('a', class_='mark')  # per-card links to each photo page (class name assumed from the site's markup)
            for i in range(len(pic_url)):
                title = description[i].h3.string  # image title
                name = re.sub(r'\W', '', title)  # strip characters that are unsafe in a file name
                print('Downloading:\t', name)  # print the title of the image being downloaded
                # rewrite the photo-page link into a download link; assumes the site accepts a ?force=download query
                img = 'https://bing.ioliu.cn' + pic_url[i].get('href').split('?')[0] + '?force=download'
                img = session.get(img, headers=headers)  # download the image
                try:
                    if not os.path.exists('D:/必应壁纸/' + name + '.jpg'):
                        with open('D:/必应壁纸/' + name + '.jpg', 'wb') as f:
                            f.write(img.content)
                except OSError:
                    pass
            print('Page %s: %s images downloaded!' % (k, i + 1))
            k += 1
        print('All %s pages done, %s images in total!' % (int(p), int(p) * 12))
    else:
        print('Please enter a number!')
        exit('Program error!')
else:
    exit('Program error!')
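The most fragile step is rewriting a photo-page link into a download link, since it depends on the site's URL scheme. A quick check of that rewrite on a made-up href (the photo id here is hypothetical), assuming the site accepts a ?force=download query on the photo page:

href = '/photo/SampleWallpaper_ZH-CN1234567890?force=ranking'  # hypothetical href from the ranking page
img_url = 'https://bing.ioliu.cn' + href.split('?')[0] + '?force=download'
print(img_url)  # https://bing.ioliu.cn/photo/SampleWallpaper_ZH-CN1234567890?force=download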