对于re来说,公式就是 .*? :惰性(非贪婪)匹配,尽可能少地匹配,第一次匹配成功就停止;而 .* 是贪婪匹配,会尽可能多地匹配。
北京新发地尝试
那么对于bs4来说,公式就是 BeautifulSoup("html源码", "html.parser")
import requests
from bs4 import BeautifulSoup# url = 'http://www.xinfadi.com.cn/getPriceData.html'
#
# headers = {
# "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36 Edg/126.0.0.0"
# }
# data = {
# "limit" : "",
# "current" : "",
# "pubDataStartTime" : "",
# "pubDataEndTime" : "",
# "prodPcatid" : "",
# "prodCatid" : "",
# "prodName" : "大白菜",
# }
# resp = requests.post(url,data=data, headers=headers, verify=False)\
#
# print(resp.text)
#
# resp.close()
#
# # 生成bs对象
# page = BeautifulSoup(resp.text, "html.parser")
# # 从bs对象中查找数据
# # find找第一个(标签,属性=值)
# # find_all全找(标签,属性=值)
# table = page.find("table", attrs={
# "class":"hq_table"
# })
# # 拿到所有数据行
# trs = table.find_all("tr")[1:]
# for tr in trs:
# tds = tr.find_all("td")
# for td in tds:
# print(td.text, end=" ")
# print()

# 优美图库
# Umei image gallery: fetch the "latest updates" page and print every link
# found inside its listing container.
url = 'https://www.umei.cc/update.htm'

# The timeout keeps the script from hanging forever on a stalled connection,
# and the context manager closes the response even if parsing raises.
with requests.get(url, timeout=10) as resp:
    # Decode the body as UTF-8 instead of relying on the guessed encoding.
    resp.encoding = 'utf-8'
    # Hand the page source to BeautifulSoup.
    main_page = BeautifulSoup(resp.text, "html.parser")

# find() returns None when no element matches; fall back to an empty list so
# a changed page layout does not crash with AttributeError.
container = main_page.find("div", class_="Clbc_top table")
lst = container.find_all("a") if container is not None else []

for a in lst:
    # TODO: read the image address (a.find("img").get("src")), fetch the
    # child page source, and write the image to disk in binary mode ("wb").
    print(a)