安装 lxml 库
!pip install lxml
import requests

# Send a desktop-browser User-Agent with the request
# (presumably the site rejects the default python-requests one; verify).
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.6261.95 Safari/537.36",
}
url = "https://movie.douban.com/chart"

# Without a timeout, requests can block forever on an unresponsive server.
response = requests.get(url, headers=headers, timeout=10)
# Fail loudly on 4xx/5xx instead of silently parsing an error page later.
response.raise_for_status()
构建用于 XPath 查询的对象
from lxml import etree

# Parse the downloaded HTML text into an element tree that can be
# queried with XPath expressions.
html_text = response.text
tree = etree.HTML(html_text)
使用xpath规则进行提取
# Select every <span> nested in an anchor under <div class="pl2">
# and print its text.
lists = tree.xpath("//div[@class='pl2']/a/span")
# The loop variable is deliberately not called `list`: that name would
# shadow the builtin for the rest of the script/notebook session.
for span in lists:
    print(span.text)
输出
获取 a 标签的链接(href 属性)
# Select the href attribute of every anchor directly under
# <div class="pl2">; the XPath returns the attribute values as strings.
links = tree.xpath("//div[@class='pl2']/a/@href")

# Print one link per line.
for link in links:
    print(link)
输出
数据存储
存储音乐
import requests

# NOTE(review): the URL path contains what looks like a timestamp and a
# signature segment, so this link has probably expired; replace it with
# a fresh one before running.
url = "http://m801.music.126.net/20241218224557/987ec52295da84beb07585379448bf96/jdymusic/obj/wo3DlMOGwrbDjj7DisKw/35836053117/6356/740c/463a/5ad8c9d7ac9f59ea148b9cdf1802e87a.mp3"

# Without a timeout, requests can block forever on an unresponsive server.
response = requests.get(url, timeout=30)
# Abort on 4xx/5xx so an error body is never saved as "music.mp3".
response.raise_for_status()

# response.content is the raw body as bytes, hence binary mode "wb".
content = response.content
with open("music.mp3", "wb") as file:
    file.write(content)
存储文本
建议用csv
import csv

data = [[1, 2, 3], [4, 5, 6], [1, 3, 5]]

# Mode "w" overwrites an existing file; change it to "a" to append instead.
# newline="" hands line-ending control to the csv module, which otherwise
# produces blank rows between records on Windows.
with open("data.csv", "w", encoding="utf-8", newline="") as file:
    csv_writer = csv.writer(file)
    csv_writer.writerows(data)  # writerows writes several rows at once
    csv_writer.writerow([8, 8, 8, 8])  # writerow writes a single row