测试链接:https://lishi.tianqi.com/guangzhou/202003.html
源码:
import pymysql
import requests
from lxml import etree


class ThSpider(object):
    """Scrape daily historical weather from lishi.tianqi.com into MySQL.

    One page is fetched per month in ``month_list``; every ``<li>`` row of
    the ``ul.thrui`` list on that page is stored as one record in the
    ``天气数据`` table (date, high temperature, low temperature, weather,
    wind direction).
    """

    # Page URL template: first slot is the city slug, second is YYYYMM.
    BASE_URL = 'https://lishi.tianqi.com/{}/{}.html'

    # Months scraped when the caller does not supply its own list.
    DEFAULT_MONTHS = (
        '202101', '202102', '202103', '202104', '202105', '202106',
        '202107', '202108', '202109', '202110', '202111', '202112',
        '202201', '202202',
    )

    def __init__(self, month_list=None, city='shanghai', timeout=10):
        """Open the database connection and make sure the table exists.

        Args:
            month_list: Iterable of 'YYYYMM' strings to scrape. Defaults to
                ``DEFAULT_MONTHS`` (January 2021 through February 2022).
            city: City slug used in the page URL. Defaults to 'shanghai'.
            timeout: Seconds to wait for each HTTP request before giving up.
        """
        # Initialise the scrape targets.
        if month_list is None:
            month_list = self.DEFAULT_MONTHS
        self.month_list = list(month_list)
        self.city = city
        self.timeout = timeout

        # Connect to the database.
        self.cz = pymysql.connect(
            host='127.0.0.1',
            port=3306,
            user='root',
            password='root',
            db='基本操作',
            charset='utf8',
        )
        # Create the cursor.
        self.kit = self.cz.cursor()

        # Create the table (not the database) if it is not there yet.
        table_sql = '''create table if not exists 天气数据 (
            日期 varchar (50),
            最高气温 varchar (50),
            最低气温 varchar (50),
            天气 varchar (50),
            风向 varchar (50)
        );'''
        self.kit.execute(table_sql)

    def request_start_url(self):
        """Fetch every month page, store its rows, then close the database.

        Each month is committed as soon as it has been parsed. If a request
        or an insert fails, the uncommitted rows of the current month are
        rolled back and the exception is re-raised. The cursor and the
        connection are closed in every case.
        """
        # Headers do not depend on the month, so build them once.
        self.headers = {
            'Cookie': 'UserId=17209281674394559; Hm_lvt_7c50c7060f1f743bccf8c150a646e90a=1720928176; HMACCOUNT=66A8254591DC78E3; Hm_lpvt_7c50c7060f1f743bccf8c150a646e90a=1720941400',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
        }
        try:
            for month in self.month_list:
                start_url = self.BASE_URL.format(self.city, month)
                # Send the request and take the response body as text.
                response = requests.get(
                    start_url,
                    headers=self.headers,
                    timeout=self.timeout,
                ).text
                self.parse_response(response)
                # Commit per month but keep the connection open: closing it
                # here would make every later month fail.
                self.cz.commit()
        except Exception:
            # Drop the partially inserted month before propagating.
            self.cz.rollback()
            raise
        finally:
            self.commit_close()

    def parse_response(self, response):
        """Extract the weather rows from one page and insert them.

        Args:
            response: HTML text of a month page.

        Rows that lack any of the five expected ``<div>`` text fields are
        skipped instead of raising ``IndexError``. Nothing is committed
        here; the caller decides when to commit.
        """
        if not response or not response.strip():
            return
        tree = etree.HTML(response)
        if tree is None:
            return

        rows = []
        for li in tree.xpath('//ul[@class="thrui"]/li'):
            # div[1]..div[5]: date, high temperature, low temperature,
            # weather, wind direction.
            fields = [
                li.xpath('./div[{}]/text()'.format(index))
                for index in range(1, 6)
            ]
            if not all(fields):
                continue
            # Convert lxml's str subclass to a plain str for the DB driver.
            rows.append(tuple(str(texts[0]) for texts in fields))

        if not rows:
            return

        # Parameterised insert: the driver escapes the scraped values, so
        # quotes in the page text cannot break or alter the statement.
        insert_sql = 'insert into 天气数据 values (%s, %s, %s, %s, %s)'
        self.kit.executemany(insert_sql, rows)
        for row in rows:
            print('ok --{}'.format(row[0]))

    def commit_close(self):
        """Commit pending work, then close the cursor and the connection.

        Calling it again after the connection is closed does nothing.
        """
        if not self.cz.open:
            return
        try:
            self.cz.commit()
        finally:
            self.kit.close()
            self.cz.close()

    def main(self):
        """Run the whole scrape; the database is closed when it returns."""
        self.request_start_url()


if __name__ == '__main__':
    th = ThSpider()
    th.main()
运行效果: