一、优化:输入城市代码,获取该城市的历史天气信息
# coding=utf-8
import requests
import re
import csv
import datetime


class WeatherForecast(object):
    """Scrape monthly weather history for one city from tianqi.2345.com.

    Months are requested one at a time, from January of ``start_year``
    through ``end_month`` of ``end_year``.  Every table row found in the
    responses is accumulated in ``self.data_list`` and finally written to a
    CSV file named ``<city_code><YYYYMMDD>.csv`` in the working directory.

    Example area ids taken from the original notes: 57036 (Xi'an) and
    57048 (Xianyang).
    """

    # One history-table row: date, max temp, min temp, weather, wind, AQI.
    # Compiled once at class level instead of on every parse_data() call.
    _ROW_PATTERN = re.compile(
        r'<td>(?P<date>.*?)</td>.*?<td style="color:#ff5040;">(?P<max>.*?)</td>'
        r'.*?<td style="color:#3097fd;" >(?P<min>.*?)</td>.*?<td>(?P<weather>.*?)</td>'
        r'.*?<td>(?P<cloud>.*?)</td>.*?<td><span class="history-aqi wea-aqi.*?>(?P<sky>.*?)</span></td>',
        re.S,
    )

    def __init__(self, city_code, start_year, end_year, end_month):
        """Store the query range and prepare the request headers.

        Args:
            city_code: Area id used in the ``areaInfo[areaId]`` URL parameter.
            start_year: First year to fetch (fetched from January).
            end_year: Last year to fetch (inclusive).
            end_month: Last month of ``end_year`` to fetch (inclusive).
        """
        self.city_code = city_code
        self.start_year = start_year
        self.end_year = end_year
        self.end_month = end_month
        # NOTE(review): 'br' is advertised here; decoding brotli responses
        # presumably needs an optional brotli package - confirm it is present.
        self.headers = {
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.24',
            'accept-encoding': 'gzip, deflate, br',
        }
        # Rows parsed so far; grows across parse_data() calls.
        self.data_list = []

    def get_content(self, url, timeout=10):
        """Fetch ``url`` and return the ``data`` field of its JSON body.

        Args:
            url: Full history URL for one month.
            timeout: Seconds to wait for the server, so a stalled
                connection cannot hang the whole run.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        res = requests.get(url=url, headers=self.headers, timeout=timeout)
        # Fail with a clear HTTP error instead of an obscure JSON/Key error.
        res.raise_for_status()
        return res.json()['data']

    def parse_data(self, content):
        """Extract every table row from ``content`` into ``self.data_list``.

        Args:
            content: HTML text containing the history table rows.

        Returns:
            ``self.data_list`` - the cumulative list of row dicts with the
            keys ``date``, ``max``, ``min``, ``weather``, ``cloud``, ``sky``.
        """
        self.data_list.extend(
            match.groupdict() for match in self._ROW_PATTERN.finditer(content)
        )
        return self.data_list

    def write_csv(self, data_list, encoding=None):
        """Write ``data_list`` to ``<city_code><YYYYMMDD>.csv`` and echo rows.

        Args:
            data_list: Iterable of row dicts as produced by parse_data().
            encoding: Text encoding for the file.  ``None`` keeps the
                platform default; pass e.g. ``'utf-8-sig'`` when the default
                locale encoding cannot represent the Chinese header.
        """
        current_date = datetime.datetime.now().strftime("%Y%m%d")
        save_name = str(self.city_code) + current_date + '.csv'
        with open(save_name, 'w', newline='', encoding=encoding) as f:
            writer = csv.writer(f)
            writer.writerow(['日期', '最高温度', '最低温度', '天气', '风力风向', '空气质量'])
            for row in data_list:
                writer.writerow(row.values())
                print(row.values())

    def _months_to_fetch(self):
        """Yield ``(year, month)`` pairs covered by the configured range."""
        for year in range(self.start_year, self.end_year + 1):
            # Only the final year is cut short at end_month.
            last_month = min(self.end_month, 12) if year == self.end_year else 12
            for month in range(1, last_month + 1):
                yield year, month

    def _build_url(self, year, month):
        """Return the history URL for this city and the given month."""
        return f'https://tianqi.2345.com/Pc/GetHistory?areaInfo%5BareaId%5D={self.city_code}&areaInfo%5BareaType%5D=2&date%5Byear%5D={year}&date%5Bmonth%5D={month}'

    def run(self):
        """Fetch every month in range, then write all rows to the CSV once."""
        for year, month in self._months_to_fetch():
            print('正在获取第{0}年{1}月的天气!'.format(year, month))
            self.parse_data(self.get_content(self._build_url(year, month)))
        # Use the accumulated list so an empty range still yields a valid
        # header-only file instead of failing on an unbound local.
        self.write_csv(self.data_list)
        print('全部获取完毕,请在程序目录获取下载xxxx.csv!')


# Usage: change the city id used in the URL and adjust the years/month
# entered below.
# 具体内容查看 “数据分析.txt”里面的信息
# 天气预报查询接口
# https://tianqi.2345.com/wea_history/57036.htm
if __name__ == '__main__':
    # Interactive entry point: read the city id and the date range from the
    # console, then download every month up to end_month of end_year.
    print("输入起始年,结束年,结束月")
    city_code = int(input("起始城市代码:"))
    start_year = int(input("起始年:"))
    end_year = int(input("结束年:"))
    end_month = int(input("结束年月:"))
    weather = WeatherForecast(city_code, start_year, end_year, end_month)
    weather.run()
    # Adjusted to fetch data only up to the given month of the given year.
二、代码分析
本节对数据提取部分进行分析,即数据解析函数 parse_data 的分析:
def parse_data(self, content):
    """Extract every history-table row from ``content``.

    Each match becomes a dict with the keys ``date``, ``max``, ``min``,
    ``weather``, ``cloud`` and ``sky`` and is appended to ``self.data_list``.

    Args:
        content: HTML text containing the table rows.

    Returns:
        ``self.data_list``, i.e. all rows collected so far (cumulative).
    """
    # re.S lets '.' span newlines, since one row's cells may be on
    # separate lines of the HTML.
    result = re.compile(
        r'<td>(?P<date>.*?)</td>.*?<td style="color:#ff5040;">(?P<max>.*?)</td>'
        r'.*?<td style="color:#3097fd;" >(?P<min>.*?)</td>.*?<td>(?P<weather>.*?)</td>'
        r'.*?<td>(?P<cloud>.*?)</td>.*?<td><span class="history-aqi wea-aqi.*?>(?P<sky>.*?)</span></td>',
        re.S,
    )
    for it in result.finditer(content):
        self.data_list.append(it.groupdict())
    return self.data_list
接口返回的内容是 JSON 格式,其中的 data 字段就是需要的数据
那么这个数据内容是什么呢?我们打印出来看下
# Pattern for one row of the history table, one fragment per cell:
# date, max temperature, min temperature, weather, wind, air quality.
# DOTALL lets '.' cross line breaks between the cells.
result = re.compile(
    r'<td>(?P<date>.*?)</td>'
    r'.*?<td style="color:#ff5040;">(?P<max>.*?)</td>'
    r'.*?<td style="color:#3097fd;" >(?P<min>.*?)</td>'
    r'.*?<td>(?P<weather>.*?)</td>'
    r'.*?<td>(?P<cloud>.*?)</td>'
    r'.*?<td><span class="history-aqi wea-aqi.*?>(?P<sky>.*?)</span></td>',
    flags=re.DOTALL,
)
通过上面的正则表达式,就可以把其中的内容提取出来了