上代码:
import requests
from lxml import etree
import pandas as pd

# Number of result-list pages on the site (the original loop comment states 153).
PAGE_COUNT = 153
# The scraped <em> texts come in runs of 7 numbers, one run per draw.
BALLS_PER_DRAW = 7


def get_url(url, timeout=10):
    """Request *url* and return the response body as text.

    Args:
        url: Address to request.
        timeout: Seconds ``requests`` waits on the server before raising;
            without it a stalled connection would block the script forever.

    Returns:
        The response text (decoded as UTF-8) when the status code is 200,
        otherwise ``None``.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36',}
    response = requests.get(url, headers=headers, timeout=timeout)
    response.encoding = 'utf-8'  # force UTF-8, otherwise Chinese text is garbled
    if response.status_code == 200:
        return response.text
    return None


def group_draws(dates, numbers, group_size=7):
    """Pair each draw date with its run of *group_size* numbers.

    Args:
        dates: Sequence of date texts; ``dates[n]`` belongs to the n-th draw.
            Extra trailing entries are ignored.
        numbers: Flat sequence of ball numbers, *group_size* per draw.
        group_size: How many numbers make up one draw.

    Returns:
        A list of ``(date, balls)`` tuples where ``date`` is a ``str`` and
        ``balls`` is a list of ``str``. A trailing incomplete group is
        dropped; every complete group -- including the last one -- is kept.

    Raises:
        IndexError: If *dates* has fewer entries than there are complete
            groups in *numbers*.
    """
    complete = len(numbers) // group_size
    return [
        (
            str(dates[n]),
            [str(ball) for ball in numbers[n * group_size:(n + 1) * group_size]],
        )
        for n in range(complete)
    ]


def count_frequencies(draws, positions=7):
    """Count, per ball position, how often each number was drawn.

    Args:
        draws: Iterable of draws, each an indexable sequence holding at
            least *positions* numbers.
        positions: Number of ball positions to analyse (at least 1).

    Returns:
        A ``DataFrame`` with the column pairs ``数字1``/``频次1`` ...
        ``数字N``/``频次N``. Within each pair the rows are ordered by
        descending frequency (``Series.value_counts`` order); positions with
        fewer distinct numbers are padded with NaN.
    """
    draws = list(draws)
    columns = []
    for pos in range(positions):
        label = str(pos + 1)
        counts = pd.Series([draw[pos] for draw in draws], dtype=object).value_counts()
        # dtype=object keeps the counts as ints even after NaN padding.
        columns.append(pd.Series(counts.index.tolist(), dtype=object, name='数字' + label))
        columns.append(pd.Series(counts.tolist(), dtype=object, name='频次' + label))
    # axis=1 concat outer-joins on the row index, so shorter columns get NaN.
    return pd.concat(columns, axis=1)


def main():
    """Scrape every result page, save the raw draws, then save the frequency table."""
    rows = []   # (date, str(list of balls)) per draw -> data.xlsx
    draws = []  # list of balls per draw, kept in memory for the analysis
    for page in range(1, PAGE_COUNT + 1):
        url = 'http://kaijiang.zhcw.com/zhcw/html/ssq/list_%s.html' % (page)
        html = get_url(url)
        if html is None:
            # A non-200 page must not crash the whole run in etree.HTML(None).
            print('skipping page %s: request failed' % page)
            continue
        tree = etree.HTML(html)
        dates = tree.xpath('//table[@class="wqhgt"]//tr//td[1]//text()')  # draw dates
        numbers = tree.xpath('//table[@class="wqhgt"]//tr//em//text()')  # ball numbers
        for date, balls in group_draws(dates, numbers, BALLS_PER_DRAW):
            rows.append((date, str(balls)))
            draws.append(balls)

    # Build the frame once instead of appending row by row.
    res = pd.DataFrame(rows, columns=['开奖日期', '双色球'])
    # Raw strings: same path as before, without the invalid \m and \d escapes.
    res.to_excel(r'E:\mmp\data.xlsx', index=False)

    # Frequency analysis runs on the in-memory draws: the saved sheet only has
    # the columns '开奖日期' and '双色球', so it cannot be indexed by position.
    freq = count_frequencies(draws, BALLS_PER_DRAW)
    freq.to_excel(r'E:\mmp\data_result.xlsx', index=False)


if __name__ == '__main__':
    main()
结果如下:
数字1 | 频次1 | 数字2 | 频次2 | 数字3 | 频次3 | 数字4 | 频次4 | 数字5 | 频次5 | 数字6 | 频次6 | 数字7 | 频次7 |
'01' | 454 | '07' | 214 | '14' | 172 | '22' | 169 | '27' | 211 | '33' | 381 | '01' | 164 |
'02' | 371 | '06' | 213 | '15' | 165 | '19' | 168 | '26' | 210 | '32' | 364 | '15' | 156 |
'03' | 275 | '08' | 188 | '13' | 161 | '17' | 168 | '25' | 187 | '31' | 292 | '16' | 156 |
'04' | 238 | '09' | 179 | '12' | 155 | '20' | 164 | '28' | 174 | '30' | 238 | '12' | 155 |
'05' | 219 | '10' | 170 | '11' | 155 | '23' | 158 | '24' | 150 | '29' | 227 | '09' | 154 |
'06' | 184 | '05' | 161 | '16' | 144 | '18' | 147 | '29' | 150 | '28' | 167 | '07' | 151 |
'07' | 127 | '04' | 144 | '10' | 142 | '16' | 134 | '23' | 145 | '27' | 151 | '06' | 151 |
'08' | 117 | '12' | 140 | '17' | 141 | '24' | 130 | '30' | 145 | '26' | 130 | '02' | 150 |
'09' | 105 | '11' | 134 | '18' | 132 | '21' | 130 | '22' | 136 | '25' | 92 | '04' | 147 |
'10' | 67 | '14' | 133 | '09' | 122 | '14' | 123 | '21' | 121 | '24' | 78 | '14' | 144 |
'12' | 50 | '03' | 119 | '19' | 121 | '15' | 119 | '20' | 118 | '22' | 55 | '03' | 142 |
'11' | 47 | '13' | 115 | '08' | 107 | '26' | 115 | '19' | 108 | '23' | 53 | '08' | 142 |
'13' | 32 | '15' | 86 | '07' | 92 | '25' | 102 | '31' | 103 | '21' | 33 | '05' | 140 |
'14' | 19 | '02' | 71 | '20' | 92 | '13' | 87 | '18' | 87 | '18' | 23 | '10' | 137 |
'15' | 16 | '16' | 66 | '21' | 80 | '12' | 68 | '32' | 74 | '19' | 21 | '11' | 136 |
'16' | 14 | '17' | 60 | '22' | 78 | '27' | 66 | '17' | 63 | '20' | 21 | '13' | 131 |
'17' | 7 | '18' | 54 | '06' | 62 | '11' | 60 | '15' | 42 | '17' | 13 | ||
'21' | 5 | '19' | 27 | '24' | 49 | '28' | 52 | '16' | 38 | '16' | 9 | ||
'19' | 3 | '20' | 25 | '23' | 46 | '10' | 52 | '14' | 32 | '15' | 5 | ||
'20' | 2 | '21' | 24 | '05' | 40 | '29' | 40 | '13' | 24 | '11' | 1 | ||
'22' | 2 | '22' | 15 | '25' | 26 | '09' | 30 | '12' | 13 | '10' | 1 | ||
'18' | 1 | '23' | 9 | '04' | 25 | '08' | 21 | '11' | 11 | '14' | 1 | ||
'24' | 1 | '26' | 3 | '26' | 19 | '30' | 16 | '10' | 7 | ||||
| | '24' | 3 | '27' | 16 | '31' | 15 | '09' | 4 | ||||
| | '25' | 2 | '03' | 8 | '06' | 12 | '08' | 2 | ||||
| | '28' | 1 | '29' | 3 | '07' | 8 | '07' | 1 | ||||
| | | | '28' | 2 | '04' | 1 | ||||||
| | | | '30' | 1 | '05' | 1 | ||||||