python解析网页上的json数据落地到EXCEL

news/2024/11/17 4:28:39/

安装必要的库

python">import requests
import pandas as pd
import os
import sys
import io
import urllib3
import json

测试数据

  • 网页上的数据结构如下
{"success": true,"code": "CIFM_0000","encode": null,"message": "ok","url": null,"total": 3,"items": [{"summaryDate": "20240611","summaryType": "naturalDay","workday": true,"newCustNum": 1,"haveCustNum": 1691627,"newAccountNum": 2,"haveAccountNum": 1692934,"totalShare": 4947657341.69,"netCash": -3523387.25,"yield": 0.01386},{"summaryDate": "20240612","summaryType": "naturalDay","workday": true,"newCustNum": 5,"haveCustNum": 1672766,"newAccountNum": 5,"haveAccountNum": 1674071,"totalShare": 4927109080.29,"netCash": -20735233.55,"yield": 0.01387},{"summaryDate": "20240613","summaryType": "naturalDay","workday": true,"newCustNum": 4,"haveCustNum": 1662839,"newAccountNum": 5,"haveAccountNum": 1664146,"totalShare": 4927405885.59,"netCash": 110659.8,"yield": 0.01389}],"data": null,"info": null
}

详细逻辑代码

python">import requests
import pandas as pd
import os
import sys
import io
import urllib3
import jsonurllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')url = "https://ip/ma/web/trade/dailySummary?startDate={pi_startdate}&endDate={pi_enddate}"
headers = {"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7","Accept-Language": "zh-CN,zh;q=0.9","User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0",
}def save_data(data, columns, excel_path, sheet_name):df = pd.DataFrame(data, columns=columns)if not os.path.exists(excel_path):df.to_excel(excel_path, sheet_name=sheet_name, index=False)else:with pd.ExcelWriter(excel_path, engine='openpyxl', mode='a') as writer:df.to_excel(writer, sheet_name=sheet_name, index=False)def json2list(response_text):# 把json数据转化为python用的类型json_dict = json.loads(response_text)src_total = json_dict["total"]print("src_total: {}".format(src_total))items = json_dict["items"]excel_columns = ['summaryDate','summaryType','workday','newCustNum','haveCustNum','newAccountNum','haveAccountNum','totalShare','netCash','yield']excel_data = []# 使用XPath定位元素并打印内容for item in items:excel_row_data = []for column_index in range(len(excel_columns)):data = str(item[excel_columns[column_index]])if excel_columns[column_index] == 'workday':data = str(0 if data == "False" else 1)excel_row_data.append(data)excel_data.append(excel_row_data)trg_total = len(excel_data)# 稽核print("trg_total: {}".format(trg_total))vn_biasval = trg_total - src_totalif vn_biasval != 0:print("This audit-rule is not passed,diff: {}".format(vn_biasval))exit(-1)else:print("This audit-rule is passed,diff: {}".format(vn_biasval))return excel_columns, excel_dataif __name__ == '__main__':try:excel_path = "C:/xxx/temp/ylb_dailySummary_{pi_startdate}_{pi_enddate}.xlsx"sheet_name = 'result_data'pi_startdate = 20240611pi_enddate = 20240613excel_path = excel_path.format(pi_startdate=pi_startdate, pi_enddate=pi_enddate)url = url.format(pi_startdate=pi_startdate, pi_enddate=pi_enddate)print("url:{}".format(url))print("excel_path:{}".format(excel_path))response_text = requests.get(url, headers=headers, timeout=(21, 300), verify=False).content.decode("utf8")excel_columns, excel_data = json2list(response_text)print("=================excel_columns=======================")print(excel_columns)print("=================excel_data==========================")for x in excel_data:print(x)print("=====================================================")# 文件存在,则删除if os.path.exists(excel_path):os.remove(excel_path)# 保存文件save_data(excel_data, excel_columns, excel_path, sheet_name)print("save_data is end.")except Exception as e:print("[ERROR]:" + str(e))exit(-1)

代码解析

  • 请求头
    构造请求头
python">urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')url = "https://ip/ma/web/trade/dailySummary?startDate={pi_startdate}&endDate={pi_enddate}"
headers = {"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7","Accept-Language": "zh-CN,zh;q=0.9","User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0",
}
  • 数据保存到excel
    如果excel已经存在,那么则会将数据追加到excel
python">def save_data(data, columns, excel_path, sheet_name):df = pd.DataFrame(data, columns=columns)if not os.path.exists(excel_path):df.to_excel(excel_path, sheet_name=sheet_name, index=False)else:with pd.ExcelWriter(excel_path, engine='openpyxl', mode='a') as writer:df.to_excel(writer, sheet_name=sheet_name, index=False)
  • 解析json数据获取字段名称以及对应的数据list列表
python">def json2list(response_text):# 把json数据转化为python用的类型json_dict = json.loads(response_text)src_total = json_dict["total"]print("src_total: {}".format(src_total))items = json_dict["items"]excel_columns = ['summaryDate','summaryType','workday','newCustNum','haveCustNum','newAccountNum','haveAccountNum','totalShare','netCash','yield']excel_data = []# 使用XPath定位元素并打印内容for item in items:excel_row_data = []for column_index in range(len(excel_columns)):data = str(item[excel_columns[column_index]])if excel_columns[column_index] == 'workday':data = str(0 if data == "False" else 1)excel_row_data.append(data)excel_data.append(excel_row_data)trg_total = len(excel_data)# 稽核print("trg_total: {}".format(trg_total))vn_biasval = trg_total - src_totalif vn_biasval != 0:print("This audit-rule is not passed,diff: {}".format(vn_biasval))exit(-1)else:print("This audit-rule is passed,diff: {}".format(vn_biasval))return excel_columns, excel_data
  • 测试方法入口
python">if __name__ == '__main__':

测试结果

会生成ylb_dailySummary_20240611_20240613.xlsx文件
result_data


http://www.ppmy.cn/news/1547625.html

相关文章

【Linux】————信号

作者主页: 作者主页 本篇博客专栏:Linux 创作时间 :2024年11月12日 信号和信号量 首先说明这两者之间没有任何关系 信号:信号是在软件层次对中断机制的一种模拟,是一种异步通知机制,用于通知进程发生…

Linux下编译安装Nginx

以下是在Linux下编译安装Nginx的详细步骤: 一、安装依赖库 安装基本编译工具和库 在Debian/Ubuntu系统中,使用以下命令安装:sudo apt -y update sudo apt -y install build - essential libpcre3 - dev zlib1g - dev libssl - dev在CentOS/…

QTcpSocket 服务端和客户端

前提&#xff1a; pro文件中添加 QT network 服务端主要采用信号槽机制&#xff0c;代码如如下 核心代码头文件#ifndef TCPSERVER_H #define TCPSERVER_H#include <QObject>#include <QTcpServer> #include <QTcpSocket> #include <QDebug> #inclu…

用 Python 从零开始创建神经网络(七):梯度下降(Gradient Descent)/导数(Derivatives)

梯度下降&#xff08;Gradient Descent&#xff09;/导数&#xff08;Derivatives&#xff09; 引言1. 参数对输出的影响2. 斜率&#xff08;The Slope&#xff09;3. 数值导数&#xff08;The Numerical Derivative&#xff09;4. 解析导数&#xff08;The Analytical Derivat…

七:如何用Chrome的Network面板分析HTTP报文

在Web开发和调试中,分析HTTP请求和响应报文可以帮助开发者了解浏览器和服务器之间的通信细节,定位并解决各种问题。Chrome浏览器的Network(网络)面板是一个强大的开发工具,它可以详细展示HTTP请求的各个方面,包括请求方法、状态码、头部信息、负载数据等。本文将介绍如何…

Mock.js生成随机数据,拦截 Ajax 请求

Mock.js 是一个用于模拟数据的 JavaScript 库&#xff0c;特别适合用于前端开发过程中生成假数据进行接口测试。它可以拦截 Ajax 请求并生成随机数据&#xff0c;还可以模拟服务器的响应来加速前端开发。 一、安装 Mock.js 可以通过以下几种方式引入 Mock.js&#xff1a; CDN…

1909. 删除一个元素使数组严格递增【简单】

题目描述 给你一个下标从 0 开始的整数数组 nums &#xff0c;如果 恰好 删除 一个 元素后&#xff0c;数组 严格递增 &#xff0c;那么请你返回 true &#xff0c;否则返回 false 。如果数组本身已经是严格递增的&#xff0c;请你也返回 true 。 数组 nums 是 严格递增 的定…