设计一个基于多个带标签的PL/SQL模板作为配置文件和多组参数的Python Pandas代码程序,实现根据不同的输入参数自动批量地将Oracle数据库中的数据导出为CSV和Excel文件到指定目录上,标签和多个参数(以“_”分割)为组成导出数据文件名,文件已经存在则覆盖原始文件。
该解决方案提供了灵活的配置方式和可靠的错误处理机制,能够满足批量数据导出的需求,同时保证生成文件的规范性和一致性。使用Pandas和cx_Oracle实现多模板多参数的Oracle数据导出程序:
python">import os
import cx_Oracle
import pandas as pd
import yaml
import json
from pathlib import Pathclass OracleDataExporter:def __init__(self, config_path, params_path, output_dir, db_conn_str):"""初始化导出器:param config_path: SQL模板配置文件路径:param params_path: 参数配置文件路径:param output_dir: 输出目录:param db_conn_str: 数据库连接字符串"""self.config_path = config_pathself.params_path = params_pathself.output_dir = Path(output_dir)self.db_conn_str = db_conn_strself.connection = None# 创建输出目录self.output_dir.mkdir(parents=True, exist_ok=True)def _load_config(self):"""加载SQL模板配置"""with open(self.config_path) as f:return yaml.safe_load(f)['templates']def _load_params(self):"""加载参数配置"""with open(self.params_path) as f:return json.load(f)def _generate_filename(self, label, params, param_order):"""生成标准文件名:param label: 模板标签:param params: 参数字典:param param_order: 参数顺序列表"""param_values = [str(params[key]) for key in param_order]return f"{label}_{'_'.join(param_values)}"def _export_files(self, df, base_filename):"""导出数据文件"""csv_path = self.output_dir / f"{base_filename}.csv"excel_path = self.output_dir / f"{base_filename}.xlsx"# 导出CSV(覆盖模式)df.to_csv(csv_path, index=False, mode='w')# 导出Excel(覆盖模式)df.to_excel(excel_path, index=False, engine='openpyxl')return csv_path, excel_pathdef _validate_parameters(self, params, required_params):"""验证参数完整性"""missing = [p for p in required_params if p not in params]if missing:raise ValueError(f"Missing required parameters: {missing}")def execute(self):"""执行导出流程"""# 加载配置templates = self._load_config()all_params = self._load_params()try:# 建立数据库连接self.connection = cx_Oracle.connect(self.db_conn_str)# 遍历所有模板for template in templates:label = template['label']sql_template = template['sql']param_order = template['params_order']param_groups = all_params.get(label, [])if not param_groups:print(f"⚠️ No parameters found for template '{label}', skipping...")continueprint(f"🔨 Processing template: {label}")# 处理每个参数组for params in param_groups:try:self._validate_parameters(params, param_order)base_name = self._generate_filename(label, params, param_order)print(f"🔧 Executing with parameters: {params}")df = pd.read_sql(sql_template, self.connection, params=params)csv_path, excel_path = self._export_files(df, base_name)print(f"✅ Exported: {csv_path.name} and {excel_path.name}")except Exception as e:print(f"❌ Error processing parameters {params}: {str(e)}")continuefinally:if self.connection:self.connection.close()if __name__ == "__main__":# 配置参数CONFIG = {"config_path": "config/sql_templates.yaml","params_path": "config/parameters.json","output_dir": "exports","db_conn_str": "username/password@hostname:port/service_name"}# 执行导出exporter = OracleDataExporter(**CONFIG)exporter.execute()
配套配置文件示例
1. SQL模板配置文件 (sql_templates.yaml)
templates:- label: sales_reportsql: |SELECT *FROM sales_dataWHERE region = :regionAND year = :yearAND product_category = :categoryparams_order: [region, year, category]- label: customer_listsql: |SELECT customer_id, name, statusFROM customersWHERE registration_date >= :start_dateAND registration_date <= :end_dateparams_order: [start_date, end_date]
2. 参数配置文件 (parameters.json)
{"sales_report": [{"region": "North", "year": 2023, "category": "Electronics"},{"region": "South", "year": 2022, "category": "Furniture"}],"customer_list": [{"start_date": "2023-01-01", "end_date": "2023-03-31"},{"start_date": "2023-04-01", "end_date": "2023-06-30"}]
}
关键功能说明
-
模板管理:
- 使用YAML格式定义SQL模板
- 每个模板包含唯一标签、SQL语句和参数顺序定义
- 支持多行SQL语句和命名参数
-
参数管理:
- JSON格式存储参数组
- 参数组按模板标签分组管理
- 自动验证参数完整性
-
文件生成:
- 自动生成标准化文件名(标签_参数1_参数2…)
- 同时生成CSV和Excel文件
- 自动覆盖已存在文件
-
错误处理:
- 参数完整性验证
- 数据库连接安全管理
- 异常捕获和友好提示
-
扩展性:
- 易于添加新模板和参数组
- 支持复杂的SQL查询
- 可自定义输出目录和文件命名规则
使用说明
- 安装依赖:
pip install cx-Oracle pandas pyyaml openpyxl
- 配置文件结构:
project/
├── config/
│ ├── sql_templates.yaml
│ └── parameters.json
├── exports/
├── main.py
- 运行程序:
python">python main.py
- 输出结果示例:
exports/
├── sales_report_North_2023_Electronics.csv
├── sales_report_North_2023_Electronics.xlsx
├── sales_report_South_2022_Furniture.csv
├── sales_report_South_2022_Furniture.xlsx
├── customer_list_2023-01-01_2023-03-31.csv
├── customer_list_2023-01-01_2023-03-31.xlsx
├── customer_list_2023-04-01_2023-06-30.csv
└── customer_list_2023-04-01_2023-06-30.xlsx