一、引言
在日常开发中,有些需要对SQL进行解析的场景,比如读取表结构信息,生成文档、逆向工程或是生成一些业务代码。例如,当我们需要对Oracle的DDL(数据定义语言)进行解析时,一个强大的SQL语法解析器可以帮助我们直接提取出表的结构信息,包括表名、备注、字段信息以及索引信息。这一功能在以下场景中尤为有用:
- 代码生成:通过解析DDL,我们可以自动生成对应的实体类、数据库访问接口甚至是完整的数据库操作代码,极大地减少了手动编写重复代码的工作量,提高了开发效率。
- 数据库逆向工程:在接手一个遗留系统或者对现有系统进行重构时,通过解析现有的DDL,可以快速地理解和重构数据库模型,为后续的开发工作打下坚实的基础。
- 数据库文档化:自动从DDL中提取表结构信息,可以帮助我们生成数据库文档,使得数据库的设计和结构更加清晰,便于团队成员之间的沟通和协作。
- 数据库迁移:在数据库迁移过程中,解析源数据库的DDL可以帮助我们快速地构建目标数据库的结构,减少迁移过程中的错误和风险。
本文将记录了基于Druid SQLParser这一工具,对DDL进行解析的详细过程。
二、环境
- JDK 17
- Maven依赖
<dependency><groupId>com.alibaba</groupId><artifactId>druid</artifactId><version>1.2.16</version>
</dependency>
三、待解析的DDL
以下内容是基于PowerDesigner做的Oracle 10g的DDL脚本,也是我们需要解析的内容,其中包含drop、create、comment等语句,这里我们只对表、字段信息进行解析。
drop index "Index_1";
drop table "sr_user_info" cascade constraints;
/*==============================================================*/
/* Table: "sr_user_info" */
/*==============================================================*/
create table "sr_user_info" ("user_id" VARCHAR2(64) not null,"type" VARCHAR2(64) not null,"login_name" VARCHAR2(64),"user_name" VARCHAR2(64),"display_name" VARCHAR2(256),"organization" VARCHAR2(64),"region" VARCHAR2(64),"login_count" INTEGER,"request_count" bigint,"status" INTEGER,"last_login_date" DATE,"create_date" DATE,"update_date" DATE,"money" number(9,2),constraint PK_SR_USER_INFO primary key ("user_id")
);
comment on table "sr_user_info" is '记录并统计用户行为';
comment on column "sr_user_info"."user_id" is '用户ID';
comment on column "sr_user_info"."type" is '类型';
comment on column "sr_user_info"."login_name" is '登录名';
comment on column "sr_user_info"."user_name" is '用户名';
comment on column "sr_user_info"."display_name" is '显示名称';
comment on column "sr_user_info"."organization" is '组织结构';
comment on column "sr_user_info"."region" is '区域';
comment on column "sr_user_info"."login_count" is '登录次数';
comment on column "sr_user_info"."request_count" is '请求次数';
comment on column "sr_user_info"."status" is '状态';
comment on column "sr_user_info"."last_login_date" is '最后登录时间';
comment on column "sr_user_info"."create_date" is '创建时间';
comment on column "sr_user_info"."update_date" is '修改时间';
comment on column "sr_user_info"."money" is '金钱';
/*==============================================================*/
/* Index: "Index_1" */
/*==============================================================*/
create unique index "Index_1" on "sr_user_info" ("user_id" ASC,"region" ASC
);
四、解析后的对象结构
以下是DDL解析后的对象结构。
{"TableCode":"sr_user_info","comment":"记录并统计用户行为","columns":[{"colCode":"user_id","comment":"用户ID","length":64,"pk":false,"type":"VARCHAR2"},{"colCode":"type","comment":"类型","length":64,"pk":false,"type":"VARCHAR2"},{"colCode":"login_name","comment":"登录名","length":64,"pk":false,"type":"VARCHAR2"},{"colCode":"user_name","comment":"用户名","length":64,"pk":false,"type":"VARCHAR2"},{"colCode":"display_name","comment":"显示名称","length":256,"pk":false,"type":"VARCHAR2"},{"colCode":"organization","comment":"组织结构","length":64,"pk":false,"type":"VARCHAR2"},{"colCode":"region","comment":"区域","length":64,"pk":false,"type":"VARCHAR2"},{"colCode":"login_count","comment":"登录次数","pk":false,"type":"INTEGER"},{"colCode":"request_count","comment":"请求次数","pk":false,"type":"bigint"},{"colCode":"status","comment":"状态","pk":false,"type":"INTEGER"},{"colCode":"last_login_date","comment":"最后登录时间","pk":false,"type":"DATE"},{"colCode":"create_date","comment":"创建时间","pk":false,"type":"DATE"},{"colCode":"update_date","comment":"修改时间","pk":false,"type":"DATE"},{"colCode":"money","length":9,"pk":false,"precision":2,"type":"number"}]
}
五、完整的UT类
运行下面的单元测试方法ddlParserTest()可以看到完整效果。需要注意的是如果表名、字段名中有些转意字符可以用SQLUtils.normalize(tableName)方法去掉。
java">import com.alibaba.druid.DbType;
import com.alibaba.druid.sql.SQLUtils;
import com.alibaba.druid.sql.ast.SQLExpr;
import com.alibaba.druid.sql.ast.expr.SQLIntegerExpr;
import com.alibaba.druid.sql.ast.statement.SQLColumnDefinition;
import com.alibaba.druid.sql.ast.statement.SQLCommentStatement;
import com.alibaba.druid.sql.ast.statement.SQLTableElement;
import com.alibaba.druid.sql.dialect.oracle.ast.stmt.OracleCreateTableStatement;
import com.alibaba.druid.sql.dialect.oracle.ast.stmt.OraclePrimaryKey;
import com.alibaba.druid.support.json.JSONUtils;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;import java.util.ArrayList;
import java.util.List;
import java.util.Map;public class SqlParserTest {private String ddl = "";public static class TabInfo{public String TableCode;public String comment;public List<ColumnInfo> columns = new ArrayList<>();}public static class ColumnInfo{public String colCode;public String comment;public String type;public Integer length;public Integer precision;public boolean pk = false;}@Testpublic void ddlParserTest(){//解析DDLvar statements = SQLUtils.parseStatements(ddl,DbType.oracle);//获取索引值java.util.function.BiFunction<List<SQLExpr>,Integer,Integer> getArg=(exprs, index)->{if(exprs==null || exprs.size()<index+1)return null;var expr = exprs.get(index);if(!(expr instanceof SQLIntegerExpr))return null;var intExpr = (SQLIntegerExpr) expr;var n = intExpr.getNumber().intValue();return n;};TabInfo tabInfo = new TabInfo();List<String> pkCols = new ArrayList<>();Map<String,ColumnInfo> colMap = new java.util.HashMap<>();//解析表名java.util.function.Consumer parserCreateTable = (statement) -> {OracleCreateTableStatement table = (OracleCreateTableStatement) statement;var tableName = table.getTableName();tableName = SQLUtils.normalize(tableName);System.out.println("--------------表名------------------");System.out.println("表名\t"+tableName);tabInfo.TableCode = tableName;for (SQLTableElement em : table.getTableElementList()) {if(em instanceof SQLColumnDefinition){var col = (SQLColumnDefinition) em;var colName = col.getColumnName();//字段名colName = SQLUtils.normalize(colName);var dataType = col.getDataType();//数据类型对象var type = dataType.getName();//类型var len = getArg.apply(dataType.getArguments(),0);//长度var precision = getArg.apply(dataType.getArguments(),1);//精度System.out.println(String.format("字段:%s\t%s\t%s\t%s", colName, type, len, precision));//var colInfo = new ColumnInfo();colInfo.colCode = colName;colInfo.type = type;colInfo.length = len;colInfo.precision = precision;colInfo.pk = pkCols.contains(colName);//是否主键字段colMap.put(colName, colInfo);tabInfo.columns.add(colInfo);}else if(em instanceof OraclePrimaryKey){var pk = (OraclePrimaryKey) em;var pkName = pk.getName().getSimpleName();//主键名pkName = SQLUtils.normalize(pkName);//主键字段列表pk.getColumns().forEach(x-> {var colName = x.getExpr().toString();colName = SQLUtils.normalize(colName);pkCols.add(colName);});System.out.println(String.format("主键:%s\t%s\t%s\t%s", pkName, pk.getComment()//注释, pkCols, JSONUtils.toJSONString(pk.getAttributes())));}}};//解析注释java.util.function.Consumer parserComment = (statement) -> {var com = (SQLCommentStatement) statement;System.out.println("------------------注释----------------");if(com.getType() == SQLCommentStatement.Type.TABLE){var tab = com.getOn();var tabName = tab.getName().getSimpleName();//表明var tabComment = com.getComment().toString();//表注释tabComment = SQLUtils.normalize(tabComment);System.out.println(String.format("tab注释:%s\t%s",tabName,tabComment));//tabInfo.comment = tabComment;}else if(com.getType() == SQLCommentStatement.Type.COLUMN){var tab = com.getOn();var tabName = tab.getName().toString();var colName = tab.getName().getSimpleName();//字段名colName = SQLUtils.normalize(colName);var comment = String.valueOf(com.getComment());//字段注释comment = SQLUtils.normalize(comment);System.out.println(String.format("col注释:%s\t%s",colName,comment));//var colInfo = colMap.get(colName);colInfo.comment = comment;}else if(com.getType() == SQLCommentStatement.Type.INDEX){}else if(com.getType() == SQLCommentStatement.Type.VIEW){}else{System.out.println("未知类型"+com.getType());}};//解析语句statements.forEach(statement -> {if(statement instanceof OracleCreateTableStatement){parserCreateTable.accept(statement);}else if(statement instanceof SQLCommentStatement){parserComment.accept(statement);}});System.out.println("\n------------------JSON----------------");String jsonStr = com.alibaba.fastjson.JSON.toJSONString(tabInfo,true);System.out.println(jsonStr);}@BeforeTestpublic void beforeTest() {ddl = "drop index \"Index_1\";\n" +"drop table \"sr_user_info\" cascade constraints;\n" +"/*==============================================================*/\n" +"/* Table: \"sr_user_info\" */\n" +"/*==============================================================*/\n" +"create table \"sr_user_info\" (\n" +" \"user_id\" VARCHAR2(64) not null,\n" +" \"type\" VARCHAR2(64) not null,\n" +" \"login_name\" VARCHAR2(64),\n" +" \"user_name\" VARCHAR2(64),\n" +" \"display_name\" VARCHAR2(256),\n" +" \"organization\" VARCHAR2(64),\n" +" \"region\" VARCHAR2(64),\n" +" \"login_count\" INTEGER,\n" +" \"request_count\" bigint,\n" +" \"status\" INTEGER,\n" +" \"last_login_date\" DATE,\n" +" \"create_date\" DATE,\n" +" \"update_date\" DATE,\n" +" \"money\" number(9,2),\n" +" constraint PK_SR_USER_INFO primary key (\"user_id\")\n" +");\n" +"comment on table \"sr_user_info\" is '记录并统计用户行为';\n" +"comment on column \"sr_user_info\".\"user_id\" is '用户ID';\n" +"comment on column \"sr_user_info\".\"type\" is '类型';\n" +"comment on column \"sr_user_info\".\"login_name\" is '登录名';\n" +"comment on column \"sr_user_info\".\"user_name\" is '用户名';\n" +"comment on column \"sr_user_info\".\"display_name\" is '显示名称';\n" +"comment on column \"sr_user_info\".\"organization\" is '组织结构';\n" +"comment on column \"sr_user_info\".\"region\" is '区域';\n" +"comment on column \"sr_user_info\".\"login_count\" is '登录次数';\n" +"comment on column \"sr_user_info\".\"request_count\" is '请求次数';\n" +"comment on column \"sr_user_info\".\"status\" is '状态';\n" +"comment on column \"sr_user_info\".\"last_login_date\" is '最后登录时间';\n" +"comment on column \"sr_user_info\".\"create_date\" is '创建时间';\n" +"comment on column \"sr_user_info\".\"update_date\" is '修改时间';\n" +"comment on column \"sr_user_info\".\"money\" is '金钱';\n" +"/*==============================================================*/\n" +"/* Index: \"Index_1\" */\n" +"/*==============================================================*/\n" +"create unique index \"Index_1\" on \"sr_user_info\" (\n" +" \"user_id\" ASC,\n" +" \"region\" ASC\n" +");\n"; }
}
六、控制台输出
--------------表名------------------
表名 sr_user_info
字段:user_id VARCHAR2 64 null
字段:type VARCHAR2 64 null
字段:login_name VARCHAR2 64 null
字段:user_name VARCHAR2 64 null
字段:display_name VARCHAR2 256 null
字段:organization VARCHAR2 64 null
字段:region VARCHAR2 64 null
字段:login_count INTEGER null null
字段:request_count bigint null null
字段:status INTEGER null null
字段:last_login_date DATE null null
字段:create_date DATE null null
字段:update_date DATE null null
字段:money number 9 2
主键:PK_SR_USER_INFO null [user_id] {}
------------------注释----------------
tab注释:"sr_user_info" 记录并统计用户行为
col注释:user_id 用户ID
col注释:type 类型
col注释:login_name 登录名
col注释:user_name 用户名
col注释:display_name 显示名称
col注释:organization 组织结构
col注释:region 区域
col注释:login_count 登录次数
col注释:request_count 请求次数
col注释:status 状态
col注释:last_login_date 最后登录时间
col注释:create_date 创建时间
col注释:update_date 修改时间
总结
不同的DDL定义语法会解析为不同的statment对象,这里只演示了一种DDL格式,比较原生。开源项目中有些很多测试用例其中继承OracleSchemaStatVisitor解析,看着内聚性更强。还需要注意的是,有些DDL将字段注释写在create语句中,上面的代码还要做相应的修改。