hive针对带有特殊字符非法json数据解析

news/2024/12/2 20:29:54/

一、背景

有的时候前端或者后端进行埋点日志,会把json里面的数据再加上双引号,或者特殊字符,在落日志的时候,组装的格式就不是正常的json数据了,我们就需要将带有特殊字符的json数据解析成正常的json数据。

二、正则表达式基础介绍

1.正则表达式匹配引号

匹配双引号"用"
匹配单引号’用’

2.正则表达式匹配正则表达式中用到的特殊符号时需加\

如()[]{}/|-+
匹配[用\[
匹配]用\]
匹配\用\\
匹配/用\/
匹配|用\|
匹配-用\-
匹配+用\+

匹配大写英文或小写英文或数字或下划线用\w或0-9a-zA-Z_

3.正则表达式中各种扩号()[]{}作用

中括号[]表示匹配单个字符,匹配中扩号里列出的任意一个字符

[dsa]//匹配d或s或a

小括号()表示匹配字符串,匹配小扩号里列出的所有字符构成的字符串

(dsaff) //仅能匹配dsaff

大括号{}表示匹配的次数,放于()或[]之后

[dsa]{1,8}//匹配1-8次[dsa],如匹配d,dd,dddddddd
(dsa){1,8}//匹配1-8次(dsa),如匹配dsa,dsadsadsadsadsadsadsadsa

4.常用匹配例子
例子(匹配英语键盘上的任意非空字符)

QRegExp re("^[\\w~!@#$%^&*()+`={}:;<>?,.|'\"\[\\]\\-\\/\\\\]+$");QString test("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ`1234567890-=~!@#$%^&*()_+[]{}|;:'\"\\/,.<>?");bool match = re.exactMatch(test);  //match=true

例子(匹配任意合法表示的有理数)

QRegExp reg("^(\\-(?!0(?!\\.))|\\+(?!0(?!\\.)))?(0|[1-9]\\d*)(\\.\\d+)?$"); QString test("41424.4155346");bool match = re.exactMatch(test);  //match=true

例子(匹配任意合法表示的非负有理数)

QRegExp reg("^(\\+(?!0(?!\\.)))?(0|[1-9]\\d*)(\\.\\d+)?$"); QString test("41424.4155346");bool match = re.exactMatch(test);  //match=true

例子(匹配任意合法表示的正有理数)

QRegExp reg("^(\\+)?(0(?=\\.)|[1-9]\\d*)(\\.\\d+)?$"); QString test("41424.4155346");bool match = re.exactMatch(test);  //match=true

例子(匹配任意合法表示的整型数字)

QRegExp reg("^(\\-(?!0)|\\+(?!0))?(0|[1-9]\\d*)$");QString test("414246");bool match = re.exactMatch(test);  //match=true

例子(匹配任意合法表示的非负整型数字)

QRegExp reg("^(\\+(?!0))?(0|[1-9]\\d*)$"); QString test("414246");bool match = re.exactMatch(test);  //match=true

例子(匹配任意合法表示的正整型数字)

QRegExp reg("^(\\+)?([1-9]\\d*)$"); QString test("414246");bool match = re.exactMatch(test);  //match=true

例子(匹配任意合法表示的密码)

QRegExp reg("^[\\w~!@#$%^&*()+`={}:;<>?,.|'\"\[\\]\\-\\/\\\\]+$");if (!reg.exactMatch(value.data())) {message_ =QObject::tr("The password can only contanin numbers, English ""characters or special characters  ").toStdString();return false;}return true;

例子(匹配任意合法表示的密码)

QRegExp reg("^[\\w~!@#$%^&*()+`={}:;<>?,.|'\"\[\\]\\-\\/\\\\]+$");if (!reg.exactMatch(value.data())) {message_ =QObject::tr("The password can only contanin numbers, English ""characters or special characters  ").toStdString();return false;}return true;

三、带有特殊字符的json

{"event":"eventDiagnose","extra":"{"url":"http://10.60.12.19/yhyy/jyxxgx/wgcf/c01001Action.do?aac001=21000743804&appkey=34507162&djsj=&hzlx=1&jzsxh=MT0097220602524289&kh=6214670060006182632&ksmc=%e5%86%85%e5%88%86%e6%b3%8c%e4%b8%93%e4%b8%9a&method=guahao.common.timestamp&mtbz=M01600&mzsxh=MT0097220602524289&nl=50&rtbm=RT010&rtbz=1&sfzh=120113197212251618&sign=045E6C8DD93FD34183E481131AADB5C5&sqbm=&timestamp=20220602115508&version=1&xb=1&xm=%E7%B3%96%E4%BA%BA%E5%A4%B4%E6%82%A3%E8%80%85&xz=310&ysbm=D120111040341&ysmc=%E5%BC%A0%E5%90%8C%E4%BC%9A&yybm=H12011100949&yymc=%E4%B8%AD%E5%8C%97%E9%95%87&zzbz","requestBaseUrl":"","channelType":"channel_prescription","patientIdentityNo":"Dl2nIFwb061tFNa5O3oeI9BIicr0FZaQ4A3YC+f4FyRH6b2yn+VjLvqt7icJr14jv9PyumO7c3WMRpi3A9wS9A==","doctorCode":"D120111040341","orgCode":"H12011100949","renderType":"dialog","id":"float-news","hideRenderSuspensionBall":false,"version":"8e55c7d2e657142b1237297aaa6b2592-1689920468320","keyboardClose":true,"source":"yh","hospitalCode":"H12011100949","departmentCode":"","departmentName":"全科医疗科","patientIdNo":"Dl2nIFwb061tFNa5O3oeI9BIicr0FZaQ4A3YC+f4FyRH6b2yn+VjLvqt7icJr14jv9PyumO7c3WMRpi3A9wS9A==","medicalType":"2","diseaseNum":"M01600","clinicNumber":"MT0097220602524289","pharmacyCode":"^F13","pharmacyName":"健共体西药房中北镇","pharmacyType":"0","diagnoseList":[[{"zdbm":"E11.900","zdmc":"2型糖尿病"}],[]],"token":"2322e30f74114a60b011fbf4ba21dde8","appKey":"k6XSr6QnRCL7IE25","diagnoses":[{"diagnosisCode":"E11.900","diagnosisName":"2型糖尿病","diagnosisType":1}],"suspensionBallDialogIndex":1}","source":"yh","userId":"120113197212251618","visitCode":"MT0097220602524289"}

四、去除特殊字符(不同的函数解析正则表达式的方法和功能不太一样,regexp_replace会比translate更完善一些)

insert overwrite table wedw_dw.chdisease_gpt_opt_log_df
select id            
,trace_id      
,per_user_type 
,oper_user_id  
,oper_user_name
,oper_type     
,oper_module   
,data_owner    
,oper_data_id  
,regexp_replace(regexp_replace(translate(translate(translate(translate(log_desc,'\n',''),'\r',' '),'\t',' '),'\\',''),'(\\"\\{)','\\{'),'(\\}\\")','\\}') as  log_desc     
,gmt_created   
,regexp_replace(regexp_replace(translate(translate(translate(translate(oper_result,'\n',''),'\r',' '),'\t',' '),'\\',''),'(\\"\\{)','\\{'),'(\\}\\")','\\}') as   oper_result 
,is_deleted    
from wedw_ods.chdisease_gpt_opt_log_df;

正常的json字符串:

{"event":"eventPrescription","extra":{"url":"http://172.16.100.203/yhyy/jyxxgx/wgcf/c01001Action.do?aac001=21001009308&appkey=02358167&djsj=&hzlx=1&jzsxh=MZ0949231022475927&kh=&ksmc=%e5%85%a8%e7%a7%91%e5%8c%bb%e7%96%97&method=guahao.common.timestamp&mtbz=&mzsxh=MZ0949231022475927&nl=60&rtbm=&rtbz=&sfzh=120224196302163822&sign=FD6B6FFF46B006C61EB662830960775C&timestamp=20231022165238&version=1&xb=2&xm=%e9%ab%98%e5%86%9b%e7%84%b6&xz=310&ysbm=D120111040344&ysmc=%e5%91%a8%e6%96%b0%e6%96%87&yybm=H12011100949&yymc=%e5%a4%a9%e6%b4%a5%e5%b8%82%e8%a5%bf%e9%9d%92%e5%8c%ba%e4%b8%ad%e5%8c%97%e9%95%87%e7%a4%be%e5%8c%ba%e5%8d%ab%e7%94%9f%e6%9c%8d%e5%8a%a1%e4%b8%ad%e5%bf%83&zzbz=","requestBaseUrl":"","channelType":"channel_prescription","patientIdentityNo":"cc66GFf5OM8EX6ravk8XjYGP1m8/nUJGqD8UDfWtk3T62SW19E6z3nPq0BVThI0VuLPgbxiFSdI1Zuo7hRnRgw==","doctorCode":"D120111040344","orgCode":"H12011100949","renderType":"dialog","id":"float-news","hideRenderSuspensionBall":false,"version":"9400120f464b403b873913713f5b6b7b-1690977749121","keyboardClose":true,"source":"yh","hospitalCode":"H12011100949","departmentCode":"","departmentName":"u5168u79d1u533bu7597u79d1","patientIdNo":"cc66GFf5OM8EX6ravk8XjYGP1m8/nUJGqD8UDfWtk3T62SW19E6z3nPq0BVThI0VuLPgbxiFSdI1Zuo7hRnRgw==","medicalType":"1","clinicNumber":"MZ0949231022475927","pharmacyCode":"^F13","pharmacyName":"u5065u5171u4f53u897fu836fu623fu4e2du5317u9547","pharmacyType":"0","diagnoseList":[[{"zdbm":"E11.501+I79.2*","zdmc":"2u578bu7cd6u5c3fu75c5u6027u5468u56f4u8840u7ba1u75c5u53d8"}],[{"zdbm":"E11.401+G63.2*","zdmc":"2u578bu7cd6u5c3fu75c5u6027u5468u56f4u795eu7ecfu75c5"},{"zdbm":"E78.500","zdmc":"u9ad8u8102u8840u75c7"}]],"token":"089b85839db94a80a6c4f09352f97104","appKey":"4QG1dS38sdBQyhSe","diagnoses":[{"diagnosisCode":"E11.501+I79.2*","diagnosisName":"2u578bu7cd6u5c3fu75c5u6027u5468u56f4u8840u7ba1u75c5u53d8","diagnosisType":1},{"diagnosisCode":"E11.401+G63.2*","diagnosisName":"2u578bu7cd6u5c3fu75c5u6027u5468u56f4u795eu7ecfu75c5","diagnosisType":2},{"diagnosisCode":"E78.500","diagnosisName":"u9ad8u8102u8840u75c7","diagnosisType":2}],"suspensionBallDialogIndex":1,"medicines":[{"pscId":"WG202310220538601","commonName":"u745eu8212u4f10u4ed6u6c40u9499u7247","productName":"u745eu8212u4f10u4ed6u6c40u9499u7247"},{"pscId":"WG202310220538601","commonName":"u4f9du5e15u53f8u4ed6u7247","productName":"u5510u6797"},{"pscId":"WG202310220538601","commonName":"u80f0u6fc0u80bdu539fu9176u80a0u6eb6u7247","productName":"u6021u5f00"}]},"source":"yh","userId":"120224196302163822","visitCode":"MZ0949231022475927"}

http://www.ppmy.cn/news/1174135.html

相关文章

AD20~PCB的板层设计和布线

1、打开51单片机最小系统的工程文件。 2、完成原理图后续工作&#xff1a;打开原理图文件&#xff0c;双击元件“CH340X”窗口右边弹出元件内部属性设置界面&#xff0c;在窗口下方点击“Footprint ->Add…”按钮进入添加元件类型界面&#xff0c;进入元件封装选择界面&…

CPU眼里的C/C++:1.2 查看变量和函数在内存中的存储位置

写一个很简单的 c 代码&#xff0c;打印一些“地址”&#xff0c; 也就是变量、函数的“存储位置”&#xff1a;当程序被加载到内存后&#xff0c;它们具体是存在哪里&#xff0c;可以用精确的数值来表示&#xff0c;这就是内存地址。 https://godbolt.org/z/Ghh9ThY5Y #inc…

Java反射获取内部类方法

Java反射获取内部类方法 结论一、案例准备二、测试方法&#xff1a;使用反射获取类的成员内部类和方法具体操作具体操作&#xff08;使用getDeclaredClasses&#xff09; 结论 Java 通过反射可以获得内部类&#xff0c;包括内部类属性信息和方法。 一、案例准备 创建了一个类…

数据库基础(一)【MySQL】

文章目录 安装 MySQL修改密码连接和退出数据库服务器使用 systemctl 管理服务器进程配置数据库从文件角度看待数据库查看连接情况 安装 MySQL 这是在 Linux 中安装 MySQL 的教程&#xff1a;Linux 下 MySQL 安装。本系列测试用的 MySQL 版本是 5.7&#xff0c;机器是 centOS7.…

华为OD 拼接URL(100分)【java】B卷

华为OD统一考试A卷+B卷 新题库说明 你收到的链接上面会标注A卷还是B卷。目前大部分收到的都是B卷。 B卷对应20022部分考题以及新出的题目,A卷对应的是新出的题目。 我将持续更新最新题目 获取更多免费题目可前往夸克网盘下载,请点击以下链接进入: 我用夸克网盘分享了「华为O…

读书笔记之《敏捷测试从零开始》(一)

大家好&#xff0c;我是rainbowzhou。 子曰&#xff1a;学而时习之&#xff0c;不亦说乎&#xff1f;今天我想和大家分享一本测试书籍——《敏捷测试从零开始》。以下为我的读书笔记&#xff1a; 精彩片段摘录&#xff1a; 焦虑往往来自于对比&#xff0c;当你在自己的圈子里面…

Calibre拾遗:FDI (Foreign Database Interface)系统简介

Calibre是强大的GDS处理工具&#xff0c;包括查看&#xff0c;验证&#xff0c;分析等操作&#xff0c;操作由浅入深&#xff0c;除过手动编辑GDS的不是很灵活外&#xff0c;其他各种命令和操作策略&#xff0c;都是远&#xff08;遥&#xff09;远&#xff08;遥&#xff09;走…

MySQL进阶(数据库引擎)——MyISAM和InnoDB引擎的区别

1.是否支持行级锁 MyISAM 只有表级锁&#xff0c;而InnoDB 支持行级锁和表级锁&#xff0c;默认为行级锁。 &#xff08;1&#xff09;MySQL大致可以归纳为以下3种锁&#xff1a; 表级锁&#xff1a;开销小&#xff0c;加锁快&#xff1b;不会出现死锁&#xff1b;锁的粒度大…