1. Setting a constant (variable) in SQL
set var:pi_sysdate=20241114;
Variable PI_SYSDATE set to 20241114
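Once set, the variable can be referenced anywhere in the same impala-shell session with the ${var:...} substitution syntax (the same mechanism used in section 9 below); a minimal sketch against a hypothetical table:
select count(*)
from ods.some_table t                      -- hypothetical table
where t.data_date = ${var:pi_sysdate};     -- expands to 20241114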
2. Creating external tables in Impala on CDP
set default_transactional_type=none;
create external table stg.hd_aml_mac_ip_ext (machinedate string, vc_fundacco string, ip string)
stored as textfile
tblproperties ('objcapabilities' = 'extread,extwrite');
create external table stg.hd_aml_mac_ip (machinedate string, vc_fundacco string, ip string)
stored as parquet
tblproperties ("parquet.compression" = "snappy");
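A common follow-up, not shown in the original notes, is to land raw files behind the textfile staging table and then rewrite them into the Parquet table; a hedged sketch:
-- assumes the raw files have already been placed under the _ext table's HDFS location
insert overwrite table stg.hd_aml_mac_ip
select machinedate, vc_fundacco, ip
from stg.hd_aml_mac_ip_ext;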
3. Exporting a comma-delimited file from Hive to the local filesystem
hive -e "SELECT * from student" | sed 's/\t/,/g' > /tmp/student.csv
4. Hive on MR parameters: enabling dynamic partitioning
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions=500000;
set hive.exec.max.dynamic.partitions.pernode=100000;
set mapreduce.reduce.memory.mb=4096;
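With these settings in place, a dynamic-partition insert can take the partition value from the last column of the SELECT list; a minimal sketch with hypothetical source and target tables:
insert overwrite table ods.trade_detail partition (data_date)   -- hypothetical target
select t.trade_id, t.amount, t.data_date                        -- partition column goes last
from stg.trade_detail_ext t;                                    -- hypothetical source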
5. Generating Hive DDL from the metastore in MySQL
set session group_concat_max_len = 20480;
select concat_ws('', a.create_body_str, CHAR(10), c.tbl_comment, CHAR(10), b.partition_str, CHAR(10), a.stored_format, CHAR(10), d.compress_str, ';') AS create_sql
FROM (
  select t.TBL_ID, t.TBL_NAME,
         case when k.INPUT_FORMAT like '%.parquet%' then 'STORED AS PARQUET'
              when k.INPUT_FORMAT like '%.SequenceFile%' then 'STORED AS SEQUENCEFILE'
              when k.INPUT_FORMAT like '%.Text%' then ''
              else 'STORED AS NULL' end AS stored_format,
         concat_ws('', 'CREATE', CASE t.TBL_TYPE WHEN 'EXTERNAL_TABLE' THEN ' EXTERNAL' ELSE '' END,
                   ' TABLE IF NOT EXISTS ${schema}.', t.TBL_NAME, '(', CHAR(10),
                   group_concat(concat_ws('', g.COLUMN_NAME, ' ', g.TYPE_NAME, ' COMMENT ', '''',
                                REPLACE(REPLACE(g.COMMENT, ';', ' '), '; ', ' '), '''', CHAR(10))
                                ORDER BY g.INTEGER_IDX separator ','), ')') AS create_body_str
  from hive.TBLS t, hive.SDS k, hive.COLUMNS_V2 g, hive.DBS s
  where t.SD_ID = k.SD_ID and k.CD_ID = g.CD_ID and s.DB_ID = t.DB_ID
    and k.INPUT_FORMAT not like '%.kudu%' and s.NAME = 'stg'
  group by t.TBL_ID
) a
left join (
  select t.TBL_ID, concat_ws('', 'COMMENT ', '''', t.param_value, '''') AS tbl_comment
  from hive.TABLE_PARAMS t where t.param_key = 'comment' group by t.TBL_ID
) c on c.tbl_id = a.tbl_id
left join (
  select t.TBL_ID, concat_ws('', 'PARTITIONED BY (',
         group_concat(concat_ws('', t.pkey_name, ' ', t.pkey_type, ' ', 'COMMENT ', '''', t.pkey_comment, '''')
                      order by t.integer_idx separator ','), ')') AS partition_str
  from hive.PARTITION_KEYS t group by t.TBL_ID
) b on b.tbl_id = a.tbl_id
left join (
  select t.TBL_ID, concat_ws('', 'TBLPROPERTIES (', '''', t.PARAM_KEY, '''', '=', '''', t.PARAM_VALUE, ''')') as compress_str
  from hive.TABLE_PARAMS t where t.param_key like '%compression%' group by t.TBL_ID, t.param_key, t.param_value
) d on d.tbl_id = a.tbl_id
order by a.tbl_name;
6. Repairing table metadata
invalidate metadata ods.tablename;   -- Impala: reload the table's metadata
msck repair table ods.tablename;     -- Hive: re-register partitions found on HDFS
7. Impala: converting between timestamps and formatted date strings
SELECT from_timestamp(now(), 'yyyyMMdd');
SELECT to_timestamp('20230710', 'yyyyMMdd');
select from_timestamp(date_add(to_timestamp('20231201', 'yyyyMMdd'), 1), 'yyyyMMdd');
select date_add(now(), interval -1 years);
8. Using UDFs
use default;
show functions;
show create function default.genseq;
hdfs dfs -put /home/app_adm/etl/udf/udf_0608.jar /user/hive/warehouse/udf_0608.jar
hdfs dfs -chown hive:hive /user/hive/warehouse/udf_0608.jar
hdfs dfs -chmod 777 /user/hive/warehouse/udf_0608.jar
DROP FUNCTION DEFAULT.udf10(STRING, STRING);
drop function default.udf10;
create function default.clnseq as 'cn.com.businessmatrix.udf.HLSequenceCleaner' using jar 'hdfs:///user/hive/warehouse/udf_0608.jar';
create function default.genseq as 'cn.com.businessmatrix.udf.HLSequenceGenerator' using jar 'hdfs:///user/hive/warehouse/udf_0608.jar';
hdfs dfs -put -f /home/file/ylb_trade_transfer_ext_out /tmp/hive/stg/ylb_trade_transfer_ext_out
sudo -u hdfs hadoop fs -chown -R hive:supergroup /tmp/hive/stg/ylb_trade_transfer_ext
9. Impala: upsert into a Kudu table keyed by primary key
upsert into ${var:schema_ods}.mdm_ip_cust (sk_invpty_of_cust, gp_flag)
select t.sk_invpty_of_cust, 0 as gp_flag
from ods.mdm_ip_cust t
where t.gp_flag is null;
10. Hive partition archiving
set hive.archive.enabled=true;
set hive.archive.har.parentdir.settable=true;
set har.partfile.size=1099511627776;
ALTER TABLE A ARCHIVE PARTITION (dt='2020-12-24', hr='12');
ALTER TABLE A UNARCHIVE PARTITION (dt='2020-12-24', hr='12');
11. Basic HBase operations
hbase shell
create 'student', 'info'
put 'student', '1001', 'info:sex', 'male'
scan 'student'
scan 'student', {STARTROW => '1001', STOPROW => '1001'}
scan 'student', {STARTROW => '1001'}
describe 'student'
put 'student', '1001', 'info:name', 'Nick'
get 'student', '1001'
get 'student', '1001', 'info:name'
count 'student'
alter 'student', {NAME => 'info', VERSIONS => 3}
get 'student', '1001', {COLUMN => 'info:name', VERSIONS => 3}
deleteall 'student', '1001'
delete 'student', '1002', 'info:sex'
truncate 'student'
drop 'student'
list_namespace
create_namespace 'bigdata'
create 'bigdata:student', 'info'
drop_namespace 'bigdata'
12. Hive data masking (Chinese characters are not masked)
select mask('不不不bbb123');
No.  Policy                      Description                                          Hive function
1    Redact                      Mask letters with x and digits with n                mask
2    Partial mask: show last 4   Show only the last four characters, mask the rest    mask_show_last_n
3    Partial mask: show first 4  Show only the first four characters, mask the rest   mask_show_first_n
4    Hash                        Replace the value with its hash                      mask_hash
5    Nullify                     Replace the value with NULL                          implemented by Ranger itself
6    Unmasked                    Show the value as-is                                 implemented by Ranger itself
7    Date: show only year        Show only the year of a date string                  mask
8    Custom                      Custom policy defined via a Hive UDF                 user-defined UDF
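For reference, the masking functions named above can be called directly in Hive 2.1+; the replacement characters default to X/x/n, so the outputs in the comments are only illustrative:
select mask('Abc123')                 as redact_all,     -- e.g. 'Xxxnnn'
       mask_show_last_n('Abc123', 4)  as show_last_4,    -- e.g. 'Xxc123'
       mask_show_first_n('Abc123', 4) as show_first_4,   -- e.g. 'Abc1nn'
       mask_hash('Abc123')            as hashed;         -- hash string of the value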
13. Syntax compatibility issues when upgrading from CDH5 to CDH6.3.x
1. Pass parameters in as STRING.
2. '''||value||''' -> "'||value||'"
3. "" -> ""
4. Rework uses of collect_set() to run on Hive on MR; run the remaining logic through Impala into temporary tables (see the sketch below).
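A hedged illustration of item 4, assuming a hypothetical table, showing the collect_set() pattern that stays on Hive on MR:
set hive.execution.engine=mr;
select t.cust_id,
       concat_ws(',', collect_set(t.acct_no)) as acct_list   -- distinct accounts per customer
from stg.acct_detail t                                       -- hypothetical table
group by t.cust_id;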
14. SQL scripts must begin with a descriptive header comment
[Example]
nvl(c.en_ratio, 0) as plg_rati,
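A minimal sketch of such a header block; the field names and values below are assumptions, not a standard taken from the original notes:
-- ============================================================
-- Script      : stg_load_example.sql      (hypothetical name)
-- Purpose     : load stg source data into the ods layer
-- Author      : app_adm
-- Create date : 2024-11-14
-- Change log  : date / author / description of the change
-- ============================================================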
15. Hive: renaming a column and changing its position
ALTER TABLE test_change CHANGE a a1 STRING AFTER b;
16. ORDER BY vs SORT BY in Hive
ORDER BY performs a global sort and runs with a single reduce task.
SORT BY only sorts within each reducer (locally).
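A hedged illustration of the difference, assuming a hypothetical stg.student table and three reducers:
set mapreduce.job.reduces=3;
select * from stg.student order by score desc;   -- single reducer, one globally ordered result
select * from stg.student sort by score desc;    -- three reducers, each output ordered independently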
17. Exporting Hive data to Oracle with Sqoop
sqoop export \
  --connect jdbc:oracle:thin:@localhost:1521/testdb \
  --username test \
  --password 123456 \
  --table t_test \
  --export-dir '/apps/hive/warehouse/dbi.db/t_test/pdt=20191229' \
  --columns ID,data_date,data_type,c1,c2,c3 \
  --input-fields-terminated-by '\001' \
  --input-lines-terminated-by '\n' \
  --input-null-string '\\N' \
  --input-null-non-string '\\N' > sqoop_oracle.log 2>&1 &
18. Splitting comma-separated columns into rows (Hive LATERAL VIEW explode / Oracle CONNECT BY)
select new_fundaccount, new_bk_tradeaccount, bk_product
from stg.tt0liquidateschema_tmp01
LATERAL VIEW explode(split(fundaccount, ',')) fundaccount as new_fundaccount
LATERAL VIEW explode(split(bk_tradeaccount, ',')) bk_tradeaccount as new_bk_tradeaccount;
In Oracle, the same split can be done with REGEXP_SUBSTR and CONNECT BY; on test data:
create table tmp_dz as
select '000855' as bk_product,
       '372402834320,37345435345435,37345343434' as fundaccount,
       '982342242322342,9842423424,98345333' as tradeaccount
from dual;
insert into tmp_dz
select '000845' as bk_product,
       '37345343454' as fundaccount,
       '98345333433' as tradeaccount
from dual;
select nvl(new_fundaccount, fundaccount) as fundaccount,
       nvl(new_tradeaccount, tradeaccount) as tradeaccount,
       bk_product
from (
    SELECT REGEXP_SUBSTR(fundaccount, '[^,]+', 1, ROWNUM) as new_fundaccount,
           REGEXP_SUBSTR(tradeaccount, '[^,]+', 1, ROWNUM) as new_tradeaccount,
           t.*
    FROM tmp_dz t
    CONNECT BY ROWNUM <= regexp_count(fundaccount, ',') + 1
) t;
19. Hive: exploding multiple comma-delimited columns into aligned rows
with temp as (select '1,2,3' as id, 'a,b,c' as name union select '4,5,6' as id, 'd,e,f' as name)
select id, name, s_id, s_name
from temp
lateral view posexplode(split(id, ',')) t as s_id_index, s_id
lateral view posexplode(split(name, ',')) t as s_name_index, s_name
where s_id_index = s_name_index;
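On the two rows in the CTE, matching the positional indexes keeps the elements aligned instead of producing a cross product, so the query returns:
-- id     name   s_id  s_name
-- 1,2,3  a,b,c  1     a
-- 1,2,3  a,b,c  2     b
-- 1,2,3  a,b,c  3     c
-- 4,5,6  d,e,f  4     d
-- 4,5,6  d,e,f  5     e
-- 4,5,6  d,e,f  6     f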
20. CDP Hive supports transactions (insert, delete, update, select)
create table cdhadmin_table_hive (col1 int);
insert into table cdhadmin_table_hive values (1);
insert into table cdhadmin_table_hive values (51);
insert into table cdhadmin_table_hive values (2);
insert into table cdhadmin_table_hive values (3);
select * from cdhadmin_table_hive;
delete from cdhadmin_table_hive where col1 = 51;
select * from cdhadmin_table_hive;
update cdhadmin_table_hive set col1=300 where col1=3;
select * from cdhadmin_table_hive;
grant select on database default to user cdhadmin;