数仓搭建(hive):DWS层(服务数据层)

embedded/2025/2/25 2:15:17/

DWS层示例: 搭建日主题宽表

需求

维度

步骤

在hive中建数据库dws >>建表
CREATE DATABASE if NOT EXISTS DWS;
建表sql
CREATE TABLE yp_dws.dws_sale_daycount(
--维度
city_id string COMMENT '城市id',
city_name string COMMENT '城市name',
trade_area_id string COMMENT '商圈id',
trade_area_name string COMMENT '商圈名称',
store_id string COMMENT '店铺的id',
store_name string COMMENT '店铺名称',
brand_id string COMMENT '品牌id',
brand_name string COMMENT '品牌名称',
max_class_id string COMMENT '商品大类id',
max_class_name string COMMENT '大类名称',
mid_class_id string COMMENT '中类id',
mid_class_name string COMMENT '中类名称',
min_class_id string COMMENT '小类id',
min_class_name string COMMENT '小类名称',
group_type string COMMENT '分组类型:store,trade_area,city,brand,
min_class,mid_class,max_class,all',
-- =======日统计=======
sale_amt DECIMAL(38,2) COMMENT '销售收入',
plat_amt DECIMAL(38,2) COMMENT '平台收入',
deliver_sale_amt DECIMAL(38,2) COMMENT '配送成交额',
mini_app_sale_amt DECIMAL(38,2) COMMENT '小程序成交额',
android_sale_amt DECIMAL(38,2) COMMENT '安卓APP成交额',
ios_sale_amt DECIMAL(38,2) COMMENT '苹果APP成交额',
pcweb_sale_amt DECIMAL(38,2) COMMENT 'PC商城成交额',
order_cnt BIGINT COMMENT '成交单量',
eva_order_cnt BIGINT COMMENT '参评单量comment=>cmt',
bad_eva_order_cnt BIGINT COMMENT '差评单量negtive-comment=>ncmt',
deliver_order_cnt BIGINT COMMENT '配送单量',
refund_order_cnt BIGINT COMMENT '退款单量',
miniapp_order_cnt BIGINT COMMENT '小程序成交单量',
android_order_cnt BIGINT COMMENT '安卓APP订单量',
ios_order_cnt BIGINT COMMENT '苹果APP订单量',
pcweb_order_cnt BIGINT COMMENT 'PC商城成交单量'
)
COMMENT '销售主题日统计宽表'
PARTITIONED BY(dt STRING)
ROW format delimited fields terminated BY '\t'
stored AS orc tblproperties ('orc.compress' = 'SNAPPY');
查询数据sql
set hive.exec.mode.local.auto=true;
WITH TEMP AS (SELECT-- 先抽取维度字段O.dt-- 城市,S.city_id,S.city_name-- 商圈,S.trade_area_id,S.trade_area_name-- 店铺,S.id,S.store_name-- 品牌,G.brand_id,G.brand_name-- 大类,G.max_class_id,G.max_class_name-- 中,G.mid_class_id,G.mid_class_name-- 小,G.min_class_id,G.min_class_name-- 抽取字段字段-- 订单量指标,O.order_id-- 金额指标,O.order_amount,O.goods_price,O.plat_fee,O.settlement_amount,O.dispatcher_money,O.order_from,O.evaluation_state,O.geval_scores,O.is_delivery  -- 是否配送,O.refund_id  -- 退款单号-- 去重,ROW_NUMBER()OVER(PARTITION BY O.order_id ORDER BY order_id) RNFROM DWB.DWB_ORDER_DETAIL1 OLEFT JOIN DWB.DWB_SHOP_DETAIL SON O.store_id = S.idLEFT JOIN DWB.dwb_goods_detail GON G.store_id = S.id
)
SELECTT.city_id,T.city_name,T.trade_area_id,T.trade_area_name,T.ID AS STORE_ID,T.store_name,T.brand_id,T.brand_name,T.max_class_id,T.max_class_name,T.mid_class_id,T.mid_class_name,T.min_class_id,T.min_class_name,(CASE WHEN T.ID IS NOT NULLTHEN '店铺'WHEN T.trade_area_id IS NOT NULLTHEN '商圈'WHEN T.city_id IS NOT NULLTHEN '城市'WHEN T.min_class_id IS NOT NULLTHEN '小类'WHEN T.mid_class_id IS NOT NULLTHEN '中类'WHEN T.max_class_id IS NOT NULLTHEN '大类'WHEN T.brand_id IS NOT NULLTHEN '品牌'ELSE '日期'END)  AS GROUP_TYPE,SUM(CASE WHEN RN = 1 THEN T.order_amount END) AS SALE_AMT,SUM(CASE WHEN RN = 1 THEN T.plat_fee END) AS PLAT_AMT,SUM(CASE WHEN RN = 1 AND T.is_delivery = 1 THEN T.order_amount END) AS DELIVER_SALE_AMT,SUM(CASE WHEN RN = 1 AND T.order_from = 'miniapp' THEN T.order_amount END) AS MINI_APP_SALE_AMT,SUM(CASE WHEN RN = 1 AND T.order_from = 'android' THEN T.order_amount END) AS android_SALE_AMT,SUM(CASE WHEN RN = 1 AND T.order_from = 'ios' THEN T.order_amount END) AS ios_SALE_AMT,SUM(CASE WHEN RN = 1 AND T.order_from = 'pcweb' THEN T.order_amount END) AS PCWEB_SALE_AMT,COUNT(CASE WHEN RN = 1 THEN T.order_id END ) AS ORDER_CNT,COUNT(CASE WHEN RN = 1 AND T.evaluation_state = 1 THEN 1 END) AS EVA_ORDER_CNT,COUNT(CASE WHEN RN = 1 AND T.geval_scores < 3 THEN 1 END) AS BAD_EVA_ORDER_CNT,COUNT(CASE WHEN RN = 1 AND T.is_delivery = 1 THEN 1 END) AS DELIVER_ORDER_CNT,COUNT(CASE WHEN RN = 1 AND T.refund_id IS NOT NULL THEN 1 END) AS  REFUND_ORDER_CNT,COUNT(CASE WHEN RN = 1 AND T.order_from = 'miniapp' THEN 1 END) AS MINI_APP_SALE_CNT,COUNT(CASE WHEN RN = 1 AND T.order_from = 'android' THEN 1 END) AS android_SALE_CNT,COUNT(CASE WHEN RN = 1 AND T.order_from = 'ios' THEN 1 END) AS ios_SALE_CNT,COUNT(CASE WHEN RN = 1 AND T.order_from = 'pcweb' THEN 1 END) AS PCWEB_SALE_CNT,T.dtFROM TEMP T
GROUP BYT.dt,T.city_id,T.city_name,T.trade_area_id,T.trade_area_name,T.id,T.store_name,T.brand_id,T.brand_name,T.max_class_id,T.max_class_name,T.mid_class_id,T.mid_class_name,T.min_class_id,T.min_class_name
GROUPING SETS ((T.dt),(T.dt,T.city_id,T.city_name),(T.dt,T.city_id,T.city_name,T.trade_area_id,T.trade_area_name),(T.dt,T.city_id,T.city_name,T.trade_area_id,T.trade_area_name,T.id,T.store_name),(T.dt,T.brand_id,T.brand_name),(T.dt,T.max_class_id,T.max_class_name),(T.dt,T.max_class_id,T.max_class_name,T.mid_class_id,T.mid_class_name),(T.dt,T.max_class_id,T.max_class_name,T.mid_class_id,T.mid_class_name,T.min_class_id,T.min_class_name)
);

在hive中查询数据很慢

方法一:

打开hive的本地模式 (默认是false关闭状态)
set hive.exec.mode.local.auto=true;

方法二: Hue上执行

插入数据

查看表结构

要插入的目标表是分区表 >> 开启动态插入模式/非严格模式

在hive中,insert into 要紧跟select

set hive.exec.mode.local.auto=true; -- 本地模式
SET hive.exec.dynamic.partition = true; -- 动态分区
SET hive.exec.dynamic.partition.mode=nonstrict; -- 非严格模式
WITH TEMP AS (SELECT-- 先抽取维度字段O.dt-- 城市,S.city_id,S.city_name-- 商圈,S.trade_area_id,S.trade_area_name-- 店铺,S.id,S.store_name-- 品牌,G.brand_id,G.brand_name-- 大类,G.max_class_id,G.max_class_name-- 中,G.mid_class_id,G.mid_class_name-- 小,G.min_class_id,G.min_class_name-- 抽取字段字段-- 订单量指标,O.order_id-- 金额指标,O.order_amount,O.goods_price,O.plat_fee,O.settlement_amount,O.dispatcher_money,O.order_from,O.evaluation_state,O.geval_scores,O.is_delivery  -- 是否配送,O.refund_id  -- 退款单号-- 去重,ROW_NUMBER()OVER(PARTITION BY O.order_id ORDER BY order_id) RNFROM DWB.DWB_ORDER_DETAIL1 OLEFT JOIN DWB.DWB_SHOP_DETAIL SON O.store_id = S.idLEFT JOIN DWB.dwb_goods_detail GON G.store_id = S.id
)insert into dws.dws_sale_daycount(dt)SELECTT.city_id,T.city_name,T.trade_area_id,T.trade_area_name,T.ID AS STORE_ID,T.store_name,T.brand_id,T.brand_name,T.max_class_id,T.max_class_name,T.mid_class_id,T.mid_class_name,T.min_class_id,T.min_class_name,(CASE WHEN T.ID IS NOT NULLTHEN '店铺'WHEN T.trade_area_id IS NOT NULLTHEN '商圈'WHEN T.city_id IS NOT NULLTHEN '城市'WHEN T.min_class_id IS NOT NULLTHEN '小类'WHEN T.mid_class_id IS NOT NULLTHEN '中类'WHEN T.max_class_id IS NOT NULLTHEN '大类'WHEN T.brand_id IS NOT NULLTHEN '品牌'ELSE '日期'END)  AS GROUP_TYPE,SUM(CASE WHEN RN = 1 THEN T.order_amount END) AS SALE_AMT,SUM(CASE WHEN RN = 1 THEN T.plat_fee END) AS PLAT_AMT,SUM(CASE WHEN RN = 1 AND T.is_delivery = 1 THEN T.order_amount END) AS DELIVER_SALE_AMT,SUM(CASE WHEN RN = 1 AND T.order_from = 'miniapp' THEN T.order_amount END) AS MINI_APP_SALE_AMT,SUM(CASE WHEN RN = 1 AND T.order_from = 'android' THEN T.order_amount END) AS android_SALE_AMT,SUM(CASE WHEN RN = 1 AND T.order_from = 'ios' THEN T.order_amount END) AS ios_SALE_AMT,SUM(CASE WHEN RN = 1 AND T.order_from = 'pcweb' THEN T.order_amount END) AS PCWEB_SALE_AMT,COUNT(CASE WHEN RN = 1 THEN T.order_id END ) AS ORDER_CNT,COUNT(CASE WHEN RN = 1 AND T.evaluation_state = 1 THEN 1 END) AS EVA_ORDER_CNT,COUNT(CASE WHEN RN = 1 AND T.geval_scores < 3 THEN 1 END) AS BAD_EVA_ORDER_CNT,COUNT(CASE WHEN RN = 1 AND T.is_delivery = 1 THEN 1 END) AS DELIVER_ORDER_CNT,COUNT(CASE WHEN RN = 1 AND T.refund_id IS NOT NULL THEN 1 END) AS  REFUND_ORDER_CNT,COUNT(CASE WHEN RN = 1 AND T.order_from = 'miniapp' THEN 1 END) AS MINI_APP_SALE_CNT,COUNT(CASE WHEN RN = 1 AND T.order_from = 'android' THEN 1 END) AS android_SALE_CNT,COUNT(CASE WHEN RN = 1 AND T.order_from = 'ios' THEN 1 END) AS ios_SALE_CNT,COUNT(CASE WHEN RN = 1 AND T.order_from = 'pcweb' THEN 1 END) AS PCWEB_SALE_CNT,T.dtFROM TEMP T
GROUP BYT.dt,T.city_id,T.city_name,T.trade_area_id,T.trade_area_name,T.id,T.store_name,T.brand_id,T.brand_name,T.max_class_id,T.max_class_name,T.mid_class_id,T.mid_class_name,T.min_class_id,T.min_class_name
GROUPING SETS ((T.dt),(T.dt,T.city_id,T.city_name),(T.dt,T.city_id,T.city_name,T.trade_area_id,T.trade_area_name),(T.dt,T.city_id,T.city_name,T.trade_area_id,T.trade_area_name,T.id,T.store_name),(T.dt,T.brand_id,T.brand_name),(T.dt,T.max_class_id,T.max_class_name),(T.dt,T.max_class_id,T.max_class_name,T.mid_class_id,T.mid_class_name),(T.dt,T.max_class_id,T.max_class_name,T.mid_class_id,T.mid_class_name,T.min_class_id,T.min_class_name)
);​

查询数据sql分析

数据来源>>DWB层的数据表

下面分析维度/指标需要哪些数据信息(字段)以及来自哪些表

维度: 

日期(dt)>>DWB_ORDER_DETAIL1

城市(city_id, city_name) >>DWB_SHOP_DETAIL

商圈(trade_area_id, trade_area_name)>>DWB_SHOP_DETAIL

店铺(id, store_name)>>DWB_SHOP_DETAIL

品牌(brand_id, brand_name)>>dwb_goods_detail

大类(max_class_id, max_class_name)>>dwb_goods_detail

中类(mid_class_id, mid_class_name)>> dwb_goods_detail

小类(min_class_id, min_class_name) >>dwb_goods_detail

指标

订单量(order_id)>>DWB_ORDER_DETAIL1

金额(order_amount, goods_price, plat_fee, settlement_amount, dispatcher_money, order_from, evaluation_state, geval_scores, is_delivery, refund_id)>>DWB_ORDER_DETAIL1

维度/指标的数据来源于3张表, 且订单表(DWB_ORDER_DETAIL1)最多>>表连接时把订单表作为主表

使用with  as 做一个公共表达式先把指标/维度相关的字段数据抽取出来>>再用select 语句对数据进行分组汇总>>插入数据

抽取数据注意: 订单表的重复数据>>去重>>ROW_NUMBER()

重复数据原因:  

订单表是一个宽表, 由多张事实表连接到一起, 容易产生重复数据

比如一张订单里面有多家商铺的商品,那么就会产生多条同样的订单号

select 语句的字段及数据类型, 注意和目标表的字段相对应

目标表

目标表的group_type字段,用case when 实现

字段信息:   group_type    string COMMENT '分组类型:store,trade_area,city,brand,
min_class,mid_class,max_class,all',

注意: '分组类型'字段在用case when进行条件判断时, 只有false不满足条件才进行下一步判断>>先判断小维度,再到大维度>>减少sql 量

比如: 在维度中, 店铺 < 商圈 < 城市 ;  小类 < 中类 < 大类  ;    

目标表的指标字段

select 语句中的指标数据汇总实现

(目标表)收入/成交额>>sum() 汇总

(目标表)单量>>count() 汇总

注意: case when 去重/过滤条件

目标表的分区字段  dt >>T.dt


http://www.ppmy.cn/embedded/164931.html

相关文章

C#导出dataGridView数据

方法一&#xff1a;导出为CSV文件&#xff08;简单快速&#xff09; 优点&#xff1a;无需依赖库&#xff0c;但格式简单。 private void ExportToCSV(DataGridView dataGridView, string filePath) {using (StreamWriter sw new StreamWriter(filePath, false, Encoding.UT…

臻识相机,华夏相机,芊熠车牌识别相机加密解密

臻识&#xff0c;华夏&#xff0c;芊熠这三种车牌识别相机解密我都试过了&#xff0c;可以正常解密成功&#xff0c;其它品牌我暂时没有测试。超级简单&#xff0c;免费的&#xff0c;白嫖无敌&#xff01; 流程&#xff1a; ①&#xff1a;先导出配置文件&#xff0c;例如我以…

安全面试4

文章目录 给的源码是ThinkPHP框架的话&#xff0c;审计起来和没有使用框架的有什么不同&#xff0c;从流程上或者从关注的点上有什么不同框架代码审计的流程无框架代码审计的流程 反序列的时候&#xff0c;unserialize()反序列一个字符串的时候&#xff0c;对象会有一些魔术方法…

从卡顿到丝滑:火山引擎DeepSeek-R1引领AI工具新体验

方舟大模型体验中心全新上线&#xff0c;免登录体验满血联网版Deep Seek R1 模型及豆包最新版模型:https://www.volcengine.com/experience/ark?utm_term202502dsinvite&acDSASUQY5&rcGO9H7M38 告别DeepSeek卡顿&#xff0c;探索火山引擎DeepSeek-R1的丝滑之旅 在A…

网络运维学习笔记 016网工初级(HCIA-Datacom与CCNA-EI)PPP点对点协议和PPPoE以太网上的点对点协议(此处只讲华为)

文章目录 PPP&#xff08;Point to Point Protocol&#xff0c;点对点协议&#xff0c;华为默认使用&#xff09;实验1&#xff1a;设置为pap模式实验2&#xff1a;设置为chap模式&#xff08;实验1的基础上&#xff09; 串口封装协议HDLC&#xff08;High-Level Data Link Con…

QT串口通信之二,实现单个温湿度传感器数据的采集(采用Qt-modbus实现)

接上 QT串口通信之一,实现单个温湿度传感器数据的采集 上述文章中用QSerialPort实现了温湿度传感器的采集,实际上比较麻烦的,因为需要自定义解析帧, 接下来,用Qt-modbus-封装度更高的协议,来实现温湿度的采集; #include "MainWindow.h" #include "ui_M…

RK Android11 WiFi模组 AIC8800 驱动移植调试记录

RK Android11 WiFi模组 AIC8800 驱动移植调试记录 作者&#xff1a;Witheart更新时间&#xff1a;20250221 概要&#xff1a;本文记录了 RK3568 平台上移植并调试 WiFi6 模组 AIC8800 的过程&#xff0c;涵盖 WiFi 和蓝牙驱动的适配与问题排查。 配置流程在另一篇文章&#x…

Day9,Hot100(图论)

图论 图论部分推荐 acm 模式&#xff0c;因为图的输入处理不一样 DFS&#xff0c;类似二叉树的递归遍历 BFS&#xff0c;类似二叉树的层次遍历 208. 实现 Trie (前缀树) 数据结构大概如下&#xff1a; 可以看成是 二十六叉树 &#xff08;因为26个小写字母&#xff09; …