1. List分区
业务上,用户可以按城市或者其他枚举值对数据进行分区(partition),对这类枚举类型的数据列进行分区时就可以使用List分区。List分区列支持 BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, LARGEINT, DATE, DATETIME, CHAR, VARCHAR 数据类型,分区值为枚举值。只有当数据的分区列取值是某个分区枚举值其中之一时,才可以命中该分区。
1.1 创建List分区方式
Partition 支持通过 VALUES IN (...) 来指定每个分区包含的枚举值。例如,创建List分区表example_db.example_list_tbl1:
# List-partitioned aggregate table: each row is routed to a partition by its `city` value.
CREATE TABLE IF NOT EXISTS example_db.example_list_tbl1
(
    `user_id`         LARGEINT    NOT NULL COMMENT "用户id",
    `date`            DATE        NOT NULL COMMENT "数据灌入日期时间",
    `timestamp`       DATETIME    NOT NULL COMMENT "数据灌入的时间戳",
    `city`            VARCHAR(20) NOT NULL COMMENT "用户所在城市",
    `age`             SMALLINT             COMMENT "用户年龄",
    `sex`             TINYINT              COMMENT "用户性别",
    `last_visit_date` DATETIME REPLACE DEFAULT "1970-01-01 00:00:00" COMMENT "用户最后一次访问时间",
    `cost`            BIGINT   SUM     DEFAULT "0"                   COMMENT "用户总消费",
    `max_dwell_time`  INT      MAX     DEFAULT "0"                   COMMENT "用户最大停留时间",
    `min_dwell_time`  INT      MIN     DEFAULT "99999"               COMMENT "用户最小停留时间"
)
ENGINE = olap
AGGREGATE KEY(`user_id`, `date`, `timestamp`, `city`, `age`, `sex`)
PARTITION BY LIST(`city`)
(
    PARTITION `p_cn`  VALUES IN ("Beijing", "Shanghai", "Hong Kong"),
    PARTITION `p_usa` VALUES IN ("New York", "San Francisco"),
    PARTITION `p_jp`  VALUES IN ("Tokyo")
)
DISTRIBUTED BY HASH(`user_id`) BUCKETS 16
PROPERTIES
(
    "replication_num" = "3"
);
创建完成表example_db.example_list_tbl1之后,会自动生成如下3个分区:
p_cn: ("Beijing", "Shanghai", "Hong Kong")
p_usa: ("New York", "San Francisco")
p_jp: ("Tokyo")
1.2 增删分区
执行如下命令对表example_db.example_list_tbl1 增加分区:
# Add partition p_uk, which holds the single enumerated value "London"
mysql> ALTER TABLE example_db.example_list_tbl1 ADD PARTITION p_uk VALUES IN ("London");
Query OK, 0 rows affected (0.04 sec)
#分区结果如下:
p_cn: ("Beijing", "Shanghai", "Hong Kong")
p_usa: ("New York", "San Francisco")
p_jp: ("Tokyo")
p_uk: ("London")
执行如下命令对表example_db.example_list_tbl1删除分区:
# Drop partition p_jp
mysql> ALTER TABLE example_db.example_list_tbl1 DROP PARTITION p_jp;
Query OK, 0 rows affected (0.01 sec)
#分区结果如下:
p_cn: ("Beijing", "Shanghai", "Hong Kong")
p_usa: ("New York", "San Francisco")
p_uk: ("London")
向表example_db.example_list_tbl1中插入如下数据,观察数据所属分区情况:
# Insert rows whose city values all match an existing partition
INSERT INTO example_db.example_list_tbl1 VALUES
    (10000, "2017-10-01", "2017-10-01 08:00:05", "Beijing",       20, 0, "2017-10-01 06:00:00", 20,  10, 10),
    (10000, "2017-10-01", "2017-10-01 09:00:05", "Shanghai",      20, 0, "2017-10-01 07:00:00", 15,  2,  2),
    (10001, "2017-10-01", "2017-10-01 18:12:10", "Hong Kong",     30, 1, "2017-10-01 17:05:45", 2,   22, 22),
    (10002, "2017-10-02", "2017-10-02 13:10:00", "New York",      20, 1, "2017-10-02 12:59:12", 200, 5,  5),
    (10003, "2017-10-02", "2017-10-02 13:15:00", "San Francisco", 32, 0, "2017-10-02 11:20:00", 30,  11, 11),
    (10004, "2017-10-01", "2017-10-01 12:12:48", "London",        35, 0, "2017-10-01 10:00:15", 100, 3,  3);
# Query the rows stored in partition p_cn; other partitions are queried with the same syntax
mysql> SELECT * FROM example_db.example_list_tbl1 PARTITION p_cn;
+---------+------------+---------------------+-----------+
| user_id | date | timestamp | city |...
+---------+------------+---------------------+-----------+
| 10001 | 2017-10-01 | 2017-10-01 18:12:10 | Hong Kong |...
| 10000 | 2017-10-01 | 2017-10-01 08:00:05 | Beijing |...
| 10000 | 2017-10-01 | 2017-10-01 09:00:05 | Shanghai |...
+---------+------------+---------------------+-----------+
# This row's city ("Tokyo") belongs to no remaining partition, so the insert reports an error
INSERT INTO example_db.example_list_tbl1 VALUES
    (10004, "2017-10-03", "2017-10-03 12:38:20", "Tokyo", 35, 0, "2017-10-03 10:20:22", 11, 6, 6);
1.3 多列分区
List分区也支持多列分区。创建多列分区表example_db.example_list_tbl2如下:
# Multi-column List partitioning: each partition lists (`id`, `city`) value pairs.
CREATE TABLE IF NOT EXISTS example_db.example_list_tbl2
(
    `id`   LARGEINT    NOT NULL COMMENT "用户id",
    `date` DATE        NOT NULL COMMENT "数据灌入日期时间",
    `city` VARCHAR(20) NOT NULL COMMENT "用户所在城市",
    `age`  SMALLINT             COMMENT "用户年龄",
    `cost` BIGINT SUM DEFAULT "0" COMMENT "用户总消费"
)
ENGINE = olap
AGGREGATE KEY(`id`, `date`, `city`, `age`)
PARTITION BY LIST(`id`, `city`)
(
    PARTITION `p1_city` VALUES IN (("1", "Beijing"), ("1", "Shanghai")),
    PARTITION `p2_city` VALUES IN (("2", "Beijing"), ("2", "Shanghai")),
    PARTITION `p3_city` VALUES IN (("3", "Beijing"), ("3", "Shanghai"))
)
DISTRIBUTED BY HASH(`id`) BUCKETS 16
PROPERTIES
(
    "replication_num" = "3"
);
以上表是以id、city列创建的多列分区,分区信息如下:
p1_city: [("1", "Beijing"), ("1", "Shanghai")]
p2_city: [("2", "Beijing"), ("2", "Shanghai")]
p3_city: [("3", "Beijing"), ("3", "Shanghai")]
向表中插入数据时,也是按照分区列的顺序逐列进行匹配,只有各分区列的取值组合与某个分区的枚举值完全一致时才能命中该分区。向表中插入如下数据:
# Every row below maps to an existing partition.
# Fix: the target table is example_list_tbl2 (the original said example_list_tbl12, which is never created),
# and the statement is now terminated with ';' so it is not merged with the next INSERT.
INSERT INTO example_db.example_list_tbl2 VALUES
    (1, "2017-10-01", "Beijing",  18, 100),
    (1, "2017-10-02", "Shanghai", 18, 101),
    (2, "2017-10-03", "Shanghai", 20, 102),
    (3, "2017-10-04", "Beijing",  21, 103);
# Neither row below matches an existing partition, so this insert reports an error:
# (1, "Tianjin") and (4, "Beijing") are not listed in p1_city, p2_city or p3_city.
INSERT INTO example_db.example_list_tbl2 VALUES
    (1, "2017-10-05", "Tianjin", 22, 104),
    (4, "2017-10-06", "Beijing", 23, 105);
以上几条数据匹配分区情况如下:
数据 ---> 分区
1, Beijing ---> p1_city
1, Shanghai ---> p1_city
2, Shanghai ---> p2_city
3, Beijing ---> p3_city
1, Tianjin ---> 无法导入
4, Beijing ---> 无法导入