Doris-1.2.6集群

news/2024/11/29 13:33:30/

1、系统软件

1.1 、系统版本

系统:centos 7.1及以上;Ubuntu 16.04及以上
软件:Java 1.8及以上;GCC 4.8.2及以上;

1.2、环境检查

1.2.1、gcc安装

1、Centos
ansible cluster -m shell -a "yum -y install gcc gcc-c++ autoconf pcre pcre-devel make automake"
ansible cluster -m shell -a "yum -y install wget httpd-tools vim"
ansible cluster -m shell -a "gcc --version"
2、Ubuntu
sudo apt install -y gcc

1.3、时钟同步(必须)

1.4、关闭交换分区(swap)(必须)

echo "0" > /proc/sys/vm/swappiness
echo "0" > /proc/sys/vm/overcommit_memory
sysctl -p

1.5、把limits调到最大(必须)

cat << EOF >> /etc/security/limits.conf 
*       soft   noproc  65535
*       hard   noproc  65535
*       soft   nofile  65535
*       hard   nofile  65535
*       hard memlock unlimited
*       soft memlock unlimited
EOF

1.6、Jdk是1.8

# jdk
export JAVA_HOME=/usr/java/jdk1.8.0_201-amd64
export CLASSPATH=.:$JAVA_HOME/jre/lib/rt.jar:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export PATH=$JAVA_HOME/bin:$PATH

1.7、系统参数优化

cat > /etc/sysctl.conf << EOF 
vm.max_map_count = 262144
net.ipv4.tcp_retries2 = 5
fs.file-max = 6553560
net.ipv4.tcp_max_tw_buckets = 6000
net.ipv4.tcp_sack = 1
net.ipv4.tcp_window_scaling = 1
net.ipv4.tcp_rmem = 4096 87380 4194304
net.ipv4.tcp_wmem = 4096 16384 4194304
net.ipv4.tcp_max_syn_backlog = 16384
net.core.netdev_max_backlog = 32768
net.core.somaxconn = 32768
net.core.wmem_default = 8388608
net.core.rmem_default = 8388608
net.core.rmem_max = 16777216
net.core.wmem_max = 16777216
net.ipv4.tcp_timestamps = 1
net.ipv4.tcp_fin_timeout = 20
net.ipv4.tcp_synack_retries = 2
net.ipv4.tcp_syn_retries = 2
net.ipv4.tcp_syncookies = 1
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_mem = 94500000 915000000 927000000
net.ipv4.tcp_max_orphans = 3276800
net.ipv4.ip_local_port_range = 1024 65000
net.ipv6.conf.all.disable_ipv6 = 1
net.ipv6.conf.default.disable_ipv6 = 1
kernel.numa_balancing = 0
kernel.shmmax = 68719476736
kernel.printk = 5
kernel.sysrq = 1
vm.overcommit_memory = 0
vm.swappiness = 1
vm.dirty_background_ratio = 5
vm.dirty_ratio = 80
EOF
sysctl -p

1.8、检查avx2指令

查看是否支持avx2指令集,返回0,说明不支持

cat /proc/cpuinfo | grep avx2 | wc -l

1.9、下载安装包

根据JDK版本,CPU架构,是否支持avx2,下载对应的Doris安装包

下载地址:https://doris.apache.org/zh-CN/download

在这里插入图片描述

2、集群部署

2.1、服务器规划及配置

在这里插入图片描述

2.2、部署目录规划

2.2.1、FE

部署目录: /opt/module/doris/fe
日志目录: /data/doris/log/fe
元数据目录: /data/doris/data/meta

2.2.2、BE

部署目录: /opt/module/doris/be

日志目录: /data/doris/log/be

数据存储目录: /data/doris/data/storage

2.2.3、Broker

部署目录: /opt/module/doris/hdfs_broker

2.2.4、创建目录

ansible cluster -m shell -a "mkdir -p /opt/module/doris/"
ansible cluster -m shell -a "mkdir -p /data/doris/log/{fe,be}"
ansible cluster -m shell -a "mkdir -p /data/doris/data/{meta,storage.SSD}"

2.3、集群部署

2.3.1、解压

tar -xvJf apache-doris-1.2.6-bin-x64.tar.xz 
chown -R root:root apache-doris-1.2.6-bin-x64

2.3.2、修改FE配置

AVA_HOME=/usr/java/jdk1.8.0_201-amd64# the output dir of stderr and stdout 
LOG_DIR = /data/doris/log/feDATE = `date +%Y%m%d-%H%M%S`
JAVA_OPTS="-Xmx8192m -XX:+UseMembar -XX:SurvivorRatio=8 -XX:MaxTenuringThreshold=7 -XX:+PrintGCDateStamps -XX:+PrintGCDetails -XX:+UseConcMarkSweepGC -XX:+UseParNewGC -XX:+CMSClassUnloadingEnabled -XX:-CMSParallelRemarkEnabled -XX:CMSInitiatingOccupancyFraction=80 -XX:SoftRefLRUPolicyMSPerMB=0 -Xloggc:/data/doris/log/fe/fe.gc.log.$DATE"# For jdk 9+, this JAVA_OPTS will be used as default JVM options
JAVA_OPTS_FOR_JDK_9="-Xmx8192m -XX:SurvivorRatio=8 -XX:MaxTenuringThreshold=7 -XX:+CMSClassUnloadingEnabled -XX:-CMSParallelRemarkEnabled -XX:CMSInitiatingOccupancyFraction=80 -XX:SoftRefLRUPolicyMSPerMB=0 -Xlog:gc*:/data/doris/log/fe/fe.gc.log.$DATE:time"##
## the lowercase properties are read by main program.
### INFO, WARN, ERROR, FATAL
sys_log_level = INFO# store metadata, must be created before start FE.
# Default value is ${DORIS_HOME}/doris-meta
# meta_dir = ${DORIS_HOME}/doris-meta
meta_dir = /data/doris/data/meta# Default dirs to put jdbc drivers,default value is ${DORIS_HOME}/jdbc_drivers
# jdbc_drivers_dir = ${DORIS_HOME}/jdbc_drivershttp_port = 8030
rpc_port = 9020
query_port = 9030
edit_log_port = 9010
mysql_service_nio_enabled = true# Choose one if there are more than one ip except loopback address. 
# Note that there should at most one ip match this list.
# If no ip match this rule, will choose one randomly.
# use CIDR format, e.g. 10.10.10.0/24
# Default value is empty.
# priority_networks = 10.10.10.0/24;192.168.0.0/16
priority_networks = 192.168.0.122/24
# Advanced configurations 
# log_roll_size_mb = 1024
sys_log_dir = /data/doris/log/fe
# sys_log_roll_num = 10
# sys_log_verbose_modules = org.apache.doris
audit_log_dir = /data/doris/log/fe
# audit_log_modules = slow_query, query
# audit_log_roll_num = 10
# meta_delay_toleration_second = 10
# qe_max_connection = 1024
# max_conn_per_user = 100
# qe_query_timeout_second = 300
# qe_slow_log_ms = 5000
default_storage_medium = SSD
dynamic_partition_enable = true

2.3.3、修改BE配置

JAVA_HOME=/usr/java/jdk1.8.0_201-amd64PPROF_TMPDIR="/data/doris/log/be"CUR_DATE=`date +%Y%m%d-%H%M%S`
JAVA_OPTS="-Xmx1024m -DlogPath=/data/doris/log/be/jni.log -Xloggc:/data/doris/log/be/be.gc.log.$CUR_DATE -Dsun.java.command=DorisBE -XX:-CriticalJNINatives -DJDBC_MIN_POOL=1 -DJDBC_MAX_POOL=100 -DJDBC_MAX_IDEL_TIME=300000 -DJDBC_MAX_WAIT_TIME=5000"# For jdk 9+, this JAVA_OPTS will be used as default JVM options
JAVA_OPTS_FOR_JDK_9="-Xmx1024m -DlogPath=/data/doris/log/be/jni.log -Xlog:gc:/data/doris/log/be/be.gc.log.$CUR_DATE -Dsun.java.command=DorisBE -XX:-CriticalJNINatives -DJDBC_MIN_POOL=1 -DJDBC_MAX_POOL=100 -DJDBC_MAX_IDEL_TIME=300000 -DJDBC_MAX_WAIT_TIME=5000"# since 1.2, the JAVA_HOME need to be set to run BE process.
# JAVA_HOME=/path/to/jdk/# INFO, WARNING, ERROR, FATAL
sys_log_level = INFO# ports for admin, web, heartbeat service 
be_port = 9060
webserver_port = 18040
heartbeat_service_port = 9050
brpc_port = 8060# Choose one if there are more than one ip except loopback address. 
# Note that there should at most one ip match this list.
# If no ip match this rule, will choose one randomly.
# use CIDR format, e.g. 10.10.10.0/24
# Default value is empty.
# priority_networks = 10.10.10.0/24;192.168.0.0/16
priority_networks = 192.168.0.129/24# data root path, separate by ';'
# you can specify the storage medium of each root path, HDD or SSD
# you can add capacity limit at the end of each root path, separate by ','
# eg:
# storage_root_path = /home/disk1/doris.HDD,50;/home/disk2/doris.SSD,1;/home/disk2/doris
# /home/disk1/doris.HDD, capacity limit is 50GB, HDD;
# /home/disk2/doris.SSD, capacity limit is 1GB, SSD;
# /home/disk2/doris, capacity limit is disk capacity, HDD(default)
# 
# you also can specify the properties by setting '<property>:<value>', separate by ','
# property 'medium' has a higher priority than the extension of path
#
# Default value is ${DORIS_HOME}/storage, you should create it by hand.
# storage_root_path = ${DORIS_HOME}/storage
storage_root_path = /data/doris/data/storage.SSD# Default dirs to put jdbc drivers,default value is ${DORIS_HOME}/jdbc_drivers
# jdbc_drivers_dir = ${DORIS_HOME}/jdbc_drivers# Advanced configurations
sys_log_dir = /data/doris/log/be
# sys_log_roll_mode = SIZE-MB-1024
# sys_log_roll_num = 10
# sys_log_verbose_modules = *
# log_buffer_level = -1
# palo_cgroups 
# 更快的导入速度,默认为2
flush_thread_num_per_store = 5
# 获得更好的查询性能
enable_storage_vectorization = true
enable_low_cardinality_optimize = true
enable_segcompaction = true
# 使用新的compaction算法
enable_vertical_compaction = true

2.3.4、分发安装文件

ansible cluster -m copy -a 'src=/opt/module/apache-doris-1.2.6-bin-x64/fe dest=/opt/module/doris/'ansible cluster -m copy -a 'src=/opt/module/apache-doris-1.2.6-bin-x64/be dest=/opt/module/doris/'ansible cluster -m copy -a 'src=/opt/module/apache-doris-1.2.6-bin-x64/extensions/apache_hdfs_broker dest=/opt/module/doris/'ansible cluster -m copy -a 'src=/opt/module/apache-doris-1.2.6-bin-x64/extensions/audit_loader dest=/opt/module/doris/'

2.3.5、修改BE节点配置

priority_networks = 192.168.0.129/24
priority_networks = 192.168.0.137/24

2.3.6、FE节点启动MySQL客户端

wget https://cdn.mysql.com/archives/mysql-8.0/mysql-community-common-8.0.25-1.el7.x86_64.rpm
wget https://cdn.mysql.com/archives/mysql-8.0/mysql-community-client-plugins-8.0.25-1.el7.x86_64.rpm
wget https://cdn.mysql.com/archives/mysql-8.0/mysql-community-libs-8.0.25-1.el7.x86_64.rpm
wget https://cdn.mysql.com/archives/mysql-8.0/mysql-community-client-8.0.25-1.el7.x86_64.rpmrpm -ivh mysql-community-common-8.0.25-1.el7.x86_64.rpm
rpm -ivh mysql-community-client-plugins-8.0.25-1.el7.x86_64.rpm
rpm -ivh mysql-community-libs-8.0.25-1.el7.x86_64.rpm
rpm -ivh mysql-community-client-8.0.25-1.el7.x86_64.rpm
或
rpm -Uvh *.rpm --nodeps --force

2.4、启动FE

2.4.1、启动FE

bin/start_fe.sh --daemon

2.4.1、访问FE

注:8030是默认端口

http://192.168.0.122:8030

2.4.2、MySQL连接FE

# 无密码登录
mysql -h127.0.0.1 -P9030 -urootSET PASSWORD FOR 'root' = PASSWORD('Boshi*2023');#退出再次登录即可
mysql -h192.168.0.122 -P9030 -uroot -pBoshi*2023

2.4.3、查看FE状态

mysql-client 连接到 FE

# IsMaster、Join 和 Alive 三列均为true,则表示节点正常
SHOW PROC '/frontends';

通过URL来访问
http://fe_host:fe_http_port/api/bootstrap
fe_host FE节点ip
fe_http_port FE安装节点conf/fe.conf中配置的端口

http://192.168.0.122:8030/api/bootstrap

2.4.4、FE 扩容-读写高可用

1、配置及启动Follower
# 首先第一次启动时,需执行以下命令:
fe/bin/start_fe.sh --helper leader_fe_host:edit_log_port --daemonfe/bin/start_fe.sh --helper xx.xx.xx.xx:9010 --daemon
2、将 Follower加入到集群
# 使用 mysql-client 连接到已启动的 FE,并执行:
ALTER SYSTEM ADD FOLLOWER "follower_host:edit_log_port";ALTER SYSTEM ADD FOLLOWER "xx.xx.xx.xx:9010";
3、 再次加入fe2, fe3

# 再次加入fe2, fe3:
fe/bin/start_fe.sh --helper xx.xx.xx.xx:9010 --daemonALTER SYSTEM ADD FOLLOWER "xx.xx.xx.xx:9010";
ALTER SYSTEM ADD FOLLOWER "xx.xx.xx.xx:9010";
4、 查看是否成功
SHOW PROC '/frontends';

2.4.5、FE 扩容-读高可用

1、配置及启动Observer
# 首先第一次启动时,需执行以下命令:
fe/bin/start_fe.sh --helper leader_fe_host:edit_log_port --daemonfe/bin/start_fe.sh --helper xx.xx.xx.xx:9010 --daemon
2、将 Observer 加入到集群
# 使用 mysql-client 连接到已启动的 FE,并执行:
ALTER SYSTEM ADD OBSERVER "observer_host:edit_log_port";ALTER SYSTEM ADD FOLLOWER "xx.xx.xx.xx:9010";

2.5、启动BE

2.5.1、启动BE

sysctl -w vm.max_map_count=2000000
ulimit -n 65536bin/start_be.sh --daemon

2.5.2、增删 BE

#添加 BE
ALTER SYSTEM ADD BACKEND "192.168.0.129:9050";  
ALTER SYSTEM ADD BACKEND "192.168.0.137:9050";# 删除BE
ALTER SYSTEM DROPP BACKEND "hostname:9050";ALTER SYSTEM DROPP BACKEND "192.168.0.129:9050";  
ALTER SYSTEM DROPP BACKEND "192.168.0.137:9050";

2.5.3、查看BE状态

mysql-client 连接到 FE

# isAlive 列应为 true
SHOW PROC '/backends' \G; 

通过URL来访问
http://be_host:webserver_port/api/health
webserver_port BE安装节点conf/be.conf中配置的端口

http://192.168.0.129:18040/api/health

3、集群管理

3.1、WEBUI

http://fe_host:fe_http_porthttp://192.168.0.122:8030

3.2、创建用户

CREATE USER 'doris'@'%' IDENTIFIED BY 'Doris*2023' DEFAULT ROLE 'admin';SHOW GRANTS FOR doris@'%';# 登录
mysql -h192.168.0.122 -P9030 -udoris -pDoris*2023

3.3、创建库

create database tmp;
create database ods;
create database dwd;
create database dws;
create database ads;

3.3、创建表

3.3.1、主键唯一模型

CREATE TABLE tmp.dept
(
`deptno` int, 
`dname` varchar(14),
`loc` varchar(13)
)
UNIQUE KEY(deptno)
COMMENT "部门表"
DISTRIBUTED BY HASH (deptno) BUCKETS 8
PROPERTIES(
"replication_num" = "1"
);insert into tmp.dept values (10,'accounting','new york'),(20,'research','dallas'),(30,'sales','chicago'),(40,'operations','boston');CREATE TABLE tmp.emp
(`empno` int,`ename` varchar(10),  `job` varchar(9),  `mgr` int,  `hiredate` date,  `sal` float,  `comm` float,  `deptno` int
)
UNIQUE KEY(empno)
COMMENT "员工表"
DISTRIBUTED BY HASH (empno) BUCKETS 8
PROPERTIES(
"replication_num" = "1"
);insert into tmp.emp values (7369,'smith','clerk',7902,'1980-12-17',800,null,20),
(7499,'allen','salesman',7698,'1981-02-20',1600,300,30),
(7521,'ward','salesman',7698,'1981-02-22',1250,500,30),
(7566,'jones','manager',7839,'1981-04-02',2975,null,20),
(7654,'martin','salesman',7698,'1981-09-28',1250,1400,30),
(7698,'blake','manager',7839,'1981-05-01',2850,null,30),
(7782,'clark','manager',7839,'1981-06-09',2450,null,10),
(7788,'scott','analyst',7566,'1987-07-13',3000,null,20),
(7839,'king','president',null,'1981-11-07',5000,null,10),
(7844,'turner','salesman',7698,'1981-09-08',1500,0,30),
(7876,'adams','clerk',7788,'1987-07-13',1100,null,20),
(7900,'james','clerk',7698,'1981-12-03',950,null,30),
(7902,'ford','analyst',7566,'1981-12-03',3000,null,20),
(7934,'miller','clerk',7782,'1982-01-23',1300,null,10);

3.3.2、明细模型

CREATE TABLE tmp.salgrade
(`grade` int, `losal` int,  `hisal` int
)
DUPLICATE KEY(grade,losal,hisal)
COMMENT "工资水平"
DISTRIBUTED BY HASH(losal,hisal) BUCKETS 8
PROPERTIES (
"replication_num" = "1"
);insert into tmp.salgrade values (1,700,1200),(2,1201,1400),(3,1401,2000),(4,2001,3000),(5,3001,9999);

3.3.2、聚合模型

1、用户消费和行为聚合模型
-- 这是一个用户消费和行为记录的数据表
CREATE TABLE IF NOT EXISTS tmp.user_info
(`user_id` LARGEINT NOT NULL COMMENT "用户 id",`date` DATE NOT NULL COMMENT "数据灌入日期时间",`city` VARCHAR(20) COMMENT "用户所在城市",`age` SMALLINT COMMENT "用户年龄",`sex` TINYINT COMMENT "用户性别",`last_visit_date` DATETIME REPLACE  DEFAULT "1970-01-01 00:00:00" COMMENT "用户最后一次访问时间",`cost` BIGINT SUM DEFAULT "0" COMMENT "用户总消费",`max_dwell_time` INT MAX DEFAULT "0" COMMENT "用户最大停留时间",`min_dwell_time` INT MIN DEFAULT "99999" COMMENT "用户最小停留时间" )
ENGINE=olap
AGGREGATE KEY(`user_id`, `date`, `city`, `age`, `sex`)
COMMENT "用户消费和行为记录表"
-- 分区
-- 分桶
DISTRIBUTED BY HASH(`user_id`) BUCKETS 8
PROPERTIES (
"replication_num" = "1"
);insert into tmp.user_info values
(10000,'2017-10-01','北京',20,0,'2017-10-01 06:00:00',20,10,10),
(10000,'2017-10-01','北京',20,0,'2017-10-01 07:00:00',15,2,2),
(10001,'2017-10-01','北京',30,1,'2017-10-01 17:05:45',2,22,22),
(10002,'2017-10-02','上海',20,1,'2017-10-02 12:59:12',200,5,5),
(10003,'2017-10-02','广州',32,0,'2017-10-02 11:20:00',30,11,11),
(10004,'2017-10-01','深圳',35,0,'2017-10-01 10:00:15',100,3,3),
(10004,'2017-10-03','深圳',35,0,'2017-10-03 10:20:22',11,6,6);
2、订单聚合模型
# 自动聚合出如下结果:
# 订单日期,订单id,userId,商品id,购买得总件数,支付总额create table tmp.order_info
(
`date` date COMMENT "日期时间",
`oid` bigint COMMENT "订单id",
`userid` varchar(255) COMMENT "用户id",
`spid` varchar(255) COMMENT "商品id",
`total` int sum COMMENT "商品总数",
`pay` int sum COMMENT "支付总金额"
)
ENGINE=olap
AGGREGATE KEY(`date`,`oid`,`userid`,`spid`)
COMMENT "订单聚合表"
partition by range(`date`)
(
partition `p20221201` values less than ("2022-12-02"),
partition `p20221202` values less than ("2022-12-03"),
partition `p20221203` values less than ("2022-12-04")
)
DISTRIBUTED BY HASH(`userid`) BUCKETS 2
PROPERTIES (
"replication_num" = "1"
);-- 插入数据
-- 订单日期,订单id,userId,商品id,购买件数,支付的金额
insert into tmp.order_info values
('2022-12-01',1,'u01','p01',2,20),
('2022-12-01',1,'u01','p02',1,10),
('2022-12-01',1,'u01','p01',1,10),
('2022-12-01',2,'u02','p03',2,40);

4、使用手册


http://www.ppmy.cn/news/1086650.html

相关文章

J. Med. Chem 2022|TocoDecoy+: 针对机器学习打分函数训练和测试的无隐藏偏差的数据集构建新方法

原文标题&#xff1a;TocoDecoy: A New Approach to Design Unbiased Datasets for Training and Benchmarking Machine-Learning Scoring Functions 论文链接&#xff1a;https://pubs.acs.org/doi/10.1021/acs.jmedchem.2c00460 论文代码&#xff1a;GitHub - 5AGE-zhang/T…

基于体系结构架构设计-架构真题(十五)

基于体系结构开发设计&#xff08;Architecture-Base Software Design&#xff09;ABSD&#xff0c;是指构成体系结构的&#xff08;&#xff09;组合驱动&#xff0c;ABSC方法是一个自项向下、递归细化的方法&#xff0c;软件系统的体系结构通过该方法细化&#xff0c;直到能产…

LeetCode-435-无重叠区间

题目链接&#xff1a; 力扣435 -无重叠区间 解题思路&#xff1a;和之前的合并区间、汇总区间都比较相似&#xff0c; 先对二维数组排序&#xff0c;按照左边界升序&#xff1b;当 当前区间的左区间 < 前一个区间的右区间&#xff0c;说明有重叠&#xff0c;res1,还要更新当…

【黑马头条之项目部署_持续集成Jenkins】

本笔记内容为黑马头条项目的项目部署_持续集成部分 目录 一、内容介绍 1、什么是持续集成 2、持续集成的好处 3、今日内容 二、软件开发模式 1、软件开发生命周期 2、软件开发瀑布模型 3、软件的敏捷开发 三、Jenkins安装配置 1、Jenkins介绍 2、Jenkins环境搭建 …

软件设计师学习笔记8-操作系统+进程

目录 1.操作系统 1.1操作系统层次图 1.2操作系统的作用 1.3操作系统的任务 2.特殊的操作系统 3.进程 3.1进程的概念 3.2进程与程序 3.3进程与线程 3.4进程的状态 3.4.1三态模型 3.4.2基于三态模型的五态模型 1.操作系统 1.1操作系统层次图 该图片来自希赛软考 1.…

深入理解作用域、作用域链和闭包

​ &#x1f3ac; 岸边的风&#xff1a;个人主页 &#x1f525; 个人专栏 :《 VUE 》 《 javaScript 》 ⛺️ 生活的理想&#xff0c;就是为了理想的生活 ! ​ 目录 &#x1f4da; 前言 &#x1f4d8; 1. 词法作用域 &#x1f4d6; 1.2 示例 &#x1f4d6; 1.3 词法作用域的…

【机器学习】人工智能概述(文末送书)

&#x1f935;‍♂️ 个人主页&#xff1a;艾派森的个人主页 ✍&#x1f3fb;作者简介&#xff1a;Python学习者 &#x1f40b; 希望大家多多支持&#xff0c;我们一起进步&#xff01;&#x1f604; 如果文章对你有帮助的话&#xff0c; 欢迎评论 &#x1f4ac;点赞&#x1f4…

掌握Python编程:从入门到精通的完整指南

Python是一门流行的编程语言,适用于多种应用领域,包括Web开发、数据分析、人工智能、机器学习等。以下是一个Python入门的大纲,适用于初学者: 第一部分:基础概念 1. Python简介 Python的历史和发展Python的优点和应用领域2. 安装Python 下载和安装Python配置Python开发环…