Deploying Hadoop on a Single ECS Instance
System Preparation

- Update the system and install basic tools

```bash
sudo yum update -y
sudo yum install -y wget vim net-tools openssh-server
```
- Disable the firewall

```bash
sudo systemctl stop firewalld      # stop the firewall
sudo systemctl disable firewalld   # keep it from starting at boot
sudo systemctl status firewalld    # check its status
```
Installing Java

- Install OpenJDK 8

```bash
# Install OpenJDK 8
sudo yum install -y java-1.8.0-openjdk-devel
```
- Check the version

```bash
# Verify the installation
java -version
```
- Find the Java installation path

```bash
# Run:
readlink -f $(which java)
# Output:
# /usr/lib/jvm/java-8-konajdk-8.0.20-1.oc9/bin/java
# Installation path (the output minus the trailing /bin/java):
# /usr/lib/jvm/java-8-konajdk-8.0.20-1.oc9
```
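If you would rather not copy the path by hand, the installation directory can be derived from the same command. A minimal sketch; the path shown is just this machine's output and will differ on yours:

```bash
# Strip the trailing /bin/java from the resolved path to get the JDK home
JAVA_HOME=$(dirname "$(dirname "$(readlink -f "$(which java)")")")
echo "$JAVA_HOME"   # e.g. /usr/lib/jvm/java-8-konajdk-8.0.20-1.oc9
```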
Hadoop User

- Create the user

```bash
# Create the user
sudo useradd hadoop
sudo passwd hadoop   # set a password (e.g. hadoop)

# Grant sudo privileges; tee performs the append with root rights
# (a plain "sudo echo ... >> /etc/sudoers" fails, because the
# redirection itself runs as the unprivileged user)
echo "hadoop ALL=(ALL) NOPASSWD: ALL" | sudo tee -a /etc/sudoers

# Switch to the hadoop user
su - hadoop
```
- Passwordless SSH login

```bash
# Generate a key pair (press Enter through all prompts)
ssh-keygen -t rsa

# Append the public key to the authorized keys file
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys

# Test passwordless login
ssh localhost   # after answering "yes" it should log in directly
exit
```
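If ssh localhost still prompts for a password, overly loose file permissions are the usual cause: under sshd's default strict mode the key files must not be group or world writable. A quick fix, assuming default sshd settings:

```bash
# sshd silently ignores authorized_keys when these permissions are loose
chmod 700 ~/.ssh
chmod 600 ~/.ssh/authorized_keys
```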
Downloading and Installing Hadoop

- Download Hadoop

```bash
# Download Hadoop (3.3.6 as the example)
wget https://downloads.apache.org/hadoop/common/hadoop-3.3.6/hadoop-3.3.6.tar.gz
```
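Optionally verify the download before extracting. This sketch assumes the usual downloads.apache.org layout, where a .sha512 companion file sits next to the tarball, and a coreutils version that understands its checksum format:

```bash
# Fetch the published checksum and compare it against the local file
wget https://downloads.apache.org/hadoop/common/hadoop-3.3.6/hadoop-3.3.6.tar.gz.sha512
sha512sum -c hadoop-3.3.6.tar.gz.sha512   # should report: OK
```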
- Extract Hadoop

```bash
# Extract and move it to the target directory
tar -zxvf hadoop-3.3.6.tar.gz
sudo mv hadoop-3.3.6 /opt/hadoop
sudo chown -R hadoop:hadoop /opt/hadoop
```
- Configure environment variables

Edit the ~/.bashrc file:

```bash
vim ~/.bashrc
```

Add the following; set JAVA_HOME to the path reported by readlink -f $(which java) above:

```bash
export JAVA_HOME=/usr/lib/jvm/java-8-konajdk-8.0.20-1.oc9
export HADOOP_HOME=/opt/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"
```
- Apply the configuration

```bash
source ~/.bashrc
```
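A quick check that the variables took effect; hadoop version is part of the stock CLI:

```bash
echo $HADOOP_HOME    # should print /opt/hadoop
hadoop version       # should print Hadoop 3.3.6 plus build details
```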
- Edit the Hadoop configuration files

- hadoop-env.sh

```bash
vim $HADOOP_HOME/etc/hadoop/hadoop-env.sh
```

Set JAVA_HOME; again, use the path reported by readlink -f $(which java):

```bash
export JAVA_HOME=/usr/lib/jvm/java-8-konajdk-8.0.20-1.oc9
```
- core-site.xml

```xml
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:9000</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/opt/hadoop/tmp</value>
  </property>
</configuration>
```
- hdfs-site.xml

```xml
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/opt/hadoop/hdfs/namenode</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/opt/hadoop/hdfs/datanode</value>
  </property>
</configuration>
```
- mapred-site.xml

```xml
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>
```
- yarn-site.xml

```xml
<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.env-whitelist</name>
    <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
  </property>
</configuration>
```
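As a sanity check that the XML files parse and are being picked up, hdfs getconf reads the merged configuration. Run it as the hadoop user with the environment variables loaded:

```bash
hdfs getconf -confKey fs.defaultFS      # should print hdfs://localhost:9000
hdfs getconf -confKey dfs.replication   # should print 1
```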
Initializing HDFS

- Create the directories

Create the directories referenced in the configuration above:

```bash
mkdir -p /opt/hadoop/tmp
mkdir -p /opt/hadoop/hdfs/{namenode,datanode}
```
- Format the NameNode

```bash
# Format the NameNode
hdfs namenode -format
```
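On success, the command's log output ends with a line reporting that the storage directory has been successfully formatted. You can also confirm that the metadata files were created:

```bash
# A VERSION file and an initial fsimage appear here after formatting
ls /opt/hadoop/hdfs/namenode/current
```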
Starting Hadoop

- Start HDFS

```bash
start-dfs.sh
```

- Start YARN

```bash
start-yarn.sh
```
- Check the Hadoop processes

```bash
jps
# Expected processes:
# NameNode
# DataNode
# SecondaryNameNode
# ResourceManager
# NodeManager
```
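If one of the daemons is missing from the jps output, its log usually explains why. By default Hadoop writes per-daemon logs named hadoop-<user>-<daemon>-<hostname>.log under $HADOOP_HOME/logs; for example:

```bash
# Inspect the tail of the DataNode log (exact filename varies by hostname)
tail -n 50 $HADOOP_HOME/logs/hadoop-hadoop-datanode-*.log
```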
Verifying the Deployment

- Access the Web UIs
  - HDFS: http://<server IP>:9870
  - YARN: http://<server IP>:8088
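If the pages do not load in your browser, first confirm the services respond on the server itself, then check that the ECS security group allows inbound traffic on ports 9870 and 8088, a common stumbling block on cloud instances:

```bash
# Test from the server; an HTML response means the daemons are listening
curl -s http://localhost:9870 | head -n 5
curl -s http://localhost:8088 | head -n 5
```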
- Test HDFS operations

```bash
# Create a local test file to upload
echo "hdfs test" > test.txt

# Create a directory in HDFS
hdfs dfs -mkdir /test
# List the directory
hdfs dfs -ls /test
# Upload a file
hdfs dfs -put test.txt /test
# Download a file
hdfs dfs -get /test/test.txt hdfs_test.txt
```
- Run the MapReduce example

```bash
# Generate a test file
echo "Hello World Hello Hadoop" > input.txt

# Create the input directory
hdfs dfs -mkdir /input

# Upload the test file
hdfs dfs -put input.txt /input

# Run the bundled example job
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.6.jar wordcount /input /output

# View the output
hdfs dfs -cat /output/*
```
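For the sample input above, the word counts should come out as shown below. Note also that MapReduce refuses to write into an existing output directory, so remove /output before re-running the job:

```bash
# Expected wordcount output (tab-separated):
# Hadoop   1
# Hello    2
# World    1

# Clean up before re-running, otherwise the job fails because /output already exists
hdfs dfs -rm -r /output
```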