-
安装 git、vimplus、openssh-server
#!/bin/bash
# Install base tooling: git, vim (+ vimplus), and the OpenSSH server.
# BUG FIX: the original ran "yum install -y update", which tries to install a
# package literally named "update"; "yum update -y" is what was intended.
yum update -y
yum install -y git
yum install -y vim
# Install the OpenSSH server
yum install -y openssh-server
# Download the vimplus Vim distribution
git clone https://github.com/chxuan/vimplus.git ~/.vimplus
# Install vimplus (choose python3 when prompted)
cd ~/.vimplus
./install.sh
-
配置 SSH 免密登录
# Set up passwordless SSH login to localhost (required by the Hadoop start scripts).
ssh-keygen -t rsa   # accept all defaults; creates private key id_rsa and public key id_rsa.pub
# Authorize our own public key for login
cp ~/.ssh/id_rsa.pub ~/.ssh/authorized_keys
# sshd silently ignores authorized_keys if its permissions are too loose
chmod 600 ~/.ssh/authorized_keys
# Test the login; it should succeed without a password prompt, e.g.:
#   [root@hou .ssh]# ssh localhost
#   Last login: Wed Sep 16 22:24:16 2020
#   Welcome to Alibaba Cloud Elastic Compute Service !
#   [root@hou ~]# exit
#   Connection to localhost closed.
ssh localhost
# Troubleshooting: if authorized_keys cannot be deleted/replaced, it may have
# the immutable attribute set. Inspect and clear it with:
#   lsattr ~/.ssh/authorized_keys     # shows e.g. "----i--------e--"
#   chattr -i ~/.ssh/authorized_keys  # remove the immutable bit ("解锁"; original had typo "解索")
-
安装hadoop(root用户)
- 上传安装包
# Upload the installer bundle to the server's /root directory (run from the local machine).
scp -r big_data_tools root@47.113.123.32:/root/
- 安装jdk
# --- Install the JDK ---
# Create the installation directories
mkdir /apps   # installed frameworks
mkdir /data   # temporary data, HDFS data, code/scripts
# Copy the JDK archive and unpack it.
# BUG FIX: the original copied hadoop-3.0.0.tar.gz here but then extracted
# jdk-8u191-linux-x64.tar.gz — the JDK archive is the one this section needs
# (the Hadoop copy belongs to the next section).
cp ~/big_data_tools/jdk-8u191-linux-x64.tar.gz /apps/
cd /apps
tar -xzvf jdk-8u191-linux-x64.tar.gz
# Rename and clean up
mv /apps/jdk1.8.0_191/ /apps/java
rm /apps/jdk-8u191-linux-x64.tar.gz
# Environment variables: open ~/.bashrc and append the lines below
vim ~/.bashrc
# Java
export JAVA_HOME=/apps/java
export PATH=$JAVA_HOME/bin:$PATH
# Reload the shell configuration
source ~/.bashrc
# Verify:
java -version
# Expected output:
#   java version "1.8.0_191"
#   Java(TM) SE Runtime Environment (build 1.8.0_191-b12)
#   Java HotSpot(TM) 64-Bit Server VM (build 25.191-b12, mixed mode)
- 安装hadoop
# --- Install Hadoop ---
# Copy the archive and unpack it.
# BUG FIX: the original copied jdk-8u191-linux-x64.tar.gz here but then
# extracted hadoop-3.0.0.tar.gz — the Hadoop archive is the one needed here
# (the cp commands in the JDK and Hadoop sections were swapped).
cp ~/big_data_tools/hadoop-3.0.0.tar.gz /apps/
cd /apps
tar -xzvf hadoop-3.0.0.tar.gz
mv /apps/hadoop-3.0.0/ /apps/hadoop
# Append the environment variables below (sudo is redundant for root's own ~/.bashrc)
sudo vim ~/.bashrc
# Hadoop
export HADOOP_HOME=/apps/hadoop
export PATH=$HADOOP_HOME/bin:$PATH
# Reload and verify
source ~/.bashrc
hadoop version
# Expected output:
#   Hadoop 3.0.0
#   Source code repository https://git-wip-us.apache.org/repos
#     /asf/hadoop.git -r c25427ceca461ee979d30edd7a4b0f50718e6533
#   Compiled by andrew on 2017-12-08T19:16Z
#   Compiled with protoc 2.5.0
#   From source with checksum 397832cb5529187dc8cd74ad54ff22
#   This command was run using /apps/hadoop/share/hadoop/common/
#     hadoop-common-3.0.0.jar
-
至此,单机 Hadoop 就安装好了。接下来对 Hadoop 进行配置,以实现伪分布式。伪分布式是在一台机器上模拟一个只有一个节点的集群。
- 为hadoop指定JAVA_HOME
# Point Hadoop at our JDK install.
cd /apps/hadoop/etc/hadoop
vim hadoop-env.sh
# Change line 54 so the section reads:
#   52  # The java implementation to use. By default, this environment
#   53  # variable is REQUIRED on ALL platforms except OS X!
#   54  export JAVA_HOME=/apps/java
- 配置core-site.xml
# Configure core-site.xml (the XML is shown as comments — in the original
# notes it appeared as bare lines, which is not valid shell).
vim core-site.xml
# <configuration>
#   <property>
#     <name>hadoop.tmp.dir</name>
#     <value>/data/tmp/hadoop/tmp</value>
#   </property>
#   <property>
#     <name>fs.defaultFS</name>
#     <value>hdfs://localhost:9000</value>
#   </property>
# </configuration>
# hadoop.tmp.dir: where Hadoop stores temporary files during processing;
# the directory must exist beforehand, so create it:
mkdir -p /data/tmp/hadoop/tmp
# fs.defaultFS: address and port of the HDFS filesystem.
- 配置hdfs-site.xml
# Configure hdfs-site.xml (XML shown as comments; it was bare text in the notes).
vim hdfs-site.xml
# <configuration>
#   <property>
#     <name>dfs.namenode.name.dir</name>
#     <value>/data/tmp/hadoop/hdfs/name</value>
#   </property>
#   <property>
#     <name>dfs.datanode.data.dir</name>
#     <value>/data/tmp/hadoop/hdfs/data</value>
#   </property>
#   <property>
#     <name>dfs.replication</name>
#     <value>1</value>
#   </property>
#   <property>
#     <name>dfs.permissions.enabled</name>
#     <value>false</value>
#   </property>
# </configuration>
# dfs.namenode.name.dir: where the NameNode stores its metadata (fsimage/edits).
# dfs.datanode.data.dir: where the DataNode stores the actual data blocks
#   (the original note called this "metadata", which is incorrect — only the
#    NameNode holds metadata).
# dfs.replication: number of replicas per block; with a single node it must be 1
#   (setting 2 would cause runtime errors).
# dfs.permissions.enabled: whether HDFS enforces permission checks.
# The /data/tmp/hadoop/hdfs path must exist beforehand:
mkdir -p /data/tmp/hadoop/hdfs
- 配置workers
# List the hostnames of the cluster's slave (worker) nodes in the workers file,
# one per line. With only a single node, the file just contains:
#   localhost
vim workers
- 指定root用户
# Allow the daemons to run as root — the Hadoop 3 start scripts refuse to
# start otherwise.
vim /apps/hadoop/sbin/start-dfs.sh
# Add near the top:
#   HDFS_DATANODE_USER=root
#   HDFS_DATANODE_SECURE_USER=hdfs
#   HDFS_NAMENODE_USER=root
#   HDFS_SECONDARYNAMENODE_USER=root
# FIX: the original used HADOOP_SECURE_DN_USER, which is deprecated in
# Hadoop 3 — start-dfs.sh itself prints "WARNING: HADOOP_SECURE_DN_USER has
# been replaced by HDFS_DATANODE_SECURE_USER" (see the transcript further
# down in these notes). Use HDFS_DATANODE_SECURE_USER instead.
vim /apps/hadoop/sbin/start-yarn.sh
# Add near the top:
#   YARN_RESOURCEMANAGER_USER=root
#   YARN_NODEMANAGER_USER=root
# (The original also set HADOOP_SECURE_DN_USER=yarn here; it is deprecated
#  and not needed for a non-secure single-node setup.)
- 格式化 HDFS 文件系统
# Format the HDFS filesystem (first run only!).
# "hadoop namenode -format" is deprecated in Hadoop 3; use the hdfs command:
hdfs namenode -format
# If no error is reported, formatting succeeded. Start the HDFS daemons from
# the sbin directory:
cd /apps/hadoop/sbin/
./start-dfs.sh
# Sample transcript (daemons already running from a previous start):
#   WARNING: HADOOP_SECURE_DN_USER has been replaced by
#   HDFS_DATANODE_SECURE_USER. Using value of HADOOP_SECURE_DN_USER.
#   Starting namenodes on [localhost]
#   localhost: namenode is running as process 22446. Stop it first.
#   Starting datanodes
#   localhost: datanode is running as process 22608. Stop it first.
#   Starting secondary namenodes [hou]
#   hou: secondarynamenode is running as process 22815. Stop it first.
# Check the Java processes:
jps
#   22608 DataNode
#   22446 NameNode
#   7407 Jps
#   22815 SecondaryNameNode
# Verify HDFS further: create a directory and list the root.
hadoop fs -mkdir /myhadoop
hadoop fs -ls /
#   drwxr-xr-x - root supergroup 0 2020-09-16 17:17 /myhadoop
- 下面来配置 MapReduce。配置mapred-site.xml
# Configure MapReduce.
cd /apps/hadoop/etc/hadoop
vim mapred-site.xml
# NOTE: yarn.application.classpath must be ONE unbroken line with no spaces —
# the wrapped value in the original notes contained stray spaces/newlines
# ("commo n", "app s"), which would break the classpath if copied verbatim.
# <configuration>
#   <property>
#     <name>mapreduce.framework.name</name>
#     <value>yarn</value>
#   </property>
#   <property>
#     <name>yarn.application.classpath</name>
#     <value>/apps/hadoop/etc/hadoop:/apps/hadoop/share/hadoop/common/lib/*:/apps/hadoop/share/hadoop/common/*:/apps/hadoop/share/hadoop/hdfs:/apps/hadoop/share/hadoop/hdfs/lib/*:/apps/hadoop/share/hadoop/hdfs/*:/apps/hadoop/share/hadoop/mapreduce/*:/apps/hadoop/share/hadoop/yarn:/apps/hadoop/share/hadoop/yarn/lib/*:/apps/hadoop/share/hadoop/yarn/*</value>
#   </property>
# </configuration>
- 配置yarn-site.xml
# Configure YARN (XML and transcript shown as comments — the transcript lines
# were bare in the original, which is not valid shell).
vim yarn-site.xml
# <configuration>
#   <property>
#     <name>yarn.nodemanager.aux-services</name>
#     <value>mapreduce_shuffle</value>
#   </property>
# </configuration>
# Configures the auxiliary service running on the NodeManager; it must be
# mapreduce_shuffle for MapReduce programs to run.
# If a later MapReduce job fails, see:
#   https://blog.csdn.net/weixin_44626279/article/details/108672681
# Start the compute-side daemons:
cd /apps/hadoop/sbin/
./start-yarn.sh
# Sample transcript (daemons already running from a previous start):
#   Starting resourcemanager
#   resourcemanager is running as process 7924. Stop it first.
#   Starting nodemanagers
#   localhost: nodemanager is running as process 25422. Stop it first.
# Run jps: six processes in total (including jps itself) should be listed.
# If any are missing, start-dfs.sh or start-yarn.sh failed somewhere.
#   7924 ResourceManager
#   8982 DataNode
#   9288 Jps
#   25422 NodeManager
#   22446 NameNode
#   22815 SecondaryNameNode
- 测试
# Smoke-test Hadoop with a bundled MapReduce example job.
cd /apps/hadoop/share/hadoop/mapreduce
hadoop jar hadoop-mapreduce-examples-3.0.0.jar pi 3 3
# On a low-memory machine the job may die with:
#   Java HotSpot(TM) 64-Bit Server VM warning: INFO:
#   os::commit_memory(...) failed; error='cannot allocate memory' (errno=12)
#   # There is insufficient memory for the Java Runtime Environment to continue.
#   # Native memory allocation (mmap) failed to map 89128960 bytes ...
#   # An error report file ... is saved as: .../hs_err_pid9306.log
# Recovery: kill the Java processes and restart the daemons.
# (Prefer ./stop-yarn.sh && ./stop-dfs.sh for a graceful shutdown;
#  pkill -9 is the blunt fallback used in the original notes.)
pkill -9 java
cd /apps/hadoop/sbin/
./start-dfs.sh
./start-yarn.sh
# BUG FIX: the original re-ran the example from /apps/hadoop/sbin with a
# relative jar path — the jar is not there. Change back first:
cd /apps/hadoop/share/hadoop/mapreduce
hadoop jar hadoop-mapreduce-examples-3.0.0.jar pi 3 3
- Web界面