    一、预备 安装java环境,jdk1.8.0_171 首先去Oracle下载jdk,复制到虚拟机

    [root@hadoopmaster module]# pwd /opt/module [root@hadoopmaster module]# ll total 186424 drwxr-xr-x. 8 root root 4096 Mar 28 17:18 jdk1.8.0_171 -rwxrw-rw-. 1 root root 190890122 Jun 1 06:10 jdk-8u171-linux-x64.tar.gz [root@hadoopmaster module]#


    [root@hadoopmaster module]# vi /etc/profile


    #JAVA_HOME export JAVA_HOME=/opt/module/jdk1.8.0_171 export PATH=${PATH}:${JAVA_HOME}/bin


    [root@hadoopmaster module]# source /etc/profile


    [root@hadoopmaster module]# java -version java version "1.8.0_171" Java(TM) SE Runtime Environment Java HotSpot(TM) 64-Bit Server VM

    jdk环境搭好了,下面安装hadoop 二、单机模式 1、下载Hadoop-2.6.5.tar.gz 2、安装 复制到虚拟机:

    [root@hadoopmaster module]# pwd /opt/module [root@hadoopmaster module]# ll total 194964 -rwxrw-rw-. 1 root root 199635269 Aug 8 23:19 hadoop-2.6.5.tar.gz drwxr-xr-x. 8 root root 4096 Mar 28 17:18 jdk1.8.0_171 [root@hadoopmaster module]#


    [root@hadoopmaster module]# tar -zxvf hadoop-2.6.5.tar.gz


    [root@hadoopmaster module]# vi /etc/profile #HADOOP_HOME export HADOOP_HOME=/opt/module/hadoop-2.6.5 export PATH=${PATH}:${HADOOP_HOME}/bin [root@hadoopmaster module]# source /etc/profile

    查看是否设置成功:[root@hadoopmaster module]# hadoop version Hadoop 2.6.5 Subversion https://github.com/apache/hadoop.git -r e8c9fe0b4c252caf2ebf1464220599650f119997 Compiled by sjlee on 2016-10-02T23:43Z 配置成功了,但是有一点需要注意,在hadoop环境配置文件中需要将JAVA_HOME由原来${JAVA_HOME}换成具体路径,这样在集群环境中才不会出现问题:

    [root@hadoopmaster module]# vi /opt/module/hadoop-2.6.5/etc/hadoop/hadoop-env.sh export JAVA_HOME=/opt/module/jdk1.8.0_171

    保存,测试:[root@hadoopmaster module]# hadoop version Hadoop 2.6.5 Subversion https://github.com/apache/hadoop.git -r e8c9fe0b4c252caf2ebf1464220599650f119997 Compiled by sjlee on 2016-10-02T23:43Z Compiled with protoc 2.5.0 From source with checksum f05c9fa095a395faa9db9f7ba5d754

    3、测试 接下来需要运行一个实例来进行测试: 进入hadoop根目录:

    [root@hadoopmaster hadoop-2.6.5]# pwd /opt/module/hadoop-2.6.5 [root@hadoopmaster hadoop-2.6.5]# ll total 116 drwxrwxr-x. 2 j j 4096 Oct 2 2016 bin drwxrwxr-x. 3 j j 19 Oct 2 2016 etc drwxrwxr-x. 2 j j 101 Oct 2 2016 include drwxrwxr-x. 3 j j 19 Oct 2 2016 lib drwxrwxr-x. 2 j j 4096 Oct 2 2016 libexec -rw-rw-r--. 1 j j 84853 Oct 2 2016 LICENSE.txt -rw-rw-r--. 1 j j 14978 Oct 2 2016 NOTICE.txt -rw-rw-r--. 1 j j 1366 Oct 2 2016 README.txt drwxrwxr-x. 2 j j 4096 Oct 2 2016 sbin drwxrwxr-x. 4 j j 29 Oct 2 2016 share


    [root@hadoopmaster hadoop-2.6.5]# mkdir input [root@hadoopmaster hadoop-2.6.5]# ll total 116 drwxrwxr-x. 2 j j 4096 Oct 2 2016 bin drwxrwxr-x. 3 j j 19 Oct 2 2016 etc drwxrwxr-x. 2 j j 101 Oct 2 2016 include drwxr-xr-x. 2 root root 6 Aug 10 23:18 input drwxrwxr-x. 3 j j 19 Oct 2 2016 lib drwxrwxr-x. 2 j j 4096 Oct 2 2016 libexec -rw-rw-r--. 1 j j 84853 Oct 2 2016 LICENSE.txt -rw-rw-r--. 1 j j 14978 Oct 2 2016 NOTICE.txt -rw-rw-r--. 1 j j 1366 Oct 2 2016 README.txt drwxrwxr-x. 2 j j 4096 Oct 2 2016 sbin drwxrwxr-x. 4 j j 29 Oct 2 2016 share


    [root@hadoopmaster hadoop-2.6.5]# cd input [root@hadoopmaster input]# ll total 40 -rwxrw-rw-. 1 root root 39654 Aug 9 08:28 log.txt


    [root@hadoopmaster hadoop-2.6.5]# hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.5.jar wordcount input output input是文件输入的文件夹,output是结果输出文件夹 需要注意的是output文件夹不能存在,存在就会报错 可以看到,运行成功,输出目录在hadoop目录下的output文件夹中


    [root@hadoopmaster hadoop-2.6.5]# cd output [root@hadoopmaster output]# ll total 12 -rw-r--r--. 1 root root 9295 Aug 10 23:22 part-r-00000 -rw-r--r--. 1 root root 0 Aug 10 23:22 _SUCCESS [root@hadoopmaster output]# vi part-r-00000 #结果,GET请求61次,POST请求282次。 "-" 17 "CONNECT 2 "GET 61 "OPTIONS 10 "POST 282

    这样,单机模式就搭建完成了! 三、伪分布模式 前面安装教程和单机模式一模一样,但是需要修改一些配置文件: 目录:/opt/module/hadoop-2.6.5/etc/hadoop/ 修改core-site.xml:

    [root@hadoopmaster hadoop]# vi core-site.xml

    <configuration> <!-- 指定HDFS中NameNode的地址 --> <property> <name>fs.defaultFS</name> <!--hadoopmaster是我的主机名,可以换成ip或localhost--> <value>hdfs://hadoopmaster:9000</value> </property> <property> <!--这个配置是将hadoop的临时目录改成自定义的目录下--> <name>hadoop.tmp.dir</name> <value>/opt/module/hadoop-2.6.5/data/tmp</value> </property> </configuration>


    [root@hadoopmaster hadoop]# vi hdfs-site.xml

    基于yarn的完全分布式模式 环境 首先将单机模式中的环境搭建好并且克隆多台虚拟机设备,这里使用三台来搭建分布式集群

    修改静态ip 将三台虚拟机都启动起来,进行静态ip设置: 进入目录,查看网卡名称:

    [root@hadoop001 hadoop-2.6.5]# cd /etc/sysconfig/network-scripts [root@hadoop001 network-scripts]# ll total 248 -rw-r--r--. 1 root root 603 Aug 9 03:44 ifcfg-eno16777736 -rw-r--r--. 1 root root 254 Jan 2 2018 ifcfg-lo lrwxrwxrwx. 1 root root 24 Jun 2 03:01 ifdown -> ../../../usr/sbin/ifdown


    [root@hadoop001 hadoop-2.6.5]# hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.5.jar wordcount /user/data/input/ /user/data/output 18/08/11 02:11:45 INFO client.RMProxy: Connecting to ResourceManager at hadoop002/ 18/08/11 02:11:46 INFO input.FileInputFormat: Total input paths to process : 1 18/08/11 02:11:46 INFO mapreduce.JobSubmitter: number of splits:1 18/08/11 02:11:47 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1533978355521_0001 18/08/11 02:11:47 INFO impl.YarnClientImpl: Submitted application application_1533978355521_0001 18/08/11 02:11:47 INFO mapreduce.Job: The url to track the job: http://hadoop002:8088/proxy/application_1533978355521_0001/ 18/08/11 02:11:47 INFO mapreduce.Job: Running job: job_1533978355521_0001 18/08/11 02:12:22 INFO mapreduce.Job: Job job_1533978355521_0001 running in uber mode : false 18/08/11 02:12:22 INFO mapreduce.Job: map 0% reduce 0% 18/08/11 02:12:45 INFO mapreduce.Job: map 100% reduce 0% 18/08/11 02:12:55 INFO mapreduce.Job: map 100% reduce 100% 18/08/11 02:12:55 INFO mapreduce.Job: Job job_1533978355521_0001 completed successfully 18/08/11 02:12:55 INFO mapreduce.Job: Counters: 49 File System Counters FILE: Number of bytes read=10812 FILE: Number of bytes written=236323 FILE: Number of read operations=0 FILE: Number of large read operations=0 FILE: Number of write operations=0 HDFS: Number of bytes read=39764 HDFS: Number of bytes written=9295 HDFS: Number of read operations=6 HDFS: Number of large read operations=0 HDFS: Number of write operations=2 Job Counters Launched map tasks=1 Launched reduce tasks=1 Data-local map tasks=1 Total time spent by all maps in occupied slots (ms)=19409 Total time spent by all reduces in occupied slots (ms)=6661 Total time spent by all map tasks (ms)=19409 Total time spent by all reduce tasks (ms)=6661 Total vcore-milliseconds taken by all map tasks=19409 Total vcore-milliseconds taken by all reduce tasks=6661 Total megabyte-milliseconds taken by all map 
tasks=19874816 Total megabyte-milliseconds taken by all reduce tasks=6820864 Map-Reduce Framework Map input records=372 Map output records=3686 Map output bytes=54398 Map output materialized bytes=10812 Input split bytes=110 Combine input records=3686 Combine output records=388 Reduce input groups=388 Reduce shuffle bytes=10812 Reduce input records=388 Reduce output records=388 Spilled Records=776 Shuffled Maps =1 Failed Shuffles=0 Merged Map outputs=1 GC time elapsed (ms)=225 CPU time spent (ms)=2320 Physical memory (bytes) snapshot=397717504 Virtual memory (bytes) snapshot=4202889216 Total committed heap usage (bytes)=276824064 Shuffle Errors BAD_ID=0 CONNECTION=0 IO_ERROR=0 WRONG_LENGTH=0 WRONG_MAP=0 WRONG_REDUCE=0 File Input Format Counters Bytes Read=39654 File Output Format Counters Bytes Written=9295

