HDFS


    Installation: download hadoop-2.6.4

    Extract it to /home/hdfs/hadoop

    useradd hdfs                      # create the hdfs user
    passwd hdfs                       # set the hdfs user's login password
    su hdfs                           # switch to the hdfs user
    cd ~                              # enter hdfs's home directory
    rz                                # upload the tarball from the local Windows machine into this home directory
    tar -zxvf hadoop-2.6.4.tar.gz     # extract the archive
    mv hadoop-2.6.4 hadoop            # rename the extracted directory
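
    A quick sanity check after extraction (paths assume the steps above): the renamed hadoop directory should contain the usual 2.6.4 layout, and the configuration files edited later all live under etc/hadoop.

    ls /home/hdfs/hadoop
    # expected top-level entries include: bin  etc  lib  sbin  share
    ls /home/hdfs/hadoop/etc/hadoop | head
    # hadoop-env.sh, core-site.xml, hdfs-site.xml, yarn-site.xml, ... live here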

    Passwordless SSH between the hdfs machines
    cd ~/.ssh
    ssh-keygen -t rsa                 # press Enter through every prompt
    # id_rsa      -- private key
    # id_rsa.pub  -- public key
    cat id_rsa.pub >> authorized_keys                       # on the master, append the public key to authorized_keys
    sudo scp authorized_keys hdfs@192.168.153.129:~/.ssh    # copy the master's authorized_keys to each node that needs passwordless login
    sudo scp authorized_keys hdfs@192.168.153.128:~/.ssh    # copy the master's authorized_keys to each node that needs passwordless login
    chmod 644 authorized_keys         # fix the key file's permissions
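
    A quick way to confirm the key exchange worked (same node IPs as above): ssh to each node as hdfs; the command should run without a password prompt.

    # these should log in and print the remote hostname without asking for a password
    ssh hdfs@192.168.153.129 hostname
    ssh hdfs@192.168.153.128 hostname
    # if a prompt still appears, check that ~/.ssh is mode 700 and authorized_keys is 644 on the target node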

    Set the Java environment variables
    vi ~/.bashrc
    export JAVA_HOME=/java_install_dir/jdk1.8.0_91
    export JRE_HOME=/java_install_dir/jdk1.8.0_91/jre
    export CLASSPATH=.:$CLASSPATH:$JAVA_HOME/lib:$JRE_HOME/lib
    export PATH=$PATH:$JAVA_HOME/bin:$JRE_HOME/bin
    source ~/.bashrc                  # apply the configuration
    echo $JAVA_HOME
    /java_install_dir/jdk1.8.0_91
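
    As a further check that the PATH change took effect (version string assumes the JDK 1.8.0_91 install used above):

    java -version
    # java version "1.8.0_91"
    which java
    # should point into /java_install_dir/jdk1.8.0_91/bin if the PATH export is being picked up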

    Configure the *.xml files
    Configure hadoop-env.sh:
    export JAVA_HOME=/java_install_dir/jdk1.8.0_91
    Configure yarn-env.sh:
    export JAVA_HOME=/java_install_dir/jdk1.8.0_91
    Configure core-site.xml:

    <configuration>
      <property>
        <name>fs.defaultFS</name>
        <value>hdfs://wtf-ubuntu:9000</value>
      </property>
      <property>
        <name>io.file.buffer.size</name>
        <value>131072</value>
      </property>
      <property>
        <name>hadoop.tmp.dir</name>
        <value>file:/home/hdfs/hadoop/tmp</value>
        <description>A base for other temporary directories.</description>
      </property>
      <property>
        <name>hadoop.proxyuser.spark.hosts</name>
        <value>*</value>
      </property>
      <property>
        <name>hadoop.proxyuser.spark.groups</name>
        <value>*</value>
      </property>
    </configuration>
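
    The fs.defaultFS URI above is what clients resolve bare HDFS paths against, so once the cluster is started (see the startup section below) the following two commands are equivalent; a small sketch, assuming wtf-ubuntu resolves on the client machine:

    ./bin/hadoop fs -ls /
    ./bin/hadoop fs -ls hdfs://wtf-ubuntu:9000/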

    Configure hdfs-site.xml:

    <configuration>
      <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>wtf-ubuntu:9001</value>
      </property>
      <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:/home/hdfs/hadoop/name</value>
      </property>
      <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:/home/hdfs/hadoop/data</value>
      </property>
      <property>
        <name>dfs.replication</name>
        <value>3</value>
      </property>
      <property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
      </property>
    </configuration>
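
    Because dfs.webhdfs.enabled is true, the NameNode also exposes an HTTP REST interface once the cluster is up; a minimal sketch, assuming the Hadoop 2.x default NameNode web port 50070:

    # list the HDFS root over WebHDFS
    curl -i "http://wtf-ubuntu:50070/webhdfs/v1/?op=LISTSTATUS"
    # read a file over WebHDFS (-L follows the redirect to a DataNode)
    curl -L "http://wtf-ubuntu:50070/webhdfs/v1/tmp/somefile?op=OPEN"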

    Configure the mapred-site.xml file

    cp mapred-site.xml.template mapred-site.xml   # copy the template and rename it to mapred-site.xml
    <configuration>
      <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
      </property>
      <property>
        <name>mapreduce.jobhistory.address</name>
        <value>wtf-ubuntu:10020</value>
      </property>
      <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>wtf-ubuntu:19888</value>
      </property>
    </configuration>
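
    The jobhistory addresses above only take effect if the history server is actually running; start-dfs.sh and start-yarn.sh do not start it. A sketch of starting and checking it:

    # start the MapReduce job history server
    ./sbin/mr-jobhistory-daemon.sh start historyserver
    # its web UI should then answer on the webapp address configured above
    curl -s http://wtf-ubuntu:19888/ | head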

    Configure yarn-site.xml

    <configuration>
      <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
      </property>
      <property>
        <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
      </property>
      <property>
        <name>yarn.resourcemanager.address</name>
        <value>wtf-ubuntu:8032</value>
      </property>
      <property>
        <name>yarn.resourcemanager.scheduler.address</name>
        <value>wtf-ubuntu:8030</value>
      </property>
      <property>
        <name>yarn.resourcemanager.resource-tracker.address</name>
        <value>wtf-ubuntu:8035</value>
      </property>
      <property>
        <name>yarn.resourcemanager.admin.address</name>
        <value>wtf-ubuntu:8033</value>
      </property>
      <property>
        <name>yarn.resourcemanager.webapp.address</name>
        <value>wtf-ubuntu:8088</value>
      </property>
    </configuration>
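
    After YARN has been started (next section), the ResourceManager addresses above can be checked quickly; a sketch using its REST API and the yarn CLI:

    # ResourceManager REST API on the configured webapp address
    curl -s http://wtf-ubuntu:8088/ws/v1/cluster/info
    # list the NodeManagers that have registered with the ResourceManager
    ./bin/yarn node -list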

    Startup
    Format the NameNode:
    ./bin/hdfs namenode -format
    Operate HDFS and YARN:
    ./sbin/start-dfs.sh
    ./sbin/stop-dfs.sh
    ./sbin/start-yarn.sh
    ./sbin/stop-yarn.sh
    ./bin/hdfs dfsadmin -report
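
    jps is a quick way to confirm which daemons came up after start-dfs.sh and start-yarn.sh; which processes appear on which host depends on the slaves configuration of this cluster.

    jps
    # on the master expect NameNode, SecondaryNameNode and ResourceManager;
    # DataNode and NodeManager run on the worker nodes (and also on the master if it is listed in slaves)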

    ./bin/hadoop fs -mkdir /tmp
    ./bin/hadoop fs -mkdir /tmp/input
    ./bin/hadoop fs -ls /tmp/
    ./bin/hadoop fs -put logs/ /tmp                # upload the local logs/ directory into HDFS
    ./bin/hadoop fs -ls /tmp/
    ./bin/hadoop fs -ls /tmp/logs/
    ./bin/hadoop fs -cat /tmp/logs/hadoop-hdfs-namenode-wtf-ubuntu.log
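
    With files in HDFS and YARN running, a quick end-to-end test is the bundled wordcount example; a sketch, assuming the examples jar shipped inside the 2.6.4 tarball and some text files already put under /tmp/input:

    # run the bundled wordcount example over /tmp/input, writing results to /tmp/output
    ./bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.4.jar \
        wordcount /tmp/input /tmp/output
    ./bin/hadoop fs -cat /tmp/output/part-r-00000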
