1、安装配置JDK ①官网下载Java开发工具安装包jdk-8u201-windows-x64.exe:https://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html ②运行安装包,第一次选择JDK安装路径,第二次选择JRE安装路径,开始安装。 ③配置JDK的环境变量,Path添加JDK的\bin的安装路径,cmd命令行运行javac,查看是否成功安装配置。 2、安装Eclipse ①官网下载Eclipse安装包eclipse-inst-win64:https://www.eclipse.org/downloads/ ②运行安装包,选择Eclipse IDE for Java Developers,开始安装。过程可能较慢。 3、安装Maven ① Maven官网http://maven.apache.org/download.cgi,选择最近的镜像,选择Maven压缩包apache-maven-3.6.0-bin.tar.gz开始下载。 ②解压Maven压缩包apache-maven-3.6.0-bin.tar.gz,解压后的文件夹\apache-maven-3.6.0,将其拷入自定义路径,如C:\eclipse\apache-maven-3.6.0。 ③配置Maven的环境变量,Path添加Maven的\bin的安装路径,cmd命令行运行mvn -v,查看是否成功安装配置。 4、Eclipse配置Maven ①修改settings.xml 在安装所在文件夹\apache-maven-3.6.0下面,新建\repository文件夹,作为Maven本地仓库。在文件settings.xml里添加 <localRepository>C:\eclipse\apache-maven-3.6.0\repository</localRepository>。 ②配置Maven的installation和User Settings 【Preferences】→【Maven】→【Installations】配置Maven安装路径,【User Settings】配置settings.xml的路径。 ③添加pom.xml依赖 依赖(Maven Repository: hadoop)所在网址:https://mvnrepository.com/tags/hadoop ,找到对应版本的三个依赖(如下),拷贝至pom.xml的<dependencies>与</dependencies>之间,保存之后自动生成Maven Dependencies。
<dependencies> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-hdfs</artifactId> <version>2.7.3</version> </dependency> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-client</artifactId> <version>2.7.3</version> </dependency> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-common</artifactId> <version>2.7.3</version> </dependency> </dependencies>安装配置成功后,Maven项目树显示如下两项: 5、在Eclipse里新建Maven Project 在Eclipse里新建Maven Project,输入Group Id(如com.bla),项目名称HDFSTest。 自动生成Maven项目,新建包hdfs.files,包里新建class:4个java文件如下图所示 6、HDFS的Java程序 一、HDFSMKdir.java新建HDFS目录/aadir。
package hdfs.files;

import java.io.IOException;

import org.apache.hadoop.fs.*;
import org.apache.log4j.BasicConfigurator;

/**
 * Creates the directory {@code /aadir} on the HDFS cluster whose NameNode
 * listens at hdfs://192.168.178.131:9000.
 *
 * <p>Run as the HDFS superuser {@code root} (set via HADOOP_USER_NAME) so the
 * mkdir is not rejected by HDFS permission checks.
 */
public class HDFSMKdir {
    public static void main(String[] args) throws IOException {
        // Minimal log4j setup so the Hadoop client's log output is visible.
        BasicConfigurator.configure();
        System.setProperty("HADOOP_USER_NAME", "root");
        org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
        // NameNode RPC address of the (pseudo-)distributed cluster.
        conf.set("fs.defaultFS", "hdfs://192.168.178.131:9000");
        // try-with-resources closes the FileSystem handle even if mkdirs throws.
        try (FileSystem client = FileSystem.get(conf)) {
            // FIX: was "/aadir1" — the documented goal of this program is /aadir.
            client.mkdirs(new Path("/aadir"));
        }
        System.out.println("successfully!");
    }
}
二、HDFSUpload.java写入/上传 本地文件d:\hdfs\aa.txt 到HDFS的/aadir目录下。
package hdfs.files; import java.io.FileInputStream; import java.io.*; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.http.impl.io.IdentityInputStream; import org.apache.log4j.BasicConfigurator;; public class HDFSUpload { private static InputStream input; private static OutputStream output; public static void main(String[] args) throws IOException { BasicConfigurator.configure(); System.setProperty("HADOOP_USER_NAME", "root"); org.apache.hadoop.conf.Configuration conf=new org.apache.hadoop.conf.Configuration(); conf.set("fs.defaultFS", "hdfs://192.168.178.131:9000"); FileSystem client=FileSystem.get(conf); input =new FileInputStream("d:\\hdfs\\aa.txt"); output =client.create(new Path("aadir/aaout1.txt")); byte[] buffer=new byte[1024]; int len=0; while((len=input.read(buffer))!=-1) { output.write(buffer,0,len); } output.flush(); input.close(); output.close(); } }三、HDFSDownload.java读/下载 HDFS的根目录文件/bb.txt 到本地c:\hdfs目录下。
package hdfs.files; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.log4j.BasicConfigurator; public class HDFSDownload { private static InputStream input; private static OutputStream output; public static void main(String[] args) throws IOException { // TODO Auto-generated method stub BasicConfigurator.configure(); System.setProperty("HADOOP_USER_NAME", "root"); Configuration conf=new Configuration(); conf.set("fs.defaultFS", "hdfs://192.168.178.131:9000"); FileSystem client=FileSystem.get(conf); input=new FileInputStream("D:\\hdfs\\bbout.txt"); output=client.create(new Path("/bb.txt")); byte[] buffer=new byte[1024]; int len=0; while((len=input.read(buffer))!=-1) { output.write(buffer,0,len); } output.flush(); input.close(); output.close(); } }四、HDFSFileIfExist.java查看HDFS文件/bb.txt是否存在。
package hdfs.files;

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.BasicConfigurator;

/**
 * Checks whether the HDFS file {@code /bb.txt} exists and prints the result
 * (in Chinese) to stdout.
 *
 * <p>FIX: the original never closed the {@link FileSystem} handle; a
 * try-with-resources block now guarantees it is released.
 */
public class HDFSFileIfExist {
    public static void main(String[] args) throws IOException {
        BasicConfigurator.configure();
        System.setProperty("HADOOP_USER_NAME", "root");
        org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.178.131:9000");
        try (FileSystem client = FileSystem.get(conf)) {
            String fileName = "/bb.txt";
            if (client.exists(new Path(fileName))) {
                System.out.println("文件存在!");
            } else {
                System.out.println("文件不存在!");
            }
        }
    }
}
实验步骤 1.搭建Hadoop伪分布式模式、或者完全分布式模式 2.Windows里安装配置JDK+Eclipse+Maven 3.在Eclipse里新建Maven Project,新建包,新建class 4.编程上述4个Java程序 5.虚拟机的namenode主机上,启动Hadoop 6.在eclipse里运行上述4个Java程序 7.使用hdfs的shell命令查看运行结果 8.使用web console查看运行结果