HDFS Commands and the Java API


    I. Installing Eclipse

    1. Download the Eclipse installer eclipse-inst-win64 from the official site: https://www.eclipse.org/downloads/
    2. Run the installer, select Eclipse IDE for Java Developers, and install.

    II. Installing Maven

    1. On the Maven site http://maven.apache.org/download.cgi, pick a nearby mirror and download the archive apache-maven-3.6.0-bin.tar.gz.
    2. Extract apache-maven-3.6.0-bin.tar.gz and copy the extracted folder \apache-maven-3.6.0 to a path of your choice, e.g. C:\eclipse\apache-maven-3.6.0.
    3. Configure the Maven environment variable: add Maven's \bin directory to Path, then run mvn -v in a cmd window to check that the installation and configuration succeeded.

    III. Configuring Maven in Eclipse

    1. Edit settings.xml. Under the installation folder \apache-maven-3.6.0, create a \repository folder to serve as the Maven local repository, and point to it by adding <localRepository>C:\eclipse\apache-maven-3.6.0\repository</localRepository> to settings.xml (a minimal sketch follows the pom.xml listing below).
    2. Configure Maven's Installations and User Settings: 【Preferences】→【Maven】→【Installations】 sets the Maven installation path, and 【User Settings】 sets the path to settings.xml.
    3. Add the pom.xml dependencies. The dependencies (Maven Repository: hadoop) are listed at https://mvnrepository.com/tags/hadoop ; find the three dependencies matching your Hadoop version (as below), copy them between <dependencies> and </dependencies> in pom.xml, and save; the Maven Dependencies are then generated automatically. pom.xml code:

    <project xmlns="http://maven.apache.org/POM/4.0.0"
             xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
             xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
        <modelVersion>4.0.0</modelVersion>
        <groupId>download</groupId>
        <artifactId>Download</artifactId>
        <version>0.0.1-SNAPSHOT</version>

        <dependencies>
            <dependency>
                <groupId>org.apache.hadoop</groupId>
                <artifactId>hadoop-hdfs</artifactId>
                <version>2.7.3</version>
            </dependency>
            <dependency>
                <groupId>org.apache.hadoop</groupId>
                <artifactId>hadoop-client</artifactId>
                <version>2.7.3</version>
            </dependency>
            <dependency>
                <groupId>org.apache.hadoop</groupId>
                <artifactId>hadoop-common</artifactId>
                <version>2.7.3</version>
            </dependency>
        </dependencies>

        <build>
            <plugins>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-compiler-plugin</artifactId>
                    <version>3.1</version>
                    <configuration>
                        <source>1.8</source>
                        <target>1.8</target>
                    </configuration>
                </plugin>
                <plugin>
                    <artifactId>maven-assembly-plugin</artifactId>
                    <configuration>
                        <descriptorRefs>
                            <descriptorRef>jar-with-dependencies</descriptorRef>
                        </descriptorRefs>
                        <archive>
                            <manifest>
                                <mainClass>hdfs.files.HDFSDownload</mainClass>
                            </manifest>
                        </archive>
                    </configuration>
                    <executions>
                        <execution>
                            <id>make-assembly</id>
                            <phase>package</phase>
                            <goals>
                                <goal>single</goal>
                            </goals>
                        </execution>
                    </executions>
                </plugin>
            </plugins>
        </build>
    </project>
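    For step 1 above, a minimal settings.xml that only sets the local repository might look like the following sketch (the <settings> root element and its namespaces are standard Maven boilerplate; the repository path is the one chosen above):

    <settings xmlns="http://maven.apache.org/SETTINGS/1.0.0"
              xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
              xsi:schemaLocation="http://maven.apache.org/SETTINGS/1.0.0 http://maven.apache.org/xsd/settings-1.0.0.xsd">
        <localRepository>C:\eclipse\apache-maven-3.6.0\repository</localRepository>
    </settings>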

    IV. Creating a Maven Project in Eclipse

    V. HDFS Java Programs

    HDFSMKdir.java creates the HDFS directory /aadir.

    package hdfs.files;

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class HDFSMKdir {
        public static void main(String[] args) throws IOException {
            // Run as the root user
            System.setProperty("HADOOP_USER_NAME", "root");
            // Create the HDFS connection object client
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", "hdfs://hadoop:9000");
            FileSystem client = FileSystem.get(conf);
            // Create the directory aadir under the HDFS root
            client.mkdirs(new Path("/aadir"));
            // Close the connection object
            client.close();
            // Print a success message
            System.out.println("successfully!");
        }
    }
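    After the program runs, the new directory can be verified with the standard HDFS shell (assuming the Hadoop client is on the PATH):

    hdfs dfs -ls /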

    HDFSUpload.java writes/uploads a local file aa.txt (the code uses the Linux path /usr/local/hdfs/aa.txt) to the HDFS directory /aadir.

    package hdfs.files;

    import java.io.FileInputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.OutputStream;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class HDFSUpload {
        // Input stream for the local file, output stream for the HDFS file
        private static InputStream input;
        private static OutputStream output;

        public static void main(String[] args) throws IOException {
            // Run as the root user
            System.setProperty("HADOOP_USER_NAME", "root");
            // Create the HDFS connection object client
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", "hdfs://hadoop:9000");
            FileSystem client = FileSystem.get(conf);
            // Open the local file for reading
            input = new FileInputStream("/usr/local/hdfs/aa.txt");
            // Create the target file on HDFS for writing
            output = client.create(new Path("/aadir/aaout.txt"));
            // Copy the local file to HDFS in 1 KB chunks
            byte[] buffer = new byte[1024];
            int len = 0;
            while ((len = input.read(buffer)) != -1) {
                output.write(buffer, 0, len);
            }
            // Flush so no buffered data is lost
            output.flush();
            // A utility class such as IOUtils could do the copy instead
            // IOUtils.copy(input, output);
            input.close();
            output.close();
        }
    }
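    Once the upload succeeds, the uploaded content can be checked from the HDFS shell (again assuming the Hadoop client is on the PATH):

    hdfs dfs -cat /aadir/aaout.txt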

    HDFSDownload.java reads/downloads the HDFS file /bb.txt to a local copy (the code writes it to /usr/local/hdfs/bbout.txt).

    package hdfs.files;

    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.OutputStream;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class HDFSDownload {
        // Input stream for the HDFS file, output stream for the local file
        private static InputStream input;
        private static OutputStream output;

        public static void main(String[] args) throws IOException {
            // Run as the root user
            System.setProperty("HADOOP_USER_NAME", "root");
            // Create the HDFS connection object client
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", "hdfs://hadoop:9000");
            FileSystem client = FileSystem.get(conf);
            // Open the HDFS file for reading
            input = client.open(new Path("/bb.txt"));
            // Create the local file for writing
            output = new FileOutputStream("/usr/local/hdfs/bbout.txt");
            // Copy the HDFS file to the local file system in 1 KB chunks
            byte[] buffer = new byte[1024];
            int len = 0;
            while ((len = input.read(buffer)) != -1) {
                output.write(buffer, 0, len);
            }
            // Flush so no buffered data is lost
            output.flush();
            // A utility class such as IOUtils could do the copy instead
            // IOUtils.copy(input, output);
            // Close the input and output streams
            input.close();
            output.close();
        }
    }
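    The program expects /bb.txt to already exist on HDFS. If it does not, one way to create it is to put a local file there first (here reusing aa.txt from the upload example), then inspect the downloaded copy:

    hdfs dfs -put /usr/local/hdfs/aa.txt /bb.txt
    cat /usr/local/hdfs/bbout.txt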

    HDFSFileIfExist.java checks whether the HDFS file /bb.txt exists.

    package hdfs.files;

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class HDFSFileIfExist {
        public static void main(String[] args) throws IOException {
            // Run as the root user
            System.setProperty("HADOOP_USER_NAME", "root");
            // Create the HDFS connection object client
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", "hdfs://hadoop:9000");
            FileSystem client = FileSystem.get(conf);
            // Check whether /bb.txt exists on HDFS
            String fileName = "/bb.txt";
            if (client.exists(new Path(fileName))) {
                System.out.println("file exists!");
            } else {
                System.out.println("file does not exist!");
            }
        }
    }

    Run the Java programs. Once they succeed, package the project from cmd (command: mvn assembly:assembly), then upload the packaged jar to the Linux server with Xftp and run it from Xshell.

    VI. Running the Java Program in Xshell
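    The pom.xml above sets hdfs.files.HDFSDownload as the manifest main class and builds a jar-with-dependencies, so the assembled jar is named Download-0.0.1-SNAPSHOT-jar-with-dependencies.jar (from the artifactId and version) and lands in the project's target\ folder after packaging. A sketch of running it on the server, assuming Java and Hadoop are installed there:

    java -jar Download-0.0.1-SNAPSHOT-jar-with-dependencies.jar
    # or, using the Hadoop launcher:
    hadoop jar Download-0.0.1-SNAPSHOT-jar-with-dependencies.jar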
