Experiment Purpose and Requirements
Purpose:
Understand the HDFS file system; master the HDFS architecture and the responsibilities of its core components; master HDFS data read and write operations; practice common HDFS operations (Shell, Java API); understand the new HDFS-related features in Hadoop 2.0.
Experiment Environment
Java JDK 1.8; apache-maven-3.6.0; MyEclipse C10; Hadoop cluster.
1. Testing the connection with the Test class
Key code:
package org.gy.myhadoop.mr;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * @author yanxukun
 * @date 2019-03-07 11:46:23
 */
public class Test {
    public static void main(String[] args) throws Exception {
        System.out.println("Hello World!");
        // Point the HDFS client at the cluster's NameNode
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.10.111:9000");
        FileSystem fs = FileSystem.get(conf);
        // Create a test directory to verify that the connection works
        fs.mkdirs(new Path("hdfs://192.168.10.111:9000/user/input/test2"));
        fs.close();
    }
}
Run result:
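As a quick sanity check after running the Test class, a minimal sketch like the one below (the class name CheckDir is only an illustration, assuming the same NameNode address and directory as above) can confirm from code that the directory was actually created, rather than relying only on the web UI or the shell:

package org.gy.myhadoop.mr;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Hypothetical helper, not part of the original experiment code:
// checks whether the directory created by Test exists on HDFS.
public class CheckDir {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.10.111:9000");
        FileSystem fs = FileSystem.get(conf);
        // exists() returns true if the path is present on the cluster
        boolean created = fs.exists(new Path("/user/input/test2"));
        System.out.println("Directory exists: " + created);
        fs.close();
    }
}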
2. HDFS file operations with the Java API (HDFSApp class)
Key code:
package org.gy.myhadoop.mr;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

public class HDFSApp {
    public static final String HDFS_PATH = "hdfs://192.168.10.111:9000";
    public static Configuration configuration = null;
    public static FileSystem fileSystem = null;

    @Before
    public void setUp() throws Exception {
        System.out.println("HDFSApp.setUp()");
        configuration = new Configuration();
        configuration.set("fs.defaultFS", HDFS_PATH);
        fileSystem = FileSystem.get(configuration);
    }

    @After
    public void tearDown() throws Exception {
        fileSystem = null;
        configuration = null;
        System.out.println("HDFSApp.tearDown()");
    }

    // Rename a.txt to b.txt on HDFS
    @Test
    public void rename() throws Exception {
        Path oldPath = new Path(HDFS_PATH + "/user/test/a.txt");
        Path newPath = new Path(HDFS_PATH + "/user/test/b.txt");
        System.out.println(fileSystem.rename(oldPath, newPath));
    }

    // Upload a local file to the /user/test directory on HDFS
    @Test
    public void copyFromLocalFile() throws Exception {
        Path src = new Path("C:/luke/hello.txt");
        Path dist = new Path(HDFS_PATH + "/user/test/");
        fileSystem.copyFromLocalFile(src, dist);
    }

    // List the entries under /user/test with type, permission, replication, length and path
    @Test
    public void listFiles() throws Exception {
        FileStatus[] listStatus = fileSystem.listStatus(new Path(HDFS_PATH + "/user/test"));
        for (FileStatus fileStatus : listStatus) {
            String isDir = fileStatus.isDirectory() ? "directory" : "file";
            String permission = fileStatus.getPermission().toString();
            short replication = fileStatus.getReplication();
            long len = fileStatus.getLen();
            String path = fileStatus.getPath().toString();
            System.out.println(isDir + "\t" + permission + "\t" + replication + "\t" + len + "\t" + path);
        }
    }
}
Run result:
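The objectives also call for reading data back from HDFS, which the tests above do not cover. The sketch below is a hypothetical companion class (not part of the original listing; the path /user/test/b.txt is only assumed to exist, e.g. as produced by the rename test) that streams a file's contents to standard output with IOUtils.copyBytes:

package org.gy.myhadoop.mr;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

// Hypothetical read example, not in the original experiment code:
// opens an HDFS file and copies its contents to standard output.
public class HDFSCat {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.10.111:9000");
        FileSystem fs = FileSystem.get(conf);
        // /user/test/b.txt is assumed to exist on the cluster
        FSDataInputStream in = fs.open(new Path("/user/test/b.txt"));
        try {
            // Stream the file to stdout in 1024-byte chunks; false = leave System.out open
            IOUtils.copyBytes(in, System.out, 1024, false);
        } finally {
            IOUtils.closeStream(in);
            fs.close();
        }
    }
}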
1. Writing a SequenceFile
Key code:
package org.gy.myhadoop.mr;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class SequenceFileWriter {
    private static Configuration configuration = new Configuration();
    private static String url = "hdfs://192.168.10.111:9000";
    private static String[] data = {"a,b,c,d,e,f,g", "e,f,g,h,i,j,k", "l,m,n,o,p,q,r,s", "t,u,v,w,x,y,z"};

    public static void main(String[] args) throws Exception {
        // Point the client at the cluster so the SequenceFile is written to HDFS
        configuration.set("fs.defaultFS", url);
        FileSystem fs = FileSystem.get(configuration);
        Path outputPath = new Path("MySequenceFile.seq");
        IntWritable key = new IntWritable();
        Text value = new Text();
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, configuration, outputPath,
                IntWritable.class, Text.class);
        // Write ten records; the loop body after value.set is reconstructed from the
        // standard SequenceFile write pattern, cycling through data so the index
        // never runs past the four entries in the array.
        for (int i = 0; i < 10; i++) {
            key.set(10 - i);
            value.set(data[i % data.length]);
            writer.append(key, value);
        }
        IOUtils.closeStream(writer);
    }
}
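A natural follow-up to the write step is reading the records back. The sketch below is an assumed companion class (not in the original listing), using SequenceFile.Reader with the same MySequenceFile.seq path and IntWritable/Text types as above to print every key/value pair:

package org.gy.myhadoop.mr;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

// Hypothetical reader for the file produced by SequenceFileWriter.
public class SequenceFileReaderDemo {
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        configuration.set("fs.defaultFS", "hdfs://192.168.10.111:9000");
        FileSystem fs = FileSystem.get(configuration);
        Path inputPath = new Path("MySequenceFile.seq");
        SequenceFile.Reader reader = null;
        try {
            reader = new SequenceFile.Reader(fs, inputPath, configuration);
            IntWritable key = new IntWritable();
            Text value = new Text();
            // next() fills key and value, returning false at end of file
            while (reader.next(key, value)) {
                System.out.println(key.get() + "\t" + value.toString());
            }
        } finally {
            IOUtils.closeStream(reader);
        }
    }
}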