① Download the JDK installer jdk-8u201-windows-x64.exe from the official site: https://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html
② Run the installer. It asks for an installation path twice: first for the JDK, then for the JRE. Start the installation.
③ Configure the JDK environment variables: append the JDK's \bin directory to Path, then run javac in a cmd window to check that installation and configuration succeeded.
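If javac prints its usage text, the tools are on the Path. As an extra sanity check (my addition, not part of the original steps), you can compile and run a one-line class:

// JdkCheck.java — compile with "javac JdkCheck.java", run with "java JdkCheck"
public class JdkCheck {
    public static void main(String[] args) {
        // Prints the version of the JVM that is actually on the Path
        System.out.println("Java " + System.getProperty("java.version") + " is installed correctly.");
    }
}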
Eclipse download link: https://pan.baidu.com/s/1zi-XdBP2Q36GF64Np3cbcA Extraction code: y51q
Or download Eclipse from the official site: https://www.eclipse.org/downloads/
Installing Eclipse is straightforward, so I won't document it here; I followed a Baidu Jingyan tutorial.
① Maven official site: http://maven.apache.org/download.cgi — pick a nearby mirror and download the Maven archive apache-maven-3.6.0-bin.tar.gz.
② Extract apache-maven-3.6.0-bin.tar.gz and copy the extracted folder \apache-maven-3.6.0 to a path of your choice, e.g. C:\eclipse\apache-maven-3.6.0.
For my own experiment I used apache-maven-3.6.1-bin.tar.gz; take it if you need it. Link: https://pan.baidu.com/s/1foDlafuM0C4CX61KV4jmqg Extraction code: 6b23
③ Configure the Maven environment variables: append Maven's \bin directory to Path, then run mvn -v in a cmd window to check that installation and configuration succeeded.
① Edit settings.xml
Under the installation folder \apache-maven-3.6.0, create a new \repository folder to serve as the Maven local repository.
Then add the following line to settings.xml (found in the \conf subdirectory of the Maven installation):
<localRepository>C:\eclipse\apache-maven-3.6.0\repository</localRepository>
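For context, <localRepository> is a direct child of the root <settings> element (the shipped settings.xml contains a commented-out example of it). A minimal sketch of conf\settings.xml, assuming the install location used above:

<?xml version="1.0" encoding="UTF-8"?>
<settings>
  <!-- Point Maven's local repository at the \repository folder created above -->
  <localRepository>C:\eclipse\apache-maven-3.6.0\repository</localRepository>
</settings>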
② Configure Maven's Installations and User Settings in Eclipse: under 【Preferences】→【Maven】→【Installations】, set the Maven installation path; under 【User Settings】, set the path to settings.xml.
③ Add pom.xml dependencies
The dependencies can be found on Maven Repository (hadoop tag): https://mvnrepository.com/tags/hadoop. Locate the three dependencies for your Hadoop version (shown below) and copy them into pom.xml between <project> and </project>; after you save, the Maven Dependencies node is generated automatically.
<dependencies>
  <dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>2.7.3</version>
  </dependency>
  <dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.7.3</version>
  </dependency>
  <dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>2.7.3</version>
  </dependency>
</dependencies>
Once installation and configuration succeed, the Maven project tree shows the following two items:
(1) Open Eclipse and, in the New wizard, find Maven Project; check the options as shown in the figure.
(2) As shown, enter the group id com.bla and the project name HDFSTest, then click 【Finish】.
(3) Under the auto-generated Maven project, create the package hdfs.files and, inside it, create the following Java class files:
① HDFSMKdir.java — creates the HDFS directory /aadir
package hdfs.files;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.IOException;

public class HDFSMKdir {
    public static void main(String[] args) throws IOException {
        // Run with root permissions
        System.setProperty("HADOOP_USER_NAME", "root");
        // Create the HDFS connection object client
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://bigdata128:9000");
        FileSystem client = FileSystem.get(conf);
        // Create aadir under the HDFS root directory
        client.mkdirs(new Path("/aadir"));
        // Close the connection object
        client.close();
        // Print "successfully!"
        System.out.println("successfully!");
    }
}
② HDFSUpload.java — writes/uploads a local file (D:\aa.txt in the code) to the /aadir directory on HDFS

package hdfs.files;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

public class HDFSUpload {
    // Declare the input and output streams
    private static InputStream input;
    private static OutputStream output;

    public static void main(String[] args) throws IOException {
        // Run with root permissions
        System.setProperty("HADOOP_USER_NAME", "root");
        // Create the HDFS connection object client
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://bigdata128:9000");
        FileSystem client = FileSystem.get(conf);
        // Create an input stream for the local file
        input = new FileInputStream("D:\\aa.txt");
        // Create an output stream on HDFS
        output = client.create(new Path("/aadir/aaout.txt"));
        // Write the file to HDFS
        byte[] buffer = new byte[1024];
        int len = 0;
        while ((len = input.read(buffer)) != -1) {
            output.write(buffer, 0, len);
        }
        // Flush so no output data is lost
        output.flush();
        // Alternatively, use the IOUtils utility class to upload or download
        // IOUtils.copy(input, output);
        // Close the input and output streams
        input.close();
        output.close();
        System.out.println("Success!");
    }
}
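The commented-out IOUtils line hints at a shorter variant. A minimal sketch of the same upload using Hadoop's own org.apache.hadoop.io.IOUtils.copyBytes (same host, paths, and user as above; the class name HDFSUploadWithIOUtils is mine):

package hdfs.files;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

public class HDFSUploadWithIOUtils {
    public static void main(String[] args) throws IOException {
        System.setProperty("HADOOP_USER_NAME", "root");
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://bigdata128:9000");
        FileSystem client = FileSystem.get(conf);
        InputStream input = new FileInputStream("D:\\aa.txt");
        OutputStream output = client.create(new Path("/aadir/aaout.txt"));
        // copyBytes copies the whole stream and, with close=true, closes both streams
        IOUtils.copyBytes(input, output, 1024, true);
        client.close();
        System.out.println("Success!");
    }
}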
③ HDFSDownload.java — reads/downloads an HDFS file (/aadir/aaout.txt in the code) to a local file (d:\bbout.txt)

package hdfs.files;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.*;

public class HDFSDownload {
    // Declare the input and output streams
    private static InputStream input;
    private static OutputStream output;

    public static void main(String[] args) throws IOException {
        // Run with root permissions
        System.setProperty("HADOOP_USER_NAME", "root");
        // Create the HDFS connection object client
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://bigdata128:9000");
        FileSystem client = FileSystem.get(conf);
        // Create an output stream for the local file
        output = new FileOutputStream("d:\\bbout.txt");
        // Create an input stream from HDFS
        input = client.open(new Path("/aadir/aaout.txt"));
        // Copy the file from HDFS to the local disk
        byte[] buffer = new byte[1024];
        int len = 0;
        while ((len = input.read(buffer)) != -1) {
            output.write(buffer, 0, len);
        }
        // Flush so no output data is lost
        output.flush();
        // Alternatively, use the IOUtils utility class to upload or download
        // IOUtils.copy(input, output);
        // Close the input and output streams
        input.close();
        output.close();
        System.out.println("Success!");
    }
}
④ HDFSFileIfExist.java — checks whether the HDFS file /aadir/aaout.txt exists

package hdfs.files;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.IOException;

public class HDFSFileIfExist {
    public static void main(String[] args) throws IOException {
        // Run with root permissions
        System.setProperty("HADOOP_USER_NAME", "root");
        // Create the HDFS connection object client
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://bigdata128:9000");
        FileSystem client = FileSystem.get(conf);
        // The file to check
        String fileName = "/aadir/aaout.txt";
        // Test whether the file exists
        if (client.exists(new Path(fileName))) {
            System.out.println("File exists!");
        } else {
            System.out.println("File does not exist!");
        }
        // Close the connection object, as in the other examples
        client.close();
    }
}
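Beyond exists(), the same FileSystem client can enumerate a directory. A small sketch (my addition; same NameNode address and user as above) that lists /aadir to confirm the earlier upload:

package hdfs.files;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.IOException;

public class HDFSListDir {
    public static void main(String[] args) throws IOException {
        System.setProperty("HADOOP_USER_NAME", "root");
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://bigdata128:9000");
        FileSystem client = FileSystem.get(conf);
        // listStatus returns one FileStatus per entry in the directory
        for (FileStatus status : client.listStatus(new Path("/aadir"))) {
            System.out.println(status.getPath() + "  " + status.getLen() + " bytes");
        }
        client.close();
    }
}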
WordCountDriver.java — a MapReduce word-count job (Mapper, Reducer, and driver in one file)

package hdfs.files;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

public class WordCountDriver {

    // Mapper: emit (word, 1) for every space-separated word in the line
    public static class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            String[] words = line.split(" ");
            for (String w : words) {
                context.write(new Text(w), new IntWritable(1));
            }
        }
    }

    // Reducer: sum the counts collected for each word
    public static class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable v : values) {
                sum += v.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args)
            throws IllegalArgumentException, IOException, ClassNotFoundException, InterruptedException {
        System.setProperty("HADOOP_USER_NAME", "root");
        Configuration conf = new Configuration();
        // As in the HDFS examples above, point the job at the cluster when running from Eclipse
        conf.set("fs.defaultFS", "hdfs://bigdata128:9000");
        Job job = Job.getInstance(conf);
        job.setJarByClass(WordCountDriver.class);
        // Wire up the mapper and reducer
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        // Declare the map output and final output key/value types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Input file and output directory (the output directory must not exist yet)
        FileInputFormat.setInputPaths(job, new Path("/usr/local/hdfs/input/cc.txt"));
        FileOutputFormat.setOutputPath(job, new Path("/usr/local/hdfs/output"));
        // Submit the job, wait for it to finish, and exit 0 on success
        boolean rs = job.waitForCompletion(true);
        System.exit(rs ? 0 : 1);
    }
}
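One practical note for re-runs: MapReduce refuses to start if the output directory already exists, so a second run fails until /usr/local/hdfs/output is deleted. An optional snippet (my addition, using the same FileSystem API as the HDFS examples; it needs the org.apache.hadoop.fs.FileSystem import) that could go in main() before the job is submitted:

// Remove a stale output directory, if present, so the job can be re-run
FileSystem fs = FileSystem.get(conf);
Path out = new Path("/usr/local/hdfs/output");
if (fs.exists(out)) {
    fs.delete(out, true); // true = recursive delete
}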