JAVA程序实现mapreduce的wordcount

xiaoxiao2023-11-18 117

JAVA程序实现mapreduce的wordcount

程序代码：

package hdfs.files; import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; public class WordCountDriver { public static class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable>{ protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); String[] words=line.split(" "); for(String w:words) { context.write(new Text(w), new IntWritable(1)); } } } public static class WordCountReducer extends Reducer <Text, IntWritable, Text, IntWritable>{ protected void reduce(Text Key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException { int sum=0; for(IntWritable v:values) { sum +=v.get(); } context.write(Key, new IntWritable(sum)); } } public static void main(String[] args) throws IllegalArgumentException, IOException, ClassNotFoundException, InterruptedException { System.setProperty("HADOOP_USER_NAME", "root"); Configuration conf=new Configuration(); Job job=Job.getInstance(conf); job.setJarByClass(WordCountDriver.class); job.setMapperClass(WordCountMapper.class); job.setReducerClass(WordCountReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.setInputPaths(job, new Path("/usr/local/hdfs/input/cc.txt")); FileOutputFormat.setOutputPath(job, new Path("/usr/local/hdfs/output")); Boolean rs=job.waitForCompletion(true); System.exit(rs?0:1); } }

最新回复(0)