这篇文章主要介绍 Hadoop 2.x 中 WordCount MapReduce 程序的用法,具有一定的借鉴价值,感兴趣的朋友可以参考。希望大家读完这篇文章之后有所收获,下面让小编带着大家一起了解一下。
package com.jhl.haoop.examples;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class WordCount {
// map区域
public static class TokenizerMapper extends
Mapper<LongWritable, Text, Text, IntWritable> {
private final static IntWritable one = new IntWritable(1);//每个单词统计一次
private Text word = new Text();
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
//进行分割 [空格 制表符 \t 换行 \n 回车符\r \f]
// public StringTokenizer(String str) {
//this(str, " \t\n\r\f", false);
// }
StringTokenizer itr = new StringTokenizer(value.toString());//获取每行数据的值value.toString()
while (itr.hasMoreTokens()) {
word.set(itr.nextToken());//设置map输出的key值
context.write(word, one);//上下文输出map的key和value值
}
}
}
//reduce 区域
public static class IntSumReducer extends
Reducer<Text, IntWritable, Text, IntWritable> {
private IntWritable result = new IntWritable();
public void reduce(Text key, Iterable<IntWritable> values,
Context context) throws IOException, InterruptedException {
int sum = 0;
for (IntWritable val : values) {//循环遍历Iterable
sum += val.get();//累加
}
result.set(sum);//设置总次数
context.write(key, result);
}
}
//client区域
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();//获取配置信息
//GenericOptionsParser 用来常用的Hadoop命令选项,并根据需要,为Configuration对象设置相应的取值。
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length != 2) {
System.err.println("Usage: wordcount ");
System.exit(2);
}
Job job = new Job(conf, "WordCount");//创建Job、设置Job配置和名称
job.setJarByClass(WordCount.class);//设置Job 运行的类
job.setMapperClass(TokenizerMapper.class);//设置Mapper类和Reducer类
job.setCombinerClass(IntSumReducer.class);
job.setReducerClass(IntSumReducer.class);
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));//设置输入文件的路径和输出文件的路径
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
job.setOutputKeyClass(Text.class);//设置输出结果的key和value类型
job.setOutputValueClass(IntWritable.class);
boolean isSuccess = job.waitForCompletion(true);//提交Job,等待运行结果,并在客户端显示运行信息
System.exit(isSuccess ? 0 : 1);//结束程序
}
}
感谢你认真阅读完这篇文章,希望小编分享的“Hadoop 2.x WordCount MapReduce 怎么用”这篇文章对大家有帮助,同时也希望大家多多支持天达云,关注天达云行业资讯频道,更多相关知识等着你来学习!