- 主页 > 生活百科 > >
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Map stage of the de-duplication job.
 *
 * <p>Each input value is emitted unchanged as the output key, paired with a
 * {@link NullWritable} placeholder value, so the record content itself
 * becomes the key that the reduce stage receives.
 *
 * <p>Example input pairs:  {@code <0,2018-3-3 c> <11,2018-3-4 d>}<br>
 * Example output pairs: {@code <2018-3-3 c,null> <2018-3-4 d,null>}
 */
public class DedupMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    /**
     * Emits the incoming value as the output key with a null value.
     *
     * @param key     input key supplied by the framework (the examples above
     *                show 0 and 11); not used by this mapper
     * @param value   the record to emit as the output key
     * @param context job context that receives the output pair
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Write the value straight through. The previous version routed it
        // via a mutable static field whose reference was overwritten on every
        // call, which added shared state without changing what was emitted.
        context.write(value, NullWritable.get());
    }
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class DedupReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
// <2018-3-3 c,null> <2018-3-4 d,null><2018-3-4 d,null>
@Override
protected void reduce(Text key, Iterable<NullWritable> values, Context context)
【Hadoop数据去重】throws IOException, InterruptedException {
context.write(key, NullWritable.get());
}
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class DedupRunner {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(DedupRunner.class);
job.setMapperClass(DedupMapper.class);
job.setReducerClass(DedupReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
FileInputFormat.setInputPaths(job, new Path("D:\Dedup\input"));
// 指定处理完成之后的结果所保存的位置
FileOutputFormat.setOutputPath(job, new Path("D:\Dedup\output"));
job.waitForCompletion(true);
}
}
推荐阅读
-
台风|拒绝惊天逆转,中国00后10-7晋级世锦赛16强,9-2赢赛点被连追5局
-
『艺迹育儿』还说孕妈生孩子娇气吗?产房男人可以陪同吗?,了解顺产过程的痛
-
柠檬树|家庭养花不如种树,盆栽柠檬、清香木、九里香,屋里四季飘香
-
科技匠 内忧外患,我们应该怎么办?,科技产品屡屡被限制
-
土豆的7种花样做法,赶紧收藏,解决近期做饭吃饭大难题
-
电竞|“只要你每天练习8小时,也可以这么优秀,王者有手就行”
-
-
-
IT之家14锐龙版0点预售,3999元:联想小新Air
-
电鳗快报|美瑞新材IPO疑云:低价向实控人原东家买材料被输送利润?
-
『央视』累计确诊2792例,泰国新增27例新冠肺炎确诊病例
-
牛肝菌和什么在一起吃最好?牛肝菌怎么做好吃?能不能红烧或者炖汤?
-
家族战队|华为、苹果、小米、OPPO、vivo手机报价
-
-
#苏州口腔医院牙医陆博#牙齿痛得要死了,为什么医生还不让拔?
-
暮年|@所有人:一份来自2020深圳跨境电商供应链博览会的邀请函
-
-
[火星]火星有座奥林匹斯山,如果搬到地球,估计没人能爬到峰顶
-
-