- 主页 > 生活百科 > >
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Map stage of the de-duplication job.
 *
 * <p>Each input record is a (byte offset, line) pair. The offset is ignored and the
 * whole line is emitted as the output key paired with {@link NullWritable}, so the line
 * content itself becomes the key handed to the reduce stage.
 */
public class DedupMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
    // Reusable output key owned by this mapper instance. It is deliberately not static:
    // a static mutable Text would be shared by every mapper instance in the same JVM.
    private final Text field = new Text();

    // Example input:  <0,2018-3-3 c> <11,2018-3-4 d>
    /**
     * Emits the input line as the output key with a null value.
     *
     * @param key     input key supplied by the framework; not used
     * @param value   the line of text to forward as the output key
     * @param context job context used to write the output pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Copy the line into our own buffer instead of aliasing the caller's object.
        field.set(value);
        context.write(field, NullWritable.get());
    }
    // Example output: <2018-3-3 c,null> <2018-3-4 d,null>
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class DedupReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
// <2018-3-3 c,null> <2018-3-4 d,null><2018-3-4 d,null>
@Override
protected void reduce(Text key, Iterable<NullWritable> values, Context context)
【Hadoop数据去重】throws IOException, InterruptedException {
context.write(key, NullWritable.get());
}
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class DedupRunner {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(DedupRunner.class);
job.setMapperClass(DedupMapper.class);
job.setReducerClass(DedupReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
FileInputFormat.setInputPaths(job, new Path("D:\Dedup\input"));
// 指定处理完成之后的结果所保存的位置
FileOutputFormat.setOutputPath(job, new Path("D:\Dedup\output"));
job.waitForCompletion(true);
}
}
推荐阅读
-
直观视界|12在5000元以下,在5G的加持下,还能再创辉煌吗?,如果iPhone
-
[软件]联合国突然宣布,马云始料未及,腾讯也猝不及防!
-
-
充电头网 PD快充移动电源上市,打造国潮数码精品,故宫元素30W
-
#特朗普#新冠确诊150万,美国再遇突发重大紧急事件,洛杉矶爆炸腾蘑菇云
-
回忆传奇那些青春@热血传奇176复古金币武器喝幸运7的小技巧
-
【腾讯】“有图有真相”未必靠谱?看腾讯如何告赢这起侵权案
-
-
制造业|8月财新制造业PMI创新高,经济复苏向好动力强劲
-
张庭|张庭夫妇相关传销案听证会排期中,所涉金额或逾100亿元
-
视线|画面触目惊心!女子惨遭货车碾压身亡,事发时她正在……
-
好喜欢这个“爹”!钟汉良藏海传造型,留了胡子,权谋感满满!
-
-
3DMGAME|纪念碑谷团队新作《阿尔芭:野生动物冒险》12月12日登陆iOS/PC
-
-
2023美妆口碑榜深度解读 从裸妆美拉德卷出的彩妆潮流
-
讯美传媒柏康|十款便宜又好用的面膜排行榜,面膜哪个牌子好用不贵
-
相亲,余磊,约会|相亲宴中,女人“拖家带口”去蹭饭,男人逃单:走为上计绝不扶贫
-
-