- 主页 > 生活百科 > >
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Map stage of the de-duplication job.
 *
 * <p>Each incoming value is re-emitted as the output key, paired with a
 * {@link NullWritable} placeholder value. The incoming key is ignored.
 *
 * <p>Example input pairs:  {@code <0,2018-3-3 c> <11,2018-3-4 d>}<br>
 * Example output pairs: {@code <2018-3-3 c,null> <2018-3-4 d,null>}
 */
public class DedupMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    /**
     * Reusable output-key buffer. Kept per mapper instance (not static) so
     * that it is never shared between mapper objects.
     */
    private final Text field = new Text();

    /**
     * Emits the record text as the key with a null value.
     *
     * @param key     input key; unused
     * @param value   record text to emit as the output key
     * @param context sink for the emitted pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Copy the bytes into our own buffer instead of aliasing the
        // framework-owned input object.
        field.set(value);
        context.write(field, NullWritable.get());
    }
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class DedupReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
// <2018-3-3 c,null> <2018-3-4 d,null><2018-3-4 d,null>
@Override
protected void reduce(Text key, Iterable<NullWritable> values, Context context)
【Hadoop数据去重】throws IOException, InterruptedException {
context.write(key, NullWritable.get());
}
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class DedupRunner {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(DedupRunner.class);
job.setMapperClass(DedupMapper.class);
job.setReducerClass(DedupReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
FileInputFormat.setInputPaths(job, new Path("D:\Dedup\input"));
// 指定处理完成之后的结果所保存的位置
FileOutputFormat.setOutputPath(job, new Path("D:\Dedup\output"));
job.waitForCompletion(true);
}
}
推荐阅读
-
小镇一哥|中长健康李海涛:京东:翻身仗后走向国际化!
-
-
苏-30|这次不是印巴!亚洲两国出动重炮疯狂对轰,苏30升空直扑敏感地带
-
-
-
人民政协网|幸福东北 | 大连:打造人才“幸福之城”
-
母婴参考|孕妈都尽量别用这类物品,伤身不说,还易造成胎儿缺氧,天气再热
-
幸福一箩筐|去年被吐槽全是竖排镜头设计,小米今年火力全开,后盖设计玩出花
-
-
橙子宝妈育儿■专家给3个方案解决亲子冲突,“被孩子骂得离家出走”朋友哭诉
-
曝陈若琳颜值高也难嫁霍启山,原生家庭复杂,非门当户对
-
-
游戏百晓生|腾讯成立洛杉矶工作室,鹅厂次时代3A沙盒主机游戏要来了
-
对你宣说|萌到爆!《以家人之名》李尖尖小时候扮演者小葱花,到底什么来历
-
澎湃新闻|开学、中秋、国庆将近,多地提示鸡蛋价格上涨
-
「汉中陶大夫」三八妇女节来临之前,一名医生对护士的“表白”
-
NG视频|ORICO条纹hub入手体验!轻薄本必备、USB设备来得更猛烈些吧
-
汤小小|为什么她死活不离婚?,《我的前半生》罗子群老公又穷又懒还家暴
-
子非鱼|亏损20亿, 神药卖不动, 国人醒悟了?,国产“神药”假话被揭开?
-
新加坡16.5w新币年薪和在北京上海多少收入生活质量相当?