- 主页 > 生活百科 > >
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Map stage of the de-duplication job.
 *
 * <p>Each input record is re-emitted with the whole line as the output key and
 * {@link NullWritable} as the value, so the line content itself becomes the
 * key that the reduce stage receives.
 *
 * <p>Example input records (byte offset, line): {@code <0,2018-3-3 c> <11,2018-3-4 d>}<br>
 * Example output records: {@code <2018-3-3 c,null> <2018-3-4 d,null>}
 */
public class DedupMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    /**
     * Emits the input line unchanged as the output key.
     *
     * @param key     the input key (unused); per the sample records this is the line's offset
     * @param value   the text of the input line
     * @param context task context used to emit the output record
     * @throws IOException          if writing the output record fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Write the incoming value directly: no per-class holder object is needed,
        // and no mutable static state is kept between calls.
        context.write(value, NullWritable.get());
    }
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class DedupReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
// <2018-3-3 c,null> <2018-3-4 d,null><2018-3-4 d,null>
@Override
protected void reduce(Text key, Iterable<NullWritable> values, Context context)
【Hadoop数据去重】throws IOException, InterruptedException {
context.write(key, NullWritable.get());
}
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class DedupRunner {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(DedupRunner.class);
job.setMapperClass(DedupMapper.class);
job.setReducerClass(DedupReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
FileInputFormat.setInputPaths(job, new Path("D:\Dedup\input"));
// 指定处理完成之后的结果所保存的位置
FileOutputFormat.setOutputPath(job, new Path("D:\Dedup\output"));
job.waitForCompletion(true);
}
}
推荐阅读
-
北京最“寒碜”火车站,等车只能蹲路边,本地人都不爱去
-
-
特朗普:6月23日!心腹倒戈相向,特朗普担心成真,或被军队请出白宫!
-
-
北京城市副中心|太棒了!副中心这个地新添一所小学!设24个教学班,计划9月迎新生
-
当红女星|?baby穿透视背心秀身材!薄背细腰似纸片人,银色短发造型英气十足!
-
-
『体坛扒客』C罗潜在下家2选1,法甲巨人成热门,重磅!乔治娜将在巴黎买房
-
-
人民日报中央厨房创业之星 | 从陶瓷工艺飞向“北斗三号”的超高温特种涂层
-
辣妈的美就是不一般,灰色短袖衫搭配健美裤,美的自信又洒脱
-
青蛙侃球|赵睿扭伤脚踝被队友架着离开赛场,赛后两字回应让球迷放心
-
-
-
-
-
-
智能穿戴真无线蓝牙耳机什么牌子运动舒适?2020平价学生党蓝牙耳机
-
-
「超微半导体」 运行安卓9,双核四线程!AMD锐龙3 3250C曝光