载入中。。。 'S bLog
 
载入中。。。
 
载入中。。。
载入中。。。
载入中。。。
载入中。。。
载入中。。。
 
填写您的邮件地址,订阅我们的精彩内容:


 
《Hadoop in Action》第四章习题
[ 2012/2/26 22:07:00 | By: 梦翔儿 ]
 

《Hadoop in Action》第四章习题:

0.MaxValue:要求输出cite75_99.txt中最大的CITED值:

要点:

1.Mapper只输出它所处理的数据中的最大值。(重写cleanup()函数)

2.设置Reducer数目为一个 -D mapred.reduce.tasks=1,同时也只输出所处理的最大值。(重写cleanup()函数)

3.cleanup()函数:在任务结束时执行一次。详见API。

代码如下:

  1. /*
  2. * MaxValues
  3. * 函数作用:输出Patent中最大数值
  4. * Author: jokes000
  5. * Date: 2011-12-15
  6. */
  7. import java.io.IOException;
  8. import org.apache.hadoop.conf.Configuration;
  9. import org.apache.hadoop.conf.Configured;
  10. import org.apache.hadoop.fs.Path;
  11. import org.apache.hadoop.io.IntWritable;
  12. import org.apache.hadoop.io.LongWritable;
  13. import org.apache.hadoop.io.Text;
  14. import org.apache.hadoop.mapreduce.Job;
  15. import org.apache.hadoop.mapreduce.Mapper;
  16. import org.apache.hadoop.mapreduce.Reducer;
  17. import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
  18. import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
  19. import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
  20. import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
  21. import org.apache.hadoop.util.Tool;
  22. import org.apache.hadoop.util.ToolRunner;
  23. public class MaxValue extends Configured implements Tool {
  24. public static class MapClass extends Mapper<LongWritable,Text,Text,Text> {
  25. int max = 0;
  26. // Map Method
  27. public void map(LongWritable key, Text value, Context context){
  28. String[] citation = value.toString().split(",", 2);
  29. try {
  30. int tmp = Integer.parseInt(citation[0]);
  31. if( tmp > max ) max = tmp;
  32. } catch(NumberFormatException e){
  33. // do nothing.
  34. }
  35. //context.write(new Text(citation[0]), new Text(citation[0]));
  36. }
  37. @Override
  38. protected void cleanup(Context context) throws IOException, InterruptedException {
  39. context.write(new Text(max+""), new Text(max+""));
  40. }
  41. }
  42. public static class Reduce extends Reducer<Text,Text,Text,IntWritable> {
  43. int max = 0;
  44. // Reduce Method
  45. public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
  46. //IntWritable[] top = new IntWritable[10];
  47. for(Text value : values) {
  48. try {
  49. int tmp = Integer.parseInt(value.toString());
  50. if( tmp > max ) max = tmp;
  51. } catch(NumberFormatException e) {
  52. // do nothing.
  53. }
  54. }
  55. //context.write(new Text("Max"), new IntWritable(max));
  56. }
  57. @Override
  58. protected void cleanup(Context context) throws IOException, InterruptedException {
  59. context.write(new Text("Max"), new IntWritable(max));
  60. }
  61. }
  62. @Override
  63. public int run(String[] arg0) throws Exception {
  64. Job job = new Job();
  65. job.setJarByClass(MaxValue.class);
  66. FileInputFormat.addInputPath(job, new Path(arg0[0]));
  67. FileOutputFormat.setOutputPath(job, new Path(arg0[1]));
  68. job.setMapperClass(MapClass.class);
  69. job.setReducerClass(Reduce.class);
  70. job.setInputFormatClass(TextInputFormat.class);
  71. job.setOutputFormatClass(TextOutputFormat.class);
  72. job.setOutputKeyClass(Text.class);
  73. job.setOutputValueClass(Text.class);
  74. job.waitForCompletion(true);
  75. return 0;
  76. }
  77. public static void main(String[] args) throws Exception {
  78. int res = ToolRunner.run(new Configuration(), new MaxValue(), args);
  79. System.exit(res);
  80. }
  81. }


1.Top K Values: 要求输出apat63_99.txt中的第9列CLAIMS值的最大的K个值:

要点:

1.Mapper只输出它所处理的数据中的最大的K个值。(重写 cleanup()函数)

2.设置Reducer数目为1 -D mapred.reduce.tasks=1,同时对Mapper中输出进行排序,输出最大的K个值(重写 cleanup()函数)

代码如下:

  1. /*
  2. * TopKValues
  3. * 函数作用:输出CLAIMS中最大的几个数值
  4. * Author: jokes000
  5. * Date: 2011-12-15
  6. */
  7. import java.io.IOException;
  8. import java.util.Arrays;
  9. import org.apache.hadoop.conf.Configuration;
  10. import org.apache.hadoop.conf.Configured;
  11. import org.apache.hadoop.fs.Path;
  12. import org.apache.hadoop.io.IntWritable;
  13. import org.apache.hadoop.io.LongWritable;
  14. import org.apache.hadoop.io.Text;
  15. import org.apache.hadoop.mapreduce.Job;
  16. import org.apache.hadoop.mapreduce.Mapper;
  17. import org.apache.hadoop.mapreduce.Reducer;
  18. import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
  19. import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
  20. import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
  21. import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
  22. import org.apache.hadoop.util.Tool;
  23. import org.apache.hadoop.util.ToolRunner;
  24. public class TopKValues extends Configured implements Tool {
  25. public static class MapClass extends Mapper<LongWritable,Text,Text,IntWritable> {
  26. // 全局变量
  27. int len; // K值
  28. int[] top; // 用于保存的数组
  29. // Map Method
  30. public void map(LongWritable key, Text value, Context context) {
  31. String[] fields = value.toString().split(",",-20);
  32. try {
  33. int claims = Integer.parseInt(fields[8]);
  34. add(claims);
  35. } catch(NumberFormatException e) {
  36. // do nothing..
  37. }
  38. }
  39. private void add(int value) {
  40. top[0] = value;
  41. Arrays.sort(top);
  42. }
  43. @Override
  44. protected void setup(Context context) {
  45. // 获取设置的"K"值,若没有K值,则设置该值为10
  46. len = context.getConfiguration().getInt("K", 10);
  47. top = new int[len+1];
  48. }
  49. @Override
  50. protected void cleanup(Context context) throws IOException, InterruptedException {
  51. for( int i = 1; i <= len; ++ i ) {
  52. context.write(new Text(top[i]+""), new IntWritable(top[i]));
  53. }
  54. }
  55. }
  56. public static class Reduce extends Reducer<Text,IntWritable,Text,IntWritable> {
  57. int[] top;
  58. int len;
  59. @Override
  60. protected void setup(Context context) {
  61. len = context.getConfiguration().getInt("K", 10);
  62. top = new int[len+1];
  63. }
  64. private void add(int value) {
  65. top[0] = value;
  66. Arrays.sort(top);
  67. }
  68. // Reduce Method
  69. public void reduce(Text key, Iterable<IntWritable> values, Context context) {
  70. for(IntWritable value : values) {
  71. add(value.get());
  72. }
  73. }
  74. @Override
  75. protected void cleanup(Context context) throws IOException, InterruptedException {
  76. for( int i = len; i > 0; -- i ) {
  77. context.write(new Text("No."+(len-i+1)), new IntWritable(top[i]));
  78. }
  79. }
  80. }
  81. @Override
  82. public int run(String[] arg0) throws Exception {
  83. Job job = new Job();
  84. job.setJarByClass(TopKValues.class);
  85. FileInputFormat.addInputPath(job, new Path(arg0[0]));
  86. FileOutputFormat.setOutputPath(job, new Path(arg0[1]));
  87. try{
  88. int K = Integer.parseInt(arg0[2]);
  89. getConf().setInt("K", K);
  90. } catch(NumberFormatException e) {
  91. // do nothing..
  92. getConf().setInt("K", 20);
  93. }
  94. job.setMapperClass(MapClass.class);
  95. job.setReducerClass(Reduce.class);
  96. job.setInputFormatClass(TextInputFormat.class);
  97. job.setOutputFormatClass(TextOutputFormat.class);
  98. job.setOutputKeyClass(Text.class);
  99. job.setOutputValueClass(IntWritable.class);
  100. job.waitForCompletion(true);
  101. return 0;
  102. }
  103. public static void main(String[] args) throws Exception {
  104. int res = ToolRunner.run(new Configuration(), new TopKValues(), args);
  105. System.exit(res);
  106. }
  107. }

http://blog.csdn.net/jokes000/article/details/7075344

 
 
  • 标签:Hadoop 
  • 发表评论:
    载入中。。。

     
     
     

    梦翔儿网站 梦飞翔的地方 http://www.dreamflier.net
    中华人民共和国信息产业部TCP/IP系统 备案序号:辽ICP备09000550号

    Powered by Oblog.