2015-10-07 77 views
0

Hadoop MapReduce 练习 —— 输入数据文件:

名,月,类别,支出

hitesh,1,A1,10020 
hitesh,2,A2,10300 
hitesh,3,A3,10400 
hitesh,4,A4,11000 
hitesh,5,A1,21000 
hitesh,6,A2,5000 
hitesh,7,A3,9000 
hitesh,8,A4,1000 
hitesh,9,A1,111000  
hitesh,10,A2,12000 
hitesh,11,A3,71000 
hitesh,12,A4,177000  
kuwar,1,A1,10700 
kuwar,2,A2,17000 
kuwar,3,A3,10070 
kuwar,4,A4,10007 

需要按人统计总支出,并统计每个人消费过的不同类别的数量。(期望的输出格式:姓名,总支出,不同类别的数量)

我尝试过的代码如下:

按人统计总支出

public class Emp 
    { 
    public static class MyMap extends Mapper<LongWritable,Text,Text,IntWritable> 
    { 
     public void map(LongWritable k,Text v, Context con) 
     throws IOException, InterruptedException 
     { 
     String line = v.toString(); 
     String[] w=line.split(","); 
     String person=w[0]; 
     int exp=Integer.parseInt(w[3]); 
     con.write(new Text(person), new IntWritable(exp)); 
     } 
    } 
    public static class MyRed extends Reducer<Text,IntWritable,Text,IntWritable> 
    { 
     public void reduce(Text k, Iterable<IntWritable> vlist, Context con) 
     throws IOException , InterruptedException 
     { 
     int tot =0; 
     for(IntWrit 

able v:vlist) 
    tot+=v.get(); 
    con.write(k,new IntWritable(tot)); 
    } 
} 
public static void main(String[] args) throws Exception 
{ 
    Configuration c = new Configuration(); 
    Job j= new Job(c,"person-wise"); 
    j.setJarByClass(Emp.class); 
    j.setMapperClass(MyMap.class); 
    j.setReducerClass(MyRed.class); 
    j.setOutputKeyClass(Text.class); 
    j.setOutputValueClass(IntWritable.class); 
    Path p1 = new Path(args[0]); 
    Path p2 = new Path(args[1]); 
    FileInputFormat.addInputPath(j,p1); 
    FileOutputFormat.setOutputPath(j,p2); 
    System.exit(j.waitForCompletion(true) ? 0:1); 
} 

} 

如何在这个程序中得到不同类别的数量,以及如何让输出呈现为"姓名,总支出,不同类别的数量"的形式?

感谢

回答

0

您可以创建一个自定义 Writable,把一个 IntWritable(支出)和一个 Text(类别)组成一对,并将其用作 map 的输出值。或者,用某个分隔符把支出和类别拼成一个字符串传出去,然后在 reducer 端再把它拆开。

在 reducer 端拿到这些值之后,在同一个循环里累加总支出,并把所有类别放入一个 Java Set 中;然后用 set.size() 得到不同类别的数量,并通过 context.write 输出。输出 reduce 端的值时,可以沿用传递 map 输出值时所用的同一种技巧。

在 Mapper 端,用 StringBuilder 把类别和支出拼接起来,并将其作为 map 的输出值传出。

// Pack the category (w[2]) and the spend (w[3]) into one "category:spend"
// string so that both travel together as the map output value.
final String delimiter = ":";
String packed = new StringBuilder()
        .append(w[2])
        .append(delimiter)
        .append(w[3])
        .toString();

con.write(new Text(person), new Text(packed));

在 reducer 端,用 map 端所用的分隔符拆分该值,累加支出,并计算所建类别集合的大小。以下代码未经测试;如果其中漏掉了某些变量的类型转换,请自行补上。

/**
 * Computes the total spend and the number of distinct categories for one person.
 *
 * <p>Each incoming value must have the form {@code category:spend}, i.e. the
 * string built on the map side with {@code ":"} as separator. The emitted value
 * is {@code "<total spend> <distinct category count>"}, which follows the
 * requested output order (name, total spend, number of distinct categories).
 *
 * @param k     the person's name
 * @param vlist all {@code category:spend} values emitted for that person
 * @param con   context used to emit the result
 * @throws NumberFormatException if the spend part of a value is not an integer
 */
public void reduce(Text k, Iterable<Text> vlist, Context con)
     throws IOException, InterruptedException
{
     // long: the sum of many int-sized spends can exceed the int range.
     long tot = 0;
     Set<String> myset = new HashSet<String>();
     for (Text v : vlist)
     {
         String[] split_val = v.toString().split(":");
         // The set keeps each category once, so its size is the distinct count.
         myset.add(split_val[0]);
         // trim(): the spend is the last CSV field and may carry trailing blanks.
         tot += Integer.parseInt(split_val[1].trim());
     }
     int uniq_category = myset.size();
     String sep = " ";
     StringBuilder sb1 = new StringBuilder();
     sb1.append(tot);
     sb1.append(sep);
     sb1.append(uniq_category);
     con.write(k, new Text(sb1.toString()));
}
} 

或者如前所述,创建一个由 IntWritable 和 Text 组成的 pair,用作 map 和 reduce 的值。

+0

@aashish_soni完成编辑帖子 –

+0

@aashish_soni这有助于解决您的问题吗? –

0

已经完成了代码中的修改。希望这是有用的。

public class Emp 
     { 
     public static class MyMap extends Mapper<LongWritable,Text,Text,Text> 
     { 
      public void map(LongWritable k,Text v, Context con) 
      throws IOException, InterruptedException 
      { 
      String line = v.toString(); 
      String[] w=line.split(","); 
      String person=w[0]; 
      int exp=Integer.parseInt(w[3]); 
      con.write(new Text(person), new Text(line)); 
      } 
     } 
     public static class MyRed extends Reducer<Text,Text,Text,Text> 
     { 
      public void reduce(Text k, Iterable<Text> vlist, Context con) 
      throws IOException , InterruptedException 
      { 
      int tot =0; 
      Set<String> cat = new HashSet<String>(); 
      for(Text v:vlist){ 
       String data = v.toString(); 
       String[] dataArray = data.Split(","); 
       tot+ = Integer.parseInt((dataArray[3]); //calculating the total spend 
       cat.add(dataArray[2]);// finding the number of unique categories 

     } 
      con.write(k,new Text(tot.toString()+","+cat.size().toString()));// writing the name,total spend and total unique categories to the output 
    } 
    public static void main(String[] args) throws Exception 
    { 
     Configuration c = new Configuration(); 
     Job j= new Job(c,"person-wise"); 
     j.setJarByClass(Emp.class); 
     j.setMapperClass(MyMap.class); 
     j.setReducerClass(MyRed.class); 
     j.setOutputKeyClass(Text.class); 
     j.setOutputValueClass(IntWritable.class); 
     Path p1 = new Path(args[0]); 
     Path p2 = new Path(args[1]); 
     FileInputFormat.addInputPath(j,p1); 
     FileOutputFormat.setOutputPath(j,p2); 
     System.exit(j.waitForCompletion(true) ? 0:1); 
    } 

    }