2012-05-28 48 views
1

我试图实现《Hadoop in Action》一书中给出的一个用例(Hadoop MapReduce中的DataJoin),但代码无法编译。我是Java新手,因此无法理解错误背后的确切原因。

有趣的是,另一段使用相同类和方法的代码却能编译成功。

[email protected]:~/hadoop-0.20.2/playground/src$ javac -classpath /home/hadoop/hadoop-0.20.2/hadoop-0.20.2-core.jar:/home/hadoop/hadoop-0.20.2/lib/commons-cli-1.2.jar:/home/hadoop/hadoop-0.20.2/contrib/datajoin/hadoop-0.20.2-datajoin.jar -d ../classes DataJoin2.java 
DataJoin2.java:49: cannot find symbol 
symbol : constructor TaggedWritable(org.apache.hadoop.io.Text) 
location: class DataJoin2.TaggedWritable 
      TaggedWritable retv = new TaggedWritable((Text) value); 
           ^
DataJoin2.java:69: cannot find symbol 
symbol : constructor TaggedWritable(org.apache.hadoop.io.Text) 
location: class DataJoin2.TaggedWritable 
      TaggedWritable retv = new TaggedWritable(new Text(joinedStr)); 
           ^
DataJoin2.java:113: setMapperClass(java.lang.Class<? extends org.apache.hadoop.mapreduce.Mapper>) in org.apache.hadoop.mapreduce.Job cannot be applied to (java.lang.Class<DataJoin2.MapClass>) 
     job.setMapperClass(MapClass.class); 
     ^
DataJoin2.java:114: setReducerClass(java.lang.Class<? extends org.apache.hadoop.mapreduce.Reducer>) in org.apache.hadoop.mapreduce.Job cannot be applied to (java.lang.Class<DataJoin2.Reduce>) 
     job.setReducerClass(Reduce.class); 
     ^
4 errors 

----------------代码----------------------

import java.io.DataInput; 
import java.io.DataOutput; 
import java.io.IOException; 


import org.apache.hadoop.conf.Configuration; 
import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.io.IntWritable; 
import org.apache.hadoop.io.LongWritable; 
import org.apache.hadoop.io.Text; 
import org.apache.hadoop.mapreduce.Job; 
import org.apache.hadoop.mapreduce.Mapper; 
import org.apache.hadoop.mapreduce.Reducer; 
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 
import org.apache.hadoop.mapred.KeyValueTextInputFormat; 
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 
import org.apache.hadoop.util.GenericOptionsParser; 

// DataJoin Classes 
import org.apache.hadoop.contrib.utils.join.DataJoinMapperBase; 
import org.apache.hadoop.contrib.utils.join.TaggedMapOutput; 
import org.apache.hadoop.contrib.utils.join.DataJoinReducerBase; 

import org.apache.hadoop.io.Writable; 
import org.apache.hadoop.io.WritableComparable; 


public class DataJoin2 
{ 
    public static class MapClass extends DataJoinMapperBase 
    { 
     protected Text generateInputTag(String inputFile) 
     { 
      String datasource = inputFile.split("-")[0]; 
      return new Text(datasource);    
     } 

     protected Text generateGroupKey(TaggedMapOutput aRecord) 
     { 
      String line = ((Text) aRecord.getData()).toString(); 
      String[] tokens = line.split(","); 
      String groupKey = tokens[0]; 
      return new Text(groupKey); 
     } 

     protected TaggedMapOutput generateTaggedMapOutput(Object value) 
     { 
      TaggedWritable retv = new TaggedWritable((Text) value); 
      retv.setTag(this.inputTag); 
      return retv; 
     } 
    } // End of class MapClass 

    public static class Reduce extends DataJoinReducerBase 
    { 
     protected TaggedMapOutput combine(Object[] tags, Object[] values) 
     { 
      if (tags.length < 2) return null; 
      String joinedStr = ""; 
      for (int i=0;i<values.length;i++) 
      { 
       if (i>0) joinedStr += ","; 
       TaggedWritable tw = (TaggedWritable) values[i]; 
       String line = ((Text) tw.getData()).toString(); 
       String[] tokens = line.split(",",2); 
       joinedStr += tokens[1]; 
      } 
      TaggedWritable retv = new TaggedWritable(new Text(joinedStr)); 
      retv.setTag((Text) tags[0]); 
      return retv; 
     } 
    } // End of class Reduce 

    public static class TaggedWritable extends TaggedMapOutput 
    { 
     private Writable data; 

     public TaggedWritable() 
     { 
      this.tag = new Text(""); 
      this.data = data; 
     } 

     public Writable getData() 
     { 
      return data; 
     } 

     public void write(DataOutput out) throws IOException 
     { 
      this.tag.write(out); 
      this.data.write(out); 
     } 

     public void readFields(DataInput in) throws IOException 
     { 
      this.tag.readFields(in); 
      this.data.readFields(in); 
     }  
    } // End of class TaggedWritable 

    public static void main(String[] args) throws Exception 
    { 
     Configuration conf = new Configuration(); 
     String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); 
     if (otherArgs.length != 2) { 
      System.err.println("Usage: DataJoin2 <in> <out>"); 
      System.exit(2); 
     } 
     Job job = new Job(conf, "DataJoin"); 
     job.setJarByClass(DataJoin2.class);  
     job.setMapperClass(MapClass.class); 
     job.setReducerClass(Reduce.class); 
     job.setInputFormatClass(TextInputFormat.class); 

     job.setOutputKeyClass(Text.class); 
     job.setOutputValueClass(TaggedWritable.class); 

     FileInputFormat.addInputPath(job, new Path(otherArgs[0])); 
     FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); 
     System.exit(job.waitForCompletion(true) ? 0 : 1);    
    } 
} 

回答

1

错误信息的含义非常直白。它告诉你,你没有为TaggedWritable提供一个接受Text类型参数的构造函数。在你发布的代码中,只有一个无参数构造函数。

+0

感谢您指点....当来自Oracle背景的人尝试使用Java时,会发生这种情况 – Sandeep

1

对于前两个错误信息,编译器已经清楚地告诉你:TaggedWritable没有接受Text类型参数的构造函数。在我看来,你是把TaggedWritable用作一个包装器,为Writable添加一个标签,所以我建议添加如下构造函数:

public TaggedWritable(Writable data) { 
    this.tag = new Text(""); 
    this.data = data; 
} 

事实上,按照你现在的写法,这一行

this.data = data; 

只是把data重新赋值给它自己,所以我敢肯定你本来是想要一个名为data的构造函数参数。至于为什么我认为参数类型应该是Writable而不是Text,请参见我上面的说明。由于Text实现了Writable,这样就解决了你的前两个错误消息。

但是,你仍然需要保留默认的无参数构造函数。这是因为Hadoop在map和reduce阶段之间通过网络序列化传输Writable值时,会使用反射来实例化它们。我认为你这里的默认无参数构造函数有一点混乱:

public TaggedWritable() { 
    this.tag = new Text(""); 
} 

我之所以说这里有点混乱,是因为如果你没有给TaggedWritable.data赋一个与所包装的Writable值类型相符的有效实例,那么当TaggedWritable.readFields(DataInput)中调用this.data.readFields(in)时,就会抛出NullPointerException。由于它是一个通用包装器,你应该把TaggedWritable定义为泛型类型,然后在默认的无参数构造函数中使用反射来为TaggedWritable.data赋值。

至于最后两个编译器错误:我注意到,要使用hadoop-datajoin,就必须使用旧API的类。因此,所有这些

org.apache.hadoop.mapreduce.Job; 
org.apache.hadoop.mapreduce.Mapper; 
org.apache.hadoop.mapreduce.Reducer; 
org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 
org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 
org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 

都应替换为旧API中的对应类,例如用org.apache.hadoop.mapred.JobConf代替org.apache.hadoop.mapreduce.Job,依此类推。这样就能解决最后两个错误消息。