2015-04-04 118 views
1

有没有一种方法可以按值对 MapReduce 的输出进行排序,同时不改变键和值的输出顺序?(Hadoop MapReduce 按值排序)

原始输出如下(按键排序):

A 1

B 2

C 1

D 3

而我需要这样的输出(按值排序):

D 3

B 2

A 1

C 1

我尝试增加另一个排序作业,用 InverseMapper 交换键和值,从而使输出按值排序。这样做是可行的,但输出变成了这样:

3 D

2 B

1 A

1 C

有没有办法把输出中键和值的位置再对调回来?

或者有没有其他按值排序的方法?

谢谢

回答

1

您可以使用自定义值类型并实现 WritableComparable 接口,在其中实现 compareTo();或者您也可以继承 WritableComparator 类并覆盖 compare() 方法。两种方式任选其一。下面给出的是自定义键类和自定义值类。
CustKey.java

package in.aniruddha.mapreduce.custFormat; 

import java.io.DataInput; 
import java.io.DataOutput; 
import java.io.IOException; 

import org.apache.hadoop.io.Text; 
import org.apache.hadoop.io.WritableComparable; 

public class CustKey implements Writable { 
    protected Text customerId; 
    //default constructor 
    public CustKey() 
    { 
     super(); 
     customerId=new Text(); 
    } 
    public CustKey(Text customerId) 
    { 
     super(); 
     this.customerId=customerId; 
    } 
    public CustKey(String customerId) 
    { 
     super(); 
     this.customerId=new Text(customerId); 
    } 
    public CustKey(CustKey k) 
    { 
     super(); 
     this.customerId=k.customerId; 
    } 
    /** 
    * @return the customerId 
    */ 
    public Text getCustomerId() { 
     return customerId; 
    } 
    /** 
    * @param customerId the customerId to set 
    */ 
    public void setCustomerId(Text customerId) { 
     this.customerId = customerId; 
    } 
    public void setCustomerId(String customerId) { 
     this.customerId = new Text(customerId); 
    } 

    public void readFields(DataInput arg0) throws IOException { 
     this.customerId.readFields(arg0); 
    } 


    public void write(DataOutput arg0) throws IOException { 
     this.customerId.write(arg0); 
    } 



    /* (non-Javadoc) 
    * @see java.lang.Object#hashCode() 
    */ 
    @Override 
    public int hashCode() { 
     final int prime = 31; 
     int result = 1; 
     result = prime * result 
       + ((customerId == null) ? 0 : customerId.hashCode()); 
     return result; 
    } 
    /* (non-Javadoc) 
    * @see java.lang.Object#equals(java.lang.Object) 
    */ 
    @Override 
    public boolean equals(Object obj) { 
     if (this == obj) 
      return true; 
     if (obj == null) 
      return false; 
     if (getClass() != obj.getClass()) 
      return false; 
     CustKey other = (CustKey) obj; 
     if (customerId == null) { 
      if (other.customerId != null) 
       return false; 
     } else if (!customerId.equals(other.customerId)) 
      return false; 
     return true; 
    } 
} 

同样自定义值格式 CustValue.java

package in.aniruddha.mapreduce.custFormat; 

import java.io.DataInput; 
import java.io.DataOutput; 
import java.io.IOException; 

import org.apache.hadoop.io.Text; 
import org.apache.hadoop.io.WritableComparable; 

/**
 * Customer record used as a Hadoop value type: a customer key plus first
 * name, last name, age and profession, all held as {@code Text}.
 *
 * <p>Instances are ordered by customer id first and by age second (see
 * {@link #compareTo(CustValue)}).
 */
public class CustValue implements WritableComparable<CustValue> {
    protected CustKey custId;
    protected Text firstName;
    protected Text lastName;
    protected Text age;
    protected Text profession;

    /** Creates a record whose key and text fields are all empty. */
    public CustValue() {
        this.custId = new CustKey();
        this.firstName = new Text();
        this.lastName = new Text();
        this.age = new Text();
        this.profession = new Text();
    }

    /**
     * Creates a record from existing Hadoop objects. The key is passed
     * through {@code CustKey}'s copy constructor; the {@code Text} arguments
     * are stored as given.
     *
     * @param custId     customer key
     * @param firstName  first name
     * @param lastName   last name
     * @param age        age
     * @param profession profession
     */
    public CustValue(CustKey custId, Text firstName, Text lastName, Text age,
            Text profession) {
        this.custId = new CustKey(custId);
        this.firstName = firstName;
        this.lastName = lastName;
        this.age = age;
        this.profession = profession;
    }

    /**
     * Creates a record from plain strings, wrapping each one in a new
     * Hadoop object.
     *
     * @param custId     customer id
     * @param firstName  first name
     * @param lastName   last name
     * @param age        age
     * @param profession profession
     */
    public CustValue(String custId, String firstName, String lastName, String age,
            String profession) {
        this.custId = new CustKey(custId);
        this.firstName = new Text(firstName);
        this.lastName = new Text(lastName);
        this.age = new Text(age);
        this.profession = new Text(profession);
    }

    /** @return the customer key */
    public CustKey getCustId() {
        return this.custId;
    }

    /** @param custId the customer key to store */
    public void setCustId(CustKey custId) {
        this.custId = custId;
    }

    /** @return the first name */
    public Text getFirstName() {
        return this.firstName;
    }

    /** @param firstName the first name to store */
    public void setFirstName(Text firstName) {
        this.firstName = firstName;
    }

    /** @return the last name */
    public Text getLastName() {
        return this.lastName;
    }

    /** @param lastName the last name to store */
    public void setLastName(Text lastName) {
        this.lastName = lastName;
    }

    /** @return the age */
    public Text getAge() {
        return this.age;
    }

    /** @param age the age to store */
    public void setAge(Text age) {
        this.age = age;
    }

    /** @return the profession */
    public Text getProfession() {
        return this.profession;
    }

    /** @param profession the profession to store */
    public void setProfession(Text profession) {
        this.profession = profession;
    }

    /**
     * Deserializes the record. Fields are read in the same order in which
     * {@link #write(DataOutput)} emits them: key, age, profession, last
     * name, first name.
     *
     * @param arg0 the input to read from
     * @throws IOException if reading from the input fails
     */
    @Override
    public void readFields(DataInput arg0) throws IOException {
        custId.readFields(arg0);
        age.readFields(arg0);
        profession.readFields(arg0);
        lastName.readFields(arg0);
        firstName.readFields(arg0);
    }

    /**
     * Serializes the record in the order key, age, profession, last name,
     * first name.
     *
     * @param arg0 the output to write to
     * @throws IOException if writing to the output fails
     */
    @Override
    public void write(DataOutput arg0) throws IOException {
        custId.write(arg0);
        age.write(arg0);
        profession.write(arg0);
        lastName.write(arg0);
        firstName.write(arg0);
    }

    /**
     * Compares by customer id and, when the ids are equal, by age.
     *
     * <p>Both fields are {@code Text}, so the comparison is the one defined
     * by {@code Text}; the age is therefore compared as text, not as a
     * number.
     *
     * @param o the record to compare with
     * @return a negative, zero or positive value according to the ordering
     *         described above
     */
    @Override
    public int compareTo(CustValue o) {
        final int byCustomerId = custId.customerId.compareTo(o.custId.customerId);
        return (byCustomerId != 0) ? byCustomerId : age.compareTo(o.age);
    }
}

在这里,如果您不想按键排序、而只想按值排序,只需在自定义 Value 类中实现 WritableComparable 接口;由于您希望按值来排列键,Key 必须实现 Writable 接口。


如果您有疑问,请回复我。 Thankyou :)

+0

这很有帮助!谢谢! – Huanyan 2015-12-10 11:35:39

3

M/R总是按键排序。如果要按值排序,则需要创建另一个作业,将要排序的值映射到键中。

0

您可以使用二级排序(secondary sort)来基于值进行排序:定义组合键,并覆盖 SortComparator,使其按值排序。这样 reducer 中得到的值就是有序的。

+0

我很确定这并不会达到你预期的效果。您当然可以在同一个键的范围内对值进行排序(二级排序),但是如果比较器忽略了键,那么最终所有的值都会在 shuffle/排序阶段被路由到同一个 reducer。 – 2015-04-05 18:57:49

+0

您是对的,感谢指正。我忽略了二级排序是用来在同一个键内对值进行排序的,要产生全局有序的输出还需要额外的工作。如果并不要求值全局有序,则可以修改逆映射(InverseMapper)逻辑,使其按所需的格式输出。 – InfamousCoconut 2015-04-05 19:25:50