2017-08-24 106 views
0

我刚开始做数据仓库(DW)测试,需要把源数据与目标数据进行比较。源数据存储在 Hive/RDBMS 中,而目标数据加载到 HBase 中。我是 HBase 的新手,有人能告诉我可以采用什么方法吗?我想要的是类似于 “MINUS” 的功能。这可行吗?(标题:Hive 与 HBase 表中数据的比较)

回答

0

你可以编写 Java 程序,把下面两部分代码结合起来使用:

HBase: 
import java.io.IOException; 

// HBASE 
import org.apache.hadoop.conf.Configuration; 

import org.apache.hadoop.hbase.HBaseConfiguration; 
import org.apache.hadoop.hbase.client.Get; 
import org.apache.hadoop.hbase.client.HTable; 
import org.apache.hadoop.hbase.client.Result; 
import org.apache.hadoop.hbase.util.Bytes; 



public class RetriveData{ 

    public static void main(String[] args) throws IOException, Exception{ 

     // Instantiating Configuration class 
     Configuration config = HBaseConfiguration.create(); 

     // Instantiating HTable class 
     HTable table = new HTable(config, "emp"); 

     // Instantiating Get class 
     Get g = new Get(Bytes.toBytes("row1")); 

     // Reading the data 
     Result result = table.get(g); 

     // Reading values from Result class object 
     byte [] value = result.getValue(Bytes.toBytes("personal"),Bytes.toBytes("name")); 

     byte [] value1 = result.getValue(Bytes.toBytes("personal"),Bytes.toBytes("city")); 

     // Printing the values 
     String name = Bytes.toString(value); 
     String city = Bytes.toString(value1); 

**// CALL THE HIVE CLASS(HiveQLOrderBy)...YOU CAN COMPARE** 

     System.out.println("name: " + name + " city: " + city); 
    } 
} 


//HIVE 

import java.sql.SQLException; 
import java.sql.Connection; 
import java.sql.ResultSet; 
import java.sql.Statement; 
import java.sql.DriverManager; 

/**
 * Runs an ORDER BY query against the Hive table "employee" over JDBC and
 * prints every returned row to standard output.
 */
public class HiveQLOrderBy { 
    /** Fully-qualified class name of the Hive JDBC driver to load. */
    private static final String DRIVER_NAME = "org.apache.hadoop.hive.jdbc.HiveDriver"; 

    /**
     * Connects to the Hive server at localhost:10000 (database "userdb"),
     * queries the employee table ordered by DEPT and prints the rows.
     *
     * @param args unused
     * @throws SQLException if the driver class cannot be loaded (the
     *         ClassNotFoundException is attached as the cause), or if
     *         connecting, querying or reading the result fails
     */
    public static void main(String[] args) throws SQLException { 

     // Register the driver. Class.forName throws a checked
     // ClassNotFoundException; wrap it so the declared signature stays
     // "throws SQLException" and the root cause is preserved.
     try { 
         Class.forName(DRIVER_NAME); 
     } catch (ClassNotFoundException e) { 
         throw new SQLException("Hive JDBC driver not found on classpath: " + DRIVER_NAME, e); 
     } 

     // try-with-resources closes the result set, statement and connection
     // (in that order) on both the success and the failure path.
     // The query has no trailing ';' — that is a CLI statement terminator;
     // NOTE(review): the Hive JDBC driver is reported to reject it — confirm
     // against the driver version in use.
     try (Connection con = DriverManager.getConnection("jdbc:hive://localhost:10000/userdb", "", ""); 
          Statement stmt = con.createStatement(); 
          ResultSet res = stmt.executeQuery("SELECT * FROM employee ORDER BY DEPT")) { 

         System.out.println(" ID \t Name \t Salary \t Designation \t Dept "); 

         // Columns are read by position: 1=int, 2=string, 3=double, 4=string, 5=string
         while (res.next()) { 
             System.out.println(res.getInt(1) + " " + res.getString(2) + " " + res.getDouble(3) + " " + res.getString(4) + " " + res.getString(5)); 
         } 
     } 
    } 
} 
+0

感谢 @R Palanivel,但我对上面的 Java 文件有一点不明白:它会分别打印 HBase 表和 Hive 的查询结果,这样就只能手动进行比较了。如果我理解错了,请纠正我。到目前为止,我还没能实现它。请给些建议。 –