2017-07-31 86 views
-1

当我在 Hive 上运行我的 Java 函数时,总是得到 ArrayIndexOutOfBounds 错误(Hive Java UDTF 错误:ArrayIndexOutOfBoundsException: 1)。

这是我正在使用的 Java 代码,我把它打包成一个 JAR 文件以便与 Hive 一起使用;用于调用这段 Java 代码的 Hive 脚本附在代码之后。

public class similarity_report extends GenericUDTF 
{ 
private PrimitiveObjectInspector stringOI = null; 
@Override 
public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException 
{ 
//if (args.length != 1) 
// 
{ // throw new UDFArgumentException("similarityReport() takes exactly one argument"); // } 
if (args[0].getCategory() != ObjectInspector.Category.PRIMITIVE 
&& ((PrimitiveObjectInspector) args[0]).getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.STRING) 
{ throw new UDFArgumentException("similarityReport() takes a string as a parameter"); } 
stringOI = (PrimitiveObjectInspector) args[0]; 
List<String> fieldNames = new ArrayList<String>(41); 
List<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>(41); 
fieldNames.add("NAME_x"); 
fieldNames.add("VOTER ID_x"); 
fieldNames.add("FATHERS' NAME_x"); 
fieldNames.add("PIN CODE_x"); 
fieldNames.add("AREA_x"); 
fieldNames.add("TEHSIL_x"); 
fieldNames.add("DISTRICT_x"); 
fieldNames.add("POLICE STATION_x"); 
fieldNames.add("AGE_x"); 
fieldNames.add("Y-O-B_x"); 
fieldNames.add("GENDER_x"); 
fieldNames.add("HOUSE NUMBER_x"); 
fieldNames.add("STREET ADDRESS_x"); 
fieldNames.add("UNIQUE ID_x"); 
fieldNames.add("EDIT MAX_x"); 
fieldNames.add("MATCH ID_x"); 
fieldNames.add("FAKE MAX_x"); 
fieldNames.add("NAME_y"); 
fieldNames.add("VOTER ID_y"); 
fieldNames.add("FATHERS' NAME_y"); 
fieldNames.add("PIN CODE_y"); 
fieldNames.add("AREA_y"); 
fieldNames.add("TEHSIL_y"); 
fieldNames.add("DISTRICT_y"); 
fieldNames.add("POLICE STATION_y"); 
fieldNames.add("AGE_y"); 
fieldNames.add("Y-O-B_y"); 
fieldNames.add("GENDER_y"); 
fieldNames.add("HOUSE NUMBER_y"); 
fieldNames.add("STREET ADDRESS_y"); 
fieldNames.add("UNIQUE ID_y"); 
fieldNames.add("EDIT MAX_y"); 
fieldNames.add("MATCH ID_y"); 
fieldNames.add("FAKE MAX_y"); 
fieldNames.add("NAME SCORE"); 
fieldNames.add("ADDRESS SCORE"); 
fieldNames.add("CITY MATCH"); 
fieldNames.add("ZIP MATCH"); 
fieldNames.add("RELATIVE NAME SCORE"); 
fieldNames.add("VOTER ID MATCH"); 
fieldNames.add("KEY"); 

fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector); 

。 。 。

fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector); 
return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs); 
} 
public ArrayList<Object[]> processInputRecord(String row) 
{ 
ArrayList<Object[]> result = new ArrayList<Object[]>(); 
//ensure none of the fields are empty 
String[] tokens = row.split("\t"); 

System.out.println(tokens.length); 

按照建议我加入上述线^^

String Name_x = tokens[0]; 
String VoterID_x = tokens[1]; 
String FathersName_x = tokens[2]; 
String PinCode_x = tokens[3]; 
String Area_x = tokens[4]; 
String Tehsil_x = tokens[5]; 
String District_x = tokens[6]; 
String PoliceStation_x = tokens[7]; 
String Age_x = tokens[8]; 
String YOB_x = tokens[9]; 
String Gender_x = tokens[10]; 
String HouseNumber_x = tokens[11]; 
String StreetAddress_x = tokens[12]; 
String UniqueID_x = tokens[1]; 
String EditMax_x = tokens[14]; 
String MatchID_x = tokens[15]; 
String FakeMax_x = tokens[16]; 
String Name_y = tokens[17]; 
String VoterID_y = tokens[18]; 
String FathersName_y = tokens[19]; 
String PinCode_y = tokens[20]; 
String Area_y = tokens[21]; 
String Tehsil_y = tokens[22]; 
String District_y = tokens[23]; 
String PoliceStation_y = tokens[24]; 
String Age_y = tokens[25]; 
String YOB_y = tokens[26]; 
String Gender_y = tokens[27]; 
String HouseNumber_y = tokens[28]; 
String StreetAddress_y = tokens[29]; 
String UniqueID_y = tokens[18]; 
String EditMax_y = tokens[31]; 
String MatchID_y = tokens[32]; 
String FakeMax_y = tokens[33]; 
String NameScore = tokens[34]; 
String AddressScore = tokens[35]; 
String CityMatch = tokens[36]; 
String ZipMatch = tokens[37]; 
String RelativeNameScore = tokens[38]; 
String VoterIDMatch = tokens[39]; 
String Key = tokens[40]; 
String Address_x; 
String Address_y; 
String matchType = ""; 

if (matchType == "similar") 
{ 
result.add(new Object[] 
{ Name_x, VoterID_x, FathersName_x, PinCode_x, Area_x, Tehsil_x, District_x, PoliceStation_x, Age_x, YOB_x, Gender_x, HouseNumber_x, StreetAddress_x, UniqueID_x, EditMax_x, MatchID_x, FakeMax_x, NameScore, AddressScore, CityMatch, ZipMatch, RelativeNameScore, VoterIDMatch, Key}); 
result.add(new Object[] { Name_y, VoterID_y, FathersName_y, PinCode_y, Area_y, Tehsil_y, District_y, PoliceStation_y, Age_y, YOB_y, Gender_y, HouseNumber_y, StreetAddress_y, UniqueID_y, EditMax_y, MatchID_y, FakeMax_y, NameScore, AddressScore, CityMatch, ZipMatch, RelativeNameScore, VoterIDMatch, Key}); 

} 

else if (matchType == "identical") 
{ 

result.add(new Object[] { Name_x, VoterID_x, FathersName_x, PinCode_x, Area_x, Tehsil_x, District_x, PoliceStation_x, Age_x, YOB_x, Gender_x, HouseNumber_x, StreetAddress_x, UniqueID_x, EditMax_x, MatchID_x, FakeMax_x, NameScore, AddressScore, CityMatch, ZipMatch, RelativeNameScore, VoterIDMatch, Key} 
); 
} 
else if (matchType == "different") 
{ 
result.add(new Object[] 
{ Name_x, VoterID_x, FathersName_x, PinCode_x, Area_x, Tehsil_x, District_x, PoliceStation_x, Age_x, YOB_x, Gender_x, HouseNumber_x, StreetAddress_x, UniqueID_x, EditMax_x, MatchID_x, FakeMax_x, NameScore, AddressScore, CityMatch, ZipMatch, RelativeNameScore, VoterIDMatch, Key} 
); 
result.add(new Object[] 
{ Name_y, VoterID_y, FathersName_y, PinCode_y, Area_y, Tehsil_y, District_y, PoliceStation_y, Age_y, YOB_y, Gender_y, HouseNumber_y, StreetAddress_y, UniqueID_y, EditMax_y, MatchID_y, FakeMax_y, NameScore, AddressScore, CityMatch, ZipMatch, RelativeNameScore, VoterIDMatch, Key} 
); 
} 
return result; 
} 
@Override 
public void process(Object[] record) throws HiveException 
{ 
final String row = stringOI.getPrimitiveJavaObject(record[0]).toString(); 
ArrayList<Object[]> results = processInputRecord(row); 
Iterator<Object[]> it = results.iterator(); 
while (it.hasNext()) 
{ Object[] r = it.next(); forward(r); } 
} 
@Override 
public void close() throws HiveException 
{ // do nothing } 
} 

这是在 Hive 表上运行上述 Java 代码的 Hive 脚本:

-- Loads the sample file into two identical tables, joins them into record pairs ("crossed")
-- and runs the similarity_report UDTF over every pair.
set mapred.job.queue.name=buanlst; 
CREATE DATABASE IF NOT EXISTS saihieldb; 
USE saihieldb; 
-- Left-hand ("_x") copy of the data: 13 loaded columns plus 4 bookkeeping columns added below.
CREATE TABLE datafile_to_dedupe (name_x String, voterid_x String, fathersname_x String, pincode_x String, area_x String, tehsil_x String, district_x String, policestation_x String, age_x String, yob_x String, gender_x String, housenumber_x String, streetaddress_x String) 
ROW FORMAT DELIMITED 
FIELDS TERMINATED BY '\t' 
LINES TERMINATED BY '\n' 
STORED AS TEXTFILE; 
LOAD DATA LOCAL INPATH '/idn/home/sbaks31/APRIORI_MUMBAI_SAMPLE_TAB_DELIMITED.txt' OVERWRITE INTO TABLE datafile_to_dedupe; 
ALTER TABLE datafile_to_dedupe ADD COLUMNS (uniqueid_x String, editmax_x String, matchid_x String, fakemax_x String); 
-- Right-hand ("_y") copy of the same data.
CREATE TABLE datafile_to_dedupe1 (name_y String, voterid_y String, fathersname_y String, pincode_y String, area_y String, tehsil_y String, district_y String, policestation_y String, age_y String, yob_y String, gender_y String, housenumber_y String, streetaddress_y String) 
ROW FORMAT DELIMITED 
FIELDS TERMINATED BY '\t' 
LINES TERMINATED BY '\n' 
STORED AS TEXTFILE; 
LOAD DATA LOCAL INPATH '/idn/home/sbaks31/APRIORI_MUMBAI_SAMPLE_TAB_DELIMITED.txt' OVERWRITE INTO TABLE datafile_to_dedupe1; 
-- The "_y" bookkeeping columns belong on datafile_to_dedupe1. Adding them to datafile_to_dedupe
-- (as before) put 21 columns on the left and 13 on the right, so the positional SELECT * below
-- wrote left-hand values into the "_y" columns of crossed.
ALTER TABLE datafile_to_dedupe1 ADD COLUMNS (uniqueid_y String, editmax_y String, matchid_y String, fakemax_y String); 
CREATE TABLE crossed (name_x String, voterid_x String, fathersname_x String, pincode_x String, area_x String, tehsil_x String, district_x String, policestation_x String, age_x String, yob_x String, gender_x String, housenumber_x String, streetaddress_x String, uniqueid_x String, editmax_x String, matchid_x String, fakemax_x String, name_y String, voterid_y String, fathersname_y String, pincode_y String, area_y String, tehsil_y String, district_y String, policestation_y String, age_y String, yob_y String, gender_y String, housenumber_y String, streetaddress_y String, uniqueid_y String, editmax_y String, matchid_y String, fakemax_y String) 
ROW FORMAT DELIMITED 
FIELDS TERMINATED BY '\t' 
LINES TERMINATED BY '\n' 
STORED AS TEXTFILE; 
-- SELECT * is matched to the columns of crossed by position: 17 "_x" columns, then 17 "_y" columns.
INSERT OVERWRITE TABLE crossed SELECT * FROM saihieldb.datafile_to_dedupe CROSS JOIN saihieldb.datafile_to_dedupe1 on (datafile_to_dedupe.name_x = datafile_to_dedupe1.name_y); 
-- The score columns are added after the insert, so they are NULL for every existing row.
ALTER TABLE crossed ADD COLUMNS (namescore String, addressscore String, citymatch String, zipmatch String, relativenamescore String, voteridmatch String, Key String); 
-- NOTE(review): ".filepart" is the temporary name some file-transfer clients give to an
-- unfinished upload; confirm the jar was transferred completely and reference the final .jar.
add jar /idn/home/sbaks31/DedupeFinal1.jar.filepart; 
create temporary function fun3 as 'com.similarity_report'; 
-- fun3 is called with 41 separate column arguments, not with one tab-delimited string: the UDTF
-- must read every argument. Splitting only the first argument on tabs yields a single token.
CREATE VIEW newview4 AS select fun3(name_x, voterid_x, fathersname_x, pincode_x, area_x, tehsil_x, district_x, policestation_x, age_x, yob_x, gender_x, housenumber_x, streetaddress_x, uniqueid_x, editmax_x, matchid_x, fakemax_x, name_y, voterid_y, fathersname_y, pincode_y, area_y, tehsil_y, district_y, policestation_y, age_y, yob_y, gender_y, housenumber_y, streetaddress_y, uniqueid_y, editmax_y, matchid_y, fakemax_y, namescore, addressscore, citymatch, zipmatch, relativenamescore, voteridmatch, Key) from saihieldb.crossed; 
select * from newview4 limit 10; 

^^这就是我收到错误的地方。请告诉我哪里出了问题?

当我尝试打印 tokens 数组的长度时,得到的长度只有 1。有人能解释一下为什么数组里只有 1 个值吗?

+2

你能把它缩小到你认为问题可能所在的那几行代码吗?因为这段代码实在太多了。请阅读如何创建一个 [mcve] – philantrovert

+0

@philantrovert 我已经进一步缩小了代码范围,但现在新的问题是我的 tokens 数组长度只有 1,所以我不确定问题出在哪里…… –

+0

可能因为你试图在这里分割的字符串不是制表符分隔的。 – philantrovert

回答

0

这行代码后:

String[] tokens = row.split("\t");

写:

System.out.println(tokens.length);

并在该行设置一个断点进行调试。这样你就能看到数组的实际长度。因为如果你的数组 tokens 长度为 5,当你尝试访问 tokens[6]、tokens[7]、……、tokens[40] 等时,就会抛出异常。

+0

加入你建议的那一行后打印出 1,也就是说数组的长度只有 1……你认为为什么会这样?谢谢! –