0
我有以下 DataFrames 以及它们之间的连接(join)操作,但连接失败,而错误信息并没有指向代码中的任何实际问题。我在 Scala 中使用 Apache Spark DataFrames,join 操作失败。
// Immutable value object for one hospital-facility result row
// (field names stay UpperCamelCase — they are read by existing call sites).
case class HospitalFacility(Name: String, Rating: Int, Cost: Int)
// pid is passed in as an input parameter.
// hc : HiveContext, successfully created elsewhere in this file.
// Provider_Facility & Facility_Master are the two Hive tables.
//
// NOTE(review): the NoSuchMethodError in the reported stack trace
// (Utils$.tryOrIOException during broadcast-hash-join) indicates the Spark
// version the code was compiled against differs from the cluster's runtime
// Spark version — align the build dependency with the cluster; it is not a
// defect in this join logic.
/**
 * Fetches the facilities for one provider, joined against the facility
 * master table, and returns them serialized as a comma-separated String.
 *
 * @param pid provider id used to filter Provider_Facility
 * @return comma-separated rendering of the matched HospitalFacility rows
 */
def fetchHospitalFacilityData(pid: String): String = {
  // NOTE(review): pid is spliced directly into the SQL text — SQL-injection
  // risk if pid ever comes from untrusted input; validate/escape upstream.
  val filteredProviderSpecialityDF =
    hc.sql(s"select FacilityId, Rating, Cost from Provider_Facility where ProviderId='$pid'")
  filteredProviderSpecialityDF.foreach(println) // debug: prints perfectly

  val allFacilityDF = hc.sql("select id, Name from Facility_Master")
  allFacilityDF.foreach(println) // debug: prints perfectly

  // Joined schema is (FacilityId, Rating, Cost, id, Name) — indices 0..4.
  val resultDF = filteredProviderSpecialityDF.join(
    allFacilityDF,
    filteredProviderSpecialityDF("FacilityId") === allFacilityDF("id"),
    "right_outer")

  // BUG FIX: the original read Name from index 0 and Rating/Cost from 3/4;
  // after the join Name is column 4, Rating column 1, Cost column 2.
  // A right_outer join can leave the left-side columns (Rating, Cost) null
  // for unmatched facilities, so guard with isNullAt before getInt.
  val filteredFacilityList = resultDF.rdd.map { row =>
    HospitalFacility(
      row.getString(4),
      if (row.isNullAt(1)) 0 else row.getInt(1),
      if (row.isNullAt(2)) 0 else row.getInt(2))
  }.collect()
  filteredFacilityList.foreach(println)

  // BUG FIX: the original returned an undefined name `result` (compile
  // error); return the collected rows as the declared String.
  filteredFacilityList.mkString(",")
}
时引发下面列出的错误:
Exception in thread "broadcast-hash-join-0" java.lang.NoSuchMethodError: org.apache.spark.util.Utils$.tryOrIOException(Lscala/Function0;)V at org.apache.spark.sql.execution.joins.UnsafeHashedRelation.writeExternal(HashedRelation.scala:264) at java.io.ObjectOutputStream.writeExternalData(ObjectOutputStream.java:1458) at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1429) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1177) at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:347) at org.apache.spark.serializer.JavaSerializationStream.writeObject(JavaSerializer.scala:44) at org.apache.spark.broadcast.TorrentBroadcast$.blockifyObject(TorrentBroadcast.scala:203) at org.apache.spark.broadcast.TorrentBroadcast.writeBlocks(TorrentBroadcast.scala:102) at org.apache.spark.broadcast.TorrentBroadcast.(TorrentBroadcast.scala:85) at org.apache.spark.broadcast.TorrentBroadcastFactory.newBroadcast(TorrentBroadcastFactory.scala:34) at org.apache.spark.broadcast.BroadcastManager.newBroadcast(BroadcastManager.scala:63) at org.apache.spark.SparkContext.broadcast(SparkContext.scala:1326) at org.apache.spark.sql.execution.joins.BroadcastHashOuterJoin$$anonfun$broadcastFuture$1$$anonfun$apply$1.apply(BroadcastHashOuterJoin.scala:94) at org.apache.spark.sql.execution.joins.BroadcastHashOuterJoin$$anonfun$broadcastFuture$1$$anonfun$apply$1.apply(BroadcastHashOuterJoin.scala:82) at org.apache.spark.sql.execution.SQLExecution$.withExecutionId(SQLExecution.scala:100) at org.apache.spark.sql.execution.joins.BroadcastHashOuterJoin$$anonfun$broadcastFuture$1.apply(BroadcastHashOuterJoin.scala:82) at org.apache.spark.sql.execution.joins.BroadcastHashOuterJoin$$anonfun$broadcastFuture$1.apply(BroadcastHashOuterJoin.scala:82) at scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1$1(Future.scala:24) at scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24) at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745)
请问有人能帮帮我吗?
检查你的 Scala/Spark 库版本和集群版本!你似乎遇到了版本不匹配的问题 – eliasah
这段代码在 Spark-shell 中可以正常运行,但在 Scala 程序中不行 –
你能创建一个最小可重现且可验证的例子吗?(构建文件、代码、Spark 版本、你如何运行它等)否则我们无法帮助你 – eliasah