2016-06-28 145 views
0
使用R将data.frame导出到Excel/CSV时出错

我的JSON文件如下所示:

/* 0 */ 
{ 
    "_id" : "93ccbdb6-8947", 
    "uiSearchRequest" : { 
    "travelDate" : 20151206, 
    "travelDuration" : 7, 
    "shopperDuration" : 30, 
    "oneWay" : false, 
    "userId" : "ATP1KKP", 
    "queryId" : "93ccbdb6-8947", 
    "subRequests" : [{ 
     "origin" : "WAS", 
     "destination" : "LON", 
     "carrier" : "AA", 
     "fareClasses" : "", 
     "owrt" : "1,2" 
     }] 
    }, 
    "downloadCount" : 0, 
    "requestDate" : 20151205, 
    "totalRecords" : 0, 
    "status" : "SUCCESS" 
} 

/* 1 */ 
{ 
    "_id" : "b736c374-b8ae", 
    "uiSearchRequest" : { 
    "travelDate" : 20151206, 
    "travelDuration" : 7, 
    "shopperDuration" : 30, 
    "oneWay" : false, 
    "userId" : "ATP1KKP", 
    "queryId" : "b736c374-b8ae", 
    "subRequests" : [{ 
     "origin" : "WAS", 
     "destination" : "LON", 
     "carrier" : "AA", 
     "fareClasses" : "", 
     "owrt" : "1,2" 
     }] 
    }, 
    "downloadCount" : 0, 
    "requestDate" : 20151205, 
    "totalRecords" : 0, 
    "status" : "SUCCESS" 
} 

/* 2 */ 
{ 
    "_id" : "3312605f-8304", 
    "uiSearchRequest" : { 
    "travelDate" : 20151206, 
    "travelDuration" : 7, 
    "shopperDuration" : 30, 
    "oneWay" : false, 
    "userId" : "ATP1SXE", 
    "queryId" : "3312605f-8304", 
    "subRequests" : [{ 
     "origin" : "LON", 
     "destination" : "IAD", 
     "carrier" : "AA", 
     "fareClasses" : "", 
     "owrt" : "1,2" 
     }] 
    }, 
    "downloadCount" : 2, 
    "requestDate" : 20151205, 
    "totalRecords" : 0, 
    "status" : "SUCCESS" 
} 

/* 3 */ 
{ 
    "_id" : "6b668cfa-9b79", 
    "uiSearchRequest" : { 
    "travelDate" : 20151206, 
    "travelDuration" : 7, 
    "shopperDuration" : 30, 
    "oneWay" : false, 
    "userId" : "ATP1NXA", 
    "queryId" : "6b668cfa-9b79", 
    "subRequests" : [{ 
     "origin" : "WAS", 
     "destination" : "LON", 
     "carrier" : "AA", 
     "fareClasses" : "", 
     "owrt" : "1,2" 
     }] 
    }, 
    "downloadCount" : 1, 
    "requestDate" : 20151205, 
    "totalRecords" : 1388, 
    "status" : "SUCCESS" 
} 

/* 4 */ 
{ 
    "_id" : "41c373a1-e4cb", 
    "uiSearchRequest" : { 
    "travelDate" : 20151206, 
    "travelDuration" : 7, 
    "shopperDuration" : 30, 
    "oneWay" : false, 
    "userId" : "ATP6CXS", 
    "queryId" : "41c373a1-e4cb", 
    "subRequests" : [{ 
     "origin" : "WAS", 
     "destination" : "LON", 
     "carrier" : "AA", 
     "fareClasses" : "", 
     "owrt" : "1,2" 
     }] 
    }, 
    "downloadCount" : 0, 
    "requestDate" : 20151205, 
    "totalRecords" : 1388, 
    "status" : "SUCCESS" 
} 

/* 5 */ 
{ 
    "_id" : "2c8331c4-21ca", 
    "uiSearchRequest" : { 
    "travelDate" : 20151206, 
    "travelDuration" : 7, 
    "shopperDuration" : 30, 
    "oneWay" : false, 
    "userId" : "ATP1KKP", 
    "queryId" : "2c8331c4-21ca", 
    "subRequests" : [{ 
     "origin" : "WAS", 
     "destination" : "LON", 
     "carrier" : "AA", 
     "fareClasses" : "", 
     "owrt" : "1,2" 
     }] 
    }, 
    "downloadCount" : 0, 
    "requestDate" : 20151205, 
    "totalRecords" : 1388, 
    "status" : "SUCCESS" 
} 

/* 6 */ 
{ 
    "_id" : "71a09900-1c13", 
    "uiSearchRequest" : { 
    "travelDate" : 20151206, 
    "travelDuration" : 7, 
    "shopperDuration" : 30, 
    "oneWay" : false, 
    "userId" : "ATP6CXS", 
    "queryId" : "71a09900-1c13", 
    "subRequests" : [{ 
     "origin" : "WAS", 
     "destination" : "LON", 
     "carrier" : "AF", 
     "fareClasses" : "", 
     "owrt" : "1,2" 
     }, { 
     "origin" : "WAS", 
     "destination" : "LON", 
     "carrier" : "AA", 
     "fareClasses" : "", 
     "owrt" : "1,2" 
     }, { 
     "origin" : "WAS", 
     "destination" : "LON", 
     "carrier" : "DL", 
     "fareClasses" : "", 
     "owrt" : "1,2" 
     }, { 
     "origin" : "WAS", 
     "destination" : "LON", 
     "carrier" : "LH", 
     "fareClasses" : "", 
     "owrt" : "1,2" 
     }, { 
     "origin" : "WAS", 
     "destination" : "LON", 
     "carrier" : "BA", 
     "fareClasses" : "", 
     "owrt" : "1,2" 
     }] 
    }, 
    "downloadCount" : 0, 
    "requestDate" : 20151205, 
    "totalRecords" : 6941, 
    "status" : "SUCCESS" 
} 

/* 7 */ 
{ 
    "_id" : "a036a42a-918b", 
    "uiSearchRequest" : { 
    "travelDate" : 20151206, 
    "travelDuration" : 7, 
    "shopperDuration" : 30, 
    "oneWay" : false, 
    "userId" : "ATP1MMM", 
    "queryId" : "a036a42a-918b", 
    "subRequests" : [{ 
     "origin" : "WAS", 
     "destination" : "LON", 
     "carrier" : "AA", 
     "fareClasses" : "", 
     "owrt" : "1,2" 
     }] 
    }, 
    "downloadCount" : 0, 
    "requestDate" : 20151205, 
    "totalRecords" : 1388, 
    "status" : "SUCCESS" 
} 

/* 8 */ 
{ 
    "_id" : "c547be36-805c", 
    "uiSearchRequest" : { 
    "travelDate" : 20151206, 
    "travelDuration" : 7, 
    "shopperDuration" : 30, 
    "oneWay" : false, 
    "userId" : "ATP1SXB", 
    "queryId" : "c547be36-805c", 
    "subRequests" : [{ 
     "origin" : "CHI", 
     "destination" : "LON", 
     "carrier" : "BA", 
     "fareClasses" : "", 
     "owrt" : "1,2" 
     }] 
    }, 
    "downloadCount" : 2, 
    "requestDate" : 20151205, 
    "totalRecords" : 1072, 
    "status" : "SUCCESS" 
} 

我的代码如下:

# Reproduction script: repair a non-standard JSON dump, parse it with
# jsonlite, and try to export the result to an Excel workbook.

# Read the dump as a character vector, one element per line.
raw <- readLines("mydata.txt") 
# The file is not valid JSON as a whole, so the three steps below repair it.

# Drop the "/* 0 */"-style marker lines (invert = TRUE keeps only the
# lines that do NOT match the pattern).
json <- grep("^/\\* [0-9]* \\*/", raw, value = TRUE, invert = TRUE) 

# Add the missing comma after every line that consists solely of "}".
# The last element is excluded via [-n] so that no trailing comma ends up
# in front of the closing bracket added below.
n <- length(json) 
json[-n] <- gsub("^}$", "},", json[-n]) 

# Add brackets at the beginning and end so the objects form one JSON array.
json <- c("[", json, "]") 


library(jsonlite) 
# Parse the repaired lines. NOTE(review): the name `table` shadows
# base::table() for the rest of the session.
table <- fromJSON(json) 

# NOTE(review): flatten() presumably lifts the nested uiSearchRequest fields
# to top-level "uiSearchRequest.*" columns, while subRequests stays a list
# column of data frames -- confirm with str(final).
final <- flatten(table) 
# NOTE(review): this calls the matrix method of as.data.frame() directly on
# an object that is already a data frame -- confirm it is intended.
final1 <- as.data.frame.matrix(final) 

# Console check of the result's class; the line after the call is pasted
# console output, not code.
class(final1) 
[1] "data.frame" 

# NOTE(review): write.xlsx() is not provided by jsonlite; presumably the
# xlsx package is attached elsewhere -- confirm. This is the call that
# raises the reported setCellValue error.
write.xlsx(final1, file="JSON2excel.xlsx",row.names = FALSE) 
# Open the written workbook for inspection.
file.show("JSON2excel.xlsx") 

我收到以下错误:

Error in .jcall(cell, "V", "setCellValue", value) : 
    method setCellValue with signature ([Ljava/lang/String;)V not found 
In addition: Warning message: 
In if (is.na(value)) { : 
    the condition has length > 1 and only the first element will be used 

我已经浏览了多个报同类错误的Stack Overflow问题,但其中的解决方案在我的情况下都不起作用。非常感谢任何帮助。

+0

听起来像一个'rJava'问题 – Carl

+0

@Carl 如果你指的是这个的话,我已经运行过rJava的函数了。 –

回答

1

出错的原因是:您最终得到的数据框中包含嵌套的数据框,它们来自json文件中嵌套的subRequests部分。您可以用str(final1)查看。因此,即使是基础函数write.table()、write.csv()(以及xlsx包的write.xlsx())也无法将其输出为扁平格式。

可以考虑这样展平:先把各个subRequests数据框按行绑定在一起,再利用每行的id变量将它们与final1中其余的列合并。最终您会得到一个有13个观测值的数据框(而不是json中的9个元素,因为其中一个元素含有5个嵌套的子请求:id = 71a09900-1c13)。

# Flatten `final1` (one row per request, with a list column of nested
# subRequests data frames) into one row per sub-request:
#   1. tag every nested subRequests data frame with its parent `_id`,
#   2. stack the tagged frames,
#   3. pull the scalar request-level columns into their own frame,
#   4. merge the two on `id`.

# SUBREQUEST BINDING (PULLING CORRESPONDING ID)
# seq_len() gives an empty sequence for a zero-row final1, whereas
# 1:nrow(final1) would iterate over c(1, 0).
dfList <- lapply(seq_len(nrow(final1)), function(i) {
  # The scalar id is recycled across the rows of the nested frame.
  # stringsAsFactors = FALSE keeps id a character column on R < 4.0 so it
  # matches the character id in fdf; check.names = FALSE mirrors what
  # cbind() does for data frames.
  data.frame(
    id = final1$`_id`[[i]],
    final1$uiSearchRequest.subRequests[[i]],
    stringsAsFactors = FALSE,
    check.names = FALSE
  )
})

# USE DPLYR'S bind_rows() IF dfs DIFFER IN NUMBER OF COLUMNS
# Namespace-qualified so the snippet works without attaching dplyr.
subdf <- dplyr::bind_rows(dfList)
# subdf <- data.frame(do.call(rbind, dfList))

# FINAL1 EXTRACTION
# Request-level (non-nested) columns only, renamed without the
# "uiSearchRequest." prefix.
fdf <- data.frame(
  id = final1$`_id`,
  travelDate = final1$uiSearchRequest.travelDate,
  travelDuration = final1$uiSearchRequest.travelDuration,
  shopperDuration = final1$uiSearchRequest.shopperDuration,
  oneway = final1$uiSearchRequest.oneWay,
  userId = final1$uiSearchRequest.userId,
  queryId = final1$uiSearchRequest.queryId,
  downloadCount = final1$downloadCount,
  requestDate = final1$requestDate,
  totalRecords = final1$totalRecords,
  status = final1$status,

  stringsAsFactors = FALSE,
  row.names = NULL
)

# MERGE
# Each request row is repeated once per matching sub-request row.
finaldf <- merge(fdf, subdf, by = "id")

或者,您也可以通过行迭代绑定:

# Alternative flattening: build one data frame per request row, then stack.
# seq_len() gives an empty sequence for a zero-row final1, whereas
# 1:nrow(final1) would iterate over c(1, 0).
dfList <- lapply(seq_len(nrow(final1)), function(i) {
  data.frame(
    id = final1$`_id`[[i]],
    travelDate = final1$uiSearchRequest.travelDate[[i]],
    travelDuration = final1$uiSearchRequest.travelDuration[[i]],
    shopperDuration = final1$uiSearchRequest.shopperDuration[[i]],
    oneway = final1$uiSearchRequest.oneWay[[i]],
    userId = final1$uiSearchRequest.userId[[i]],
    queryId = final1$uiSearchRequest.queryId[[i]],
    # Passed unnamed so data.frame() splices in the nested frame's own
    # columns; the scalar fields around it are recycled across its rows.
    final1$uiSearchRequest.subRequests[[i]],
    downloadCount = final1$downloadCount[[i]],
    requestDate = final1$requestDate[[i]],
    totalRecords = final1$totalRecords[[i]],
    status = final1$status[[i]],

    stringsAsFactors = FALSE,
    row.names = NULL
  )
})

# Stack the per-request frames. The original called `rbind_rows`, which is
# not a function in base R or dplyr; base rbind() is sufficient here because
# every frame is built with the same set of named columns.
finaldf <- do.call(rbind, dfList)
+0

当我运行 `dfList <- lapply(1:nrow(df), function(i){ cbind(id = final1$_id[[i]], final1$uiSearchRequest.subRequests[[i]]) })` 时,即使final1中没有空值,也会得到以下错误:Error in 1:nrow(df) : argument of length 0 –

+0

哎呀……`nrow()`应该作用于final1而不是df。此外,列名 _id 必须用反引号括起来(即 final1$`_id`),因为以下划线开头的名称不是合法的R标识符。 – Parfait

+0

工作正常。谢谢。 –