2017-04-11 46 views
0

我正在尝试使用包含在键控JSON名称中的信息将上下文添加到包含在其嵌套矩阵中的数据。矩阵有不同数量的行,并且缺少一些矩阵(列表元素为NULL)。我能够使用purrr包中的map和at_depth从层次结构中提取相关数据并保留信息作为列表名称,但是我无法找到一种干净的方式将其转换为单个数据框。矩阵列表(从JSON)列表到单个数据框架 - 呜呜声有不同行号的问题?

我试图purrr使用:::转例证here,我已经使用tidyr ::: UNNEST如图所示here试过,但我认为他们想要的结果,并输入不同足够从我自己是不适用。不同的行名和/或缺失的矩阵似乎存在太多问题。我也是新来的咕噜包,所以可能有一些简单的东西,我错过了这里。

这是我自己的尝试,它产生了几乎所需的结果,我想我可以修改它多一点,以删除for循环,并有另一层'应用'功能,但我怀疑有更好的方法来解决这个问题。

最小的可重复的例子

#Download data 
json <- getURL("http://maps2.dnr.state.mn.us/cgi-bin/lakefinder/detail.cgi?type=lake_survey&id=69070100") 
#Surveys are the relevant data 
data.listed <- fromJSON(json, simplifyDataFrame=F) 
surveys <- data.listed$result$surveys 

#Get list of lists of matrices - fish size count data 
fcounts <- map(surveys, "lengths") %>% 
    at_depth(2, "fishCount") %>% 
    at_depth(2, data.frame) # side note: is this a good way to the inner matrices to data.frames? 
#top-level - list - surveys 
    #2nd-level - list - species in each survey 
     #3rd-level - data.frame - X1: measured_size, X2: counts 
#use survey IDs as names for top level list 
#just as species are used as names for 2nd level lists 
names(fcounts) <- sapply(surveys, function(s) {return(s$surveyID)}) 

#This produces nearly the correct result 

for (i in 1:length(fcounts)){ 
    surv.id <- names(fcounts)[[i]] 
    if (length(fcounts[[i]]) > 0) { 
    listed.withSpecies <- lapply(names(fcounts[[i]]), function(species) cbind(fcounts[[i]][[species]], species)) 
    surv.fishCounts <- do.call(rbind, listed.withSpecies) 
    colnames(surv.fishCounts) <- c("size", "count", "species") 
    surv.fishCounts$survey.ID <- surv.id 
    print(surv.fishCounts) 
    } 
} 

回答

2

这是获得长度的嵌套数据帧的一种方式计算成一个大的数据帧:

library(httr) 
library(tidyverse) 

res <- GET("http://maps2.dnr.state.mn.us/cgi-bin/lakefinder/detail.cgi", 
      query = list(type="lake_survey", id="69070100")) 

content(res, as="text") %>% 
    jsonlite::fromJSON(simplifyDataFrame = FALSE, flatten=FALSE) -> x 

x$result$surveys %>% 
    map_df(~{ 
    tmp_df <- flatten_df(.x[c("surveyDate", "surveyID", "surveyType", "surveySubType")]) 
    lens <- .x$lengths 
    if (length(lens) > 0) { 
     fish <- names(lens) 
     data_frame(fish, 
       max_length = map_dbl(lens, "maximum_length"), 
       min_length = map_dbl(lens, "minimum_length"), 
       lens = map(lens, "fishCount") %>% 
        map(~set_names(as_data_frame(.), c("catch_len", "ct")))) %>% 
     mutate(surveyDate = tmp_df$surveyDate, 
       surveyType = tmp_df$surveyType, 
       surveySubType = tmp_df$surveySubType, 
       surveyID = tmp_df$surveyID) -> tmp_df 
    } 
    tmp_df 
    }) -> lengths_df 

glimpse(lengths_df) 
## Observations: 21 
## Variables: 8 
## $ surveyDate <chr> "1988-07-19", "1995-07-17", "1995-07-17", "1995-07-17", "1995-07-17", "1995-07-17", "1995-07-... 
## $ surveyID  <chr> "107278", "107539", "107539", "107539", "107539", "107539", "107539", "107539", "107539", "10... 
## $ surveyType <chr> "Standard Survey", "Standard Survey", "Standard Survey", "Standard Survey", "Standard Survey"... 
## $ surveySubType <chr> "Population Assessment", "Re-Survey", "Re-Survey", "Re-Survey", "Re-Survey", "Re-Survey", "Re... 
## $ fish   <chr> NA, "PMK", "BLB", "LMB", "YEP", "BLG", "WTS", "WAE", "NOP", "GSF", "BLC", NA, "HSF", "PMK", "... 
## $ max_length <dbl> NA, 6, 12, 16, 6, 7, 18, 18, 36, 4, 10, NA, 8, 7, 12, 12, 6, 8, 23, 38, 12 
## $ min_length <dbl> NA, 3, 10, 1, 3, 3, 16, 16, 6, 4, 4, NA, 7, 4, 10, 12, 5, 3, 12, 9, 7 
## $ lens   <list> [NULL, <c("3", "6"), c("1", "3")>, <c("10", "11", "12"), c("1", "1", "4")>, <c("1", "16", "2... 

print(lengths_df, n=nrow(lengths_df)) 
## # A tibble: 21 × 8 
## surveyDate surveyID  surveyType   surveySubType fish max_length min_length    lens 
##   <chr> <chr>   <chr>     <chr> <chr>  <dbl>  <dbl>   <list> 
## 1 1988-07-19 107278 Standard Survey Population Assessment <NA>   NA   NA   <NULL> 
## 2 1995-07-17 107539 Standard Survey    Re-Survey PMK   6   3 <tibble [2 × 2]> 
## 3 1995-07-17 107539 Standard Survey    Re-Survey BLB   12   10 <tibble [3 × 2]> 
## 4 1995-07-17 107539 Standard Survey    Re-Survey LMB   16   1 <tibble [6 × 2]> 
## 5 1995-07-17 107539 Standard Survey    Re-Survey YEP   6   3 <tibble [3 × 2]> 
## 6 1995-07-17 107539 Standard Survey    Re-Survey BLG   7   3 <tibble [5 × 2]> 
## 7 1995-07-17 107539 Standard Survey    Re-Survey WTS   18   16 <tibble [3 × 2]> 
## 8 1995-07-17 107539 Standard Survey    Re-Survey WAE   18   16 <tibble [2 × 2]> 
## 9 1995-07-17 107539 Standard Survey    Re-Survey NOP   36   6 <tibble [17 × 2]> 
## 10 1995-07-17 107539 Standard Survey    Re-Survey GSF   4   4 <tibble [1 × 2]> 
## 11 1995-07-17 107539 Standard Survey    Re-Survey BLC   10   4 <tibble [6 × 2]> 
## 12 1992-07-24 107587 Standard Survey    Re-Survey <NA>   NA   NA   <NULL> 
## 13 2005-07-11 107906 Standard Survey Population Assessment HSF   8   7 <tibble [2 × 2]> 
## 14 2005-07-11 107906 Standard Survey Population Assessment PMK   7   4 <tibble [4 × 2]> 
## 15 2005-07-11 107906 Standard Survey Population Assessment BLB   12   10 <tibble [3 × 2]> 
## 16 2005-07-11 107906 Standard Survey Population Assessment LMB   12   12 <tibble [1 × 2]> 
## 17 2005-07-11 107906 Standard Survey Population Assessment YEP   6   5 <tibble [2 × 2]> 
## 18 2005-07-11 107906 Standard Survey Population Assessment BLG   8   3 <tibble [6 × 2]> 
## 19 2005-07-11 107906 Standard Survey Population Assessment WAE   23   12 <tibble [8 × 2]> 
## 20 2005-07-11 107906 Standard Survey Population Assessment NOP   38   9 <tibble [20 × 2]> 
## 21 2005-07-11 107906 Standard Survey Population Assessment BLC   12   7 <tibble [4 × 2]> 

您可以展开嵌套的捕获意见是这样的:

filter(lengths_df, !map_lgl(lens, is.null)) %>% 
    unnest(lens) 
## # A tibble: 98 × 9 
## surveyDate surveyID  surveyType surveySubType fish max_length min_length catch_len ct 
##   <chr> <chr>   <chr>   <chr> <chr>  <dbl>  <dbl>  <int> <int> 
## 1 1995-07-17 107539 Standard Survey  Re-Survey PMK   6   3   3  1 
## 2 1995-07-17 107539 Standard Survey  Re-Survey PMK   6   3   6  3 
## 3 1995-07-17 107539 Standard Survey  Re-Survey BLB   12   10  10  1 
## 4 1995-07-17 107539 Standard Survey  Re-Survey BLB   12   10  11  1 
## 5 1995-07-17 107539 Standard Survey  Re-Survey BLB   12   10  12  4 
## 6 1995-07-17 107539 Standard Survey  Re-Survey LMB   16   1   1  1 
## 7 1995-07-17 107539 Standard Survey  Re-Survey LMB   16   1  16  1 
## 8 1995-07-17 107539 Standard Survey  Re-Survey LMB   16   1   2  6 
## 9 1995-07-17 107539 Standard Survey  Re-Survey LMB   16   1   4  4 
## 10 1995-07-17 107539 Standard Survey  Re-Survey LMB   16   1   5  2 
## # ... with 88 more rows