2017-09-24 131 views
1

如何在使用扩散函数时保持数字的等级时展开?如何在保持列名顺序的同时扩展tidyr :: spread()?

library(tidyverse) 

data.frame(time = paste0("t_", 1:100)) %>% 
    rowwise() %>% 
    mutate(rnd = sample(1:100, size=1)) %>% 
    spread(time, rnd) 

上面显示的代码的执行结果的列名是t_1, t_11, t_100, ....。 我想按照数字顺序获取列名(t_1, t_2, t_3, ...)。

+3

您可以在末尾添加'%>%。[gtools :: mixedorder(names(。))]''。此外,在这里不需要'rowwise()',只需改变'mutate(rnd = sample(100))'('sample'是矢量化的)。 –

+0

尝试'library(tidyverse) data.frame(time = paste0(“t_”,1:100))%>% rowwise()%>% mutate(rnd = sample(1:100,size = 1 ))%>% mutate(time = factor(time,levels = paste0(“t_”,1:100)))%>% spread(time,rnd) –

回答

2

您可以尝试两件事情:

(1)让 “时间” 并用匹配你想要的顺序水平的因素:

data.frame(time = factor(paste0("t_", 1:100), levels = paste0("t_", 1:100))) %>% 
    rowwise() %>% 
    mutate(rnd = sample(1:100, size=1)) %>% 
    spread(time, rnd) 

(2)使用SELECT语句强制命令:

data.frame(time = paste0("t_", 1:100)) %>% 
    rowwise() %>% 
    mutate(rnd = sample(1:100, size=1)) %>% 
    spread(time, rnd) %>% 
    select(paste0("t_", 1:100)) 
0

这是一个保留列顺序的新函数。只需要一个小小的更改(请参阅注释):

my_spread <- function (data, key, value, fill = NA, convert = FALSE, drop = TRUE, 
      sep = NULL) { 
    key_col <- tidyr:::col_name(substitute(key)) 
    value_col <- tidyr:::col_name(substitute(value)) 
    tbl_df(my_spread_(data, key_col, value_col, fill = fill, convert = convert, 
        drop = drop, sep = sep)) 
} 

my_spread_ <- function (data, key_col, value_col, fill = NA, convert = FALSE, 
         drop = TRUE, sep = NULL) { 
    col <- data[key_col] 
    #col_id <- tidyr:::id(col, drop = drop)         # Old line 
    col_id <- seq_len(nrow(data))            # New line 1 
    attr(col_id, 'n') <- nrow(data)           # New line 2 
    col_labels <- tidyr:::split_labels(col, col_id, drop = drop) 
    rows <- data[setdiff(names(data), c(key_col, value_col))] 
    if (length(rows) == 0) { 
    row_id <- structure(1L, n = 1L) 
    row_labels <- as.data.frame(matrix(nrow = 1, ncol = 0)) 
    } 
    else { 
    row_id <- id(rows, drop = drop) 
    row_labels <- tidyr:::split_labels(rows, row_id, drop = drop) 
    rownames(row_labels) <- NULL 
    } 
    overall <- tidyr:::id(list(col_id, row_id), drop = FALSE) 
    n <- attr(overall, "n") 
    if (anyDuplicated(overall)) { 
    groups <- split(seq_along(overall), overall) 
    groups <- groups[vapply(groups, length, integer(1)) > 
         1] 
    str <- vapply(
     groups, 
     function(x) paste0("(", paste0(x, collapse = ", "), ")"), character(1) 
    ) 
    stop("Duplicate identifiers for rows ", paste(str, collapse = ", "), 
     call. = FALSE) 
    } 
    if (length(overall) < n) { 
    overall <- match(seq_len(n), overall, nomatch = NA) 
    } 
    else { 
    overall <- order(overall) 
    } 
    value <- data[[value_col]] 
    ordered <- value[overall] 
    if (!is.na(fill)) { 
    ordered[is.na(ordered)] <- fill 
    } 
    if (convert && !is.character(ordered)) { 
    ordered <- as.character(ordered) 
    } 
    dim(ordered) <- c(attr(row_id, "n"), attr(col_id, "n")) 
    colnames(ordered) <- enc2utf8(tidyr:::col_names(col_labels, sep = sep)) 
    ordered <- tidyr:::as_data_frame_matrix(ordered) 
    if (convert) { 
    ordered[] <- lapply(ordered, type.convert, as.is = TRUE) 
    } 
    tidyr:::append_df(row_labels, ordered) 
}