2017-08-08 95 views
1

我想按列对值进行排名。按列对值排名

我有以下数据帧:

# Reproducible example data: dput() output for `test`, a 4-row tibble with
# the columns Name, Margin and T1 through T19.
dput(test) 
structure(list(Name = c("A", "B", "C", "D"), Margin = c(744, 
3196.4722, 0, 394), T1 = c(420, 200, 2150, 70), T2 = c(630, 285, 
2365, 84), T3 = c(630, 335, 2580, 105), T4 = c(666, 410, 2795, 
128), T5 = c(2244, 2961.7931, 3010, 142), T6 = c(2244, 3652.472, 
3440, 151), T7 = c(2244, 3722.472, 3870, 168), T8 = c(2244, 3887.472, 
5160, 187), T9 = c(2244, 4112.472, 6450, 225), T10 = c(2244, 
4337.472, 6450, 225), T11 = c(798, 3567.472, 4300, 112), T12 = c(630, 
3582.472, 4300, 111), T13 = c(702, 3582.472, 4300, 112), T14 = c(3600, 
4637.472, 3440, 78), T15 = c(744, 3067.306, 2580, 274), T16 = c(744, 
2770.5666, 2580, 197), T17 = c(744, 3138.806, 2580, 80), T18 = c(2244, 
3920.0836, 3870, 401), T19 = c(2244, 2789.1117, 1290, 127)), .Names = c("Name", 
"Margin", "T1", "T2", "T3", "T4", "T5", "T6", "T7", "T8", "T9", 
"T10", "T11", "T12", "T13", "T14", "T15", "T16", "T17", "T18", 
"T19"), row.names = c(NA, -4L), class = c("tbl_df", "tbl", "data.frame" 
)) 

每行在 Name 列中都有唯一的 ID,我想对各列进行排名,以确定哪一列的值等于或至少最接近 Margin 列的值。

理想的输出将是:

Name Margin Closest_Column 
A  744.000  T15 

出现并列时,可以随机选取其中一列。

我尝试:

# Build one "rank_<column>" name for each of the columns 3:21.
nm1 <- paste("rank", names(test)[3:21], sep="_") 
# Rank the values within each of those columns (ties resolved by first
# occurrence) and store the ranks in the new rank_* columns. Note that the
# ranking is done per column and does not involve Margin.
test[nm1] <- mutate_all(test[3:21],funs(rank(., ties.method="first"))) 

回答

1

如果需要使用 tidyverse,一种方法是使用 rowwise,然后找出 Margin 列与其他各列之间绝对差值最小的位置(索引),从而得到对应的列名

# For every row, keep columns 1:2 and add Closest_column: the name of the
# column among 3:21 whose value has the smallest absolute difference from
# column 2. Inside do(), `.` refers to the current row; which.min() returns
# the first position of the minimum, so ties go to the leftmost column.
test %>% 
     rowwise() %>% 
     do(data.frame(.[1:2], Closest_column = names(.)[3:21][which.min(abs(.[[2]]- 
         unlist(.[3:21])))])) 
# A tibble: 4 x 3 
# Name Margin Closest_column 
#* <chr> <dbl>   <chr> 
#1  A 744.000   T15 
#2  B 3196.472   T17 
#3  C 0.000   T19 
#4  D 394.000   T18 

或者另一种选择是

library(tidyverse)
# Reshape T1:T19 into long form, then keep, for each Name, the single row
# whose value lies nearest to Margin; its key is the closest column.
test %>%
  gather(key = Closest_column, value = val, T1:T19) %>%
  group_by(Name) %>%
  slice(which.min(abs(Margin - val))) %>%
  select(-val)
# A tibble: 4 x 3 
# Groups: Name [4] 
# Name Margin Closest_column 
# <chr> <dbl>   <chr> 
#1  A 744.000   T15 
#2  B 3196.472   T17 
#3  C 0.000   T19 
#4  D 394.000   T18 

使用 base R 时,一种高效的办法是 max.col

# Negate the absolute gaps so that max.col() finds, per row, the column
# with the smallest distance to Margin; ties go to the first such column.
local({
  value_cols <- test[3:21]
  gap <- abs(value_cols - test[[2]])
  nearest <- max.col(-gap, ties.method = "first")
  cbind(test[1:2], Closest_column = names(value_cols)[nearest])
})
# Name Margin Closest_column 
#1 A 744.000   T15 
#2 B 3196.472   T17 
#3 C 0.000   T19 
#4 D 394.000   T18 
+0

再跟进问一下。如果我想把条件改为:val <= Margin,slice(which.min(abs(val <= Margin)))并不能给出正确的答案。能否指出我哪里错了? – Prometheus

+0

@Prometheus 你可以尝试 `gather(test, Closest_column, val, T1:T19) %>% group_by(Name) %>% slice(which(val <= Margin)[1])` – akrun

2

使用 cbind.data.frame 将前两列与一个新构造的列合并:新列取各列减去 Margin 后绝对值最小的那一列的名称:

# Row by row over every column except Name: the first element of each row
# is Margin, the remaining ones are the candidates; return the name of the
# candidate with the smallest absolute difference from Margin.
cbind(
  test[1:2],
  Closest_Column = apply(test[-1], 1, function(row) {
    candidates <- row[-1]
    names(candidates)[which.min(abs(candidates - row[1]))]
  })
)
    Name Margin Closest_Column 
1 A 744.000   T15 
2 B 3196.472   T17 
3 C 0.000   T19 
4 D 394.000   T18 
3

我会先转换为长格式

library(tidyr) 
library(dplyr) 

# Stack every column except Name and Margin into Variable/Value pairs, then
# report, per (Name, Margin) group, the Variable whose Value is nearest to
# Margin.
test %>%
  gather(key = Variable, value = Value, -(Name:Margin)) %>%
  group_by(Name, Margin) %>%
  summarise(
    Closest = Variable[which.min(abs(Value - Margin))]
  )

# A tibble: 4 x 3 
# Groups: Name [?] 
# Name Margin Closest 
# <chr> <dbl> <chr> 
# 1  A 744.000  T15 
# 2  B 3196.472  T17 
# 3  C 0.000  T19 
# 4  D 394.000  T18 

或者使用data.table

library(data.table)
# Melt a data.table copy of `test` to long form (columns 1:2 are the ids),
# then pick, per (Name, Margin), the variable whose value is nearest Margin.
# as.data.table() is used instead of setDT() so that `test` itself is not
# converted by reference: on a data.table, `test[1:2]` / `test[-1]` would
# select rows rather than columns and break the other snippets.
melt(as.data.table(test), id.vars = 1:2)[
  ,
  .(Closest = variable[which.min(abs(value - Margin))]),
  by = .(Name, Margin)
]
# Name Margin Closest 
# 1: A 744.000  T15 
# 2: B 3196.472  T17 
# 3: C 0.000  T19 
# 4: D 394.000  T18 
+0

这实际上非常直观。我应该想到它。 – Prometheus