2017-09-15 95 views
2

我有一些数据，想在 R 中按分组计算汇总值，并整理成合适的格式。我已经尝试过 aggregate 以及 summaryBy 等函数，但都没有得到我想要的结果。（问题标题：在 R 中按分组逐个变量汇总数据）

数据如下：

# Example data: 48 observations with a numeric id, four numeric measurements
# (x1-x4) and a treatment label trt (14 x "A", 15 x "B", 13 x "C", 6 x "D").
# Each measurement vector is laid out one treatment group per line (A, B, C, D).
data <- data.frame(
  id = as.numeric(seq_len(48)),
  x1 = c(
    0.2846, 0.3741, 0.4208, 0.3756, 0.3476, 0.3664, 0.2852, 0.3537, 0.3116, 0.3124, 0.364, 0.3934, 0.3456, 0.3034,
    0.3139, 0.2766, 0.3034, 0.3159, 0.3648, 0.4046, 0.3961, 0.3451, 0.2059, 0.3184, 0.2481, 0.3503, 0.331, 0.3166, 0.3203,
    0.1868, 0.245, 0.1625, 0.2227, 0.196, 0.1697, 0.2064, 0.1369, 0.1938, 0.1498, 0.1315, 0.1523, 0.2151,
    0.168, 0.1427, 0.3083, 0.301, 0.2328, 0.2747
  ),
  x2 = c(
    -0.4364, -0.5262, -0.5338, -0.5037, -0.4758, -0.5003, -0.4359, -0.5002, -0.4027, -0.424, -0.4811, -0.5492, -0.3846, -0.3899,
    -0.4473, -0.3688, -0.3946, -0.4112, -0.4833, -0.4909, -0.4865, -0.368, 0.295, -0.3221, -0.2482, -0.5424, -0.5021, -0.4453, -0.3952,
    0.3915, 0.4472, 0.364, 0.436, 0.3877, 0.4077, 0.2737, 0.3104, 0.3514, 0.3256, 0.287, 0.3126, 0.3648,
    -0.2596, -0.1913, -0.3656, -0.4598, -0.3198, -0.3685
  ),
  x3 = c(
    0.6043, 0.5141, 0.4638, 0.486, 0.3691, 0.4104, 0.426, 0.3846, 0.3191, 0.4347, 0.5842, 0.4638, 0.4418, 0.523,
    0.5009, 0.4568, 0.5105, 0.5421, 0.4857, 0.4063, 0.391, 0.4114, 0.5189, 0.5248, 0.4942, 0.2855, 0.6107, 0.4712, 0.2009,
    0.4632, 0.4457, 0.3914, 0.4547, 0.4801, 0.4873, 0.5501, 0.4442, 0.4458, 0.4651, 0.5748, 0.5231, 0.4869,
    0.1769, 0.099, 0.5013, 0.4543, 0.4601, 0.4396
  ),
  x4 = c(
    0.4895, 0.6991, 0.6566, 0.6106, 0.6976, 0.6883, 0.6533, 0.6951, 0.6852, 0.5062, 0.5682, 0.6172, 0.5073, 0.6514,
    0.577, 0.5228, 0.6571, 0.6132, 0.4893, 0.7904, 0.6519, 0.6582, 0.6919, 0.6011, 0.6145, 0.5943, 0.4608, 0.5997, 0.4431,
    0.4082, 0.5641, 0.4535, 0.5448, 0.4632, 0.4237, 0.6187, 0.4115, 0.4995, 0.4504, 0.4103, 0.4511, 0.527,
    0.3654, 0.2537, 0.6317, 0.478, 0.5915, 0.5283
  ),
  trt = rep(c("A", "B", "C", "D"), times = c(14, 15, 13, 6))
)

我希望按如下方式汇总这些数据：

+----+-------------------------+-------------------------+-------------------------+-------------------------+
|    |            A            |            B            |            C            |            D            |
+----+------------+------------+------------+------------+------------+------------+------------+------------+
|    | Mean       | Std.Dev    | Mean       | Std.Dev    | Mean       | Std.Dev    | Mean       | Std.Dev    |
+----+------------+------------+------------+------------+------------+------------+------------+------------+
| X1 | 0.3456     | 0.04104    | 0.3207333  | 0.0514311  | 0.1821923  | 0.0350107  | 0.2379167  | 0.06966645 |
+----+------------+------------+------------+------------+------------+------------+------------+------------+
| X2 | -0.4674143 | 0.05489628 | -0.37406   | 0.2003379  | 0.3584308  | 0.05489583 | -0.3274333 | 0.0936547  |
+----+------------+------------+------------+------------+------------+------------+------------+------------+
| X3 | 0.4589214  | 0.07952784 | 0.45406    | 0.1036369  | 0.4778769  | 0.04866813 | 0.3552     | 0.1713025  |
+----+------------+------------+------------+------------+------------+------------+------------+------------+
| X4 | 0.6232571  | 0.0762495  | 0.5976867  | 0.0914621  | 0.4789231  | 0.06686731 | 0.4747667  | 0.1428023  |
+----+------------+------------+------------+------------+------------+------------+------------+------------+

我尝试过的方法之一是使用 dplyr 进行汇总，如下所示：

library(dplyr)

# Attempt from the question: group the four measurement columns (2-5) by
# treatment, compute mean and sd of every column, then transpose the result.
# The transposed result is a character matrix (see the printed output below),
# with one column per treatment and one row per variable/statistic pair.
t(summarise_each(group_by(data[, 2:5], data$trt), funs(mean, sd)))

但它生成的是下面这种格式：

  [,1]   [,2]   [,3]   [,4]   
data$trt "A"   "B"   "C"   "D"   
x1_mean "0.3456000" "0.3207333" "0.1821923" "0.2379167" 
x2_mean "-0.4674143" "-0.3740600" " 0.3584308" "-0.3274333" 
x3_mean "0.4589214" "0.4540600" "0.4778769" "0.3552000" 
x4_mean "0.6232571" "0.5976867" "0.4789231" "0.4747667" 
x1_sd "0.04104517" "0.05143110" "0.03501070" "0.06966645" 
x2_sd "0.05489628" "0.20033792" "0.05489583" "0.09365470" 
x3_sd "0.07952784" "0.10363689" "0.04866813" "0.17130249" 
x4_sd "0.07624950" "0.09146218" "0.06686731" "0.14280235" 

在 R 中能否实现我想要的效果？

回答

2

下面是一种实现方法：

# Build a wide summary table: one row per measurement variable (x1-x4) and,
# for every treatment, one column for the mean and one for the standard
# deviation (A_Mean, A_Std.Dev, B_Mean, ...).
#
# dplyr supplies select/group_by/summarise/ungroup and the pipe; tidyr supplies
# gather/unite/spread, so both packages must be attached.
library(dplyr)
library(tidyr)

data %>%
    # Namespace-qualified because other attached packages may mask select(),
    # which produces "unused argument (-id)".
    dplyr::select(-id) %>%
    # Long format: one row per (trt, variable, value).
    gather(row, val, -trt) %>%
    group_by(trt, row) %>%
    # Plain summarise() instead of summarise_all(funs(...)); funs() is deprecated.
    summarise(Mean = mean(val), `Std.Dev` = sd(val)) %>%
    # Drop the remaining grouping by trt before reshaping.
    ungroup() %>%
    # Stack the two statistics, then combine treatment and statistic into the
    # future column name, e.g. "A_Mean".
    gather(col, val, Mean, `Std.Dev`) %>%
    unite("col", trt, col) %>%
    spread(col, val)
# # A tibble: 4 x 9 
# row A_Mean A_Std.Dev B_Mean B_Std.Dev C_Mean C_Std.Dev D_Mean D_Std.Dev 
# * <chr> <dbl>  <dbl> <dbl>  <dbl> <dbl>  <dbl> <dbl>  <dbl> 
# 1 x1  0.346 0.0410 0.321 0.0514 0.182 0.0350 0.238 0.0697 
# 2 x2 -0.467 0.0549 -0.374 0.200 0.358 0.0549 -0.327 0.0937 
# 3 x3  0.459 0.0795 0.454 0.104 0.478 0.0487 0.355 0.171 
# 4 x4  0.623 0.0762 0.598 0.0915 0.479 0.0669 0.475 0.143 

您可以在末尾追加 %>% tibble::column_to_rownames("row")，把第一列转换为行名；不过并不推荐这样做。

+0

运行这段代码时我收到以下错误：`Error in select(., -id) : unused argument (-id)`。知道是什么原因吗？我没有改动你代码中的任何内容。 – Kuni

+0

@Kuni 请参阅 https://www.google.com/search?q=dplyr+"Error+in+select"+"unused+argument"。可能是与另一个包中的同名函数冲突。请尝试使用 `dplyr::select` 代替 `select`。 – lukeA

+0

我清空了 history 和 environment，但错误依然出现，不知道为什么。不过，我去掉了 select 这一步，改为直接在 data 中把该列删除，这样就可以运行了。这正是我在找的结果。虽然格式可能不完全符合我的需要，但有了它，我总可以在文字处理软件中把它调整成我想要的样子。谢谢 @lukeA，这非常有帮助。 – Kuni

1

这是一种使用基础 R 和 aggregate 的方法：

# For each measurement column (columns 2-5 of data), compute summary()
# (min, quartiles, median, mean, max) per treatment group. The result is a
# list with one data frame per column, named after the column.
# lapply() iterates over the data frame's columns directly; apply() would
# first coerce the data frame to a matrix.
lapply(
  data[, 2:5],
  function(x) aggregate(x, by = list(data$trt), FUN = summary)
)
$x1 
    Group.1 x.Min. x.1st Qu. x.Median x.Mean x.3rd Qu. x.Max. 
1  A 0.2846 0.3118 0.3506 0.3456 0.3722 0.4208 
2  B 0.2059 0.3086 0.3184 0.3207 0.3477 0.4046 
3  C 0.1315 0.1523 0.1868 0.1822 0.2064 0.2450 
4  D 0.1427 0.1842 0.2538 0.2379 0.2944 0.3083 

$x2 
    Group.1 x.Min. x.1st Qu. x.Median x.Mean x.3rd Qu. x.Max. 
1  A -0.5492 -0.5028 -0.4784 -0.4674 -0.4270 -0.3846 
2  B -0.5424 -0.4849 -0.4112 -0.3741 -0.3684 0.2950 
3  C 0.2737 0.3126 0.3640 0.3584 0.3915 0.4472 
4  D -0.4598 -0.3678 -0.3427 -0.3274 -0.2746 -0.1913 

$x3 
    Group.1 x.Min. x.1st Qu. x.Median x.Mean x.3rd Qu. x.Max. 
1  A 0.3191 0.4143 0.4528 0.4589 0.5071 0.6043 
2  B 0.2009 0.4088 0.4857 0.4541 0.5147 0.6107 
3  C 0.3914 0.4458 0.4651 0.4779 0.4873 0.5748 
4  D 0.0990 0.2426 0.4470 0.3552 0.4586 0.5013 

$x4 
    Group.1 x.Min. x.1st Qu. x.Median x.Mean x.3rd Qu. x.Max. 
1  A 0.4895 0.5788 0.6524 0.6233 0.6875 0.6991 
2  B 0.4431 0.5499 0.6011 0.5977 0.6545 0.7904 
3  C 0.4082 0.4237 0.4535 0.4789 0.5270 0.6187 
4  D 0.2537 0.3936 0.5032 0.4748 0.5757 0.6317