2017-05-09 82 views
2

如何在 tibble/数据框中添加一列,用 rnorm 生成,且其参数取自其他列的值?我有以下数据框:

library(tidyverse) 
# Example data: 30 rows in three `var` groups (X, Y, Z) of ten rows each.
# The four mod_* columns are constant within a group; `vec` differs per row.
# The tibble classes are attached directly via structure(), so this
# statement itself needs nothing beyond base R.
df <- structure(
  list(
    var = rep(c("X", "Y", "Z"), each = 10),
    # Per-group mean; every Z row uses 2.78544973796179 (one entry in the
    # original listing had lost its final digit).
    mod_est_mean = rep(
      c(0.649790620181318, 1.65651567796795, 2.78544973796179),
      each = 10
    ),
    mod_est_sd = rep(
      c(0.37898907459421, 1.27340261798159, 2.38265470031565),
      each = 10
    ),
    mod_std_mean = rep(
      c(0.119846868403716, 0.40268526512431, 0.753461573070337),
      each = 10
    ),
    mod_std_error = rep(
      c(0.0847445333522391, 0.284741481653302, 0.532777787681519),
      each = 10
    ),
    vec = c(
      # X
      0.626453810742332, 0.183643324222082, 0.835628612410047,
      1.59528080213779, 0.329507771815361, 0.820468384118015,
      0.487429052428485, 0.738324705129217, 0.575781351653492,
      0.305388387156356,
      # Y
      3.0235623369017, 0.779686472822862, 1.24248116108361,
      4.429399774355, 2.24986183628622, 0.0898672180304617,
      0.0323805261978922, 1.8876724213706, 1.64244239019618,
      1.18780264243502,
      # Z
      3.67590948643287, 3.12854520292427, 0.298259933460762,
      7.95740678345349, 2.47930299157884, 0.224514958116003,
      0.623182026821317, 5.8830095355971, 1.91260022043448,
      1.67176624079881
    ),
    dist_name = rep("normal", 30)
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  # Compact row-name form for 30 automatic row names.
  row.names = c(NA, -30L)
)

它看起来像这样:

df %>% print(n = 100)  # the pipe already supplies df as the first argument; n = 100 asks for up to 100 rows
# A tibble: 30 × 7 
    var mod_est_mean mod_est_sd mod_std_mean mod_std_error  vec dist_name 
    <chr>  <dbl>  <dbl>  <dbl>   <dbl>  <dbl>  <chr> 
1  X 0.6497906 0.3789891 0.1198469 0.08474453 0.62645381 normal 
2  X 0.6497906 0.3789891 0.1198469 0.08474453 0.18364332 normal 
3  X 0.6497906 0.3789891 0.1198469 0.08474453 0.83562861 normal 
4  X 0.6497906 0.3789891 0.1198469 0.08474453 1.59528080 normal 
5  X 0.6497906 0.3789891 0.1198469 0.08474453 0.32950777 normal 
6  X 0.6497906 0.3789891 0.1198469 0.08474453 0.82046838 normal 
7  X 0.6497906 0.3789891 0.1198469 0.08474453 0.48742905 normal 
8  X 0.6497906 0.3789891 0.1198469 0.08474453 0.73832471 normal 
9  X 0.6497906 0.3789891 0.1198469 0.08474453 0.57578135 normal 
10  X 0.6497906 0.3789891 0.1198469 0.08474453 0.30538839 normal 
11  Y 1.6565157 1.2734026 0.4026853 0.28474148 3.02356234 normal 
12  Y 1.6565157 1.2734026 0.4026853 0.28474148 0.77968647 normal 
13  Y 1.6565157 1.2734026 0.4026853 0.28474148 1.24248116 normal 
14  Y 1.6565157 1.2734026 0.4026853 0.28474148 4.42939977 normal 
15  Y 1.6565157 1.2734026 0.4026853 0.28474148 2.24986184 normal 
16  Y 1.6565157 1.2734026 0.4026853 0.28474148 0.08986722 normal 
17  Y 1.6565157 1.2734026 0.4026853 0.28474148 0.03238053 normal 
18  Y 1.6565157 1.2734026 0.4026853 0.28474148 1.88767242 normal 
19  Y 1.6565157 1.2734026 0.4026853 0.28474148 1.64244239 normal 
20  Y 1.6565157 1.2734026 0.4026853 0.28474148 1.18780264 normal 
21  Z 2.7854497 2.3826547 0.7534616 0.53277779 3.67590949 normal 
22  Z 2.7854497 2.3826547 0.7534616 0.53277779 3.12854520 normal 
23  Z 2.7854497 2.3826547 0.7534616 0.53277779 0.29825993 normal 
24  Z 2.7854497 2.3826547 0.7534616 0.53277779 7.95740678 normal 
25  Z 2.7854497 2.3826547 0.7534616 0.53277779 2.47930299 normal 
26  Z 2.7854497 2.3826547 0.7534616 0.53277779 0.22451496 normal 
27  Z 2.7854497 2.3826547 0.7534616 0.53277779 0.62318203 normal 
28  Z 2.7854497 2.3826547 0.7534616 0.53277779 5.88300954 normal 
29  Z 2.7854497 2.3826547 0.7534616 0.53277779 1.91260022 normal 
30  Z 2.7854497 2.3826547 0.7534616 0.53277779 1.67176624 normal 

我想添加一个新列 new_vec,其值按每个 var 分组,根据 mod_est_mean 和 mod_est_sd 用 rnorm 生成。

例如,对于 X,我们这样调用 rnorm:

> set.seed(1) 
> rnorm(nrow(filter(df, var == "X")), mean = 0.6497906, sd = 0.3789891) 
[1] 0.4123714 0.7193894 0.3330965 1.2543846 0.7746705 0.3388420 0.8345209 0.9296076 0.8680055 0.5340517 

这样,最终得到的数据框如下所示:

 var mod_est_mean mod_est_sd mod_std_mean mod_std_error  vec dist_name new_vec 
1  X 0.6497906 0.3789891 0.1198469 0.08474453 0.62645381 normal 0.4123714 
2  X 0.6497906 0.3789891 0.1198469 0.08474453 0.18364332 normal 0.7193894 
3  X 0.6497906 0.3789891 0.1198469 0.08474453 0.83562861 normal 0.3330965 
4  X 0.6497906 0.3789891 0.1198469 0.08474453 1.59528080 normal 1.2543846 
5  X 0.6497906 0.3789891 0.1198469 0.08474453 0.32950777 normal 0.7746705 
6  X 0.6497906 0.3789891 0.1198469 0.08474453 0.82046838 normal 0.3388420 
7  X 0.6497906 0.3789891 0.1198469 0.08474453 0.48742905 normal 0.8345209 
8  X 0.6497906 0.3789891 0.1198469 0.08474453 0.73832471 normal 0.9296076 
9  X 0.6497906 0.3789891 0.1198469 0.08474453 0.57578135 normal 0.8680055 
10  X 0.6497906 0.3789891 0.1198469 0.08474453 0.30538839 normal 0.5340517 

怎样才能对 X、Y、Z 都做到这一点?

最好使用tidyverse。

+1

一个选项:`library(tidyverse); df %>% mutate(new_vec = map2_dbl(mod_est_mean, mod_est_sd, rnorm, n = 1))` – alistaire

回答

1

按 `var` 分组后调用 rnorm:将 `n` 指定为 n()(即该组的行数),`mean` 取 `mod_est_mean` 的第一个元素(因为它在每个组内都相同),`sd` 取 `mod_est_sd` 的第一个元素。

# Fix the RNG seed so the random draws are reproducible.
set.seed(1) 
# Within each `var` group, draw n() values (one per row of the group) from a
# normal distribution whose mean and sd are taken from the group's first
# mod_est_mean / mod_est_sd entries. Ungroup afterwards so later verbs are
# not silently evaluated per group.
df %>% 
    group_by(var) %>% 
    mutate(new_vec = rnorm(n(), mean = first(mod_est_mean), sd = first(mod_est_sd))) %>% 
    ungroup() 
# var mod_est_mean mod_est_sd mod_std_mean mod_std_error  vec dist_name new_vec 
# <chr>  <dbl>  <dbl>  <dbl>   <dbl>  <dbl>  <chr>  <dbl> 
#1  X 0.6497906 0.3789891 0.1198469 0.08474453 0.6264538 normal 0.4123715 
#2  X 0.6497906 0.3789891 0.1198469 0.08474453 0.1836433 normal 0.7193894 
#3  X 0.6497906 0.3789891 0.1198469 0.08474453 0.8356286 normal 0.3330965 
#4  X 0.6497906 0.3789891 0.1198469 0.08474453 1.5952808 normal 1.2543846 
#5  X 0.6497906 0.3789891 0.1198469 0.08474453 0.3295078 normal 0.7746705 
#6  X 0.6497906 0.3789891 0.1198469 0.08474453 0.8204684 normal 0.3388421 
#7  X 0.6497906 0.3789891 0.1198469 0.08474453 0.4874291 normal 0.8345209 
#8  X 0.6497906 0.3789891 0.1198469 0.08474453 0.7383247 normal 0.9296076 
#9  X 0.6497906 0.3789891 0.1198469 0.08474453 0.5757814 normal 0.8680055 
#10  X 0.6497906 0.3789891 0.1198469 0.08474453 0.3053884 normal 0.5340518 
# ... with 20 more rows 
相关问题