2017-09-04 48 views
1

我想给我的长格式数据集添加 4 个变量,然后再对它进行筛选。数据共包含 5 个年份(2016-2020),其中 2016 年的行数比其他年份多,所以我想把 2017 年到 2020 年的值分别汇总成新的列(变量),并把这些汇总值复制到 2016 年的每一行上。在 Excel 中,我会用 SUMIFS 来做到这一点。我用 dplyr 大致写出来了,但得到的结果全是 0。(问题标题:数据管理:如何添加条件变量)

# Attempted solution from the question: sum `value` per year into four new
# columns (T.age.2017 ... T.age.2020), then keep only the 2016 rows.
#
# NOTE: `time` is one of the grouping variables, so every group contains rows
# of a single year only. Inside a 2016 group, `value[time==2017]` (and likewise
# 2018-2020) selects nothing, and `sum()` of an empty vector is 0. The final
# `filter(time==2016)` keeps exactly those groups, which is why all four
# T.age.* columns come out as 0.
newdt <- dt %>% 
    group_by(time, country, age5, sex1, geo) %>% 
    summarise(T.age.2017 = sum(value[time==2017]), 
     T.age.2018 = sum(value[time==2018]), 
     T.age.2019 = sum(value[time==2019]), 
     T.age.2020 = sum(value[time==2020])) %>% 
    ungroup() %>% 
    filter(time==2016) 

这是我想要得到的结果:只保留 time == 2016 的行,其他年份的数据则作为汇总列保留下来:

time country geo age5 sex1 value T.age.2017 T.age.2018 
2016 AT  AT11 0 1  6137 420814  427950 
2016 AT  AT11 5 1  6582 411300  416616 
2016 AT  AT11 10 1  6922 419810  418522 
2016 AT  AT11 15 1  7461 444286  439986 
2016 AT  AT11 0 2  5839 420814  427950 
2016 AT  AT11 5 2  6354 411300  416616 
2016 AT  AT11 10 2  6552 419810  418522 
2016 AT  AT11 15 2  6769 444286  439986 
2016 AT  AT12 0 1  39017 420814  427950 

dput:

# Sample data in long format, as produced by dput(): a data.frame with 120 rows
# and the columns time, country, geo, age5, sex1 and value.
#   * rows 1-104:   time == 2016, one row per geo (13 levels) x sex1 (1, 2) x
#                   age5 (0, 5, 10, 15)
#   * rows 105-120: time 2017-2020, four rows per year (one per age5 value),
#                   all with geo level 1 ("AT") and sex1 = NA
# country is a single-level factor ("AT").
dt = structure(list(time = c(2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 
2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 
2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 
2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 
2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 
2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 
2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 
2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 
2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 
2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 
2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 
2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2017L, 
2017L, 2017L, 2017L, 2018L, 2018L, 2018L, 2018L, 2019L, 2019L, 
2019L, 2019L, 2020L, 2020L, 2020L, 2020L), country = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "AT", class = "factor"), 
geo = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 
9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 
11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 12L, 12L, 
12L, 12L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("AT", 
"AT1", "AT11", "AT12", "AT13", "AT2", "AT21", "AT22", "AT3", 
"AT31", "AT32", "AT33", "AT34"), class = "factor"), age5 = c(0L, 
5L, 10L, 15L, 0L, 5L, 10L, 15L, 0L, 5L, 10L, 15L, 0L, 5L, 
10L, 15L, 0L, 5L, 10L, 15L, 0L, 5L, 10L, 15L, 0L, 5L, 10L, 
15L, 0L, 5L, 10L, 15L, 0L, 5L, 10L, 15L, 0L, 5L, 10L, 15L, 
0L, 5L, 10L, 15L, 0L, 5L, 10L, 15L, 0L, 5L, 10L, 15L, 0L, 
5L, 10L, 15L, 0L, 5L, 10L, 15L, 0L, 5L, 10L, 15L, 0L, 5L, 
10L, 15L, 0L, 5L, 10L, 15L, 0L, 5L, 10L, 15L, 0L, 5L, 10L, 
15L, 0L, 5L, 10L, 15L, 0L, 5L, 10L, 15L, 0L, 5L, 10L, 15L, 
0L, 5L, 10L, 15L, 0L, 5L, 10L, 15L, 0L, 5L, 10L, 15L, 0L, 
5L, 10L, 15L, 0L, 5L, 10L, 15L, 0L, 5L, 10L, 15L, 0L, 5L, 
10L, 15L), sex1 = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 
1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 
1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 
2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 
2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 
2L, 2L, 2L, 2L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA), value = c(214538L, 210372L, 215218L, 
239274L, 200991L, 200273L, 203787L, 221272L, 94210L, 91724L, 
92439L, 100055L, 88171L, 87172L, 86792L, 93008L, 6137L, 6582L, 
6922L, 7461L, 5839L, 6354L, 6552L, 6769L, 39017L, 40381L, 
43136L, 47241L, 36855L, 38487L, 40292L, 42981L, 49056L, 44761L, 
42381L, 45353L, 45477L, 42331L, 39948L, 43258L, 40212L, 40464L, 
41996L, 47804L, 37323L, 38646L, 39861L, 44218L, 12266L, 12928L, 
13451L, 15108L, 11554L, 12204L, 12827L, 13938L, 27946L, 27536L, 
28545L, 32696L, 25769L, 26442L, 27034L, 30280L, 80116L, 78184L, 
80783L, 91415L, 75497L, 74455L, 77134L, 84046L, 37488L, 36674L, 
37456L, 42495L, 35219L, 34859L, 35959L, 38955L, 13774L, 13185L, 
14012L, 16206L, 12907L, 12629L, 13265L, 14534L, 18414L, 17944L, 
18544L, 20879L, 17482L, 17045L, 17609L, 19752L, 10440L, 10381L, 
10771L, 11835L, 9889L, 9922L, 10301L, 10805L, 420814L, 411300L, 
419810L, 444286L, 427950L, 416616L, 418522L, 439986L, 435796L, 
420548L, 420646L, 436501L, 444137L, 425721L, 420044L, 437446L 
)), .Names = c("time", "country", "geo", "age5", "sex1", 
"value"), class = "data.frame", row.names = c(NA, -120L)) 

回答

1

是你在寻找这样的东西吗?

# Keep the 2016 rows (one total per time/country/age5/sex1/geo) and attach the
# totals of every other year as additional columns, one column per year.
dt %>%
  group_by(time, country, age5, sex1, geo) %>%
  summarise(T.age = sum(value)) %>%
  filter(time == 2016) %>%
  left_join(
    # Right-hand table: totals of the non-2016 years, spread to wide format.
    dt %>%
      group_by(time, country, age5, geo) %>%
      summarise(T.age = sum(value)) %>%
      filter(time != 2016) %>%
      # Constant helper key so that every wide row matches the 2016 rows.
      mutate(time2 = 2016) %>%
      spread(time, T.age),
    # geo is not part of the join key, so the totals are repeated for every
    # geo on the left-hand side and both geo columns survive as geo.x / geo.y.
    by = c("time" = "time2", "country", "age5")
  ) %>%
  # Drop the geo column that came from the right-hand table.
  select(-geo.y) %>%
  arrange(time, country, geo.x, sex1, age5)

结果如下:

# A tibble: 104 x 10 
# Groups: time, country, age5, sex1 [8] 
    time country age5 sex1 geo.x T.age `2017` `2018` `2019` `2020` 
    <dbl> <fctr> <int> <int> <fctr> <int> <int> <int> <int> <int> 
1 2016  AT  0  1  AT 214538 420814 427950 435796 444137 
2 2016  AT  5  1  AT 210372 411300 416616 420548 425721 
3 2016  AT 10  1  AT 215218 419810 418522 420646 420044 
4 2016  AT 15  1  AT 239274 444286 439986 436501 437446 
5 2016  AT  0  2  AT 200991 420814 427950 435796 444137 
6 2016  AT  5  2  AT 200273 411300 416616 420548 425721 
7 2016  AT 10  2  AT 203787 419810 418522 420646 420044 
8 2016  AT 15  2  AT 221272 444286 439986 436501 437446 
9 2016  AT  0  1 AT1 94210 420814 427950 435796 444137 
10 2016  AT  5  1 AT1 91724 411300 416616 420548 425721 
# ... with 94 more rows 
+0

是的,正是这样,非常感谢! – Ale