使用 `by`
可以考虑使用基础包中的 `by` 函数，它会针对 mgr_id 的每一个取值（而不仅仅是 C）创建一个数据框列表：
方案1
# Solution 1: for every distinct mgr_id in `df`, build a two-column data frame
# (mgr_id, emp_id) holding that manager's direct reports followed by the
# reports of those reports. Every row is labelled with the top-level manager.
# `df` must provide the columns `mgr_id` and `emp_id`.
dfList <- by(df, df$mgr_id, function(i) {
  # Suffix the subset's columns with "_" so they cannot collide with the
  # columns of `df` in the self-joins below.
  names(i) <- paste0(names(i), "_")

  # Direct reports: rows of the subset whose manager also appears as an
  # emp_id in `df`. Columns are picked by name so that extra or reordered
  # columns in `df` cannot shift the selection.
  child <- merge(i, df, by.x = "mgr_id_", by.y = "emp_id")
  child <- child[c("mgr_id_", "emp_id_")]

  # Second level: rows of `df` managed by one of the direct reports, keeping
  # the top-level manager id next to the second-level employee id.
  grandchild <- merge(child, df, by.x = "emp_id_", by.y = "mgr_id")
  grandchild <- grandchild[c("mgr_id_", "emp_id")]

  # Strip the temporary trailing underscore so both pieces share the same
  # column names and can be stacked.
  names(child) <- sub("_$", "", names(child))
  names(grandchild) <- sub("_$", "", names(grandchild))

  rbind(child, grandchild)
})
# Inspect the entry built for manager "C"; the rows below are the output
# shown for the example data (direct reports first, then second-level
# reports, all labelled with manager C).
dfList$C
#   mgr_id emp_id
# 1      C      D
# 2      C      E
# 3      C      F
# 4      C     D1
# 5      C     D2
# 6      C     E1
方案2（更改 grandchild 中所选取的列，然后重命名第一列）
# Solution 2: for every distinct mgr_id in `df`, build a two-column data frame
# (mgr_id, emp_id) holding that manager's direct reports followed by the
# reports of those reports. Second-level rows are labelled with their
# immediate manager (the direct report), not with the top-level manager.
# `df` must provide the columns `mgr_id` and `emp_id`.
dfList <- by(df, df$mgr_id, function(i) {
  # Suffix the subset's columns with "_" so they cannot collide with the
  # columns of `df` in the self-joins below.
  names(i) <- paste0(names(i), "_")

  # Direct reports: rows of the subset whose manager also appears as an
  # emp_id in `df`. Columns are picked by name so that extra or reordered
  # columns in `df` cannot shift the selection.
  child <- merge(i, df, by.x = "mgr_id_", by.y = "emp_id")
  child <- child[c("mgr_id_", "emp_id_")]

  # Second level: rows of `df` managed by one of the direct reports, keeping
  # the direct report's id (emp_id_) as the manager of each row.
  grandchild <- merge(child, df, by.x = "emp_id_", by.y = "mgr_id")
  grandchild <- grandchild[c("emp_id_", "emp_id")]

  # Strip the temporary trailing underscore from the direct-report columns.
  names(child) <- sub("_$", "", names(child))
  # The first grandchild column holds the immediate manager, so name the
  # columns explicitly to line up with `child` for rbind().
  names(grandchild) <- c("mgr_id", "emp_id")

  rbind(child, grandchild)
})
# Inspect the entry built for manager "C"; the rows below are the output
# shown for the example data (direct reports of C first, then second-level
# reports labelled with their immediate manager).
dfList$C
#   mgr_id emp_id
# 1      C      D
# 2      C      E
# 3      C      F
# 4      D     D1
# 5      D     D2
# 6      E     E1
谢谢你们的回复，这两种方案对我提供的虚拟数据以及我手头的演示 CSV 都很有帮助。我会在真实数据上试一下，并尽快更新结果。 – Vinay