2014-10-30 61 views
1

我有以下设置:每个名称都对应一个组号。现在我想知道每个组有多少成员,以及具体是哪些成员(即每个组的元素)。

1030 3083 1900 4778 1714 3394 1253 2838 4463 3350 1334 5400 890 3048 2366 3848 1684 2918 3655 
    1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 
5706 3592 4875 222 4307 2785 1223 5283 4249 2115 2953 1448 5639 2913 4352 1435 545 6030 4985 
    20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 
5600 5250 2857 5480 2835 554 4748 2723 330 1720 859 5832 4109 3692 3958 4442 3385 263 2464 
    39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 
4687 72 318 519 2639 2534 1994 4376 6159 132 4067 3729 5390 2609 3496 4134 3436 5827 3028 
    58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 
310 1255 3661 5205 858 5835 1975 949 5291 888 193 5393 113 2393 2859 3225 4110 3298 2207 
    60 77 78 79 80 81 82 83 84 85 86 70 87 88 89 90 91 92 93 
5379 4684 5356 4969 6036 587 508 212 5189 473 5197 4504 3336 3962 906 41 1096 4518 3607 
    94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 
3750 3516 6075 5319 6126 902 3596 861 6119 4899 2447 5798 5881 2352 1371 6107 487 1472 5261 
113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 
2824 3314 481 2127 4661 46 184 4854 5194 2362 5167 5685 2102 5470 5934 1860 1883 1297 3829 
132 133 134 135 136 137 138 139 140 15 141 142 143 144 145 146 147 148 149 
952 3954 6162 376 4232 4901 5136 4374 6077 1807 1201 2726 4032 1597 2001 3208 608 5368 4949 
150 151 152 153 154 155 156 157 121 158 159 160 161 162 163 164 165 96 166 
3884 4783 3354 4450 3319 2139 4451 3568 3940 5472 4982 1214 4005 3271 1638 5657 1260 4603 3913 
167 168 169 170 171 172 173 174 175 176 177 178 111 179 180 181 182 183 184 
815 5576 4091 3425 5917 3969 2705 5585 392 5628 
185 186 187 188 189 190 191 192 193 194 

因此,例如, 组60中含有 “310” 和 “318”

结构:

structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 
13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 
26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 
39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 
52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 
65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 60L, 
77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 85L, 86L, 70L, 87L, 88L, 
89L, 90L, 91L, 92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 
101L, 102L, 103L, 104L, 105L, 106L, 107L, 108L, 109L, 110L, 111L, 
112L, 113L, 114L, 115L, 116L, 117L, 118L, 119L, 120L, 121L, 122L, 
123L, 124L, 125L, 126L, 127L, 128L, 129L, 130L, 131L, 132L, 133L, 
134L, 135L, 136L, 137L, 138L, 139L, 140L, 15L, 141L, 142L, 143L, 
144L, 145L, 146L, 147L, 148L, 149L, 150L, 151L, 152L, 153L, 154L, 
155L, 156L, 157L, 121L, 158L, 159L, 160L, 161L, 162L, 163L, 164L, 
165L, 96L, 166L, 167L, 168L, 169L, 170L, 171L, 172L, 173L, 174L, 
175L, 176L, 177L, 178L, 111L, 179L, 180L, 181L, 182L, 183L, 184L, 
185L, 186L, 187L, 188L, 189L, 190L, 191L, 192L, 193L, 194L), .Names = c("1030", 
"3083", "1900", "4778", "1714", "3394", "1253", "2838", "4463", 
"3350", "1334", "5400", "890", "3048", "2366", "3848", "1684", 
"2918", "3655", "5706", "3592", "4875", "222", "4307", "2785", 
"1223", "5283", "4249", "2115", "2953", "1448", "5639", "2913", 
"4352", "1435", "545", "6030", "4985", "5600", "5250", "2857", 
"5480", "2835", "554", "4748", "2723", "330", "1720", "859", 
"5832", "4109", "3692", "3958", "4442", "3385", "263", "2464", 
"4687", "72", "318", "519", "2639", "2534", "1994", "4376", "6159", 
"132", "4067", "3729", "5390", "2609", "3496", "4134", "3436", 
"5827", "3028", "310", "1255", "3661", "5205", "858", "5835", 
"1975", "949", "5291", "888", "193", "5393", "113", "2393", "2859", 
"3225", "4110", "3298", "2207", "5379", "4684", "5356", "4969", 
"6036", "587", "508", "212", "5189", "473", "5197", "4504", "3336", 
"3962", "906", "41", "1096", "4518", "3607", "3750", "3516", 
"6075", "5319", "6126", "902", "3596", "861", "6119", "4899", 
"2447", "5798", "5881", "2352", "1371", "6107", "487", "1472", 
"5261", "2824", "3314", "481", "2127", "4661", "46", "184", "4854", 
"5194", "2362", "5167", "5685", "2102", "5470", "5934", "1860", 
"1883", "1297", "3829", "952", "3954", "6162", "376", "4232", 
"4901", "5136", "4374", "6077", "1807", "1201", "2726", "4032", 
"1597", "2001", "3208", "608", "5368", "4949", "3884", "4783", 
"3354", "4450", "3319", "2139", "4451", "3568", "3940", "5472", 
"4982", "1214", "4005", "3271", "1638", "5657", "1260", "4603", 
"3913", "815", "5576", "4091", "3425", "5917", "3969", "2705", 
"5585", "392", "5628")) 
+0

你可以发布'dput(df)'吗?我不知道如何读取您的数据 – 2014-10-30 13:32:35

回答

2

由于组号是向量的值,而各成员的标识是向量的名称,因此可以按 x 对 names(x) 进行拆分:

# Partition the member names by their group number: the values of x act as
# the grouping factor, so each list element holds the members of one group.
ss <- split(x = names(x), f = x)
# Look the group up by its ID as a character string.
ss[["60"]]
## [1] "318" "310"

各组的元素数量:

# Count the members of each group. vapply() pins the result to one integer
# per group, so the result is always a named integer vector; sapply() would
# silently return an empty list when ss contains no groups.
lengthvec <- vapply(ss, length, integer(1))
# Index by the group ID as a character string, not by numeric position.
lengthvec["60"]
## 60 
## 2 

注意要用字符(组 ID)进行索引,而不是用数字(元素在向量或列表中的位置)。

4

这里有一个更通用的解决方案,它提供了完整的结果(假设df是您的数据集)

library(data.table)

# Wrap the named vector in a one-column data frame called "Group"; the
# vector's names become the row names.
members_df <- setNames(data.frame(df), "Group")
# Copy the row names into a regular column so they can be aggregated.
members_df <- transform(members_df, Members = row.names(members_df))
# One row per group: its size and a comma-separated list of its members.
df2 <- setDT(members_df)[
  ,
  list(Size = .N, Members = paste(Members, collapse = ", ")),
  by = Group
]
df2
# Group Size Members 
# 1: 1 1 1030 
# 2: 2 1 3083 
# 3: 3 1 1900 
# 4: 4 1 4778 
# 5: 5 1 1714 
# ---     
# 190: 190 1 3969 
# 191: 191 1 2705 
# 192: 192 1 5585 
# 193: 193 1  392 
# 194: 194 1 5628 

或者按照 @KFB 的建议,可以简写为:

# Convert the named vector directly to a data.table; keep.rownames = TRUE
# keeps the names as a column, referred to below as `rn`.
dt <- as.data.table(df, keep.rownames = TRUE)
# Rename the second column (the group numbers) by reference.
setnames(dt, 2, "Group")
# One row per group: its size and a comma-separated list of its members.
dt2 <- dt[, .(Size = .N, Members = paste(rn, collapse = ", ")), by = Group]
+2

或者先执行 `dt <- as.data.table(df, keep.rownames = TRUE)`,然后执行 `setnames(dt, c("rn", "df"), c("member", "group"))`,即可得到一个便于处理的数据表。 – KFB 2014-10-30 14:07:29

+0

@KFB,感谢'keep.rownames',我其实并没有提到那个 – 2014-10-30 14:20:36