2017-04-04 56 views
0

使用 R 按多个列的取值对数据进行透视(长表转宽表)。我有一个非常大的表,其结构与下面的示例数据类似。

# Example data in long format: one measurement per row (this literal looks
# like dput() output).
#   FullCloneName - factor with 13 clone identifiers
#   Domain        - factor with levels "VH" and "VK"
#   FieldName     - factor with 6 measurement names (observed mass and
#                   relative abundance fields for heavy and light chains)
#   ResultValue   - integer value recorded for that measurement
# The frame has 39 rows. Some FullCloneName/Domain/FieldName combinations
# occur more than once (for example rows 24, 25 and 26 share the same key).
RF1 <- structure(list(FullCloneName = structure(c(1L, 1L, 1L, 1L, 1L, 
1L, 2L, 2L, 2L, 2L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 6L, 7L, 8L, 9L, 
10L, 11L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
12L, 12L, 12L, 12L, 13L), .Label = c("010.4279.5G6", "010.X105732.87-272-7B6.B10", 
"1068.Z32346.20H4.9.8", "1100.2868.6A1.E2", "1100.2990.16E9.H6", 
"1100.3113.11A6.F1", "1100.3114.16H1.F11", "1100.3115.14F11.H4", 
"1100.3115.19A9.F10", "1100.3117.2F7.B11", "1100.Z78346.13E8.C10", 
"1100.Z78346.18G2.F3.G4", "1100.Z78347.18C6.B11"), class = "factor"), 
Domain = structure(c(2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 
1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L 
), .Label = c("VH", "VK"), class = "factor"), FieldName = structure(c(5L, 
6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 3L, 5L, 1L, 5L, 6L, 1L, 3L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 5L, 6L, 5L, 6L, 
1L, 1L, 3L, 1L, 3L, 1L, 3L, 5L), .Label = c("HeavyChainObservedMass1", 
"HeavyChainObservedMass2", "HeavyChainRelativeAbundance1", 
"HeavyChainRelativeAbundance2", "LightChainObservedMass1", 
"LightChainRelativeAbundance1"), class = "factor"), ResultValue = c(23838L, 
100L, 49057L, 49185L, 82L, 18L, 24219L, 100L, 49191L, 100L, 
23787L, 50108L, 24218L, 100L, 49421L, 100L, 24349L, 24131L, 
23911L, 24197L, 24247L, 24345L, 23925L, 23675L, 23676L, 23675L, 
100L, 23675L, 100L, 23675L, 100L, 49828L, 49829L, 100L, 49830L, 
100L, 49830L, 100L, 23850L)), .Names = c("FullCloneName", 
"Domain", "FieldName", "ResultValue"), class = "data.frame", row.names = c(NA, 
-39L)) 

library(reshape2)

# Reshape RF1 from long to wide: one row per FullCloneName/Domain pair and
# one column per FieldName, filled with ResultValue.
#
# dcast() needs at most one value per output cell. When a key combination is
# repeated and no fun.aggregate is supplied, it falls back to length() and the
# result holds counts instead of the measurements. Repeated keys are therefore
# reported and only their first occurrence is kept before casting.
key_cols <- c("FullCloneName", "Domain", "FieldName")
is_repeat <- duplicated(RF1[, key_cols])
if (any(is_repeat)) {
  warning(
    sum(is_repeat), " row(s) repeat a FullCloneName/Domain/FieldName ",
    "combination; keeping the first occurrence of each.",
    call. = FALSE
  )
}

# value.var is spelled out so dcast() does not have to guess the value column.
pivotRF1 <- dcast(
  RF1[!is_repeat, ],
  FullCloneName + Domain ~ FieldName,
  value.var = "ResultValue"
)

我试图对数据做透视(pivot),但没有成功。请指教!

+2

'dcast(RF, FCN + Test ~ FN)'? – rawr

+0

使用 tidyr:'RF %>% spread(FN, ResultValue)' – alistaire

+0

Hi @rawr,您的解决方案在示例数据上效果非常好。但当我对真实数据使用同样的方法时,仍然遇到这个问题。有什么办法可以把部分真实数据分享给您,请您那边帮忙检查一下吗? – RanonKahn

回答

2

您的数据集中有错误。只要确保每个值只对应唯一的一个变量组合,就应该没有问题了。

例如,见

> RF1[duplicated(RF1[, 1:3]), ] 
      FullCloneName Domain     FieldName ResultValue 
25 1100.Z78346.18G2.F3.G4  VK  LightChainObservedMass1  23676 
26 1100.Z78346.18G2.F3.G4  VK  LightChainObservedMass1  23675 
28 1100.Z78346.18G2.F3.G4  VK  LightChainObservedMass1  23675 
29 1100.Z78346.18G2.F3.G4  VK LightChainRelativeAbundance1   100 
30 1100.Z78346.18G2.F3.G4  VK  LightChainObservedMass1  23675 
31 1100.Z78346.18G2.F3.G4  VK LightChainRelativeAbundance1   100 
33 1100.Z78346.18G2.F3.G4  VH  HeavyChainObservedMass1  49829 
35 1100.Z78346.18G2.F3.G4  VH  HeavyChainObservedMass1  49830 
36 1100.Z78346.18G2.F3.G4  VH HeavyChainRelativeAbundance1   100 
37 1100.Z78346.18G2.F3.G4  VH  HeavyChainObservedMass1  49830 
38 1100.Z78346.18G2.F3.G4  VH HeavyChainRelativeAbundance1   100 

注意,第 25、26、28、30 行几乎相同(它们具有相同的 FieldName 和 Domain)。第 29 和 31 行也是如此,等等……

@rawr 提出的方法应该是可行的。

> xy <- data.frame(var1 = rep(LETTERS[1:3], each = 4), 
+     var2 = rep(letters[1:6], each = 2), 
+     var3 = rep(1:2, times = 6), 
+     value = round(rchisq(12, df = 20))) 

> xy 
    var1 var2 var3 value 
1  A a 1 17 
2  A a 2 22 
3  A b 1  9 
4  A b 2 21 
5  B c 1 21 
6  B c 2 10 
7  B d 1  9 
8  B d 2 29 
9  C e 1 24 
10 C e 2 20 
11 C f 1 16 
12 C f 2 18 

> dcast(xy, var1 + var2 ~ var3) 
    var1 var2 1 2 
1 A a 17 22 
2 A b 9 21 
3 B c 21 10 
4 B d 9 29 
5 C e 24 20 
6 C f 16 18