R 中 merge() 与 match() 函数之间的差异

大家好，我删除了上一篇帖子，以便为我的问题提供一个可重复的示例。我正在使用下面的数据框 a1（dput 结构）：

structure(list(r04_numero_operacion = c("0050475725", "0050490602", 
"0050491033", "0050496386", "0050518985", "0050630090", "0050631615", 
"0060235906", "0060238732", "0060241333", "0060244391", "0060245813", 
"0060260056", "0060266356", "0800041441", "0800054041", "0800055382", 
"0800058554", "2020200062", "2020200073", "CAR1010001706000", 
"CAR1010001795000", "CAR1010001803000", "CAR1010001871000", "CAR1010001962000", 
"CAR1010002002000", "CAR1010002120000", "CAR1010002189000", "CAR1010002215000", 
"CAR1010002250000"), perdida3 = c(523.12, 265.43, 8371.66, 5242.13, 
4960.51, 8473.27, 3743.45, 1283.32, 2229.25, 8001.27, 8653.94, 
3670.13, 4536.02, 8216.55, 2481.36, 288.94, 1637.28, 4566.89, 
1573.63, 11217.92, 0, 0, 0, 0, 0, 0, 0, 0, 9633.9, 0), Saldo = c(1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 288.94, 1637.28, 4566.89, 
1, 1, 481.59, 299.52, 258.13, 603.84, 231.61, 631.68, 220.6, 
210.54, 1, 1224.44), Bvencida = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 603.84, 0, 631.68, 
0, 0, 0, 0), Cvencida = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1224.44), 
    Dvencida = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), vencida = c(1, 
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 288.94, 1637.28, 
    4566.89, 1, 1, 0, 0, 0, 603.84, 0, 631.68, 0, 0, 1, 1224.44 
    ), V1 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)), .Names = c("r04_numero_operacion", 
"perdida3", "Saldo", "Bvencida", "Cvencida", "Dvencida", "vencida", 
"V1"), codepage = 1252L, row.names = c(NA, 30L), class = "data.frame")

而且a2数据帧（dput结构）：

structure(list(r04_numero_operacion = c("0050475725", "0050490602", 
"0050491033", "0050496386", "0050518985", "0050630090", "0050631615", 
"0060235906", "0060238732", "0060241333", "0060244391", "0060245813", 
"0060260056", "0060266356", "0800041441", "0800054041", "0800055382", 
"0800058554", "2020200073", "CAR1010002002000", "CAR1010002189000", 
"CAR1010002215000", "CAR1010002250000", "CAR1010002264000", "CAR1010002297000", 
"CAR1010002401000", "CAR1010002412000", "CAR1010002436000", "CAR1010002529000", 
"CAR1010002709000"), perdida3 = c(523.12, 265.43, 8371.66, 5242.13, 
4960.51, 8473.27, 3743.45, 1283.32, 2229.25, 8001.27, 8653.94, 
3670.13, 4536.02, 8216.55, 2481.36, 288.94, 1637.28, 4566.89, 
11217.92, 0, 0, 9633.9, 0, 0, 0, 0, 0, 0, 0, 0), Saldo = c(1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 288.94, 1637.28, 4566.89, 
1, 317.72, 210.54, 1, 868.93, 242.91, 298.78, 120.63, 255.01, 
357.68, 284.08, 308.83), Bvencida = c(0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 317.72, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0), Cvencida = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 868.93, 0, 0, 0, 0, 0, 0, 0), Dvencida = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0), vencida = c(1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 288.94, 1637.28, 4566.89, 1, 317.72, 0, 
1, 868.93, 0, 0, 0, 0, 0, 0, 0), V2 = c(2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2)), .Names = c("r04_numero_operacion", "perdida3", "Saldo", 
"Bvencida", "Cvencida", "Dvencida", "vencida", "V2"), class = "data.frame", row.names = c(NA, 
30L))

我的问题出现在使用 merge() 和 match() 函数时。merge() 比 match() 功能更强大，但是当我使用 merge() 时，我无法得到与 match() 相同的结果。首先，我用下面的代码对 a2 和 a1 调用 merge() 来创建 DF：

DF=merge(a2,a1,all.x=TRUE)

它把 a1 中的 V1 变量添加到了 DF 中，我得到的 DF$V1 摘要如下：

Min. 1st Qu. Median Mean 3rd Qu. Max. NA's 
    1  1  1  1  1  1  9

之后，我创建了 a2 的一个副本并命名为 DF，然后按 r04_numero_operacion 对 r04_numero_operacion 进行匹配，用下面的代码把 V1 变量从 a1 带到 a2：

a2$V1<-a1[match(a2$r04_numero_operacion,a1$r04_numero_operacion),"V1"]

它把 V1 添加到了 DF 中，但结果与 merge() 方式得到的不同。在 match() 方案中，我得到的 DF$V1 摘要如下：

Min. 1st Qu. Median Mean 3rd Qu. Max. NA's 
    1  1  1  1  1  1  7

我的问题是：我希望用 merge() 函数得到与 match() 相同的结果，因为 merge() 比 match() 功能更强大。谢谢你的帮助。

来源

2013-10-31 Duck

亲爱的@RomanLuštrik我做了一个可重复的例子。在此先感谢 – Duck

在使用match(a2$r04_numero_operacion,a1$r04_numero_operacion)时，只有a2$r04_numero_operacion的值与a1中的对应列进行匹配；而在使用merge(a2,a1,all.x=TRUE)时，默认会按两个数据框中所有同名的列进行匹配。如果只按第一列匹配，则NA的个数一致：

summary(merge(a2,a1,by=1,all.x=TRUE)$V1) 
    Min. 1st Qu. Median Mean 3rd Qu. Max. NA's 
     1  1  1  1  1  1  7

来源

2013-10-31 21:14:07

谢谢@DWin但问题是，我只想保留'a2'的原始变量，当我在'merge'中添加'by'时，我得到了更多的变量。是否有可能只保留'a2'的变量。谢谢。 – Duck

如果你只想保留一个变量，你可以在合并之前排除其他变量：`merge(a2, a1[, c(1, 8)], all.x = TRUE, by = 1)` – mrip

当然。只需传入用于匹配的那一列并加上 V1 列：`merge(a2, a1[c("r04_numero_operacion", "V1")], by = 1, all.x = TRUE)`……或者“照他说的做”。 –

R 中 merge() 与 match() 函数之间的差异

回答

相关问题