2015-12-14 61 views
-1

为什么函数形式的 ':=' 配合 'by' 不会把数据聚合成唯一行,而 LHS:RHS 形式配合 'by' 却会聚合?下面是一个包含 58 个变量、20 行数据的 .csv 样本;直接复制粘贴,并按 .csv 的分隔符读入即可使用。我仍在摸索向 SO 发布样本数据的最佳方式。以下是我代码的两种写法:data.table 中的 LHS:RHS 形式与函数形式。

prodMatrix <- so.sample[, ':=' (Count = .N), by = eval(names(so.sample)[2:28])] 

--- 这个版本不会按 by 对行(ROWID)进行聚合 ---

prodMatrix <- so.sample[, (Count = .N), by = eval(names(so.sample)[2:28])] 

--- 这个版本则会按 by 对行(ROWID)进行聚合 ---

"CID","NetIncome_length_Auto Advantage","NetIncome_length_Certificates","NetIncome_length_Comm. Share Draft","NetIncome_length_Escrow Shares","NetIncome_length_HE Fixed","NetIncome_length_HE Variable","NetIncome_length_Holiday Club","NetIncome_length_IRA Certificates","NetIncome_length_IRA Shares","NetIncome_length_Indirect Balloon","NetIncome_length_Indirect New","NetIncome_length_Indirect RV","NetIncome_length_Indirect Used","NetIncome_length_Loanline/CR","NetIncome_length_New Auto","NetIncome_length_Non-Owner","NetIncome_length_Personal","NetIncome_length_Preferred Plus Shares","NetIncome_length_Preferred Shares","NetIncome_length_RV","NetIncome_length_Regular Shares","NetIncome_length_S/L Fixed","NetIncome_length_S/L Variable","NetIncome_length_SBA","NetIncome_length_Share Draft","NetIncome_length_Share/CD Secured","NetIncome_length_Used Auto","NetIncome_sum_Auto Advantage","NetIncome_sum_Certificates","NetIncome_sum_Comm. Share Draft","NetIncome_sum_Escrow Shares","NetIncome_sum_HE Fixed","NetIncome_sum_HE Variable","NetIncome_sum_Holiday Club","NetIncome_sum_IRA Certificates","NetIncome_sum_IRA Shares","NetIncome_sum_Indirect Balloon","NetIncome_sum_Indirect New","NetIncome_sum_Indirect RV","NetIncome_sum_Indirect Used","NetIncome_sum_Loanline/CR","NetIncome_sum_New Auto","NetIncome_sum_Non-Owner","NetIncome_sum_Personal","NetIncome_sum_Preferred Plus Shares","NetIncome_sum_Preferred Shares","NetIncome_sum_RV","NetIncome_sum_Regular Shares","NetIncome_sum_S/L Fixed","NetIncome_sum_S/L Variable","NetIncome_sum_SBA","NetIncome_sum_Share Draft","NetIncome_sum_Share/CD Secured","NetIncome_sum_Used Auto","totNI","Count","totalNI" 
93,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,-123.2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,212.97,0,0,0,-71.36,0,0,0,49.01,0,0,67.42,6,404.52 
114,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,4,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14.54,0,0,0,0,0,-285.44,0,0,0,49.01,0,0,-221.89,90,-19970.1 
1112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,60.23,0,0,0,0,-101.55,0,-71.36,0,0,0,98.02,0,0,-14.66,28,-410.48 
5366,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-71.36,0,0,0,49.01,0,0,-22.35,77631,-1735052.85 
6078,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,7,0,0,0,1,0,0,0,0,0,0,0,0,-17.44,0,0,0,0,0,0,0,14.54,0,0,0,0,0,-499.52,0,0,0,49.01,0,0,-453.41,3,-1360.23 
11684,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-71.36,0,0,0,49.01,0,0,-22.35,77631,-1735052.85 
47358,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,-14.43,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-71.36,0,0,0,0,0,0,-85.79,3194,-274013.26 
193761,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-101.55,0,-71.36,0,0,0,49.01,0,0,-123.9,9973,-1235654.7 
232530,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-71.36,0,0,0,49.01,0,0,-22.35,77631,-1735052.85 
604897,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-71.36,0,0,0,49.01,0,0,-22.35,77631,-1735052.85 
1021309,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-71.36,0,0,0,0,0,0,-71.36,43262,-3087176.32 
1023633,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-71.36,0,0,0,0,0,0,-71.36,43262,-3087176.32 
1029726,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,60.23,0,0,0,0,0,0,-71.36,0,0,0,49.01,0,0,37.88,8688,329101.44 
1040005,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-71.36,0,0,0,49.01,0,0,-22.35,77631,-1735052.85 
1040092,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-71.36,0,0,0,49.01,0,0,-22.35,77631,-1735052.85 
1064453,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14.54,0,212.97,0,0,0,-142.72,0,0,0,0,0,0,84.79,49,4154.71 
1067508,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,-123.2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-71.36,0,0,0,0,0,0,-194.56,4162,-809758.72 
1080303,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-71.36,0,0,0,0,0,0,-71.36,43262,-3087176.32 
1181005,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-101.55,0,-142.72,0,0,0,98.02,0,0,-146.25,614,-89797.5 
1200484,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-101.55,0,-285.44,0,0,0,0,0,0,-386.99,50,-19349.5 
+0

为什么要给 ':=' 加引号? – Heroka

+0

使用下面这种函数形式时,:= 需要用反引号(backtick)括起来。但 SO 已经把反引号保留作代码标记,所以我尝试用引号替换它们。 – user3067851

+1

我不明白你的问题。你是在问为什么 ':=' 的行为与文档描述的一致吗? – Roland

回答

0

因为 := 是按引用(by reference)执行操作的。这意味着它不会在内存中创建数据集的副本,而是就地更新数据集。
而对数据集进行聚合所得到的结果,是其原始未聚合形式的一个副本。
你可以在 Reference semantics 这篇 vignette 中阅读更多相关内容。

这是 data.table 的一个设计理念::= 用于按引用更新数据,而其他形式——.()、list() 或直接的表达式——用于查询数据。查询数据不是按引用的操作。按引用的操作无法对行进行聚合,它只能计算出聚合值并将其就地写入数据集。查询之所以能够聚合数据集,是因为查询结果是一个不同于原始 data.table 的对象。

+0

@jangorecki 所以,:= 操作既不应该复制也不应该聚合。这篇 vignette 我已经读过好几遍了。LHS:RHS 形式和函数形式的运行结果应该相同,对吗?据我的理解,唯一的区别是函数形式允许你添加一些注释,而且可能更容易理解。 – user3067851

+0

@user3067851 添加了一些可能有帮助的说明 – jangorecki