2017-06-14 123 views
-1

我想对数据集做一系列的检验:在同一个数据框内进行多次检验。

具体来说,我想对每个基因分别做一次 t 检验,比较第 1–5 只大鼠与第 6–10 只大鼠的测量值。

我试着这样做:

# Asker's original attempt, kept exactly as posted (it does not work as
# intended; see the review notes below).
# Drop the first column of goi (the Gene names in the data set shown below).
>goi2 <- (goi[-1]) 
# NOTE(review): these select ROWS 1-5 and 6-10, whereas the two groups to
# compare (Rat_1..Rat_5 vs Rat_6..Rat_10) are columns of the data set.
control <- goi2[1:5,] 
stress <- goi2[6:10,] 

# NOTE(review): the loop walks 92 column positions; 92 is the number of genes
# (rows), while goi2 only has the ten Rat_* columns.
for (i in 1:92){ 
    x <- control[,i] 
    y <- stress[,i] 
    # NOTE(review): `<=` is a comparison, not an assignment, so the test
    # result is never stored; x is also overwritten on every iteration.
    x <= t.test(x, y) 
# print(x=i) 
} 

但我得到了这个错误:

Error: Can't use matrix or array for column indexing

我尝试了几种不同的写法,但始终弄不明白为什么行不通。

我完全是 R 的新手,但并不是编程新手。

数据集:


Gene,Rat_1,Rat_2,Rat_3,Rat_4,Rat_5,Rat_6,Rat_7,Rat_8,Rat_9,Rat_10 Oprd1,2.746,1.387,2.25,3.363,3.191,2.432,1.985,1.75,2.752,1.771 Grin2a,3.134,2.644,2.962,5.168,2.484,3.54,2.596,1.535,3.197,2.232 Grin2d(2),4.496,5.528,2.631,4.684,3.934,6.047,0.98,0.077,4.381,2.327 Oprm1,1.998,1.804,1.611,1.712,3.672,3.215,0.249,1.248,1.758,2.671 Scn2b,137.35,97.158,113.65,141.93,77.295,133.02,88.872,75.586,108.96,97.626 Ntf3,0.989,1.835,1.604,1.133,0.889,0.782,0.918,2.241,2.216,3.921 Scn1a(2),9.224,7.369,10.145,14.242,17.262,11.535,8.144,7.166,13.625,6.604 Ntrk2(2),21.929,17.018,14.799,19.783,14.632,24.421,14.235,9.344,16.658,17.913 Cacna1c,4.585,3.637,3.948,4.135,3.403,5.381,4.193,3.162,3.455,3.695 Grin2b,3.273,2.57,2.101,2.922,1.826,3.338,2.121,1.416,2.973,2.005 Scn9a(2),0.319,0,0,0.453,0.434,0.376,0,0,0.346,0.469 Gria4(2),10.867,8.156,7.889,9.236,14.134,10.574,8.404,8.179,9.442,7.982 Cacna1e(2),1.805,1.783,2.045,1.968,1.405,1.807,0.973,0.993,0.857,1.769 Gria3,4.237,4.188,3.901,5.221,6.439,3.993,3.421,4.012,4.452,4.631 Gria1,8.284,7.942,7.557,12.001,3.976,9.472,7.653,4.16,7.971,5.381 Kcnj5,3.089,2.046,3.332,3.392,2.168,3.786,3.865,1.414,2.37,2.009 Cacna1b(2),11.071,8.716,8.246,9.594,7.189,11.62,6.028,4.481,9.307,9.074 Scn5a,1.301,1.017,0.714,1.401,0.449,1.183,1.065,0.292,0.823,0.714 Scn2a(2),3.286,2.119,2.257,2.024,1.902,3.441,1.327,1.072,2.576,2.09 Scn10a,0.037,0.069,0.087,0.076,0.082,0.095,0.052,0.019,0.078,0.045 Cacna1g(2),6.543,5.095,5.463,8.404,3.084,7.359,5.746,4.682,5.969,4.315 Cacna1e(3),5.37,4.002,3.313,4.803,2.665,5.623,3.296,1.953,3.827,4.092 Bdnf(4),0.869,0.509,0.996,1.032,0.256,0.742,0.498,0.531,0.994,0.473 Scn4a,0.284,0.278,0.359,0.45,0.761,0.31,0.319,0.27,0.366,0.273 Scn5a(2),0.256,0.477,0.587,0.283,0,0.564,0.044,0.023,0.204,0.15 Gabra1,51.019,44.3,57.609,81.522,40.853,64.921,68.263,31.766,58.006,39.518 Scn8a,6.854,14.666,5.416,12.347,4.823,14.935,7.014,16.684,9.686,17.44 Kcnj3,17.047,14.3,13.741,14.363,14.01,13.268,12.172,10.718,15.374,13.048 
Slc6a2,107.9,69.941,91.704,36.411,112.57,114.5,23.398,63.848,53.323,135.26 Grin3a,6.952,5.676,7.301,12.557,3.65,10.628,9.783,4.286,8.015,4.499 Cnr1,20.261,16.981,19.996,26.469,12.709,24.705,25.548,10.61,19.746,14.64 Scn1b,13.732,15.763,5.03,20.68,17.788,14.959,16.298,24.682,22.477,15.117 Gria1(2),2.709,3.667,2.51,2.9,2.134,1.93,4.308,2.59,2.487,1.742 Scn3a(2),1.439,2.614,0,0.352,0,1.358,1.027,0,0.452,0.586 Scn11a,0.058,0.292,0.036,0.127,0.058,0.06,0.074,0.164,0.047,0.05 Gria1(3),25.283,17.779,22.725,32.705,8.823,28.727,26.915,12.876,23.545,17.879 Cacna1f,0.056,0.067,0.14,0.123,0.04,0.182,0.072,0.083,0.077,0.097 Cacna1a,20.791,19.816,17.613,21.663,15.697,22.824,16.737,16.719,16.604,20.469 Gria4,8.51,7.107,8.342,9.338,7.46,8.877,7.673,6.341,8.393,9.555 Scn8a,6.738,14.706,4.172,11.467,2.552,10.757,6.021,15.222,3.588,11.333 Grin2d,20.398,15.794,22.521,24.693,16.97,24.108,24.19,21.016,18.314,19.044 Gria3(2),15.301,13.087,13.918,14.433,12.282,14.914,12.198,11.602,13.738,15.481 Oprk1(2),6.66,4.97,7.604,10.281,2.151,10.462,10.278,1.525,6.869,4.902 Scn1b(3),46.553,42.795,49.498,55.558,64.101,38.178,44.1,59.033,43.837,39.382 Cacna1h,9.145,7.295,8.7,8.028,5.415,10.799,8.21,6.332,8.455,7.683 Scn2a,36.803,29.975,30.609,38.334,19.053,39.127,31.146,23.066,30.896,32.345 Cacna1g,5.489,5.213,6.24,7.896,3.97,4.876,6.283,5.464,6.08,3.692 Ntrk2(3),147.81,152.45,153.46,136.09,181.1,156.85,219.8,164.53,156.64,147.92 Scn1a,9.222,9.162,9.659,13.83,12.679,8.088,11.45,10.406,9.503,6.827 Grin1(3),69.943,68.01,76.358,81.029,63.692,83.424,70.981,80.088,69.821,70.764 Grin3b(2),2.065,1.265,1.45,1.576,3.875,1.441,1.822,1.964,2.286,0.965 Gabra2(2),2.268,1.251,1.638,2.844,2.93,2.934,3.725,1.724,1.455,2.674 Scn1b2(2),161.76,164.24,213.24,209.19,235.38,172.98,207.33,216.96,198.26,130.93 Oprm1(2),4.046,5.181,2.362,1.925,0.806,2.232,1.178,1.491,3.259,3.751 Cacna1c(3),0.077,0.194,0.23,0,0.132,0.127,0,0.035,0.09,0.092 Ntrk2,27.139,26.028,23.881,27.22,22.259,30.728,22.381,19.782,24.704,30.85 
Cacna1d(2),2.126,2.263,2.038,2.1,1.995,2.966,1.943,2.01,2.317,2.214 Scn3a,21.272,16.356,16.245,14.875,11.825,19.753,10.994,11.08,16.905,19.832 Grin1(2),76.771,65.788,66.059,78.716,33.91,88.228,73.859,47.717,70.674,61.275 Grina,672.31,705.45,679.04,623.4,597.51,742.12,619.74,662.95,665.18,781.29 Cacna1e,2.448,1.981,1.506,2.003,1.318,3.052,1.953,0.814,2.17,2.482 Bdnf(2),1.853,2.128,2.553,1.996,0.663,2.5,2.385,0.468,1.922,1.481 Fos,18.402,24.653,23.038,20.615,8.027,38.444,20.836,11.756,20.823,20.296 Scn4b,23.772,27.874,25.388,25.109,51.926,20.291,25.521,28.701,30.256,17.344 Slc6a2(3),480.05,455.95,307.6,186.82,376.96,447.61,123.5,409.58,347.86,681.04 Ntf3(3),1.87,3.561,2.421,3.133,2.134,2.327,1.712,2.32,1.735,3.497 Bdnf(3),0.319,0.09,0.665,0.187,0.107,0.185,0.394,0.264,0.21,0.345 Scn3b,112.86,115.29,99.711,96.245,71.741,122.34,85.875,88.906,102.88,132.13 Grin2c,14.224,15.944,15.473,21.936,32.732,13.98,20.168,23.958,14.541,17.402 Gabrd,0.701,3.542,0.532,5.222,5.593,0.133,2.954,0.961,0.506,2.152 Cacna1b,16.935,15.764,14.475,15.639,10.655,19.408,14.115,14.079,14.26,16.737 Slc18a2,433.92,429.22,293.57,164.53,287.51,370.72,93.973,283.12,321.49,551.07 Cacnb1(2),16.456,5.099,16.969,4.469,12.471,5.143,14.017,10.049,17.537,4.26 Gabrg1,40.614,37.373,43.103,39.253,47.768,41.202,51.665,37.74,42.17,39.097 Grin1,1.235,0.812,0.909,1.605,0.513,1.371,1.596,1.346,1.213,0.922 Slc6a2(2),138.21,136.75,34.759,38.393,25.89,87.126,0,0.467,99.703,137.66 Galr3,2.691,2.51,2.517,4.446,0.727,2.933,4.041,2.08,2.638,1.456 Oprm1(3),7.273,7.676,7.08,6.196,5.515,9.023,2.57,4.8,7.699,10.471 Gabrq,70.623,67.728,51.095,42.456,43.156,77.924,28.63,32.975,54.192,87.697 Gria4(3),25.846,26.045,24.37,37.866,18.037,26.907,31.423,21.292,26.795,24.642 Cacna1c(2),0.644,0.894,0.831,1.084,0.721,1.026,0.817,0.371,1.333,1.015 Cacna1d(3),0.299,0.406,0.127,0.319,0.319,0.231,0.178,0.075,0.18,0.405 Cacnb1,47.24,51.505,42.702,48.718,33.28,60.334,38.611,41.827,40.352,56.132 
Scn7a,2.351,2.38,2.114,1.96,0.316,2.647,1.945,1.219,2.559,1.498 Cacna1d,2.661,2.733,2.714,2.649,2.403,2.923,3.216,2.768,2.401,2.302 Gabra2,25.209,26.731,23.249,25.599,20.17,22.928,24.072,18.664,23.808,23.306 Scn9a,3.209,3.106,3.212,3.206,1.094,3.35,3.994,1.934,2.883,2.046 Ntf3(2),2.347,2.282,2.112,1.025,1.762,2.029,0.501,1.652,2.717,1.982 Gria2,12.726,12.997,12.74,15.615,7.156,14.375,13.387,11.682,12.968,11.332 Bdnf,0.703,0.777,1.034,0.571,0.166,1.164,0.549,0.325,0.801,1.12 Gria2(2),17.769,17.694,16.62,18.603,11.295,19.926,18.044,13.594,16.946,17.712 Bdnf(5),1.321,2.152,1.882,2.397,1.598,3.072,3.038,1.53,2.04,1.464

+0

也许你的意思是 `x <- t.test(x, y)`?另外,我不建议重复使用变量名 `x`。 –

+1

我认为你把行和列的索引弄反了(应为 `goi[row, column]`);你把前 5 *行* 当作 `control`,把接下来的 5 *行* 当作 `stress`,然后又试图访问 92 *列*。 – r2evans

+1

你还会遇到另一个问题:每次循环都会覆盖 `x`。也许现在正好可以说明一下,`for` 循环不会像你期望的那样“返回”或记住值。这里很适合改用 `results <- lapply(seq_len(nrow(goi2)), function(i) t.test(control[i,], stress[i,]))`。 – r2evans

回答

2

下面是一个只使用基础 R(base R)的可运行示例。使用你的 `goi`:

# Inspect goi: one row per gene, the gene name in column 1 and the ten
# Rat_* measurements in columns 2-11.
str(goi) 
# 'data.frame': 92 obs. of 11 variables: 
# $ Gene : chr "Oprd1" "Grin2a" "Grin2d(2)" "Oprm1" ... 
# $ Rat_1 : num 2.75 3.13 4.5 2 137.35 ... 
# $ Rat_2 : num 1.39 2.64 5.53 1.8 97.16 ... 
# $ Rat_3 : num 2.25 2.96 2.63 1.61 113.65 ... 
# $ Rat_4 : num 3.36 5.17 4.68 1.71 141.93 ... 
# $ Rat_5 : num 3.19 2.48 3.93 3.67 77.3 ... 
# $ Rat_6 : num 2.43 3.54 6.05 3.21 133.02 ... 
# $ Rat_7 : num 1.985 2.596 0.98 0.249 88.872 ... 
# $ Rat_8 : num 1.75 1.535 0.077 1.248 75.586 ... 
# $ Rat_9 : num 2.75 3.2 4.38 1.76 108.96 ... 
# $ Rat_10: num 1.77 2.23 2.33 2.67 97.63 ... 
# Split the measurements into the two groups (Rat_1..Rat_5 and
# Rat_6..Rat_10) and convert each slice to a numeric matrix. Indexing a
# single row of a matrix (e.g. control[i, ]) yields a plain numeric vector
# rather than a one-row data frame, so t.test() is always handed vectors.
control <- as.matrix(goi[, 2:6])
stress <- as.matrix(goi[, 7:11])

现在,我们不使用 `for` 循环并在每次迭代时处理返回值,而是一次性完成所有检验,把每次检验的完整结果对象存放在一个 `list` 中,这样在所有检验完成之后仍然可以从中提取任何想要的内容。

# Run one two-sample t-test per row of goi (row i of control against row i
# of stress) and keep every complete test object in a list, in row order.
results <- lapply(
    seq_len(nrow(goi)),
    function(i) {
        t.test(control[i, ], stress[i, ])
    }
)
length(results) 
# [1] 92 

`results` 的每个元素都是一次 `t.test` 调用的返回值。

# Print the complete test object stored for the first row of goi.
results[[1]] 
# Welch Two Sample t-test 
# data: control[i, ] and stress[i, ] 
# t = 1.1034, df = 6.2218, p-value = 0.3107 
# alternative hypothesis: true difference in means is not equal to 0 
# 95 percent confidence interval: 
# -0.5386851 1.4374851 
# sample estimates: 
# mean of x mean of y 
# 2.5874 2.1380 

您可以访问测试结果的任何组件:

# Components available in a single test object.
names(results[[1]]) 
# [1] "statistic" "parameter" "p.value"  "conf.int" "estimate" 
# [6] "null.value" "alternative" "method"  "data.name" 
# p-values of the first six tests.
head(sapply(results, function(res) res[["p.value"]])) 
# [1] 0.3107098 0.3083295 0.2626753 0.6245368 0.4406157 0.2800657 
# Confidence intervals of the first six tests; t() puts one test per row.
head(t(sapply(results, function(res) res[["conf.int"]]))) 
#    [,1]  [,2] 
# [1,] -0.5386851 1.4374851 
# [2,] -0.7513650 2.0681650 
# [3,] -1.5018657 4.4862657 
# [4,] -1.1880098 1.8504098 
# [5,] -23.5402499 48.8678499 
# [6,] -2.2762668 0.8250668 

注意:R 的诸多微妙之处之一是,`*apply` 系列函数返回的矩阵,在有些人看来相对于它“应有”的形状是转置过的。因此,对返回 `matrix` 的调用最好包在 `t(...)` 里。(这里不妨先按下“我相信”按钮,继续往下看。)

你可以把所有这些结果合并成一个 `data.frame`:

# Label the columns of a matrix as <nameroot>1, <nameroot>2, ... and drop
# any row names.
#   x        - a matrix (anything with columns that accepts dimnames<-).
#   nameroot - prefix used to build the column names.
# Returns x with its dimnames replaced.
namefunc <- function(x, nameroot) {
    col_labels <- paste0(nameroot, seq_len(ncol(x)))
    dimnames(x) <- list(NULL, col_labels)
    x
}

(这是一个小的辅助函数,让后面的代码稍微容易阅读一些。它采用的是一种非常简单(naïve)的命名约定,目前只是为了保证列名唯一。)

# Collect the per-test pieces into one data frame with one row per test:
# single-valued components become one column each, while the two-valued
# components are transposed to one row per test and labelled by namefunc().
test_results <- cbind.data.frame(
    statistic = sapply(results, function(res) res[["statistic"]]),
    p.value = sapply(results, function(res) res[["p.value"]]),
    parameter = sapply(results, function(res) res[["parameter"]]),
    namefunc(t(sapply(results, function(res) res[["conf.int"]])), "conf"),
    namefunc(t(sapply(results, function(res) res[["estimate"]])), "est")
)
head(test_results) 
# statistic p.value parameter  conf1  conf2  est1  est2 
# 1 1.1033554 0.3107098 6.221806 -0.5386851 1.4374851 2.5874 2.1380 
# 2 1.0948456 0.3083295 7.312678 -0.7513650 2.0681650 3.2784 2.6200 
# 3 1.2480711 0.2626753 5.480699 -1.5018657 4.4862657 4.2546 2.7624 
# 4 0.5107431 0.6245368 7.337202 -1.1880098 1.8504098 2.1594 1.8282 
# 5 0.8134064 0.4406157 7.633546 -23.5402499 48.8678499 113.4766 100.8128 
# 6 -1.2161356 0.2800657 4.824393 -2.2762668 0.8250668 1.2900 2.0156 

当然也可以使用扩展包来完成,正如 RobertMc 所建议的那样。为此我推荐 `dplyr` 和 `tidyr`,`broom` 或许也很有用。

+0

我知道这与你最初的尝试有些不同,但它是否回答了你的问题?如果是,请“接受”该答案(点击答案左侧的对勾)。 – r2evans

+0

它的确如此。谢谢! –