2016-04-24 248 views
0

我有一个名为 mydf 的数据框,其中记录了杂交样本比较的效率。对于 sample_A 和 sample_B 的每个杂交组合,都有七个不同的效率列。我想把这七个效率绘制成图,看看从哪一列开始效率相比前几列大幅下降。如何在 R 中绘制曲线的渐近线?

# Example data: six sample_A / sample_B hybrids (rows), each with seven
# efficiency measurements (columns efficiency, efficiency2, ..., efficiency7).
# Both sample columns are factors sharing the same eight levels, of which only
# the first four are actually used.
mydf <- structure(
  list(
    sample_A = structure(
      c(1L, 2L, 2L, 2L, 3L, 4L),
      levels = c("2568", "2669", "2670", "2671", "2946",
                 "LPH-001-10_AK1", "LPH-001-12_AK2", "LPH-001-9"),
      class = "factor"
    ),
    sample_B = structure(
      c(1L, 2L, 3L, 4L, 3L, 4L),
      levels = c("2568", "2669", "2670", "2671", "2946",
                 "LPH-001-10_AK1", "LPH-001-12_AK2", "LPH-001-9"),
      class = "factor"
    ),
    efficiency = c(1.02, 0.964, 0.415, 0.422, 0.98, 0.986),
    efficiency2 = c(1, 0.944, 0.395, 0.402, 0.96, 0.966),
    efficiency3 = c(0.9, 0.844, 0.295, 0.302, 0.86, 0.866),
    efficiency4 = c(0.32, 0.264, -0.285, -0.278, 0.28, 0.286),
    efficiency5 = c(0.02, -0.0360000000000001, -0.585, -0.578,
                    -0.0200000000000001, -0.0140000000000001),
    efficiency6 = c(0.12, 0.0639999999999999, -0.485, -0.478, 0.08, 0.086),
    efficiency7 = c(0.02, -0.036, -0.585, -0.578, -0.02, -0.014)
  ),
  # Kept in the compact c(NA, n) form exactly as produced by dput().
  row.names = c(NA, 6L),
  class = "data.frame"
)
+0

难道 `asymptote(mydf)` 不行吗? – rawr

+0

@rawr 不,不行。 – MAPK

回答

4

这里是绘制你的数据的一种方法:

# Plot each hybrid's efficiency measurements as a spline-smoothed curve.
# For every curve the script additionally
#   * marks (pch = 4, an "X") the point between measurements 2 and 3 where the
#     curve changes least, i.e. where it is flattest, and
#   * draws a dashed tangent line at the steepest descent found between
#     measurements 2 and 5.

# Example data: one row per sample_A / sample_B hybrid, seven efficiency
# columns per row.
mydf <- structure(
  list(
    sample_A = structure(
      c(1L, 2L, 2L, 2L, 3L, 4L),
      .Label = c('2568', '2669', '2670', '2671', '2946',
                 'LPH-001-10_AK1', 'LPH-001-12_AK2', 'LPH-001-9'),
      class = 'factor'
    ),
    sample_B = structure(
      c(1L, 2L, 3L, 4L, 3L, 4L),
      .Label = c('2568', '2669', '2670', '2671', '2946',
                 'LPH-001-10_AK1', 'LPH-001-12_AK2', 'LPH-001-9'),
      class = 'factor'
    ),
    efficiency = c(1.02, 0.964, 0.415, 0.422, 0.98, 0.986),
    efficiency2 = c(1, 0.944, 0.395, 0.402, 0.96, 0.966),
    efficiency3 = c(0.9, 0.844, 0.295, 0.302, 0.86, 0.866),
    efficiency4 = c(0.32, 0.264, -0.285, -0.278, 0.28, 0.286),
    efficiency5 = c(0.02, -0.0360000000000001, -0.585, -0.578,
                    -0.0200000000000001, -0.0140000000000001),
    efficiency6 = c(0.12, 0.0639999999999999, -0.485, -0.478, 0.08, 0.086),
    efficiency7 = c(0.02, -0.036, -0.585, -0.578, -0.02, -0.014)
  ),
  .Names = c('sample_A', 'sample_B', 'efficiency', 'efficiency2',
             'efficiency3', 'efficiency4', 'efficiency5', 'efficiency6',
             'efficiency7'),
  row.names = c(NA, 6L),
  class = 'data.frame'
)

# Indices of the columns whose names start with "efficiency".
effCis <- grep('^efficiency', names(mydf))

# Axis limits: x spans the measurement index; y is the data range widened
# outwards to the nearest multiple of 0.1 so that grid lines sit on round
# values.
xlim <- c(1, length(effCis))
ylim <- range(mydf[, effCis], na.rm = TRUE)
ylim[1L] <- floor(ylim[1L] / 0.1) * 0.1
ylim[2L] <- ceiling(ylim[2L] / 0.1) * 0.1
xticks <- seq_along(effCis)
yticks <- seq(ylim[1L], ylim[2L], 0.1)

# Empty canvas with a hand-drawn grid, a bold zero line and custom axes.
plot(NA, xlim = xlim, ylim = ylim, xlab = 'measurement', ylab = 'efficiency',
     xaxs = 'i', yaxs = 'i', axes = FALSE)
abline(v = xticks, col = 'lightgrey')
abline(h = yticks, col = 'lightgrey')
abline(h = 0, lwd = 2)
axis(1L, xticks, xticks, font = 2L, cex.axis = 0.7)
axis(2L, yticks, sprintf('%.1f', yticks), las = 1L, font = 2L, cex.axis = 0.7)

# One colour per row of mydf. The base palette is recycled to the row count so
# the lookup table always has exactly one row per hybrid, whatever nrow(mydf)
# is (a fixed six-element vector only worked for exactly six rows).
base.cols <- c('red', 'green', 'blue', 'gold', 'cyan', 'magenta')
hybrid.col <- data.frame(
  hybrid = seq_len(nrow(mydf)),
  col = rep_len(base.cols, nrow(mydf)),
  stringsAsFactors = FALSE
)

# Number of points at which each interpolating spline is evaluated.
splineN <- 200L

for (ri in seq_len(nrow(hybrid.col))) {
  hybrid <- hybrid.col$hybrid[ri]
  col <- hybrid.col$col[ri]
  x <- xticks
  y <- c(as.matrix(mydf[hybrid, effCis]))

  # Raw measurements.
  points(x, y, pch = 16L, col = col, xpd = NA)

  # Inside with(), `x` and `y` are the interpolated spline coordinates, not
  # the raw measurements assigned above; `col` still comes from the loop.
  with(spline(x, y, n = splineN), {
    lines(x, y, col = col, lwd = 2, xpd = NA)

    # Flattest point of the curve strictly between measurements 2 and 3:
    # the smallest absolute change between neighbouring spline points.
    localwin <- which(x > 2 & x < 3)
    tp <- which.min(abs(diff(y[localwin])))
    if (length(tp) > 0L) {
      points(x[localwin[tp]], y[localwin[tp]], col = col, pch = 4L)
    }

    # Steepest descent strictly between measurements 2 and 5: the most
    # negative change between neighbouring spline points.
    localwin <- which(x > 2 & x < 5)
    tp <- which.min(diff(y[localwin]))
    if (length(tp) > 0L) {
      # Slope from the two neighbouring spline points at the steepest step.
      pair <- localwin[seq(tp, length.out = 2L)]
      m <- diff(y[pair]) / diff(x[pair])
      if (is.finite(m)) {
        # Line through (x0, y0) with slope m: intercept is y0 - m * x0.
        abline(y[localwin[tp]] - m * x[localwin[tp]], m, col = col, lty = 2L)
      }
    }
  })
}

legend(5.5, 0.95, paste0(mydf$sample_A, '/', mydf$sample_B),
       fill = hybrid.col$col, cex = 0.7, title = 'hybrid')

plot


我不能100%确定你所说的渐近线是什么意思。我最初以为你想要的也许是曲线开始下降之前的局部最大值,所以我用点(符号 X,即 pch=4L)标记了局部最大值。但后来我意识到,你的意思也许是沿下降段的切线,所以我又添加了在最陡下降点处与曲线相切的直线。

这是渐近线的定义:

a straight line approached by a given curve as one of the variables in the equation of the curve approaches infinity.

我认为这个定义在这里并不适用;绘制这些数据并不涉及任何趋于无穷的量。我想你想要的是局部最大值或切线。

+0

@bgoldst 非常感谢,我没想到会得到这么好的答案。 – MAPK

+0

我的数据框出了问题。它有7346行,其中一些单元格是 `NA`。我已经用 `0` 替换了这些 NA,按理说应该解决了问题,但我仍然得到这个错误:`Error in xy.coords(x, y) : 'x' and 'y' lengths differ`。有什么建议吗?谢谢 – MAPK

+0

这是mydata,请指出它有什么问题:https://www.dropbox.com/s/qwcb3nxww0371c0/test.csv?dl=0 – MAPK