2015-08-08 131 views
7

我知道在ggplot2可以凸包通过基团作为在R:添加阿尔法袋2D或3D散点图

library(ggplot2) 
library(plyr) 
data(iris) 
df<-iris 
find_hull <- function(df) df[chull(df$Sepal.Length, df$Sepal.Width), ] 
hulls <- ddply(df, "Species", find_hull) 
plot <- ggplot(data = df, aes(x = Sepal.Length, y = Sepal.Width, colour=Species, fill = Species)) + 
    geom_point() + 
    geom_polygon(data = hulls, alpha = 0.5) + 
    labs(x = "Sepal.Length", y = "Sepal.Width") 
plot 

enter image description here

添加到散点图我想知道虽然如何一个可以计算并添加阿尔法包,也就是说,包含所有点至少包含1个阿尔法的最大凸包?在2d(用ggplot2显示)或3d(用rgl显示)。

编辑:我最初的想法是继续“剥离”凸包,因为至少包含给定的百分点的标准将会得到满足,尽管在论文here看来他们使用不同的算法(isodepth,似乎在R包深度中实现,功能isodepthaplpack::plothulls似乎也接近我想要的(虽然它产生一个完整的情节,而不仅仅是轮廓),所以我认为与这些我可能被排序虽然这些功能只适用于2D,我也会对3D扩展感兴趣(以rgl绘制)如果有人有任何指示让我知道!

EDIT2:带功能depth::isodepth我发现了一个2D解决方案(SE尽管我仍在寻找3D解决方案 - 如果有人会碰巧知道如何做到这一点,请告诉我!

回答

3

我们可以通过修改aplpack::plothulls函数接受的参数包含点的比例(在aplpack中设置为50%)。然后我们可以使用这个修改后的函数为ggplot制作一个自定义的geom。

这里的风俗GEOM:

library(ggplot2) 
StatBag <- ggproto("Statbag", Stat, 
        compute_group = function(data, scales, prop = 0.5) { 

        ################################# 
        ################################# 
        # originally from aplpack package, plotting functions removed 
        plothulls_ <- function(x, y, fraction, n.hull = 1, 
              col.hull, lty.hull, lwd.hull, density=0, ...){ 
         # function for data peeling: 
         # x,y : data 
         # fraction.in.inner.hull : max percentage of points within the hull to be drawn 
         # n.hull : number of hulls to be plotted (if there is no fractiion argument) 
         # col.hull, lty.hull, lwd.hull : style of hull line 
         # plotting bits have been removed, BM 160321 
         # pw 130524 
         if(ncol(x) == 2){ y <- x[,2]; x <- x[,1] } 
         n <- length(x) 
         if(!missing(fraction)) { # find special hull 
         n.hull <- 1 
         if(missing(col.hull)) col.hull <- 1 
         if(missing(lty.hull)) lty.hull <- 1 
         if(missing(lwd.hull)) lwd.hull <- 1 
         x.old <- x; y.old <- y 
         idx <- chull(x,y); x.hull <- x[idx]; y.hull <- y[idx] 
         for(i in 1:(length(x)/3)){ 
          x <- x[-idx]; y <- y[-idx] 
          if((length(x)/n) < fraction){ 
          return(cbind(x.hull,y.hull)) 
          } 
          idx <- chull(x,y); x.hull <- x[idx]; y.hull <- y[idx]; 
         } 
         } 
         if(missing(col.hull)) col.hull <- 1:n.hull 
         if(length(col.hull)) col.hull <- rep(col.hull,n.hull) 
         if(missing(lty.hull)) lty.hull <- 1:n.hull 
         if(length(lty.hull)) lty.hull <- rep(lty.hull,n.hull) 
         if(missing(lwd.hull)) lwd.hull <- 1 
         if(length(lwd.hull)) lwd.hull <- rep(lwd.hull,n.hull) 
         result <- NULL 
         for(i in 1:n.hull){ 
         idx <- chull(x,y); x.hull <- x[idx]; y.hull <- y[idx] 
         result <- c(result, list(cbind(x.hull,y.hull))) 
         x <- x[-idx]; y <- y[-idx] 
         if(0 == length(x)) return(result) 
         } 
         result 
        } # end of definition of plothulls 
        ################################# 


        # prepare data to go into function below 
        the_matrix <- matrix(data = c(data$x, data$y), ncol = 2) 

        # get data out of function as df with names 
        setNames(data.frame(plothulls_(the_matrix, fraction = prop)), nm = c("x", "y")) 
        # how can we get the hull and loop vertices passed on also? 
        }, 

        required_aes = c("x", "y") 
) 

#' @inheritParams ggplot2::stat_identity 
#' @param prop Proportion of all the points to be included in the bag (default is 0.5) 
stat_bag <- function(mapping = NULL, data = NULL, geom = "polygon", 
        position = "identity", na.rm = FALSE, show.legend = NA, 
        inherit.aes = TRUE, prop = 0.5, alpha = 0.3, ...) { 
    layer(
    stat = StatBag, data = data, mapping = mapping, geom = geom, 
    position = position, show.legend = show.legend, inherit.aes = inherit.aes, 
    params = list(na.rm = na.rm, prop = prop, alpha = alpha, ...) 
) 
} 


geom_bag <- function(mapping = NULL, data = NULL, 
        stat = "identity", position = "identity", 
        prop = 0.5, 
        alpha = 0.3, 
        ..., 
        na.rm = FALSE, 
        show.legend = NA, 
        inherit.aes = TRUE) { 
    layer(
    data = data, 
    mapping = mapping, 
    stat = StatBag, 
    geom = GeomBag, 
    position = position, 
    show.legend = show.legend, 
    inherit.aes = inherit.aes, 
    params = list(
     na.rm = na.rm, 
     alpha = alpha, 
     prop = prop, 
     ... 
    ) 
) 
} 

#' @rdname ggplot2-ggproto 
#' @format NULL 
#' @usage NULL 
#' @export 
GeomBag <- ggproto("GeomBag", Geom, 
        draw_group = function(data, panel_scales, coord) { 
        n <- nrow(data) 
        if (n == 1) return(zeroGrob()) 

        munched <- coord_munch(coord, data, panel_scales) 
        # Sort by group to make sure that colors, fill, etc. come in same order 
        munched <- munched[order(munched$group), ] 

        # For gpar(), there is one entry per polygon (not one entry per point). 
        # We'll pull the first value from each group, and assume all these values 
        # are the same within each group. 
        first_idx <- !duplicated(munched$group) 
        first_rows <- munched[first_idx, ] 

        ggplot2:::ggname("geom_bag", 
             grid:::polygonGrob(munched$x, munched$y, default.units = "native", 
                 id = munched$group, 
                 gp = grid::gpar(
                  col = first_rows$colour, 
                  fill = alpha(first_rows$fill, first_rows$alpha), 
                  lwd = first_rows$size * .pt, 
                  lty = first_rows$linetype 
                 ) 
            ) 
        ) 


        }, 

        default_aes = aes(colour = "NA", fill = "grey20", size = 0.5, linetype = 1, 
            alpha = NA, prop = 0.5), 

        handle_na = function(data, params) { 
        data 
        }, 

        required_aes = c("x", "y"), 

        draw_key = draw_key_polygon 
) 

这里是它如何被使用的例子:

ggplot(iris, aes(Sepal.Length, Petal.Length, colour = Species, fill = Species)) + 
    geom_point() + 
    stat_bag(prop = 0.95) + # enclose 95% of points 
    stat_bag(prop = 0.5, alpha = 0.5) + # enclose 50% of points 
    stat_bag(prop = 0.05, alpha = 0.9) # enclose 5% of points 

enter image description here

+0

对于非常感谢!这将是非常有用的! –

6

公顷的功能depth::isodepth的帮助下,我想出了以下解决方案 - 在这里我找到一个包含所有点的至少1-α的比例阿尔法袋:

library(mgcv) 
library(depth) 
library(plyr) 
library(ggplot2) 
data(iris) 
df=iris[,c(1,2,5)] 
alph=0.05 
find_bag = function(x,alpha=alph) { 
    n=nrow(x) 
    target=1-alpha 
    propinside=1 
    d=1 
    while (propinside>target) { 
     p=isodepth(x[,1:2],dpth=d,output=T, mustdith=T)[[1]] 
     ninside=sum(in.out(p,as.matrix(x[,1:2],ncol=2))*1) 
     nonedge=sum(sapply(1:nrow(p),function (row) 
      nrow(merge(round(setNames(as.data.frame(p[row,,drop=F]),names(x)[1:2]),5),as.data.frame(x[,1:2])))>0)*1)-3 
     propinside=(ninside+nonedge)/n 
     d=d+1 
    } 
    p=isodepth(x[,1:2],dpth=d-1,output=T, mustdith=T)[[1]] 
    p } 
bags <- ddply(df, "Species", find_bag,alpha=alph) 
names(bags) <- c("Species",names(df)[1:2]) 
plot <- ggplot(data = df, aes(x = Sepal.Length, y = Sepal.Width, colour=Species, fill = Species)) + 
    geom_point() + 
    geom_polygon(data = bags, alpha = 0.5) + 
    labs(x = "Sepal.Length", y = "Sepal.Width") 
plot 

enter image description here

EDIT2: 使用我最初的凸包剥离思想,我也想出了以下解决方案,它现在在2d & 3d中生效;结果是不太一样的是与isodepth算法,但它是非常接近:

# in 2d 
library(plyr) 
library(ggplot2) 
data(iris) 
df=iris[,c(1,2,5)] 
alph=0.05 
find_bag = function(x,alpha=alph) { 
    n=nrow(x) 
    propinside=1 
    target=1-alpha 
    x2=x 
    while (propinside>target) { 
    propinside=nrow(x2)/n 
    hull=chull(x2) 
    x2old=x2 
    x2=x2[-hull,] 
    } 
    x2old[chull(x2old),] } 
bags <- ddply(df, "Species", find_bag, alpha=alph) 
plot <- ggplot(data = df, aes(x = Sepal.Length, y = Sepal.Width, colour=Species, fill = Species)) + 
    geom_point() + 
    geom_polygon(data = bags, alpha = 0.5) + 
    labs(x = "Sepal.Length", y = "Sepal.Width") 
plot 

enter image description here

# in 3d 
library(plyr) 
library(ggplot2) 
data(iris) 
df=iris[,c(1,2,3,5)] 
levels=unique(df[,"Species"]) 
nlevels=length(levels) 
zoom=0.8 
cex=1 
aspectr=c(1,1,0.7) 
pointsalpha=1 
userMatrix=matrix(c(0.80,-0.60,0.022,0,0.23,0.34,0.91,0,-0.55,-0.72,0.41,0,0,0,0,1),ncol=4,byrow=T) 
windowRect=c(0,29,1920,1032) 
cols=c("red","forestgreen","blue") 
alph=0.05 
plotbag = function(x,alpha=alph,grp=1,cols=c("red","forestgreen","blue"),transp=0.2) { 
    propinside=1 
    target=1-alpha 
    x2=x 
    levels=unique(x2[,ncol(x2)]) 
    x2=x2[x2[,ncol(x2)]==levels[[grp]],] 
    n=nrow(x2) 
    while (propinside>target) { 
    propinside=nrow(x2)/n 
    hull=unique(as.vector(convhulln(as.matrix(x2[,1:3]), options = "Tv"))) 
    x2old=x2 
    x2=x2[-hull,] 
    } 
    ids=t(convhulln(as.matrix(x2old[,1:3]), options = "Tv")) 
    rgl.triangles(x2old[ids,1],x2old[ids,2],x2old[ids,3],col=cols[[grp]],alpha=transp,shininess=50) 
} 

open3d(zoom=zoom,userMatrix=userMatrix,windowRect=windowRect,antialias=8) 
for (i in 1:nlevels) { 
    plot3d(x=df[df[,ncol(df)]==levels[[i]],][,1], 
     y=df[df[,ncol(df)]==levels[[i]],][,2], 
     z=df[df[,ncol(df)]==levels[[i]],][,3], 
     type="s", 
     col=cols[[i]], 
     size=cex, 
     lit=TRUE, 
     alpha=pointsalpha,point_antialias=TRUE, 
     line_antialias=TRUE,shininess=50, add=TRUE) 
    plotbag(df,alpha=alph, grp=i, cols=c("red","forestgreen","blue"), transp=0.3) } 
axes3d(color="black",drawfront=T,box=T,alpha=1) 
title3d(color="black",xlab=names(df)[[1]],ylab=names(df)[[2]],zlab=names(df)[[3]],alpha=1) 
aspect3d(aspectr) 

enter image description here