2017-04-22 49 views
2

删除数据框中位于两个标记值之间的所有行。考虑如下数据框 df:

# Sample data: ten time steps, a sparse `marker` column ("stop" at row 4,
# "start" at row 8, missing everywhere else) and a `behaviour` label per row.
df <- data.frame(
  time = as.numeric(1:10),
  marker = replace(rep(NA_character_, 10), c(4, 8), c("stop", "start")),
  behaviour = c(rep("Rest", 4), rep("Awake", 5), "Rest")
)

    time marker behaviour 
1  1 <NA>  Rest 
2  2 <NA>  Rest 
3  3 <NA>  Rest 
4  4 stop  Rest 
5  5 <NA>  Awake 
6  6 <NA>  Awake 
7  7 <NA>  Awake 
8  8 start  Awake 
9  9 <NA>  Awake 
10 10 <NA>  Rest 

我想根据 marker 列对数据取子集,去掉位于 "stop" 和 "start" 两个标记之间的行(标记行本身保留),使 df 变成这样:

time marker behaviour 
    1 <NA>  Rest 
    2 <NA>  Rest 
    3 <NA>  Rest 
    4 stop  Rest 
    8 start  Awake 
    9 <NA>  Awake 
    10 <NA>  Rest 
+2

试试 `i1 <- with(df, which(marker %in% c("stop", "start"))); df[-((i1[1]+1):(i1[2]-1)),]` – akrun

+1

是的,效果很好,谢谢你 akrun! – Bonono

回答

1

我们可以用数值索引来选取行的子集

# Row positions of the marker rows ("stop" / "start"), in row order.
# %in% never returns NA, so rows with a missing marker are simply skipped.
i1 <- with(df, which(marker %in% c("stop", "start")))

# Keep every row except those strictly between the first two markers.
# A logical mask is used instead of negative indexing because:
#   * (i1[1] + 1):(i1[2] - 1) counts DOWN when the markers are adjacent,
#     which would drop the marker rows themselves;
#   * with fewer than two markers the index would be NA and subsetting fails.
keep <- rep(TRUE, nrow(df))
if (length(i1) >= 2 && i1[2] - i1[1] > 1) {
  keep[(i1[1] + 1):(i1[2] - 1)] <- FALSE
}
df[keep, ]

如果有多对 "stop"/"start" 标记,那么可以这样做:

# Group id for each row = number of "stop" markers seen on earlier rows.
# The count is lagged by one row so that a "stop" row still belongs to the
# group it closes; rows with a missing marker never count as a "stop".
grp <- with(df, {
  stops_so_far <- cumsum(!is.na(marker) & marker == "stop")
  c(0, stops_so_far[-length(stops_so_far)])
})

# Within each group: if the group contains a "start", keep rows from the first
# "start" onwards; if it contains none, keep the whole group.
df[with(df, {
  from_start_on <- function(hit) {
    if (any(hit)) cumsum(hit) > 0 else rep(TRUE, length(hit))
  }
  ave(!is.na(marker) & marker == "start", grp, FUN = from_start_on)
}), ]
# Expected output:
# time marker behaviour 
#1  1 <NA>  Rest 
#2  2 <NA>  Rest 
#3  3 <NA>  Rest 
#4  4 stop  Rest 
#8  8 start  Awake 
#9  9 <NA>  Awake 
#10 10 <NA>  Rest 
1
# Sample data. Missing markers are real NA values (not the string "NA"), so
# the frame matches the one given in the question.
df <- data.frame(
  time = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10),
  marker = c(NA, NA, NA, "stop", NA, NA, NA, "start", NA, NA),
  behaviour = c("Rest", "Rest", "Rest", "Rest", "Awake",
                "Awake", "Awake", "Awake", "Awake", "Rest")
)

# which() skips the NA results of comparing missing markers and returns row
# positions directly, so this does not depend on default row names.
df1 <- which(df$marker == "stop")[1] + 1   # first row to drop
df2 <- which(df$marker == "start")[1] - 1  # last row to drop

# Drop rows df1..df2 with a logical mask rather than -(df1:df2): the mask
# removes nothing when the markers are adjacent (df1 > df2) and, via
# `%in% TRUE`, also when a marker is absent (df1 or df2 is NA).
row_id <- seq_len(nrow(df))
in_gap <- row_id >= df1 & row_id <= df2
ans <- df[!(in_gap %in% TRUE), ]
1

一种基于 cumsum 的解法(我用了 data.table,但你不一定要用),它可以推广到多对 stop/start 标记:

library(data.table)
dt <- as.data.table(df)

# `drop` = ("stop" markers seen so far) - ("start" markers seen so far).
# It is 0 outside a stop..start stretch and positive inside one. The filter
# keeps rows where it is 0, plus the "stop" rows themselves (whose count has
# already been incremented). The `drop` column is added to `dt` by reference
# and therefore also appears in the printed result.
dt[
  ,
  drop := cumsum(!is.na(marker) & marker == "stop") -
    cumsum(!is.na(marker) & marker == "start")
][marker == "stop" | drop == 0]

    # Expected output:
    # time marker behaviour drop 
    # 1: 1  NA  Rest 0 
    # 2: 2  NA  Rest 0 
    # 3: 3  NA  Rest 0 
    # 4: 4 stop  Rest 1 
    # 5: 8 start  Awake 0 
    # 6: 9  NA  Awake 0 
    # 7: 10  NA  Rest 0