2016-11-15 22 views
1

如何在R中检查缺失的文件?我有一个文件夹,里面装满了使用如下名称的文件:

ts.01094000.crest 
ts.01100600.crest 

我读入了数据框 dfx,并尝试根据其中的 gauge 值找到文件名与之对应的文件,然后执行某些操作。如果文件不存在,我想跳过它并记录它的名字;如果存在,则进入 else 语句。但我收到了如下错误:

Error in if (!file.exists(grep(dfx$gauge[i], allCrestFiles, value = T))) { : 
    argument is of length zero 

代码:

# Question code: for each row of dfx, look up the file whose name contains the
# gauge number and read it; gauges without a file are meant to be skipped and
# recorded in `a`.

# Full paths of everything in the folder.
allFiles <- list.files(path='F:/files/', full.names = TRUE) 

# Collector for the skipped gauges, filled by row index inside the loop.
a<-0 

for(i in 1:nrow(dfx)) 
{ 
    # NOTE: when no file name contains the gauge number, grep(..., value = T)
    # returns character(0); file.exists(character(0)) is logical(0), and if()
    # then stops with "argument is of length zero" -- the error quoted in the
    # question.
    if(!file.exists(grep(dfx$gauge[i], allFiles, value = T))) 
    { 
    # NOTE(review): this stores the grep() result rather than the gauge id;
    # for a gauge with no matching file that result would be empty -- confirm
    # the intent.
    a[i]<- grep(dfx$gauge[i], allFiles, value = T) 
    next 
    } 

    else 
    { 
    # A match was found: read it. pDat is overwritten on every iteration.
    pFile <- grep(dfx$gauge[i], allFiles, value = T) 
    pDat <- fread(pFile) 
    } 
} 

DFX

# Sample of the dfx data frame (looks like dput() output): 10 rows, one per
# event, with an integer `gauge` column, an integer `eventID`, character
# timestamps (`start`, `end`, `peakt`), numeric columns `fd`, `tp`, `rt`, and
# POSIXct columns `startTime`, `peakTime`, `endTime` stored with tzone "UTC".
structure(list(gauge = c(1094000L, 1100600L, 1100600L, 1100600L, 
1100600L, 1100600L, 1100600L, 1100600L, 1100600L, 1100600L), 
    eventID = c(2L, 6L, 10L, 43L, 52L, 53L, 60L, 62L, 70L, 71L 
    ), start = c("5/14/2006 21:00", "10/21/2011 6:15", "8/29/2011 13:00", 
    "5/14/2002 19:00", "9/28/2008 9:00", "7/24/2008 23:30", "5/26/2005 9:15", 
    "9/19/2004 15:30", "5/31/2002 5:30", "6/24/2003 1:45"), end = c("5/16/2006 17:45", 
    "10/22/2011 3:45", "8/30/2011 18:45", "5/16/2002 0:15", "9/29/2008 22:00", 
    "7/27/2008 13:00", "5/28/2005 6:00", "9/20/2004 11:45", "5/31/2002 16:00", 
    "6/24/2003 21:15"), peakt = c("5/15/2006 14:00", "10/21/2011 20:45", 
    "8/29/2011 22:45", "5/15/2002 7:00", "9/28/2008 22:15", "7/25/2008 21:30", 
    "5/27/2005 4:15", "9/19/2004 23:15", "5/31/2002 10:15", "6/24/2003 9:30" 
    ), fd = c(44.75, 21.5, 29.75, 29.25, 37, 61.5, 44.75, 20.25, 
    10.5, 19.5), tp = c(17, 14.5, 9.75, 12, 13.25, 22, 19, 7.75, 
    4.75, 7.75), rt = c(27.75, 7, 20, 17.25, 23.75, 39.5, 25.75, 
    12.5, 5.75, 11.75), startTime = structure(c(1147640400, 1319177700, 
    1314622800, 1021402800, 1222592400, 1216942200, 1117098900, 
    1095607800, 1022823000, 1056419100), tzone = "UTC", class = c("POSIXct", 
    "POSIXt")), peakTime = structure(c(1147701600, 1319229900, 
    1314657900, 1021446000, 1222640100, 1217021400, 1117167300, 
    1095635700, 1022840100, 1056447000), tzone = "UTC", class = c("POSIXct", 
    "POSIXt")), endTime = structure(c(1147801500, 1319255100, 
    1314729900, 1021508100, 1222725600, 1217163600, 1117260000, 
    1095680700, 1022860800, 1056489300), tzone = "UTC", class = c("POSIXct", 
    "POSIXt"))), row.names = c(NA, -10L), .Names = c("gauge", 
"eventID", "start", "end", "peakt", "fd", "tp", "rt", "startTime", 
"peakTime", "endTime"), class = "data.frame") 

可行的解决方案

# Guard for the loop body: skip row i when its gauge has no usable file, and
# remember the gauge id in `a`.
#
# The match is computed first so its length can be tested before anything else
# looks at it: grep(..., value = TRUE) returns character(0) when no file name
# contains the gauge number, and a zero-length value must never reach if() or
# file.exists() unchecked. `||` short-circuits, so file.exists() only runs
# when there is at least one candidate path.
v <- grep(dfx$gauge[i], allFiles, value = TRUE, fixed = TRUE)
if (length(v) == 0L || !all(file.exists(v))) {
  a[i] <- dfx$gauge[i]
  next
}

可能的解决方案(可行):

# Second attempt from the question: test the grep() result for "no match"
# before reading, and record the gauge id of every row that has no file.

# Full paths of everything in the folder.
allFiles <- list.files(path='F:/files/', full.names = TRUE) 

# Collector for the gauges whose file is missing, filled by row index.
a<-0 

for(i in 1:nrow(dfx)) 
{ 
    # File name(s) containing this row's gauge number; character(0) if none.
    fileNamex <- grep(dfx$gauge[i], allFiles, value = T) 
    # NOTE(review): the condition on the next line is missing its closing
    # parenthesis, so the snippet does not parse exactly as written.
    if(identical(fileNamex, character(0)) 
    { 
    a[i]<- dfx$gauge[i] 
    next 
    } 

    else 
    { 
    # NOTE(review): this branch runs for any non-empty match, including more
    # than one matching path; the fread error quoted after this snippet asks
    # for a single string, so a multi-element fileNamex is a likely trigger --
    # confirm against the actual folder contents.
    pDat <- fread(fileNamex) 
    } 
} 

错误信息:'input' 必须是单个字符串,其内容可以是文件名、命令、文件的完整路径、以 'http[s]://'、'ftp[s]://' 或 'file://' 开头的 URL,或者输入数据本身

为什么 fread 会被触发?我的流程控制哪里写错了?

+0

list.files(path = "", pattern = ".crest") - 这将只返回该文件夹中实际存在的文件 –

+1

在调用 'file.exists' 之前,您需要先确保 'grep' 确实在这些文件名中匹配到了内容,例如 'length(grep(dfx$gauge[i], allCrestFiles, value = TRUE)) > 0' 必须为真。 – nrussell

+0

@joel.wilson 我不确定为什么需要这样做。该文件夹中只有那些 .crest 文件,没有别的。你能解释一下吗? – maximusdooku

回答

0

乐于学习更好的方法来做到这一点。但我认为这是有效的。

# Read the data file belonging to each gauge in dfx and record the gauges for
# which the folder holds no file.
#
# Inputs (defined elsewhere): dfx, a data frame with a `gauge` column.
# Results: `a` has one slot per row of dfx, holding the gauge id when that
#          row's file is missing and NA otherwise; `pDat` is the table read
#          for the last row that had a file.

# Full paths of everything in the folder.
allFiles <- list.files(path = 'F:/files/', full.names = TRUE)

# Preallocate with NA so "file found" and "file missing" stay distinguishable
# for every row, including the first.
a <- rep(NA, nrow(dfx))

# seq_len() yields an empty sequence for a zero-row dfx, whereas 1:nrow(dfx)
# would iterate over c(1, 0).
for (i in seq_len(nrow(dfx))) {
  # fixed = TRUE treats the gauge number as a literal substring of the path.
  fileNamex <- grep(dfx$gauge[i], allFiles, value = TRUE, fixed = TRUE)

  # No file name contains this gauge number: note the gauge and move on.
  if (length(fileNamex) == 0L) {
    a[i] <- dfx$gauge[i]
    next
  }

  # fread() accepts exactly one path; if several file names contain the gauge
  # number, read the first and say so instead of failing with the
  # "'input' must be a single character string" error.
  if (length(fileNamex) > 1L) {
    warning("Several files match gauge ", dfx$gauge[i], "; using ",
            fileNamex[1L], call. = FALSE)
  }
  pDat <- fread(fileNamex[1L])
}