2017-09-06 89 views
1

我有一个目录,它的多个子目录中包含同名的CSV文件。我想把同名的CSV合并成一个数据框,并把子目录名称作为新的一列加入。在下面的示例中,最终应得到一个名为'data'的数据框和一个名为'name'的数据框,各自包含来自Run1和Run2的观测值,并且每个数据框都新增一列Run。理想的解决方案不需要事先知道CSV的文件名,但任何解决方案都会非常有帮助。(标题:按文件名对子目录中的文件进行rbind)

下面这个问题的提问者遇到了同样的情况,但我需要的是R语言的解决方案:Combining files with same name in r and writing them into different files in R

# Build a scratch directory tree (temp/Run1 and temp/Run2) under the current
# working directory, then switch into `temp` so that the relative paths used
# below resolve inside it.
dir <- getwd()
subDir <- 'temp'
dir.create(subDir)
setwd(file.path(dir, subDir))

invisible(lapply(c('Run1', 'Run2'), dir.create))

# ---- Run 1: a salary table and a name table sharing the same employee IDs ----
employeeID <- c('123', '456', '789')
salary <- c(21000, 23400, 26800)
startdate <- as.Date(c('2010-11-1', '2008-3-25', '2007-3-14'))
first <- c('John', 'Jane', 'Tom')
last <- c('Doe', 'Smith', 'Franks')

data <- data.frame(employeeID, salary, startdate)
name <- data.frame(employeeID, first, last)

write.csv(data, file = "Run1/data.csv", row.names = FALSE, na = "")
write.csv(name, file = "Run1/name.csv", row.names = FALSE, na = "")

# ---- Run 2: same two tables, different employees ----
employeeID <- c('465', '798', '132')
salary <- c(100000, 500000, 300000)
startdate <- as.Date(c('2000-11-1', '2001-3-25', '2003-3-14'))
first <- c('Jay', 'Susan', 'Tina')
last <- c('Jones', 'Smith', 'Thompson')

data <- data.frame(employeeID, salary, startdate)
name <- data.frame(employeeID, first, last)

write.csv(data, file = "Run2/data.csv", row.names = FALSE, na = "")
write.csv(name, file = "Run2/name.csv", row.names = FALSE, na = "")

# dplyr supplies both `%>%` and bind_rows(), which are used below.
library(dplyr)

# Collect every CSV beneath the working directory, sub-directories included.
# Restricting the listing to *.csv keeps read.csv() away from unrelated files.
files <- list.files(pattern = "\\.csv$", recursive = TRUE, ignore.case = TRUE)

# Read each CSV into one element of a list. The object is deliberately not
# called `list`, so it does not shadow the name of base::list().
df_list <- lapply(files, read.csv)

# Label each data frame with its relative path minus the extension
# (e.g. "Run1/data"). file_path_sans_ext() strips only the trailing extension,
# so a dot inside a directory name does not truncate the label.
names(df_list) <- tools::file_path_sans_ext(files)

# Stack everything into a single data frame; `.id = 'run'` stores each label
# in a `run` column. Note that the label is "<run>/<file>" and that frames
# read from differently named CSVs end up stacked together here: to get one
# result per CSV name, the data frames still have to be grouped by file name
# before binding.
all_dat <- df_list %>%
  bind_rows(.id = 'run')

回答

1
#' Combine same-named CSV files from sub-directories into one data frame
#'
#' Searches the current working directory recursively for CSV files whose
#' file name matches `pattern`, reads each one with read.csv(), and stacks
#' them row-wise. A leading `run` column records the directory (relative to
#' the working directory) each row came from.
#'
#' @param pattern A regular expression matched against file names, as in
#'   list.files(); e.g. "name" picks up "Run1/name.csv" and "Run2/name.csv".
#' @return A data.frame holding the rows of every matching CSV plus the `run`
#'   column. If nothing matches, an empty data.frame is returned.
files_to_df <- function(pattern) {

    filenames <- list.files(recursive = TRUE, pattern = pattern)

    # Only CSV files can be parsed by read.csv(); drop any other file that
    # happens to match the pattern (e.g. "name.txt").
    filenames <- filenames[grepl("\\.csv$", filenames, ignore.case = TRUE)]

    df_list <- lapply(filenames, read.csv, header = TRUE)

    # Label each data frame with its containing directory. Using dirname()
    # instead of a fixed-width substring keeps the label correct for run
    # names of any length ("Run10", "baseline", ...).
    names(df_list) <- dirname(filenames)

    # bind_rows() copies the list names into the `run` column. The namespace
    # is spelled out so the function works without dplyr being attached.
    as.data.frame(dplyr::bind_rows(df_list, .id = "run"))
}

# Example call: stack every CSV whose file name matches "name".
name_df <- files_to_df("name")
相关问题