2015-07-19 57 views
0

我对package“seqinr”的read.fasta函数有问题。当我用乐器使用它时,它不会创建所需的矢量。如何修复R中的寡核苷酸频率误差

此外,当我使用手动构建的矢量的函数计数时,结果为零表格。

这是我的代码:

library("seqinr") 
library(MASS) 

#GETTING THE FILES AFTER FRAGMENTS OF 500 
files <- list.files(path="/Users/CamilaMV/Desktop/TESIS/",  pattern=".fna500mer..split", full.names=T, recursive=FALSE) 

files 

# SOLO ESTA TOMANDO EL PRIMER ARCHIVO 

#READING THE DIFFERENT FASTA FILES 
ncrna <- lapply(files, function(x) { read.fasta(x,seqonly = T) }) 


seqs<-list() 
for(i in seq_along(ncrna)) 
{ 
    seqs[i]<-list(ncrna[[i]]) 
} 

len1<-length(seqs[[1]]) 

frags1<-list() 
for(j in 1:len1) 
{ 
    frags1[j]<-list(seqs[[1]][[j]]) 
} 

frags1 

#COUNTING TRETRANUCLEOTIDES FOR EACH FRAGMENT 
tetra_frag1<-list() 

# seq_along(frags1) 

#frags1[[1]] 

for(l in seq_along(frags1)) 
{ 
    #tetra[i]<-list(count(ncra[[i]],4)) 
    tetra_frag1[l]<-oligonucleotideFrequency(frags1[[l]],4) 
} 

当我以前做过,计数功能工作,但它不能正常工作了。

于是,我决定用oligonucletideFrequency功能,但它给了我下面的错误:

错误(函数(类,FDEF,mtable): 无法找到函数“oligonucleotideFrequency”签字继承的方法“‘字符’”

但是,当我使用is.character(frags1 [[1]])作为测试,结果为真。

我想获得具有oligonucletide频率以执行一个矩阵PCA。

我想要一个最终表格,其中列是四核苷酸的256个组合,行是这些片段的名称(例如, frag1,frag2,...)类似如下:

AAAA AAAC ... F1 3 5 F2 4 6 F3 5 7 ...

我将apreciate帮助。

+1

尝试'的ncRNA < - lapply(文件,函数(X){ read.fasta(X,seqonly = T) })' 。该函数需要返回值。当然,你应该分配结果列表。 – Roland

+0

这工作获得ncrna,但计数功能它不工作。我有以下代码:SEQ1 <-ncrna [1] frag1_seq1 <-seq1 [[1]] [[1]] tetra_se1_frag1 <-count(frag1_seq1,4) tetra_se1_frag1 – user3275981

回答

1

我可以解决第一个问题和其他问题。最后,我有一个带有4个函数的R脚本,它们生成一个RGB矢量列表。

# GETTING LIBRARIES 

library("seqinr") 
library("ade4") 
library("Biostrings") 


## funcion 1 

Processing_fragments<-function(PATH_FILES){ 

    #GETTING THE FILES AFTER FRAGMENTS OF 500 
    files <- list.files(path=PATH_FILES, pattern=".fna500mer", full.names=T, recursive=FALSE) 

    #GETTING THE FILES READING AS FASTA 
    ncrna <- lapply(files, function(x) { read.fasta(x,seqonly = T) }) 


    fragmentsGeno1<-list() 
    for(k in seq_along(ncrna[1])) 
    { 
    for(l in 1:10484) 
    { 
     fragmentsGeno1[l]<-ncrna[[k]][[l]] 

    } 
    } 

    fragmentsGeno2<-list() 
    for(k in seq_along(ncrna[2])) 
    { 
    for(l in 1:length(ncrna[[2]])) 
    { 
     fragmentsGeno2[l]<-ncrna[[k]][[l]] 

    } 
    } 

    #GETTING ALL FRAGMENTS 

    allFragments<-c(fragmentsGeno1,fragmentsGeno2) 

    return(allFragments) 

} 


## funcion 2 

Getting_frequency_account<-function(allFragments,kmer){ 

    #CONVERTING LOS FRAGMENTOS DE CADA FILE A OBJETOS DE DNAString 

    DNA_String_Set_list_ALL<-list() 

    for(i in seq_along(allFragments)) 
    { 
    DNA_String_Set_list_ALL[i]<-DNAStringSet(allFragments[[i]]) 
    } 

    # counting oligonucleotide 
    countGenome1_Tetra<-lapply(DNA_String_Set_list_ALL,function(x) {oligonucleotideFrequency((x),kmer, as.prob = T) }) 

    # MATRIX FOR THE PCA 

    #names columns 
    col_names<-dimnames(countGenome1_Tetra[[1]]) 
    col_names<-col_names[[2]] 

    #names rows 
    frag_names<-c(paste("frag",c(1:length(allFragments)),sep="")) 

    #matrix for PCA 
    matrix_PCA<-matrix(unlist(countGenome1_Tetra),nrow = length(allFragments),ncol=256,byrow = T,dimnames=list(frag_names,col_names)) 

    return(matrix_PCA) 

} 


# View(matrix_PCA) 


## funcion 3 

Getting_first_three_components<-function(matrix_PCA){ 

    ######## PCA with prcomp######### 

    prcomp_All<-prcomp(matrix_PCA) 

    #obtaing the sum of varianza of the first three components 

    Var<-prcomp_All$sdev^2/sum(prcomp_All$sdev^2) 

    Varianza_3_first_comp<-Var[1:3] 

    Varianza_3_first_comp_Porcent<-Varianza_3_first_comp*100 

    Suma_total<-sum(Varianza_3_first_comp_Porcent) 

    ## obteniendo eigen of first three components 

    loadings_prcomp<-prcomp_All$x 

    #dim(loadings_prcomp) 

    First_three_components<-loadings_prcomp[,c(1,2,3)] 

    return(First_three_components) 

} 

#funcion 4 

Generating_hex_color_codes<-function(First_three_components){ 

    # getting min and max 
    min<-min(First_three_components) 
    max<-max(First_three_components) 

    # getting ranges 
    range_2_color<-c(min,max) 
    range_RGB_color<-c(0,1) 

    #making linear regression 
    lm.out<-lm(range_RGB_color~range_2_color) 

    #getting slope and intercept 
    slope<-lm.out$coefficients[2] 
    intercept<-lm.out$coefficients[1] 

    #normalizing pca results to RGB 
    new_Matriz<-(First_three_components*slope)+intercept 

    new_Matriz<-as.matrix(new_Matriz) 

    #using funcion rgb to generate matrix of hex color code 

    #hex_Color_Matriz<-t(mapply(rgb, split(new_Matriz[,1], new_Matriz[,2],new_Matriz[,3],maxColorValue=255))) 

    hex_Color_Vector<-vector() 

    # list de cada r,g,b de cada fragmento 

    rgb_List_Each_Fragment<-list() 

    row_Final<-length(new_Matriz[,1]) 

    columns_Final<-length(new_Matriz[1,]) 

    for(i in 1:row_Final){ 

    for(j in 1:columns_Final){ 

     red<-new_Matriz[i,1] 
     green<-new_Matriz[i,2] 
     blue<-new_Matriz[i,3] 

     hex_Color_Vector[i]<-rgb(red,green,blue,maxColorValue = 1) 

     rgb_List_Each_Fragment[i]<-list(c(red,green,blue)) 

    } 

    } 

    return(rgb_List_Each_Fragment) 

} 

# Calling all the funcionts in order 

allFragments<-Processing_fragments("/Users/CamilaMV/Desktop/TESIS") 

matrix_PCA<-Getting_frequency_account(allFragments,4) 

First_three_components<-Getting_first_three_components(matrix_PCA) 

Hex_color_list<-Generating_hex_color_codes(First_three_components) 
+0

我将会理解,如果有人能告诉我如何让我的脚本更好:) – user3275981

+0

此代码现在位于http://kamynz.github.io/ – user3275981