2012-07-23 56 views
0

我的脚本需要一个输入文件,文件如下所示。从fprintf获取虚假数据

 
chr11 3037016 chr11_3037016 8 39 6 44 4 24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 11 18 
chr11 nonsense_mediated_decay exon 3036949 3037109 . - . gene_id "ENSMUSG00000023764"; transcript_id "ENSMUST00000126746"; exon_number "11"; gene_name "Sfi1"; gene_biotype "protein_coding"; transcript_name "Sfi1-015"; 
chr11 3037016 chr11_3037016 8 39 6 44 4 24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 11 18 
chr11 nonsense_mediated_decay exon 3036949 3037109 . - . gene_id "ENSMUSG00000023764"; transcript_id "ENSMUST00000132893"; exon_number "17"; gene_name "Sfi1"; gene_biotype "protein_coding"; transcript_name "Sfi1-003"; 
chr11 3037016 chr11_3037016 8 39 6 44 4 24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 11 18 
chr11 protein_coding exon 3036949 3037109 . - . gene_id "ENSMUSG00000023764"; transcript_id "ENSMUST00000066391"; exon_number "22"; gene_name "Sfi1"; gene_biotype "protein_coding"; transcript_name "Sfi1-001"; 
chr11 3037016 chr11_3037016 8 39 6 44 4 24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 11 18 
chr11 protein_coding exon 3036949 3037109 . - . gene_id "ENSMUSG00000023764"; transcript_id "ENSMUST00000081318"; exon_number "23"; gene_name "Sfi1"; gene_biotype "protein_coding"; transcript_name "Sfi1-017"; 
chr11 3037016 chr11_3037016 8 39 6 44 4 24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 11 18 
chr11 protein_coding exon 3036949 3037109 . - . gene_id "ENSMUSG00000023764"; transcript_id "ENSMUST00000093407"; exon_number "16"; gene_name "Sfi1"; gene_biotype "protein_coding"; transcript_name "Sfi1-202"; 
chr11 3037016 chr11_3037016 8 39 6 44 4 24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 11 18 
chr11 protein_coding exon 3036949 3037109 . - . gene_id "ENSMUSG00000023764"; transcript_id "ENSMUST00000101649"; exon_number "22"; gene_name "Sfi1"; gene_biotype "protein_coding"; transcript_name "Sfi1-203"; 

chr11 3037016 chr11_3037016 8 39 6 44 4 24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 11 18 
chr11 protein_coding exon 3036949 3037109 . - . gene_id "ENSMUSG00000023764"; transcript_id "ENSMUST00000101655"; exon_number "22"; gene_name "Sfi1"; gene_biotype "protein_coding"; transcript_name "Sfi1-020"; 
chr11 3037016 chr11_3037016 8 39 6 44 4 24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 11 18 
chr11 protein_coding exon 3036949 3037109 . - . gene_id "ENSMUSG00000023764"; transcript_id "ENSMUST00000140846"; exon_number "20"; gene_name "Sfi1"; gene_biotype "protein_coding"; transcript_name "Sfi1-016"; 

这里是我的脚本:

#include <stdio.h> 
#include <string.h> 
int main(void) 
{ 
    static const char filename[] = "input_file.txt"; 
    FILE *file3 = fopen("thirdstep2a.txt","w"); 
    FILE *file = fopen(filename, "r"); 

    if (file != NULL) 
    { 

     char line[BUFSIZ],line2[BUFSIZ] ; 
     char one[20], three[22], four[20], a1[20],a2[20],a3[20],a4[20],a5[20],a6[20],a7[20],a8[20], a9[20], a10[20],a11[20],a12[20],a13[20],a14[20],a15[20],a16[20],a17[20],a18[20],a19[20],a20[20],a21[20],a22[20],a23[20],a24[20],a25[20],a26[20],a27[20],a28[20],a29[20],a30[20],a31[20],a32[20],a33[20],a34[20]; 

     char one2[20],three2[20],four2[26], six2[26], seven2[20], eight2[11], nine2[20]; 

    fgets(line, sizeof line, file); 
    fgets(line2, sizeof line2, file); 
    while(1) 
    { 
     sscanf(line, "%19s %21s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s %19s", one, three, four, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20, a21, a22, a23, a24, a25, a26, a27, a28, a29, a30, a31, a32, a33, a34); 
     sscanf(line2, "%*s %*s %10s %19s %19s %*s %*s %*s %*s %25s %*s %25s %*s %19s %*s %19s", eight2, one2, three2, four2, six2, seven2, nine2); 

     fprintf(file3,"%s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s \t %s\t %s\t %s\t %s\t %s\t %s\t", one, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20, a21, a22, a23, a24, a25, a26, a27, a28, a29, a30, a31, a32, a33, a34); 

     fprintf(file3,"%s\t %s\t %s\t %s\t %s\t\n", four2, six2, seven2, one2, three2); 
     if(fgets(line, sizeof line, file) == NULL) 
      break; 

     if(fgets(line2, sizeof line, file) == NULL) 
      break; 
     } 
    } 
    else 
    { 
     perror(filename); 
    } 
    return 0; 
} 

我的输出是这样的:

 
chr11 8 39 6 44 "Sfi1";  24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 "ENSMUSG00000023764"; "ENSMUST00000126746"; "11"; 3036949  3037109  
chr11 8 39 6 44 "Sfi1";  24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 "ENSMUSG00000023764"; "ENSMUST00000132893"; "17"; 3036949  3037109  
chr11 8 39 6 44 "Sfi1";  24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 "ENSMUSG00000023764"; "ENSMUST00000066391"; "22"; 3036949  3037109  
chr11 8 39 6 44 "Sfi1";  24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 "ENSMUSG00000023764"; "ENSMUST00000081318"; "23"; 3036949  3037109  
chr11 8 39 6 44 "Sfi1";  24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 "ENSMUSG00000023764"; "ENSMUST00000093407"; "16"; 3036949  3037109  
chr11 8 39 6 44 "Sfi1";  24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 "ENSMUSG00000023764"; "ENSMUST00000101649"; "22"; 3036949  3037109  
chr11 8 39 6 44 "Sfi1";  24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 "ENSMUSG00000023764"; "ENSMUST00000101655"; "22"; 3036949  3037109  
chr11 8 39 6 44 "Sfi1";  24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 "ENSMUSG00000023764"; "ENSMUST00000140846"; "20"; 3036949  3037109  
chr11 8 39 6 44 "Sfi1";  24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 "ENSMUSG00000023764"; "ENSMUST00000153425"; "21"; 3036949  3037109  
chr11 8 39 6 44 "Sfi1";  24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 "ENSMUSG00000023764"; "ENSMUST00000137633"; "3"; 3036949  3037109  
chr11 8 39 6 44 "Sfi1";  24 7 22 6 31 7 44 8 39 13 41 10 37 9 23 18 32 8 37 2 9 16 33 9 29 4 7 5 30 "ENSMUSG00000023764"; "ENSMUST00000138126"; 

正如你可以看到我的输出看起来不正确的。我从一个字符串中得到一些错误的数据。我无法弄清楚错误。

+2

好吧,关于格式化问题:首先,**不要**对每行代码使用反引号。相反,复制粘贴代码,突出显示所有代码,然后按代码('{}')按钮。你可以对其他块做同样的事情,但是我选择了'pre',因为它没有语法高亮。 – chris 2012-07-23 03:57:39

+4

这些是***变量的可怕***名称。 – dreamlax 2012-07-23 03:58:52

+1

我尽我所能修复了代码。但克里斯是对的,在这里学习格式化工具,并且不要使用反引号 – abelenky 2012-07-23 03:58:55

回答

3

当代码被编译(在文件bio.c),编译器说:

$ gcc -O3 -g -Wall -Wextra -std=c99 bio.c -o bio 
bio.c: In function ‘main’: 
bio.c:24: warning: too few arguments for format 
bio.c:24: warning: too few arguments for format 
bio.c:17: warning: unused variable ‘two2’ 
bio.c:15: warning: unused variable ‘five’ 
bio.c:9: warning: unused variable ‘i’ 
$ 

我也懒得来更精确地是哪里的问题制定(第24行是sscanf()line2 ),但警告表明代码有问题。

1

读你编译器输出。 GCC给出这个

../../../vmc/_tests/chom.c:23:10: warning: too few arguments for format 
../../../vmc/_tests/chom.c:23:10: warning: too few arguments for format 
../../../vmc/_tests/chom.c:16:22: warning: unused variable ‘two2’ 
../../../vmc/_tests/chom.c:14:32: warning: unused variable ‘five’ 
../../../vmc/_tests/chom.c:8:8: warning: unused variable ‘i’ 

你想特别担心前两个。

+0

我刚刚清除了所有的警告。 – chom 2012-07-23 04:45:56

+1

现在可以工作吗?如果不是,问题仍然是一样的,还是改变了? – Mawg 2012-07-23 06:10:48