2014-09-30 82 views
0

我想把电子邮件的主题(Subject)内容写入一个文本文件,把其他头部字段写入第二个文本文件,最后把邮件正文写入第三个文本文件。我的代码可以提取只占一行的头部字段,但如果某个字段跨越多行,它就无法完整提取(这一点是必须支持的,因为 Subject、To 等字段可能占多行)。请帮帮我…… 我的代码如下(电子邮件内容分类):

程序名称:f2all.c

# include <stdio.h> 
# include <string.h> 

/*
 * Question's attempt (f2all.c): read the e-mail named by argv[1] line by
 * line and copy selected header text into the output files named by
 * argv[2] (subject), argv[3] (other headers) and argv[4] (body).
 *
 * Returns 1 when fewer than four file arguments are given or the input
 * file cannot be opened, 0 otherwise.
 *
 * NOTE(review): this is the code the question is about; its defects are
 * annotated in place below rather than fixed.
 */
int main (int argc, char **argv) { 

    /* Four file names are required, although the usage text names only two. */
    if (argc < 5) { 
     fprintf (stderr, "Error: insufficient input. Usage: %s input_file output_file\n", 
      argv[0]); 
     return 1; 
    } 

    /* NOTE(review): the three output streams are never checked for NULL. */
    FILE *ifp = fopen(argv[1],"r"); 
    FILE *ofp1 = fopen(argv[2],"w");/*this points to a file(eg:f.txt),which should contain contents of subject field only*/ 
    FILE *ofp2= fopen(argv[3],"w");/*this points to a file(eg:g.txt),which should contain contents of all other other header field only*/ 
    FILE *ofp3= fopen(argv[4],"w");/*this points to a file(eg:h.txt),which should contain contents of message body only*/ 

    char *buf = NULL; /* NULL forces getline to allocate space for buf */
    char *buf1 = NULL; /* NOTE(review): never allocated, yet used as the strcpy destination below */ 
    ssize_t read = 0; 
    size_t n = 0; 
    char *ptr = NULL; 

    if (ifp==NULL)  
    {  
     printf("\nFile cannot be opened\n"); 
     return 1; 
    } 
    else 
    { 
     /*
      * Each line is tested on its own, so only the first physical line of a
      * header field can match; continuation lines carry no field name.
      * strstr() also matches the field name anywhere in the line, not just
      * at the start.
      */
     while ((read = getline (&buf, &n, ifp)) != -1) 
     { 
      if (((ptr=strstr(buf,"Subject:")) != 0)) 
      { 
       fprintf(ofp1,"%s",(ptr+8));  /* use (ptr + 8) to trim 'Subject:` away */ 
      } 
      if ((ptr=strstr(buf,"subject :")) != 0) 
      { 
       fprintf(ofp1,"%s",(ptr+9));   /* (ptr + 9) skips "subject :" */
      } 

      /* All three names are 5 characters long, so (ptr + 5) skips whichever matched. */
      if (((ptr=strstr(buf,"Date:")) != 0)||((ptr=strstr(buf,"From:")) != 0)||((ptr=strstr(buf,"X-cc:")) != 0)) 
      { 
       fprintf(ofp2,"%s",(ptr+5));   
      } 
      if ((ptr=strstr(buf,"X-To:")) != 0) 
      { 
       fprintf(ofp2,"%s",(ptr+5));     
      } 
      /*
       * NOTE(review): this else pairs only with the "X-To:" test above, so
       * it runs for every line that lacks "X-To:", including lines already
       * written by the earlier branches.
       */
      else 
      { 
       /* NOTE(review): buf1 is still NULL here, so this copies through a null pointer. */
       strcpy(buf1,buf); 
       fprintf(ofp1,"%s",buf1); 

      } 
     } 
    } 
    /* NOTE(review): free() is declared in <stdlib.h>, which is not among the includes above. */
    if (buf)  /* free memory allocated by getline for buf */ 
     free (buf); 
    fclose(ofp1); 
    fclose(ofp2); 
    fclose(ofp3); /* ofp3 is opened and closed but nothing is ever written to it */
    fclose(ifp); 

    return 0; 
} 

我做编辑,然后运行该程序如下:

princy@PRINCY:~/minipjt/SUBJECT$ cc f2all.c
f2all.c: In function 'main':
f2all.c:85:9: warning: incompatible implicit declaration of built-in function 'free' [enabled by default]
princy@PRINCY:~/minipjt/SUBJECT$ ./a.out 8.txt f.txt g.txt h.txt
Segmentation fault (core dumped)

+1

需要 `#include <stdlib.h>` 才能使用 `free()`。 – 2014-09-30 09:42:11

+1

是的,`buf1` 从未被分配过内存,但你却调用了 `strcpy(buf1, buf);`。 – 2014-09-30 09:57:12

回答

0

您可以在逐行扫描文件的过程中维护一个“上下文”(context),然后根据当前上下文把内容打印到相应的输出文件。否则,您的条件判断只会对每个头部字段的第一行生效。

在这种情况下,保留一组输出文件指针是有意义的。

您的代码从标题条目中剥去关键字。这意味着在阅读输出文件时上下文将会丢失:[email protected]是发件人,收件人还是回复的首选地址?

基于下面的示例实现。

#define _GNU_SOURCE 1 

#include <ctype.h> 
#include <stdio.h> 
#include <stdlib.h> 
#include <string.h> 

/*
 *  Output contexts. The non-negative values are used by main() as
 *  indices into its array of output files; NONE means no output
 *  file has been selected yet.
 */
enum {
    NONE    = -1,
    SUBJECT =  0,
    HEADER  =  1,
    BODY    =  2
};

/*
 *  Check whether the line starts with any of the given keywords in
 *  kw, immediately followed by a colon. The keyword comparison ignores
 *  ASCII case, so "subject:" and "SUBJECT:" both match "Subject"
 *  (mail header field names are case-insensitive).
 *
 *  line  NUL-terminated text to inspect; may be NULL.
 *  kw    array of keyword strings terminated with a NULL entry;
 *        may be NULL.
 *
 *  Returns a pointer to the char after the colon on a match, or NULL
 *  if no keyword matches or either argument is NULL.
 */
const char *is_header(const char *line, const char **kw)
{
    if (line == NULL || kw == NULL) {
        return NULL;
    }

    for (; *kw != NULL; kw++) {
        const char *name = *kw;
        size_t i = 0;

        /*
         * Walk both strings in step. A NUL in line can never equal a
         * non-NUL keyword character, so the scan stops before reading
         * past the end of line.
         */
        while (name[i] != '\0' &&
               tolower((unsigned char)line[i]) ==
               tolower((unsigned char)name[i])) {
            i++;
        }

        /* Whole keyword consumed and the field-name delimiter follows. */
        if (name[i] == '\0' && line[i] == ':') {
            return line + i + 1;
        }
    }

    return NULL;
}

/* Field names whose contents go to the subject output; NULL-terminated. */
const char *header_subject[] = {
    "Subject",
    NULL
};

/* Field names whose contents go to the "other headers" output; NULL-terminated. */
const char *header_other[] = {
    "From",
    "To",
    "Date",
    /* ... */
    NULL
};

/*
 *  Split an e-mail into three files.
 *
 *  Usage: prog input_file subject_file header_file body_file
 *
 *  The input is scanned line by line while a "context" is maintained:
 *    - a line starting with a keyword from header_subject selects SUBJECT,
 *    - a line starting with a keyword from header_other selects HEADER,
 *    - any other line that does not start with white space is an
 *      unrecognised header field and selects NONE (it is skipped, as are
 *      its continuation lines),
 *    - a line starting with white space is a continuation line and keeps
 *      the current context,
 *    - the first empty (or white-space-only) line selects BODY; that line
 *      itself is not written.
 *  Each line is written to the output file of the current context, with
 *  the keyword, the colon and leading white space removed for header lines.
 *
 *  Returns 0 on success, 1 on a usage error or on any I/O failure.
 */
int main(int argc, char **argv)
{
    FILE *ifp = NULL;
    FILE *ofp[3] = { NULL, NULL, NULL };
    char *buf = NULL;
    size_t n = 0;
    int context = NONE;
    int status = 0;
    int i;

    if (argc < 5) {
        fprintf(stderr,
            "Error: insufficient input. "
            "Usage: %s input_file subject_file header_file body_file\n",
            argv[0]);
        return 1;
    }

    ifp = fopen(argv[1], "r");
    if (ifp == NULL) {
        perror(argv[1]);
        return 1;
    }

    /* ofp[] is indexed by context, so the order is SUBJECT, HEADER, BODY. */
    for (i = 0; i < 3; i++) {
        ofp[i] = fopen(argv[i + 2], "w");
        if (ofp[i] == NULL) {
            perror(argv[i + 2]);
            status = 1;
            goto cleanup;
        }
    }

    while (getline(&buf, &n, ifp) != -1) {
        const char *line = buf;

        if (context != BODY) {
            /* Check for context if we are not already in the body */
            int continuation = (*line == ' ' || *line == '\t');

            /* Strip white space from string */
            while (*line == ' ' || *line == '\t') line++;

            if (*line == '\n' || *line == '\r') {
                context = BODY;  /* An empty line starts the body ... */
                continue;        /* ... but we don't print it. */
            }

            if (!continuation) {
                /*
                 * A new header field starts here. Re-evaluate the context
                 * so that an unrecognised field is not appended to the
                 * file of whichever field happened to precede it.
                 */
                const char *p = is_header(buf, header_subject);

                if (p != NULL) {
                    context = SUBJECT;
                } else if ((p = is_header(buf, header_other)) != NULL) {
                    context = HEADER;
                } else {
                    context = NONE;
                }

                if (p != NULL) {
                    line = p;
                    while (*line == ' ' || *line == '\t') line++;
                }
            }
        }

        if (context != NONE) fputs(line, ofp[context]);
    }

    /* getline() returns -1 for both end-of-file and read errors. */
    if (ferror(ifp)) {
        perror(argv[1]);
        status = 1;
    }

cleanup:
    free(buf);
    for (i = 0; i < 3; i++) {
        /* fclose() flushes, so a failure here means lost output. */
        if (ofp[i] != NULL && fclose(ofp[i]) != 0) {
            perror(argv[i + 2]);
            status = 1;
        }
    }
    fclose(ifp);

    return status;
}