2016-11-18 83 views
-1

该程序计算一本书中各个字母的出现频率——但出于某种原因,A、B和Z的计数异常巨大,达到数十亿。我在这里遗漏了什么吗?程序没有报任何错误,但得到的结果是错误的。argv[1]是书的文件,argv[2]是新(输出)文件。字母频率 - 不成比例的高比率

#include <stdlib.h> 
#include <string.h> 
#include <ctype.h> 
#define NUM_LETTERS 26 

/*
 * Letter-frequency counter (the program as posted in the question).
 *
 * Reads the file named by argv[1], counts each letter A-Z case-insensitively,
 * and writes one line per letter (count and count/total) to the file named by
 * argv[2]. Returns 0 at the end; exits with status 1 on a usage error or when
 * the input file cannot be opened.
 *
 * NOTE(review): this is the defective version under discussion; the comments
 * below mark the problems and the code itself is left unchanged.
 * NOTE(review): FILE, fopen, fprintf etc. need <stdio.h>, which is not among
 * the #include lines visible above this function.
 */
int main(int argc, char *argv[]) 
{ 
    FILE *fp,*fp2; 
    int ch, totalcounter = 0, i; 
    /* BUG: both arrays hold 25 elements, but every loop below runs i from 0 to
       NUM_LETTERS-1 (0..25) and 'Z' maps to index 25, so element 25 is
       accessed out of bounds (undefined behaviour). */
    int letters[25]; 
    double letterfrequency[25]; 
    /* Only guarantees argv[1] exists; argv[2] is used further down without
       argc having been checked for it. */
    if(argc < 2) 
    { 
     fprintf(stderr, "Usage: %s <filename>\n", argv[0]); 
     exit(1); 
    } 
    /* Zero the counters and frequencies (writes index 25 as well, see above). */
    for(i=0; i<NUM_LETTERS; i++) 
    { 
     letters[i] = 0; 
     letterfrequency[i] = 0.0; 
    } 
    printf("Opening: %s", argv[1]); 
    fp = fopen(argv[1], "r"); 
    if(!fp) 
    { 
     perror("fopen"); 
     exit(1); 
    } 
    /* Tally letters: upper-case each character, keep only 'A'..'Z'. */
    while((ch=fgetc(fp)) != EOF) 
    { 
     ch = toupper(ch); 
     if('A' <= ch && ch <= 'Z') 
     { 
      /* 65 is the ASCII code of 'A'; this turns the letter into an index 0..25. */
      ch -= 65; 
      letters[ch]++; 
      totalcounter++; 
     } 
    } 

    /* The result of this fopen is not checked before fp2 is written to. */
    fp2 = fopen(argv[2], "w"); 
    /* Relative frequency = count / total letters; totalcounter is still 0 if
       the input contained no letters. */
    for(i=0; i<NUM_LETTERS; i++) 
    { 
     letterfrequency[i] = (double)letters[i]/totalcounter; 
    } 
    /* One output line per letter; i+65 converts the index back to 'A'..'Z'. */
    for(i=0; i<NUM_LETTERS; i++) 
    { 
     fprintf(fp2, "\n%c: Times used: %10d\tFrequency Used: %20.20lf", i+65, letters[i], letterfrequency[i]); 
    } 
    fclose(fp); 
    fclose(fp2); 
    return 0; 
} 
+8

哪种字母表只有25个字母?你正在越界写入数组,所以任何事情都可能发生。 –

+0

用一个只包含字母a到z的简易测试文件来运行,就会出现段错误(segfault)。你的代码还需要一些改进。 – David

+0

您使用字符常量进行范围检查,但为什么又写成这样:`ch -= 65;`?不要使用魔法数字!为什么不直接使用`isalpha`来做检查? – Olaf

回答

0

最根本的问题是,你试图把26个字母的计数塞进只能容纳25个元素的空间里。这样做不会有好结果。

下面是对您的代码做的小幅清理。除了将数组大小更改为NUM_LETTERS之外,它还给出了更清晰的用法提示,检查输出文件是否成功打开,使用isalpha()判断字母,并通过减去'A'(而不是65)把大写字母转换为数组下标。

#include <stdio.h> 
#include <stdlib.h> 
#include <string.h> 
#include <ctype.h> 

#define NUM_LETTERS 26 

/*
 * Letter-frequency counter.
 *
 * Usage: prog infile outfile
 *
 * Reads infile (argv[1]), counts each letter A-Z case-insensitively, and
 * writes one line per letter to outfile (argv[2]) containing the count and
 * the letter's share of all letters seen.
 *
 * Returns 0 on success. Exits with status 1 on a usage error, when either
 * file cannot be opened, or when closing the output file fails.
 */
int main(int argc, char *argv[])
{
    FILE *fp, *fp2;
    int ch, totalcounter = 0, i;
    int letters[NUM_LETTERS] = {0};
    double letterfrequency[NUM_LETTERS] = {0.0};

    if (argc != 3)
    {
        fprintf(stderr, "Usage: %s infile outfile\n", argv[0]);
        exit(1);
    }

    printf("Opening: %s\n", argv[1]);
    fp = fopen(argv[1], "r");
    if (!fp)
    {
        perror("fopen");
        exit(1);
    }

    /* fgetc() yields an unsigned char value or EOF, so ch is a valid
       argument for the <ctype.h> functions. */
    while ((ch = fgetc(fp)) != EOF)
    {
        if (isalpha(ch))
        {
            /* Map 'A'..'Z' (after upper-casing) to index 0..NUM_LETTERS-1. */
            ch = toupper(ch) - 'A';
            letters[ch]++;
            totalcounter++;
        }
    }
    fclose(fp);

    /* Avoid dividing by zero when the input contained no letters at all:
       every frequency simply stays 0.0 in that case. */
    if (totalcounter > 0)
    {
        for (i = 0; i < NUM_LETTERS; i++)
        {
            letterfrequency[i] = (double)letters[i] / totalcounter;
        }
    }

    printf("Opening: %s\n", argv[2]);
    fp2 = fopen(argv[2], "w");
    /* Check the stream that was just opened (fp2), not the input stream. */
    if (!fp2)
    {
        perror("fopen");
        exit(1);
    }

    for (i = 0; i < NUM_LETTERS; i++)
    {
        fprintf(fp2, "%c: Times used: %10d\tFrequency Used: %20.20lf\n", 'A' + i, letters[i], letterfrequency[i]);
    }

    /* fclose() flushes buffered output, so a failure here means lost data. */
    if (fclose(fp2) != 0)
    {
        perror("fclose");
        exit(1);
    }
    return 0;
}

示例输出(以上面的源代码 fq97.c 作为输入):

Opening: fq97.c 
Opening: /dev/stdout 
A: Times used:   17 Frequency Used: 0.02956521739130434784 
B: Times used:   3 Frequency Used: 0.00521739130434782650 
C: Times used:   26 Frequency Used: 0.04521739130434782733 
D: Times used:   13 Frequency Used: 0.02260869565217391366 
E: Times used:   78 Frequency Used: 0.13565217391304348893 
F: Times used:   41 Frequency Used: 0.07130434782608695288 
G: Times used:   13 Frequency Used: 0.02260869565217391366 
H: Times used:   13 Frequency Used: 0.02260869565217391366 
I: Times used:   50 Frequency Used: 0.08695652173913043237 
J: Times used:   0 Frequency Used: 0.00000000000000000000 
K: Times used:   0 Frequency Used: 0.00000000000000000000 
L: Times used:   33 Frequency Used: 0.05739130434782608453 
M: Times used:   8 Frequency Used: 0.01391304347826087008 
N: Times used:   43 Frequency Used: 0.07478260869565217517 
O: Times used:   25 Frequency Used: 0.04347826086956521618 
P: Times used:   26 Frequency Used: 0.04521739130434782733 
Q: Times used:   5 Frequency Used: 0.00869565217391304358 
R: Times used:   52 Frequency Used: 0.09043478260869565466 
S: Times used:   25 Frequency Used: 0.04347826086956521618 
T: Times used:   61 Frequency Used: 0.10608695652173913415 
U: Times used:   26 Frequency Used: 0.04521739130434782733 
V: Times used:   6 Frequency Used: 0.01043478260869565299 
W: Times used:   2 Frequency Used: 0.00347826086956521752 
X: Times used:   3 Frequency Used: 0.00521739130434782650 
Y: Times used:   6 Frequency Used: 0.01043478260869565299 
Z: Times used:   0 Frequency Used: 0.00000000000000000000 
+0

成功了!谢谢 – Submersed24