2016-10-05 63 views
1

我有一个小的Brainfuck解释器,我写了一段时间;在编译它时,我注意到gcc的优化开关的输出大小并不是我所期望的。以下是我编的程序:意外的文件大小,同时变化gcc优化开关

struct node { 
    struct node *prev; 
    int val; 
    struct node *jump; 
    struct node *next; 
}; 
typedef struct node node; 
node *newnode(); 
node *append(node *n); 
node *prepend(node *n); 
void erase(node *n); 
int pop(node *n); 
void doop(node *n); 
node *link(node *n); 

#include <stdlib.h> 

// allocates a new node and sets all the things to zero 
node *newnode() { 
    node *n = malloc(sizeof(node)); 
    n->prev = n->next = n->jump = NULL; 
    n->val = 0; 
    return n; 
} 

// appends a node to a given node. assumes it is an end node 
node *append(node *n) { 
    n->next = newnode(); 
    n->next->prev = n; 
    return n->next; 
} 

// prepend node to list. assumes it is the first node 
node *prepend(node *n) { 
    n->prev = newnode(); 
    n->prev->next = n; 
    return n->prev; 
} 

// navigates to first node, then frees all the nodes, iterating to the end 
void erase(node *n) { 
    node *m; 
    while (n->prev) 
     n = n->prev; 
    while (n) { 
     m = n->next; 
     free(n); 
     n = m; 
    } 
} 

// pops any node and links any connected nodes to each other 
// returns value of erased node 
int pop(node *n) { 
    int ret; 
    if (n->prev) 
     n->prev->next = n->next; 
    if (n->next) 
     n->next->prev = n->prev; 
    ret = n->val; 
    free(n); 
    return ret; 
} 

#include <stdio.h> 

// bf tokens. all other are ignored 
#define LSEEK  '<' 
#define RSEEK  '>' 
#define INCREMENT '+' 
#define DECREMENT '-' 
#define STDOUT  '.' 
#define STDIN  ',' 
#define LBRACKET '[' 
#define RBRACKET ']' 

// memory used by bf program. is this really turing-compliant? 
char mem[30000] = { 0 }; 
// pointer used by bf program 
char *ptr = mem; 

// do operation beginning with given node 
void doop(node *n) { 
    // copy node pointer in case we need the head of the list later 
    node *m = n; 
    // loop while node pointer is a valid one; e.g. stop at EOF 
    while (m) { 
     switch (m->val) { 
      // most of these are pretty self-explanatory 
      case LSEEK: 
       ptr--; 
       break; 
      case RSEEK: 
       ptr++; 
       break; 
      case INCREMENT: 
       (*ptr)++; 
       break; 
      case DECREMENT: 
       (*ptr)--; 
       break; 
      case STDOUT: 
       printf("%c", *ptr); 
       fflush(stdout); 
       break; 
      case STDIN: 
       *ptr = getchar(); 
       break; 
      case LBRACKET: 
       // jump to closing bracket if value at pointer is false 
       if (!*ptr) 
        m = m->jump; 
       break; 
      case RBRACKET: 
       // jump back to opening bracket if value at pointer is true 
       if (*ptr) 
        m = m->jump; 
       break; 
     } 
     // proceed to next instruction 
     m = m->next; 
    } 
} 

// finds and references each bracket instruction to its corresponding bracket 
node *link(node *n) { 
    // make a copy of the list head 
    node *m = n; 
    // make a temporary list to contain bracket links 
    node *links = newnode(); 
    // while a valid node 
    while (m) { 
     // switch to bracket type 
     switch (m->val) { 
      case LBRACKET: 
       // this is an opening bracket, so we temporarily store it's 
       // location, and append the list as it grows 
       if (links->jump) 
        links = append(links); 
       links->jump = m; 
       break; 
      case RBRACKET: 
       // this is the closing bracket, so we save the temporarily 
       // stored link address to the closing bracket node, and 
       // connect the opening bracket node to the closing also; 
       // popping the list as we no longer need the data 
       m->jump = links->jump; 
       links->jump->jump = m; 
       if (links->prev) { 
        links = links->prev; 
        pop(links->next); 
       } 
       break; 
     } 
     // increment to next character 
     m = m->next; 
    } 
    // erase all the nodes in the temporary linked list 
    erase(links); 
    // return the head of the list 
    return n; 
} 

#include <signal.h> 

// press ctrl-c then enter to quit if not running from a file 
int run = 1; 
void quit(int val) { 
    run = 0; 
} 

int main(int argc, char** argv) { 
    // catch crtl-c 
    signal(SIGINT, quit); 
    int c; 
    // our text structure is a linked list 
    node *text, *text_start; 
    if (argc > 1) { 
     // print the file name 
     printf("%s\n", argv[1]); 
     // open the file and read it to the linked list 
     FILE *f = fopen(argv[1], "r"); 
     if (f == NULL) return 1; 
     text = text_start = newnode(); 
     while ((c = fgetc(f)) != EOF) { 
      if (text->val) 
       text = append(text); 
      text->val = c; 
     } 
     fclose(f); 
     // link all the loops/ gotos, then process all instructions 
     doop(link(text_start)); 
     // free all linked list nodes 
     erase(text_start); 
     // we just ran a file, so no interpreter 
     run = 0; 
    } 
    // repeatedly read and execute only one line until interrupted 
    while (run) { 
     // linked list generated for each line of input 
     text = text_start = newnode(); 
     // read stdin buffer to list 
     while ((c = getchar()) != '\n') { 
      if (text->val) 
       text = append(text); 
      text->val = c; 
     } 
     // link all the loops/ gotos, then process the 
     // instructions for the line 
     doop(link(text_start)); 
     // free all linked list nodes 
     erase(text_start); 
    } 
    return 0; 
} 

最后,以下是编译程序意外文件大小从派生:

$ gcc brainfuck.c -o brainfuck -Os && ls brainfuck -l && for i in `seq 0 3`; do gcc brainfuck.c -o brainfuck -O$i && ls brainfuck -l; done && gcc --version 
-rwxrwxr-x 1 m m 13552 Oct 4 18:31 brainfuck 
-rwxrwxr-x 1 m m 13544 Oct 4 18:31 brainfuck 
-rwxrwxr-x 1 m m 13600 Oct 4 18:31 brainfuck 
-rwxrwxr-x 1 m m 13600 Oct 4 18:31 brainfuck 
-rwxrwxr-x 1 m m 13600 Oct 4 18:31 brainfuck 
gcc (Ubuntu 5.4.0-6ubuntu1~16.04.2) 5.4.0 20160609 
+1

你以为是什么意思? –

+0

@ M.M我预计文件大小会有很大的变化。 – motoku

+2

也许在代码中没有太多的优化。您可以比较不同版本之间生成的程序集,以确切了解发生了什么变化。 –

回答

4

了大量小的二进制文件的大小将是样板启动,加上调试符号表,以及全局数据区域和其他部分的大量零填充。对null填充进行二进制检查。要得到一个有点比例更符合实际,去掉符号。

您应该只是比较TEXT部分的大小,即指令流而不是整个Unix可执行文件二进制文件的大小。

优化代码还有一个非常大小的不可预测的影响。展开循环可以延长代码以及内联,但是删除冗余内存加载/存储,常见子表达式消除,死代码消除和常量折叠可以减小大小。因此,在总结这些对立的力量时,你的视野非常不透明。如果你真的想学习一些东西,请一行一行地研究组件。见gcc -S并回报。

此外,如果您花费大部分能量将数据传输到I/O流和从I/O流传输数据,许多代码将不会很好地优化。优化适用于CPU绑定和内存绑定材质。

% gcc -OS -o bfos brainfuck.C# -OS is optimize but keep code small 
% objdump -h bfos | grep text 
12 .text   00000452 0000000000400730 0000000000400730 00000730 2**4 

% gcc -O0 -o bfo0 brainfuck.C# -O0 is default: no optimizations 
% objdump -h bfo0 | grep text 
12 .text   00000652 0000000000400730 0000000000400730 00000730 2**4 

0x452/0x652 =巨大的差异。

然而二进制尺寸大许多倍,有填充,并没有什么做的编译后的代码大小:

% ls -l bfo0 bfos 
-rwxr-xr-x 1 root root 13461 Oct 4 22:42 bfo0 
-rwxr-xr-x 1 root root 13469 Oct 4 22:41 bfos 

% gcc --version 
gcc (Ubuntu 4.8.4-2ubuntu1~14.04.3) 4.8.4 

最后,零填充的很长一段(以下简称“*”表示所有的重复,所以从0x000760到0x0006700,都是零字节)

% od -x bfo0 | grep -1 '\*' 
0000720 0000 0000 0000 0000 0000 0000 0000 0000 
* 
0000760 0000 0000 0000 0000 0010 0000 0000 0000 
-- 
0006700 0a8c 0040 0000 0000 0a8c 0040 0000 0000 
* 
0007040 09c9 0040 0000 0000 0a8c 0040 0000 0000 
-- 
0007100 0a8c 0040 0000 0000 0a8c 0040 0000 0000 
* 
0007420 0a8c 0040 0000 0000 0a51 0040 0000 0000 
-- 
0010640 0000 0000 0000 0000 0000 0000 0000 0000 
* 
0017020 07f0 0040 0000 0000 07d0 0040 0000 0000 
-- 
0017640 0000 0000 0000 0000 0000 0000 0000 0000 
* 
0020000 1e28 0060 0000 0000 0000 0000 0000 0000 
-- 
0020720 0000 0000 0000 0000 0000 0000 0000 0000 
* 
0021000 0000 0000 0000 0000 001b 0000 0001 0000 
-- 
0024500 0000 0000 0000 0000 0000 0000 0000 0000 
* 
0024540 0000 0000 0003 0001 0238 0040 0000 0000