2011-05-30 72 views
7

我想用boost :: spirit库编写一个python解析器。下面是代码:用boost :: spirit解析python语法 - 问题

template <typename Iterator> 
class Parser : public qi::grammar<Iterator, space_type> 
{ 
public: 
    Parser() : Parser::base_type(small_stmt) 
    { 
     NEWLINE = lit("<NEWLINE>"); 
     INDENT = lit("<INDENT>"); 
     DEDENT = lit("<DEDENT>"); 
     ENDMARKER = lit("<EOT>"); 
     NAME = identifier.alias(); 
     NUMBER = integer|longinteger|floatnumber|imagnumber; 
     STRING = stringliteral.alias(); 

     identifier = (alpha | '_') >> *(alpha | digit | '_'); 

     stringliteral = -stringprefix >> (shortstring | longstring); 
     stringprefix = lit("r") | lit("u") | lit("ur") | lit("R") | lit("U") | lit("UR") | lit("Ur") | lit("uR") | lit("b") | lit("B") | lit("br") | lit("Br") | lit("bR") | lit("BR"); 
     shortstring = "'" >> *(shortstringitem - "'") >> "'" | "\"" >> *(shortstringitem - "\"") >> "\""; 
     longstring = "'''" >> *longstringitem >> "'''" | "\"\"\"" >> *longstringitem >> "\"\"\""; 
     shortstringitem = shortstringchar | escapeseq; 
     longstringitem = longstringchar | escapeseq; 
     shortstringchar = char_ - "\\" - "\n"; 
     longstringchar = char_ - "\\"; 
     escapeseq = '\\' >> char_; 

     longinteger = integer >> (lit("l") | lit("L")); 
     integer = decimalinteger | octinteger | hexinteger | bininteger; 
     decimalinteger = nonzerodigit >> *digit | lit("0"); 
     octinteger = lit("0") >> (lit("o") | lit("O")) >> +octdigit | lit("0") >> +octdigit; 
     hexinteger = lit("0") >> (lit("x") | lit("X")) >> +hexdigit; 
     bininteger = lit("0") >> (lit("b") | lit("B")) >> +bindigit; 
     nonzerodigit = char_('1', '9'); 
     octdigit = char_('0', '7'); 
     bindigit = lit("0") | lit("1"); 
     hexdigit = digit | char_('a', 'f') | char_('A', 'F'); 

     floatnumber = pointfloat | exponentfloat; 
     pointfloat = -intpart >> fraction | intpart >> "."; 
     exponentfloat = (intpart | pointfloat) >> exponent; 
     intpart = +digit; 
     fraction = "." >> +digit; 
     exponent = (lit("e") | lit("E")) >> -(lit("+") | lit("-")) >> +digit; 

     imagnumber = (floatnumber | intpart) >> (lit("j") | lit("J")); 

     single_input = NEWLINE|simple_stmt|compound_stmt >> NEWLINE; 
     file_input = *(NEWLINE|stmt) >> ENDMARKER; 
     eval_input = testlist >> *NEWLINE >> ENDMARKER; 
     decorator = lit("@") >> dotted_name >> -(lit("(") >> -(arglist) >> lit(")")) >> NEWLINE; 
     decorators = +decorator; 
     decorated = decorators >> (classdef|funcdef); 
     funcdef = lit("def") >> NAME >> parameters >> lit(":") >> suite; 
     parameters = lit("(") >> -(varargslist) >> lit(")"); 
     varargslist = (*(fpdef >> -(lit("=") >> test) >> lit(",")) >> (lit("*") >> NAME >> -(lit(",") >> lit("**") >> NAME)|lit("**") >> NAME)|fpdef >> -(lit("=") >> test) >> *(lit(",") >> fpdef >> -(lit("=") >> test)) >> -(lit(","))); 
     fpdef = NAME|lit("(") >> fplist >> lit(")"); 
     fplist = fpdef >> *(lit(",") >> fpdef) >> -(lit(",")); 
     stmt = simple_stmt|compound_stmt; 
     simple_stmt = small_stmt >> *(lit(";") >> small_stmt) >> -(lit(";")) >> NEWLINE; 
     small_stmt = (expr_stmt|print_stmt|del_stmt|pass_stmt|flow_stmt|import_stmt|global_stmt|exec_stmt|assert_stmt); 
     expr_stmt = testlist >> (augassign >> (yield_expr|testlist)|*(lit("=") >> (yield_expr|testlist))); 
     augassign = (lit("+=")|lit("-=")|lit("*=")|lit("/=")|lit("%=")|lit("&=")|lit("|=")|lit("^=")|lit("<<=")|lit(">>=")|lit("**=")|lit("//=")); 
     print_stmt = lit("print") >> (-(test >> *(lit(",") >> test) >> -(lit(",")))|lit(">>") >> test >> -(+(lit(",") >> test) >> -(lit(",")))); 
     del_stmt = lit("del") >> exprlist; 
     pass_stmt = lit("pass"); 
     flow_stmt = break_stmt|continue_stmt|return_stmt|raise_stmt|yield_stmt; 
     break_stmt = lit("break"); 
     continue_stmt = lit("continue"); 
     return_stmt = lit("return") >> -(testlist); 
     yield_stmt = yield_expr.alias(); 
     raise_stmt = lit("raise") >> -(test >> -(lit(",") >> test >> -(lit(",") >> test))); 
     import_stmt = import_name|import_from; 
     import_name = lit("import") >> dotted_as_names; 
     import_from = (lit("from") >> (*lit(".") >> dotted_name|+lit(".")) >> lit("import") >> (lit("*")|lit("(") >> import_as_names >> lit(")")|import_as_names)); 
     import_as_name = NAME >> -(lit("as") >> NAME); 
     dotted_as_name = dotted_name >> -(lit("as") >> NAME); 
     import_as_names = import_as_name >> *(lit(",") >> import_as_name) >> -(lit(",")); 
     dotted_as_names = dotted_as_name >> *(lit(",") >> dotted_as_name); 
     dotted_name = NAME >> *(lit(".") >> NAME); 
     global_stmt = lit("global") >> NAME >> *(lit(",") >> NAME); 
     exec_stmt = lit("exec") >> expr >> -(lit("in") >> test >> -(lit(",") >> test)); 
     assert_stmt = lit("assert") >> test >> -(lit(",") >> test); 
     compound_stmt = if_stmt|while_stmt|for_stmt|try_stmt|with_stmt|funcdef|classdef|decorated; 
     if_stmt = lit("if") >> test >> lit(":") >> suite >> *(lit("elif") >> test >> lit(":") >> suite) >> -(lit("else") >> lit(":") >> suite); 
     while_stmt = lit("while") >> test >> lit(":") >> suite >> -(lit("else") >> lit(":") >> suite); 
     for_stmt = lit("for") >> exprlist >> lit("in") >> testlist >> lit(":") >> suite >> -(lit("else") >> lit(":") >> suite); 
     try_stmt = (lit("try") >> lit(":") >> suite >> (+(except_clause >> lit(":") >> suite) >> -(lit("else") >> lit(":") >> suite) >> -(lit("finally") >> lit(":") >> suite)|lit("finally") >> lit(":") >> suite)); 
     with_stmt = lit("with") >> with_item >> *(lit(",") >> with_item) >> lit(":") >> suite; 
     with_item = test >> -(lit("as") >> expr); 
     except_clause = lit("except") >> -(test >> -((lit("as")|lit(",")) >> test)); 
     suite = simple_stmt|NEWLINE >> INDENT >> +stmt >> DEDENT; 
     testlist_safe = old_test >> -(+(lit(",") >> old_test) >> -(lit(","))); 
     old_test = or_test|old_lambdef; 
     old_lambdef = lit("lambda") >> -(varargslist) >> lit(":") >> old_test; 
     test = or_test >> -(lit("if") >> or_test >> lit("else") >> test)|lambdef; 
     or_test = and_test >> *(lit("or") >> and_test); 
     and_test = not_test >> *(lit("and") >> not_test); 
     not_test = lit("not") >> not_test|comparison; 
     comparison = expr >> *(comp_op >> expr); 
     comp_op = lit("<")|lit(">")|lit("==")|lit(">=")|lit("<=")|lit("<>")|lit("!=")|lit("in")|lit("not in")|lit("is")|lit("is not"); 
     expr = xor_expr >> *(lit("|") >> xor_expr); 
     xor_expr = and_expr >> *(lit("^") >> and_expr); 
     and_expr = shift_expr >> *(lit("&") >> shift_expr); 
     shift_expr = arith_expr >> *((lit("<<")|lit(">>")) >> arith_expr); 
     arith_expr = term >> *((lit("+")|lit("-")) >> term); 
     term = factor >> *((lit("*")|lit("/")|lit("%")|lit("//")) >> factor); 
     factor = (lit("+")|lit("-")|lit("~")) >> factor|power; 
     power = atom >> *trailer >> -(lit("**") >> factor); 
     atom = (lit("(") >> -(yield_expr|testlist_comp) >> lit(")")|lit("-(") >> -(listmaker) >> lit(")")|lit("{") >> -(dictorsetmaker) >> lit("}")|lit("`") >> testlist1 >> lit("`")|NAME|NUMBER|+STRING); 
     listmaker = test >> (list_for|*(lit(",") >> test) >> -(lit(","))); 
     testlist_comp = test >> (comp_for|*(lit(",") >> test) >> -(lit(","))); 
     lambdef = lit("lambda") >> -(varargslist) >> lit(":") >> test; 
     trailer = lit("(") >> -(arglist) >> lit(")")|lit("[") >> subscriptlist >> lit("]")|lit(".") >> NAME; 
     subscriptlist = subscript >> *(lit(",") >> subscript) >> -(lit(",")); 
     subscript = lit(".") >> lit(".") >> lit(".")|test|-(test) >> lit(":") >> -(test) >> -(sliceop); 
     sliceop = lit(":") >> -(test); 
     exprlist = expr >> *(lit(",") >> expr) >> -(lit(",")); 
     testlist = test >> *(lit(",") >> test) >> -(lit(",")); 
     dictorsetmaker = ((test >> lit(":") >> test >> (comp_for|*(lit(",") >> test >> lit(":") >> test) >> -(lit(","))))|(test >> (comp_for|*(lit(",") >> test) >> -(lit(","))))); 
     classdef = lit("class") >> NAME >> -(lit("(") >> -(testlist) >> lit(")")) >> lit(":") >> suite; 
     arglist = *(argument >> lit(",")) >> (argument >> -(lit(","))|lit("*") >> test >> *(lit(",") >> argument) >> -(lit(",") >> lit("**") >> test)|lit("**") >> test); 
     argument = test >> -(comp_for)|test >> lit("=") >> test; 
     list_iter = list_for|list_if; 
     list_for = lit("for") >> exprlist >> lit("in") >> testlist_safe >> -(list_iter); 
     list_if = lit("if") >> old_test >> -(list_iter); 
     comp_iter = comp_for|comp_if; 
     comp_for = lit("for") >> exprlist >> lit("in") >> or_test >> -(comp_iter); 
     comp_if = lit("if") >> old_test >> -(comp_iter); 
     testlist1 = test >> *(lit(",") >> test); 
     encoding_decl = NAME.alias(); 
     yield_expr = lit("yield") >> -(testlist); 


    } 

    // LEXEMS 
    qi::rule<Iterator, space_type> NEWLINE; 
    qi::rule<Iterator, space_type> INDENT; 
    qi::rule<Iterator, space_type> DEDENT; 
    qi::rule<Iterator, space_type> ENDMARKER; 
    qi::rule<Iterator, space_type> NAME; 
    qi::rule<Iterator, space_type> NUMBER; 
    qi::rule<Iterator, space_type> STRING; 

    // IDENTIFIER 
    qi::rule<Iterator, space_type> identifier; 

    // STRING LITERAL 
    qi::rule<Iterator, space_type> stringliteral; 
    qi::rule<Iterator, space_type> stringprefix; 
    qi::rule<Iterator, space_type> shortstring; 
    qi::rule<Iterator, space_type> longstring; 
    qi::rule<Iterator, space_type> shortstringitem; 
    qi::rule<Iterator, space_type> longstringitem; 
    qi::rule<Iterator, space_type> shortstringchar; 
    qi::rule<Iterator, space_type> longstringchar; 
    qi::rule<Iterator, space_type> escapeseq; 

    // INTEGER LITERAL 
    qi::rule<Iterator, space_type> longinteger; 
    qi::rule<Iterator, space_type> integer; 
    qi::rule<Iterator, space_type> decimalinteger; 
    qi::rule<Iterator, space_type> octinteger; 
    qi::rule<Iterator, space_type> hexinteger; 
    qi::rule<Iterator, space_type> bininteger; 
    qi::rule<Iterator, space_type> nonzerodigit; 
    qi::rule<Iterator, space_type> octdigit; 
    qi::rule<Iterator, space_type> bindigit; 
    qi::rule<Iterator, space_type> hexdigit; 

    // FLOAT LITERAL 
    qi::rule<Iterator, space_type> floatnumber; 
    qi::rule<Iterator, space_type> pointfloat; 
    qi::rule<Iterator, space_type> exponentfloat; 
    qi::rule<Iterator, space_type> intpart; 
    qi::rule<Iterator, space_type> fraction; 
    qi::rule<Iterator, space_type> exponent; 

    //IMAGINARY LITERAL 
    qi::rule<Iterator, space_type> imagnumber; 

    // PYTHON GRAMMAR 
    qi::rule<Iterator, space_type> single_input; 
    qi::rule<Iterator, space_type> file_input; 
    qi::rule<Iterator, space_type> eval_input; 
    qi::rule<Iterator, space_type> decorator; 
    qi::rule<Iterator, space_type> decorators; 
    qi::rule<Iterator, space_type> decorated; 
    qi::rule<Iterator, space_type> funcdef; 
    qi::rule<Iterator, space_type> parameters; 
    qi::rule<Iterator, space_type> varargslist; 
    qi::rule<Iterator, space_type> fpdef; 
    qi::rule<Iterator, space_type> fplist; 
    qi::rule<Iterator, space_type> stmt; 
    qi::rule<Iterator, space_type> simple_stmt; 
    qi::rule<Iterator, space_type> small_stmt; 
    qi::rule<Iterator, space_type> expr_stmt; 
    qi::rule<Iterator, space_type> augassign; 
    qi::rule<Iterator, space_type> print_stmt; 
    qi::rule<Iterator, space_type> del_stmt; 
    qi::rule<Iterator, space_type> pass_stmt; 
    qi::rule<Iterator, space_type> flow_stmt; 
    qi::rule<Iterator, space_type> break_stmt; 
    qi::rule<Iterator, space_type> continue_stmt; 
    qi::rule<Iterator, space_type> return_stmt; 
    qi::rule<Iterator, space_type> yield_stmt; 
    qi::rule<Iterator, space_type> raise_stmt; 
    qi::rule<Iterator, space_type> import_stmt; 
    qi::rule<Iterator, space_type> import_name; 
    qi::rule<Iterator, space_type> import_from; 
    qi::rule<Iterator, space_type> import_as_name; 
    qi::rule<Iterator, space_type> dotted_as_name; 
    qi::rule<Iterator, space_type> import_as_names; 
    qi::rule<Iterator, space_type> dotted_as_names; 
    qi::rule<Iterator, space_type> dotted_name; 
    qi::rule<Iterator, space_type> global_stmt; 
    qi::rule<Iterator, space_type> exec_stmt; 
    qi::rule<Iterator, space_type> assert_stmt; 
    qi::rule<Iterator, space_type> compound_stmt; 
    qi::rule<Iterator, space_type> if_stmt; 
    qi::rule<Iterator, space_type> while_stmt; 
    qi::rule<Iterator, space_type> for_stmt; 
    qi::rule<Iterator, space_type> try_stmt; 
    qi::rule<Iterator, space_type> with_stmt; 
    qi::rule<Iterator, space_type> with_item; 
    qi::rule<Iterator, space_type> except_clause; 
    qi::rule<Iterator, space_type> suite; 
    qi::rule<Iterator, space_type> testlist_safe; 
    qi::rule<Iterator, space_type> old_test; 
    qi::rule<Iterator, space_type> old_lambdef; 
    qi::rule<Iterator, space_type> test; 
    qi::rule<Iterator, space_type> or_test; 
    qi::rule<Iterator, space_type> and_test; 
    qi::rule<Iterator, space_type> not_test; 
    qi::rule<Iterator, space_type> comparison; 
    qi::rule<Iterator, space_type> comp_op; 
    qi::rule<Iterator, space_type> expr; 
    qi::rule<Iterator, space_type> xor_expr; 
    qi::rule<Iterator, space_type> and_expr; 
    qi::rule<Iterator, space_type> shift_expr; 
    qi::rule<Iterator, space_type> arith_expr; 
    qi::rule<Iterator, space_type> term; 
    qi::rule<Iterator, space_type> factor; 
    qi::rule<Iterator, space_type> power; 
    qi::rule<Iterator, space_type> atom; 
    qi::rule<Iterator, space_type> listmaker; 
    qi::rule<Iterator, space_type> testlist_comp; 
    qi::rule<Iterator, space_type> lambdef; 
    qi::rule<Iterator, space_type> trailer; 
    qi::rule<Iterator, space_type> subscriptlist; 
    qi::rule<Iterator, space_type> subscript; 
    qi::rule<Iterator, space_type> sliceop; 
    qi::rule<Iterator, space_type> exprlist; 
    qi::rule<Iterator, space_type> testlist; 
    qi::rule<Iterator, space_type> dictorsetmaker; 
    qi::rule<Iterator, space_type> classdef; 
    qi::rule<Iterator, space_type> arglist; 
    qi::rule<Iterator, space_type> argument; 
    qi::rule<Iterator, space_type> list_iter; 
    qi::rule<Iterator, space_type> list_for; 
    qi::rule<Iterator, space_type> list_if; 
    qi::rule<Iterator, space_type> comp_iter; 
    qi::rule<Iterator, space_type> comp_for; 
    qi::rule<Iterator, space_type> comp_if; 
    qi::rule<Iterator, space_type> testlist1; 
    qi::rule<Iterator, space_type> encoding_decl; 
    qi::rule<Iterator, space_type> yield_expr; 
}; 

的问题是,当我尝试解析简单的文件:

pass 

魔女通过一些词法分析模块会后:

pass <NEWLINE> <EOT> 

解析失败并在第一个字符停止。 当我试图解析这个文件与pass_stmt规则一切都没问题(除了我们仍然有剩余,但通行字是“消耗”)。 当我试图用规则一个级别解析它 - small_stmt - 解析器停在

> <EOT> 

消费

pass <NEWLINE 

的上一层 - simple_stmt给出相同的结果file_input - 解析器停在第一个字符处。

在添加PYTHON GRAMMAR部分(从http://docs.python.org/reference/grammar.html获取)中定义的语法之前,一切正常。解析器识别标识符,文字,数字等。

有没有人有一个想法在这里可能会出错?

+0

这是一个猜测 - 它看起来像small_stmt不应该消耗换行符,因为simple_stmt预计换行符应该仍然存在。 – Owen 2011-05-30 22:32:35

+0

另一个可能的问题:small_stmt首先尝试匹配expr_stmt - 但如果它作为标识符进行解析,“pass”可能被认为是一个有效的表达式(我不知道该怎么看)。 – Owen 2011-05-30 22:36:13

+0

如果你的编译器支持C++ 0x,你可以试试AX解析器生成器。我从来没有遇到过使用它调试解析器的困难。使用lambdas将调试语义操作添加到fly的规则使得调试非常轻松。 – 2011-05-31 06:49:06

回答

5

我建议你启用调试,如解释here。这会让你了解实际发生的事情。一般来说,我建议一步一步地构建语法,而不是试图通过一次大的飞跃实现所有的事情。

上面提供的代码非常难以理解,因为它非常大,没有评论。编写语法非常像编写“普通”代码。封装是成功的关键。尝试构建覆盖自包含文件的较小文法,并根据需要合并这些子文法。有关最佳做法,请参阅here

+0

你提到的调试方法非常有用。它告诉我问题在哪里。看起来,解析器将pass字符识别为一个标识符而不是pass_stmt。它进入与标识符连接的路径,不会尝试任何其他匹配。有什么方法可以告诉解析器哪个规则更重要(应该先尝试)? – John 2011-05-31 21:48:22

+0

通常,替代品会按照它们定义的顺序进行尝试。因此,如果先移动关键字规则,则应在标识符规则之前尝试。 – hkaiser 2011-06-01 00:06:08

+1

感谢您的帮助 - 调试工具非常棒。事实证明,正式的Python语法是以不利的顺序给出的,导致解析器经常四处游荡。 – John 2011-06-06 08:00:14

相关问题