2016-08-26 27 views
1

问题：在 boost::spirit::lex 中，如何添加同时具有语义动作和令牌 ID 的令牌？我知道如何添加带有令牌 ID 的标记定义：

this->self.add(identifier, ID_IDENTIFIER); 

而且我知道如何添加标记定义与语义动作:

this->self += whitespace [ lex::_pass = lex::pass_flags::pass_ignore ]; 

不幸的是,这并不工作:

this->self.add(whitespace 
        [ lex::_pass = lex::pass_flags::pass_ignore ], 
       ID_IDENTIFIER); 

它给出令牌不能转换为字符串(!?)的错误:

error C2664: 'const boost::spirit::lex::detail::lexer_def<...>::adder &boost::spirit::lex::detail::lexer_def<...>::adder::operator ()(wchar_t, unsigned int) const'：无法将参数 1 从 'const boost::proto::exprns_::expr<...>' 转换为 'const std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t>> &'

有趣,lexer.hpp中的adder有一个operator(),它将一个动作作为第三个参数 - 但它在我的boost版本(1.55.0)中被注释掉了。这是否适用于新版本?

在没有这个的情况下,我将如何添加具有语义操作的标记定义和ID到词法分析器?

回答

2

查看头文件后，似乎至少有两种可能的方法（均在您已经定义了令牌之后使用）：

  • 您可以随后使用 token_def 的 id 成员函数设置 id：

    ellipses = "\\.\\.\\."; 
    ... 
    ellipses.id(ID_ELLIPSES); 
    
  • 您可以在定义令牌时使用 token_def 的双参数构造函数：

    number = lex::token_def<>("[0-9]+", ID_NUMBER); 
    

然后像之前,你可以简单地添加你的语义动作:

this->self = ellipses[phx::ref(std::cout) << "Found ellipses.\n"] | '(' | ')' | number[phx::ref(std::cout) << "Found: " << phx::construct<std::string>(lex::_start, lex::_end) << '\n']; 

下面的代码基于 Boost.Spirit.Lex 的 example3.cpp，有细微的变化（标有 //CHANGED 或 //ADDED），以实现你想要的效果。

全样本(Running on rextester)

#include <iostream> 
#include <string> 

#include <boost/config/warning_disable.hpp> 
#include <boost/spirit/include/qi.hpp> 
#include <boost/spirit/include/lex_lexertl.hpp> 
#include <boost/spirit/include/phoenix.hpp> 




using namespace boost::spirit; 
namespace phx = boost::phoenix; 

// ADDED: token identifiers shared between the lexer and the grammar.
// User-defined ids must lie above lex::min_token_id, which is where
// Spirit.Lex starts handing out automatic ids.
enum token_id
{
    ID_ELLIPSES = lex::min_token_id + 1,
    ID_NUMBER   = lex::min_token_id + 2  // same value the implicit +1 rule would assign
};

/////////////////////////////////////////////////////////////////////////////// 
// Token definition 
/////////////////////////////////////////////////////////////////////////////// 
// Lexer table demonstrating the two ways of attaching an explicit token id
// to a token_def so that a semantic action can still be added afterwards:
//   1. the two-argument token_def constructor  -> 'number'
//   2. the id() member function after definition -> 'ellipses'
template <typename Lexer> 
struct example3_tokens : lex::lexer<Lexer> 
{ 
    example3_tokens() 
    { 
     // define the tokens to match 
     ellipses = "\\.\\.\\."; 
     // CHANGED: two-argument ctor assigns the id at definition time
     number = lex::token_def<>("[0-9]+", ID_NUMBER); //CHANGED 

     // CHANGED: id() assigns the id after the token has been defined
     ellipses.id(ID_ELLIPSES); //CHANGED 

     // associate the tokens and the token set with the lexer 
     // CHANGED: semantic actions ([...]) attach as usual because the ids
     // live on the token_def objects themselves. NOTE: the order of the
     // alternatives here determines token matching priority.
     this->self = ellipses[phx::ref(std::cout) << "Found ellipses.\n"] | '(' | ')' | number[phx::ref(std::cout) << "Found: " << phx::construct<std::string>(lex::_start, lex::_end) << '\n']; //CHANGED 

     // define the whitespace to ignore (spaces, tabs, newlines and C-style 
     // comments) — placed in the separate "WS" lexer state used as skipper 
     this->self("WS") 
      = lex::token_def<>("[ \\t\\n]+")   // whitespace 
      | "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/" // C style comments 
      ; 
    } 

    // these tokens expose the iterator_range of the matched input sequence 
    // NOTE(review): 'identifier' appears unused in this sample — presumably
    // a leftover from the original example3.cpp; confirm before removing.
    lex::token_def<> ellipses, identifier, number; 
}; 

/////////////////////////////////////////////////////////////////////////////// 
// Grammar definition 
/////////////////////////////////////////////////////////////////////////////// 
// Grammar consuming the token stream produced by example3_tokens.
// Tokens are matched by their explicit ids via qi::token(id); the order of
// alternatives in each rule is significant (PEG semantics).
template <typename Iterator, typename Lexer> 
struct example3_grammar 
    : qi::grammar<Iterator, qi::in_state_skipper<Lexer> > 
{ 
    // 'tok' gives access to the token definitions; unused here because the
    // rules refer to tokens by id instead — kept for interface parity with
    // the original example3.cpp.
    template <typename TokenDef> 
    example3_grammar(TokenDef const& tok) 
     : example3_grammar::base_type(start) 
    { 
     // CHANGED: match the ellipses token by its explicit id
     start 
      = +(couplet | qi::token(ID_ELLIPSES)) //CHANGED 
      ; 

     // A couplet matches nested left and right parenthesis. 
     // For example: 
     // (1) (1 2) (1 2 3) ... 
     // ((1)) ((1 2)(3 4)) (((1) (2 3) (1 2 (3) 4))) ... 
     // (((1))) ... 
     // CHANGED: match the number token by its explicit id
     couplet 
      = qi::token(ID_NUMBER) //CHANGED 
      | '(' >> +couplet >> ')' 
      ; 

     BOOST_SPIRIT_DEBUG_NODE(start); 
     BOOST_SPIRIT_DEBUG_NODE(couplet); 
    } 

    // skipper is the "WS" state of the lexer (see main)
    qi::rule<Iterator, qi::in_state_skipper<Lexer> > start, couplet; 
}; 

/////////////////////////////////////////////////////////////////////////////// 
int main() 
{ 
    // iterator type used to expose the underlying input stream 
    typedef std::string::iterator base_iterator_type; 

    // This is the token type to return from the lexer iterator 
    typedef lex::lexertl::token<base_iterator_type> token_type; 

    // This is the lexer type to use to tokenize the input. 
    // Here we use the lexertl based lexer engine. 
    typedef lex::lexertl::actor_lexer<token_type> lexer_type; //CHANGED 

    // This is the token definition type (derived from the given lexer type). 
    typedef example3_tokens<lexer_type> example3_tokens; 

    // this is the iterator type exposed by the lexer 
    typedef example3_tokens::iterator_type iterator_type; 

    // this is the type of the grammar to parse 
    typedef example3_grammar<iterator_type, example3_tokens::lexer_def> example3_grammar; 

    // now we use the types defined above to create the lexer and grammar 
    // object instances needed to invoke the parsing process 
    example3_tokens tokens;       // Our lexer 
    example3_grammar calc(tokens);     // Our parser 

    std::string str ="(1) (1 2) (1 2 3) ... ((1)) ((1 2)(3 4)) (((1) (2 3) (1 2 (3) 4))) ... (((1))) ..."; //CHANGED 

    // At this point we generate the iterator pair used to expose the 
    // tokenized input stream. 
    std::string::iterator it = str.begin(); 
    iterator_type iter = tokens.begin(it, str.end()); 
    iterator_type end = tokens.end(); 

    // Parsing is done based on the token stream, not the character 
    // stream read from the input. 
    // Note how we use the lexer defined above as the skip parser. 
    bool r = qi::phrase_parse(iter, end, calc, qi::in_state("WS")[tokens.self]); 

    if (r && iter == end) 
    { 
     std::cout << "-------------------------\n"; 
     std::cout << "Parsing succeeded\n"; 
     std::cout << "-------------------------\n"; 
    } 
    else 
    { 
     std::cout << "-------------------------\n"; 
     std::cout << "Parsing failed\n"; 
     std::cout << "-------------------------\n"; 
    } 

    std::cout << "Bye... :-) \n\n"; 
    return 0; 
}