2015-07-13 98 views
3

我想解析一个带类型值的 CSV 文件,每一列的类型在表头中定义(问题标题:用 boost::spirit::qi 解析带类型的 CSV 文件):

int double double int unsigned 
12 1.3 23445 1 42 
45 46  47  48 49 

解析结果的数据结构可以是这样的二维向量:

// One column of the table: a vector whose element type is one of the
// three supported cell types.
using ColumnType = boost::variant< 
    std::vector<int>, 
    std::vector<unsigned>, 
    std::vector<double> 
>; 

// The whole table, stored column by column.
using ResultType = std::vector<ColumnType>; 

我目前可以工作的代码:

// Short aliases for the Boost namespaces used by the grammar below.
namespace phoenix = boost::phoenix; 
namespace qi = boost::spirit::qi; 
namespace ascii = boost::spirit::ascii; 

// One column of the table: a vector whose element type is one of the
// three supported cell types.
using ColumnType = boost::variant< 
    std::vector<int>, 
    std::vector<unsigned>, 
    std::vector<double> 
>; 

// The whole table, stored column by column.
using ResultType = std::vector<ColumnType>; 

// Identifies the value type of a column.
enum class CSVDataType {
    Int,    // signed integer column
    UInt,   // unsigned integer column
    Double  // floating-point column
};

/// Qi grammar for a blank-separated table whose first line names the type of
/// each column ("int", "unsigned" or "double").
///
/// The parsed values are collected column by column in `result`; on a
/// successful parse the same data is also assigned to the grammar's
/// synthesized attribute (ResultType).
///
/// The parse state (`column_types`, `column`, `result`) lives in the grammar
/// object itself, so one instance must not be used for overlapping parses.
template<typename Iterator>
struct TypedCSVGrammar: qi::grammar<Iterator, ResultType(), ascii::blank_type> {
    /// Symbol table translating a header keyword into its CSVDataType.
    struct types_: qi::symbols<char, CSVDataType> {
        types_() {
            add
                ("int", CSVDataType::Int)
                ("unsigned", CSVDataType::UInt)
                ("double", CSVDataType::Double);
        }
    } types;

    TypedCSVGrammar() :
        TypedCSVGrammar::base_type(csv, "csv")
    {
        using namespace qi::labels;

        // Header line: zero or more type keywords.
        header %= *(types);

        // cell(_r1): _r1 is the column index. Each alternative is guarded by
        // an eps predicate so that only the parser matching the declared
        // column type is tried; the parsed value is appended to that column.
        cell =
            (
                qi::eps(phoenix::ref(column_types)[_r1] == phoenix::val(CSVDataType::Int))
                >> qi::int_ [phoenix::bind(&TypedCSVGrammar::add_int, this, _r1, _1)]
            ) | (
                qi::eps(phoenix::ref(column_types)[_r1] == phoenix::val(CSVDataType::UInt))
                >> qi::uint_ [phoenix::bind(&TypedCSVGrammar::add_uint, this, _r1, _1)]
            ) | (
                qi::eps(phoenix::ref(column_types)[_r1] == phoenix::val(CSVDataType::Double))
                >> qi::double_ [phoenix::bind(&TypedCSVGrammar::add_double, this, _r1, _1)]
            );

        // row: reset the column counter, then parse exactly one cell per
        // declared column, advancing the counter after each cell.
        row =
            qi::eps [phoenix::ref(column) = phoenix::val(0)]
            >> qi::repeat(phoenix::size(phoenix::ref(column_types))) [
                cell(phoenix::ref(column))
                >> qi::eps [phoenix::ref(column)++]
            ];

        // csv: header line, eol-separated rows, optional trailing newlines.
        // The final eps action hands the collected columns to the caller.
        // qi::attr(result) cannot do that: it copies `result` while the
        // grammar is being constructed (when it is still empty), and a rule
        // defined with '=' that contains semantic actions does not propagate
        // attributes automatically anyway.
        csv =
            header [phoenix::bind(&TypedCSVGrammar::construct_columns, this, _1)]
            > qi::eol
            > row % qi::eol
            > *qi::eol
            > qi::eps [_val = phoenix::ref(result)];

        // Report expectation failures ('>' operands) on std::cout and make
        // the parse fail instead of throwing.
        qi::on_error<qi::fail>
        (
            csv
            , std::cout
                << phoenix::val("Error! Expecting ")
                << _4                                      // what failed?
                << phoenix::val(" here: \"")
                << phoenix::construct<std::string>(_3, _2) // iterators to error-pos, end
                << phoenix::val("\"")
                << std::endl
        );
    }

    /// Appends `i` to column `c`. Throws boost::bad_get if that column does
    /// not hold std::vector<int>.
    void add_int(std::size_t c, int i) {
        boost::get<std::vector<int>>(result[c]).push_back(i);
    }

    /// Appends `i` to column `c`. Throws boost::bad_get if that column does
    /// not hold std::vector<unsigned>.
    void add_uint(std::size_t c, unsigned i) {
        boost::get<std::vector<unsigned>>(result[c]).push_back(i);
    }

    /// Appends `i` to column `c`. Throws boost::bad_get if that column does
    /// not hold std::vector<double>.
    void add_double(std::size_t c, double i) {
        boost::get<std::vector<double>>(result[c]).push_back(i);
    }

    /// Records the column types read from the header and creates one empty,
    /// correctly typed column per entry. Any data left over from a previous
    /// parse with this grammar object is discarded first.
    void construct_columns(const std::vector<CSVDataType>& columns) {
        column_types = columns;
        column = 0;
        result.clear();
        result.reserve(columns.size());
        for (const auto& c : columns) {
            switch (c) {
            case CSVDataType::Int:
                result.push_back(std::vector<int>());
                break;

            case CSVDataType::UInt:
                result.push_back(std::vector<unsigned>());
                break;

            case CSVDataType::Double:
                result.push_back(std::vector<double>());
                break;
            }
        }
    }

    std::vector<CSVDataType> column_types; ///< type of each column, taken from the header
    std::size_t column = 0;                ///< index of the cell currently being parsed within a row
    ResultType result;                     ///< parsed values, one entry per column

    qi::rule<Iterator, ResultType(), ascii::blank_type> csv;
    qi::rule<Iterator, std::vector<CSVDataType>(), ascii::blank_type> header;
    qi::rule<Iterator, void(std::size_t), ascii::blank_type> cell;
    qi::rule<Iterator, void(), ascii::blank_type> row;
};

有没有更好的解决方案?我想支持不止三种类型(可能超过 10 种),照这种写法需要敲很多重复的代码。

回答

4

我不明白你为什么会设计出这样一个刻意而别扭的目标数据结构。它似乎很容易招致索引不匹配的错误。

我建议在这里使用 Nabialek Trick;如果你把 AST 相应地改成下面这样,它的效果会很好:

// A single cell: one of the three supported value types.
using ValueType = boost::variant<int, unsigned, double>; 
// The whole table, stored row by row.
using ResultType = std::vector<std::vector<ValueType>>; 

(无论如何,这似乎都是更可取的做法)

简而言之,就是把各列的类型转换成一个由语法规则组成的向量(std::vector<dynamic>)。

Live On Coliru

#define BOOST_SPIRIT_DEBUG 
#include <boost/spirit/include/qi.hpp> 
#include <boost/spirit/include/phoenix.hpp> 

// Short aliases for the Boost namespaces used below.
namespace px = boost::phoenix; 
namespace qi = boost::spirit::qi; 
namespace ascii = boost::spirit::ascii; 

// A single cell: one of the three supported value types.
using ValueType = boost::variant<int, unsigned, double>; 
// The whole table, stored row by row.
using ResultType = std::vector<std::vector<ValueType>>; 

// Identifies the value type of a column.
// Not referenced by the rest of this listing.
enum class CSVDataType {
    Int,
    UInt,
    Double
};

namespace boost { namespace spirit { namespace qi { // FOR DEBUG
    // Placeholder stream inserters so that attributes and locals holding
    // rule pointers can be written to the debug output.

    // A pointer to a rule is shown as a fixed label.
    template <typename... Args>
    std::ostream& operator<<(std::ostream& out, rule<Args...> const*)
    {
        out << "(lazy rule)";
        return out;
    }

    // A vector of rule pointers is shown as a fixed label.
    template <typename... Args>
    std::ostream& operator<<(std::ostream& out, std::vector<rule<Args...> const*> const&)
    {
        out << "(column mappings)";
        return out;
    }
} } }

// Qi grammar for a table whose header line names the type of each column.
// Each header keyword is mapped to a pointer to the rule that parses a cell
// of that type; the rows are then parsed by invoking those rules lazily, one
// per column. The synthesized attribute is ResultType (a vector of rows).
template<typename Iterator, typename Skipper = ascii::blank_type> 
struct TypedCSVGrammar: qi::grammar<Iterator, ResultType(), Skipper> { 

    TypedCSVGrammar() : TypedCSVGrammar::base_type(start, "csv") 
    { 
     using namespace qi::labels; 

     // Header line: zero or more type keywords, each yielding a rule pointer.
     header = *types; 

     // Store the header's rule pointers in the local _cols (omit[] keeps them
     // out of the attribute), then parse eol-separated rows. For every row the
     // lower repeat bound resets _current to 0; each cell is parsed by the
     // rule _cols[_current], and _current is post-incremented per cell.
     // NOTE(review): the lower bound evaluates to 0, so a row with fewer cells
     // than declared columns (even an empty one) appears to be accepted --
     // confirm that this is intended.
     csv = qi::omit[ header [ _cols = _1 ] ] > qi::eol 
       > qi::repeat(_current=0, px::size(_cols)) [ qi::lazy(*_cols[_current++]) ] % qi::eol 
       > *qi::eol 
       ; 

     // start only forwards to csv; csv carries the locals.
     start = csv; 

     BOOST_SPIRIT_DEBUG_NODES((start)(csv)(header)); 

     // Print expectation failures ('>' operands) to std::cout and fail the parse.
     qi::on_error<qi::fail> (csv, px::ref(std::cout) 
        << "Error! Expecting " << _4         // what failed? 
        << " here: \""   << px::construct<std::string>(_3, _2) // iterators to error-pos, end 
        << "\"\n" 
      ); 
    } 

    private: 
    // Rule type that parses one cell into a ValueType.
    using cell_parser_t = qi::rule<Iterator, ValueType(), Skipper>; 
    // What the symbol table stores: a pointer to one of the cell rules.
    using dynamic  = cell_parser_t const*; 

    // Symbol table mapping a header keyword to the rule that parses cells of
    // that type. The rules are members of the table itself.
    struct types_: qi::symbols<char, dynamic> { 
     cell_parser_t 
      int_cell = qi::int_, 
      uint_cell = qi::uint_, 
      double_cell = qi::double_; 

     types_() { 
      this->add 
       ("int",  &int_cell) 
       ("unsigned", &uint_cell) 
       ("double", &double_cell); 
      BOOST_SPIRIT_DEBUG_NODES((int_cell)(uint_cell)(double_cell)) 
     } 
    } types; 

    // Locals of the csv rule: the per-column rule pointers and the index of
    // the current column. _cols and _current are readable names for the
    // placeholders _a and _b.
    using state = qi::locals<std::vector<dynamic>, size_t>; 
    qi::_a_type _cols; 
    qi::_b_type _current; 

    qi::rule<Iterator, ResultType(),    Skipper> start; 
    qi::rule<Iterator, std::vector<dynamic>(), Skipper> header; 
    qi::rule<Iterator, ResultType(),    Skipper, state> csv; 
}; 

// Reads a typed table from standard input, parses it with TypedCSVGrammar and
// prints the parsed rows (tab-separated cells), followed by any input that
// was left unparsed.
int main() {
    using It = boost::spirit::istream_iterator;

    // Disable the stream's own whitespace skipping so that blanks and
    // newlines reach the parser.
    std::cin >> std::noskipws;
    It f(std::cin), l;

    TypedCSVGrammar<It> g;
    ResultType data;

    if (!qi::phrase_parse(f, l, g, ascii::blank, data)) {
        std::cout << "Parse failed\n";
    } else {
        std::cout << "Parse success\n";

        for (auto const& row : data) {
            for (auto const& cell : row) {
                std::cout << cell << "\t";
            }
            std::cout << "\n";
        }
    }

    // Whatever lies between the final parse position and the end of input.
    if (f != l) {
        std::string const rest(f, l);
        std::cout << "Remaining unparsed: '" << rest << "'\n";
    }
}

因此,对于上面给出的输入,它会打印:

Parse success 
12 1.3 23445 1 42 
45 46 47 48 49 

启用调试信息时的输出如下:

<start> 
    <try>int double double in</try> 
    <csv> 
    <try>int double double in</try> 
    <header> 
     <try>int double double in</try> 
     <success>\n12 1.3 23445 1</success> 
     <attributes>[[(lazy rule), (lazy rule), (lazy rule), (lazy rule), (lazy rule)]]</attributes> 
    </header> 
    <int_cell> 
     <try>12 1.3 23445 1 </try> 
     <success> 1.3 23445 1 </success> 
     <attributes>[12]</attributes> 
    </int_cell> 
    <double_cell> 
     <try> 1.3 23445 1 </try> 
     <success> 23445 1 42\n45</success> 
     <attributes>[1.3]</attributes> 
    </double_cell> 
    <double_cell> 
     <try> 23445 1 42\n45</try> 
     <success> 1 42\n45 46  </success> 
     <attributes>[23445]</attributes> 
    </double_cell> 
    <int_cell> 
     <try> 1 42\n45 46  </try> 
     <success> 42\n45 46  47 </success> 
     <attributes>[1]</attributes> 
    </int_cell> 
    <uint_cell> 
     <try> 42\n45 46  47 </try> 
     <success>\n45 46  47  4</success> 
     <attributes>[42]</attributes> 
    </uint_cell> 
    <int_cell> 
     <try>45 46  47  48</try> 
     <success> 46  47  48 </success> 
     <attributes>[45]</attributes> 
    </int_cell> 
    <double_cell> 
     <try> 46  47  48 </try> 
     <success>  47  48 49\n</success> 
     <attributes>[46]</attributes> 
    </double_cell> 
    <double_cell> 
     <try>  47  48 49\n</try> 
     <success>  48 49\n</success> 
     <attributes>[47]</attributes> 
    </double_cell> 
    <int_cell> 
     <try>  48 49\n</try> 
     <success> 49\n</success> 
     <attributes>[48]</attributes> 
    </int_cell> 
    <uint_cell> 
     <try> 49\n</try> 
     <success>\n</success> 
     <attributes>[49]</attributes> 
    </uint_cell> 
    <int_cell> 
     <try></try> 
     <fail/> 
    </int_cell> 
    <success></success> 
    <attributes>[[[12, 1.3, 23445, 1, 42], [45, 46, 47, 48, 49], []]]</attributes><locals>((column mappings) 1)</locals> 
    </csv> 
    <success></success> 
    <attributes>[[[12, 1.3, 23445, 1, 42], [45, 46, 47, 48, 49], []]]</attributes> 
</start> 
相关问题