3
我想解析一个值带有类型的 CSV 文件,每一列的类型在表头(第一行)中定义(标题:用 boost::spirit::qi 解析带类型的 CSV 文件)。例如:
int double double int unsigned
12 1.3 23445 1 42
45 46 47 48 49
解析结果的数据结构可以是这样一个按列存储的二维向量:
// One parsed column: a vector of values whose element type is one of the
// supported cell types. The order of the alternatives is significant for
// boost::variant (it determines which()), so it is kept as int, unsigned, double.
using ColumnType =
    boost::variant<std::vector<int>, std::vector<unsigned>, std::vector<double>>;

// The whole table, stored column-wise: one ColumnType entry per column.
using ResultType = std::vector<ColumnType>;
下面是我目前可以正常工作的代码:
// Short aliases for the Boost.Spirit / Boost.Phoenix namespaces used below.
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;

// One parsed column: a vector of values whose element type is one of the
// supported cell types. The order of the alternatives is significant for
// boost::variant (it determines which()), so it is kept as int, unsigned, double.
using ColumnType =
    boost::variant<std::vector<int>, std::vector<unsigned>, std::vector<double>>;

// The whole table, stored column-wise: one ColumnType entry per column.
using ResultType = std::vector<ColumnType>;
// Tag identifying the value type of a CSV column, as named in the header row.
enum class CSVDataType {
    Int,     // header keyword "int"
    UInt,    // header keyword "unsigned"
    Double   // header keyword "double"
};
/// Qi grammar for a blank-separated "typed CSV" document.
///
/// The first line is a header naming the type of every column ("int",
/// "unsigned" or "double"). Every following line is a row holding exactly one
/// value per column, parsed with the numeric parser matching that column's
/// type. Values are stored column-wise in `result`, which is also exposed as
/// the grammar's synthesized attribute (ResultType).
///
/// The grammar keeps parse state in its members (`column_types`, `column`,
/// `result`); that state is reset whenever a new header is parsed, so one
/// instance can be used for several consecutive parses.
template<typename Iterator>
struct TypedCSVGrammar: qi::grammar<Iterator, ResultType(), ascii::blank_type> {
    /// Symbol table mapping the header keywords to their CSVDataType tag.
    struct types_: qi::symbols<char, CSVDataType> {
        types_() {
            add
                ("int", CSVDataType::Int)
                ("unsigned", CSVDataType::UInt)
                ("double", CSVDataType::Double);
        }
    } types;

    TypedCSVGrammar() :
        TypedCSVGrammar::base_type(csv, "csv")
    {
        using namespace qi::labels;

        // Header: any number of type keywords; the attribute is the list of tags.
        header %= *(types);

        // cell(c): parse one value for column index c (_r1). Each alternative is
        // guarded by an eps predicate on the column's declared type, so only the
        // numeric parser matching that type is ever tried.
        cell =
            (
                qi::eps(phoenix::ref(column_types)[_r1] == phoenix::val(CSVDataType::Int))
                >> qi::int_ [phoenix::bind(&TypedCSVGrammar::add_int, this, _r1, _1)]
            ) | (
                qi::eps(phoenix::ref(column_types)[_r1] == phoenix::val(CSVDataType::UInt))
                >> qi::uint_ [phoenix::bind(&TypedCSVGrammar::add_uint, this, _r1, _1)]
            ) | (
                qi::eps(phoenix::ref(column_types)[_r1] == phoenix::val(CSVDataType::Double))
                >> qi::double_ [phoenix::bind(&TypedCSVGrammar::add_double, this, _r1, _1)]
            );

        // Row: reset the running column index, then parse exactly one cell per
        // declared column, advancing the index after each cell.
        // NOTE(review): cell actions append immediately, so a row that fails
        // part-way leaves the values of its already-parsed cells in `result`.
        row =
            qi::eps [phoenix::ref(column) = phoenix::val(0)]
            >> qi::repeat(phoenix::size(phoenix::ref(column_types))) [
                cell(phoenix::ref(column))
                >> qi::eps [phoenix::ref(column)++]
            ];

        // Document: header line, at least one row, optional trailing newlines.
        // The final eps action copies the collected columns into the rule's
        // attribute. A plain qi::attr(result) would capture a copy of `result`
        // taken while the grammar is being constructed (still empty), and since
        // this rule is assigned with `=` and contains semantic actions, no
        // automatic attribute propagation happens either.
        csv =
            header [phoenix::bind(&TypedCSVGrammar::construct_columns, this, _1)]
            > qi::eol
            > row % qi::eol
            > *qi::eol
            > qi::eps [_val = phoenix::ref(result)];

        // Report a failed expectation point (operator>) on std::cout and make
        // the parse fail.
        qi::on_error<qi::fail>
        (
            csv
            , std::cout
                << phoenix::val("Error! Expecting ")
                << _4 // what failed?
                << phoenix::val(" here: \"")
                << phoenix::construct<std::string>(_3, _2) // iterators to error-pos, end
                << phoenix::val("\"")
                << std::endl
        );
    }

    /// Append `i` to column `c`, which must have been declared as "int".
    /// boost::get throws boost::bad_get if the column holds another type.
    void add_int(std::size_t c, int i) {
        boost::get<std::vector<int>>(result[c]).push_back(i);
    }

    /// Append `i` to column `c`, which must have been declared as "unsigned".
    /// boost::get throws boost::bad_get if the column holds another type.
    void add_uint(std::size_t c, unsigned i) {
        boost::get<std::vector<unsigned>>(result[c]).push_back(i);
    }

    /// Append `i` to column `c`, which must have been declared as "double".
    /// boost::get throws boost::bad_get if the column holds another type.
    void add_double(std::size_t c, double i) {
        boost::get<std::vector<double>>(result[c]).push_back(i);
    }

    /// Start a new table: remember the declared column types and create one
    /// empty, correctly typed column vector per header entry.
    void construct_columns(const std::vector<CSVDataType>& columns) {
        column_types = columns;
        // Drop columns left over from a previous parse with this grammar
        // instance; otherwise they would pile up in front of the new ones.
        result.clear();
        result.reserve(columns.size());
        for (const auto& c : columns) {
            switch (c) {
                case CSVDataType::Int:
                    result.push_back(std::vector<int>());
                    break;
                case CSVDataType::UInt:
                    result.push_back(std::vector<unsigned>());
                    break;
                case CSVDataType::Double:
                    result.push_back(std::vector<double>());
                    break;
            }
        }
    }

    std::vector<CSVDataType> column_types;  // declared type of each column
    std::size_t column = 0;                 // index of the cell being parsed in the current row
    ResultType result;                      // parsed values, stored column-wise

    qi::rule<Iterator, ResultType(), ascii::blank_type> csv;
    qi::rule<Iterator, std::vector<CSVDataType>(), ascii::blank_type> header;
    qi::rule<Iterator, void(std::size_t), ascii::blank_type> cell;
    qi::rule<Iterator, void(), ascii::blank_type> row;
};
有没有更好的解决方案?我想支持不止三种类型(可能超过 10 种),按现在的写法需要敲大量重复的代码。