由于所有CSV问题似乎都在此处重定向,因此我想将答案发布在此处。这个答案没有直接解决提问者的问题。我希望能够读取已知为CSV格式的流,并且每个字段的类型也是已知的。当然,可以使用下面的方法将每个字段都视为字符串类型。
作为我希望如何使用CSV输入流的示例,请考虑以下输入(摘自CSV的维基百科页面):
const char input[] =
"Year,Make,Model,Description,Price\n"
"1997,Ford,E350,\"ac, abs, moon\",3000.00\n"
"1999,Chevy,\"Venture \"\"Extended Edition\"\"\",\"\",4900.00\n"
"1999,Chevy,\"Venture \"\"Extended Edition, Very Large\"\"\",\"\",5000.00\n"
"1996,Jeep,Grand Cherokee,\"MUST SELL!\n\
air, moon roof, loaded\",4799.00\n"
;
然后,我希望能够读取如下数据:
std::istringstream ss(input);
std::string title[5];
int year;
std::string make, model, desc;
float price;
csv_istream(ss)
>> title[0] >> title[1] >> title[2] >> title[3] >> title[4];
while (csv_istream(ss)
>> year >> make >> model >> desc >> price) {
//...do something with the record...
}
这就是我最终得到的解决方案。
struct csv_istream {
std::istream &is_;
csv_istream (std::istream &is) : is_(is) {}
void scan_ws () const {
while (is_.good()) {
int c = is_.peek();
if (c != ' ' && c != '\t') break;
is_.get();
}
}
void scan (std::string *s = 0) const {
std::string ws;
int c = is_.get();
if (is_.good()) {
do {
if (c == ',' || c == '\n') break;
if (s) {
ws += c;
if (c != ' ' && c != '\t') {
*s += ws;
ws.clear();
}
}
c = is_.get();
} while (is_.good());
if (is_.eof()) is_.clear();
}
}
template <typename T, bool> struct set_value {
void operator () (std::string in, T &v) const {
std::istringstream(in) >> v;
}
};
template <typename T> struct set_value<T, true> {
template <bool SIGNED> void convert (std::string in, T &v) const {
if (SIGNED) v = ::strtoll(in.c_str(), 0, 0);
else v = ::strtoull(in.c_str(), 0, 0);
}
void operator () (std::string in, T &v) const {
convert<is_signed_int<T>::val>(in, v);
}
};
template <typename T> const csv_istream & operator >> (T &v) const {
std::string tmp;
scan(&tmp);
set_value<T, is_int<T>::val>()(tmp, v);
return *this;
}
const csv_istream & operator >> (std::string &v) const {
v.clear();
scan_ws();
if (is_.peek() != '"') scan(&v);
else {
std::string tmp;
is_.get();
std::getline(is_, tmp, '"');
while (is_.peek() == '"') {
v += tmp;
v += is_.get();
std::getline(is_, tmp, '"');
}
v += tmp;
scan();
}
return *this;
}
template <typename T>
const csv_istream & operator >> (T &(*manip)(T &)) const {
is_ >> manip;
return *this;
}
operator bool () const { return !is_.fail(); }
};
使用以下可以通过C ++ 11中的新积分特征模板进行简化的助手:
template <typename T> struct is_signed_int { enum { val = false }; };
template <> struct is_signed_int<short> { enum { val = true}; };
template <> struct is_signed_int<int> { enum { val = true}; };
template <> struct is_signed_int<long> { enum { val = true}; };
template <> struct is_signed_int<long long> { enum { val = true}; };
template <typename T> struct is_unsigned_int { enum { val = false }; };
template <> struct is_unsigned_int<unsigned short> { enum { val = true}; };
template <> struct is_unsigned_int<unsigned int> { enum { val = true}; };
template <> struct is_unsigned_int<unsigned long> { enum { val = true}; };
template <> struct is_unsigned_int<unsigned long long> { enum { val = true}; };
template <typename T> struct is_int {
enum { val = (is_signed_int<T>::val || is_unsigned_int<T>::val) };
};
在线尝试!
boost::spirit
解析。感谢解析简单的文件格式,它更多地用于解析语法。我团队中的某个人正试图使用它来解析XML,调试起来很痛苦。boost::spirit
尽可能远离。