我目前可以提供两个方案,但它们都不完美。
顺便说一句,"output.format.string"
已被弃用,设置它不会产生任何效果。
-- Option 1: expose each field's optional opening quote as its own column.
-- The pattern describes three key=value fields separated by a literal '|'
-- (written \\| inside the string literal). For every field it captures:
--   (?<qN>"?)  the opening double quote, or an empty string when absent
--   (.*?)      the value itself; \k<qN> then consumes the matching closing
--              quote (or nothing), so the value is stored without quotes
-- The six capture groups map, in order, onto the six columns below.
create external table mytable
(
    q1      string
   ,field1  string
   ,q2      string
   ,field2  string
   ,q3      string
   ,field3  string
)
row format serde 'org.apache.hadoop.hive.serde2.RegexSerDe'
with serdeproperties
(
    'input.regex' = '.*?=(?<q1>"?)(.*?)(?:\\k<q1>)\\|.*?=(?<q2>"?)(.*?)(?:\\k<q2>)\\|.*?=(?<q3>"?)(.*?)(?:\\k<q3>)'
)
stored as textfile
;
-- Read back the parsed rows. Columns are listed explicitly (in table order)
-- so the result shape stays fixed even if the table definition changes.
select
    q1
   ,field1
   ,q2
   ,field2
   ,q3
   ,field3
from mytable
;
+----+--------+----+--------+----+-----------+
| q1 | field1 | q2 | field2 | q3 | field3 |
+----+--------+----+--------+----+-----------+
| | value2 | | value2 | " | va , lues |
+----+--------+----+--------+----+-----------+
-- Option 2: one column per field; a quoted value keeps its surrounding quotes.
-- The pattern describes three key=value fields separated by a literal '|'
-- (written \\| inside the string literal). Each field's single capture group
-- (".*?"|.*?) first tries a double-quoted run and otherwise falls back to a
-- lazy match of unquoted text, so the quotes stay part of the captured value.
create external table mytable
(
    field1  string
   ,field2  string
   ,field3  string
)
row format serde 'org.apache.hadoop.hive.serde2.RegexSerDe'
with serdeproperties
(
    'input.regex' = '.*?=(".*?"|.*?)\\|.*?=(".*?"|.*?)\\|.*?=(".*?"|.*?)'
)
stored as textfile
;
-- Read back the parsed rows. Columns are listed explicitly (in table order)
-- so the result shape stays fixed even if the table definition changes.
select
    field1
   ,field2
   ,field3
from mytable
;
+--------+--------+-------------+
| field1 | field2 | field3 |
+--------+--------+-------------+
| value2 | value2 | "va , lues" |
+--------+--------+-------------+
对于给定的输入,您当前得到的输出结果是什么? – horcrux
key1=value2 key2=value key3="va , lues" – rmnvnv
那为什么不改成这样呢? "input.regex" = "[^\\|=]*=\"?([^\\|]*)\"?\\|[^\\|=]*=\"?([^\\|]*)\"?\\|[^\\|=]*=\"?([^\\|]*)\"?" – horcrux