假设我们能够以某种方式把每条描述拆分成单个单词。这样,ID = 1、Description = 'My NAME is Sajid KHAN' 就不再是单独的一行,而是变成下面这样的多行:
ID | Description
--- | ------------
1 | My
1 | NAME
1 | is
1 | Sajid
1 | KHAN
有了这种形式(共 5 行),统计每个单词出现的次数就变得非常简单,例如:
-- Count how often each word occurs once every word sits on its own row.
SELECT
    description,
    COUNT(*)
FROM data_in_new_form
GROUP BY description
因此,我们使用递归查询来完成这种拆分:先创建示例表,再用递归 WITH 子句逐个取出单词并计数。
-- Sample data: one free-text description per id, built from literals selected from DUAL.
CREATE TABLE mytable AS
    SELECT 1 AS id, 'My NAME is Sajid KHAN' AS description FROM dual
    UNION ALL
    SELECT 2, 'My Name is Ahmed Khan' FROM dual
    UNION ALL
    SELECT 3, 'MY friend name is Salman Khan' FROM dual
    UNION ALL
    SELECT 4, 'test, punctuation! it is' FROM dual
;
-- Word-frequency count using a recursive CTE.
-- Each description is upper-cased and split into words, one row per word,
-- and the words are then counted across all rows of mytable.
-- Words are assumed to be separated by single spaces; punctuation is not
-- stripped and stays attached to the word it touches.
WITH
rec (id, str, depth, element_value) AS
(
    -- Anchor member: the first word of every description.
    SELECT
        id,
        UPPER(description) AS str,
        1 AS depth,
        -- '(.*?)( |$)' lazily captures the text up to the next space or the end
        -- of the string; the trailing argument 1 returns only capture group 1,
        -- i.e. the word without its delimiter.
        REGEXP_SUBSTR(UPPER(description), '(.*?)( |$)', 1, 1, NULL, 1) AS element_value
    FROM mytable
    UNION ALL
    -- Recursive member: the (depth + 1)-th word of the same string.
    SELECT
        id,
        str,
        depth + 1,
        REGEXP_SUBSTR(str, '(.*?)( |$)', 1, depth + 1, NULL, 1) AS element_value
    FROM rec
    -- A string with n spaces holds n + 1 words, so stop after the last one.
    WHERE depth < REGEXP_COUNT(str, ' ') + 1
)
, data AS (
    SELECT id, str, depth, element_value
    FROM rec
)
SELECT element_value, COUNT(*)
FROM data
GROUP BY element_value
ORDER BY element_value
;
请注意,该版本不对标点符号做任何处理,并假设单词之间以空格分隔。
下面是采用层次查询(CONNECT BY)的版本:
-- Word-frequency count using an Oracle hierarchical query (CONNECT BY).
-- Each description is upper-cased and split into words, one row per word,
-- and the words are then counted, most frequent first.
-- Words are assumed to be separated by single spaces; punctuation is not
-- stripped and stays attached to the word it touches.
WITH rec AS
(
    SELECT
        id,
        LEVEL AS depth,
        -- '(.*?)( |$)' lazily captures the text up to the next space or the end
        -- of the string; LEVEL selects which occurrence (word) to return and the
        -- trailing argument 1 returns only capture group 1 (the word itself).
        REGEXP_SUBSTR(UPPER(description), '(.*?)( |$)', 1, LEVEL, NULL, 1) AS element_value
    FROM mytable
    -- Generate one row per word: a string with n spaces holds n + 1 words.
    CONNECT BY LEVEL <= REGEXP_COUNT(description, ' ') + 1
        -- Keep each hierarchy inside its own source row.
        AND PRIOR id = id
        -- Common idiom: a non-deterministic call in a PRIOR condition keeps Oracle
        -- from reporting a CONNECT BY loop when a row is connected to itself.
        AND PRIOR SYS_GUID() IS NOT NULL
)
, data AS (
    SELECT id, depth, element_value
    FROM rec
)
SELECT element_value, COUNT(*)
FROM data
GROUP BY element_value
-- Most frequent words first; ties are broken alphabetically for a stable order.
ORDER BY COUNT(*) DESC, element_value
;
到目前为止您尝试了什么? –