缓慢的TSQL查询

对于如何提高这个缓慢的 TSQL 查询的性能，有什么想法吗？

[ftsIndex] PK是sID，wordPos。
并且在wordID，sID，wordPos上有一个索引。
它们都是int。

查询最后使用了 DISTINCT。
大多数sID只有几个匹配。
某些 sID 可能有 10,000 个以上的匹配，这会把查询拖垮。

查询在 11 秒内返回了前 27,749 行。
没有一个sID有超过500个匹配。
各个 sID 的匹配数总和是 65,615。

第 27,750 行本身就需要 2 分钟以上，它有 15,000 个匹配。

这并不令人意外，因为连接最终是在 [sID] 上进行的。

由于最后使用了 DISTINCT，有没有办法让它只找到第一个满足下列条件的匹配

on [wXright].[sID] = [wXleft].[sID] 
    and [wXright].[wordPos] > [wXleft].[wordPos] 
    and [wXright].[wordPos] <= [wXleft].[wordPos] + 10

然后移动到下一个SID？

我知道这是从查询优化器问很多，但这真的很酷。

在现实生活中，问题文档是零件清单和供应商重复多次。

-- Returns every sID in which an occurrence of 'Fox' follows an occurrence of
-- 'Brown' by 1 to 10 word positions. DISTINCT collapses the many
-- (Brown, Fox) position pairs a single sID can produce.
;with [wXleft] as (
    -- positions of every word whose definition matches 'Brown'
    select idx.[sID], idx.[wordPos]
    from [ftsIndex] as idx with (nolock)
    where idx.[wordID] in (
        select def.[id]
        from [FTSwordDef] as def with (nolock)
        where def.[word] like 'Brown'
    )
),
[wXright] as (
    -- positions of every word whose definition matches 'Fox'
    select idx.[sID], idx.[wordPos]
    from [ftsIndex] as idx with (nolock)
    where idx.[wordID] in (
        select def.[id]
        from [FTSwordDef] as def with (nolock)
        where def.[word] like 'Fox'
    )
)
select distinct [wXleft].[sID]
from [wXleft]
inner join [wXright]
    on  [wXright].[sID] = [wXleft].[sID]
    -- right-hand word must come after the left-hand word ...
    and [wXright].[wordPos] > [wXleft].[wordPos]
    -- ... and no more than 10 positions later
    and [wXright].[wordPos] <= [wXleft].[wordPos] + 10

下面这个提示把耗时降到了 1:40：

inner loop join

我这样做只是尝试，它完全改变了查询计划。
我不知道原来那个有问题的查询到底要跑多久。我在 20 分钟时放弃了。
我甚至不会将此作为答案张贴，因为我没有看到它对任何人都有价值。
希望得到更好的答案。
如果在接下来的两天内我没有收到，我会删除这个问题。

这不能解决问题

-- Same proximity search written as a direct self-join of [ftsIndex]:
-- [ft1] supplies the 'brown' positions, [ft2] the 'fox' positions.
select distinct [ft1].[sID]
from [ftsIndex] as [ft1] with (nolock)
inner join [ftsIndex] as [ft2] with (nolock)
    on  [ft2].[sID] = [ft1].[sID]
    and [ft2].[wordPos] > [ft1].[wordPos]
    and [ft2].[wordPos] <= [ft1].[wordPos] + 10
-- Single-table filters; for an inner join they are equivalent in WHERE or ON.
where [ft1].[wordID] in (
          select [id]
          from [FTSwordDef] with (nolock)
          where [word] like 'brown'
      )
  and [ft2].[wordID] in (
          select [id]
          from [FTSwordDef] with (nolock)
          where [word] like 'fox'
      )

此外还需要支持类似“quick brown”出现在“fox”或“coyote”的 10 个词以内这样的查询，所以用别名直接连接并不是一条好路子。

这需要14分钟（但至少它运行）。
这种格式再次不利于更高级的查询。

-- Scratch table for the (sID, wordPos) hits of the first search term.
-- Drop any copy left over from an earlier run in this session.
IF OBJECT_ID(N'tempdb..#tempMatch1', N'U') IS NOT NULL DROP TABLE #tempMatch1;
CREATE TABLE #tempMatch1 (
    [sID] [int] NOT NULL,
    [wordPos] [int] NOT NULL,
    -- Deliberately unnamed: constraint names are not made unique per session
    -- the way #temp table names are, so a fixed name such as [PK1] makes the
    -- CREATE fail when another session (or another temp table) already owns it.
    PRIMARY KEY CLUSTERED ([sID] ASC, [wordPos] ASC)
);
-- Scratch table for the (sID, wordPos) hits of the second search term.
-- Drop any copy left over from an earlier run in this session.
IF OBJECT_ID(N'tempdb..#tempMatch2', N'U') IS NOT NULL DROP TABLE #tempMatch2;
CREATE TABLE #tempMatch2 (
    [sID] [int] NOT NULL,
    [wordPos] [int] NOT NULL,
    -- Deliberately unnamed: constraint names are not made unique per session
    -- the way #temp table names are, so a fixed name such as [PK2] makes the
    -- CREATE fail when another session (or another temp table) already owns it.
    PRIMARY KEY CLUSTERED ([sID] ASC, [wordPos] ASC)
);
-- Load every position of words matching 'Brown' into #tempMatch1.
-- The explicit column list keeps the insert correct if the temp table's
-- column order ever changes.
insert into #tempMatch1 ([sID], [wordPos])
select [ftsIndex].[sID], [ftsIndex].[wordPos]
from [ftsIndex] with (nolock)
where [ftsIndex].[wordID] in (
          select [id]
          from [FTSwordDef] with (nolock)
          where [word] like 'Brown'
      )
      -- optional cap that was used while testing: and [wordPos] < 100000
-- rows are supplied in the order of the target's clustered key
order by [ftsIndex].[sID], [ftsIndex].[wordPos];
-- Load every position of words matching 'Fox' into #tempMatch2.
-- The explicit column list keeps the insert correct if the temp table's
-- column order ever changes.
insert into #tempMatch2 ([sID], [wordPos])
select [ftsIndex].[sID], [ftsIndex].[wordPos]
from [ftsIndex] with (nolock)
where [ftsIndex].[wordID] in (
          select [id]
          from [FTSwordDef] with (nolock)
          where [word] like 'Fox'
      )
      -- optional cap that was used while testing: and [wordPos] < 100000
-- rows are supplied in the order of the target's clustered key
order by [ftsIndex].[sID], [ftsIndex].[wordPos];
-- Count the sIDs that have a #tempMatch2 hit 1 to 10 word positions after a
-- #tempMatch1 hit.
select count(distinct m1.[sID])
from #tempMatch1 as m1
inner join #tempMatch2 as m2
    on  m2.[sID] = m1.[sID]
    and m2.[wordPos] > m1.[wordPos]
    and m2.[wordPos] <= m1.[wordPos] + 10

稍微不同的连接在5秒内运行（并且具有不同的查询计划）。
但我无法用提示来修复它，因为那样会改变它执行连接的位置。
即使是 +1，也有 10 多个文档各自有超过 7000 个匹配。

on [wXright].[sID] = [wXleft].[sID] 
and [wXright].[wordPos] = [wXleft].[wordPos] + 1

完整的表定义

-- Word-position index table: one row per (sID, wordPos), carrying the wordID
-- found at that position and its charPos.
-- NOTE(review): sID presumably identifies a document and charPos a character
-- offset within it; confirm against the loader.
-- NOTE(review): the nonclustered index on (wordID, sID, wordPos) that the
-- accompanying text mentions is not created by this script.
CREATE TABLE [dbo].[FTSindex](
    [sID] [int] NOT NULL, 
    [wordPos] [int] NOT NULL, 
    [wordID] [int] NOT NULL, 
    [charPos] [int] NOT NULL, 
-- Clustered primary key on (sID, wordPos); pages are filled completely
-- (FILLFACTOR = 100, with PAD_INDEX applying the same fill to index pages).
CONSTRAINT [PK_FTSindex] PRIMARY KEY CLUSTERED 
(
    [sID] ASC, 
    [wordPos] ASC 
)WITH (PAD_INDEX = ON, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 100) ON [PRIMARY] 
) ON [PRIMARY] 

GO 

-- Every wordID must exist in FTSwordDef.ID; WITH CHECK validates the rows
-- already in the table when the constraint is added.
ALTER TABLE [dbo].[FTSindex] WITH CHECK ADD CONSTRAINT [FK_FTSindex_FTSwordDef] FOREIGN KEY([wordID]) 
REFERENCES [dbo].[FTSwordDef] ([ID]) 
GO 

-- Make sure the foreign key is enabled.
ALTER TABLE [dbo].[FTSindex] CHECK CONSTRAINT [FK_FTSindex_FTSwordDef] 
GO

来源

2013-05-29 Paparazzi

我不知道你的所有数据，但你有没有想过可能插入临时表，然后创建聚簇索引呢？先插入，然后创建索引。这通常比创建索引本身更快。这可能对你有所帮助，可能不会，所以我想把它作为评论添加。 – djangojazz

@djangojazz插入只需要5秒钟。如果我添加一个排序，所以记录按PK顺序插入，它仍然是5秒。 – Paparazzi

我们将需要表/键/索引定义和查询计划（实际）。此外，这种设计/方法是否有任何理由，与仅使用SQL Server全文搜索相反？ – RBarryYoung

UPDATE：

你仍然可以使用 union all：如果把 'L' 和 'R' 两侧的过滤推迟到处理过程的最后一步，它有助于优化器保留来自索引的排序。不幸的是，你需要事先检索出所有的 wordID，并在等值条件中使用它们。在我的机器上，它把执行时间缩短到原来的 2/3：

-- Counts sIDs where a hit for wordID 1 or 4 is immediately followed (among
-- the extracted hits of the same sID) by a hit for wordID 2 no more than 10
-- word positions later.
; with word_hits as (
    -- One equality branch per word ID, glued together with UNION ALL, as the
    -- accompanying text recommends.
    select sID, wordPos, wordID
    from FTSindex
    where wordID = 1
    union all
    select sID, wordPos, wordID
    from FTSindex
    where wordID = 4
    union all
    select sID, wordPos, wordID
    from FTSindex
    where wordID = 2
),
ranked as (
    -- number the hits of each sID in word-position order
    select sID, wordPos, wordID,
           ROW_NUMBER() over (partition by [sID] order by wordPos) as rn
    from word_hits
)
select count(distinct prev.sID) -- 26919 00:02
from ranked as prev
inner join ranked as nxt
    on  prev.sID = nxt.sID
    -- adjacent hits only: prev is the hit directly before nxt
    and prev.rn = nxt.rn - 1
    and prev.wordPos >= nxt.wordPos - 10
-- Now is the time to repartition the stream: left/right word filters are
-- applied only after the hits have been numbered.
where prev.wordID in (1, 4)
  and nxt.wordID = 2

哦，现在真的需要两秒吗？

UPDATE - 2：

-- Counts sIDs where a hit for wordID 1 is immediately followed (among the
-- extracted hits) by a hit for wordID 2 in the same sID, no more than 10
-- word positions later.
; with word_hits as (
    -- Union all resolves costly sort
    select sid, wordpos, wordid
    from FTSindex
    where wordID = 1
    union all
    select sid, wordpos, wordID
    from FTSindex
    where wordID = 2
),
ranked as (
    -- one global numbering ordered by (sid, wordpos)
    select sid, wordid, wordpos,
           ROW_NUMBER() over (order by sid, wordpos) as rn
    from word_hits
)
select count(distinct prev.sid)
from ranked as prev
inner join ranked as nxt
    on  prev.sID = nxt.sID
    -- adjacent hits only: prev is the hit directly before nxt
    and prev.rn = nxt.rn - 1
where prev.wordID = 1
  and nxt.wordID = 2
  and prev.wordPos >= nxt.wordpos - 10

1 和 2 代表选定的单词 ID。当 10 个词以内存在多个匹配时，结果会与原始查询不同；原始查询会报告所有这些匹配，而这个查询只会显示最接近的一个。

这个想法是只提取搜索到的单词，并比较两个相邻单词之间的距离，其中 wordID 1 在前，wordID 2 在后。

更新 - 1：

我曾撤下这篇帖子，因为它的性能不如我预想的好。但是，它比优化后的查询更能满足 OP 的需要，因为它允许同时搜索多个单词（即在 where 子句中指定的另一个单词附近出现的一组单词）。

-- Counts sIDs where a hit for wordID 1 is immediately followed (among the
-- extracted hits) by a hit for wordID 2 in the same sID, no more than 10
-- word positions later. Both word IDs are fetched with a single IN filter.
; with ranked as (
    -- one global numbering ordered by (sid, wordpos)
    select sid, wordid, wordpos,
           ROW_NUMBER() over (order by sid, wordpos) as rn
    from FTSindex
    where wordID in (1, 2)
)
select count(distinct prev.sid)
from ranked as prev
inner join ranked as nxt
    on  prev.sID = nxt.sID
    -- adjacent hits only: prev is the hit directly before nxt
    and prev.rn = nxt.rn - 1
where prev.wordID = 1
  and nxt.wordID = 2
  and prev.wordPos >= nxt.wordpos - 10

第一次尝试：

也许可以结合使用 cross apply 和 top 1。

-- For every 'Brown' occurrence, probe for at most one 'Fox' occurrence in the
-- same sID within the next 10 word positions. CROSS APPLY drops 'Brown' rows
-- that have no such match, so the result is the (sID, wordPos) of each
-- qualifying 'Brown' occurrence.
-- The alias is spelled [wXleft] everywhere: the mixed wXleft / wxLeft
-- spelling fails to bind under a case-sensitive collation.
select [wXleft].[sID], [wXleft].[wordPos]
from [ftsIndex] as [wXleft] with (nolock)
cross apply
(
    -- top 1 without order by: any single match is enough
    select top 1 r.[sID]
    from [ftsIndex] as r
    where r.[sID] = [wXleft].[sID]
      and r.[wordPos] > [wXleft].[wordPos]
      and r.[wordPos] <= [wXleft].[wordPos] + 10
      and r.[wordID] in (
              select [id]
              from [FTSwordDef] with (nolock)
              where [word] like 'Fox'
          )
) as [wXright]
where [wXleft].[wordID] in (
          select [id]
          from [FTSwordDef] with (nolock)
          where [word] like 'Brown'
      )

BONUS PIVOT尝试：

来源

2013-05-29 23:45:42

返回与内部循环连接相同的答案2/3的时间，在接受这个之前会等待几天的奇迹答案，谢谢 – Paparazzi

为什么你会采取另一种方式呢？这样会更快一些，我一直在尝试调整它来试图让它多出一点。奇怪的是，CTE出现的是成本的主宰 – Paparazzi

@Blam 因为我之前的计时有误，它花的时间和我最初的尝试一样多。与此同时我已经解决了排序部分，但我担心 Sql Server 需要为每个引用各执行一次 CTE，而这里有两个引用。我会在一分钟后发布新版本。 –

好了，我希望我有更多的信息或方法来测试，但做不到这一点，这就是我可能会尝试：

-- Scratch table holding the (sID, wordPos, wordID) hits of all search terms.
-- Drop any copy left over from an earlier run in this session.
IF OBJECT_ID(N'tempdb..#tempMatch', N'U') IS NOT NULL DROP TABLE #tempMatch;
CREATE TABLE #tempMatch (
    [sID] [int] NOT NULL,
    [wordPos] [int] NOT NULL,
    [wordID] [int] NOT NULL,
    -- Deliberately unnamed: constraint names are not made unique per session
    -- the way #temp table names are, so a fixed name such as [PK2] makes the
    -- CREATE fail when another temp table or session already owns that name.
    PRIMARY KEY CLUSTERED ([sID] ASC, [wordPos] ASC)
);

-- 
-- Copy into #tempMatch every ftsIndex row whose word is one of the search
-- terms listed in cteWords.
;WITH cteWords AS
(
    SELECT 'Brown' AS [word]
    UNION ALL
    SELECT 'Fox'
)
INSERT INTO #tempMatch ([sID], [wordPos], [wordID])
SELECT idx.sID, idx.wordPos, idx.wordID
FROM ftsIndex AS idx
WHERE EXISTS
(
    -- the row's wordID resolves to a definition whose word is in the list
    SELECT *
    FROM FTSWordDef AS def
    INNER JOIN cteWords AS term
        ON def.word = term.word
    WHERE idx.wordID = def.id
)
;

-- Count the sIDs in #tempMatch that have a 'Fox' hit 1 to 10 word positions
-- after a 'Brown' hit.
select count(distinct s1.[sID])
from #tempMatch as s1
inner join #tempMatch as s2
    on  s2.[sID] = s1.[sID]
    and s2.[wordPos] > s1.[wordPos]
    and s2.[wordPos] <= s1.[wordPos] + 10
-- IN rather than "= (subquery)": a scalar subquery raises an error as soon as
-- a word maps to more than one FTSWordDef row, whereas IN accepts any number
-- of matching ids and gives the same result when there is exactly one.
where s1.wordID in (select w.id from FTSWordDef as w where w.word = 'Brown')
  and s2.wordID in (select w.id from FTSWordDef as w where w.word = 'Fox')

昨晚我想出了一个替代版本。这是相同的查询同上，但CREATE语句改为：

-- Alternative layout of the scratch table: same columns, but clustered on
-- (wordID, sID, wordPos) so the hits of each word are stored together.
-- Drop any copy left over from an earlier run in this session.
IF OBJECT_ID(N'tempdb..#tempMatch', N'U') IS NOT NULL DROP TABLE #tempMatch;
CREATE TABLE #tempMatch (
    [sID] [int] NOT NULL,
    [wordID] [int] NOT NULL,
    [wordPos] [int] NOT NULL,
    -- Deliberately unnamed: constraint names are not made unique per session
    -- the way #temp table names are, so a fixed name such as [PK0] makes the
    -- CREATE fail when another temp table or session already owns that name.
    PRIMARY KEY CLUSTERED ([wordID] ASC, [sID] ASC, [wordPos] ASC)
);

请告诉我这两种做法是否有帮助。

来源

2013-05-30 21:19:34 RBarryYoung

必须在第一个约束上添加wordID，并且都会在联接cteWords上引发错误。 – Paparazzi

@Blam什么是错误？我无法测试编译，因为我们没有表定义。 – RBarryYoung

@Blam为什么你必须添加wordID到第一个约束？根据你的帖子，'（sID，wordPos）'应该是足够的，因为它们是我在'INSERT..SELECT..'中绘制的唯一表的主键。（事实上，现在我看到了，我意识到'DISTINCT'是多余的，不应该在那里） – RBarryYoung

缓慢的TSQL查询

回答

相关问题