2009-11-06 109 views
10

我想快速插入数据,同时避免在表中产生重复记录。为了便于讨论,我们把这张表称为MarketPrices。我一直在尝试下面两种做法,但不知道该如何进行基准测试来判断哪一种更快。(标题:SQL插入,但避免重复)

-- Approach 1: insert the candidate row unless EXCEPT cancels it against an existing row.
-- Every existing row is expanded to both flag values (0 and 1), so a 0/1
-- @IsMarketOpen never prevents a match on the three key columns.
INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen)
SELECT @SecurityCode, @BuyPrice, @SellPrice, @IsMarketOpen
EXCEPT
SELECT mp.SecurityCode, mp.BuyPrice, mp.SellPrice, flags.bool
FROM MarketPrices AS mp
CROSS JOIN (
    SELECT 0 AS bool
    UNION
    SELECT 1 AS bool
) AS flags

OR

-- Approach 2: look up an existing row with the same SecurityCode/BuyPrice/SellPrice
-- and insert only when none is found.
-- NOTE(review): the lookup and the INSERT are separate statements with no transaction
-- or lock hints here, so concurrent callers could both pass the check -- confirm
-- whether the caller wraps this in a transaction.
DECLARE @MktId int 
SET @MktId = (SELECT SecurityId FROM MarketPrices 
       where SecurityCode = @SecurityCode 
       and BuyPrice = @BuyPrice 
       and SellPrice = @SellPrice) 

-- A NULL result is taken to mean that no matching row was found.
IF (@MktId is NULL) 
BEGIN 
    INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen) 
    VALUES 
    (@SecurityCode,@BuyPrice, @SellPrice, @IsMarketOpen) 
END 

假设@whatever是在存储过程中的输入参数。

我希望能够在BuyPrice或SellPrice或两者都不同于以前的每一次发生时为每个SecurityCode插入新记录。我不关心IsMarketOpen。

有没有什么明显愚蠢的关于上述任何一种方法?一个比另一个快吗?

+3

记住,第二种方法应该包含在一个事务中,否则你可能会遇到并发问题。 – 2009-11-06 16:37:57

+1

难道你不能直接创建一个唯一索引吗?我没有MS SQL的经验,但我认为它应该有类似的功能 – 2009-11-06 16:41:19

+3

@valya:有趣的是,人们竟然会怀疑SQL Server连最简单的事情都做不到。我甚至不确定,在*不*支持唯一索引的情况下,是否有可能实现一个关系数据库引擎。 – Tomalak 2009-11-06 16:50:31

回答

11

EDIT:为防止并发环境中出现竞争条件(race conditions),请在相关子查询或被EXCEPT的SELECT中使用WITH (UPDLOCK)。我在下面写的测试脚本不需要它,因为它使用的是只对当前连接可见的临时表;但在真实环境中对用户表进行操作时,这是非常必要的。

MERGE不需要UPDLOCK


再次受mcl的回答(建立唯一索引,让数据库抛出错误)启发,我决定对条件插入(conditional inserts)与try/catch进行基准比较。

结果似乎表明条件插入优于try/catch,但实际情况可能因环境而异。这是一个非常简单的场景(单列、小表等),并且只在一台机器上执行。

下面是结果(SQL Server 2008中,构建10.0.1600.2):

duplicates (short table)  
    try/catch:    14440 milliseconds/100000 inserts 
    conditional insert:  2983 milliseconds/100000 inserts 
    except:     2966 milliseconds/100000 inserts 
    merge:      2983 milliseconds/100000 inserts 

uniques 
    try/catch:     3920 milliseconds/100000 inserts 
    conditional insert:  3860 milliseconds/100000 inserts 
    except:     3873 milliseconds/100000 inserts 
    merge:      3890 milliseconds/100000 inserts 

    straight insert:   3173 milliseconds/100000 inserts 

duplicates (tall table) 
    try/catch:    14436 milliseconds/100000 inserts 
    conditional insert:  3063 milliseconds/100000 inserts 
    except:     3063 milliseconds/100000 inserts 
    merge:      3030 milliseconds/100000 inserts 

请注意,即使在全部为不重复值的插入中,try/catch的开销也略高于条件插入。我想知道这是否因版本、CPU、内核数量等而异。

我没有对IF形式的条件插入做基准测试,只测了WHERE形式。我认为IF形式会显示更多的开销,因为a)你会有两个语句,b)你需要将两个语句包装在一个事务中,并将隔离级别设置为可序列化(serializable)(!)。如果有人想测试这一点,需要将临时表改为常规用户表(可序列化不适用于本地临时表)。

下面是脚本:

-- Benchmark setup (tested on SQL Server 2008).
-- To run on SQL Server 2005, comment out the batches that use MERGE.
SET NOCOUNT ON

-- Start from a fresh single-column temp table; the primary key on col1 is what
-- rejects duplicate values in the benchmark batches.
IF OBJECT_ID('tempdb..#temp') IS NOT NULL
BEGIN
    DROP TABLE #temp
END
CREATE TABLE #temp (col1 INT PRIMARY KEY)
GO

------------------------------------------------------- 

-- duplicate insert test against a table w/ 1 record 

------------------------------------------------------- 

-- Seed the table with the single row (col1 = 1) that the duplicate tests collide with.
INSERT INTO #temp (col1) VALUES (1)
GO

-- Duplicate-key benchmark, short table: plain INSERT wrapped in TRY/CATCH.
-- Every attempt inserts the same key; failures are discarded by the empty CATCH block.
DECLARE @key INT, @attempt INT, @started DATETIME, @elapsed_ms INT
SET @key = 1
SET @attempt = 0
SET @started = GETDATE()
WHILE @attempt < 100000
BEGIN
    SET @attempt = @attempt + 1
    BEGIN TRY
        INSERT #temp SELECT @key
    END TRY
    BEGIN CATCH
    END CATCH
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('duplicates (short table), try/catch: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempt) WITH NOWAIT
GO

-- Duplicate-key benchmark, short table: INSERT guarded by WHERE NOT EXISTS.
DECLARE @key INT, @attempt INT, @started DATETIME, @elapsed_ms INT
SET @key = 1
SET @attempt = 0
SET @started = GETDATE()
WHILE @attempt < 100000
BEGIN
    SET @attempt = @attempt + 1
    INSERT #temp
    SELECT @key
    WHERE NOT EXISTS (SELECT * FROM #temp WHERE col1 = @key)
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('duplicates (short table), conditional insert: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempt) WITH NOWAIT
GO

-- Duplicate-key benchmark, short table: candidate key filtered through EXCEPT
-- against the keys already in the table.
DECLARE @key INT, @attempt INT, @started DATETIME, @elapsed_ms INT
SET @key = 1
SET @attempt = 0
SET @started = GETDATE()
WHILE @attempt < 100000
BEGIN
    SET @attempt = @attempt + 1
    INSERT #temp
    SELECT @key
    EXCEPT
    SELECT col1 FROM #temp
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('duplicates (short table), except: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempt) WITH NOWAIT
GO

-- Comment this batch out for SQL 2005.
-- Duplicate-key benchmark, short table: MERGE that inserts only when the key is not matched.
DECLARE @key INT, @attempt INT, @started DATETIME, @elapsed_ms INT
SET @key = 1
SET @attempt = 0
SET @started = GETDATE()
WHILE @attempt < 100000
BEGIN
    SET @attempt = @attempt + 1
    MERGE #temp AS tgt
    USING (SELECT @key) AS src (col1)
        ON tgt.col1 = src.col1
    WHEN NOT MATCHED BY TARGET THEN
        INSERT VALUES (src.col1);
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('duplicates (short table), merge: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempt) WITH NOWAIT
GO

------------------------------------------------------- 

-- unique insert test against an initially empty table 

------------------------------------------------------- 

-- Unique-key baseline: unconditional INSERT of keys 1..100000 into an emptied table.
TRUNCATE TABLE #temp
DECLARE @key INT, @started DATETIME, @elapsed_ms INT
SET @key = 0
SET @started = GETDATE()
WHILE @key < 100000
BEGIN
    SET @key = @key + 1
    INSERT #temp SELECT @key
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('uniques, straight insert: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @key) WITH NOWAIT
GO

-- Unique-key benchmark: INSERT of keys 1..100000 wrapped in TRY/CATCH
-- (any error is discarded by the empty CATCH block).
TRUNCATE TABLE #temp
DECLARE @key INT, @started DATETIME, @elapsed_ms INT
SET @key = 0
SET @started = GETDATE()
WHILE @key < 100000
BEGIN
    SET @key = @key + 1
    BEGIN TRY
        INSERT #temp SELECT @key
    END TRY
    BEGIN CATCH
    END CATCH
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('uniques, try/catch: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @key) WITH NOWAIT
GO

-- Unique-key benchmark: INSERT of keys 1..100000 guarded by WHERE NOT EXISTS.
TRUNCATE TABLE #temp
DECLARE @key INT, @started DATETIME, @elapsed_ms INT
SET @key = 0
SET @started = GETDATE()
WHILE @key < 100000
BEGIN
    SET @key = @key + 1
    INSERT #temp
    SELECT @key
    WHERE NOT EXISTS (SELECT * FROM #temp WHERE col1 = @key)
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('uniques, conditional insert: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @key) WITH NOWAIT
GO

-- Unique-key benchmark: keys 1..100000 filtered through EXCEPT against existing keys.
TRUNCATE TABLE #temp
DECLARE @key INT, @started DATETIME, @elapsed_ms INT
SET @key = 0
SET @started = GETDATE()
WHILE @key < 100000
BEGIN
    SET @key = @key + 1
    INSERT #temp
    SELECT @key
    EXCEPT
    SELECT col1 FROM #temp
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('uniques, except: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @key) WITH NOWAIT
GO

-- Comment this batch out for SQL 2005.
-- Unique-key benchmark: MERGE that inserts keys 1..100000 into an emptied table.
TRUNCATE TABLE #temp
DECLARE @key INT, @started DATETIME, @elapsed_ms INT
-- Start at 0 like the other uniques batches. Starting at 1 would run only 99999
-- merges while the message reports 100000, and would leave key 1 out of the table
-- that the tall-table duplicate tests later collide with.
SET @key = 0
SET @started = GETDATE()
WHILE @key < 100000
BEGIN
    SET @key = @key + 1
    MERGE #temp AS tgt
    USING (SELECT @key) AS src (col1)
        ON tgt.col1 = src.col1
    WHEN NOT MATCHED BY TARGET THEN
        INSERT VALUES (src.col1);
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('uniques, merge: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @key) WITH NOWAIT
GO

------------------------------------------------------- 

-- duplicate insert test against a table w/ 100000 records 

------------------------------------------------------- 

-- Duplicate-key benchmark, tall table: plain INSERT wrapped in TRY/CATCH.
-- Runs against the rows left in #temp by the preceding uniques batch (no truncate here).
DECLARE @key INT, @attempt INT, @started DATETIME, @elapsed_ms INT
SET @key = 1
SET @attempt = 0
SET @started = GETDATE()
WHILE @attempt < 100000
BEGIN
    SET @attempt = @attempt + 1
    BEGIN TRY
        INSERT #temp SELECT @key
    END TRY
    BEGIN CATCH
    END CATCH
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('duplicates (tall table), try/catch: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempt) WITH NOWAIT
GO

-- Duplicate-key benchmark, tall table: INSERT guarded by WHERE NOT EXISTS.
DECLARE @key INT, @attempt INT, @started DATETIME, @elapsed_ms INT
SET @key = 1
SET @attempt = 0
SET @started = GETDATE()
WHILE @attempt < 100000
BEGIN
    SET @attempt = @attempt + 1
    INSERT #temp
    SELECT @key
    WHERE NOT EXISTS (SELECT * FROM #temp WHERE col1 = @key)
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('duplicates (tall table), conditional insert: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempt) WITH NOWAIT
GO

-- Duplicate-key benchmark, tall table: candidate key filtered through EXCEPT
-- against the keys already in the table.
DECLARE @key INT, @attempt INT, @started DATETIME, @elapsed_ms INT
SET @key = 1
SET @attempt = 0
SET @started = GETDATE()
WHILE @attempt < 100000
BEGIN
    SET @attempt = @attempt + 1
    INSERT #temp
    SELECT @key
    EXCEPT
    SELECT col1 FROM #temp
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('duplicates (tall table), except: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempt) WITH NOWAIT
GO

-- Comment this batch out for SQL 2005.
-- Duplicate-key benchmark, tall table: MERGE that inserts only when the key is not matched.
DECLARE @key INT, @attempt INT, @started DATETIME, @elapsed_ms INT
SET @key = 1
SET @attempt = 0
SET @started = GETDATE()
WHILE @attempt < 100000
BEGIN
    SET @attempt = @attempt + 1
    MERGE #temp AS tgt
    USING (SELECT @key) AS src (col1)
        ON tgt.col1 = src.col1
    WHEN NOT MATCHED BY TARGET THEN
        INSERT VALUES (src.col1);
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('duplicates (tall table), merge: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempt) WITH NOWAIT
GO
+1

在这里使用唯一索引的主要原因是为了保证数据的完整性。我怀疑try/catch块中失败的插入不会成为大多数应用程序的瓶颈,尤其是在重复插入的尝试并不多的情况下(因为您的基准测试显示这种情况下性能相近)。但是我怀疑,一个不强制约束的数据模型迟早会出问题。另外,在SQL Server 2008上,我建议除其他策略外也考察一下MERGE的使用。 – mlibby 2009-11-06 17:47:42

+1

@mcl re:独特的索引,我完全同意,他应该有一个数据完整性的索引,如果他想要合理的性能,他将需要一个索引。回复:MERGE,我只是测试了它,它在所有场景中执行*非常类似于条件插入。 – 2009-11-06 17:51:51

+0

谢谢你们,我希望我能接受你们的答案。为了数据完整性,我将在其上放置一个唯一的索引,然后使用条件插入,因为它在性能和可读性方面似乎是最好的。 – Ravi 2009-11-06 23:34:48

6

EDIT:为防止race conditions在并发环境中,请在相关子查询中使用WITH (UPDLOCK)


我认为这将是标准的方法:

-- Insert the price row only when no row with the same SecurityCode/BuyPrice/SellPrice exists.
-- UPDLOCK alone only locks rows the subquery actually reads; when no matching row exists
-- there is nothing to lock, so HOLDLOCK (serializable range locking) is added to keep a
-- concurrent session from inserting the same key between the check and the insert.
-- NOTE: '=' never matches NULL, so nullable columns need explicit NULL handling here.
INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen) 
SELECT @SecurityCode, @BuyPrice, @SellPrice, @IsMarketOpen 
WHERE NOT EXISTS (
    SELECT * FROM MarketPrices WITH (UPDLOCK, HOLDLOCK) 
    WHERE SecurityCode = @SecurityCode 
    AND BuyPrice = @BuyPrice 
    AND SellPrice = @SellPrice 
) 

如果某些字段可以为NULL,则必须在条件中加入相应的NULL判断。

你的第一种方法很有趣,但EXCEPT对列匹配的要求迫使你绕了不少弯子。这个方法本质上是一样的,但能让你避开列匹配问题。

或者:

-- EXCEPT-based variant: the candidate key triple survives the EXCEPT only when no
-- existing row carries the same three values. IsMarketOpen is attached afterwards,
-- so it takes no part in the comparison.
INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen)
SELECT
    candidate.SecurityCode,
    candidate.BuyPrice,
    candidate.SellPrice,
    @IsMarketOpen
FROM (
    SELECT @SecurityCode, @BuyPrice, @SellPrice
    EXCEPT
    SELECT existing.SecurityCode, existing.BuyPrice, existing.SellPrice
    FROM MarketPrices AS existing WITH (UPDLOCK)
) AS candidate (SecurityCode, BuyPrice, SellPrice)

在这种情况下使用EXCEPT的好处是,它无需任何额外编码就能处理NULL。要在第一个例子中达到同样的效果,你需要对每一对值逐一显式判断NULL和相等。

你的第二种方法是可以的,但你不需要变量。看到Tomalak的解决方案,他很好地清理了它。此外,如果这是一个问题,您需要明确处理并发插入的可能性。

3

我任何时候都会选择语义清晰的解决方案。你的两个方案在我看来都相当晦涩(虽然后者比前者好)。

-- Check-then-insert: add the row only if no row with the same
-- SecurityCode/BuyPrice/SellPrice is already present.
IF NOT EXISTS (
    SELECT *
    FROM MarketPrices AS mp
    WHERE mp.SecurityCode = @SecurityCode
      AND mp.BuyPrice = @BuyPrice
      AND mp.SellPrice = @SellPrice
)
BEGIN
    INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen)
    VALUES (@SecurityCode, @BuyPrice, @SellPrice, @IsMarketOpen)
END

如果在SecurityCode, BuyPrice, SellPrice上建有复合索引,EXISTS查询应该相当快。

我认为基准测试不过是给一个WHILE循环计时的问题。自己测试一下就知道了。

2

另一种选择:在相关字段(SecurityCode,BuyPrice,SellPrice)上创建一个唯一索引,发出一个简单的插入,并让数据库确定记录是否重复。插入尝试插入重复时会失败。

使用代码(无论是外部语言还是SQL处理程序)来保证唯一性不够严格,最终会导致您希望防止的重复。

+0

我在想你可能是对的,特别是当涉及到并发插入时 – Ravi 2009-11-06 16:57:17

+0

我很想看到这个基准。假设一个唯一的索引,它有更多的开销:条件插入的WHERE子句,或TRY/CATCH块的异常处理?如果您希望99%的插入*为*不重复,我想TRY/CATCH块可能更有效。 – 2009-11-06 16:59:03

+0

我打算做到这一点,当我回家 - 将在这里发布结果 – Ravi 2009-11-06 17:04:46

0

如果你不需要捕获重复项,可以创建一个把“忽略重复”(IGNORE_DUP_KEY)设置为true的唯一索引。 SQL Server将为您处理此问题。

1

下面我把“Only inserting a row if it's not already there”一问中得票最高的答案,加入到了Peter Radocchia的出色回答之中。

结论是:在没有实际冲突时(即你预期冲突非常罕见——这是uniques场景),race safe with try/catch技术比race safe with updlock, holdlock技术略快(约1%);而在总是发生冲突时(这是duplicates场景),它稍慢(约20%)。这里没有把锁升级等复杂问题考虑在内。

下面是结果(SQL Server 2014,build 12.0.2000.8):

duplicates (short table)  
    try/catch:      15546 milliseconds/100000 inserts 
    conditional insert:    1460 milliseconds/100000 inserts 
    except:       1490 milliseconds/100000 inserts 
    merge:       1420 milliseconds/100000 inserts 
    race safe with try/catch:   1650 milliseconds/100000 inserts 
    race safe with updlock, holdlock: 1330 milliseconds/100000 inserts 

uniques 
    try/catch:      2266 milliseconds/100000 inserts 
    conditional insert:    2156 milliseconds/100000 inserts 
    except:       2273 milliseconds/100000 inserts 
    merge:       2136 milliseconds/100000 inserts 
    race safe with try/catch:   2400 milliseconds/100000 inserts 
    race safe with updlock, holdlock: 2430 milliseconds/100000 inserts 

    straight insert:     1686 milliseconds/100000 inserts 

duplicates (tall table) 
    try/catch:      15826 milliseconds/100000 inserts 
    conditional insert:    1530 milliseconds/100000 inserts 
    except:       1506 milliseconds/100000 inserts 
    merge:       1443 milliseconds/100000 inserts 
    race safe with try/catch:   1636 milliseconds/100000 inserts 
    race safe with updlock, holdlock: 1426 milliseconds/100000 inserts 

重复(短表)部分:

-- Duplicate-key benchmark, short table: WHERE NOT EXISTS guard inside TRY/CATCH.
DECLARE @key INT, @attempt INT, @started DATETIME, @elapsed_ms INT
SET @key = 1
SET @attempt = 0
SET @started = GETDATE()
WHILE @attempt < 100000
BEGIN
    SET @attempt = @attempt + 1
    BEGIN TRY
        INSERT #temp
        SELECT @key
        WHERE NOT EXISTS (SELECT * FROM #temp WHERE col1 = @key)
    END TRY
    BEGIN CATCH
        -- Error 2627 (duplicate key on a PRIMARY KEY/UNIQUE constraint) is swallowed;
        -- any other error is re-raised.
        IF ERROR_NUMBER() <> 2627
            THROW
    END CATCH
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('duplicates (short table), race safe with try/catch: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempt) WITH NOWAIT
GO

-- Duplicate-key benchmark, short table: WHERE NOT EXISTS guard whose existence
-- check reads #temp with the UPDLOCK and HOLDLOCK hints.
DECLARE @key INT, @attempt INT, @started DATETIME, @elapsed_ms INT
SET @key = 1
SET @attempt = 0
SET @started = GETDATE()
WHILE @attempt < 100000
BEGIN
    SET @attempt = @attempt + 1
    INSERT #temp
    SELECT @key
    WHERE NOT EXISTS (
        SELECT * FROM #temp WITH (UPDLOCK, HOLDLOCK) WHERE col1 = @key
    )
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('duplicates (short table), race safe with updlock, holdlock: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempt) WITH NOWAIT
GO

不重复部分

-- Unique-key benchmark: keys 1..100000, WHERE NOT EXISTS guard inside TRY/CATCH.
TRUNCATE TABLE #temp
DECLARE @key INT, @started DATETIME, @elapsed_ms INT
SET @key = 0
SET @started = GETDATE()
WHILE @key < 100000
BEGIN
    SET @key = @key + 1
    BEGIN TRY
        INSERT #temp
        SELECT @key
        WHERE NOT EXISTS (SELECT * FROM #temp WHERE col1 = @key)
    END TRY
    BEGIN CATCH
        -- Error 2627 (duplicate key on a PRIMARY KEY/UNIQUE constraint) is swallowed;
        -- any other error is re-raised.
        IF ERROR_NUMBER() <> 2627
            THROW
    END CATCH
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('uniques, race safe with try/catch: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @key) WITH NOWAIT
GO

-- Unique-key benchmark: keys 1..100000, WHERE NOT EXISTS guard whose existence
-- check reads #temp with the UPDLOCK and HOLDLOCK hints.
TRUNCATE TABLE #temp
DECLARE @key INT, @started DATETIME, @elapsed_ms INT
SET @key = 0
SET @started = GETDATE()
WHILE @key < 100000
BEGIN
    SET @key = @key + 1
    INSERT #temp
    SELECT @key
    WHERE NOT EXISTS (
        SELECT * FROM #temp WITH (UPDLOCK, HOLDLOCK) WHERE col1 = @key
    )
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('uniques, race safe with updlock, holdlock: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @key) WITH NOWAIT
GO

重复(高表)部分

-- Duplicate-key benchmark, tall table: WHERE NOT EXISTS guard inside TRY/CATCH.
-- Runs against whatever rows are already in #temp (no truncate here).
DECLARE @key INT, @attempt INT, @started DATETIME, @elapsed_ms INT
SET @key = 1
SET @attempt = 0
SET @started = GETDATE()
WHILE @attempt < 100000
BEGIN
    SET @attempt = @attempt + 1
    BEGIN TRY
        INSERT #temp
        SELECT @key
        WHERE NOT EXISTS (SELECT * FROM #temp WHERE col1 = @key)
    END TRY
    BEGIN CATCH
        -- Error 2627 (duplicate key on a PRIMARY KEY/UNIQUE constraint) is swallowed;
        -- any other error is re-raised.
        IF ERROR_NUMBER() <> 2627
            THROW
    END CATCH
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('duplicates (tall table), race safe with try/catch: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempt) WITH NOWAIT
GO

-- Duplicate-key benchmark, tall table: WHERE NOT EXISTS guard whose existence
-- check reads #temp with the UPDLOCK and HOLDLOCK hints.
DECLARE @key INT, @attempt INT, @started DATETIME, @elapsed_ms INT
SET @key = 1
SET @attempt = 0
SET @started = GETDATE()
WHILE @attempt < 100000
BEGIN
    SET @attempt = @attempt + 1
    INSERT #temp
    SELECT @key
    WHERE NOT EXISTS (
        SELECT * FROM #temp WITH (UPDLOCK, HOLDLOCK) WHERE col1 = @key
    )
END
SET @elapsed_ms = DATEDIFF(ms, @started, GETDATE())
RAISERROR('duplicates (tall table), race safe with updlock, holdlock: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempt) WITH NOWAIT
GO