SQL插入，但避免重复

我想做一些快速插入，同时避免向表中插入重复记录。为便于讨论，我们把这张表称为MarketPrices。我一直在尝试下面两种做法，但不知道该如何进行基准测试来判断哪一种更快。

-- Approach 1: insert the candidate row unless EXCEPT removes it.
-- Each existing row is paired with both 0 and 1 in the fourth column, so
-- (assuming @IsMarketOpen is 0 or 1) the comparison is decided by
-- SecurityCode, BuyPrice and SellPrice alone.
INSERT INTO MarketPrices
    (SecurityCode, BuyPrice, SellPrice, IsMarketOpen)
SELECT
    @SecurityCode,
    @BuyPrice,
    @SellPrice,
    @IsMarketOpen
EXCEPT
SELECT
    mp.SecurityCode,
    mp.BuyPrice,
    mp.SellPrice,
    flag.bool AS IsActive
FROM MarketPrices AS mp
CROSS JOIN (
    SELECT 0 AS bool
    UNION
    SELECT 1 AS bool
) AS flag

-- Approach 2: look up an existing row with the same security code and prices,
-- and insert only when none is found.
-- NOTE(review): the lookup and the insert are separate statements; under
-- concurrent callers they presumably need to run inside one transaction.
DECLARE @MktId int 
SET @MktId = (SELECT SecurityId FROM MarketPrices 
       where SecurityCode = @SecurityCode 
       and BuyPrice = @BuyPrice 
       and SellPrice = @SellPrice) 

-- NULL here means the lookup returned no matching row.
IF (@MktId is NULL) 
BEGIN 
    INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen) 
    VALUES 
    (@SecurityCode,@BuyPrice, @SellPrice, @IsMarketOpen) 
END

假设@whatever是在存储过程中的输入参数。

我希望每当某个SecurityCode的BuyPrice、SellPrice或两者与此前所有记录都不相同时，就为它插入一条新记录。我不关心IsMarketOpen。

上述两种方法中有没有什么明显愚蠢的地方？其中一种会比另一种快吗？

来源

2009-11-06 Ravi

记住，第二种方法应该包含在一个事务中，否则你可能会遇到并发问题。 – 2009-11-06 16:37:57

难道你不能直接创建一个唯一索引吗？我没有MS SQL方面的经验，但我认为它应该有这样的功能 – 2009-11-06 16:41:19

@valya：有趣的是，人们竟然会怀疑SQL Server连最简单的事情都做不到。我甚至不确定，在*不*支持唯一索引的情况下能否实现一个关系数据库引擎。 – Tomalak 2009-11-06 16:50:31

EDIT：为防止并发环境中的竞争条件（race conditions），请在相关子查询或EXCEPT后面的SELECT中使用WITH (UPDLOCK)。我在下面写的测试脚本不需要它，因为它使用的是只对当前连接可见的临时表；但在真实环境中对用户表进行操作时，这是非常必要的。

MERGE不需要UPDLOCK。

再次受到MCL回答的启发（使用唯一索引并让数据库抛出错误），我决定对conditional inserts与try/catch进行基准测试。

结果似乎表明条件插入优于try/catch，但具体情况可能因环境而异。这是一个非常简单的场景（单列、小表等），并且只在一台机器上执行。

下面是结果（SQL Server 2008，build 10.0.1600.2）：

duplicates (short table)  
    try/catch:    14440 milliseconds/100000 inserts 
    conditional insert:  2983 milliseconds/100000 inserts 
    except:     2966 milliseconds/100000 inserts 
    merge:      2983 milliseconds/100000 inserts 

uniques 
    try/catch:     3920 milliseconds/100000 inserts 
    conditional insert:  3860 milliseconds/100000 inserts 
    except:     3873 milliseconds/100000 inserts 
    merge:      3890 milliseconds/100000 inserts 

    straight insert:   3173 milliseconds/100000 inserts 

duplicates (tall table) 
    try/catch:    14436 milliseconds/100000 inserts 
    conditional insert:  3063 milliseconds/100000 inserts 
    except:     3063 milliseconds/100000 inserts 
    merge:      3030 milliseconds/100000 inserts

注意，即使在插入唯一值的情况下，try/catch的开销也比条件插入略高。我想知道这是否因版本、CPU、内核数量等而异。

我没有对IF形式的条件插入做基准测试，只测试了WHERE形式。我认为IF变体会显示出更多的开销，因为a）你会有两条语句，b）你需要将这两条语句包装在一个事务中，并将隔离级别设置为可序列化（！）。如果有人想测试这一点，需要将临时表改为常规用户表（可序列化不适用于本地临时表）。

下面是脚本：

-- Tested on SQL 2008.
-- To run on SQL 2005, comment out the statements using MERGE.
SET NOCOUNT ON;

-- Start from a fresh single-column scratch table; the primary key on col1 is
-- what makes a duplicate insert fail.
IF OBJECT_ID('tempdb..#temp') IS NOT NULL
BEGIN
    DROP TABLE #temp;
END;

CREATE TABLE #temp (col1 INT PRIMARY KEY);
GO

------------------------------------------------------- 

-- duplicate insert test against a table w/ 1 record 

------------------------------------------------------- 

-- Seed the single row that the duplicate-insert batches below collide with.
INSERT INTO #temp (col1) VALUES (1);
GO

-- Duplicates, short table: plain insert, swallowing whatever error it raises.
DECLARE @val INT, @attempts INT, @started_at DATETIME, @elapsed_ms INT;
SET @val = 1;
SET @attempts = 0;
SET @started_at = GETDATE();

WHILE @attempts < 100000
BEGIN
    SET @attempts = @attempts + 1;
    BEGIN TRY
        INSERT INTO #temp (col1)
        SELECT @val;
    END TRY
    BEGIN CATCH
        -- Deliberately empty: the cost of raising and catching is what is timed.
    END CATCH;
END;

SET @elapsed_ms = DATEDIFF(millisecond, @started_at, GETDATE());
RAISERROR('duplicates (short table), try/catch: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempts) WITH NOWAIT;
GO

-- Duplicates, short table: insert guarded by a NOT EXISTS check.
DECLARE @val INT, @attempts INT, @started_at DATETIME, @elapsed_ms INT;
SET @val = 1;
SET @attempts = 0;
SET @started_at = GETDATE();

WHILE @attempts < 100000
BEGIN
    SET @attempts = @attempts + 1;
    INSERT INTO #temp (col1)
    SELECT @val
    WHERE NOT EXISTS (SELECT * FROM #temp WHERE col1 = @val);
END;

SET @elapsed_ms = DATEDIFF(millisecond, @started_at, GETDATE());
RAISERROR('duplicates (short table), conditional insert: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempts) WITH NOWAIT;
GO

-- Duplicates, short table: EXCEPT filters out values already in the table.
DECLARE @val INT, @attempts INT, @started_at DATETIME, @elapsed_ms INT;
SET @val = 1;
SET @attempts = 0;
SET @started_at = GETDATE();

WHILE @attempts < 100000
BEGIN
    SET @attempts = @attempts + 1;
    INSERT INTO #temp (col1)
    SELECT @val
    EXCEPT
    SELECT col1 FROM #temp;
END;

SET @elapsed_ms = DATEDIFF(millisecond, @started_at, GETDATE());
RAISERROR('duplicates (short table), except: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempts) WITH NOWAIT;
GO

-- Comment this batch out for SQL 2005.
-- Duplicates, short table: MERGE that inserts only when the target has no match.
DECLARE @val INT, @attempts INT, @started_at DATETIME, @elapsed_ms INT;
SET @val = 1;
SET @attempts = 0;
SET @started_at = GETDATE();

WHILE @attempts < 100000
BEGIN
    SET @attempts = @attempts + 1;
    MERGE #temp AS t
    USING (SELECT @val) AS s (col1)
        ON t.col1 = s.col1
    WHEN NOT MATCHED BY TARGET THEN
        INSERT (col1) VALUES (s.col1);
END;

SET @elapsed_ms = DATEDIFF(millisecond, @started_at, GETDATE());
RAISERROR('duplicates (short table), merge: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempts) WITH NOWAIT;
GO

------------------------------------------------------- 

-- unique insert test against an initially empty table 

------------------------------------------------------- 

-- Uniques baseline: unguarded inserts of 1..100000 into an empty table.
TRUNCATE TABLE #temp;

DECLARE @val INT, @started_at DATETIME, @elapsed_ms INT;
SET @val = 0;
SET @started_at = GETDATE();

WHILE @val < 100000
BEGIN
    SET @val = @val + 1;
    INSERT INTO #temp (col1)
    SELECT @val;
END;

SET @elapsed_ms = DATEDIFF(millisecond, @started_at, GETDATE());
RAISERROR('uniques, straight insert: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @val) WITH NOWAIT;
GO

-- Uniques: inserts of 1..100000 wrapped in TRY/CATCH; no value repeats.
TRUNCATE TABLE #temp;

DECLARE @val INT, @started_at DATETIME, @elapsed_ms INT;
SET @val = 0;
SET @started_at = GETDATE();

WHILE @val < 100000
BEGIN
    SET @val = @val + 1;
    BEGIN TRY
        INSERT INTO #temp (col1)
        SELECT @val;
    END TRY
    BEGIN CATCH
        -- Deliberately empty, matching the duplicate-insert variant.
    END CATCH;
END;

SET @elapsed_ms = DATEDIFF(millisecond, @started_at, GETDATE());
RAISERROR('uniques, try/catch: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @val) WITH NOWAIT;
GO

-- Uniques: NOT EXISTS-guarded inserts of 1..100000 into an empty table.
TRUNCATE TABLE #temp;

DECLARE @val INT, @started_at DATETIME, @elapsed_ms INT;
SET @val = 0;
SET @started_at = GETDATE();

WHILE @val < 100000
BEGIN
    SET @val = @val + 1;
    INSERT INTO #temp (col1)
    SELECT @val
    WHERE NOT EXISTS (SELECT * FROM #temp WHERE col1 = @val);
END;

SET @elapsed_ms = DATEDIFF(millisecond, @started_at, GETDATE());
RAISERROR('uniques, conditional insert: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @val) WITH NOWAIT;
GO

-- Uniques: EXCEPT-guarded inserts of 1..100000 into an empty table.
TRUNCATE TABLE #temp;

DECLARE @val INT, @started_at DATETIME, @elapsed_ms INT;
SET @val = 0;
SET @started_at = GETDATE();

WHILE @val < 100000
BEGIN
    SET @val = @val + 1;
    INSERT INTO #temp (col1)
    SELECT @val
    EXCEPT
    SELECT col1 FROM #temp;
END;

SET @elapsed_ms = DATEDIFF(millisecond, @started_at, GETDATE());
RAISERROR('uniques, except: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @val) WITH NOWAIT;
GO

-- comment this batch out for SQL 2005 
-- Uniques scenario, MERGE variant: the table is emptied first and every value
-- is new, so each MERGE takes the WHEN NOT MATCHED branch and inserts.
truncate table #temp 
declare @x int, @now datetime, @duration int 
-- Start at 0 (not 1) so the loop inserts 1..100000: exactly the 100000 inserts
-- that the message below reports, and the same range as the other uniques
-- batches in this script.
select @x = 0, @now = getdate() 
while @x < 100000 begin 
    set @x = @x+1 
    merge #temp t using (select @x) s (col1) on t.col1 = s.col1 when not matched by target then insert values (col1); 
end 
set @duration = datediff(ms,@now,getdate()) 
raiserror('uniques, merge: %i milliseconds/%i inserts',-1,-1,@duration, @x) with nowait 
go 

------------------------------------------------------- 

-- duplicate insert test against a table w/ 100000 records 

------------------------------------------------------- 

-- Duplicates, tall table: repeated insert of the value 1 with errors swallowed.
-- #temp is not truncated here, so it keeps whatever rows earlier batches left.
DECLARE @val INT, @attempts INT, @started_at DATETIME, @elapsed_ms INT;
SET @val = 1;
SET @attempts = 0;
SET @started_at = GETDATE();

WHILE @attempts < 100000
BEGIN
    SET @attempts = @attempts + 1;
    BEGIN TRY
        INSERT INTO #temp (col1)
        SELECT @val;
    END TRY
    BEGIN CATCH
        -- Deliberately empty: the cost of raising and catching is what is timed.
    END CATCH;
END;

SET @elapsed_ms = DATEDIFF(millisecond, @started_at, GETDATE());
RAISERROR('duplicates (tall table), try/catch: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempts) WITH NOWAIT;
GO

-- Duplicates, tall table: NOT EXISTS-guarded insert of the value 1, repeated.
DECLARE @val INT, @attempts INT, @started_at DATETIME, @elapsed_ms INT;
SET @val = 1;
SET @attempts = 0;
SET @started_at = GETDATE();

WHILE @attempts < 100000
BEGIN
    SET @attempts = @attempts + 1;
    INSERT INTO #temp (col1)
    SELECT @val
    WHERE NOT EXISTS (SELECT * FROM #temp WHERE col1 = @val);
END;

SET @elapsed_ms = DATEDIFF(millisecond, @started_at, GETDATE());
RAISERROR('duplicates (tall table), conditional insert: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempts) WITH NOWAIT;
GO

-- Duplicates, tall table: EXCEPT-guarded insert of the value 1, repeated.
DECLARE @val INT, @attempts INT, @started_at DATETIME, @elapsed_ms INT;
SET @val = 1;
SET @attempts = 0;
SET @started_at = GETDATE();

WHILE @attempts < 100000
BEGIN
    SET @attempts = @attempts + 1;
    INSERT INTO #temp (col1)
    SELECT @val
    EXCEPT
    SELECT col1 FROM #temp;
END;

SET @elapsed_ms = DATEDIFF(millisecond, @started_at, GETDATE());
RAISERROR('duplicates (tall table), except: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempts) WITH NOWAIT;
GO

-- Comment this batch out for SQL 2005.
-- Duplicates, tall table: MERGE of the value 1, repeated.
DECLARE @val INT, @attempts INT, @started_at DATETIME, @elapsed_ms INT;
SET @val = 1;
SET @attempts = 0;
SET @started_at = GETDATE();

WHILE @attempts < 100000
BEGIN
    SET @attempts = @attempts + 1;
    MERGE #temp AS t
    USING (SELECT @val) AS s (col1)
        ON t.col1 = s.col1
    WHEN NOT MATCHED BY TARGET THEN
        INSERT (col1) VALUES (s.col1);
END;

SET @elapsed_ms = DATEDIFF(millisecond, @started_at, GETDATE());
RAISERROR('duplicates (tall table), merge: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempts) WITH NOWAIT;
GO

来源

2009-11-06 17:31:51

在这里使用唯一索引的主要原因是为了保证数据的完整性。我认为try/catch块中失败的插入不会成为大多数应用程序的瓶颈，尤其是在很少尝试插入重复记录的情况下（因为您的基准测试显示这种情况下两者性能相近）。但我认为，一个没有强制约束的数据模型迟早会出问题。另外，在SQL Server 2008上，我建议除其他策略之外也研究一下MERGE的用法。 – mlibby 2009-11-06 17:47:42

@mcl re：独特的索引，我完全同意，他应该有一个数据完整性的索引，如果他想要合理的性能，他将需要一个索引。回复：MERGE，我只是测试了它，它在所有场景中执行*非常类似于条件插入。 – 2009-11-06 17:51:51

谢谢你们，我希望我能接受你们的答案。为了数据完整性，我将在其上放置一个唯一的索引，然后使用条件插入，因为它在性能和可读性方面似乎是最好的。 – Ravi 2009-11-06 23:34:48

EDIT：为防止race conditions在并发环境中，请在相关子查询中使用WITH (UPDLOCK)。

我认为这将是标准的方法：

-- Insert the candidate row only if no row with the same
-- (SecurityCode, BuyPrice, SellPrice) already exists.
-- UPDLOCK alone only locks rows that are actually read; when no matching row
-- exists there is nothing to lock, so two concurrent sessions could both pass
-- the check. HOLDLOCK (serializable semantics) keeps the checked range locked
-- until the transaction ends, which closes that gap.
INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen) 
SELECT @SecurityCode, @BuyPrice, @SellPrice, @IsMarketOpen 
WHERE NOT EXISTS (
    SELECT * FROM MarketPrices WITH (UPDLOCK, HOLDLOCK) 
    WHERE SecurityCode = @SecurityCode 
    AND BuyPrice = @BuyPrice 
    AND SellPrice = @SellPrice 
)

如果您的字段可以为NULL，则必须在条件中加上对NULL的处理。

你的第一种方法很有趣，但为了满足EXCEPT的要求，你不得不绕不少弯子。下面这个方法本质上是一样的，但它可以让你绕开列匹配的问题。

或者：

-- Insert the candidate row only if EXCEPT leaves it standing, i.e. no existing
-- row has the same (SecurityCode, BuyPrice, SellPrice). IsMarketOpen is added
-- outside the derived table so it takes no part in the comparison.
-- UPDLOCK alone locks nothing when no matching row exists, so two concurrent
-- sessions could both insert; HOLDLOCK (serializable semantics) keeps the
-- checked range locked until the transaction ends.
INSERT INTO MarketPrices (SecurityCode, BuyPrice, SellPrice, IsMarketOpen) 
SELECT SecurityCode, BuyPrice, SellPrice, @IsMarketOpen 
FROM (
    SELECT @SecurityCode, @BuyPrice, @SellPrice 
    EXCEPT 
    SELECT SecurityCode, BuyPrice, SellPrice FROM MarketPrices WITH (UPDLOCK, HOLDLOCK) 
) a (SecurityCode, BuyPrice, SellPrice)

在这种情况下使用EXCEPT的好处是，它能直接处理NULL值，而无需您编写任何额外的代码。要在第一个例子中达到同样的效果，您需要对每一对值既测试是否同为NULL，又测试是否相等，写起来很冗长。

你的第二种方法是可以的，但你不需要那个变量。请看Tomalak的解决方案，他把它整理得很干净。此外，如果并发插入可能成为问题，您需要明确地处理它。

来源

2009-11-06 16:33:15

我任何时候都会选择语义清晰的解决方案。你的两个方案在我看来都相当晦涩（虽然后者比前者好）。

-- Check first, then insert: the INSERT runs only when no row with the same
-- security code, buy price and sell price is found.
IF NOT EXISTS (
    SELECT 1
    FROM MarketPrices AS mp
    WHERE mp.SecurityCode = @SecurityCode
      AND mp.BuyPrice = @BuyPrice
      AND mp.SellPrice = @SellPrice
)
BEGIN
    INSERT INTO MarketPrices
        (SecurityCode, BuyPrice, SellPrice, IsMarketOpen)
    VALUES
        (@SecurityCode, @BuyPrice, @SellPrice, @IsMarketOpen);
END

在(SecurityCode, BuyPrice, SellPrice)上建立一个复合索引，应该能让EXISTS查询相当快。

要我说，基准测试无非就是给一个WHILE循环计时。自己测试一下，看看结果。

来源

2009-11-06 16:33:58 Tomalak

另一种选择：在相关字段（SecurityCode，BuyPrice，SellPrice）上创建一个唯一索引，发出一个简单的插入，并让数据库确定记录是否重复。插入尝试插入重复时会失败。

使用代码（无论是外部语言还是SQL过程）来保证唯一性都不够严格，最终仍会产生您希望防止的重复记录。

来源

2009-11-06 16:50:15 mlibby

我在想你可能是对的，特别是当涉及到并发插入时 – Ravi 2009-11-06 16:57:17

我很想看到这个基准测试。假设有唯一索引，哪一个开销更大：条件插入的WHERE子句，还是TRY/CATCH块的异常处理？如果您预计99％的插入都*不是*重复的，我想TRY/CATCH块可能更高效。 – 2009-11-06 16:59:03

我打算做到这一点，当我回家 - 将在这里发布结果 – Ravi 2009-11-06 17:04:46

顺便说一下，如果你不需要捕获重复错误，你总是可以创建一个将“忽略重复”设置为true的唯一索引。SQL Server会为您处理此问题。

来源

2010-12-11 08:26:51 IamIC

下面我把“Only inserting a row if it's not already there”这个问题中最高票的答案加入到了Peter Radocchia的出色回答之中。

结论是：当没有实际冲突时（即你预计冲突非常罕见，对应uniques场景），race safe with try/catch技术比race safe with updlock, holdlock技术略快（约1％）；而当总是发生冲突时（对应duplicates场景），它会稍慢一些（约20％）。这里没有把锁升级等复杂问题考虑在内。

下面是结果（SQL Server 2014，build 12.0.2000.8）：

duplicates (short table)  
    try/catch:      15546 milliseconds/100000 inserts 
    conditional insert:    1460 milliseconds/100000 inserts 
    except:       1490 milliseconds/100000 inserts 
    merge:       1420 milliseconds/100000 inserts 
    race safe with try/catch:   1650 milliseconds/100000 inserts 
    race safe with updlock, holdlock: 1330 milliseconds/100000 inserts 

uniques 
    try/catch:      2266 milliseconds/100000 inserts 
    conditional insert:    2156 milliseconds/100000 inserts 
    except:       2273 milliseconds/100000 inserts 
    merge:       2136 milliseconds/100000 inserts 
    race safe with try/catch:   2400 milliseconds/100000 inserts 
    race safe with updlock, holdlock: 2430 milliseconds/100000 inserts 

    straight insert:     1686 milliseconds/100000 inserts 

duplicates (tall table) 
    try/catch:      15826 milliseconds/100000 inserts 
    conditional insert:    1530 milliseconds/100000 inserts 
    except:       1506 milliseconds/100000 inserts 
    merge:       1443 milliseconds/100000 inserts 
    race safe with try/catch:   1636 milliseconds/100000 inserts 
    race safe with updlock, holdlock: 1426 milliseconds/100000 inserts

重复（短表）部分：

-- Duplicates, short table: NOT EXISTS-guarded insert inside TRY/CATCH.
DECLARE @val INT, @attempts INT, @started_at DATETIME, @elapsed_ms INT;
SET @val = 1;
SET @attempts = 0;
SET @started_at = GETDATE();

WHILE @attempts < 100000
BEGIN
    SET @attempts = @attempts + 1;
    BEGIN TRY
        INSERT INTO #temp (col1)
        SELECT @val
        WHERE NOT EXISTS (SELECT * FROM #temp WHERE col1 = @val);
    END TRY
    BEGIN CATCH
        -- Swallow only error 2627; anything else is re-raised.
        IF ERROR_NUMBER() <> 2627
            THROW;
    END CATCH;
END;

SET @elapsed_ms = DATEDIFF(millisecond, @started_at, GETDATE());
RAISERROR('duplicates (short table), race safe with try/catch: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempts) WITH NOWAIT;
GO

-- Duplicates, short table: NOT EXISTS check that reads with UPDLOCK, HOLDLOCK.
DECLARE @val INT, @attempts INT, @started_at DATETIME, @elapsed_ms INT;
SET @val = 1;
SET @attempts = 0;
SET @started_at = GETDATE();

WHILE @attempts < 100000
BEGIN
    SET @attempts = @attempts + 1;
    INSERT INTO #temp (col1)
    SELECT @val
    WHERE NOT EXISTS (
        SELECT *
        FROM #temp WITH (UPDLOCK, HOLDLOCK)
        WHERE col1 = @val
    );
END;

SET @elapsed_ms = DATEDIFF(millisecond, @started_at, GETDATE());
RAISERROR('duplicates (short table), race safe with updlock, holdlock: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempts) WITH NOWAIT;
GO

不重复部分

-- Uniques: NOT EXISTS-guarded inserts of 1..100000 inside TRY/CATCH.
TRUNCATE TABLE #temp;

DECLARE @val INT, @started_at DATETIME, @elapsed_ms INT;
SET @val = 0;
SET @started_at = GETDATE();

WHILE @val < 100000
BEGIN
    SET @val = @val + 1;
    BEGIN TRY
        INSERT INTO #temp (col1)
        SELECT @val
        WHERE NOT EXISTS (SELECT * FROM #temp WHERE col1 = @val);
    END TRY
    BEGIN CATCH
        -- Swallow only error 2627; anything else is re-raised.
        IF ERROR_NUMBER() <> 2627
            THROW;
    END CATCH;
END;

SET @elapsed_ms = DATEDIFF(millisecond, @started_at, GETDATE());
RAISERROR('uniques, race safe with try/catch: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @val) WITH NOWAIT;
GO

-- Uniques: inserts of 1..100000 whose NOT EXISTS check reads with
-- UPDLOCK, HOLDLOCK.
TRUNCATE TABLE #temp;

DECLARE @val INT, @started_at DATETIME, @elapsed_ms INT;
SET @val = 0;
SET @started_at = GETDATE();

WHILE @val < 100000
BEGIN
    SET @val = @val + 1;
    INSERT INTO #temp (col1)
    SELECT @val
    WHERE NOT EXISTS (
        SELECT *
        FROM #temp WITH (UPDLOCK, HOLDLOCK)
        WHERE col1 = @val
    );
END;

SET @elapsed_ms = DATEDIFF(millisecond, @started_at, GETDATE());
RAISERROR('uniques, race safe with updlock, holdlock: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @val) WITH NOWAIT;
GO

重复（高表）部分

-- Duplicates, tall table: NOT EXISTS-guarded insert of the value 1 inside
-- TRY/CATCH. #temp is not truncated here, so it keeps its existing rows.
DECLARE @val INT, @attempts INT, @started_at DATETIME, @elapsed_ms INT;
SET @val = 1;
SET @attempts = 0;
SET @started_at = GETDATE();

WHILE @attempts < 100000
BEGIN
    SET @attempts = @attempts + 1;
    BEGIN TRY
        INSERT INTO #temp (col1)
        SELECT @val
        WHERE NOT EXISTS (SELECT * FROM #temp WHERE col1 = @val);
    END TRY
    BEGIN CATCH
        -- Swallow only error 2627; anything else is re-raised.
        IF ERROR_NUMBER() <> 2627
            THROW;
    END CATCH;
END;

SET @elapsed_ms = DATEDIFF(millisecond, @started_at, GETDATE());
RAISERROR('duplicates (tall table), race safe with try/catch: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempts) WITH NOWAIT;
GO

-- Duplicates, tall table: insert of the value 1 whose NOT EXISTS check reads
-- with UPDLOCK, HOLDLOCK.
DECLARE @val INT, @attempts INT, @started_at DATETIME, @elapsed_ms INT;
SET @val = 1;
SET @attempts = 0;
SET @started_at = GETDATE();

WHILE @attempts < 100000
BEGIN
    SET @attempts = @attempts + 1;
    INSERT INTO #temp (col1)
    SELECT @val
    WHERE NOT EXISTS (
        SELECT *
        FROM #temp WITH (UPDLOCK, HOLDLOCK)
        WHERE col1 = @val
    );
END;

SET @elapsed_ms = DATEDIFF(millisecond, @started_at, GETDATE());
RAISERROR('duplicates (tall table), race safe with updlock, holdlock: %i milliseconds/%i inserts', -1, -1, @elapsed_ms, @attempts) WITH NOWAIT;
GO

来源

2015-05-18 20:35:29

SQL插入，但避免重复

回答

相关问题