0

情景如何优化此SQL外部连接查询?

我需要根据 customer 或 account_info 列与 test_userCheck 中记录的一对一匹配,从 test_userData 中选择记录。下面的代码会创建这两张表的模型,并填充一些随机数据来演示我的问题。按照这段代码,查询要找出所有满足 test_userData.customer = 'Guerrero, Unity' 或 test_userData.account_info = 'XXXXXXXXXXXXXXXX0821' 的记录,并且应该返回三行(confirmation_id = 6836985、5502798 和 3046441)。

问题

目前,该查询能够返回我需要的结果。但是,我真实的 userData 表有近 200 万条记录,而 userCheck 表大约有 10000 条。查询大概需要 7 秒,我认为这太长了。我也很担心,因为 userData 表将开始快速增长(每天新增数万条不重复的记录),我预计目前的做法迟早会变得难以维持。

问题

有什么办法可以优化这个查询,使其能够扩展到数以百万计的记录?数据存放在一台权限受限的共享 SQL Server 2008 服务器上。

-- Reset the test fixtures: drop each demo table when a previous run left it behind.
-- OBJECT_ID(..., N'U') is non-NULL only when a user table with that name exists.
IF OBJECT_ID(N'[dbo].[test_userData]', N'U') IS NOT NULL
    DROP TABLE [dbo].[test_userData];

GO

IF OBJECT_ID(N'[dbo].[test_userCheck]', N'U') IS NOT NULL
    DROP TABLE [dbo].[test_userCheck];

GO

-- Model of the user transaction table; one row per sale, keyed by an identity id.
-- NOTE(review): customer, account_info and email_address are nvarchar(max).
-- SQL Server does not accept (max) types as index key columns, so indexing the
-- join columns would need bounded types or another workaround -- confirm.
CREATE TABLE [dbo].[test_userData]
(
    [id]              int IDENTITY(1,1) NOT NULL,
    [merchant_id]     int               NOT NULL,
    [sales_date]      datetime          NOT NULL,
    [confirmation_id] int               NOT NULL,
    [customer]        nvarchar(max)     NOT NULL,
    [total]           smallmoney        NOT NULL,
    [account_info]    nvarchar(max)     NOT NULL,
    [email_address]   nvarchar(max)     NOT NULL,
    CONSTRAINT [PK_test_userData] PRIMARY KEY CLUSTERED ([id] ASC)
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON
        ) ON [PRIMARY]
) ON [PRIMARY];

GO 

-- Model of the table of transactions to check against, keyed by confirmation_id.
-- NOTE(review): customer and account_info are nvarchar(max), which SQL Server
-- does not accept as index key columns -- confirm before planning indexes.
CREATE TABLE [dbo].[test_userCheck]
(
    [confirmation_id] int           NOT NULL,
    [customer]        nvarchar(max) NOT NULL,
    [total]           smallmoney    NOT NULL,
    [account_info]    nvarchar(max) NOT NULL,
    CONSTRAINT [PK_test_userCheck] PRIMARY KEY CLUSTERED ([confirmation_id] ASC)
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON
        ) ON [PRIMARY]
) ON [PRIMARY];

GO 

-- Seed test_userData with 25 random user transactions.
-- Dates use the unseparated 'YYYYMMDD' form, which SQL Server parses the same
-- way regardless of the session's DATEFORMAT / language setting (the previous
-- 'MM/DD/YYYY' literals were only correct under mdy). Numeric columns receive
-- numeric literals so no implicit string-to-number conversion is needed.
INSERT INTO [dbo].[test_userData]
(
    merchant_id,
    sales_date,
    confirmation_id,
    customer,
    total,
    account_info,
    email_address
)
VALUES
(99, '20100325', 3361424, 'Soto, Ahmed', 936, 'XXXXXXXXXXXXXXXX8744', '[email protected]'),
(17, '20100912', 6710165, 'Holcomb, Eden', 1022, 'XXXXXXXXXXXXXXXX6367', '[email protected]'),
(32, '20100504', 4489509, 'Foster, Nasim', 1463, 'XXXXXXXXXXXXXXXX7115', '[email protected]'),
(95, '20110102', 5384061, 'Browning, Owen', 523, 'XXXXXXXXXXXXXXXX0576', '[email protected]'),
(91, '20100821', 6075234, 'Dawson, McKenzie', 141, 'XXXXXXXXXXXXXXXX3580', '[email protected]'),
(63, '20100129', 1055619, 'Mathews, Keefe', 1110, 'XXXXXXXXXXXXXXXX2682', '[email protected]'),
(27, '20101020', 1819662, 'Clarke, Briar', 1474, 'XXXXXXXXXXXXXXXX7481', '[email protected]'),
(82, '20100305', 3184936, 'Holman, Dana', 560, 'XXXXXXXXXXXXXXXX7080', '[email protected]'),
(24, '20100611', 1007427, 'Kirk, Desiree', 206, 'XXXXXXXXXXXXXXXX3681', '[email protected]'),
(49, '20100617', 6137066, 'Foley, Sopoline', 1831, 'XXXXXXXXXXXXXXXX1718', '[email protected]'),
(22, '20100508', 3545367, 'Howell, Uriel', 638, 'XXXXXXXXXXXXXXXX1945', '[email protected]'),
(5, '20101025', 6836985, 'Little, Caryn', 743, 'XXXXXXXXXXXXXXXX0821', '[email protected]'),
(91, '20100616', 6852582, 'Buckner, Chiquita', 99, 'XXXXXXXXXXXXXXXX1533', '[email protected]'),
(63, '20100612', 7930230, 'Nolan, Wyoming', 1192, 'XXXXXXXXXXXXXXXX1291', '[email protected]'),
(32, '20100201', 8407102, 'Cummings, Deacon', 1315, 'XXXXXXXXXXXXXXXX4375', '[email protected]'),
(75, '20100629', 5502798, 'Guerrero, Unity', 858, 'XXXXXXXXXXXXXXXX8000', '[email protected]'),
(50, '20100913', 8312525, 'Russo, Yvette', 1680, 'XXXXXXXXXXXXXXXX2046', '[email protected]'),
(11, '20100413', 6204132, 'Small, Calista', 426, 'XXXXXXXXXXXXXXXX0269', '[email protected]'),
(16, '20110101', 7522507, 'Mosley, Thor', 1459, 'XXXXXXXXXXXXXXXX8451', '[email protected]'),
(5, '20100127', 1472120, 'Case, Kiona', 1419, 'XXXXXXXXXXXXXXXX7097', '[email protected]'),
(70, '20100217', 1095935, 'Snyder, Tanner', 1655, 'XXXXXXXXXXXXXXXX8556', '[email protected]'),
(63, '20101110', 3046441, 'Guerrero, Unity', 629, 'XXXXXXXXXXXXXXXX0807', '[email protected]'),
(22, '20100819', 5435100, 'Turner, Patrick', 1133, 'XXXXXXXXXXXXXXXX6734', '[email protected]'),
(96, '20101005', 6381992, 'May, Dominic', 1858, 'XXXXXXXXXXXXXXXX7227', '[email protected]'),
(96, '20100226', 8630748, 'Chandler, Olympia', 1016, 'XXXXXXXXXXXXXXXX4001', '[email protected]');

-- Seed test_userCheck with a single fraud transaction; only its customer and
-- account_info values are used when matching against test_userData.
INSERT INTO [dbo].[test_userCheck]
(
    confirmation_id,
    customer,
    total,
    account_info
)
VALUES
(
    '2055015',
    'Guerrero, Unity',
    '20.02',
    'XXXXXXXXXXXXXXXX0821'
);

-- Return the test_userData rows that share a customer value or an account_info
-- value with a row in test_userCheck. test_userCheck is the preserved side of
-- the outer join, so a check row without any match still yields one row whose
-- selected columns are all NULL.
-- NOTE(review): the OR in the join predicate may keep the optimizer from using
-- one index per column; compare against a UNION of two equi-joins in the
-- execution plan before relying on this at scale.
SELECT
    ud.confirmation_id,
    ud.customer,
    ud.total,
    ud.account_info,
    ud.email_address
FROM dbo.test_userCheck AS chk
LEFT OUTER JOIN dbo.test_userData AS ud
    ON ud.customer = chk.customer
    OR ud.account_info = chk.account_info;

-- Tear down the test fixtures created earlier in this script.
DROP TABLE
    [dbo].[test_userData],
    [dbo].[test_userCheck];

回答

1

创建合适的一个或多个索引。仅根据你的问题,我建议创建两个索引:一个建在 test_userData.customer 上,另一个建在 test_userData.account_info 上。

+0

我试过各种索引组合,但没有注意到性能增益。 – Paul 2012-01-05 22:55:13

+0

最终发现确实是索引问题。谢谢! – Paul 2012-08-03 00:35:10

0

创建索引可能会有所帮助,但你有没有考虑过另一种符合范式的设计?如果你通过整数列而不是字符串索引来访问数据,效果会更好……

+0

虽然我同意你的看法,但我无法控制数据的格式。我想我可以做一些像“索引视图”的事情,但我没有经验。 – Paul 2012-01-06 17:53:33