2017-03-17 92 views
2

我正在尝试在带筛选条件的情况下合并连续的日期区间(标题:合并连续日期)。

例子:

我的数据集:

enter image description here

我需要的结果(仅针对 paid = 3):

enter image description here

建表和示例数据的脚本:

-- Sample table for the date-range merging example: one row per period,
-- bounded by from_date and to_date, tagged with a work_status and a paid code.
CREATE TABLE dbo.NRFC
(
    work_status int        NOT NULL,
    paid        varchar(2) NOT NULL,
    from_date   date       NOT NULL,
    to_date     date       NOT NULL
);

-- Ten sample periods, loaded with a single table value constructor.
INSERT INTO dbo.NRFC (work_status, paid, from_date, to_date)
VALUES
    (101, N'3', '20100101 00:00:00.000', '20100116 00:00:00.000'),
    (101, N'3', '20100116 00:00:00.000', '20150128 00:00:00.000'),
    (101, N'5', '20150129 00:00:00.000', '20150331 00:00:00.000'),
    (101, N'4', '20150401 00:00:00.000', '20150512 00:00:00.000'),
    (101, N'3', '20160101 00:00:00.000', '20170315 00:00:00.000'),
    (101, N'4', '20170316 00:00:00.000', '20170317 00:00:00.000'),
    (101, N'3', '20090101 00:00:00.000', '20090110 00:00:00.000'),
    (101, N'3', '20090111 00:00:00.000', '20090115 00:00:00.000'),
    (101, N'3', '20090115 00:00:00.000', '20090331 00:00:00.000'),
    (101, N'3', '20090401 00:00:00.000', '20091212 00:00:00.000');

到目前为止我的代码:

-- Packs the paid = '3' periods of NRFC into continuous ranges.
-- Works on SQL Server 2008 (no LAG/LEAD needed).
--
-- Two periods belong to the same range when the later one starts no more than
-- one day after an earlier one ends (overlapping, touching or adjacent days).
-- Output columns: from_date, to_date, paid -- one row per packed range.
-- NOTE(review): work_status is not part of the grouping here, matching the
-- original output shape; add it to every CTE/GROUP BY if ranges must be kept
-- separate per work_status.
;WITH paid_periods AS (
    -- Compare against the string literal: paid is varchar, so "paid = 3"
    -- would force an implicit varchar -> int conversion on every row.
    SELECT from_date
        ,to_date
        ,paid
    FROM NRFC
    WHERE paid = '3'
),
island_starts AS (
    -- A period opens a new range when no earlier-starting period reaches
    -- the day before it (i.e. there is a real gap in front of it).
    SELECT DISTINCT s.from_date
    FROM paid_periods AS s
    WHERE NOT EXISTS (
        SELECT 1
        FROM paid_periods AS p
        WHERE p.from_date < s.from_date
            AND p.to_date >= DATEADD(day, -1, s.from_date)
    )
)
SELECT MIN(r.from_date) AS from_date
    ,MAX(r.to_date) AS to_date
    ,r.paid
FROM paid_periods AS r
CROSS APPLY (
    -- Each period belongs to the latest range start at or before its own start.
    SELECT MAX(i.from_date) AS island_start
    FROM island_starts AS i
    WHERE i.from_date <= r.from_date
) AS a
GROUP BY a.island_start
    ,r.paid
ORDER BY from_date;
+0

`SELECT *, CASE WHEN DATEDIFF(DAY, to_date, LEAD(from_date) OVER (ORDER BY to_date)) >= 1 THEN 0 ELSE 1 END newROW FROM @NRFC` 这会让您知道哪里应该开始一个新的逻辑行 – SQLMason

+0

谢谢 SQLMason,不过我用的是 SQL Server 2008 – Sreedhar

+0

对不起,我没有注意到这一点。 – SQLMason

回答

1

这看起来很像 Itzik Ben-Gan 在其文章中详细讨论过的 Packing Intervals(区间打包)问题。

其主要思想是:每个区间的开始标记为 +1,每个区间的结束标记为 -1。如果这些标记的累计和(running total)大于零,说明我们正处于一个需要打包的连续区间之内。

请阅读该文章,并逐个 CTE 地分步运行查询、检查中间结果,以了解其工作原理。

示例数据

-- Table variable holding the sample periods used by the queries in this batch.
DECLARE @NRFC TABLE
(
    work_status int        NOT NULL,
    paid        varchar(2) NOT NULL,
    from_date   date       NOT NULL,
    to_date     date       NOT NULL
);

-- Ten sample periods, loaded with a single table value constructor.
INSERT INTO @NRFC (work_status, paid, from_date, to_date)
VALUES
    (101, N'3', '20100101 00:00:00.000', '20100116 00:00:00.000'),
    (101, N'3', '20100116 00:00:00.000', '20150128 00:00:00.000'),
    (101, N'5', '20150129 00:00:00.000', '20150331 00:00:00.000'),
    (101, N'4', '20150401 00:00:00.000', '20150512 00:00:00.000'),
    (101, N'3', '20160101 00:00:00.000', '20170315 00:00:00.000'),
    (101, N'4', '20170316 00:00:00.000', '20170317 00:00:00.000'),
    (101, N'3', '20090101 00:00:00.000', '20090110 00:00:00.000'),
    (101, N'3', '20090111 00:00:00.000', '20090115 00:00:00.000'),
    (101, N'3', '20090115 00:00:00.000', '20090331 00:00:00.000'),
    (101, N'3', '20090401 00:00:00.000', '20091212 00:00:00.000')
;

查询

该查询假定区间是半开的(结束端不包含在内),因此我先给 to_date 加上 1 天,最后再把这 1 天减回去。

-- Packs overlapping or adjacent [from_date, to_date] periods of @NRFC into
-- continuous ranges, separately for each (work_status, paid).
-- Output: work_status, paid, starttime, endtime -- one row per packed range.
-- The algorithm works on half-open intervals, so every end is shifted by
-- +1 day on the way in and by -1 day on the way out.
WITH 
C1 
AS 
(
    -- let e = end ordinals, let s = start ordinals 
    SELECT 
     work_status 
     ,paid 
     ,from_date AS ts -- "from" is inclusive 
     ,+1 AS type 
     ,NULL AS e 
     ,ROW_NUMBER() OVER (PARTITION BY work_status, paid ORDER BY from_date) AS s 
    FROM @NRFC 

    UNION ALL 

    SELECT 
     work_status 
     ,paid 
     ,DATEADD(day, 1, to_date) AS ts -- "to" is exclusive 
     ,-1 AS type 
     ,ROW_NUMBER() OVER (PARTITION BY work_status, paid ORDER BY to_date) AS e 
     ,NULL AS s 
    FROM @NRFC 
) 
,C2 
AS 
(
    -- let se = start or end ordinal, namely, 
    -- how many events (start or end) happened so far 
    --
    -- Starts sort before ends on the same ts (type DESC), so touching
    -- intervals are packed together. COALESCE(s, e) is the tie-breaker for
    -- events of the same type on the same ts: without it, rows sharing a
    -- from_date (or a to_date) could receive se in a different order than
    -- s (or e), and the "= 0" test in C3 could miss a group boundary.
    SELECT 
     C1.* 
     ,ROW_NUMBER() OVER(PARTITION BY work_status, paid ORDER BY ts, type DESC, COALESCE(s, e)) AS se 
    FROM C1 
) 
,C3 
AS 
-- For start events, the expression s - (se - s) - 1 represents how many sessions 
-- were active just before the current (hence - 1) 
-- 
-- For end events, the expression (se - e) - e represents how many sessions 
-- are active right after this one 
-- 
-- The above two expressions are 0 exactly when a group of packed intervals 
-- either starts or ends, respectively 
-- 
-- After filtering only events when a group of packed intervals either starts or ends, 
-- group each pair of adjacent start/end events 
(
    SELECT 
     work_status 
     ,paid 
     ,ts 
     ,(ROW_NUMBER() OVER (PARTITION BY work_status, paid ORDER BY ts) - 1) 
      /2 + 1 AS grpnum 
    FROM C2 
    WHERE COALESCE(s - (se - s) - 1, (se - e) - e) = 0 
) 
SELECT 
    work_status 
    ,paid 
    ,MIN(ts) AS starttime 
    ,DATEADD(day, -1, MAX(ts)) AS endtime -- undo the +1 day applied to the ends in C1 
FROM C3 
GROUP BY 
    work_status 
    ,paid 
    ,grpnum 
ORDER BY 
    work_status 
    ,paid 
    ,starttime 
; 

结果

+-------------+------+------------+------------+ 
| work_status | paid | starttime | endtime | 
+-------------+------+------------+------------+ 
|   101 | 3 | 2009-01-01 | 2009-12-12 | 
|   101 | 3 | 2010-01-01 | 2015-01-28 | 
|   101 | 3 | 2016-01-01 | 2017-03-15 | 
|   101 | 4 | 2015-04-01 | 2015-05-12 | 
|   101 | 4 | 2017-03-16 | 2017-03-17 | 
|   101 | 5 | 2015-01-29 | 2015-03-31 | 
+-------------+------+------------+------------+ 
+0

可恶,我自己写的方案已经越来越接近了,但其实是在重新发明 Itzik Ben-Gan 的方法。感谢你考虑得更周全。 – SQLMason

0

使用递归 CTE 就可以得到所需的输出。我编写代码时只考虑了 paid = 3 的情况,请检查。

-- Merges chained paid = '3' periods of NRFC with a recursive CTE.
-- Two periods are chained when the next one starts on the same day the
-- previous one ends, or on the following day.
-- Output: work_status, paid, from_date, to_date -- one row per merged range.
-- NOTE(review): only chained periods are merged; a period fully contained in
-- another one is not matched by this join condition.
;WITH CTE AS
(
    -- Anchor: every paid = '3' period, already extended by one directly
    -- following period when there is one.
    SELECT a.work_status
        ,a.paid
        ,a.from_date
        ,ISNULL(b.to_date, a.to_date) AS to_date
    FROM NRFC AS a
    LEFT JOIN NRFC AS b
        ON (a.to_date = b.from_date OR DATEDIFF(day, a.to_date, b.from_date) = 1)
        AND a.paid = b.paid
        AND a.work_status = b.work_status -- never chain across work_status values
        AND b.to_date > a.to_date         -- the follower must actually extend the range
    WHERE a.paid = '3' -- string literal: paid is varchar

    UNION ALL

    -- Recursive step: keep extending to_date while a following period exists.
    -- The "b.to_date > a.to_date" guard guarantees progress, so a zero-length
    -- period cannot join to itself forever.
    SELECT a.work_status
        ,a.paid
        ,a.from_date
        ,b.to_date
    FROM CTE AS a
    INNER JOIN NRFC AS b
        ON (a.to_date = b.from_date OR DATEDIFF(day, a.to_date, b.from_date) = 1)
        AND a.paid = b.paid
        AND a.work_status = b.work_status
        AND b.to_date > a.to_date
    WHERE a.paid = '3'
)
-- For every start keep the furthest end reached, then for every such end keep
-- the earliest start: this leaves one row per merged range.
SELECT work_status
    ,paid
    ,MIN(from_date) AS from_date
    ,to_date
FROM (
    SELECT work_status
        ,paid
        ,from_date
        ,MAX(to_date) AS to_date
    FROM CTE
    GROUP BY from_date, work_status, paid
) AS g
GROUP BY to_date, work_status, paid;
0

这是一个典型的分组与窗口函数问题的例子。

首先需要确定重置点。在本例中,当 paid 与上一行的 paid 不同,或者上一行的 to_date 与当前行的 from_date 之间相差超过 1 天时,就是一个重置点。

-- Flags the rows of @NRFC where a new group must start (is_reset = 1).
-- last_paid: paid of the previous row within the same work_status
--            (rows ordered by paid, from_date); the row's own paid when there is none.
-- dif_days : days from the previous to_date to this from_date within the same
--            (work_status, paid); 0 when there is no previous row.
-- is_reset : 1 when paid changed or the gap exceeds one day, otherwise NULL.
SELECT
    p.work_status,
    p.paid,
    p.from_date,
    p.to_date,
    p.last_paid,
    p.dif_days,
    CASE
        WHEN p.last_paid <> p.paid OR p.dif_days > 1 THEN 1
    END AS is_reset
FROM (
    -- Compute each LAG-based value once, then reuse it for the reset test above.
    SELECT
        n.work_status,
        n.paid,
        n.from_date,
        n.to_date,
        COALESCE(
            LAG(n.paid) OVER (PARTITION BY n.work_status ORDER BY n.paid, n.from_date),
            n.paid
        ) AS last_paid,
        COALESCE(
            DATEDIFF(day,
                     LAG(n.to_date) OVER (PARTITION BY n.work_status, n.paid ORDER BY n.from_date),
                     n.from_date),
            0
        ) AS dif_days
    FROM @NRFC AS n
) AS p;
 
work_status | paid | from_date | to_date | last_paid | dif_days | is_reset 
----------: | :--- | :----------| :--------- | :-------- | -------: | -------: 
     101 | 3 | 01/01/2009 | 10/01/2009 | 3   |  0 |  null 
     101 | 3 | 11/01/2009 | 15/01/2009 | 3   |  1 |  null 
     101 | 3 | 15/01/2009 | 31/03/2009 | 3   |  0 |  null 
     101 | 3 | 01/04/2009 | 12/12/2009 | 3   |  1 |  null 
     101 | 3 | 01/01/2010 | 16/01/2010 | 3   |  20 |  1 
     101 | 3 | 16/01/2010 | 28/01/2015 | 3   |  0 |  null 
     101 | 3 | 01/01/2016 | 15/03/2017 | 3   |  338 |  1 
     101 | 4 | 01/04/2015 | 12/05/2015 | 3   |  0 |  1 
     101 | 4 | 16/03/2017 | 17/03/2017 | 4   |  674 |  1 
     101 | 5 | 29/01/2015 | 31/03/2015 | 4   |  0 |  1

知道重置点之后,就可以生成分组了。

-- Turns the reset markers into group numbers: grp is the running count of
-- non-NULL is_reset values in (work_status, paid, from_date) order.
-- The frame is spelled out; it is the one an ordered window gets by default.
SELECT
    c.work_status,
    c.paid,
    c.from_date,
    c.to_date,
    c.is_reset,
    COUNT(c.is_reset) OVER (
        ORDER BY c.work_status, c.paid, c.from_date
        RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS grp
FROM c1 AS c
 
    work_status | paid | from_date   | to_date    | is_reset | grp 
    ----------: | :--- | :------------------ | :------------------ | -------: | --: 
      101 | 3 | 01/01/2009 00:00:00 | 10/01/2009 00:00:00 |  null | 0 
      101 | 3 | 11/01/2009 00:00:00 | 15/01/2009 00:00:00 |  null | 0 
      101 | 3 | 15/01/2009 00:00:00 | 31/03/2009 00:00:00 |  null | 0 
      101 | 3 | 01/04/2009 00:00:00 | 12/12/2009 00:00:00 |  null | 0 
      101 | 3 | 01/01/2010 00:00:00 | 16/01/2010 00:00:00 |  1 | 1 
      101 | 3 | 16/01/2010 00:00:00 | 28/01/2015 00:00:00 |  null | 1 
      101 | 3 | 01/01/2016 00:00:00 | 15/03/2017 00:00:00 |  1 | 2 
      101 | 4 | 01/04/2015 00:00:00 | 12/05/2015 00:00:00 |  1 | 3 
      101 | 4 | 16/03/2017 00:00:00 | 17/03/2017 00:00:00 |  1 | 4 
      101 | 5 | 29/01/2015 00:00:00 | 31/03/2015 00:00:00 |  1 | 5

最后按新的分组使用 MIN()、MAX():

 
work_status | paid | from_date   | to_date    
----------: | :--- | :------------------ | :------------------ 
     101 | 3 | 01/01/2009 00:00:00 | 12/12/2009 00:00:00 
     101 | 3 | 01/01/2010 00:00:00 | 28/01/2015 00:00:00 
     101 | 3 | 01/01/2016 00:00:00 | 15/03/2017 00:00:00 
     101 | 4 | 01/04/2015 00:00:00 | 12/05/2015 00:00:00 
     101 | 4 | 16/03/2017 00:00:00 | 17/03/2017 00:00:00 
     101 | 5 | 29/01/2015 00:00:00 | 31/03/2015 00:00:00 

dbfiddle here

1

另一个查询:

-- Temporary table holding the sample periods for the query that follows.
CREATE TABLE #tb
(
    work_status int        NOT NULL,
    paid        varchar(2) NOT NULL,
    from_date   date       NOT NULL,
    to_date     date       NOT NULL
);

-- Ten sample periods, loaded with a single table value constructor.
INSERT INTO #tb (work_status, paid, from_date, to_date)
VALUES
    (101, N'3', '20100101 00:00:00.000', '20100116 00:00:00.000'),
    (101, N'3', '20100116 00:00:00.000', '20150128 00:00:00.000'),
    (101, N'5', '20150129 00:00:00.000', '20150331 00:00:00.000'),
    (101, N'4', '20150401 00:00:00.000', '20150512 00:00:00.000'),
    (101, N'3', '20160101 00:00:00.000', '20170315 00:00:00.000'),
    (101, N'4', '20170316 00:00:00.000', '20170317 00:00:00.000'),
    (101, N'3', '20090101 00:00:00.000', '20090110 00:00:00.000'),
    (101, N'3', '20090111 00:00:00.000', '20090115 00:00:00.000'),
    (101, N'3', '20090115 00:00:00.000', '20090331 00:00:00.000'),
    (101, N'3', '20090401 00:00:00.000', '20091212 00:00:00.000');
-- Packs the periods of #tb by expanding every period into its individual days
-- and then grouping consecutive days (gaps-and-islands on dates).
-- Output: work_status, paid, from_date, to_date -- one row per packed range.
--
-- The day offsets come from an inline tally sized to the longest period in
-- #tb. The previous source, master.dbo.spt_values (type 'P'), only holds
-- 0..2047, so any period longer than 2047 days was silently truncated.
SELECT
    tt.work_status,
    tt.paid,
    MIN(tt.dt) AS from_date,
    MAX(tt.dt) AS to_date
FROM (
    SELECT DISTINCT
        t.work_status,
        t.paid,
        DATEADD(d, nums.number, t.from_date) AS dt,
        -- DISTINCT plus DENSE_RANK: a day covered by several periods counts once.
        DENSE_RANK() OVER (PARTITION BY t.work_status, t.paid
                           ORDER BY DATEADD(d, nums.number, t.from_date)) AS rn,
        -- First covered day of the partition; reference point for the island key below.
        MIN(DATEADD(d, nums.number, t.from_date)) OVER (PARTITION BY t.work_status, t.paid) AS anchor_date
    FROM #tb AS t
    INNER JOIN (
        -- Offsets 0 .. (longest period in days). Seven decimal digits allow up to
        -- 10,000,000 offsets, more than the whole range of the date type.
        SELECT TOP (SELECT CASE
                               WHEN MAX(DATEDIFF(d, x.from_date, x.to_date)) >= 0
                                   THEN MAX(DATEDIFF(d, x.from_date, x.to_date)) + 1
                               ELSE 0 -- empty table, or no period with to_date >= from_date
                           END
                    FROM #tb AS x)
            ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1 AS number
        FROM (VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) AS d1(n)
        CROSS JOIN (VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) AS d2(n)
        CROSS JOIN (VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) AS d3(n)
        CROSS JOIN (VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) AS d4(n)
        CROSS JOIN (VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) AS d5(n)
        CROSS JOIN (VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) AS d6(n)
        CROSS JOIN (VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) AS d7(n)
        ORDER BY number
    ) AS nums
        ON nums.number BETWEEN 0 AND DATEDIFF(d, t.from_date, t.to_date)
) AS tt
-- Within a run of consecutive days, rank and day offset grow together, so
-- their difference is constant per run and changes after every gap.
GROUP BY tt.work_status, tt.paid, tt.rn - DATEDIFF(d, tt.anchor_date, tt.dt)
ORDER BY tt.work_status, tt.paid, MIN(tt.dt)
 
work_status paid from_date to_date 
----------- ---- ---------- ---------- 
101   3 2009-01-01 2009-12-12 
101   3 2010-01-01 2015-01-28 
101   3 2016-01-01 2017-03-15 
101   4 2015-04-01 2015-05-12 
101   4 2017-03-16 2017-03-17 
101   5 2015-01-29 2015-03-31