2016-12-15 116 views
0

我有一个查询返回以下内容。在Redshift上填写日期范围

select 
    trunc(t.created), 
    sum(count(*)) over (order by trunc(t.created) rows unbounded preceding) as cumulative_sales 
from event e 
LEFT JOIN person_tickets t on e.id = t.event_id 
where event_id = 9999 
group by trunc(t.created) 

Date  cumulative_bookings 
2016-02-12 1 
2016-02-18 3 
2016-02-19 5 
2016-02-20 352 
2016-02-21 352 

我想填写日期系列,以便包含所有日期。

2016-02-12 1 
2016-02-13 1 
2016-02-14 1 
2016-02-15 1 
2016-02-16 1 
2016-02-17 1 
2016-02-18 3 
2016-02-19 5 
2016-02-20 352 
2016-02-21 352 

我一直试图在其中产生一个很好的日期顺序我的代码加入,但我挣扎,看到哪里或如何优雅地做加盟。

select (
    getdate()::date - row_number() over (order by true) 
)::date as n 
from event limit 500 

做了一点点的进步与以下但不完全有

CASE WHEN cumulative_bookings is null then LAG(cumulative_bookings IGNORE NULLS) OVER (ORDER BY n) 
ELSE cumulative_bookings END as filled_cumulative_bookings 

回答

0

您需要红移使用数台本。 (In "normal" Postgres you could use generate_series() as per this answer

基本上,您希望创建一个列表,列出最小和最大日期之间的所有日期,然后将该列表与您的稀疏日期数据进行加入。

--Create numbers table - 128 rows (0-127) 
DROP TABLE IF EXISTS numbers; 
CREATE TABLE numbers (n INTEGER NOT NULL) DISTSTYLE ALL; 
INSERT INTO numbers VALUES (0); 
INSERT INTO numbers SELECT n + 1 FROM numbers; 
INSERT INTO numbers SELECT n + 2 FROM numbers; 
INSERT INTO numbers SELECT n + 4 FROM numbers; 
INSERT INTO numbers SELECT n + 8 FROM numbers; 
INSERT INTO numbers SELECT n + 16 FROM numbers; 
INSERT INTO numbers SELECT n + 32 FROM numbers; 
INSERT INTO numbers SELECT n + 64 FROM numbers; 
-- SELECT n FROM numbers; 

--Create sample table with sparse dates 
DROP TABLE IF EXISTS two_dates; 
CREATE TABLE two_dates (dtm DATETIME NOT NULL, value INT NOT NULL) DISTSTYLE ALL; 
INSERT INTO two_dates VALUES ('2016-12-01',1),('2016-12-07',1); 
-- SELECT dtm FROM two_dates; 

--First CTE finds Min, Max, and Days in range - 1 row 
WITH cte_range 
AS (SELECT MIN(dtm) min_dtm 
      ,MAX(dtm) max_dtm 
      ,DATEDIFF(day,MIN(dtm),MAX(dtm)) dtm_range 
    FROM two_dates) 
--Second CTE creates list of dates between Min and Max dates 
, cte_dtm_list 
AS (SELECT DATEADD(day,numbers.n,cte_range.min_dtm) dtm 
    FROM  cte_range 
    CROSS JOIN numbers 
    WHERE numbers.n <= cte_range.dtm_range) 
--Finally we left join to the sparse date data 
SELECT cte_dtm_list.dtm 
     ,COALESCE(two_dates.value,0) value 
     ,SUM(COALESCE(two_dates.value,0)) OVER (ORDER BY cte_dtm_list.dtm ROWS UNBOUNDED PRECEDING) as cume 
FROM   cte_dtm_list 
LEFT JOIN two_dates 
    ON cte_dtm_list.dtm = two_dates.dtm 
ORDER BY cte_dtm_list.dtm 
; 
--   dtm   | value | cume 
-- ---------------------+-------+------ 
-- 2016-12-01 00:00:00 |  1 | 1 
-- 2016-12-02 00:00:00 |  0 | 1 
-- 2016-12-03 00:00:00 |  0 | 1 
-- 2016-12-04 00:00:00 |  0 | 1 
-- 2016-12-05 00:00:00 |  0 | 1 
-- 2016-12-06 00:00:00 |  0 | 1 
-- 2016-12-07 00:00:00 |  1 | 2