解决此问题的一种可行方法是使用分析函数(analytic functions)。例如:
#standardSQL
-- Returns one row per session (user, visitid, date) whose date lies between
-- the user's earliest session containing an 'event1' hit and the user's latest
-- session containing an 'event2' hit (both bounds inclusive).
WITH data AS (
  -- Mock sessions: one row per (user, visitid) with a nested array of hits.
  -- date is a 'YYYYMMDD' string, so string comparison follows calendar order.
  -- Expected result for this fixture: user 1 -> visits 1-3, user 2 -> visits 1-2,
  -- user 3 -> no rows (it never has an 'event2' hit, so max_date is NULL).
  SELECT '1' AS user, '1' AS visitid, '20170520' AS date, ARRAY<STRUCT<hitNumber INT64, eventInfo STRUCT<eventCategory STRING> >> [STRUCT(1 AS hitNumber, STRUCT('event1' AS eventCategory) AS eventInfo)] AS hits UNION ALL
  SELECT '1' AS user, '2' AS visitid, '20170521' AS date, ARRAY<STRUCT<hitNumber INT64, eventInfo STRUCT<eventCategory STRING> >> [STRUCT(1 AS hitNumber, STRUCT('' AS eventCategory) AS eventInfo)] AS hits UNION ALL
  SELECT '1' AS user, '3' AS visitid, '20170522' AS date, ARRAY<STRUCT<hitNumber INT64, eventInfo STRUCT<eventCategory STRING> >> [STRUCT(1 AS hitNumber, STRUCT('event2' AS eventCategory) AS eventInfo)] AS hits UNION ALL
  SELECT '1' AS user, '4' AS visitid, '20170523' AS date, ARRAY<STRUCT<hitNumber INT64, eventInfo STRUCT<eventCategory STRING> >> [STRUCT(1 AS hitNumber, STRUCT('' AS eventCategory) AS eventInfo)] AS hits UNION ALL
  SELECT '2' AS user, '1' AS visitid, '20170520' AS date, ARRAY<STRUCT<hitNumber INT64, eventInfo STRUCT<eventCategory STRING> >> [STRUCT(1 AS hitNumber, STRUCT('event1' AS eventCategory) AS eventInfo)] AS hits UNION ALL
  SELECT '2' AS user, '2' AS visitid, '20170521' AS date, ARRAY<STRUCT<hitNumber INT64, eventInfo STRUCT<eventCategory STRING> >> [STRUCT(1 AS hitNumber, STRUCT('event2' AS eventCategory) AS eventInfo)] AS hits UNION ALL
  SELECT '2' AS user, '3' AS visitid, '20170522' AS date, ARRAY<STRUCT<hitNumber INT64, eventInfo STRUCT<eventCategory STRING> >> [STRUCT(1 AS hitNumber, STRUCT('' AS eventCategory) AS eventInfo)] AS hits UNION ALL
  SELECT '3' AS user, '1' AS visitid, '20170520' AS date, ARRAY<STRUCT<hitNumber INT64, eventInfo STRUCT<eventCategory STRING> >> [STRUCT(1 AS hitNumber, STRUCT('event1' AS eventCategory) AS eventInfo)] AS hits UNION ALL
  SELECT '3' AS user, '2' AS visitid, '20170521' AS date, ARRAY<STRUCT<hitNumber INT64, eventInfo STRUCT<eventCategory STRING> >> [STRUCT(1 AS hitNumber, STRUCT('' AS eventCategory) AS eventInfo)] AS hits UNION ALL
  SELECT '3' AS user, '3' AS visitid, '20170522' AS date, ARRAY<STRUCT<hitNumber INT64, eventInfo STRUCT<eventCategory STRING> >> [STRUCT(1 AS hitNumber, STRUCT('' AS eventCategory) AS eventInfo)] AS hits
),
session_events AS (
  -- Flag each session by the events it contains. EXISTS keeps exactly one row
  -- per session; joining against UNNEST(hits) instead would emit one row per
  -- hit (duplicating multi-hit sessions) and drop sessions with no hits.
  SELECT
    user,
    visitid,
    date,
    EXISTS(
      SELECT 1 FROM UNNEST(hits) AS hit
      WHERE hit.eventInfo.eventCategory = 'event1'
    ) AS has_event1,
    EXISTS(
      SELECT 1 FROM UNNEST(hits) AS hit
      WHERE hit.eventInfo.eventCategory = 'event2'
    ) AS has_event2
  FROM data
),
session_bounds AS (
  -- Attach the per-user window bounds to every session of that user.
  -- CASE without ELSE yields NULL for non-matching sessions, which MIN/MAX skip.
  SELECT
    user,
    visitid,
    date,
    MIN(CASE WHEN has_event1 THEN date END) OVER (PARTITION BY user) AS min_date,
    MAX(CASE WHEN has_event2 THEN date END) OVER (PARTITION BY user) AS max_date
  FROM session_events
)
SELECT
  user,
  visitid,
  date
FROM session_bounds
-- A NULL bound (user lacks event1 or event2) makes BETWEEN evaluate to NULL,
-- so such users produce no rows.
WHERE date BETWEEN min_date AND max_date
其中 data 是对您的 ga_sessions 数据的模拟(我把 'fullvisitorid' 命名为 'user')。
该查询假设:对同一用户而言,事件 1 和事件 2 可能各自出现在多个不同的日期(因此分别取 MIN 和 MAX);并且事件保存在 eventCategory 字段中(鉴于您的“下载”和“购买”事件是在会话级别定义的,我建议使用 customDimensions 字段,而不是 hits.eventInfo.eventCategory 字段)。
除了分析函数之外,您还可以使用标准 SQL 中的 ARRAY 和 STRUCT 来实现:
-- ARRAY/STRUCT variant: one output row per user, carrying an array of
-- (visitid, date) structs for the sessions dated between that user's earliest
-- 'event1' session and latest 'event2' session (inclusive). Users whose array
-- would be empty are omitted. Reads from the `data` relation.
SELECT
  user,
  user_data
FROM (
  SELECT
    user,
    -- Keep only the sessions inside the [first_event1_date, last_event2_date]
    -- window; a NULL bound makes BETWEEN NULL, leaving the array empty.
    ARRAY(
      SELECT AS STRUCT visit.visitid, visit.date
      FROM UNNEST(all_sessions) AS visit
      WHERE visit.date BETWEEN first_event1_date AND last_event2_date
    ) AS user_data
  FROM (
    SELECT
      user,
      -- Every session of the user, collapsed into a single array.
      ARRAY_AGG((SELECT AS STRUCT visitid, date)) AS all_sessions,
      MIN(
        CASE
          WHEN EXISTS(SELECT 1 FROM UNNEST(hits) AS hit WHERE hit.eventInfo.eventCategory = 'event1')
            THEN date
        END
      ) AS first_event1_date,
      MAX(
        CASE
          WHEN EXISTS(SELECT 1 FROM UNNEST(hits) AS hit WHERE hit.eventInfo.eventCategory = 'event2')
            THEN date
        END
      ) AS last_event2_date
    FROM data
    GROUP BY user
  )
)
WHERE ARRAY_LENGTH(user_data) > 0
如果我所做的假设与您的数据不一致,您仍然可以借助这些技术查询出想要的结果。您也可以将模拟数据用于测试(并对其进行调整,使其更贴近您的数据集)。
Thanks @Will, this helps! :)