2014-12-05 61 views
1

我使用下面的查询在BigQuery中连接表,并获取订购了产品的用户的visitNumber和来源(source)。问题是:如何给BigQuery的子查询起一个别名,以缩短查询?

QUERY:

-- Lists, for every transaction, the visits of the same visitor (with their
-- traffic-source fields) whose visitNumber is not greater than the visitNumber
-- of the visit in which the transaction happened.
SELECT visitor, transactionId as OrderNumber, source, referralPath, medium, keyword, campaign, visitNumber, visitStartTime, conversionTime, visitLastClikcTime 
FROM (
-- One row per (visitor, transaction, visit, traffic-source combination), with the
-- smallest and largest clikcTime string of that group.
SELECT conversions.fullVisitorId as visitor, conversions.transactionId as transactionId, 
clickChanges.visitNumber as visitNumber, conversions.visitNumber as transactionVisitNumber, 
clickChanges.source as source, clickChanges.referralPath as referralPath, 
clickChanges.medium as medium, clickChanges.keyword as keyword, clickChanges.campaign as campaign, 
-- NOTE(review): min/max run on formatted strings, so they compare text, not time;
-- hour/minute are not zero-padded by STRING(). Confirm this yields the intended first/last hit.
conversionTime, min(clikcTime) as visitStartTime, max(clikcTime) as visitLastClikcTime 
-- conversions: rows that carry a transactionId, plus a display timestamp string.
FROM (SELECT fullVisitorId, hits.transaction.transactionId as transactionId, visitNumber as visitNumber, 
-- Builds '<day>.<month>.<year> <hour>:<minute>'; assumes date is a YYYYMMDD string -- TODO confirm.
CONCAT(CONCAT(SUBSTR(date, 7, 2), '.', SUBSTR(date, 5, 2), '.', SUBSTR(date, 0, 4)), ' ', CONCAT(STRING(hits.hour), ':', STRING(hits.minute))) as conversionTime 
FROM 
(TABLE_DATE_RANGE([54247008.ga_sessions_], 
       TIMESTAMP('2014-11-16'), 
       TIMESTAMP('2014-11-21'))) 
WHERE hits.transaction.transactionId is not null 
-- NOTE(review): the outer query regroups and reorders, so this ORDER BY looks redundant -- confirm.
ORDER BY fullVisitorId, visitNumber 
) conversions 
-- clickChanges: distinct (visitor, traffic source, visit, hit time string) combinations
-- over the same date range; the GROUP BY lists every selected column, so it only deduplicates.
JOIN EACH(
SELECT fullVisitorId, trafficSource.referralPath as referralPath, trafficSource.medium as medium, trafficSource.source as source, visitNumber,  
CONCAT(CONCAT(SUBSTR(date, 7, 2), '.', SUBSTR(date, 5, 2), '.', SUBSTR(date, 0, 4)), ' ', CONCAT(STRING(hits.hour), ':', STRING(hits.minute))) as clikcTime, 
trafficSource.campaign as campaign, trafficSource.keyword as keyword 
FROM (TABLE_DATE_RANGE([54247008.ga_sessions_], 
       TIMESTAMP('2014-11-16'), 
       TIMESTAMP('2014-11-21'))) 
GROUP BY fullVisitorId, referralPath, medium, source, visitNumber, clikcTime, campaign, keyword 
ORDER BY fullVisitorId, visitNumber 
) clickChanges 
-- Pairs every conversion with every visit row of the same visitor.
ON 
conversions.fullVisitorId = clickChanges.fullVisitorId 
GROUP BY visitor, transactionId, visitNumber, transactionVisitNumber, source, referralPath, medium, keyword, campaign, conversionTime 
-- Keeps only visits numbered at or before the converting visit (both columns are grouping keys).
HAVING visitNumber <= transactionVisitNumber 
ORDER BY transactionId, visitNumber 
) 

现在我想为每个transactionId得到一个MAX(visitNumber)的聚合列,并显示与该MAX(visitNumber)对应的来源信息。为此,我在这里查阅资料后,借助另一个JOIN构建了下面的完整查询;它可以正常工作,但非常冗长且重复:

相同的查询(略有修改):

-- For every transaction, returns the row of derived table `a` whose visitNumber equals
-- the maximum visitNumber found for that transaction in derived table `b`.
-- `a` and the inner query of `b` are the same visits-per-transaction query written twice.
SELECT a.visitor, a.transactionId as OrderNumber, a.source, a.referralPath, a.medium, a.keyword, a.campaign, a.visitNumber as max_visitnumber, a.visitStartTime, a.conversionTime, a.visitLastClikcTime 
FROM (
-- a: one row per (visitor, transaction, visit, traffic-source combination).
SELECT conversions.fullVisitorId as visitor, conversions.transactionId as transactionId, 
clickChanges.visitNumber as visitNumber, conversions.visitNumber as transactionVisitNumber, 
clickChanges.source as source, clickChanges.referralPath as referralPath, 
clickChanges.medium as medium, clickChanges.keyword as keyword, clickChanges.campaign as campaign, 
-- NOTE(review): min/max compare the formatted strings as text, not as timestamps -- confirm intent.
conversionTime, min(clikcTime) as visitStartTime, max(clikcTime) as visitLastClikcTime 
-- conversions: rows that carry a transactionId, plus a display timestamp string.
FROM (SELECT fullVisitorId, hits.transaction.transactionId as transactionId, visitNumber as visitNumber, 
-- Builds '<day>.<month>.<year> <hour>:<minute>'; assumes date is a YYYYMMDD string -- TODO confirm.
CONCAT(CONCAT(SUBSTR(date, 7, 2), '.', SUBSTR(date, 5, 2), '.', SUBSTR(date, 0, 4)), ' ', CONCAT(STRING(hits.hour), ':', STRING(hits.minute))) as conversionTime 
FROM 
(TABLE_DATE_RANGE([54247008.ga_sessions_], 
       TIMESTAMP('2014-11-16'), 
       TIMESTAMP('2014-11-21'))) 
WHERE hits.transaction.transactionId is not null 
ORDER BY fullVisitorId, visitNumber 
) conversions 
-- clickChanges: distinct (visitor, traffic source, visit, hit time string) combinations.
JOIN EACH(
SELECT fullVisitorId, trafficSource.referralPath as referralPath, trafficSource.medium as medium, trafficSource.source as source, visitNumber,  
CONCAT(CONCAT(SUBSTR(date, 7, 2), '.', SUBSTR(date, 5, 2), '.', SUBSTR(date, 0, 4)), ' ', CONCAT(STRING(hits.hour), ':', STRING(hits.minute))) as clikcTime, 
trafficSource.campaign as campaign, trafficSource.keyword as keyword 
FROM (TABLE_DATE_RANGE([54247008.ga_sessions_], 
       TIMESTAMP('2014-11-16'), 
       TIMESTAMP('2014-11-21'))) 
GROUP BY fullVisitorId, referralPath, medium, source, visitNumber, clikcTime, campaign, keyword 
ORDER BY fullVisitorId, visitNumber 
) clickChanges 
ON 
conversions.fullVisitorId = clickChanges.fullVisitorId 
GROUP BY visitor, transactionId, visitNumber, transactionVisitNumber, source, referralPath, medium, keyword, campaign, conversionTime 
-- Keeps only visits numbered at or before the converting visit.
HAVING visitNumber <= transactionVisitNumber 
ORDER BY transactionId, visitNumber 
) a 
JOIN EACH 
(
-- b: the highest visitNumber per transactionId, computed over the same derived rows as `a`.
SELECT MAX(visitNumber) as max_visitnumber, transactionId 
FROM (
SELECT conversions.fullVisitorId as visitor, conversions.transactionId as transactionId, 
clickChanges.visitNumber as visitNumber, conversions.visitNumber as transactionVisitNumber, 
clickChanges.source as source, clickChanges.referralPath as referralPath, 
clickChanges.medium as medium, clickChanges.keyword as keyword, clickChanges.campaign as campaign, 
conversionTime, min(clikcTime) as visitStartTime, max(clikcTime) as visitLastClikcTime 
FROM (SELECT fullVisitorId, hits.transaction.transactionId as transactionId, visitNumber as visitNumber, 
CONCAT(CONCAT(SUBSTR(date, 7, 2), '.', SUBSTR(date, 5, 2), '.', SUBSTR(date, 0, 4)), ' ', CONCAT(STRING(hits.hour), ':', STRING(hits.minute))) as conversionTime 
FROM 
(TABLE_DATE_RANGE([54247008.ga_sessions_], 
       TIMESTAMP('2014-11-16'), 
       TIMESTAMP('2014-11-21'))) 
WHERE hits.transaction.transactionId is not null 
ORDER BY fullVisitorId, visitNumber 
) conversions 
JOIN EACH(
SELECT fullVisitorId, trafficSource.referralPath as referralPath, trafficSource.medium as medium, trafficSource.source as source, visitNumber,  
CONCAT(CONCAT(SUBSTR(date, 7, 2), '.', SUBSTR(date, 5, 2), '.', SUBSTR(date, 0, 4)), ' ', CONCAT(STRING(hits.hour), ':', STRING(hits.minute))) as clikcTime, 
trafficSource.campaign as campaign, trafficSource.keyword as keyword 
FROM (TABLE_DATE_RANGE([54247008.ga_sessions_], 
       TIMESTAMP('2014-11-16'), 
       TIMESTAMP('2014-11-21'))) 
GROUP BY fullVisitorId, referralPath, medium, source, visitNumber, clikcTime, campaign, keyword 
ORDER BY fullVisitorId, visitNumber 
) clickChanges 
ON 
conversions.fullVisitorId = clickChanges.fullVisitorId 
GROUP BY visitor, transactionId, visitNumber, transactionVisitNumber, source, referralPath, medium, keyword, campaign, conversionTime 
HAVING visitNumber <= transactionVisitNumber 
-- NOTE(review): only MAX(visitNumber) and transactionId are read from this subquery, so this
-- ORDER BY and the other selected columns look unnecessary here -- confirm before trimming.
ORDER BY transactionId, visitNumber 
) 
GROUP EACH BY transactionId 
) b 
-- Keeps only the rows of `a` that belong to the highest-numbered visit of each transaction.
ON 
b.max_visitnumber = a.visitnumber and 
b.transactionId = a.transactionId 
ORDER BY max_visitnumber DESC 
  1. 也就是说,我把几乎相同的查询写了两次,只改动了其中的一小部分。有没有更好的方法来实现这一点,并缩短查询?

  2. 我在StackOverflow上看到有人建议使用WITH ... AS的公用表表达式(CTE),但它在BigQuery中似乎不起作用。还有其他办法能让这个查询更高效吗?

+1

使用视图...... – Pentium10 2014-12-05 13:32:11

+0

你能详细解释一下吗?我是一个完全新手,只是调整了一位前同事写的查询 – Mustika 2014-12-05 13:39:47

+0

以下是对BigQuery文档的引用:https://cloud.google.com/bigquery/querying-data#views – 2014-12-05 18:58:58

回答

3

正如评论所说,视图正是你需要的:它相当于一个查询的别名,可以在其他查询中重复使用。

https://cloud.google.com/bigquery/querying-data#views

注意:视图[目前]与TABLE_DATE_RANGE不兼容,因此你需要重写查询/视图,显式地列出各个表。