select 'pageview' as "event_type",
-- view.tag_url.query['aqet']
event_date,
date_format(
from_unixtime(view.event_timestamp),
'%Y-%m-%d %H:%i:%s GMT'
) human_readable_timestamp,
view.event_timestamp event_timestamp_gmt,
view.event_id,
entity_id,
entity_domain,
entity_type,
SPLIT_PART(
filter(view.other_entity_keys, o->o.entity_type = 'ip') [ 1 ].entity_domain,
'_z_',
4
) as ip_type,
filter(view.other_entity_keys, o->o.entity_type = 'ip') [ 1 ].entity_id as ip_address,
COALESCE(
element_at(view.tag_url.query, 'suu'),
CASE
WHEN cardinality(
FILTER(
view.other_entity_keys,
x->x.entity_type = 'statid'
)
) > 0 then case
when FILTER(
view.other_entity_keys,
x->x.entity_type = 'statid'
) [ 1 ].entity_id = 'NA' THEN NULL ELSE FILTER(
view.other_entity_keys,
x->x.entity_type = 'statid'
) [ 1 ].entity_id
end else NULL
END,
to_base64url(
sha256(
to_utf8(
concat(
filter(view.other_entity_keys, o->o.entity_type = 'ip') [ 1 ].entity_id,
event_group.user_agent,
element_at(view.others, 'accept_encoding_header'),
element_at(view.others, 'accept_language_header')
)
)
)
)
) analytic_id,
entity_id as cookie,
element_at(view.tag_url.query, 'pnn') as partner_name,
COALESCE(
element_at(view.tag_url.query, 'puu'),
CASE
WHEN cardinality(
FILTER(view.other_entity_keys, x->x.entity_domain = '') -- CHANGE TO puu_entity_domain
) > 0 then CASE
WHEN FILTER(
view.other_entity_keys,
x->x.entity_domain = ''
) [ 1 ].entity_id IN ('NA', '0', 'undefined') THEN NULL ELSE FILTER(
view.other_entity_keys,
x->x.entity_domain = '' -- CHANGE TO puu_entity_domain
) [ 1 ].entity_id
END else NULL
END
) partner_user_id,
element_at(view.tag_url.query, 'dmn') as domain,
element_at(view.tag_url.query, 'pn') as path,
COALESCE(
element_at(view.tag_url.query, 'qs'),
element_at(view.tag_url.query, 'pqs')
) page_qs,
COALESCE(
element_at(view.tag_url.query, 'rd'),
element_at(view.tag_url.query, 'rdn')
) referrer_domain,
element_at(view.tag_url.query, 'rpn') referrer_path,
CONCAT(
COALESCE(
element_at(view.tag_url.query, 'rd'),
element_at(view.tag_url.query, 'rdn')
),
element_at(view.tag_url.query, 'rpn')
) referrer_url,
COALESCE(
element_at(view.tag_url.query, 'imp'),
element_at(view.tag_url.query, 'aut__query_imp')
) impression_id,
COALESCE(
view.agency_id,
element_at(view.tag_url.query, 'ag')
) agency_id,
COALESCE(
view.advertiser_id,
element_at(view.tag_url.query, 'adv')
) advertiser_id,
COALESCE(
element_at(view.tag_url.query, 'ca'),
try(
regexp_extract(
split(view.tag_url.query [ 'ru' ], '?') [ 2 ],
'utm_campaign=([^&]*)',
1
)
),
try(
regexp_extract(
element_at(view.tag_url.query, 'qs'),
'utm_campaign=([^&]*)',
1
)
)
) campaign_id,
COALESCE(
element_at(view.tag_url.query, 'ch'),
try(
regexp_extract(
split(view.tag_url.query [ 'ru' ], '?') [ 2 ],
'utm_medium=([^&]*)',
1
)
),
try(
regexp_extract(
split(element_at(view.tag_url.query, 'qs'), '?') [ 2 ],
'utm_medium=([^&]*)',
1
)
),
element_at(view.tag_url.query, 'utm_medium'),
try(
regexp_extract(
element_at(view.tag_url.query, 'qs'),
'utm_medium=([^&]*)',
1
)
)
) channel,
element_at(view.tag_url.query, 'cr') as creative_id,
element_at(view.tag_url.query, 'pl') as placement_id,
COALESCE(
element_at(view.tag_url.query, 'si'),
element_at(view.tag_url.query, 'sid'),
try(
regexp_extract(
split(view.tag_url.query [ 'ru' ], '?') [ 2 ],
'sid=([^&]*)',
1
)
)
) site_id,
element_at(view.tag_url.query, 'pubid') as publisher_id,
element_at(view.tag_url.query, 'ivp') as inventory_partner,
COALESCE(
element_at(view.tag_url.query, 'mp'),
try(
regexp_extract(
split(view.tag_url.query [ 'ru' ], '?') [ 2 ],
'gclsrc=([^&]*)',
1
)
)
) media_partner,
view.source,
COALESCE(
element_at(view.tag_url.query, 'srch'),
try(
regexp_extract(
split(view.tag_url.query [ 'ru' ], '?') [ 2 ],
'utm_term=([^&]*)',
1
)
),
try(
regexp_extract(
split(view.tag_url.query [ 'ru' ], '?') [ 2 ],
'searchTerm=([^&]*)',
1
)
),
try(
regexp_extract(
split(element_at(view.tag_url.query, 'qs'), '?') [ 2 ],
'utm_term=([^&]*)',
1
)
)
) search_terms,
element_at(view.tag_url.query, 'ru') as click_redirect_url,
COALESCE(
element_at(view.tag_url.query, 'clk'),
try(
regexp_extract(
split(view.tag_url.query [ 'ru' ], '?') [ 2 ],
'gclid=([^&]*)',
1
)
),
try(
regexp_extract(
split(view.tag_url.query [ 'ru' ], '?') [ 2 ],
'msclkid=([^&]*)',
1
)
),
try(
regexp_extract(
split(view.tag_url.query [ 'ru' ], '?') [ 2 ],
'fbclid=([^&]*)',
1
)
),
try(
regexp_extract(
split(view.tag_url.query [ 'ru' ], '?') [ 2 ],
'dclid=([^&]*)',
1
)
),
try(
regexp_extract(
split(view.tag_url.query [ 'ru' ], '?') [ 2 ],
'wbraid=([^&]*)',
1
)
),
try(
regexp_extract(
split(view.tag_url.query [ 'ru' ], '?') [ 2 ],
'gbraid=([^&]*)',
1
)
),
try(
regexp_extract(
split(element_at(view.tag_url.query, 'qs'), '?') [ 2 ],
'gclid=([^&]*)',
1
)
),
try(
regexp_extract(
split(element_at(view.tag_url.query, 'qs'), '?') [ 2 ],
'msclkid=([^&]*)',
1
)
),
try(
regexp_extract(
split(element_at(view.tag_url.query, 'qs'), '?') [ 2 ],
'fbclid=([^&]*)',
1
)
)
) click_id,
element_at(view.tag_url.query, 'v0') user_defined_0,
element_at(view.tag_url.query, 'v1') as user_defined_1,
element_at(view.tag_url.query, 'v2') as user_defined_2,
element_at(view.tag_url.query, 'v3') as user_defined_3,
element_at(view.tag_url.query, 'v4') as user_defined_4,
element_at(view.tag_url.query, 'v5') as user_defined_5,
element_at(view.tag_url.query, 'v6') as user_defined_6,
element_at(view.tag_url.query, 'v7') as user_defined_7,
element_at(view.tag_url.query, 'v8') as user_defined_8,
element_at(view.tag_url.query, 'v9') as user_defined_9,
event_group.user_agent,
event_group.os,
event_group.os_version,
event_group.browser,
event_group.browser_version,
element_at(view.others, 'accept_language_header') language_header,
element_at(view.others, 'accept_encoding_header') encoding_header,
view.http_status_code status_code,
event_group.device_type,
' ' device_type,
' ' device_make,
event_group.network,
event_group.network_type,
event_group.throughput,
event_group.zip,
event_group.city,
event_group.region_code,
event_group.country_code,
event_group.dma,
event_group.msa,
event_group.latitude,
event_group.longitude,
event_group.area_code,
event_group.fips,
event_group.timezone,
(
CASE
WHEN (CARDINALITY(view.tag_url.query) = 0) THEN NULL ELSE view.tag_url.query
END
) AS tag_qs,
(
CASE
WHEN (CARDINALITY(filter(view.other_entity_keys, o->o.entity_type = 'ck')) = 0) THEN NULL ELSE filter(view.other_entity_keys, o->o.entity_type = 'ck')
END
) AS cookies,
(
CASE
WHEN (CARDINALITY(view.others) = 0) THEN NULL ELSE view.others
END
) AS others,
event_date as eventdate
from aqfer.collation_daily_v5_1
cross join unnest(aqfer.collation_daily_v5_1.event_groups) as e(event_group)
cross join unnest(aqfer.collation_daily_v5_1.views) as v(view)
where event_date = '${prev_date}'
and event_group.event_id = view.event_group_id
and has_views = True
union
select 'conversion' as "event_type",
-- conversion.tag_url.query['aqet']
event_date,
date_format(
from_unixtime(conversion.event_timestamp),
'%Y-%m-%d %H:%i:%s GMT'
) human_readable_timestamp,
conversion.event_timestamp event_timestamp_gmt,
conversion.event_id,
entity_id,
entity_domain,
entity_type,
SPLIT_PART(
filter(conversion.other_entity_keys, o->o.entity_type = 'ip') [ 1 ].entity_domain,
'_z_',
4
) as ip_type,
filter(conversion.other_entity_keys, o->o.entity_type = 'ip') [ 1 ].entity_id as ip_address,
COALESCE(
element_at(conversion.tag_url.query, 'suu'),
CASE
WHEN cardinality(
FILTER(
conversion.other_entity_keys,
x->x.entity_type = 'statid'
)
) > 0 then case
when FILTER(
conversion.other_entity_keys,
x->x.entity_type = 'statid'
) [ 1 ].entity_id = 'NA' THEN NULL ELSE FILTER(
conversion.other_entity_keys,
x->x.entity_type = 'statid'
) [ 1 ].entity_id
end else NULL
END,
to_base64url(
sha256(
to_utf8(
concat(
filter(conversion.other_entity_keys, o->o.entity_type = 'ip') [ 1 ].entity_id,
event_group.user_agent,
element_at(conversion.others, 'accept_encoding_header'),
element_at(conversion.others, 'accept_language_header')
)
)
)
)
) analytic_id,
entity_id as cookie,
element_at(conversion.tag_url.query, 'pnn') as partner_name,
COALESCE(
element_at(conversion.tag_url.query, 'puu'),
CASE
WHEN cardinality(
FILTER(conversion.other_entity_keys, x->x.entity_domain = '') -- CHANGE TO puu_entity_domain
) > 0 then CASE
WHEN FILTER(
conversion.other_entity_keys,
x->x.entity_domain = ''
) [ 1 ].entity_id IN ('NA', '0', 'undefined') THEN NULL ELSE FILTER(
conversion.other_entity_keys,
x->x.entity_domain = '' -- CHANGE TO puu_entity_domain
) [ 1 ].entity_id
END else NULL
END
) partner_user_id,
element_at(conversion.tag_url.query, 'dmn') as domain,
element_at(conversion.tag_url.query, 'pn') as path,
COALESCE(
element_at(conversion.tag_url.query, 'qs'),
element_at(conversion.tag_url.query, 'pqs')
) page_qs,
COALESCE(
element_at(conversion.tag_url.query, 'rd'),
element_at(conversion.tag_url.query, 'rdn')
) referrer_domain,
element_at(conversion.tag_url.query, 'rpn') referrer_path,
CONCAT(
COALESCE(
element_at(conversion.tag_url.query, 'rd'),
element_at(conversion.tag_url.query, 'rdn')
),
element_at(conversion.tag_url.query, 'rpn')
) referrer_url,
COALESCE(
element_at(conversion.tag_url.query, 'imp'),
element_at(conversion.tag_url.query, 'aut__query_imp')
) impression_id,
COALESCE(
conversion.agency_id,
element_at(conversion.tag_url.query, 'ag')
) agency_id,
COALESCE(
conversion.advertiser_id,
element_at(conversion.tag_url.query, 'adv')
) advertiser_id,
COALESCE(
element_at(conversion.tag_url.query, 'ca'),
try(
regexp_extract(
split(conversion.tag_url.query [ 'ru' ], '?') [ 2 ],
'utm_campaign=([^&]*)',
1
)
),
try(
regexp_extract(
element_at(conversion.tag_url.query, 'qs'),
'utm_campaign=([^&]*)',
1
)
)
) campaign_id,
COALESCE(
element_at(conversion.tag_url.query, 'ch'),
try(
regexp_extract(
split(conversion.tag_url.query [ 'ru' ], '?') [ 2 ],
'utm_medium=([^&]*)',
1
)
),
try(
regexp_extract(
split(element_at(conversion.tag_url.query, 'qs'), '?') [ 2 ],
'utm_medium=([^&]*)',
1
)
),
element_at(conversion.tag_url.query, 'utm_medium'),
try(
regexp_extract(
element_at(conversion.tag_url.query, 'qs'),
'utm_medium=([^&]*)',
1
)
)
) channel,
element_at(conversion.tag_url.query, 'cr') as creative_id,
element_at(conversion.tag_url.query, 'pl') as placement_id,
COALESCE(
element_at(conversion.tag_url.query, 'si'),
element_at(conversion.tag_url.query, 'sid'),
try(
regexp_extract(
split(conversion.tag_url.query [ 'ru' ], '?') [ 2 ],
'sid=([^&]*)',
1
)
)
) site_id,
element_at(conversion.tag_url.query, 'pubid') as publisher_id,
element_at(conversion.tag_url.query, 'ivp') as inventory_partner,
COALESCE(
element_at(conversion.tag_url.query, 'mp'),
try(
regexp_extract(
split(conversion.tag_url.query [ 'ru' ], '?') [ 2 ],
'gclsrc=([^&]*)',
1
)
)
) media_partner,
conversion.source,
COALESCE(
element_at(conversion.tag_url.query, 'srch'),
try(
regexp_extract(
split(conversion.tag_url.query [ 'ru' ], '?') [ 2 ],
'utm_term=([^&]*)',
1
)
),
try(
regexp_extract(
split(conversion.tag_url.query [ 'ru' ], '?') [ 2 ],
'searchTerm=([^&]*)',
1
)
),
try(
regexp_extract(
split(element_at(conversion.tag_url.query, 'qs'), '?') [ 2 ],
'utm_term=([^&]*)',
1
)
)
) search_terms,
element_at(conversion.tag_url.query, 'ru') as click_redirect_url,
COALESCE(
element_at(conversion.tag_url.query, 'clk'),
try(
regexp_extract(
split(conversion.tag_url.query [ 'ru' ], '?') [ 2 ],
'gclid=([^&]*)',
1
)
),
try(
regexp_extract(
split(conversion.tag_url.query [ 'ru' ], '?') [ 2 ],
'msclkid=([^&]*)',
1
)
),
try(
regexp_extract(
split(conversion.tag_url.query [ 'ru' ], '?') [ 2 ],
'fbclid=([^&]*)',
1
)
),
try(
regexp_extract(
split(conversion.tag_url.query [ 'ru' ], '?') [ 2 ],
'dclid=([^&]*)',
1
)
),
try(
regexp_extract(
split(conversion.tag_url.query [ 'ru' ], '?') [ 2 ],
'wbraid=([^&]*)',
1
)
),
try(
regexp_extract(
split(conversion.tag_url.query [ 'ru' ], '?') [ 2 ],
'gbraid=([^&]*)',
1
)
),
try(
regexp_extract(
split(element_at(conversion.tag_url.query, 'qs'), '?') [ 2 ],
'gclid=([^&]*)',
1
)
),
try(
regexp_extract(
split(element_at(conversion.tag_url.query, 'qs'), '?') [ 2 ],
'msclkid=([^&]*)',
1
)
),
try(
regexp_extract(
split(element_at(conversion.tag_url.query, 'qs'), '?') [ 2 ],
'fbclid=([^&]*)',
1
)
)
) click_id,
element_at(conversion.tag_url.query, 'v0') user_defined_0,
element_at(conversion.tag_url.query, 'v1') as user_defined_1,
element_at(conversion.tag_url.query, 'v2') as user_defined_2,
element_at(conversion.tag_url.query, 'v3') as user_defined_3,
element_at(conversion.tag_url.query, 'v4') as user_defined_4,
element_at(conversion.tag_url.query, 'v5') as user_defined_5,
element_at(conversion.tag_url.query, 'v6') as user_defined_6,
element_at(conversion.tag_url.query, 'v7') as user_defined_7,
element_at(conversion.tag_url.query, 'v8') as user_defined_8,
element_at(conversion.tag_url.query, 'v9') as user_defined_9,
event_group.user_agent,
event_group.os,
event_group.os_version,
event_group.browser,
event_group.browser_version,
element_at(conversion.others, 'accept_language_header') language_header,
element_at(conversion.others, 'accept_encoding_header') encoding_header,
conversion.http_status_code status_code,
event_group.device_type,
' ' device_type,
' ' device_make,
event_group.network,
event_group.network_type,
event_group.throughput,
event_group.zip,
event_group.city,
event_group.region_code,
event_group.country_code,
event_group.dma,
event_group.msa,
event_group.latitude,
event_group.longitude,
event_group.area_code,
event_group.fips,
event_group.timezone,
(
CASE
WHEN (CARDINALITY(conversion.tag_url.query) = 0) THEN NULL ELSE conversion.tag_url.query
END
) AS tag_qs,
(
CASE
WHEN (CARDINALITY(filter(conversion.other_entity_keys, o->o.entity_type = 'ck')) = 0) THEN NULL ELSE filter(conversion.other_entity_keys, o->o.entity_type = 'ck')
END
) AS cookies,
(
CASE
WHEN (CARDINALITY(conversion.others) = 0) THEN NULL ELSE conversion.others
END
) AS others,
event_date as eventdate
from aqfer.collation_daily_v5_1
cross join unnest(aqfer.collation_daily_v5_1.event_groups) as e(event_group)
cross join unnest(aqfer.collation_daily_v5_1.conversions) as c(conversion)
where event_date = '${prev_date}'
and event_group.event_id = conversion.event_group_id
and has_conversions = True
union
select 'impression' as "event_type",
-- impression.tag_url.query['aqet']
event_date,
date_format(
from_unixtime(impression.event_timestamp),
'%Y-%m-%d %H:%i:%s GMT'
) human_readable_timestamp,
impression.event_timestamp event_timestamp_gmt,
impression.event_id,
entity_id,
entity_domain,
entity_type,
SPLIT_PART(
filter(impression.other_entity_keys, o->o.entity_type = 'ip') [ 1 ].entity_domain,
'_z_',
4
) as ip_type,
filter(impression.other_entity_keys, o->o.entity_type = 'ip') [ 1 ].entity_id as ip_address,
COALESCE(
element_at(impression.tag_url.query, 'suu'),
CASE
WHEN cardinality(
FILTER(
impression.other_entity_keys,
x->x.entity_type = 'statid'
)
) > 0 then case
when FILTER(
impression.other_entity_keys,
x->x.entity_type = 'statid'
) [ 1 ].entity_id = 'NA' THEN NULL ELSE FILTER(
impression.other_entity_keys,
x->x.entity_type = 'statid'
) [ 1 ].entity_id
end else NULL
END,
to_base64url(
sha256(
to_utf8(
concat(
filter(impression.other_entity_keys, o->o.entity_type = 'ip') [ 1 ].entity_id,
event_group.user_agent,
element_at(impression.others, 'accept_encoding_header'),
element_at(impression.others, 'accept_language_header')
)
)
)
)
) analytic_id,
entity_id as cookie,
element_at(impression.tag_url.query, 'pnn') as partner_name,
COALESCE(
element_at(impression.tag_url.query, 'puu'),
CASE
WHEN cardinality(
FILTER(impression.other_entity_keys, x->x.entity_domain = '') -- CHANGE TO puu_entity_domain
) > 0 then CASE
WHEN FILTER(
impression.other_entity_keys,
x->x.entity_domain = ''
) [ 1 ].entity_id IN ('NA', '0', 'undefined') THEN NULL ELSE FILTER(
impression.other_entity_keys,
x->x.entity_domain = '' -- CHANGE TO puu_entity_domain
) [ 1 ].entity_id
END else NULL
END
) partner_user_id,
element_at(impression.tag_url.query, 'dmn') as domain,
element_at(impression.tag_url.query, 'pn') as path,
COALESCE(
element_at(impression.tag_url.query, 'qs'),
element_at(impression.tag_url.query, 'pqs')
) page_qs,
COALESCE(
element_at(impression.tag_url.query, 'rd'),
element_at(impression.tag_url.query, 'rdn')
) referrer_domain,
element_at(impression.tag_url.query, 'rpn') referrer_path,
CONCAT(
COALESCE(
element_at(impression.tag_url.query, 'rd'),
element_at(impression.tag_url.query, 'rdn')
),
element_at(impression.tag_url.query, 'rpn')
) referrer_url,
COALESCE(
element_at(impression.tag_url.query, 'imp'),
element_at(impression.tag_url.query, 'aut__query_imp')
) impression_id,
COALESCE(
impression.agency_id,
element_at(impression.tag_url.query, 'ag')
) agency_id,
COALESCE(
impression.advertiser_id,
element_at(impression.tag_url.query, 'adv')
) advertiser_id,
COALESCE(
element_at(impression.tag_url.query, 'ca'),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'utm_campaign=([^&]*)',
1
)
),
try(
regexp_extract(
element_at(impression.tag_url.query, 'qs'),
'utm_campaign=([^&]*)',
1
)
)
) campaign_id,
COALESCE(
element_at(impression.tag_url.query, 'ch'),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'utm_medium=([^&]*)',
1
)
),
try(
regexp_extract(
split(element_at(impression.tag_url.query, 'qs'), '?') [ 2 ],
'utm_medium=([^&]*)',
1
)
),
element_at(impression.tag_url.query, 'utm_medium'),
try(
regexp_extract(
element_at(impression.tag_url.query, 'qs'),
'utm_medium=([^&]*)',
1
)
)
) channel,
element_at(impression.tag_url.query, 'cr') as creative_id,
element_at(impression.tag_url.query, 'pl') as placement_id,
COALESCE(
element_at(impression.tag_url.query, 'si'),
element_at(impression.tag_url.query, 'sid'),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'sid=([^&]*)',
1
)
)
) site_id,
element_at(impression.tag_url.query, 'pubid') as publisher_id,
element_at(impression.tag_url.query, 'ivp') as inventory_partner,
COALESCE(
element_at(impression.tag_url.query, 'mp'),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'gclsrc=([^&]*)',
1
)
)
) media_partner,
impression.source,
COALESCE(
element_at(impression.tag_url.query, 'srch'),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'utm_term=([^&]*)',
1
)
),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'searchTerm=([^&]*)',
1
)
),
try(
regexp_extract(
split(element_at(impression.tag_url.query, 'qs'), '?') [ 2 ],
'utm_term=([^&]*)',
1
)
)
) search_terms,
element_at(impression.tag_url.query, 'ru') as click_redirect_url,
COALESCE(
element_at(impression.tag_url.query, 'clk'),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'gclid=([^&]*)',
1
)
),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'msclkid=([^&]*)',
1
)
),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'fbclid=([^&]*)',
1
)
),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'dclid=([^&]*)',
1
)
),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'wbraid=([^&]*)',
1
)
),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'gbraid=([^&]*)',
1
)
),
try(
regexp_extract(
split(element_at(impression.tag_url.query, 'qs'), '?') [ 2 ],
'gclid=([^&]*)',
1
)
),
try(
regexp_extract(
split(element_at(impression.tag_url.query, 'qs'), '?') [ 2 ],
'msclkid=([^&]*)',
1
)
),
try(
regexp_extract(
split(element_at(impression.tag_url.query, 'qs'), '?') [ 2 ],
'fbclid=([^&]*)',
1
)
)
) click_id,
element_at(impression.tag_url.query, 'v0') user_defined_0,
element_at(impression.tag_url.query, 'v1') as user_defined_1,
element_at(impression.tag_url.query, 'v2') as user_defined_2,
element_at(impression.tag_url.query, 'v3') as user_defined_3,
element_at(impression.tag_url.query, 'v4') as user_defined_4,
element_at(impression.tag_url.query, 'v5') as user_defined_5,
element_at(impression.tag_url.query, 'v6') as user_defined_6,
element_at(impression.tag_url.query, 'v7') as user_defined_7,
element_at(impression.tag_url.query, 'v8') as user_defined_8,
element_at(impression.tag_url.query, 'v9') as user_defined_9,
event_group.user_agent,
event_group.os,
event_group.os_version,
event_group.browser,
event_group.browser_version,
element_at(impression.others, 'accept_language_header') language_header,
element_at(impression.others, 'accept_encoding_header') encoding_header,
impression.http_status_code status_code,
event_group.device_type,
' ' device_type,
' ' device_make,
event_group.network,
event_group.network_type,
event_group.throughput,
event_group.zip,
event_group.city,
event_group.region_code,
event_group.country_code,
event_group.dma,
event_group.msa,
event_group.latitude,
event_group.longitude,
event_group.area_code,
event_group.fips,
event_group.timezone,
(
CASE
WHEN (CARDINALITY(impression.tag_url.query) = 0) THEN NULL ELSE impression.tag_url.query
END
) AS tag_qs,
(
CASE
WHEN (CARDINALITY(filter(impression.other_entity_keys, o->o.entity_type = 'ck')) = 0) THEN NULL ELSE filter(impression.other_entity_keys, o->o.entity_type = 'ck')
END
) AS cookies,
(
CASE
WHEN (CARDINALITY(impression.others) = 0) THEN NULL ELSE impression.others
END
) AS others,
event_date as eventdate
from aqfer.collation_daily_v5_1
cross join unnest(aqfer.collation_daily_v5_1.event_groups) as e(event_group)
cross join unnest(aqfer.collation_daily_v5_1.impressions) as i(impression)
where event_date = '${prev_date}'
and event_group.event_id = impression.event_group_id
and has_impressions = True and impression.has_click = False
and cardinality(impression.engagements) = 0
union
select 'click' as "event_type",
-- impression.tag_url.query['aqet']
event_date,
date_format(
from_unixtime(impression.event_timestamp),
'%Y-%m-%d %H:%i:%s GMT'
) human_readable_timestamp,
impression.event_timestamp event_timestamp_gmt,
click.event_id,
entity_id,
entity_domain,
entity_type,
SPLIT_PART(
filter(impression.other_entity_keys, o->o.entity_type = 'ip') [ 1 ].entity_domain,
'_z_',
4
) as ip_type,
filter(impression.other_entity_keys, o->o.entity_type = 'ip') [ 1 ].entity_id as ip_address,
COALESCE(
element_at(impression.tag_url.query, 'suu'),
CASE
WHEN cardinality(
FILTER(
impression.other_entity_keys,
x->x.entity_type = 'statid'
)
) > 0 then case
when FILTER(
impression.other_entity_keys,
x->x.entity_type = 'statid'
) [ 1 ].entity_id = 'NA' THEN NULL ELSE FILTER(
impression.other_entity_keys,
x->x.entity_type = 'statid'
) [ 1 ].entity_id
end else NULL
END,
to_base64url(
sha256(
to_utf8(
concat(
filter(impression.other_entity_keys, o->o.entity_type = 'ip') [ 1 ].entity_id,
event_group.user_agent,
element_at(impression.others, 'accept_encoding_header'),
element_at(impression.others, 'accept_language_header')
)
)
)
)
) analytic_id,
entity_id as cookie,
element_at(impression.tag_url.query, 'pnn') as partner_name,
COALESCE(
element_at(impression.tag_url.query, 'puu'),
CASE
WHEN cardinality(
FILTER(impression.other_entity_keys, x->x.entity_domain = '') -- CHANGE TO puu_entity_domain
) > 0 then CASE
WHEN FILTER(
impression.other_entity_keys,
x->x.entity_domain = ''
) [ 1 ].entity_id IN ('NA', '0', 'undefined') THEN NULL ELSE FILTER(
impression.other_entity_keys,
x->x.entity_domain = '' -- CHANGE TO puu_entity_domain
) [ 1 ].entity_id
END else NULL
END
) partner_user_id,
element_at(impression.tag_url.query, 'dmn') as domain,
element_at(impression.tag_url.query, 'pn') as path,
COALESCE(
element_at(impression.tag_url.query, 'qs'),
element_at(impression.tag_url.query, 'pqs')
) page_qs,
COALESCE(
element_at(impression.tag_url.query, 'rd'),
element_at(impression.tag_url.query, 'rdn')
) referrer_domain,
element_at(impression.tag_url.query, 'rpn') referrer_path,
CONCAT(
COALESCE(
element_at(impression.tag_url.query, 'rd'),
element_at(impression.tag_url.query, 'rdn')
),
element_at(impression.tag_url.query, 'rpn')
) referrer_url,
COALESCE(
element_at(impression.tag_url.query, 'imp'),
element_at(impression.tag_url.query, 'aut__query_imp')
) impression_id,
COALESCE(
impression.agency_id,
element_at(impression.tag_url.query, 'ag')
) agency_id,
COALESCE(
impression.advertiser_id,
element_at(impression.tag_url.query, 'adv')
) advertiser_id,
COALESCE(
element_at(impression.tag_url.query, 'ca'),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'utm_campaign=([^&]*)',
1
)
),
try(
regexp_extract(
element_at(impression.tag_url.query, 'qs'),
'utm_campaign=([^&]*)',
1
)
)
) campaign_id,
COALESCE(
element_at(impression.tag_url.query, 'ch'),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'utm_medium=([^&]*)',
1
)
),
try(
regexp_extract(
split(element_at(impression.tag_url.query, 'qs'), '?') [ 2 ],
'utm_medium=([^&]*)',
1
)
),
element_at(impression.tag_url.query, 'utm_medium'),
try(
regexp_extract(
element_at(impression.tag_url.query, 'qs'),
'utm_medium=([^&]*)',
1
)
)
) channel,
element_at(impression.tag_url.query, 'cr') as creative_id,
element_at(impression.tag_url.query, 'pl') as placement_id,
COALESCE(
element_at(impression.tag_url.query, 'si'),
element_at(impression.tag_url.query, 'sid'),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'sid=([^&]*)',
1
)
)
) site_id,
element_at(impression.tag_url.query, 'pubid') as publisher_id,
element_at(impression.tag_url.query, 'ivp') as inventory_partner,
COALESCE(
element_at(impression.tag_url.query, 'mp'),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'gclsrc=([^&]*)',
1
)
)
) media_partner,
impression.source,
COALESCE(
element_at(impression.tag_url.query, 'srch'),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'utm_term=([^&]*)',
1
)
),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'searchTerm=([^&]*)',
1
)
),
try(
regexp_extract(
split(element_at(impression.tag_url.query, 'qs'), '?') [ 2 ],
'utm_term=([^&]*)',
1
)
)
) search_terms,
element_at(impression.tag_url.query, 'ru') as click_redirect_url,
COALESCE(
element_at(impression.tag_url.query, 'clk'),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'gclid=([^&]*)',
1
)
),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'msclkid=([^&]*)',
1
)
),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'fbclid=([^&]*)',
1
)
),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'dclid=([^&]*)',
1
)
),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'wbraid=([^&]*)',
1
)
),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'gbraid=([^&]*)',
1
)
),
try(
regexp_extract(
split(element_at(impression.tag_url.query, 'qs'), '?') [ 2 ],
'gclid=([^&]*)',
1
)
),
try(
regexp_extract(
split(element_at(impression.tag_url.query, 'qs'), '?') [ 2 ],
'msclkid=([^&]*)',
1
)
),
try(
regexp_extract(
split(element_at(impression.tag_url.query, 'qs'), '?') [ 2 ],
'fbclid=([^&]*)',
1
)
)
) click_id,
element_at(impression.tag_url.query, 'v0') user_defined_0,
element_at(impression.tag_url.query, 'v1') as user_defined_1,
element_at(impression.tag_url.query, 'v2') as user_defined_2,
element_at(impression.tag_url.query, 'v3') as user_defined_3,
element_at(impression.tag_url.query, 'v4') as user_defined_4,
element_at(impression.tag_url.query, 'v5') as user_defined_5,
element_at(impression.tag_url.query, 'v6') as user_defined_6,
element_at(impression.tag_url.query, 'v7') as user_defined_7,
element_at(impression.tag_url.query, 'v8') as user_defined_8,
element_at(impression.tag_url.query, 'v9') as user_defined_9,
event_group.user_agent,
event_group.os,
event_group.os_version,
event_group.browser,
event_group.browser_version,
element_at(impression.others, 'accept_language_header') language_header,
element_at(impression.others, 'accept_encoding_header') encoding_header,
impression.http_status_code status_code,
event_group.device_type,
' ' device_type,
' ' device_make,
event_group.network,
event_group.network_type,
event_group.throughput,
event_group.zip,
event_group.city,
event_group.region_code,
event_group.country_code,
event_group.dma,
event_group.msa,
event_group.latitude,
event_group.longitude,
event_group.area_code,
event_group.fips,
event_group.timezone,
(
CASE
WHEN (CARDINALITY(impression.tag_url.query) = 0) THEN NULL ELSE impression.tag_url.query
END
) AS tag_qs,
(
CASE
WHEN (CARDINALITY(filter(impression.other_entity_keys, o->o.entity_type = 'ck')) = 0) THEN NULL ELSE filter(impression.other_entity_keys, o->o.entity_type = 'ck')
END
) AS cookies,
(
CASE
WHEN (CARDINALITY(impression.others) = 0) THEN NULL ELSE impression.others
END
) AS others,
event_date as eventdate
from aqfer.collation_daily_v5_1
cross join unnest(aqfer.collation_daily_v5_1.event_groups) as e(event_group)
cross join unnest(aqfer.collation_daily_v5_1.impressions) as i(impression)
cross join unnest(impression.clicks) as c(click)
where event_date = '${prev_date}'
and event_group.event_id = impression.event_group_id
and has_impressions = True and impression.has_click = True
union
select 'engagement' as "event_type",
-- impression.tag_url.query['aqet']
event_date,
date_format(
from_unixtime(impression.event_timestamp),
'%Y-%m-%d %H:%i:%s GMT'
) human_readable_timestamp,
impression.event_timestamp event_timestamp_gmt,
engagement.event_id,
entity_id,
entity_domain,
entity_type,
SPLIT_PART(
filter(impression.other_entity_keys, o->o.entity_type = 'ip') [ 1 ].entity_domain,
'_z_',
4
) as ip_type,
filter(impression.other_entity_keys, o->o.entity_type = 'ip') [ 1 ].entity_id as ip_address,
COALESCE(
element_at(impression.tag_url.query, 'suu'),
CASE
WHEN cardinality(
FILTER(
impression.other_entity_keys,
x->x.entity_type = 'statid'
)
) > 0 then case
when FILTER(
impression.other_entity_keys,
x->x.entity_type = 'statid'
) [ 1 ].entity_id = 'NA' THEN NULL ELSE FILTER(
impression.other_entity_keys,
x->x.entity_type = 'statid'
) [ 1 ].entity_id
end else NULL
END,
to_base64url(
sha256(
to_utf8(
concat(
filter(impression.other_entity_keys, o->o.entity_type = 'ip') [ 1 ].entity_id,
event_group.user_agent,
element_at(impression.others, 'accept_encoding_header'),
element_at(impression.others, 'accept_language_header')
)
)
)
)
) analytic_id,
entity_id as cookie,
element_at(impression.tag_url.query, 'pnn') as partner_name,
COALESCE(
element_at(impression.tag_url.query, 'puu'),
CASE
WHEN cardinality(
FILTER(impression.other_entity_keys, x->x.entity_domain = '') -- CHANGE TO puu_entity_domain
) > 0 then CASE
WHEN FILTER(
impression.other_entity_keys,
x->x.entity_domain = ''
) [ 1 ].entity_id IN ('NA', '0', 'undefined') THEN NULL ELSE FILTER(
impression.other_entity_keys,
x->x.entity_domain = '' -- CHANGE TO puu_entity_domain
) [ 1 ].entity_id
END else NULL
END
) partner_user_id,
element_at(impression.tag_url.query, 'dmn') as domain,
element_at(impression.tag_url.query, 'pn') as path,
COALESCE(
element_at(impression.tag_url.query, 'qs'),
element_at(impression.tag_url.query, 'pqs')
) page_qs,
COALESCE(
element_at(impression.tag_url.query, 'rd'),
element_at(impression.tag_url.query, 'rdn')
) referrer_domain,
element_at(impression.tag_url.query, 'rpn') referrer_path,
CONCAT(
COALESCE(
element_at(impression.tag_url.query, 'rd'),
element_at(impression.tag_url.query, 'rdn')
),
element_at(impression.tag_url.query, 'rpn')
) referrer_url,
COALESCE(
element_at(impression.tag_url.query, 'imp'),
element_at(impression.tag_url.query, 'aut__query_imp')
) impression_id,
COALESCE(
impression.agency_id,
element_at(impression.tag_url.query, 'ag')
) agency_id,
COALESCE(
impression.advertiser_id,
element_at(impression.tag_url.query, 'adv')
) advertiser_id,
COALESCE(
element_at(impression.tag_url.query, 'ca'),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'utm_campaign=([^&]*)',
1
)
),
try(
regexp_extract(
element_at(impression.tag_url.query, 'qs'),
'utm_campaign=([^&]*)',
1
)
)
) campaign_id,
COALESCE(
element_at(impression.tag_url.query, 'ch'),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'utm_medium=([^&]*)',
1
)
),
try(
regexp_extract(
split(element_at(impression.tag_url.query, 'qs'), '?') [ 2 ],
'utm_medium=([^&]*)',
1
)
),
element_at(impression.tag_url.query, 'utm_medium'),
try(
regexp_extract(
element_at(impression.tag_url.query, 'qs'),
'utm_medium=([^&]*)',
1
)
)
) channel,
element_at(impression.tag_url.query, 'cr') as creative_id,
element_at(impression.tag_url.query, 'pl') as placement_id,
COALESCE(
element_at(impression.tag_url.query, 'si'),
element_at(impression.tag_url.query, 'sid'),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'sid=([^&]*)',
1
)
)
) site_id,
element_at(impression.tag_url.query, 'pubid') as publisher_id,
element_at(impression.tag_url.query, 'ivp') as inventory_partner,
COALESCE(
element_at(impression.tag_url.query, 'mp'),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'gclsrc=([^&]*)',
1
)
)
) media_partner,
impression.source,
COALESCE(
element_at(impression.tag_url.query, 'srch'),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'utm_term=([^&]*)',
1
)
),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'searchTerm=([^&]*)',
1
)
),
try(
regexp_extract(
split(element_at(impression.tag_url.query, 'qs'), '?') [ 2 ],
'utm_term=([^&]*)',
1
)
)
) search_terms,
element_at(impression.tag_url.query, 'ru') as click_redirect_url,
COALESCE(
element_at(impression.tag_url.query, 'clk'),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'gclid=([^&]*)',
1
)
),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'msclkid=([^&]*)',
1
)
),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'fbclid=([^&]*)',
1
)
),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'dclid=([^&]*)',
1
)
),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'wbraid=([^&]*)',
1
)
),
try(
regexp_extract(
split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
'gbraid=([^&]*)',
1
)
),
try(
regexp_extract(
split(element_at(impression.tag_url.query, 'qs'), '?') [ 2 ],
'gclid=([^&]*)',
1
)
),
try(
regexp_extract(
split(element_at(impression.tag_url.query, 'qs'), '?') [ 2 ],
'msclkid=([^&]*)',
1
)
),
try(
regexp_extract(
split(element_at(impression.tag_url.query, 'qs'), '?') [ 2 ],
'fbclid=([^&]*)',
1
)
)
) click_id,
element_at(impression.tag_url.query, 'v0') user_defined_0,
element_at(impression.tag_url.query, 'v1') as user_defined_1,
element_at(impression.tag_url.query, 'v2') as user_defined_2,
element_at(impression.tag_url.query, 'v3') as user_defined_3,
element_at(impression.tag_url.query, 'v4') as user_defined_4,
element_at(impression.tag_url.query, 'v5') as user_defined_5,
element_at(impression.tag_url.query, 'v6') as user_defined_6,
element_at(impression.tag_url.query, 'v7') as user_defined_7,
element_at(impression.tag_url.query, 'v8') as user_defined_8,
element_at(impression.tag_url.query, 'v9') as user_defined_9,
event_group.user_agent,
event_group.os,
event_group.os_version,
event_group.browser,
event_group.browser_version,
element_at(impression.others, 'accept_language_header') language_header,
element_at(impression.others, 'accept_encoding_header') encoding_header,
impression.http_status_code status_code,
event_group.device_type,
' ' device_type,
' ' device_make,
event_group.network,
event_group.network_type,
event_group.throughput,
event_group.zip,
event_group.city,
event_group.region_code,
event_group.country_code,
event_group.dma,
event_group.msa,
event_group.latitude,
event_group.longitude,
event_group.area_code,
event_group.fips,
event_group.timezone,
(
CASE
WHEN (CARDINALITY(impression.tag_url.query) = 0) THEN NULL ELSE impression.tag_url.query
END
) AS tag_qs,
(
CASE
WHEN (CARDINALITY(filter(impression.other_entity_keys, o->o.entity_type = 'ck')) = 0) THEN NULL ELSE filter(impression.other_entity_keys, o->o.entity_type = 'ck')
END
) AS cookies,
(
CASE
WHEN (CARDINALITY(impression.others) = 0) THEN NULL ELSE impression.others
END
) AS others,
event_date as eventdate
from aqfer.collation_daily_v5_1
cross join unnest(aqfer.collation_daily_v5_1.event_groups) as e(event_group)
cross join unnest(aqfer.collation_daily_v5_1.impressions) as i(impression)
cross join unnest(impression.engagements) as e(engagement)
where event_date = '${prev_date}'
and event_group.event_id = impression.event_group_id
and has_impressions = True and cardinality(impression.engagements) > 0