ctas.sql

select 'pageview' as "event_type",
	-- view.tag_url.query['aqet']
	event_date,
	date_format(
		from_unixtime(view.event_timestamp),
		'%Y-%m-%d %H:%i:%s GMT'
	) human_readable_timestamp,
	view.event_timestamp event_timestamp_gmt,
	view.event_id,
	entity_id,
	entity_domain,
	entity_type,
	SPLIT_PART(
		filter(view.other_entity_keys, o->o.entity_type = 'ip') [ 1 ].entity_domain,
		'_z_',
		4
	) as ip_type,
	filter(view.other_entity_keys, o->o.entity_type = 'ip') [ 1 ].entity_id as ip_address,
	COALESCE(
		element_at(view.tag_url.query, 'suu'),
		CASE
			WHEN cardinality(
				FILTER(
					view.other_entity_keys,
					x->x.entity_type = 'statid'
				)
			) > 0 then case
				when FILTER(
					view.other_entity_keys,
					x->x.entity_type = 'statid'
				) [ 1 ].entity_id = 'NA' THEN NULL ELSE FILTER(
					view.other_entity_keys,
					x->x.entity_type = 'statid'
				) [ 1 ].entity_id
			end else NULL
		END,
		to_base64url(
			sha256(
				to_utf8(
					concat(
						filter(view.other_entity_keys, o->o.entity_type = 'ip') [ 1 ].entity_id,
						event_group.user_agent,
						element_at(view.others, 'accept_encoding_header'),
						element_at(view.others, 'accept_language_header')
					)
				)
			)
		)
	) analytic_id,
	entity_id as cookie,
    element_at(view.tag_url.query, 'pnn') as partner_name,
	COALESCE(
		element_at(view.tag_url.query, 'puu'),
		CASE
			WHEN cardinality(
				FILTER(view.other_entity_keys, x->x.entity_domain = '') -- CHANGE TO puu_entity_domain
			) > 0 then CASE
				WHEN FILTER(
					view.other_entity_keys,
					x->x.entity_domain = ''
				) [ 1 ].entity_id IN ('NA', '0', 'undefined') THEN NULL ELSE FILTER(
					view.other_entity_keys,
					x->x.entity_domain = '' -- CHANGE TO puu_entity_domain
				) [ 1 ].entity_id
			END else NULL
		END
	) partner_user_id,
	element_at(view.tag_url.query, 'dmn') as domain,
    element_at(view.tag_url.query, 'pn') as path,
	COALESCE(
		element_at(view.tag_url.query, 'qs'),
		element_at(view.tag_url.query, 'pqs')
	) page_qs,
	COALESCE(
		element_at(view.tag_url.query, 'rd'),
		element_at(view.tag_url.query, 'rdn')
	) referrer_domain,
	element_at(view.tag_url.query, 'rpn') referrer_path,
	CONCAT(
		COALESCE(
			element_at(view.tag_url.query, 'rd'),
			element_at(view.tag_url.query, 'rdn')
		),
		element_at(view.tag_url.query, 'rpn')
	) referrer_url,
	COALESCE(
		element_at(view.tag_url.query, 'imp'),
		element_at(view.tag_url.query, 'aut__query_imp')
	) impression_id,
	COALESCE(
		view.agency_id,
		element_at(view.tag_url.query, 'ag')
	) agency_id,
	COALESCE(
		view.advertiser_id,
		element_at(view.tag_url.query, 'adv')
	) advertiser_id,
	COALESCE(
		element_at(view.tag_url.query, 'ca'),
		try(
			regexp_extract(
				split(view.tag_url.query [ 'ru' ], '?') [ 2 ],
				'utm_campaign=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				element_at(view.tag_url.query, 'qs'),
				'utm_campaign=([^&]*)',
				1
			)
		)
	) campaign_id,
	COALESCE(
		element_at(view.tag_url.query, 'ch'),
		try(
			regexp_extract(
				split(view.tag_url.query [ 'ru' ], '?') [ 2 ],
				'utm_medium=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(element_at(view.tag_url.query, 'qs'), '?') [ 2 ],
				'utm_medium=([^&]*)',
				1
			)
		),
		element_at(view.tag_url.query, 'utm_medium'),
		try(
			regexp_extract(
				element_at(view.tag_url.query, 'qs'),
				'utm_medium=([^&]*)',
				1
			)
		)
	) channel,
	element_at(view.tag_url.query, 'cr') as creative_id,
	element_at(view.tag_url.query, 'pl') as placement_id,
	COALESCE(
		element_at(view.tag_url.query, 'si'),
		element_at(view.tag_url.query, 'sid'),
		try(
			regexp_extract(
				split(view.tag_url.query [ 'ru' ], '?') [ 2 ],
				'sid=([^&]*)',
				1
			)
		)
	) site_id,
	element_at(view.tag_url.query, 'pubid') as publisher_id,
	element_at(view.tag_url.query, 'ivp') as inventory_partner,
	COALESCE(
		element_at(view.tag_url.query, 'mp'),
		try(
			regexp_extract(
				split(view.tag_url.query [ 'ru' ], '?') [ 2 ],
				'gclsrc=([^&]*)',
				1
			)
		)
	) media_partner,
	view.source,
	COALESCE(
		element_at(view.tag_url.query, 'srch'),
		try(
			regexp_extract(
				split(view.tag_url.query [ 'ru' ], '?') [ 2 ],
				'utm_term=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(view.tag_url.query [ 'ru' ], '?') [ 2 ],
				'searchTerm=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(element_at(view.tag_url.query, 'qs'), '?') [ 2 ],
				'utm_term=([^&]*)',
				1
			)
		)
	) search_terms,
	element_at(view.tag_url.query, 'ru') as click_redirect_url,
	COALESCE(
		element_at(view.tag_url.query, 'clk'),
		try(
			regexp_extract(
				split(view.tag_url.query [ 'ru' ], '?') [ 2 ],
				'gclid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(view.tag_url.query [ 'ru' ], '?') [ 2 ],
				'msclkid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(view.tag_url.query [ 'ru' ], '?') [ 2 ],
				'fbclid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(view.tag_url.query [ 'ru' ], '?') [ 2 ],
				'dclid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(view.tag_url.query [ 'ru' ], '?') [ 2 ],
				'wbraid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(view.tag_url.query [ 'ru' ], '?') [ 2 ],
				'gbraid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(element_at(view.tag_url.query, 'qs'), '?') [ 2 ],
				'gclid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(element_at(view.tag_url.query, 'qs'), '?') [ 2 ],
				'msclkid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(element_at(view.tag_url.query, 'qs'), '?') [ 2 ],
				'fbclid=([^&]*)',
				1
			)
		)
	) click_id,
	element_at(view.tag_url.query, 'v0') user_defined_0,
	element_at(view.tag_url.query, 'v1') as user_defined_1,
	element_at(view.tag_url.query, 'v2') as user_defined_2,
	element_at(view.tag_url.query, 'v3') as user_defined_3,
	element_at(view.tag_url.query, 'v4') as user_defined_4,
	element_at(view.tag_url.query, 'v5') as user_defined_5,
	element_at(view.tag_url.query, 'v6') as user_defined_6,
	element_at(view.tag_url.query, 'v7') as user_defined_7,
	element_at(view.tag_url.query, 'v8') as user_defined_8,
	element_at(view.tag_url.query, 'v9') as user_defined_9,
	event_group.user_agent,
	event_group.os,
	event_group.os_version,
	event_group.browser,
	event_group.browser_version,
	element_at(view.others, 'accept_language_header') language_header,
	element_at(view.others, 'accept_encoding_header') encoding_header,
	view.http_status_code status_code,
	event_group.device_type,
	' ' device_type,
	' ' device_make,
	event_group.network,
	event_group.network_type,
	event_group.throughput,
	event_group.zip,
	event_group.city,
	event_group.region_code,
	event_group.country_code,
	event_group.dma,
	event_group.msa,
	event_group.latitude,
	event_group.longitude,
	event_group.area_code,
	event_group.fips,
	event_group.timezone,
	(
		CASE
			WHEN (CARDINALITY(view.tag_url.query) = 0) THEN NULL ELSE view.tag_url.query
		END
	) AS tag_qs,
	(
		CASE
			WHEN (CARDINALITY(filter(view.other_entity_keys, o->o.entity_type = 'ck')) = 0) THEN NULL ELSE filter(view.other_entity_keys, o->o.entity_type = 'ck')
		END
	) AS cookies,
	(
		CASE
			WHEN (CARDINALITY(view.others) = 0) THEN NULL ELSE view.others
		END
	) AS others,
	event_date as eventdate
from aqfer.collation_daily_v5_1
	cross join unnest(aqfer.collation_daily_v5_1.event_groups) as e(event_group)
	cross join unnest(aqfer.collation_daily_v5_1.views) as v(view)
where event_date = '${prev_date}'
	and event_group.event_id = view.event_group_id
	and has_views = True

union
select 'conversion' as "event_type",
	-- conversion.tag_url.query['aqet']
	event_date,
	date_format(
		from_unixtime(conversion.event_timestamp),
		'%Y-%m-%d %H:%i:%s GMT'
	) human_readable_timestamp,
	conversion.event_timestamp event_timestamp_gmt,
	conversion.event_id,
	entity_id,
	entity_domain,
	entity_type,
	SPLIT_PART(
		filter(conversion.other_entity_keys, o->o.entity_type = 'ip') [ 1 ].entity_domain,
		'_z_',
		4
	) as ip_type,
	filter(conversion.other_entity_keys, o->o.entity_type = 'ip') [ 1 ].entity_id as ip_address,
	COALESCE(
		element_at(conversion.tag_url.query, 'suu'),
		CASE
			WHEN cardinality(
				FILTER(
					conversion.other_entity_keys,
					x->x.entity_type = 'statid'
				)
			) > 0 then case
				when FILTER(
					conversion.other_entity_keys,
					x->x.entity_type = 'statid'
				) [ 1 ].entity_id = 'NA' THEN NULL ELSE FILTER(
					conversion.other_entity_keys,
					x->x.entity_type = 'statid'
				) [ 1 ].entity_id
			end else NULL
		END,
		to_base64url(
			sha256(
				to_utf8(
					concat(
						filter(conversion.other_entity_keys, o->o.entity_type = 'ip') [ 1 ].entity_id,
						event_group.user_agent,
						element_at(conversion.others, 'accept_encoding_header'),
						element_at(conversion.others, 'accept_language_header')
					)
				)
			)
		)
	) analytic_id,
	entity_id as cookie,
    element_at(conversion.tag_url.query, 'pnn') as partner_name,
	COALESCE(
		element_at(conversion.tag_url.query, 'puu'),
		CASE
			WHEN cardinality(
				FILTER(conversion.other_entity_keys, x->x.entity_domain = '') -- CHANGE TO puu_entity_domain
			) > 0 then CASE
				WHEN FILTER(
					conversion.other_entity_keys,
					x->x.entity_domain = ''
				) [ 1 ].entity_id IN ('NA', '0', 'undefined') THEN NULL ELSE FILTER(
					conversion.other_entity_keys,
					x->x.entity_domain = '' -- CHANGE TO puu_entity_domain
				) [ 1 ].entity_id
			END else NULL
		END
	) partner_user_id,
	element_at(conversion.tag_url.query, 'dmn') as domain,
    element_at(conversion.tag_url.query, 'pn') as path,
	COALESCE(
		element_at(conversion.tag_url.query, 'qs'),
		element_at(conversion.tag_url.query, 'pqs')
	) page_qs,
	COALESCE(
		element_at(conversion.tag_url.query, 'rd'),
		element_at(conversion.tag_url.query, 'rdn')
	) referrer_domain,
	element_at(conversion.tag_url.query, 'rpn') referrer_path,
	CONCAT(
		COALESCE(
			element_at(conversion.tag_url.query, 'rd'),
			element_at(conversion.tag_url.query, 'rdn')
		),
		element_at(conversion.tag_url.query, 'rpn')
	) referrer_url,
	COALESCE(
		element_at(conversion.tag_url.query, 'imp'),
		element_at(conversion.tag_url.query, 'aut__query_imp')
	) impression_id,
	COALESCE(
		conversion.agency_id,
		element_at(conversion.tag_url.query, 'ag')
	) agency_id,
	COALESCE(
		conversion.advertiser_id,
		element_at(conversion.tag_url.query, 'adv')
	) advertiser_id,
	COALESCE(
		element_at(conversion.tag_url.query, 'ca'),
		try(
			regexp_extract(
				split(conversion.tag_url.query [ 'ru' ], '?') [ 2 ],
				'utm_campaign=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				element_at(conversion.tag_url.query, 'qs'),
				'utm_campaign=([^&]*)',
				1
			)
		)
	) campaign_id,
	COALESCE(
		element_at(conversion.tag_url.query, 'ch'),
		try(
			regexp_extract(
				split(conversion.tag_url.query [ 'ru' ], '?') [ 2 ],
				'utm_medium=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(element_at(conversion.tag_url.query, 'qs'), '?') [ 2 ],
				'utm_medium=([^&]*)',
				1
			)
		),
		element_at(conversion.tag_url.query, 'utm_medium'),
		try(
			regexp_extract(
				element_at(conversion.tag_url.query, 'qs'),
				'utm_medium=([^&]*)',
				1
			)
		)
	) channel,
	element_at(conversion.tag_url.query, 'cr') as creative_id,
	element_at(conversion.tag_url.query, 'pl') as placement_id,
	COALESCE(
		element_at(conversion.tag_url.query, 'si'),
		element_at(conversion.tag_url.query, 'sid'),
		try(
			regexp_extract(
				split(conversion.tag_url.query [ 'ru' ], '?') [ 2 ],
				'sid=([^&]*)',
				1
			)
		)
	) site_id,
	element_at(conversion.tag_url.query, 'pubid') as publisher_id,
	element_at(conversion.tag_url.query, 'ivp') as inventory_partner,
	COALESCE(
		element_at(conversion.tag_url.query, 'mp'),
		try(
			regexp_extract(
				split(conversion.tag_url.query [ 'ru' ], '?') [ 2 ],
				'gclsrc=([^&]*)',
				1
			)
		)
	) media_partner,
	conversion.source,
	COALESCE(
		element_at(conversion.tag_url.query, 'srch'),
		try(
			regexp_extract(
				split(conversion.tag_url.query [ 'ru' ], '?') [ 2 ],
				'utm_term=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(conversion.tag_url.query [ 'ru' ], '?') [ 2 ],
				'searchTerm=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(element_at(conversion.tag_url.query, 'qs'), '?') [ 2 ],
				'utm_term=([^&]*)',
				1
			)
		)
	) search_terms,
	element_at(conversion.tag_url.query, 'ru') as click_redirect_url,
	COALESCE(
		element_at(conversion.tag_url.query, 'clk'),
		try(
			regexp_extract(
				split(conversion.tag_url.query [ 'ru' ], '?') [ 2 ],
				'gclid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(conversion.tag_url.query [ 'ru' ], '?') [ 2 ],
				'msclkid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(conversion.tag_url.query [ 'ru' ], '?') [ 2 ],
				'fbclid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(conversion.tag_url.query [ 'ru' ], '?') [ 2 ],
				'dclid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(conversion.tag_url.query [ 'ru' ], '?') [ 2 ],
				'wbraid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(conversion.tag_url.query [ 'ru' ], '?') [ 2 ],
				'gbraid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(element_at(conversion.tag_url.query, 'qs'), '?') [ 2 ],
				'gclid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(element_at(conversion.tag_url.query, 'qs'), '?') [ 2 ],
				'msclkid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(element_at(conversion.tag_url.query, 'qs'), '?') [ 2 ],
				'fbclid=([^&]*)',
				1
			)
		)
	) click_id,
	element_at(conversion.tag_url.query, 'v0') user_defined_0,
	element_at(conversion.tag_url.query, 'v1') as user_defined_1,
	element_at(conversion.tag_url.query, 'v2') as user_defined_2,
	element_at(conversion.tag_url.query, 'v3') as user_defined_3,
	element_at(conversion.tag_url.query, 'v4') as user_defined_4,
	element_at(conversion.tag_url.query, 'v5') as user_defined_5,
	element_at(conversion.tag_url.query, 'v6') as user_defined_6,
	element_at(conversion.tag_url.query, 'v7') as user_defined_7,
	element_at(conversion.tag_url.query, 'v8') as user_defined_8,
	element_at(conversion.tag_url.query, 'v9') as user_defined_9,
	event_group.user_agent,
	event_group.os,
	event_group.os_version,
	event_group.browser,
	event_group.browser_version,
	element_at(conversion.others, 'accept_language_header') language_header,
	element_at(conversion.others, 'accept_encoding_header') encoding_header,
	conversion.http_status_code status_code,
	event_group.device_type,
	' ' device_type,
	' ' device_make,
	event_group.network,
	event_group.network_type,
	event_group.throughput,
	event_group.zip,
	event_group.city,
	event_group.region_code,
	event_group.country_code,
	event_group.dma,
	event_group.msa,
	event_group.latitude,
	event_group.longitude,
	event_group.area_code,
	event_group.fips,
	event_group.timezone,
	(
		CASE
			WHEN (CARDINALITY(conversion.tag_url.query) = 0) THEN NULL ELSE conversion.tag_url.query
		END
	) AS tag_qs,
	(
		CASE
			WHEN (CARDINALITY(filter(conversion.other_entity_keys, o->o.entity_type = 'ck')) = 0) THEN NULL ELSE filter(conversion.other_entity_keys, o->o.entity_type = 'ck')
		END
	) AS cookies,
	(
		CASE
			WHEN (CARDINALITY(conversion.others) = 0) THEN NULL ELSE conversion.others
		END
	) AS others,
	event_date as eventdate
from aqfer.collation_daily_v5_1
	cross join unnest(aqfer.collation_daily_v5_1.event_groups) as e(event_group)
	cross join unnest(aqfer.collation_daily_v5_1.conversions) as c(conversion)
where event_date = '${prev_date}'
	and event_group.event_id = conversion.event_group_id
	and has_conversions = True

union
select 'impression' as "event_type",
	-- impression.tag_url.query['aqet']
	event_date,
	date_format(
		from_unixtime(impression.event_timestamp),
		'%Y-%m-%d %H:%i:%s GMT'
	) human_readable_timestamp,
	impression.event_timestamp event_timestamp_gmt,
	impression.event_id,
	entity_id,
	entity_domain,
	entity_type,
	SPLIT_PART(
		filter(impression.other_entity_keys, o->o.entity_type = 'ip') [ 1 ].entity_domain,
		'_z_',
		4
	) as ip_type,
	filter(impression.other_entity_keys, o->o.entity_type = 'ip') [ 1 ].entity_id as ip_address,
	COALESCE(
		element_at(impression.tag_url.query, 'suu'),
		CASE
			WHEN cardinality(
				FILTER(
					impression.other_entity_keys,
					x->x.entity_type = 'statid'
				)
			) > 0 then case
				when FILTER(
					impression.other_entity_keys,
					x->x.entity_type = 'statid'
				) [ 1 ].entity_id = 'NA' THEN NULL ELSE FILTER(
					impression.other_entity_keys,
					x->x.entity_type = 'statid'
				) [ 1 ].entity_id
			end else NULL
		END,
		to_base64url(
			sha256(
				to_utf8(
					concat(
						filter(impression.other_entity_keys, o->o.entity_type = 'ip') [ 1 ].entity_id,
						event_group.user_agent,
						element_at(impression.others, 'accept_encoding_header'),
						element_at(impression.others, 'accept_language_header')
					)
				)
			)
		)
	) analytic_id,
	entity_id as cookie,
    element_at(impression.tag_url.query, 'pnn') as partner_name,
	COALESCE(
		element_at(impression.tag_url.query, 'puu'),
		CASE
			WHEN cardinality(
				FILTER(impression.other_entity_keys, x->x.entity_domain = '') -- CHANGE TO puu_entity_domain
			) > 0 then CASE
				WHEN FILTER(
					impression.other_entity_keys,
					x->x.entity_domain = ''
				) [ 1 ].entity_id IN ('NA', '0', 'undefined') THEN NULL ELSE FILTER(
					impression.other_entity_keys,
					x->x.entity_domain = '' -- CHANGE TO puu_entity_domain
				) [ 1 ].entity_id
			END else NULL
		END
	) partner_user_id,
	element_at(impression.tag_url.query, 'dmn') as domain,
    element_at(impression.tag_url.query, 'pn') as path,
	COALESCE(
		element_at(impression.tag_url.query, 'qs'),
		element_at(impression.tag_url.query, 'pqs')
	) page_qs,
	COALESCE(
		element_at(impression.tag_url.query, 'rd'),
		element_at(impression.tag_url.query, 'rdn')
	) referrer_domain,
	element_at(impression.tag_url.query, 'rpn') referrer_path,
	CONCAT(
		COALESCE(
			element_at(impression.tag_url.query, 'rd'),
			element_at(impression.tag_url.query, 'rdn')
		),
		element_at(impression.tag_url.query, 'rpn')
	) referrer_url,
	COALESCE(
		element_at(impression.tag_url.query, 'imp'),
		element_at(impression.tag_url.query, 'aut__query_imp')
	) impression_id,
	COALESCE(
		impression.agency_id,
		element_at(impression.tag_url.query, 'ag')
	) agency_id,
	COALESCE(
		impression.advertiser_id,
		element_at(impression.tag_url.query, 'adv')
	) advertiser_id,
	COALESCE(
		element_at(impression.tag_url.query, 'ca'),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'utm_campaign=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				element_at(impression.tag_url.query, 'qs'),
				'utm_campaign=([^&]*)',
				1
			)
		)
	) campaign_id,
	COALESCE(
		element_at(impression.tag_url.query, 'ch'),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'utm_medium=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(element_at(impression.tag_url.query, 'qs'), '?') [ 2 ],
				'utm_medium=([^&]*)',
				1
			)
		),
		element_at(impression.tag_url.query, 'utm_medium'),
		try(
			regexp_extract(
				element_at(impression.tag_url.query, 'qs'),
				'utm_medium=([^&]*)',
				1
			)
		)
	) channel,
	element_at(impression.tag_url.query, 'cr') as creative_id,
	element_at(impression.tag_url.query, 'pl') as placement_id,
	COALESCE(
		element_at(impression.tag_url.query, 'si'),
		element_at(impression.tag_url.query, 'sid'),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'sid=([^&]*)',
				1
			)
		)
	) site_id,
	element_at(impression.tag_url.query, 'pubid') as publisher_id,
	element_at(impression.tag_url.query, 'ivp') as inventory_partner,
	COALESCE(
		element_at(impression.tag_url.query, 'mp'),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'gclsrc=([^&]*)',
				1
			)
		)
	) media_partner,
	impression.source,
	COALESCE(
		element_at(impression.tag_url.query, 'srch'),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'utm_term=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'searchTerm=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(element_at(impression.tag_url.query, 'qs'), '?') [ 2 ],
				'utm_term=([^&]*)',
				1
			)
		)
	) search_terms,
	element_at(impression.tag_url.query, 'ru') as click_redirect_url,
	COALESCE(
		element_at(impression.tag_url.query, 'clk'),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'gclid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'msclkid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'fbclid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'dclid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'wbraid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'gbraid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(element_at(impression.tag_url.query, 'qs'), '?') [ 2 ],
				'gclid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(element_at(impression.tag_url.query, 'qs'), '?') [ 2 ],
				'msclkid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(element_at(impression.tag_url.query, 'qs'), '?') [ 2 ],
				'fbclid=([^&]*)',
				1
			)
		)
	) click_id,
	element_at(impression.tag_url.query, 'v0') user_defined_0,
	element_at(impression.tag_url.query, 'v1') as user_defined_1,
	element_at(impression.tag_url.query, 'v2') as user_defined_2,
	element_at(impression.tag_url.query, 'v3') as user_defined_3,
	element_at(impression.tag_url.query, 'v4') as user_defined_4,
	element_at(impression.tag_url.query, 'v5') as user_defined_5,
	element_at(impression.tag_url.query, 'v6') as user_defined_6,
	element_at(impression.tag_url.query, 'v7') as user_defined_7,
	element_at(impression.tag_url.query, 'v8') as user_defined_8,
	element_at(impression.tag_url.query, 'v9') as user_defined_9,
	event_group.user_agent,
	event_group.os,
	event_group.os_version,
	event_group.browser,
	event_group.browser_version,
	element_at(impression.others, 'accept_language_header') language_header,
	element_at(impression.others, 'accept_encoding_header') encoding_header,
	impression.http_status_code status_code,
	event_group.device_type,
	' ' device_type,
	' ' device_make,
	event_group.network,
	event_group.network_type,
	event_group.throughput,
	event_group.zip,
	event_group.city,
	event_group.region_code,
	event_group.country_code,
	event_group.dma,
	event_group.msa,
	event_group.latitude,
	event_group.longitude,
	event_group.area_code,
	event_group.fips,
	event_group.timezone,
	(
		CASE
			WHEN (CARDINALITY(impression.tag_url.query) = 0) THEN NULL ELSE impression.tag_url.query
		END
	) AS tag_qs,
	(
		CASE
			WHEN (CARDINALITY(filter(impression.other_entity_keys, o->o.entity_type = 'ck')) = 0) THEN NULL ELSE filter(impression.other_entity_keys, o->o.entity_type = 'ck')
		END
	) AS cookies,
	(
		CASE
			WHEN (CARDINALITY(impression.others) = 0) THEN NULL ELSE impression.others
		END
	) AS others,
	event_date as eventdate
from aqfer.collation_daily_v5_1
	cross join unnest(aqfer.collation_daily_v5_1.event_groups) as e(event_group)
	cross join unnest(aqfer.collation_daily_v5_1.impressions) as i(impression)
where event_date = '${prev_date}'
	and event_group.event_id = impression.event_group_id
	and has_impressions = True and impression.has_click = False
			and cardinality(impression.engagements) = 0

union
select 'click' as "event_type",
	-- impression.tag_url.query['aqet']
	event_date,
	date_format(
		from_unixtime(impression.event_timestamp),
		'%Y-%m-%d %H:%i:%s GMT'
	) human_readable_timestamp,
	impression.event_timestamp event_timestamp_gmt,
	click.event_id,
	entity_id,
	entity_domain,
	entity_type,
	SPLIT_PART(
		filter(impression.other_entity_keys, o->o.entity_type = 'ip') [ 1 ].entity_domain,
		'_z_',
		4
	) as ip_type,
	filter(impression.other_entity_keys, o->o.entity_type = 'ip') [ 1 ].entity_id as ip_address,
	COALESCE(
		element_at(impression.tag_url.query, 'suu'),
		CASE
			WHEN cardinality(
				FILTER(
					impression.other_entity_keys,
					x->x.entity_type = 'statid'
				)
			) > 0 then case
				when FILTER(
					impression.other_entity_keys,
					x->x.entity_type = 'statid'
				) [ 1 ].entity_id = 'NA' THEN NULL ELSE FILTER(
					impression.other_entity_keys,
					x->x.entity_type = 'statid'
				) [ 1 ].entity_id
			end else NULL
		END,
		to_base64url(
			sha256(
				to_utf8(
					concat(
						filter(impression.other_entity_keys, o->o.entity_type = 'ip') [ 1 ].entity_id,
						event_group.user_agent,
						element_at(impression.others, 'accept_encoding_header'),
						element_at(impression.others, 'accept_language_header')
					)
				)
			)
		)
	) analytic_id,
	entity_id as cookie,
    element_at(impression.tag_url.query, 'pnn') as partner_name,
	COALESCE(
		element_at(impression.tag_url.query, 'puu'),
		CASE
			WHEN cardinality(
				FILTER(impression.other_entity_keys, x->x.entity_domain = '') -- CHANGE TO puu_entity_domain
			) > 0 then CASE
				WHEN FILTER(
					impression.other_entity_keys,
					x->x.entity_domain = ''
				) [ 1 ].entity_id IN ('NA', '0', 'undefined') THEN NULL ELSE FILTER(
					impression.other_entity_keys,
					x->x.entity_domain = '' -- CHANGE TO puu_entity_domain
				) [ 1 ].entity_id
			END else NULL
		END
	) partner_user_id,
	element_at(impression.tag_url.query, 'dmn') as domain,
    element_at(impression.tag_url.query, 'pn') as path,
	COALESCE(
		element_at(impression.tag_url.query, 'qs'),
		element_at(impression.tag_url.query, 'pqs')
	) page_qs,
	COALESCE(
		element_at(impression.tag_url.query, 'rd'),
		element_at(impression.tag_url.query, 'rdn')
	) referrer_domain,
	element_at(impression.tag_url.query, 'rpn') referrer_path,
	CONCAT(
		COALESCE(
			element_at(impression.tag_url.query, 'rd'),
			element_at(impression.tag_url.query, 'rdn')
		),
		element_at(impression.tag_url.query, 'rpn')
	) referrer_url,
	COALESCE(
		element_at(impression.tag_url.query, 'imp'),
		element_at(impression.tag_url.query, 'aut__query_imp')
	) impression_id,
	COALESCE(
		impression.agency_id,
		element_at(impression.tag_url.query, 'ag')
	) agency_id,
	COALESCE(
		impression.advertiser_id,
		element_at(impression.tag_url.query, 'adv')
	) advertiser_id,
	COALESCE(
		element_at(impression.tag_url.query, 'ca'),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'utm_campaign=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				element_at(impression.tag_url.query, 'qs'),
				'utm_campaign=([^&]*)',
				1
			)
		)
	) campaign_id,
	COALESCE(
		element_at(impression.tag_url.query, 'ch'),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'utm_medium=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(element_at(impression.tag_url.query, 'qs'), '?') [ 2 ],
				'utm_medium=([^&]*)',
				1
			)
		),
		element_at(impression.tag_url.query, 'utm_medium'),
		try(
			regexp_extract(
				element_at(impression.tag_url.query, 'qs'),
				'utm_medium=([^&]*)',
				1
			)
		)
	) channel,
	element_at(impression.tag_url.query, 'cr') as creative_id,
	element_at(impression.tag_url.query, 'pl') as placement_id,
	COALESCE(
		element_at(impression.tag_url.query, 'si'),
		element_at(impression.tag_url.query, 'sid'),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'sid=([^&]*)',
				1
			)
		)
	) site_id,
	element_at(impression.tag_url.query, 'pubid') as publisher_id,
	element_at(impression.tag_url.query, 'ivp') as inventory_partner,
	COALESCE(
		element_at(impression.tag_url.query, 'mp'),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'gclsrc=([^&]*)',
				1
			)
		)
	) media_partner,
	impression.source,
	COALESCE(
		element_at(impression.tag_url.query, 'srch'),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'utm_term=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'searchTerm=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(element_at(impression.tag_url.query, 'qs'), '?') [ 2 ],
				'utm_term=([^&]*)',
				1
			)
		)
	) search_terms,
	element_at(impression.tag_url.query, 'ru') as click_redirect_url,
	COALESCE(
		element_at(impression.tag_url.query, 'clk'),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'gclid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'msclkid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'fbclid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'dclid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'wbraid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'gbraid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(element_at(impression.tag_url.query, 'qs'), '?') [ 2 ],
				'gclid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(element_at(impression.tag_url.query, 'qs'), '?') [ 2 ],
				'msclkid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(element_at(impression.tag_url.query, 'qs'), '?') [ 2 ],
				'fbclid=([^&]*)',
				1
			)
		)
	) click_id,
	element_at(impression.tag_url.query, 'v0') user_defined_0,
	element_at(impression.tag_url.query, 'v1') as user_defined_1,
	element_at(impression.tag_url.query, 'v2') as user_defined_2,
	element_at(impression.tag_url.query, 'v3') as user_defined_3,
	element_at(impression.tag_url.query, 'v4') as user_defined_4,
	element_at(impression.tag_url.query, 'v5') as user_defined_5,
	element_at(impression.tag_url.query, 'v6') as user_defined_6,
	element_at(impression.tag_url.query, 'v7') as user_defined_7,
	element_at(impression.tag_url.query, 'v8') as user_defined_8,
	element_at(impression.tag_url.query, 'v9') as user_defined_9,
	event_group.user_agent,
	event_group.os,
	event_group.os_version,
	event_group.browser,
	event_group.browser_version,
	element_at(impression.others, 'accept_language_header') language_header,
	element_at(impression.others, 'accept_encoding_header') encoding_header,
	impression.http_status_code status_code,
	event_group.device_type,
	' ' device_type,
	' ' device_make,
	event_group.network,
	event_group.network_type,
	event_group.throughput,
	event_group.zip,
	event_group.city,
	event_group.region_code,
	event_group.country_code,
	event_group.dma,
	event_group.msa,
	event_group.latitude,
	event_group.longitude,
	event_group.area_code,
	event_group.fips,
	event_group.timezone,
	(
		CASE
			WHEN (CARDINALITY(impression.tag_url.query) = 0) THEN NULL ELSE impression.tag_url.query
		END
	) AS tag_qs,
	(
		CASE
			WHEN (CARDINALITY(filter(impression.other_entity_keys, o->o.entity_type = 'ck')) = 0) THEN NULL ELSE filter(impression.other_entity_keys, o->o.entity_type = 'ck')
		END
	) AS cookies,
	(
		CASE
			WHEN (CARDINALITY(impression.others) = 0) THEN NULL ELSE impression.others
		END
	) AS others,
	event_date as eventdate
from aqfer.collation_daily_v5_1
	cross join unnest(aqfer.collation_daily_v5_1.event_groups) as e(event_group)
	cross join unnest(aqfer.collation_daily_v5_1.impressions) as i(impression)
	cross join unnest(impression.clicks) as c(click)
where event_date = '${prev_date}'
	and event_group.event_id = impression.event_group_id
	and has_impressions = True and impression.has_click = True

union
select 'engagement' as "event_type",
	-- impression.tag_url.query['aqet']
	event_date,
	date_format(
		from_unixtime(impression.event_timestamp),
		'%Y-%m-%d %H:%i:%s GMT'
	) human_readable_timestamp,
	impression.event_timestamp event_timestamp_gmt,
	engagement.event_id,
	entity_id,
	entity_domain,
	entity_type,
	SPLIT_PART(
		filter(impression.other_entity_keys, o->o.entity_type = 'ip') [ 1 ].entity_domain,
		'_z_',
		4
	) as ip_type,
	filter(impression.other_entity_keys, o->o.entity_type = 'ip') [ 1 ].entity_id as ip_address,
	COALESCE(
		element_at(impression.tag_url.query, 'suu'),
		CASE
			WHEN cardinality(
				FILTER(
					impression.other_entity_keys,
					x->x.entity_type = 'statid'
				)
			) > 0 then case
				when FILTER(
					impression.other_entity_keys,
					x->x.entity_type = 'statid'
				) [ 1 ].entity_id = 'NA' THEN NULL ELSE FILTER(
					impression.other_entity_keys,
					x->x.entity_type = 'statid'
				) [ 1 ].entity_id
			end else NULL
		END,
		to_base64url(
			sha256(
				to_utf8(
					concat(
						filter(impression.other_entity_keys, o->o.entity_type = 'ip') [ 1 ].entity_id,
						event_group.user_agent,
						element_at(impression.others, 'accept_encoding_header'),
						element_at(impression.others, 'accept_language_header')
					)
				)
			)
		)
	) analytic_id,
	entity_id as cookie,
    element_at(impression.tag_url.query, 'pnn') as partner_name,
	COALESCE(
		element_at(impression.tag_url.query, 'puu'),
		CASE
			WHEN cardinality(
				FILTER(impression.other_entity_keys, x->x.entity_domain = '') -- CHANGE TO puu_entity_domain
			) > 0 then CASE
				WHEN FILTER(
					impression.other_entity_keys,
					x->x.entity_domain = ''
				) [ 1 ].entity_id IN ('NA', '0', 'undefined') THEN NULL ELSE FILTER(
					impression.other_entity_keys,
					x->x.entity_domain = '' -- CHANGE TO puu_entity_domain
				) [ 1 ].entity_id
			END else NULL
		END
	) partner_user_id,
	element_at(impression.tag_url.query, 'dmn') as domain,
    element_at(impression.tag_url.query, 'pn') as path,
	COALESCE(
		element_at(impression.tag_url.query, 'qs'),
		element_at(impression.tag_url.query, 'pqs')
	) page_qs,
	COALESCE(
		element_at(impression.tag_url.query, 'rd'),
		element_at(impression.tag_url.query, 'rdn')
	) referrer_domain,
	element_at(impression.tag_url.query, 'rpn') referrer_path,
	CONCAT(
		COALESCE(
			element_at(impression.tag_url.query, 'rd'),
			element_at(impression.tag_url.query, 'rdn')
		),
		element_at(impression.tag_url.query, 'rpn')
	) referrer_url,
	COALESCE(
		element_at(impression.tag_url.query, 'imp'),
		element_at(impression.tag_url.query, 'aut__query_imp')
	) impression_id,
	COALESCE(
		impression.agency_id,
		element_at(impression.tag_url.query, 'ag')
	) agency_id,
	COALESCE(
		impression.advertiser_id,
		element_at(impression.tag_url.query, 'adv')
	) advertiser_id,
	COALESCE(
		element_at(impression.tag_url.query, 'ca'),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'utm_campaign=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				element_at(impression.tag_url.query, 'qs'),
				'utm_campaign=([^&]*)',
				1
			)
		)
	) campaign_id,
	COALESCE(
		element_at(impression.tag_url.query, 'ch'),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'utm_medium=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(element_at(impression.tag_url.query, 'qs'), '?') [ 2 ],
				'utm_medium=([^&]*)',
				1
			)
		),
		element_at(impression.tag_url.query, 'utm_medium'),
		try(
			regexp_extract(
				element_at(impression.tag_url.query, 'qs'),
				'utm_medium=([^&]*)',
				1
			)
		)
	) channel,
	element_at(impression.tag_url.query, 'cr') as creative_id,
	element_at(impression.tag_url.query, 'pl') as placement_id,
	COALESCE(
		element_at(impression.tag_url.query, 'si'),
		element_at(impression.tag_url.query, 'sid'),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'sid=([^&]*)',
				1
			)
		)
	) site_id,
	element_at(impression.tag_url.query, 'pubid') as publisher_id,
	element_at(impression.tag_url.query, 'ivp') as inventory_partner,
	COALESCE(
		element_at(impression.tag_url.query, 'mp'),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'gclsrc=([^&]*)',
				1
			)
		)
	) media_partner,
	impression.source,
	COALESCE(
		element_at(impression.tag_url.query, 'srch'),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'utm_term=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'searchTerm=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(element_at(impression.tag_url.query, 'qs'), '?') [ 2 ],
				'utm_term=([^&]*)',
				1
			)
		)
	) search_terms,
	element_at(impression.tag_url.query, 'ru') as click_redirect_url,
	COALESCE(
		element_at(impression.tag_url.query, 'clk'),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'gclid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'msclkid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'fbclid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'dclid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'wbraid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(impression.tag_url.query [ 'ru' ], '?') [ 2 ],
				'gbraid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(element_at(impression.tag_url.query, 'qs'), '?') [ 2 ],
				'gclid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(element_at(impression.tag_url.query, 'qs'), '?') [ 2 ],
				'msclkid=([^&]*)',
				1
			)
		),
		try(
			regexp_extract(
				split(element_at(impression.tag_url.query, 'qs'), '?') [ 2 ],
				'fbclid=([^&]*)',
				1
			)
		)
	) click_id,
	element_at(impression.tag_url.query, 'v0') user_defined_0,
	element_at(impression.tag_url.query, 'v1') as user_defined_1,
	element_at(impression.tag_url.query, 'v2') as user_defined_2,
	element_at(impression.tag_url.query, 'v3') as user_defined_3,
	element_at(impression.tag_url.query, 'v4') as user_defined_4,
	element_at(impression.tag_url.query, 'v5') as user_defined_5,
	element_at(impression.tag_url.query, 'v6') as user_defined_6,
	element_at(impression.tag_url.query, 'v7') as user_defined_7,
	element_at(impression.tag_url.query, 'v8') as user_defined_8,
	element_at(impression.tag_url.query, 'v9') as user_defined_9,
	event_group.user_agent,
	event_group.os,
	event_group.os_version,
	event_group.browser,
	event_group.browser_version,
	element_at(impression.others, 'accept_language_header') language_header,
	element_at(impression.others, 'accept_encoding_header') encoding_header,
	impression.http_status_code status_code,
	event_group.device_type,
	' ' device_type,
	' ' device_make,
	event_group.network,
	event_group.network_type,
	event_group.throughput,
	event_group.zip,
	event_group.city,
	event_group.region_code,
	event_group.country_code,
	event_group.dma,
	event_group.msa,
	event_group.latitude,
	event_group.longitude,
	event_group.area_code,
	event_group.fips,
	event_group.timezone,
	(
		CASE
			WHEN (CARDINALITY(impression.tag_url.query) = 0) THEN NULL ELSE impression.tag_url.query
		END
	) AS tag_qs,
	(
		CASE
			WHEN (CARDINALITY(filter(impression.other_entity_keys, o->o.entity_type = 'ck')) = 0) THEN NULL ELSE filter(impression.other_entity_keys, o->o.entity_type = 'ck')
		END
	) AS cookies,
	(
		CASE
			WHEN (CARDINALITY(impression.others) = 0) THEN NULL ELSE impression.others
		END
	) AS others,
	event_date as eventdate
from aqfer.collation_daily_v5_1
	cross join unnest(aqfer.collation_daily_v5_1.event_groups) as e(event_group)
	cross join unnest(aqfer.collation_daily_v5_1.impressions) as i(impression)
	cross join unnest(impression.engagements) as e(engagement)
where event_date = '${prev_date}'
	and event_group.event_id = impression.event_group_id
	and has_impressions = True and cardinality(impression.engagements) > 0
  • Table of contents

Was this article helpful?