import schema
import json
import aq_utils
import os
import traceback
from urllib.parse import urlparse
import sys
sys.path.append(os.environ.get("USER_DIR"))
from py_config_vars import regions_dict, entity_domain_attributes, puu_entity_type, puu_entity_domain
# Identifier of the current batch run, taken from the BATCH_ID environment
# variable. When the variable is absent the placeholder "test_1" is used, so
# the guard below only fires for a BATCH_ID that is set but empty.
run_id = os.getenv("BATCH_ID", "test_1")
if run_id == "":
    raise Exception("BATCH_ID NOT SET")
def extract_event_id(string, key, delimeter):
    """Return the value attached to ``key`` in a comma-separated string.

    ``string`` is split on ``","``. The first token that contains ``key``
    (substring match) and also contains ``delimeter`` is split on
    ``delimeter``, and the piece immediately after the first delimiter is
    returned.

    Args:
        string: Comma-separated text such as ``"a=1,z_evid=abc"``. ``None``
            or an empty string yields ``""``.
        key: Substring identifying the wanted token.
        delimeter: Separator between the key and its value inside a token.

    Returns:
        The extracted value, or ``""`` when no usable token is found.
    """
    if not string:
        return ""
    for token in string.split(","):
        if key not in token:
            continue
        pieces = token.split(delimeter)
        # A token that mentions the key but has no delimiter carries no
        # value; skip it instead of failing with IndexError.
        if len(pieces) > 1:
            return pieces[1]
    return ""
def process_entity_domain(rec, id_type, id_format):
    """Build the ``_z_``-delimited entity domain string for a record.

    The result has the shape
    ``{owner}_z_{partner}_z_{property}_z_{id_type}_z_{id_format}``.

    ``owner`` and ``partner`` come from ``entity_domain_attributes``. When the
    record's ``entity_domain`` already contains ``_z_`` it is split into its
    five parts: the third part becomes the property, and the other parts are
    used only as fallbacks for values that are empty.

    Args:
        rec: Mapping exposing ``get``; its ``entity_domain`` entry is read.
            A missing or ``None`` entry is treated as an empty property.
        id_type: Identifier type; falls back to the type embedded in an
            already-composed ``entity_domain`` when falsy.
        id_format: Identifier format; falls back to the format embedded in an
            already-composed ``entity_domain`` when falsy.

    Returns:
        The formatted entity domain string.

    Raises:
        ValueError: If ``entity_domain`` contains ``_z_`` but does not split
            into exactly five parts.
    """
    domain_template = "{owner}_z_{partner}_z_{property}_z_{id_type}_z_{id_format}"
    # ``rec.get`` may return None; normalise so the substring test below
    # cannot fail with TypeError.
    property_value = rec.get('entity_domain') or ""
    owner = entity_domain_attributes.get("owner", "")
    partner = entity_domain_attributes.get("partner", "")
    resolved_type = id_type
    resolved_format = id_format
    if "_z_" in property_value:
        parts = property_value.split("_z_")
        if len(parts) != 5:
            raise ValueError(
                "entity_domain must have exactly 5 '_z_'-separated parts, got {count}: {value!r}".format(
                    count=len(parts), value=property_value))
        split_owner, split_partner, property_value, split_type, split_format = parts
        # Configured / caller-supplied values win; embedded ones fill gaps.
        owner = owner or split_owner
        partner = partner or split_partner
        resolved_type = id_type or split_type
        resolved_format = id_format or split_format
    return domain_template.format(owner=owner, partner=partner, property=property_value,
                                  id_type=resolved_type, id_format=resolved_format)
def dat_hnd(rec: any) -> any:
    """Enrich a single record in place and return it wrapped in a list.

    Steps, in order:
      * default ``query.tid`` to ``"tid"`` and stamp ``runid`` with the
        module-level ``run_id``;
      * add entries to ``other_entity_keys`` for ``query.suu``,
        ``query.puu``, every cookie whose name is not ``"none"``,
        ``hashed_ip`` and ``raw_ip`` (each only when present and truthy);
      * set ``global_region`` to the first region in ``regions_dict`` whose
        countries contain ``country_code``, else ``"Unknown"``;
      * rewrite ``entity_domain`` through ``process_entity_domain``;
      * default ``query.pevid`` (from the ``z_evid`` entry of ``aqfer_var``)
        and ``query.src`` (``"aUT"``).

    Args:
        rec: Mutable mapping with at least ``query`` (a mutable mapping),
            ``country_code`` and, when ``query.pevid`` is empty,
            ``aqfer_var``. A null ``cookie`` or ``other_entity_keys`` value
            is treated as empty.

    Returns:
        A one-element list containing the same ``rec`` object.
    """
    query = rec['query']
    # ! tenant_id field
    query['tid'] = query.get('tid') or "tid"
    rec['runid'] = run_id

    # ! Generating additional other_entity_keys
    other_entity_items = []
    if query.get('suu'):
        other_entity_items.append({
            "entity_type": "statid",
            "entity_domain": process_entity_domain(rec, "statid", "raw"),
            "entity_id": query.get('suu'),
        })
    if query.get('puu'):
        other_entity_items.append({
            "entity_type": puu_entity_type,
            "entity_domain": puu_entity_domain,
            "entity_id": query.get('puu'),
        })

    # ``rec.get('cookie', {})`` would still return None for a present-but-null
    # field, so normalise with ``or`` before iterating.
    cookies = rec.get('cookie') or {}
    cookie_owner = entity_domain_attributes.get("owner", "")
    # NOTE(review): if neither query.dmn nor rec.domain is set, host is None
    # and is rendered as the text "None" in the domain - confirm intended.
    cookie_host = query.get('dmn') or rec.get('domain')
    for cookie_name, cookie_value in cookies.items():
        if cookie_name == "none":
            continue
        other_entity_items.append({
            "entity_type": "ck",
            "entity_domain": "{owner}_z__z_{host}_z_{cookie}_z_".format(
                owner=cookie_owner, host=cookie_host, cookie=cookie_name),
            "entity_id": cookie_value,
        })

    if rec.get("hashed_ip"):
        other_entity_items.append({
            "entity_type": "ip",
            "entity_domain": process_entity_domain(rec, "v4", "md5"),
            "entity_id": rec.get("hashed_ip"),
        })
    if rec.get("raw_ip"):
        other_entity_items.append({
            "entity_type": "ip",
            "entity_domain": process_entity_domain(rec, "v4", "raw"),
            "entity_id": rec.get("raw_ip"),
        })

    # Appending new generated entity keys to "other_entity_keys"; a null or
    # absent list is replaced by the generated one instead of failing.
    existing_keys = rec.get('other_entity_keys')
    if existing_keys is None:
        rec['other_entity_keys'] = other_entity_items
    else:
        existing_keys.extend(other_entity_items)

    country_code = rec['country_code']
    rec['global_region'] = next(
        (region for region, countries in regions_dict.items() if country_code in countries),
        "Unknown",
    )

    # Must run after the entity keys above are built: they read the original
    # entity_domain, which is overwritten here.
    rec['entity_domain'] = process_entity_domain(rec, rec.get('entity_type', ''), "raw")

    # REMOVE IF CERTAIN THAT FIELDS WILL ALWAYS BE PRESENT.
    query['pevid'] = query.get("pevid") or extract_event_id(rec['aqfer_var'], "z_evid", "=")
    query['src'] = query.get('src') or "aUT"
    return [rec]
def schema_handler_v1(sch: schema.Schema) -> (schema.Schema, any):
    """Declare the fields that ``dat_hnd`` adds and pair the schema with it.

    Passes ``sch`` to ``schema.add_fields`` once per added field
    (``global_region``, then ``runid``), each declared with a
    ``schema.StringSchema()``.

    Args:
        sch: Schema object to which the two string fields are added.

    Returns:
        A ``(sch, dat_hnd)`` tuple: the schema that was passed in and the
        per-record handler function.
    """
    for added_field_name in ("global_region", "runid"):
        string_field = schema.Field(name=added_field_name, schema=schema.StringSchema())
        schema.add_fields(sch, string_field)
    return (sch, dat_hnd)