import schema
import s3_utils
# S3 location of the region lookup file that setup() reads. The "<>" value
# appears to be a placeholder that must be replaced with a real path before
# use (see the example in the trailing comment) - TODO confirm.
global_regions_lookup = "<>" #! e.g. 's3://.default@com.aqfer..development//region_countries.txt'
# Filled in by setup(): for each row of the lookup file, maps the row's first
# field to its second field. Empty until setup() has run.
regions_dict = {}
def setup():
    """Load the region lookup file into the module-level ``regions_dict``.

    Fetches ``global_regions_lookup`` through
    ``s3_utils.get_s3_file_content`` and, for every row returned, stores the
    row's second field under its first field. Entries already present under
    the same key are overwritten. Returns nothing.
    """
    rows = s3_utils.get_s3_file_content(
        global_regions_lookup,
        has_header=True,
        delimiter=':',
        text_qualifier='"',
    )
    # Feed (key, value) pairs straight into the shared dict, row by row.
    regions_dict.update((row[0], row[1]) for row in rows)
def dat_hnd(rec: any) -> any:
    """Stamp the event id and derive region fields from ``forensicflag``.

    The record is modified in place:

    * ``rec['event_id']`` gets ``str(rec['event_timestamp'])`` appended.
    * If bit 12 (the "EU Bit") of ``rec['others']['forensicflag']`` is set,
      ``country_code`` / ``global_region`` become ``"EU"`` / ``"EEA"``.
    * If bit 25 (the "US Bit") is set, they become ``"US"`` / ``"NA"``.
      This check runs second, so it wins when both bits are set.

    When neither bit is set the two region fields are left untouched.

    Args:
        rec: Mapping with ``event_id``, ``event_timestamp`` and a nested
            ``others['forensicflag']`` value that ``int()`` can parse.

    Returns:
        A one-element list containing the same (mutated) ``rec`` object.

    Raises:
        KeyError: If a required key is missing from ``rec``.
        ValueError: If ``forensicflag`` cannot be converted to ``int``.
    """
    # append timestamp to event_id - this way we are only converting ts 1x
    rec['event_id'] += str(rec['event_timestamp'])

    # Parse the flag once; both bit tests below read the same integer.
    forensic_flag = int(rec['others']['forensicflag'])

    ## determine country_code and global_region by forensicflag binary value
    # Bitwise `&` isolates exactly one bit. The logical `and` used previously
    # was truthy whenever ANY bit at or above the shift position was set.
    if (forensic_flag >> 12) & 1:  # bit 12 "EU Bit"
        rec['country_code'] = "EU"
        rec['global_region'] = "EEA"
    if (forensic_flag >> 25) & 1:  # bit 25 "US Bit"
        rec['country_code'] = "US"
        rec['global_region'] = "NA"
    return [rec]
def schema_handler_v1(sch: schema.Schema) -> (schema.Schema, any):
    """Run ``setup()`` and pair the given schema with the record handler.

    Args:
        sch: Schema object; returned as-is without modification.

    Returns:
        A ``(sch, dat_hnd)`` tuple: the unchanged schema and the per-record
        handler function.
    """
    # Populate the region lookup before handing out the record handler.
    setup()
    record_handler = dat_hnd
    return sch, record_handler