From caf93420a1fb45168e8c7c29f1243c11b101b316 Mon Sep 17 00:00:00 2001 From: eddycollotonn Date: Fri, 24 May 2024 12:52:34 -0600 Subject: [PATCH 1/6] Can check mediatrace xml output against fields stored in config.yaml. Currently not exporting XML file, but can modify script slightly to do so. --- config/command_config.yaml | 10 ++--- config/config.yaml | 12 ++++-- src/AV_Spex/av_spex_the_file.py | 25 ++++++++++++ src/AV_Spex/checks/mediatrace_check.py | 54 ++++++++++++++++++++++++++ 4 files changed, 92 insertions(+), 9 deletions(-) create mode 100644 src/AV_Spex/checks/mediatrace_check.py diff --git a/config/command_config.yaml b/config/command_config.yaml index dc88736..7ef07cd 100644 --- a/config/command_config.yaml +++ b/config/command_config.yaml @@ -3,8 +3,8 @@ outputs: access_file: 'no' difference_csv: 'no' fixity: - check_fixity: 'yes' - check_stream_fixity: 'yes' + check_fixity: 'no' + check_stream_fixity: 'no' embed_stream_fixity: 'no' output_fixity: 'no' overwrite_stream_fixity: 'no' @@ -23,15 +23,15 @@ tools: ## mediaconch policy file name from any xml file in the config directory mediaconch_policy: JPC_AV_NTSC_MKV_2023-11-21.xml ## 'yes' or 'no' - run_mediaconch: 'yes' + run_mediaconch: 'no' mediainfo: ## 'yes' or 'no' check_mediainfo: 'yes' - run_mediainfo: 'no' + run_mediainfo: 'yes' qctools: ## 'yes' or 'no' check_qctools: 'no' - run_qctools: 'yes' + run_qctools: 'no' qct-parse: ## barsDetection can be true, null, or left empty barsDetection: true diff --git a/config/config.yaml b/config/config.yaml index d4df5b3..797f6b2 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -1,9 +1,8 @@ mediaconch_policy: JPC_AV_01709_mkv.xml filename_values: - Collection: '2012_79' - MediaType: '2' - ObjectID: \d{3}_\d{1}[a-zA-Z] - DigitalGeneration: PM + Collection: JPC + MediaType: AV + ObjectID: \d{5} FileExtension: mkv mediainfo_values: expected_general: @@ -140,6 +139,11 @@ ffmpeg_values: DESCRIPTION: ORIGINAL MEDIA TYPE: ENCODED_BY: +mediatrace: + 
TITLE: + DESCRIPTION: + ORIGINAL MEDIA TYPE: + ENCODED_BY: qct-parse: content: silence: diff --git a/src/AV_Spex/av_spex_the_file.py b/src/AV_Spex/av_spex_the_file.py index 83cc8ac..1b8579c 100644 --- a/src/AV_Spex/av_spex_the_file.py +++ b/src/AV_Spex/av_spex_the_file.py @@ -22,6 +22,7 @@ from .checks.fixity_check import check_fixity, output_fixity from .checks.filename_check import check_filenames from .checks.mediainfo_check import parse_mediainfo +from .checks.mediatrace_check import parse_mediatrace from .checks.exiftool_check import parse_exiftool from .checks.ffprobe_check import parse_ffprobe from .checks.embed_fixity import extract_tags, extract_hashes, embed_fixity, validate_embedded_md5 @@ -69,6 +70,22 @@ def run_command(command, input_path, output_type, output_path): logger.debug(f'\nRunning command: {full_command}') subprocess.run(full_command, shell=True, env=env) +def run_mediatrace_command(command, input_path): + ''' + Run a shell command with 4 variables: command name, path to the input file, output type (often '>'), path to the output file + ''' + + # Get the current PATH environment variable + env = os.environ.copy() + env['PATH'] = '/usr/local/bin:' + env.get('PATH', '') + + full_command = f"{command} \"{input_path}\" " + + logger.debug(f'\nRunning mediainfo to generate MediaTrace XML: {full_command}') + output = subprocess.run(full_command, shell=True, capture_output=True) + + return output + # Mediaconch needs its own function, because the command's flags and multiple inputs don't conform to the simple 3 part structure of the other commands def run_mediaconch_command(command, input_path, output_type, output_path): ''' @@ -380,6 +397,14 @@ def main(): # If check_mediainfo is set to 'yes' in command_config.yaml then parse_mediainfo(mediainfo_output_path) # Run parse functions defined in the '_check.py' scripts + + # mediatrace_output_path = os.path.join(destination_directory, f'{video_id}_mediatrace_output.txt') + if 
command_config.command_dict['tools']['mediainfo']['check_mediainfo'] == 'yes': + # If check_mediainfo is set to 'yes' in command_config.yaml then + mediatrace_output = run_mediatrace_command("mediainfo --Details=1 --Output=XML", video_path) + mediatrace_xml = mediatrace_output.stdout.decode('utf-8') + parse_mediatrace(mediatrace_xml) + # Run parse functions defined in the '_check.py' scripts ffprobe_output_path = os.path.join(destination_directory, f'{video_id}_ffprobe_output.txt') if command_config.command_dict['tools']['ffprobe']['run_ffprobe'] == 'yes': diff --git a/src/AV_Spex/checks/mediatrace_check.py b/src/AV_Spex/checks/mediatrace_check.py new file mode 100644 index 0000000..94fbf32 --- /dev/null +++ b/src/AV_Spex/checks/mediatrace_check.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import os + +import subprocess +import xml.etree.ElementTree as ET + +from ..utils.log_setup import logger +from ..utils.find_config import config_path + +def parse_mediatrace(xml_content): + expected_mediatrace = config_path.config_dict['mediatrace'] + expected_mt_keys = expected_mediatrace.keys() + + # Parse the XML content + root = ET.fromstring(xml_content) + + # Define the namespace + ns = {'mt': 'https://mediaarea.net/mediatrace'} + + mediatrace_output = {} + for mt_key in expected_mt_keys: + + # Find all 'block' elements with the name attribute matching 'SimpleTag' + simple_tags = root.findall(".//mt:block[@name='SimpleTag']", ns) + + for simple_tag in simple_tags: + # Find the 'TagName' block with the specific string_we_have + tag_name_block = simple_tag.find(f".//mt:block[@name='TagName']/mt:data[.='{mt_key}']", ns) + if tag_name_block is not None: + # Find the corresponding 'TagString' block + tag_string_block = simple_tag.find(f".//mt:block[@name='TagString']/mt:data", ns) + if tag_string_block is not None: + mediatrace_output[mt_key] = tag_string_block.text + #found = True + break + #if not found: + # mediatrace_output[mt_key] = None + + 
mediatrace_differences = [] + for expected_key, expected_value in expected_mediatrace.items(): + # defines variables "expected_key" and "expected_value" to the dictionary "expected_mediatrace" + if expected_key not in mediatrace_output: + mediatrace_differences.append(f"MediaTrace metadata field {expected_key} does not exist") + elif len(mediatrace_output[expected_key]) == 0: + # count the values in the dictionary "mediatrace_output" with 'len', if the values are zero, then: + mediatrace_differences.append(f"MediaTrace: {expected_key} is empty") + # append this string to the list "mediatrace_differences" + + if mediatrace_differences: + logger.critical("\nSome specified MediaTrace fields or values are missing or don't match:") + for diff in mediatrace_differences: + logger.critical(f"{diff}") From 79a10bd33c4f409db83ba925f5b74e079b88b9bc Mon Sep 17 00:00:00 2001 From: eddycollotonn Date: Fri, 24 May 2024 15:53:11 -0600 Subject: [PATCH 2/6] now exports and checks mediatrace.xml file --- src/AV_Spex/av_spex_the_file.py | 7 +++---- src/AV_Spex/checks/mediatrace_check.py | 7 ++++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/AV_Spex/av_spex_the_file.py b/src/AV_Spex/av_spex_the_file.py index 1b8579c..8cfc23f 100644 --- a/src/AV_Spex/av_spex_the_file.py +++ b/src/AV_Spex/av_spex_the_file.py @@ -398,12 +398,11 @@ def main(): parse_mediainfo(mediainfo_output_path) # Run parse functions defined in the '_check.py' scripts - # mediatrace_output_path = os.path.join(destination_directory, f'{video_id}_mediatrace_output.txt') + mediatrace_output_path = os.path.join(destination_directory, f'{video_id}_mediatrace_output.xml') if command_config.command_dict['tools']['mediainfo']['check_mediainfo'] == 'yes': # If check_mediainfo is set to 'yes' in command_config.yaml then - mediatrace_output = run_mediatrace_command("mediainfo --Details=1 --Output=XML", video_path) - mediatrace_xml = mediatrace_output.stdout.decode('utf-8') - parse_mediatrace(mediatrace_xml) 
+ run_command("mediainfo --Details=1 --Output=XML", video_path, '>', mediatrace_output_path) + parse_mediatrace(mediatrace_output_path) # Run parse functions defined in the '_check.py' scripts ffprobe_output_path = os.path.join(destination_directory, f'{video_id}_ffprobe_output.txt') diff --git a/src/AV_Spex/checks/mediatrace_check.py b/src/AV_Spex/checks/mediatrace_check.py index 94fbf32..04cd7bf 100644 --- a/src/AV_Spex/checks/mediatrace_check.py +++ b/src/AV_Spex/checks/mediatrace_check.py @@ -9,12 +9,13 @@ from ..utils.log_setup import logger from ..utils.find_config import config_path -def parse_mediatrace(xml_content): +def parse_mediatrace(xml_file): expected_mediatrace = config_path.config_dict['mediatrace'] expected_mt_keys = expected_mediatrace.keys() - # Parse the XML content - root = ET.fromstring(xml_content) + # Parse the XML file + tree = ET.parse(xml_file) + root = tree.getroot() # Define the namespace ns = {'mt': 'https://mediaarea.net/mediatrace'} From a9918795a8caddea8c7a9bd11a245738f1af2a3f Mon Sep 17 00:00:00 2001 From: eddycollotonn Date: Fri, 21 Jun 2024 14:00:30 -0600 Subject: [PATCH 3/6] small code cleanup for contentFilter detection to have the same conditional structure as other qct-parse options. 
--- src/AV_Spex/checks/qct_parse.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/AV_Spex/checks/qct_parse.py b/src/AV_Spex/checks/qct_parse.py index 684b28b..985244c 100644 --- a/src/AV_Spex/checks/qct_parse.py +++ b/src/AV_Spex/checks/qct_parse.py @@ -631,11 +631,12 @@ def run_qctparse(video_path, qctools_output_path, qctools_check_output): # set the path for the thumbnail export metadata_dir = os.path.dirname(qctools_output_path) thumbPath = os.path.join(metadata_dir, "ThumbExports") - if not os.path.exists(thumbPath): - os.makedirs(thumbPath) - else: - thumbPath = uniquify(thumbPath) - os.makedirs(thumbPath) + if qct_parse['thumbExport']: + if not os.path.exists(thumbPath): + os.makedirs(thumbPath) + else: + thumbPath = uniquify(thumbPath) + os.makedirs(thumbPath) profile = {} # init a dictionary where we'll store reference values from config.yaml file @@ -654,7 +655,7 @@ def run_qctparse(video_path, qctools_output_path, qctools_check_output): break ######## Iterate Through the XML for content detection ######## - if qct_parse['contentFilter'] != None: + if qct_parse['contentFilter']: logger.debug(f"Checking for segments of {os.path.basename(video_path)} that match the content filter {qct_parse['contentFilter']}\n") duration_str = get_duration(video_path) contentFilter_name = qct_parse['contentFilter'] @@ -696,7 +697,7 @@ def run_qctparse(video_path, qctools_output_path, qctools_check_output): if durationStart == "" and durationEnd == "": logger.error("No color bars detected\n") if barsStartString and barsEndString: - print_bars_durations(qctools_check_output,thumbPath,barsEndString) + print_bars_durations(qctools_check_output,barsStartString,barsEndString) if qct_parse['thumbExport']: barsStampString = dts2ts(durationStart) printThumb(video_path,"color_bars",startObj,thumbPath,"first_frame",barsStampString) From fcf932a451a60093e87c8a73f5aafa012264cc02 Mon Sep 17 00:00:00 2001 From: eddycollotonn Date: Fri, 21 Jun 2024 
15:33:22 -0600 Subject: [PATCH 4/6] small change to comments explaining one line --- src/AV_Spex/checks/qct_parse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/AV_Spex/checks/qct_parse.py b/src/AV_Spex/checks/qct_parse.py index 985244c..4d69fcc 100644 --- a/src/AV_Spex/checks/qct_parse.py +++ b/src/AV_Spex/checks/qct_parse.py @@ -711,7 +711,7 @@ def run_qctparse(video_path, qctools_output_path, qctools_check_output): evalBars(startObj,pkt,durationStart,durationEnd,framesList) # Define the keys for which you want to calculate the average keys_to_average = ['YMAX', 'YMIN', 'UMIN', 'UMAX', 'VMIN', 'VMAX', 'SATMIN', 'SATMAX'] - # Initialize a dictionary to store the average values + # Create a dictionary of the median values of each of the keys from the frameDict created in the evalBars function average_dict = {key: median([float(frameDict[key]) for frameDict in framesList if key in frameDict]) for key in keys_to_average} if average_dict is None: logger.critical(f"\nSomething went wrong - Cannot run evaluate color bars\n") From f3cb1bf56b2eec6ad141bb35b51f530ba810809d Mon Sep 17 00:00:00 2001 From: eddycollotonn Date: Fri, 21 Jun 2024 18:54:10 -0600 Subject: [PATCH 5/6] removed custom field check from mediainfo, small fixes to mediatrace check, and added mediatrace fields to config.yaml --- config/config.yaml | 16 +++++++++------- src/AV_Spex/checks/mediainfo_check.py | 20 +------------------- src/AV_Spex/checks/mediatrace_check.py | 4 ++++ 3 files changed, 14 insertions(+), 26 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index 797f6b2..fa0c423 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -41,12 +41,6 @@ mediainfo_values: Sampling rate: 48.0 kHz Bit depth: 24 bits Compression mode: Lossless - expected_custom_fields: - Title: - Encoded by: - Description: - Encoding settings: - ORIGINAL MEDIA TYPE: exiftool_values: File Type: MKV File Type Extension: mkv @@ -140,10 +134,18 @@ ffmpeg_values: ORIGINAL MEDIA 
TYPE: ENCODED_BY: mediatrace: + COLLECTION: TITLE: + CATALOG_NUMBER: DESCRIPTION: - ORIGINAL MEDIA TYPE: + DATE_DIGITIZED: + ENCODING_SETTINGS: ENCODED_BY: + ORIGINAL_MEDIA_TYPE: + DATE_TAGGED: + TERMS_OF_USE: + _TECHNICAL_NOTES: + _ORIGINAL_FPS: qct-parse: content: silence: diff --git a/src/AV_Spex/checks/mediainfo_check.py b/src/AV_Spex/checks/mediainfo_check.py index 173ff40..9fc2462 100644 --- a/src/AV_Spex/checks/mediainfo_check.py +++ b/src/AV_Spex/checks/mediainfo_check.py @@ -13,7 +13,6 @@ def parse_mediainfo(file_path): expected_general = config_path.config_dict['mediainfo_values']['expected_general'] expected_video = config_path.config_dict['mediainfo_values']['expected_video'] expected_audio = config_path.config_dict['mediainfo_values']['expected_audio'] - expected_custom_fields = config_path.config_dict['mediainfo_values']['expected_custom_fields'] section_data = {} # creates empty dictionary "section_data" @@ -114,32 +113,15 @@ def parse_mediainfo(file_path): if actual_value not in expected_value: mediainfo_differences.append(f"Metadata field in Audio: {expected_key} has a value of {actual_value}\nThe expected value is: {expected_value}") # append this string to the list "mediainfo_differences" - - custom_mediainfo_differences = [] - for expected_key, expected_value in expected_custom_fields.items(): - # defines variables "expected_key" and "expected_value" to the dictionary "expected_audio" - if expected_key not in (section_data["General"]): - custom_mediainfo_differences.append(f"metadata field in General: {expected_key} does not exist") - elif len(section_data["General"][expected_key]) == 0: - # count the values in the nested dictionary "General" with 'len', if the values are zero, then: - custom_mediainfo_differences.append(f"General: {expected_key} is empty") - # append this string to the list "mediainfo_differences" - if not mediainfo_differences and not custom_mediainfo_differences: + if not mediainfo_differences: # if the list 
"mediainfo_differences" is empty, then logger.info("\nAll specified fields and values found in the MediaInfo output.") - elif not mediainfo_differences: - logger.info("\nAll specified metadata fields and values found in the MediaInfo output, but some custom embedded fields are missing or don't match.") else: # if the list "mediainfo_differences" is not empty, then logger.critical(f"\nSome specified MediaInfo fields or values are missing or don't match:") for diff in mediainfo_differences: logger.critical(f'{diff}') - - if custom_mediainfo_differences: - logger.critical("\nThe specified MediaInfo fields or values for embedded metadata are below:") - for custom_diff in custom_mediainfo_differences: - logger.critical(f'{custom_diff}') # Only execute if this file is run directly, not imported) if __name__ == "__main__": diff --git a/src/AV_Spex/checks/mediatrace_check.py b/src/AV_Spex/checks/mediatrace_check.py index 04cd7bf..5a68cfd 100644 --- a/src/AV_Spex/checks/mediatrace_check.py +++ b/src/AV_Spex/checks/mediatrace_check.py @@ -49,6 +49,10 @@ def parse_mediatrace(xml_file): mediatrace_differences.append(f"MediaTrace: {expected_key} is empty") # append this string to the list "mediatrace_differences" + if not mediatrace_differences: + # if the list "mediatrace_differences" is empty, then + logger.info("\nAll specified mediatrace fields and values found in output.") + if mediatrace_differences: logger.critical("\nSome specified MediaTrace fields or values are missing or don't match:") for diff in mediatrace_differences: From 81115974ab3e2a9523b154ba0904f00a5b43a174 Mon Sep 17 00:00:00 2001 From: eddycollotonn Date: Mon, 24 Jun 2024 09:59:42 -0600 Subject: [PATCH 6/6] additional logging message to clarify that 'mediainfo --Details=1 --Output=XML' is the mediatrace xml command --- src/AV_Spex/av_spex_the_file.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/AV_Spex/av_spex_the_file.py b/src/AV_Spex/av_spex_the_file.py index 8cfc23f..05ba1e3 100644 --- 
a/src/AV_Spex/av_spex_the_file.py +++ b/src/AV_Spex/av_spex_the_file.py @@ -400,6 +400,7 @@ def main(): mediatrace_output_path = os.path.join(destination_directory, f'{video_id}_mediatrace_output.xml') if command_config.command_dict['tools']['mediainfo']['check_mediainfo'] == 'yes': + logger.info(f"\nCreating MediaTrace XML file to check custom MKV Tag metadata fields:") # If check_mediainfo is set to 'yes' in command_config.yaml then run_command("mediainfo --Details=1 --Output=XML", video_path, '>', mediatrace_output_path) parse_mediatrace(mediatrace_output_path)