Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

COM-12460: add ISO_IEC_13818-4 MPEG-2 AAC compressedAdif test suite #206

Merged
merged 5 commits into from
Nov 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions fluster/decoders/iso_mpeg2_aac.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ def decode(
# pylint: disable=unused-argument
# Addition of .pcm as extension is a must. If it is something else, e.g. ".out" the decoder will output a
# ".wav", which is undesirable.
output_filepath += ".pcm"
output_filepath += ".wav"
run_command(
[self.binary, input_filepath, output_filepath],
[self.binary, "-w", input_filepath, output_filepath],
timeout=timeout,
verbose=verbose,
)
Expand Down
29 changes: 22 additions & 7 deletions fluster/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,24 +18,39 @@

import hashlib
import os
import random
import shutil
import subprocess
import sys
import time
import urllib.request
import zipfile
import platform
from typing import List, Optional
from threading import Lock


TARBALL_EXTS = ("tar.gz", "tgz", "tar.bz2", "tbz2", "tar.xz")


def download(url: str, dest_dir: str) -> None:
"""Downloads a file to a directory"""
with urllib.request.urlopen(url) as response:
dest_path = os.path.join(dest_dir, url.split("/")[-1])
with open(dest_path, "wb") as dest:
shutil.copyfileobj(response, dest)
# Serializes downloads across pool workers so concurrent writers do not
# clobber each other's partially-written files.
download_lock = Lock()


def download(url: str, dest_dir: str, max_retries: int = 5) -> None:
    """Download *url* into *dest_dir*, retrying with exponential backoff.

    The file is written under its URL basename inside ``dest_dir``. A
    module-level lock serializes downloads issued from concurrent pool
    workers.

    Args:
        url: Source URL; the final path component becomes the file name.
        dest_dir: Existing directory the file is written into.
        max_retries: Maximum number of attempts before giving up.

    Raises:
        urllib.error.URLError: If every attempt fails. Propagating the
            error is essential: callers submit this function via
            ``Pool.apply_async`` and rely on ``job.successful()`` /
            ``error_callback`` to detect failures — a swallowed exception
            would make a failed download look successful.
    """
    for attempt in range(max_retries):
        try:
            with download_lock:
                with urllib.request.urlopen(url) as response:
                    dest_path = os.path.join(dest_dir, url.split("/")[-1])
                    with open(dest_path, "wb") as dest:
                        shutil.copyfileobj(response, dest)
            return
        except urllib.error.URLError:
            if attempt == max_retries - 1:
                # Exhausted all attempts: re-raise so the failure is
                # visible to the caller instead of only printed.
                raise
            # Jittered exponential backoff before the next attempt.
            time.sleep(random.uniform(1, 2**attempt))


def file_checksum(path: str) -> str:
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ packages = ["fluster", "fluster.decoders"]
[tool.setuptools.data-files]
"share/fluster/test_suites/aac" = [
"test_suites/aac/MPEG2_AAC-ADTS.json",
"test_suites/aac/MPEG2_AAC-ADIF.json",
"test_suites/aac/MPEG4_AAC-ADIF.json",
"test_suites/aac/ISO_IEC_14496-26_2010.json"
]
Expand Down
144 changes: 97 additions & 47 deletions scripts/gen_aac.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@

URL_MPEG2 = BASE_URL + "ittf/PubliclyAvailableStandards/ISO_IEC_13818-4_2004_Conformance_Testing/AAC/"
URL_MPEG2_ADTS = URL_MPEG2 + "compressedAdts"
URL_MPEG2_ADIF = URL_MPEG2 + "compressedAdif"
URL_MPEG2_WAV_REFS = URL_MPEG2 + "referencesWav"
URL_MPEG2_WAV_REFS_MD5 = URL_MPEG2 + "referencesWav/_checksum"

Expand Down Expand Up @@ -98,46 +99,48 @@ def __init__(

def _download_raw_output_references_and_checksums(self, jobs, test_suite, raw_bitstream_links,
raw_bitstream_md5_links):
"""Downlodas raw output reference bitstreams and their checksums"""

"""Downloads raw output reference bitstreams and their checksums"""
with Pool(jobs) as pool:

def _callback_error(err):
print(f"\nError downloading -> {err}\n")
pool.terminate()

downloads = []

print(f"\tDownloading output reference files for test suite {self.suite_name}")

for link in raw_bitstream_links:
file_name = os.path.basename(link)
base_name = file_name.split('.')[0]
main_prefix = "_".join(base_name.split('_')[:2])

directory = os.path.join(test_suite.resources_dir, test_suite.name, main_prefix)
if not os.path.exists(directory):
os.makedirs(directory)

downloads.append(
pool.apply_async(
utils.download,
args=(
link,
os.path.join(
test_suite.resources_dir,
test_suite.name,
os.path.splitext(os.path.basename(link))[0],
),
),
args=(link, directory),
error_callback=_callback_error,
)
)

print(f"\tDownloading output reference checksum files for test suite {self.suite_name}")

for link in raw_bitstream_md5_links:
file_name = os.path.basename(link)
base_name = file_name.split('.')[0]
main_prefix = "_".join(base_name.split('_')[:2])

directory = os.path.join(test_suite.resources_dir, test_suite.name, main_prefix)
if not os.path.exists(directory):
os.makedirs(directory)

downloads.append(
pool.apply_async(
utils.download,
args=(
link,
os.path.join(
test_suite.resources_dir,
test_suite.name,
os.path.splitext(os.path.splitext(os.path.basename(link))[0])[0],
),
),
args=(link, directory),
error_callback=_callback_error,
)
)
Expand All @@ -146,8 +149,12 @@ def _callback_error(err):
pool.join()

for job in downloads:
if not job.successful():
sys.exit("Some download failed")
try:
job.get()
if not job.successful():
raise ValueError("Download task was not successful")
except Exception as e:
sys.exit(f"Some download failed: {e}")

def generate(self, download, jobs):
"""Generates the test suite and saves it to a file"""
Expand All @@ -173,36 +180,47 @@ def generate(self, download, jobs):
data = str(resp.read())
hparser.feed(data)
raw_bitstream_links = [url for url in hparser.links if url.endswith(tuple(RAW_EXTS))]
raw_bitstream_names = [os.path.splitext(os.path.basename(x))[0] for x in raw_bitstream_links]

if not set(compressed_bitstream_names).issubset(raw_bitstream_names):
raise Exception("Following test vectors are missing reference files {}"
.format([x for x in set(compressed_bitstream_names).difference(raw_bitstream_names)]))
else:
raw_bitstream_names = compressed_bitstream_names
raw_bitstream_names = [
os.path.splitext(os.path.basename(x))[0].split('_f')[0] for x in raw_bitstream_links
]

missing_files = [x for x in set(compressed_bitstream_names).difference(raw_bitstream_names)]
if missing_files:
print(f"Missing reference files: {missing_files}")
for missing_file in missing_files:
print(f"Skipping test vector {missing_file}, as the reference file is missing.")

raw_bitstream_names = [name for name in compressed_bitstream_names if name not in missing_files]

# Match and store entries of raw_bitstream_links that contain entries of raw_bitstream_names as substrings
raw_bitstream_links = [link for link in raw_bitstream_links
if any(name in link for name in raw_bitstream_names)]
raw_bitstream_links = [
link for link in raw_bitstream_links if any(name in link for name in raw_bitstream_names)
]

with urllib.request.urlopen(self.url_reference_vectors_checksums) as resp:
data = str(resp.read())
hparser.feed(data)
raw_bitstream_md5_links = [url for url in hparser.links if url.endswith(tuple(MD5_EXTS))]
raw_bitstream_md5_names = [os.path.splitext(os.path.splitext(os.path.basename(x))[0])[0]
for x in raw_bitstream_md5_links]

if not set(compressed_bitstream_names).issubset(raw_bitstream_md5_names):
raise Exception("Following test vectors are missing reference checksum files {}"
.format([x for x in set(compressed_bitstream_names).difference(raw_bitstream_md5_names)]))
else:
raw_bitstream_md5_names = compressed_bitstream_names
raw_bitstream_md5_names = [
os.path.splitext(os.path.splitext(os.path.basename(x))[0].split('_f')[0])[0] for x in
raw_bitstream_md5_links
]

missing_checksum_files = [x for x in set(compressed_bitstream_names).difference(raw_bitstream_md5_names)]
if missing_checksum_files:
print(f"Missing reference checksum files: {missing_checksum_files}")
for missing_checksum in missing_checksum_files:
print(f"Skipping checksum for {missing_checksum}, as the reference file is missing.")

raw_bitstream_md5_names = [name for name in compressed_bitstream_names if name not in missing_checksum_files]

# Match and store entries of raw_bitstream_md5_links that contain entries of raw_bitstream_md5_names
# as substrings
raw_bitstream_md5_links = [link for link in raw_bitstream_md5_links
if any(str(os.path.basename(link)).startswith(name)
for name in raw_bitstream_md5_names)]
raw_bitstream_md5_links = [
link for link in raw_bitstream_md5_links if any(name in link for name in raw_bitstream_md5_names)
]

for source_url in compressed_bitstream_links:
input_filename = os.path.basename(source_url)
Expand All @@ -212,7 +230,6 @@ def generate(self, download, jobs):
)
test_suite.test_vectors[test_vector_name] = test_vector

# Download test suite input files
print(f"Download list of compressed bitstreams from {self.url_test_vectors}")
if download:
test_suite.download(
Expand Down Expand Up @@ -262,16 +279,40 @@ def generate(self, download, jobs):
raise key_err

# Read or calculate checksum of expected raw output
self._fill_checksum_aac(test_vector, dest_dir)
if test_vector.name not in missing_checksum_files:
self._fill_checksum_aac(test_vector, dest_dir)

test_suite.to_json_file(output_filepath)
print("Generate new test suite: " + test_suite.name + ".json")

@staticmethod
def _fill_checksum_aac(test_vector, dest_dir):
base_name = test_vector.name
raw_file = None
ext = None

for ext in RAW_EXTS:
exact_file = os.path.join(dest_dir, base_name + ext)
if os.path.exists(exact_file):
raw_file = exact_file
break

if not raw_file:
for ext in RAW_EXTS:
fallback_file = os.path.join(dest_dir, base_name + '_f00' + ext)
if os.path.exists(fallback_file):
raw_file = fallback_file
break

if not raw_file:
raise Exception(
f"Neither {base_name + ext} nor {base_name + '_f00' + ext} found with extensions {RAW_EXTS} in {dest_dir}"
)

checksum_file = utils.find_by_ext(dest_dir, MD5_EXTS)
if checksum_file is None:
raise Exception("MD5 not found")

with open(checksum_file, "r") as checksum_file:
regex = re.compile(rf"([a-fA-F0-9]{{32,}}).*(?:\.(wav))?")
lines = checksum_file.readlines()
Expand All @@ -284,15 +325,12 @@ def _fill_checksum_aac(test_vector, dest_dir):
)
if match:
test_vector.result = match.group(1).lower()
# Assert that we have extracted a valid MD5 from the file
assert (
# Assert that we have extracted a valid MD5 from the file
assert (
len(test_vector.result) == 32
and re.search(r"^[a-fA-F0-9]{32}$", test_vector.result) is not None
), f"{test_vector.result} is not a valid MD5 hash"
), f"{test_vector.result} is not a valid MD5 hash"

raw_file = utils.find_by_ext(dest_dir, RAW_EXTS)
if raw_file is None or len(raw_file) == 0:
raise Exception(f"RAW file not found in {dest_dir}")
test_vector.result = utils.file_checksum(raw_file)


Expand Down Expand Up @@ -325,6 +363,18 @@ def _fill_checksum_aac(test_vector, dest_dir):
)
generator.generate(not args.skip_download, args.jobs)

generator = AACGenerator(
"MPEG2_AAC-ADIF",
"MPEG2_AAC-ADIF",
Codec.AAC,
"ISO IEC 13818-4 MPEG2 AAC ADIF test suite",
URL_MPEG2_ADIF,
URL_MPEG2_WAV_REFS,
URL_MPEG2_WAV_REFS_MD5,
False,
)
generator.generate(not args.skip_download, args.jobs)

generator = AACGenerator(
"MPEG4_AAC-ADIF",
"MPEG4_AAC-ADIF",
Expand Down
Loading