COM-12461: Create an AAC test suite generator
- Created a new AAC test suite generator, scripts/gen_aac.py
- Tested against the ISO_IEC_13818-4_2004 ADTS conformance suite
mdimopoulos committed Nov 14, 2024
1 parent e997402 commit 3a49e85
Showing 4 changed files with 692 additions and 363 deletions.
1 change: 1 addition & 0 deletions fluster/codec.py
@@ -55,3 +55,4 @@ class OutputFormat(Enum):
GRAY12LE = "gray12le"
GRAY16LE = "gray16le"
UNKNOWN = "Unknown"
FLTP = "fltp"
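The new FLTP value mirrors the planar-float ("fltp") sample format that ffprobe typically reports for AAC streams; gen_aac.py below maps that string onto the enum by name. A minimal sketch of the lookup, assuming fluster is importable:

from fluster.codec import OutputFormat

sample_format = "fltp"  # what ffprobe's stream=sample_fmt entry typically reports for AAC
assert OutputFormat[sample_format.upper()] is OutputFormat.FLTP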
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -43,7 +43,7 @@ packages = ["fluster", "fluster.decoders"]

[tool.setuptools.data-files]
"share/fluster/test_suites/aac" = [
"test_suites/aac/ISO_IEC_13818-4_2004.json",
"test_suites/aac/MPEG2_AAC-ADTS.json",
"test_suites/aac/ISO_IEC_14496-26_2010.json"
]
"share/fluster/test_suites/av1" = [
328 changes: 328 additions & 0 deletions scripts/gen_aac.py
@@ -0,0 +1,328 @@
#!/usr/bin/env python3

# Fluster - testing framework for decoders conformance
# Copyright (C) 2024, Fluendo, S.A.
# Author: Michalis Dimopoulos <[email protected]>, Fluendo, S.A.
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public License
# as published by the Free Software Foundation, either version 3
# of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library. If not, see <https://www.gnu.org/licenses/>.

import argparse
import multiprocessing
import os
import re
import sys
import urllib.request
from html.parser import HTMLParser
from multiprocessing import Pool

# pylint: disable=wrong-import-position
sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
from fluster import utils
from fluster.codec import Codec, OutputFormat
from fluster.test_suite import TestSuite, TestVector

# pylint: enable=wrong-import-position

BASE_URL = "https://standards.iso.org/"
URL_MPEG2 = BASE_URL + "ittf/PubliclyAvailableStandards/ISO_IEC_13818-4_2004_Conformance_Testing/AAC/"
URL_MPEG2_ADTS = URL_MPEG2 + "compressedAdts"
URL_MPEG2_WAV_REFS = URL_MPEG2 + "referencesWav"
URL_MPEG2_WAV_REFS_MD5 = URL_MPEG2 + "referencesWav/_checksum"

BITSTREAM_EXTS = [".adts"]
MD5_EXTS = [".wav.md5sum"]
MD5_EXCLUDES = []
RAW_EXTS = [".wav"]


class HREFParser(HTMLParser):
"""Custom parser to find href links"""

def __init__(self):
self.links = []
super().__init__()

def error(self, message):
print(message)

def handle_starttag(self, tag, attrs):
# Only parse the 'anchor' tag.
if tag == "a":
# Check the list of defined attributes.
for name, value in attrs:
# If href is defined, record the absolute link.
if name == "href":
base_url = BASE_URL if BASE_URL[-1] != "/" else BASE_URL[0:-1]
self.links.append(base_url + value)
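# Note: self.links accumulates across successive feed() calls, so a single parser
# instance can collect links from several directory listings; callers filter the
# collected URLs by file extension afterwards.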


class AACGenerator:
"""Generates a test suite from the conformance bitstreams"""

def __init__(
self,
name: str,
suite_name: str,
codec: Codec,
description: str,
url_test_vectors: str,
url_reference_vectors: str,
url_reference_vectors_checksums: str,
use_ffprobe: bool = False,
):
self.name = name
self.suite_name = suite_name
self.codec = codec
self.description = description
self.url_test_vectors = url_test_vectors
self.url_reference_vectors = url_reference_vectors
self.url_reference_vectors_checksums = url_reference_vectors_checksums
self.use_ffprobe = use_ffprobe

def _download_raw_output_references_and_checksums(self, jobs, test_suite, raw_bitstream_links,
                                                  raw_bitstream_md5_links):
    """Downloads raw output reference bitstreams and their checksums"""

with Pool(jobs) as pool:

def _callback_error(err):
print(f"\nError downloading -> {err}\n")
pool.terminate()
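# Pool.terminate() stops the worker processes immediately, so any transfers still
# pending are abandoned as soon as one download fails.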

downloads = []

print(f"\tDownloading output reference files for test suite {self.suite_name}")
for link in raw_bitstream_links:
downloads.append(
pool.apply_async(
utils.download,
args=(
link,
os.path.join(
test_suite.resources_dir,
test_suite.name,
os.path.splitext(os.path.basename(link))[0],
),
),
error_callback=_callback_error,
)
)

print(f"\tDownloading output reference checksum files for test suite {self.suite_name}")
for link in raw_bitstream_md5_links:
downloads.append(
pool.apply_async(
utils.download,
args=(
link,
os.path.join(
test_suite.resources_dir,
test_suite.name,
os.path.splitext(os.path.splitext(os.path.basename(link))[0])[0],
),
),
error_callback=_callback_error,
)
)

pool.close()
pool.join()

for job in downloads:
if not job.successful():
sys.exit("Some download failed")

def generate(self, download, jobs):
"""Generates the test suite and saves it to a file"""
output_filepath = self.suite_name + ".json"
test_suite = TestSuite(
output_filepath,
"resources",
self.suite_name,
self.codec,
self.description,
dict(),
)

hparser = HREFParser()

with urllib.request.urlopen(self.url_test_vectors) as resp:
data = str(resp.read())
hparser.feed(data)
compressed_bitstream_links = [url for url in hparser.links if url.endswith(tuple(BITSTREAM_EXTS))]
compressed_bitstream_names = [os.path.splitext(os.path.basename(x))[0] for x in compressed_bitstream_links]

with urllib.request.urlopen(self.url_reference_vectors) as resp:
data = str(resp.read())
hparser.feed(data)
raw_bitstream_links = [url for url in hparser.links if url.endswith(tuple(RAW_EXTS))]
raw_bitstream_names = [os.path.splitext(os.path.basename(x))[0] for x in raw_bitstream_links]

if not set(compressed_bitstream_names).issubset(raw_bitstream_names):
raise Exception("The following test vectors are missing reference files: {}"
                .format(sorted(set(compressed_bitstream_names).difference(raw_bitstream_names))))
else:
raw_bitstream_names = compressed_bitstream_names

# Match and store entries of raw_bitstream_links that contain entries of raw_bitstream_names as substrings
raw_bitstream_links = [link for link in raw_bitstream_links
if any(name in link for name in raw_bitstream_names)]

with urllib.request.urlopen(self.url_reference_vectors_checksums) as resp:
data = str(resp.read())
hparser.feed(data)
raw_bitstream_md5_links = [url for url in hparser.links if url.endswith(tuple(MD5_EXTS))]
raw_bitstream_md5_names = [os.path.splitext(os.path.splitext(os.path.basename(x))[0])[0]
for x in raw_bitstream_md5_links]

if not set(compressed_bitstream_names).issubset(raw_bitstream_md5_names):
raise Exception("The following test vectors are missing reference checksum files: {}"
                .format(sorted(set(compressed_bitstream_names).difference(raw_bitstream_md5_names))))
else:
raw_bitstream_md5_names = compressed_bitstream_names

# Match and store entries of raw_bitstream_md5_links that contain entries of raw_bitstream_md5_names
# as substrings
raw_bitstream_md5_links = [link for link in raw_bitstream_md5_links
if any(name in link for name in raw_bitstream_md5_names)]

for source_url in compressed_bitstream_links:
input_filename = os.path.basename(source_url)
test_vector_name = os.path.splitext(input_filename)[0]
test_vector = TestVector(
test_vector_name, source_url, "__skip__", input_filename, OutputFormat.UNKNOWN, ""
)
test_suite.test_vectors[test_vector_name] = test_vector
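# "__skip__" is a placeholder for the source checksum; it is replaced with the real
# file checksum once the bitstream has been downloaded (see below).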

# Download test suite input files
print(f"Download list of compressed bitstreams from {self.url_test_vectors}")
if download:
test_suite.download(
jobs=jobs,
out_dir=test_suite.resources_dir,
verify=False,
extract_all=True,
keep_file=True,
)

# Download test suite output reference and md5 checksum files
self._download_raw_output_references_and_checksums(jobs, test_suite, raw_bitstream_links,
raw_bitstream_md5_links)

for test_vector in test_suite.test_vectors.values():
dest_dir = os.path.join(test_suite.resources_dir, test_suite.name, test_vector.name)
dest_path = os.path.join(dest_dir, os.path.basename(test_vector.source))
absolute_input_path = os.path.join(os.getcwd(), dest_dir, test_vector.input_file)

# Check that bitstream file is located inside the corresponding test vector folder
if not os.path.isfile(absolute_input_path):
raise Exception(f"Bitstream file {test_vector.input_file} not found in {dest_dir}")

# Calculate source file checksum
test_vector.source_checksum = utils.file_checksum(dest_path)

# Extract sample format of input file using ffprobe
if self.use_ffprobe:
ffprobe = utils.normalize_binary_cmd("ffprobe")
command = [
ffprobe,
"-v",
"error",
"-select_streams",
"a:0",
"-show_entries",
"stream=sample_fmt",
"-of",
"default=nokey=1:noprint_wrappers=1",
absolute_input_path,
]

sample_format = utils.run_command_with_output(command).splitlines()[0]
try:
test_vector.output_format = OutputFormat[sample_format.upper()]
except KeyError as key_err:
    raise KeyError(f"ffprobe reported unsupported sample format '{sample_format}'") from key_err

# Read or calculate checksum of expected raw output
self._fill_checksum_aac(test_vector, dest_dir)

test_suite.to_json_file(output_filepath)
print(f"Generated new test suite: {test_suite.name}.json")

@staticmethod
def _fill_checksum_aac(test_vector, dest_dir):
checksum_file = utils.find_by_ext(dest_dir, MD5_EXTS)
if checksum_file is None:
raise Exception(f"MD5 checksum file not found in {dest_dir}")
with open(checksum_file, "r") as checksum_file:
regex = re.compile(r"([a-fA-F0-9]{32,}).*(?:\.(wav))?")
lines = checksum_file.readlines()
# Filter out empty lines
filtered_lines = [line.strip() for line in lines if line.strip()]
# Prefer lines matching the regex pattern
match = next(
(regex.match(line) for line in filtered_lines if regex.match(line)),
None,
)
if match:
test_vector.result = match.group(1).lower()
# Assert that we have extracted a valid MD5 from the file
assert (
len(test_vector.result) == 32
and re.search(r"^[a-fA-F0-9]{32}$", test_vector.result) is not None
), f"{test_vector.result} is not a valid MD5 hash"

raw_file = utils.find_by_ext(dest_dir, RAW_EXTS)
if raw_file is None or len(raw_file) == 0:
raise Exception(f"RAW file not found in {dest_dir}")
test_vector.result = utils.file_checksum(raw_file)


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--skip-download",
help="skip downloading the test vectors",
action="store_true",
default=False,
)
parser.add_argument(
"-j",
"--jobs",
help="number of parallel jobs to use. 2x logical cores by default",
type=int,
default=2 * multiprocessing.cpu_count(),
)
args = parser.parse_args()

generator = AACGenerator(
"MPEG2_AAC-ADTS",
"MPEG2_AAC-ADTS",
Codec.AAC,
"ISO IEC 13818-4 MPEG2 AAC ADTS test suite",
URL_MPEG2_ADTS,
URL_MPEG2_WAV_REFS,
URL_MPEG2_WAV_REFS_MD5,
True,
)
generator.generate(not args.skip_download, args.jobs)
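Once the generator has run, the resulting JSON can be sanity-checked straight from Python. A small sketch, assuming the serialized layout used by the existing suites under test_suites/ (a top-level "name" plus a "test_vectors" collection):

import json

with open("MPEG2_AAC-ADTS.json") as suite_file:
    suite = json.load(suite_file)
print(suite["name"], "->", len(suite["test_vectors"]), "test vectors")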