fixup! COM-12461: Create an AAC test suite generator
mdimopoulos committed Nov 13, 2024
1 parent 8c827ee commit 1b0a23b
Showing 3 changed files with 481 additions and 439 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -43,7 +43,7 @@ packages = ["fluster", "fluster.decoders"]

[tool.setuptools.data-files]
"share/fluster/test_suites/aac" = [
"test_suites/aac/ISO_IEC_13818-4_2004.json",
"test_suites/aac/MPEG2_AAC-ADTS.json",
"test_suites/aac/ISO_IEC_14496-26_2010.json"
]
"share/fluster/test_suites/av1" = [
194 changes: 118 additions & 76 deletions scripts/gen_aac.py
@@ -18,6 +18,7 @@
# License along with this library. If not, see <https://www.gnu.org/licenses/>.

import argparse
import pathlib
import re
from html.parser import HTMLParser
from multiprocessing import Pool
@@ -35,17 +36,18 @@
# pylint: enable=wrong-import-position

BASE_URL = "https://standards.iso.org/"
URL_13818_ADTS = (
BASE_URL
+ "ittf/PubliclyAvailableStandards/ISO_IEC_13818-4_2004_Conformance_Testing/AAC/compressedAdts"
URL_MPEG2 = BASE_URL + "ittf/PubliclyAvailableStandards/ISO_IEC_13818-4_2004_Conformance_Testing/AAC/"
URL_MPEG2_ADTS = (
URL_MPEG2
+ "compressedAdts"
)
URL_13818_RAW = (
BASE_URL
+ "ittf/PubliclyAvailableStandards/ISO_IEC_13818-4_2004_Conformance_Testing/AAC/referencesWav"
URL_MPEG2_WAV_REFS = (
URL_MPEG2
+ "referencesWav"
)
URL_13818_RAW_CHECKSUMS = (
BASE_URL
+ "ittf/PubliclyAvailableStandards/ISO_IEC_13818-4_2004_Conformance_Testing/AAC/referencesWav/_checksum"
URL_MPEG2_WAV_REFS_MD5 = (
URL_MPEG2
+ "referencesWav/_checksum"
)

BITSTREAM_EXTS = [".adts"]
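For orientation, here is what the refactored constants above resolve to when composed. This is an illustrative sketch derived from the values in the diff, not part of the commit itself.

# Illustration: the composed URLs as plain strings (values taken from the diff above).
BASE_URL = "https://standards.iso.org/"
URL_MPEG2 = BASE_URL + "ittf/PubliclyAvailableStandards/ISO_IEC_13818-4_2004_Conformance_Testing/AAC/"

URL_MPEG2_ADTS = URL_MPEG2 + "compressedAdts"
URL_MPEG2_WAV_REFS = URL_MPEG2 + "referencesWav"
URL_MPEG2_WAV_REFS_MD5 = URL_MPEG2 + "referencesWav/_checksum"

# e.g. URL_MPEG2_ADTS ends up as:
# https://standards.iso.org/ittf/PubliclyAvailableStandards/ISO_IEC_13818-4_2004_Conformance_Testing/AAC/compressedAdts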
@@ -84,57 +86,24 @@ def __init__(
suite_name: str,
codec: Codec,
description: str,
site: str,
url_test_vectors: str,
url_reference_vectors: str,
url_reference_vectors_checksums: str,
use_ffprobe: bool = False,
):
self.name = name
self.suite_name = suite_name
self.codec = codec
self.description = description
self.site = site
self.url_test_vectors = url_test_vectors
self.url_reference_vectors = url_reference_vectors
self.url_reference_vectors_checksums = url_reference_vectors_checksums
self.use_ffprobe = use_ffprobe

def generate(self, download, jobs):
"""Generates the test suite and saves it to a file"""
output_filepath = os.path.join(self.suite_name + ".json")
test_suite = TestSuite(
output_filepath,
"resources",
self.suite_name,
self.codec,
self.description,
dict(),
)

hparser = HREFParser()
print(f"Download list of compressed bitstreams from {self.site + self.name}")
with urllib.request.urlopen(self.site + self.name) as resp:
data = str(resp.read())
hparser.feed(data)

compressed_bitstream_links = [
url for url in hparser.links if url.endswith(tuple(BITSTREAM_EXTS))
]

for url in compressed_bitstream_links:
filename = os.path.basename(url)
test_vector_name = os.path.splitext(filename)[0]
test_vector = TestVector(
test_vector_name, url, "__skip__", filename, OutputFormat.UNKNOWN, ""
)
test_suite.test_vectors[test_vector_name] = test_vector
def _download_raw_output_references_and_checksums(self, jobs, test_suite, raw_bitstream_links,
raw_bitstream_md5_links):
"""Downlodas raw output reference bitstreams and their checksums"""

# Download test suite input files
if download:
test_suite.download(
jobs=jobs,
out_dir=test_suite.resources_dir,
verify=False,
extract_all=True,
keep_file=True,
)

# Download test suite output reference and md5 checksum files
with Pool(jobs) as pool:

def _callback_error(err):
@@ -143,62 +112,133 @@ def _callback_error(err):

downloads = []

for test_vector in test_suite.test_vectors.values():
print(f"Downloading output reference file for test vector {test_vector.name}")
print(f"\tDownloading output reference files for test suite {self.suite_name}")
for link in raw_bitstream_links:
downloads.append(
pool.apply_async(
utils.download,
args=(
URL_13818_RAW + "/" + test_vector.name + RAW_EXTS[0],
link,
os.path.join(
test_suite.resources_dir,
test_suite.name,
test_vector.name,
os.path.splitext(os.path.basename(link))[0],
),
),
error_callback=_callback_error,
)
)

print(f"Downloading output reference checksum file for test vector {test_vector.name}")
print(f"\tDownloading output reference checksum files for test suite {self.suite_name}")
for link in raw_bitstream_md5_links:
downloads.append(
pool.apply_async(
utils.download,
args=(
URL_13818_RAW_CHECKSUMS + "/" + test_vector.name + MD5_EXTS[0],
link,
os.path.join(
test_suite.resources_dir,
test_suite.name,
test_vector.name,
os.path.splitext(os.path.splitext(os.path.basename(link))[0])[0],
),
),
error_callback=_callback_error,
)
)

pool.close()
pool.join()

for job in downloads:
if not job.successful():
sys.exit("Some download failed")

for test_vector in test_suite.test_vectors.values():
dest_dir = os.path.join(
test_suite.resources_dir, test_suite.name, test_vector.name
def generate(self, download, jobs):
"""Generates the test suite and saves it to a file"""
output_filepath = os.path.join(self.suite_name + ".json")
test_suite = TestSuite(
output_filepath,
"resources",
self.suite_name,
self.codec,
self.description,
dict(),
)

hparser = HREFParser()

with urllib.request.urlopen(self.url_test_vectors) as resp:
data = str(resp.read())
hparser.feed(data)
compressed_bitstream_links = [url for url in hparser.links if url.endswith(tuple(BITSTREAM_EXTS))]
compressed_bitstream_names = [os.path.splitext(os.path.basename(x))[0] for x in compressed_bitstream_links]

with urllib.request.urlopen(self.url_reference_vectors) as resp:
data = str(resp.read())
hparser.feed(data)
raw_bitstream_links = [url for url in hparser.links if url.endswith(tuple(RAW_EXTS))]
raw_bitstream_names = [os.path.splitext(os.path.basename(x))[0] for x in raw_bitstream_links]

if not set(compressed_bitstream_names).issubset(raw_bitstream_names):
raise Exception("Following test vectors are missing reference files {}"
.format([x for x in set(compressed_bitstream_names).difference(raw_bitstream_names)]))
else:
raw_bitstream_names = compressed_bitstream_names

# Match and store entries of raw_bitstream_links that contain entries of raw_bitstream_names as substrings
raw_bitstream_links = [link for link in raw_bitstream_links
if any(name in link for name in raw_bitstream_names)]

with urllib.request.urlopen(self.url_reference_vectors_checksums) as resp:
data = str(resp.read())
hparser.feed(data)
raw_bitstream_md5_links = [url for url in hparser.links if url.endswith(tuple(MD5_EXTS))]
raw_bitstream_md5_names = [os.path.splitext(os.path.splitext(os.path.basename(x))[0])[0]
for x in raw_bitstream_md5_links]

if not set(compressed_bitstream_names).issubset(raw_bitstream_md5_names):
raise Exception("Following test vectors are missing reference checksum files {}"
.format([x for x in set(compressed_bitstream_names).difference(raw_bitstream_md5_names)]))
else:
raw_bitstream_md5_names = compressed_bitstream_names

# Match and store entries of raw_bitstream_md5_links that contain entries of raw_bitstream_md5_names
# as substrings
raw_bitstream_md5_links = [link for link in raw_bitstream_md5_links
if any(name in link for name in raw_bitstream_md5_names)]

for source_url in compressed_bitstream_links:
input_filename = os.path.basename(source_url)
test_vector_name = os.path.splitext(input_filename)[0]
test_vector = TestVector(
test_vector_name, source_url, "__skip__", input_filename, OutputFormat.UNKNOWN, ""
)
dest_path = os.path.join(dest_dir, os.path.basename(test_vector.source))
test_suite.test_vectors[test_vector_name] = test_vector

test_vector.input_file = utils.find_by_ext(dest_dir, BITSTREAM_EXTS)
absolute_input_path = test_vector.input_file
test_vector.input_file = test_vector.input_file.replace(
os.path.join(
test_suite.resources_dir, test_suite.name, test_vector.name
)
+ os.sep,
"",
# Download test suite input files
print(f"Download list of compressed bitstreams from {self.url_test_vectors}")
if download:
test_suite.download(
jobs=jobs,
out_dir=test_suite.resources_dir,
verify=False,
extract_all=True,
keep_file=True,
)
if not test_vector.input_file:
raise Exception(f"Bitstream file not found in {dest_dir}")

# Download test suite output reference and md5 checksum files
self._download_raw_output_references_and_checksums(jobs, test_suite, raw_bitstream_links,
raw_bitstream_md5_links)

for test_vector in test_suite.test_vectors.values():
dest_dir = os.path.join(test_suite.resources_dir, test_suite.name, test_vector.name)
dest_path = os.path.join(dest_dir, os.path.basename(test_vector.source))
absolute_input_path = os.path.join(pathlib.Path(__file__).parent.resolve(), dest_dir,
test_vector.input_file)

# Check that bitstream file is located inside the corresponding test vector folder
if not os.path.isfile(absolute_input_path):
raise Exception(f"Bitstream file {test_vector.input_file} not found in {dest_dir}")

# Calculate source file checksum
test_vector.source_checksum = utils.file_checksum(dest_path)
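The next hunk is anchored in _fill_checksum_aac, whose body is not visible in this diff. Purely as a hypothetical illustration of what such a helper typically does (locate the downloaded .md5 file, extract the digest, and store it as the expected result), with utils and MD5_EXTS assumed to come from the surrounding module and all field names unverified:

import re


def _fill_checksum_aac(test_vector, dest_dir):
    # Hypothetical sketch only; not the committed implementation.
    checksum_file = utils.find_by_ext(dest_dir, MD5_EXTS)
    if checksum_file is None:
        raise Exception(f"MD5 checksum file not found in {dest_dir}")
    with open(checksum_file, "r", encoding="utf-8") as handle:
        # Checksum files commonly look like "<32 hex chars>  <filename>"
        match = re.search(r"[0-9a-fA-F]{32}", handle.read())
    if not match:
        raise Exception(f"No MD5 digest found in {checksum_file}")
    test_vector.result = match.group(0).lower()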
@@ -278,11 +318,13 @@ def _fill_checksum_aac(test_vector, dest_dir):
args = parser.parse_args()

generator = AACGenerator(
"",
"ISO_IEC_13818-4_2004",
"MPEG2_AAC-ADTS",
"MPEG2_AAC-ADTS",
Codec.AAC,
"ISO IEC 13814-4 AAC ADTS test suite",
URL_13818_ADTS,
"ISO IEC 13818-4 MPEG2 AAC ADTS test suite",
URL_MPEG2_ADTS,
URL_MPEG2_WAV_REFS,
URL_MPEG2_WAV_REFS_MD5,
True,
)
generator.generate(not args.skip_download, args.jobs)
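The generate() method above relies on an HREFParser class defined elsewhere in gen_aac.py and not shown in this diff. As a rough, self-contained sketch of such a link scraper (an illustration under assumptions, not the committed implementation; whether the real class resolves relative hrefs against BASE_URL is a guess):

from html.parser import HTMLParser
from urllib.parse import urljoin

BASE_URL = "https://standards.iso.org/"


class HREFParser(HTMLParser):
    """Collects absolute href links from a directory-listing page."""

    def __init__(self):
        super().__init__()
        self.links = []

    def handle_starttag(self, tag, attrs):
        if tag != "a":
            return
        for name, value in attrs:
            if name == "href" and value:
                # Resolve relative links so callers can filter by file extension
                self.links.append(urljoin(BASE_URL, value))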