fixup! COM-12461: Create an AAC test suite generator
mdimopoulos committed Nov 13, 2024
1 parent 8c827ee commit 1b0a23b
Showing 3 changed files with 481 additions and 439 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -43,7 +43,7 @@ packages = ["fluster", "fluster.decoders"]

[tool.setuptools.data-files]
"share/fluster/test_suites/aac" = [
"test_suites/aac/ISO_IEC_13818-4_2004.json",
"test_suites/aac/MPEG2_AAC-ADTS.json",
"test_suites/aac/ISO_IEC_14496-26_2010.json"
]
"share/fluster/test_suites/av1" = [
194 changes: 118 additions & 76 deletions scripts/gen_aac.py
@@ -18,6 +18,7 @@
# License along with this library. If not, see <https://www.gnu.org/licenses/>.

import argparse
import pathlib
import re
from html.parser import HTMLParser
from multiprocessing import Pool
@@ -35,17 +36,18 @@
# pylint: enable=wrong-import-position

BASE_URL = "https://standards.iso.org/"
URL_13818_ADTS = (
BASE_URL
+ "ittf/PubliclyAvailableStandards/ISO_IEC_13818-4_2004_Conformance_Testing/AAC/compressedAdts"
URL_MPEG2 = BASE_URL + "ittf/PubliclyAvailableStandards/ISO_IEC_13818-4_2004_Conformance_Testing/AAC/"
URL_MPEG2_ADTS = (
URL_MPEG2
+ "compressedAdts"
)
URL_13818_RAW = (
BASE_URL
+ "ittf/PubliclyAvailableStandards/ISO_IEC_13818-4_2004_Conformance_Testing/AAC/referencesWav"
URL_MPEG2_WAV_REFS = (
URL_MPEG2
+ "referencesWav"
)
URL_13818_RAW_CHECKSUMS = (
BASE_URL
+ "ittf/PubliclyAvailableStandards/ISO_IEC_13818-4_2004_Conformance_Testing/AAC/referencesWav/_checksum"
URL_MPEG2_WAV_REFS_MD5 = (
URL_MPEG2
+ "referencesWav/_checksum"
)

BITSTREAM_EXTS = [".adts"]
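For orientation, here is what the refactored constants above resolve to when composed. This is an illustrative sketch derived from the values in the diff, not part of the commit itself.

# Illustration: the composed URLs as plain strings (values taken from the diff above).
BASE_URL = "https://standards.iso.org/"
URL_MPEG2 = BASE_URL + "ittf/PubliclyAvailableStandards/ISO_IEC_13818-4_2004_Conformance_Testing/AAC/"

URL_MPEG2_ADTS = URL_MPEG2 + "compressedAdts"
URL_MPEG2_WAV_REFS = URL_MPEG2 + "referencesWav"
URL_MPEG2_WAV_REFS_MD5 = URL_MPEG2 + "referencesWav/_checksum"

# e.g. URL_MPEG2_ADTS ends up as:
# https://standards.iso.org/ittf/PubliclyAvailableStandards/ISO_IEC_13818-4_2004_Conformance_Testing/AAC/compressedAdts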
@@ -84,57 +86,24 @@ def __init__(
suite_name: str,
codec: Codec,
description: str,
site: str,
url_test_vectors: str,
url_reference_vectors: str,
url_reference_vectors_checksums: str,
use_ffprobe: bool = False,
):
self.name = name
self.suite_name = suite_name
self.codec = codec
self.description = description
self.site = site
self.url_test_vectors = url_test_vectors
self.url_reference_vectors = url_reference_vectors
self.url_reference_vectors_checksums = url_reference_vectors_checksums
self.use_ffprobe = use_ffprobe

def generate(self, download, jobs):
"""Generates the test suite and saves it to a file"""
output_filepath = os.path.join(self.suite_name + ".json")
test_suite = TestSuite(
output_filepath,
"resources",
self.suite_name,
self.codec,
self.description,
dict(),
)

hparser = HREFParser()
print(f"Download list of compressed bitstreams from {self.site + self.name}")
with urllib.request.urlopen(self.site + self.name) as resp:
data = str(resp.read())
hparser.feed(data)

compressed_bitstream_links = [
url for url in hparser.links if url.endswith(tuple(BITSTREAM_EXTS))
]

for url in compressed_bitstream_links:
filename = os.path.basename(url)
test_vector_name = os.path.splitext(filename)[0]
test_vector = TestVector(
test_vector_name, url, "__skip__", filename, OutputFormat.UNKNOWN, ""
)
test_suite.test_vectors[test_vector_name] = test_vector
def _download_raw_output_references_and_checksums(self, jobs, test_suite, raw_bitstream_links,
raw_bitstream_md5_links):
"""Downlodas raw output reference bitstreams and their checksums"""

# Download test suite input files
if download:
test_suite.download(
jobs=jobs,
out_dir=test_suite.resources_dir,
verify=False,
extract_all=True,
keep_file=True,
)

# Download test suite output reference and md5 checksum files
with Pool(jobs) as pool:

def _callback_error(err):
@@ -143,62 +112,133 @@ def _callback_error(err):

downloads = []

for test_vector in test_suite.test_vectors.values():
print(f"Downloading output reference file for test vector {test_vector.name}")
print(f"\tDownloading output reference files for test suite {self.suite_name}")
for link in raw_bitstream_links:
downloads.append(
pool.apply_async(
utils.download,
args=(
URL_13818_RAW + "/" + test_vector.name + RAW_EXTS[0],
link,
os.path.join(
test_suite.resources_dir,
test_suite.name,
test_vector.name,
os.path.splitext(os.path.basename(link))[0],
),
),
error_callback=_callback_error,
)
)

print(f"Downloading output reference checksum file for test vector {test_vector.name}")
print(f"\tDownloading output reference checksum files for test suite {self.suite_name}")
for link in raw_bitstream_md5_links:
downloads.append(
pool.apply_async(
utils.download,
args=(
URL_13818_RAW_CHECKSUMS + "/" + test_vector.name + MD5_EXTS[0],
link,
os.path.join(
test_suite.resources_dir,
test_suite.name,
test_vector.name,
os.path.splitext(os.path.splitext(os.path.basename(link))[0])[0],
),
),
error_callback=_callback_error,
)
)

pool.close()
pool.join()

for job in downloads:
if not job.successful():
sys.exit("Some download failed")

for test_vector in test_suite.test_vectors.values():
dest_dir = os.path.join(
test_suite.resources_dir, test_suite.name, test_vector.name
def generate(self, download, jobs):
"""Generates the test suite and saves it to a file"""
output_filepath = os.path.join(self.suite_name + ".json")
test_suite = TestSuite(
output_filepath,
"resources",
self.suite_name,
self.codec,
self.description,
dict(),
)

hparser = HREFParser()

with urllib.request.urlopen(self.url_test_vectors) as resp:
data = str(resp.read())
hparser.feed(data)
compressed_bitstream_links = [url for url in hparser.links if url.endswith(tuple(BITSTREAM_EXTS))]
compressed_bitstream_names = [os.path.splitext(os.path.basename(x))[0] for x in compressed_bitstream_links]

with urllib.request.urlopen(self.url_reference_vectors) as resp:
data = str(resp.read())
hparser.feed(data)
raw_bitstream_links = [url for url in hparser.links if url.endswith(tuple(RAW_EXTS))]
raw_bitstream_names = [os.path.splitext(os.path.basename(x))[0] for x in raw_bitstream_links]

if not set(compressed_bitstream_names).issubset(raw_bitstream_names):
raise Exception("Following test vectors are missing reference files {}"
.format([x for x in set(compressed_bitstream_names).difference(raw_bitstream_names)]))
else:
raw_bitstream_names = compressed_bitstream_names

# Match and store entries of raw_bitstream_links that contain entries of raw_bitstream_names as substrings
raw_bitstream_links = [link for link in raw_bitstream_links
if any(name in link for name in raw_bitstream_names)]

with urllib.request.urlopen(self.url_reference_vectors_checksums) as resp:
data = str(resp.read())
hparser.feed(data)
raw_bitstream_md5_links = [url for url in hparser.links if url.endswith(tuple(MD5_EXTS))]
raw_bitstream_md5_names = [os.path.splitext(os.path.splitext(os.path.basename(x))[0])[0]
for x in raw_bitstream_md5_links]

if not set(compressed_bitstream_names).issubset(raw_bitstream_md5_names):
raise Exception("Following test vectors are missing reference checksum files {}"
.format([x for x in set(compressed_bitstream_names).difference(raw_bitstream_md5_names)]))
else:
raw_bitstream_md5_names = compressed_bitstream_names

# Match and store entries of raw_bitstream_md5_links that contain entries of raw_bitstream_md5_names
# as substrings
raw_bitstream_md5_links = [link for link in raw_bitstream_md5_links
if any(name in link for name in raw_bitstream_md5_names)]

for source_url in compressed_bitstream_links:
input_filename = os.path.basename(source_url)
test_vector_name = os.path.splitext(input_filename)[0]
test_vector = TestVector(
test_vector_name, source_url, "__skip__", input_filename, OutputFormat.UNKNOWN, ""
)
dest_path = os.path.join(dest_dir, os.path.basename(test_vector.source))
test_suite.test_vectors[test_vector_name] = test_vector

test_vector.input_file = utils.find_by_ext(dest_dir, BITSTREAM_EXTS)
absolute_input_path = test_vector.input_file
test_vector.input_file = test_vector.input_file.replace(
os.path.join(
test_suite.resources_dir, test_suite.name, test_vector.name
)
+ os.sep,
"",
# Download test suite input files
print(f"Download list of compressed bitstreams from {self.url_test_vectors}")
if download:
test_suite.download(
jobs=jobs,
out_dir=test_suite.resources_dir,
verify=False,
extract_all=True,
keep_file=True,
)
if not test_vector.input_file:
raise Exception(f"Bitstream file not found in {dest_dir}")

# Download test suite output reference and md5 checksum files
self._download_raw_output_references_and_checksums(jobs, test_suite, raw_bitstream_links,
raw_bitstream_md5_links)

for test_vector in test_suite.test_vectors.values():
dest_dir = os.path.join(test_suite.resources_dir, test_suite.name, test_vector.name)
dest_path = os.path.join(dest_dir, os.path.basename(test_vector.source))
absolute_input_path = os.path.join(pathlib.Path(__file__).parent.resolve(), dest_dir,
test_vector.input_file)

# Check that bitstream file is located inside the corresponding test vector folder
if not os.path.isfile(absolute_input_path):
raise Exception(f"Bitstream file {test_vector.input_file} not found in {dest_dir}")

# Calculate source file checksum
test_vector.source_checksum = utils.file_checksum(dest_path)
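The next hunk is anchored in _fill_checksum_aac, whose body is not visible in this diff. Purely as a hypothetical illustration of what such a helper typically does (locate the downloaded .md5 file, extract the digest, and store it as the expected result), with utils and MD5_EXTS assumed to come from the surrounding module and all field names unverified:

import re


def _fill_checksum_aac(test_vector, dest_dir):
    # Hypothetical sketch only; not the committed implementation.
    checksum_file = utils.find_by_ext(dest_dir, MD5_EXTS)
    if checksum_file is None:
        raise Exception(f"MD5 checksum file not found in {dest_dir}")
    with open(checksum_file, "r", encoding="utf-8") as handle:
        # Checksum files commonly look like "<32 hex chars>  <filename>"
        match = re.search(r"[0-9a-fA-F]{32}", handle.read())
    if not match:
        raise Exception(f"No MD5 digest found in {checksum_file}")
    test_vector.result = match.group(0).lower()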
@@ -278,11 +318,13 @@ def _fill_checksum_aac(test_vector, dest_dir):
args = parser.parse_args()

generator = AACGenerator(
"",
"ISO_IEC_13818-4_2004",
"MPEG2_AAC-ADTS",
"MPEG2_AAC-ADTS",
Codec.AAC,
"ISO IEC 13814-4 AAC ADTS test suite",
URL_13818_ADTS,
"ISO IEC 13818-4 MPEG2 AAC ADTS test suite",
URL_MPEG2_ADTS,
URL_MPEG2_WAV_REFS,
URL_MPEG2_WAV_REFS_MD5,
True,
)
generator.generate(not args.skip_download, args.jobs)
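The generate() method above relies on an HREFParser class defined elsewhere in gen_aac.py and not shown in this diff. As a rough, self-contained sketch of such a link scraper (an illustration under assumptions, not the committed implementation; whether the real class resolves relative hrefs against BASE_URL is a guess):

from html.parser import HTMLParser
from urllib.parse import urljoin

BASE_URL = "https://standards.iso.org/"


class HREFParser(HTMLParser):
    """Collects absolute href links from a directory-listing page."""

    def __init__(self):
        super().__init__()
        self.links = []

    def handle_starttag(self, tag, attrs):
        if tag != "a":
            return
        for name, value in attrs:
            if name == "href" and value:
                # Resolve relative links so callers can filter by file extension
                self.links.append(urljoin(BASE_URL, value))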