From d7c983b8375769fd21b359d8cc13346a76fd6045 Mon Sep 17 00:00:00 2001
From: Timo Tijhof <krinklemail@gmail.com>
Date: Sun, 8 Aug 2021 01:33:08 +0100
Subject: [PATCH] Build: Add scheduled job to verify our reproducible builds

Ref https://github.com/qunitjs/qunit/issues/1560.
---
 .github/workflows/reproducible.yaml |  26 ++++
 build/dist-replace.js               |   8 ++
 build/reproducible-builds.js        | 189 ++++++++++++++++++++++++++++
 build/review-package.js             |  42 +------
 build/utils.js                      |  90 +++++++++++++
 5 files changed, 316 insertions(+), 39 deletions(-)
 create mode 100644 .github/workflows/reproducible.yaml
 create mode 100644 build/reproducible-builds.js
 create mode 100644 build/utils.js

diff --git a/.github/workflows/reproducible.yaml b/.github/workflows/reproducible.yaml
new file mode 100644
index 000000000..34333a3aa
--- /dev/null
+++ b/.github/workflows/reproducible.yaml
@@ -0,0 +1,26 @@
+name: Reproducible builds
+on:
+  # Once a week on Monday at 00:30 UTC
+  schedule:
+    - cron: '30 0 * * 1'
+  # Or manually
+  workflow_dispatch:
+  # Or when developing this workflow
+  push:
+    paths:
+      - .github/workflows/reproducible.yaml
+
+jobs:
+  run:
+    name: Verify releases
+    if: ${{ github.repository == 'qunitjs/qunit' }} # skip on forks, noisy cron
+    runs-on: ubuntu-20.04
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Use Node.js 12
+        uses: actions/setup-node@v2
+        with:
+          node-version: 12.x
+
+      - run: node build/reproducible-builds.js
diff --git a/build/dist-replace.js b/build/dist-replace.js
index 7bfa15627..cb43a347b 100644
--- a/build/dist-replace.js
+++ b/build/dist-replace.js
@@ -16,6 +16,14 @@ if ( /pre/.test( distVersion ) ) {
 
 const replacements = {
 
+	// Normalize CRLF from fuzzysort.js.
+	//
+	// The way we upload files to npm, Git, and the jQuery CDN all normalize
+	// CRLF to LF. Thus, if we don't do this ourselves during the build, then
+	// reproducible build verification would find that the distribution is
+	// not identical to the reproduced build artefact.
+	"\r\n": "\n",
+
 	// Embed version
 	"@VERSION": distVersion
 };
diff --git a/build/reproducible-builds.js b/build/reproducible-builds.js
new file mode 100644
index 000000000..21f550e30
--- /dev/null
+++ b/build/reproducible-builds.js
@@ -0,0 +1,189 @@
+// Helper for the "Reproducible builds" job.
+
+const cp = require( "child_process" );
+const fs = require( "fs" );
+const path = require( "path" );
+const util = require( "util" );
+
+const utils = require( "./utils.js" );
+const execFile = util.promisify( cp.execFile );
+const tempDir = path.join( __dirname, "../temp", "reproducible-builds" );
+const SRC_REPO = "https://github.com/qunitjs/qunit.git";
+
+/**
+ * How many past releases to verify.
+ *
+ * Note that qunit@<2.16.0 were not fully reproducible.
+ *
+ * qunit@<=2.14.1 embedded a timestamp in the file header. This would have to be
+ * ignored (or replaced with the timestamp found in the files you compare against).
+ * In the 2.14.1 release, timestamps were removed from the output. Also, prior to 2.14.1,
+ * the build wrote files to "/dist" instead of "/qunit".
+ *
+ * qunit@2.15.0 contained some CR (\r) characters in comments from fuzzysort.js,
+ * which got normalized to LF (\n) by Git, npm, and the CDN on their own. This was
+ * fixed in qunit@2.16.0 by removing the comment in question, and qunit@2.17.0
+ * normalizes CRLF during the build.
+ */
+const VERIFY_COUNT = 2;
+
+async function buildRelease( version, cacheDir = null ) {
+	console.log( `... ${version}: checking out the source` );
+
+	const gitDir = path.join( tempDir, `git-${version}` );
+	utils.cleanDir( gitDir );
+
+	await execFile( "git", [ "clone", "-q", "-b", version, "--depth=5", SRC_REPO, gitDir ] );
+
+	// Remove any artefacts that were checked into Git
+	utils.cleanDir( gitDir + "/qunit/" );
+
+	// Use sync for npm-ci to avoid concurrency bugs with shared cache
+	console.log( `... ${version}: installing development dependencies from npm` );
+	cp.execFileSync( "npm", [ "ci" ], {
+		env: {
+			npm_config_cache: cacheDir,
+			npm_config_update_notifier: "false",
+			PATH: process.env.PATH,
+			PUPPETEER_DOWNLOAD_PATH: path.join( cacheDir, "puppeteer_download" )
+		},
+		cwd: gitDir
+	} );
+
+	console.log( `... ${version}: building release` );
+	await execFile( "npm", [ "run", "build" ], {
+		env: {
+			PATH: process.env.PATH
+		},
+		cwd: gitDir
+	} );
+
+	return {
+		js: fs.readFileSync( gitDir + "/qunit/qunit.js", "utf8" ),
+		css: fs.readFileSync( gitDir + "/qunit/qunit.css", "utf8" )
+	};
+}
+
+const Reproducible = {
+	async fetch() {
+
+		// Keep the stuff that matters in memory. Below, we will run unaudited npm dev deps
+		// as part of build commands, which can modify anything on disk.
+		const releases = {};
+
+		{
+			console.log( "Setting up temp directory..." );
+
+			// This can take a while when running it locally (not CI),
+			// as it first needs to remove any old builds.
+			utils.cleanDir( tempDir );
+		}
+		{
+			console.log( "Fetching releases from jQuery CDN..." );
+			const cdnIndexUrl = "https://releases.jquery.com/resources/cdn.json";
+			const data = JSON.parse( await utils.download( cdnIndexUrl ) );
+
+			for ( const release of data.qunit.all.slice( 0, VERIFY_COUNT ) ) {
+				releases[ release.version ] = {
+					cdn: {
+						js: await utils.download( `https://code.jquery.com/${release.filename}` ),
+						css: await utils.download( `https://code.jquery.com/${release.theme}` )
+					}
+				};
+			}
+		}
+		{
+			console.log( "Fetching releases from npmjs.org..." );
+			const npmIndexUrl = "https://registry.npmjs.org/qunit";
+			const data = JSON.parse( await utils.download( npmIndexUrl ) );
+
+			for ( const version of Object.keys( data.versions ).slice( -VERIFY_COUNT ) ) {
+				if ( !releases[ version ] ) {
+					releases[ version ] = {};
+				}
+
+				const tarball = data.versions[ version ].dist.tarball;
+				const tarFile = path.join( tempDir, `npm-${version}${path.extname( tarball )}` );
+				await utils.downloadFile( tarball, tarFile );
+
+				releases[ version ].npm = {
+					js: cp.execFileSync(
+						"tar", [ "-xOf", tarFile, "package/qunit/qunit.js" ],
+						{ encoding: "utf8" }
+					),
+					css: cp.execFileSync(
+						"tar", [ "-xOf", tarFile, "package/qunit/qunit.css" ],
+						{ encoding: "utf8" }
+					)
+				};
+			}
+		}
+		{
+			console.log( "Reproducing release builds..." );
+
+			const cacheDir = path.join( tempDir, "cache" );
+			utils.cleanDir( cacheDir );
+
+			// Start the builds in parallel and await results.
+			// Let the first error propagate and ignore others (avoids "Unhandled rejection" later).
+			const buildPromises = [];
+			for ( const version in releases ) {
+				releases[ version ].buildPromise = buildRelease( version, cacheDir );
+				buildPromises.push( releases[ version ].buildPromise );
+			}
+			await Promise.all( buildPromises );
+
+			const diffs = [];
+			for ( const version in releases ) {
+				const release = releases[ version ];
+				const build = await release.buildPromise;
+
+				// For qunit@2.15.0, normalize CRLF to match what Git and npm did during upload.
+				if ( version === "2.15.0" ) {
+					build.js = utils.normalizeEOL( build.js );
+				}
+
+				let verified = true;
+				for ( const distro of [ "cdn", "npm" ] ) {
+					for ( const file of [ "js", "css" ] ) {
+						if ( release[ distro ][ file ] !== build[ file ] ) {
+							verified = false;
+							console.error(
+								`QUnit ${version} ${file} from ${distro} differs from build`
+							);
+							diffs.push( [
+								{ name: `qunit-${version}-build.${file}`,
+									contents: build[ file ] },
+								{ name: `qunit-${version}-${distro}.${file}`,
+									contents: release[ distro ][ file ] }
+							] );
+						}
+					}
+				}
+				if ( verified ) {
+					console.log( `QUnit ${version} is reproducible and matches distributions!` );
+				}
+			}
+
+			diffs.forEach( diff => {
+				const fromFile = path.join( tempDir, diff[ 0 ].name );
+				const toFile = path.join( tempDir, diff[ 1 ].name );
+				fs.writeFileSync( fromFile, utils.verboseNonPrintable( diff[ 0 ].contents ) );
+				fs.writeFileSync( toFile, utils.verboseNonPrintable( diff[ 1 ].contents ) );
+				process.stdout.write(
+					utils.getDiff( fromFile, toFile, { ignoreWhitespace: false } )
+				);
+			} );
+			if ( diffs.length ) {
+				throw new Error( "One or more distributions differ from the reproduced build" );
+			}
+		}
+	}
+};
+
+( async function main() {
+	await Reproducible.fetch();
+}() ).catch( e => {
+	console.error( e.toString() );
+	process.exit( 1 );
+} );
diff --git a/build/review-package.js b/build/review-package.js
index 3169390e4..7bd74830a 100644
--- a/build/review-package.js
+++ b/build/review-package.js
@@ -6,35 +6,9 @@
 
 const cp = require( "child_process" );
 const fs = require( "fs" );
-const https = require( "https" );
 const path = require( "path" );
 const readline = require( "readline" );
-
-function getDiff( from, to ) {
-
-	// macOS 10.15+ comes with GNU diff (2.8)
-	// https://unix.stackexchange.com/a/338960/37512
-	// https://stackoverflow.com/a/41770560/319266
-	const gnuDiffVersion = cp.execFileSync( "diff", [ "--version" ], { encoding: "utf8" } );
-	const versionStr = /diff.* (\d+\.\d+)/.exec( gnuDiffVersion );
-	const isOld = ( versionStr && Number( versionStr[ 1 ] ) < 3.4 );
-
-	try {
-		cp.execFileSync( "diff", [
-			"-w",
-			"--text",
-			"--unified",
-			...( isOld ? [] : [ "--color=always" ] ),
-			from,
-			to
-		], { encoding: "utf8" } );
-	} catch ( e ) {
-
-		// Expected, `diff` command yields non-zero exit status if files differ
-		return e.stdout;
-	}
-	throw new Error( `Unable to diff between ${from} and ${to}` );
-}
+const { getDiff, downloadFile } = require( "./utils.js" );
 
 async function confirm( text ) {
 	const rl = readline.createInterface( { input: process.stdin, output: process.stdout } );
@@ -50,16 +24,6 @@ async function confirm( text ) {
 	} );
 }
 
-async function download( url, dest ) {
-	const fileStr = fs.createWriteStream( dest );
-	await new Promise( ( resolve, reject ) => {
-		https.get( url, ( resp ) => {
-			resp.pipe( fileStr );
-			fileStr.on( "finish", () => fileStr.close( resolve ) );
-		} ).on( "error", ( err ) => reject( err ) );
-	} );
-}
-
 const ReleaseAssets = {
 	async audit( prevVersion ) {
 		if ( typeof prevVersion !== "string" || !/^\d+\.\d+\.\d+$/.test( prevVersion ) ) {
@@ -86,7 +50,7 @@ const ReleaseAssets = {
 
 			const prevUrl = `https://code.jquery.com/qunit/qunit-${prevVersion}.js`;
 			const tempPrevPath = path.join( __dirname, "../temp", file );
-			await download( prevUrl, tempPrevPath );
+			await downloadFile( prevUrl, tempPrevPath );
 
 			const currentPath = path.join( __dirname, "../qunit", file );
 			process.stdout.write( getDiff( tempPrevPath, currentPath ) );
@@ -98,7 +62,7 @@ const ReleaseAssets = {
 
 			const prevUrl = `https://code.jquery.com/qunit/qunit-${prevVersion}.css`;
 			const tempPrevPath = path.join( __dirname, "../temp", file );
-			await download( prevUrl, tempPrevPath );
+			await downloadFile( prevUrl, tempPrevPath );
 
 			const currentPath = path.join( __dirname, "../qunit", file );
 			process.stdout.write( getDiff( tempPrevPath, currentPath ) );
diff --git a/build/utils.js b/build/utils.js
new file mode 100644
index 000000000..2f8776f85
--- /dev/null
+++ b/build/utils.js
@@ -0,0 +1,90 @@
+const cp = require( "child_process" );
+const fs = require( "fs" );
+const https = require( "https" );
+
+function getDiff( from, to, options = {} ) {
+
+	// macOS 10.15+ comes with GNU diff (2.8)
+	// https://unix.stackexchange.com/a/338960/37512
+	// https://stackoverflow.com/a/41770560/319266
+	const gnuDiffVersion = cp.execFileSync( "diff", [ "--version" ], { encoding: "utf8" } );
+	const versionStr = /diff.* (\d+\.\d+)/.exec( gnuDiffVersion );
+	const isOld = ( versionStr && Number( versionStr[ 1 ] ) < 3.4 );
+
+	try {
+		cp.execFileSync( "diff", [
+			...( options.ignoreWhitespace !== false ? [ "-w" ] : [] ),
+			"--text",
+			"--unified",
+			...( isOld ? [] : [ "--color=always" ] ),
+			from,
+			to
+		], { encoding: "utf8" } );
+	} catch ( e ) {
+
+		// Expected, `diff` command yields non-zero exit status if files differ
+		return e.stdout;
+	}
+	throw new Error( `Unable to diff between ${from} and ${to}` );
+}
+
+// Download `url` over HTTPS and resolve with the response body as a UTF-8 string.
+//
+// Rejects when the request itself fails (e.g. DNS or connection errors) or when
+// reading the response stream fails. The HTTP status code is not checked.
+async function download( url ) {
+	const resp = await new Promise( ( resolve, reject ) => {
+		https.get( url, resolve ).on( "error", reject );
+	} );
+
+	const chunks = [];
+	for await ( const chunk of resp ) {
+		chunks.push( Buffer.from( chunk ) );
+	}
+	return Buffer.concat( chunks ).toString( "utf8" );
+}
+
+async function downloadFile( url, dest ) {
+	const fileStr = fs.createWriteStream( dest );
+	return new Promise( ( resolve, reject ) => {
+		https.get( url, resp => {
+			resp.pipe( fileStr );
+			fileStr.on( "finish", () => fileStr.close( resolve ) );
+		} ).on( "error", err => reject( err ) );
+	} );
+}
+
+function cleanDir( dirPath ) {
+	if ( fs.existsSync( dirPath ) ) {
+		fs.rmdirSync( dirPath, { recursive: true } );
+	}
+	fs.mkdirSync( dirPath, { recursive: true } );
+}
+
+// Turn invisible chars and non-ASCII chars into escape sequences.
+//
+// This is like `cat --show-nonprinting` and makes diffs easier to understand
+// when e.g. there is an added/removed line with a Windows-style CRLF which
+// would otherwise look the same in both lines.
+function verboseNonPrintable( str ) {
+
+	// Match all chars that are not printable ASCII,
+	// except \t (U+0009) and \n (U+000A).
+	return str.replace( /[^\t\n\u0020-\u007F]/g, function( m ) {
+		return `U+${m.codePointAt( 0 ).toString( 16 ).toUpperCase().padStart( 4, "0" )}`;
+	} );
+}
+
+function normalizeEOL( str ) {
+	return str.replace( /\r\n/g, "\n" );
+}
+
+
+module.exports = {
+	getDiff,
+	download,
+	downloadFile,
+	cleanDir,
+	verboseNonPrintable,
+	normalizeEOL
+};