From f11c276c0059f41665414c64749f22525a0d406e Mon Sep 17 00:00:00 2001
From: Armin Braun <armin@daten.io>
Date: Mon, 28 Oct 2024 11:35:48 +0100
Subject: [PATCH] Fix bias towards competitor in searchBench (#307)

This makes competitor and baseline run in alternating order
instead of keeping the same order for all iterations.
This removes non-trivial bias in tests that run the system at
full load. The bias comes from the fact that generating the report
after each iteration takes macroscopic time but only utilizes a single
CPU core. This allows the CPU to cool down, giving the competitor that
runs first an advantage on a fully loaded system because it starts on
a cooler CPU. There are also other potential biases, such as those from
page-cache utilization, that may or may not advantage the first or
second run.
I could clearly reproduce a bias for the competitor before this change;
making things run in alternating order removes this effect in my testing.

Co-authored-by: Adrien Grand <jpountz@gmail.com>
---
 src/python/searchBench.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/python/searchBench.py b/src/python/searchBench.py
index 139fb033a..0fc77626c 100644
--- a/src/python/searchBench.py
+++ b/src/python/searchBench.py
@@ -172,15 +172,16 @@ def run(id, base, challenger, coldRun=False, doCharts=False, search=False, index
 
         seed = rand.randint(-10000000, 1000000)
 
-        for c in competitors:
+        # Change which competitor runs first on every iteration to avoid
+        # biasing results based on which competitors ran first or last.
+        rotation_index = iter % len(competitors)
+        rotated_competitors = competitors[rotation_index:] + competitors[:rotation_index]
+        for c in rotated_competitors:
           print('    %s:' % c.name)
-          t0 = time.time()
-          if c not in results:
-            results[c] = []
           logFile = r.runSimpleSearchBench(iter, id, c,
                                            coldRun, seed, staticSeed,
                                            filter=None, taskPatterns=taskPatterns) 
-          results[c].append(logFile)
+          results.setdefault(c, []).append(logFile)
 
         print()
         print('Report after iter %d:' % iter)