diff --git a/server/fishtest/rundb.py b/server/fishtest/rundb.py index 409597de0..28790d062 100644 --- a/server/fishtest/rundb.py +++ b/server/fishtest/rundb.py @@ -1505,8 +1505,8 @@ def purge_run(self, run, p=0.001, res=7.0, iters=1): self.buffer(run, True) return message - def spsa_param_clip(self, param, increment): - return min(max(param["theta"] + increment, param["min"]), param["max"]) + def spsa_param_clip(self, param, increment, r): + return min(max(param["theta"] + increment + r, param["min"]), param["max"]) # Store SPSA parameters for each worker spsa_params = {} @@ -1553,20 +1553,25 @@ def generate_spsa(self, run): # and avoid division by zero for param in spsa["params"]: c = param["c"] / iter_local ** spsa["gamma"] + r = random.uniform(0, 1) flip = 1 if random.getrandbits(1) else -1 + # Stochastic rounding and probability for float N.p: (N, 1-p); (N+1, p) + w_value = math.floor(self.spsa_param_clip(param, c * flip, r)) + b_value = math.floor(self.spsa_param_clip(param, -c * flip, r)) result["w_params"].append( { "name": param["name"], - "value": self.spsa_param_clip(param, c * flip), + "value": w_value, "R": param["a"] / (spsa["A"] + iter_local) ** spsa["alpha"] / c**2, - "c": c, + # Set c to the real delta after stochastic rounding is applied + "c": abs(w_value - b_value) / 2, "flip": flip, } ) result["b_params"].append( { "name": param["name"], - "value": self.spsa_param_clip(param, -c * flip), + "value": b_value } ) @@ -1595,7 +1600,7 @@ def update_spsa(self, worker, run, spsa_results): R = w_params[idx]["R"] c = w_params[idx]["c"] flip = w_params[idx]["flip"] - param["theta"] = self.spsa_param_clip(param, R * c * result * flip) + param["theta"] = self.spsa_param_clip(param, R * c * result * flip, 0) if grow_summary: summary.append({"theta": param["theta"], "R": R, "c": c}) diff --git a/worker/games.py b/worker/games.py index fc801b09c..c251f4c5c 100644 --- a/worker/games.py +++ b/worker/games.py @@ -1085,13 +1085,12 @@ def launch_cutechess( b_params = [] # Run cutechess-cli binary. - # Stochastic rounding and probability for float N.p: (N, 1-p); (N+1, p) idx = cmd.index("_spsa_") cmd = ( cmd[:idx] + [ "option.{}={}".format( - x["name"], math.floor(x["value"] + random.uniform(0, 1)) + x["name"], x["value"] ) for x in w_params ] @@ -1102,7 +1101,7 @@ def launch_cutechess( cmd[:idx] + [ "option.{}={}".format( - x["name"], math.floor(x["value"] + random.uniform(0, 1)) + x["name"], x["value"] ) for x in b_params ] diff --git a/worker/sri.txt b/worker/sri.txt index aabcc55da..5e9c87940 100644 --- a/worker/sri.txt +++ b/worker/sri.txt @@ -1 +1 @@ -{"__version": 232, "updater.py": "Mg+pWOgGA0gSo2TuXuuLCWLzwGwH91rsW1W3ixg3jYauHQpRMtNdGnCfuD1GqOhV", "worker.py": "DRLqEwLvuSBzNt6k3MYtlC87bp6uwdUZ/O7Xs3/d+gDox0jTOT6SO/FNWzsXVs51", "games.py": "PrNaUxAFP0wPV/jCLivA0g9mcO1vq1Og1nN3vuPa408qcKoAYuza5ZHgDCZitkQG"} +{"__version": 232, "updater.py": "Mg+pWOgGA0gSo2TuXuuLCWLzwGwH91rsW1W3ixg3jYauHQpRMtNdGnCfuD1GqOhV", "worker.py": "DRLqEwLvuSBzNt6k3MYtlC87bp6uwdUZ/O7Xs3/d+gDox0jTOT6SO/FNWzsXVs51", "games.py": "xwE2as6BgI1PKjem3C89n3CV7TNH5M3uNmsOwr23Sm7Y31PA0WXE3VvRDYFh+iUp"}