From f10eb08a4cfe2718ccf66d3ff5788001211138e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nathan=20Lichtl=C3=A9?= Date: Fri, 2 Aug 2024 15:20:21 +0200 Subject: [PATCH] Fix bug in target Q-value for illegal actions Fix DQN bug: set ILLEGAL_ACTION_LOGITS_PENALTY to a large negative number (torch.finfo(torch.float).min) instead of sys.float_info.min, which is the smallest positive float (about 2.2e-308, effectively 0) and so applied no effective penalty. --- open_spiel/python/pytorch/dqn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/open_spiel/python/pytorch/dqn.py b/open_spiel/python/pytorch/dqn.py index 7b5bc775e9..f027b115e6 100644 --- a/open_spiel/python/pytorch/dqn.py +++ b/open_spiel/python/pytorch/dqn.py @@ -30,7 +30,7 @@ "Transition", "info_state action reward next_info_state is_final_step legal_actions_mask") -ILLEGAL_ACTION_LOGITS_PENALTY = sys.float_info.min +ILLEGAL_ACTION_LOGITS_PENALTY = torch.finfo(torch.float).min class SonnetLinear(nn.Module):