EdanToledo · EdanToledo · Sep 30, 2024 · Sep 30, 2024
diff --git a/README.md b/README.md
@@ -73,6 +73,7 @@ Stoix currently offers the following building blocks for Single-Agent RL researc
 - **Munchausen DQN (M-DQN)** [Paper](https://arxiv.org/abs/2007.14430)
 - **Quantile Regression DQN (QR-DQN)** - [Paper](https://arxiv.org/abs/1710.10044)
 - **DQN with Regularized Q-learning (DQN-Reg)** [Paper](https://arxiv.org/abs/2101.03958)
+- **Parallelised Q-network (PQN)** [Paper](https://arxiv.org/abs/2407.04811)
 - **Rainbow** - [Paper](https://arxiv.org/abs/1710.02298)
 - **REINFORCE With Baseline** - [Paper](https://people.cs.umass.edu/~barto/courses/cs687/williams92simple.pdf)
 - **Deep Deterministic Policy Gradient (DDPG)** - [Paper](https://arxiv.org/abs/1509.02971)

diff --git a/stoix/configs/default/anakin/default_ff_pqn.yaml b/stoix/configs/default/anakin/default_ff_pqn.yaml
@@ -0,0 +1,11 @@
+defaults:
+  - logger: base_logger
+  - arch: anakin
+  - system: q_learning/ff_pqn
+  - network: mlp_dqn
+  - env: gymnax/cartpole
+  - _self_
+
+hydra:
+  searchpath:
+    - file://stoix/configs
diff --git a/stoix/configs/system/q_learning/ff_pqn.yaml b/stoix/configs/system/q_learning/ff_pqn.yaml
@@ -0,0 +1,16 @@
+# --- Defaults FF-PQN ---
+
+system_name: ff_pqn # Name of the system.
+
+# --- RL hyperparameters ---
+rollout_length: 8 # Number of environment steps per vectorised environment.
+q_lr: 5e-4  # the learning rate of the Q-network optimizer
+epochs: 4 # Number of epochs per training data batch.
+num_minibatches: 16 # Number of minibatches per epoch.
+gamma: 0.99 # Discounting factor.
+q_lambda: 0.95 # Lambda value for Q lambda targets.
+max_grad_norm: 0.5 # Maximum norm of the gradients for a weight update.
+decay_learning_rates: False # Whether learning rates should be linearly decayed during training.
+training_epsilon: 0.1  # epsilon for the epsilon-greedy policy during training
+evaluation_epsilon: 0.00  # epsilon for the epsilon-greedy policy during evaluation
+huber_loss_parameter: 0.0  # parameter for the huber loss. If 0, it uses MSE loss.