# Launch train_online.py on the HalfCheetah-v2 environment.
python train_online.py \
  --env_name=HalfCheetah-v2

# Launch the pixel variant (train_online_pixels.py) on cheetah-run-v0.
# Two environment variables are set for this one command only:
#   MUJOCO_GL=egl                        - presumably selects MuJoCo's EGL
#                                          (headless GPU) renderer; confirm
#                                          against the MuJoCo/dm_control docs.
#   XLA_PYTHON_CLIENT_PREALLOCATE=false  - per the JAX docs, stops XLA from
#                                          preallocating GPU memory up front.
MUJOCO_GL=egl XLA_PYTHON_CLIENT_PREALLOCATE=false \
  python train_online_pixels.py \
  --env_name=cheetah-run-v0
# BC runs: train_offline.py with the `bc` entry of configs/offline_config.py
# and --config.model_config.distr=unitstd_normal. The three commands differ
# only in the dataset (--env_name) and in the optional filter flag.

# halfcheetah-expert-v2, no filter flag.
python train_offline.py \
  --config=configs/offline_config.py:bc \
  --config.model_config.distr=unitstd_normal \
  --env_name=halfcheetah-expert-v2

# halfcheetah-medium-expert-v2 with --filter_percentile=10.
# NOTE(review): presumably keeps only a top percentile of the dataset before
# cloning - confirm the exact meaning in train_offline.py.
python train_offline.py \
  --config=configs/offline_config.py:bc \
  --config.model_config.distr=unitstd_normal \
  --env_name=halfcheetah-medium-expert-v2 \
  --filter_percentile=10

# antmaze-large-play-v2 with --filter_threshold=0.5.
# NOTE(review): presumably a cutoff used to select which data is cloned -
# confirm the exact meaning in train_offline.py.
python train_offline.py \
  --config=configs/offline_config.py:bc \
  --config.model_config.distr=unitstd_normal \
  --env_name=antmaze-large-play-v2 \
  --filter_threshold=0.5
# BC (Autoregressive Policy)
# Runs train_offline.py with the `bc` entry of configs/offline_config.py on
# halfcheetah-expert-v2, selecting the `ar` distribution through
# --config.model_config.distr (rather than unitstd_normal).
python train_offline.py \
  --config=configs/offline_config.py:bc \
  --config.model_config.distr=ar \
  --env_name=halfcheetah-expert-v2
# IQL runs: train_offline.py with the iql_* entries of
# configs/offline_config.py. Both commands pass --eval_interval=100000 and
# --eval_episodes=100.

# iql_antmaze config on antmaze-large-play-v2.
python train_offline.py \
  --config=configs/offline_config.py:iql_antmaze \
  --env_name=antmaze-large-play-v2 \
  --eval_interval=100000 \
  --eval_episodes=100

# iql_mujoco config on halfcheetah-medium-expert-v2.
python train_offline.py \
  --config=configs/offline_config.py:iql_mujoco \
  --env_name=halfcheetah-medium-expert-v2 \
  --eval_interval=100000 \
  --eval_episodes=100
# Pixel-based runs on cheetah-run-v0. Every command sets MUJOCO_GL=egl and
# XLA_PYTHON_CLIENT_PREALLOCATE=false for that invocation only.

# Online pixel training with --save_buffer.
# NOTE(review): presumably this stores the replay buffer that the two
# train_offline_pixels.py runs below train from - confirm in the scripts.
MUJOCO_GL=egl XLA_PYTHON_CLIENT_PREALLOCATE=false \
  python train_online_pixels.py \
  --env_name=cheetah-run-v0 \
  --save_buffer

# Offline pixel training with the `bc` entry of
# configs/offline_pixels_config.py.
MUJOCO_GL=egl XLA_PYTHON_CLIENT_PREALLOCATE=false \
  python train_offline_pixels.py \
  --env_name=cheetah-run-v0 \
  --config=configs/offline_pixels_config.py:bc

# Offline pixel training with the `iql` entry of
# configs/offline_pixels_config.py.
MUJOCO_GL=egl XLA_PYTHON_CLIENT_PREALLOCATE=false \
  python train_offline_pixels.py \
  --env_name=cheetah-run-v0 \
  --config=configs/offline_pixels_config.py:iql