Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat:support bias #131

Merged
merged 1 commit into from
May 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 61 additions & 4 deletions unidock_tools/src/unidock_tools/application/unidock_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@
"local_only": False,
"seed": 181129,
"debug": False,
"bias_file": None,
"multi_bias": False,
"multi_bias_file": None,
"multi_bias_index": None,
}


Expand All @@ -57,6 +61,8 @@ def __init__(self,
size_x: float = 22.5,
size_y: float = 22.5,
size_z: float = 22.5,
bias_file: Optional[Path] = None,
multi_bias_files: List[Path] = [],
workdir: Path = Path("docking_pipeline"),
):
"""
Expand Down Expand Up @@ -87,6 +93,8 @@ def __init__(self,
self.receptor = receptor
self.mols = sum([read_ligand(ligand) for ligand in ligands], [])
self.mols = [PropertyMol(mol) for mol in self.mols]
self.bias_file = bias_file
self.multi_bias_files_dict = {f.stem: f for f in multi_bias_files}
self.center_x = center_x
self.center_y = center_y
self.center_z = center_z
Expand Down Expand Up @@ -127,11 +135,20 @@ def prepare_topology_sdf(self, mol_list: List[Chem.Mol], savedir: Path) -> List[
return prepared_sdf_list

@time_logger
def init_docking_data(self, input_dir: Path, batch_size: int = 20):
def init_docking_data(self, input_dir: Path, multi_bias: bool = False, batch_size: int = 20):
real_batch_size = math.ceil(len(self.mols) / math.ceil(len(self.mols) / batch_size))
batched_mol_list = [self.mols[i:i+real_batch_size] for i in range(0, len(self.mols), real_batch_size)]
for one_batch_mol_list in batched_mol_list:
input_list = self.prepare_topology_sdf(one_batch_mol_list, input_dir)
if multi_bias:
for input_file in input_list:
filename = input_file.stem
logging.info(filename)
logging.info(self.multi_bias_files_dict)
if filename in self.multi_bias_files_dict:
shutil.copyfile(self.multi_bias_files_dict[filename], os.path.join(input_dir, f"{filename}.bpf"))
else:
logging.warning(f"Cannot find bias file in multi-bias mode for {filename}")
yield input_list

@time_logger
Expand All @@ -148,6 +165,7 @@ def docking(self,
batch_size: int = 1200,
score_only: bool = False,
local_only: bool = False,
multi_bias: bool = False,
score_name: str = "docking_score",
docking_dir_name : str = "docking_dir",
topn: int = 10,
Expand All @@ -157,6 +175,7 @@ def docking(self,
for ligand_list in self.init_docking_data(
input_dir=input_dir,
batch_size=batch_size,
multi_bias=multi_bias,
):
# Run docking
ligands, scores_list = run_unidock(
Expand All @@ -165,8 +184,8 @@ def docking(self,
size_x=self.size_x, size_y=self.size_y, size_z=self.size_z,
scoring=scoring_function, num_modes=num_modes,
search_mode=search_mode, exhaustiveness=exhaustiveness, max_step=max_step,
seed=seed, refine_step=refine_step, energy_range=energy_range,
score_only=score_only, local_only=local_only,
seed=seed, refine_step=refine_step, energy_range=energy_range, bias_file=self.bias_file,
score_only=score_only, local_only=local_only, multi_bias=multi_bias,
)
self.postprocessing(ligand_scores_list=zip(ligands, scores_list),
save_dir=save_dir,
Expand Down Expand Up @@ -222,6 +241,33 @@ def main(args: dict):
exit(1)
logging.info(f"[UniDock Pipeline] {len(ligands)} ligands found.")

bias_file = None
if args.get("bias_file"):
bias_file = Path(args["bias_file"]).resolve()
if not bias_file.exists():
logging.error(f"Cannot find {bias_file}")
exit(1)

multi_bias_file_list = []
if args["multi_bias"]:
if args.get("multi_bias_file"):
for multi_bias_file in args["multi_bias_file"]:
if not Path(multi_bias_file).exists():
logging.error(f"Cannot find {multi_bias_file}")
continue
multi_bias_file_list.append(Path(multi_bias_file).resolve())
elif args.get("multi_bias_index"):
with open(args["multi_bias_index"], "r") as f:
index_content = f.read()
index_lines1 = [line.strip() for line in index_content.split("\n") if line.strip()]
index_lines2 = [line.strip() for line in index_content.split(" ") if line.strip()]
multi_bias_file_list.extend(index_lines2 if len(index_lines2) > len(index_lines1) else index_lines1)
multi_bias_file_list = [Path(multi_bias_file).resolve() for multi_bias_file in multi_bias_file_list if Path(multi_bias_file).exists()]

if len(multi_bias_file_list) != len(ligands):
logging.error("Number of ligands and bias files should be equal in multi-bias mode.")
exit(1)

logging.info("[UniDock Pipeline] Start")
start_time = time.time()
runner = UniDock(
Expand All @@ -233,7 +279,9 @@ def main(args: dict):
size_x=float(args["size_x"]),
size_y=float(args["size_y"]),
size_z=float(args["size_z"]),
workdir=workdir
bias_file=bias_file,
multi_bias_files=multi_bias_file_list,
workdir=workdir,
)
logging.info("[UniDock Pipeline] Start docking")
runner.docking(
Expand All @@ -249,6 +297,7 @@ def main(args: dict):
batch_size=int(args["batch_size"]),
score_only=bool(args["score_only"]),
local_only=bool(args["local_only"]),
multi_bias=bool(args["multi_bias"]),
score_name="docking_score",
docking_dir_name="docking_dir",
topn=int(args["topn"]),
Expand All @@ -268,6 +317,12 @@ def get_parser() -> argparse.ArgumentParser:
help="Ligand file in sdf format. Specify multiple files separated by commas.")
parser.add_argument("-i", "--ligand_index", type=str, default=None,
help="A text file containing the path of ligand files in sdf format.")
parser.add_argument("-b", "--bias_file", type=str, default=None,
help="Bias file in bpf format. Default: None.")
parser.add_argument("-mbf", "--multi_bias_file", type=lambda s: s.split(','), default=None,
help="multi Bias file in bpf format separated by commas. Number should be equal to ligands. Default: None.")
parser.add_argument("-mbi", "--multi_bias_index", type=str, default=None,
help="A text file containing the path of multi bias files in bpf format. Number should be equal to ligands. Default: None.")

parser.add_argument("-cx", "--center_x", type=float, required=True,
help="X-coordinate of the docking box center.")
Expand Down Expand Up @@ -317,6 +372,8 @@ def get_parser() -> argparse.ArgumentParser:
help="Whether to use score_only mode.")
parser.add_argument("--local_only", action="store_true",
help="Whether to use local_only mode.")
parser.add_argument("--multi_bias", action="store_true",
help="Whether to use multi_bias mode.")

parser.add_argument("--seed", type=int, default=181129,
help="Uni-Dock random seed")
Expand Down
14 changes: 11 additions & 3 deletions unidock_tools/src/unidock_tools/modules/docking/unidock.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,11 @@ def __init__(self,
max_step: int = 10,
energy_range: float = 3.0,
refine_step: int = 5,
bias_file: Optional[Path] = None,
seed : int = 181129,
score_only: bool = False,
local_only: bool = False
local_only: bool = False,
multi_bias: bool = False,
):
self.mgltools_python_path = ""
self.prepare_gpf4_script_path = ""
Expand Down Expand Up @@ -86,10 +88,14 @@ def __init__(self,
"--verbosity", "2",
"--keep_nonpolar_H",
]
if bias_file:
cmd += ["--bias", str(bias_file)]
if score_only:
cmd.append("--score_only")
if local_only:
cmd.append("--local_only")
if multi_bias:
cmd.append("--multi_bias")

logging.info(f"unidock cmd: {' '.join(cmd)}")
self.cmd = cmd
Expand Down Expand Up @@ -263,9 +269,11 @@ def run_unidock(
max_step: int = 10,
energy_range: float = 3.0,
refine_step: int = 5,
bias_file: Optional[Path] = None,
seed: int = 181129,
score_only: bool = False,
local_only: bool = False,
multi_bias: bool = False,
) -> Tuple[List[Path], List[List[float]]]:
runner = UniDockRunner(
receptor=receptor, ligands=ligands,
Expand All @@ -275,8 +283,8 @@ def run_unidock(
scoring=scoring, num_modes=num_modes,
search_mode=search_mode,
exhaustiveness=exhaustiveness, max_step=max_step,
energy_range=energy_range, refine_step=refine_step, seed=seed,
score_only=score_only, local_only=local_only,
energy_range=energy_range, refine_step=refine_step, seed=seed, bias_file=bias_file,
score_only=score_only, local_only=local_only, multi_bias=multi_bias,
)
result_ligands = runner.run()
scores_list = [UniDockRunner.read_scores(ligand) for ligand in result_ligands]
Expand Down
21 changes: 21 additions & 0 deletions unidock_tools/tests/applications/test_unidock.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,14 @@ def pocket():
def get_docking_args(testset_name):
receptor = os.path.join(testset_dir_path, testset_name, "protein.pdb")
ligand = os.path.join(testset_dir_path, testset_name, "ligand.sdf")
multi_bias_file = os.path.join(testset_dir_path, testset_name, "ligand.bpf")
with open(os.path.join(testset_dir_path, testset_name, "docking_grid.json")) as f:
pocket = json.load(f)
testset_info = {
"receptor": receptor,
"ligand": ligand,
"pocket": pocket,
"multi_bias_file": multi_bias_file if os.path.exists(multi_bias_file) else None
}
return testset_info

Expand Down Expand Up @@ -119,6 +121,25 @@ def test_unidock_pipeline_multi_pose(receptor, ligand, pocket):
assert -20 <= score <= 0, f"Uni-Dock score not in range: {score}"
shutil.rmtree(results_dir, ignore_errors=True)

def test_unidock_pipeline_multi_bias():
testset_name = "1gkc"
testset_info = get_docking_args(testset_name)
receptor, ligand, pocket, multi_bias_file = testset_info["receptor"], testset_info["ligand"], testset_info["pocket"], testset_info["multi_bias_file"]
results_dir = "unidock_results_multi_bias"
cmd = f"unidocktools unidock_pipeline -r {receptor} -l {ligand} -mbf {multi_bias_file} -sd {results_dir} \
-cx {pocket['center_x']} -cy {pocket['center_y']} -cz {pocket['center_z']} \
-sx {pocket['size_x']} -sy {pocket['size_y']} -sz {pocket['size_z']} \
-sf vina -nm 4 --seed 181129 --multi_bias --debug"
print(cmd)
resp = subprocess.run(cmd, shell=True, capture_output=True, encoding="utf-8")
print(resp.stdout)
assert resp.returncode == 0, f"run unidock pipeline app err:\n{resp.stderr}"

result_file = os.path.join(results_dir, Path(ligand).name)
assert os.path.exists(result_file), f"docking result file not found"

score_list = read_scores(result_file, "docking_score")
shutil.rmtree(results_dir, ignore_errors=True)

@pytest.mark.parametrize("testset_name", testset_name_list)
def test_unidock_pipeline_default_arg(testset_name):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"center_x": 40.2,
"center_y": 14.41,
"center_z": 141.58,
"size_x": 16.51,
"size_y": 16.17,
"size_z": 18.82
}
23 changes: 23 additions & 0 deletions unidock_tools/tests/inputs/unidock_pipeline/1gkc/ligand.bpf
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
x y z Vset r type atom
40.812 14.326 141.745 -1.00 1.49 map C
41.195 15.202 145.465 -1.00 1.41 map N
41.790 14.064 141.049 -1.00 1.40 map OA
39.236 13.318 139.119 -1.00 1.49 map C
39.553 13.856 141.464 -1.00 1.41 map N
38.239 13.830 138.621 -1.00 1.40 map OA
40.410 13.158 138.399 -1.00 1.41 map N
40.934 15.242 142.960 -1.00 1.49 map C
42.023 16.325 142.803 -1.00 1.49 map C
42.041 17.061 141.448 -1.00 1.49 map C
42.352 15.594 146.097 -1.00 1.49 map C
42.472 15.765 147.306 -1.00 1.40 map OA
41.197 14.416 144.234 -1.00 1.49 map C
40.126 14.875 146.326 -1.00 1.40 map OA
39.301 12.719 140.557 -1.00 1.49 map C
38.068 11.859 140.989 -1.00 1.49 map C
41.056 18.229 141.397 -1.00 1.49 map C
43.458 17.566 141.156 -1.00 1.49 map C
36.930 12.732 141.550 -1.00 1.49 map C
37.533 11.023 139.806 -1.00 1.49 map C
38.478 10.844 142.081 -1.00 1.49 map C
40.378 12.708 137.032 -1.00 1.49 map C
107 changes: 107 additions & 0 deletions unidock_tools/tests/inputs/unidock_pipeline/1gkc/ligand.sdf
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
1GKC ligand
3D

51 50 0 0 1 0 999 V2000
41.0934 14.0383 141.8702 C 0 0 0 0 0 0
41.8346 15.1186 145.4892 N 0 0 0 0 0 0
42.0373 13.3340 141.5168 O 0 0 0 0 0 0
39.7692 13.4251 138.9333 C 0 0 0 0 0 0
39.8590 13.9070 141.3537 N 0 0 0 0 0 0
39.3556 14.5376 138.6133 O 0 0 0 0 0 0
40.4448 12.6120 138.1086 N 0 0 0 0 0 0
41.3173 15.0785 142.9866 C 0 0 2 0 0 0
42.4695 16.0680 142.6365 C 0 0 0 0 0 0
42.3216 16.8916 141.3317 C 0 0 0 0 0 0
43.0333 15.4532 145.9995 C 0 0 0 0 0 0
43.2104 16.1573 146.9924 O 0 0 0 0 0 0
41.6055 14.3010 144.2999 C 0 0 0 0 0 0
40.7153 15.7294 146.0614 O 0 0 0 0 0 0
39.4946 12.8863 140.3562 C 0 0 1 0 0 0
38.0521 12.2924 140.5705 C 0 0 0 0 0 0
41.0356 17.7322 141.3046 C 0 0 0 0 0 0
43.5543 17.7823 141.1169 C 0 0 0 0 0 0
37.7476 11.1469 139.5698 C 0 0 0 0 0 0
37.9569 11.6901 141.9975 C 0 0 0 0 0 0
36.9428 13.3679 140.4439 C 0 0 0 0 0 0
40.7560 12.9302 136.7236 C 0 0 0 0 0 0
39.1127 14.5146 141.6644 H 0 0 0 0 0 0
40.7769 11.7200 138.4563 H 0 0 0 0 0 0
40.4097 15.6628 143.1381 H 0 0 0 0 0 0
43.4128 15.5186 142.6005 H 0 0 0 0 0 0
42.5814 16.7779 143.4562 H 0 0 0 0 0 0
42.2798 16.2014 140.4884 H 0 0 0 0 0 0
43.8506 15.0170 145.4066 H 0 0 0 0 0 0
40.7747 13.6269 144.5204 H 0 0 0 0 0 0
42.4791 13.6614 144.1562 H 0 0 0 0 0 0
40.3227 15.0122 146.5719 H 0 0 0 0 0 0
40.1732 12.0442 140.4999 H 0 0 0 0 0 0
41.0187 18.4113 140.4528 H 0 0 0 0 0 0
40.1600 17.0922 141.2201 H 0 0 0 0 0 0
40.9243 18.3345 142.2044 H 0 0 0 0 0 0
43.4856 18.3455 140.1865 H 0 0 0 0 0 0
43.6791 18.4984 141.9281 H 0 0 0 0 0 0
44.4572 17.1778 141.0688 H 0 0 0 0 0 0
36.7529 10.7289 139.7309 H 0 0 0 0 0 0
37.7777 11.4837 138.5331 H 0 0 0 0 0 0
38.4574 10.3266 139.6757 H 0 0 0 0 0 0
36.9834 11.2303 142.1738 H 0 0 0 0 0 0
38.7088 10.9155 142.1577 H 0 0 0 0 0 0
38.0889 12.4456 142.7733 H 0 0 0 0 0 0
35.9519 12.9339 140.5834 H 0 0 0 0 0 0
37.0478 14.1488 141.1956 H 0 0 0 0 0 0
36.9375 13.8428 139.4628 H 0 0 0 0 0 0
40.9353 12.0097 136.1681 H 0 0 0 0 0 0
39.9408 13.4669 136.2363 H 0 0 0 0 0 0
41.6573 13.5389 136.6646 H 0 0 0 0 0 0
1 3 2 0 0 0
1 5 1 0 0 0
1 8 1 0 0 0
2 11 1 0 0 0
2 13 1 0 0 0
2 14 1 0 0 0
4 6 2 0 0 0
4 7 1 0 0 0
4 15 1 0 0 0
5 15 1 0 0 0
5 23 1 0 0 0
7 22 1 0 0 0
7 24 1 0 0 0
8 9 1 0 0 0
8 13 1 0 0 0
8 25 1 0 0 0
9 10 1 0 0 0
9 26 1 0 0 0
9 27 1 0 0 0
10 17 1 0 0 0
10 18 1 0 0 0
10 28 1 0 0 0
11 12 2 0 0 0
11 29 1 0 0 0
13 30 1 0 0 0
13 31 1 0 0 0
14 32 1 0 0 0
15 16 1 0 0 0
15 33 1 0 0 0
16 19 1 0 0 0
16 20 1 0 0 0
16 21 1 0 0 0
17 34 1 0 0 0
17 35 1 0 0 0
17 36 1 0 0 0
18 37 1 0 0 0
18 38 1 0 0 0
18 39 1 0 0 0
19 40 1 0 0 0
19 41 1 0 0 0
19 42 1 0 0 0
20 43 1 0 0 0
20 44 1 0 0 0
20 45 1 0 0 0
21 46 1 0 0 0
21 47 1 0 0 0
21 48 1 0 0 0
22 49 1 0 0 0
22 50 1 0 0 0
22 51 1 0 0 0
M END
$$$$
Loading