diff --git a/src/ligand_neighbourhood_alignment/cli.py b/src/ligand_neighbourhood_alignment/cli.py index 4d4db428..189e2a3d 100644 --- a/src/ligand_neighbourhood_alignment/cli.py +++ b/src/ligand_neighbourhood_alignment/cli.py @@ -446,6 +446,7 @@ def _get_structures(datasets): return structures + def _get_dataset_protein_chains(structure): protein_chains = [] for model in structure: @@ -459,6 +460,7 @@ def _get_dataset_protein_chains(structure): return protein_chains + def _get_closest_xtalform(xtalforms: dict[str, dt.XtalForm], structure, structures): structure_spacegroup = structure.spacegroup_hm structure_cell = structure.cell @@ -475,7 +477,8 @@ def _get_closest_xtalform(xtalforms: dict[str, dt.XtalForm], structure, structur continue # Check they have the same protein chains - xtalform_protein_chains = [_chain for xtalform_assembly in xtalform.assemblies.values() for _chain in xtalform_assembly.chains] + xtalform_protein_chains = [_chain for xtalform_assembly in xtalform.assemblies.values() for _chain in + xtalform_assembly.chains] dataset_protein_chains = _get_dataset_protein_chains(structure) print(f'Xtalform chains: {set(xtalform_protein_chains)}') print(f'Dataseet chains: {set(dataset_protein_chains)}') @@ -524,7 +527,8 @@ def _assign_dataset(dataset, assemblies, xtalforms, structure, structures): logger.info(f"Deltas to closest unit cell are: {deltas}") logger.info(f"Structure path is: {dataset.pdb}") - raise Exception(f"No reference for dataset: {dataset.dtag}\nDeltas to closest unit cell in {closest_xtalform_id} are: {deltas}\nStructure path is: {dataset.pdb}") + raise Exception( + f"No reference for dataset: {dataset.dtag}\nDeltas to closest unit cell in {closest_xtalform_id} are: {deltas}\nStructure path is: {dataset.pdb}") return closest_xtalform_id @@ -534,7 +538,7 @@ def _save_assignments(fs_model: dt.FSModel, dataset_assignments: dict[str, str]) yaml.safe_dump(dataset_assignments, f) -def _generate_assembly(xtalform: dt.XtalForm, structure, assemblies: dict[str, dt.Assembly], pdb): +def _generate_assembly(xtalform: dt.XtalForm, structure, assemblies: dict[str, dt.Assembly], pdb, dataset): full_st = structure.clone() chains_to_delete = [] for model in full_st: @@ -544,6 +548,7 @@ def _generate_assembly(xtalform: dt.XtalForm, structure, assemblies: dict[str, d for model_name, chain_name in chains_to_delete: del full_st[model_name][chain_name] + cloned_chains = [] for xtalform_assembly_id, xtalform_assembly in xtalform.assemblies.items(): assembly = assemblies[xtalform_assembly.assembly] # chains = xtalform_assembly.chains @@ -553,6 +558,7 @@ def _generate_assembly(xtalform: dt.XtalForm, structure, assemblies: dict[str, d xtalform_assembly.chains, xtalform_assembly.transforms, ): + cloned_chains.append(_chain) # for generator in assembly.generators: # op = gemmi.Op(generator.triplet) @@ -579,6 +585,22 @@ def _generate_assembly(xtalform: dt.XtalForm, structure, assemblies: dict[str, d chain_clone.name = f"{_chain}~{_biogen.biomol}~{_transform}" full_st[0].add_chain(chain_clone) + # Catch any ligand only chains + for lbe in dataset.ligand_binding_events: + _chain = lbe[1] + if _chain not in cloned_chains: + op = gemmi.Op("x,y,z") + chain_clone = structure[0][_chain].clone() + for residue in chain_clone: + for atom in residue: + atom_frac = structure.cell.fractionalize(atom.pos) + new_pos_frac = op.apply_to_xyz([atom_frac.x, atom_frac.y, atom_frac.z]) + new_pos_orth = structure.cell.orthogonalize(gemmi.Fractional(*new_pos_frac)) + atom.pos = gemmi.Position(*new_pos_orth) + chain_clone.name = f"{_chain}~{_chain}~x,y,z" + full_st[0].add_chain(chain_clone) + cloned_chains.append(_chain) + chains = [] num_chains = 0 for model in full_st: @@ -632,7 +654,7 @@ def _get_dataset_neighbourhoods( logger.debug(f"{structure.cell}") # Get the rest of the assembly - assembly = _generate_assembly(xtalform, structure, assemblies, dataset.pdb) + assembly = _generate_assembly(xtalform, structure, assemblies, dataset.pdb, dataset) # Get the bound fragments fragments: dict[tuple[str, str, str, str], gemmi.Residue] = _get_structure_fragments(dataset, assembly, version) @@ -967,8 +989,10 @@ def _crystalform_incremental_cluster( } # While observations remain to be assigned - assignments = {xtalform_site_id: [_x for _x in xtalform_sites[xtalform_site_id].members] for xtalform_site_id in xtalform_sites} - assigned_observations = [observation_id for xtalform_site_id in assignments for observation_id in assignments[xtalform_site_id]] + assignments = {xtalform_site_id: [_x for _x in xtalform_sites[xtalform_site_id].members] for xtalform_site_id in + xtalform_sites} + assigned_observations = [observation_id for xtalform_site_id in assignments for observation_id in + assignments[xtalform_site_id]] observations_to_assign = [observation_id for observation_id in centroid_ca_positions] while len(observations_to_assign) > 0: @@ -976,7 +1000,6 @@ def _crystalform_incremental_cluster( # Assign observations near current xtalform sites for xtalform_site_id, xtalform_site_pos in centre_residues_positions.items(): if _get_dist(centroid_ca_positions[observation_id], xtalform_site_pos) < cutoff: - assignments[xtalform_site_id].append(observation_id) assigned_observations.append(observation_id) break @@ -1049,12 +1072,12 @@ def _update_xtalform_sites( for xtalform_name in crystalform_observations } - # Spatially cluster crystalform_observation_cluster_assignments = { xtalform_name: _crystalform_incremental_cluster( crystalform_observation_centroids[xtalform_name], - {xid: xs for xid, xs in xtalform_sites.items() if (xs.xtalform_id == xtalform_name) & (xs.canonical_site_id == canonical_site_id)}, + {xid: xs for xid, xs in xtalform_sites.items() if + (xs.xtalform_id == xtalform_name) & (xs.canonical_site_id == canonical_site_id)}, neighbourhoods ) for xtalform_name in crystalform_observations @@ -1360,7 +1383,6 @@ def _update( # Get the structures structures: dict = _get_structures(datasets) - # Get the assembly alignment hierarchy hierarchy, biochain_priorities = alignment_heirarchy._derive_alignment_heirarchy(assemblies) alignment_heirarchy.save_yaml(fs_model.hierarchy, hierarchy, lambda x: x) @@ -1533,7 +1555,7 @@ def _update( # Check if residues match as usual, otherwise create a new canon site for it debug = False if canonical_site_id == 'LYSRSCPZ-x0426+A+805+1': - debug=True + debug = True _update_xtalform_sites( xtalform_sites, canonical_site, @@ -1623,7 +1645,8 @@ def _update( canonical_site_id, aligned_structure_path, ) in ligand_neighbourhood_output.aligned_structures.items(): - if not ( (fs_model.source_dir.parent / aligned_structure_path).exists() | Path(aligned_structure_path).exists()): + if not ((fs_model.source_dir.parent / aligned_structure_path).exists() | Path( + aligned_structure_path).exists()): # _update_aligned_structures() _structure = structures[dtag].clone() canonical_site = canonical_sites[canonical_site_id] @@ -1652,9 +1675,9 @@ def _update( _xtalform_id = _xtalform_site.xtalform_id _xtalform_canonical_site_id = _xtalform_site.canonical_site_id if (_xtalform_id == site_reference_ligand_xtalform_id) \ - & (_xtalform_canonical_site_id == canonical_site_id) \ - & (site_reference_ligand_id in _xtalform_site.members): - xtalform_site = _xtalform_site + & (_xtalform_canonical_site_id == canonical_site_id) \ + & (site_reference_ligand_id in _xtalform_site.members): + xtalform_site = _xtalform_site site_chain = xtalform_site.crystallographic_chain # Aligns to conformer site, then to the corresponding assembly, then from that assembly to @@ -1685,7 +1708,8 @@ def _update( ) ], assembly_transform=assembly_transforms[ - xtalforms[dataset_assignments[conformer_site.reference_ligand_id[0]]].assemblies[ + xtalforms[ + dataset_assignments[conformer_site.reference_ligand_id[0]]].assemblies[ alignment_heirarchy._chain_to_xtalform_assembly( # conformer_site.reference_ligand_id[1], site_chain, @@ -1693,7 +1717,8 @@ def _update( ) ].assembly ], - ) + xtalform_sites=xtalform_sites + ) except: logger.info(f"Failed to generate aligned structure {aligned_structure_path}") @@ -1710,7 +1735,8 @@ def _update( for canonical_site_id, alignment_info in dataset_alignment_info.items(): aligned_structure_path = alignment_info["aligned_structures"] logger.info(f"Outputting reference structure: {aligned_structure_path}") - if not ( (fs_model.source_dir.parent / aligned_structure_path).exists() | Path(aligned_structure_path).exists()): + if not ((fs_model.source_dir.parent / aligned_structure_path).exists() | Path( + aligned_structure_path).exists()): _structure = structures[dtag].clone() _align_reference_structure( _structure, diff --git a/src/ligand_neighbourhood_alignment/generate_aligned_structures.py b/src/ligand_neighbourhood_alignment/generate_aligned_structures.py index 371c6c6a..16d8d275 100644 --- a/src/ligand_neighbourhood_alignment/generate_aligned_structures.py +++ b/src/ligand_neighbourhood_alignment/generate_aligned_structures.py @@ -282,6 +282,7 @@ def _drop_non_binding_chains_and_symmetrize_waters( moving_ligand_id, dataset_ligand_neighbourhood_ids, xtalform, + xtalform_sites ): # Other Ligand IDs other_ligand_ids = [(lid[1], lid[2]) for lid in dataset_ligand_neighbourhood_ids if not ((lid[1] == moving_ligand_id[1]) & (lid[2] == moving_ligand_id[2]))] @@ -299,7 +300,12 @@ def _drop_non_binding_chains_and_symmetrize_waters( } # Get the assembly the ligand is modelled as part of - lig_assembly = chain_assemblies[moving_ligand_id[1]] + for xsid, _xtalform_site in xtalform_sites.items(): + _xtalform_id = _xtalform_site.xtalform_id + if moving_ligand_id in _xtalform_site.members: + xtalform_site = _xtalform_site + site_chain = xtalform_site.crystallographic_chain + lig_assembly = chain_assemblies[site_chain] # Determine which waters are bound near the ligand, and at what positions ns = gemmi.NeighborSearch(new_structure[0], new_structure.cell, 10).populate(include_h=False) @@ -451,6 +457,7 @@ def _align_structure( site_reference_xform, chain_to_assembly_transform, assembly_transform, +xtalform_sites ): shortest_path: list[tuple[str, str, str]] = nx.shortest_path(g, moving_ligand_id, reference_ligand_id) logger.debug(f"Shortest path: {shortest_path}") @@ -466,7 +473,9 @@ def _align_structure( neighbourhood, moving_ligand_id, dataset_ligand_neighbourhood_ids, - xtalform) + xtalform, + xtalform_sites + ) previous_ligand_id = moving_ligand_id running_transform = gemmi.Transform() diff --git a/src/ligand_neighbourhood_alignment/generate_sites_from_components.py b/src/ligand_neighbourhood_alignment/generate_sites_from_components.py index f331f7d5..6994e355 100644 --- a/src/ligand_neighbourhood_alignment/generate_sites_from_components.py +++ b/src/ligand_neighbourhood_alignment/generate_sites_from_components.py @@ -326,15 +326,15 @@ def _update_reference_structure_transforms( xtalform_sites, canonical_site_id ): - # Get the biochain of the canonical site + # # Get the biochain of the canonical site site_reference_ligand_id = conformer_sites[canonical_site.reference_conformer_site_id].reference_ligand_id site_reference_ligand_xtalform_id = dataset_assignments[site_reference_ligand_id[0]] site_reference_ligand_xtalform = xtalforms[site_reference_ligand_xtalform_id] for xsid, _xtalform_site in xtalform_sites.items(): _xtalform_id = _xtalform_site.xtalform_id - if _xtalform_id == site_reference_ligand_xtalform_id: - _xtalform_canonical_site_id = _xtalform_site.canonical_site_id - if _xtalform_canonical_site_id == canonical_site_id: + if (_xtalform_id == site_reference_ligand_xtalform_id) &\ + (_xtalform_site.canonical_site_id == canonical_site_id) & \ + (site_reference_ligand_id in _xtalform_site.members): xtalform_site = _xtalform_site site_chain = xtalform_site.crystallographic_chain canonical_site_biochain = alignment_heirarchy._chain_to_biochain( @@ -343,6 +343,7 @@ def _update_reference_structure_transforms( assemblies ) + # Determine whether the biochain is shared, and if not skip reference_structure = structures[key[0]] reference_structure_xtalform = xtalforms[dataset_assignments[key[0]]] @@ -354,10 +355,21 @@ def _update_reference_structure_transforms( } reference_structure_biochains_inv = {v: k for k, v in reference_structure_biochains.items()} - if canonical_site_biochain not in reference_structure_biochains.values(): - return None - - # Align the reference to the biochain reference using the canonical site residues + # # Get the reference residues whose image is the canonical site biochain aligned residues + # alignment_residues_ref_st = [] + # alignment_residues_mov_st = [] + # for rid in canonical_site.residues: + # chain, res = rid[0], rid[1] + # biochain = alignment_heirarchy._chain_to_biochain( + # chain, + # site_reference_ligand_xtalform, + # assemblies + # ) + # reference_structure_chain = reference_structure_biochains_inv[biochain] + # alignment_residues_ref_st.append((reference_structure_chain, res)) + # alignment_residues_mov_st.append((reference_structure_chain, res)) + + # # Align the reference to the biochain reference using the canonical site residues alignment_residues_ref_st = [] alignment_residues_mov_st = [] core_chain = reference_structure_biochains_inv[canonical_site_biochain] @@ -373,13 +385,14 @@ def _update_reference_structure_transforms( continue alignment_residues_ref_st.append((chain, res)) alignment_residues_mov_st.append((core_chain, res)) - transform = _get_transform_from_residues( alignment_residues_ref_st, structures[site_reference_ligand_id[0]], reference_structure, other_rs=alignment_residues_mov_st, ) + + reference_structure_transforms[key] = dt.Transform( transform.vec.tolist(), transform.mat.tolist(),