Skip to content

Commit

Permalink
Merge pull request #52 from xchem/CpKRS
Browse files Browse the repository at this point in the history
Cp krs
  • Loading branch information
ConorFWild authored Dec 12, 2024
2 parents 79cd22e + cbd8bb6 commit b03a3cf
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 29 deletions.
62 changes: 44 additions & 18 deletions src/ligand_neighbourhood_alignment/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,7 @@ def _get_structures(datasets):

return structures


def _get_dataset_protein_chains(structure):
protein_chains = []
for model in structure:
Expand All @@ -459,6 +460,7 @@ def _get_dataset_protein_chains(structure):

return protein_chains


def _get_closest_xtalform(xtalforms: dict[str, dt.XtalForm], structure, structures):
structure_spacegroup = structure.spacegroup_hm
structure_cell = structure.cell
Expand All @@ -475,7 +477,8 @@ def _get_closest_xtalform(xtalforms: dict[str, dt.XtalForm], structure, structur
continue

# Check they have the same protein chains
xtalform_protein_chains = [_chain for xtalform_assembly in xtalform.assemblies.values() for _chain in xtalform_assembly.chains]
xtalform_protein_chains = [_chain for xtalform_assembly in xtalform.assemblies.values() for _chain in
xtalform_assembly.chains]
dataset_protein_chains = _get_dataset_protein_chains(structure)
print(f'Xtalform chains: {set(xtalform_protein_chains)}')
print(f'Dataseet chains: {set(dataset_protein_chains)}')
Expand Down Expand Up @@ -524,7 +527,8 @@ def _assign_dataset(dataset, assemblies, xtalforms, structure, structures):
logger.info(f"Deltas to closest unit cell are: {deltas}")
logger.info(f"Structure path is: {dataset.pdb}")

raise Exception(f"No reference for dataset: {dataset.dtag}\nDeltas to closest unit cell in {closest_xtalform_id} are: {deltas}\nStructure path is: {dataset.pdb}")
raise Exception(
f"No reference for dataset: {dataset.dtag}\nDeltas to closest unit cell in {closest_xtalform_id} are: {deltas}\nStructure path is: {dataset.pdb}")

return closest_xtalform_id

Expand All @@ -534,7 +538,7 @@ def _save_assignments(fs_model: dt.FSModel, dataset_assignments: dict[str, str])
yaml.safe_dump(dataset_assignments, f)


def _generate_assembly(xtalform: dt.XtalForm, structure, assemblies: dict[str, dt.Assembly], pdb):
def _generate_assembly(xtalform: dt.XtalForm, structure, assemblies: dict[str, dt.Assembly], pdb, dataset):
full_st = structure.clone()
chains_to_delete = []
for model in full_st:
Expand All @@ -544,6 +548,7 @@ def _generate_assembly(xtalform: dt.XtalForm, structure, assemblies: dict[str, d
for model_name, chain_name in chains_to_delete:
del full_st[model_name][chain_name]

cloned_chains = []
for xtalform_assembly_id, xtalform_assembly in xtalform.assemblies.items():
assembly = assemblies[xtalform_assembly.assembly]
# chains = xtalform_assembly.chains
Expand All @@ -553,6 +558,7 @@ def _generate_assembly(xtalform: dt.XtalForm, structure, assemblies: dict[str, d
xtalform_assembly.chains,
xtalform_assembly.transforms,
):
cloned_chains.append(_chain)

# for generator in assembly.generators:
# op = gemmi.Op(generator.triplet)
Expand All @@ -579,6 +585,22 @@ def _generate_assembly(xtalform: dt.XtalForm, structure, assemblies: dict[str, d
chain_clone.name = f"{_chain}~{_biogen.biomol}~{_transform}"
full_st[0].add_chain(chain_clone)

# Catch any ligand only chains
for lbe in dataset.ligand_binding_events:
_chain = lbe[1]
if _chain not in cloned_chains:
op = gemmi.Op("x,y,z")
chain_clone = structure[0][_chain].clone()
for residue in chain_clone:
for atom in residue:
atom_frac = structure.cell.fractionalize(atom.pos)
new_pos_frac = op.apply_to_xyz([atom_frac.x, atom_frac.y, atom_frac.z])
new_pos_orth = structure.cell.orthogonalize(gemmi.Fractional(*new_pos_frac))
atom.pos = gemmi.Position(*new_pos_orth)
chain_clone.name = f"{_chain}~{_chain}~x,y,z"
full_st[0].add_chain(chain_clone)
cloned_chains.append(_chain)

chains = []
num_chains = 0
for model in full_st:
Expand Down Expand Up @@ -632,7 +654,7 @@ def _get_dataset_neighbourhoods(
logger.debug(f"{structure.cell}")

# Get the rest of the assembly
assembly = _generate_assembly(xtalform, structure, assemblies, dataset.pdb)
assembly = _generate_assembly(xtalform, structure, assemblies, dataset.pdb, dataset)

# Get the bound fragments
fragments: dict[tuple[str, str, str, str], gemmi.Residue] = _get_structure_fragments(dataset, assembly, version)
Expand Down Expand Up @@ -967,16 +989,17 @@ def _crystalform_incremental_cluster(
}

# While observations remain to be assigned
assignments = {xtalform_site_id: [_x for _x in xtalform_sites[xtalform_site_id].members] for xtalform_site_id in xtalform_sites}
assigned_observations = [observation_id for xtalform_site_id in assignments for observation_id in assignments[xtalform_site_id]]
assignments = {xtalform_site_id: [_x for _x in xtalform_sites[xtalform_site_id].members] for xtalform_site_id in
xtalform_sites}
assigned_observations = [observation_id for xtalform_site_id in assignments for observation_id in
assignments[xtalform_site_id]]
observations_to_assign = [observation_id for observation_id in centroid_ca_positions]

while len(observations_to_assign) > 0:
for observation_id in observations_to_assign:
# Assign observations near current xtalform sites
for xtalform_site_id, xtalform_site_pos in centre_residues_positions.items():
if _get_dist(centroid_ca_positions[observation_id], xtalform_site_pos) < cutoff:

assignments[xtalform_site_id].append(observation_id)
assigned_observations.append(observation_id)
break
Expand Down Expand Up @@ -1049,12 +1072,12 @@ def _update_xtalform_sites(
for xtalform_name in crystalform_observations
}


# Spatially cluster
crystalform_observation_cluster_assignments = {
xtalform_name: _crystalform_incremental_cluster(
crystalform_observation_centroids[xtalform_name],
{xid: xs for xid, xs in xtalform_sites.items() if (xs.xtalform_id == xtalform_name) & (xs.canonical_site_id == canonical_site_id)},
{xid: xs for xid, xs in xtalform_sites.items() if
(xs.xtalform_id == xtalform_name) & (xs.canonical_site_id == canonical_site_id)},
neighbourhoods
)
for xtalform_name in crystalform_observations
Expand Down Expand Up @@ -1360,7 +1383,6 @@ def _update(
# Get the structures
structures: dict = _get_structures(datasets)


# Get the assembly alignment hierarchy
hierarchy, biochain_priorities = alignment_heirarchy._derive_alignment_heirarchy(assemblies)
alignment_heirarchy.save_yaml(fs_model.hierarchy, hierarchy, lambda x: x)
Expand Down Expand Up @@ -1533,7 +1555,7 @@ def _update(
# Check if residues match as usual, otherwise create a new canon site for it
debug = False
if canonical_site_id == 'LYSRSCPZ-x0426+A+805+1':
debug=True
debug = True
_update_xtalform_sites(
xtalform_sites,
canonical_site,
Expand Down Expand Up @@ -1623,7 +1645,8 @@ def _update(
canonical_site_id,
aligned_structure_path,
) in ligand_neighbourhood_output.aligned_structures.items():
if not ( (fs_model.source_dir.parent / aligned_structure_path).exists() | Path(aligned_structure_path).exists()):
if not ((fs_model.source_dir.parent / aligned_structure_path).exists() | Path(
aligned_structure_path).exists()):
# _update_aligned_structures()
_structure = structures[dtag].clone()
canonical_site = canonical_sites[canonical_site_id]
Expand Down Expand Up @@ -1652,9 +1675,9 @@ def _update(
_xtalform_id = _xtalform_site.xtalform_id
_xtalform_canonical_site_id = _xtalform_site.canonical_site_id
if (_xtalform_id == site_reference_ligand_xtalform_id) \
& (_xtalform_canonical_site_id == canonical_site_id) \
& (site_reference_ligand_id in _xtalform_site.members):
xtalform_site = _xtalform_site
& (_xtalform_canonical_site_id == canonical_site_id) \
& (site_reference_ligand_id in _xtalform_site.members):
xtalform_site = _xtalform_site
site_chain = xtalform_site.crystallographic_chain

# Aligns to conformer site, then to the corresponding assembly, then from that assembly to
Expand Down Expand Up @@ -1685,15 +1708,17 @@ def _update(
)
],
assembly_transform=assembly_transforms[
xtalforms[dataset_assignments[conformer_site.reference_ligand_id[0]]].assemblies[
xtalforms[
dataset_assignments[conformer_site.reference_ligand_id[0]]].assemblies[
alignment_heirarchy._chain_to_xtalform_assembly(
# conformer_site.reference_ligand_id[1],
site_chain,
xtalforms[dataset_assignments[conformer_site.reference_ligand_id[0]]]
)
].assembly
],
)
xtalform_sites=xtalform_sites
)

except:
logger.info(f"Failed to generate aligned structure {aligned_structure_path}")
Expand All @@ -1710,7 +1735,8 @@ def _update(
for canonical_site_id, alignment_info in dataset_alignment_info.items():
aligned_structure_path = alignment_info["aligned_structures"]
logger.info(f"Outputting reference structure: {aligned_structure_path}")
if not ( (fs_model.source_dir.parent / aligned_structure_path).exists() | Path(aligned_structure_path).exists()):
if not ((fs_model.source_dir.parent / aligned_structure_path).exists() | Path(
aligned_structure_path).exists()):
_structure = structures[dtag].clone()
_align_reference_structure(
_structure,
Expand Down
13 changes: 11 additions & 2 deletions src/ligand_neighbourhood_alignment/generate_aligned_structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,7 @@ def _drop_non_binding_chains_and_symmetrize_waters(
moving_ligand_id,
dataset_ligand_neighbourhood_ids,
xtalform,
xtalform_sites
):
# Other Ligand IDs
other_ligand_ids = [(lid[1], lid[2]) for lid in dataset_ligand_neighbourhood_ids if not ((lid[1] == moving_ligand_id[1]) & (lid[2] == moving_ligand_id[2]))]
Expand All @@ -299,7 +300,12 @@ def _drop_non_binding_chains_and_symmetrize_waters(
}

# Get the assembly the ligand is modelled as part of
lig_assembly = chain_assemblies[moving_ligand_id[1]]
for xsid, _xtalform_site in xtalform_sites.items():
_xtalform_id = _xtalform_site.xtalform_id
if moving_ligand_id in _xtalform_site.members:
xtalform_site = _xtalform_site
site_chain = xtalform_site.crystallographic_chain
lig_assembly = chain_assemblies[site_chain]

# Determine which waters are bound near the ligand, and at what positions
ns = gemmi.NeighborSearch(new_structure[0], new_structure.cell, 10).populate(include_h=False)
Expand Down Expand Up @@ -451,6 +457,7 @@ def _align_structure(
site_reference_xform,
chain_to_assembly_transform,
assembly_transform,
xtalform_sites
):
shortest_path: list[tuple[str, str, str]] = nx.shortest_path(g, moving_ligand_id, reference_ligand_id)
logger.debug(f"Shortest path: {shortest_path}")
Expand All @@ -466,7 +473,9 @@ def _align_structure(
neighbourhood,
moving_ligand_id,
dataset_ligand_neighbourhood_ids,
xtalform)
xtalform,
xtalform_sites
)

previous_ligand_id = moving_ligand_id
running_transform = gemmi.Transform()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -326,15 +326,15 @@ def _update_reference_structure_transforms(
xtalform_sites,
canonical_site_id
):
# Get the biochain of the canonical site
# # Get the biochain of the canonical site
site_reference_ligand_id = conformer_sites[canonical_site.reference_conformer_site_id].reference_ligand_id
site_reference_ligand_xtalform_id = dataset_assignments[site_reference_ligand_id[0]]
site_reference_ligand_xtalform = xtalforms[site_reference_ligand_xtalform_id]
for xsid, _xtalform_site in xtalform_sites.items():
_xtalform_id = _xtalform_site.xtalform_id
if _xtalform_id == site_reference_ligand_xtalform_id:
_xtalform_canonical_site_id = _xtalform_site.canonical_site_id
if _xtalform_canonical_site_id == canonical_site_id:
if (_xtalform_id == site_reference_ligand_xtalform_id) &\
(_xtalform_site.canonical_site_id == canonical_site_id) & \
(site_reference_ligand_id in _xtalform_site.members):
xtalform_site = _xtalform_site
site_chain = xtalform_site.crystallographic_chain
canonical_site_biochain = alignment_heirarchy._chain_to_biochain(
Expand All @@ -343,6 +343,7 @@ def _update_reference_structure_transforms(
assemblies
)


# Determine whether the biochain is shared, and if not skip
reference_structure = structures[key[0]]
reference_structure_xtalform = xtalforms[dataset_assignments[key[0]]]
Expand All @@ -354,10 +355,21 @@ def _update_reference_structure_transforms(
}
reference_structure_biochains_inv = {v: k for k, v in reference_structure_biochains.items()}

if canonical_site_biochain not in reference_structure_biochains.values():
return None

# Align the reference to the biochain reference using the canonical site residues
# # Get the reference residues whose image is the canonical site biochain aligned residues
# alignment_residues_ref_st = []
# alignment_residues_mov_st = []
# for rid in canonical_site.residues:
# chain, res = rid[0], rid[1]
# biochain = alignment_heirarchy._chain_to_biochain(
# chain,
# site_reference_ligand_xtalform,
# assemblies
# )
# reference_structure_chain = reference_structure_biochains_inv[biochain]
# alignment_residues_ref_st.append((reference_structure_chain, res))
# alignment_residues_mov_st.append((reference_structure_chain, res))

# # Align the reference to the biochain reference using the canonical site residues
alignment_residues_ref_st = []
alignment_residues_mov_st = []
core_chain = reference_structure_biochains_inv[canonical_site_biochain]
Expand All @@ -373,13 +385,14 @@ def _update_reference_structure_transforms(
continue
alignment_residues_ref_st.append((chain, res))
alignment_residues_mov_st.append((core_chain, res))

transform = _get_transform_from_residues(
alignment_residues_ref_st,
structures[site_reference_ligand_id[0]],
reference_structure,
other_rs=alignment_residues_mov_st,
)


reference_structure_transforms[key] = dt.Transform(
transform.vec.tolist(),
transform.mat.tolist(),
Expand Down

0 comments on commit b03a3cf

Please sign in to comment.