Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cp krs #52

Merged
merged 8 commits into from
Dec 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 44 additions & 18 deletions src/ligand_neighbourhood_alignment/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,7 @@ def _get_structures(datasets):

return structures


def _get_dataset_protein_chains(structure):
protein_chains = []
for model in structure:
Expand All @@ -459,6 +460,7 @@ def _get_dataset_protein_chains(structure):

return protein_chains


def _get_closest_xtalform(xtalforms: dict[str, dt.XtalForm], structure, structures):
structure_spacegroup = structure.spacegroup_hm
structure_cell = structure.cell
Expand All @@ -475,7 +477,8 @@ def _get_closest_xtalform(xtalforms: dict[str, dt.XtalForm], structure, structur
continue

# Check they have the same protein chains
xtalform_protein_chains = [_chain for xtalform_assembly in xtalform.assemblies.values() for _chain in xtalform_assembly.chains]
xtalform_protein_chains = [_chain for xtalform_assembly in xtalform.assemblies.values() for _chain in
xtalform_assembly.chains]
dataset_protein_chains = _get_dataset_protein_chains(structure)
print(f'Xtalform chains: {set(xtalform_protein_chains)}')
print(f'Dataseet chains: {set(dataset_protein_chains)}')
Expand Down Expand Up @@ -524,7 +527,8 @@ def _assign_dataset(dataset, assemblies, xtalforms, structure, structures):
logger.info(f"Deltas to closest unit cell are: {deltas}")
logger.info(f"Structure path is: {dataset.pdb}")

raise Exception(f"No reference for dataset: {dataset.dtag}\nDeltas to closest unit cell in {closest_xtalform_id} are: {deltas}\nStructure path is: {dataset.pdb}")
raise Exception(
f"No reference for dataset: {dataset.dtag}\nDeltas to closest unit cell in {closest_xtalform_id} are: {deltas}\nStructure path is: {dataset.pdb}")

return closest_xtalform_id

Expand All @@ -534,7 +538,7 @@ def _save_assignments(fs_model: dt.FSModel, dataset_assignments: dict[str, str])
yaml.safe_dump(dataset_assignments, f)


def _generate_assembly(xtalform: dt.XtalForm, structure, assemblies: dict[str, dt.Assembly], pdb):
def _generate_assembly(xtalform: dt.XtalForm, structure, assemblies: dict[str, dt.Assembly], pdb, dataset):
full_st = structure.clone()
chains_to_delete = []
for model in full_st:
Expand All @@ -544,6 +548,7 @@ def _generate_assembly(xtalform: dt.XtalForm, structure, assemblies: dict[str, d
for model_name, chain_name in chains_to_delete:
del full_st[model_name][chain_name]

cloned_chains = []
for xtalform_assembly_id, xtalform_assembly in xtalform.assemblies.items():
assembly = assemblies[xtalform_assembly.assembly]
# chains = xtalform_assembly.chains
Expand All @@ -553,6 +558,7 @@ def _generate_assembly(xtalform: dt.XtalForm, structure, assemblies: dict[str, d
xtalform_assembly.chains,
xtalform_assembly.transforms,
):
cloned_chains.append(_chain)

# for generator in assembly.generators:
# op = gemmi.Op(generator.triplet)
Expand All @@ -579,6 +585,22 @@ def _generate_assembly(xtalform: dt.XtalForm, structure, assemblies: dict[str, d
chain_clone.name = f"{_chain}~{_biogen.biomol}~{_transform}"
full_st[0].add_chain(chain_clone)

# Catch any ligand only chains
for lbe in dataset.ligand_binding_events:
_chain = lbe[1]
if _chain not in cloned_chains:
op = gemmi.Op("x,y,z")
chain_clone = structure[0][_chain].clone()
for residue in chain_clone:
for atom in residue:
atom_frac = structure.cell.fractionalize(atom.pos)
new_pos_frac = op.apply_to_xyz([atom_frac.x, atom_frac.y, atom_frac.z])
new_pos_orth = structure.cell.orthogonalize(gemmi.Fractional(*new_pos_frac))
atom.pos = gemmi.Position(*new_pos_orth)
chain_clone.name = f"{_chain}~{_chain}~x,y,z"
full_st[0].add_chain(chain_clone)
cloned_chains.append(_chain)

chains = []
num_chains = 0
for model in full_st:
Expand Down Expand Up @@ -632,7 +654,7 @@ def _get_dataset_neighbourhoods(
logger.debug(f"{structure.cell}")

# Get the rest of the assembly
assembly = _generate_assembly(xtalform, structure, assemblies, dataset.pdb)
assembly = _generate_assembly(xtalform, structure, assemblies, dataset.pdb, dataset)

# Get the bound fragments
fragments: dict[tuple[str, str, str, str], gemmi.Residue] = _get_structure_fragments(dataset, assembly, version)
Expand Down Expand Up @@ -967,16 +989,17 @@ def _crystalform_incremental_cluster(
}

# While observations remain to be assigned
assignments = {xtalform_site_id: [_x for _x in xtalform_sites[xtalform_site_id].members] for xtalform_site_id in xtalform_sites}
assigned_observations = [observation_id for xtalform_site_id in assignments for observation_id in assignments[xtalform_site_id]]
assignments = {xtalform_site_id: [_x for _x in xtalform_sites[xtalform_site_id].members] for xtalform_site_id in
xtalform_sites}
assigned_observations = [observation_id for xtalform_site_id in assignments for observation_id in
assignments[xtalform_site_id]]
observations_to_assign = [observation_id for observation_id in centroid_ca_positions]

while len(observations_to_assign) > 0:
for observation_id in observations_to_assign:
# Assign observations near current xtalform sites
for xtalform_site_id, xtalform_site_pos in centre_residues_positions.items():
if _get_dist(centroid_ca_positions[observation_id], xtalform_site_pos) < cutoff:

assignments[xtalform_site_id].append(observation_id)
assigned_observations.append(observation_id)
break
Expand Down Expand Up @@ -1049,12 +1072,12 @@ def _update_xtalform_sites(
for xtalform_name in crystalform_observations
}


# Spatially cluster
crystalform_observation_cluster_assignments = {
xtalform_name: _crystalform_incremental_cluster(
crystalform_observation_centroids[xtalform_name],
{xid: xs for xid, xs in xtalform_sites.items() if (xs.xtalform_id == xtalform_name) & (xs.canonical_site_id == canonical_site_id)},
{xid: xs for xid, xs in xtalform_sites.items() if
(xs.xtalform_id == xtalform_name) & (xs.canonical_site_id == canonical_site_id)},
neighbourhoods
)
for xtalform_name in crystalform_observations
Expand Down Expand Up @@ -1360,7 +1383,6 @@ def _update(
# Get the structures
structures: dict = _get_structures(datasets)


# Get the assembly alignment hierarchy
hierarchy, biochain_priorities = alignment_heirarchy._derive_alignment_heirarchy(assemblies)
alignment_heirarchy.save_yaml(fs_model.hierarchy, hierarchy, lambda x: x)
Expand Down Expand Up @@ -1533,7 +1555,7 @@ def _update(
# Check if residues match as usual, otherwise create a new canon site for it
debug = False
if canonical_site_id == 'LYSRSCPZ-x0426+A+805+1':
debug=True
debug = True
_update_xtalform_sites(
xtalform_sites,
canonical_site,
Expand Down Expand Up @@ -1623,7 +1645,8 @@ def _update(
canonical_site_id,
aligned_structure_path,
) in ligand_neighbourhood_output.aligned_structures.items():
if not ( (fs_model.source_dir.parent / aligned_structure_path).exists() | Path(aligned_structure_path).exists()):
if not ((fs_model.source_dir.parent / aligned_structure_path).exists() | Path(
aligned_structure_path).exists()):
# _update_aligned_structures()
_structure = structures[dtag].clone()
canonical_site = canonical_sites[canonical_site_id]
Expand Down Expand Up @@ -1652,9 +1675,9 @@ def _update(
_xtalform_id = _xtalform_site.xtalform_id
_xtalform_canonical_site_id = _xtalform_site.canonical_site_id
if (_xtalform_id == site_reference_ligand_xtalform_id) \
& (_xtalform_canonical_site_id == canonical_site_id) \
& (site_reference_ligand_id in _xtalform_site.members):
xtalform_site = _xtalform_site
& (_xtalform_canonical_site_id == canonical_site_id) \
& (site_reference_ligand_id in _xtalform_site.members):
xtalform_site = _xtalform_site
site_chain = xtalform_site.crystallographic_chain

# Aligns to conformer site, then to the corresponding assembly, then from that assembly to
Expand Down Expand Up @@ -1685,15 +1708,17 @@ def _update(
)
],
assembly_transform=assembly_transforms[
xtalforms[dataset_assignments[conformer_site.reference_ligand_id[0]]].assemblies[
xtalforms[
dataset_assignments[conformer_site.reference_ligand_id[0]]].assemblies[
alignment_heirarchy._chain_to_xtalform_assembly(
# conformer_site.reference_ligand_id[1],
site_chain,
xtalforms[dataset_assignments[conformer_site.reference_ligand_id[0]]]
)
].assembly
],
)
xtalform_sites=xtalform_sites
)

except:
logger.info(f"Failed to generate aligned structure {aligned_structure_path}")
Expand All @@ -1710,7 +1735,8 @@ def _update(
for canonical_site_id, alignment_info in dataset_alignment_info.items():
aligned_structure_path = alignment_info["aligned_structures"]
logger.info(f"Outputting reference structure: {aligned_structure_path}")
if not ( (fs_model.source_dir.parent / aligned_structure_path).exists() | Path(aligned_structure_path).exists()):
if not ((fs_model.source_dir.parent / aligned_structure_path).exists() | Path(
aligned_structure_path).exists()):
_structure = structures[dtag].clone()
_align_reference_structure(
_structure,
Expand Down
13 changes: 11 additions & 2 deletions src/ligand_neighbourhood_alignment/generate_aligned_structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,7 @@ def _drop_non_binding_chains_and_symmetrize_waters(
moving_ligand_id,
dataset_ligand_neighbourhood_ids,
xtalform,
xtalform_sites
):
# Other Ligand IDs
other_ligand_ids = [(lid[1], lid[2]) for lid in dataset_ligand_neighbourhood_ids if not ((lid[1] == moving_ligand_id[1]) & (lid[2] == moving_ligand_id[2]))]
Expand All @@ -299,7 +300,12 @@ def _drop_non_binding_chains_and_symmetrize_waters(
}

# Get the assembly the ligand is modelled as part of
lig_assembly = chain_assemblies[moving_ligand_id[1]]
for xsid, _xtalform_site in xtalform_sites.items():
_xtalform_id = _xtalform_site.xtalform_id
if moving_ligand_id in _xtalform_site.members:
xtalform_site = _xtalform_site
site_chain = xtalform_site.crystallographic_chain
lig_assembly = chain_assemblies[site_chain]

# Determine which waters are bound near the ligand, and at what positions
ns = gemmi.NeighborSearch(new_structure[0], new_structure.cell, 10).populate(include_h=False)
Expand Down Expand Up @@ -451,6 +457,7 @@ def _align_structure(
site_reference_xform,
chain_to_assembly_transform,
assembly_transform,
xtalform_sites
):
shortest_path: list[tuple[str, str, str]] = nx.shortest_path(g, moving_ligand_id, reference_ligand_id)
logger.debug(f"Shortest path: {shortest_path}")
Expand All @@ -466,7 +473,9 @@ def _align_structure(
neighbourhood,
moving_ligand_id,
dataset_ligand_neighbourhood_ids,
xtalform)
xtalform,
xtalform_sites
)

previous_ligand_id = moving_ligand_id
running_transform = gemmi.Transform()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -326,15 +326,15 @@ def _update_reference_structure_transforms(
xtalform_sites,
canonical_site_id
):
# Get the biochain of the canonical site
# # Get the biochain of the canonical site
site_reference_ligand_id = conformer_sites[canonical_site.reference_conformer_site_id].reference_ligand_id
site_reference_ligand_xtalform_id = dataset_assignments[site_reference_ligand_id[0]]
site_reference_ligand_xtalform = xtalforms[site_reference_ligand_xtalform_id]
for xsid, _xtalform_site in xtalform_sites.items():
_xtalform_id = _xtalform_site.xtalform_id
if _xtalform_id == site_reference_ligand_xtalform_id:
_xtalform_canonical_site_id = _xtalform_site.canonical_site_id
if _xtalform_canonical_site_id == canonical_site_id:
if (_xtalform_id == site_reference_ligand_xtalform_id) &\
(_xtalform_site.canonical_site_id == canonical_site_id) & \
(site_reference_ligand_id in _xtalform_site.members):
xtalform_site = _xtalform_site
site_chain = xtalform_site.crystallographic_chain
canonical_site_biochain = alignment_heirarchy._chain_to_biochain(
Expand All @@ -343,6 +343,7 @@ def _update_reference_structure_transforms(
assemblies
)


# Determine whether the biochain is shared, and if not skip
reference_structure = structures[key[0]]
reference_structure_xtalform = xtalforms[dataset_assignments[key[0]]]
Expand All @@ -354,10 +355,21 @@ def _update_reference_structure_transforms(
}
reference_structure_biochains_inv = {v: k for k, v in reference_structure_biochains.items()}

if canonical_site_biochain not in reference_structure_biochains.values():
return None

# Align the reference to the biochain reference using the canonical site residues
# # Get the reference residues whose image is the canonical site biochain aligned residues
# alignment_residues_ref_st = []
# alignment_residues_mov_st = []
# for rid in canonical_site.residues:
# chain, res = rid[0], rid[1]
# biochain = alignment_heirarchy._chain_to_biochain(
# chain,
# site_reference_ligand_xtalform,
# assemblies
# )
# reference_structure_chain = reference_structure_biochains_inv[biochain]
# alignment_residues_ref_st.append((reference_structure_chain, res))
# alignment_residues_mov_st.append((reference_structure_chain, res))

# # Align the reference to the biochain reference using the canonical site residues
alignment_residues_ref_st = []
alignment_residues_mov_st = []
core_chain = reference_structure_biochains_inv[canonical_site_biochain]
Expand All @@ -373,13 +385,14 @@ def _update_reference_structure_transforms(
continue
alignment_residues_ref_st.append((chain, res))
alignment_residues_mov_st.append((core_chain, res))

transform = _get_transform_from_residues(
alignment_residues_ref_st,
structures[site_reference_ligand_id[0]],
reference_structure,
other_rs=alignment_residues_mov_st,
)


reference_structure_transforms[key] = dt.Transform(
transform.vec.tolist(),
transform.mat.tolist(),
Expand Down
Loading