Skip to content

Commit

Permalink
Add proper branching (#24)
Browse files Browse the repository at this point in the history
  • Loading branch information
pumken authored Mar 27, 2023
1 parent bb7f5e3 commit 5b272f2
Show file tree
Hide file tree
Showing 5 changed files with 471 additions and 37 deletions.
251 changes: 243 additions & 8 deletions src/chain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,260 @@
//! The `chain` module contains functions that allow for identifying the parent chain of an
//! organic molecule.
use crate::groups::Fallible;
use crate::groups::InvalidGraphError::{Cycle, Other};
use crate::molecule::{Atom, Cell, Element};
use crate::groups::{link_groups, Fallible};
use crate::molecule::Group::{Alkene, Alkyne};
use crate::molecule::{Atom, Branch, Cell, Element, Substituent};
use crate::nested_vec;
use crate::pointer::Pointer;
use crate::spatial::GridState;
use ruscii::spatial::Vec2;

/// Gets the longest of the given [`Vec`] of chains, assuming that it is non-empty.
/// Gets the primary chain of the given [`Vec`] of chains according to
/// [IUPAC rules](https://en.wikipedia.org/wiki/IUPAC_nomenclature_of_organic_chemistry) on
/// Wikipedia, accessed 26 Mar 2023. The primary chain is, in order of precedence, that which:
/// 1. has the greatest number of occurrences of the primary functional group.
/// 2. has the greatest number of multiple bonds.
/// 3. is the longest.
/// 3. has the greatest number of occurrences of prefix substituents.
/// 4. has the greatest number of single bonds.
pub fn primary_chain(
graph: &GridState,
chains: Vec<Vec<Atom>>,
parent: Option<Atom>,
) -> Fallible<Vec<Atom>> {
if chains.is_empty() {
return Err(Other("No carbon chain found.".to_string()));
}

let branches = chains
.into_iter()
.map(|chain| link_groups(graph, chain, parent.clone()))
.collect::<Fallible<Vec<Branch>>>()?;

let mut unique_branches = vec![];

for branch in branches.into_iter() {
if !unique_branches.contains(&branch.reversed()) {
unique_branches.push(branch);
}
}

if unique_branches.is_empty() {
panic!("unique_branches is empty!")
}

// 1
let most_primary_groups = chain_with_most_primary_groups(unique_branches);
if let Ok(it) = most_primary_groups {
return Ok(it);
}
let filtration = most_primary_groups.unwrap_err();

// 2
let most_multiple_bonds = chain_with_most_multiple_bonds(filtration);
if let Ok(it) = most_multiple_bonds {
return Ok(it);
}
let filtration = most_multiple_bonds.unwrap_err();

// 3
let longest = longest_chain(filtration);
if let Ok(it) = longest {
return Ok(it);
}
let filtration = longest.unwrap_err();

// 4
let most_prefix_subst = chain_with_most_prefix_subst(filtration);
if let Ok(it) = most_prefix_subst {
return Ok(it);
}
let filtration = most_prefix_subst.unwrap_err();

// TODO 5

Ok(filtration[0].chain.to_owned())
}

pub(crate) fn chain_with_most_primary_groups(
branches: Vec<Branch>,
) -> Result<Vec<Atom>, Vec<Branch>> {
let primary_group = &branches
.iter()
.flat_map(|it| it.groups.to_owned())
.flatten()
.filter(|it| matches!(it, Substituent::Group(_)))
.map(|it| {
if let Substituent::Group(group) = it {
group
} else {
panic!("call to filter() failed")
}
})
.max_by_key(|it| it.priority())
.ok_or(branches.to_owned())?;

let max_occurrences =
&branches
.iter()
.map(|branch| {
branch
.groups
.iter()
.flatten()
.filter(|&subst| matches!(subst, Substituent::Group(group) if group == primary_group))
.count()
})
.max()
.ok_or(branches.to_owned())?;

let primary_by_max =
branches
.iter()
.filter(|&branch| {
branch
.groups
.iter()
.flatten()
.filter(|&subst| matches!(subst, Substituent::Group(group) if group == primary_group))
.count() == *max_occurrences
})
.map(Branch::to_owned)
.collect::<Vec<Branch>>();

if primary_by_max.len() == 1 {
Ok(primary_by_max[0].to_owned().chain)
} else {
Err(primary_by_max)
}
}

fn chain_with_most_multiple_bonds(branches: Vec<Branch>) -> Result<Vec<Atom>, Vec<Branch>> {
let max_occurrences = branches
.iter()
.map(|branch| {
branch
.groups
.iter()
.flatten()
.filter(|&subst| {
matches!(
subst,
Substituent::Group(Alkene) | Substituent::Group(Alkyne)
)
})
.count()
})
.max()
.ok_or(branches.to_owned())?;

let primary_by_max = branches
.iter()
.filter(|&branch| {
branch
.groups
.iter()
.flatten()
.filter(|&subst| {
matches!(
subst,
Substituent::Group(Alkene) | Substituent::Group(Alkyne)
)
})
.count()
== max_occurrences
})
.map(Branch::to_owned)
.collect::<Vec<Branch>>();

if primary_by_max.len() == 1 {
Ok(primary_by_max[0].to_owned().chain)
} else {
Err(primary_by_max)
}
}

/// Gets the longest of the given [`Vec`] of chains, assuming that it is non-empty. If there
/// are multiple chains of the greatest length, [`None`] is returned.
///
/// ## Errors
///
/// If there are no `chains`, this function will return an `Err`.
pub(crate) fn longest_chain(chains: Vec<Vec<Atom>>) -> Fallible<Vec<Atom>> {
Ok(match chains.iter().max_by(|&a, &b| a.len().cmp(&b.len())) {
None => return Err(Other("No carbon chain found.".to_string())), // FIXME this is returned when a bond is placed at the edge
Some(it) => it,
pub(crate) fn longest_chain(branches: Vec<Branch>) -> Result<Vec<Atom>, Vec<Branch>> {
let max_length = branches
.iter()
.map(|chain| chain.chain.len())
.max()
.ok_or(branches.to_owned())?;
let longest_chains = branches
.into_iter()
.filter(|chain| chain.chain.len() == max_length)
.collect::<Vec<Branch>>();

if longest_chains.len() == 1 {
Ok(longest_chains[0].chain.to_owned())
} else {
Err(longest_chains)
}
}

pub(crate) fn chain_with_most_prefix_subst(
branches: Vec<Branch>,
) -> Result<Vec<Atom>, Vec<Branch>> {
let primary_group = branches
.iter()
.flat_map(|it| it.groups.to_owned())
.flatten()
.filter(|it| matches!(it, Substituent::Group(_)))
.map(|it| {
if let Substituent::Group(group) = it {
group
} else {
panic!("call to filter() failed")
}
})
.max_by_key(|it| it.priority())
.ok_or(branches.to_owned())?;

let max_occurrences = branches
.iter()
.map(|branch| {
branch
.groups
.iter()
.flatten()
.filter(|&subst| {
matches!(subst, Substituent::Group(group) if *group != primary_group)
|| matches!(subst, Substituent::Branch(_))
})
.count()
})
.max()
.ok_or(branches.to_owned())?;

let primary_by_max = branches
.iter()
.filter(|&branch| {
branch
.groups
.iter()
.flatten()
.filter(|&subst| {
matches!(subst, Substituent::Group(group) if *group != primary_group)
|| matches!(subst, Substituent::Branch(_))
})
.count()
== max_occurrences
})
.map(Branch::to_owned)
.collect::<Vec<Branch>>();

if primary_by_max.len() == 1 {
Ok(primary_by_max[0].to_owned().chain)
} else {
Err(primary_by_max)
}
.to_owned())
}

pub(crate) fn get_all_chains(graph: &GridState) -> Fallible<Vec<Vec<Atom>>> {
Expand Down
17 changes: 13 additions & 4 deletions src/groups.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
//! The `groups` module provides functionality for identifying functional groups on a branch.
use crate::chain;
use crate::chain::{endpoint_head_chains, longest_chain};
use crate::chain::{endpoint_head_chains, primary_chain};
use crate::compound;
use crate::groups::InvalidGraphError::{Other, UnrecognizedGroup};
use crate::molecule::Group::{
Expand All @@ -18,7 +18,7 @@ use ruscii::spatial::{Direction, Vec2};
use thiserror::Error;

/// Generates a [`Branch`] from the given `chain` containing all functional groups attached
/// to it.
/// to it, except for the group starting at the given `parent`.
pub(crate) fn link_groups(
graph: &GridState,
chain: Vec<Atom>,
Expand Down Expand Up @@ -75,7 +75,7 @@ pub(crate) fn link_groups(

pub(crate) fn debug_branches(graph: &GridState) -> Fallible<Branch> {
let all_chains = chain::get_all_chains(graph)?;
let chain = longest_chain(all_chains)?;
let chain = primary_chain(graph, all_chains, None)?;
link_groups(graph, chain, None)
}

Expand Down Expand Up @@ -204,7 +204,16 @@ pub(crate) fn branch_node_tree(
) -> Fallible<Vec<Atom>> {
let atom = Pointer::new(graph, pos).traverse_bond(direction)?;
let chains = endpoint_head_chains(atom, graph, Some(pos))?;
longest_chain(chains)
primary_chain(
graph,
chains,
Some(
graph
.get(pos)
.map_err(|_| Other("An unexpected error occurred.".to_string()))?
.unwrap_atom(),
),
)
}

/// Returns a [`Vec`] of [`Direction`] from the [`Atom`] at the given `pos` to bonded atoms
Expand Down
Loading

0 comments on commit 5b272f2

Please sign in to comment.