From 98f2c0e9610838e917dd09c375432a56077971d3 Mon Sep 17 00:00:00 2001 From: subwaystation Date: Tue, 21 May 2024 14:18:55 -0500 Subject: [PATCH 1/2] initiate all subgraphs extraction --- Cargo.lock | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++- Cargo.toml | 1 + src/main.rs | 32 +++++++++++++++++++++++++++++- 3 files changed, 88 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4fa7585..f53d653 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,15 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + [[package]] name = "anstream" version = "0.6.13" @@ -208,23 +217,40 @@ checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" [[package]] name = "impg" -version = "0.1.0" +version = "0.2.0" dependencies = [ "bincode", "clap", "coitrees", + "itertools", "noodles", "num_cpus", "rayon", + "regex", "serde", ] +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "libc" version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" +[[package]] +name = "memchr" +version = "2.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" + [[package]] name = "miniz_oxide" version = "0.7.2" @@ -303,6 +329,35 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "regex" +version = "1.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" + [[package]] name = "serde" version = "1.0.197" diff --git a/Cargo.toml b/Cargo.toml index 258072d..4ac58db 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,3 +14,4 @@ rayon = "1.9.0" serde = { version = "1.0.197", features = ["derive"] } noodles = { version = "0.66.0", features = ["bgzf"] } regex = "1.10.4" +itertools = "0.13.0" diff --git a/src/main.rs b/src/main.rs index 9445f4c..42bca1d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,6 +9,8 @@ use impg::paf; use rayon::ThreadPoolBuilder; use std::io::BufRead; +use itertools::Itertools; + /// Command-line tool for querying overlaps in PAF files. #[derive(Parser, Debug)] #[clap(author, version, about)] @@ -29,6 +31,10 @@ struct Args { #[clap(short='b', long, value_parser)] target_bed: Option, + /// Window size to create PAF files from. + #[clap(short='w', long, value_parser)] + window_size: Option, + /// Enable transitive overlap requests. #[clap(short='x', long, action)] transitive: bool, @@ -102,7 +108,31 @@ fn main() -> io::Result<()> { output_results_bedpe(&impg, results, &target_name, name); } } - } + } else if let Some(window_size) = args.window_size { + // println!("{window_size}"); + for key in impg.trees.keys().sorted() { + println!("key: {}", key); + impg.trees.get(key); + let target_length = impg.seq_index.get_len_from_id(*key).expect("Target length not found in index"); + println!("target length: {}", target_length); + let target_name = impg.seq_index.get_name(*key).unwrap(); + println!("target name: {}", target_name); + let mut i: i32 = 0; + while i < target_length.try_into().unwrap() { + println!("IIIII: {}", i); + let end; + if i + window_size < target_length.try_into().unwrap() { + end = i + window_size; + } else { + end = target_length.try_into().unwrap(); + } + // transitive stuff + let results = impg.query(key.clone(), i, end); + output_results_paf(&impg, results, &target_name, None); + i = i + window_size; + } + } + } Ok(()) } From bcff3c8be9f63d8ddd116e928df649c5606b5565 Mon Sep 17 00:00:00 2001 From: subwaystation Date: Tue, 21 May 2024 16:21:33 -0500 Subject: [PATCH 2/2] unstable push --- src/main.rs | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/main.rs b/src/main.rs index 8e0b5aa..174e247 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,10 +1,12 @@ use clap::Parser; use std::fs::File; +// use std::hash::Hash; +use std::collections::HashMap; use std::io::{self, BufReader, BufWriter}; use std::num::NonZeroUsize; use noodles::bgzf; use impg::impg::{Impg, SerializableImpg, AdjustedInterval, check_intervals}; -use coitrees::IntervalTree; +use coitrees::{IntervalTree, BasicCOITree, Interval}; use impg::paf; use rayon::ThreadPoolBuilder; use std::io::BufRead; @@ -110,6 +112,7 @@ fn main() -> io::Result<()> { } } else if let Some(window_size) = args.window_size { // println!("{window_size}"); + let mut seen: HashMap>> = HashMap::new(); for key in impg.trees.keys().sorted() { println!("key: {}", key); impg.trees.get(key); @@ -118,6 +121,12 @@ fn main() -> io::Result<()> { let target_name = impg.seq_index.get_name(*key).unwrap(); println!("target name: {}", target_name); let mut i: i32 = 0; + // check if key in coitree (once per coitree is sufficient) + let interval_arr = if let Some(interval_arr) = seen.get_mut(key) { + interval_arr + } else { // else we insert a new empty vec + seen.entry(*key).or_insert(Vec::new()) + }; while i < target_length.try_into().unwrap() { println!("IIIII: {}", i); let end; @@ -126,8 +135,15 @@ fn main() -> io::Result<()> { } else { end = target_length.try_into().unwrap(); } + // if already in coitree, extract overlapping intervals + // TODO NOT SURE HERE + // if not in coitree, add a new key with vec + // TODO NOT SURE HERE + // transitive stuff let results = impg.query(key.clone(), i, end); + // add new intervals to coitree + interval_arr.push(Interval::new(i, end, ())); output_results_paf(&impg, results, &target_name, None); i = i + window_size; }