Skip to content

Commit

Permalink
Merge pull request #10 from cschin/main
Browse files Browse the repository at this point in the history
v0.4.0 tagged version
  • Loading branch information
cschin authored Feb 9, 2023
2 parents 8d20b4b + ed55d6a commit 12b5825
Show file tree
Hide file tree
Showing 12 changed files with 242 additions and 27 deletions.
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,25 @@ With the MAP graph, we can use the "principal bundle decomposition" to study com

## Documentation, Usage and Examples

Command Line Tools:

PGR-TK provides the following tools to:

- create the PGR-TK sequence and index database
- `pgr-mdb`: create pgr minimizer database with AGC backend
- `pgr-make-frgdb`: create PGR-TK fragment minimizer database with frg format backend
- query the database to fetch sequences
- `pgr-query`: query a PGR-TK pangenome sequence database, output the hit summary and generate fasta files from the target sequences
- generate MAP-graph in GFA format and principal bundle decomposition bed file
- `pgr-pbundle-decomp`: generate the principal bundle decomposition through MAP Graph from a fasta file
- generate SVG from the principal bundle decomposition bed file
- `pgr-pbundle-bed2svg`: generate SVG from a principal bundle bed file
- auxiliary tools
- `pgr-pbundle-bed2sorted`: generate annotation file with a sorting order from the principal bundle decomposition
- `pgr-pbundle-bed2dist`: generate alignment scores between sequences using bundle decomposition from a principal bundle bed file

For each command, `command --help` provides detailed usage information.

The API documentation is at https://sema4-research.github.io/pgr-tk/

A collection of Jupyter Notebooks is at https://github.com/sema4-Research/pgr-tk-notebooks/
Expand Down
2 changes: 1 addition & 1 deletion pgr-bin/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "pgr-bin"
version = "0.4.0-dev"
version = "0.4.0"
edition = "2021"
authors = ["Jason Chin <[email protected]>"]

Expand Down
8 changes: 7 additions & 1 deletion pgr-bin/src/bin/pgr-fetch-seqs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,28 @@ use std::fs::File;
use std::io::{self, BufRead, BufReader, BufWriter, Write};
use std::path::Path;

/// List or fetch sequences from a PGR-TK database
#[derive(Parser, Debug)]
#[clap(name = "pgr-fetch-seqs")]
#[clap(author, version)]
#[clap(about = "list or fetch sequences from a pgr database", long_about = None)]
#[clap(about, long_about = None)]
struct CmdOptions {
/// the prefix to a PGR-TK sequence database
pgr_db_prefix: String,

/// use the frg format for the sequence database (defaults to the AGC backend database if not specified)
#[clap(long, default_value_t = false)]
frg_file: bool,

/// the path to the regions file
#[clap(short, long, default_value=None)]
region_file: Option<String>,

/// the output file name
#[clap(short, long, default_value=None)]
output_file: Option<String>,

/// list all sequence sources and contig names in the database
#[clap(long, default_value_t = false)]
list: bool,
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,13 @@ use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::Path;

/// Create PGR-TK fragment minimizer database with frg format backend
#[derive(Parser, Debug)]
#[clap(name = "pgr-mdb")]
#[clap(name = "pgr-make-frgdb")]
#[clap(author, version)]
#[clap(about = "create pgr fragment minimizer db", long_about = None)]
#[clap(about, long_about = None)]
struct CmdOptions {
// file contains the paths to the fastx files to load
/// the path to the file that contains the paths to the fastx files to load
filepath: String,
prefix: String,
/// minimizer window size
Expand Down
3 changes: 2 additions & 1 deletion pgr-bin/src/bin/pgr-mdb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@ use std::io::{BufRead, BufReader};

use pgr_db::seq_db;

/// Create pgr minimizer database with AGC backend
#[derive(Parser, Debug)]
#[clap(name = "pgr-mdb")]
#[clap(author, version)]
#[clap(about = "create pgr minimizer db", long_about = None)]
#[clap(about, long_about = None)]
struct CmdOptions {
filepath: String,
prefix: String,
Expand Down
5 changes: 4 additions & 1 deletion pgr-bin/src/bin/pgr-pbundle-bed2dist.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,15 @@ use std::io::{BufRead, BufReader, BufWriter, Write};
use std::path::Path;
use std::{fs::File, path};

/// Generate alignment scores between sequences using bundle decomposition from a principal bundle bed file
#[derive(Parser, Debug)]
#[clap(name = "pgr-pbundle-bed2dist")]
#[clap(author, version)]
#[clap(about = "generate alignment scores between contigs using bundle decomposition from a principal bundle bed file", long_about = None)]
#[clap(about, long_about = None)]
struct CmdOptions {
/// the path to the principal bundle bed file
bed_file_path: String,
/// the prefix of the output file
output_prefix: String,
}

Expand Down
5 changes: 4 additions & 1 deletion pgr-bin/src/bin/pgr-pbundle-bed2sorted.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,15 @@ use std::io::{BufRead, BufReader, BufWriter, Write};
use std::path::Path;
use std::{fs::File, path};

/// Generate annotation file with a sorting order from the principal bundle decomposition
#[derive(Parser, Debug)]
#[clap(name = "pgr-pbundle-bed2sorted")]
#[clap(author, version)]
#[clap(about = "sort the contig by bunldes", long_about = None)]
#[clap(about, long_about = None)]
struct CmdOptions {
/// the path to the principal bundle bed file
bed_file_path: String,
/// the prefix of the output file
output_prefix: String,
}

Expand Down
49 changes: 37 additions & 12 deletions pgr-bin/src/bin/pgr-pbundle-bed2svg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,33 +6,49 @@ use std::{fs::File, path};
use svg::node::{self, element, Node};
use svg::Document;

/// Generate SVG from a principal bundle bed file
#[derive(Parser, Debug)]
#[clap(name = "pgr-pbundle-bed2svg")]
#[clap(author, version)]
#[clap(about = "generate SVG from a principal bundle bed file", long_about = None)]
#[clap(about, long_about = None)]
struct CmdOptions {
/// the path to the principal bundle bed file
bed_file_path: String,
/// the prefix of the output file
output_prefix: String,
/// the prefix of the dendrogram file generated by pgr-pbundle-bed2dist
#[clap(long)]
ddg_file: Option<String>,
/// the path to the annotation file
#[clap(long)]
annotations: Option<String>,
/// the path to the annotation track file
#[clap(long)]
annotation_region_bedfile: Option<String>,
/// the track range in base pairs
#[clap(long, default_value_t = 100000)]
track_range: usize,
/// the track tick interval
#[clap(long, default_value_t = 10000)]
track_tick_interval: usize,
/// the track panel width in pixels
#[clap(long, default_value_t = 1600)]
track_panel_width: usize,
/// the left padding in pixels
#[clap(long)]
left_padding: Option<usize>,
/// the stroke boundary width
#[clap(long, default_value_t = 0.5)]
stroke_width: f32,
/// the stroke width for the annotation track
#[clap(long, default_value_t = 2.5)]
annotation_region_stroke_width: f32,
/// the annotation panel width
#[clap(long, default_value_t = 500.0)]
annotation_panel_width: f32,
/// the factor to increase the boundary width for highlighting repetitive bundles
#[clap(long, default_value_t = 1.0)]
highlight_repeats: f32,
}

static CMAP: [&str; 97] = [
Expand Down Expand Up @@ -237,6 +253,11 @@ fn main() -> Result<(), std::io::Error> {
let ctg_with_svg_paths: Vec<(String, (Vec<element::Path>, Vec<element::Path>, element::Text))> = ctg_data_vec
.into_iter()
.map(|(ctg, annotation,bundle_segment, annotation_segments)| {
let mut bundle_segment_count = FxHashMap::<u32, usize>::default();
bundle_segment.iter().for_each(|&(_bgn, _end, bundle_id, _direction)| {
let e = bundle_segment_count.entry(bundle_id).or_insert_with(|| 0);
*e += 1;
});

let paths: Vec<element::Path> = bundle_segment
.into_iter()
Expand All @@ -250,24 +271,26 @@ fn main() -> Result<(), std::io::Error> {
let bundle_color = CMAP[((bundle_id * 17) % 97) as usize];
let stroke_color = CMAP[((bundle_id * 47) % 43) as usize];
let arror_end = end as f32;
let halfwidth = 5.0;
let end =
if direction == 0 {
if end as f32 - 5.0 < bgn {
if end as f32 - halfwidth < bgn {
bgn
} else {
end as f32 - 5.0
end as f32 - halfwidth
}
} else if end as f32 + 5.0 > bgn {
} else if end as f32 + halfwidth > bgn {
bgn
} else {
end as f32 + 5.0
end as f32 + halfwidth
};
let bottom0 = -3_i32 + y_offset as i32;
let top0 = 3_i32 + y_offset as i32;
let bottom1 = -4_i32 + y_offset as i32;
let top1 = 4_i32 + y_offset as i32;
let center = y_offset as i32;

let bottom0 = -halfwidth * 0.6 + y_offset as f32;
let top0 = halfwidth * 0.6 + y_offset as f32;
let bottom1 = -halfwidth * 0.8 + y_offset as f32;
let top1 = halfwidth * 0.8 + y_offset as f32;
let center = y_offset as f32;
let stroke_width = stroke_width * ( if args.highlight_repeats > 1.0001 &&
*bundle_segment_count.get(&bundle_id).unwrap_or(&0) > 1 {args.highlight_repeats} else {1.0});
let path_str = format!(
"M {bgn} {bottom0} L {bgn} {top0} L {end} {top0} L {end} {top1} L {arror_end} {center} L {end} {bottom1} L {end} {bottom0} Z");
element::Path::new()
Expand Down Expand Up @@ -398,7 +421,9 @@ fn main() -> Result<(), std::io::Error> {
// println!("{}", ctg);
document.append(text);
paths.into_iter().for_each(|path| document.append(path));
annotation_paths.into_iter().for_each(|path| document.append(path));
annotation_paths
.into_iter()
.for_each(|path| document.append(path));
});
let out_path = path::Path::new(&args.output_prefix).with_extension("svg");
svg::save(out_path, &document).unwrap();
Expand Down
Loading

0 comments on commit 12b5825

Please sign in to comment.