Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New function: power_lifter and various documentation updates #20

Merged
merged 11 commits into from
Feb 9, 2024
13 changes: 8 additions & 5 deletions .github/workflows/build_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,16 @@ jobs:

- name: Check out repository
uses: actions/checkout@v3

- name: Install BiocManager
run: Rscript -e "install.packages('BiocManager')"

- name: Install BioMaesteR
run: Rscript -e "devtools::install(repos = BiocManager::repositories())"

- name: Build package
run: Rscript -e "devtools::install()"

- name: Check package
- name: Check BioMaesteR
run: Rscript -e "devtools::check(vignettes = FALSE)"

- name: Run unit tests
- name: Run unit tests (BioMaesteR)
run: Rscript -e "devtools::test()"

1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
.Rproj.user
.DS_Store
my_bed.bed
docs
6 changes: 5 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,8 @@ Depends:
Imports:
data.table,
dplyr,
stringr
GenomicRanges,
rtracklayer,
stats,
stringr,
utils
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,16 @@ export(bin_splitter)
export(cyto_ranger)
export(gene_ranger)
export(get_gene_info)
export(power_lifter)
export(purify_chr)
export(purify_regions)
export(region_ranger)
export(sanity_check_regions)
import(GenomicRanges, except = c("start", "end", "shift",
"union", "intersect", "setdiff", "update"))
import(data.table, except = c("last", "first", "between", "transpose"))
import(dplyr)
import(rtracklayer, except = c("start", "end", "offset"))
import(stats, except = c("lag", "filter"))
import(stringr)
import(utils)
11 changes: 8 additions & 3 deletions R/bin_splitter.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,15 @@
#' qstart = c(100, 200),
#' qend = c(200, 300),
#' bin_size = 10)
#' head(my_bins, 5)
#'
#' #Example 2 - Call the funciton with regions from a data frame
#' my_regions = purify_regions(these_regions = c("chr7:1000-500000", "chr8:50000-7000000"))
#' these_bins = bin_splitter(these_regions = my_regions, bin_size = 100000)
#' #Example 2 - Call the function with regions from a data frame
#' my_regions = purify_regions(these_regions = c("chr7:1000-500000",
#' "chr8:50000-7000000"))
#'
#' these_bins = bin_splitter(these_regions = my_regions,
#' bin_size = 100000)
#' head(these_bins, 5)
#'
bin_splitter = function(these_regions = NULL,
qchrom = NULL,
Expand Down
22 changes: 11 additions & 11 deletions R/cyto_ranger.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,23 +34,23 @@
#'
#' @examples
#' #Example 1 - Give the function one region as a string
#' my_region = cyto_ranger(these_regions = "chr8:127735434-127742951")
#' cyto_ranger(these_regions = "chr8:127735434-127742951")
#'
#' #Example 2 - Give the function multiple regions as a string
#' my_regions = cyto_ranger(these_regions = c("chr8:128747680-128753674",
#' "chr18:60790579-60987361"),
#' projection = "grch37")
#' cyto_ranger(these_regions = c("chr8:128747680-128753674",
#' "chr18:60790579-60987361"),
#' projection = "grch37")
#'
#' #Example 3 - Individually specify the chromosome, start and end coordinates
#' this_region = cyto_ranger(qchrom = "chr8",
#' qstart = 127735434,
#' qend = 127742951)
#' cyto_ranger(qchrom = "chr8",
#' qstart = 127735434,
#' qend = 127742951)
#'
#' #Example 4 - Individually specify multiple regions with the query parameters
#' these_regions = cyto_ranger(qchrom = c("chr8", "chr18"),
#' qstart = c(128747680, 60790579),
#' qend = c(128753674, 60987361),
#' projection = "grch37")
#' cyto_ranger(qchrom = c("chr8", "chr18"),
#' qstart = c(128747680, 60790579),
#' qend = c(128753674, 60987361),
#' projection = "grch37")
#'
cyto_ranger <- function(these_regions = NULL,
qchrom = NULL,
Expand Down
10 changes: 5 additions & 5 deletions R/gene_ranger.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,19 +43,19 @@
#'
#' @examples
#' #Example 1 - Request one gene (in Hugo format) and with default parameters
#' hugo_myc = gene_ranger(these_genes = "MYC")
#' gene_ranger(these_genes = "MYC")
#'
#' #Example 2 - Same as example one but MYC is here specified as Ensembl ID
#' ensembl_myc = gene_ranger(these_genes = "ENSG00000136997")
#' gene_ranger(these_genes = "ENSG00000136997")
#'
#' #Example 3 - Request multiple genes with non-default parameters
#' my_genes = gene_ranger(these_genes = c("MYC", "BCL2"),
#' gene_ranger(these_genes = c("MYC", "BCL2"),
#' projection = "grch37",
#' return_as = "region")
#'
#' #Example 4 - Request multiple Ensembl IDs and return in bed format
#' my_bed = gene_ranger(these_genes = c("ENSG00000136997", "ENSG00000171791"),
#' return_as = "bed")
#' gene_ranger(these_genes = c("ENSG00000136997", "ENSG00000171791"),
#' return_as = "bed")
#'
#' #Example 5 - Write to bed file
#' gene_ranger(these_genes = c("BCL2", "MYC"),
Expand Down
12 changes: 6 additions & 6 deletions R/get_gene_info.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,18 +22,18 @@
#'
#' @examples
#' #Example 1 - Query one gene (in Hugo format) and with default parameters.
#' hugo_myc = get_gene_info(these_genes = "MYC")
#' get_gene_info(these_genes = "MYC")
#'
#' #Example 2 - Same as example 1 but MYC is here specified as Ensembl ID.
#' ensembl_myc = get_gene_info(these_genes = "ENSG00000136997")
#' get_gene_info(these_genes = "ENSG00000136997")
#'
#' #Example 3 - Request multiple genes with non-default parameters
#' hugo_genes = get_gene_info(these_genes = c("MYC", "BCL2"),
#' projection = "grch37")
#' get_gene_info(these_genes = c("MYC", "BCL2"),
#' projection = "grch37")
#'
#' #Example 4 - Request multiple Ensembl IDs and return all columns.
#' ensembl_genes = get_gene_info(these_genes = c("ENSG00000136997", "ENSG00000171791"),
#' raw = TRUE)
#' get_gene_info(these_genes = c("ENSG00000136997", "ENSG00000171791"),
#' raw = TRUE)
#'
get_gene_info <- function(these_genes = NULL,
projection = "hg38",
Expand Down
86 changes: 86 additions & 0 deletions R/power_lifter.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#' @title Power Lifter
#'
#' @description A function to convert genomic regions from one assembly to another.
#'
#' @details This function is a wrapper for the [rtracklayer::liftOver] function.
#' Specify the original assembly and the target assembly, and the function will
#' convert the regions accordingly. Regions can be provided with `these_regions`
#' (a data frame or strings in the format chr:start-end), or individually with
#' `qchrom`, `qstart`, and `qend`.
#'
#' The liftOver chain is selected from `target_assembly`:
#' `hg38ToHg19.over.chain` for "grch37" and `hg19ToHg38.over.chain` for "hg38".
#' The function stops with an error if `target_assembly` is not one of these
#' two values, if `original_assembly` and `target_assembly` are identical, or
#' if the chain file cannot be located in the installed package.
#'
#' @param these_regions The region(s) to be queried. Can be a data frame with
#' regions with the following columns; chrom, start, end.
#' Or a string in the following format chr:start-end.
#' @param qchrom Query chromosome (prefixed or un-prefixed),
#' Required if `these_regions` is not provided.
#' @param qstart Query start position. Required if `these_regions` is not provided.
#' @param qend Query end position. Required if `these_regions` is not provided.
#' @param original_assembly The original assembly of the regions. Default is hg38.
#' Must differ from `target_assembly`.
#' @param target_assembly The target assembly of the regions. Either "grch37"
#' or "hg38". Default is grch37.
#'
#' @return A data frame with the regions in the selected target assembly. The
#' chromosome column is named `chrom`.
#'
#' @rawNamespace import(GenomicRanges, except = c("start", "end", "shift",
#' "union", "intersect", "setdiff", "update"))
#' @rawNamespace import(rtracklayer, except = c("start", "end", "offset"))
#' @import dplyr
#'
#' @export
#'
#' @examples
#' #Example 1 - Convert MYC region from hg38 to grch37
#' power_lifter(these_regions = "chr8:127735434-127742951")
#'
#' #Example 2 - Convert BCL2 region from grch37 to hg38
#' power_lifter(these_regions = "18:60790579-60987361",
#'              original_assembly = "grch37",
#'              target_assembly = "hg38")
#'
#' #Example 3 - Same as Example 1, but use the `qchrom`, `qstart`, and `qend`.
#' power_lifter(qchrom = "chr8",
#'              qstart = 127735434,
#'              qend = 127742951)
#'
power_lifter <- function(these_regions = NULL,
                         qchrom = NULL,
                         qstart = NULL,
                         qend = NULL,
                         original_assembly = "hg38",
                         target_assembly = "grch37") {

  #chain file shipped in inst/extdata for each supported target assembly
  chain_names <- c(grch37 = "hg38ToHg19.over.chain",
                   hg38 = "hg19ToHg38.over.chain")

  #validate the target before any work is done on the regions
  if (!is.character(target_assembly) ||
      length(target_assembly) != 1L ||
      !target_assembly %in% names(chain_names)) {
    stop("Target assembly not recognized. Please use either 'grch37' or 'hg38'.")
  }

  #the chain is picked from the target alone, so lifting an assembly onto
  #itself would silently apply the wrong chain and return shifted coordinates
  if (identical(original_assembly, target_assembly)) {
    stop("Original and target assembly are identical ('", target_assembly,
         "'). Nothing to lift over.",
         call. = FALSE)
  }

  #system.file() returns "" when the file is missing, fail with a clear message
  chain_path <- system.file("extdata",
                            chain_names[[target_assembly]],
                            package = "BioMaesteR")

  if (!nzchar(chain_path)) {
    stop("Chain file '", chain_names[[target_assembly]],
         "' was not found in the installed BioMaesteR package.",
         call. = FALSE)
  }

  #format incoming regions accordingly
  region_table <- purify_regions(these_regions = these_regions,
                                 qchrom = qchrom,
                                 qstart = qstart,
                                 qend = qend,
                                 projection = original_assembly)

  #NOTE(review): UCSC chain files name chromosomes with a "chr" prefix. If
  #purify_regions() returns un-prefixed chromosomes for grch37 input, no
  #region will match the chain and the result will be empty - TODO confirm.

  #convert incoming regions to GRanges object
  incoming <- GenomicRanges::makeGRangesFromDataFrame(region_table,
                                                      keep.extra.columns = TRUE)

  #load the liftOver chain and lift the regions
  chain_file <- rtracklayer::import.chain(chain_path)
  lifted <- rtracklayer::liftOver(incoming, chain = chain_file)

  #liftOver returns one list element per input region, flatten all lifted
  #ranges into a single data frame and name the chromosome column chrom
  lifted <- dplyr::rename(data.frame(lifted@unlistData), chrom = "seqnames")

  #deal with chr prefixes based on target assembly
  lifted <- purify_chr(incoming_table = lifted,
                       projection = target_assembly)

  return(lifted)
}
4 changes: 3 additions & 1 deletion R/purify_chr.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@
#' @examples
#' #Example 1 - Add prefixes to a data frame
#' my_data = data.frame(chrom = c("1", "2", "3"))
#' my_data = purify_chr(projection = "hg38", incoming_table = my_data)
#'
#' purify_chr(projection = "hg38",
#' incoming_table = my_data)
#'
purify_chr <- function(projection = NULL,
incoming_table = NULL) {
Expand Down
20 changes: 10 additions & 10 deletions R/purify_regions.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,22 +35,22 @@
#'
#' @examples
#' #Example 1 - Give the function one region as a string
#' my_region = purify_regions(these_regions = "chr1:100-500")
#' purify_regions(these_regions = "chr1:100-500")
#'
#' #Example 2 - Give the function multiple regions as a string
#' my_regions = purify_regions(these_regions = c("chr1:100-500", "chr2:100-500"),
#' projection = "grch37")
#' purify_regions(these_regions = c("chr1:100-500", "chr2:100-500"),
#' projection = "grch37")
#'
#' #Example 3 - Individually specify the chromosome, start and end coordinates
#' this_region = purify_regions(qchrom = "chr1",
#' qstart = 100,
#' qend = 500)
#' purify_regions(qchrom = "chr1",
#' qstart = 100,
#' qend = 500)
#'
#' #Example 4 - Individually specify multiple regions with the query parameters
#' these_regions = purify_regions(qchrom = c("chr1", "chr2"),
#' qstart = c(100, 200),
#' qend = c(500, 600),
#' projection = "grch37")
#' purify_regions(qchrom = c("chr1", "chr2"),
#' qstart = c(100, 200),
#' qend = c(500, 600),
#' projection = "grch37")
#'
purify_regions <- function(these_regions = NULL,
qchrom = NULL,
Expand Down
25 changes: 12 additions & 13 deletions R/region_ranger.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,24 +34,23 @@
#'
#' @examples
#' #Example 1 - Give the function one region as a string
#' my_region = region_ranger(these_regions = "chr8:127735434-127742951")
#' region_ranger(these_regions = "chr8:127735434-127742951")
#'
#' #Example 2 - Give the function multiple regions as a string
#' my_regions = region_ranger(these_regions = c("chr8:128747680-128753674",
#' "chr18:60790579-60987361"),
#' projection = "grch37")
#'
#' region_ranger(these_regions = c("chr8:128747680-128753674",
#' "chr18:60790579-60987361"),
#' projection = "grch37")
#'
#' #Example 3 - Individually specify the chromosome, start and end coordinates
#' this_region = region_ranger(qchrom = "chr8",
#' qstart = 127735434,
#' qend = 127742951)
#' region_ranger(qchrom = "chr8",
#' qstart = 127735434,
#' qend = 127742951)
#'
#' #Example 4 - Individually specify multiple regions with the query parameters
#' these_regions = region_ranger(qchrom = c("chr8", "chr18"),
#' qstart = c(128747680, 60790579),
#' qend = c(128753674, 60987361),
#' projection = "grch37")
#'
#' region_ranger(qchrom = c("chr8", "chr18"),
#' qstart = c(128747680, 60790579),
#' qend = c(128753674, 60987361),
#' projection = "grch37")
#'
region_ranger <- function(these_regions = NULL,
qchrom = NULL,
Expand Down
1 change: 1 addition & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ reference:
- bin_splitter
- cyto_ranger
- gene_ranger
- power_lifter
- region_ranger
- title: Helpers
desc: A collection of helper functions.
Expand Down
2 changes: 1 addition & 1 deletion docs/pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ pandoc: 3.1.1
pkgdown: 2.0.7
pkgdown_sha: ~
articles: {}
last_built: 2024-02-06T22:53Z
last_built: 2024-02-07T20:08Z
urls:
reference: https://github.com/mattssca/BioMaesteR/reference
article: https://github.com/mattssca/BioMaesteR/articles
Expand Down
23 changes: 20 additions & 3 deletions docs/reference/bin_splitter.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading