diff --git a/DESCRIPTION b/DESCRIPTION
index fcc05d1..be9ceb3 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -32,8 +32,6 @@ LazyData: true
 Imports:
     dplyr,
     ggplot2,
-    purrr,
-    tibble,
     tidyr
 LazyDataCompression: xz
 Suggests: 
diff --git a/NAMESPACE b/NAMESPACE
index 3678176..964d2eb 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,47 +1,8 @@
 # Generated by roxygen2: do not edit by hand
 
-S3method(arrange,genomic_data)
-S3method(filter,genomic_data)
-S3method(group_by,genomic_data)
-S3method(mutate,genomic_data)
-S3method(print,bed_data)
-S3method(print,maf_data)
-S3method(rename,genomic_data)
-S3method(select,genomic_data)
-S3method(ungroup,genomic_data)
-export(annotate_hotspots)
-export(assign_cn_to_ssm)
-export(bind_genomic_data)
-export(calc_mutation_frequency_bin_region)
-export(calc_mutation_frequency_bin_regions)
-export(check_excess_params)
-export(collate_results)
-export(cool_overlaps)
-export(create_bed_data)
-export(create_maf_data)
-export(create_seg_data)
-export(get_ashm_count_matrix)
-export(get_cn_segments)
-export(get_coding_ssm)
-export(get_coding_ssm_status)
 export(get_colours)
-export(get_gambl_metadata)
 export(get_genes)
-export(get_genome_build)
-export(get_manta_sv)
 export(get_mapped_colours)
-export(get_sample_cn_segments)
-export(get_ssm_by_patients)
-export(get_ssm_by_regions)
-export(get_ssm_by_samples)
-export(id_ease)
-export(preserve_genomic_attributes)
-export(process_regions)
-export(region_to_chunks)
-export(review_hotspots)
-export(strip_genomic_classes)
 import(dplyr)
 import(ggplot2)
-import(purrr)
-import(tibble)
 import(tidyr)
diff --git a/R/annotate_hotspots.R b/R/annotate_hotspots.R
deleted file mode 100644
index 4f64000..0000000
--- a/R/annotate_hotspots.R
+++ /dev/null
@@ -1,39 +0,0 @@
-#' @title Annotate Hotspots.
-#'
-#' @description Annotate MAF-like data frome with a hot_spot column indicating recurrent mutations.
-#'
-#' @details This function takes an already loaded MAF data frame with the `mutation_maf` parameter.
-#'
-#' @param mutation_maf A data frame in MAF format.
-#' @param ... Any other parameter. These parameters will be ignored.
-#'
-#' @return The same data frame with one additional column "hot_spot".
-#'
-#' @import dplyr
-#' @export
-#'
-#' @examples
-#' my_metadata = get_gambl_metadata()
-#' all_coding_ssm = get_coding_ssm(these_samples_metadata = my_metadata,
-#'                                 projection = "grch37",
-#'                                 this_seq_type = "genome")
-#'
-#' hot_ssms = annotate_hotspots(all_coding_ssm)
-#'
-annotate_hotspots = function(
-        mutation_maf,
-        ...
-){
-
-    # check if any invalid parameters are provided
-    check_excess_params(...)
-
-    filled_coords <- GAMBLR.data::hotspots_annotations
-    # just the ssms that match these coordinates!
-    hot_ssms <- left_join(
-        mutation_maf,
-        filled_coords,
-        by = c("Chromosome", "Start_Position")
-    )
-    return(hot_ssms)
-}
diff --git a/R/assign_cn_to_ssm.R b/R/assign_cn_to_ssm.R
deleted file mode 100644
index 8faa091..0000000
--- a/R/assign_cn_to_ssm.R
+++ /dev/null
@@ -1,143 +0,0 @@
-#' @title Assign CN to SSM.
-#'
-#' @description Annotate mutations with their copy number information.
-#'
-#' @details This function takes a sample ID with the `this_sample_id` parameter
-#'      and annotates mutations with copy number information. A variety of
-#'      parameters are at hand for a customized workflow. For example,
-#'      the user can specify if only coding mutations are of interest. To do so,
-#'      set `coding_only = TRUE`. This function internally calls
-#'      `get_ssm_by_samples` and `get_sample_cn_segments`. This function can
-#'      also take a vector with genes of interest (`genes`) that the returned
-#'      data frame will be restricted to.
-#'
-#' @param this_sample_id Sample ID of the sample you want to annotate.
-#' @param genes A vector of characters with gene symbols (Hugo).
-#' @param this_seq_type Specified seq type for returned data. Default is genome.
-#' @param projection Specified genome projection that returned data is in
-#'      reference to. Default is grch37.
-#' @param coding_only Optional. Set to TRUE to restrict to only coding variants
-#'      (ssm). Deafult is FALSE.
-#' @param assume_diploid Optional, this parameter annotates every mutation as
-#'      copy neutral. Default is FALSE.
-#' @param include_silent Logical parameter indicating whether to include silent
-#'      mutations into coding mutations. Default is FALSE. This parameter only
-#'      makes sense if `coding_only` is set to TRUE.
-#' @param ... Any additional parameters.
-#'
-#' @return A list containing a data frame (MAF-like format) with three extra
-#'      columns:
-#'      - log.ratio is the log ratio from the seg file (NA when no overlap).
-#'      - LOH
-#'      - CN (the rounded absolute copy number estimate of the region based on
-#'          log.ratio, NA when no overlap was found).
-#'
-#' @import dplyr
-#' @export
-#'
-#' @examples
-#' cn_list = assign_cn_to_ssm(
-#'      this_sample_id = "DOHH-2",
-#'      coding_only = TRUE
-#' )
-#'
-assign_cn_to_ssm = function(
-    this_sample_id,
-    genes,
-    this_seq_type = "genome",
-    projection = "grch37",
-    coding_only = FALSE,
-    assume_diploid = FALSE,
-    include_silent = FALSE,
-    ...
-){
-
-    #warn/notify the user what version of this function they are using
-    message("Using the bundled CN segments (.seg) calls in GAMBLR.data...")
-
-    #check if any invalid parameters are provided
-    check_excess_params(...)
-
-    #ensure only one sample ID is provided
-    if(length(this_sample_id) > 1){
-        stop(
-            "This function only supports queries of 1 sample ID at the time..."
-        )
-    }
-
-    #get maf
-    maf_sample = get_ssm_by_sample(
-        this_sample_id = this_sample_id,
-        projection = projection,
-        this_seq_type = this_seq_type
-    )
-
-    #maf filtering
-    #silent mutations
-    if(!include_silent){
-        coding_class = coding_class[coding_class != "Silent"]
-    }
-
-    #coding mutations
-    if(coding_only){
-        maf_sample = dplyr::filter(
-            maf_sample,
-            Variant_Classification %in% coding_class
-        )
-    }
-
-    #subset to genes of interest
-    if(!missing(genes)){
-        maf_sample = dplyr::filter(maf_sample, Hugo_Symbol %in% genes)
-        if(nrow(maf_sample) == 0){
-            stop("No variants left after filtering on the provided genes...")
-        }
-    }
-
-    #get seg
-    seg_sample = get_sample_cn_segments(
-        these_sample_ids = this_sample_id,
-        projection = projection,
-        this_seq_type = this_seq_type
-    )
-
-    #annotate all CN segments as copy number neutral
-    if(assume_diploid){
-        diploid = dplyr::mutate(maf_sample, CN = 2)
-        return(list(maf = diploid))
-    }
-
-    #wrangle the seg file
-    seg_sample = seg_sample %>%
-        dplyr::filter(end - start > 100) %>%
-        mutate(chrom = gsub("chr", "", chrom)) %>%
-        rename(
-            Chromosome = chrom,
-            Start_Position = start,
-            End_Position = end,
-            LOH = LOH_flag
-        ) %>%
-        mutate(across(LOH, as.factor))
-
-    #perform an overlap join and add CN columns from the seg file and subset
-    # MAF to basic columns (first 45)
-    maf_tmp = cool_overlaps(maf_sample, seg_sample, type = "any")
-
-    #rename and change order of columns to match expected format
-    maf_with_segs = maf_tmp %>%
-        rename(
-            Start_Position = Start_Position.x,
-            End_Position = End_Position.x
-        ) %>%
-        dplyr::select(
-            colnames(maf_sample),
-            LOH, log.ratio, CN
-        )
-
-    return(
-        list(
-            maf = maf_with_segs,
-            seg = seg_sample
-        )
-    )
-}
diff --git a/R/calc_mutation_frequency_bin_region.R b/R/calc_mutation_frequency_bin_region.R
deleted file mode 100644
index 1082a68..0000000
--- a/R/calc_mutation_frequency_bin_region.R
+++ /dev/null
@@ -1,294 +0,0 @@
-#' @title Calculate Mutation Frequency By Sliding Window.
-#'
-#' @description Count the number of mutations in a sliding window across a 
-#' region for all samples.
-#'
-#' @details This function is called to return the mutation frequency for a given
-#' region, either from a provided input maf data frame or from the GAMBL maf data.
-#' Regions are specified with the `region` parameter. Alternatively, the region of
-#' interest can also be specified by calling the function with `chromosome`,
-#' `start_pos`, and `end_pos` parameters. This function operates on a single region. 
-#' To return a matrix of sliding window counts over multiple regions, 
-#' see `calc_mutation_frequency_bin_regions`.
-#'
-#' @param region A string describing a genomic region in the "chrom:start-end" format. 
-#' The region must be specified in this format OR as separate chromosome, start_pos, end_pos arguments.
-#' @param chromosome Chromosome name in region.
-#' @param start_pos Start coordinate of region.
-#' @param end_pos End coordinate of region.
-#' @param these_samples_metadata Optional data frame containing a sample_id column. 
-#' If not providing a maf file, seq_type is also a required column.
-#' @param these_sample_ids Optional vector of sample IDs. Output will be subset 
-#' to IDs present in this vector.
-#' @param this_seq_type Optional vector of seq_types to include in heatmap. 
-#' Default is "genome". Uses default seq_type priority for samples 
-#' with >1 seq_type.
-#' @param maf_data Optional maf data frame. Will be subset to rows where 
-#' Tumor_Sample_Barcode matches provided sample IDs or metadata table. 
-#' If not provided, maf data will be obtained with get_ssm_by_regions().
-#' @param projection Specify which genome build to use. Required. Default grch37.
-#' @param slide_by Slide size for sliding window. Default 100.
-#' @param window_size Size of sliding window. Default 1000.
-#' @param return_format Return format of mutations. Accepted inputs are "long"
-#' and "wide". Long returns a data frame of one sample ID/window per row. 
-#' Wide returns a matrix with one sample ID per row and one window per column. 
-#' Using the "wide" format will retain all samples and windows regardless of 
-#' the drop_unmutated or min_count_per_bin parameters.
-#' @param min_count_per_bin Minimum counts per bin, default is 0. Setting this 
-#' greater than 0 will drop unmutated windows only when return_format is long.
-#' @param return_count Boolean statement to return mutation count per window (TRUE) 
-#' or binary mutated/unmutated status (FALSE). Default is TRUE.
-#' @param drop_unmutated Boolean for whether to drop windows with 0 mutations. 
-#' Only effective with "long" return format.
-#' @param ... Any additional parameters.
-#' 
-#' @return Either a matrix or a long tidy table of counts per window.
-#'
-#' @import dplyr tidyr
-#' @export
-#'
-#' @examples
-#' myc_mut_freq = calc_mutation_frequency_bin_region(region = "8:128747680-128753674",
-#'                                                   slide_by = 10,
-#'                                                   window_size = 10000)
-#'
-calc_mutation_frequency_bin_region <- function(region,
-                                               chromosome,
-                                               start_pos,
-                                               end_pos,
-                                               these_samples_metadata = NULL,
-                                               these_sample_ids = NULL,
-                                               this_seq_type = "genome",
-                                               maf_data = NULL,
-                                               projection = "grch37",
-                                               slide_by = 100,
-                                               window_size = 1000,
-                                               return_format = "long",
-                                               min_count_per_bin = 0,
-                                               return_count = TRUE,
-                                               drop_unmutated = FALSE,
-                                               ...) {
-  
-  #check if any invalid parameters are provided
-  check_excess_params(...)
-  
-  # Create objects to describe region both as string and individual objects
-  try(if (missing(region) & missing(chromosome)) {
-    stop("No region information provided. Please provide a region as a string in the chrom:start-end format, or as individual arguments. ")
-  })
-  
-  if ((drop_unmutated | min_count_per_bin > 0) & return_format == "wide") {
-    message("To return a wide table, all samples and windows must be kept. Ignoring drop_unmutated and min_count_per_bin arguments. ")
-  }
-  
-  if (missing(region)) {
-    region <- paste0(
-      chromosome, ":", start_pos, "-",
-      end_pos
-    )
-  } else {
-    chunks <- region_to_chunks(region)
-    chromosome <- chunks$chromosome
-    start_pos <- as.numeric(chunks$start)
-    end_pos <- as.numeric(chunks$end)
-  }
-  
-  # Harmonize metadata and sample IDs
-  metadata <- id_ease(
-    these_samples_metadata,
-    these_sample_ids,
-    this_seq_type
-  )
-  these_sample_ids <- metadata$sample_id
-  
-  
-  if (
-    (grepl("chr", chromosome) & projection == "grch37") |
-    (!grepl("chr", chromosome) & projection == "hg38")
-  ) {
-    stop("chr prefixing status of region and specified projection don't match. ")
-  }
-  
-  
-  # Check region size and compare to max region size
-  # Is this really needed?
-  max_region <- 5e+06
-  
-  region_size <- end_pos - start_pos
-  if (region_size < max_region) {
-    message(paste(
-      "processing bins of size", window_size,
-      "across", region_size, "bp region"
-    ))
-  } else {
-    message(paste("CAUTION!\n", region_size, "exceeds maximum size recommended by this function."))
-  }
-  
-  # Split region into windows
-  windows <- data.frame(
-    chrom = chromosome,
-    window_start = seq(start_pos, end_pos, by = slide_by)
-  ) %>%
-    dplyr::mutate(window_end = window_start + window_size - 1) %>%
-    dplyr::select(chrom, window_start, window_end)
-  
-  # Option to return full region count instead of sliding window
-  if (window_size == 0) {
-    windows <- data.frame(
-      chrom = chromosome,
-      window_start = start_pos,
-      window_end = end_pos
-    )
-  }
-  
-  # Obtain SSM coordinates from GAMBL if no maf_data was provided
-  if (is.null(maf_data)) {
-    try(
-      if (!"seq_type" %in% colnames(metadata)) {
-        stop("seq_type must be present in metadata for compatibility with get_ssm_by_samples")
-      }
-    )
-    message("Using GAMBLR.data::get_ssm_by_region...")
-    region_ssm <- list()
-    for (st in unique(metadata$seq_type)) {
-      this_seq_type <- get_ssm_by_region(
-        region = region,
-        projection = projection,
-        streamlined = FALSE,
-        this_seq_type = st
-      ) %>%
-        dplyr::mutate(end = Start_Position + 1) %>%
-        dplyr::select(
-          chrom = Chromosome,
-          start = Start_Position,
-          end,
-          sample_id = Tumor_Sample_Barcode
-        ) %>%
-        dplyr::mutate(mutated = 1, seq_type = st) %>%
-        dplyr::filter(sample_id %in% these_sample_ids)
-      region_ssm[[st]] <- data.frame(metadata) %>%
-        dplyr::select(sample_id, seq_type) %>%
-        dplyr::filter(seq_type == st) %>%
-        dplyr::left_join(this_seq_type, by = c("sample_id", "seq_type")) %>%
-        dplyr::filter(!is.na(mutated)) %>%
-        dplyr::select(-seq_type)
-    }
-    region_ssm <- dplyr::bind_rows(region_ssm)
-  } else {
-    #  Subset provided maf to specified region
-    message("Using provided maf...")
-    region_bed <- data.frame(
-      "chrom" = as.character(chromosome),
-      "start" = as.numeric(start_pos),
-      "end" = as.numeric(end_pos)
-    )
-    region_ssm <- cool_overlaps(
-            maf_data, region_bed,
-            columns2 = c("chrom", "start", "end")
-        ) %>%
-      dplyr::filter(!is.na(Start_Position)) %>%
-      dplyr::mutate(end = Start_Position - 1) %>%
-      dplyr::select(
-        chrom = Chromosome,
-        start = Start_Position,
-        end,
-        sample_id = Tumor_Sample_Barcode
-      ) %>%
-      dplyr::mutate(mutated = 1)
-    
-    region_ssm <- data.frame(metadata) %>%
-      dplyr::select(sample_id) %>%
-      dplyr::left_join(region_ssm) %>%
-      dplyr::filter(!is.na(mutated))
-  }
-  
-  # Check if the region is empty.
-  # If yes return NULL so that running this function with lapply will allow bind_rows to run on the output.
-  if (nrow(region_ssm) == 0 & (drop_unmutated | min_count_per_bin > 0)) {
-    message(paste0("No mutations found in region ", region, " for this sample set. "))
-    return(NULL)
-  }
-  
-  # Count mutations per window
-  windows_tallied <- dplyr::inner_join(
-    windows,
-    region_ssm,
-    by = "chrom"
-  ) %>%
-    dplyr::filter(
-      start >= window_start,
-      start <= window_end
-    ) %>%
-    dplyr::group_by(
-      sample_id,
-      window_start
-    ) %>%
-    dplyr::tally() %>%
-    dplyr::ungroup() %>%
-    dplyr::full_join(select(metadata, sample_id)) %>%
-    dplyr::arrange(sample_id) %>%
-    dplyr::full_join(select(windows, window_start)) %>%
-    dplyr::distinct() %>%
-    tidyr::pivot_wider(
-      names_from = window_start,
-      values_from = n,
-      values_fill = 0
-    ) %>%
-    dplyr::select(-matches("^NA$")) %>%
-    tidyr::pivot_longer(
-      -c(sample_id),
-      names_to = "window_start",
-      values_to = "n"
-    ) %>%
-    dplyr::distinct() %>%
-    dplyr::filter(!is.na(sample_id))
-  
-  # Remove unmutated windows if requested
-  if (drop_unmutated | min_count_per_bin > 0) {
-    windows_tallied <- windows_tallied %>%
-      dplyr::filter(n >= min_count_per_bin)
-    if (drop_unmutated & min_count_per_bin == 0) {
-      windows_tallied %>%
-        dplyr::filter(n > 0)
-    }
-  }
-  
-  # Create requested data output format
-  if (return_count) {
-    # Return table of mutation counts per bin
-    windows_tallied_final <- mutate(
-      windows_tallied,
-      bin = paste0(chromosome, "_", window_start)
-    ) %>%
-      dplyr::mutate(mutation_count = n) %>%
-      dplyr::select(
-        sample_id,
-        bin,
-        mutation_count
-      )
-  } else {
-    # Return table of binary mutated/unmutated status per bin
-    windows_tallied_final <- mutate(
-      windows_tallied,
-      bin = paste0(chromosome, "_", window_start)
-    ) %>%
-      dplyr::mutate(mutated = ifelse(n > 0, 1, 0)) %>%
-      dplyr::select(
-        sample_id,
-        bin,
-        mutated
-      )
-  }
-  
-  if (return_format == "wide") {
-    widened <- windows_tallied_final %>%
-      tidyr::pivot_wider(
-        names_from = bin,
-        values_from = matches("mutat"),
-        values_fill = 0
-      )
-    return(widened)
-  } else {
-    return(windows_tallied_final)
-  }
-}
diff --git a/R/calc_mutation_frequency_bin_regions.R b/R/calc_mutation_frequency_bin_regions.R
deleted file mode 100644
index 540a13d..0000000
--- a/R/calc_mutation_frequency_bin_regions.R
+++ /dev/null
@@ -1,147 +0,0 @@
-#' @title Mutation counts across sliding windows for multiple regions.
-#'
-#' @description Obtain a long tidy or wide matrix of mutation counts across
-#' sliding windows for multiple regions.
-#'
-#' @details This function takes a metadata table with `these_samples_metadata` 
-#' parameter and internally calls `calc_mutation_frequency_bin_region` 
-#' (that internally calls `get_ssm_by_regions`).
-#' to retrieve mutation counts for sliding windows across one or more regions. 
-#' May optionally provide any combination of a maf data frame, existing metadata,
-#' or a regions data frame or named vector.
-#'
-#' @param regions_list Named vector of regions in the format 
-#' c(name1 = "chr:start-end", name2 = "chr:start-end"). If neither `regions` nor 
-#' `regions_bed` is specified, the function will use GAMBLR aSHM region information.
-#' @param regions_bed Data frame of regions with four columns (chrom, start, end, name).
-#' @param these_samples_metadata Metadata with at least sample_id column. 
-#' If not providing a maf data frame, seq_type is also required.
-#' @param these_sample_ids Vector of sample IDs. Metadata will be subset to
-#' sample IDs present in this vector.
-#' @param this_seq_type Optional vector of seq_types to include in heatmap. 
-#' Default "genome". Uses default seq_type priority for samples with >1 seq_type.
-#' @param maf_data Optional maf data frame. Will be subset to rows where 
-#' Tumor_Sample_Barcode matches provided sample IDs or metadata table. 
-#' If not provided, maf data will be obtained with get_ssm_by_regions().
-#' @param region_padding Amount to pad the start and end coordinates by. Default 1000.
-#' @param projection Genome build the function will operate in. Ensure this 
-#' matches your provided regions and maf data for correct chr prefix handling. Default "grch37".
-#' @param drop_unmutated Whether to drop bins with 0 mutations. If returning a 
-#' matrix format, this will only drop bins with no mutations in any samples.
-#' @param skip_regions Optional character vector of genes to exclude from the default aSHM regions.
-#' @param only_regions Optional character vector of genes to include from the default aSHM regions.
-#' @param slide_by Slide size for sliding window. Default 100.
-#' @param window_size Size of sliding window. Default 500.
-#' @param return_format Return format of mutations. Accepted inputs are "long" and 
-#' "wide". Long returns a data frame of one sample ID/window per row. Wide returns 
-#' a matrix with one sample ID per row and one window per column. Using the "wide" 
-#' format will retain all samples and windows regardless of the drop_unmutated or 
-#' min_count_per_bin parameters. Default wide.
-#' @param ... Any additional parameters.
-#' 
-#' @return A table of mutation counts for sliding windows across one or more regions. May be long or wide.
-#'
-#' @import dplyr tidyr tibble
-#' @export
-#'
-#' @examples
-#' #get some regions
-#' these_regions <- process_regions(only_regions = c("MYC", "BCL2", "BCL6"))
-#' reg_vec <- these_regions$regions_list
-#' reg_bed <- these_regions$regions_bed
-#'
-#' # use a set of user defined regions (from genes) and
-#' # calculate mut frequency across all available samples
-#' mult_freq_all = calc_mutation_frequency_bin_regions(regions_list = reg_vec)
-#' mult_freq_all = calc_mutation_frequency_bin_regions(regions_bed = reg_bed)
-#'
-#' #restrict the analysis to specific samples using the metadata
-#' my_meta = get_gambl_metadata() %>% 
-#'               dplyr::filter(pathology %in% c("DLBCL","FL"))
-#' mult_reg_freq_fl_dlbcl = calc_mutation_frequency_bin_regions(regions_list = reg_vec,
-#'                                                           these_sample_ids = "DOHH-2")
-#'
-calc_mutation_frequency_bin_regions <- function(regions_list = NULL,
-                                                regions_bed = NULL,
-                                                these_samples_metadata = NULL,
-                                                these_sample_ids = NULL,
-                                                this_seq_type = "genome",
-                                                maf_data = NULL,
-                                                projection = "grch37",
-                                                region_padding = 1000,
-                                                drop_unmutated = FALSE,
-                                                skip_regions = NULL,
-                                                only_regions = NULL,
-                                                slide_by = 100,
-                                                window_size = 500,
-                                                return_format = "wide",
-                                                ...){
-
-  #check if any invalid parameters are provided
-  check_excess_params(...)
-  
-  regions <- process_regions(regions_list = regions_list,
-                             regions_bed = regions_bed,
-                             region_padding = region_padding,
-                             skip_regions = skip_regions,
-                             only_regions = only_regions)
-  
-  regions_bed <- regions$regions_bed
-  regions <- regions$regions_list
-  
-  if (
-    (grepl("chr", regions_bed$chrom[1]) & projection == "grch37") |
-    (!grepl("chr", regions_bed$chrom[1]) & projection == "hg38")
-  ) {
-    stop("chr prefixing status of provided regions and specified projection don't match. ")
-  }
-  # Harmonize metadata and sample IDs
-  metadata <- id_ease(
-    these_samples_metadata,
-    these_sample_ids,
-    this_seq_type
-  )
-  
-  these_sample_ids <- metadata$sample_id
-  
-  # Obtain sliding window mutation frequencies for all regions
-  dfs <- mclapply(names(regions), function(x) {
-    df <- calc_mutation_frequency_bin_region(
-      region = regions[x],
-      these_samples_metadata = metadata,
-      maf_data = maf_data,
-      projection = projection,
-      drop_unmutated = drop_unmutated,
-      slide_by = slide_by,
-      window_size = window_size,
-      min_count_per_bin = 0,
-      return_count = TRUE,
-      ...
-    ) %>%
-      dplyr::mutate(name = x)
-    return(df)
-  })
-  
-  all <- dplyr::bind_rows(dfs) %>%
-    dplyr::distinct(bin, sample_id, .keep_all = TRUE)
-  
-  # If none of the samples are mutated, return the mutation frequency df and exit.
-  if (max(all$mutation_count) == 0) {
-    message("No mutations found in specified regions for specified samples. Exiting. ")
-    return(all)
-  }
-  
-  if (return_format == "wide") {
-    # Convert mutation frequency table to a matrix
-    all_wide <- all %>%
-      dplyr::select(sample_id, mutation_count, bin) %>%
-      pivot_wider(
-        names_from = bin,
-        values_from = mutation_count,
-        values_fill = 0
-      )
-    return(all_wide)
-  } else {
-    return(all)
-  }
-}
diff --git a/R/check_excess_params.R b/R/check_excess_params.R
deleted file mode 100644
index 68eea07..0000000
--- a/R/check_excess_params.R
+++ /dev/null
@@ -1,26 +0,0 @@
-#' @title Check Excess Params
-#' 
-#' @description Function for checking excessive parameter names.
-#' This function will notify the user if any unavailable parameters are called for any given given function.
-#' This function is designed to work as internal function-call in already available GAMBLR functions.
-#' 
-#' @details Catch function calls containing unsupported arguments.
-#'
-#' @param ... Parameters to check.
-#'
-#' @return Nothing
-#' 
-#' @export
-#'
-check_excess_params = function(...){
-  callingFun = as.list(sys.call(-1))[[1]]
-  arguments <- list(...)
-  extraneous = names(arguments)
-  if(length(arguments)>0){
-    k <- gettextf("Warning: You have given one or more unsupported or deprecated arguments to %s and they are going to be ignored. Please check the documentation and spelling of your arguments.\nIgnored argument(s): %s.",
-                  as.character(callingFun), 
-                  paste(extraneous, collapse = ", "))
-    message(k)
-  }
-  
-}
diff --git a/R/collate_results.R b/R/collate_results.R
deleted file mode 100644
index 8459ccb..0000000
--- a/R/collate_results.R
+++ /dev/null
@@ -1,83 +0,0 @@
-#' @title Collate Results
-#'
-#' @description Bring together collated results for a selection of gambl samples.
-#'
-#' @details Currently, this function only gathers QC metrics (`mirage_metrics`) as the only collated result.
-#' Potentially, in the future, additional collated results can be added by this function as well.  
-#'
-#' @param sample_table A vector of characters with sample IDs, or a data frame with sample IDs in a column (sample_id). 
-#' If provided, this will overwrite any sample subsets provided these_samples_metadata.
-#' @param these_samples_metadata A metadata table with sample IDs of interest. 
-#' If not provided, the function will get metadata for all available samples. 
-#' This parameter is intended to use in combination with `join_with_full_metadata`.
-#' @param join_with_full_metadata Set to TRUE to horizontally expand metadata with QC results. 
-#' Default is FALSE. If `these_samples_metadata` is provided, collated resutls will be added to this metadata table.
-#' If not provided, the function will join collated results with all available metadata in the specified seq_type (`seq_type_filter`).
-#' @param seq_type_filter Filtering criteria for `get_gambl_metadata` if `these_samples_metadata` is not provided, default is genomes and captures. 
-#' @param ... Any additional parameters.
-#'
-#' @return A data frame with collated results.
-#'
-#' @import dplyr
-#' 
-#' @export
-#'
-#' @examples
-#' #load packages
-#' library(dplyr)
-#' 
-#' #return collated results for all available samples
-#' all_collated = collate_results()
-#'
-#' #return available collated results for a metadata subset
-#' fl_collated = collate_results(
-#'  these_samples_metadata = get_gambl_metadata(
-#'    seq_type_filter = "genome") %>% 
-#'    dplyr::filter(pathology == "FL"))
-#'
-#' #horizontally expand a metadata subset with collated results
-#' fl_meta_collated = collate_results(
-#'  join_with_full_metadata = TRUE, 
-#'  these_samples_metadata = get_gambl_metadata(
-#'    seq_type_filter = "genome") %>% 
-#'    dplyr::filter(pathology == "FL"))
-#'
-#' #horizontally expand all available metadata with collated results
-#' all_meta_collated = collate_results(join_with_full_metadata = TRUE)
-#' 
-collate_results = function(sample_table,
-                           these_samples_metadata,
-                           join_with_full_metadata = FALSE,
-                           seq_type_filter = c("genome", "capture"),
-                           ...){
-  
-  #check if any invalid parameters are provided
-  check_excess_params(...)
-  
-  #warn/notify the user what version of this function they are using
-  message("Using the bundled collated results in GAMBLR.data...")
-  
-  if(missing(these_samples_metadata)){
-    these_samples_metadata = get_gambl_metadata(seq_type_filter = seq_type_filter)
-  }
-  
-  if(missing(sample_table)){
-    sample_table = these_samples_metadata %>% 
-      pull(sample_id)
-  }else{
-    if(is.data.frame(sample_table)){
-      sample_table = sample_table$sample_id
-    }
-  }
-  
-  #read mirage metrics and subset to the sample IDs (in sample_table) we have QC data for
-  collated = GAMBLR.data::mirage_metrics %>%
-    dplyr::filter(sample_id %in% sample_table)
-
-  #horizontally expand the provided metadata with QC results
-  if(join_with_full_metadata){
-    full_table = left_join(these_samples_metadata, collated)
-    return(full_table)
-  }
-  return(collated)
-}
diff --git a/R/cool_overlaps.R b/R/cool_overlaps.R
deleted file mode 100644
index 12d8289..0000000
--- a/R/cool_overlaps.R
+++ /dev/null
@@ -1,231 +0,0 @@
-#' @title Cool overlap of data frames.
-#'
-#' @description This function implements overlap of 2 data frames that contain
-#' regions of coordinates similar to what data.table::foverlaps does. Unlike
-#' foverlaps, this function takes as input data frame class objects, and relies
-#' on dplyr solution rather than data.table handling, therefore allowing usage
-#' of data frames with virtually unlimited dimensions without crashing. This
-#' implementation uses same logic of different types of overlaps as the original
-#' foverlaps solution ("any", "start", "end", "within", "equal"). The type "any"
-#' is default and allows for any overlapping solution between 2 regions. The
-#' type "start" only considers regions with exact same start position as
-#' overlap; similarly type "end" considers regions overlapped when the end
-#' positions are exact matches. Type "within" means that regions are overlapped
-#' when one is contained in another and neither start nor end positions match.
-#' Finally, type "equal" only considers overlap when both start and end
-#' positions match for both regions. For any type, the presence of any
-#' additional column not directly specifying regions (for example, Chromosome)
-#' will serve similar to a grouping variable.
-#' The generated output of this function will contain the overlapping regions
-#' and all columns present in the data frame data1, as well as any columns from
-#' the data frame supplied with data2 argument, except for those columns present
-#' in data2 that are used for overlap. When the same columns are present in both
-#' data1 and data2, the output data frame will have ".x" and ".y" suffixes to
-#' indicate which original input data they are coming from.
-#'
-#' @param data1 Data frame with data to overlap. Required parameter. The minimal
-#'      required columns are those supplied with the argument columns1. Will
-#'      dictate the naming of the columns used for overlap in the output.
-#' @param data2 Data frame with data to overlap. Required parameter. The minimal
-#'      required columns are those supplied with the argument columns2.
-#' @param columns1 The list of columns from data frame data1 to be used to find
-#'      overlapping regions.
-#' @param columns2 The list of columns from data frame data2 to be used to find
-#'      overlapping regions.
-#' @param type Character specifying the way to find overlaps. Accepted values
-#'      are "any" (used as default), "start", "end", "within", and "equal".
-#'      Please see function description for more details of different types.
-#' @param nomatch Whether the rows from data1 that do not have overlap in data2
-#'      should be returned or not. The default is FALSE (rows without overlap
-#'      are not returned). If TRUE is specified, the row order in the output
-#'      data will match the exact order of rows in the input data1.
-#'
-#' @return data frame
-#'
-#' @examples
-#' # obtain maf data
-#' maf1 <- get_coding_ssm(
-#'     these_sample_ids = "DOHH-2"
-#' )
-#'
-#' maf2 <- get_coding_ssm(
-#'     these_sample_ids = "SU-DHL-4"
-#' )
-#'
-#' # The same mutations are not expected to be present in different samples
-#' # so this overlap will produce 0 matching rows
-#' overlap <- cool_overlaps(
-#'     maf1,
-#'     maf1,
-#'     type = "equal"
-#' )
-#'
-#' # To demonstrate functionality we can supply the same maf to the data2
-#' overlap <- cool_overlaps(
-#'     maf1,
-#'     maf1 %>% head
-#' )
-#'
-#' # We can also overlap different formats, for example
-#' seg1 <- get_sample_cn_segments(these_sample_ids = "DOHH-2")
-#' overlap <- cool_overlaps(
-#'     data1 = maf1,
-#'     data2 = seg1,
-#'     columns2 = c("chrom", "start", "end")
-#' )
-#'
-#' @import dplyr tidyr
-#' @export
-#'
-cool_overlaps <- function(
-    data1,
-    data2,
-    columns1 = c("Chromosome", "Start_Position", "End_Position"),
-    columns2 = c("Chromosome", "Start_Position", "End_Position"),
-    type = "any",
-    nomatch = FALSE
-){
-
-    # Ensure all columns provided for overlap are present in the data frame
-    if(! length(columns1) == length(intersect(columns1, colnames(data1)))){
-        stop(
-            "Not all of the requested columns for overlap in data1 are present."
-        )
-    }
-
-    if(! length(columns2) == length(intersect(columns2, colnames(data2)))){
-        stop(
-            "Not all of the requested columns for overlap in data2 are present."
-        )
-    }
-
-    # What is the name of the column in columns1 that specifies start and end?
-    start1 <- columns1[grepl("start", columns1, ignore.case = TRUE)]
-    end1 <- columns1[grepl("end", columns1, ignore.case = TRUE)]
-
-    # What is the name of the column in columns1 that specifies start and end?
-    start2 <- columns2[grepl("start", columns2, ignore.case = TRUE)]
-    end2 <- columns2[grepl("end", columns2, ignore.case = TRUE)]
-
-    # What are the other columns to be used in overlap?
-    columns1 <- columns1[!columns1 %in% c(start1, end1)]
-    columns2 <- columns2[!columns2 %in% c(start2, end2)]
-
-    # When the same columns are provided they will become .x and .y
-    original_start1 <- start1
-    original_end1 <- end1
-    if(start1 == start2) {
-        start1 <- paste0(start1, ".x")
-        start2 <- paste0(start2, ".y")
-
-    }
-    if(end1 == end2) {
-        end1 <- paste0(end1, ".x")
-        end2 <- paste0(end2, ".y")
-
-    }
-
-
-    # Prepare for overlap
-    overlap <- dplyr::inner_join(
-        data1,
-        data2,
-        by = structure(names = columns1, .Data = columns2),
-        relationship = "many-to-many"
-    )
-
-    # Return matches based on mode
-    if(type == "any"){
-        message(
-            "Running in default mode of any..."
-        )
-        overlap <- overlap %>%
-            dplyr::filter(
-                !!sym(start2) >= !!sym(start1) & !!sym(end2) <= !!sym(end1) |
-                !!sym(start1) >= !!sym(start2) & !!sym(end1) <= !!sym(end2)
-            )
-    } else if (type == "start"){
-        message(
-            "Running in the mode start..."
-        )
-        overlap <- overlap %>%
-            dplyr::filter(
-               !!sym(start1) == !!sym(start2)
-            )
-    } else if (type == "end"){
-        message(
-            "Running in the mode end..."
-        )
-        overlap <- overlap %>%
-            dplyr::filter(
-               !!sym(end1) == !!sym(end2)
-            )
-    } else if (type == "within"){
-        message(
-            "Running in the mode within..."
-        )
-        overlap <- overlap %>%
-            dplyr::filter(
-               (!!sym(start1) >= !!sym(start2)) & (!!sym(end1) <= !!sym(end2)) |
-               (!!sym(start2) >= !!sym(start1)) & (!!sym(end2) <= !!sym(end1))
-            )
-    } else if (type == "equal"){
-        message(
-            "Running in the mode equal..."
-        )
-        overlap <- overlap %>%
-            dplyr::filter(
-               (!!sym(start1) == !!sym(start2)) & (!!sym(end1) == !!sym(end2))
-            )
-    } else {
-        message(
-            "You have requested mode that is not supported."
-        )
-        stop(
-            "Please supply one of any, start, end, within, or equal with type."
-        )
-    }
-
-    # This will ensure that features from data1 that don't have match in data2
-    # will be returned with NA annotation
-    if(nomatch){
-        no_annotation <- suppressMessages(
-            anti_join(
-                data1,
-                overlap
-            )
-        )
-        if(original_start1 %in% colnames(no_annotation)){
-            colnames(no_annotation) = gsub(
-                original_start1,
-                start1,
-                colnames(no_annotation)
-            )
-        }
-        if(original_end1 %in% colnames(no_annotation)){
-            colnames(no_annotation) = gsub(
-                original_end1,
-                end1,
-                colnames(no_annotation)
-            )
-        }
-        overlap <- bind_rows(
-            overlap,
-            no_annotation
-        )
-
-        # Ensure order is consistent between input data and the output after
-        # overlap is found since we used bind_rows
-        data1 <- data1 %>%
-            tidyr::unite("row_id", 1:ncol(data1), remove = FALSE)
-
-        colnames(overlap) <- gsub("\\.x$", "", colnames(overlap))
-        overlap <- overlap %>%
-            tidyr::unite("row_id", 1:(ncol(data1)-1), remove = FALSE) %>%
-            dplyr::arrange(match(row_id, data1$row_id)) %>%
-            dplyr::select(-row_id)
-
-    }
-
-    return(overlap)
-}
diff --git a/R/data_comp.R b/R/data_comp.R
index c6126bf..c21a25e 100644
--- a/R/data_comp.R
+++ b/R/data_comp.R
@@ -20,7 +20,7 @@
 #'
 #' @return A character vector of gene symbol or Ensembl IDs or a data frame.
 #'
-#' @import dplyr
+#' @import dplyr tidyr
 #' @export
 #'
 #' @examples
@@ -223,7 +223,7 @@ get_genes <- function(
 #'
 #' @return Either a vector or list of Hex codes.
 #'
-#' @import dplyr
+#' @import dplyr tidyr
 #' @export
 #'
 #' @examples
@@ -321,7 +321,7 @@ get_mapped_colours <- function(
 #'
 #' @return A data frame or named character vector of colour Hex codes.
 #'
-#' @import dplyr ggplot2 tibble
+#' @import dplyr ggplot2 tidyr
 #' @export
 #'
 #' @examples
diff --git a/R/genomic_data.R b/R/genomic_data.R
deleted file mode 100644
index e92b7c7..0000000
--- a/R/genomic_data.R
+++ /dev/null
@@ -1,417 +0,0 @@
-# functions for creating and working with S3 objects
-
-
-#' Create MAF Data
-#'
-#' This function creates MAF (Mutation Annotation Format) data from the given input.
-#'
-#' @param maf_df A data frame containing the MAF data.
-#' @param genome_build A string specifying the genome build ("grch37" or "hg38").
-#' @return A data frame with class attributes for MAF data.
-#' @export
-create_maf_data <- function(maf_df, genome_build) {
-  if (!inherits(maf_df, "data.frame")) stop("data must be a data frame")
-  if (!genome_build %in% c("grch37", "hg38")) stop("Invalid genome build")
-  
-  structure(maf_df,
-            class = c("maf_data", "genomic_data", class(maf_df)),  #  "genomic_data" for generic methods
-            genome_build = genome_build)
-}
-
-#' @export
-print.maf_data <- function(x, ...) {
-  cat("MAF Data Object\n")
-  cat("Genome Build:", attr(x, "genome_build"), "\n")
-  cat("Showing first 10 rows:\n")
-  # Convert to a plain data.frame (if not already) so that printing uses the default
-  # data.frame print method rather than printing as a list.
-  print(utils::head(as.data.frame(x), 10))
-}
-
-
-#' Get Genome Build
-#'
-#' This function retrieves the genome build attribute from the data.
-#'
-#' @param data A data frame with genome build attribute.
-#' @return A string specifying the genome build.
-#' @export
-get_genome_build <- function(data) {
-  attr(data, "genome_build")
-}
-
-#' Preserve Genomic Attributes
-#'
-#' This function preserves the genomic attributes and class after dplyr operations.
-#'
-#' @param new_data A data frame resulting from dplyr operations.
-#' @param old_data The original data frame with genomic attributes.
-#' @return A data frame with preserved genomic attributes.
-#' @export
-preserve_genomic_attributes <- function(new_data, old_data) {
-  attr(new_data, "genome_build") <- attr(old_data, "genome_build")
-  class(new_data) <- class(old_data)
-  return(new_data)
-}
-
-#' Strip Genomic Data Classes
-#'
-#' This function removes custom classes associated with genomic data objects
-#' (by default, "genomic_data", "maf_data", and "bed_data") from the class attribute
-#' of an object. This can be useful when you want to revert an S3 object to its
-#' underlying data.frame (or data.table) classes without converting the object.
-#'
-#' @param x An object, such as one of your genomic data objects.
-#' @param classes A character vector of class names to remove. The default is
-#'        c("genomic_data", "maf_data", "bed_data").
-#' @return The object with the specified classes removed.
-#' @export
-strip_genomic_classes <- function(x, classes = c("genomic_data", "maf_data", "bed_data")) {
-  current_classes <- class(x)
-  new_classes <- setdiff(current_classes, classes)
-  class(x) <- new_classes
-  return(x)
-}
-
-
-# S3 methods for genomic_data class
-#' @export
-mutate.genomic_data <- function(.data, ...) {
-  new_data <- dplyr::mutate(as.data.frame(.data), ...)
-  preserve_genomic_attributes(new_data, .data)
-}
-#' @export
-filter.genomic_data <- function(.data, ...) {
-  new_data <- dplyr::filter(as.data.frame(.data), ...)
-  preserve_genomic_attributes(new_data, .data)
-}
-#' @export
-select.genomic_data <- function(.data, ...) {
-  new_data <- dplyr::select(as.data.frame(.data), ...)
-  preserve_genomic_attributes(new_data, .data)
-}
-#' @export
-rename.genomic_data <- function(.data, ...) {
-  new_data <- dplyr::rename(as.data.frame(.data), ...)
-  preserve_genomic_attributes(new_data, .data)
-}
-#' @export
-arrange.genomic_data <- function(.data, ...) {
-  new_data <- dplyr::arrange(as.data.frame(.data), ...)
-  preserve_genomic_attributes(new_data, .data)
-}
-#' @export
-group_by.genomic_data <- function(.data, ..., .add = FALSE) {
-  new_data <- dplyr::group_by(as.data.frame(.data), ..., .add = .add)
-  preserve_genomic_attributes(new_data, .data)
-}
-#' @export
-ungroup.genomic_data <- function(x, ...) {
-  new_data <- dplyr::ungroup(as.data.frame(x), ...)
-  preserve_genomic_attributes(new_data, x)
-}
-
-#' Bind maf or other genomic data together
-#'
-#' @description Combine multiple maf_data objects and retain metadata such as genome_build.
-#' This function will not allow you to combine maf_data objects that have different genome_build values.
-#' An error will also be thrown if the same sample id is found in more than one of the inputs (if check_id is TRUE).
-#'
-#' @param ... All maf_data or seg_data objects to be combined.
-#' @param check_id Logical. If TRUE (the default), the function will check for the presence of the expected ID column
-#'        and for duplicate sample IDs across the inputs. Set to FALSE to skip this check.
-#'
-#' @return data.frame with combined data and preserved genome_build metadata.
-#' @export
-#'
-#' @examples
-#'
-#' merged_maf = bind_genomic_data(maf1, maf2,check_id=FALSE)
-#'
-bind_genomic_data <- function(..., check_id = TRUE) {
-  
-  in_list <- list(...)
-  
-  if ("maf_data" %in% class(in_list[[1]])) {
-    # MAF format, ID column is Tumor_Sample_Barcode
-    id_col <- "Tumor_Sample_Barcode"
-  } else if ("seg_data" %in% class(in_list[[1]])) {
-    # SEG format, ID column is ID
-    id_col <- "ID"
-  } else {
-    stop(paste("Unsure how to merge:", class(in_list[[1]])))
-  }
-  
-  # Ensure all inputs are either maf_data or seg_data objects
-  if (!all(sapply(in_list, inherits, "maf_data")) && !all(sapply(in_list, inherits, "seg_data"))) {
-    stop("All inputs must be maf_data objects or seg_data objects.")
-  }
-  
-  # Extract genome builds
-  genome_builds <- unique(sapply(in_list, get_genome_build))
-  
-  if (length(genome_builds) > 1) {
-    stop("Cannot bind seg_data or maf_data objects with different genome builds: ", 
-         paste(genome_builds, collapse = ", "))
-  }
-  
-  # If check_id is TRUE, verify that the expected ID column exists and that IDs are unique.
-  if (check_id) {
-    # Collect unique sample IDs from each dataset
-    id_sets <- lapply(in_list, function(df) {
-      if (!(id_col %in% colnames(df))) {
-        stop("ID column '", id_col, "' not found in input data.")
-      }
-      unique(df[[id_col]])
-    })
-    
-    # Flatten the list and count occurrences of each ID
-    all_ids <- unlist(id_sets)
-    duplicate_ids <- names(table(all_ids)[table(all_ids) > 1])
-    
-    # If any ID is found in multiple datasets, throw an error
-    if (length(duplicate_ids) > 0) {
-      stop("Duplicate IDs found in multiple input data frames: ", paste(duplicate_ids, collapse = ", "))
-    }
-  }
-  
-  combined <- dplyr::bind_rows(in_list)
-  attr(combined, "genome_build") <- genome_builds[1]  # Assign the common genome build
-  
-  if (!"maf_data" %in% class(combined)) {
-    class(combined) <- c("maf_data", "genomic_data", class(combined))  # Preserve class
-  }
-  
-  return(combined)
-}
-
-
-
-#' Create BED Data
-#'
-#' This function creates BED (Browser Extensible Data) objects from the given input.
-#' It assumes that the BED data should have columns corresponding to chromosome, start,
-#' and end. If the second and third columns are not numeric (as expected for start and end),
-#' the function will attempt to identify the proper columns by matching column names.
-#'
-#' In the output, the first three columns will be renamed to "chrom", "start", and "end".
-#' If a fourth column exists, it is renamed to "name" (and any additional columns are preserved).
-#'
-#' Additionally, if a "name" column exists and its values are not unique, the function
-#' will warn the user. The user can optionally supply a method to automatically fix the
-#' names via the `fix_names` argument:
-#'
-#'   - If `fix_names = "chrom_start_end"`, the new name will be built as "chrom:start-end".
-#'
-#'   - If `fix_names = "concat"`, then the columns specified by `concat_cols` (using the
-#'     original column names in the input data) will be concatenated to form the new name.
-#'     By default, no separator is used, but a separator can be specified via the `sep`
-#'     argument.
-#'
-#' After applying the fix, the function checks if the new names are unique. If they are not,
-#' a warning is issued that includes up to five examples of duplicate names and the row numbers
-#' where they occur.
-#'
-#' @param bed_df A data frame containing the BED data.
-#' @param genome_build A string specifying the genome build ("grch37" or "hg38").
-#'        If NULL, the function will try to infer the genome build from the object name.
-#' @param fix_names Either NULL (the default), or one of "chrom_start_end" or "concat".
-#'        If not NULL and duplicate names are detected, the function will apply the chosen fix.
-#' @param concat_cols When `fix_names = "concat"`, a character vector specifying which columns
-#'        from the original data to merge.
-#' @param sep The separator to use when concatenating columns if fix_names = "concat".
-#'        Defaults to "" (no separator).
-#' @return A data frame with class attributes for BED data.
-#' 
-#' @export
-#' 
-#' @examples
-#' 
-#' # get a abed_data object for all aSHM regions
-#' ashm_bed = create_bed_data(GAMBLR.data::grch37_ashm_regions,
-#'                 fix_names = "concat",
-#'                 concat_cols = c("gene","region"),
-#'                 sep="-")
-#' # the build is automatically inferred if it is in the variable name
-#' # get_genome_build(ashm_bed)
-#' # [1] "grch37"
-#' 
-#' another_bed = create_bed_data(somatic_hypermutation_locations_GRCh37_v_latest,
-#'                               fix_names = "concat",
-#'                               concat_cols = c("chr_name","hg19_start","hg19_end"))
-#' 
-#' # get_genome_build(another_bed)
-#' # [1] "grch37"
-#' 
-#' # get a bed_data object for all gene regions and combine several columns to make a unique name
-#' gene_regions <- create_bed_data(hg38_gene_coordinates,
-#'                     fix_names = "concat",
-#'                     sep="-",
-#'                     concat_cols = c("chromosome","start","end","gene_name"))
-#'                     
-#' #get_genome_build(gene_regions)
-#' # [1] "hg38"                     
-#' 
-#' 
-create_bed_data <- function(bed_df,
-                            genome_build = NULL,
-                            fix_names = NULL,
-                            concat_cols = NULL,
-                            sep = "") {
-  # Check that input is a data frame.
-  if (!inherits(bed_df, "data.frame")) {
-    stop("Input data must be a data frame")
-  }
-  
-  # Capture the original data and column names (before any reordering or renaming)
-  orig_df <- bed_df
-  orig_names <- names(bed_df)
-  
-  # If genome_build is not provided, attempt to infer it from the object name.
-  if (is.null(genome_build)) {
-    object_name <- deparse(substitute(bed_df))
-    possible_builds <- character(0)
-    
-    if (grepl("grch37", object_name, ignore.case = TRUE)) {
-      possible_builds <- c(possible_builds, "grch37")
-    }
-    if (grepl("hg38", object_name, ignore.case = TRUE)) {
-      possible_builds <- c(possible_builds, "hg38")
-    }
-    
-    if (length(possible_builds) == 1) {
-      genome_build <- possible_builds
-    } else if (length(possible_builds) == 0) {
-      stop("Could not determine genome build from object name; please supply genome_build argument.")
-    } else {
-      stop("Ambiguous genome build in object name; please supply genome_build argument explicitly.")
-    }
-  }
-  
-  # Validate genome build.
-  if (!genome_build %in% c("grch37", "hg38")) {
-    stop("Invalid genome build. Please choose either 'grch37' or 'hg38'.")
-  }
-  
-  # Helper function to force column naming for the BED data.
-  force_bed_column_names <- function(df) {
-    new_names <- names(df)
-    # Force first three columns to be "chrom", "start", "end"
-    new_names[1:3] <- c("chrom", "start", "end")
-    # If there's a fourth column, force it to "name"
-    if (ncol(df) >= 4) {
-      new_names[4] <- "name"
-    }
-    names(df) <- new_names
-    return(df)
-  }
-  
-  # Check if the first three columns (as supplied) are in the expected form.
-  # We expect columns 2 and 3 (start and end) to be numeric.
-  if (ncol(bed_df) >= 3 && is.numeric(bed_df[[2]]) && is.numeric(bed_df[[3]])) {
-    # The data is assumed to be in the correct order.
-    bed_df <- force_bed_column_names(bed_df)
-  } else {
-    # Attempt to guess the proper columns based on names.
-    names_lower <- tolower(names(bed_df))
-    
-    chrom_idx <- which(names_lower %in% c("chrom", "chromosome"))
-    start_idx <- which(names_lower %in% c("start", "start_position", "startpos"))
-    end_idx   <- which(names_lower %in% c("end", "end_position", "endpos"))
-    
-    if (length(chrom_idx) != 1 || length(start_idx) != 1 || length(end_idx) != 1) {
-      stop("Columns 2 and 3 (start and end) are not numeric and the chromosome/start/end columns ",
-           "cannot be unambiguously identified from the column names.")
-    }
-    
-    # Reorder the data frame so that the candidate columns come first.
-    remaining_idx <- setdiff(seq_len(ncol(bed_df)), c(chrom_idx, start_idx, end_idx))
-    new_order <- c(chrom_idx, start_idx, end_idx, remaining_idx)
-    bed_df <- bed_df[, new_order, drop = FALSE]
-    
-    # After reordering, check that the new second and third columns are numeric.
-    if (!is.numeric(bed_df[[2]]) || !is.numeric(bed_df[[3]])) {
-      stop("After reordering based on column names, the start and end columns are not numeric.")
-    }
-    
-    # Force the first three (and optionally the fourth) column names.
-    bed_df <- force_bed_column_names(bed_df)
-  }
-  
-  # If a "name" column exists, check that its values are unique.
-  if (ncol(bed_df) >= 4) {
-    if (anyDuplicated(bed_df[[4]]) > 0) {
-      # If no fix is provided, issue a generic warning.
-      if (is.null(fix_names)) {
-        warning("The values in the 'name' column are not unique.")
-      } else {
-        # Apply the requested fix.
-        if (fix_names == "chrom_start_end") {
-          new_names_vec <- paste0(bed_df$chrom, ":", bed_df$start, "-", bed_df$end)
-          bed_df[[4]] <- new_names_vec
-          if (length(unique(new_names_vec)) != nrow(bed_df)) {
-            # Identify duplicate examples.
-            dup_idx <- which(duplicated(new_names_vec) | duplicated(new_names_vec, fromLast = TRUE))
-            dup_names <- unique(new_names_vec[dup_idx])
-            dup_info <- sapply(dup_names, function(nm) {
-              rows <- which(new_names_vec == nm)
-              paste0(nm, " (rows: ", paste(rows, collapse = ", "), ")")
-            })
-            warning("The 'chrom_start_end' fix did not result in a unique set of names. Examples: ",
-                    paste(dup_info[1:min(5, length(dup_info))], collapse = "; "),
-                    ". Please review your data or consider an alternative fix.")
-          }
-        } else if (fix_names == "concat") {
-          if (is.null(concat_cols)) {
-            stop("For fix_names = 'concat', you must supply concat_cols indicating which columns to merge.")
-          }
-          if (!is.character(concat_cols)) {
-            stop("For fix_names = 'concat', concat_cols must be a character vector referring to the original column names.")
-          }
-          if (!all(concat_cols %in% orig_names)) {
-            stop("One or more column names specified in concat_cols do not exist in the original data.")
-          }
-          # Build new names using the original data.
-          # Use paste with the specified separator.
-          new_names_vec <- do.call(paste, c(orig_df[, concat_cols, drop = FALSE], sep = sep))
-          bed_df[[4]] <- new_names_vec
-          if (length(unique(new_names_vec)) != nrow(bed_df)) {
-            dup_idx <- which(duplicated(new_names_vec) | duplicated(new_names_vec, fromLast = TRUE))
-            dup_names <- unique(new_names_vec[dup_idx])
-            dup_info <- sapply(dup_names, function(nm) {
-              rows <- which(new_names_vec == nm)
-              paste0(nm, " (rows: ", paste(rows, collapse = ", "), ")")
-            })
-            warning("The 'concat' fix did not result in a unique set of names. Examples: ",
-                    paste(dup_info[1:min(5, length(dup_info))], collapse = "; "),
-                    ". Please review your data or consider an alternative fix.")
-          }
-        } else {
-          stop("Invalid value for fix_names. Use 'chrom_start_end' or 'concat'.")
-        }
-      }
-    }
-  }
-  # enforce strict matching of chr prefixing
-  if(genome_build == "grch37"){
-    if(any(grepl("chr",bed_df$chrom))){
-      bed_df = mutate(bed_df,chrom = gsub("chr", "", chrom))
-    }
-  }
-  # Create the S3 object with additional class attributes and genome_build attribute.
-  structure(bed_df,
-            class = c("bed_data", "genomic_data", class(bed_df)),
-            genome_build = genome_build)
-}
-
-#' @export
-print.bed_data <- function(x, ...) {
-  cat("BED Data Object\n")
-  cat("Genome Build:", attr(x, "genome_build"), "\n")
-  cat("Showing first 10 rows:\n")
-  # Convert to a plain data.frame (if not already) so that printing uses the default
-  # data.frame print method rather than printing as a list.
-  print(utils::head(as.data.frame(x), 10))
-}
-
-
diff --git a/R/get_ashm_count_matrix.R b/R/get_ashm_count_matrix.R
deleted file mode 100644
index 6f4fd81..0000000
--- a/R/get_ashm_count_matrix.R
+++ /dev/null
@@ -1,136 +0,0 @@
-#' @title Get ASHM Count Matrix.
-#'
-#' @description Prepare a matrix with one row per sample and one column per
-#' region using a set of hypermutated regions.
-#'
-#' @details Values are the number of mutations in that patient in the region.
-#'
-#' @param regions_bed A bed file with one row for each region.
-#' @param these_samples_metadata This is used to complete your matrix. All GAMBL
-#'      samples will be used by default. Provide a data frame with at least
-#'      sample_id for all samples if you are using non-GAMBL data.
-#' @param this_seq_type The seq type to return results for. Only used if no
-#'      metadata is provided with these_samples_metadata.
-#' @param projection Which genome build to use for the mutations 
-#' (must match the coordinate system your regions to avoid a nonsense result)
-#'
-#' @return matrix
-#'
-#' @import dplyr tibble
-#' @export
-#'
-#' @examples
-#' regions_bed = create_bed_data(GAMBLR.data::grch37_ashm_regions,
-#'                               fix_names="concat",
-#'                               concat_cols=c("gene","region"),
-#'                               sep="-")
-#' my_meta = get_gambl_metadata() %>% dplyr::filter(pathology=="DLBCL")
-#' matrix <- get_ashm_count_matrix(
-#'      regions_bed = regions_bed,
-#'      this_seq_type = "genome"
-#' )
-#'
-#' #this example intentionally fails 
-#'  matrix <- get_ashm_count_matrix(regions_bed=regions_bed,this_seq_type = "genome",
-#'                             these_samples_metadata = my_meta,
-#'                             projection = "hg38")
-#' # Error in get_ashm_count_matrix(
-#' # Your projection argument does not match the genome_build of regions_bed
-#' 
-#' # format the name column to include the chromosome coordinates instead of the gene
-#' regions_bed = create_bed_data(GAMBLR.data::hg38_ashm_regions,
-#'                            fix_names="concat",
-#'                            concat_cols=c("chr_name","hg38_start","hg38_end"),
-#'                            sep="-")
-#'                            
-#'  matrix_hg38 <- get_ashm_count_matrix(regions_bed=regions_bed,this_seq_type = "genome",
-#'                             these_samples_metadata = my_meta,
-#'                             projection = "hg38")
-#'
-get_ashm_count_matrix = function(
-        regions_bed,
-        these_samples_metadata,
-        this_seq_type,
-        projection = "grch37"
-    ){
-    if(missing(this_seq_type)){
-        if(missing(these_samples_metadata)){
-            stop(
-                "Please supply either the this_seq_type or a metadata from which it can be retrieved"
-            )
-        }
-        this_seq_type <- these_samples_metadata %>%
-            pull(seq_type) %>%
-            unique()
-    }
-
-    if(missing(regions_bed)){
-        message(
-            "Using aSHM regions in grch37 genome_build as regions_bed"
-        )
-        if(projection=="grch37"){
-          regions_bed <- GAMBLR.data::grch37_ashm_regions %>%
-            mutate(name = paste(gene, region, sep = "_")) %>%
-            create_bed_data(genome_build = projection)
-        }else if(projection=="hg38"){
-          regions_bed <- GAMBLR.data::hg38_ashm_regions %>%
-            mutate(name = paste(gene, region, sep = "_")) %>%
-            create_bed_data(genome_build = projection)
-        }else{
-          stop(paste("unsupported genome build",projection))
-        }
-        
-    }else{
-      if("bed_data" %in% class(regions_bed)){
-        if(!get_genome_build(regions_bed)==projection){
-          stop(paste("Your genome_build argument does not match the genome_build of regions_bed",get_genome_build(regions_bed),genome_build))
-        }
-      }
-    }
-
-    
-
-    if(missing(these_samples_metadata)){
-        all_meta <- get_gambl_metadata(
-            seq_type_filter=this_seq_type
-        ) %>%
-        dplyr::select(sample_id)
-    }else{
-        all_meta <- these_samples_metadata %>%
-            dplyr::select(sample_id)
-    }
-  
-    ashm_maf <- get_ssm_by_regions(
-      regions_bed = regions_bed,
-      streamlined = TRUE,
-      these_samples_metadata = these_samples_metadata,
-      use_name_column = TRUE,
-      projection = projection
-    )
-    # Not sure why this was necessary. Possibly because it's also a data.table?
-    ashm_maf = strip_genomic_classes(ashm_maf)
-
-    ashm_counted <- ashm_maf %>%
-      group_by(sample_id, region) %>%
-      tally()
-
-    
-    #fill out all combinations so we can get the cases with zero mutations
-    eg <- expand_grid(
-        sample_id = pull(all_meta, sample_id),
-        region = unique(ashm_counted$region)
-    )
-    all_counts <- left_join(eg, ashm_counted) %>%
-        mutate(n = replace_na(n, 0)) %>%
-        unique() #not sure where the duplicates are coming from but its annoying
-
-    all_counts_wide <- pivot_wider(
-        all_counts,
-        id_cols = sample_id,
-        names_from = region,
-        values_from = n
-    ) %>%
-        column_to_rownames(var = "sample_id")
-
-    return(all_counts_wide)
-}
diff --git a/R/get_cn_segments.R b/R/get_cn_segments.R
deleted file mode 100644
index c3cecb0..0000000
--- a/R/get_cn_segments.R
+++ /dev/null
@@ -1,89 +0,0 @@
-## GAMBLR.data
-#' Create Segmented Data
-#'
-#' This function creates segmented data from the given input.
-#'
-#' @param seg_df A data frame containing the segmented data.
-#' @param genome_build A string specifying the genome build ("grch37" or "hg38").
-#' @return A data frame with class attributes for segmented data.
-#' @export
-#' @examples
-#' seg_df <- data.frame(...)
-#' create_seg_data(seg_df, "grch37")
-create_seg_data <- function(seg_df, genome_build) {
-  if (!inherits(seg_df, "data.frame")) stop("data must be a data frame")
-  if (!genome_build %in% c("grch37", "hg38")) stop("Invalid genome build")
-  
-  structure(seg_df, 
-            class = c("seg_data", class(seg_df)), 
-            genome_build = genome_build)
-}
-
-#' @title Get CN Segments.
-#'
-#' @description Retrieve all copy number segments from the GAMBL outputs
-#'
-#' @details This function merely loads and returns all the seg_data available for a projection (genome build)
-#' @param these_samples_metadata User must provide a metadata table to restrict the data to the samples in your table. 
-#' The metadata also ensures the proper handling of duplicate sample_id across seq_types and ensures the 
-#' seq_type in the metadata faithfully represents the seq_type of the data
-#' @param projection Desired genome coordinate system for returned CN segments. Default is "grch37".
-#' @param this_seq_type Deprecated.
-#' @param ... Additional parameters to be passed to the function.
-#'
-#' @return A data frame with CN segments for the specified region.
-#'
-#' @import dplyr
-#' @export
-#'
-#' @examples
-#' # Example for the capture samples:
-#' 
-#' genome_metadata = GAMBLR.data::get_gambl_metadata(seq_type_filter="genome") 
-#'                       
-#' genome_segments_hg38 = get_cn_segments(
-#'                              these_samples_metadata = genome_metadata,
-#'                              projection="hg38")
-#'
-#'
-get_cn_segments = function(these_samples_metadata,
-                           projection = "grch37",
-                           this_seq_type,...){
-  #warn/notify the user what version of this function they are using
-  message("Using the bundled CN segments (.seg) calls in GAMBLR.data...")
-
-  #check if any invalid parameters are provided
-  check_excess_params(...)
-
-  #get valid projections
-  valid_projections = grep("meta", names(GAMBLR.data::sample_data), value = TRUE, invert = TRUE)
-
-  metadata = these_samples_metadata
-
-  sample_ids = metadata$sample_id
-  #return CN segments based on the selected projection
-  if(projection %in% valid_projections){
-    all_segs = GAMBLR.data::sample_data[[projection]]$seg %>%
-      dplyr::filter(ID %in% sample_ids)
-  }else{
-    stop(paste("please provide a valid projection. The following are available:",
-               paste(valid_projections,collapse=", ")))
-  }
-
-  #ensure chr prefixes are there when necessary 
-  if(projection=="grch37"){
-    if(grepl("chr",all_segs$chrom[1])){
-      all_segs = all_segs %>%
-        dplyr::mutate(chrom = gsub("chr", "", chrom))
-    }
-  }else{
-    if(!grepl("chr",all_segs$chrom[1])){
-      all_segs = all_segs %>%
-        dplyr::mutate(chrom = paste0("chr", chrom))
-    }
-  }
-
-  #return S3 class with CN segments and genome_build 
-  all_segs = create_seg_data(all_segs,projection)
-  return(all_segs)
-}
diff --git a/R/get_coding_ssm.R b/R/get_coding_ssm.R
deleted file mode 100644
index 306ff4f..0000000
--- a/R/get_coding_ssm.R
+++ /dev/null
@@ -1,134 +0,0 @@
-
-#' @title Get Coding SSMs
-#'
-#' @description Convenience function for loading coding Simple Somatic Mutations
-#'      (SSM) from the bundled data [GAMBLR.data::sample_data].
-#'
-#' @details This "bare bones" function was developed to retrieve coding SSM
-#'      calls for non-GSC-users. Effectively retrieve coding SSM calls. Multiple
-#'      filtering parameters are available for this function. For more
-#'      information on how to implement the filtering parameters, refer to the
-#'      parameter descriptions as well as examples in the vignettes. This
-#'      function depends on the bundled sample data in this package.
-#'
-#' @param these_sample_ids Optional, a vector of multiple sample_id (or a single
-#'      sample ID as a string) that you want results for.
-#' @param these_samples_metadata Optional, a metadata table (with sample IDs in
-#'      a column) to subset the return to. If not provided (and if
-#'      `these_sample_ids` is not provided), the function will return all
-#'      samples from the specified seq_type in the metadata.
-#' @param projection Reference genome build for the coordinates in the MAF file.
-#'      The default is grch37.
-#' @param this_seq_type The this_seq_type you want back, default is genome.
-#' @param min_read_support Only returns variants with at least this many reads
-#'      in t_alt_count.
-#' @param include_silent Logical parameter indicating whether to include silent
-#'      mutations into coding mutations. Default is TRUE.
-#' @param verbose Set to FALSE to minimize the output to console. Default is
-#'      TRUE. This parameter also dictates the verbosity of any helper function
-#'      internally called inside the main function.
-#' @param tool_name Optionally specify which tool to report variant from. The
-#'      default is slms-3, also supports "publication" to return the exact
-#'      variants as reported in the original papers.
-#' @param ... Any additional parameters.
-#'
-#' @return data frame
-#'
-#' @import dplyr
-#'
-#' @export
-#'
-#' @examples
-#'
-#'  # Get mutations from exome data originally aligned to grch37
-#' ssm_exomes_grch37 = get_coding_ssm(projection = "grch37",this_seq_type = "capture")
-#' 
-#' # Get mutations from genome data, hg38 build
-#' ssm_genomes_hg38 = get_coding_ssm(projection = "hg38",this_seq_type = "genome")
-#'
-#' 
-#'
-#'
-get_coding_ssm = function(
-    these_sample_ids = NULL,
-    these_samples_metadata = NULL,
-    projection = "grch37",
-    this_seq_type = "genome",
-    tool_name = "slms-3",
-    min_read_support = 3,
-    include_silent = TRUE,
-    verbose = FALSE,
-    ...
-){
-
-    # Warn/notify the user what version of this function they are using
-    message("Using the bundled SSM calls (.maf) calls in GAMBLR.data...")
-
-    #check if any invalid parameters are provided
-    check_excess_params(...)
-
-    # Get valid projections
-    valid_projections = grep(
-        "meta",
-        names(GAMBLR.data::sample_data),
-        value = TRUE,
-        invert = TRUE
-    )
-
-    #get samples with the dedicated helper function
-    metadata = id_ease(
-        these_samples_metadata = these_samples_metadata,
-        these_sample_ids = these_sample_ids,
-        verbose = verbose,
-        this_seq_type = this_seq_type
-    )
-
-    sample_ids = metadata$sample_id
-
-
-    if(!projection %in% valid_projections){
-        stop(
-            paste(
-                "Provide a valid projection. The following are available:",
-                paste(
-                    valid_projections,
-                    collapse = ", "
-                )
-            )
-        )
-    }
-
-    #return SSMs based on the selected projection
-    muts = GAMBLR.data::sample_data[[projection]]$maf %>% 
-        dplyr::filter(Tumor_Sample_Barcode %in% sample_ids) %>%
-        dplyr::filter((tolower(!!sym("Pipeline")) == tool_name))
-    
-    if(!include_silent){
-        coding_class = coding_class[coding_class != "Silent"]
-    }
-
-    sample_ids = pull(metadata, sample_id)
-
-    # Drop variants with low read support (default is 3),
-    # enforce sample IDs and keep only coding variants
-    muts = dplyr::filter(muts, t_alt_count >= min_read_support) %>%
-        dplyr::filter(Tumor_Sample_Barcode %in% sample_ids) %>%
-        dplyr::filter(Variant_Classification %in% coding_class)
-
-    # Filter maf on selected sample ids
-    muts = muts %>%
-        dplyr::filter(Tumor_Sample_Barcode %in% sample_ids)
-
-    mutated_samples = length(unique(muts$Tumor_Sample_Barcode))
-    message(
-        paste(
-            "after linking with metadata, we have mutations from",
-            mutated_samples,
-            "samples"
-        )
-    )
-    muts = create_maf_data(muts,projection)
-    # use S3-safe version of dplyr function
-    muts = mutate.genomic_data(muts,maf_seq_type = this_seq_type)
-    return(muts)
-}
diff --git a/R/get_coding_ssm_status.R b/R/get_coding_ssm_status.R
deleted file mode 100644
index 979c1d4..0000000
--- a/R/get_coding_ssm_status.R
+++ /dev/null
@@ -1,311 +0,0 @@
-#' @title Get Coding SSM Status.
-#'
-#' @description Tabulate mutation status (SSM) for a set of genes.
-#'
-#' @details This function takes a data frame (in MAF-like format) and converts
-#' it to a binary one-hot encoded matrix of mutation status for either a set of
-#' user-specified genes (via gene_symbols) or, if no genes are provided, default
-#' to all lymphoma genes. The default behaviour is to assign each gene/sample_id
-#' combination as mutated only if there is a protein coding mutation for that
-#' sample in the MAF but this can be configured to use synonymous variants in
-#' some (via include_silent_genes) or all (via include_silent) genes.
-#' This function also has other filtering and convenience parameters giving
-#' the user full control of the return. For more information, refer to the
-#' parameter descriptions and examples.
-#' Currently only the grch37 genome build is supported for hotspot annotation
-#' and review for this version of the function.
-#'
-#' @param gene_symbols A vector of gene symbols for which the mutation status
-#'      will be tabulated. If not provided, lymphoma genes will be returned
-#'      by default.
-#' @param these_samples_metadata The metadata for samples of interest to be
-#'      included in the returned matrix. Only the column "sample_id" is
-#'      required. If not provided, the example metadata is used as default.
-#' @param maf_data data frame in maf format. Must be in the grch37 projection.
-#' @param include_hotspots Logical parameter indicating whether hotspots object
-#'      should also be tabulated. Default is TRUE.
-#' @param keep_multihit_hotspot Logical parameter indicating whether to keep the
-#'      gene annotation as mutated when the gene has both hot spot and
-#'      non-hotspot mutation. Default is FALSE. If set to TRUE, will report the
-#'      number of non-hotspot mutations instead of tabulating for just mutation
-#'      presence.
-#' @param review_hotspots Logical parameter indicating whether hotspots object
-#'      should be reviewed to include functionally relevant mutations or rare
-#'      lymphoma-related genes. Default is TRUE.
-#' @param genes_of_interest A vector of genes for hotspot review. Currently only
-#'      FOXO1, MYD88, and CREBBP are supported.
-#' @param genome_build Reference genome build for the coordinates in the MAF
-#'      file. The default is inferred from maf_data. 
-#' @param include_silent Logical parameter indicating whether to include silent
-#'      mutations into coding mutations. Default is FALSE.
-#' @param include_silent_genes Optionally, provide a list of genes for which the
-#'      Silent variants to be considered. If provided, the Silent variants for
-#'      these genes will be included regardless of the include_silent argument.
-#' @param ... Any other parameter. These parameters will be ignored.
-#'
-#' @return A data frame with tabulated mutation status.
-#'
-#' @import dplyr tidyr
-#' @export
-#'
-#' @examples
-#' coding_tabulated_df = get_coding_ssm_status(
-#'  maf_data = get_coding_ssm(),
-#'  gene_symbols = c("EZH2","KMT2D","CREBBP","MYC")
-#' )
-#'
-#' 
-#'
-#' #all lymphoma genes from bundled NHL gene list
-#' coding_tabulated_df = get_coding_ssm_status()
-#' 
-#' #this example will fail because hg38 is not supported by this function (yet)
-#' coding_tabulated_df = get_coding_ssm_status(maf_data=
-#'                         get_coding_ssm(projection = "hg38"))
-#' # Error in get_coding_ssm_status(maf_data = get_coding_ssm(projection = "hg38")) : 
-#' # Currently only grch37 projection (hg19 genome build) is supported.
-#'
-get_coding_ssm_status = function(
-        gene_symbols,
-        these_samples_metadata,
-        maf_data,
-        include_hotspots = TRUE,
-        keep_multihit_hotspot = FALSE,
-        review_hotspots = TRUE,
-        genes_of_interest = c("FOXO1", "MYD88", "CREBBP"),
-        genome_build,
-        include_silent = FALSE,
-        include_silent_genes,
-        ...
-    ){
-    if(missing(maf_data)){
-      stop("maf_data is required")
-    }
-    # check if any invalid parameters are provided
-    check_excess_params(...)
-    if("maf_data" %in% class(maf_data)){
-      if(missing(genome_build)){
-        genome_build = get_genome_build(maf_data)
-      }else{
-        if(!genome_build == get_genome_build(maf_data)){
-          stop("you have specified a genome_build that doesn't match the genome_build attached to maf_data")
-        }
-      }
-    }
-    # check the projection
-    if(!genome_build == "grch37"){
-        stop(
-            "Currently only grch37 projection (hg19 genome build) is supported."
-        )
-    }
-
-    if(missing(gene_symbols)){
-        message(
-            "No gene_symbols provided, defaulting to all lymphoma genes."
-        )
-        gene_symbols <- GAMBLR.data::lymphoma_genes$Gene
-    }
-
-    if(!missing(include_silent_genes)){
-        message(
-            strwrap(
-                prefix = " ",
-                initial = "",
-                "Output will include all genes specified in gene_symbols
-                and include_silent_genes parameters."
-            )
-        )
-        gene_symbols <- c(
-            gene_symbols,
-            include_silent_genes
-        ) %>%
-        unique()
-    }
-
-    if(missing(these_samples_metadata)){
-        these_samples_metadata <- get_gambl_metadata()
-    }
-
-    coding_var <- c(
-        "Frame_Shift_Del", "Frame_Shift_Ins", "In_Frame_Del",
-        "In_Frame_Ins", "Missense_Mutation", "Nonsense_Mutation",
-        "Nonstop_Mutation", "Splice_Region", "Splice_Site",
-        "Targeted_Region", "Translation_Start_Site"
-    )
-
-    if(include_silent){
-        message("Including Synonymous variants for all genes...")
-        coding_var <- c(coding_var, "Silent")
-    }
-
-    if(missing(include_silent_genes)){
-        coding_ssm <- maf_data %>%
-            dplyr::filter(
-                Variant_Classification %in% coding_var
-            )
-    } else {
-        message(
-            strwrap(
-                prefix = " ",
-                initial = "", 
-                "You have provided gene list with argument include_silent_genes.
-                The Silent variants will be included even if the include_silent
-                argument is set to FALSE.
-                "
-            )
-        )
-        coding_ssm <- maf_data %>%
-            dplyr::filter(
-                Variant_Classification %in% coding_var |
-                (
-                    Hugo_Symbol %in% include_silent_genes &
-                    Variant_Classification == "Silent"
-                )
-            )
-    }
-
-    coding <- coding_ssm %>%
-        dplyr::filter(
-            Hugo_Symbol %in% gene_symbols
-        ) %>%
-        dplyr::select(Tumor_Sample_Barcode, Hugo_Symbol) %>%
-        dplyr::rename(
-            "sample_id" = "Tumor_Sample_Barcode",
-            "gene" = "Hugo_Symbol"
-        ) %>%
-        unique() %>%
-        dplyr::mutate(mutated = 1)
-
-    samples_table <- dplyr::select(
-        these_samples_metadata,
-        sample_id
-    )
-    wide_coding <- pivot_wider(
-        coding,
-        names_from = "gene",
-        values_from = "mutated",
-        values_fill = 0
-    )
-    all_tabulated <- left_join(
-        samples_table,
-        wide_coding
-    )
-    all_tabulated <- all_tabulated %>%
-        replace(is.na(.), 0)
-
-    # include hotspots if user chooses to do so
-    if(include_hotspots){
-        # first annotate
-        annotated <- GAMBLR.data::annotate_hotspots(
-            coding_ssm
-        )
-
-        # review for the supported genes
-        if(review_hotspots){
-            annotated = review_hotspots(
-                annotated,
-                genes_of_interest = genes_of_interest,
-                genome_build = genome_build
-            )
-        }
-
-        message("annotating hotspots")
-
-        hotspots <- annotated %>%
-            dplyr::filter(Hugo_Symbol %in% genes_of_interest) %>%
-            dplyr::select(Tumor_Sample_Barcode, Hugo_Symbol, hot_spot) %>%
-            dplyr::rename(
-                "sample_id" = "Tumor_Sample_Barcode",
-                "gene" = "Hugo_Symbol"
-            ) %>%
-            dplyr::mutate(gene = paste0(gene, "HOTSPOT")) %>%
-            unique() %>%
-            dplyr::mutate(mutated = ifelse(hot_spot == "TRUE", 1, 0)) %>%
-            dplyr::filter(mutated == 1) %>%
-            dplyr::select(-hot_spot)
-
-        # long to wide hotspots, samples are tabulated with 0 if no hotspot is detected
-        wide_hotspots <- pivot_wider(
-            hotspots,
-            names_from = "gene",
-            values_from = "mutated",
-            values_fill = 0
-        )
-        # join with the ssm object
-        all_tabulated <- left_join(
-            all_tabulated,
-            wide_hotspots
-        )
-        all_tabulated <- all_tabulated %>%
-            replace(is.na(.), 0)
-
-        all_tabulated <- all_tabulated %>%
-            dplyr::select(where(~ any(. != 0)))
-
-        all_tabulated <- as.data.frame(all_tabulated)
-        # make SSM and hotspots non-redundant by giving priority
-        # to hotspot feature and setting SSM to 0
-        for (hotspot_site in colnames(wide_hotspots)[grepl("HOTSPOT", colnames(wide_hotspots))]){
-            message(hotspot_site)
-            this_gene <- gsub("HOTSPOT", "", hotspot_site)
-            redundant_features <- all_tabulated %>%
-                dplyr::select(starts_with(this_gene))
-
-            # if not both the gene and the hotspot are present, go to
-            # the next iteration
-            if(ncol(redundant_features)!= 2) next
-            message("OK")
-            # if both gene and it's hotspot are in the matrix, give priority to hotspot feature
-            all_tabulated[(all_tabulated[, this_gene] >0 & all_tabulated[, paste0(this_gene, "HOTSPOT")] == 1),][,c(this_gene, paste0(this_gene, "HOTSPOT"))][, this_gene] = 0
-
-            # in case gene has both hotspot and another mutation in the same gene,
-            # keep both tabulated as multihits
-            if(keep_multihit_hotspot){
-                # determine which samples have hot spot and another mutation in same gene
-                multihits <- annotated %>%
-                    dplyr::filter(Hugo_Symbol == this_gene) %>%
-                    group_by(Tumor_Sample_Barcode) %>%
-                    dplyr::mutate(n_mut = n()) %>%
-                    dplyr::filter(
-                        n_mut > 1
-                    ) %>%
-                    dplyr::distinct(Tumor_Sample_Barcode, n_mut, hot_spot) %>%
-                    # account for cases with both hotspot and not hotspot to avoid
-                    # double-counting the number of mutations
-                    mutate_at(vars(hot_spot), ~replace_na(., "FALSE")) %>%
-                    dplyr::mutate(
-                        n_mut = ifelse(
-                            hot_spot == "TRUE",
-                            n_mut - 1,
-                            n_mut
-                        )
-                    ) %>%
-                    group_by(Tumor_Sample_Barcode) %>%
-                    dplyr::arrange(n_mut) %>%
-                    slice_head() %>%
-                    ungroup %>%
-                    select(-hot_spot)
-
-                # Return the annotation of this gene to mutated in these samples
-                all_tabulated <- all_tabulated %>%
-                    left_join(
-                        .,
-                        multihits,
-                        by = c("sample_id" = "Tumor_Sample_Barcode")
-                    ) %>%
-                    dplyr::mutate(
-                        {{this_gene}} := ifelse(
-                                !is.na(n_mut),
-                                n_mut,
-                                !!!syms(this_gene)
-                            )
-                    ) %>%
-                    select(- n_mut)
-            }
-
-        }
-
-    }
-    return(all_tabulated)
-
-}
\ No newline at end of file
diff --git a/R/get_gambl_metadata.R b/R/get_gambl_metadata.R
deleted file mode 100644
index a2471d2..0000000
--- a/R/get_gambl_metadata.R
+++ /dev/null
@@ -1,145 +0,0 @@
-#' @title Get GAMBL Metadata.
-#'
-#' @description Convenience function for loading the sample metadata.
-#'
-#' @details This bare bones function was developed to retrieve metadata for
-#' non-GSC-users. Specify the seq type (`seq_type_filter`) for the samples you
-#' want returned as the only argument.
-#' It relies on the bundled metadata in this package.
-#' Specify `case_set` argument to retreive samples from particular study.
-#' Currently supported case_sets are: FL_Dreval (FL samples from Dreval et al),
-#' DLBCL_Dreval (DLBCL samples from Dreval et al), FL-DLBCL-study (all samples
-#' from Dreval et al), DLBCL_Arthur (all samples from Arthur et al study),
-#' DLBCL_Hilton (all samples from Hilton et al DLBCL Trios study),
-#' DLBCL_cell_lines (5 DLBCL cell lines), DLBCL_Chapuy (all samples from Chapuy
-#' et al study), DLBCL_Schmitz (all samples from Schmitz et al study),
-#' DLBCL_Reddy (all samples from Reddy et al study), DLBCL_Thomas (HTMCP DLBCLs
-#' from Thomas et al study), BL_Thomas (BL samples from Thomas et al study)
-#'
-#' @param seq_type_filter Specify the seq type you want to return metadata for.
-#' Default is "genome".
-#' @param case_set Optionally specify study details to return samples from a
-#' particular case set. See function description for supported case sets.
-#' @param ... Any additional parameters.
-#'
-#' @return A data frame with metadata, tailored for user without GSC access.
-#'
-#' \describe{
-#'   \item{compression}{Format of the original data used as input for our analysis pipelines (cram, bam or fastq)}
-#'   \item{bam_available}{Whether or not this file was available when last checked.}
-#'   \item{patient_id}{The anonymized unique identifier for this patient. For BC samples, this will be Res ID.}
-#'   \item{sample_id}{A unique identifier for the sample analyzed.}
-#'   \item{seq_type}{The assay type used to produce this data (one of "genome","capture, "mrna", "promethION")}
-#'   \item{genome_build}{The name of the genome reference the data were aligned to.}
-#'   \item{cohort}{Name for a group of samples that were added together (usually from a single study), often in the format {pathology}_{cohort_descriptor}.}
-#'   \item{pathology}{The diagnosis or pathology for the sample}
-#'   \item{time_point}{Timing of biopsy in increasing alphabetical order (A = diagnosis, B = first relapse etc)}
-#'   \item{ffpe_or_frozen}{Whether the nucleic acids were extracted from a frozen or FFPE sample}
-#'   \item{COO_consensus}{Consensus call of COO between different sources.}
-#'   \item{DHITsig_consensus}{Consensus call of DHIT signature status between different sources.}
-#'   \item{EBV_status_inf}{Inferred EBV status of the tumor}
-#'   \item{lymphgen_no_cnv}{LymphGen label using model without CNV}
-#'   \item{lymphgen_with_cnv}{LymphGen label using model with CNV}
-#'   \item{lymphgen_cnv_noA53}{LymphGen label using model with CNV but excluding A53 class}
-#'   \item{lymphgen_wright}{The LymphGen call for this sample from Wright et all (if applicable)}
-#'   \item{fl_grade}{Grade of FL samples}
-#'   \item{normal_sample_id}{Sample id for normal tissue used in the analysis}
-#'   \item{pairing_status}{Matching status of the sample}
-#'   \item{lymphgen}{LymphGen label}
-#'   \item{molecular_BL}{label of the sample according to the molecular BL classifier}
-#'   \item{Tumor_Sample_Barcode}{Duplicate of sample_id for simplifying joins to MAF data frames}
-#'   \item{pathology_rank}{Numeric rank for consistent ordering of samples by pathology}
-#'   \item{hiv_status}{HIV status of the sample}
-#'   \item{age_group}{Adult_BL or Pediatric_BL or Other, specific to the BLGSP study}
-#'   \item{sex}{The biological sex of the patient, if available. Allowable options: M, F, NA}
-#' }
-#'
-#' @import dplyr purrr
-#'
-#' @export
-#'
-#' @examples
-#' #return metadata for genome samples
-#' genome_meta = get_gambl_metadata(seq_type_filter = "genome")
-#'
-#' #return metadata for capture samples
-#' capture_meta = get_gambl_metadata(seq_type_filter = "capture")
-#'
-#' #return metadata for genome and capture samples
-#' all_meta = get_gambl_metadata(seq_type_filter = c("genome", "capture"))
-#'
-get_gambl_metadata = function(
-    seq_type_filter = "genome",
-    case_set,
-    ...
-){
-
-    #check if any invalid parameters are provided
-    check_excess_params(...)
-
-    message("Using the bundled metadata in GAMBLR.data...")
-    metadata <- GAMBLR.data::sample_data$meta %>%
-            dplyr::filter(seq_type %in% seq_type_filter)
-
-
-    if(!missing(case_set)){
-
-        # pre-defined case sets
-        if(case_set == "FL_Dreval"){
-            metadata <- metadata %>%
-                dplyr::filter(cohort == "FL_Dreval", pathology == "FL")
-        }else if(case_set == "DLBCL_Dreval"){
-            metadata <- metadata %>%
-                dplyr::filter(cohort == "FL_Dreval", pathology == "DLBCL")
-        }else if(case_set == "FL-DLBCL-study"){
-            metadata <- metadata %>%
-                dplyr::filter(cohort == "FL_Dreval")
-        }else if(case_set == "DLBCL_Arthur"){
-            metadata <- metadata %>%
-                dplyr::filter(cohort == "DLBCL_Arthur")
-        }else if(case_set == "DLBCL_Hilton"){
-            metadata <- metadata %>%
-                dplyr::filter(cohort == "DLBCL_Hilton")
-        }else if(case_set == "DLBCL_cell_lines"){
-            metadata <- metadata %>%
-                dplyr::filter(cohort == "DLBCL_cell_lines")
-        }else if(case_set == "DLBCL_Chapuy"){
-            metadata <- metadata %>%
-                dplyr::filter(cohort == "dlbcl_chapuy")
-        }else if(case_set == "DLBCL_Schmitz"){
-            metadata <- metadata %>%
-                dplyr::filter(cohort == "dlbcl_schmitz")
-        }else if(case_set == "DLBCL_Reddy"){
-            metadata <- metadata %>%
-                dplyr::filter(cohort == "dlbcl_reddy")
-        }else if(case_set == "BL_Thomas"){
-            metadata <- metadata %>%
-                dplyr::filter(cohort == "BL_Thomas")
-        }else if(case_set == "DLBCL_Thomas"){
-            metadata <- metadata %>%
-                dplyr::filter(cohort == "DLBCL_Thomas")
-        }else{
-            message(paste("case set", case_set, "not available"))
-            return()
-        }
-    }
-
-    metadata <- metadata %>%
-        dplyr::left_join(
-            gambl_metadata,
-            by = "sample_id",
-            suffix = c(".X", ".Y")
-        ) %>%
-        split.default(gsub('.[XY]', '', names(.))) %>%
-        purrr::map_dfc( ~ if (ncol(.x) == 1)
-            .x
-            else
-            dplyr::mutate(.x,!!sym(gsub('.X', '', names(
-                .x
-            )[1])) := dplyr::coalesce(!!!syms(names(
-                .x
-            ))))) %>%
-        dplyr::select(!contains("."))
-    #ensure only unique rows are returned
-    return(unique(metadata))
-}
diff --git a/R/get_manta_sv.R b/R/get_manta_sv.R
deleted file mode 100644
index dd9f697..0000000
--- a/R/get_manta_sv.R
+++ /dev/null
@@ -1,164 +0,0 @@
-#' @title Get Manta SVs
-#'
-#' @description Convenience function for retrieving Manta Structural Variants (SVs) from the bundled data [GAMBLR.data::sample_data].
-#'
-#' @details To obtain SV calls for multiple samples, give `these_sample_ids` a vector of sample IDs. 
-#' Alternatively, the user can also provide the `these_samples_metadata` parameter to make use of an already subset metadata table. 
-#' In this case, the returned SVs will be restricted to the sample_ids within that data frame. 
-#' This function internally calls [GAMBLR.data::id_ease] to streamline sample ID/metadata parameters.
-#' This function can also restrict the returned calls to any genomic regions specified within `chromosome`, `qstart`, `qend`,
-#' or the complete region specified under `region` (in chr:start-end format), note that chromosome can be either prefixed or not prefixed.
-#' Useful filtering parameters are also available, use `min_vaf` to set the minimum tumour VAF for a SV to be returned and `min_score`
-#' to set the lowest Manta somatic score for a SV to be returned. `pair_status` can be used to return variants from either matched or unmatched samples.
-#' In addition, the user can chose to return all variants, even the ones not passing the filter criteria. To do so, set `pass = FALSE` (default is TRUE).
-#'
-#' @param these_sample_ids Optional, a vector of multiple sample_id (or a single sample ID as a string) that you want results for.
-#' @param these_samples_metadata Optional, a metadata table (with sample IDs in a column) to subset the return to. 
-#' If not provided (and if `these_sample_ids` is not provided), the function will return all samples from the specified seq_type in the metadata.
-#' @param projection The projection genome build. Default is grch37.
-#' @param this_seq_type The this_seq_type you want back, default is genome.
-#' @param chromosome Optional, the chromosome you are restricting to (can be prefixed or not prefixed).
-#' @param qstart Optional, query start coordinate of the range you are restricting to.
-#' @param qend Optional, query end coordinate of the range you are restricting to.
-#' @param region Optional, region formatted like chrX:1234-5678 (chromosome can be prefixed or not prefixed) instead of specifying chromosome, start and end separately.
-#' @param pairing_status Use to restrict results (if desired) to matched or unmatched results (default is to return all). This parameter takes the filtering condition as a string ("matched" or "unmatched").
-#' @param min_vaf The minimum tumour VAF for a SV to be returned. Default is 0.1.
-#' @param min_score The lowest Manta somatic score for a SV to be returned. Default is 40.
-#' @param pass If TRUE (default) only return SVs that are annotated with PASS in the FILTER column. Set to FALSE to keep all variants, regardless if they PASS the filters.
-#' @param verbose Set to FALSE to minimize the output to console. Default is TRUE. This parameter also dictates the verbosity of any helper function internally called inside the main function.
-#' @param ... Any additional parameters.
-#' 
-#' @export
-#' 
-#' @import dplyr
-#' 
-#' @examples
-#' #load packages
-#' library(dplyr)
-#' 
-#' #lazily get every SV in the table with default quality filters
-#' all_sv = get_manta_sv()
-#'
-#' #get all SVs DLBCL cell line samples
-#' cell_line_meta = GAMBLR.data::sample_data$meta %>% 
-#'   dplyr::filter(cohort == "DLBCL_cell_lines")
-#'   
-#' dlbcl_sv = get_manta_sv(these_samples_metadata = cell_line_meta)
-#'
-#' #get the SVs in a region around MYC
-#' myc_locus_sv = get_manta_sv(region = "8:128723128-128774067")
-#' 
-get_manta_sv = function(these_sample_ids = NULL,
-                        these_samples_metadata = NULL,
-                        projection = "grch37",
-                        this_seq_type = "genome",
-                        chromosome,
-                        qstart,
-                        qend,
-                        region,
-                        pairing_status,
-                        min_vaf = 0.1,
-                        min_score = 40,
-                        pass = TRUE,
-                        verbose = FALSE,
-                        ...){
-  
-  #warn/notify the user what version of this function they are using
-  message("Using the bundled Manta SV (.bedpe) calls in GAMBLR.data...")
-  
-  #check if any invalid parameters are provided
-  check_excess_params(...)
-  
-  #get valid projections
-  valid_projections = grep("meta", names(GAMBLR.data::sample_data), value = TRUE, invert = TRUE)
-  
-  #get samples with the dedicated helper function
-  metadata = id_ease(these_samples_metadata = these_samples_metadata,
-                     these_sample_ids = these_sample_ids,
-                     verbose = verbose,
-                     this_seq_type = this_seq_type)
-  
-  sample_ids = metadata$sample_id
-  
-  #return manta SV based on the selected projection
-  if(projection %in% valid_projections){
-    manta_sv = GAMBLR.data::sample_data[[projection]]$bedpe %>% 
-      dplyr::filter(tumour_sample_id %in% sample_ids)
-  }else{
-    stop(paste("please provide a valid projection. The following are available:",
-               paste(valid_projections,collapse=", ")))
-  }
-  
-  if(!missing(region)){
-    region = gsub(",", "", region)
-    split_chunks = unlist(strsplit(region, ":"))
-    chromosome = split_chunks[1]
-    startend = unlist(strsplit(split_chunks[2], "-"))
-    qstart = startend[1]
-    qend = startend[2]
-  }
-  
-  manta_sv = manta_sv %>%
-    dplyr::filter(VAF_tumour >= min_vaf,
-                  SCORE >= min_score)
-  
-  if(verbose){
-    no_manta = setdiff(metadata$sample_id, manta_sv$tumour_sample_id)
-    
-    if(length(no_manta) > 0){
-      message(paste0("No Manta results found for ", length(no_manta), " samples..."))
-      print(no_manta)
-    }
-  }
-  
-  #deal with chr prefixes based on the selected projection (if return is to be subset to regions...)
-  if(!missing(region) || !missing(chromosome)){
-    if(projection == "grch37"){
-      if(grepl("chr", chromosome)){
-        chromosome = gsub("chr", "", chromosome)
-      }
-    }else if(projection == "hg38"){
-      if(!grepl("chr", chromosome)){
-        chromosome = paste0("chr", chromosome)
-      }
-    }
-    
-    manta_sv = manta_sv %>%
-      dplyr::filter((CHROM_A == chromosome & START_A >= qstart & START_A <= qend) | (CHROM_B == chromosome & START_B >= qstart & START_B <= qend))
-  }
-  
-  if(verbose){
-    message("\nThe following VCF filters are applied;")
-    message(paste0("  Minimum VAF: ", min_vaf))
-    message(paste0("  Minimum Score: ", min_score))
-    message(paste0("  Only keep variants passing the quality filter: ", pass))
-  }
-  
-  #PASS filter
-  if(pass){
-    manta_sv = manta_sv %>%
-      dplyr::filter(FILTER == "PASS")
-  }
-  
-  #pairing status filter
-  if(!missing(pairing_status)){
-    if(verbose){
-      message(paste0("  Pairing status: ", pairing_status))
-    }
-    
-    manta_sv = manta_sv %>%
-      dplyr::filter(pair_status == pairing_status)
-  }
-  
-  #convert to data frame and print some metrics
-  manta_sv = as.data.frame(manta_sv)
-  
-  if(verbose){
-    n_variants = nrow(manta_sv)
-    unique_samples = unique(manta_sv$tumour_sample_id)
-    message(paste0("\nReturning ", n_variants, " variants from ", length(unique_samples), " sample(s)"))
-    message("\nDone!")
-  }
-  
-  return(manta_sv)
-}
diff --git a/R/get_sample_cn_segments.R b/R/get_sample_cn_segments.R
deleted file mode 100644
index 17388ea..0000000
--- a/R/get_sample_cn_segments.R
+++ /dev/null
@@ -1,90 +0,0 @@
-#' @title Get Sample CN Segments.
-#'
-#' @description Get all segments for a single (or multiple) sample_id(s).
-#'
-#' @details This function returns CN segments. This works for single sample or multiple samples.
-#' Specify the sample IDs you are interested in with `these_sample_ids` (as a vector of characters),
-#' Or call this function with `these_samples_metadata` if you already have a metadata table subset to the sample IDs of interest.
-#' If none of the above parameters are specified, the function will return CN segments for available samples (from get_gambl_metadata).
-#' Note, this. function internally calls [GAMBLR.data::id_ease] for dealing with sample IDs and metadata tables. 
-#'
-#' @param these_sample_ids Optional, a vector of multiple sample_id (or a single sample ID as a string) that you want results for.
-#' @param these_samples_metadata Optional, a metadata table (with sample IDs in a column) to subset the return to. 
-#' If not provided (and if `these_sample_ids` is not provided), the function will return all samples from the specified seq_type in the metadata.
-#' @param projection Selected genome projection for returned CN segments. Default is "grch37".
-#' @param this_seq_type Seq type for returned CN segments. Default is genome.
-#' @param with_chr_prefix Set to TRUE to add a chr prefix to chromosome names. Default is FALSE.
-#' @param streamlined Return a minimal output rather than full details. Default is FALSE.
-#' @param verbose Set to FALSE to minimize the output to console. Default is TRUE. This parameter also dictates the verbosity of any helper function internally called inside the main function.
-#' @param ... Any additional parameters.
-#'
-#' @return A data frame of segments for a specific or multiple sample ID(s).
-#'
-#' @import dplyr
-#' @export
-#'
-#' @examples
-#' #load pacakges
-#' library(dplyr)
-#' 
-#' #get CN segments for one sample
-#' dohh2_segs = get_sample_cn_segments(these_sample_ids = "DOHH-2",
-#'                                     projection = "hg38", 
-#'                                     streamlined = TRUE)
-#'
-#' #get CN segments for DLBCL cell line
-#' cell_line_meta = GAMBLR.data::sample_data$meta %>% 
-#'   dplyr::filter(cohort == "DLBCL_cell_lines")
-#'   
-#' dlbcl_segs = get_sample_cn_segments(these_samples_metadata = cell_line_meta, 
-#'                                     streamlined = TRUE)
-#'
-get_sample_cn_segments = function(these_sample_ids = NULL,
-                                  these_samples_metadata = NULL,
-                                  projection = "grch37",
-                                  this_seq_type = "genome",
-                                  with_chr_prefix = FALSE,
-                                  streamlined = FALSE,
-                                  verbose = FALSE,
-                                  ...){
-  
-  #warn/notify the user what version of this function they are using
-  message("Using the bundled CN segments (.seg) calls in GAMBLR.data...")
-  
-  #check if any invalid parameters are provided
-  check_excess_params(...)
-  
-  #get samples with the dedicated helper function
-  metadata = id_ease(these_samples_metadata = these_samples_metadata,
-                     these_sample_ids = these_sample_ids,
-                     verbose = verbose,
-                     this_seq_type = this_seq_type)
-  
-  sample_ids = metadata$sample_id
-  
-  #get valid projections
-  valid_projections = grep("meta", names(GAMBLR.data::sample_data), value = TRUE, invert = TRUE)
-  
-  #return CN segments based on the selected projection
-  if(projection %in% valid_projections){
-    all_segs = GAMBLR.data::sample_data[[projection]]$seg %>%
-      dplyr::filter(ID %in% sample_ids)
-  }else{
-    stop(paste("please provide a valid projection. The following are available:",
-               paste(valid_projections,collapse=", ")))
-  }
-  
-  #deal with chr prefixes
-  if(!with_chr_prefix){
-    all_segs = all_segs %>%
-      dplyr::mutate(chrom = gsub("chr", "", chrom))
-  }else{
-    if(!grepl("chr", all_segs$chrom[1])){
-      all_segs$chrom = paste0("chr", all_segs$chrom)
-    }
-  }
-  
-  if(streamlined){all_segs = dplyr::select(all_segs, ID, CN)}
-  
-  return(all_segs)
-}
diff --git a/R/get_ssm_by_patients.R b/R/get_ssm_by_patients.R
deleted file mode 100644
index 6f33702..0000000
--- a/R/get_ssm_by_patients.R
+++ /dev/null
@@ -1,83 +0,0 @@
-#' @title Get SSM By Patients.
-#'
-#' @description Get MAF-format data frame for more than one patient.
-#'
-#' @details This function returns variants from a set of patients.
-#' This function internally calls [GAMBLR.data::get_ssm_by_samples].
-#' Thus, the main contents of this function is to wrangle the provided patient IDs,
-#' so that the corresponding sample IDs can be provided to the internal call of `get_ssm_by_samples`.
-#' This function expects either a vector of patient IDs (`these_patients_ids`)
-#' or an already subset metadata table (`these_samples_metadata`).
-#'
-#' @param these_patient_ids A vector of patient IDs that you want results for.
-#' The user can also use a metadata table that has been subset to the patient IDs of interest (see `these_samples_metadata`).
-#' @param these_samples_metadata A metadata subset to contain the rows corresponding to the patients of interest.
-#' If the vector of patient IDs is missing (`these_patient_ids`), this function will default to all patient IDs in the metadata table given to this parameter.
-#' @param projection Obtain variants projected to this reference (one of grch37 or hg38). Default is grch37.
-#' @param this_seq_type The seq type you want results for. Default is "genome".
-#' @param tool_name Optionally specify which tool to report variant from. The default is slms-3, also supports "publication" to return the exact variants as reported in the original papers.
-#' @param this_study Optionally specify first name of the author for the paper
-#'      from which the variants should be returned for.
-#' This parameter can either be a vector of indexes (integer) or a vector of characters (matching columns in MAF).
-#' @param verbose Set to FALSE to minimize the output to console. Default is TRUE. This parameter also dictates the verbosity of any helper function internally called inside the main function.
-#' @param ... Any additional parameters.
-#'
-#' @return A data frame with SSM calls for the selected patients in MAF format.
-#'
-#' @import dplyr
-#'
-#' @export
-#'
-#' @examples
-#' #load packages
-#' library(dplyr)
-#'
-#' #basic usage, these_patient_ids
-#' my_patient = get_ssm_by_patients(these_patient_ids = "DOHH-2")
-#'
-#' #using a subset metadata tablee to retreive patient SSMs
-#' cell_line_meta = GAMBLR.data::sample_data$meta %>%
-#'  dplyr::filter(cohort == "DLBCL_cell_lines")
-#'
-#' patient_maf = get_ssm_by_patients(these_samples_metadata = cell_line_meta,
-#'                                   this_seq_type = "genome")
-#'
-get_ssm_by_patients = function(these_patient_ids,
-                               these_samples_metadata,
-                               projection = "grch37",
-                               this_seq_type = "genome",
-                               tool_name = "slms-3",
-                               this_study,
-                               verbose = FALSE,
-                               ...){
-
-  #check if any invalid parameters are provided
-  check_excess_params(...)
-
-  #figure out what patients the user wants
-  if(missing(these_patient_ids)){
-    if(missing(these_samples_metadata)){
-      stop("You must provide either patient IDs (`these_patient_ids`) or a metadata table with the patient IDs of interest (`these_samples_metadata`)...")
-    }else{
-      message("No patient IDs were provided, this function will resort to all available patient IDs in the provided metadata.")
-    }
-  }else{
-    if(missing(these_samples_metadata)){
-      these_samples_metadata = GAMBLR.data::get_gambl_metadata(seq_type_filter = this_seq_type)
-    }
-    message("Patient IDs and metadata were provided, this function will resort to all available patient IDs in the provided metadata.")
-    these_samples_metadata = these_samples_metadata %>%
-      dplyr::filter(patient_id %in% these_patient_ids)
-  }
-
-  #run get_ssm_by_samples with these_samples_metadata parameter
-  samples_ssm = GAMBLR.data::get_ssm_by_samples(these_samples_metadata = these_samples_metadata,
-                                         projection = projection,
-                                         this_seq_type = this_seq_type,
-                                         tool_name = tool_name,
-                                         verbose = verbose,
-                                         ...)
-  samples_ssm = create_maf_data(samples_ssm,projection)
-  # use S3-safe version of dplyr function
-  samples_ssm = mutate.genomic_data(samples_ssm,maf_seq_type = this_seq_type)
-}
diff --git a/R/get_ssm_by_region.R b/R/get_ssm_by_region.R
deleted file mode 100644
index 34a6f65..0000000
--- a/R/get_ssm_by_region.R
+++ /dev/null
@@ -1,138 +0,0 @@
-#' @title Get SSM By Region.
-#'
-#' @description Retrieve all SSMs from the GAMBL database within a single genomic coordinate range.
-#'
-#' @details This function lets the user specify a region of interest for returning SSM calls within that region.
-#' There are multiple ways a region can be specified. For example, the user can provide the full region in a "region" format (chr:start-end) to the `region` parameter.
-#' Or, the user can provide chromosome, start and end coordinates individually with `chr`, `start`, and `end` parameters.
-#'
-#' @param these_sample_ids Optional, a vector of multiple sample_id (or a single sample ID as a string) that you want results for.
-#' @param these_samples_metadata Optional, a metadata table (with sample IDs in a column) to subset the return to.
-#' If not provided (and if `these_sample_ids` is not provided), the function will return all samples from the specified seq_type in the metadata.
-#' @param maf_data Optional data frame with mutations in MAF format.
-#' If user provides a maf, the function trusts that the user has already subset this to samples of interest, correct seq_type.
-#' i.e the following parameters are ignored; `these_samples_metadata`, `these_sample_ids`, and `this_seq_type`
-#' @param chromosome The chromosome you are restricting to (with or without a chr prefix).
-#' @param qstart Query start coordinate of the range you are restricting to.
-#' @param qend Query end coordinate of the range you are restricting to.
-#' @param region Region formatted like chrX:1234-5678 instead of specifying chromosome, start and end separately.
-#' @param streamlined Return Start_Position and Tumor_Smaple_Barcode as the only two MAF columns. Default is FALSE.
-#' @param projection Obtain variants projected to this reference (one of grch37 or hg38).
-#' @param this_seq_type The seq_type you want back, default is genome.
-#' @param tool_name Optionally specify which tool to report variant from. The default is slms-3, also supports "publication" to return the exact variants as reported in the original papers.
-#' @param this_study Optionally specify first name of the author for the paper
-#'      from which the variants should be returned for.
-#' @param verbose Set to FALSE to prevent ANY message to be printed.
-#' In most cases, this parameter should be left to TRUE.
-#' The parameter was added to accommodate for noisy output
-#' when running this function in a loop for retrieving SSM
-#' for multiple regions [GAMBLR.data::get_ssm_by_regions].
-#' @param ... Any additional parameters.
-#'
-#' @return A data frame containing all mutations (MAF) in the specified region.
-#'
-#' @import dplyr
-#'
-#' @examples
-#' my_mutations = get_ssm_by_region(region = "chr8:128,723,128-128,774,067")
-#'
-#' #specifying chromosome, start and end individually
-#' my_mutations = get_ssm_by_region(chromosome = "8",
-#'                                  qstart = 128723128,
-#'                                  qend = 128774067)
-#'
-get_ssm_by_region = function(these_sample_ids = NULL,
-                             these_samples_metadata = NULL,
-                             maf_data,
-                             chromosome,
-                             qstart,
-                             qend,
-                             region = "",
-                             streamlined = FALSE,
-                             projection = "grch37",
-                             this_seq_type = "genome",
-                             tool_name = "slms-3",
-                             this_study,
-                             verbose = FALSE,
-                             ...){
-
-  if(verbose){
-    if(missing(maf_data)){
-      #warn/notify the user what version of this function they are using
-      message("Using the bundled SSM calls (.maf) calls in GAMBLR.data...")
-    }
-  }
-
-  #check if any invalid parameters are provided
-  check_excess_params(...)
-
-  #get samples with the dedicated helper function
-  metadata = id_ease(these_samples_metadata = these_samples_metadata,
-                     these_sample_ids = these_sample_ids,
-                     verbose = verbose,
-                     this_seq_type = this_seq_type)
-
-  sample_ids = metadata$sample_id
-
-  
-
-  # Optionally return variants from a particular study
-  if(!missing(this_study)){
-    this_maf <- this_maf %>%
-      dplyr::filter((!!sym("Study")) == this_study)
-  }
-
-  #split region into chunks (chr, start, end) and deal with chr prefixes based on the selected projection
-  if(length(region) > 1){
-    stop("You are providing more than one region, please refer to get_ssm_by_regions for multiple regions...")
-  }
-
-  if(!region == ""){
-    region = gsub(",", "", region)
-    split_chunks = unlist(strsplit(region, ":"))
-
-    chromosome = split_chunks[1]
-    startend = unlist(strsplit(split_chunks[2], "-"))
-    qstart = as.numeric(startend[1])
-    qend = as.numeric(startend[2])
-  }else{
-    if(projection =="grch37"){
-      chromosome = gsub("chr", "", chromosome)
-    }
-    region = paste0(chromosome, ":", qstart, "-", qend)
-  }
-
-  if(projection == "grch37"){
-    chromosome = gsub("chr", "", chromosome)
-  }
-
-  #return SSMs based on the selected projection
-  if(missing(maf_data)){
-    # Filter by position on-the-fly to avoid wastefully building the same large MAF each time
-    this_maf = GAMBLR.data::sample_data[[projection]]$maf %>%
-      dplyr::filter(Chromosome == chromosome & Start_Position > qstart & Start_Position < qend) %>%
-      dplyr::filter(Tumor_Sample_Barcode %in% sample_ids) %>%
-      dplyr::filter((tolower(!!sym("Pipeline")) == tool_name))
-    muts_region <- GAMBLR.data::sample_data[[projection]]$ashm %>%
-      dplyr::filter(Chromosome == chromosome & Start_Position > qstart & Start_Position < qend) %>%
-      dplyr::filter(Tumor_Sample_Barcode %in% sample_ids) %>%
-      dplyr::filter((tolower(!!sym("Pipeline")) == tool_name)) %>%
-      bind_rows(this_maf, .)
-  }else{
-    muts_region = dplyr::filter(maf_data, Tumor_Sample_Barcode %in% sample_ids) %>%
-      dplyr::filter(Chromosome == chromosome & Start_Position > qstart & Start_Position < qend)
-  }
-  
-  # Handle possible duplicates
-  muts_region <- muts_region %>%
-    distinct(Tumor_Sample_Barcode, Chromosome, Start_Position, End_Position, .keep_all = TRUE)
-
-  if(streamlined){
-    muts_region = muts_region %>%
-      dplyr::select(Start_Position, Tumor_Sample_Barcode)
-  }
-  muts_region = create_maf_data(muts_region,projection)
-  # use S3-safe version of dplyr function
-  muts_region = mutate.genomic_data(muts_region,maf_seq_type = this_seq_type)
-  return(muts_region)
-}
diff --git a/R/get_ssm_by_regions.R b/R/get_ssm_by_regions.R
deleted file mode 100644
index bda6930..0000000
--- a/R/get_ssm_by_regions.R
+++ /dev/null
@@ -1,143 +0,0 @@
-#' @title Get SSM By Regions.
-#'
-#' @description Efficiently retrieve all mutations across a range of genomic regions.
-#'
-#' @details This function internally calls get_ssm_by_region to retrieve SSM calls for the specified regions.
-#'
-#' @param these_samples_metadata Optional, a metadata table (with sample IDs in a column) to subset the return to.
-#' @param this_seq_type The this_seq_type you want back, default is genome.
-#' @param tool_name Optionally specify which tool to report variant from. The default is slms-3, also supports "publication" to return the exact variants as reported in the original papers.
-#' @param regions_list A vector of regions in the chr:start-end format to restrict the returned SSM calls to.
-#' @param regions_bed A data frame in BED format with the coordinates you want to retrieve (recommended).
-#' This parameter can also accept an additional column with region names that will be added to the return if `use_name_column = TRUE`
-#' @param streamlined If set to TRUE (default) only 3 columns will be kept in the returned data frame (start, sample_id and region_name).
-#' @param projection Obtain variants projected to this reference (one of grch37 or hg38), default is grch37.
-#' @param verbose Set to TRUE to maximize the output to console. Default is TRUE.
-#' This parameter also dictates the verbosity of any helper function internally called inside the main function.
-#' @param ... Any additional parameters.
-#'
-#' @return Returns a data frame of variants in MAF-like format.
-#'
-#' @import tibble dplyr tidyr
-#'
-#' @export
-#'
-#' @examples
-#' #basic usage, adding custom names from bundled ashm data frame
-#' regions_bed = create_bed_data( GAMBLR.data::grch37_ashm_regions,
-#'                           fix_names = "concat",
-#'                           concat_cols = c("gene","region"),
-#'                           sep="-")
-#' 
-#' my_meta = get_gambl_metadata()
-#' # get a full MAF-format data frame for all aSHM regions on grch37 coordinates
-#' ashm_maf = get_ssm_by_regions(regions_bed = regions_bed,
-#'                                         these_samples_metadata = my_meta,
-#'                                         streamlined = FALSE)
-#'
-#' # This example intentionally fails
-#' ashm_maf = get_ssm_by_regions(regions_bed = regions_bed,
-#'                               these_samples_metadata = my_meta,
-#'                                projection="hg38")
-#' # Error in get_ssm_by_regions(regions_bed = regions_bed, these_samples_metadata = my_meta,  : 
-#' # requested projection: hg38 and genome_build of regions_bed: grch37 don't match
-#'
-get_ssm_by_regions <- function(these_samples_metadata,
-                               regions_list,
-                               regions_bed,
-                               this_seq_type = "genome",
-                               streamlined = TRUE,
-                               projection = "grch37",
-                               verbose = FALSE,
-                               tool_name = "slms-3",
-                               ...) {
-
-  # check provided projection
-  # first, get valid projections
-  valid_projections = grep("meta", names(GAMBLR.data::sample_data),
-                           value = TRUE, invert = TRUE)
-  if (!projection %in% valid_projections) {
-    stop("Please provide a valid projection. The following are available: ",
-         paste(valid_projections, collapse = ", "), ".")
-  }
-  
-  # check if any invalid parameters are provided
-  check_excess_params(...)
-
-  bed2region = function(x) {
-    paste0(x[1], ":", as.numeric(x[2]), "-", as.numeric(x[3]))
-  }
-
-  if (missing(regions_list)) {
-    if (!missing(regions_bed)) {
-      if("bed_data" %in% class(regions_bed)){
-        #confirm the genome builds match
-        if(!get_genome_build(regions_bed)==projection){
-          stop(paste("requested projection:",projection,"and genome_build of regions_bed:", get_genome_build(regions_bed), "don't match"))
-        }
-      }
-      regions = apply(regions_bed, 1, bed2region)
-    } else {
-      stop("You must supply either regions_list or regions_bed")
-    }
-  } else {
-    regions = regions_list
-  }
-
-  # Get samples with the dedicated helper function
-  metadata = id_ease(these_samples_metadata = these_samples_metadata,
-                     verbose = verbose,
-                     this_seq_type = this_seq_type)
-
-
-  # Warn/notify the user what version of this function they are using
-  message("Using the bundled SSM calls (.maf) calls in GAMBLR.data...")
-    if (verbose) {
-      print("Using the non-default engine for efficiency...")
-    }
-
-    sample_maf <- get_ssm_by_samples(
-      these_samples_metadata = these_samples_metadata,
-      this_seq_type = this_seq_type,
-      projection = projection,
-      tool_name = tool_name
-    )
-    if(!missing(regions_bed) & "bed_data" %in% class(regions_bed)){
-      regions_df = dplyr::select(regions_bed,1:4) %>%
-        dplyr::rename(c("Chromosome"="chrom",
-                        "Start_Position"="start",
-                        "End_Position"="end",
-                        "region"="name")) 
-    }else{
-      regions_df <- as.data.frame(regions) %>%
-        `names<-`("regions") %>%
-        separate(
-          regions,
-          c("Chromosome", "Start_Position", "End_Position"),
-          ":|-"
-        ) %>%
-        mutate(
-          Start_Position = as.numeric(Start_Position),
-          End_Position = as.numeric(End_Position),
-          region = row_number()
-        )
-    }
-    
-
-    region_mafs <- cool_overlaps(
-      sample_maf,
-      regions_df
-    ) %>%
-      dplyr::rename_with(~ gsub(".x", "", .x, fixed = TRUE)) %>%
-      dplyr::select(all_of(c(names(sample_maf), "region"))) %>%
-      dplyr::group_split(region)
-    maf_df = do.call(bind_rows, region_mafs)
-    
-    if(streamlined){
-      maf_df = dplyr::select(maf_df,Start_Position,Tumor_Sample_Barcode,region) %>%
-        dplyr::rename(c("sample_id"="Tumor_Sample_Barcode"))
-    }
-    return(maf_df)
-    
-
-}
\ No newline at end of file
diff --git a/R/get_ssm_by_samples.R b/R/get_ssm_by_samples.R
deleted file mode 100644
index 5c93669..0000000
--- a/R/get_ssm_by_samples.R
+++ /dev/null
@@ -1,85 +0,0 @@
-#' @title Get SSM By Samples.
-#'
-#' @description Get the SSMs (i.e. load MAF) for a single sample or a collection of samples.
-#'
-#' @details Retrieve a maf for a specific sample or a set of samples.
-#' Either specify the sample IDs of interest with `these_sample_ids`.
-#' Or a metadata table subset to the sample IDs of interest with `these_samples_metadata`.
-#'
-#' @param these_sample_ids A vector of one or more sample IDs that you want results for.
-#' @param these_samples_metadata Optional, a metadata table (with sample IDs in a column) to auto-subset the data to samples in that table before returning.
-#' If not provided and these_sample_ids is also not provided, the function will return SSM for all samples from the specified seq_type in the bundled metadata.
-#' @param this_seq_type Default is genome.
-#' @param projection The projection genome build. Supports hg38 and grch37.
-#' @param tool_name Optionally specify which tool to report variant from. The default is slms-3, also supports "publication" to return the exact variants as reported in the original papers.
-#' @param verbose Enable for debugging/noisier output.
-#' @param ... Any additional parameters.
-#'
-#' @return data frame in MAF format.
-#'
-#' @import dplyr
-#'
-#' @export
-#'
-#' @examples
-#' #load a common dependency
-#' library(dplyr)
-#'
-#' #Get genome-wide set of mutations from all DLBCL cell lines
-#' cell_line_meta = get_gambl_metadata() %>% 
-#'   dplyr::filter(cohort == "DLBCL_cell_lines")
-#'
-#' dlbcl_maf = get_ssm_by_samples(these_samples_metadata = cell_line_meta)
-#'
-get_ssm_by_samples <- function(these_sample_ids = NULL,
-                               these_samples_metadata = NULL,
-                               this_seq_type = "genome",
-                               projection = "grch37",
-                               tool_name = "slms-3",
-                               verbose = FALSE,
-                               ...){
-
-  #warn/notify the user what version of this function they are using
-  message("Using the bundled SSM calls (.maf) calls in GAMBLR.data...")
-
-  #check if any invalid parameters are provided
-  check_excess_params(...)
-
-  #get samples with the dedicated helper function
-  metadata = id_ease(these_samples_metadata = these_samples_metadata,
-                     these_sample_ids = these_sample_ids,
-                     verbose = verbose,
-                     this_seq_type = this_seq_type)
-
-  sample_ids = metadata$sample_id
-
-  #get valid projections
-  valid_projections = grep("meta", names(GAMBLR.data::sample_data), value = TRUE, invert = TRUE)
-
-  #return SSMs based on the selected projection
-  if(projection %in% valid_projections){
-    sample_ssm = GAMBLR.data::sample_data[[projection]]$maf %>%
-        dplyr::filter(Tumor_Sample_Barcode %in% sample_ids) %>%
-        dplyr::filter((tolower(!!sym("Pipeline")) == tool_name))
-    sample_ssm <- bind_rows(
-        sample_ssm,
-        GAMBLR.data::sample_data[[projection]]$ashm %>%
-            dplyr::filter(Tumor_Sample_Barcode %in% sample_ids) %>%
-            dplyr::filter((tolower(!!sym("Pipeline")) == tool_name))
-    )
-    
-  }else{
-    stop(paste("please provide a valid projection. The following are available:",
-               paste(valid_projections,collapse=", ")))
-  }
-
-
-  # Handle possible duplicates
-  sample_ssm <- sample_ssm %>%
-    distinct(Tumor_Sample_Barcode, Chromosome, Start_Position, End_Position, .keep_all = TRUE)
-  # bundle genome_build with the maf_data
-  sample_ssm = create_maf_data(sample_ssm,projection)
-  # use S3-safe version of dplyr function
-  sample_ssm = mutate.genomic_data(sample_ssm,maf_seq_type = this_seq_type)
-  return(sample_ssm)
-}
diff --git a/R/id_ease.R b/R/id_ease.R
deleted file mode 100644
index 0a1dd9e..0000000
--- a/R/id_ease.R
+++ /dev/null
@@ -1,109 +0,0 @@
-#' @title ID Ease
-#'
-#' @aliases id_ease, id ease
-#'
-#' @description Internal convenience function that standardize the way GAMBLR functions deals with sample IDs (these_sample_ids)
-#' and metadata (these_samples_metadata).
-#'
-#' @details This function can take sample IDs as a vector of characters, or a metadata table in data frame format.
-#' If no sample IDs are provided to the function, the function will operate on all gambl sample IDs available for the given seq type.
-#' It is highly recommended to run this function with `verbose = TRUE`. 
-#' Since this will not only improve the overall logic on how the function operates.
-#' But also might help with debugging functions that are internally calling this function.
-#' The function also performs sanity checks and notifies the user if any of the requested sample IDs are not found in the metadata.
-#' In addition, the function also notifies the dimensions of the returned object, providing further insight to what is returned. 
-#' As with all GAMBLR functions, providing a curated metadata table to any GAMBLR function (as opposed to a vector of IDs) is the safest way to ensure you get the expected result.
-#' 
-#' @param these_samples_metadata An optional data frame with metadata, subset to sample IDs of interest.
-#' If not provided will retrieve GAMBL metadata for all available samples.
-#' @param these_sample_ids Optional character vector of GAMBL sample IDs.
-#' @param this_seq_type The seq type of interest. Default is both genome and exome, with priority for genome when a sample has >1 seq_type. 
-#' @param verbose Set to FALSE to limit the information that gets printed to the console. Default is FALSE.
-#' 
-#' @export
-#'
-#' @return Metadata (data frame).
-#'
-#' @examples
-#' #load packages
-#' library(dplyr)
-#' 
-#' #give the function nothing (i.e return all sample IDs in the metadata for the default seq type)
-#' #return metadata for all samples in the default seq type
-#' all_meta = id_ease()
-#'
-#' #return metadata based on a sample ID
-#' sample_meta = id_ease(these_sample_ids = "94-15772_tumorA")
-#'
-#' #return sample IDs based on an already filtered metadata
-#' this_metadata = get_gambl_metadata(seq_type_filter = "genome") %>% 
-#'   head(5)
-#'
-#' these_ids = id_ease(these_samples_metadata = this_metadata)
-#'
-id_ease = function(these_samples_metadata = NULL,
-                   these_sample_ids = NULL,
-                   this_seq_type = c("genome", "capture"),
-                   verbose = FALSE){
-  
-  #check for provided metadata, else use GAMBL metadata
-  if(is.null(these_samples_metadata)){
-    if(verbose){
-      message("id_ease: No metadata provided, the helper function will fetch metadata for all gambl samples in the selected seq type...") 
-    }
-    metadata = GAMBLR.data::get_gambl_metadata(seq_type_filter = this_seq_type)
-  }else{
-    if(verbose){
-      message("id_ease: Metadata is provided and samples of the selected seq type are kept...") 
-    }
-    metadata = dplyr::filter(these_samples_metadata, seq_type %in% this_seq_type)
-    not_seq_type = setdiff(these_samples_metadata$sample_id, metadata$sample_id)
-    if(length(not_seq_type) > 0){
-      not_seq_type_msg = gettextf("id_ease: WARNING! %i samples in the provided metadata were removed because their seq types are not the same as in the `set_type` argument.",
-                                  length(not_seq_type))
-      if(verbose){
-        max_to_show <- 100
-        if( length(not_seq_type) > max_to_show ){
-          not_seq_type_msg = gettextf("%s Their first %i IDs are:", not_seq_type_msg, 
-                                      max_to_show)
-          not_seq_type = head(not_seq_type, max_to_show)
-        }else{
-          not_seq_type_msg = gettextf("%s Their IDs are:", not_seq_type_msg)
-        }
-        message(not_seq_type_msg)
-        print(not_seq_type)
-      }else{
-        not_seq_type_msg = gettextf("%s Use `verbose = TRUE` to see their IDs.", not_seq_type_msg)
-        message(not_seq_type_msg)
-      }
-    }
-  }
-  
-  #ensure metadata is subset to specified sample IDs
-  if(!is.null(these_sample_ids)){
-    if(verbose){
-      message("id_ease: Sample IDs are provided, filtering the metadata for selected sample IDs...") 
-    }
-    metadata = dplyr::filter(metadata, sample_id %in% these_sample_ids)
-    
-    #check if metadata is empty
-    if(nrow(metadata) == 0){
-      stop("No samples in the metadata, try a different sample ID...")
-    }
-    #check the existence of provided sample IDs in the metadata
-    not_in_meta = setdiff(these_sample_ids, metadata$sample_id)
-    if(length(not_in_meta) > 0){
-      message("id_ease: WARNING! The following sample IDs were not found in the metadata:")
-      print(not_in_meta)
-    }
-  }else{
-    if(verbose){
-      message("id_ease: No sample IDs provided, all sample IDs in the metadata will be kept...")
-    }
-  }
-  if(verbose){
-    unique_samples = unique(metadata$sample_id)
-    message(paste0("id_ease: Returning metadata for ", length(unique_samples), " samples..." ))
-  }
-  return(metadata) 
-}
diff --git a/R/process_regions.R b/R/process_regions.R
deleted file mode 100644
index 4b10d9c..0000000
--- a/R/process_regions.R
+++ /dev/null
@@ -1,147 +0,0 @@
-#' @title Process Regions objects.
-#'
-#' @description INTERNAL FUNCTION to harmonize genomic regions specified as character vectors or data frames.
-#'
-#' @details INTERNAL FUNCTION to harmonize genomic regions specified as character vectors or data frames.
-#'
-#' @param regions_list Character vector of genomic regions. If neither regions nor regions_df is specified, will use GAMBLR aSHM regions
-#' @param regions_bed Data frame of genomic regions with column names "chrom", "start", "end", "name"
-#' @param region_padding Amount to pad the start and end coordinates by. The default is 0 (no padding).
-#' @param skip_regions Character vector of genes to drop from GAMBLR aSHM regions.
-#' @param only_regions Character vector of genes to include from GAMBLR aSHM regions.
-#' @param projection Specify which genome build projection to use. The default is "grch37", also accepts "hg38".
-#' @param sort Set to TRUE to force regions_bed to be ordered on chromosome and coordinate
-#'
-#' @return A list with two objects, regions as a vector and in bed format.
-#'
-#' @export
-#'
-#' @examples
-#' library(dplyr)
-#'
-#' regions <- setNames(
-#'      c("chr1:10000-15000", "chr1:100000000-100005000"),
-#'      c("one_region", "another_region")
-#' )
-#' process_regions(regions_list = regions)
-#'
-#' reg_bed = GAMBLR.data::grch37_ashm_regions %>%
-#' dplyr::filter(chr_name == "chr17") %>%
-#'   mutate(name = region, chrom = chr_name, start = hg19_start, end = hg19_end) %>%
-#'   select(chrom, start, end, name)
-#'
-#' process_regions(regions_bed = reg_bed)
-#'
-process_regions <- function(regions_list = NULL,
-                            regions_bed = NULL,
-                            region_padding = 0,
-                            skip_regions = NULL,
-                            only_regions = NULL,
-                            projection = "grch37",
-                            sort = FALSE) {
-
-  # Use default ashm region table if no regions are provided
-  if (is.null(regions_list)) {
-    if (is.null(regions_bed)) {
-      message("Using default GAMBLR aSHM regions. ")
-      if (projection == "grch37") {
-        regions_bed <-  create_bed_data(grch37_ashm_regions,
-                                        fix_names="concat",
-                                        concat_cols=c("gene","region"),
-                                        sep="_")
-      } else if(projection=="hg38") {
-        regions_bed <-  create_bed_data(hg38_ashm_regions,
-                                        fix_names="concat",
-                                        concat_cols=c("gene","region"),
-                                        sep="_")
-      }else{
-        stop("unsupported projection!")
-      }
-      
-      if (!is.null(skip_regions)) {
-        # drop user-specified regions
-        regions_bed <- regions_bed %>%
-          dplyr::filter(!gene %in% skip_regions)
-      }
-      if (!is.null(only_regions)) {
-        # keep only user-specified regions
-        regions_bed <- regions_bed %>%
-          dplyr::filter(gene %in% only_regions)
-      }
-    }
-
-    required_cols <- c("chrom", "start", "end", "name")
-    if (min(required_cols %in% colnames(regions_bed)) == 0) {
-      stop("Provided regions_bed lacks required column names. Ensure columns chrom, start, end, and name are present. ")
-    }
-
-    # gene column is required for later joins
-    if (!"gene" %in% colnames(regions_bed)) {
-      regions_bed <- mutate(regions_bed, gene = name)
-    }
-  } else {
-    # Convert character vector of regions to df
-    regions_bed <- bind_rows(lapply(regions_list, function(x) {
-
-      chunks <- region_to_chunks(x)
-      if(projection=="grch37"){
-        chunks$chromosome = gsub("chr","",chunks$chromosome)
-      }else if(projection=="hg38" && !any(grepl("chr",chunks$chromosome))){
-        chunks$chromosome = paste0("chr",chunks$chromosome)
-      }
-      df <- data.frame(
-        chrom = chunks$chromosome,
-        start = as.numeric(chunks$start),
-        end = as.numeric(chunks$end)
-      )
-    }))
-    if(sort){
-      if(projection=="hg38"){
-        chrom_order = c(paste0("chr",c(1:22)),"chrX","chrY")
-      }else{
-        chrom_order = c(c(1:22),"X","Y")
-      }
-      
-      regions_bed = mutate(regions_bed,
-                           chrom=factor(chrom,levels=chrom_order)) %>%
-        arrange(chrom,start) %>%
-        mutate(chrom = as.character(chrom))
-    }
-    if (!is.null(names(regions_list))) {
-      regions_bed$name <- names(regions_list)
-      regions_bed$gene <- names(regions_list)
-    } else {
-      regions_bed = mutate(regions_bed,name=paste0(chrom,":",start,"-",end))
-    }
-  }
-
-  # Collapse regions with duplicate names
-  if (length(unique(regions_bed$name)) < length(regions_bed$name)) {
-    message("Warning: Multiple regions in the provided data frame have the same name. Merging these entries based on min(start) and max(end) per name value. ")
-    regions_bed <- regions_bed %>%
-      group_by(name) %>%
-      mutate(
-        start = min(start),
-        end = max(end)
-      ) %>%
-      ungroup() %>%
-      distinct()
-  }
-
-  regions_list <- unlist(apply(
-    regions_bed,
-    1,
-    function(x) {
-      # add specified padding around each region
-      paste0(x[1], ":", as.numeric(x[2]) - region_padding, "-", as.numeric(x[3]) + region_padding)
-    }
-  ))
-  names(regions_list) <- regions_bed$name
-
-  return(
-    list(
-      regions_list = regions_list,
-      regions_bed = regions_bed
-    )
-  )
-}
diff --git a/R/region_to_chunks.R b/R/region_to_chunks.R
deleted file mode 100644
index 2faad22..0000000
--- a/R/region_to_chunks.R
+++ /dev/null
@@ -1,26 +0,0 @@
-#' @title Separate a chromosome region into chunks
-#' 
-#' @description `region_to_chunks` breaks the input string that stores a chromosome 
-#' region to create a list with chromosome number and start and end positions as 
-#' separated elements.
-#'
-#' @param region A single string that stores a chromosome region. Any format like 
-#' "chr1:100000-200000", "1:100000-200000", "chr1:100'000-200'000" is possible. 
-#'
-#' @return A list with length 3 and names "chromosome", "start" and "end.
-#' @export
-#'
-#' @examples
-#' region_to_chunks(region = "chr1:100000-200000")
-#' 
-region_to_chunks = function(region){
-  region = unname(region)
-  region = gsub(",", "", region)
-  #format is chr6:37060224-37151701
-  split_chunks = unlist(strsplit(region, ":"))
-  chromosome = split_chunks[1]
-  startend = unlist(strsplit(split_chunks[2], "-"))
-  qstart = startend[1]
-  qend = startend[2]
-  return(list(chromosome = chromosome, start = qstart, end = qend))
-}
diff --git a/R/review_hotspots.R b/R/review_hotspots.R
deleted file mode 100644
index 66d8d51..0000000
--- a/R/review_hotspots.R
+++ /dev/null
@@ -1,114 +0,0 @@
-#' @title Review Hotspots.
-#'
-#' @description Annotate MAF-like data frome with a hot_spot column indicating recurrent mutations.
-#'
-#' @details This function takes an annotated MAF (with [annotate_hotspots]) and updates an existing column, "hot_spot", in the same data frame.
-#' Genes for hotspot review are supplied with the `genes_of_interest` parameter.
-#' Currently only a few sets of genes are supported, see parameter description for more information and limitations.
-#' The desired genome build can be specified with `genome_build` parameter. Should be the same as the incoming MAF.
-#'
-#' @param annotated_maf A data frame in MAF format that has hotspots annotated using the function annotate_hotspots().
-#' @param genes_of_interest A vector of genes for hotspot review. Currently only FOXO1, MYD88, CREBBP, NOTCH1, NOTCH2, CD79B and EZH2 are supported.
-#' @param genome_build Reference genome build for the coordinates in the MAF file. The default is grch37 genome build.
-#'
-#' @return The same data frame (as given to the `annotated_maf` parameter) with the reviewed column "hot_spot".
-#'
-#' @import dplyr
-#' @export
-#'
-#' @examples
-#' hot_ssms = review_hotspots(annotate_hotspots(get_coding_ssm(this_seq_type = "genome")),
-#'                            genes_of_interest = c("CREBBP"))
-#'
-review_hotspots = function(annotated_maf,
-                           genes_of_interest = c("FOXO1", "MYD88", "CREBBP", "NOTCH1", "NOTCH2", "CD79B", "EZH2"),
-                           genome_build){
-  if(missing(genome_build)){
-    if("maf_data" %in% class(annotated_maf)){
-      genome_build = get_genome_build(annotated_maf)
-      #drop our S3 classes because these additional attributes seem to cause some problems when the data is subsequently munged.
-      annotated_maf = strip_genomic_classes(annotated_maf)
-    }else{
-      stop("genome_build is required")
-    }
-  }
-
-  # define the list of genes currently supported for review
-  supported_genes = c("FOXO1", "MYD88", "CREBBP", "NOTCH1", "NOTCH2", "CD79B", "EZH2")
-
-  # check genome build because CREBBP coordinates are hg19-based or hg38-based
-
-  if (genome_build %in% c("hg19", "grch37", "hs37d5", "GRCh37")){
-    coordinates = hotspot_regions_grch37
-  }else if(genome_build %in% c("hg38", "grch38", "GRCh38")){
-    coordinates = hotspot_regions_hg38
-  }else{
-    stop("The genome build specified is not currently supported. Please provide MAF file in one of the following cordinates: hg19, grch37, hs37d5, GRCh37, hg38, grch38, or GRCh38")
-  }
-  # check that at least one of the currently supported genes are present
-  if (length(intersect(supported_genes, genes_of_interest))==0){
-      stop(paste0("Currently only ",  paste(supported_genes, collapse=", "), " are supported. Please specify one of these genes."))
-  }
-  # notify user that there is limited number of genes currently supported
-  if (length(setdiff(genes_of_interest, supported_genes))>0){
-      message(strwrap(paste0("Currently only ", paste(supported_genes, collapse=", "),
-                             " are supported. By default only these genes from the supplied list will be reviewed. Reviewing hotspots for genes ",
-                             paste(intersect(supported_genes, genes_of_interest), collapse = ", "), ", it will take a second ...")))
-  }
-  if("FOXO1" %in% genes_of_interest){
-      annotated_maf = annotated_maf %>%
-        dplyr::mutate(hot_spot = ifelse(Hugo_Symbol == "FOXO1" &
-                                        HGVSp_Short == "p.M1?",
-                                        "TRUE", hot_spot))
-  }
-
-  if("CREBBP" %in% genes_of_interest){
-      annotated_maf = annotated_maf %>%
-        dplyr::mutate(hot_spot = ifelse(Hugo_Symbol == "CREBBP" &
-                                        Start_Position > coordinates["CREBBP", "start"] &
-                                        End_Position < coordinates["CREBBP", "end"] &
-                                        Variant_Classification == "Missense_Mutation",
-                                        "TRUE", hot_spot))
-  }
-  if("EZH2" %in% genes_of_interest){
-      annotated_maf = annotated_maf %>%
-        dplyr::mutate(hot_spot = ifelse(Hugo_Symbol == "EZH2" &
-                                        Start_Position > coordinates["EZH2", "start"] &
-                                        End_Position < coordinates["EZH2", "end"],
-                                        "TRUE", hot_spot))
-  }
-  if("MYD88" %in% genes_of_interest){
-      annotated_maf = annotated_maf %>%
-        dplyr::mutate(hot_spot = ifelse(Hugo_Symbol == "MYD88" &
-                                        HGVSp_Short %in% c("p.L273P", "p.L265P"),
-                                        "TRUE", hot_spot))
-  }
-  if("NOTCH1" %in% genes_of_interest){
-      annotated_maf = annotated_maf %>%
-        dplyr::mutate(hot_spot = ifelse(Hugo_Symbol == "NOTCH1" &
-                                        Start_Position < coordinates["NOTCH1", "start"],
-                                        "TRUE", hot_spot))
-  }
-  if("NOTCH2" %in% genes_of_interest){
-      annotated_maf = annotated_maf %>%
-        dplyr::mutate(hot_spot = ifelse(Hugo_Symbol == "NOTCH2" &
-                                        Start_Position < coordinates["NOTCH2", "start"],
-                                        "TRUE", hot_spot))
-  }
-
-  if("CD79B" %in% genes_of_interest){
-      truncating_variants = c("Frame_Shift_Del", "Frame_Shift_Ins", "Nonsense_Mutation", "Splice_Region", "Splice_Site")
-      annotated_maf = annotated_maf %>%
-         dplyr::mutate(hot_spot = ifelse(Hugo_Symbol == "CD79B" &
-                                         Start_Position < coordinates["CD79B_trunc", "start"] &
-                                         Variant_Classification %in% truncating_variants,
-                                         "TRUE", hot_spot)) %>%
-          dplyr::mutate(hot_spot = ifelse(Hugo_Symbol == "CD79B" &
-                                          Start_Position < coordinates["CD79B_NONtrunc", "start"] &
-                                          ! Variant_Classification %in% truncating_variants,
-                                          "TRUE", hot_spot))
-  }
-  annotated_maf = create_maf_data(annotated_maf,genome_build)
-  
-  return(annotated_maf)
-}
diff --git a/man/annotate_hotspots.Rd b/man/annotate_hotspots.Rd
deleted file mode 100644
index 6040620..0000000
--- a/man/annotate_hotspots.Rd
+++ /dev/null
@@ -1,31 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/annotate_hotspots.R
-\name{annotate_hotspots}
-\alias{annotate_hotspots}
-\title{Annotate Hotspots.}
-\usage{
-annotate_hotspots(mutation_maf, ...)
-}
-\arguments{
-\item{mutation_maf}{A data frame in MAF format.}
-
-\item{...}{Any other parameter. These parameters will be ignored.}
-}
-\value{
-The same data frame with one additional column "hot_spot".
-}
-\description{
-Annotate MAF-like data frome with a hot_spot column indicating recurrent mutations.
-}
-\details{
-This function takes an already loaded MAF data frame with the \code{mutation_maf} parameter.
-}
-\examples{
-my_metadata = get_gambl_metadata()
-all_coding_ssm = get_coding_ssm(these_samples_metadata = my_metadata,
-                                projection = "grch37",
-                                this_seq_type = "genome")
-
-hot_ssms = annotate_hotspots(all_coding_ssm)
-
-}
diff --git a/man/assign_cn_to_ssm.Rd b/man/assign_cn_to_ssm.Rd
deleted file mode 100644
index e536b4b..0000000
--- a/man/assign_cn_to_ssm.Rd
+++ /dev/null
@@ -1,67 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/assign_cn_to_ssm.R
-\name{assign_cn_to_ssm}
-\alias{assign_cn_to_ssm}
-\title{Assign CN to SSM.}
-\usage{
-assign_cn_to_ssm(
-  this_sample_id,
-  genes,
-  this_seq_type = "genome",
-  projection = "grch37",
-  coding_only = FALSE,
-  assume_diploid = FALSE,
-  include_silent = FALSE,
-  ...
-)
-}
-\arguments{
-\item{this_sample_id}{Sample ID of the sample you want to annotate.}
-
-\item{genes}{A vector of characters with gene symbols (Hugo).}
-
-\item{this_seq_type}{Specified seq type for returned data. Default is genome.}
-
-\item{projection}{Specified genome projection that returned data is in
-reference to. Default is grch37.}
-
-\item{coding_only}{Optional. Set to TRUE to restrict to only coding variants
-(ssm). Deafult is FALSE.}
-
-\item{assume_diploid}{Optional, this parameter annotates every mutation as
-copy neutral. Default is FALSE.}
-
-\item{include_silent}{Logical parameter indicating whether to include silent
-mutations into coding mutations. Default is FALSE. This parameter only
-makes sense if \code{coding_only} is set to TRUE.}
-
-\item{...}{Any additional parameters.}
-}
-\value{
-A list containing a data frame (MAF-like format) with three extra
-columns:
-- log.ratio is the log ratio from the seg file (NA when no overlap).
-- LOH
-- CN (the rounded absolute copy number estimate of the region based on
-log.ratio, NA when no overlap was found).
-}
-\description{
-Annotate mutations with their copy number information.
-}
-\details{
-This function takes a sample ID with the \code{this_sample_id} parameter
-and annotates mutations with copy number information. A variety of
-parameters are at hand for a customized workflow. For example,
-the user can specify if only coding mutations are of interest. To do so,
-set \code{coding_only = TRUE}. This function internally calls
-\code{get_ssm_by_samples} and \code{get_sample_cn_segments}. This function can
-also take a vector with genes of interest (\code{genes}) that the returned
-data frame will be restricted to.
-}
-\examples{
-cn_list = assign_cn_to_ssm(
-     this_sample_id = "DOHH-2",
-     coding_only = TRUE
-)
-
-}
diff --git a/man/bind_genomic_data.Rd b/man/bind_genomic_data.Rd
deleted file mode 100644
index 1bc28f6..0000000
--- a/man/bind_genomic_data.Rd
+++ /dev/null
@@ -1,27 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/genomic_data.R
-\name{bind_genomic_data}
-\alias{bind_genomic_data}
-\title{Bind maf or other genomic data together}
-\usage{
-bind_genomic_data(..., check_id = TRUE)
-}
-\arguments{
-\item{...}{All maf_data or seg_data objects to be combined.}
-
-\item{check_id}{Logical. If TRUE (the default), the function will check for the presence of the expected ID column
-and for duplicate sample IDs across the inputs. Set to FALSE to skip this check.}
-}
-\value{
-data.frame with combined data and preserved genome_build metadata.
-}
-\description{
-Combine multiple maf_data objects and retain metadata such as genome_build.
-This function will not allow you to combine maf_data objects that have different genome_build values.
-An error will also be thrown if the same sample id is found in more than one of the inputs (if check_id is TRUE).
-}
-\examples{
-
-merged_maf = bind_genomic_data(maf1, maf2,check_id=FALSE)
-
-}
diff --git a/man/calc_mutation_frequency_bin_region.Rd b/man/calc_mutation_frequency_bin_region.Rd
deleted file mode 100644
index 83a50c3..0000000
--- a/man/calc_mutation_frequency_bin_region.Rd
+++ /dev/null
@@ -1,94 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/calc_mutation_frequency_bin_region.R
-\name{calc_mutation_frequency_bin_region}
-\alias{calc_mutation_frequency_bin_region}
-\title{Calculate Mutation Frequency By Sliding Window.}
-\usage{
-calc_mutation_frequency_bin_region(
-  region,
-  chromosome,
-  start_pos,
-  end_pos,
-  these_samples_metadata = NULL,
-  these_sample_ids = NULL,
-  this_seq_type = "genome",
-  maf_data = NULL,
-  projection = "grch37",
-  slide_by = 100,
-  window_size = 1000,
-  return_format = "long",
-  min_count_per_bin = 0,
-  return_count = TRUE,
-  drop_unmutated = FALSE,
-  ...
-)
-}
-\arguments{
-\item{region}{A string describing a genomic region in the "chrom:start-end" format.
-The region must be specified in this format OR as separate chromosome, start_pos, end_pos arguments.}
-
-\item{chromosome}{Chromosome name in region.}
-
-\item{start_pos}{Start coordinate of region.}
-
-\item{end_pos}{End coordinate of region.}
-
-\item{these_samples_metadata}{Optional data frame containing a sample_id column.
-If not providing a maf file, seq_type is also a required column.}
-
-\item{these_sample_ids}{Optional vector of sample IDs. Output will be subset
-to IDs present in this vector.}
-
-\item{this_seq_type}{Optional vector of seq_types to include in heatmap.
-Default is "genome". Uses default seq_type priority for samples
-with >1 seq_type.}
-
-\item{maf_data}{Optional maf data frame. Will be subset to rows where
-Tumor_Sample_Barcode matches provided sample IDs or metadata table.
-If not provided, maf data will be obtained with get_ssm_by_regions().}
-
-\item{projection}{Specify which genome build to use. Required. Default grch37.}
-
-\item{slide_by}{Slide size for sliding window. Default 100.}
-
-\item{window_size}{Size of sliding window. Default 1000.}
-
-\item{return_format}{Return format of mutations. Accepted inputs are "long"
-and "wide". Long returns a data frame of one sample ID/window per row.
-Wide returns a matrix with one sample ID per row and one window per column.
-Using the "wide" format will retain all samples and windows regardless of
-the drop_unmutated or min_count_per_bin parameters.}
-
-\item{min_count_per_bin}{Minimum counts per bin, default is 0. Setting this
-greater than 0 will drop unmutated windows only when return_format is long.}
-
-\item{return_count}{Boolean statement to return mutation count per window (TRUE)
-or binary mutated/unmutated status (FALSE). Default is TRUE.}
-
-\item{drop_unmutated}{Boolean for whether to drop windows with 0 mutations.
-Only effective with "long" return format.}
-
-\item{...}{Any additional parameters.}
-}
-\value{
-Either a matrix or a long tidy table of counts per window.
-}
-\description{
-Count the number of mutations in a sliding window across a
-region for all samples.
-}
-\details{
-This function is called to return the mutation frequency for a given
-region, either from a provided input maf data frame or from the GAMBL maf data.
-Regions are specified with the \code{region} parameter. Alternatively, the region of
-interest can also be specified by calling the function with \code{chromosome},
-\code{start_pos}, and \code{end_pos} parameters. This function operates on a single region.
-To return a matrix of sliding window counts over multiple regions,
-see \code{calc_mutation_frequency_bin_regions}.
-}
-\examples{
-myc_mut_freq = calc_mutation_frequency_bin_region(region = "8:128747680-128753674",
-                                                  slide_by = 10,
-                                                  window_size = 10000)
-
-}
diff --git a/man/calc_mutation_frequency_bin_regions.Rd b/man/calc_mutation_frequency_bin_regions.Rd
deleted file mode 100644
index 7ef307b..0000000
--- a/man/calc_mutation_frequency_bin_regions.Rd
+++ /dev/null
@@ -1,101 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/calc_mutation_frequency_bin_regions.R
-\name{calc_mutation_frequency_bin_regions}
-\alias{calc_mutation_frequency_bin_regions}
-\title{Mutation counts across sliding windows for multiple regions.}
-\usage{
-calc_mutation_frequency_bin_regions(
-  regions_list = NULL,
-  regions_bed = NULL,
-  these_samples_metadata = NULL,
-  these_sample_ids = NULL,
-  this_seq_type = "genome",
-  maf_data = NULL,
-  projection = "grch37",
-  region_padding = 1000,
-  drop_unmutated = FALSE,
-  skip_regions = NULL,
-  only_regions = NULL,
-  slide_by = 100,
-  window_size = 500,
-  return_format = "wide",
-  ...
-)
-}
-\arguments{
-\item{regions_list}{Named vector of regions in the format
-c(name1 = "chr:start-end", name2 = "chr:start-end"). If neither \code{regions_list} nor
-\code{regions_bed} is specified, the function will use GAMBLR aSHM region information.}
-
-\item{regions_bed}{Data frame of regions with four columns (chrom, start, end, name).}
-
-\item{these_samples_metadata}{Metadata with at least sample_id column.
-If not providing a maf data frame, seq_type is also required.}
-
-\item{these_sample_ids}{Vector of sample IDs. Metadata will be subset to
-sample IDs present in this vector.}
-
-\item{this_seq_type}{Optional vector of seq_types to include in heatmap.
-Default "genome". Uses default seq_type priority for samples with >1 seq_type.}
-
-\item{maf_data}{Optional maf data frame. Will be subset to rows where
-Tumor_Sample_Barcode matches provided sample IDs or metadata table.
-If not provided, maf data will be obtained with get_ssm_by_regions().}
-
-\item{projection}{Genome build the function will operate in. Ensure this
-matches your provided regions and maf data for correct chr prefix handling. Default "grch37".}
-
-\item{region_padding}{Amount to pad the start and end coordinates by. Default 1000.}
-
-\item{drop_unmutated}{Whether to drop bins with 0 mutations. If returning a
-matrix format, this will only drop bins with no mutations in any samples.}
-
-\item{skip_regions}{Optional character vector of genes to exclude from the default aSHM regions.}
-
-\item{only_regions}{Optional character vector of genes to include from the default aSHM regions.}
-
-\item{slide_by}{Slide size for sliding window. Default 100.}
-
-\item{window_size}{Size of sliding window. Default 500.}
-
-\item{return_format}{Return format of mutations. Accepted inputs are "long" and
-"wide". Long returns a data frame of one sample ID/window per row. Wide returns
-a matrix with one sample ID per row and one window per column. Using the "wide"
-format will retain all samples and windows regardless of the drop_unmutated or
-min_count_per_bin parameters. Default wide.}
-
-\item{...}{Any additional parameters.}
-}
-\value{
-A table of mutation counts for sliding windows across one or more regions. May be long or wide.
-}
-\description{
-Obtain a long tidy or wide matrix of mutation counts across
-sliding windows for multiple regions.
-}
-\details{
-This function takes a metadata table with \code{these_samples_metadata}
-parameter and internally calls \code{calc_mutation_frequency_bin_region}
-(that internally calls \code{get_ssm_by_regions})
-to retrieve mutation counts for sliding windows across one or more regions.
-May optionally provide any combination of a maf data frame, existing metadata,
-or a regions data frame or named vector.
-}
-\examples{
-#get some regions
-these_regions <- process_regions(only_regions = c("MYC", "BCL2", "BCL6"))
-reg_vec <- these_regions$regions_list
-reg_bed <- these_regions$regions_bed
-
-# use a set of user defined regions (from genes) and
-# calculate mut frequency across all available samples
-mult_freq_all = calc_mutation_frequency_bin_regions(regions_list = reg_vec)
-mult_freq_all = calc_mutation_frequency_bin_regions(regions_bed = reg_bed)
-
-#restrict the analysis to specific samples using the metadata
-my_meta = get_gambl_metadata() \%>\% 
-              dplyr::filter(pathology \%in\% c("DLBCL","FL"))
-mult_reg_freq_fl_dlbcl = calc_mutation_frequency_bin_regions(regions_list = reg_vec,
-                                                          these_sample_ids = "DOHH-2")
-
-}
diff --git a/man/check_excess_params.Rd b/man/check_excess_params.Rd
deleted file mode 100644
index a4f5064..0000000
--- a/man/check_excess_params.Rd
+++ /dev/null
@@ -1,22 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/check_excess_params.R
-\name{check_excess_params}
-\alias{check_excess_params}
-\title{Check Excess Params}
-\usage{
-check_excess_params(...)
-}
-\arguments{
-\item{...}{Parameters to check.}
-}
-\value{
-Nothing
-}
-\description{
-Function for checking excessive parameter names.
-This function will notify the user if any unavailable parameters are called for any given function.
-This function is designed to work as internal function-call in already available GAMBLR functions.
-}
-\details{
-Catch function calls containing unsupported arguments.
-}
diff --git a/man/collate_results.Rd b/man/collate_results.Rd
deleted file mode 100644
index 4a0b13f..0000000
--- a/man/collate_results.Rd
+++ /dev/null
@@ -1,64 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/collate_results.R
-\name{collate_results}
-\alias{collate_results}
-\title{Collate Results}
-\usage{
-collate_results(
-  sample_table,
-  these_samples_metadata,
-  join_with_full_metadata = FALSE,
-  seq_type_filter = c("genome", "capture"),
-  ...
-)
-}
-\arguments{
-\item{sample_table}{A vector of characters with sample IDs, or a data frame with sample IDs in a column (sample_id).
-If provided, this will overwrite any sample subsets provided with these_samples_metadata.}
-
-\item{these_samples_metadata}{A metadata table with sample IDs of interest.
-If not provided, the function will get metadata for all available samples.
-This parameter is intended to use in combination with \code{join_with_full_metadata}.}
-
-\item{join_with_full_metadata}{Set to TRUE to horizontally expand metadata with QC results.
-Default is FALSE. If \code{these_samples_metadata} is provided, collated results will be added to this metadata table.
-If not provided, the function will join collated results with all available metadata in the specified seq_type (\code{seq_type_filter}).}
-
-\item{seq_type_filter}{Filtering criteria for \code{get_gambl_metadata} if \code{these_samples_metadata} is not provided, default is genomes and captures.}
-
-\item{...}{Any additional parameters.}
-}
-\value{
-A data frame with collated results.
-}
-\description{
-Bring together collated results for a selection of gambl samples.
-}
-\details{
-Currently, this function only gathers QC metrics (\code{mirage_metrics}) as the only collated result.
-Potentially, in the future, additional collated results can be added by this function as well.
-}
-\examples{
-#load packages
-library(dplyr)
-
-#return collated results for all available samples
-all_collated = collate_results()
-
-#return available collated results for a metadata subset
-fl_collated = collate_results(
- these_samples_metadata = get_gambl_metadata(
-   seq_type_filter = "genome") \%>\% 
-   dplyr::filter(pathology == "FL"))
-
-#horizontally expand a metadata subset with collated results
-fl_meta_collated = collate_results(
- join_with_full_metadata = TRUE, 
- these_samples_metadata = get_gambl_metadata(
-   seq_type_filter = "genome") \%>\% 
-   dplyr::filter(pathology == "FL"))
-
-#horizontally expand all available metadata with collated results
-all_meta_collated = collate_results(join_with_full_metadata = TRUE)
-
-}
diff --git a/man/cool_overlaps.Rd b/man/cool_overlaps.Rd
deleted file mode 100644
index dc7b9d4..0000000
--- a/man/cool_overlaps.Rd
+++ /dev/null
@@ -1,98 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/cool_overlaps.R
-\name{cool_overlaps}
-\alias{cool_overlaps}
-\title{Cool overlap of data frames.}
-\usage{
-cool_overlaps(
-  data1,
-  data2,
-  columns1 = c("Chromosome", "Start_Position", "End_Position"),
-  columns2 = c("Chromosome", "Start_Position", "End_Position"),
-  type = "any",
-  nomatch = FALSE
-)
-}
-\arguments{
-\item{data1}{Data frame with data to overlap. Required parameter. The minimal
-required columns are those supplied with the argument columns1. Will
-dictate the naming of the columns used for overlap in the output.}
-
-\item{data2}{Data frame with data to overlap. Required parameter. The minimal
-required columns are those supplied with the argument columns2.}
-
-\item{columns1}{The list of columns from data frame data1 to be used to find
-overlapping regions.}
-
-\item{columns2}{The list of columns from data frame data2 to be used to find
-overlapping regions.}
-
-\item{type}{Character specifying the way to find overlaps. Accepted values
-are "any" (used as default), "start", "end", "within", and "equal".
-Please see function description for more details of different types.}
-
-\item{nomatch}{Whether the rows from data1 that do not have overlap in data2
-should be returned or not. The default is FALSE (rows without overlap
-are not returned). If TRUE is specified, the row order in the output
-data will match the exact order of rows in the input data1.}
-}
-\value{
-data frame
-}
-\description{
-This function implements overlap of 2 data frames that contain
-regions of coordinates similar to what data.table::foverlaps does. Unlike
-foverlaps, this function takes as input data frame class objects, and relies
-on dplyr solution rather than data.table handling, therefore allowing usage
-of data frames with virtually unlimited dimensions without crashing. This
-implementation uses same logic of different types of overlaps as the original
-foverlaps solution ("any", "start", "end", "within", "equal"). The type "any"
-is default and allows for any overlapping solution between 2 regions. The
-type "start" only considers regions with exact same start position as
-overlap; similarly type "end" considers regions overlapped when the end
-positions are exact matches. Type "within" means that regions are overlapped
-when one is contained in another and neither start nor end positions match.
-Finally, type "equal" only considers overlap when both start and end
-positions match for both regions. For any type, the presence of any
-additional column not directly specifying regions (for example, Chromosome)
-will serve similar to a grouping variable.
-The generated output of this function will contain the overlapping regions
-and all columns present in the data frame data1, as well as any columns from
-the data frame supplied with data2 argument, except for those columns present
-in data2 that are used for overlap. When the same columns are present in both
-data1 and data2, the output data frame will have ".x" and ".y" suffixes to
-indicate which original input data they are coming from.
-}
-\examples{
-# obtain maf data
-maf1 <- get_coding_ssm(
-    these_sample_ids = "DOHH-2"
-)
-
-maf2 <- get_coding_ssm(
-    these_sample_ids = "SU-DHL-4"
-)
-
-# The same mutations are not expected to be present in different samples
-# so this overlap will produce 0 matching rows
-overlap <- cool_overlaps(
-    maf1,
-    maf2,
-    type = "equal"
-)
-
-# To demonstrate functionality we can supply the same maf to the data2
-overlap <- cool_overlaps(
-    maf1,
-    maf1 \%>\% head
-)
-
-# We can also overlap different formats, for example
-seg1 <- get_sample_cn_segments(these_sample_ids = "DOHH-2")
-overlap <- cool_overlaps(
-    data1 = maf1,
-    data2 = seg1,
-    columns2 = c("chrom", "start", "end")
-)
-
-}
diff --git a/man/create_bed_data.Rd b/man/create_bed_data.Rd
deleted file mode 100644
index 537a835..0000000
--- a/man/create_bed_data.Rd
+++ /dev/null
@@ -1,86 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/genomic_data.R
-\name{create_bed_data}
-\alias{create_bed_data}
-\title{Create BED Data}
-\usage{
-create_bed_data(
-  bed_df,
-  genome_build = NULL,
-  fix_names = NULL,
-  concat_cols = NULL,
-  sep = ""
-)
-}
-\arguments{
-\item{bed_df}{A data frame containing the BED data.}
-
-\item{genome_build}{A string specifying the genome build ("grch37" or "hg38").
-If NULL, the function will try to infer the genome build from the object name.}
-
-\item{fix_names}{Either NULL (the default), or one of "chrom_start_end" or "concat".
-If not NULL and duplicate names are detected, the function will apply the chosen fix.}
-
-\item{concat_cols}{When \code{fix_names = "concat"}, a character vector specifying which columns
-from the original data to merge.}
-
-\item{sep}{The separator to use when concatenating columns if fix_names = "concat".
-Defaults to "" (no separator).}
-}
-\value{
-A data frame with class attributes for BED data.
-}
-\description{
-This function creates BED (Browser Extensible Data) objects from the given input.
-It assumes that the BED data should have columns corresponding to chromosome, start,
-and end. If the second and third columns are not numeric (as expected for start and end),
-the function will attempt to identify the proper columns by matching column names.
-}
-\details{
-In the output, the first three columns will be renamed to "chrom", "start", and "end".
-If a fourth column exists, it is renamed to "name" (and any additional columns are preserved).
-
-Additionally, if a "name" column exists and its values are not unique, the function
-will warn the user. The user can optionally supply a method to automatically fix the
-names via the \code{fix_names} argument:
-\itemize{
-\item If \code{fix_names = "chrom_start_end"}, the new name will be built as "chrom:start-end".
-\item If \code{fix_names = "concat"}, then the columns specified by \code{concat_cols} (using the
-original column names in the input data) will be concatenated to form the new name.
-By default, no separator is used, but a separator can be specified via the \code{sep}
-argument.
-}
-
-After applying the fix, the function checks if the new names are unique. If they are not,
-a warning is issued that includes up to five examples of duplicate names and the row numbers
-where they occur.
-}
-\examples{
-
-# get a bed_data object for all aSHM regions
-ashm_bed = create_bed_data(GAMBLR.data::grch37_ashm_regions,
-                fix_names = "concat",
-                concat_cols = c("gene","region"),
-                sep="-")
-# the build is automatically inferred if it is in the variable name
-# get_genome_build(ashm_bed)
-# [1] "grch37"
-
-another_bed = create_bed_data(somatic_hypermutation_locations_GRCh37_v_latest,
-                              fix_names = "concat",
-                              concat_cols = c("chr_name","hg19_start","hg19_end"))
-
-# get_genome_build(another_bed)
-# [1] "grch37"
-
-# get a bed_data object for all gene regions and combine several columns to make a unique name
-gene_regions <- create_bed_data(hg38_gene_coordinates,
-                    fix_names = "concat",
-                    sep="-",
-                    concat_cols = c("chromosome","start","end","gene_name"))
-                    
-#get_genome_build(gene_regions)
-# [1] "hg38"                     
-
-
-}
diff --git a/man/create_maf_data.Rd b/man/create_maf_data.Rd
deleted file mode 100644
index a6e8445..0000000
--- a/man/create_maf_data.Rd
+++ /dev/null
@@ -1,19 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/genomic_data.R
-\name{create_maf_data}
-\alias{create_maf_data}
-\title{Create MAF Data}
-\usage{
-create_maf_data(maf_df, genome_build)
-}
-\arguments{
-\item{maf_df}{A data frame containing the MAF data.}
-
-\item{genome_build}{A string specifying the genome build ("grch37" or "hg38").}
-}
-\value{
-A data frame with class attributes for MAF data.
-}
-\description{
-This function creates MAF (Mutation Annotation Format) data from the given input.
-}
diff --git a/man/create_seg_data.Rd b/man/create_seg_data.Rd
deleted file mode 100644
index af31259..0000000
--- a/man/create_seg_data.Rd
+++ /dev/null
@@ -1,23 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/get_cn_segments.R
-\name{create_seg_data}
-\alias{create_seg_data}
-\title{Create Segmented Data}
-\usage{
-create_seg_data(seg_df, genome_build)
-}
-\arguments{
-\item{seg_df}{A data frame containing the segmented data.}
-
-\item{genome_build}{A string specifying the genome build ("grch37" or "hg38").}
-}
-\value{
-A data frame with class attributes for segmented data.
-}
-\description{
-This function creates segmented data from the given input.
-}
-\examples{
-seg_df <- data.frame(...)
-create_seg_data(seg_df, "grch37")
-}
diff --git a/man/get_ashm_count_matrix.Rd b/man/get_ashm_count_matrix.Rd
deleted file mode 100644
index eb9943e..0000000
--- a/man/get_ashm_count_matrix.Rd
+++ /dev/null
@@ -1,65 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/get_ashm_count_matrix.R
-\name{get_ashm_count_matrix}
-\alias{get_ashm_count_matrix}
-\title{Get ASHM Count Matrix.}
-\usage{
-get_ashm_count_matrix(
-  regions_bed,
-  these_samples_metadata,
-  this_seq_type,
-  projection = "grch37"
-)
-}
-\arguments{
-\item{regions_bed}{A bed file with one row for each region.}
-
-\item{these_samples_metadata}{This is used to complete your matrix. All GAMBL
-samples will be used by default. Provide a data frame with at least
-sample_id for all samples if you are using non-GAMBL data.}
-
-\item{this_seq_type}{The seq type to return results for. Only used if no
-metadata is provided with these_samples_metadata.}
-
-\item{projection}{Which genome build to use for the mutations
-(must match the coordinate system of your regions to avoid a nonsense result)}
-}
-\value{
-matrix
-}
-\description{
-Prepare a matrix with one row per sample and one column per
-region using a set of hypermutated regions.
-}
-\details{
-Values are the number of mutations in that patient in the region.
-}
-\examples{
-regions_bed = create_bed_data(GAMBLR.data::grch37_ashm_regions,
-                              fix_names="concat",
-                              concat_cols=c("gene","region"),
-                              sep="-")
-my_meta = get_gambl_metadata() \%>\% dplyr::filter(pathology=="DLBCL")
-matrix <- get_ashm_count_matrix(
-     regions_bed = regions_bed,
-     this_seq_type = "genome"
-)
-
-#this example intentionally fails 
- matrix <- get_ashm_count_matrix(regions_bed=regions_bed,this_seq_type = "genome",
-                            these_samples_metadata = my_meta,
-                            projection = "hg38")
-# Error in get_ashm_count_matrix(
-# Your projection argument does not match the genome_build of regions_bed
-
-# format the name column to include the chromosome coordinates instead of the gene
-regions_bed = create_bed_data(GAMBLR.data::hg38_ashm_regions,
-                           fix_names="concat",
-                           concat_cols=c("chr_name","hg38_start","hg38_end"),
-                           sep="-")
-                           
- matrix_hg38 <- get_ashm_count_matrix(regions_bed=regions_bed,this_seq_type = "genome",
-                            these_samples_metadata = my_meta,
-                            projection = "hg38")
-
-}
diff --git a/man/get_cn_segments.Rd b/man/get_cn_segments.Rd
deleted file mode 100644
index 369bce9..0000000
--- a/man/get_cn_segments.Rd
+++ /dev/null
@@ -1,44 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/get_cn_segments.R
-\name{get_cn_segments}
-\alias{get_cn_segments}
-\title{Get CN Segments.}
-\usage{
-get_cn_segments(
-  these_samples_metadata,
-  projection = "grch37",
-  this_seq_type,
-  ...
-)
-}
-\arguments{
-\item{these_samples_metadata}{User must provide a metadata table to restrict the data to the samples in your table.
-The metadata also ensures the proper handling of duplicate sample_id across seq_types and ensures the
-seq_type in the metadata faithfully represents the seq_type of the data}
-
-\item{projection}{Desired genome coordinate system for returned CN segments. Default is "grch37".}
-
-\item{this_seq_type}{Deprecated.}
-
-\item{...}{Additional parameters to be passed to the function.}
-}
-\value{
-A data frame with CN segments for the specified region.
-}
-\description{
-Retrieve all copy number segments from the GAMBL outputs
-}
-\details{
-This function merely loads and returns all the seg_data available for a projection (genome build)
-}
-\examples{
-# Example for the genome samples:
-
-genome_metadata = GAMBLR.data::get_gambl_metadata(seq_type_filter="genome") 
-                      
-genome_segments_hg38 = get_cn_segments(
-                             these_samples_metadata = genome_metadata,
-                             projection="hg38")
-
-
-}
diff --git a/man/get_coding_ssm.Rd b/man/get_coding_ssm.Rd
deleted file mode 100644
index 9c0afdd..0000000
--- a/man/get_coding_ssm.Rd
+++ /dev/null
@@ -1,75 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/get_coding_ssm.R
-\name{get_coding_ssm}
-\alias{get_coding_ssm}
-\title{Get Coding SSMs}
-\usage{
-get_coding_ssm(
-  these_sample_ids = NULL,
-  these_samples_metadata = NULL,
-  projection = "grch37",
-  this_seq_type = "genome",
-  tool_name = "slms-3",
-  min_read_support = 3,
-  include_silent = TRUE,
-  verbose = FALSE,
-  ...
-)
-}
-\arguments{
-\item{these_sample_ids}{Optional, a vector of multiple sample_id (or a single
-sample ID as a string) that you want results for.}
-
-\item{these_samples_metadata}{Optional, a metadata table (with sample IDs in
-a column) to subset the return to. If not provided (and if
-\code{these_sample_ids} is not provided), the function will return all
-samples from the specified seq_type in the metadata.}
-
-\item{projection}{Reference genome build for the coordinates in the MAF file.
-The default is grch37.}
-
-\item{this_seq_type}{The seq_type you want back, default is genome.}
-
-\item{tool_name}{Optionally specify which tool to report variant from. The
-default is slms-3, also supports "publication" to return the exact
-variants as reported in the original papers.}
-
-\item{min_read_support}{Only returns variants with at least this many reads
-in t_alt_count.}
-
-\item{include_silent}{Logical parameter indicating whether to include silent
-mutations into coding mutations. Default is TRUE.}
-
-\item{verbose}{Set to FALSE to minimize the output to console. Default is
-FALSE. This parameter also dictates the verbosity of any helper function
-internally called inside the main function.}
-
-\item{...}{Any additional parameters.}
-}
-\value{
-data frame
-}
-\description{
-Convenience function for loading coding Simple Somatic Mutations
-(SSM) from the bundled data \link{sample_data}.
-}
-\details{
-This "bare bones" function was developed to retrieve coding SSM
-calls for non-GSC-users. Effectively retrieve coding SSM calls. Multiple
-filtering parameters are available for this function. For more
-information on how to implement the filtering parameters, refer to the
-parameter descriptions as well as examples in the vignettes. This
-function depends on the bundled sample data in this package.
-}
-\examples{
-
- # Get mutations from exome data originally aligned to grch37
-ssm_exomes_grch37 = get_coding_ssm(projection = "grch37",this_seq_type = "capture")
-
-# Get mutations from genome data, hg38 build
-ssm_genomes_hg38 = get_coding_ssm(projection = "hg38",this_seq_type = "genome")
-
-
-
-
-}
diff --git a/man/get_coding_ssm_status.Rd b/man/get_coding_ssm_status.Rd
deleted file mode 100644
index 458c407..0000000
--- a/man/get_coding_ssm_status.Rd
+++ /dev/null
@@ -1,97 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/get_coding_ssm_status.R
-\name{get_coding_ssm_status}
-\alias{get_coding_ssm_status}
-\title{Get Coding SSM Status.}
-\usage{
-get_coding_ssm_status(
-  gene_symbols,
-  these_samples_metadata,
-  maf_data,
-  include_hotspots = TRUE,
-  keep_multihit_hotspot = FALSE,
-  review_hotspots = TRUE,
-  genes_of_interest = c("FOXO1", "MYD88", "CREBBP"),
-  genome_build,
-  include_silent = FALSE,
-  include_silent_genes,
-  ...
-)
-}
-\arguments{
-\item{gene_symbols}{A vector of gene symbols for which the mutation status
-will be tabulated. If not provided, lymphoma genes will be returned
-by default.}
-
-\item{these_samples_metadata}{The metadata for samples of interest to be
-included in the returned matrix. Only the column "sample_id" is
-required. If not provided, the example metadata is used as default.}
-
-\item{maf_data}{data frame in maf format. Must be in the grch37 projection.}
-
-\item{include_hotspots}{Logical parameter indicating whether hotspots object
-should also be tabulated. Default is TRUE.}
-
-\item{keep_multihit_hotspot}{Logical parameter indicating whether to keep the
-gene annotation as mutated when the gene has both hot spot and
-non-hotspot mutation. Default is FALSE. If set to TRUE, will report the
-number of non-hotspot mutations instead of tabulating for just mutation
-presence.}
-
-\item{review_hotspots}{Logical parameter indicating whether hotspots object
-should be reviewed to include functionally relevant mutations or rare
-lymphoma-related genes. Default is TRUE.}
-
-\item{genes_of_interest}{A vector of genes for hotspot review. Currently only
-FOXO1, MYD88, and CREBBP are supported.}
-
-\item{genome_build}{Reference genome build for the coordinates in the MAF
-file. The default is inferred from maf_data.}
-
-\item{include_silent}{Logical parameter indicating whether to include silent
-mutations into coding mutations. Default is FALSE.}
-
-\item{include_silent_genes}{Optionally, provide a list of genes for which the
-Silent variants are to be considered. If provided, the Silent variants for
-these genes will be included regardless of the include_silent argument.}
-
-\item{...}{Any other parameter. These parameters will be ignored.}
-}
-\value{
-A data frame with tabulated mutation status.
-}
-\description{
-Tabulate mutation status (SSM) for a set of genes.
-}
-\details{
-This function takes a data frame (in MAF-like format) and converts
-it to a binary one-hot encoded matrix of mutation status for either a set of
-user-specified genes (via gene_symbols) or, if no genes are provided, default
-to all lymphoma genes. The default behaviour is to assign each gene/sample_id
-combination as mutated only if there is a protein coding mutation for that
-sample in the MAF but this can be configured to use synonymous variants in
-some (via include_silent_genes) or all (via include_silent) genes.
-This function also has other filtering and convenience parameters giving
-the user full control of the return. For more information, refer to the
-parameter descriptions and examples.
-Currently only the grch37 genome build is supported for hotspot annotation
-and review for this version of the function.
-}
-\examples{
-coding_tabulated_df = get_coding_ssm_status(
- maf_data = get_coding_ssm(),
- gene_symbols = c("EZH2","KMT2D","CREBBP","MYC")
-)
-
-
-
-#all lymphoma genes from bundled NHL gene list
-coding_tabulated_df = get_coding_ssm_status()
-
-#this example will fail because hg38 is not supported by this function (yet)
-coding_tabulated_df = get_coding_ssm_status(maf_data=
-                        get_coding_ssm(projection = "hg38"))
-# Error in get_coding_ssm_status(maf_data = get_coding_ssm(projection = "hg38")) : 
-# Currently only grch37 projection (hg19 genome build) is supported.
-
-}
diff --git a/man/get_gambl_metadata.Rd b/man/get_gambl_metadata.Rd
deleted file mode 100644
index 664fac3..0000000
--- a/man/get_gambl_metadata.Rd
+++ /dev/null
@@ -1,79 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/get_gambl_metadata.R
-\name{get_gambl_metadata}
-\alias{get_gambl_metadata}
-\title{Get GAMBL Metadata.}
-\usage{
-get_gambl_metadata(seq_type_filter = "genome", case_set, ...)
-}
-\arguments{
-\item{seq_type_filter}{Specify the seq type you want to return metadata for.
-Default is "genome".}
-
-\item{case_set}{Optionally specify study details to return samples from a
-particular case set. See function description for supported case sets.}
-
-\item{...}{Any additional parameters.}
-}
-\value{
-A data frame with metadata, tailored for user without GSC access.
-
-\describe{
-\item{compression}{Format of the original data used as input for our analysis pipelines (cram, bam or fastq)}
-\item{bam_available}{Whether or not this file was available when last checked.}
-\item{patient_id}{The anonymized unique identifier for this patient. For BC samples, this will be Res ID.}
-\item{sample_id}{A unique identifier for the sample analyzed.}
-\item{seq_type}{The assay type used to produce this data (one of "genome", "capture", "mrna", "promethION")}
-\item{genome_build}{The name of the genome reference the data were aligned to.}
-\item{cohort}{Name for a group of samples that were added together (usually from a single study), often in the format {pathology}_{cohort_descriptor}.}
-\item{pathology}{The diagnosis or pathology for the sample}
-\item{time_point}{Timing of biopsy in increasing alphabetical order (A = diagnosis, B = first relapse etc)}
-\item{ffpe_or_frozen}{Whether the nucleic acids were extracted from a frozen or FFPE sample}
-\item{COO_consensus}{Consensus call of COO between different sources.}
-\item{DHITsig_consensus}{Consensus call of DHIT signature status between different sources.}
-\item{EBV_status_inf}{Inferred EBV status of the tumor}
-\item{lymphgen_no_cnv}{LymphGen label using model without CNV}
-\item{lymphgen_with_cnv}{LymphGen label using model with CNV}
-\item{lymphgen_cnv_noA53}{LymphGen label using model with CNV but excluding A53 class}
-\item{lymphgen_wright}{The LymphGen call for this sample from Wright et al. (if applicable)}
-\item{fl_grade}{Grade of FL samples}
-\item{normal_sample_id}{Sample id for normal tissue used in the analysis}
-\item{pairing_status}{Matching status of the sample}
-\item{lymphgen}{LymphGen label}
-\item{molecular_BL}{label of the sample according to the molecular BL classifier}
-\item{Tumor_Sample_Barcode}{Duplicate of sample_id for simplifying joins to MAF data frames}
-\item{pathology_rank}{Numeric rank for consistent ordering of samples by pathology}
-\item{hiv_status}{HIV status of the sample}
-\item{age_group}{Adult_BL or Pediatric_BL or Other, specific to the BLGSP study}
-\item{sex}{The biological sex of the patient, if available. Allowable options: M, F, NA}
-}
-}
-\description{
-Convenience function for loading the sample metadata.
-}
-\details{
-This bare bones function was developed to retrieve metadata for
-non-GSC-users. Specify the seq type (\code{seq_type_filter}) for the samples you
-want returned as the only argument.
-It relies on the bundled metadata in this package.
-Specify \code{case_set} argument to retrieve samples from a particular study.
-Currently supported case_sets are: FL_Dreval (FL samples from Dreval et al),
-DLBCL_Dreval (DLBCL samples from Dreval et al), FL-DLBCL-study (all samples
-from Dreval et al), DLBCL_Arthur (all samples from Arthur et al study),
-DLBCL_Hilton (all samples from Hilton et al DLBCL Trios study),
-DLBCL_cell_lines (5 DLBCL cell lines), DLBCL_Chapuy (all samples from Chapuy
-et al study), DLBCL_Schmitz (all samples from Schmitz et al study),
-DLBCL_Reddy (all samples from Reddy et al study), DLBCL_Thomas (HTMCP DLBCLs
-from Thomas et al study), BL_Thomas (BL samples from Thomas et al study)
-}
-\examples{
-#return metadata for genome samples
-genome_meta = get_gambl_metadata(seq_type_filter = "genome")
-
-#return metadata for capture samples
-capture_meta = get_gambl_metadata(seq_type_filter = "capture")
-
-#return metadata for genome and capture samples
-all_meta = get_gambl_metadata(seq_type_filter = c("genome", "capture"))
-
-}
diff --git a/man/get_genome_build.Rd b/man/get_genome_build.Rd
deleted file mode 100644
index 6a40f4e..0000000
--- a/man/get_genome_build.Rd
+++ /dev/null
@@ -1,17 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/genomic_data.R
-\name{get_genome_build}
-\alias{get_genome_build}
-\title{Get Genome Build}
-\usage{
-get_genome_build(data)
-}
-\arguments{
-\item{data}{A data frame with genome build attribute.}
-}
-\value{
-A string specifying the genome build.
-}
-\description{
-This function retrieves the genome build attribute from the data.
-}
diff --git a/man/get_manta_sv.Rd b/man/get_manta_sv.Rd
deleted file mode 100644
index 1f71895..0000000
--- a/man/get_manta_sv.Rd
+++ /dev/null
@@ -1,84 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/get_manta_sv.R
-\name{get_manta_sv}
-\alias{get_manta_sv}
-\title{Get Manta SVs}
-\usage{
-get_manta_sv(
-  these_sample_ids = NULL,
-  these_samples_metadata = NULL,
-  projection = "grch37",
-  this_seq_type = "genome",
-  chromosome,
-  qstart,
-  qend,
-  region,
-  pairing_status,
-  min_vaf = 0.1,
-  min_score = 40,
-  pass = TRUE,
-  verbose = FALSE,
-  ...
-)
-}
-\arguments{
-\item{these_sample_ids}{Optional, a vector of multiple sample_id (or a single sample ID as a string) that you want results for.}
-
-\item{these_samples_metadata}{Optional, a metadata table (with sample IDs in a column) to subset the return to.
-If not provided (and if \code{these_sample_ids} is not provided), the function will return all samples from the specified seq_type in the metadata.}
-
-\item{projection}{The projection genome build. Default is grch37.}
-
-\item{this_seq_type}{The this_seq_type you want back, default is genome.}
-
-\item{chromosome}{Optional, the chromosome you are restricting to (can be prefixed or not prefixed).}
-
-\item{qstart}{Optional, query start coordinate of the range you are restricting to.}
-
-\item{qend}{Optional, query end coordinate of the range you are restricting to.}
-
-\item{region}{Optional, region formatted like chrX:1234-5678 (chromosome can be prefixed or not prefixed) instead of specifying chromosome, start and end separately.}
-
-\item{pairing_status}{Use to restrict results (if desired) to matched or unmatched results (default is to return all). This parameter takes the filtering condition as a string ("matched" or "unmatched").}
-
-\item{min_vaf}{The minimum tumour VAF for a SV to be returned. Default is 0.1.}
-
-\item{min_score}{The lowest Manta somatic score for a SV to be returned. Default is 40.}
-
-\item{pass}{If TRUE (default) only return SVs that are annotated with PASS in the FILTER column. Set to FALSE to keep all variants, regardless if they PASS the filters.}
-
-\item{verbose}{Set to TRUE for noisier output to console. Default is FALSE. This parameter also dictates the verbosity of any helper function internally called inside the main function.}
-
-\item{...}{Any additional parameters.}
-}
-\description{
-Convenience function for retrieving Manta Structural Variants (SVs) from the bundled data \link{sample_data}.
-}
-\details{
-To obtain SV calls for multiple samples, give \code{these_sample_ids} a vector of sample IDs.
-Alternatively, the user can also provide the \code{these_samples_metadata} parameter to make use of an already subset metadata table.
-In this case, the returned SVs will be restricted to the sample_ids within that data frame.
-This function internally calls \link{id_ease} to streamline sample ID/metadata parameters.
-This function can also restrict the returned calls to any genomic regions specified within \code{chromosome}, \code{qstart}, \code{qend},
-or the complete region specified under \code{region} (in chr:start-end format), note that chromosome can be either prefixed or not prefixed.
-Useful filtering parameters are also available, use \code{min_vaf} to set the minimum tumour VAF for a SV to be returned and \code{min_score}
-to set the lowest Manta somatic score for a SV to be returned. \code{pairing_status} can be used to return variants from either matched or unmatched samples.
-In addition, the user can choose to return all variants, even the ones not passing the filter criteria. To do so, set \code{pass = FALSE} (default is TRUE).
-}
-\examples{
-#load packages
-library(dplyr)
-
-#lazily get every SV in the table with default quality filters
-all_sv = get_manta_sv()
-
-#get all SVs for DLBCL cell line samples
-cell_line_meta = GAMBLR.data::sample_data$meta \%>\% 
-  dplyr::filter(cohort == "DLBCL_cell_lines")
-  
-dlbcl_sv = get_manta_sv(these_samples_metadata = cell_line_meta)
-
-#get the SVs in a region around MYC
-myc_locus_sv = get_manta_sv(region = "8:128723128-128774067")
-
-}
diff --git a/man/get_sample_cn_segments.Rd b/man/get_sample_cn_segments.Rd
deleted file mode 100644
index df0959c..0000000
--- a/man/get_sample_cn_segments.Rd
+++ /dev/null
@@ -1,65 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/get_sample_cn_segments.R
-\name{get_sample_cn_segments}
-\alias{get_sample_cn_segments}
-\title{Get Sample CN Segments.}
-\usage{
-get_sample_cn_segments(
-  these_sample_ids = NULL,
-  these_samples_metadata = NULL,
-  projection = "grch37",
-  this_seq_type = "genome",
-  with_chr_prefix = FALSE,
-  streamlined = FALSE,
-  verbose = FALSE,
-  ...
-)
-}
-\arguments{
-\item{these_sample_ids}{Optional, a vector of multiple sample_id (or a single sample ID as a string) that you want results for.}
-
-\item{these_samples_metadata}{Optional, a metadata table (with sample IDs in a column) to subset the return to.
-If not provided (and if \code{these_sample_ids} is not provided), the function will return all samples from the specified seq_type in the metadata.}
-
-\item{projection}{Selected genome projection for returned CN segments. Default is "grch37".}
-
-\item{this_seq_type}{Seq type for returned CN segments. Default is genome.}
-
-\item{with_chr_prefix}{Set to TRUE to add a chr prefix to chromosome names. Default is FALSE.}
-
-\item{streamlined}{Return a minimal output rather than full details. Default is FALSE.}
-
-\item{verbose}{Set to TRUE for noisier output to console. Default is FALSE. This parameter also dictates the verbosity of any helper function internally called inside the main function.}
-
-\item{...}{Any additional parameters.}
-}
-\value{
-A data frame of segments for a specific or multiple sample ID(s).
-}
-\description{
-Get all segments for a single (or multiple) sample_id(s).
-}
-\details{
-This function returns CN segments. This works for single sample or multiple samples.
-Specify the sample IDs you are interested in with \code{these_sample_ids} (as a vector of characters),
-or call this function with \code{these_samples_metadata} if you already have a metadata table subset to the sample IDs of interest.
-If none of the above parameters are specified, the function will return CN segments for available samples (from get_gambl_metadata).
-Note, this function internally calls \link{id_ease} for dealing with sample IDs and metadata tables.
-}
-\examples{
-#load packages
-library(dplyr)
-
-#get CN segments for one sample
-dohh2_segs = get_sample_cn_segments(these_sample_ids = "DOHH-2",
-                                    projection = "hg38", 
-                                    streamlined = TRUE)
-
-#get CN segments for DLBCL cell line
-cell_line_meta = GAMBLR.data::sample_data$meta \%>\% 
-  dplyr::filter(cohort == "DLBCL_cell_lines")
-  
-dlbcl_segs = get_sample_cn_segments(these_samples_metadata = cell_line_meta, 
-                                    streamlined = TRUE)
-
-}
diff --git a/man/get_ssm_by_patients.Rd b/man/get_ssm_by_patients.Rd
deleted file mode 100644
index b24582c..0000000
--- a/man/get_ssm_by_patients.Rd
+++ /dev/null
@@ -1,67 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/get_ssm_by_patients.R
-\name{get_ssm_by_patients}
-\alias{get_ssm_by_patients}
-\title{Get SSM By Patients.}
-\usage{
-get_ssm_by_patients(
-  these_patient_ids,
-  these_samples_metadata,
-  projection = "grch37",
-  this_seq_type = "genome",
-  tool_name = "slms-3",
-  this_study,
-  verbose = FALSE,
-  ...
-)
-}
-\arguments{
-\item{these_patient_ids}{A vector of patient IDs that you want results for.
-The user can also use a metadata table that has been subset to the patient IDs of interest (see \code{these_samples_metadata}).}
-
-\item{these_samples_metadata}{A metadata subset to contain the rows corresponding to the patients of interest.
-If the vector of patient IDs is missing (\code{these_patient_ids}), this function will default to all patient IDs in the metadata table given to this parameter.}
-
-\item{projection}{Obtain variants projected to this reference (one of grch37 or hg38). Default is grch37.}
-
-\item{this_seq_type}{The seq type you want results for. Default is "genome".}
-
-\item{tool_name}{Optionally specify which tool to report variant from. The default is slms-3, also supports "publication" to return the exact variants as reported in the original papers.}
-
-\item{this_study}{Optionally specify first name of the author for the paper
-from which the variants should be returned.
-This parameter can either be a vector of indexes (integer) or a vector of characters (matching columns in MAF).}
-
-\item{verbose}{Set to TRUE for noisier output to console. Default is FALSE. This parameter also dictates the verbosity of any helper function internally called inside the main function.}
-
-\item{...}{Any additional parameters.}
-}
-\value{
-A data frame with SSM calls for the selected patients in MAF format.
-}
-\description{
-Get MAF-format data frame for more than one patient.
-}
-\details{
-This function returns variants from a set of patients.
-This function internally calls \link{get_ssm_by_samples}.
-Thus, the main contents of this function is to wrangle the provided patient IDs,
-so that the corresponding sample IDs can be provided to the internal call of \code{get_ssm_by_samples}.
-This function expects either a vector of patient IDs (\code{these_patient_ids})
-or an already subset metadata table (\code{these_samples_metadata}).
-}
-\examples{
-#load packages
-library(dplyr)
-
-#basic usage, these_patient_ids
-my_patient = get_ssm_by_patients(these_patient_ids = "DOHH-2")
-
-#using a subset metadata table to retrieve patient SSMs
-cell_line_meta = GAMBLR.data::sample_data$meta \%>\%
- dplyr::filter(cohort == "DLBCL_cell_lines")
-
-patient_maf = get_ssm_by_patients(these_samples_metadata = cell_line_meta,
-                                  this_seq_type = "genome")
-
-}
diff --git a/man/get_ssm_by_region.Rd b/man/get_ssm_by_region.Rd
deleted file mode 100644
index e25a627..0000000
--- a/man/get_ssm_by_region.Rd
+++ /dev/null
@@ -1,80 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/get_ssm_by_region.R
-\name{get_ssm_by_region}
-\alias{get_ssm_by_region}
-\title{Get SSM By Region.}
-\usage{
-get_ssm_by_region(
-  these_sample_ids = NULL,
-  these_samples_metadata = NULL,
-  maf_data,
-  chromosome,
-  qstart,
-  qend,
-  region = "",
-  streamlined = FALSE,
-  projection = "grch37",
-  this_seq_type = "genome",
-  tool_name = "slms-3",
-  this_study,
-  verbose = FALSE,
-  ...
-)
-}
-\arguments{
-\item{these_sample_ids}{Optional, a vector of multiple sample_id (or a single sample ID as a string) that you want results for.}
-
-\item{these_samples_metadata}{Optional, a metadata table (with sample IDs in a column) to subset the return to.
-If not provided (and if \code{these_sample_ids} is not provided), the function will return all samples from the specified seq_type in the metadata.}
-
-\item{maf_data}{Optional data frame with mutations in MAF format.
-If user provides a maf, the function trusts that the user has already subset this to samples of interest, correct seq_type.
-i.e the following parameters are ignored; \code{these_samples_metadata}, \code{these_sample_ids}, and \code{this_seq_type}}
-
-\item{chromosome}{The chromosome you are restricting to (with or without a chr prefix).}
-
-\item{qstart}{Query start coordinate of the range you are restricting to.}
-
-\item{qend}{Query end coordinate of the range you are restricting to.}
-
-\item{region}{Region formatted like chrX:1234-5678 instead of specifying chromosome, start and end separately.}
-
-\item{streamlined}{Return Start_Position and Tumor_Sample_Barcode as the only two MAF columns. Default is FALSE.}
-
-\item{projection}{Obtain variants projected to this reference (one of grch37 or hg38).}
-
-\item{this_seq_type}{The seq_type you want back, default is genome.}
-
-\item{tool_name}{Optionally specify which tool to report variant from. The default is slms-3, also supports "publication" to return the exact variants as reported in the original papers.}
-
-\item{this_study}{Optionally specify first name of the author for the paper
-from which the variants should be returned.}
-
-\item{verbose}{Set to FALSE to prevent ANY message to be printed.
-In most cases, this parameter should be left to TRUE.
-The parameter was added to accommodate for noisy output
-when running this function in a loop for retrieving SSM
-for multiple regions \link{get_ssm_by_regions}.}
-
-\item{...}{Any additional parameters.}
-}
-\value{
-A data frame containing all mutations (MAF) in the specified region.
-}
-\description{
-Retrieve all SSMs from the GAMBL database within a single genomic coordinate range.
-}
-\details{
-This function lets the user specify a region of interest for returning SSM calls within that region.
-There are multiple ways a region can be specified. For example, the user can provide the full region in a "region" format (chr:start-end) to the \code{region} parameter.
-Or, the user can provide chromosome, start and end coordinates individually with the \code{chromosome}, \code{qstart}, and \code{qend} parameters.
-}
-\examples{
-my_mutations = get_ssm_by_region(region = "chr8:128,723,128-128,774,067")
-
-#specifying chromosome, start and end individually
-my_mutations = get_ssm_by_region(chromosome = "8",
-                                 qstart = 128723128,
-                                 qend = 128774067)
-
-}
diff --git a/man/get_ssm_by_regions.Rd b/man/get_ssm_by_regions.Rd
deleted file mode 100644
index f36a948..0000000
--- a/man/get_ssm_by_regions.Rd
+++ /dev/null
@@ -1,69 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/get_ssm_by_regions.R
-\name{get_ssm_by_regions}
-\alias{get_ssm_by_regions}
-\title{Get SSM By Regions.}
-\usage{
-get_ssm_by_regions(
-  these_samples_metadata,
-  regions_list,
-  regions_bed,
-  this_seq_type = "genome",
-  streamlined = TRUE,
-  projection = "grch37",
-  verbose = FALSE,
-  tool_name = "slms-3",
-  ...
-)
-}
-\arguments{
-\item{these_samples_metadata}{Optional, a metadata table (with sample IDs in a column) to subset the return to.}
-
-\item{regions_list}{A vector of regions in the chr:start-end format to restrict the returned SSM calls to.}
-
-\item{regions_bed}{A data frame in BED format with the coordinates you want to retrieve (recommended).
-This parameter can also accept an additional column with region names that will be added to the return if \code{use_name_column = TRUE}}
-
-\item{this_seq_type}{The this_seq_type you want back, default is genome.}
-
-\item{streamlined}{If set to TRUE (default) only 3 columns will be kept in the returned data frame (start, sample_id and region_name).}
-
-\item{projection}{Obtain variants projected to this reference (one of grch37 or hg38), default is grch37.}
-
-\item{verbose}{Set to TRUE to maximize the output to console. Default is FALSE.
-This parameter also dictates the verbosity of any helper function internally called inside the main function.}
-
-\item{tool_name}{Optionally specify which tool to report variant from. The default is slms-3, also supports "publication" to return the exact variants as reported in the original papers.}
-
-\item{...}{Any additional parameters.}
-}
-\value{
-Returns a data frame of variants in MAF-like format.
-}
-\description{
-Efficiently retrieve all mutations across a range of genomic regions.
-}
-\details{
-This function internally calls get_ssm_by_region to retrieve SSM calls for the specified regions.
-}
-\examples{
-#basic usage, adding custom names from bundled ashm data frame
-regions_bed = create_bed_data( GAMBLR.data::grch37_ashm_regions,
-                          fix_names = "concat",
-                          concat_cols = c("gene","region"),
-                          sep="-")
-
-my_meta = get_gambl_metadata()
-# get a full MAF-format data frame for all aSHM regions on grch37 coordinates
-ashm_maf = get_ssm_by_regions(regions_bed = regions_bed,
-                                        these_samples_metadata = my_meta,
-                                        streamlined = FALSE)
-
-# This example intentionally fails
-ashm_maf = get_ssm_by_regions(regions_bed = regions_bed,
-                              these_samples_metadata = my_meta,
-                               projection="hg38")
-# Error in get_ssm_by_regions(regions_bed = regions_bed, these_samples_metadata = my_meta,  : 
-# requested projection: hg38 and genome_build of regions_bed: grch37 don't match
-
-}
diff --git a/man/get_ssm_by_samples.Rd b/man/get_ssm_by_samples.Rd
deleted file mode 100644
index d975111..0000000
--- a/man/get_ssm_by_samples.Rd
+++ /dev/null
@@ -1,54 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/get_ssm_by_samples.R
-\name{get_ssm_by_samples}
-\alias{get_ssm_by_samples}
-\title{Get SSM By Samples.}
-\usage{
-get_ssm_by_samples(
-  these_sample_ids = NULL,
-  these_samples_metadata = NULL,
-  this_seq_type = "genome",
-  projection = "grch37",
-  tool_name = "slms-3",
-  verbose = FALSE,
-  ...
-)
-}
-\arguments{
-\item{these_sample_ids}{A vector of one or more sample IDs that you want results for.}
-
-\item{these_samples_metadata}{Optional, a metadata table (with sample IDs in a column) to auto-subset the data to samples in that table before returning.
-If not provided and these_sample_ids is also not provided, the function will return SSM for all samples from the specified seq_type in the bundled metadata.}
-
-\item{this_seq_type}{Default is genome.}
-
-\item{projection}{The projection genome build. Supports hg38 and grch37.}
-
-\item{tool_name}{Optionally specify which tool to report variant from. The default is slms-3, also supports "publication" to return the exact variants as reported in the original papers.}
-
-\item{verbose}{Enable for debugging/noisier output.}
-
-\item{...}{Any additional parameters.}
-}
-\value{
-data frame in MAF format.
-}
-\description{
-Get the SSMs (i.e. load MAF) for a single sample or a collection of samples.
-}
-\details{
-Retrieve a maf for a specific sample or a set of samples.
-Either specify the sample IDs of interest with \code{these_sample_ids},
-or provide a metadata table subset to the sample IDs of interest with \code{these_samples_metadata}.
-}
-\examples{
-#load a common dependency
-library(dplyr)
-
-#Get genome-wide set of mutations from all DLBCL cell lines
-cell_line_meta = get_gambl_metadata() \%>\% 
-  dplyr::filter(cohort == "DLBCL_cell_lines")
-
-dlbcl_maf = get_ssm_by_samples(these_samples_metadata = cell_line_meta)
-
-}
diff --git a/man/id_ease.Rd b/man/id_ease.Rd
deleted file mode 100644
index 7ee88e9..0000000
--- a/man/id_ease.Rd
+++ /dev/null
@@ -1,61 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/id_ease.R
-\name{id_ease}
-\alias{id_ease}
-\alias{id_ease,}
-\alias{id}
-\alias{ease}
-\title{ID Ease}
-\usage{
-id_ease(
-  these_samples_metadata = NULL,
-  these_sample_ids = NULL,
-  this_seq_type = c("genome", "capture"),
-  verbose = FALSE
-)
-}
-\arguments{
-\item{these_samples_metadata}{An optional data frame with metadata, subset to sample IDs of interest.
-If not provided will retrieve GAMBL metadata for all available samples.}
-
-\item{these_sample_ids}{Optional character vector of GAMBL sample IDs.}
-
-\item{this_seq_type}{The seq type of interest. Default is both genome and capture, with priority for genome when a sample has >1 seq_type.}
-
-\item{verbose}{Set to FALSE to limit the information that gets printed to the console. Default is FALSE.}
-}
-\value{
-Metadata (data frame).
-}
-\description{
-Internal convenience function that standardizes the way GAMBLR functions deal with sample IDs (these_sample_ids)
-and metadata (these_samples_metadata).
-}
-\details{
-This function can take sample IDs as a vector of characters, or a metadata table in data frame format.
-If no sample IDs are provided to the function, the function will operate on all gambl sample IDs available for the given seq type.
-It is highly recommended to run this function with \code{verbose = TRUE},
-since this will not only make it clearer how the function operates,
-but might also help with debugging functions that internally call this function.
-The function also performs sanity checks and notifies the user if any of the requested sample IDs are not found in the metadata.
-In addition, the function also notifies the dimensions of the returned object, providing further insight to what is returned.
-As with all GAMBLR functions, providing a curated metadata table to any GAMBLR function (as opposed to a vector of IDs) is the safest way to ensure you get the expected result.
-}
-\examples{
-#load packages
-library(dplyr)
-
-#give the function nothing (i.e return all sample IDs in the metadata for the default seq type)
-#return metadata for all samples in the default seq type
-all_meta = id_ease()
-
-#return metadata based on a sample ID
-sample_meta = id_ease(these_sample_ids = "94-15772_tumorA")
-
-#return sample IDs based on an already filtered metadata
-this_metadata = get_gambl_metadata(seq_type_filter = "genome") \%>\% 
-  head(5)
-
-these_ids = id_ease(these_samples_metadata = this_metadata)
-
-}
diff --git a/man/preserve_genomic_attributes.Rd b/man/preserve_genomic_attributes.Rd
deleted file mode 100644
index 45ed20b..0000000
--- a/man/preserve_genomic_attributes.Rd
+++ /dev/null
@@ -1,19 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/genomic_data.R
-\name{preserve_genomic_attributes}
-\alias{preserve_genomic_attributes}
-\title{Preserve Genomic Attributes}
-\usage{
-preserve_genomic_attributes(new_data, old_data)
-}
-\arguments{
-\item{new_data}{A data frame resulting from dplyr operations.}
-
-\item{old_data}{The original data frame with genomic attributes.}
-}
-\value{
-A data frame with preserved genomic attributes.
-}
-\description{
-This function preserves the genomic attributes and class after dplyr operations.
-}
diff --git a/man/process_regions.Rd b/man/process_regions.Rd
deleted file mode 100644
index 4dc42e3..0000000
--- a/man/process_regions.Rd
+++ /dev/null
@@ -1,57 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/process_regions.R
-\name{process_regions}
-\alias{process_regions}
-\title{Process Regions objects.}
-\usage{
-process_regions(
-  regions_list = NULL,
-  regions_bed = NULL,
-  region_padding = 0,
-  skip_regions = NULL,
-  only_regions = NULL,
-  projection = "grch37",
-  sort = FALSE
-)
-}
-\arguments{
-\item{regions_list}{Character vector of genomic regions. If neither regions_list nor regions_bed is specified, will use GAMBLR aSHM regions}
-
-\item{regions_bed}{Data frame of genomic regions with column names "chrom", "start", "end", "name"}
-
-\item{region_padding}{Amount to pad the start and end coordinates by. The default is 0 (no padding).}
-
-\item{skip_regions}{Character vector of genes to drop from GAMBLR aSHM regions.}
-
-\item{only_regions}{Character vector of genes to include from GAMBLR aSHM regions.}
-
-\item{projection}{Specify which genome build projection to use. The default is "grch37", also accepts "hg38".}
-
-\item{sort}{Set to TRUE to force regions_bed to be ordered on chromosome and coordinate}
-}
-\value{
-A list with two objects, regions as a vector and in bed format.
-}
-\description{
-INTERNAL FUNCTION to harmonize genomic regions specified as character vectors or data frames.
-}
-\details{
-INTERNAL FUNCTION to harmonize genomic regions specified as character vectors or data frames.
-}
-\examples{
-library(dplyr)
-
-regions <- setNames(
-     c("chr1:10000-15000", "chr1:100000000-100005000"),
-     c("one_region", "another_region")
-)
-process_regions(regions_list = regions)
-
-reg_bed = GAMBLR.data::grch37_ashm_regions \%>\%
-dplyr::filter(chr_name == "chr17") \%>\%
-  mutate(name = region, chrom = chr_name, start = hg19_start, end = hg19_end) \%>\%
-  select(chrom, start, end, name)
-
-process_regions(regions_bed = reg_bed)
-
-}
diff --git a/man/region_to_chunks.Rd b/man/region_to_chunks.Rd
deleted file mode 100644
index a4ab176..0000000
--- a/man/region_to_chunks.Rd
+++ /dev/null
@@ -1,24 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/region_to_chunks.R
-\name{region_to_chunks}
-\alias{region_to_chunks}
-\title{Separate a chromosome region into chunks}
-\usage{
-region_to_chunks(region)
-}
-\arguments{
-\item{region}{A single string that stores a chromosome region. Any format like
-"chr1:100000-200000", "1:100000-200000", "chr1:100'000-200'000" is possible.}
-}
-\value{
-A list with length 3 and names "chromosome", "start" and "end".
-}
-\description{
-\code{region_to_chunks} breaks the input string that stores a chromosome
-region to create a list with chromosome number and start and end positions as
-separated elements.
-}
-\examples{
-region_to_chunks(region = "chr1:100000-200000")
-
-}
diff --git a/man/review_hotspots.Rd b/man/review_hotspots.Rd
deleted file mode 100644
index f0f253e..0000000
--- a/man/review_hotspots.Rd
+++ /dev/null
@@ -1,36 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/review_hotspots.R
-\name{review_hotspots}
-\alias{review_hotspots}
-\title{Review Hotspots.}
-\usage{
-review_hotspots(
-  annotated_maf,
-  genes_of_interest = c("FOXO1", "MYD88", "CREBBP", "NOTCH1", "NOTCH2", "CD79B", "EZH2"),
-  genome_build
-)
-}
-\arguments{
-\item{annotated_maf}{A data frame in MAF format that has hotspots annotated using the function annotate_hotspots().}
-
-\item{genes_of_interest}{A vector of genes for hotspot review. Currently only FOXO1, MYD88, CREBBP, NOTCH1, NOTCH2, CD79B and EZH2 are supported.}
-
-\item{genome_build}{Reference genome build for the coordinates in the MAF file. The default is grch37 genome build.}
-}
-\value{
-The same data frame (as given to the \code{annotated_maf} parameter) with the reviewed column "hot_spot".
-}
-\description{
-Annotate MAF-like data frame with a hot_spot column indicating recurrent mutations.
-}
-\details{
-This function takes an annotated MAF (with \link{annotate_hotspots}) and updates an existing column, "hot_spot", in the same data frame.
-Genes for hotspot review are supplied with the \code{genes_of_interest} parameter.
-Currently only a few sets of genes are supported, see parameter description for more information and limitations.
-The desired genome build can be specified with \code{genome_build} parameter. Should be the same as the incoming MAF.
-}
-\examples{
-hot_ssms = review_hotspots(annotate_hotspots(get_coding_ssm(this_seq_type = "genome")),
-                           genes_of_interest = c("CREBBP"))
-
-}
diff --git a/man/strip_genomic_classes.Rd b/man/strip_genomic_classes.Rd
deleted file mode 100644
index 5c08846..0000000
--- a/man/strip_genomic_classes.Rd
+++ /dev/null
@@ -1,23 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/genomic_data.R
-\name{strip_genomic_classes}
-\alias{strip_genomic_classes}
-\title{Strip Genomic Data Classes}
-\usage{
-strip_genomic_classes(x, classes = c("genomic_data", "maf_data", "bed_data"))
-}
-\arguments{
-\item{x}{An object, such as one of your genomic data objects.}
-
-\item{classes}{A character vector of class names to remove. The default is
-c("genomic_data", "maf_data", "bed_data").}
-}
-\value{
-The object with the specified classes removed.
-}
-\description{
-This function removes custom classes associated with genomic data objects
-(by default, "genomic_data", "maf_data", and "bed_data") from the class attribute
-of an object. This can be useful when you want to revert an S3 object to its
-underlying data.frame (or data.table) classes without converting the object.
-}