Skip to content

Commit

Permalink
Merge pull request #100 from NuttyLogic/development
Browse files Browse the repository at this point in the history
- added support for bedGraph methylation calls
  • Loading branch information
NuttyLogic authored Oct 5, 2020
2 parents 8ef732b + 92bb750 commit f448e65
Show file tree
Hide file tree
Showing 12 changed files with 61 additions and 23 deletions.
23 changes: 19 additions & 4 deletions BSBolt/CallMethylation/ProcessMethylationContigs.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class ProcessContigs:
* *verbose (bool)*: Verbose processing, [False]
* *cg_only (bool)*: only return CG sites to queue, [False]
* *ignore_oprhans (bool)*: ignore orphaned reads (not properly paired), [True]
* *bedgraph_output* (bool)*: output bedgraph format inplace of CGmap, [False]
Usage:
Expand All @@ -57,7 +58,8 @@ def __init__(self, input_file: str = None, genome_database: str = None, output_p
ignore_overlap: bool = True, text_output: bool = False, remove_ccgg: bool = False,
min_read_depth: int = 10, max_read_depth: int = 8000, threads: int = 1, verbose: bool = True,
min_base_quality: int = 10, min_mapping_quality: int = 10,
ATCGmap: bool = False, cg_only: bool = True, ignore_orphans: bool = False):
ATCGmap: bool = False, cg_only: bool = True, ignore_orphans: bool = False,
bedgraph_output: bool = False):
assert isinstance(input_file, str), 'Path to input file not valid'
assert isinstance(text_output, bool), 'Not valid bool'
assert isinstance(threads, int), 'Threads must be specified with integer'
Expand All @@ -84,6 +86,7 @@ def __init__(self, input_file: str = None, genome_database: str = None, output_p
cg_only=cg_only)
self.min_read_depth = min_read_depth
self.ATCGmap = ATCGmap
self.bedgraph_output = bedgraph_output
self.methylation_calling = True
self.contigs = self.get_contigs
self.completed_contigs = None
Expand Down Expand Up @@ -173,7 +176,10 @@ def write_output(self, methylation_lines: List[Tuple[Union[int, float, str]]]):
self.write_line(self.output_objects['ATCGmap'], self.format_atcg(meth_line))
# if methylation level greater than or equal to min_read_depth output CGmap and wig lines
if meth_line['all_cytosines'] >= self.min_read_depth:
self.write_line(self.output_objects['CGmap'], self.format_cgmap(meth_line))
if self.bedgraph_output:
self.write_line(self.output_objects['CGmap'], self.format_bedgraph(meth_line))
else:
self.write_line(self.output_objects['CGmap'], self.format_cgmap(meth_line))

@staticmethod
def unpack_meth_line(meth_line: Tuple[Union[int, float, str]]) -> Dict[str, Union[int, float, str]]:
Expand All @@ -199,18 +205,27 @@ def format_cgmap(meth_line: Dict[str, Union[int, float, str]]) -> str:
f'\t{meth_line["meth_cytosines"]}\t{meth_line["all_cytosines"]}\n'
return CGmap_line

@staticmethod
def format_bedgraph(meth_line: Dict[str, Union[int, float, str]]) -> str:
"""Bedgraph line formatting"""
bedgraph_line = f'{meth_line["chrom"]}\t{meth_line["pos"] - 1}\t{meth_line["pos"]}' \
f'\t{meth_line["meth_level"] * 100:.3f}\t{meth_line["meth_cytosines"]}' \
f'\t{meth_line["unmeth_cytosines"]}\n'
return bedgraph_line

@property
def get_output_objects(self):
output_objects = {}
output_path = self.input_file
output_suffix = '.CGmap' if not self.bedgraph_output else '.bg'
if self.output_prefix:
output_path = self.output_prefix
if self.text_output:
output_objects['CGmap'] = open(f'{output_path}.CGmap', 'w')
output_objects['CGmap'] = open(f'{output_path}{output_suffix}', 'w')
if self.ATCGmap:
output_objects['ATCGmap'] = open(f'{output_path}.ATCGmap', 'w')
else:
output_objects['CGmap'] = io.BufferedWriter(gzip.open(f'{output_path}.CGmap.gz', 'wb'))
output_objects['CGmap'] = io.BufferedWriter(gzip.open(f'{output_path}{output_suffix}.gz', 'wb'))
if self.ATCGmap:
output_objects['ATCGmap'] = io.BufferedWriter(gzip.open(f'{output_path}.ATCGmap.gz', 'wb'))
return output_objects
Expand Down
3 changes: 2 additions & 1 deletion BSBolt/Utils/Launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,8 @@ def launch_methylation_call(arguments):
min_mapping_quality=arguments.MQ,
cg_only=arguments.CG,
ATCGmap=arguments.ATCG,
ignore_orphans=arguments.IO)
ignore_orphans=arguments.IO,
bedgraph_output=arguments.BG)
methylation_call.process_contigs()
methylation_call.watch_pool()

Expand Down
3 changes: 2 additions & 1 deletion BSBolt/Utils/Parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from BSBolt.Utils.ParserHelpMessages import aggregate_help, alignment_help, impute_help, index_help, meth_help, sim_help


parser = argparse.ArgumentParser(description='BiSulfite Bolt v1.3.4',
parser = argparse.ArgumentParser(description='BiSulfite Bolt v1.3.5',
usage='BSBolt Module {Module Arguments}')

subparsers = parser.add_subparsers(description='Please invoke BSBolt module for help,'
Expand Down Expand Up @@ -155,6 +155,7 @@
call_meth_parser.add_argument('-MQ', type=int, default=20, help='Minimum alignment quality for an alignment to be '
'considered for methylation calling, default=20')
call_meth_parser.add_argument('-CG', action="store_true", default=False, help='Only output CpG sites in CGmap file')
call_meth_parser.add_argument('-BG', action="store_true", default=False, help='Output call in bedGraph format')
call_meth_parser.add_argument('-ATCG', action="store_true", default=False, help='Output ATCGmap file')
call_meth_parser.add_argument('-IO', action="store_true", default=False, help='Ignore orphans during methylation call')

Expand Down
1 change: 1 addition & 0 deletions BSBolt/Utils/ParserHelpMessages.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
-DB File path to index directory
-O File output prefix
-text output plain text files [False]
-BG output calls in bedGraph format [False]
-CG only output CpG sites in CGmap file [False]
Algorithm Options:
-remove-ccgg remove methylation calls in ccgg sites [False]
Expand Down
4 changes: 2 additions & 2 deletions docs/align.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,12 +80,12 @@ Algorithm Options
```shell
# Paired End Alignment Using Default Commands
python3 -m BSBolt Align -DB ~/Tests/TestData/BSB_Test_DB -F1 ~/Tests/TestSimulations/BSB_pe_meth_1.fastq -F2 ~/Tests/TestSimulations/BSB_pe_meth_2.fastq -O ~/Tests/BSB_pe_test
python3 -m BSBolt Align -DB ~/tests/TestData/BSB_Test_DB -F1 ~/tests/TestSimulations/BSB_pe_meth_1.fastq -F2 ~/tests/TestSimulations/BSB_pe_meth_2.fastq -O ~/tests/BSB_pe_test
```
#### **Single End Alignment**
```shell
# Single End Alignment Using Default Commands
python3 -m BSBolt Align -DB ~/Tests/TestData/BSB_Test_DB -F1 ~/Tests/TestSimulations/BSB_pe_meth_1.fastq -O ~/Tests/BSB_pe_test
python3 -m BSBolt Align -DB ~/tests/TestData/BSB_Test_DB -F1 ~/tests/TestSimulations/BSB_pe_meth_1.fastq -O ~/tests/BSB_pe_test
```
6 changes: 3 additions & 3 deletions docs/bsb_index.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,18 +39,18 @@ Index Options:
#### **WGBS Index Generation Example**
```shell
python3 -m BSBolt Index -G ~/Tests/TestData/BSB_test.fa -DB ~/Tests/TestData/BSB_Test_DB
python3 -m BSBolt Index -G ~/tests/TestData/BSB_test.fa -DB ~/tests/TestData/BSB_Test_DB
```
#### **Masked Alignment Index Generation Example**
```shell
python3 -m BSBolt Index -G ~/Tests/TestData/BSB_test.fa -DB ~/Tests/TestData/BSB_Test_DB -MR /Tests/TestData/test_wgbs_madking.bed
python3 -m BSBolt Index -G ~/tests/TestData/BSB_test.fa -DB ~/tests/TestData/BSB_Test_DB -MR /tests/TestData/test_wgbs_madking.bed
```
#### **RRBS Index Generation Example**
```shell
# RRBS Index, MSPI Cut Format, 40bp Lower Fragment Bound, and 400bp Upper Fragment Bound
python3 -m BSBolt Index -G ~/Tests/TestData/BSB_test.fa -DB ~/Tests/TestData/BSB_Test_DB -rrbs -rrbs-cut-format C-CGG -rrbs-lower 40 -rrbs-upper 400
python3 -m BSBolt Index -G ~/tests/TestData/BSB_test.fa -DB ~/tests/TestData/BSB_Test_DB -rrbs -rrbs-cut-format C-CGG -rrbs-lower 40 -rrbs-upper 400
```
2 changes: 1 addition & 1 deletion docs/index.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# **BSBolt (BiSulfite Bolt)**

## A fast and safe bisulfite sequencing processing platform
## A fast and safe bisulfite sequencing analysis platform

[BiSuflite Bolt (BSBolt)](https://github.com/NuttyLogic/BSBolt); a fast and scalable bisulfite sequencing
analysis platform. BSBolt is an integrated analysis platform that offers support for bisulfite sequencing
Expand Down
28 changes: 21 additions & 7 deletions docs/methylation_calling.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,8 @@ samtools index BSB_pe_test.dup.bam

### BSBolt CallMethylation

Methylation calling outputs a .CGmap file by default. To maintain compatibility with downstream analysis tools
ATCGmap files can by output, but this feature will be deprecation in a future update. Methylation values are called for
all mapped reads at a position by default.
Methylation calling outputs a .CGmap file by default, and makes calls using
all mapped reads not flagged as duplicate at a position by default.

#### **BSB CallMethylation Commands**

Expand All @@ -41,6 +40,7 @@ Input / Output Options:
-DB File path to index directory
-O File output prefix
-text output plain text files [False]
-BG output calls in bedGraph format [False]
-CG only output CpG sites in CGmap file [False]
Algorithm Options:
-remove-ccgg remove methylation calls in ccgg sites [False]
Expand All @@ -62,10 +62,12 @@ can only be called using reads mapped to Watson and Crick strands respectively.

```shell
# Methylation Calling with 2 threads,
python3 -m BSBolt CallMethylation -I ~/Tests/BSB_pe_test.sorted.bam -O ~/Tests/BSB_pe_test -DB ~/Tests/TestData/BSB_Test_DB -t 2 -verbose > methylation_stats.txt
python3 -m BSBolt CallMethylation -I ~/tests/BSB_pe_test.sorted.bam -O ~/tests/BSB_pe_test -DB ~/tests/TestData/BSB_Test_DB -t 2 -verbose > methylation_stats.txt
```

### **Output Files**
### **Output File**

**CGmap**

CGmap is a tab separated text format describing the methylation status of observed cyotsines

Expand All @@ -74,8 +76,8 @@ CGmap is a tab separated text format describing the methylation status of observ
3. Position, base-pairs from start
4. Context, three base pair methylation context
5. Sub-Context, two base pair methylation context
6. Methylation Value, proportion of methylation reads to total reads
7. Methylation Bases, methylated nucleotides observed
6. Methylation Value, proportion of methylated bases to total bases
7. Methylated Bases, methylated nucleotides observed
8. All Bases, total number of nucleotides observed at the mapping position

```text
Expand All @@ -85,3 +87,15 @@ chr12 G 389290 CHH CT 0.0 0 10
chr13 G 200552 CHH CT 0.0 0 10
chr11 C 142826 CG CG 0.0 0 10
```

**Bedgraph**

BedGraph is functionally similar to CGmap format without cytosine context information and zero indexed positions.

1. Chromosome
2. Start Position
4. End Position
6. Methylation Percentage, percentage of methylated bases to total observed bases
7. Methylated Bases, methylated nucleotides observed
8. Unmethylated Bases, total unmethylated bases

2 changes: 1 addition & 1 deletion docs/read_simulation.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,5 +64,5 @@ Algorithm Options:
#### **Simulate Paired End, Undirectional Bisulfite Reads**
```shell
python3 -m BSBolt Simulate -G ~/Tests/TestData/BSB_test.fa -O ~/Tests/TestSimulations/BSB_pe -U -PE
python3 -m BSBolt Simulate -G ~/tests/TestData/BSB_test.fa -O ~/tests/TestSimulations/BSB_pe -U -PE
```
2 changes: 1 addition & 1 deletion docs/simulation.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,5 +76,5 @@ error simulation.

**Simulate Paired End, Undirectional Bisulfite Reads**
```shell
python3 -m BSBolt Simulate -G ~/Tests/TestData/BSB_test.fa -O ~/Tests/TestSimulations/BSB_pe -U -PE
python3 -m BSBolt Simulate -G ~/tests/TestData/BSB_test.fa -O ~/tests/TestSimulations/BSB_pe -U -PE
```
3 changes: 1 addition & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
"""



class BuildError(Exception):
"""Build error"""
pass
Expand Down Expand Up @@ -110,7 +109,7 @@ def run(self):


setup(name='BSBolt',
version='1.3.4',
version='1.3.5',
description='Bisulfite Sequencing Processing Platform',
long_description=long_description,
long_description_content_type="text/markdown",
Expand Down
7 changes: 7 additions & 0 deletions tests/test_alignment_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@
subprocess.run(bs_call_methylation_args)
print('Methylation Values Called')

bs_call_methylation_args = ['python3', '-m', 'BSBolt', 'CallMethylation', '-I',
f'{bsb_directory}tests/BSB_pe_test.sorted.bam',
'-O', f'{bsb_directory}tests/BSB_pe_test',
'-DB', f'{bsb_directory}tests/TestData/BSB_Test_DB',
'-t', '6', '-verbose', '-BQ', '10', '-MQ', '20', '-BG']
subprocess.run(bs_call_methylation_args)

# retrieve reference and test alignments
test_alignments = f'{bsb_directory}tests/BSB_pe_test.sorted.bam'
test_fastqs = [f'{bsb_directory}tests/TestSimulations/BSB_pe_1.fq',
Expand Down

0 comments on commit f448e65

Please sign in to comment.