From 35094f7e078e82e94114a8906fde64e44fec9197 Mon Sep 17 00:00:00 2001 From: Harm Brugge Date: Fri, 11 Nov 2016 15:35:08 +0100 Subject: [PATCH] Cell groups --- build.gradle | 2 +- .../BamFileSplitter.java | 32 ++++++++++++++----- .../java/com.harmbrugge.bamtools/Main.java | 2 +- .../SampleSheetCreator.java | 21 +++++++++--- 4 files changed, 42 insertions(+), 15 deletions(-) diff --git a/build.gradle b/build.gradle index 915e098..d3230dd 100644 --- a/build.gradle +++ b/build.gradle @@ -1,5 +1,5 @@ group 'com.harmbrugge.bamtools' -version '1.0-SNAPSHOT' +version '1.0' apply plugin: 'java' diff --git a/src/main/java/com.harmbrugge.bamtools/BamFileSplitter.java b/src/main/java/com.harmbrugge.bamtools/BamFileSplitter.java index fc5721b..88ad7d2 100644 --- a/src/main/java/com.harmbrugge.bamtools/BamFileSplitter.java +++ b/src/main/java/com.harmbrugge.bamtools/BamFileSplitter.java @@ -13,8 +13,7 @@ import java.io.FileReader; import java.io.IOException; import java.nio.file.Path; -import java.util.HashMap; -import java.util.Map; +import java.util.*; /** * BamFileSplitter splits a BAM files generated in cell-ranger pipeline from 10xGenomics into a file per singe cell. @@ -28,12 +27,14 @@ public class BamFileSplitter { private static final String FILE_NAME_PREFIX = "cell_"; private static final String EXTENSION = ".bam"; + private static final int GROUP_SIZE = 50; private final Log logger = LogFactory.getLog(this.getClass()); private final Path pathToBarcodeFile; private final String outputDir; private int fileCount; + private int groupCount; private int recordCount; private int absentBarcodeCount; private int invalidBarcodeCount; @@ -52,7 +53,7 @@ public BamFileSplitter(Path pathToBamFile, Path pathToBarcodeFile, Path outputPa if (outputPath == null) outputDir = pathToBamFile.getParent() + "/output/"; else outputDir = outputPath.toString(); - File file = new File(outputDir); + File file = new File(outputDir + "/0/"); if (!file.exists()) file.mkdirs(); this.pathToBarcodeFile = pathToBarcodeFile; @@ -136,24 +137,39 @@ private boolean isMultimapped(SAMRecord samRecord) { private void addRecordToBam(String barcode, SAMRecord samRecord) { SAMFileWriter outputBam = outputBams.get(barcode); + List readGroup = outputBam.getFileHeader().getReadGroups(); + if (readGroup.size() > 0) { + String readGroupId = readGroup.get(0).getId(); + samRecord.setAttribute("RG", readGroupId); + } + outputBam.addAlignment(samRecord); } private File createBamFile(String barcode) { fileCount++; - String filePath = outputDir + "/" + FILE_NAME_PREFIX + fileCount + "_" + barcode + EXTENSION; + if (fileCount % GROUP_SIZE == 0) { + groupCount++; + File file = new File(outputDir + "/" + groupCount); + if (!file.exists()) file.mkdirs(); + } + + String filePath = outputDir + "/" + groupCount + "/" + FILE_NAME_PREFIX + fileCount + "_" + barcode + EXTENSION; return new File(filePath); } private SAMFileHeader createBamHeader(String barcode) { SAMFileHeader samHeader = samReader.getFileHeader(); - SAMReadGroupRecord readGroup = new SAMReadGroupRecord(barcode); - readGroup.setSample("cell_" + fileCount); - readGroup.setPlatform("Chromium"); + SAMReadGroupRecord readGroup = new SAMReadGroupRecord(String.valueOf(fileCount)); + readGroup.setSample("cell_" + fileCount + "_" + barcode); + readGroup.setPlatform("ILLUMINA"); + + List readGroups = new ArrayList<>(); + readGroups.add(readGroup); - samHeader.addReadGroup(readGroup); + samHeader.setReadGroups(readGroups); return samHeader; } diff --git a/src/main/java/com.harmbrugge.bamtools/Main.java b/src/main/java/com.harmbrugge.bamtools/Main.java index d685561..2979007 100644 --- a/src/main/java/com.harmbrugge.bamtools/Main.java +++ b/src/main/java/com.harmbrugge.bamtools/Main.java @@ -62,7 +62,7 @@ private void start(String[] args) { if (outputPath == null) outputPath = Paths.get(System.getProperty("user.dir")); - sampleSheetCreator.createSamplesheet(outputPath); + sampleSheetCreator.create(outputPath); } } catch (ParseException exp) { diff --git a/src/main/java/com.harmbrugge.bamtools/SampleSheetCreator.java b/src/main/java/com.harmbrugge.bamtools/SampleSheetCreator.java index f42c924..5a438d1 100644 --- a/src/main/java/com.harmbrugge.bamtools/SampleSheetCreator.java +++ b/src/main/java/com.harmbrugge.bamtools/SampleSheetCreator.java @@ -19,7 +19,10 @@ public class SampleSheetCreator { private static final String SAMPLE_SHEET_FILENAME = "samplesheet.csv"; + private static final int GROUP_SIZE = 50; + private int fileCount; + private int groupCount; private final Path pathToBarcodeFile; @@ -27,7 +30,7 @@ public SampleSheetCreator(Path pathToBarcodeFile) { this.pathToBarcodeFile = pathToBarcodeFile; } - public void createSamplesheet(Path outputDir) throws IOException { + public void create(Path outputDir) throws IOException { outputDir.toFile().mkdirs(); File outputFile = new File(outputDir.toString(), SAMPLE_SHEET_FILENAME); @@ -36,16 +39,24 @@ public void createSamplesheet(Path outputDir) throws IOException { // project,cellId,bamFile FileWriter fileWriter = new FileWriter(outputFile.getAbsoluteFile()); BufferedWriter bw = new BufferedWriter(fileWriter); - bw.write("project,cellId,bamFile\n"); + bw.write("project,cellId,bamFile,cellGroup\n"); try (BufferedReader br = new BufferedReader(new FileReader(pathToBarcodeFile.toFile()))) { String barcode = br.readLine(); while (barcode != null) { fileCount++; - String filePath = "${splitBamDir}/" + FILENAME_PREFIX + fileCount + "_" + barcode + EXTENSION; - bw.write(PROJECT_NAME + "," + FILENAME_PREFIX + fileCount + "_" + barcode + "," + filePath); - bw.write("\n"); + + if (fileCount % GROUP_SIZE == 0) { + groupCount++; + } + + String cellId = FILENAME_PREFIX + fileCount + "_" + barcode; + String filePath = groupCount + "/" + cellId + EXTENSION; + bw.write(PROJECT_NAME + ","); + bw.write(cellId + ","); + bw.write(filePath + ","); + bw.write(groupCount + "\n"); barcode = br.readLine(); }