Skip to content

Commit

Permalink
Remove unnecessary calculation and methods in FlowBasedRead (#9077)
Browse files Browse the repository at this point in the history
* removed the field FlowBasedRead.forwardSequence, which was calculated at non-trivial cost and then only used for its length
* removed the method seqLength because it was a more convoluted version of getLength
* fixed some typos and finals because I couldn't help myself
  • Loading branch information
lbergelson authored Jan 16, 2025
1 parent c819ff6 commit 717d8a9
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 39 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ public double haplotypeReadMatching(final FlowBasedHaplotype haplotype, final Fl
read.getTrimmedEnd()).getLeft();

final int haplotypeLength = haplotypeEnd - haplotypeStart;
final int readLength = read.seqLength();
final int readLength = read.getLength();


//in case there is a deletion on the haplotype and the read falls inside the deletion (thus length of the read is
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@
* is coded in the tags of the BAM and is given in flow space). This code is not used in production, but was used in
* development and testing
*
* A common usage pattern is to covert a GATKRead into a FlowBasedRead. Additionally
* A common usage pattern is to convert a GATKRead into a FlowBasedRead. Additionally,
* a SAMRecord can also be converted into a FlowBasedRead. Follows a common usage pattern:
*
* For a self contained example of a usage pattern, see {@link FlowBasedReadUtils#convertToFlowBasedRead(GATKRead, SAMFileHeader)}
* For a self-contained example of a usage pattern, see {@link FlowBasedReadUtils#convertToFlowBasedRead(GATKRead, SAMFileHeader)}
*
**/

Expand Down Expand Up @@ -72,30 +72,25 @@ public class FlowBasedRead extends SAMRecordToGATKReadAdapter implements GATKRea
/**
* The sam record from which this flow based read originated
*/
private SAMRecord samRecord;
private final SAMRecord samRecord;

/**
* The read's sequence, always in forward direction
*/
private byte[] forwardSequence;

/**
* The flow key for the read - i.e. lengths of hmers in an flow order.
* The flow key for the read - i.e. lengths of hmers in flow order.
*
* For example, assuming a flow order of TGCA, and a forward sequence of GGAAT, the key will be 0,2,0,2,1
*/
private int[] key;

/**
* the maping of key elements to their origin locations in the sequence. Entry n contains the offset in the sequence
* the mapping of key elements to their origin locations in the sequence. Entry n contains the offset in the sequence
* where the hmer described by this key element starts.
*/
private int [] flow2base;

/**
* The maximal length of an hmer that can be encoded (normally in the 10-12 range)
*/
private int maxHmer;
private final int maxHmer;

/**
* The value to fill the flow matrix with. Normally 0.001
Expand All @@ -104,20 +99,20 @@ public class FlowBasedRead extends SAMRecordToGATKReadAdapter implements GATKRea
private double perHmerMinErrorProb;

/**
* The order in which flow key in encoded (See decription for key field). Flow order may be wrapped if a longer one
* The order in which flow key is encoded (See description for key field). Flow order may be wrapped if a longer one
* needed.
*/
private byte[] flowOrder;

/**
* The probability matrix for this read. [n][m] position represents that probablity that an hmer of n length will be
* present at the m key position. Therefore, the first dimention is in the maxHmer order, where the second dimension
* The probability matrix for this read. [n][m] position represents that probability that an hmer of n length will be
* present at the m key position. Therefore, the first dimension is in the maxHmer order, where the second dimension
* is length(key).
*/
private double[][] flowMatrix;

/**
* The validity status of the key. Certain operations may produce undefined/errornous results. This is signaled by
* The validity status of the key. Certain operations may produce undefined/erroneous results. This is signaled by
* the read being marked with a validKey == false
*/
private boolean validKey = true;
Expand Down Expand Up @@ -199,7 +194,7 @@ public FlowBasedRead(final GATKRead read, final String flowOrder, final int maxH
* @param samRecord record from SAM file
* @param flowOrder flow order (single cycle)
* @param maxHmer maximal hmer to keep in the flow matrix
* @param fbargs arguments that control resoltion of the flow matrix
* @param fbargs arguments that control resolution of the flow matrix
*/
public FlowBasedRead(final SAMRecord samRecord, final String flowOrder, final int maxHmer, final FlowBasedArgumentCollection fbargs) {
super(samRecord);
Expand All @@ -208,9 +203,8 @@ public FlowBasedRead(final SAMRecord samRecord, final String flowOrder, final in
this.fbargs = fbargs;
this.maxHmer = maxHmer;
this.samRecord = samRecord;
forwardSequence = getForwardSequence();

// read flow matrix in. note that below code contains accomodates for old formats
// read flow matrix in. note that the code below contains accommodations for old formats
if ( samRecord.hasAttribute(FLOW_MATRIX_TAG_NAME) ) {
perHmerMinErrorProb = fbargs.fillingValue;
totalMinErrorProb = perHmerMinErrorProb;
Expand Down Expand Up @@ -408,18 +402,6 @@ public Direction getDirection(){
}


private byte[] getForwardSequence(){
if (!isReverseStrand()) {
return samRecord.getReadBases();
} else {
final byte[] result = new byte[samRecord.getReadBases().length];
System.arraycopy(samRecord.getReadBases(), 0, result, 0, result.length);
SequenceUtil.reverseComplement(result);
return result;
}
}


private int[] getAttributeAsIntArray(final String attributeName, final boolean isSigned) {
ReadUtils.assertAttributeNameIsLegal(attributeName);
final Object attributeValue = this.samRecord.getAttribute(attributeName);
Expand Down Expand Up @@ -460,7 +442,7 @@ public boolean isValid() {
* @return
*/
public double getProb(final int flow, final int hmer) {
double prob = flowMatrix[hmer < maxHmer ? hmer : maxHmer][flow];
double prob = flowMatrix[Math.min(hmer, maxHmer)][flow];
return (prob <= 1) ? prob : 1;
}

Expand Down Expand Up @@ -525,7 +507,7 @@ private void implementMatrixMods(final int[] flowMatrixModsInstructions) {
flowMatrix[hmer2][pos] = flowMatrix[hmer][pos];
}

// if we are copying bacwards, zero out source
// if we are copying backwards, zero out source
if (hmer > hmer2)
flowMatrix[hmer][pos] = 0;
}
Expand Down Expand Up @@ -783,9 +765,6 @@ public int totalKeyBases() {
return sum;
}

public int seqLength(){
return forwardSequence.length;
}
public boolean isBaseClipped() {
return baseClipped;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ public void testReads(final String inputFile, final String outputPrefix, final S
new SAMRecordToGATKReadAdapter(i.next()),
reader.getFileHeader());
fbr.applyAlignment();
Assert.assertEquals(fbr.totalKeyBases(), fbr.seqLength());
Assert.assertEquals(fbr.totalKeyBases(), fbr.getLength());

if ( limitCount < 1000 && outputPrefix != null ) {
try ( final FileWriter fos = new FileWriter(outputPrefix + "." + Integer.toString(count) + ".key.txt") ) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ void testBAMFormatParsing() throws Exception{

String expectedFile = outputDir + "sample." + curRead + ".key.txt";
if (!UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS) {
Assert.assertEquals(fbr.totalKeyBases(), fbr.seqLength());
Assert.assertEquals(fbr.totalKeyBases(), fbr.getLength());
try (FileWriter fos = new FileWriter(tempOutputDir + "/" + curRead + ".key.txt")) {
fbr.writeKey(fos);
}
Expand Down Expand Up @@ -91,7 +91,7 @@ void testBAMFormatParsingWithT0() throws Exception{
String expectedFile = outputDir + "sample.t0." + curRead + ".key.txt";

if ( !UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS ) {
Assert.assertEquals(fbr.totalKeyBases(), fbr.seqLength());
Assert.assertEquals(fbr.totalKeyBases(), fbr.getLength());
try (FileWriter fos = new FileWriter(tempOutputDir + "/" + curRead + ".key.txt")) {
fbr.writeKey(fos);
}
Expand Down

0 comments on commit 717d8a9

Please sign in to comment.