Skip to content

Commit

Permalink
Fix sort field types
Browse files: browse the repository at this point in the history
  • Loading branch information
Sebastian Benjamin committed Feb 23, 2024
1 parent c06a5a4 commit 56f7132
Showing 1 changed file with 11 additions and 10 deletions.
21 changes (11 additions & 10 deletions) in src/main/java/com/github/discvrseq/walkers/VcfToLuceneIndexer.java
Original file line number | Diff line number | Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.lucene.search.SortedNumericSortField;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
Expand Down Expand Up @@ -100,7 +101,7 @@ public void onTraversalStart() {
}

IndexWriterConfig config = new IndexWriterConfig(analyzer);
config.setIndexSort(new Sort(new SortedNumericSortField("genomicPosition", SortField.Type.INT, false)));
config.setIndexSort(new Sort(new SortField("genomicPosition", SortField.Type.INT, false)));

try {
writer = new IndexWriter(index, config);
Expand Down Expand Up @@ -251,16 +252,16 @@ else if (line.getCountType() == VCFHeaderLineCount.INTEGER || line.getCountType(

doc.add(new IntPoint("start", variant.getStart()));
doc.add(new StoredField("start", variant.getStart()));
doc.add(new SortedNumericDocValuesField("start", variant.getStart()));
doc.add(new NumericDocValuesField("start", variant.getStart()));

doc.add(new IntPoint("end", variant.getEnd()));
doc.add(new StoredField("end", variant.getEnd()));
doc.add(new SortedNumericDocValuesField("end", variant.getEnd()));
doc.add(new NumericDocValuesField("end", variant.getEnd()));

final int genomicPosition = getGenomicPosition(variant.getContig(), variant.getStart());
doc.add(new IntPoint("genomicPosition", genomicPosition));
doc.add(new StoredField("genomicPosition", genomicPosition));
doc.add(new SortedNumericDocValuesField("genomicPosition", genomicPosition));
doc.add(new NumericDocValuesField("genomicPosition", genomicPosition));

if (variant.hasGenotypes()) {
variant.getGenotypes().stream().filter(g -> !g.isFiltered() && !g.isNoCall() && g.getAlleles().contains(alt)).map(Genotype::getSampleName).sorted().forEach(sample -> {
Expand All @@ -275,22 +276,22 @@ else if (line.getCountType() == VCFHeaderLineCount.INTEGER || line.getCountType(
long nHet = variant.getGenotypes().stream().filter(g -> !g.isFiltered() && !g.isNoCall() && g.getAlleles().contains(alt) && g.isHet()).count();
doc.add(new IntPoint("nHet", (int)nHet));
doc.add(new StoredField("nHet", (int)nHet));
doc.add(new SortedNumericDocValuesField("nHet", (int)nHet));
doc.add(new NumericDocValuesField("nHet", (int)nHet));

long nHomVar = variant.getGenotypes().stream().filter(g -> !g.isFiltered() && !g.isNoCall() && g.getAlleles().contains(alt) && g.isHomVar()).count();
doc.add(new IntPoint("nHomVar", (int)nHomVar));
doc.add(new StoredField("nHomVar", (int)nHomVar));
doc.add(new SortedNumericDocValuesField("nHomVar", (int)nHomVar));
doc.add(new NumericDocValuesField("nHomVar", (int)nHomVar));

long nCalled = variant.getGenotypes().stream().filter(g -> !g.isFiltered() && !g.isNoCall()).count();
doc.add(new IntPoint("nCalled", (int)nCalled));
doc.add(new StoredField("nCalled", (int)nCalled));
doc.add(new SortedNumericDocValuesField("nCalled", (int)nCalled));
doc.add(new NumericDocValuesField("nCalled", (int)nCalled));

float fractionHet = (float) nHet / (float) (nHet + nHomVar);
doc.add(new FloatPoint("fractionHet", fractionHet));
doc.add(new DoublePoint("fractionHet", fractionHet));
doc.add(new StoredField("fractionHet", fractionHet));
doc.add(new SortedNumericDocValuesField("fractionHet", (int)fractionHet));
doc.add(new NumericDocValuesField("fractionHet", NumericUtils.doubleToSortableLong(fractionHet)));
}

try {
Expand Down Expand Up @@ -393,7 +394,7 @@ synchronized private void addFieldToDocument(Document doc, VCFHeaderLineType var
parsedVals.forEach(x -> {
doc.add(new DoublePoint(key, x));
doc.add(new StoredField(key, x));
doc.add(new DoubleDocValuesField(key, x));
doc.add(new NumericDocValuesField(key, NumericUtils.doubleToSortableLong(x)));
});
}
}
Expand Down

0 comments on commit 56f7132

Please sign in to comment.