Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve status updating for BedTabixAdapter, Gff3TabixAdapter, VcfTabixAdapter, BigBedAdapter #4827

Merged
merged 4 commits into from
Feb 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 59 additions & 31 deletions plugins/bed/src/BedTabixAdapter/BedTabixAdapter.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import BED from '@gmod/bed'
import { TabixIndexedFile } from '@gmod/tabix'
import { BaseFeatureDataAdapter } from '@jbrowse/core/data_adapters/BaseAdapter'
import { SimpleFeature } from '@jbrowse/core/util'
import { SimpleFeature, updateStatus } from '@jbrowse/core/util'
import { openLocation } from '@jbrowse/core/util/io'
import { ObservableCreate } from '@jbrowse/core/util/rxjs'
import { checkStopToken } from '@jbrowse/core/util/stopToken'
Expand All @@ -25,6 +25,10 @@ export default class BedTabixAdapter extends BaseFeatureDataAdapter {

public static capabilities = ['getFeatures', 'getRefNames']

/**
 * Memoized promise for the value produced by `getMetadataPre`.
 *
 * `getMetadataPre2` assigns it on first use and resets it to `undefined` when
 * that promise rejects, so a later call starts a fresh attempt.
 */
setupP?: Promise<{
meta: Awaited<ReturnType<TabixIndexedFile['getMetadata']>>
}>

public constructor(
config: AnyConfigurationModel,
getSubAdapter?: getSubAdapterType,
Expand Down Expand Up @@ -52,15 +56,37 @@ export default class BedTabixAdapter extends BaseFeatureDataAdapter {
return this.bed.getReferenceSequenceNames(opts)
}

async getHeader() {
return this.bed.getHeader()
async getHeader(opts?: BaseOptions) {
return this.bed.getHeader(opts)
}

/**
 * Returns the shared metadata promise, starting the load on first use.
 *
 * Every caller receives the same `setupP` promise while it is set. If the
 * load rejects, `setupP` is cleared before the error is rethrown, so the next
 * call retries instead of replaying the stored failure.
 *
 * @param _opts - accepted but not read by this method
 * @returns the `{ meta }` object resolved by `getMetadataPre`
 */
async getMetadataPre2(_opts?: BaseOptions) {
  // Fast path: a load is already in flight or has already succeeded.
  if (this.setupP) {
    return this.setupP
  }
  this.setupP = this.getMetadataPre().catch((err: unknown) => {
    // Forget the failed attempt so it is not cached forever.
    this.setupP = undefined
    throw err
  })
  return this.setupP
}

/**
 * Performs the actual (uncached) metadata fetch by awaiting
 * `this.bed.getMetadata()`.
 *
 * @returns an object holding the fetched value under the `meta` key
 */
async getMetadataPre() {
  return { meta: await this.bed.getMetadata() }
}

/**
 * Fetches the memoized metadata, wrapping the call in `updateStatus` with the
 * message 'Downloading index'.
 *
 * @param opts - optional adapter options; only `statusCallback` is read here,
 *   and a no-op is substituted when it is undefined
 * @returns whatever `updateStatus` yields for the `getMetadataPre2` call
 */
async getMetadata(opts?: BaseOptions) {
  const requested = opts?.statusCallback
  // Only `undefined` triggers the fallback, matching a destructuring default.
  const statusCallback = requested === undefined ? () => {} : requested
  const load = () => this.getMetadataPre2(opts)
  return updateStatus('Downloading index', statusCallback, load)
}

async getNames() {
if (this.columnNames.length) {
return this.columnNames
}
const header = await this.bed.getHeader()
const header = await this.getHeader()
const defs = header.split(/\n|\r\n|\r/).filter(f => !!f)
const defline = defs.at(-1)
return defline?.includes('\t')
Expand All @@ -71,42 +97,44 @@ export default class BedTabixAdapter extends BaseFeatureDataAdapter {
: undefined
}

public getFeatures(query: Region, opts: BaseOptions = {}) {
const { stopToken } = opts
public getFeatures(query: Region, opts?: BaseOptions) {
const { stopToken, statusCallback = () => {} } = opts || {}
return ObservableCreate<Feature>(async observer => {
const meta = await this.bed.getMetadata()
const { meta } = await this.getMetadata()
const { columnNumbers } = meta
const colRef = columnNumbers.ref - 1
const colStart = columnNumbers.start - 1
const colEnd = columnNumbers.end - 1
const names = await this.getNames()
let start = performance.now()
checkStopToken(stopToken)
await this.bed.getLines(query.refName, query.start, query.end, {
lineCallback: (line, fileOffset) => {
if (performance.now() - start > 200) {
checkStopToken(stopToken)
start = performance.now()
}
observer.next(
new SimpleFeature(
featureData({
line,
colRef,
colStart,
colEnd,
scoreColumn: this.scoreColumn,
parser: this.parser,
uniqueId: `${this.id}-${fileOffset}`,
names,
}),
),
)
},
stopToken: opts.stopToken,
})
await updateStatus('Downloading features', statusCallback, () =>
this.bed.getLines(query.refName, query.start, query.end, {
lineCallback: (line, fileOffset) => {
if (performance.now() - start > 200) {
checkStopToken(stopToken)
start = performance.now()
}
observer.next(
new SimpleFeature(
featureData({
line,
colRef,
colStart,
colEnd,
scoreColumn: this.scoreColumn,
parser: this.parser,
uniqueId: `${this.id}-${fileOffset}`,
names,
}),
),
)
},
stopToken,
}),
)
observer.complete()
}, opts.stopToken)
}, stopToken)
}

/** Intentionally a no-op: the body is empty, so calling it releases nothing. */
public freeResources(): void {}
Expand Down
32 changes: 21 additions & 11 deletions plugins/bed/src/BigBedAdapter/BigBedAdapter.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
import { BigBed } from '@gmod/bbi'
import BED from '@gmod/bed'
import { BaseFeatureDataAdapter } from '@jbrowse/core/data_adapters/BaseAdapter'
import { SimpleFeature, doesIntersect2, max, min } from '@jbrowse/core/util'
import {
SimpleFeature,
doesIntersect2,
max,
min,
updateStatus,
} from '@jbrowse/core/util'
import { openLocation } from '@jbrowse/core/util/io'
import { ObservableCreate } from '@jbrowse/core/util/rxjs'
import { firstValueFrom, toArray } from 'rxjs'
Expand Down Expand Up @@ -95,18 +101,22 @@ export default class BigBedAdapter extends BaseFeatureDataAdapter {
allowRedispatch: boolean
originalQuery?: Region
}) {
const { stopToken } = opts
const { stopToken, statusCallback = () => {} } = opts
const scoreColumn = this.getConf('scoreColumn')
const aggregateField = this.getConf('aggregateField')
const { parser, bigbed } = await this.configure(opts)
const feats = await bigbed.getFeatures(
query.refName,
query.start,
query.end,
{
stopToken,
basesPerSpan: query.end - query.start,
},
const { parser, bigbed } = await updateStatus(
'Downloading header',
statusCallback,
() => this.configure(opts),
)
const feats = await updateStatus(
'Downloading features',
statusCallback,
() =>
bigbed.getFeatures(query.refName, query.start, query.end, {
stopToken,
basesPerSpan: query.end - query.start,
}),
)
if (allowRedispatch && feats.length) {
let minStart = Number.POSITIVE_INFINITY
Expand Down
98 changes: 59 additions & 39 deletions plugins/gff3/src/Gff3TabixAdapter/Gff3TabixAdapter.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { TabixIndexedFile } from '@gmod/tabix'
import { readConfObject } from '@jbrowse/core/configuration'
import { BaseFeatureDataAdapter } from '@jbrowse/core/data_adapters/BaseAdapter'
import { updateStatus } from '@jbrowse/core/util'
import { openLocation } from '@jbrowse/core/util/io'
import { doesIntersect2 } from '@jbrowse/core/util/range'
import { ObservableCreate } from '@jbrowse/core/util/rxjs'
Expand All @@ -9,10 +9,7 @@ import { parseStringSync } from 'gff-nostream'

import { featureData } from '../featureData'

import type PluginManager from '@jbrowse/core/PluginManager'
import type { AnyConfigurationModel } from '@jbrowse/core/configuration'
import type { BaseOptions } from '@jbrowse/core/data_adapters/BaseAdapter'
import type { getSubAdapterType } from '@jbrowse/core/data_adapters/dataAdapterCache'
import type { Feature } from '@jbrowse/core/util/simpleFeature'
import type { Region } from '@jbrowse/core/util/types'
import type { Observer } from 'rxjs'
Expand All @@ -25,48 +22,67 @@ interface LineFeature {
}

export default class Gff3TabixAdapter extends BaseFeatureDataAdapter {
protected gff: TabixIndexedFile

protected dontRedispatch: string[]

public constructor(
config: AnyConfigurationModel,
getSubAdapter?: getSubAdapterType,
pluginManager?: PluginManager,
) {
super(config, getSubAdapter, pluginManager)
const gffGzLocation = readConfObject(config, 'gffGzLocation')
const indexType = readConfObject(config, ['index', 'indexType'])
const location = readConfObject(config, ['index', 'location'])
const dontRedispatch = readConfObject(config, 'dontRedispatch')

this.dontRedispatch = dontRedispatch || ['chromosome', 'contig', 'region']
this.gff = new TabixIndexedFile({
/**
 * Memoized promise for the value produced by `configurePre`.
 *
 * `configurePre2` assigns it on first use and resets it to `undefined` when
 * that promise rejects, so a later call starts a fresh attempt.
 */
private configured?: Promise<{
gff: TabixIndexedFile
dontRedispatch: string[]
}>

private async configurePre(_opts?: BaseOptions) {
const gffGzLocation = this.getConf('gffGzLocation')
const indexType = this.getConf(['index', 'indexType'])
const loc = this.getConf(['index', 'location'])
const dontRedispatch = this.getConf('dontRedispatch') || [
'chromosome',
'contig',
'region',
]
const gff = new TabixIndexedFile({
filehandle: openLocation(gffGzLocation, this.pluginManager),
csiFilehandle:
indexType === 'CSI'
? openLocation(location, this.pluginManager)
: undefined,
indexType === 'CSI' ? openLocation(loc, this.pluginManager) : undefined,
tbiFilehandle:
indexType !== 'CSI'
? openLocation(location, this.pluginManager)
: undefined,
indexType !== 'CSI' ? openLocation(loc, this.pluginManager) : undefined,
chunkCacheSize: 50 * 2 ** 20,
renameRefSeqs: (n: string) => n,
})

return {
gff,
dontRedispatch,
header: await gff.getHeader(),
}
}

/**
 * Returns the shared configuration promise, running `configurePre` on first
 * use.
 *
 * Every caller receives the same `configured` promise while it is set. If
 * setup rejects, `configured` is cleared before the error is rethrown, so the
 * next call retries instead of replaying the stored failure.
 *
 * @returns the value resolved by `configurePre`
 */
protected async configurePre2() {
  // Fast path: setup is already in flight or has already succeeded.
  if (this.configured) {
    return this.configured
  }
  this.configured = this.configurePre().catch((err: unknown) => {
    // Forget the failed attempt so it is not cached forever.
    this.configured = undefined
    throw err
  })
  return this.configured
}

/**
 * Resolves the memoized adapter configuration, wrapping the call in
 * `updateStatus` with the message 'Downloading index'.
 *
 * @param opts - optional adapter options; only `statusCallback` is read here,
 *   and a no-op is substituted when it is undefined
 * @returns whatever `updateStatus` yields for the `configurePre2` call
 */
async configure(opts?: BaseOptions) {
  const requested = opts?.statusCallback
  // Only `undefined` triggers the fallback, matching a destructuring default.
  const statusCallback = requested === undefined ? () => {} : requested
  const load = () => this.configurePre2()
  return updateStatus('Downloading index', statusCallback, load)
}
public async getRefNames(opts: BaseOptions = {}) {
return this.gff.getReferenceSequenceNames(opts)
const { gff } = await this.configure(opts)
return gff.getReferenceSequenceNames(opts)
}

public async getHeader() {
return this.gff.getHeader()
public async getHeader(opts: BaseOptions = {}) {
const { gff } = await this.configure(opts)
return gff.getHeader()
}

public getFeatures(query: Region, opts: BaseOptions = {}) {
return ObservableCreate<Feature>(async observer => {
const metadata = await this.gff.getMetadata()
const { gff } = await this.configure(opts)
const metadata = await gff.getMetadata()
await this.getFeaturesHelper(query, opts, metadata, observer, true)
}, opts.stopToken)
}
Expand All @@ -79,16 +95,20 @@ export default class Gff3TabixAdapter extends BaseFeatureDataAdapter {
allowRedispatch: boolean,
originalQuery = query,
) {
const { statusCallback = () => {} } = opts
try {
const lines: LineFeature[] = []

await this.gff.getLines(
query.refName,
query.start,
query.end,
(line, fileOffset) => {
lines.push(this.parseLine(metadata.columnNumbers, line, fileOffset))
},
const { dontRedispatch, gff } = await this.configure(opts)
await updateStatus('Downloading features', statusCallback, () =>
gff.getLines(
query.refName,
query.start,
query.end,
(line, fileOffset) => {
lines.push(this.parseLine(metadata.columnNumbers, line, fileOffset))
},
),
)
if (allowRedispatch && lines.length) {
let minStart = Number.POSITIVE_INFINITY
Expand All @@ -97,7 +117,7 @@ export default class Gff3TabixAdapter extends BaseFeatureDataAdapter {
const featureType = line.fields[2]!
// only expand redispatch range if feature is not a "dontRedispatch"
// type skips large regions like chromosome,region
if (!this.dontRedispatch.includes(featureType)) {
if (!dontRedispatch.includes(featureType)) {
const start = line.start - 1 // gff is 1-based
if (start < minStart) {
minStart = start
Expand Down
Loading