Skip to content

Commit

Permalink
GTF aggregation
Browse files Browse the repository at this point in the history
  • Loading branch information
cmdcolin committed Feb 7, 2025
1 parent a903d61 commit dd91bc2
Show file tree
Hide file tree
Showing 3 changed files with 131 additions and 21 deletions.
140 changes: 120 additions & 20 deletions plugins/gtf/src/GtfAdapter/GtfAdapter.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
import IntervalTree from '@flatten-js/interval-tree'
import { BaseFeatureDataAdapter } from '@jbrowse/core/data_adapters/BaseAdapter'
import { SimpleFeature, fetchAndMaybeUnzip } from '@jbrowse/core/util'
import {
SimpleFeature,
doesIntersect2,
fetchAndMaybeUnzip,
max,
min,
} from '@jbrowse/core/util'
import { openLocation } from '@jbrowse/core/util/io'
import { ObservableCreate } from '@jbrowse/core/util/rxjs'
import { parseStringSync } from 'gtf-nostream'
Expand All @@ -9,8 +15,9 @@ import { featureData } from '../util'

import type { FeatureLoc } from '../util'
import type { BaseOptions } from '@jbrowse/core/data_adapters/BaseAdapter'
import type { Feature } from '@jbrowse/core/util'
import type { NoAssemblyRegion } from '@jbrowse/core/util/types'
import type { Feature, SimpleFeatureSerialized } from '@jbrowse/core/util'
import type { NoAssemblyRegion, Region } from '@jbrowse/core/util/types'

Check failure on line 19 in plugins/gtf/src/GtfAdapter/GtfAdapter.ts

View workflow job for this annotation

GitHub Actions / Lint, typecheck, test

There should be at least one empty line between import groups

Check warning on line 19 in plugins/gtf/src/GtfAdapter/GtfAdapter.ts

View workflow job for this annotation

GitHub Actions / Lint, typecheck, test

'NoAssemblyRegion' is defined but never used
import { Observer } from 'rxjs'

Check failure on line 20 in plugins/gtf/src/GtfAdapter/GtfAdapter.ts

View workflow job for this annotation

GitHub Actions / Lint, typecheck, test

`rxjs` import should occur before import of `../util`

Check failure on line 20 in plugins/gtf/src/GtfAdapter/GtfAdapter.ts

View workflow job for this annotation

GitHub Actions / Lint, typecheck, test

All imports in the declaration are only used as types. Use `import type`

Check failure on line 20 in plugins/gtf/src/GtfAdapter/GtfAdapter.ts

View workflow job for this annotation

GitHub Actions / Lint, typecheck, test

Observer not found in 'rxjs'

type StatusCallback = (arg: string) => void

Expand Down Expand Up @@ -70,15 +77,12 @@ export default class GtfAdapter extends BaseFeatureDataAdapter {
const intervalTree = new IntervalTree()
;(parseStringSync(lines) as FeatureLoc[][])
.flat()
.map(
(f, i) =>
new SimpleFeature({
data: featureData(f),
id: `${this.id}-${refName}-${i}`,
}),
)
.map((f, i) => featureData(f, `${this.id}-${refName}-${i}`))
.forEach(obj =>
intervalTree.insert([obj.get('start'), obj.get('end')], obj),
intervalTree.insert(
[obj.start as number, obj.end as number],
obj,
),
)
this.calculatedIntervalTreeMap[refName] = intervalTree
}
Expand Down Expand Up @@ -114,21 +118,117 @@ export default class GtfAdapter extends BaseFeatureDataAdapter {
return header
}

public getFeatures(query: NoAssemblyRegion, opts: BaseOptions = {}) {
public getFeatures(query: Region, opts: BaseOptions = {}) {
return ObservableCreate<Feature>(async observer => {
try {
const { start, end, refName } = query
const { intervalTreeMap } = await this.loadData(opts)
intervalTreeMap[refName]?.(opts.statusCallback)
.search([start, end])
.forEach(f => {
observer.next(f)
})
observer.complete()
await this.getFeaturesHelper({
query,
opts,
observer,
allowRedispatch: true,
})
} catch (e) {
observer.error(e)
}
}, opts.stopToken)
}

/**
 * Searches the interval tree for features overlapping `query` and emits them
 * on `observer`, grouping features that share the configured
 * `aggregateField` value under a synthetic parent feature of type `gene`.
 *
 * When `allowRedispatch` is true and any hit extends beyond the queried
 * range, the search is re-run once over the full extent of the hits (with
 * redispatch disabled) so that each aggregated parent is built from every
 * subfeature overlapping that widened range. Parents are only emitted if
 * their span intersects `originalQuery`.
 *
 * @param query - region to search in the interval tree
 * @param opts - adapter options; `opts.statusCallback` is forwarded to the
 * per-refName tree accessor
 * @param observer - receives the resulting features and is completed when
 * the search finishes
 * @param allowRedispatch - whether a single widened re-query is permitted
 * @param originalQuery - region used to filter emitted parents; defaults to
 * `query`
 * @throws Error if any feature returned by the search has no `uniqueId`
 */
public async getFeaturesHelper({
  query,
  opts,
  observer,
  allowRedispatch,
  originalQuery = query,
}: {
  query: Region
  opts: BaseOptions
  observer: Observer<Feature>
  allowRedispatch: boolean
  originalQuery?: Region
}) {
  const aggregateField = this.getConf('aggregateField')
  const { start, end, refName } = query
  const { intervalTreeMap } = await this.loadData(opts)
  const feats = intervalTreeMap[refName]?.(opts.statusCallback).search([
    start,
    end,
  ])
  if (feats) {
    if (allowRedispatch && feats.length) {
      // Find the full extent covered by the hits
      let minStart = Number.POSITIVE_INFINITY
      let maxEnd = Number.NEGATIVE_INFINITY
      for (const feat of feats) {
        if (feat.start < minStart) {
          minStart = feat.start
        }
        if (feat.end > maxEnd) {
          maxEnd = feat.end
        }
      }
      if (maxEnd > query.end || minStart < query.start) {
        // Hits spill outside the query: re-run once over the widened range.
        // The nested call emits the features and completes the observer.
        await this.getFeaturesHelper({
          query: {
            ...query,
            start: minStart,
            end: maxEnd,
          },
          opts,
          observer,
          allowRedispatch: false,
          originalQuery: query,
        })
        return
      }
    }

    if (feats.some(f => f.uniqueId === undefined)) {
      throw new Error('found uniqueId undefined')
    }

    // A Map (rather than a plain object) keeps aggregate values read from the
    // file, e.g. a gene named "constructor", from colliding with inherited
    // Object.prototype members.
    const parentAggregation = new Map<string, SimpleFeatureSerialized[]>()

    for (const feat of feats) {
      const aggr = feat[aggregateField]
      if (aggr) {
        const key = String(aggr)
        const bucket = parentAggregation.get(key)
        if (bucket) {
          bucket.push(feat)
        } else {
          parentAggregation.set(key, [feat])
        }
      } else {
        // No aggregate value: emit the feature on its own
        observer.next(
          new SimpleFeature({
            id: `${this.id}-${feat.uniqueId}`,
            data: feat,
          }),
        )
      }
    }

    for (const [name, subfeatures] of parentAggregation) {
      const [first] = subfeatures
      if (!first) {
        continue
      }
      const s = min(subfeatures.map(f => f.start))
      const e = max(subfeatures.map(f => f.end))
      // Only emit parents that overlap the region the caller asked for
      if (doesIntersect2(s, e, originalQuery.start, originalQuery.end)) {
        const { uniqueId, strand } = first
        observer.next(
          new SimpleFeature({
            id: `${this.id}-${uniqueId}-parent`,
            data: {
              type: 'gene',
              subfeatures,
              strand,
              name,
              start: s,
              end: e,
              refName: query.refName,
            },
          }),
        )
      }
    }
  }
  observer.complete()
}

public freeResources(/* { region } */) {}
}
7 changes: 7 additions & 0 deletions plugins/gtf/src/GtfAdapter/configSchema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@ const GtfAdapter = ConfigurationSchema(
type: 'fileLocation',
defaultValue: { uri: '/path/to/my.gtf', locationType: 'UriLocation' },
},
/**
* #slot
*/
aggregateField: {
type: 'string',
defaultValue: 'gene_name',
},
},
{ explicitlyTyped: true },
)
Expand Down
5 changes: 4 additions & 1 deletion plugins/gtf/src/util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ export interface FeatureLoc {
attributes: Record<string, unknown[]>
}

export function featureData(data: FeatureLoc) {
export function featureData(data: FeatureLoc, id?: string) {
const f: Record<string, unknown> = { ...data }
;(f.start as number) -= 1 // convert to interbase
f.strand = { '+': 1, '-': -1, '.': 0, '?': undefined }[data.strand] // convert strand
Expand Down Expand Up @@ -72,5 +72,8 @@ export function featureData(data: FeatureLoc) {
if (f.transcript_id) {
f.name = f.transcript_id
}
if (id !== undefined) {
f.uniqueId = id
}
return f
}

0 comments on commit dd91bc2

Please sign in to comment.