diff --git a/plugins/gtf/src/GtfAdapter/GtfAdapter.ts b/plugins/gtf/src/GtfAdapter/GtfAdapter.ts
index eb3ed44b68..bf78d6d30f 100644
--- a/plugins/gtf/src/GtfAdapter/GtfAdapter.ts
+++ b/plugins/gtf/src/GtfAdapter/GtfAdapter.ts
@@ -1,6 +1,12 @@
 import IntervalTree from '@flatten-js/interval-tree'
 import { BaseFeatureDataAdapter } from '@jbrowse/core/data_adapters/BaseAdapter'
-import { SimpleFeature, fetchAndMaybeUnzip } from '@jbrowse/core/util'
+import {
+  SimpleFeature,
+  doesIntersect2,
+  fetchAndMaybeUnzip,
+  max,
+  min,
+} from '@jbrowse/core/util'
 import { openLocation } from '@jbrowse/core/util/io'
 import { ObservableCreate } from '@jbrowse/core/util/rxjs'
 import { parseStringSync } from 'gtf-nostream'
@@ -9,8 +15,9 @@ import { featureData } from '../util'
 
 import type { FeatureLoc } from '../util'
 import type { BaseOptions } from '@jbrowse/core/data_adapters/BaseAdapter'
-import type { Feature } from '@jbrowse/core/util'
-import type { NoAssemblyRegion } from '@jbrowse/core/util/types'
+import type { Feature, SimpleFeatureSerialized } from '@jbrowse/core/util'
+import type { NoAssemblyRegion, Region } from '@jbrowse/core/util/types'
+import type { Observer } from 'rxjs'
 
 type StatusCallback = (arg: string) => void
 
@@ -70,15 +77,12 @@ export default class GtfAdapter extends BaseFeatureDataAdapter {
             const intervalTree = new IntervalTree()
             ;(parseStringSync(lines) as FeatureLoc[][])
               .flat()
-              .map(
-                (f, i) =>
-                  new SimpleFeature({
-                    data: featureData(f),
-                    id: `${this.id}-${refName}-${i}`,
-                  }),
-              )
+              .map((f, i) => featureData(f, `${this.id}-${refName}-${i}`))
               .forEach(obj =>
-                intervalTree.insert([obj.get('start'), obj.get('end')], obj),
+                intervalTree.insert(
+                  [obj.start as number, obj.end as number],
+                  obj,
+                ),
               )
             this.calculatedIntervalTreeMap[refName] = intervalTree
           }
@@ -114,21 +118,144 @@ export default class GtfAdapter extends BaseFeatureDataAdapter {
     return header
   }
 
-  public getFeatures(query: NoAssemblyRegion, opts: BaseOptions = {}) {
+  /**
+   * Streams the features overlapping `query`. The work is delegated to
+   * `getFeaturesHelper`; an error thrown there is passed to the observer.
+   */
+  public getFeatures(query: Region, opts: BaseOptions = {}) {
     return ObservableCreate(async observer => {
       try {
-        const { start, end, refName } = query
-        const { intervalTreeMap } = await this.loadData(opts)
-        intervalTreeMap[refName]?.(opts.statusCallback)
-          .search([start, end])
-          .forEach(f => {
-            observer.next(f)
-          })
-        observer.complete()
+        await this.getFeaturesHelper({
+          query,
+          opts,
+          observer,
+          allowRedispatch: true,
+        })
       } catch (e) {
         observer.error(e)
       }
     }, opts.stopToken)
   }
+
+  /**
+   * Emits the features overlapping `query` on `observer`, then completes it.
+   *
+   * Features with a value for the configured `aggregateField` are grouped
+   * under a synthetic 'gene' parent feature; the others are emitted as is.
+   *
+   * If `allowRedispatch` is set and the matched features extend past `query`,
+   * the lookup is repeated once over their combined extent, so that parents
+   * are built from every feature in that wider range. Only features that
+   * intersect `originalQuery` are emitted.
+   *
+   * @param query - region searched in the interval tree
+   * @param opts - adapter options, forwarded to `loadData`
+   * @param observer - receives the features and the completion signal
+   * @param allowRedispatch - whether one widened re-query is permitted
+   * @param originalQuery - region the caller asked for; defaults to `query`
+   * @throws Error if a matched feature has no `uniqueId`
+   */
+  public async getFeaturesHelper({
+    query,
+    opts,
+    observer,
+    allowRedispatch,
+    originalQuery = query,
+  }: {
+    query: Region
+    opts: BaseOptions
+    observer: Observer<Feature>
+    allowRedispatch: boolean
+    originalQuery?: Region
+  }): Promise<void> {
+    const aggregateField = this.getConf('aggregateField')
+    const { start, end, refName } = query
+    const { intervalTreeMap } = await this.loadData(opts)
+    const feats = intervalTreeMap[refName]?.(opts.statusCallback).search([
+      start,
+      end,
+    ])
+    if (feats) {
+      if (allowRedispatch && feats.length) {
+        let minStart = Number.POSITIVE_INFINITY
+        let maxEnd = Number.NEGATIVE_INFINITY
+        for (const feat of feats) {
+          if (feat.start < minStart) {
+            minStart = feat.start
+          }
+          if (feat.end > maxEnd) {
+            maxEnd = feat.end
+          }
+        }
+        if (maxEnd > query.end || minStart < query.start) {
+          // widen once; the nested call emits the features and completes the
+          // observer, so this call has nothing left to do
+          await this.getFeaturesHelper({
+            query: {
+              ...query,
+              start: minStart,
+              end: maxEnd,
+            },
+            opts,
+            observer,
+            allowRedispatch: false,
+            originalQuery: query,
+          })
+          return
+        }
+      }
+
+      if (feats.some(f => f.uniqueId === undefined)) {
+        throw new Error('found uniqueId undefined')
+      }
+
+      const { start: qStart, end: qEnd } = originalQuery
+      const parentAggregation: Record<string, SimpleFeatureSerialized[]> = {}
+      for (const feat of feats) {
+        const aggr = feat[aggregateField]
+        if (aggr) {
+          // create buckets lazily, and only for features that have a group
+          let bucket = parentAggregation[aggr]
+          if (!bucket) {
+            bucket = []
+            parentAggregation[aggr] = bucket
+          }
+          bucket.push(feat)
+        } else if (doesIntersect2(feat.start, feat.end, qStart, qEnd)) {
+          // a widened query also matches features outside the caller's region
+          observer.next(
+            new SimpleFeature({
+              id: `${this.id}-${feat.uniqueId}`,
+              data: feat,
+            }),
+          )
+        }
+      }
+
+      for (const [name, subfeatures] of Object.entries(parentAggregation)) {
+        const s = min(subfeatures.map(f => f.start))
+        const e = max(subfeatures.map(f => f.end))
+        if (doesIntersect2(s, e, qStart, qEnd)) {
+          const { uniqueId, strand } = subfeatures[0]!
+          observer.next(
+            new SimpleFeature({
+              id: `${this.id}-${uniqueId}-parent`,
+              data: {
+                type: 'gene',
+                subfeatures,
+                strand,
+                name,
+                start: s,
+                end: e,
+                refName: query.refName,
+              },
+            }),
+          )
+        }
+      }
+    }
+    observer.complete()
+  }
+
   public freeResources(/* { region } */) {}
 }
diff --git a/plugins/gtf/src/GtfAdapter/configSchema.ts b/plugins/gtf/src/GtfAdapter/configSchema.ts
index c62fe293c6..3f556f1e21 100644
--- a/plugins/gtf/src/GtfAdapter/configSchema.ts
+++ b/plugins/gtf/src/GtfAdapter/configSchema.ts
@@ -16,6 +16,13 @@ const GtfAdapter = ConfigurationSchema(
       type: 'fileLocation',
       defaultValue: { uri: '/path/to/my.gtf', locationType: 'UriLocation' },
     },
+    /**
+     * #slot
+     */
+    aggregateField: {
+      type: 'string',
+      defaultValue: 'gene_name',
+    },
   },
   { explicitlyTyped: true },
 )
diff --git a/plugins/gtf/src/util.ts b/plugins/gtf/src/util.ts
index cbe95f9a9d..2b5b0410e9 100644
--- a/plugins/gtf/src/util.ts
+++ b/plugins/gtf/src/util.ts
@@ -11,7 +11,7 @@ export interface FeatureLoc {
   attributes: Record
 }
 
-export function featureData(data: FeatureLoc) {
+export function featureData(data: FeatureLoc, id?: string) {
   const f: Record = { ...data }
   ;(f.start as number) -= 1 // convert to interbase
   f.strand = { '+': 1, '-': -1, '.': 0, '?': undefined }[data.strand] // convert strand
@@ -72,5 +72,8 @@ export function featureData(data: FeatureLoc) {
   if (f.transcript_id) {
     f.name = f.transcript_id
   }
+  if (id !== undefined) {
+    f.uniqueId = id
+  }
   return f
 }