Skip to content
This repository has been archived by the owner on Oct 23, 2023. It is now read-only.

Commit

Permalink
Merge pull request #85 from CSCfi/feature/matename
Browse files Browse the repository at this point in the history
Adding support for mateName
  • Loading branch information
blankdots authored Apr 22, 2019
2 parents 51da153 + 50956c6 commit 82ec8e7
Show file tree
Hide file tree
Showing 18 changed files with 597 additions and 96 deletions.
12 changes: 9 additions & 3 deletions beacon_api/api/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from .. import __apiVersion__, __handover_beacon__, __handover_drs__
from ..utils.data_query import filter_exists, find_datasets, fetch_datasets_access
from ..extensions.handover import make_handover
from ..extensions.mate_name import find_fusion
from .exceptions import BeaconUnauthorised, BeaconForbidden, BeaconBadRequest


Expand Down Expand Up @@ -100,9 +101,14 @@ async def query_request_handler(params):
public_datasets, registered_datasets, controlled_datasets = await fetch_datasets_access(params[0], request.get("datasetIds"))
access_type, accessible_datasets = access_resolution(request, params[3], params[4], public_datasets,
registered_datasets, controlled_datasets)
datasets = await find_datasets(params[0], request.get("assemblyId"), requested_position, request.get("referenceName"),
request.get("referenceBases"), alternate,
accessible_datasets, access_type, request.get("includeDatasetResponses", "NONE"))
if 'mateName' in request or alleleRequest.get('variantType') == 'BND':
datasets = await find_fusion(params[0], request.get("assemblyId"), requested_position, request.get("referenceName"),
request.get("referenceBases"), request.get('mateName'),
accessible_datasets, access_type, request.get("includeDatasetResponses", "NONE"))
else:
datasets = await find_datasets(params[0], request.get("assemblyId"), requested_position, request.get("referenceName"),
request.get("referenceBases"), alternate,
accessible_datasets, access_type, request.get("includeDatasetResponses", "NONE"))

beacon_response = {'beaconId': '.'.join(reversed(params[4].split('.'))),
'apiVersion': __apiVersion__,
Expand Down
4 changes: 2 additions & 2 deletions beacon_api/conf/config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
title=GA4GHBeacon at CSC

# Version of the Beacon implementation
version=1.1.0
version=1.2.0

# Author of this software
author=CSC developers
Expand All @@ -21,7 +21,7 @@ copyright=CSC - IT Center for Science

[beacon_api_info]
# Version of the Beacon API specification this implementation adheres to
apiVersion=1.0.1
apiVersion=1.1.0

# Globally unique identifier for this Beacon instance
beaconId=fi.csc.beacon
Expand Down
9 changes: 8 additions & 1 deletion beacon_api/extensions/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,8 @@
"""Extension Module."""
"""Extensions Module.
This module contains optional extensions to the Beacon Python Server.
Currently we have two extensions:
* handover - used to convey extra information regarding the Beacon service, or the dataset response;
* mate fusion - used for finding breakend records.
"""
111 changes: 111 additions & 0 deletions beacon_api/extensions/mate_name.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
"""Prepare mate."""
from functools import partial
from ..utils.logging import LOG
from ..api.exceptions import BeaconServerError
from ..conf.config import DB_SCHEMA
from .handover import add_handover
from ..utils.data_query import handle_wildcard, transform_misses, transform_record
from .. import __handover_drs__


async def fetch_fusion_dataset(db_pool, assembly_id, position, chromosome, reference, mate,
                               datasets=None, access_type=None, misses=False):
    """Query the mate (breakend fusion) table and return the processed records.

    The query is a UNION of two SELECTs over ``beacon_mate_table`` joined with
    ``beacon_dataset_table``:

    * the first compares ``a.mate`` with ``mate`` and applies the requested start
      values to ``a.end`` and the requested end values to ``a.mateStart``;
    * the second compares ``a.mate`` with ``chromosome`` and applies the requested
      start values to ``a.mateStart`` and the requested end values to ``a.end``.

    Every filter is wrapped in ``coalesce(..., true)``, so a parameter sent as NULL
    (``None``) disables that particular condition.

    :param db_pool: database pool exposing ``acquire()`` as an async context manager
    :param assembly_id: value compared with ``b.assemblyId``
    :param position: indexable with six items, read as
        ``(start, end, startMin, startMax, endMin, endMax)``
    :param chromosome: value compared with ``a.mate`` in the second SELECT
    :param reference: reference bases; passed through ``handle_wildcard`` when truthy
    :param mate: value compared with ``a.mate`` in the first SELECT
    :param datasets: dataset ids to filter on; empty/``None`` disables the filter
    :param access_type: access types to filter on; empty/``None`` disables the filter
    :param misses: when true the positional match is negated, rows are made distinct
        per dataset, ``exists`` is reported as FALSE and records are processed with
        ``transform_misses`` instead of ``transform_record``
    :returns: list of processed records (with a handover object added when
        ``__handover_drs__`` is truthy)
    :raises BeaconServerError: if preparing/executing the query or processing a row fails
    """
    # Empty filters are sent as NULL so that the matching coalesce() evaluates to true.
    datasets_query = datasets or None
    access_query = access_type or None

    start_min, start_max = position[2], position[3]
    end_min, end_max = position[4], position[5]
    # An exact position is only used when the corresponding min/max range is not fully given.
    start_pos = None if position[0] is None or (start_min and start_max) else position[0]
    end_pos = None if position[1] is None or (end_min and end_max) else position[1]

    refbase = handle_wildcard(reference) if reference else None

    # SQL fragments that differ between a "hit" query and a "miss" query.
    distinct = "DISTINCT ON (a.datasetId)" if misses else ""
    exists = "FALSE" if misses else "TRUE"
    negate = "NOT" if misses else ""
    # For a miss query restricted to given datasets the dataset condition is joined
    # with <> instead of AND (kept exactly as in the original query).
    dataset_op = "<>" if misses and datasets else "AND"

    # UBER QUERY - TBD if it is what we need
    # referenceBases, alternateBases and variantType fields are NOT part of beacon's specification response
    # NOTE: a UNION takes its column names from the first SELECT, so both SELECTs
    # must expose a.mateStart as "mateStart"; aliasing it as "start" would clash
    # with a.chromosomeStart and drop "mateStart" from the result.
    query = f"""SELECT {distinct}
                a.datasetId as "datasetId", b.accessType as "accessType", a.chromosome as "referenceName",
                a.reference as "referenceBases", a.alternate as "alternateBases", a.chromosomeStart as "start",
                a.mate as "mateName",
                a.chromosomePos as "referenceID", a.matePos as "mateID", a.mateStart as "mateStart", a.end as "end",
                b.externalUrl as "externalUrl", b.description as "note",
                a.alleleCount as "variantCount", CAST('BND' as text) as "variantType",
                a.callCount as "callCount", b.sampleCount as "sampleCount",
                a.frequency, {exists} as "exists"
                FROM {DB_SCHEMA}beacon_dataset_table b, {DB_SCHEMA}beacon_mate_table a
                WHERE a.datasetId=b.datasetId
                AND b.assemblyId=$3
                AND coalesce(a.mate=$4, true)
                AND coalesce(a.reference LIKE any($5::varchar[]), true)
                AND {negate} (coalesce(a.mateStart=$7, true)
                AND coalesce(a.end=$6, true)
                AND coalesce(a.end<=$8, true) AND coalesce(a.end>=$9, true)
                AND coalesce(a.mateStart>=$10, true) AND coalesce(a.mateStart<=$11, true))
                AND coalesce(b.accessType = any($2::varchar[]), true)
                {dataset_op} coalesce(a.datasetId = any($1::varchar[]), true)
                UNION
                SELECT {distinct}
                a.datasetId as "datasetId", b.accessType as "accessType", a.chromosome as "referenceName",
                a.reference as "referenceBases", a.alternate as "alternateBases", a.chromosomeStart as "start",
                a.mate as "mateName",
                a.chromosomePos as "referenceID", a.matePos as "mateID", a.mateStart as "mateStart", a.end as "end",
                b.externalUrl as "externalUrl", b.description as "note",
                a.alleleCount as "variantCount", CAST('BND' as text) as "variantType",
                a.callCount as "callCount", b.sampleCount as "sampleCount",
                a.frequency, {exists} as "exists"
                FROM {DB_SCHEMA}beacon_dataset_table b, {DB_SCHEMA}beacon_mate_table a
                WHERE a.datasetId=b.datasetId
                AND b.assemblyId=$3
                AND coalesce(a.mate=$12, true)
                AND coalesce(a.reference LIKE any($5::varchar[]), true)
                AND {negate} (coalesce(a.mateStart=$6, true)
                AND coalesce(a.end=$7, true)
                AND coalesce(a.mateStart<=$8, true) AND coalesce(a.mateStart>=$9, true)
                AND coalesce(a.end>=$10, true) AND coalesce(a.end<=$11, true))
                AND coalesce(b.accessType = any($2::varchar[]), true)
                {dataset_op} coalesce(a.datasetId = any($1::varchar[]), true);"""

    # Take one connection from the database pool
    async with db_pool.acquire(timeout=180) as connection:
        # Start a new session with the connection
        async with connection.transaction():
            try:
                statement = await connection.prepare(query)
                # Positional arguments map to $1..$12 in the query above.
                db_response = await statement.fetch(datasets_query, access_query, assembly_id,
                                                    mate, refbase,
                                                    start_pos, end_pos,
                                                    start_max, start_min,
                                                    end_min, end_max, chromosome)
                # Log the datasets that were actually requested (the parameter is no
                # longer overwritten with an empty list before logging).
                LOG.info(f"Query for dataset(s): {datasets} that are {access_type} matching conditions.")
                results = []
                for record in db_response:
                    processed = transform_misses(record) if misses else transform_record(record)
                    if __handover_drs__:
                        # If handover feature is enabled, add handover object to response
                        processed = add_handover(processed)
                    results.append(processed)
                return results
            except Exception as e:
                # Keep the original cause attached for easier debugging.
                raise BeaconServerError(f'Query dataset DB error: {e}') from e


async def find_fusion(db_pool, assembly_id, position, chromosome, reference, mate, dataset_ids, access_type, include_dataset):
    """Collect fusion (mate) records for the given filter parameters.

    Hits are always fetched for ``dataset_ids`` and ``access_type``. When
    ``include_dataset`` is ``'ALL'`` or ``'MISS'`` a second query is run with
    ``misses=True``, passing the ``datasetId`` values of the hits as its dataset list.

    :returns: the hit records followed by the miss records (if any were requested)
    """
    # Bind the arguments shared by both queries once.
    query_fusion = partial(fetch_fusion_dataset, db_pool, assembly_id, position, chromosome, reference, mate)

    found = await query_fusion(dataset_ids, access_type)

    missed = []
    if include_dataset in ('ALL', 'MISS'):
        found_ids = [entry["datasetId"] for entry in found]
        missed = await query_fusion(found_ids, access_type, misses=True)

    return found + missed
54 changes: 27 additions & 27 deletions beacon_api/schemas/info.json
Original file line number Diff line number Diff line change
Expand Up @@ -805,7 +805,7 @@
},
"variantType": {
"type": "string",
"enum": ["DEL", "INS", "DUP", "INV", "CNV", "SNP", "MNP", "DUP:TANDEM", "DEL:ME", "INS:ME"]
"enum": ["DEL", "INS", "DUP", "INV", "CNV", "SNP", "MNP", "DUP:TANDEM", "DEL:ME", "INS:ME", "BND"]
},
"assemblyId": {
"type": "string",
Expand All @@ -828,33 +828,33 @@
"type": "object"
},
"beaconHandover": {
"type": "array",
"required": [
"handoverType",
"url"
],
"properties": {
"handoverType": {
"type": "object",
"required": [
"id"
],
"properties": {
"id": {
"type": "string"
},
"label": {
"type": "string"
}
}
},
"description": {
"type": "string"
},
"url": {
"type": "string"
}
"type": "array",
"required": [
"handoverType",
"url"
],
"properties": {
"handoverType": {
"type": "object",
"required": [
"id"
],
"properties": {
"id": {
"type": "string"
},
"label": {
"type": "string"
}
}
},
"description": {
"type": "string"
},
"url": {
"type": "string"
}
}
}
}
}
34 changes: 30 additions & 4 deletions beacon_api/schemas/query.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@
"13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "X", "Y", "MT"
]
},
"mateName": {
"type": "string",
"enum": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12",
"13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "X", "Y", "MT"
]
},
"start": {
"type": "integer",
"minimum": 0
Expand Down Expand Up @@ -48,7 +54,7 @@
},
"variantType": {
"type": "string",
"enum": ["DEL", "INS", "DUP", "INV", "CNV", "SNP", "MNP", "DUP:TANDEM", "DEL:ME", "INS:ME"]
"enum": ["DEL", "INS", "DUP", "INV", "CNV", "SNP", "MNP", "DUP:TANDEM", "DEL:ME", "INS:ME", "BND"]
},
"assemblyId": {
"type": "string",
Expand All @@ -60,7 +66,7 @@
"items": {
"type": "string",
"default": "none",
"pattern": "^(.*)$"
"pattern": "^[^<>'\"/;`%{}+=]*$"
}
},
"includeDatasetResponses": {
Expand All @@ -80,7 +86,22 @@
"endMax"
],
"startMax": ["startMin"],
"endMax": ["endMin"]
"endMax": ["endMin"],
"mateName": {
"oneOf": [{
"required": [
"start",
"end"
]
},
{
"required": [
"startMin",
"endMin"
]
}
]
}
},
"allOf": [{
"oneOf": [{
Expand All @@ -92,6 +113,11 @@
"required": [
"alternateBases"
]
},
{
"required": [
"mateName"
]
}
]
}, {
Expand Down Expand Up @@ -119,7 +145,7 @@
},
{
"required": [
"endMin"
"endMax"
]
}
]
Expand Down
Loading

0 comments on commit 82ec8e7

Please sign in to comment.