forked from opentargets/genetics-finemapping
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #8 from opentargets/js29
Changes for genetics R8
- Loading branch information
Showing
23 changed files
with
435 additions
and
159 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
#!/usr/bin/env bash
#
# Extract the variants inside one genomic range from a plink binary fileset
# and write them out as a new binary fileset.
#
# Usage: 0_plink_extract.sh <in_bfile_prefix> <out_prefix> <range_line>
#   in_bfile_prefix  passed to plink --bfile
#   out_prefix       passed to plink --out
#   range_line       text of the range definition handed to --extract range;
#                    the caller in this repo passes "<chrom> <start> <end> <label>"
#
# plink wants the range definition as a file; process substitution <(...)
# supplies it without creating a temp file. Arguments are quoted so that
# prefixes containing spaces or glob characters are passed through intact.

plink --bfile "$1" --out "$2" --make-bed --allow-extra-chr --extract range <(echo -e "$3")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
# | ||
# Splits the LD reference panel into multiple subfiles, to greatly speed up | ||
# GCTA-cojo. Each subfile is a 3-Mb window, so that for any top_loci variant, | ||
# we can use a subfile that has the top_loci variant +- 1 Mb. | ||
|
||
import pandas as pd | ||
import os | ||
import subprocess as sp | ||
import argparse | ||
|
||
|
||
def main():
    """Split each chromosome's LD reference panel into overlapping subfiles.

    Reads chromosome names and lengths from
    'configs/grch38_chrom_lengths.tsv' and, for every chromosome, walks a
    3-Mb window along it in 1-Mb steps. Each window is extracted from the
    chromosome's plink fileset (``--path`` with ``{chrom}`` substituted) by
    the helper script '0_plink_extract.sh' and written next to the input as
    ``<chr_ld_path>.<window_start>_<window_end>``.

    Both the config file and the helper script are referenced by relative
    path, so they are resolved against the current working directory.

    Returns:
        int: always 0. A window whose plink command exits non-zero is
        reported on stdout and skipped; processing continues with the next
        window.
    """
    # Parse args
    args = parse_args()

    window_size = int(3e6)
    window_spacing = int(1e6)

    chrom_lengths = pd.read_csv('configs/grch38_chrom_lengths.tsv', sep='\t')

    # Loop through each chromosome and use plink to split the ref panel into
    # overlapping windows
    for _, row in chrom_lengths.iterrows():
        chr_ld_path = args.path.format(chrom=row['chrom'])
        print(chr_ld_path)

        window_start = 0
        # Keep going while the last `window_spacing` stretch of the window
        # still begins before the end of the chromosome.
        while (window_start + window_size - window_spacing) < row['length']:
            # Define window and output path for subfile of main LD file
            window_end = window_start + window_size
            out_ld_path = chr_ld_path + '.{:d}_{:d}'.format(window_start, window_end)
            print("chr_ld_path:" + chr_ld_path)
            print("out_ld_path:" + out_ld_path)

            # plink requires a file to define the range to extract; the
            # helper script feeds this string to plink via bash process
            # substitution, so no temp file is needed.
            range_str = ' '.join([str(row['chrom']), str(window_start), str(window_end), 'range1'])

            # Pass the arguments as a list (no intermediate shell) so paths
            # containing spaces or quotes reach the script unmodified.
            cmd = ['/bin/bash', '0_plink_extract.sh', chr_ld_path, out_ld_path, range_str]
            cmd_display = '/bin/bash 0_plink_extract.sh {} {} \'{}\''.format(chr_ld_path, out_ld_path, range_str)
            print(cmd_display)

            # Run plink exactly once per window. (Previously the command was
            # launched twice, via os.system and then subprocess.run.)
            cp = sp.run(cmd, stderr=sp.STDOUT)
            if cp.returncode != 0:
                # Deliberately non-fatal: report the failure and move on to
                # the next window.
                print('Failed on plink command:\n{}'.format(cmd_display))

            window_start = window_start + window_spacing

    return 0
|
||
|
||
def parse_args(argv=None):
    """ Load command line args

    Args:
        argv: optional list of argument strings to parse. When None
            (the default) argparse reads the process command line.

    Returns:
        argparse.Namespace with a single attribute, ``path``: the LD
        reference path template containing ``{chrom}``.

    Raises:
        SystemExit: raised by argparse when ``--path`` is missing or the
            arguments are otherwise invalid.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--path',
                        metavar="<string>",
                        help='Path to LD reference; {chrom} in place of each chromosome name',
                        type=str,
                        required=True)
    args = parser.parse_args(argv)
    return args
|
||
|
||
if __name__ == '__main__': | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,12 @@ | ||
#!/usr/bin/env bash
#
# Copy the fine-mapping results to the staging bucket.
#
# Usage: <this script> [version_date]
#   version_date  name of the destination folder under
#                 gs://genetics-portal-dev-staging/finemapping/ ;
#                 falls back to today's date (yymmdd) when not given.

version_date=${1:-$(date +%y%m%d)}

# Copy results
gsutil -m rsync -r "$HOME/genetics-finemapping/results/" "gs://genetics-portal-dev-staging/finemapping/$version_date"

# Tar the logs and copy over
# This can take a very long time, so you may not want to keep the logs at all
#tar -zcvf logs.tar.gz $HOME/genetics-finemapping/logs
#gsutil -m cp logs.tar.gz gs://genetics-portal-dev-staging/finemapping/$version_date/logs.tar.gz
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
chrom length | ||
1 248956422 | ||
2 242193529 | ||
3 198295559 | ||
4 190214555 | ||
5 181538259 | ||
6 170805979 | ||
7 159345973 | ||
8 145138636 | ||
9 138394717 | ||
10 133797422 | ||
11 135086622 | ||
12 133275309 | ||
13 114364328 | ||
14 107043718 | ||
15 101991189 | ||
16 90338345 | ||
17 83257441 | ||
18 80373285 | ||
19 58617616 | ||
20 64444167 | ||
21 46709983 | ||
22 50818468 | ||
X 156040895 | ||
Y 57227415 | ||
MT 16569 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,3 +11,4 @@ dependencies: | |
- numpy | ||
- python-snappy | ||
- pyspark | ||
- jq |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.