-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtmhmmPipeline.sh
executable file
·68 lines (62 loc) · 1.83 KB
/
tmhmmPipeline.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/zsh
### --------------------------------------
# Run tmhmm with multiple processes and extract the results for transmembrane segments larger than a certain number.
### --------------------------------------
# Get options from command line
usage() {
echo "Usage:\n\tzsh /pathToScript/tmhmmPipeline.sh -p <protein> -n <threshold> -t <threads> "
}
while {getopts hp:o:t:n: arg} {
case $arg {
(h)
usage
exit
;;
(p)
protein=$OPTARG
if [[ ! -e $protein ]] {
echo "Please input correct path to protein!"
exit
}
;;
(t)
if [[ -n "`echo $OPTARG | sed 's/[0-9]//g'`" ]] {
"You must use int to specify the threads!"
exit
} else {
threads=$OPTARG
}
;;
(n)
if [[ -n "`echo $OPTARG | sed 's/[0-9]//g'`" ]] {
"You must use int to specify the counts of transmembrane!"
exit
} else {
trans=$OPTARG
}
;;
(?)
echo "You must use -p to specify protein file and -o to specify output file."
usage
exit
;;
}
}
# Start tmhmm pipeline
# Step1 Split cds
python $GENEFPATH/main/split_genome.py -g $protein -n $threads
# Step2 Running tmhmm
ls ${protein:h} |
grep -E "${protein:t}_[0-9]+$" |
while read id
do
echo "Proccessing ${protein:h}/$id"
tmhmm ${protein:h}/$id -short > ${protein:h}/tmhmm_result.txt_${id##*_} &
done
wait
cat ${protein:h}/tmhmm_result.txt_* > ${protein:h}/tmhmm_result.txt && rm -rf ${protein:h}/tmhmm_result.txt_* ${protein}_<-> $PWD/TMHMM_<->
# Step3 Get PredHel>=trans id
gawk -F"[\t=]" -v n=$trans '$9>=n{print $1}' ${protein:h}/tmhmm_result.txt > ${protein:h}/tmhmmGe${trans}_id.txt
# Step4 Get CDS by id
python $GENEFPATH/main/extract_by_id.py -f $protein -i ${protein:h}/tmhmmGe${trans}_id.txt > ${protein%.*}_tmhmm_Ge${trans}.faa
python $GENEFPATH/main/extract_by_id.py -f ${protein%faa}fna -i ${protein:h}/tmhmmGe${trans}_id.txt > ${protein%.*}_tmhmm_Ge${trans}.fna