Run in g/ directory:

See gpcrdb_gapped_tm_numbering.csv

3 + 4

python ../scripts/gpcrdb_spreadsheet_filter.py -n gpcrdb_gapped_tm_numbering.csv gpcrdb_human_swissprot_tm_aligment.csv > gpcrdb_human_swissprot.fa

5

export BLASTDB=../ensembl
# swissprot
blastp -db uniprot_sprot -query gpcrdb_human_swissprot.fa -out gpcrlike_swissprot.idlist -evalue 10 -outfmt '6 sseqid' -num_threads 4 -num_alignments 1000
sort -u gpcrlike_swissprot.idlist > gpcrlike_swissprot.uniq.idlist
blastdbcmd -db uniprot_sprot -entry_batch gpcrlike_swissprot.uniq.idlist -outfmt '>%i taxid=%T;organism=%S;source=swissprot;source_version=2015_08\n%s' | grep -v taxid=0 | perl -pe 's/^>.*\|.*\|(.*?) />$1 /;s/\xFF/*/g;s/\\n/\n/' > gpcrlike_swissprot.fa

# trembl
blastp -db uniprot_trembl -query gpcrdb_human_swissprot.fa -out gpcrlike_trembl.idlist -evalue 10 -outfmt '6 sseqid' -num_threads 4 -num_alignments 1000
sort -u gpcrlike_trembl.idlist > gpcrlike_trembl.uniq.idlist
blastdbcmd -db uniprot_trembl -entry_batch gpcrlike_trembl.uniq.idlist -outfmt '>%i taxid=%T;organism=%S;source=trembl;source_version=2015_08\n%s' | grep -v taxid=0 | perl -pe 's/^>.*\|.*\|(.*?) />$1 /;s/\xFF/*/g;s/\\n/\n/' > gpcrlike_trembl.fa

# ensembl
blastp -db ensembl -query gpcrdb_human_swissprot.fa -out gpcrlike_ensembl.idlist -evalue 10 -outfmt '6 sseqid' -num_threads 4 -num_alignments 1000
sort -u gpcrlike_ensembl.idlist > gpcrlike_ensembl.uniq.idlist
# replace xFF with *
blastdbcmd -db ensembl -entry_batch gpcrlike_ensembl.uniq.idlist -outfmt '>%a taxid=%T;organism=%S;source=ensembl;source_version=81\n%s' | perl -pe 's/\xFF/*/g;s/\\n/\n/' > gpcrlike_ensembl.fa

cat gpcrlike_swissprot.fa gpcrlike_trembl.fa gpcrlike_ensembl.fa > source.fa
../scripts/highly_similar_reducer.pl -m 0 source.fa > uniq.fa
../scripts/seq_min_group.pl -t 100 uniq.fa > uniqt100.fa
../scripts/alignment.py -run config-gapped.dat uniqt100.fa
cp files/total_alignment.dat ali.fa
../scripts/highly_similar_reducer.pl -m 9 ali.fa > ali.s9.fa
../scripts/seq_min_group.pl -t 100 ali.s9.fa > ali.s9t100.fa
clustalo -i ali.s9t100.fa -o ali.s9t100.clustalo.fa --guidetree-out=ali.s9t100.ph -v
# correct clustalo -0 distances to positive 0.0, biopython chokes on them
perl -p -e 's/:-0/:0.0/g' ali.s9t100.ph > ali.s9t100.ph-0
../scripts/entropy.py -a ali.s9t100.fa -n gpcrdb_gapped_tm_numbering.csv -t ali.s9t100.ph-0 > ali.s9t100.entropies

# Check if all seed sequences are present in output
grep '>' gpcrdb_human_swissprot2.fa|sort > gpcrdb_human_swissprot2.ids
wc -l gpcrdb_human_swissprot2.ids
310
grep '>' ali.s9t100.fa | awk '{print $1}' |sort > ali.s9t100.ids
diff  gpcrdb_human_swissprot2.ids ali.s9t100.ids | grep '<' |wc -l
87
# So 87 missing in final alignment
grep '>' gpcrlike_swissprot.fa | awk '{print $1}' |sort > gpcrlike_swissprot.fa.ids
diff  gpcrdb_human_swissprot2.ids gpcrlike_swissprot.fa.ids | grep '<' |wc -l
0
# So no missing in swissprot blast
grep '>' uniqt100.fa | awk '{print $1}' |sort > uniqt100.fa.ids
diff  gpcrdb_human_swissprot2.ids uniqt100.fa.ids | grep '<' |wc -l
0
# So no missing in input for alignment
grep '>' ali.fa | awk '{print $1}' |sort > ali.fa.ids
diff  gpcrdb_human_swissprot2.ids ali.fa.ids | grep '<' |wc -l
85
# Alignment looses most human seed seqs

# hypotheses seqs with gaps inside tms where dropped because they have lower scores

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

runs.md

runs.md

Files

runs.md

Latest commit

History

runs.md

File metadata and controls