-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathquery_rcsb.rb
128 lines (111 loc) · 4.27 KB
/
query_rcsb.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# query_rcsb.rb
#
# Created by Michael Walker as part of project MIF
# at Healthhack 2014
# Date 25/10/2014
# Emails: [email protected], [email protected]
#
# The purpose of this module is to present an amino acid sequence
# to the RCSB Protein Data Bank http://www.rcsb.org/pdb/home/home.do
# and receive the corresponding protein name and structure(s)
require 'mechanize' # Allows easy enquiry of web pages
#page3 = 0
#Divcontent = 0
def submit_rcsb(acc)
  # Fetches the RCSB "remediated sequence" page for the structure id +acc+
  # and scans every <div class="se_boxHeader"> on it.
  #
  # A header counts as a protein entry when characters 11..17 of its
  # stripped text read 'PROTEIN'; for each such header the character at
  # index 7 is collected (presumably the chain identifier -- confirm
  # against the live page layout).
  #
  # Prints the collected array with +p+ and returns it (empty when no
  # header matches).
  browser = Mechanize.new
  sequence_url = 'http://www.rcsb.org/pdb/explore/remediatedSequence.do?structureId=' + acc + '&bionumber=1'
  sequence_page = browser.get(sequence_url)

  protein_chains = []
  sequence_page.search("//div[@class='se_boxHeader']").each do |box|
    header = box.text.strip
    # Headers shorter than 12 characters yield nil here and are skipped.
    next unless header[11, 7] == 'PROTEIN'

    protein_chains << header[7]
  end

  p protein_chains
  protein_chains
end
def submit_ncbi(fasta, poll_delay = 10)
  # Submits a query sequence to the NCBI BLAST (blastp) web form, re-fetches
  # the resulting URL twice, and prints progress/debug output with +p+ while
  # locating parts of the result page.
  #
  # fasta      - text written into the form's QUERY field.
  # poll_delay - seconds to sleep before each of the two re-fetches of the
  #              result URL. Defaults to 10, the value that used to be
  #              hard-coded.
  #
  # Side effects: stores the last fetched result page in @page4 and the
  # div#content node in @divcontent.
  #
  # Returns nil.
  # Raises RuntimeError when an expected form control or page element is
  # missing (previously this surfaced as a NoMethodError on nil).

  # Returns the first node matching +xpath+, failing with a readable message
  # instead of handing nil to the next step.
  first_match = lambda do |node, xpath|
    node.search(xpath).first || raise("submit_ncbi: no element matches #{xpath}")
  end

  p "agent"
  agent = Mechanize.new
  p "siteurl"
  siteurl = "http://blast.ncbi.nlm.nih.gov/Blast.cgi?PROGRAM=blastp&PAGE_TYPE=BlastSearch&LINK_LOC=blasthome"
  p "page"
  page = agent.get(siteurl)
  p "page header"
  p page.title

  pageform = page.forms.first
  raise "submit_ncbi: BLAST page contains no form" unless pageform

  # Enter the query sequence
  hbox = pageform.fields_with(:name => "QUERY").first
  raise "submit_ncbi: form has no QUERY field" unless hbox
  hbox.value = fasta

  # Select database
  selectbox = pageform.radiobutton_with(:value => "blastp")
  raise "submit_ncbi: form has no blastp radio button" unless selectbox
  selectbox.check

  # Select organism
  orginput = pageform.fields_with(:name => "FORMAT_ORGANISM").first
  raise "submit_ncbi: form has no FORMAT_ORGANISM field" unless orginput
  orginput.value = "human (taxid:9606)"

  # Submit form
  page2 = pageform.submit
  siteurl2 = page2.uri
  p siteurl2

  # The result URL is requested twice with a pause before each request;
  # only the second response is kept. Presumably this gives the search time
  # to finish -- TODO confirm and replace with a real readiness check.
  sleep(poll_delay)
  agent.get(siteurl2)
  sleep(poll_delay)
  @page4 = agent.get(siteurl2)

  # NOTE: each XPath below starts with "//", so it is evaluated against the
  # whole document rather than only beneath the node it is called on.
  p 'divwrap'
  divwrap = first_match.call(@page4, "//div[@id='wrap']")
  p "divcontentwrap"
  divcontentwrap = first_match.call(divwrap, "//div[@id='content-wrap']")
  p "divcontent"
  @divcontent = first_match.call(divcontentwrap, "//div[@id='content']")
  # Fixed: this used to reference the undefined local `divcontent`, which
  # raised NameError and stopped the method here.
  p @divcontent.methods
  p 'formoverview'
  # A missing overview0 form is not treated as an error; nil is printed.
  formoverview0 = @divcontent.search("//form[@name='overview0']").first
  p formoverview0

  # The labels below mark steps that are not implemented yet; the lookups
  # they announce are still commented out.
  p 'divdscrView'
  p 'divuihelperreset'
  #divuihelperreset = divgrView.search("//div[@class='ui-helper-reset']").first
  p 'divgraphicinfo'
  #divgraphicInfo = divuihelperreset.search("//div[@id='graphicInfo']").first
  p 'divgraphic'
  #divgraphic = divgraphicInfo.search("//div[@id='graphic']").first
  nil
end
# Script entry point: when the interpreter exits, submit one hard-coded sample
# amino acid sequence through submit_ncbi.
at_exit do
  sample_sequence = "MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD"
  submit_ncbi(sample_sequence)
end
#MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD
#1TSR