-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathxds_iter.py
176 lines (164 loc) · 7.48 KB
/
xds_iter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
import sys
import argparse
import subprocess
import re
import os
def set_xds_inp_crystal_form(args):
subprocess.call(["mv", "XDS.INP", "XDS.INP.old"])
f_in = open("XDS.INP.old", "r")
f_out = open("XDS.INP", "a")
for line in f_in.readlines():
if "SPACE_GROUP_NUMBER" in line:
# For PTP1B: 152
f_out.write("SPACE_GROUP_NUMBER= %s \n" % args.space_group_num)
elif "UNIT_CELL_CONSTANTS" in line:
# For PTP1B:
# 88,88,104,90,90,120
# 88 88 104 90 90 120
ucs_str = " ".join(args.unit_cell_constants.split(","))
f_out.write("UNIT_CELL_CONSTANTS= %s\n" % ucs_str)
else:
f_out.write(line,)
f_in.close()
f_out.close()
def set_xds_inp_resol_range(curr_resol):
subprocess.call(["mv", "XDS.INP", "XDS.INP.old"])
f_in = open("XDS.INP.old", "r")
f_out = open("XDS.INP", "a")
for line in f_in.readlines():
if line.strip().startswith("INCLUDE_RESOLUTION_RANGE"):
f_out.write("INCLUDE_RESOLUTION_RANGE= 80 %.2f \n" % curr_resol)
else:
f_out.write(line,)
f_in.close()
f_out.close()
def set_xds_inp_jobs(jobs):
subprocess.call(["mv", "XDS.INP", "XDS.INP.old"])
f_in = open("XDS.INP.old", "r")
f_out = open("XDS.INP", "a")
for line in f_in.readlines():
if "JOB" in line:
f_out.write("JOB= %s\n" % jobs)
else:
f_out.write(line,)
f_in.close()
f_out.close()
def conditions_met(args):
# Parse CORRECT.LP, find table with final stats by resolution bin,
# and decide if our conditions are all met for the highest-resolution bin.
# We are trusting that CORRECT divvies up reflections into appropriately sized
# bins, such that we can simply read from the last (high-resolution) bin.
# We also take advantage of the fact that versions of this table appear throughout
# CORRECT.LP, but earlier ones are for subsets of the data and the final one is
# for the entire dataset, which is what we care about.
compl = None
i_sigma = None
cc_half = None
cc_half_signif = False
f = open("CORRECT.LP", "r")
pattern = r'^(\s+)(\d+)\.(\d{2})(\s+)(\d+)(\s+)(\d+)'
for line in f.readlines():
if re.match(pattern, line):
parts = line.split()
compl = float(parts[4].split("%")[0])
i_sigma = float(parts[8])
cc_half = float(parts[10].split("*")[0])
if "*" in parts[10]:
cc_half_signif = True
else:
cc_half_signif = False # because this can be set to True by earlier lines (lower-resol bins),
# but we want its final value to reflect the last line (highest-resol bin)
assert compl is not None and i_sigma is not None and cc_half is not None, \
"Error: Couldn't find completeness, I/sigma, and/or CC1/2. Did XDS fail?!"
if compl >= args.min_compl and i_sigma >= args.min_i_sigma and cc_half >= args.min_cc_half and cc_half_signif:
print compl, ">=", args.min_compl
print i_sigma, ">=", args.min_i_sigma
print cc_half, ">=", args.min_cc_half
return True
return False
if __name__ == "__main__":
# Parse command-line arguments
parser = argparse.ArgumentParser()
parser.add_argument('--images', action="store", dest="image_fn_template", type=str, required=True)
parser.add_argument('--min_complete', action="store", dest="min_compl", type=float, default=90.)
parser.add_argument('--min_i_sigma', action="store", dest="min_i_sigma", type=float, default=1.0)
parser.add_argument('--min_cc_half', action="store", dest="min_cc_half", type=float, default=50.)
parser.add_argument('--start_resol', action="store", dest="high_resol", type=float, default=1.4)
parser.add_argument('--resol_step_size', action="store", dest="resol_step_size", type=float, default=0.05)
parser.add_argument('--max_resol', action="store", dest="max_resol", type=float, default=3.5)
parser.add_argument('--space_group_num', action="store", dest="space_group_num", type=str)
parser.add_argument('--unit_cell_constants', action="store", dest="unit_cell_constants", type=str)
parser.add_argument('--only_correct_loops', dest='only_correct_loops', action='store_true', default=False)
args = parser.parse_args()
assert args.min_cc_half > 1., "Error: Min CC1/2 provided should be on the 0-100 scale!"
# Initial XDS run with liberally high resolution (expectation is this will not meet our criteria)
subprocess.call(["generate_XDS.INP", "\"%s\"" % args.image_fn_template]) # include quotes
if args.space_group_num and args.unit_cell_constants:
set_xds_inp_crystal_form(args)
else:
print >> sys.stderr, "Space group number and unit cell constants not specified; using XDS defaults"
curr_resol = args.high_resol
set_xds_inp_resol_range(curr_resol)
if not args.only_correct_loops:
# Run through IDXREF first. IDXREF will throw an error about un-indexed reflections in ~20% of cases.
# In a lot of cases this is fine -- we think it may just be because we are *initially* processing at
# ultra-high resolution. We will rely on subsequent xds steps to flag these datasets as crappy.
# Worst case, our CORRECT loop (below) will never find a resolution at which our criteria are met
# before it reaches the max resolution we want to consider, so the process will die there.
set_xds_inp_jobs("XYCORR INIT COLSPOT IDXREF")
subprocess.call(["xds"])
# Run the remaining steps following IDXREF
set_xds_inp_jobs("DEFPIX INTEGRATE CORRECT")
subprocess.call(["xds"])
subprocess.call(["cp", "CORRECT.LP", "CORRECT.LP_%.2f" % curr_resol])
# Iterate CORRECT as needed, slowly backing off on high resolution
set_xds_inp_jobs("CORRECT")
if not os.path.exists("CORRECT.LP"):
# This is probably only true if --only_correct_loops is used and CORRECT.LP has been deleted
subprocess.call(["xds"])
subprocess.call(["cp", "CORRECT.LP", "CORRECT.LP_%.2f" % curr_resol])
while not conditions_met(args) and curr_resol <= args.max_resol:
curr_resol += args.resol_step_size
set_xds_inp_resol_range(curr_resol)
subprocess.call(["xds"])
subprocess.call(["cp", "CORRECT.LP", "CORRECT.LP_%.2f" % curr_resol])
good_resol = curr_resol
# Iterate CORRECT in the opposite direction back toward higher resolution,
# but in smaller increments, to settle on the very highest resolution limit
# that meets all our criteria. Should go less than resol_step_size, at most.
import math
n_iters = int(math.ceil(args.resol_step_size / 0.01)) # 5 iterations by default
for n in range(1, n_iters):
curr_resol = good_resol - (0.01 * n)
set_xds_inp_resol_range(curr_resol)
subprocess.call(["xds"])
subprocess.call(["cp", "CORRECT.LP", "CORRECT.LP_%.2f" % curr_resol])
if not conditions_met(args):
# We went just too far -- undo this last step
print "%.2f A is too far -- backing off by 0.01 A" % curr_resol
curr_resol += 0.01
break
best_resol = curr_resol
# Re-run with the final best resolution, since we (likely?) over-shot a second ago,
# to get the final XDS_ASCII.HKL
set_xds_inp_resol_range(best_resol)
subprocess.call(["xds"])
subprocess.call(["cp", "CORRECT.LP", "CORRECT.LP_%.2f_FINAL" % curr_resol])
# Convert from HKL to MTZ with xdsconv
f = open("XDSCONV.INP", "w")
f.write("""
INPUT_FILE= XDS_ASCII.HKL
OUTPUT_FILE= temp.hkl CCP4
FRIEDEL'S_LAW= TRUE""")
f.close()
subprocess.call(["xdsconv"])
g = open("run_xdsconv.sh", "w")
g.write("""
f2mtz HKLOUT temp.mtz<F2MTZ.INP
cad HKLIN1 temp.mtz HKLOUT XDS_ASCII.mtz<<EOF
LABIN FILE 1 ALL
END
EOF""")
g.close()
subprocess.call(["source", "run_xdsconv.sh"])
print "********** Done at %.2f A! **********" % best_resol