-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathx_cmp.py
535 lines (480 loc) · 21.2 KB
/
x_cmp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
import sys
import os
from optparse import OptionParser
import subprocess
EXTND = 200
_fp_suffix=".false_positive"
_tp_suffix=".true_positive"
_fn_suffix=".false_negative"
class RsltCMP():
def __init__(self, sf_input, sf_bcmk):
self.sf_input = sf_input
self.sf_bcmk = sf_bcmk
self._fp_suffix=".false_positive"
self._tp_suffix=".true_positive"
#load the sites
def load_sites(self, sf_input):
m_sites = {}
with open(sf_input) as fin_input:
for line in fin_input:
if len(line)>1 and line[0]=="#":#bypass head lines
continue
fields = line.split()
chrm = fields[0]
pos = int(fields[1])
if chrm not in m_sites:
m_sites[chrm] = {}
if pos not in m_sites[chrm]:
m_sites[chrm][pos] = []
for svalue in fields[2:]:
m_sites[chrm][pos].append(svalue)
return m_sites
####
####
#process chrm name
def process_chrm_name(self, chrm, b_with_chr):
b_chrm_with_chr = False
if len(chrm) > 3 and chrm[:3] == "chr": ##Here remove the "chr"
b_chrm_with_chr = True
# print chrm, self.b_with_chr, b_chrm_with_chr ############################################################################
if b_with_chr == True and b_chrm_with_chr == True:
return chrm
elif b_with_chr == True and b_chrm_with_chr == False:
return "chr" + chrm
elif b_with_chr == False and b_chrm_with_chr == True:
return chrm[3:]
else:
return chrm
#m_hit save the position of in m_dict1
def cmp_dict(self, m_dict1, m_bcmk):
n_overlap = 0
m_unhit = {}
m_hit={}
m_hit_coordinate_bcmk={}
# print m_dict1.keys(), "dict keys"
# print m_bcmk.keys(), "bcmk keys"
b_contain_chr = False
if "chr1" in m_bcmk:
b_contain_chr = True
for chrm_1 in m_bcmk:
chrm = self.process_chrm_name(chrm_1, b_contain_chr)
if chrm not in m_dict1:
#print "chromosome {0} is not included in database\n".format(chrm)
if chrm not in m_unhit:
m_unhit[chrm]={}
for tmp_pos in m_bcmk[chrm_1]:
m_unhit[chrm][tmp_pos]=1
continue
for pos in m_bcmk[chrm_1]:
b_hit = False
for i in range(-1 * EXTND, EXTND):
if (pos + i) in m_dict1[chrm]:#
n_overlap += 1
b_hit = True
if chrm not in m_hit:
m_hit[chrm]={}
m_hit_coordinate_bcmk[chrm]={}
m_hit[chrm][pos+i]=1
m_hit_coordinate_bcmk[chrm][pos]=1
break
if b_hit == False:
if chrm not in m_unhit:
m_unhit[chrm] = {}
m_unhit[chrm][pos] = 1
#Here also need to check those in m_dic1, but not in m_bcmk
#and save them to m_unhit
# for chrm_1 in m_dict1:
# chrm = self.process_chrm_name(chrm_1, b_contain_chr)
# if chrm not in m_hit_coordinate_bcmk:
# for pos in m_dict1[]
return n_overlap, m_unhit, m_hit, m_hit_coordinate_bcmk
#exclude the sites
def exclude_sites(self, m_input, m_exclude, i_extnd):
m_new_input={}
for chrm in m_input:
for pos in m_input[chrm]:
if chrm not in m_exclude:
if chrm not in m_new_input:
m_new_input[chrm]={}
m_new_input[chrm][pos]=m_input[chrm][pos]
else:
b_hit=False
for tmp_pos in range(pos-i_extnd, pos+i_extnd):
if tmp_pos in m_exclude[chrm]:
b_hit=True
break
if b_hit==True:
continue
if chrm not in m_new_input:
m_new_input[chrm]={}
m_new_input[chrm][pos]=m_input[chrm][pos]
return m_new_input
####
#cmp results
def cmp_rslts(self, i_extd, sf_unhit, s_sample, m_exclude=None):
global EXTND
EXTND=i_extd
m_bcmk1 = self.load_sites(self.sf_bcmk)
m_input1 = self.load_sites(self.sf_input)
#here remove those sites in m_exclude
m_bcmk=m_bcmk1
m_input=m_input1
if m_exclude is not None:
m_bcmk =self.exclude_sites(m_bcmk1, m_exclude, i_extd)
m_input = self.exclude_sites(m_input1, m_exclude, i_extd)
n_bcmk, m_unhit1, m_hit1, m_hit_bcmk_pos1 = self.cmp_dict(m_bcmk, m_bcmk)
n_hit, m_unhit2, m_hit2, m_hit_bcmk_pos2 = self.cmp_dict(m_input, m_bcmk)
#n_hit_fp, m_unhit_fp, m_hit3 = self.cmp_dict(m_bcmk, m_input)
print("{0}/{1} hits/all".format(n_hit, n_bcmk))
with open(sf_unhit, "w") as fout_unhit:
for chrm in m_unhit2:
for pos in m_unhit2[chrm]:
fout_unhit.write(chrm + "\t" + str(pos) + "\n")
with open(sf_unhit+self._tp_suffix, "w") as fout_tp:
for chrm in m_hit2:
for pos in m_hit2[chrm]:
sinfo = "\t".join(m_input[chrm][pos])
fout_tp.write(chrm + "\t" + str(pos) + "\t" + sinfo + "\n")
with open(sf_unhit+s_sample+"."+self._tp_suffix, "w") as fout_tp2:
for chrm in m_hit_bcmk_pos2:
for pos in m_hit_bcmk_pos2[chrm]:
sinfo = "\t".join(m_bcmk[chrm][pos])
fout_tp2.write(chrm + "\t" + str(pos) + "\t" + sinfo + "\n")
# with open(sf_unhit+".false_positive", "w") as fout_fp:
# for chrm in m_unhit_fp:
# for pos in m_unhit_fp[chrm]:
# sinfo="\t".join(m_input[chrm][pos])
# fout_fp.write(chrm + "\t" + str(pos) + "\t" + sinfo+ "\n")
return n_hit
#cmp results
def cmp_rslts2(self, i_extd, sf_unhit, m_exclude=None):
global EXTND
EXTND=i_extd
m_bcmk1 = self.load_sites(self.sf_bcmk)
m_input1 = self.load_sites(self.sf_input)
# here remove those sites in m_exclude
m_bcmk = m_bcmk1
m_input = m_input1
if m_exclude is not None:
m_bcmk = self.exclude_sites(m_bcmk1, m_exclude, i_extd)
m_input = self.exclude_sites(m_input1, m_exclude, i_extd)
n_bcmk, m_unhit1, m_hit1, m_hit_bcmk_pos1 = self.cmp_dict(m_bcmk, m_bcmk)
n_hit, m_unhit2, m_hit2, m_hit_bcmk_pos2 = self.cmp_dict(m_input, m_bcmk)
n_hit_fp, m_unhit_fp, m_hit3, m_hit_bcmk_pos3 = self.cmp_dict(m_bcmk, m_input)
print("{0}/{1} hits/all".format(n_hit, n_bcmk))
with open(sf_unhit, "w") as fout_unhit:
for chrm in m_unhit2:
for pos in m_unhit2[chrm]:
fout_unhit.write(chrm + "\t" + str(pos) + "\n")
with open(sf_unhit+self._tp_suffix, "w") as fout_tp:
for chrm in m_hit2:
for pos in m_hit2[chrm]:
sinfo = "\t".join(m_input[chrm][pos])
fout_tp.write(chrm + "\t" + str(pos) + "\t" + sinfo + "\n")
with open(sf_unhit+self._fp_suffix, "w") as fout_fp:
for chrm in m_unhit_fp:
for pos in m_unhit_fp[chrm]:
if (chrm not in m_input) or (pos not in m_input[chrm]):
continue
sinfo="\t".join(m_input[chrm][pos])
fout_fp.write(chrm + "\t" + str(pos) + "\t" + sinfo+ "\n")
return n_hit
#####get the insertions fall in the focal region
def gnrt_ins_within_focal_region(self, sf_var, sf_hc_bed, sf_new_var):
if sf_hc_bed is None:
return
cmd="bedtools intersect -a {0} -b {1} > {2}".format(sf_hc_bed, sf_var, sf_new_var)
print(cmd)
subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE).communicate()
def set_input(self, sf_new):
self.sf_input=sf_new
####
def cnt_number_of_ins(self, sf_input):
n_cnt=0
with open(sf_input) as fin:
for line in fin:
if len(line) > 0 and line[0] == "#":
continue
n_cnt+=1
return n_cnt
####load in the result list
def load_in_rslt_list(self, sf_list):
m_rslts={}
with open(sf_list) as fin_list:
for line in fin_list:
fields=line.split()
sf_rslt=fields[0]
s_type=fields[1]
s_sample=fields[2]
if s_sample not in m_rslts:
m_rslts[s_sample]={}
if s_type not in m_rslts[s_sample]:
m_rslts[s_sample][s_type]=sf_rslt
return m_rslts
####
####
####
def check_one_sample_agnst_bcmk(sf_input, sf_bcmk, m_exclude_sites, sf_hc_bed, i_extd, s_type, sf_unhit, s_sample):
sf_hc_input = sf_input + ".focal_region.txt"
if sf_hc_bed is None:
sf_hc_input = sf_input
rslt_cmp = RsltCMP(sf_hc_input, sf_bcmk)
rslt_cmp.gnrt_ins_within_focal_region(sf_input, sf_hc_bed, sf_hc_input)
#here filter out those fall in "exlucded list"
n_ori_in = rslt_cmp.cnt_number_of_ins(sf_input)
n_hc_in = rslt_cmp.cnt_number_of_ins(sf_hc_input)
print("##########{0}############".format(s_type))
print("Original call set size: {0}\n".format(n_ori_in))
print("Fall in high confident regions: {0}\n".format(n_hc_in))
print("#########################")
n_hit=rslt_cmp.cmp_rslts(i_extd, sf_unhit, s_sample, m_exclude_sites)
return n_ori_in, n_hc_in, n_hit
####
####this one output the false positives to a separate file
def check_one_sample_agnst_bcmk2(sf_input, sf_bcmk, sf_exclude, sf_hc_bed, i_extd, s_type, sf_unhit):
sf_hc_input = sf_input + ".focal_region.txt"
if sf_hc_bed is None:
sf_hc_input = sf_input
rslt_cmp = RsltCMP(sf_hc_input, sf_bcmk)
rslt_cmp.gnrt_ins_within_focal_region(sf_input, sf_hc_bed, sf_hc_input)
m_exclude_sites = {}
if sf_exclude is not None and os.path.isfile(sf_exclude) == True:
m_exclude_sites = rslt_cmp.load_sites(sf_exclude)
n_ori_in = rslt_cmp.cnt_number_of_ins(sf_input)
n_hc_in = rslt_cmp.cnt_number_of_ins(sf_hc_input)
print("##########{0}############".format(s_type))
print("Original call set size: {0}\n".format(n_ori_in))
print("Fall in high confident regions: {0}\n".format(n_hc_in))
print("#########################")
n_hit=rslt_cmp.cmp_rslts2(i_extd, sf_unhit, m_exclude_sites)#
return n_ori_in, n_hc_in, n_hit
####
def gnrt_bed_from_input(sf_input, sf_new):
with open(sf_input) as fin, open(sf_new,"w") as fout_new:
for line in fin:
if len(line)>0 and line[0]=="#":#skip the header
continue
fields=line.split()
ins_chrm=fields[0]
ins_pos=int(fields[1])
s_line="{0}\t{1}\t{2}\n".format(ins_chrm, ins_pos, ins_pos+1)
fout_new.write(s_line)
####sf_list in format: sf_rslt s_rep_type sample
def cmp_for_list_of_rslts(sf_list, sf_bcmk, sf_exclude, sf_hc_bed, i_extd, sf_unhit):
m_cnt={}
l_sample_id=[]
m_tmp={}
rslt_cmp = RsltCMP(sf_list, sf_bcmk)
m_rslt_list = rslt_cmp.load_in_rslt_list(sf_list)
print(m_rslt_list)
n_bcmk=rslt_cmp.cnt_number_of_ins(sf_bcmk)
m_tp_fp_fn={}
m_exclude_sites = {}
if sf_exclude is not None and os.path.isfile(sf_exclude)==True:
m_exclude_sites=rslt_cmp.load_sites(sf_exclude)
for s_sample in m_rslt_list:
for s_rep_type in m_rslt_list[s_sample]:
if s_sample not in m_tmp:
l_sample_id.append(s_sample)
m_tmp[s_sample]=1
sf_input = m_rslt_list[s_sample][s_rep_type]
sf_new_input=sf_input+".tmp"
gnrt_bed_from_input(sf_input, sf_new_input)
n_ori_in, n_hc_in, n_hit=check_one_sample_agnst_bcmk(sf_new_input, sf_bcmk, m_exclude_sites, sf_hc_bed,
i_extd, s_rep_type, sf_unhit, s_sample)
if s_sample not in m_cnt:
m_cnt[s_sample]={}
m_cnt[s_sample][s_rep_type]=(n_ori_in, n_hc_in, n_hit)
####used for calc F1 score
n_sample_cnt=rslt_cmp.cnt_number_of_ins(sf_input)
n_tp=n_hit
n_fp=n_sample_cnt-n_tp
n_fn=n_bcmk-n_tp
if s_sample not in m_tp_fp_fn:
m_tp_fp_fn[s_sample]={}
m_tp_fp_fn[s_sample][s_rep_type]=(n_tp, n_fp, n_fn)
return m_cnt, l_sample_id, m_tp_fp_fn
####
####
def load_in_sample_id(sf_ids):
l_ids = []
with open(sf_ids) as fin_samples:
for line in fin_samples:
s_id = line.rstrip()
l_fields = s_id.split("_")
l_ids.append(l_fields[0])
return l_ids
####
def export_cnt_to_csv_for_point_chart(l_sample_info, m_cnt, sf_cnt):
sf_cnt1=sf_cnt+"_ori.csv"
sf_cnt2 = sf_cnt + "_fall_in_hc.csv"
sf_cnt3 = sf_cnt + "_hit.csv"
with open(sf_cnt1, "w") as fout_csv1, open(sf_cnt2, "w") as fout_csv2, open(sf_cnt3, "w") as fout_csv3:
s_head="Repeat\tMethod\tTP\tFP\n"
fout_csv1.write(s_head)
fout_csv2.write(s_head)
fout_csv3.write(s_head)
for s_sample in l_sample_info:
for s_type in m_cnt[s_sample]:
cnt1=m_cnt[s_sample][s_type][0]
fout_csv1.write(s_type+"\t"+s_sample+"\t"+str(cnt1)+"\n")
cnt2 = m_cnt[s_sample][s_type][1]
fout_csv2.write(s_type + "\t" + s_sample + "\t" + str(cnt2) + "\n")
cnt3 = m_cnt[s_sample][s_type][2]
fout_csv3.write(s_type + "\t" + s_sample + "\t" + str(cnt3) + "\n")
#
def calc_export_F1_score_sensitivity_FDR_csv(m_tp_fp_fn, sf_out):
with open(sf_out, "w") as fout_f1:
s_head="Method\tCoverage\tF1\tSensitivity\tFDR\tTP\tFP\tFN\n"
fout_f1.write(s_head)
for s_sample in m_tp_fp_fn:#for each sample
for s_type in m_tp_fp_fn[s_sample]:#for each repeat type
tp=m_tp_fp_fn[s_sample][s_type][0]
fp=m_tp_fp_fn[s_sample][s_type][1]
fn=m_tp_fp_fn[s_sample][s_type][2]
f1_score=(2.0*float(tp))/(2.0*float(tp)+float(fp)+float(fn))
tmp_fields=s_sample.split("_")
s_method=tmp_fields[0]
s_cov=tmp_fields[1]
f_sensitivity=float(tp)/(float(tp)+float(fn))
f_fdr=float(fp)/(float(fp)+float(tp))
fout_f1.write(s_method + "\t" + s_cov + "\t" + str(f1_score) + "\t" +
str(f_sensitivity) + "\t" + str(f_fdr) + "\t" + str(tp) +
"\t" + str(fp) + "\t" + str(fn) + "\n")
def calc_export_F1_score_sensitivity_FDR_csv2(m_tp_fp_fn, sf_out):
sf_out2=sf_out+".2"
with open(sf_out, "w") as fout_f1, open(sf_out2, "w") as fout_f2:
s_head = "Method\tCoverage\tF1\tSensitivity\tFDR\tCategory\tNumber of TE-Insertion\n"
fout_f1.write(s_head)
s_head2 = "Platform\tRatio\tCategory\n"
fout_f2.write(s_head2)
for s_sample in m_tp_fp_fn: # for each sample
for s_type in m_tp_fp_fn[s_sample]: # for each repeat type
tp = m_tp_fp_fn[s_sample][s_type][0]
fp = m_tp_fp_fn[s_sample][s_type][1]
fn = m_tp_fp_fn[s_sample][s_type][2]
f1_score = (2.0 * float(tp)) / (2.0 * float(tp) + float(fp) + float(fn))
tmp_fields = s_sample.split("_")
s_method = tmp_fields[0]
s_cov = tmp_fields[1]
f_sensitivity = float(tp) / (float(tp) + float(fn))
f_fdr = float(fp) / (float(fp) + float(tp))
fout_f1.write(s_method + "\t" + s_cov + "\t" + str(f1_score) + "\t" +
str(f_sensitivity) + "\t" + str(f_fdr) + "\tTP\t" + str(tp) + "\n")
fout_f1.write(s_method + "\t" + s_cov + "\t" + str(f1_score) + "\t" +
str(f_sensitivity) + "\t" + str(f_fdr) + "\tFP\t" + str(fp) + "\n")
fout_f2.write(s_method+"\t"+str(f_sensitivity)+"\tRecall\n")
fout_f2.write(s_method + "\t" + str(1-f_fdr) + "\t1-FDR\n")
# fout_f2.write(s_method + "\t" + s_cov + "\t" + str(f1_score) + "\t" +
# str(f_sensitivity) + "\t" + str(f_fdr) + "\tFN" + str(tp) +
# "\t" + str(fp) + "\t" + str(fn) + "\n")
####
####
def get_joint_info(sf_ori, sf_slct_sites, sf_joint):
m_slct={}
with open(sf_slct_sites) as fin_slct:
for line in fin_slct:
if len(line)>0 and line[0]=="#":
continue
fields=line.split()
chrm=fields[0]
pos=fields[1]
if chrm not in m_slct:
m_slct[chrm]={}
m_slct[chrm][pos]=1
with open(sf_ori) as fin_ori, open(sf_joint,"w") as fout:
for line in fin_ori:
if len(line)>0 and line[0]=="#":
continue
fields=line.split()
chrm=fields[0]
pos=fields[1]
if (chrm in m_slct) and (pos in m_slct[chrm]):
fout.write(line)
def get_joint_info2(sf_ori, sf_slct_sites, i_slack, sf_joint):
m_slct = {}
with open(sf_slct_sites) as fin_slct:
for line in fin_slct:
if len(line)>0 and line[0]=="#":
continue
fields = line.rstrip().split()
chrm = fields[0]
pos = int(fields[1])
if chrm not in m_slct:
m_slct[chrm] = {}
m_slct[chrm][pos] = 1
with open(sf_ori) as fin_ori, open(sf_joint, "w") as fout:
for line in fin_ori:
if len(line)>0 and line[0]=="#":
continue
fields = line.rstrip().split()
chrm = fields[0]
pos = int(fields[1])
for tmp_pos in range(pos-i_slack, pos+i_slack):
if (chrm in m_slct) and (tmp_pos in m_slct[chrm]):
fout.write(line)
break
####
####
def parse_option():
parser = OptionParser()
parser.add_option("-L",
action="store_true", dest="bgroup", default=False,
help="Input is a list of results")
parser.add_option("-i", "--input", dest="input",
help="Input file", metavar="FILE")
parser.add_option("--list", dest="list",
help="List of results", metavar="FILE")
parser.add_option("--bcmk", dest="bcmk",
help="Benchmark file", metavar="FILE")
parser.add_option("--bed", dest="focal_region", default=None,
help="Compare within the given region", metavar="FILE")
parser.add_option("--extnd", dest="extnd", type="int", default=200,
help="Slack value")
parser.add_option("--exclude", dest="exclude", default=None,
help="Sites need to be excluded", metavar="FILE")
parser.add_option("-o", "--output", dest="output",
help="The output file", metavar="FILE")
(options, args) = parser.parse_args()
return (options, args)
####
if __name__ == '__main__':
(options, args) = parse_option()#
b_group=options.bgroup
sf_list = options.list ####list of results of different repeat types in format: sf_rslt rep_type sample(each line)
sf_input = options.input
sf_bcmk = options.bcmk
sf_hc_bed = options.focal_region #the focal region
i_extd=options.extnd
sf_unhit = options.output
sf_exclude=options.exclude #pay attention that not check whether contain "chr" in chrm !!!!!!!!
if b_group is True:
m_cnt, l_sample_info, m_tp_fp_fn=cmp_for_list_of_rslts(sf_list, sf_bcmk, sf_exclude, sf_hc_bed, i_extd, sf_unhit)
#print m_cnt
sf_csv=sf_unhit
#l_sample_info=load_in_sample_id(sf_ids)
export_cnt_to_csv_for_point_chart(l_sample_info, m_cnt, sf_csv)
#calc and export F1 score
sf_f1_csv=sf_csv+".f1_score"
calc_export_F1_score_sensitivity_FDR_csv(m_tp_fp_fn, sf_f1_csv)
sf_f1_csv2=sf_csv+".tp_fp_barchart"
calc_export_F1_score_sensitivity_FDR_csv2(m_tp_fp_fn, sf_f1_csv2)
else:#single sample
s_type="None"
sf_new_input = sf_input + ".tmp"
gnrt_bed_from_input(sf_input, sf_new_input)
check_one_sample_agnst_bcmk2(sf_new_input, sf_bcmk, sf_exclude, sf_hc_bed, i_extd, s_type, sf_unhit)
#generate the false positive list (with info)
sf_slct_sites=sf_unhit+_fp_suffix
sf_joint=sf_slct_sites+"._with_info"
get_joint_info(sf_input, sf_slct_sites, sf_joint)
#generate the true positive list (with info), from the benchmark
sf_tp_sites=sf_unhit+_tp_suffix
sf_joint_tp=sf_tp_sites+"._with_info"
i_slack=i_extd
get_joint_info2(sf_bcmk, sf_tp_sites, i_slack, sf_joint_tp)
#generate the false negative list(with info)
sf_joint_fn=sf_unhit+_fn_suffix+"._with_info"
i_slack=i_extd
get_joint_info2(sf_bcmk, sf_unhit, i_slack, sf_joint_fn)
####