Skip to content

Commit

Permalink
Working argparse
Browse files Browse the repository at this point in the history
  • Loading branch information
Kevinlega committed Dec 15, 2017
1 parent a0404c0 commit fea6bd5
Show file tree
Hide file tree
Showing 3 changed files with 26,921 additions and 50 deletions.
124 changes: 74 additions & 50 deletions dbg.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

# import multiprocessing as mp
import argparse
import collections, sys
import collections
from Bio import Seq, SeqIO, SeqRecord

# Create reverse complement
Expand All @@ -26,23 +26,23 @@ def bw(km):

# count kmers and build dictionary with counts
# use limit as cut off of very down regulated seq
def build(f,k=31,limit=1):
def build(fn,k=31,limit=1):
d = collections.defaultdict(int)

# For each filename in input parse fast.q file
# for f in fn:
reads = SeqIO.parse(f,'fastq')
# Extract reads
for read in reads:
seq_s = str(read.seq)
seq_l = seq_s.split('N')
#
for seq in seq_l:
for km in kmers(seq,k):
d[km] +=1
seq = twin(seq)
for km in kmers(seq,k):
d[km] += 1
for f in fn:
reads = SeqIO.parse(f,'fastq')
# Extract reads
for read in reads:
seq_s = str(read.seq)
seq_l = seq_s.split('N')
#
for seq in seq_l:
for km in kmers(seq,k):
d[km] +=1
seq = twin(seq)
for km in kmers(seq,k):
d[km] += 1

# Remove k-mer elements of dict that
d1 = [x for x in d if d[x] <= limit]
Expand Down Expand Up @@ -143,54 +143,78 @@ def kmer_count2(cs,d, k, id, file):
def kmer_count(cs,d, k, id, file):
for x in range(0,len(cs)+1-k):
key = cs[x:x+k]
# "F 1 * 0 538 1 32 ATGCGCTCGCTCGCTGAGCTGAC A:i:234"
count = d[key]
# write the fragment twice for organism A and maybe B
# <fragment> <- F <sid:id> <external:ref> <sbeg:pos> <send:pos> <fbeg:pos> <fend:pos> <alignment> <tag>*
file.write("F\t%s\tA:%d\t%d\t%d\t%d\t%s\t*\n"%(id,d[key][0],len(cs),x,x+k,key))

file.write("F\t%s\tB:%d\t%d\t%d\t%d\t%s\t*\n"%(id,d[key][1],len(cs),x,x+k,key))

# Write to line
def write_GFA2(G,cs,k,d):
filename = "test.gfa"
with open(filename, "w+") as file:
def write_GFA2(G,cs,k,d):
global args
if args.output: # If the output file name is given use it
filename = args.output
else: # else use standard one
filename = "output.gfa"

with open(filename, "w+") as file: # Open a file

file.write("H VN:Z:2.0\n")
for i,x in enumerate(cs):
file.write("S\t%d\t%d\t%s\t\n"%(i,len(x), x ))
kmer_count(x,d,k,i,file)
file.write("H VN:Z:2.0\n") # Write the header with the GFA version
for i,x in enumerate(cs): # Get the one contig and a number id for the contig
file.write("S\t%d\t%d\t%s\t\n"%(i,len(x), x )) # Write the segment(contig) <segment> <- S <sid:id> <slen:int> <sequence> <tag>*
kmer_count(x,d,k,i,file) # Function to get the fragments of organism A and B if included

for i in G:
for i in G: # Write the links
for j,o in G[i][0]:
file.write("L\t%d\t+\t%d\t%s\t%dM\n"%(i,j,o,k-1))
for j,o in G[i][1]:
file.write("L\t%d\t-\t%d\t%s\t%dM\n"%(i,j,o,k-1))

for i in d.keys():
file.write("#\t%s\tRC A:%d B:%d\n"%(i,d[i][0],d[i][1]))

def write_GFA(G,cs,k,d):
filename = "test.gfa"
with open(filename, "w+") as file:
file.write("H VN:Z:1.0")
for i,x in enumerate(cs):
print("S\t%d\t%s\t*\n"%(i,x))
file.write("#\t%s\tRC A:%d B:%d\n"%(i,d[i][0],d[i][1])) # Print the dictionary with the count of how many times it kmer appears


# def print_GFA(G,cs,k,d):

# with open(filename, "w+") as file:
# file.write("H VN:Z:1.0")
# for i,x in enumerate(cs):
# print("S\t%d\t%s\t*\n"%(i,x))

for i in G:
for j,o in G[i][0]:
print("L\t%d\t+\t%d\t%s\t%dM\n"%(i,j,o,k-1))
for j,o in G[i][1]:
print("L\t%d\t-\t%d\t%s\t%dM\n"%(i,j,o,k-1))
# for i in G:
# for j,o in G[i][0]:
# print("L\t%d\t+\t%d\t%s\t%dM\n"%(i,j,o,k-1))
# for j,o in G[i][1]:
# print("L\t%d\t-\t%d\t%s\t%dM\n"%(i,j,o,k-1))

for i in d.keys():
print("#\t%s\tRC A:%d B:%d"%(i,d[i][0],d[i][1]))
# for i in d.keys():
# print("#\t%s\tRC A:%d B:%d"%(i,d[i][0],d[i][1]))


if __name__ == "__main__":
if len(sys.argv) < 3: exit("args: <k> <reads_1.fq> ...")
k = int(sys.argv[1])
# d = build(sys.argv[2:], k, 1)
dA = build(sys.argv[2], k, 1)
dB = build(sys.argv[3], k, 1)

def main():
global args

dA = build(args.A, args.k, 1)

dB = build(args.B, args.k, 1)
d = merge_dicts(dA, dB)
with open('dict_of_ratio_1.txt', "w+") as f:
f.write(str(d))
G,cs = all_contigs(d,k)
write_GFA2(G,cs,k,d)
# with open('dict_of_ratio_1.txt', "w+") as f:
# f.write(str(d))
G,cs = all_contigs(d,args.k)
# print(cs)
write_GFA2(G,cs,args.k,d)


parser = argparse.ArgumentParser(description="Creates a GFA file with one or two organisms given a kmer size")
parser.add_argument("-k", type=int, required=True, help="the kmer size")
parser.add_argument("-A", nargs='+', required=True, help="Organism_A_files")
parser.add_argument("-B", nargs='+', required=True, help="Organism_B_files")
parser.add_argument("-output",required=False,help="Output GFA file name")
args = parser.parse_args()



if __name__ == "__main__":
main()
Loading

0 comments on commit fea6bd5

Please sign in to comment.