-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchlamy_protein2dna.pl
83 lines (70 loc) · 1.83 KB
/
chlamy_protein2dna.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/perl
# Script to convert a protein sequence into codon-optimized DNA for Chlamydomonas reinhardtii
# Usage: chlamy_protein2dna.pl fastaFile dnaFile
# HB: 2017/07
# Main script body: read a protein sequence from a FASTA file, translate it
# to DNA with aa2dna(), and write the DNA string (plus a trailing newline)
# to the output file.

# Both arguments are required. Stop here instead of carrying on with
# undefined file names.
if (@ARGV < 2) {
    print STDERR "Usage: chlamy_protein2dna.pl fastaFile dnaFile\n";
    exit 1;
}
my ($fasta_file, $dna_file) = @ARGV[0, 1];

my $prot = read_fasta_file($fasta_file);
print "Codon Usage Table: http://www.kazusa.or.jp/codon/cgi-bin/showcodon.cgi?species=3055\n";
print "Sequence:\n";
my $dna = aa2dna($prot);

# Three-argument open with a lexical handle: the file name is never
# interpreted as part of the open mode.
open(my $out_fh, '>', $dna_file) or die "can't open $dna_file: $!\n";
print {$out_fh} "$dna\n" or die "can't write to $dna_file: $!\n";
# Buffered write errors only surface at close, so check it.
close($out_fh) or die "can't close $dna_file: $!\n";
print "Done\n";
# aa2dna($aa)
#
# Translate a protein sequence into a DNA sequence by replacing every
# amino-acid letter with one fixed codon from the table below.
#
# Parameters:
#   $aa - protein sequence as one-letter amino-acid codes; case-insensitive
#         (the sequence is upper-cased before lookup).
#
# Returns:
#   The concatenated codons as a single string ('' for an empty sequence).
#
# Side effects:
#   Prints one "RESIDUE => CODON" line to STDOUT per translated residue.
#
# Dies:
#   With a message naming the offending character and its 1-based position
#   when the sequence contains a letter that is not in the codon table.
sub aa2dna {
    my ($aa) = @_;
    $aa = defined $aa ? uc $aa : '';

    my %opt_codon = (
        'S' => 'AGC',    # Serine
        'F' => 'TTC',    # Phenylalanine
        'L' => 'CTG',    # Leucine
        'Y' => 'TAC',    # Tyrosine
        'C' => 'TGC',    # Cysteine
        'W' => 'TGG',    # Tryptophan
        'P' => 'CCC',    # Proline
        'H' => 'CAC',    # Histidine
        'Q' => 'CAG',    # Glutamine
        'R' => 'CGC',    # Arginine
        'I' => 'ATC',    # Isoleucine
        'M' => 'ATG',    # Methionine
        'T' => 'ACC',    # Threonine
        'N' => 'AAC',    # Asparagine
        'K' => 'AAG',    # Lysine
        'V' => 'GTG',    # Valine
        'A' => 'GCC',    # Alanine
        'D' => 'GAC',    # Aspartic Acid
        'E' => 'GAG',    # Glutamic Acid
        'G' => 'GGC',    # Glycine
    );

    my $dna = '';
    my $pos = 0;
    foreach my $residue (split //, $aa) {
        $pos++;

        # Fail loudly (non-zero exit status via die) so a bad input is
        # never mistaken for a successful run.
        if (!exists $opt_codon{$residue}) {
            die "Unrecognized residue '$residue' at position $pos\n";
        }

        $dna .= $opt_codon{$residue};
        print "$residue => $opt_codon{$residue}\n";
    }
    return $dna;
}
# read_fasta_file($fasta_file)
#
# Read a FASTA file and return its sequence data as one string.
#
# Parameters:
#   $fasta_file - path of the file to read.
#
# Returns:
#   All non-header lines concatenated, with every whitespace character
#   (including line endings) removed. If the file holds several records,
#   their sequences are joined into one string.
#
# Side effects:
#   Prints "Parsing Fasta Header ..." to STDOUT for each header line.
#
# Dies:
#   If the file cannot be opened.
sub read_fasta_file {
    my ($fasta_file) = @_;
    my $seq = '';

    # Three-argument open with a lexical handle: characters such as '>' or
    # '|' in the file name are treated as part of the name, not as a mode.
    open(my $in_fh, '<', $fasta_file) or die "can't open $fasta_file: $!\n";

    while (my $line = <$in_fh>) {
        # A header is a line that STARTS with '>'; a '>' elsewhere in a
        # line does not make it a header.
        if ($line =~ /^>/) {
            print "Parsing Fasta Header ...\n";
            next;
        }
        $line =~ s/\s+//g;    # remove whitespace, including the newline
        $seq .= $line;        # append sequence data
    }

    close($in_fh);
    return $seq;
}