diff --git a/cmd/name.go b/cmd/name.go index 9340f35..e002b9c 100644 --- a/cmd/name.go +++ b/cmd/name.go @@ -10,6 +10,7 @@ import ( var trimMapout string var trimAuto bool +var trimUnaligned bool // nameCmd represents the name command var nameCmd = &cobra.Command{ @@ -35,10 +36,6 @@ Id -a is given, then names are generated with the pattern "S000". var aligns *align.AlignChannel var f *os.File - if aligns, err = readalign(infile); err != nil { - io.LogError(err) - return - } if f, err = openWriteFile(trimAlignOut); err != nil { io.LogError(err) return @@ -47,25 +44,55 @@ Id -a is given, then names are generated with the pattern "S000". namemap := make(map[string]string) curid := 1 - for al := range aligns.Achan { - if aligns.Err != nil { - err = aligns.Err + + if trimUnaligned { + var seqs align.SeqBag + if seqs, err = readsequences(infile); err != nil { io.LogError(err) return } - if trimAuto { - if err = al.TrimNamesAuto(namemap, &curid); err != nil { + if err = seqs.TrimNamesAuto(namemap, &curid); err != nil { io.LogError(err) return } } else { - if err = al.TrimNames(namemap, trimNb); err != nil { + if err = seqs.TrimNames(namemap, trimNb); err != nil { io.LogError(err) return } } - writeAlign(al, f) + writeSequences(seqs, f) + } else { + if aligns, err = readalign(infile); err != nil { + io.LogError(err) + return + } + for al := range aligns.Achan { + if aligns.Err != nil { + err = aligns.Err + io.LogError(err) + return + } + + if trimAuto { + if err = al.TrimNamesAuto(namemap, &curid); err != nil { + io.LogError(err) + return + } + } else { + if err = al.TrimNames(namemap, trimNb); err != nil { + io.LogError(err) + return + } + } + writeAlign(al, f) + } + + if aligns.Err != nil { + err = aligns.Err + io.LogError(err) + } } if trimMapout != "none" { if err = writeNameMap(namemap, trimMapout); err != nil { @@ -74,10 +101,6 @@ Id -a is given, then names are generated with the pattern "S000". 
} } - if aligns.Err != nil { - err = aligns.Err - io.LogError(err) - } return }, } @@ -102,6 +125,7 @@ func writeNameMap(namemap map[string]string, outfile string) (err error) { func init() { trimCmd.AddCommand(nameCmd) nameCmd.PersistentFlags().StringVarP(&trimMapout, "out-map", "m", "none", "Mapping output file") + nameCmd.PersistentFlags().BoolVar(&trimUnaligned, "unaligned", false, "Considers sequences as unaligned and format fasta (phylip, nexus,... options are ignored)") nameCmd.PersistentFlags().IntVarP(&trimNb, "nb-char", "n", 1, "Number of characters to keep in sequence names") nameCmd.PersistentFlags().BoolVarP(&trimAuto, "auto", "a", false, "Automatically generates sequence identifiers (priority over --nb-cchar)") } diff --git a/docs/commands/trim.md b/docs/commands/trim.md index 4868f43..db7485b 100644 --- a/docs/commands/trim.md +++ b/docs/commands/trim.md @@ -6,7 +6,7 @@ This command trims names of sequences or sequences themselves. Two sub-commands: -* `goalign trim name`: trims sequence names to n characters. It will also output the correspondance between old names and new names into a map file as well as the new alignment. If `-a` is given, then generates sequence names automatically. +* `goalign trim name`: trims sequence names to n characters. It will also output the correspondence between old names and new names into a map file as well as the new alignment. If `-a` is given, then generates sequence names automatically. If `--unaligned` is given, sequences are considered unaligned. * `goalign trim seq`: trims sequences from the left or from the right side, by n characters. 
#### Usage @@ -33,17 +33,24 @@ Usage: goalign trim name [flags] Flags: - -a, --auto Automatically generates random sequence identifiers (priority over --nb-cchar) + -a, --auto Automatically generates sequence identifiers (priority over --nb-cchar) + -h, --help help for name -n, --nb-char int Number of characters to keep in sequence names (default 1) -m, --out-map string Mapping output file (default "none") + --unaligned Considers sequences as unaligned and format fasta (phylip, nexus,... options are ignored) Global Flags: - -i, --align string Alignment input file (default "stdin") - -o, --out-align string Renamed alignment output file (default "stdout") - -p, --phylip Alignment is in phylip? False=Fasta - --input-strict Strict phylip input format (only used with -p) - --output-strict Strict phylip output format (only used with -p) - + -i, --align string Alignment input file (default "stdin") + --auto-detect Auto detects input format (overrides -p, -x and -u) + -u, --clustal Alignment is in clustal? default fasta + --ignore-identical int Ignore duplicated sequences that have the same name and potentially have same sequences, 0 : Does not ignore anything, 1: Ignore sequences having the same name (keep the first one whatever their sequence), 2: Ignore sequences having the same name and the same sequence + --input-strict Strict phylip input format (only used with -p) + -x, --nexus Alignment is in nexus? default fasta + --no-block Write Phylip sequences without space separated blocks (only used with -p) + --one-line Write Phylip sequences on 1 line (only used with -p) + -o, --out-align string Renamed alignment output file (default "stdout") + --output-strict Strict phylip output format (only used with -p) + -p, --phylip Alignment is in phylip? 
default fasta ``` @@ -53,15 +60,24 @@ Usage: goalign trim seq [flags] Flags: - -s, --from-start If true: trims n char from the left, otherwise from the right + -s, --from-start If true: trims n char from the start, else from the end + -h, --help help for seq -n, --nb-char int Number of characters to trim from sequences (default 1) Global Flags: - -i, --align string Alignment input file (default "stdin") - -o, --out-align string Renamed alignment output file (default "stdout") - -p, --phylip Alignment is in phylip? False=Fasta - --input-strict Strict phylip input format (only used with -p) - --output-strict Strict phylip output format (only used with -p) + -i, --align string Alignment input file (default "stdin") + --auto-detect Auto detects input format (overrides -p, -x and -u) + -u, --clustal Alignment is in clustal? default fasta + --ignore-identical int Ignore duplicated sequences that have the same name and potentially have same sequences, 0 : Does not ignore anything, 1: Ignore sequences having the same name (keep the first one whatever their sequence), 2: Ignore sequences having the same name and the same sequence + --input-strict Strict phylip input format (only used with -p) + -x, --nexus Alignment is in nexus? default fasta + --no-block Write Phylip sequences without space separated blocks (only used with -p) + --one-line Write Phylip sequences on 1 line (only used with -p) + -o, --out-align string Renamed alignment output file (default "stdout") + --output-strict Strict phylip output format (only used with -p) + -p, --phylip Alignment is in phylip? 
default fasta + --seed int Random Seed: -1 = nano seconds since 1970/01/01 00:00:00 (default -1) + -t, --threads int Number of threads (default 1) ``` #### Examples diff --git a/test.sh b/test.sh index cf7055e..443a5d4 100755 --- a/test.sh +++ b/test.sh @@ -2730,6 +2730,71 @@ diff -q -b result expected diff -q -b <(sort mapfile) <(sort mapfile2) rm -f expected result mapfile input mapfile2 +echo "->goalign trim name unaligned" +cat > input <Seq0000 +GATTA +>Seq0001 +ATTT +>Seq0002 +CCG +>Seq0003 +GG +EOF +cat > expected <S01 +GATTA +>S02 +ATTT +>S03 +CCG +>S04 +GG +EOF +cat > expectedmap < result +diff -q -b result expected +diff -q -b <(sort mapfile) <(sort expectedmap) +rm -f expected result expectedmap mapfile + + +echo "->goalign trim name auto unaligned" +cat > input <Seq0000 +GATTA +>Seq0001 +ATTT +>Seq0002 +CCG +>Seq0003 +GG +EOF +cat > expected <S1 +GATTA +>S2 +ATTT +>S3 +CCG +>S4 +GG +EOF +cat > expectedmap < result +diff -q -b result expected +diff -q -b <(sort mapfile) <(sort expectedmap) +rm -f expected result expectedmap mapfile + echo "->goalign trim seq" cat > expected <