Skip to content

Commit

Permalink
Added option --unaligned to goalign trim name
Browse files Browse the repository at this point in the history
  • Loading branch information
fredericlemoine committed May 4, 2021
1 parent 87ff7b0 commit df72af1
Show file tree
Hide file tree
Showing 3 changed files with 134 additions and 29 deletions.
54 changes: 39 additions & 15 deletions cmd/name.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (

var trimMapout string
var trimAuto bool
var trimUnaligned bool

// nameCmd represents the name command
var nameCmd = &cobra.Command{
Expand All @@ -35,10 +36,6 @@ Id -a is given, then names are generated with the pattern "S000<i>".
var aligns *align.AlignChannel
var f *os.File

if aligns, err = readalign(infile); err != nil {
io.LogError(err)
return
}
if f, err = openWriteFile(trimAlignOut); err != nil {
io.LogError(err)
return
Expand All @@ -47,25 +44,55 @@ Id -a is given, then names are generated with the pattern "S000<i>".

namemap := make(map[string]string)
curid := 1
for al := range aligns.Achan {
if aligns.Err != nil {
err = aligns.Err

if trimUnaligned {
var seqs align.SeqBag
if seqs, err = readsequences(infile); err != nil {
io.LogError(err)
return
}

if trimAuto {
if err = al.TrimNamesAuto(namemap, &curid); err != nil {
if err = seqs.TrimNamesAuto(namemap, &curid); err != nil {
io.LogError(err)
return
}
} else {
if err = al.TrimNames(namemap, trimNb); err != nil {
if err = seqs.TrimNames(namemap, trimNb); err != nil {
io.LogError(err)
return
}
}
writeAlign(al, f)
writeSequences(seqs, f)
} else {
if aligns, err = readalign(infile); err != nil {
io.LogError(err)
return
}
for al := range aligns.Achan {
if aligns.Err != nil {
err = aligns.Err
io.LogError(err)
return
}

if trimAuto {
if err = al.TrimNamesAuto(namemap, &curid); err != nil {
io.LogError(err)
return
}
} else {
if err = al.TrimNames(namemap, trimNb); err != nil {
io.LogError(err)
return
}
}
writeAlign(al, f)
}

if aligns.Err != nil {
err = aligns.Err
io.LogError(err)
}
}
if trimMapout != "none" {
if err = writeNameMap(namemap, trimMapout); err != nil {
Expand All @@ -74,10 +101,6 @@ Id -a is given, then names are generated with the pattern "S000<i>".
}
}

if aligns.Err != nil {
err = aligns.Err
io.LogError(err)
}
return
},
}
Expand All @@ -102,6 +125,7 @@ func writeNameMap(namemap map[string]string, outfile string) (err error) {
// init registers the "name" sub-command under the "trim" command and
// declares the flags specific to "trim name".
func init() {
	trimCmd.AddCommand(nameCmd)
	// "none" is the sentinel meaning that no name-mapping file is written.
	nameCmd.PersistentFlags().StringVarP(&trimMapout, "out-map", "m", "none", "Mapping output file")
	nameCmd.PersistentFlags().BoolVar(&trimUnaligned, "unaligned", false, "Considers sequences as unaligned and format fasta (phylip, nexus,... options are ignored)")
	nameCmd.PersistentFlags().IntVarP(&trimNb, "nb-char", "n", 1, "Number of characters to keep in sequence names")
	// The help text must name the flag exactly as registered above ("nb-char").
	nameCmd.PersistentFlags().BoolVarP(&trimAuto, "auto", "a", false, "Automatically generates sequence identifiers (priority over --nb-char)")
}
44 changes: 30 additions & 14 deletions docs/commands/trim.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
This command trims names of sequences or sequences themselves.

Two sub-commands:
* `goalign trim name`: trims sequence names to n characters. It will also output the correspondance between old names and new names into a map file as well as the new alignment. If `-a` is given, then generates sequence names automatically.
* `goalign trim name`: trims sequence names to n characters. It will also output the correspondence between old names and new names into a map file as well as the new alignment. If `-a` is given, then generates sequence names automatically. If `--unaligned` is given, sequences are considered unaligned.
* `goalign trim seq`: trims sequences from the left or from the right side, by n characters.

#### Usage
Expand All @@ -33,17 +33,24 @@ Usage:
goalign trim name [flags]
Flags:
-a, --auto Automatically generates random sequence identifiers (priority over --nb-cchar)
-a, --auto Automatically generates sequence identifiers (priority over --nb-char)
-h, --help help for name
-n, --nb-char int Number of characters to keep in sequence names (default 1)
-m, --out-map string Mapping output file (default "none")
--unaligned Considers sequences as unaligned and format fasta (phylip, nexus,... options are ignored)
Global Flags:
-i, --align string Alignment input file (default "stdin")
-o, --out-align string Renamed alignment output file (default "stdout")
-p, --phylip Alignment is in phylip? False=Fasta
--input-strict Strict phylip input format (only used with -p)
--output-strict Strict phylip output format (only used with -p)
-i, --align string Alignment input file (default "stdin")
--auto-detect Auto detects input format (overrides -p, -x and -u)
-u, --clustal Alignment is in clustal? default fasta
--ignore-identical int Ignore duplicated sequences that have the same name and potentially have same sequences, 0 : Does not ignore anything, 1: Ignore sequences having the same name (keep the first one whatever their sequence), 2: Ignore sequences having the same name and the same sequence
--input-strict Strict phylip input format (only used with -p)
-x, --nexus Alignment is in nexus? default fasta
--no-block Write Phylip sequences without space separated blocks (only used with -p)
--one-line Write Phylip sequences on 1 line (only used with -p)
-o, --out-align string Renamed alignment output file (default "stdout")
--output-strict Strict phylip output format (only used with -p)
-p, --phylip Alignment is in phylip? default fasta
```


Expand All @@ -53,15 +60,24 @@ Usage:
goalign trim seq [flags]
Flags:
-s, --from-start If true: trims n char from the left, otherwise from the right
-s, --from-start If true: trims n char from the start, else from the end
-h, --help help for seq
-n, --nb-char int Number of characters to trim from sequences (default 1)
Global Flags:
-i, --align string Alignment input file (default "stdin")
-o, --out-align string Renamed alignment output file (default "stdout")
-p, --phylip Alignment is in phylip? False=Fasta
--input-strict Strict phylip input format (only used with -p)
--output-strict Strict phylip output format (only used with -p)
-i, --align string Alignment input file (default "stdin")
--auto-detect Auto detects input format (overrides -p, -x and -u)
-u, --clustal Alignment is in clustal? default fasta
--ignore-identical int Ignore duplicated sequences that have the same name and potentially have same sequences, 0 : Does not ignore anything, 1: Ignore sequences having the same name (keep the first one whatever their sequence), 2: Ignore sequences having the same name and the same sequence
--input-strict Strict phylip input format (only used with -p)
-x, --nexus Alignment is in nexus? default fasta
--no-block Write Phylip sequences without space separated blocks (only used with -p)
--one-line Write Phylip sequences on 1 line (only used with -p)
-o, --out-align string Renamed alignment output file (default "stdout")
--output-strict Strict phylip output format (only used with -p)
-p, --phylip Alignment is in phylip? default fasta
--seed int Random Seed: -1 = nano seconds since 1970/01/01 00:00:00 (default -1)
-t, --threads int Number of threads (default 1)
```

#### Examples
Expand Down
65 changes: 65 additions & 0 deletions test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2730,6 +2730,71 @@ diff -q -b result expected
diff -q -b <(sort mapfile) <(sort mapfile2)
rm -f expected result mapfile input mapfile2

# Test: "trim name --unaligned" with -n 3 on sequences of different lengths.
# The renamed fasta output and the old-name/new-name map are compared with
# the expected files below (the map is sorted first, so its line order does
# not matter).
echo "->goalign trim name unaligned"
cat > input <<EOF
>Seq0000
GATTA
>Seq0001
ATTT
>Seq0002
CCG
>Seq0003
GG
EOF
cat > expected <<EOF
>S01
GATTA
>S02
ATTT
>S03
CCG
>S04
GG
EOF
cat > expectedmap <<EOF
Seq0002 S03
Seq0003 S04
Seq0000 S01
Seq0001 S02
EOF
${GOALIGN} trim name -n 3 -m mapfile --unaligned -i input > result
diff -q -b result expected
diff -q -b <(sort mapfile) <(sort expectedmap)
# Remove every fixture created by this test, including the input file.
rm -f expected result expectedmap mapfile input


# Test: "trim name -a --unaligned" on sequences of different lengths.
# The renamed fasta output and the old-name/new-name map are compared with
# the expected files below (the map is sorted first, so its line order does
# not matter).
echo "->goalign trim name auto unaligned"
cat > input <<EOF
>Seq0000
GATTA
>Seq0001
ATTT
>Seq0002
CCG
>Seq0003
GG
EOF
cat > expected <<EOF
>S1
GATTA
>S2
ATTT
>S3
CCG
>S4
GG
EOF
cat > expectedmap <<EOF
Seq0002 S3
Seq0003 S4
Seq0000 S1
Seq0001 S2
EOF
${GOALIGN} trim name -a --unaligned -m mapfile -i input > result
diff -q -b result expected
diff -q -b <(sort mapfile) <(sort expectedmap)
# Remove every fixture created by this test, including the input file.
rm -f expected result expectedmap mapfile input


echo "->goalign trim seq"
cat > expected <<EOF
Expand Down

0 comments on commit df72af1

Please sign in to comment.