Skip to content

Commit

Permalink
Corrected partition string + Added partitions to bootstrap
Browse files Browse the repository at this point in the history
  • Loading branch information
fredericlemoine committed Jan 14, 2020
1 parent 251fe46 commit 732ab49
Show file tree
Hide file tree
Showing 6 changed files with 161 additions and 108 deletions.
6 changes: 5 additions & 1 deletion align/align.go
Original file line number Diff line number Diff line change
Expand Up @@ -1389,7 +1389,11 @@ func (a *align) Split(part *PartitionSet) (als []Alignment, err error) {
alsimpl[pi].seqs[si].sequence = append(alsimpl[pi].seqs[si].sequence, seq.sequence[pos])
}
}
alsimpl[pi].length++
if firstpos {
alsimpl[pi].length = 1
} else {
alsimpl[pi].length++
}
firstpos = false
}
}
Expand Down
16 changes: 10 additions & 6 deletions align/partition.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,6 @@ func (ps *PartitionSet) CheckSites() (err error) {

func (ps *PartitionSet) String() string {
var buffer bytes.Buffer
for j, p := range ps.partitions {
fmt.Println(j, p)
}

for i, pn := range ps.names {
buffer.WriteString(ps.models[i])
buffer.WriteString(",")
Expand All @@ -91,7 +87,6 @@ func (ps *PartitionSet) String() string {
for j, p := range ps.partitions {

if p == i {
fmt.Println(pn, i)
if start == -1 {
start = j
end = j
Expand Down Expand Up @@ -148,7 +143,7 @@ func (ps *PartitionSet) Partition(position int) int {
return ps.partitions[position]
}

// Returns the name of the partition associated to the given code
// Returns the name of the partition associated to the given index
// If the code does not exist, then returns ""
func (ps *PartitionSet) PartitionName(code int) string {
if code < 0 || code > len(ps.names) {
Expand All @@ -157,6 +152,15 @@ func (ps *PartitionSet) PartitionName(code int) string {
return ps.names[code]
}

// ModeleName returns the name of the model associated to the given index.
// If the index is out of range, it returns "".
func (ps *PartitionSet) ModeleName(code int) string {
	// Valid indices are 0..len(ps.models)-1: code == len(ps.models) must be
	// rejected as well, otherwise the slice access below panics.
	if code < 0 || code >= len(ps.models) {
		return ""
	}
	return ps.models[code]
}

// returns the length of the alignment
func (ps *PartitionSet) AliLength() int {
return ps.length
Expand Down
167 changes: 91 additions & 76 deletions cmd/bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,21 @@ import (
"compress/gzip"
"errors"
"fmt"
"github.com/spf13/cobra"
"os"
"sync"
"time"

"github.com/evolbioinfo/goalign/align"
"github.com/evolbioinfo/goalign/io"
"github.com/spf13/cobra"
)

var bootstrapNb int
var bootstrapoutprefix string
var bootstrapOrder bool
var bootstraptar bool
var bootstrapgz bool

type outboot struct {
bootstr string
name string
}
var bootstrappartitionstr string
var bootstrapoutputpartitionstr string

// seqbootCmd represents the bootstrap command
var seqbootCmd = &cobra.Command{
Expand All @@ -44,18 +40,26 @@ The input may be a Phylip or Fasta file.
files will be in fasta format as well.
- It is possible to give a initial seed (--seed). In this case several runs of
the tool will give the exact same results (if run on 1 thread, an thus
computations are done on a single thread in this case).
the tool will give the exact same results.
Example of usage:
goalign build seqboot -i align.phylip -p -n 500 -o boot --tar-gz
goalign build seqboot -i align.phylip -p -n 500 -o boot_
`,
RunE: func(cmd *cobra.Command, args []string) (err error) {
var aligns *align.AlignChannel
var alignChan *align.AlignChannel
var aligns []align.Alignment
var al align.Alignment
var f *os.File
var tw *tar.Writer
var gw *gzip.Writer
var inputpartition, outputpartition *align.PartitionSet
var bootstring string
var boot, tmpboot align.Alignment

if aligns, err = readalign(infile); err != nil {
// We read input alignment
if alignChan, err = readalign(infile); err != nil {
io.LogError(err)
return
}
Expand All @@ -66,70 +70,34 @@ goalign build seqboot -i align.phylip -p -n 500 -o boot_
return
}

var f *os.File
var tw *tar.Writer
var gw *gzip.Writer

align, _ := <-aligns.Achan
if aligns.Err != nil {
err = aligns.Err
// We take the first alignment of the channel
al, _ = <-alignChan.Achan
if alignChan.Err != nil {
err = alignChan.Err
io.LogError(err)
return
}

bootidx := make(chan int, 100)
outchan := make(chan outboot, 100)

cpus := rootcpus
if bootstraptar {
cpus = min_int(1, cpus-1)
}

go func() {
for i := 0; i < bootstrapNb; i++ {
bootidx <- i
// If a partition file is given, then we parse it
if bootstrappartitionstr != "none" {
if inputpartition, err = parsePartition(bootstrappartitionstr, al.Length()); err != nil {
io.LogError(err)
return
}
close(bootidx)
}()

var wg sync.WaitGroup // For waiting end of step computation
// Seed is set => 1 thread
if cmd.Flags().Changed("seed") {
cpus = 1
}
for i := 0; i < cpus; i++ {
wg.Add(1)
go func() {
var bootstring string
for idx := range bootidx {
bootid := bootstrapoutprefix + fmt.Sprintf("%d", idx)
boot := align.BuildBootstrap()
if bootstrapOrder {
boot.ShuffleSequences()
}

bootstring = writeAlignString(boot)

// Output
if bootstraptar {
outchan <- outboot{bootstring, bootid}
} else {
if err2 := writenewfile(bootid, bootstrapgz, bootstring); err2 != nil {
io.LogError(err2)
err = err2
return
}
}
}
wg.Done()
}()
if err = inputpartition.CheckSites(); err != nil {
io.LogError(err)
return
}
if aligns, err = al.Split(inputpartition); err != nil {
io.LogError(err)
return
}
outputpartition = align.NewPartitionSet(al.Length())
//fmt.Println(bootstrappartition.String())
} else {
aligns = []align.Alignment{al}
}

go func() {
wg.Wait()
close(outchan)
}()

// Create new tar(/gz) file
if bootstraptar {
if bootstrapgz {
Expand All @@ -152,27 +120,72 @@ goalign build seqboot -i align.phylip -p -n 500 -o boot_
defer tw.Close()
}

idx := 0
for oboot := range outchan {
for idx := 0; idx < bootstrapNb; idx++ {
boot = nil
bootid := bootstrapoutprefix + fmt.Sprintf("%d", idx)
// There may be several alignments to process if there are
// several partitions. We generate bootstrap replicates
// for each partition, and then concatenate them all.
for _, a := range aligns {
tmpboot = a.BuildBootstrap()
if boot == nil {
boot = tmpboot
} else {
if err = boot.Concat(tmpboot); err != nil {
io.LogError(err)
return
}
}
}
// We shuffle sequence order
if bootstrapOrder {
boot.ShuffleSequences()
}

bootstring = writeAlignString(boot)

// Output
if bootstraptar {
if err = addstringtotargz(tw, oboot.name, oboot.bootstr); err != nil {
if err = addstringtotargz(tw, bootid, bootstring); err != nil {
io.LogError(err)
return
}
} else {
if err = writenewfile(bootid+alignExtension(), bootstrapgz, bootstring); err != nil {
io.LogError(err)
return
}
}
idx++
}

var start, end int = 0, 0
if outputpartition != nil {
for i, a := range aligns {
start = end
end = start + a.Length()
// We initialize an outputpartition
// Which will have all the sites of each
// partition grouped together.
outputpartition.AddRange(
inputpartition.PartitionName(i),
inputpartition.ModeleName(i),
start, end-1, 1)
}
if bootstrapoutputpartitionstr == "" {
bootstrapoutputpartitionstr = bootstrappartitionstr + "_boot"
}
writenewfile(bootstrapoutputpartitionstr, false, outputpartition.String())
}

return
},
}

func writenewfile(name string, gz bool, bootstring string) (err error) {
var f *os.File

ext := alignExtension()

if gz {
if f, err = os.Create(name + ext + ".gz"); err != nil {
if f, err = os.Create(name + ".gz"); err != nil {
return
} else {
gw := gzip.NewWriter(f)
Expand All @@ -183,7 +196,7 @@ func writenewfile(name string, gz bool, bootstring string) (err error) {
f.Close()
}
} else {
if f, err = os.Create(name + ext); err != nil {
if f, err = os.Create(name); err != nil {
return
} else {
f.WriteString(bootstring)
Expand Down Expand Up @@ -228,5 +241,7 @@ func init() {
seqbootCmd.PersistentFlags().BoolVar(&bootstraptar, "tar", false, "Will create a single tar file with all bootstrap alignments (one thread for tar, but not a bottleneck)")
seqbootCmd.PersistentFlags().BoolVar(&bootstrapgz, "gz", false, "Will gzip output file(s). Maybe slow if combined with --tar (only one thread working for tar/gz)")
seqbootCmd.PersistentFlags().IntVarP(&bootstrapNb, "nboot", "n", 1, "Number of bootstrap replicates to build")
seqbootCmd.PersistentFlags().StringVar(&bootstrappartitionstr, "partition", "none", "File containing definition of the partitions")
seqbootCmd.PersistentFlags().StringVar(&bootstrapoutputpartitionstr, "out-partition", "", "File containing output partitions (default: same name as input partition with _boot suffix)")
seqbootCmd.PersistentFlags().StringVarP(&bootstrapoutprefix, "out-prefix", "o", "none", "Prefix of output bootstrap files")
}
12 changes: 0 additions & 12 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -227,18 +227,6 @@ func writeAlign(al align.Alignment, f *os.File) {
}
}

// extension returns the file suffix for the currently selected alignment
// format. The package-level switches are checked in order: rootphylip gives
// ".phy", rootnexus gives ".nx", rootclustal gives ".clustal"; when none of
// them is set the suffix is ".fasta".
func extension() string {
	switch {
	case rootphylip:
		return ".phy"
	case rootnexus:
		return ".nx"
	case rootclustal:
		return ".clustal"
	default:
		return ".fasta"
	}
}

func writeAlignString(al align.Alignment) (out string) {
if rootphylip {
out = phylip.WriteAlignment(al, rootoutputstrict, rootoutputoneline, rootoutputnoblock)
Expand Down
8 changes: 4 additions & 4 deletions cmd/split.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@ package cmd

import (
"fmt"
"github.com/spf13/cobra"
"os"

"github.com/spf13/cobra"

"github.com/evolbioinfo/goalign/align"
"github.com/evolbioinfo/goalign/io"
)
Expand Down Expand Up @@ -64,7 +65,7 @@ goalign split -i align.phylip --partition partition.txt
}

for i, a := range splitAligns {
name := splitprefix + splitpartition.PartitionName(i) + extension()
name := splitprefix + splitpartition.PartitionName(i) + alignExtension()
if f, err = openWriteFile(name); err != nil {
io.LogError(err)
return
Expand All @@ -80,7 +81,6 @@ goalign split -i align.phylip --partition partition.txt
// init attaches splitCmd to RootCmd and declares the flags of the split
// command.
func init() {
	RootCmd.AddCommand(splitCmd)

	// Both --prefix and --out-prefix/-o write to splitprefix, so either
	// spelling sets the output file prefix.
	splitCmd.PersistentFlags().StringVar(&splitprefix, "prefix", "", "Prefix of output files")
	// "out-prefix" (shorthand "o") is declared exactly once on this flag
	// set and is bound to splitprefix: registering the same flag name a
	// second time on one flag set panics at start-up.
	splitCmd.PersistentFlags().StringVarP(&splitprefix, "out-prefix", "o", "", "Prefix of output files")
	splitCmd.PersistentFlags().StringVar(&splitpartitionstr, "partition", "none", "File containing definition of the partitions")
}
Loading

0 comments on commit 732ab49

Please sign in to comment.