Skip to content

Commit

Permalink
Corrected entropy computation for sites with only gaps
Browse files Browse the repository at this point in the history
  • Loading branch information
fredericlemoine committed Mar 15, 2017
1 parent 06650d4 commit 39ec024
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 2 deletions.
3 changes: 3 additions & 0 deletions align/align.go
Original file line number Diff line number Diff line change
Expand Up @@ -635,5 +635,8 @@ func (a *align) Entropy(site int) (float64, error) {
if entropy != 0 {
entropy = -1.0 * entropy
}
if total == 0 {
return math.NaN(), nil
}
return entropy, nil
}
13 changes: 11 additions & 2 deletions cmd/computeentropy.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package cmd

import (
"fmt"
"math"

"github.com/spf13/cobra"
)
Expand Down Expand Up @@ -32,6 +33,10 @@ the computation does not take into account the following characters:
-> '*'
-> '-'
If a site is made fully of '-' or '*', then its entropy will be "NaN", and it will not
be taken into account in the average.
`,
Run: func(cmd *cobra.Command, args []string) {
nb := 0
Expand All @@ -42,19 +47,23 @@ the computation does not take into account the following characters:
}
avg := 0.0
for align := range rootaligns {
total := 0
for i := 0; i < align.Length(); i++ {
if e, err := align.Entropy(i); err != nil {
panic(err)
} else {
if entropyAverage {
avg += e
if !math.IsNaN(e) {
avg += e
total++
}
} else {
fmt.Println(fmt.Sprintf("%d\t%d\t%.3f", nb, i, e))
}
}
}
if entropyAverage {
fmt.Println(fmt.Sprintf("%d\t%.3f", nb, avg/float64(align.Length())))
fmt.Println(fmt.Sprintf("%d\t%.3f", nb, avg/float64(total)))
}
nb++
}
Expand Down

0 comments on commit 39ec024

Please sign in to comment.