From 39ec0246448316e628217ea6cf56131ab577b975 Mon Sep 17 00:00:00 2001 From: Frederic Lemoine Date: Wed, 15 Mar 2017 12:32:49 +0100 Subject: [PATCH] Corrected entropy computation for sites with only gaps --- align/align.go | 3 +++ cmd/computeentropy.go | 13 +++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/align/align.go b/align/align.go index 4103442..46b95a4 100644 --- a/align/align.go +++ b/align/align.go @@ -635,5 +635,8 @@ func (a *align) Entropy(site int) (float64, error) { if entropy != 0 { entropy = -1.0 * entropy } + if total == 0 { + return math.NaN(), nil + } return entropy, nil } diff --git a/cmd/computeentropy.go b/cmd/computeentropy.go index a005d3a..76ba6e2 100644 --- a/cmd/computeentropy.go +++ b/cmd/computeentropy.go @@ -2,6 +2,7 @@ package cmd import ( "fmt" + "math" "github.com/spf13/cobra" ) @@ -32,6 +33,10 @@ the computation does not take into account the following characters: -> '*' -> '-' + +If a site is made fully of '-' or '*', then its entropy will be "NaN", and it will not +be taken into account in the average. + `, Run: func(cmd *cobra.Command, args []string) { nb := 0 @@ -42,19 +47,23 @@ the computation does not take into account the following characters: } avg := 0.0 for align := range rootaligns { + total := 0 for i := 0; i < align.Length(); i++ { if e, err := align.Entropy(i); err != nil { panic(err) } else { if entropyAverage { - avg += e + if !math.IsNaN(e) { + avg += e + total++ + } } else { fmt.Println(fmt.Sprintf("%d\t%d\t%.3f", nb, i, e)) } } } if entropyAverage { - fmt.Println(fmt.Sprintf("%d\t%.3f", nb, avg/float64(align.Length()))) + fmt.Println(fmt.Sprintf("%d\t%.3f", nb, avg/float64(total))) } nb++ }