From 92c39431983cbf63e0748bd5aced5ef38c902158 Mon Sep 17 00:00:00 2001
From: Ryan Wick <rrwick@gmail.com>
Date: Tue, 6 Jun 2023 16:49:32 +1000
Subject: [PATCH] Bump version

---
 README.md               | 91 +++++++++++++++++++----------------------
 badread/__main__.py     | 54 ++++++++++++++++--------
 badread/identities.py   | 47 +++++++++++++++++++--
 badread/settings.py     |  1 +
 badread/simulate.py     |  2 +-
 badread/version.py      |  2 +-
 test/test_cli.py        | 50 +++++++++++++++-------
 test/test_identities.py | 30 ++++++++++++++
 8 files changed, 190 insertions(+), 87 deletions(-)

diff --git a/README.md b/README.md
index 57f4bc1..ad744e8 100644
--- a/README.md
+++ b/README.md
@@ -12,24 +12,24 @@ Badread is published in the [Journal of Open Source Software](http://joss.theoj.
 
 ## Table of contents
 
-  * [Requirements](#requirements)
-  * [Installation](#installation)
-  * [Quick usage](#quick-usage)
-  * [Method](#method)
-  * [Detailed usage](#detailed-usage)
-     * [Command line](#command-line)
-     * [Reference FASTA](#reference-fasta)
-     * [Fragment lengths](#fragment-lengths)
-     * [Read identities](#read-identities)
-     * [Error model](#error-model)
-     * [QScore model](#qscore-model)
-     * [Adapters](#adapters)
-     * [Junk and random reads](#junk-and-random-reads)
-     * [Chimeras](#chimeras)
-     * [Small plasmid bias](#small-plasmid-bias)
-     * [Glitches](#glitches)
-  * [Contributing](#contributing)
-  * [License](#license)
+* [Requirements](#requirements)
+* [Installation](#installation)
+* [Quick usage](#quick-usage)
+* [Method](#method)
+* [Detailed usage](#detailed-usage)
+  * [Command line](#command-line)
+  * [Reference FASTA](#reference-fasta)
+  * [Fragment lengths](#fragment-lengths)
+  * [Read identities](#read-identities)
+  * [Error model](#error-model)
+  * [QScore model](#qscore-model)
+  * [Adapters](#adapters)
+  * [Junk and random reads](#junk-and-random-reads)
+  * [Chimeras](#chimeras)
+  * [Small plasmid bias](#small-plasmid-bias)
+  * [Glitches](#glitches)
+* [Contributing](#contributing)
+* [License](#license)
 
 
 
@@ -90,29 +90,29 @@ badread simulate --reference ref.fasta --quantity 50x \
 To simulate older Oxford Nanopore reads (R9.4.1, worse basecalling):
 ```bash
 badread simulate --reference ref.fasta --quantity 50x \
---error_model pacbio2016 --qscore_model nanopore2020 --identity 90,98,5 \
+    --error_model nanopore2020 --qscore_model nanopore2020 --identity 90,98,5 \
     | gzip > reads.fastq.gz
 ```
 
-Alternatively, you can use Badread's built-in models to imitate older PacBio reads. This command also adjusts the identity and length distributions to be a bit more PacBio-2016-like:
+Very bad reads:
 ```bash
-badread simulate --reference ref.fasta --quantity 50x \
-    --error_model pacbio2016 --qscore_model pacbio2016 --identity 85,95,3 --length 7500,7500 \
+badread simulate --reference ref.fasta --quantity 50x --glitches 1000,100,100 \
+    --junk_reads 5 --random_reads 5 --chimeras 10 --identity 80,90,6 --length 4000,2000 \
     | gzip > reads.fastq.gz
 ```
 
-Very bad reads:
+Pretty good reads:
 ```bash
-badread simulate --reference ref.fasta --quantity 50x --glitches 1000,100,100 \
-    --junk_reads 5 --random_reads 5 --chimeras 10 --identity 75,90,8 \
+badread simulate --reference ref.fasta --quantity 50x --glitches 10000,10,10 \
+    --junk_reads 0.1 --random_reads 0.1 --chimeras 0.1 --identity 20,3 \
     | gzip > reads.fastq.gz
 ```
 
-Very nice reads:
+Very good reads:
 ```bash
 badread simulate --reference ref.fasta --quantity 50x --error_model random \
-    --qscore_model ideal --glitches 0,0,0 --junk_reads 0 --random_reads 0 \
-    --chimeras 0 --identity 99,100,2 --start_adapter_seq "" --end_adapter_seq "" \
+    --qscore_model ideal --glitches 0,0,0 --junk_reads 0 --random_reads 0 --chimeras 0 \
+    --identity 30,3 --length 40000,20000 --start_adapter_seq "" --end_adapter_seq "" \
     | gzip > reads.fastq.gz
 ```
 
@@ -225,19 +225,11 @@ For a couple of examples, check out [the reference FASTA page on the wiki](https
 
 ### Fragment lengths
 
-Badread generates fragment lengths from a [gamma distribution](https://en.wikipedia.org/wiki/Gamma_distribution). While a gamma distribution is usually parameterised with shape and scale (_k_ and _θ_) or shape and rate (_α_ and _β_), I don't find these particularly intuitive. So Badread instead defines the fragment lengths using mean and standard deviation.
+Badread generates fragment lengths from a [gamma distribution](https://en.wikipedia.org/wiki/Gamma_distribution). While a gamma distribution is usually parameterised with shape and scale (_k_ and _θ_) or shape and rate (_α_ and _β_), I don't find these particularly intuitive. So Badread instead defines fragment lengths using mean and standard deviation.
 
-There are two ways to think about fragment lengths: the distribution of the fragment lengths and the distribution of _bases_ in the fragments. The latter distribution is higher because larger fragments contribute more bases. The read N50 is the median of the base (red) distribution – half the bases will be in reads shorter than this and half in longer reads.
+There are two ways to think about fragment lengths: the distribution of the fragment lengths and the distribution of _bases_ in the fragments. The latter distribution is higher because larger fragments contribute more bases. The read N50 is the median of the base distribution – half the bases will be in reads shorter than this and half in longer reads.
 
-<table>
-    <tr>
-        <td>
-            <img align="right" src="images/default_frag_lengths.png" alt="Default length distribution" width="400">
-            Badread's default is <code>--length 15000,13000</code> (mean=15000, stdev=13000) which corresponds to a decent Nanopore run (N50=22.6 kbp). The fragment length distribution is in blue, while the base distribution is in red.<br><br>
-            To see the equations and interactively explore how different parameters affect the distributions, check out <a href="https://www.desmos.com/calculator/xrkqgzt4o5">this Desmos plot</a>.
-        </td>
-    </tr>
-</table>
+Badread's default is `--length 15000,13000` (mean=15000, stdev=13000) which corresponds to a decent Nanopore run (N50=22.6 kbp). To see the equations and interactively explore how different parameters affect the distributions, check out [this Desmos plot](https://www.desmos.com/calculator/z2a3yqssie).
 
 Note that these parameters control the length of the _fragments_, not the final _reads_. These differ because: adapters are added to fragments, glitches can lengthen/shorten fragments, adding read errors can change the length (especially if the error model is biased towards insertions or deletions) and chimeras are made by concatenating multiple fragments together.
 
@@ -245,17 +237,16 @@ Note that these parameters control the length of the _fragments_, not the final
 
 ### Read identities
 
-Badread generates read identities from a [beta distribution](https://en.wikipedia.org/wiki/Beta_distribution). Like with fragment lengths, Badread defines the distribution with mean and standard deviation instead of using the more formal (and less intuitive) shape parameters (_α_ and _β_). In addition, Badread scales the distribution down using a maximum value, for a total of three parameters: `mean,max,stdev`.
-
-<table>
-    <tr>
-        <td>
-            <img align="right" src="images/default_identities.png" alt="Default identity distribution" width="400">
-            Badread's default is <code>--identity 87.5,97.5,5</code> which corresponds to an okay (but not great) Nanopore sequencing run.<br><br>
-            To see the equations and interactively explore how different parameters affect the distribution, check out <a href="https://www.desmos.com/calculator/q7qw6rq2lb">this Desmos plot</a>.
-        </td>
-    </tr>
-</table>
+Badread can generate read identities in two alternative ways.
+
+The first way uses a [beta distribution](https://en.wikipedia.org/wiki/Beta_distribution) to sample read identities. Like with fragment lengths, Badread defines the distribution with mean and standard deviation instead of using the more formal (and less intuitive) shape parameters (_α_ and _β_). In addition, Badread scales the distribution down using a maximum value, for a total of three parameters. To use this method, give three comma-delimited values (identity mean, max, stdev), e.g. `--identity 95,99,2.5`.
+
+The second way uses a [normal distribution](https://en.wikipedia.org/wiki/Normal_distribution) to sample read [qscores](https://en.wikipedia.org/wiki/Phred_quality_score). To use this method, give two comma-delimited values (qscore mean and stdev), e.g. `--identity 20,4`. This approach is better suited to high accuracy reads like ONT duplex or PacBio HiFi.
+
+Badread's default is `--identity 95,99,2.5` which corresponds to an okay (but not great) R10.4.1 Nanopore sequencing run. To see the equations and interactively explore how different parameters affect the distribution, check out these Desmos plots:
+* [three-parameter identity beta distribution](https://www.desmos.com/calculator/u9fivqmisa)
+* [two-parameter qscore normal distribution (qscore on x-axis)](https://www.desmos.com/calculator/kpganphxj0)
+* [two-parameter qscore normal distribution (identity on x-axis)](https://www.desmos.com/calculator/kg9s0dvbup)
 
 For detail on how Badread defines identity, check out [this page on the wiki](https://github.com/rrwick/Badread/wiki/Definition-of-identity).
 
diff --git a/badread/__main__.py b/badread/__main__.py
index f2bc3b9..68a78ba 100644
--- a/badread/__main__.py
+++ b/badread/__main__.py
@@ -97,7 +97,8 @@ def simulate_subparser(subparsers):
                           help='Fragment length distribution (mean and stdev, '
                                'default: DEFAULT)')
     sim_args.add_argument('--identity', type=str, default='95,99,2.5',
-                          help='Sequencing identity distribution (mean, max and stdev, '
+                          help='Sequencing identity distribution (mean, max and stdev for beta '
+                               'distribution, or mean and stdev for normal qscore distribution , '
                                'default: DEFAULT)')
     sim_args.add_argument('--error_model', type=str, default='nanopore2023',
                           help='Can be "nanopore2018", "nanopore2020", "nanopore2023", '
@@ -274,24 +275,20 @@ def check_simulate_args(args):
 
     try:
         identity_parameters = [float(x) for x in args.identity.split(',')]
-        args.mean_identity = identity_parameters[0]
-        args.max_identity = identity_parameters[1]
-        args.identity_stdev = identity_parameters[2]
+        if len(identity_parameters) == 2:
+            args.mean_identity = identity_parameters[0]
+            args.max_identity = None
+            args.identity_stdev = identity_parameters[1]
+            check_qscore_identities(args)
+        elif len(identity_parameters) == 3:
+            args.mean_identity = identity_parameters[0]
+            args.max_identity = identity_parameters[1]
+            args.identity_stdev = identity_parameters[2]
+            check_beta_identities(args)
+        else:
+            sys.exit('Error: could not parse --identity values')
     except (ValueError, IndexError):
         sys.exit('Error: could not parse --identity values')
-    if args.mean_identity > 100.0:
-        sys.exit('Error: mean read identity cannot be more than 100')
-    if args.max_identity > 100.0:
-        sys.exit('Error: max read identity cannot be more than 100')
-    if args.mean_identity <= settings.MIN_MEAN_READ_IDENTITY:
-        sys.exit(f'Error: mean read identity must be at least {settings.MIN_MEAN_READ_IDENTITY}')
-    if args.max_identity <= settings.MIN_MEAN_READ_IDENTITY:
-        sys.exit(f'Error: max read identity must be at least {settings.MIN_MEAN_READ_IDENTITY}')
-    if args.mean_identity > args.max_identity:
-        sys.exit(f'Error: mean identity ({args.mean_identity}) cannot be larger than max '
-                 f'identity ({args.max_identity})')
-    if args.identity_stdev < 0.0:
-        sys.exit('Error: read identity stdev cannot be negative')
 
     try:
         glitch_parameters = [float(x) for x in args.glitches.split(',')]
@@ -315,6 +312,29 @@ def check_simulate_args(args):
                 sys.exit('Error: --end_adapter_seq must be a DNA sequence or a number')
 
 
+def check_beta_identities(args):
+    if args.mean_identity > 100.0:
+        sys.exit('Error: mean read identity cannot be more than 100')
+    if args.max_identity > 100.0:
+        sys.exit('Error: max read identity cannot be more than 100')
+    if args.mean_identity <= settings.MIN_MEAN_READ_IDENTITY:
+        sys.exit(f'Error: mean read identity must be at least {settings.MIN_MEAN_READ_IDENTITY}')
+    if args.max_identity <= settings.MIN_MEAN_READ_IDENTITY:
+        sys.exit(f'Error: max read identity must be at least {settings.MIN_MEAN_READ_IDENTITY}')
+    if args.mean_identity > args.max_identity:
+        sys.exit(f'Error: mean identity ({args.mean_identity}) cannot be larger than max '
+                 f'identity ({args.max_identity})')
+    if args.identity_stdev < 0.0:
+        sys.exit('Error: read identity stdev cannot be negative')
+
+
+def check_qscore_identities(args):
+    if args.mean_identity <= settings.MIN_MEAN_READ_QSCORE:
+        sys.exit(f'Error: mean read identity must be at least {settings.MIN_MEAN_READ_QSCORE}')
+    if args.identity_stdev < 0.0:
+        sys.exit('Error: read qscore stdev cannot be negative')
+
+
 def check_python_version():
     if sys.version_info.major < 3 or sys.version_info.minor < 6:
         sys.exit('Error: Badread requires Python 3.6 or later')
diff --git a/badread/identities.py b/badread/identities.py
index 98aa99d..e0c4bf7 100755
--- a/badread/identities.py
+++ b/badread/identities.py
@@ -1,6 +1,5 @@
 """
-This module contains a class for describing read identity distributions (described by the beta
-distribution) and related functions.
+This module contains a class for describing read identity distributions and related functions.
 
 Copyright 2018 Ryan Wick (rrwick@gmail.com)
 https://github.com/rrwick/Badread
@@ -23,11 +22,26 @@
 class Identities(object):
 
     def __init__(self, mean, stdev, max_identity, output=sys.stderr):
+        self.mean, self.stdev, self.max_identity = None, None, None
+        self.beta_a, self.beta_b = None, None
+
+        print('', file=output)
+
+        # There are two possible types of identity distributions: a three-parameter beta
+        # distribution that describes read identities, or a two-parameter normal distribution that
+        # describes read qscores.
+        if max_identity is None:
+            self.type = "normal"
+            self.set_up_normal(mean, stdev, output)
+        else:
+            self.type = "beta"
+            self.set_up_beta(mean, stdev, max_identity, output)
+
+    def set_up_beta(self, mean, stdev, max_identity, output):
         # Divide by 100 to convert from percentage to fraction
         self.mean = mean / 100.0
         self.stdev = stdev / 100.0
         self.max_identity = max_identity / 100.0
-        print('', file=output)
 
         if self.mean == self.max_identity:
             self.beta_a, self.beta_b = None, None
@@ -35,7 +49,7 @@ def __init__(self, mean, stdev, max_identity, output=sys.stderr):
         elif self.stdev == 0.0:
             self.max_identity = self.mean
             print(f'Using a constant read identity of {self.mean * 100}%', file=output)
-        else:  # beta distribution
+        else:
             print('Generating read identities from a beta distribution:', file=output)
             self.beta_a, self.beta_b = beta_parameters(mean, stdev, max_identity)
             print_in_two_columns(f'  mean  = {float_to_str(self.mean * 100):>3}%',
@@ -47,12 +61,37 @@ def __init__(self, mean, stdev, max_identity, output=sys.stderr):
                                  output=output)
             quickhist_beta(self.beta_a, self.beta_b, self.max_identity, 8, output=output)
 
+    def set_up_normal(self, mean, stdev, output):
+        self.mean = mean
+        self.stdev = stdev
+
+        if self.stdev == 0.0:
+            self.max_identity = self.mean
+            print(f'Using a constant read qscore of {self.mean}', file=output)
+        else:
+            print('Generating read qscores from a normal distribution:', file=output)
+            print(f'  mean  = {float_to_str(self.mean):>3}', file=output)
+            print(f'  stdev = {float_to_str(self.stdev):>3}', file=output)
+
     def get_identity(self):
+        while True:
+            if self.type == "beta":
+                identity = self.get_beta_identity()
+            else:
+                identity = self.get_normal_identity()
+            if 0 <= identity <= 100:
+                return identity
+
+    def get_beta_identity(self):
         if self.mean == self.max_identity:
             return self.mean
         else:  # beta distribution
             return self.max_identity * np.random.beta(self.beta_a, self.beta_b)
 
+    def get_normal_identity(self):
+        qscore = np.random.normal(self.mean, self.stdev)
+        return 1.0 - 10**(-qscore / 10)
+
 
 def beta_parameters(beta_mean, beta_stdev, beta_max):
     u, s, m = beta_mean, beta_stdev, beta_max
diff --git a/badread/settings.py b/badread/settings.py
index edcb002..7d80310 100644
--- a/badread/settings.py
+++ b/badread/settings.py
@@ -29,6 +29,7 @@
 # identity (e.g. 50%) as that might break some things. These settings control how low they can go.
 MIN_MEAN_READ_LENGTH = 100
 MIN_MEAN_READ_IDENTITY = 50
+MIN_MEAN_READ_QSCORE = 5
 
 
 # If a random qscore model is used, this controls the range of the qualities.
diff --git a/badread/simulate.py b/badread/simulate.py
index 41518e8..492afbd 100644
--- a/badread/simulate.py
+++ b/badread/simulate.py
@@ -72,7 +72,7 @@ def simulate(args, output=sys.stderr):
 
         info.append(f'length={len(seq)}')
         info.append(f'error-free_length={len(fragment)}')
-        info.append(f'read_identity={actual_identity * 100.0:.2f}%')
+        info.append(f'read_identity={actual_identity * 100.0:.3f}%')
 
         read_name = uuid.UUID(int=random.getrandbits(128))
         info = ' '.join(info)
diff --git a/badread/version.py b/badread/version.py
index 5cb304d..aae7198 100644
--- a/badread/version.py
+++ b/badread/version.py
@@ -14,4 +14,4 @@
 If not, see <http://www.gnu.org/licenses/>.
 """
 
-__version__ = '0.3.0'
+__version__ = '0.4.0'
diff --git a/test/test_cli.py b/test/test_cli.py
index 941f98d..29d5848 100644
--- a/test/test_cli.py
+++ b/test/test_cli.py
@@ -184,7 +184,7 @@ def test_simulate_help_2(self):
         self.assertEqual(out.getvalue().strip(), '')
         self.assertTrue('simulate' in err.getvalue().strip())
 
-    def test_simulate_args(self):
+    def test_simulate_args_1(self):
         args = badread.__main__.parse_args(['simulate', '--reference', self.ref,
                                            '--quantity', '50x', '--length', '5432,123',
                                            '--identity', '78,89,4', '--glitches', '9876,12,34'])
@@ -200,6 +200,22 @@ def test_simulate_args(self):
         self.assertEqual(args.glitch_size, 12)
         self.assertEqual(args.glitch_skip, 34)
 
+    def test_simulate_args_2(self):
+        args = badread.__main__.parse_args(['simulate', '--reference', self.ref,
+                                           '--quantity', '250M', '--length', '2345,321',
+                                           '--identity', '20,2', '--glitches', '6789,21,43'])
+        self.assertEqual(args.reference, self.ref)
+        self.assertEqual(args.quantity, '250M')
+        badread.__main__.check_simulate_args(args)
+        self.assertEqual(args.mean_frag_length, 2345)
+        self.assertEqual(args.frag_length_stdev, 321)
+        self.assertEqual(args.mean_identity, 20)
+        self.assertEqual(args.max_identity, None)
+        self.assertEqual(args.identity_stdev, 2)
+        self.assertEqual(args.glitch_rate, 6789)
+        self.assertEqual(args.glitch_size, 21)
+        self.assertEqual(args.glitch_skip, 43)
+
     def test_bad_simulated_args_1(self):
         self.check_simulate_error(['--chimera', '101'], 'cannot be greater than')
 
@@ -232,45 +248,51 @@ def test_bad_simulated_args_10(self):
         self.check_simulate_error(['--identity', '85,A,5'], 'could not parse')
 
     def test_bad_simulated_args_11(self):
-        self.check_simulate_error(['--identity', '85,101,5'], 'cannot be more than')
+        self.check_simulate_error(['--identity', '20,19,18,17'], 'could not parse')
 
     def test_bad_simulated_args_12(self):
-        self.check_simulate_error(['--identity', '101,102,5'], 'cannot be more than')
+        self.check_simulate_error(['--identity', '85,101,5'], 'cannot be more than')
 
     def test_bad_simulated_args_13(self):
-        self.check_simulate_error(['--identity', '85,80,5'], 'cannot be larger than max')
+        self.check_simulate_error(['--identity', '101,102,5'], 'cannot be more than')
 
     def test_bad_simulated_args_14(self):
-        self.check_simulate_error(['--identity', '85,90,-3'], 'cannot be negative')
+        self.check_simulate_error(['--identity', '85,80,5'], 'cannot be larger than max')
 
     def test_bad_simulated_args_15(self):
-        self.check_simulate_error(['--identity', '30,90,5'], 'must be at least')
+        self.check_simulate_error(['--identity', '85,90,-3'], 'cannot be negative')
 
     def test_bad_simulated_args_16(self):
-        self.check_simulate_error(['--identity', '90,30,5'], 'must be at least')
+        self.check_simulate_error(['--identity', '30,90,5'], 'must be at least')
 
     def test_bad_simulated_args_17(self):
-        self.check_simulate_error(['--glitches', 'abc'], 'could not parse')
+        self.check_simulate_error(['--identity', '90,30,5'], 'must be at least')
 
     def test_bad_simulated_args_18(self):
-        self.check_simulate_error(['--glitches', '500,Z,10'], 'could not parse')
+        self.check_simulate_error(['--identity', '1,1'], 'must be at least')
 
     def test_bad_simulated_args_19(self):
-        self.check_simulate_error(['--glitches', '500,-10,10'], 'must contain non-negative')
+        self.check_simulate_error(['--glitches', 'abc'], 'could not parse')
 
     def test_bad_simulated_args_20(self):
-        self.check_simulate_error(['--error_model', 'not_a_file'], 'or a filename')
+        self.check_simulate_error(['--glitches', '500,Z,10'], 'could not parse')
 
     def test_bad_simulated_args_21(self):
-        self.check_simulate_error(['--qscore_model', 'not_a_file'], 'or a filename')
+        self.check_simulate_error(['--glitches', '500,-10,10'], 'must contain non-negative')
 
     def test_bad_simulated_args_22(self):
-        self.check_simulate_error([], 'not a file', ref='not_a_file')
+        self.check_simulate_error(['--error_model', 'not_a_file'], 'or a filename')
 
     def test_bad_simulated_args_23(self):
-        self.check_simulate_error(['--start_adapter_seq', 'ACGTQ'], 'must be a DNA sequence')
+        self.check_simulate_error(['--qscore_model', 'not_a_file'], 'or a filename')
 
     def test_bad_simulated_args_24(self):
+        self.check_simulate_error([], 'not a file', ref='not_a_file')
+
+    def test_bad_simulated_args_25(self):
+        self.check_simulate_error(['--start_adapter_seq', 'ACGTQ'], 'must be a DNA sequence')
+
+    def test_bad_simulated_args_26(self):
         self.check_simulate_error(['--end_adapter_seq', '12ACT'], 'must be a DNA sequence')
 
     def test_good_simulated_args_1(self):
diff --git a/test/test_identities.py b/test/test_identities.py
index 3afa512..1f34f6f 100644
--- a/test/test_identities.py
+++ b/test/test_identities.py
@@ -89,3 +89,33 @@ def test_bad_identity(self):
             identities = badread.identities.Identities(81.9, 5.5, 82.1, output=self.null)
             identities.get_identity()
         self.assertTrue('invalid beta parameters' in str(cm.exception))
+
+
+class TestNormalIdentity(unittest.TestCase):
+
+    def setUp(self):
+        self.null = open(os.devnull, 'w')
+        self.trials = 100000
+
+    def tearDown(self):
+        self.null.close()
+
+    def test_normal_identity_1(self):
+        identities = badread.identities.Identities(20, 2, None, output=self.null)
+        mean = sum(identities.get_identity() for _ in range(self.trials)) / self.trials
+        self.assertAlmostEqual(mean, 0.98888, delta=0.01)
+
+    def test_normal_identity_2(self):
+        identities = badread.identities.Identities(10, 0, None, output=self.null)
+        mean = sum(identities.get_identity() for _ in range(self.trials)) / self.trials
+        self.assertAlmostEqual(mean, 0.9, delta=0.01)
+
+    def test_normal_identity_3(self):
+        identities = badread.identities.Identities(20, 0, None, output=self.null)
+        mean = sum(identities.get_identity() for _ in range(self.trials)) / self.trials
+        self.assertAlmostEqual(mean, 0.99, delta=0.01)
+
+    def test_normal_identity_4(self):
+        identities = badread.identities.Identities(30, 0, None, output=self.null)
+        mean = sum(identities.get_identity() for _ in range(self.trials)) / self.trials
+        self.assertAlmostEqual(mean, 0.999, delta=0.01)