From bfdb770550ce34ecfc4c52ab68fd3cd40d4db7eb Mon Sep 17 00:00:00 2001
From: Fredrik Bagge Carlson <baggepinnen@gmail.com>
Date: Wed, 18 Oct 2023 10:51:41 +0200
Subject: [PATCH 1/9] add tutorials

---
 docs/make.jl                                  |  5 ++
 docs/src/index.md                             |  7 ++-
 docs/src/tutorials/error_recovery.md          | 46 ++++++++++++++++++
 docs/src/tutorials/hot_loop.md                | 47 +++++++++++++++++++
 .../optional_debugging_and_logging.md         | 47 +++++++++++++++++++
 5 files changed, 150 insertions(+), 2 deletions(-)
 create mode 100644 docs/src/tutorials/error_recovery.md
 create mode 100644 docs/src/tutorials/hot_loop.md
 create mode 100644 docs/src/tutorials/optional_debugging_and_logging.md

diff --git a/docs/make.jl b/docs/make.jl
index 875377f..465b2fe 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -7,6 +7,11 @@ makedocs(
       warnonly = [:missing_docs],
       pages = [
             "Home" => "index.md",
+            "Tutorials" => [
+                  "Optional debugging and logging" => "tutorials/optional_debugging_and_logging.md",
+                  "Hot loops" => "tutorials/hot_loop.md",
+                  "Minimum latency error recovery" => "tutorials/error_recovery.md",
+            ],
             "API" => "api.md",
       ],
       format = Documenter.HTML(prettyurls = haskey(ENV, "CI")),
diff --git a/docs/src/index.md b/docs/src/index.md
index 4d62925..13b268d 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -37,10 +37,13 @@ Some functions that we do not expect may allocate memory, like `sin`, actually m
 ```@example README
 length(check_allocs(sin, (Float64,)))
 ```
-The reason for this is that `sin` may **throw an error**, and the exception object requires some allocations. We can ignore allocations that only happen when throwing errors by passing `ignore_throw=true`:
+The reason for this is that `sin` may **throw an error**, and the exception path requires some allocations. We can ignore allocations that only happen when throwing errors by passing `ignore_throw=true`:
 
 ```@example README
-length(check_allocs(sin, (Float64,); ignore_throw=true)) # ignore allocations that only happen when throwing errors
+allocs = check_allocs(sin, (Float64,); ignore_throw=true) # ignore allocations that only happen when throwing errors
+
+using Test
+@test isempty(allocs)
 ```
 
 ## Limitations
diff --git a/docs/src/tutorials/error_recovery.md b/docs/src/tutorials/error_recovery.md
new file mode 100644
index 0000000..c4ded97
--- /dev/null
+++ b/docs/src/tutorials/error_recovery.md
@@ -0,0 +1,46 @@
+# Guaranteed Error Recovery
+
+Safety-critical real-time systems are often required to have performance critical error-recovery logic. While errors are not supposed to occur, they sometimes do anyways 😦, and when they do, we may want to make sure that the recovery logic runs with minimum latency.
+
+In the following example, we are executing a loop that may throw an error. We can tell [`check_allocs`](@ref) that we allow allocations on the error path by passing `ignore_throw=true`, but a bigger problem may arise, the garbage collector may be invoked by the allocation, and introduce an unbounded latency before we execute the error recovery logic.
+
+To guard ourselves against this, we may follow these steps
+1. Prove that the function does not allocate memory except for on exception paths.
+2. Since we have proved that we are not allocating memory, we may disable the garbage collector. This prevents it from running before the error recovery logic.
+3. To make sure that the garbage collector is re-enabled after an error has been recovered from, we re-enable it in a `finally` block.
+
+
+
+```@example ERROR
+function treading_lightly()
+    a = 0.0
+    GC.enable(false) # Turn off the GC before entering the loop
+    try
+        for i = 10:-1:-1
+            a += sqrt(i) # This throws an error for negative values of i
+        end
+    catch
+        exit_gracefully() # This function is supposed to run with minimum latency
+    finally
+        GC.enable(true) # Always turn the GC back on before exiting the function
+    end
+    a
+end
+exit_gracefully() = println("Calling mother")
+
+using AllocCheck, Test
+allocs = check_allocs(treading_lightly, (); ignore_throw=true) # Check that it's safe to proceed
+```
+
+[`check_allocs`](@ref) returned a single allocation instance, associated with turning the GC back on. This is not a problem here since when this would hypothetically occur, we have already executed the hot loop and recovered from the error. To make sure that this is indeed the allocation we are seeing, we need to verify the identity of the allocation before proceeding. We may do this by verifying that the allocation comes from the `enable` function:
+```@example ERROR
+@test only(allocs).backtrace[1].func === :enable
+```
+
+The compiler may in the future become smarter and elide any allocation we are seeing, so this test may fail in the future. However, having checked that the only allocations that occur are acceptable to us, it's now safe to proceed:
+
+```@example ERROR
+val = treading_lightly()
+@test val ≈ 22.468278186204103  # hide
+```
+
diff --git a/docs/src/tutorials/hot_loop.md b/docs/src/tutorials/hot_loop.md
new file mode 100644
index 0000000..0321e6e
--- /dev/null
+++ b/docs/src/tutorials/hot_loop.md
@@ -0,0 +1,47 @@
+# Allocations followed by a hot loop
+A common pattern in high-performance Julia code, as well as in real-time systems, is to initially allocate some working memory, followed by the execution of a performance sensitive _hot loop_ that should perform no allocations. In the example below, we show a function `run_almost_forever` that resembles the implementation of a simple control system. The function starts by allocating a large `logvector` in which some measurement data is to be saved, followed by the execution of a loop which should run with as predictable timing as possible, i.e., we do not want to perform any allocations or invoke the garbage collector while executing the loop.
+```@example HOT_LOOP
+function run_almost_forever()
+    N = 100_000 # A large number
+    logvector = zeros(N) # Allocate a large vector for storing results
+    for i = 1:N # Run a hot loop that may not allocate
+        y = sample_measurement()
+        logvector[i] = y
+        u = controller(y)
+        apply_control(u)
+        Libc.systemsleep(0.01)
+    end
+end
+
+# Silly implementations of the functions used in the example
+sample_measurement() = randn()
+controller(y) = -2y
+apply_control(u) = nothing
+```
+
+Here, the primary concern is the loop, while the preamble of the function should be allowed to allocate memory. The recommended strategy in this case is to refactor the function into a separate preamble and loop, like this
+```@example HOT_LOOP
+function run_almost_forever2() # The preamble that performs allocations
+    N = 100_000 # A large number
+    logvector = zeros(N) # Allocate a large vector for storing results
+    run_almost_forever!(logvector)
+end
+
+function run_almost_forever!(logvector) # The hot loop that is allocation free
+    for i = eachindex(logvector) # Run a hot loop that may not allocate
+        y = sample_measurement()
+        logvector[i] = y
+        u = controller(y)
+        apply_control(u)
+        Libc.systemsleep(0.01)
+    end
+end
+```
+
+We may now analyze the loop function `run_almost_forever!` to verify that it does not allocate memory:
+```@example HOT_LOOP
+using AllocCheck, Test
+allocs = check_allocs(run_almost_forever!, (Vector{Float64},));
+@test isempty(allocs)
+```
+
diff --git a/docs/src/tutorials/optional_debugging_and_logging.md b/docs/src/tutorials/optional_debugging_and_logging.md
new file mode 100644
index 0000000..6b03a0b
--- /dev/null
+++ b/docs/src/tutorials/optional_debugging_and_logging.md
@@ -0,0 +1,47 @@
+# Optional debugging and logging
+
+For debugging purposes, it may sometimes be beneficial to include logging statements in a function, for example
+```@example DEBUGGING
+function myfun(verbose::Bool)
+    a = 0.0
+    for i = 1:3
+        a = a + i
+        verbose && @info "a = $a"
+    end
+end
+```
+Here, the printing of some relevant information is only performed if `verbose = true`. While the printing is optional, and not performed if `verbose = false`, [`check_allocs`](@ref) operates on _types rather than values_, i.e., `check_allocs` only knows that the argument is of type `Bool`, not that it may have the value `false`:
+```@example DEBUGGING
+using AllocCheck
+check_allocs(myfun, (Bool,)) |> length
+```
+Indeed, this function was determined to potentially allocate memory.
+
+To allow such optional features while still being able to prove that a function does not allocate if the allocating features are turned off, we may lift the _value_ `true` into the _type domain_, we do this by means of the `Val` type:
+```@example DEBUGGING
+function typed_myfun(::Val{verbose}) where verbose
+    a = 0.0
+    for i = 1:3
+        a = a + i
+        verbose && @info "a = $a"
+    end
+end
+
+check_allocs(typed_myfun, (Val{false},)) |> length
+```
+
+The compiler, and thus also AllocCheck, now knows that the value of `verbose` is `false`, since this is encoded in the _type_ `Val{false}`. The compiler can use this knowledge to figure out that the `@info` statement won't be executed, and thus prove that the function will not allocate memory.
+
+The user may still use this function with the debug print enabled by calling it like
+```@example DEBUGGING
+typed_myfun(Val{true}())
+```
+
+
+## Advanced: Constant propagation
+Sometimes, the compiler is able to use _constant propagation_ to determine what path through a program will be taken based on the _value of constants_. We demonstrate this effect below, where the value `verbose = false` is hard-coded
+```@example DEBUGGING
+my_outer_function() = myfun(false) # Hard coded value false
+check_allocs(my_outer_function, ()) |> length
+```
+When looking at `my_outer_function`, the compiler knows that `verbose = false` since this constant is hard coded into the program, and the compiler thus has the same amount of information here as when the value was lifted into the type domain. Constant propagation is considered a performance optimization that the compiler may or may not perform, and it is thus recommended to use the `Val` type to lift values into the type domain to guarantee that the compiler will use this information.
\ No newline at end of file

From ec36feaa90e7c0f3c43ad3b7e05dc14caf131eb1 Mon Sep 17 00:00:00 2001
From: Fredrik Bagge Carlson <baggepinnen@gmail.com>
Date: Thu, 19 Oct 2023 03:59:22 +0200
Subject: [PATCH 2/9] add more elaborate workflow in hot loop tutorial

tidy up
---
 docs/src/tutorials/hot_loop.md                | 36 +++++++++++++++++--
 .../optional_debugging_and_logging.md         |  1 +
 2 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/docs/src/tutorials/hot_loop.md b/docs/src/tutorials/hot_loop.md
index 0321e6e..13e8816 100644
--- a/docs/src/tutorials/hot_loop.md
+++ b/docs/src/tutorials/hot_loop.md
@@ -14,9 +14,10 @@ function run_almost_forever()
 end
 
 # Silly implementations of the functions used in the example
-sample_measurement() = randn()
+sample_measurement() = 2.0
 controller(y) = -2y
 apply_control(u) = nothing
+nothing # hide
 ```
 
 Here, the primary concern is the loop, while the preamble of the function should be allowed to allocate memory. The recommended strategy in this case is to refactor the function into a separate preamble and loop, like this
@@ -30,12 +31,13 @@ end
 function run_almost_forever!(logvector) # The hot loop that is allocation free
     for i = eachindex(logvector) # Run a hot loop that may not allocate
         y = sample_measurement()
-        logvector[i] = y
+        @inbounds logvector[i] = y
         u = controller(y)
         apply_control(u)
         Libc.systemsleep(0.01)
     end
 end
+nothing # hide
 ```
 
 We may now analyze the loop function `run_almost_forever!` to verify that it does not allocate memory:
@@ -45,3 +47,33 @@ allocs = check_allocs(run_almost_forever!, (Vector{Float64},));
 @test isempty(allocs)
 ```
 
+
+## More complicated initialization
+In practice, a function may need to perform several distinct allocations upfront, including potentially allocating objects of potentially complicated types, like closures etc. In situations like this, the following pattern may be useful:
+```julia
+struct Workspace
+    ... # All you need to run the hot loop
+end
+
+function setup()
+    # Allocate and initialize the workspace
+    return workspace
+end
+
+function run!(workspace::Workspace)
+    ... # The hot loop
+end
+
+function run()
+    workspace = setup()
+    run!(workspace)
+end
+```
+
+Where `workspace` is either a custom struct designed to serve as a workspace for the hot loop, or simply a tuple of all the objects required.
+
+The benefit of breaking the function up into two parts which are called from a third, is that we may now create the workspace object individually, and use it to compute the type of the arguments to the `run!` function that we are interested in analyzing:
+```julia
+workspace = setup()
+allocs = check_allocs(run!, (typeof(workspace),))
+```
\ No newline at end of file
diff --git a/docs/src/tutorials/optional_debugging_and_logging.md b/docs/src/tutorials/optional_debugging_and_logging.md
index 6b03a0b..7c3a0c4 100644
--- a/docs/src/tutorials/optional_debugging_and_logging.md
+++ b/docs/src/tutorials/optional_debugging_and_logging.md
@@ -9,6 +9,7 @@ function myfun(verbose::Bool)
         verbose && @info "a = $a"
     end
 end
+nothing # hide
 ```
 Here, the printing of some relevant information is only performed if `verbose = true`. While the printing is optional, and not performed if `verbose = false`, [`check_allocs`](@ref) operates on _types rather than values_, i.e., `check_allocs` only knows that the argument is of type `Bool`, not that it may have the value `false`:
 ```@example DEBUGGING

From bac12c23e953df5b5427a291fc64a8dc1dc5cff2 Mon Sep 17 00:00:00 2001
From: Fredrik Bagge Carlson <baggepinnen@gmail.com>
Date: Fri, 10 Nov 2023 08:14:19 +0100
Subject: [PATCH 3/9] reflect that `GC.enable` is marked allocation free

---
 docs/src/tutorials/error_recovery.md | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/docs/src/tutorials/error_recovery.md b/docs/src/tutorials/error_recovery.md
index c4ded97..1098591 100644
--- a/docs/src/tutorials/error_recovery.md
+++ b/docs/src/tutorials/error_recovery.md
@@ -32,12 +32,7 @@ using AllocCheck, Test
 allocs = check_allocs(treading_lightly, (); ignore_throw=true) # Check that it's safe to proceed
 ```
 
-[`check_allocs`](@ref) returned a single allocation instance, associated with turning the GC back on. This is not a problem here since when this would hypothetically occur, we have already executed the hot loop and recovered from the error. To make sure that this is indeed the allocation we are seeing, we need to verify the identity of the allocation before proceeding. We may do this by verifying that the allocation comes from the `enable` function:
-```@example ERROR
-@test only(allocs).backtrace[1].func === :enable
-```
-
-The compiler may in the future become smarter and elide any allocation we are seeing, so this test may fail in the future. However, having checked that the only allocations that occur are acceptable to us, it's now safe to proceed:
+[`check_allocs`](@ref) returned zero allocations.
 
 ```@example ERROR
 val = treading_lightly()

From fbec5cf4cafc69c77f43b69454989cdad9b94e46 Mon Sep 17 00:00:00 2001
From: Fredrik Bagge Carlson <baggepinnen@gmail.com>
Date: Fri, 10 Nov 2023 08:15:41 +0100
Subject: [PATCH 4/9] reference julia docs on value types

---
 docs/src/tutorials/optional_debugging_and_logging.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/src/tutorials/optional_debugging_and_logging.md b/docs/src/tutorials/optional_debugging_and_logging.md
index 7c3a0c4..91624dd 100644
--- a/docs/src/tutorials/optional_debugging_and_logging.md
+++ b/docs/src/tutorials/optional_debugging_and_logging.md
@@ -18,7 +18,7 @@ check_allocs(myfun, (Bool,)) |> length
 ```
 Indeed, this function was determined to potentially allocate memory.
 
-To allow such optional features while still being able to prove that a function does not allocate if the allocating features are turned off, we may lift the _value_ `true` into the _type domain_, we do this by means of the `Val` type:
+To allow such optional features while still being able to prove that a function does not allocate if the allocating features are turned off, we may [lift the _value_ `true` into the _type domain_](https://docs.julialang.org/en/v1/manual/types/#%22Value-types%22). We do this by means of the `Val` type:
 ```@example DEBUGGING
 function typed_myfun(::Val{verbose}) where verbose
     a = 0.0

From 7c5a6d5acb42fd89b2c2987498e5a37f2c2e706d Mon Sep 17 00:00:00 2001
From: Fredrik Bagge Carlson <baggepinnen@gmail.com>
Date: Fri, 10 Nov 2023 08:22:36 +0100
Subject: [PATCH 5/9] elaborate example creating a workspace struct

---
 docs/src/tutorials/hot_loop.md | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/docs/src/tutorials/hot_loop.md b/docs/src/tutorials/hot_loop.md
index 13e8816..2153370 100644
--- a/docs/src/tutorials/hot_loop.md
+++ b/docs/src/tutorials/hot_loop.md
@@ -52,16 +52,24 @@ allocs = check_allocs(run_almost_forever!, (Vector{Float64},));
 In practice, a function may need to perform several distinct allocations upfront, including potentially allocating objects of potentially complicated types, like closures etc. In situations like this, the following pattern may be useful:
 ```julia
 struct Workspace
-    ... # All you need to run the hot loop
+    # All you need to run the hot loop, for example:
+    cache1::Vector{Float64}
+    cache2::Matrix{Float64}
 end
 
-function setup()
+function setup(max_iterations::Int = 100_000)
     # Allocate and initialize the workspace
-    return workspace
+    cache1 = zeros(max_iterations)
+    cache2 = zeros(max_iterations, max_iterations)
+    return Workspace(cache1, cache2)
 end
 
 function run!(workspace::Workspace)
-    ... # The hot loop
+    # The hot loop
+    for i = eachindex(workspace.cache1)
+        workspace.cache1[i] = my_important_calculation() # The allocated cache is modified in place
+        ...
+    end
 end
 
 function run()
@@ -70,7 +78,7 @@ function run()
 end
 ```
 
-Where `workspace` is either a custom struct designed to serve as a workspace for the hot loop, or simply a tuple of all the objects required.
+Here, `workspace` is a custom struct designed to serve as a workspace for the hot loop, but it could also be realized as a simple tuple of all the allocated objects required for the computations. Note that the struct `Workspace` in this example is not marked as mutable, but its contents, the two cache arrays, are. This means that the `run!` function may still modify the contents of the cache arrays.
 
 The benefit of breaking the function up into two parts which are called from a third, is that we may now create the workspace object individually, and use it to compute the type of the arguments to the `run!` function that we are interested in analyzing:
 ```julia

From 7dd6101374d9d447589dfc7ada592710c2b241da Mon Sep 17 00:00:00 2001
From: Fredrik Bagge Carlson <baggepinnen@gmail.com>
Date: Fri, 10 Nov 2023 09:15:27 +0100
Subject: [PATCH 6/9] reflect `ignore_throw` by default

---
 docs/src/tutorials/error_recovery.md | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/docs/src/tutorials/error_recovery.md b/docs/src/tutorials/error_recovery.md
index 1098591..eb9abab 100644
--- a/docs/src/tutorials/error_recovery.md
+++ b/docs/src/tutorials/error_recovery.md
@@ -2,7 +2,7 @@
 
 Safety-critical real-time systems are often required to have performance critical error-recovery logic. While errors are not supposed to occur, they sometimes do anyways 😦, and when they do, we may want to make sure that the recovery logic runs with minimum latency.
 
-In the following example, we are executing a loop that may throw an error. We can tell [`check_allocs`](@ref) that we allow allocations on the error path by passing `ignore_throw=true`, but a bigger problem may arise, the garbage collector may be invoked by the allocation, and introduce an unbounded latency before we execute the error recovery logic.
+In the following example, we are executing a loop that may throw an error. By default [`check_allocs`](@ref) allows allocations on the error path, i.e., allocations that occur as a consequence of an exception being thrown. This can cause the garbage collector to be invoked by the allocation, and introduce an unbounded latency before we execute the error recovery logic.
 
 To guard ourselves against this, we may follow these steps
 1. Prove that the function does not allocate memory except for on exception paths.
@@ -29,13 +29,32 @@ end
 exit_gracefully() = println("Calling mother")
 
 using AllocCheck, Test
-allocs = check_allocs(treading_lightly, (); ignore_throw=true) # Check that it's safe to proceed
+allocs = check_allocs(treading_lightly, ()) # Check that it's safe to proceed
+```
+```@example ERROR
+@test isempty(allocs)
 ```
 
-[`check_allocs`](@ref) returned zero allocations.
+[`check_allocs`](@ref) returned zero allocations. If we invoke [`check_allocs`](@ref) with the flag `ignore_throw = false`, we will see that the function may allocate memory on the error path:
+
+```@example ERROR
+allocs = check_allocs(treading_lightly, (); ignore_throw = false)
+length(allocs)
+```
+
+Finally, we test that the function is producing the expected result:
 
 ```@example ERROR
 val = treading_lightly()
 @test val ≈ 22.468278186204103  # hide
 ```
 
+In this example, we accepted an allocation on the exception path with the motivation that it occurred once only, after which the program was terminated. Implicit in this approach is an assumption that the exception path does not allocate too much memory to execute the error recovery logic before the garbage collector is turned back on. We should thus convince ourselves that this assumption is valid, e.g., by means of testing:
+    
+```@example ERROR
+treading_lightly() # Warm start
+allocated_memory = @allocated treading_lightly() # A call that triggers the exception path
+@test allocated_memory < 1e4
+```
+
+The allocations sites reported with the flag `ignore_throw = false` may be used as a guide as to what to test.
\ No newline at end of file

From 502f23e91330e2fc907f7405a026c53804eb7590 Mon Sep 17 00:00:00 2001
From: Cody Tapscott <topolarity@tapscott.me>
Date: Wed, 15 Nov 2023 10:42:30 -0500
Subject: [PATCH 7/9] Comment out broken test

I'm not sure why this is failing in the Documenter environment, but for
now let's just comment it out and try to investigate this upstream.
---
 docs/src/tutorials/error_recovery.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/src/tutorials/error_recovery.md b/docs/src/tutorials/error_recovery.md
index eb9abab..212a5e1 100644
--- a/docs/src/tutorials/error_recovery.md
+++ b/docs/src/tutorials/error_recovery.md
@@ -54,7 +54,7 @@ In this example, we accepted an allocation on the exception path with the motiva
 ```@example ERROR
 treading_lightly() # Warm start
 allocated_memory = @allocated treading_lightly() # A call that triggers the exception path
-@test allocated_memory < 1e4
+# @test allocated_memory < 1e4
 ```
 
-The allocations sites reported with the flag `ignore_throw = false` may be used as a guide as to what to test.
\ No newline at end of file
+The allocation sites reported with the flag `ignore_throw = false` may be used as a guide as to what to test.

From 773c64e1662ee9f93a580b90225865352bf28e32 Mon Sep 17 00:00:00 2001
From: Cody Tapscott <topolarity@tapscott.me>
Date: Fri, 17 Nov 2023 11:39:01 -0500
Subject: [PATCH 8/9] Update docs for new `@check_allocs` macro

---
 README.md                                     |  4 +-
 docs/src/api.md                               |  6 ++-
 docs/src/index.md                             | 47 ++++++++++++-------
 .../optional_debugging_and_logging.md         | 36 ++++++++++----
 src/AllocCheck.jl                             |  2 +-
 src/macro.jl                                  | 33 +++++++++++++
 6 files changed, 97 insertions(+), 31 deletions(-)

diff --git a/README.md b/README.md
index c5587d6..50943b1 100644
--- a/README.md
+++ b/README.md
@@ -28,6 +28,6 @@ julia> length(check_allocs(sin, (Float64,); ignore_throw=true)) # ignore allocat
 
 #### Limitations
 
- 1. Runtime dispatch
+ Every call into a `@check_allocs` function behaves like a dynamic dispatch. This means that it can trigger compilation dynamically (involving lots of allocation), and even when the function has already been compiled, a small amount of allocation is still expected on function entry.
 
-   Any runtime dispatch is conservatively assumed to allocate.
+ For most applications, the solution is to use `@check_allocs` to wrap your top-level entry point or your main application loop, in which case those applications are only incurred once. `@check_allocs` will guarantee that no dynamic compilation or allocation occurs once your function has started running.
diff --git a/docs/src/api.md b/docs/src/api.md
index f6882c9..f262571 100644
--- a/docs/src/api.md
+++ b/docs/src/api.md
@@ -8,4 +8,8 @@
 
 ```@docs
 AllocCheck.check_allocs
-```
\ No newline at end of file
+```
+
+```@docs
+AllocCheck.@check_allocs
+```
diff --git a/docs/src/index.md b/docs/src/index.md
index 13b268d..1d35aeb 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -6,47 +6,58 @@ AllocCheck operates on _functions_, trying to statically determine wether or not
 
 ## Getting started
 
-The main entry point to check allocations is the function [`check_allocs`](@ref), which takes the function to check as the first argument, and a tuple of argument types as the second argument:
-```@example README
+The primary entry point to check allocations is the macro [`@check_allocs`](@ref) which is used to annotate a function definition that you'd like to enforce allocation checks for:
+```@repl README
 using AllocCheck
-mymod(x) = mod(x, 2.5)
+using Test # hide
+@check_allocs mymod(x) = mod(x, 2.5)
 
-check_allocs(mymod, (Float64,))
+mymod(1.5) # call automatically checked for allocations
 ```
-This returned an empty array, indicating that the function was proven to not allocate any memory 🎉
+This call happened without error, indicating that the function was proven to not allocate any memory after it starts 🎉
 
 
 When used on a function that may allocate memory
-```@example README
-linsolve(a, b) = a \ b
+```@repl README
+@check_allocs linsolve(a, b) = a \ b
 
-allocs = check_allocs(linsolve, (Matrix{Float64}, Vector{Float64}));
-length(allocs)
+linsolve(rand(10,10), rand(10))
 ```
-we get a non-empty array of allocation instances. Each allocation instance contains some useful information, for example
+the function call raises an `AllocCheckFailure`.
+
+The `errors` field allows us to inspect the individual errors to get some useful information. For example:
 
 ```@example README
-allocs[1]
+try
+  linsolve(rand(10,10), rand(10))
+catch err
+  err.allocs[1]
+end
 ```
 
 we see what type of object was allocated, and where in the code the allocation appeared.
 
 
 ### Functions that throw exceptions
+
 Some functions that we do not expect may allocate memory, like `sin`, actually may:
 ```@example README
-length(check_allocs(sin, (Float64,)))
+@allocated try sin(Inf) catch end
 ```
-The reason for this is that `sin` may **throw an error**, and the exception path requires some allocations. We can ignore allocations that only happen when throwing errors by passing `ignore_throw=true`:
 
+The reason for this is that `sin` needs to allocate if it **throws an error**.
+
+By default, `@check_allocs` ignores all such allocations and assumes that no exceptions are thrown. If you care about detecting these allocations anyway, you can use `ignore_throw=false`:
 ```@example README
-allocs = check_allocs(sin, (Float64,); ignore_throw=true) # ignore allocations that only happen when throwing errors
+@check_allocs mysin1(x) = sin(x)
+@check_allocs ignore_throw=false mysin2(x) = sin(x)
 
-using Test
-@test isempty(allocs)
+@test mysin1(1.5) == sin(1.5)
+@test_throws AllocCheckFailure mysin2(1.5)
 ```
 
 ## Limitations
 
- 1. Runtime dispatch
-   Any runtime dispatch is conservatively assumed to allocate.
\ No newline at end of file
+ Every call into a `@check_allocs` function behaves like a dynamic dispatch. This means that it can trigger compilation dynamically (involving lots of allocation), and even when the function has already been compiled, a small amount of allocation is still expected on function entry.
+
+ For most applications, the solution is to use `@check_allocs` to wrap your top-level entry point or your main application loop, in which case those allocations are only incurred once. `@check_allocs` will guarantee that no dynamic compilation or allocation occurs once your function has started running.
diff --git a/docs/src/tutorials/optional_debugging_and_logging.md b/docs/src/tutorials/optional_debugging_and_logging.md
index 91624dd..e069b34 100644
--- a/docs/src/tutorials/optional_debugging_and_logging.md
+++ b/docs/src/tutorials/optional_debugging_and_logging.md
@@ -2,7 +2,8 @@
 
 For debugging purposes, it may sometimes be beneficial to include logging statements in a function, for example
 ```@example DEBUGGING
-function myfun(verbose::Bool)
+using AllocCheck # hide
+@check_allocs function myfun(verbose::Bool)
     a = 0.0
     for i = 1:3
         a = a + i
@@ -12,9 +13,8 @@ end
 nothing # hide
 ```
 Here, the printing of some relevant information is only performed if `verbose = true`. While the printing is optional, and not performed if `verbose = false`, [`check_allocs`](@ref) operates on _types rather than values_, i.e., `check_allocs` only knows that the argument is of type `Bool`, not that it may have the value `false`:
-```@example DEBUGGING
-using AllocCheck
-check_allocs(myfun, (Bool,)) |> length
+```@repl DEBUGGING
+myfun(false)
 ```
 Indeed, this function was determined to potentially allocate memory.
 
@@ -28,7 +28,7 @@ function typed_myfun(::Val{verbose}) where verbose
     end
 end
 
-check_allocs(typed_myfun, (Val{false},)) |> length
+length(check_allocs(typed_myfun, (Val{false},)))
 ```
 
 The compiler, and thus also AllocCheck, now knows that the value of `verbose` is `false`, since this is encoded in the _type_ `Val{false}`. The compiler can use this knowledge to figure out that the `@info` statement won't be executed, and thus prove that the function will not allocate memory.
@@ -40,9 +40,27 @@ typed_myfun(Val{true}())
 
 
 ## Advanced: Constant propagation
-Sometimes, the compiler is able to use _constant propagation_ to determine what path through a program will be taken based on the _value of constants_. We demonstrate this effect below, where the value `verbose = false` is hard-coded
+
+Sometimes, code written without this trick will still work just fine with AllocCheck.
+
+That's because in some limited scenarios, the compiler is able to use _constant propagation_ to determine what path through a program will be taken based on the _value of constants_.
+
+We demonstrate this effect below, where the value `verbose = false` is hard-coded into the function:
 ```@example DEBUGGING
-my_outer_function() = myfun(false) # Hard coded value false
-check_allocs(my_outer_function, ()) |> length
+@check_allocs function constant_myfun()
+    verbose = false
+    a = 0.0
+    for i = 1:3
+        a = a + i
+        verbose && @info "a = $a"
+    end
+    return a
+end
+
+constant_myfun()
 ```
-When looking at `my_outer_function`, the compiler knows that `verbose = false` since this constant is hard coded into the program, and the compiler thus has the same amount of information here as when the value was lifted into the type domain. Constant propagation is considered a performance optimization that the compiler may or may not perform, and it is thus recommended to use the `Val` type to lift values into the type domain to guarantee that the compiler will use this information.
\ No newline at end of file
+
+When looking at `constant_myfun`, the compiler knows that `verbose = false` since this constant is hard coded into the program. Sometimes, the compiler can even propagate constant values all the way into called functions.
+
+This is useful, but it's not guaranteed to happen in general. The `Val{T}` trick described here ensures that the variable is propagated as a constant everywhere it is required.
+
diff --git a/src/AllocCheck.jl b/src/AllocCheck.jl
index d316d6d..d755448 100644
--- a/src/AllocCheck.jl
+++ b/src/AllocCheck.jl
@@ -295,7 +295,7 @@ function find_allocs!(mod::LLVM.Module, meta; ignore_throw=true)
 end
 
 """
-    check_allocs(func, types; entry_abi=:specfunc, ret_mod=false)
+    check_allocs(func, types; ignore_throw=true)
 
 Compiles the given function and types to LLVM IR and checks for allocations.
 Returns a vector of `AllocationSite` structs, each containing a `CallInst` and a backtrace.
diff --git a/src/macro.jl b/src/macro.jl
index 1eb29d2..bf46a31 100644
--- a/src/macro.jl
+++ b/src/macro.jl
@@ -23,6 +23,39 @@ function extract_keywords(ex0)
     return kws, arg
 end
 
+"""
+    @check_allocs ignore_throw=true (function def)
+
+Wraps the provided function definition so that all calls to it will be automatically
+checked for allocations.
+
+If the check fails, an `AllocCheckFailure` exception is thrown containing the detailed
+failures, including the backtrace for each defect.
+
+Note: All calls to the wrapped function are effectively a dynamic dispatch, which
+means they are type-unstable and may allocate memory at function _entry_. `@check_allocs`
+only guarantees the absence of allocations after the function has started running.
+
+# Example
+```jldoctest
+julia> @check_allocs multiply(x,y) = x*y
+multiply (generic function with 1 method)
+
+julia> multiply(1.5, 3.5) # no allocations for Float64
+5.25
+
+julia> multiply(rand(3,3), rand(3,3)) # matmul needs to allocate the result
+ERROR: @check_alloc function contains 1 allocations.
+
+Stacktrace:
+ [1] macro expansion
+   @ ~/repos/AllocCheck/src/macro.jl:134 [inlined]
+ [2] multiply(x::Matrix{Float64}, y::Matrix{Float64})
+   @ Main ./REPL[2]:133
+ [3] top-level scope
+   @ REPL[5]:1
+```
+"""
 macro check_allocs(ex...)
     kws, body = extract_keywords(ex)
     if _is_func_def(body)

From 8a1132168870041d4c9ddb796cfe2b2ff968305a Mon Sep 17 00:00:00 2001
From: Cody Tapscott <topolarity@tapscott.me>
Date: Fri, 17 Nov 2023 11:47:58 -0500
Subject: [PATCH 9/9] Update README based on new docs

---
 README.md | 66 +++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 54 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index 50943b1..a9b4254 100644
--- a/README.md
+++ b/README.md
@@ -4,26 +4,68 @@
 
 [![](https://img.shields.io/badge/docs-dev-blue.svg)](https://juliacomputing.github.io/AllocCheck.jl/dev/)
 
-AllocCheck.jl is a Julia package that statically checks if a function call may allocate, analyzing the generated LLVM IR of it and it's callees using LLVM.jl and GPUCompiler.jl
+[AllocCheck.jl](https://github.com/JuliaComputing/AllocCheck.jl) is a Julia package that statically checks if a function call may allocate, analyzing the generated LLVM IR of it and its callees using LLVM.jl and GPUCompiler.jl.
 
-#### Examples
+AllocCheck operates on _functions_, trying to statically determine whether or not a function _may_ allocate memory, and if so, _where_ that allocation appears. This is different from measuring allocations using, e.g., `@time` or `@allocated`, which measure the allocations that _did_ happen during the execution of a function.
 
+## Getting started
+
+The primary entry point to check allocations is the macro [`@check_allocs`](https://juliacomputing.github.io/AllocCheck.jl/dev/api/) which is used to annotate a function definition that you'd like to enforce allocation checks for:
 ```julia
-julia> mymod(x) = mod(x, 2.5)
+julia> using AllocCheck
+
+julia> @check_allocs multiply(x,y) = x * y
+multiply (generic function with 1 method)
+
+julia> multiply(1.5, 2.5) # call automatically checked for allocations
+3.75
+
+julia> multiply(rand(3,3), rand(3,3)) # result matrix requires an allocation
+ERROR: @check_alloc function contains 1 allocations.
+```
+
+The `multiply(::Float64, ::Float64)` call happened without error, indicating that the function was proven not to allocate. On the other hand, the `multiply(::Matrix{Float64}, ::Matrix{Float64})` call raised an `AllocCheckFailure` due to one internal allocation.
 
-julia> length(check_allocs(mymod, (Float64,)))
-0
+The `allocs` field can be used to inspect the individual errors:
+```julia
+julia> try multiply(rand(3,3), rand(3,3)) catch err err.allocs[1] end
+Allocation of Matrix{Float64} in ./boot.jl:477
+  | Array{T,2}(::UndefInitializer, m::Int, n::Int) where {T} =
 
-julia> linsolve(a, b) = a \ b
+Stacktrace:
+ [1] Array
+   @ ./boot.jl:477 [inlined]
+ [2] Array
+   @ ./boot.jl:485 [inlined]
+ [3] similar
+   @ ./array.jl:418 [inlined]
+ [4] *(A::Matrix{Float64}, B::Matrix{Float64})
+   @ LinearAlgebra ~/.julia/juliaup/julia-1.10.0-rc1+0.x64.linux.gnu/share/julia/stdlib/v1.10/LinearAlgebra/src/matmul.jl:113
+ [5] var"##multiply#235"(x::Matrix{Float64}, y::Matrix{Float64})
+   @ Main ./REPL[13]:1
+```
+
+### Functions that throw exceptions
+
+Some functions that we would not expect to allocate memory, like `sin`, actually may:
+```julia
+julia> @allocated try sin(Inf) catch end
+48
+```
+
+The reason for this is that `sin` needs to allocate if it **throws an error**.
+
+By default, `@check_allocs` ignores all such allocations and assumes that no exceptions are thrown. If you care about detecting these allocations anyway, you can use `ignore_throw=false`:
+```julia
+julia> @check_allocs mysin1(x) = sin(x)
 
-julia> length(check_allocs(linsolve, (Matrix{Float64}, Vector{Float64})))
-175
+julia> @check_allocs ignore_throw=false mysin2(x) = sin(x)
 
-julia> length(check_allocs(sin, (Float64,)))
-2
+julia> mysin1(1.5)
+0.9974949866040544
 
-julia> length(check_allocs(sin, (Float64,); ignore_throw=true)) # ignore allocations that only happen when throwing errors
-0
+julia> mysin2(1.5)
+ERROR: @check_alloc function contains 2 allocations.
 ```
 
 #### Limitations