From a2b56f97406b79109b0ce55a4f308d3222adebcb Mon Sep 17 00:00:00 2001
From: Matthew Parkinson
Date: Wed, 29 Jan 2020 11:25:13 +0000
Subject: [PATCH 1/4] Remove a notify_using that regressed perf

The performance on Windows was significantly regressed by the notify_using
call during bump allocation. This change removes it; the pages appear to
already be committed by the large allocator.
---
 src/mem/slab.h | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/src/mem/slab.h b/src/mem/slab.h
index 7d0db15ac..5bcbd247d 100644
--- a/src/mem/slab.h
+++ b/src/mem/slab.h
@@ -68,8 +68,10 @@ namespace snmalloc
       }
       else
       {
+        // Allocate the last object on the current page if there is one,
+        // and then thread the next free list worth of allocations.
+        bool crossed_page_boundary = false;
         void* curr = nullptr;
-        bool commit = false;
         while (true)
         {
           size_t newbumpptr = bumpptr + rsize;
@@ -78,15 +80,12 @@ namespace snmalloc

           if (alignedbumpptr != alignednewbumpptr)
           {
-            // We have committed once already.
-            if (commit)
+            // We have crossed a page boundary already, so
+            // lets stop building our free list.
+            if (crossed_page_boundary)
               break;

-            memory_provider.template notify_using(
-              pointer_offset(this, alignedbumpptr),
-              alignednewbumpptr - alignedbumpptr);
-
-            commit = true;
+            crossed_page_boundary = true;
           }

           if (curr == nullptr)

From 2e4b28999105510c0623af769adf58d8b45c4d44 Mon Sep 17 00:00:00 2001
From: Matthew Parkinson
Date: Wed, 29 Jan 2020 11:58:57 +0000
Subject: [PATCH 2/4] Removed DecommitAll strategy

The DecommitAll strategy performs badly. We are not functionally testing
it, and it does not seem worth investing in, given its performance.
---
 src/mem/alloc.h       | 13 +++++--------
 src/mem/allocconfig.h |  4 ----
 src/mem/mediumslab.h  | 10 ++--------
 src/mem/slab.h        | 12 ++++++------
 src/mem/superslab.h   | 35 +++++------------------------------
 5 files changed, 18 insertions(+), 56 deletions(-)

diff --git a/src/mem/alloc.h b/src/mem/alloc.h
index dd88db227..43eef2059 100644
--- a/src/mem/alloc.h
+++ b/src/mem/alloc.h
@@ -930,8 +930,7 @@ namespace snmalloc

       if (super != nullptr)
       {
-        Slab* slab =
-          super->alloc_short_slab(sizeclass, large_allocator.memory_provider);
+        Slab* slab = super->alloc_short_slab(sizeclass);
         assert(super->is_full());
         return slab;
       }
@@ -941,8 +940,7 @@ namespace snmalloc
       if ((allow_reserve == NoReserve) && (super == nullptr))
         return nullptr;

-      Slab* slab =
-        super->alloc_short_slab(sizeclass, large_allocator.memory_provider);
+      Slab* slab = super->alloc_short_slab(sizeclass);
       reposition_superslab(super);
       return slab;
     }
@@ -952,8 +950,7 @@ namespace snmalloc
       if ((allow_reserve == NoReserve) && (super == nullptr))
         return nullptr;

-      Slab* slab =
-        super->alloc_slab(sizeclass, large_allocator.memory_provider);
+      Slab* slab = super->alloc_slab(sizeclass);
       reposition_superslab(super);
       return slab;
     }
@@ -1074,7 +1071,7 @@ namespace snmalloc
         SlabList* sl = &small_classes[sizeclass];
         Slab* slab = Metaslab::get_slab(p);
         Superslab::Action a =
-          slab->dealloc_slow(sl, super, p, large_allocator.memory_provider);
+          slab->dealloc_slow(sl, super, p);
         if (likely(a == Superslab::NoSlabReturn))
           return;
         stats().sizeclass_dealloc_slab(sizeclass);
@@ -1191,7 +1188,7 @@ namespace snmalloc
     {
       MEASURE_TIME(medium_dealloc, 4, 16);
       stats().sizeclass_dealloc(sizeclass);
-      bool was_full = slab->dealloc(p, large_allocator.memory_provider);
+      bool was_full = slab->dealloc(p);

 #ifdef CHECK_CLIENT
       if (!is_multiple_of_sizeclass(
diff --git a/src/mem/allocconfig.h b/src/mem/allocconfig.h
index 6238af32f..cc7713639 100644
--- a/src/mem/allocconfig.h
+++ b/src/mem/allocconfig.h
@@ -68,10 +68,6 @@ namespace snmalloc
      * Decommit superslabs when they are entirely empty.
      */
     DecommitSuper,
-    /**
-     * Decommit all slabs once they are empty.
-     */
-    DecommitAll,
     /**
      * Decommit superslabs only when we are informed of memory pressure by the
      * OS, do not decommit anything in normal operation.
diff --git a/src/mem/mediumslab.h b/src/mem/mediumslab.h
index 835c05c92..0ce171e8c 100644
--- a/src/mem/mediumslab.h
+++ b/src/mem/mediumslab.h
@@ -87,16 +87,13 @@ namespace snmalloc
       assert(is_aligned_block(p, OS_PAGE_SIZE));
       size = bits::align_up(size, OS_PAGE_SIZE);

-      if constexpr (decommit_strategy == DecommitAll)
-        memory_provider.template notify_using(p, size);
-      else if constexpr (zero_mem == YesZero)
+      if constexpr (zero_mem == YesZero)
         memory_provider.template zero(p, size);

       return p;
     }

-    template<typename MemoryProvider>
-    bool dealloc(void* p, MemoryProvider& memory_provider)
+    bool dealloc(void* p)
     {
       assert(head > 0);

@@ -105,9 +102,6 @@ namespace snmalloc
       free++;
       stack[--head] = pointer_to_index(p);

-      if constexpr (decommit_strategy == DecommitAll)
-        memory_provider.notify_not_using(p, sizeclass_to_size(sizeclass));
-
       return was_full;
     }

diff --git a/src/mem/slab.h b/src/mem/slab.h
index 5bcbd247d..ce71f7636 100644
--- a/src/mem/slab.h
+++ b/src/mem/slab.h
@@ -178,9 +178,8 @@ namespace snmalloc
     // This does not need to remove the "use" as done by the fast path.
     // Returns a complex return code for managing the superslab meta data.
     // i.e. This deallocation could make an entire superslab free.
-    template<typename MemoryProvider>
     SNMALLOC_SLOW_PATH typename Superslab::Action dealloc_slow(
-      SlabList* sl, Superslab* super, void* p, MemoryProvider& memory_provider)
+      SlabList* sl, Superslab* super, void* p)
     {
       Metaslab& meta = super->get_meta(this);

@@ -191,9 +190,9 @@ namespace snmalloc
       {
         // Dealloc on the superslab.
         if (is_short())
-          return super->dealloc_short_slab(memory_provider);
+          return super->dealloc_short_slab();

-        return super->dealloc_slab(this, memory_provider);
+        return super->dealloc_slab(this);
       }
       // Update the head and the sizeclass link.
       uint16_t index = pointer_to_index(p);
@@ -212,10 +211,11 @@ namespace snmalloc
       sl->remove(meta.get_link(this));

       if (is_short())
-        return super->dealloc_short_slab(memory_provider);
+        return super->dealloc_short_slab();

-      return super->dealloc_slab(this, memory_provider);
+      return super->dealloc_slab(this);
     }
+
     bool is_short()
     {
       return Metaslab::is_short(this);
diff --git a/src/mem/superslab.h b/src/mem/superslab.h
index 70a86d106..e74987843 100644
--- a/src/mem/superslab.h
+++ b/src/mem/superslab.h
@@ -154,29 +154,21 @@ namespace snmalloc
       return meta[slab_to_index(slab)];
     }

-    template<typename MemoryProvider>
-    Slab*
-    alloc_short_slab(sizeclass_t sizeclass, MemoryProvider& memory_provider)
+    Slab* alloc_short_slab(sizeclass_t sizeclass)
     {
       if ((used & 1) == 1)
-        return alloc_slab(sizeclass, memory_provider);
+        return alloc_slab(sizeclass);

       meta[0].allocated = 1;
       meta[0].head = nullptr;
       meta[0].sizeclass = static_cast(sizeclass);
       meta[0].link = get_initial_offset(sizeclass, true);

-      {
-        memory_provider.template notify_using(
-          pointer_offset(this, OS_PAGE_SIZE), SLAB_SIZE - OS_PAGE_SIZE);
-      }
-
       used++;
       return (Slab*)this;
     }

-    template<typename MemoryProvider>
-    Slab* alloc_slab(sizeclass_t sizeclass, MemoryProvider& memory_provider)
+    Slab* alloc_slab(sizeclass_t sizeclass)
     {
       uint8_t h = head;
       Slab* slab = pointer_cast<Slab>(
@@ -192,17 +184,11 @@ namespace snmalloc
       head = h + n + 1;
       used += 2;

-      if constexpr (decommit_strategy == DecommitAll)
-      {
-        memory_provider.template notify_using(slab, SLAB_SIZE);
-      }
-
       return slab;
     }

     // Returns true, if this alters the value of get_status
-    template<typename MemoryProvider>
-    Action dealloc_slab(Slab* slab, MemoryProvider& memory_provider)
+    Action dealloc_slab(Slab* slab)
     {
       // This is not the short slab.
       uint8_t index = static_cast<uint8_t>(slab_to_index(slab));
@@ -214,9 +200,6 @@ namespace snmalloc
       bool was_almost_full = is_almost_full();
       used -= 2;

-      if constexpr (decommit_strategy == DecommitAll)
-        memory_provider.notify_not_using(slab, SLAB_SIZE);
-
       assert(meta[index].is_unused());
       if (was_almost_full || is_empty())
         return StatusChange;
@@ -225,16 +208,8 @@ namespace snmalloc
     }

     // Returns true, if this alters the value of get_status
-    template<typename MemoryProvider>
-    Action dealloc_short_slab(MemoryProvider& memory_provider)
+    Action dealloc_short_slab()
     {
-      // This is the short slab.
-      if constexpr (decommit_strategy == DecommitAll)
-      {
-        memory_provider.notify_not_using(
-          pointer_offset(this, OS_PAGE_SIZE), SLAB_SIZE - OS_PAGE_SIZE);
-      }
-
       bool was_full = is_full();
       used--;

From 2e289573c8a8179b4583a6fcb891fd39cda714b6 Mon Sep 17 00:00:00 2001
From: Matthew Parkinson
Date: Wed, 29 Jan 2020 12:17:16 +0000
Subject: [PATCH 3/4] Move all decommit strategy into LargeAllocator

The code for decommit was distributed across the code base. Removing
DecommitAll means that it can now logically reside in the large allocator.
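
Roughly, the whole policy now reads in one place. The sketch below just
restates the condition added to dealloc() in the largealloc.h hunk of this
patch (see that hunk for the actual code); it is meant to line up with the
decommitted condition in alloc():

  // Skip decommit entirely for DecommitNone; otherwise decommit every
  // large size class, and the superslab size class (large_class == 0)
  // only under DecommitSuper.
  if ((decommit_strategy != DecommitNone)
      && (large_class != 0 || decommit_strategy == DecommitSuper))
  {
    size_t rsize = bits::one_at_bit(SUPERSLAB_BITS) << large_class;
    memory_provider.notify_not_using(
      pointer_offset(p, OS_PAGE_SIZE), rsize - OS_PAGE_SIZE);
  }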
---
 src/mem/alloc.h      | 28 +---------------------------
 src/mem/largealloc.h | 20 +++++++++++++++++++-
 2 files changed, 20 insertions(+), 28 deletions(-)

diff --git a/src/mem/alloc.h b/src/mem/alloc.h
index 43eef2059..8d1ae1228 100644
--- a/src/mem/alloc.h
+++ b/src/mem/alloc.h
@@ -1111,20 +1111,6 @@ namespace snmalloc
       {
         super_available.remove(super);

-        if constexpr (decommit_strategy == DecommitSuper)
-        {
-          large_allocator.memory_provider.notify_not_using(
-            pointer_offset(super, OS_PAGE_SIZE),
-            SUPERSLAB_SIZE - OS_PAGE_SIZE);
-        }
-        else if constexpr (decommit_strategy == DecommitSuperLazy)
-        {
-          static_assert(
-            pal_supports,
-            "A lazy decommit strategy cannot be implemented on platforms "
-            "without low memory notifications");
-        }
-
         chunkmap().clear_slab(super);
         large_allocator.dealloc(super, 0);
         stats().superslab_push();
@@ -1208,12 +1194,6 @@ namespace snmalloc
         sc->remove(slab);
       }

-      if constexpr (decommit_strategy == DecommitSuper)
-      {
-        large_allocator.memory_provider.notify_not_using(
-          pointer_offset(slab, OS_PAGE_SIZE), SUPERSLAB_SIZE - OS_PAGE_SIZE);
-      }
-
       chunkmap().clear_slab(slab);
       large_allocator.dealloc(slab, 0);
       stats().superslab_push();
@@ -1261,19 +1241,13 @@ namespace snmalloc
       MEASURE_TIME(large_dealloc, 4, 16);

       size_t size_bits = bits::next_pow2_bits(size);
-      size_t rsize = bits::one_at_bit(size_bits);
-      assert(rsize >= SUPERSLAB_SIZE);
+      assert(bits::one_at_bit(size_bits) >= SUPERSLAB_SIZE);
       size_t large_class = size_bits - SUPERSLAB_BITS;

       chunkmap().clear_large_size(p, size);

       stats().large_dealloc(large_class);

-      // Cross-reference largealloc's alloc() decommitted condition.
-      if ((decommit_strategy != DecommitNone) || (large_class > 0))
-        large_allocator.memory_provider.notify_not_using(
-          pointer_offset(p, OS_PAGE_SIZE), rsize - OS_PAGE_SIZE);
-
       // Initialise in order to set the correct SlabKind.
       Largeslab* slab = static_cast<Largeslab*>(p);
       slab->init();
diff --git a/src/mem/largealloc.h b/src/mem/largealloc.h
index c70a9b95d..291457cf2 100644
--- a/src/mem/largealloc.h
+++ b/src/mem/largealloc.h
@@ -362,7 +362,7 @@ namespace snmalloc
       bool decommitted =
         ((decommit_strategy == DecommitSuperLazy) &&
          (static_cast(p)->get_kind() == Decommitted)) ||
-        (large_class > 0) || (decommit_strategy != DecommitNone);
+        (large_class > 0) || (decommit_strategy == DecommitSuper);

       if (decommitted)
       {
@@ -392,6 +392,24 @@ namespace snmalloc

     void dealloc(void* p, size_t large_class)
     {
+      if constexpr (decommit_strategy == DecommitSuperLazy)
+      {
+        static_assert(
+          pal_supports,
+          "A lazy decommit strategy cannot be implemented on platforms "
+          "without low memory notifications");
+      }
+
+      // Cross-reference largealloc's alloc() decommitted condition.
+      if ((decommit_strategy != DecommitNone)
+          && (large_class != 0 || decommit_strategy == DecommitSuper))
+      {
+        size_t rsize = bits::one_at_bit(SUPERSLAB_BITS) << large_class;
+
+        memory_provider.notify_not_using(
+           pointer_offset(p, OS_PAGE_SIZE), rsize - OS_PAGE_SIZE);
+      }
+
       stats.superslab_push();
       memory_provider.large_stack[large_class].push(static_cast(p));
       memory_provider.lazy_decommit_if_needed();

From 053b5a30ef7eedc04908fcc6ca3d14d984217b9c Mon Sep 17 00:00:00 2001
From: Matthew Parkinson
Date: Wed, 29 Jan 2020 16:37:46 +0000
Subject: [PATCH 4/4] Minor code tidying.
---
 src/ds/helpers.h     |  2 +-
 src/mem/largealloc.h |  8 +++++---
 src/mem/metaslab.h   |  4 ++--
 src/mem/superslab.h  | 26 +++++++++++++-------------
 4 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/src/ds/helpers.h b/src/ds/helpers.h
index 5e22ef563..4e6905441 100644
--- a/src/ds/helpers.h
+++ b/src/ds/helpers.h
@@ -56,7 +56,7 @@ namespace snmalloc
       length == bits::next_pow2_const(length), "Must be a power of two.");

   private:
-    T value;
+    T value = 0;

   public:
     operator T()
diff --git a/src/mem/largealloc.h b/src/mem/largealloc.h
index 291457cf2..93c5cffba 100644
--- a/src/mem/largealloc.h
+++ b/src/mem/largealloc.h
@@ -80,7 +80,7 @@ namespace snmalloc
      */
     std::atomic last_low_memory_epoch = 0;
     std::atomic_flag lazy_decommit_guard;
-    void lazy_decommit()
+    SNMALLOC_SLOW_PATH void lazy_decommit()
     {
       // If another thread is try to do lazy decommit, let it continue. If
       // we try to parallelise this, we'll most likely end up waiting on the
@@ -93,6 +93,7 @@ namespace snmalloc
       // the memory that we can. Start with the small size classes so that we
       // hit cached superslabs first.
       // FIXME: We probably shouldn't do this all at once.
+      // FIXME: We currently Decommit all the sizeclasses larger than 0.
       for (size_t large_class = 0; large_class < NUM_LARGE_CLASSES;
            large_class++)
       {
@@ -327,7 +328,8 @@ namespace snmalloc
     void* alloc(size_t large_class, size_t size)
     {
       size_t rsize = bits::one_at_bit(SUPERSLAB_BITS) << large_class;
-      if (size == 0)
+      // For superslab size, we always commit the whole range.
+      if (large_class == 0)
         size = rsize;

       void* p = memory_provider.large_stack[large_class].pop();
@@ -407,7 +409,7 @@ namespace snmalloc
         size_t rsize = bits::one_at_bit(SUPERSLAB_BITS) << large_class;

         memory_provider.notify_not_using(
-           pointer_offset(p, OS_PAGE_SIZE), rsize - OS_PAGE_SIZE);
+          pointer_offset(p, OS_PAGE_SIZE), rsize - OS_PAGE_SIZE);
       }

       stats.superslab_push();
diff --git a/src/mem/metaslab.h b/src/mem/metaslab.h
index 6a87c8989..fbfcc9a94 100644
--- a/src/mem/metaslab.h
+++ b/src/mem/metaslab.h
@@ -36,7 +36,7 @@ namespace snmalloc
      * The list will be (allocated - needed - 1) long. The -1 is
      * for the `link` element which is not in the free list.
      */
-    void* head;
+    void* head = nullptr;

     /**
      * How many entries are not in the free list of slab, i.e.
@@ -51,7 +51,7 @@ namespace snmalloc
     /**
      * How many entries have been allocated from this slab.
      */
-    uint16_t allocated;
+    uint16_t allocated = 0;

     // When a slab has free space it will be on the has space list for
     // that size class. We use an empty block in this slab to be the
diff --git a/src/mem/superslab.h b/src/mem/superslab.h
index e74987843..b0a036a98 100644
--- a/src/mem/superslab.h
+++ b/src/mem/superslab.h
@@ -84,7 +84,7 @@ namespace snmalloc
     {
       if (kind != Fresh)
       {
-      // If this wasn't previously Fresh, we need to zero some things.
+        // If this wasn't previously Fresh, we need to zero some things.
         used = 0;
         for (size_t i = 0; i < SLAB_COUNT; i++)
         {
@@ -97,21 +97,21 @@ namespace snmalloc
         kind = Super;
         // Point head at the first non-short slab.
         head = 1;
+      }

 #ifndef NDEBUG
-        auto curr = head;
-        for (size_t i = 0; i < SLAB_COUNT - used - 1; i++)
-        {
-          curr = (curr + meta[curr].next + 1) & (SLAB_COUNT - 1);
-        }
-        assert(curr == 0);
+      auto curr = head;
+      for (size_t i = 0; i < SLAB_COUNT - used - 1; i++)
+      {
+        curr = (curr + meta[curr].next + 1) & (SLAB_COUNT - 1);
+      }
+      if (curr != 0) abort();

-        for (size_t i = 0; i < SLAB_COUNT; i++)
-        {
-          assert(meta[i].is_unused());
-        }
-#endif
+      for (size_t i = 0; i < SLAB_COUNT; i++)
+      {
+        assert(meta[i].is_unused());
       }
+#endif
     }

     bool is_empty()
@@ -165,7 +165,7 @@ namespace snmalloc
       meta[0].link = get_initial_offset(sizeclass, true);

       used++;
-      return (Slab*)this;
+      return reinterpret_cast<Slab*>(this);
     }

     Slab* alloc_slab(sizeclass_t sizeclass)