From 30f245391582d26ee3e375a8f9f877baa6201cb0 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Tue, 26 Nov 2024 14:12:19 -0500
Subject: [PATCH 001/135] Have CMake install cparse-llvm

---
 CMakeLists.txt | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7c39f2b..ca6121c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -224,6 +224,9 @@ target_link_libraries(cparse-llvm PUBLIC SALT_LLVM_TOOLING) # Inherit definition
 # You can try adding -static
 target_link_options(cparse-llvm PUBLIC -Wl,--as-needed -Wl,--no-allow-shlib-undefined -Wl,--no-undefined)
 
+# Install the target
+install(TARGETS cparse-llvm DESTINATION bin)
+
 #---------------
 # Tests
 #---------------

From 7b3727e3f7960c44942e7380fa40cbe860214c12 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Tue, 26 Nov 2024 11:15:01 -0800
Subject: [PATCH 002/135] Add spack.yaml file and environment setup script for
 Gilgamesh

---
 activate-salt-fm-env.sh | 14 ++++++++++++
 spack.yaml              | 50 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+)
 create mode 100644 activate-salt-fm-env.sh
 create mode 100644 spack.yaml

diff --git a/activate-salt-fm-env.sh b/activate-salt-fm-env.sh
new file mode 100644
index 0000000..55a44fe
--- /dev/null
+++ b/activate-salt-fm-env.sh
@@ -0,0 +1,14 @@
+echo "purging loaded modules"
+module purge
+echo "listing loaded modules:"
+module list
+#echo "loading llvm and mpich:"
+echo "loading llvm and gcc:"
+module load llvm/git.086d8e6bb5daf8de43880ba90258c49e0fabf2c9_19.1.4-zpacv56
+#module load mpich/4.2.3-ugxzfxf
+module load gcc/14.2.0-ttkqi3s
+#module load tau/master-l3jx42k
+export LD_LIBRARY_PATH="/storage/packages/salt-fm/spack/opt/spack/linux-centos7-x86_64/gcc-10.2.1/gcc-14.2.0-ttkqi3sp7xwrxtwftfysf54cl4jje4qk/lib64:$LD_LIBRARY_PATH"
+echo "listing loaded modules:"
+module list
+echo "Finished"
diff --git a/spack.yaml b/spack.yaml
new file mode 100644
index 0000000..99b2e05
--- /dev/null
+++ b/spack.yaml
@@ -0,0 +1,50 @@
+# export SALT_ROOT=/storage/packages/salt-fm
+# export PATH=$SALT_ROOT/base/tools/bin:$PATH
+# module use /storage/packages/salt-fm/spack/share/spack/modules/linux-rhel8-x86_64
+# module use /storage/packages/salt-fm/spack/share/spack/modules/linux-centos7-x86_64/
+
+spack:
+  view: false
+
+  concretizer:
+    reuse: false
+    unify: true
+
+  packages:
+    all:
+      require: "target=x86_64 %gcc"
+
+  modules:
+    default:
+      enable:
+      - tcl
+      tcl:
+        projections:
+          all: '{name}/{version}'
+        verbose: true
+        all:
+          autoload: direct
+          environment:
+            set:
+              '{name}_ROOT': '{PREFIX}'
+
+  compilers:
+  - compiler:
+      spec: gcc@=14.2.0
+      paths:
+        cc: /storage/packages/salt-fm/spack/opt/spack/linux-centos7-x86_64/gcc-10.2.1/gcc-14.2.0-ttkqi3sp7xwrxtwftfysf54cl4jje4qk/bin/gcc
+        cxx: /storage/packages/salt-fm/spack/opt/spack/linux-centos7-x86_64/gcc-10.2.1/gcc-14.2.0-ttkqi3sp7xwrxtwftfysf54cl4jje4qk/bin/g++
+        f77: /storage/packages/salt-fm/spack/opt/spack/linux-centos7-x86_64/gcc-10.2.1/gcc-14.2.0-ttkqi3sp7xwrxtwftfysf54cl4jje4qk/bin/gfortran
+        fc: /storage/packages/salt-fm/spack/opt/spack/linux-centos7-x86_64/gcc-10.2.1/gcc-14.2.0-ttkqi3sp7xwrxtwftfysf54cl4jje4qk/bin/gfortran
+      flags: {}
+      operating_system: rhel8
+      target: x86_64
+      modules: []
+      environment: {}
+      extra_rpaths: []
+
+  specs:
+  - llvm@git.086d8e6bb5daf8de43880ba90258c49e0fabf2c9=19.1.4 +flang +mlir openmp=project
+  #  -   tau@master%gcc@14.2.0~adios2+binutils~comm~craycnl~cuda~disable-no-pie~dyninst+elf+fortran~gasnet+io~level_zero+libdwarf+libunwind+mpi~ompt~opari~opencl+openmp+otf2+papi+pdt~phase~ppc64le~profileparam+pthreads~python~rocm~rocprofiler~rocprofv2~roctracer~scorep~shmem~sqlite~syscall~x86_64 build_system=generic arch=linux-rhel8-x86_64
+  - tau@master +mpi +openmp cflags=-Wno-error=implicit-function-declaration
+  - mpich ~wrapperrpath

From b52ecf353f8bfd35e0735b5e556b6fe99ac5bc31 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Tue, 3 Dec 2024 11:27:12 -0800
Subject: [PATCH 003/135] Update spack.yaml for gcc compiler & remove LD
 library hack

---
 activate-salt-fm-env.sh |  1 -
 spack.yaml              | 15 ++++++++++++++-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/activate-salt-fm-env.sh b/activate-salt-fm-env.sh
index 55a44fe..bde6fc9 100644
--- a/activate-salt-fm-env.sh
+++ b/activate-salt-fm-env.sh
@@ -8,7 +8,6 @@ module load llvm/git.086d8e6bb5daf8de43880ba90258c49e0fabf2c9_19.1.4-zpacv56
 #module load mpich/4.2.3-ugxzfxf
 module load gcc/14.2.0-ttkqi3s
 #module load tau/master-l3jx42k
-export LD_LIBRARY_PATH="/storage/packages/salt-fm/spack/opt/spack/linux-centos7-x86_64/gcc-10.2.1/gcc-14.2.0-ttkqi3sp7xwrxtwftfysf54cl4jje4qk/lib64:$LD_LIBRARY_PATH"
 echo "listing loaded modules:"
 module list
 echo "Finished"
diff --git a/spack.yaml b/spack.yaml
index 99b2e05..34bbd2a 100644
--- a/spack.yaml
+++ b/spack.yaml
@@ -12,7 +12,7 @@ spack:
 
   packages:
     all:
-      require: "target=x86_64 %gcc"
+      require: target=x86_64 %gcc
 
   modules:
     default:
@@ -42,6 +42,19 @@ spack:
       modules: []
       environment: {}
       extra_rpaths: []
+  - compiler:
+      spec: clang@=19.1.5
+      paths:
+        cc: /storage/packages/salt-fm/spack/opt/spack/linux-rhel8-x86_64/gcc-14.2.0/llvm-git.086d8e6bb5daf8de43880ba90258c49e0fabf2c9_19.1.4-zpacv56yg4t4223ih6aqzzzrt3b3i4sg/bin/clang
+        cxx: /storage/packages/salt-fm/spack/opt/spack/linux-rhel8-x86_64/gcc-14.2.0/llvm-git.086d8e6bb5daf8de43880ba90258c49e0fabf2c9_19.1.4-zpacv56yg4t4223ih6aqzzzrt3b3i4sg/bin/clang++
+        f77: /storage/packages/salt-fm/spack/opt/spack/linux-rhel8-x86_64/gcc-14.2.0/llvm-git.086d8e6bb5daf8de43880ba90258c49e0fabf2c9_19.1.4-zpacv56yg4t4223ih6aqzzzrt3b3i4sg/bin/flang
+        fc: /storage/packages/salt-fm/spack/opt/spack/linux-rhel8-x86_64/gcc-14.2.0/llvm-git.086d8e6bb5daf8de43880ba90258c49e0fabf2c9_19.1.4-zpacv56yg4t4223ih6aqzzzrt3b3i4sg/bin/flang
+      flags: {}
+      operating_system: rhel8
+      target: x86_64
+      modules: []
+      environment: {}
+      extra_rpaths: []
 
   specs:
   - llvm@git.086d8e6bb5daf8de43880ba90258c49e0fabf2c9=19.1.4 +flang +mlir openmp=project

From ccc547b476ad6166005d5312b8f08d2b0837b6da Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Tue, 3 Dec 2024 13:11:04 -0800
Subject: [PATCH 004/135] Added Fortran instrumentation config file

---
 config_files/tau_fortran_config.yaml | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 config_files/tau_fortran_config.yaml

diff --git a/config_files/tau_fortran_config.yaml b/config_files/tau_fortran_config.yaml
new file mode 100644
index 0000000..c0e340c
--- /dev/null
+++ b/config_files/tau_fortran_config.yaml
@@ -0,0 +1,24 @@
+# Config variables:
+#   ${full_timer_name}: "procedure_name [file_path {start}-{end}]"
+
+instrumentation: tauFortran
+
+main_insert:
+  - "      integer, save :: tauProfileTimer(2) = [0, 0]"
+  - "      call tau_profile_init()"
+  - "      call tau_profile_timer(tauProfileTimer, &"
+  - "        \"${full_timer_name}\")"
+  - "      call tau_profile_start(tauProfileTimer)"
+  - "#ifndef TAU_MPI"
+  - "      call  tau_profile_set_node(0);"
+  - "#endif ! TAU_MPI
+
+
+procedure_begin_insert:
+  - "      integer, save :: tauProfileTimer(2) = [0, 0]"
+  - "      call tau_profile_timer(tauProfileTimer, &"
+  - "        \"${full_timer_name}\")"
+  - "      call tau_profile_start(tauProfileTimer)"
+
+procedure_end_insert:
+  - "      call tau_profile_stop(tauProfileTimer)"

From 9e129f00af8771051ee4bfd29e87e22193f44f82 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Tue, 3 Dec 2024 13:13:40 -0800
Subject: [PATCH 005/135] Update Fortran config file to match Fortran
 nomenclature

---
 config_files/tau_fortran_config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config_files/tau_fortran_config.yaml b/config_files/tau_fortran_config.yaml
index c0e340c..53014d5 100644
--- a/config_files/tau_fortran_config.yaml
+++ b/config_files/tau_fortran_config.yaml
@@ -3,7 +3,7 @@
 
 instrumentation: tauFortran
 
-main_insert:
+program_insert:
   - "      integer, save :: tauProfileTimer(2) = [0, 0]"
   - "      call tau_profile_init()"
   - "      call tau_profile_timer(tauProfileTimer, &"

From 37c6d13bd1a263a6735ade868e3ff9648b27af59 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Tue, 3 Dec 2024 16:29:33 -0500
Subject: [PATCH 006/135] Fix missing close quote

---
 config_files/tau_fortran_config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config_files/tau_fortran_config.yaml b/config_files/tau_fortran_config.yaml
index 53014d5..56aa6bc 100644
--- a/config_files/tau_fortran_config.yaml
+++ b/config_files/tau_fortran_config.yaml
@@ -11,7 +11,7 @@ program_insert:
   - "      call tau_profile_start(tauProfileTimer)"
   - "#ifndef TAU_MPI"
   - "      call  tau_profile_set_node(0);"
-  - "#endif ! TAU_MPI
+  - "#endif ! TAU_MPI"
 
 
 procedure_begin_insert:

From 977afb9348b8ff8b2c899010c152391bc08de110 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Tue, 3 Dec 2024 16:32:35 -0500
Subject: [PATCH 007/135] Do not set TAU node in function calls

It only needs to be done once from the main program...
---
 config_files/tau_config.yaml | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/config_files/tau_config.yaml b/config_files/tau_config.yaml
index f90c035..4bb77fb 100644
--- a/config_files/tau_config.yaml
+++ b/config_files/tau_config.yaml
@@ -17,11 +17,6 @@ main_insert:
 
 function_begin_insert:
   - "    TAU_PROFILE_TIMER(tautimer, \"${full_timer_name}\", \" \", TAU_USER);"
-  - "#ifndef TAU_MPI"
-  - "#ifndef TAU_SHMEM"
-  - "    TAU_PROFILE_SET_NODE(0);"
-  - "#endif /* TAU_SHMEM */"
-  - "#endif /* TAU_MPI */"
   - "    TAU_PROFILE_START(tautimer);"
 
 function_end_insert:

From f4b64abd9310c1e065d0971e0eb9fe75d21ce2bd Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Tue, 3 Dec 2024 17:25:26 -0500
Subject: [PATCH 008/135] CMake silence policy warning and use pre-installed
 TAU

---
 CMakeLists.txt | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index ca6121c..350346b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,6 +5,9 @@
 cmake_minimum_required(VERSION 3.13.1)
 # Ensure policies are set as they have been tested
 cmake_policy(VERSION 3.13.1...3.23.2)
+if(POLICY CMP0144)
+  cmake_policy(SET CMP0144 NEW)
+endif()
 if(POLICY CMP0074)
   cmake_policy(SET CMP0074 NEW)
 endif()
@@ -327,7 +330,13 @@ endforeach()
 # and -pthread -bfd=download -unwind=download -libdwarf=download 
 # -otf=download
 
-set(TAU_ROOT "/usr/local" CACHE PATH "TAU Root Directory")
+# Seed the TAU_ROOT cache variable: use the TAU_ROOT environment variable when it is set and
+# non-empty, otherwise fall back to /usr/local. (Compare the expanded value; DEFINED needs a name.)
+if("$ENV{TAU_ROOT}" STREQUAL "")
+  set(TAU_ROOT "/usr/local" CACHE PATH "TAU Root Directory")
+else()
+  set(TAU_ROOT "$ENV{TAU_ROOT}" CACHE PATH "TAU Root Directory")
+endif()
 
 find_file(TAU_CLANG_MAKEFILE Makefile.tau-clang-pthread
 PATHS ${TAU_ROOT}  PATH_SUFFIXES x86_64 x86_64/lib

From 40502b0694423ecb87bd582b26d322ee2bc9df3a Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Wed, 4 Dec 2024 13:23:40 -0800
Subject: [PATCH 009/135] Remove unused source files

These files were formerly used, but are no longer required after a
previous code reorganization.
---
 include/tau_datatypes.h |  43 ----
 src/parse.cpp           | 530 ----------------------------------------
 2 files changed, 573 deletions(-)
 delete mode 100644 include/tau_datatypes.h
 delete mode 100644 src/parse.cpp

diff --git a/include/tau_datatypes.h b/include/tau_datatypes.h
deleted file mode 100644
index b72f6e9..0000000
--- a/include/tau_datatypes.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/****************************************************************************
-**			TAU Portable Profiling Package			   **
-**			http://www.cs.uoregon.edu/research/tau	           **
-*****************************************************************************/
-
-#include <string>
-
-enum instrumentKind_t { TAU_LOOPS, TAU_LINE, TAU_ROUTINE_DECL, TAU_ROUTINE_ENTRY, TAU_ROUTINE_EXIT, TAU_ABORT, TAU_NOT_SPECIFIED, TAU_IO, TAU_MEMORY, TAU_TIMER, TAU_PHASE, TAU_INIT, TAU_FORALL, TAU_BARRIER, TAU_FENCE, TAU_NOTIFY };
-
-/* For C instrumentation */
-enum itemKind_t { ROUTINE, BODY_BEGIN, FIRST_EXECSTMT, BODY_END, RETURN, EXIT, INSTRUMENTATION_POINT, START_TIMER, STOP_TIMER, START_DO_TIMER, GOTO_STOP_TIMER, START_LOOP_TIMER, STOP_LOOP_TIMER, ALLOCATE_STMT, DEALLOCATE_STMT, IO_STMT };
-enum itemAttr_t { BEFORE, AFTER, NOT_APPLICABLE};
-enum itemQualifier_t { STATIC, DYNAMIC, NOT_SPECIFIED};
-enum tau_language_t { tau_c, tau_cplusplus, tau_fortran, tau_upc };
-
-#ifndef TAU_DYNINST
-#include <pdbAll.h>
-struct itemRef {
-  itemRef(const pdbItem *i, bool isT);
-  itemRef(const pdbItem *i, itemKind_t k, int l, int c);
-  itemRef(const pdbItem *i, itemKind_t k, int l, int c, std::string code, itemAttr_t);
-  //itemRef(const pdbItem *i, itemKind_t k, itemQualifier_t q, std::string name);
-  itemRef(const pdbItem *i, bool isT, int l, int c);
-  itemRef(const pdbItem *i, itemKind_t k, pdbLoc start, pdbLoc stop);
-  const pdbItem *item;
-  itemKind_t kind; /* For C instrumentation */ 
-  bool     isTarget;
-  bool     isDynamic;
-  bool     isPhase;
-  int      line;
-  int      col;
-  pdbLoc   begin;
-  pdbLoc   end;
-  std::string   snippet;
-  itemAttr_t attribute;
-};
-#endif /* TAU_DYNINST */
-
-/***************************************************************************
- * $RCSfile: tau_datatypes.h,v $   $Author: geimer $
- * $Revision: 1.13 $   $Date: 2008/12/11 16:02:22 $
- * VERSION_ID: $Id: tau_datatypes.h,v 1.13 2008/12/11 16:02:22 geimer Exp $
- ***************************************************************************/
diff --git a/src/parse.cpp b/src/parse.cpp
deleted file mode 100644
index dae6a9a..0000000
--- a/src/parse.cpp
+++ /dev/null
@@ -1,530 +0,0 @@
-#include <iostream>
-#include <fstream>
-#include <vector>
-#include <string>
-#include <cstring>
-#include <algorithm>
-#include <clang-c/Index.h>
-#include "llvm/Support/CommandLine.h"
-using namespace std;
-
-// make sure begin func comes before returns and such
-#define BEGIN_FUNC 0
-#define RETURN_FUNC 1
-#define MULTILINE_RETURN_FUNC 2
-#define EXIT_FUNC 3
-#define NUM_LOC_TYPES 4
-
-char* loc_typ_strs[NUM_LOC_TYPES] =
-  {
-    "begin func",
-    "return",
-    "multiline return",
-    "exit"
-  };
-
-typedef struct inst_loc {
-  int line = -1;
-  int col = -1;
-  int kind = -1;
-  CXType return_type;
-  const char* func_name;
-  const char* full_timer_name;
-  bool has_args = false;
-} inst_loc;
-
-std::string current_file;
-
-bool comp_inst_loc (inst_loc* first, inst_loc* second) {
-  if (first->line != second->line) {
-    return first->line < second->line;
-  }
-  else if (first->col != second->col) {
-    return first->col < second->col;
-  }
-  else { // SOME PEOPLE have functions that are just {} so we need to make sure begin comes before return
-    return first->kind < second->kind;
-  }
-}
-
-std::vector<inst_loc*> inst_locations;
-
-void dump_inst_loc(inst_loc* loc) {
-  printf("\tLine:     %d\n", loc->line);
-  printf("\tCol:      %d\n", loc->col);
-  printf("\tKind:     %s\n", loc_typ_strs[loc->kind]);
-  printf("\tRet type: %s\n", clang_getCString(clang_getTypeSpelling(loc->return_type)));
-  printf("\tName:     %s\n", loc->func_name);
-  printf("\tTimer:      %s\n", loc->full_timer_name);
-  printf("\tHas args: %s\n", loc->has_args ? "Yes" : "No");
-}
-
-void dump_inst_loc(inst_loc* loc, int n) {
-  printf("location %d\n", n);
-  dump_inst_loc(loc);
-}
-
-void dump_all_locs() {
-  for (int i = 0; i < inst_locations.size(); i++) {
-    dump_inst_loc(inst_locations[i], i);
-  }
-}
-
-// dummy test function for parsing things
-int hello() {
-	{
-	printf("hello\n");
-	}
-	{{
-  return 42;
-	}}
-}
-
-ostream& operator<<(ostream& stream, const CXString& str)
-{
-  stream << clang_getCString(str);
-  clang_disposeString(str);
-  return stream;
-}
-
-// assumes funcdecl is CXCursor_FunctionDecl
-void makeFuncAndTimerNames(CXCursor funcdecl, std::string& func_name, std::string& timer_name) {
-  CXSourceRange extent = clang_getCursorExtent(funcdecl);
-  CXSourceLocation start_loc = clang_getRangeStart(extent);
-  CXSourceLocation end_loc = clang_getRangeEnd(extent);
-
-  unsigned int start_line;
-  unsigned int start_col;
-  unsigned int end_line;
-  unsigned int end_col;
-
-  // for compount stmts this *is* the L/R brace location, yay
-  clang_getSpellingLocation(start_loc, nullptr, &start_line, &start_col, nullptr );
-  clang_getSpellingLocation(end_loc, nullptr, &end_line, &end_col, nullptr );
-
-  std::string sig = clang_getCString(clang_getTypeSpelling(clang_getCursorType(funcdecl)));
-  func_name = clang_getCString(clang_getCursorSpelling(funcdecl));
-  sig.insert(sig.find_first_of("("), func_name);
-
-  CXLanguageKind lang = clang_getCursorLanguage(funcdecl);
-  std::string lang_string;
-  if (lang == CXLanguage_C) {
-    lang_string = "C";
-  }
-  else if (lang == CXLanguage_CPlusPlus) {
-    lang_string = "C++";
-  }
-  else {
-    lang_string = "invalid";
-  }
-
-  timer_name = sig + " " + lang_string + " [{" + current_file + "} {"
-    + to_string(start_line) + "," + to_string(start_col) + "}-{"
-    + to_string(end_line) + "," + to_string(end_col-1) + "}]";
-}
-
-void handleFuncStartEnd(CXCursor c, CXCursor parent) {
-  // do things with functions: https://bastian.rieck.me/blog/posts/2016/baby_steps_libclang_function_extents/
-  CXSourceRange extent = clang_getCursorExtent(c);
-  CXSourceLocation start_loc = clang_getRangeStart(extent);
-  CXSourceLocation end_loc = clang_getRangeEnd(extent);
-
-  unsigned int start_line;
-  unsigned int start_col;
-  unsigned int end_line;
-  unsigned int end_col;
-
-  // for compount stmts this *is* the L/R brace location, yay
-  clang_getSpellingLocation(start_loc, nullptr, &start_line, &start_col, nullptr );
-  clang_getSpellingLocation(end_loc, nullptr, &end_line, &end_col, nullptr );
-
-  std::string func_name;
-  std::string timer_name;
-  makeFuncAndTimerNames(parent, func_name, timer_name);
-
-  char* func_name_c = new char[func_name.length()+1];
-  std::strcpy(func_name_c, func_name.c_str());
-
-  char* timer_name_c = new char[timer_name.length()+1];
-  std::strcpy(timer_name_c, timer_name.c_str());
-
-  // the cols are 1-indexed, but the strings of each line are 0-indexed
-  // the given start_col is the index of the open brace, so to go 1 after the char at the start
-  // inst location, start_col is fine
-  // but end_col is *one after* the close brace, so to go one before the char at the end inst
-  // char, we need to do end_col - 2
-
-  inst_loc* start = new inst_loc;
-  start->line = start_line;
-  start->col = start_col;
-  start->kind = BEGIN_FUNC;
-  start->return_type = clang_getResultType(clang_getCursorType(parent));
-  start->func_name = func_name_c;
-  start->full_timer_name = timer_name_c;
-  start->has_args = clang_Cursor_getNumArguments(parent) > 0;
-
-  inst_locations.push_back(start);
-
-  inst_loc* end = new inst_loc;
-  end->line = end_line;
-  end->col = end_col - 2;
-  end->kind = RETURN_FUNC;
-  end->return_type = clang_getResultType(clang_getCursorType(parent));
-  end->func_name = func_name_c;
-  end->full_timer_name = timer_name_c;
-  end->has_args = clang_Cursor_getNumArguments(parent) > 0;
-
-  inst_locations.push_back(end);
-
-  // cout << "Cursor '" << clang_getCursorSpelling(c)
-  //   << "' of kind '"
-  //   << clang_getCursorKindSpelling(clang_getCursorKind(c)) << "'\n";
-  // cout << "\tStart " << start_line << "," << start_col
-  //   << "; End " << end_line << "," << end_col << "\n";
-}
-
-void handleReturn(CXCursor c, CXCursor parent, CXCursor encl_function) {
-  CXSourceRange extent = clang_getCursorExtent(c);
-  CXSourceLocation start_loc = clang_getRangeStart(extent);
-  CXSourceLocation end_loc = clang_getRangeEnd(extent);
-
-  unsigned int start_line;
-  unsigned int start_col;
-  unsigned int end_line;
-  unsigned int end_col;
-
-  // for compount stmts this *is* the L/R brace location, yay
-  clang_getSpellingLocation(start_loc, nullptr, &start_line, &start_col, nullptr );
-  clang_getSpellingLocation(end_loc, nullptr, &end_line, &end_col, nullptr );
-
-  printf("return start %d:%d\n", start_line, start_col);
-  printf("return end   %d:%d\n", end_line, end_col);
-
-  CXSourceLocation return_loc = clang_getCursorLocation(c);
-
-  unsigned int ret_line = 0;
-  unsigned int ret_col = 0;
-
-  clang_getSpellingLocation(return_loc, nullptr, &ret_line, &ret_col, nullptr );
-
-  std::string func_name;
-  std::string timer_name;
-  makeFuncAndTimerNames(encl_function, func_name, timer_name);
-
-  char* func_name_c = new char[func_name.length()+1];
-  std::strcpy(func_name_c, func_name.c_str());
-
-  char* timer_name_c = new char[timer_name.length()+1];
-  std::strcpy(timer_name_c, timer_name.c_str());
-
-  printf("func type %s\n", clang_getCString(clang_getTypeSpelling(clang_getCursorType(encl_function))));
-  printf("returns a %s\n", clang_getCString(clang_getTypeSpelling(clang_getResultType(clang_getCursorType(encl_function)))));
-
-  // need to do col - 1 because cols are 1-indexed and line string is 0-indexed
-  inst_loc* ret = new inst_loc;
-  ret->line = ret_line;
-  ret->col = ret_col - 1;
-  ret->kind = start_line == end_line ? RETURN_FUNC : MULTILINE_RETURN_FUNC;
-  ret->func_name = func_name_c;
-  ret->return_type = clang_getResultType(clang_getCursorType(encl_function));
-  ret->full_timer_name = timer_name_c;
-  ret->has_args = clang_Cursor_getNumArguments(encl_function) > 0;
-
-  inst_locations.push_back(ret);
-
-  std::cout << "Parent: '" << clang_getCursorSpelling(encl_function)
-    << "' of kind " << clang_getCursorKindSpelling(clang_getCursorKind(encl_function)) <<"\n";
-}
-
-CXChildVisitResult traverse(CXCursor c, CXCursor parent, CXClientData client_data)
-{
-	CXSourceLocation location = clang_getCursorLocation(c);
-	// ignore anything that isn't in this file
-  if(clang_Location_isFromMainFile(location) == 0) {
-    return CXChildVisit_Continue;
-	}
-
-  // cout << "Cursor '" << clang_getCursorSpelling(c)
-  //   << "' of kind '"
-  //   << clang_getCursorKindSpelling(clang_getCursorKind(c)) << "'\n";
-
-  CXCursorKind kind = clang_getCursorKind(c);
-
-  // if (kind == CXCursorKind::CXCursor_FunctionDecl ||         // what it sounds like
-  //    kind == CXCursorKind::CXCursor_CXXMethod ||            // also what it sounds like
-  //    kind == CXCursorKind::CXCursor_FunctionTemplate ||     // CXX templates
-  //    kind == CXCursorKind::CXCursor_Constructor ||          // constructors and destructors
-  //    kind == CXCursorKind::CXCursor_Destructor ||
-  //    kind == CXCursorKind::CXCursor_ConversionFunction) {   // operator overloads, strangely (node name CXXConversion)
-  //     printf("setting client data\n");
-  //     client_data = &c;
-  // }
-  //
-  // CXCursorKind parent_kind = clang_getCursorKind(parent);
-  // if (parent_kind == CXCursorKind::CXCursor_FunctionDecl ||
-  //    parent_kind == CXCursorKind::CXCursor_CXXMethod ||
-  //    parent_kind == CXCursorKind::CXCursor_FunctionTemplate ||
-  //    parent_kind == CXCursorKind::CXCursor_Constructor ||
-  //    parent_kind == CXCursorKind::CXCursor_Destructor ||
-  //    parent_kind == CXCursorKind::CXCursor_ConversionFunction) {
-  //   printf("parent kind is function\n");
-  //   if (kind == CXCursorKind::CXCursor_CompoundStmt) {
-  //     printf("handling function\n");
-  //     handleFuncStartEnd(c, parent);
-  //   }
-  // }
-  //
-  // if (kind == CXCursorKind::CXCursor_ReturnStmt) {
-  //   printf("handling return\n");
-  //   CXCursor encl_function = *(reinterpret_cast<CXCursor*>(client_data));
-  //   handleReturn(c, parent, encl_function);
-  // }
-  //
-  // // ignore anything inside a lambda
-  // //TODO: setting to profile lambdas too
-  // if (kind == CXCursorKind::CXCursor_LambdaExpr) {
-  //   printf("ignoring lambda\n");
-  //   return CXChildVisit_Continue;
-  // }
-
-  unsigned int curLevel  = *( reinterpret_cast<unsigned int*>( client_data ) );
-  unsigned int nextLevel = curLevel + 1;
-
-  std::cout << std::string( curLevel, '-' ) << " " << clang_getCString(clang_getCursorKindSpelling(clang_getCursorKind(c)))
-    << " (" << clang_getCString(clang_getCursorSpelling(c)) << ")\n";
-
-  clang_visitChildren(c, traverse, &nextLevel);
-
-	return CXChildVisit_Continue;
-}
-
-void make_begin_func_code(inst_loc* loc, std::string& code) {
-  code += "\tTAU_PROFILE_TIMER(tautimer, \"";
-  code += loc->full_timer_name;
-  code += "\", \" \", ";
-  code += strcmp(loc->func_name, "main") == 0 ? "TAU_DEFAULT" : "TAU_USER";
-  code += ");\n";
-  if (strcmp(loc->func_name, "main") == 0 && loc->has_args) {
-    code += "\tTAU_INIT(&argc, &argv);\n";
-  }
-
-	// not needed
-  //code += "#ifndef TAU_MPI\n";
-  //code += "#ifndef TAU_SHMEM\n";
-  //code += "  TAU_PROFILE_SET_NODE(0);\n";
-  //code += "#endif /* TAU_SHMEM */\n";
-  //code += "#endif /* TAU_MPI */\n"; // set node 0
-  code += "\tTAU_PROFILE_START(tautimer);\n";
-
-  //code += "{\n"; //TODO is this needed? if yes, matching close bracket before end func
-}
-
-void make_begin_func_code_cxx(inst_loc* loc, std::string& code) {
-  code += "\tTAU_PROFILE(\"";
-  code += loc->full_timer_name;
-  code += "\", \" \", ";
-  code += strcmp(loc->func_name, "main") == 0 ? "TAU_DEFAULT" : "TAU_USER";
-  code += ");\n";
-  if (strcmp(loc->func_name, "main") == 0 && loc->has_args) {
-    code += "\tTAU_INIT(&argc, &argv);\n";
-  }
-}
-
-bool make_end_func_code(inst_loc* loc, std::string& code, std::string& line) {
-  // void is easy, just drop in the stop
-  // also do this if the line does not actually contain "return"; eg, int main() with no explicit return
-  if (loc->return_type.kind == CXType_Void || line.find("return") == std::string::npos) {
-    code += "\tTAU_PROFILE_STOP(tautimer);\n";
-    return false;
-  }
-  // types are harder, need to pull the arg to return before the stop in case it does things
-  else {
-    code += "\t{ ";
-    code += clang_getCString(clang_getTypeSpelling(loc->return_type));
-    code += " tau_ret_val = ";
-    int first_pos = line.find("return") + 6;
-    int last_pos = line.find(";", first_pos);
-    code += line.substr(first_pos, last_pos);
-    code += " TAU_PROFILE_STOP(tautimer); return tau_ret_val; }\n";
-    return true;
-  }
-
-}
-
-void instrument_file(ifstream& og_file, ofstream& inst_file, bool use_cxx_api) {
-  std::string line;
-  int lineno = 0;
-  // if (inst_locations.size() == 0) {
-  //   printf("No instrumentation locations for this file!");
-  //   return;
-  // }
-  auto inst_loc_iter = inst_locations.begin();
-
-  inst_file << "#include <Profile/Profiler.h>\n";
-  inst_file << "#line 1 \"" << current_file << "\"\n";
-
-  while (getline(og_file, line)) {
-    lineno++;
-    // short circuit if we run out of inst locations to avoid segfaults :)
-    // need the if and the while because sometimes 2+ inst locations are on the same line
-    if (inst_loc_iter != inst_locations.end()) {
-      inst_loc* curr_inst_loc = *inst_loc_iter;
-      if (lineno == curr_inst_loc->line) {
-        curr_inst_loc = *inst_loc_iter;
-        int num_inst_locs_this_line = 0;
-        bool should_skip_printing_next_line = false;
-        std::string start = line.substr(0,curr_inst_loc->col);
-        std::string end;
-        while (inst_loc_iter != inst_locations.end() && lineno == curr_inst_loc->line) {
-          if (curr_inst_loc->col < line.size()) {
-            end = line.substr(curr_inst_loc->col);
-          }
-          else {
-            end = "";
-          }
-          if (num_inst_locs_this_line == 0) {
-            inst_file << start;
-          }
-          std::string inst_code;
-          switch (curr_inst_loc->kind) {
-            case BEGIN_FUNC:
-              inst_file << "\n#line " << lineno << "\n";
-              if (use_cxx_api) {
-                make_begin_func_code_cxx(curr_inst_loc, inst_code);
-              }
-              else {
-                make_begin_func_code(curr_inst_loc, inst_code);
-              }
-              inst_file << inst_code;
-              inst_file << "#line " << lineno << "\n";
-              break;
-            case RETURN_FUNC:
-              if (!use_cxx_api) {
-                inst_file << "\n#line " << lineno << "\n";
-                should_skip_printing_next_line = make_end_func_code(curr_inst_loc, inst_code, line);
-                inst_file << inst_code;
-                inst_file << "#line " << lineno << "\n";
-              }
-              break;
-            case MULTILINE_RETURN_FUNC:
-              if (!use_cxx_api) {
-                inst_file << "\n#line " << lineno << "\n";
-                while (line.find(";") == string::npos) {
-                  std::string templine;
-                  getline(og_file, templine);
-                  line += templine;
-                  lineno++;
-                }
-                should_skip_printing_next_line = make_end_func_code(curr_inst_loc, inst_code, line);
-                inst_file << inst_code;
-                inst_file << "#line " << lineno << "\n";
-              }
-            default:
-              break;
-          }
-
-          inst_loc_iter++;
-          curr_inst_loc = *inst_loc_iter;
-          num_inst_locs_this_line++;
-        }
-        if (!should_skip_printing_next_line) {
-          inst_file << end << "\n";
-        }
-      }
-      else {
-        inst_file << line << "\n";
-      }
-    }
-    else {
-      inst_file << line << "\n";
-    }
-  }
-}
-
-llvm::cl::opt<string> outputfile("tau_output", llvm::cl::desc("Specify name of output instrumented file"), llvm::cl::value_desc("filename"));
-llvm::cl::opt<string> inputfile(llvm::cl::Positional, llvm::cl::desc("<input_file>"), llvm::cl::Required, llvm::cl::init("-"));
-llvm::cl::opt<string> compileargs("compile_flags", llvm::cl::desc("Compilation flags (DO NOT include input file name)"), llvm::cl::value_desc("\"-arg1 -arg2 ...\""));
-llvm::cl::opt<bool> use_cxx_api("use_cxx_api", llvm::cl::desc("Use TAU's C++ instrumentation API"), llvm::cl::init(false));
-
-int main(int argc, char* argv[])
-{
-  llvm::cl::ParseCommandLineOptions(argc, argv);
-  current_file = inputfile;
-  CXIndex index = clang_createIndex(1, 1);
-  size_t pos = 0;
-  std::vector<string> compile_flags_vec;
-  while ((pos = compileargs.find(" ")) != std::string::npos) {
-      compile_flags_vec.push_back(compileargs.substr(0, pos));
-      compileargs.erase(0, pos + 1);
-  }
-  compile_flags_vec.push_back(compileargs);
-  const char* compile_flags[compile_flags_vec.size()];
-  for (int i = 0; i < compile_flags_vec.size(); i++) {
-    compile_flags[i] = compile_flags_vec[i].c_str();
-    cout << compile_flags[i] << "\n";
-  }
-  // const char* args[] = {"-lclang", "-g", "-I/home/alisterj/.local/include"};
-  CXTranslationUnit unit = clang_parseTranslationUnit(
-    index,
-    inputfile.c_str(), compile_flags, compile_flags_vec.size(),
-    nullptr, 0,
-    CXTranslationUnit_KeepGoing
-    | CXTranslationUnit_RetainExcludedConditionalBlocks
-    | CXTranslationUnit_IncludeAttributedTypes
-    | CXTranslationUnit_VisitImplicitAttributes
-    | CXTranslationUnit_IgnoreNonErrorsFromIncludedFiles );
-
-  if (unit == nullptr)
-  {
-    cerr << "Unable to parse translation unit. Quitting." << endl;
-    exit(-1);
-  }
-
-  CXCursor cursor = clang_getTranslationUnitCursor(unit);
-
-  unsigned int level = 0;
-  clang_visitChildren(cursor, traverse, &level);
-
-  //sort by line numbers then cols so looping goes well
-  std::sort(inst_locations.begin(), inst_locations.end(), comp_inst_loc);
-
-  dump_all_locs();
-
-  std::ifstream og_file;
-  std::ofstream inst_file;
-  std::string newname = inputfile;
-  if (!outputfile.empty()) {
-    newname = outputfile;
-  }
-  else {
-    newname.insert(newname.find_last_of("."), ".inst");
-  }
-  printf("new filename: %s\n", newname.c_str());
-
-  inst_file.open(newname);
-  og_file.open(inputfile);
-
-  instrument_file(og_file, inst_file, use_cxx_api);
-  og_file.close();
-  inst_file.close();
-
-  clang_disposeTranslationUnit(unit);
-  clang_disposeIndex(index);
-}
-
-// TODO
-  // test on DCA
-  // selective instrumentation
-
-
-
-// for grid???
-// #!/bin/bash
-//
-// module load cuda/10.2
-// module load gcc/8.1
-// module list
-//
-// make distclean
-// ./configure CXX="nvcc -ccbin=g++"
-// export NVCC_APPEND_FLAGS="-Xcompiler -finstrument-functions -Xcompiler -finstrument-functions-exclude-file-list=.h,.hpp,include"

From f87e6d7bf15094c83bcb25f6c2309e3c3d729fe0 Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Wed, 4 Dec 2024 13:25:12 -0800
Subject: [PATCH 010/135] Set CMAKE_CXX_STANDARD=17

---
 CMakeLists.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 350346b..d721acd 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -78,6 +78,8 @@ if(CMAKE_ARGS)
   message("Passed command line CMAKE_ARGS: ${CMAKE_ARGS}")
 endif()
 
+set(CMAKE_CXX_STANDARD 17)
+
 #-----------------------------
 # Create the main SALT project
 #-----------------------------

From 9dd706ae51bfc310f79b0dcf9295254704ca5710 Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Wed, 4 Dec 2024 16:00:09 -0800
Subject: [PATCH 011/135] Add attempt to build the PrintFlangFunctionNames
 example

Add source to llvm-project/flang/examples/PrintFlangFunctionNames and
attempt to build out of tree. Compiles and links but crashes when run.
---
 CMakeLists.txt                  | 82 ++++++++++++++++++++++++++++++++-
 src/PrintFlangFunctionNames.cpp | 78 +++++++++++++++++++++++++++++++
 2 files changed, 158 insertions(+), 2 deletions(-)
 create mode 100644 src/PrintFlangFunctionNames.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 350346b..3d8295e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -102,6 +102,7 @@ project(SALT
 find_package(LLVM REQUIRED CONFIG)
 find_package(Clang REQUIRED CONFIG)
 
+
 # get_cmake_property(_variableNames VARIABLES)
 # list (SORT _variableNames)
 # foreach (_variableName ${_variableNames})
@@ -151,6 +152,8 @@ configure_file(
   "${CMAKE_SOURCE_DIR}/include/clang_header_includes.h.in" "${CMAKE_BINARY_DIR}/include/clang_header_includes.h"
   @ONLY)
 
+
+
 #------------------------------------------
 # Specify clang and system libraries needed
 #------------------------------------------
@@ -172,6 +175,7 @@ set(CLANG_LIBS
   clangRewriteFrontend
 )
 
+
 #-----------------------------------------
 # Add an interface library to link against
 #-----------------------------------------
@@ -179,12 +183,13 @@ set(CLANG_LIBS
 # This provides an abstraction and shorthand so that you can just `target_link_libraries()` against
 # the SALT_LLVM_TOOLING interface library
 add_library(SALT_LLVM_TOOLING INTERFACE)
-target_compile_features(SALT_LLVM_TOOLING INTERFACE cxx_std_14)
+target_compile_features(SALT_LLVM_TOOLING INTERFACE cxx_std_17)
 target_include_directories(SALT_LLVM_TOOLING INTERFACE ${LLVM_INCLUDE_DIRS})
 target_compile_definitions(SALT_LLVM_TOOLING INTERFACE ${LLVM_DEFINITIONS_LIST})
 target_compile_options(SALT_LLVM_TOOLING INTERFACE ${USE_RTTI} -Wall -Werror -Wpedantic)
 target_link_libraries(SALT_LLVM_TOOLING INTERFACE ${CLANG_LIBS} ${LLVM_LIBS})
 
+
 #---------------------------------
 # List the header and source files
 #---------------------------------
@@ -215,6 +220,8 @@ set(CPARSE_LLVM_SRCS
 
 list(TRANSFORM CPARSE_LLVM_SRCS PREPEND "${CMAKE_SOURCE_DIR}/src/")
 
+
+
 #---------------------
 # Add the main targets
 #---------------------
@@ -222,7 +229,7 @@ list(TRANSFORM CPARSE_LLVM_SRCS PREPEND "${CMAKE_SOURCE_DIR}/src/")
 add_executable(cparse-llvm)
 target_sources(cparse-llvm PUBLIC ${CPARSE_LLVM_SRCS})
 target_include_directories(cparse-llvm PUBLIC "${CMAKE_SOURCE_DIR}/include" "${CMAKE_BINARY_DIR}/include")
-target_compile_features(cparse-llvm PUBLIC cxx_std_14)
+target_compile_features(cparse-llvm PUBLIC cxx_std_17)
 target_link_libraries(cparse-llvm PUBLIC SALT_LLVM_TOOLING) # Inherit definitions, compile features, etc.
 # You can try adding -static
 target_link_options(cparse-llvm PUBLIC -Wl,--as-needed -Wl,--no-allow-shlib-undefined -Wl,--no-undefined)
@@ -230,6 +237,77 @@ target_link_options(cparse-llvm PUBLIC -Wl,--as-needed -Wl,--no-allow-shlib-unde
 # Install the target
 install(TARGETS cparse-llvm DESTINATION bin)
 
+
+#---------------------
+# Flang Frontend library
+#---------------------
+
+# Flang package requires MLIR
+find_package(MLIR CONFIG)
+if(MLIR_FOUND)
+  message(STATUS "Found MLIR -- will check for Flang")
+  find_package(Flang CONFIG)
+else()
+  message(STATUS "MLIR not found -- skipping Flang")
+endif()
+
+if(MLIR_FOUND AND Flang_FOUND)
+    message(STATUS "Found Flang -- will build Flang frontend plugin")
+                
+# Variables set in FlangConfig.cmake
+    message(STATUS "FLANG_CMAKE_DIR: ${FLANG_CMAKE_DIR}")
+    message(STATUS "FLANG_EXPORTED_TARGETS: ${FLANG_EXPORTED_TARGETS}")
+    message(STATUS "FLANG_INCLUDE_DIRS: ${FLANG_INCLUDE_DIRS}")
+
+# Libraries required by Flang plugin
+    set(FLANG_LIBS
+    flangFrontend
+    )
+
+# Interface for Flang frontend plugins
+    add_library(SALT_FLANG_FRONTEND INTERFACE)
+    target_compile_features(SALT_FLANG_FRONTEND INTERFACE cxx_std_17)
+    target_include_directories(SALT_FLANG_FRONTEND INTERFACE ${LLVM_INCLUDE_DIRS} ${FLANG_INCLUDE_DIRS})
+    target_compile_definitions(SALT_FLANG_FRONTEND INTERFACE ${LLVM_DEFINITIONS_LIST})
+    target_compile_options(SALT_FLANG_FRONTEND INTERFACE ${USE_RTTI} -Wall -Werror -Wpedantic)
+    target_link_libraries(SALT_FLANG_FRONTEND INTERFACE ${FLANG_LIBS})
+
+# Endianness definitions are required, and Flang does not export a definitions list
+    include(TestBigEndian)
+    test_big_endian(IS_BIGENDIAN)
+    if (IS_BIGENDIAN)
+        target_compile_definitions(SALT_FLANG_FRONTEND INTERFACE FLANG_BIG_ENDIAN=1)
+    else ()
+        target_compile_definitions(SALT_FLANG_FRONTEND INTERFACE FLANG_LITTLE_ENDIAN=1)
+    endif ()
+
+    set(SALT_FLANG_PLUGIN_HEADER_FILES
+    selectfile.hpp
+    tau_datatypes.h
+    )
+    list(TRANSFORM SALT_FLANG_PLUGIN_HEADER_FILES PREPEND "${CMAKE_SOURCE_DIR}/include/")
+
+    set(SALT_FLANG_PLUGIN_SRCS
+    PrintFlangFunctionNames.cpp 
+    )
+    list(TRANSFORM SALT_FLANG_PLUGIN_SRCS PREPEND "${CMAKE_SOURCE_DIR}/src/")
+
+    add_library(salt-flang-plugin SHARED)
+    target_sources(salt-flang-plugin PUBLIC ${SALT_FLANG_PLUGIN_SRCS})
+    target_include_directories(salt-flang-plugin PUBLIC "${CMAKE_SOURCE_DIR}/include" "${CMAKE_BINARY_DIR}/include" )
+    target_compile_features(salt-flang-plugin PUBLIC cxx_std_17)
+    target_link_libraries(salt-flang-plugin PUBLIC SALT_FLANG_FRONTEND)
+    target_link_options(salt-flang-plugin PUBLIC -Wl,--as-needed -Wl,--no-allow-shlib-undefined -Wl,--no-undefined)
+
+    install(TARGETS salt-flang-plugin DESTINATION lib)
+
+else()
+    message(STATUS "Flang not found -- skipping Flang frontend plugin")
+endif()
+
+
+
+
 #---------------
 # Tests
 #---------------
diff --git a/src/PrintFlangFunctionNames.cpp b/src/PrintFlangFunctionNames.cpp
new file mode 100644
index 0000000..4a84c3b
--- /dev/null
+++ b/src/PrintFlangFunctionNames.cpp
@@ -0,0 +1,78 @@
+//===-- PrintFlangFunctionNames.cpp ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Small example Flang plugin to count/print Functions & Subroutines names.
+// It walks the Parse Tree using a Visitor struct that has Post functions for
+// FunctionStmt and SubroutineStmt to access the names of functions &
+// subroutines. It also has Pre functions for FunctionSubprogram and
+// SubroutineSubprogram so a Bool can be set to show that it is the definition
+// of a function/subroutine, and not print those that are in an Interface.
+// This plugin does not recognise Statement Functions or Module Procedures,
+// which could be dealt with through StmtFunctionStmt and MpSubprogramStmt nodes
+// respectively.
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Frontend/FrontendActions.h"
+#include "flang/Frontend/FrontendPluginRegistry.h"
+#include "flang/Parser/dump-parse-tree.h"
+#include "flang/Parser/parsing.h"
+
+using namespace Fortran::frontend;
+
+class PrintFunctionNamesAction : public PluginParseTreeAction {
+
+  // Visitor struct that defines Pre/Post functions for different types of nodes
+  struct ParseTreeVisitor {
+    template <typename A> bool Pre(const A &) { return true; }
+    template <typename A> void Post(const A &) {}
+
+    bool Pre(const Fortran::parser::FunctionSubprogram &) {
+      isInSubprogram_ = true;
+      return true;
+    }
+    void Post(const Fortran::parser::FunctionStmt &f) {
+      if (isInSubprogram_) {
+        llvm::outs() << "Function:\t"
+                     << std::get<Fortran::parser::Name>(f.t).ToString() << "\n";
+        fcounter++;
+        isInSubprogram_ = false;
+      }
+    }
+
+    bool Pre(const Fortran::parser::SubroutineSubprogram &) {
+      isInSubprogram_ = true;
+      return true;
+    }
+    void Post(const Fortran::parser::SubroutineStmt &s) {
+      if (isInSubprogram_) {
+        llvm::outs() << "Subroutine:\t"
+                     << std::get<Fortran::parser::Name>(s.t).ToString() << "\n";
+        scounter++;
+        isInSubprogram_ = false;
+      }
+    }
+
+    int fcounter{0};
+    int scounter{0};
+
+  private:
+    bool isInSubprogram_{false};
+  };
+
+  void executeAction() override {
+    ParseTreeVisitor visitor;
+    Fortran::parser::Walk(getParsing().parseTree(), visitor);
+
+    llvm::outs() << "\n====   Functions: " << visitor.fcounter << " ====\n";
+    llvm::outs() << "==== Subroutines: " << visitor.scounter << " ====\n";
+  }
+};
+
+static FrontendPluginRegistry::Add<PrintFunctionNamesAction> X(
+    "print-fns", "Print Function names");

From 8411f62267a10a95c4f0f8d3bd90068a0d8d0170 Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Wed, 4 Dec 2024 17:17:23 -0800
Subject: [PATCH 012/135] Fix Flang plugin link stage

Unlike Clang plugins, Flang plugins are *not* supposed to be linked
against the Flang library. Remove link to flangFrontend, allow undefined
symbols. The example plugin works now.
---
 CMakeLists.txt | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3d8295e..2768679 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -259,18 +259,12 @@ if(MLIR_FOUND AND Flang_FOUND)
     message(STATUS "FLANG_EXPORTED_TARGETS: ${FLANG_EXPORTED_TARGETS}")
     message(STATUS "FLANG_INCLUDE_DIRS: ${FLANG_INCLUDE_DIRS}")
 
-# Libraries required by Flang plugin
-    set(FLANG_LIBS
-    flangFrontend
-    )
-
 # Interface for Flang frontend plugins
     add_library(SALT_FLANG_FRONTEND INTERFACE)
     target_compile_features(SALT_FLANG_FRONTEND INTERFACE cxx_std_17)
     target_include_directories(SALT_FLANG_FRONTEND INTERFACE ${LLVM_INCLUDE_DIRS} ${FLANG_INCLUDE_DIRS})
     target_compile_definitions(SALT_FLANG_FRONTEND INTERFACE ${LLVM_DEFINITIONS_LIST})
     target_compile_options(SALT_FLANG_FRONTEND INTERFACE ${USE_RTTI} -Wall -Werror -Wpedantic)
-    target_link_libraries(SALT_FLANG_FRONTEND INTERFACE ${FLANG_LIBS})
 
 # Endianness definitions are required, and Flang does not export a definitions list
     include(TestBigEndian)
@@ -297,7 +291,7 @@ if(MLIR_FOUND AND Flang_FOUND)
     target_include_directories(salt-flang-plugin PUBLIC "${CMAKE_SOURCE_DIR}/include" "${CMAKE_BINARY_DIR}/include" )
     target_compile_features(salt-flang-plugin PUBLIC cxx_std_17)
     target_link_libraries(salt-flang-plugin PUBLIC SALT_FLANG_FRONTEND)
-    target_link_options(salt-flang-plugin PUBLIC -Wl,--as-needed -Wl,--no-allow-shlib-undefined -Wl,--no-undefined)
+    target_link_options(salt-flang-plugin PUBLIC -Wl,--as-needed -Wl,-undefined -Wl,dynamic_lookup)
 
     install(TARGETS salt-flang-plugin DESTINATION lib)
 

From 539d74954207daca8d64d91f676447eecd1abc78 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Thu, 5 Dec 2024 17:49:16 -0500
Subject: [PATCH 013/135] Make injected TAU procedure names uppercase again

Fortran is case agnostic, but the tests for the C/C++ instrumentor
check for uppercase injected instrumentation
---
 config_files/tau_fortran_config.yaml | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/config_files/tau_fortran_config.yaml b/config_files/tau_fortran_config.yaml
index 56aa6bc..b26122d 100644
--- a/config_files/tau_fortran_config.yaml
+++ b/config_files/tau_fortran_config.yaml
@@ -5,20 +5,20 @@ instrumentation: tauFortran
 
 program_insert:
   - "      integer, save :: tauProfileTimer(2) = [0, 0]"
-  - "      call tau_profile_init()"
-  - "      call tau_profile_timer(tauProfileTimer, &"
+  - "      call TAU_PROFILE_INIT()"
+  - "      call TAU_PROFILE_TIMER(tauProfileTimer, &"
   - "        \"${full_timer_name}\")"
-  - "      call tau_profile_start(tauProfileTimer)"
+  - "      call TAU_PROFILE_START(tauProfileTimer)"
   - "#ifndef TAU_MPI"
-  - "      call  tau_profile_set_node(0);"
+  - "      call  TAU_PROFILE_SET_NODE(0);"
   - "#endif ! TAU_MPI"
 
 
 procedure_begin_insert:
   - "      integer, save :: tauProfileTimer(2) = [0, 0]"
-  - "      call tau_profile_timer(tauProfileTimer, &"
+  - "      call TAU_PROFILE_TIMER(tauProfileTimer, &"
   - "        \"${full_timer_name}\")"
-  - "      call tau_profile_start(tauProfileTimer)"
+  - "      call TAU_PROFILE_START(tauProfileTimer)"
 
 procedure_end_insert:
-  - "      call tau_profile_stop(tauProfileTimer)"
+  - "      call TAU_PROFILE_STOP(tauProfileTimer)"

From 619130654a760d72b33cb57f0e0c23f4c586e755 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Thu, 5 Dec 2024 17:50:38 -0500
Subject: [PATCH 014/135] CMake: Fix a typo & update testing to find TAU in
 multiple places

---
 CMakeLists.txt | 33 +++++++++++++++++++++------------
 1 file changed, 21 insertions(+), 12 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2768679..7d56c1b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,7 +4,7 @@
 
 cmake_minimum_required(VERSION 3.13.1)
 # Ensure policies are set as they have been tested
-cmake_policy(VERSION 3.13.1...3.23.2)
+cmake_policy(VERSION 3.13.1...3.31.2)
 if(POLICY CMP0144)
   cmake_policy(SET CMP0144 NEW)
 endif()
@@ -383,7 +383,7 @@ function(add_instrumentor_test test_src)
   set_tests_properties(${TEST_NAME}_exists
     PROPERTIES
     DEPENDS ${TEST_NAME}
-    PASS_REGULAR_EXPERSSION "TAU_"
+    PASS_REGULAR_EXPRESSION "TAU_"
   )
 endfunction()
 
@@ -404,17 +404,26 @@ endforeach()
 
 # Check if TAU_ROOT is set as an environment variable and if not set it as a CMake cache variable to /usr/local
 # otherwise, use the value from the environment
-if(NOT DEFINED $ENV{TAU_ROOT})
-  set(TAU_ROOT "/usr/local" CACHE PATH "TAU Root Directory")
-else()
+if(NOT DEFINED ENV{TAU_ROOT})
+  find_program(TAU_EXEC tau_exec
+    PATH_SUFFIXES x86_64 x86_64/bin craycnl craycnl/bin apple apple/bin
+  )
+  if(NOT TAU_EXEC)
+    message(FATAL_ERROR "TAU not found. Please set TAU_ROOT to the TAU installation directory.")
+  else()
+    get_filename_component(TAU_ROOT ${TAU_EXEC} DIRECTORY) # This will be a bin directory
+    get_filename_component(TAU_ROOT ${TAU_ROOT} DIRECTORY) # This might be an arch directory
+    string(REGEX REPLACE "(/x86_64$)|(/apple$)|(/craycnl$)" "" TAU_ROOT ${TAU_ROOT})
   set(TAU_ROOT $ENV{TAU_ROOT} CACHE PATH "TAU Root Directory")
 endif()
 
-find_file(TAU_CLANG_MAKEFILE Makefile.tau-clang-pthread
-PATHS ${TAU_ROOT}  PATH_SUFFIXES x86_64 x86_64/lib
+find_file(TAU_CLANG_MAKEFILE 
+  NAMES Makefile.tau-clang-pthread
+  PATHS ${TAU_ROOT}  PATH_SUFFIXES x86_64 x86_64/lib
 )
-find_file(TAU_GCC_MAKEFILE Makefile.tau-pthread
-PATHS ${TAU_ROOT}  PATH_SUFFIXES x86_64 x86_64/lib
+find_file(TAU_GCC_MAKEFILE
+  NAMES Makefile.tau-pthread Makefile.tau-pthread-pdt
+  PATHS ${TAU_ROOT}  PATH_SUFFIXES x86_64 x86_64/lib
 )
 find_program(TAUCC tau_cc.sh
 PATHS ${TAU_ROOT}  PATH_SUFFIXES x86_64 x86_64/bin
@@ -431,11 +440,11 @@ set(TAU_HEADER_LOCATIONS
 )
 set(TAU_CLANG_HEADER_LOCATIONS
   -I${TAU_ROOT}/x86_64/libdwarf-clang/include
-  -I${TAU_ROOT}/x86_64/libunwind-1.3.1-clang/include
+  -I${TAU_ROOT}/x86_64/libunwind-1.6.2-clang/include
 )
 set(TAU_GCC_HEADER_LOCATIONS
-  -I${TAU_ROOT}/x86_64/libdwarf-clang/include
-  -I${TAU_ROOT}/x86_64/libunwind-1.3.1-clang/include
+  -I${TAU_ROOT}/x86_64/libdwarf-gcc/include
+  -I${TAU_ROOT}/x86_64/libunwind-1.6.2-gcc/include
 )
 # End of section that note applies to
 ################

From 8a449bb8432156f5109db28d9138931668107cb5 Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Thu, 5 Dec 2024 14:58:23 -0800
Subject: [PATCH 015/135] Replace example plugin with file for instrumentor
 plugin

---
 CMakeLists.txt                                |  2 +-
 ...s.cpp => salt_instrument_flang_plugin.cpp} | 34 ++++++++-----------
 2 files changed, 16 insertions(+), 20 deletions(-)
 rename src/{PrintFlangFunctionNames.cpp => salt_instrument_flang_plugin.cpp} (62%)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2768679..da4ab81 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -282,7 +282,7 @@ if(MLIR_FOUND AND Flang_FOUND)
     list(TRANSFORM SALT_FLANG_PLUGIN_HEADER_FILES PREPEND "${CMAKE_SOURCE_DIR}/include/")
 
     set(SALT_FLANG_PLUGIN_SRCS
-    PrintFlangFunctionNames.cpp 
+    salt_instrument_flang_plugin.cpp
     )
     list(TRANSFORM SALT_FLANG_PLUGIN_SRCS PREPEND "${CMAKE_SOURCE_DIR}/src/")
 
diff --git a/src/PrintFlangFunctionNames.cpp b/src/salt_instrument_flang_plugin.cpp
similarity index 62%
rename from src/PrintFlangFunctionNames.cpp
rename to src/salt_instrument_flang_plugin.cpp
index 4a84c3b..7ed29f5 100644
--- a/src/PrintFlangFunctionNames.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -1,22 +1,18 @@
-//===-- PrintFlangFunctionNames.cpp ---------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Small example Flang plugin to count/print Functions & Subroutines names.
-// It walks the Parse Tree using a Visitor struct that has Post functions for
-// FunctionStmt and SubroutineStmt to access the names of functions &
-// subroutines. It also has Pre functions for FunctionSubprogram and
-// SubroutineSubprogram so a Bool can be set to show that it is the definition
-// of a function/subroutine, and not print those that are in an Interface.
-// This plugin does not recognise Statement Functions or Module Procedures,
-// which could be dealt with through StmtFunctionStmt and MpSubprogramStmt nodes
-// respectively.
-//
-//===----------------------------------------------------------------------===//
+/*
+Copyright (C) 2024, ParaTools, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
 
 #include "flang/Frontend/FrontendActions.h"
 #include "flang/Frontend/FrontendPluginRegistry.h"

From 04c22515a26c47a25f7b0b3d19755101b92ae8ca Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Thu, 5 Dec 2024 15:01:45 -0800
Subject: [PATCH 016/135] Add missing endif() that was causing CMake error

---
 CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index ffd6523..dd4243e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -417,6 +417,7 @@ if(NOT DEFINED ENV{TAU_ROOT})
     get_filename_component(TAU_ROOT ${TAU_ROOT} DIRECTORY) # This might be an arch directory
     string(REGEX REPLACE "(/x86_64$)|(/apple$)|(/craycnl$)" "" TAU_ROOT ${TAU_ROOT})
   set(TAU_ROOT $ENV{TAU_ROOT} CACHE PATH "TAU Root Directory")
+  endif()
 endif()
 
 find_file(TAU_CLANG_MAKEFILE 

From c412ec07bdd535256a148a2b29c6dbd32c822e3b Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Thu, 5 Dec 2024 15:05:30 -0800
Subject: [PATCH 017/135] Fix CMake syntax error

---
 CMakeLists.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index dd4243e..df8915c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -416,8 +416,9 @@ if(NOT DEFINED ENV{TAU_ROOT})
     get_filename_component(TAU_ROOT ${TAU_EXEC} DIRECTORY) # This will be a bin directory
     get_filename_component(TAU_ROOT ${TAU_ROOT} DIRECTORY) # This might be an arch directory
     string(REGEX REPLACE "(/x86_64$)|(/apple$)|(/craycnl$)" "" TAU_ROOT ${TAU_ROOT})
-  set(TAU_ROOT $ENV{TAU_ROOT} CACHE PATH "TAU Root Directory")
   endif()
+  else()
+    set(TAU_ROOT $ENV{TAU_ROOT} CACHE PATH "TAU Root Directory")
 endif()
 
 find_file(TAU_CLANG_MAKEFILE 

From 415d4e8ea0f7a2cab6d26a1aac8652b261842fce Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Thu, 5 Dec 2024 15:08:44 -0800
Subject: [PATCH 018/135] Start reorganizing plugin

---
 src/salt_instrument_flang_plugin.cpp | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index 7ed29f5..5e64a83 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -21,10 +21,10 @@ limitations under the License.
 
 using namespace Fortran::frontend;
 
-class PrintFunctionNamesAction : public PluginParseTreeAction {
+class SaltInstrumentAction : public PluginParseTreeAction {
 
   // Visitor struct that defines Pre/Post functions for different types of nodes
-  struct ParseTreeVisitor {
+  struct SaltInstrumentParseTreeVisitor {
     template <typename A> bool Pre(const A &) { return true; }
     template <typename A> void Post(const A &) {}
 
@@ -36,7 +36,6 @@ class PrintFunctionNamesAction : public PluginParseTreeAction {
       if (isInSubprogram_) {
         llvm::outs() << "Function:\t"
                      << std::get<Fortran::parser::Name>(f.t).ToString() << "\n";
-        fcounter++;
         isInSubprogram_ = false;
       }
     }
@@ -49,26 +48,23 @@ class PrintFunctionNamesAction : public PluginParseTreeAction {
       if (isInSubprogram_) {
         llvm::outs() << "Subroutine:\t"
                      << std::get<Fortran::parser::Name>(s.t).ToString() << "\n";
-        scounter++;
         isInSubprogram_ = false;
       }
     }
 
-    int fcounter{0};
-    int scounter{0};
-
   private:
     bool isInSubprogram_{false};
   };
 
   void executeAction() override {
-    ParseTreeVisitor visitor;
+    llvm::outs() << "==== SALT Instrumentor Plugin starting ====\n";
+
+    SaltInstrumentParseTreeVisitor visitor;
     Fortran::parser::Walk(getParsing().parseTree(), visitor);
 
-    llvm::outs() << "\n====   Functions: " << visitor.fcounter << " ====\n";
-    llvm::outs() << "==== Subroutines: " << visitor.scounter << " ====\n";
+    llvm::outs() << "==== SALT Instrumentor Plugin finished ====\n";
   }
 };
 
-static FrontendPluginRegistry::Add<PrintFunctionNamesAction> X(
-    "print-fns", "Print Function names");
+static FrontendPluginRegistry::Add<SaltInstrumentAction> X(
+    "salt-instrument", "Apply SALT Instrumentation");

From 205ace617cfedc2b4a68b7683e71e72dc755a344 Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Thu, 5 Dec 2024 15:29:47 -0800
Subject: [PATCH 019/135] Add ProgramStmt handler, print Program name

---
 src/salt_instrument_flang_plugin.cpp | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index 5e64a83..3bfe56f 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -14,6 +14,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 
+/* SALT-FM Flang Fortran Instrumentor Plugin */
+
 #include "flang/Frontend/FrontendActions.h"
 #include "flang/Frontend/FrontendPluginRegistry.h"
 #include "flang/Parser/dump-parse-tree.h"
@@ -21,17 +23,36 @@ limitations under the License.
 
 using namespace Fortran::frontend;
 
+/**
+ * The main action of the Salt instrumentor.
+ * Visits each node in the parse tree.
+ */
 class SaltInstrumentAction : public PluginParseTreeAction {
-
-  // Visitor struct that defines Pre/Post functions for different types of nodes
   struct SaltInstrumentParseTreeVisitor {
+    // Default empty visit functions for otherwise unhandled types.
     template <typename A> bool Pre(const A &) { return true; }
     template <typename A> void Post(const A &) {}
 
+    // Override all types that we want to visit.
+    // Pre occurs when first visiting a node.
+    // Post occurs when returning from the node's children.
+    // See https://flang.llvm.org/docs/Parsing.html for information on the parse tree.
+    // There are three types of parse tree nodes:
+    // Wrappers, with a single data member, always named `v`.
+    // Tuples, encapsulating multiple values in a data member named `t` of type std::tuple.
+    // Discriminated unions, one of several types stored in data member named `u` of type std::variant.
+    // Use std::get() to retrieve value from `t` or `u`
+
     bool Pre(const Fortran::parser::FunctionSubprogram &) {
       isInSubprogram_ = true;
       return true;
     }
+
+    void Post(const Fortran::parser::ProgramStmt & program) {
+      llvm::outs() << "Program: \t"
+                   <<  program.v.ToString() << "\n";
+    }
+
     void Post(const Fortran::parser::FunctionStmt &f) {
       if (isInSubprogram_) {
         llvm::outs() << "Function:\t"

From 9f8ea63b1fe963b8836a512a85782b8ca7fa45b2 Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Thu, 5 Dec 2024 16:48:17 -0800
Subject: [PATCH 020/135] Add a few more comments

---
 src/salt_instrument_flang_plugin.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index 3bfe56f..623b37c 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -16,6 +16,9 @@ limitations under the License.
 
 /* SALT-FM Flang Fortran Instrumentor Plugin */
 
+// See https://flang.llvm.org/docs/FlangDriver.html#frontend-driver-plugins
+// for documentation of the Flang frontend plugin interface
+
 #include "flang/Frontend/FrontendActions.h"
 #include "flang/Frontend/FrontendPluginRegistry.h"
 #include "flang/Parser/dump-parse-tree.h"
@@ -37,6 +40,7 @@ class SaltInstrumentAction : public PluginParseTreeAction {
     // Pre occurs when first visiting a node.
     // Post occurs when returning from the node's children.
     // See https://flang.llvm.org/docs/Parsing.html for information on the parse tree.
+    // Parse tree types are defined in: include/flang/Parser/parse-tree.h
     // There are three types of parse tree nodes:
     // Wrappers, with a single data member, always named `v`.
     // Tuples, encapsulating multiple values in a data member named `t` of type std::tuple.

From ce1c1258f94af1c70531aa9c7366acc982e4a3e6 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Thu, 5 Dec 2024 16:03:19 -0800
Subject: [PATCH 021/135] Make logic for finding & using TAU a bit more robust

It still needs a rework/refactor though...
Writing a CMake find module for TAU would be nice to do someday.
---
 CMakeLists.txt | 77 +++++++++++++++++++++++++++-----------------------
 1 file changed, 41 insertions(+), 36 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index df8915c..855ddc8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -301,6 +301,45 @@ else()
     message(STATUS "Flang not found -- skipping Flang frontend plugin")
 endif()
 
+# Use TAU_ROOT from the environment when it is set; otherwise locate tau_exec on the search path
+# and derive TAU_ROOT from its location (configuration fails if tau_exec cannot be found)
+if(NOT DEFINED ENV{TAU_ROOT})
+  find_program(TAU_EXEC tau_exec
+    PATH_SUFFIXES x86_64 x86_64/bin craycnl craycnl/bin apple apple/bin
+  )
+  if(NOT TAU_EXEC)
+    message(FATAL_ERROR "TAU not found. Please set TAU_ROOT to the TAU installation directory.")
+  else()
+    get_filename_component(TAU_ROOT ${TAU_EXEC} DIRECTORY) # This will be a bin directory
+    get_filename_component(TAU_ROOT ${TAU_ROOT} DIRECTORY) # This might be an arch directory
+    string(REGEX REPLACE "(/x86_64$)|(/apple$)|(/craycnl$)" "" TAU_ROOT ${TAU_ROOT})
+  endif()
+  else()
+    set(TAU_ROOT $ENV{TAU_ROOT} CACHE PATH "TAU Root Directory")
+endif()
+
+find_file(TAU_CLANG_MAKEFILE
+  NAMES Makefile.tau-clang-pthread
+  PATHS ${TAU_ROOT}  PATH_SUFFIXES x86_64 x86_64/lib
+  REQUIRED
+)
+find_file(TAU_GCC_MAKEFILE
+  NAMES Makefile.tau-pthread Makefile.tau-pthread-pdt
+  PATHS ${TAU_ROOT}  PATH_SUFFIXES x86_64 x86_64/lib
+  REQUIRED
+)
+find_program(TAUCC tau_cc.sh
+  PATHS ${TAU_ROOT}  PATH_SUFFIXES x86_64 x86_64/bin
+  REQUIRED
+)
+find_program(TAUCXX tau_cxx.sh
+  PATHS ${TAU_ROOT}  PATH_SUFFIXES x86_64 x86_64/bin
+  REQUIRED
+)
+find_program(TAU_EXEC tau_exec
+  PATHS ${TAU_ROOT}  PATH_SUFFIXES x86_64 x86_64/bin
+  REQUIRED
+)
 
 
 
@@ -404,40 +443,6 @@ endforeach()
 # and -pthread -bfd=download -unwind=download -libdwarf=download 
 # -otf=download
 
-# Check if TAU_ROOT is set as an environment variable and if not set it as a CMake cache variable to /usr/local
-# otherwise, use the value from the environment
-if(NOT DEFINED ENV{TAU_ROOT})
-  find_program(TAU_EXEC tau_exec
-    PATH_SUFFIXES x86_64 x86_64/bin craycnl craycnl/bin apple apple/bin
-  )
-  if(NOT TAU_EXEC)
-    message(FATAL_ERROR "TAU not found. Please set TAU_ROOT to the TAU installation directory.")
-  else()
-    get_filename_component(TAU_ROOT ${TAU_EXEC} DIRECTORY) # This will be a bin directory
-    get_filename_component(TAU_ROOT ${TAU_ROOT} DIRECTORY) # This might be an arch directory
-    string(REGEX REPLACE "(/x86_64$)|(/apple$)|(/craycnl$)" "" TAU_ROOT ${TAU_ROOT})
-  endif()
-  else()
-    set(TAU_ROOT $ENV{TAU_ROOT} CACHE PATH "TAU Root Directory")
-endif()
-
-find_file(TAU_CLANG_MAKEFILE 
-  NAMES Makefile.tau-clang-pthread
-  PATHS ${TAU_ROOT}  PATH_SUFFIXES x86_64 x86_64/lib
-)
-find_file(TAU_GCC_MAKEFILE
-  NAMES Makefile.tau-pthread Makefile.tau-pthread-pdt
-  PATHS ${TAU_ROOT}  PATH_SUFFIXES x86_64 x86_64/lib
-)
-find_program(TAUCC tau_cc.sh
-PATHS ${TAU_ROOT}  PATH_SUFFIXES x86_64 x86_64/bin
-)
-find_program(TAUCXX tau_cxx.sh
-PATHS ${TAU_ROOT}  PATH_SUFFIXES x86_64 x86_64/bin
-)
-find_program(TAU_EXEC tau_exec
-PATHS ${TAU_ROOT}  PATH_SUFFIXES x86_64 x86_64/bin
-)
 
 set(TAU_HEADER_LOCATIONS
   -I${TAU_ROOT}/include
@@ -516,9 +521,9 @@ function(compile_instrumented test_src)
   endif()
 
   if(${TEST_LANG} STREQUAL "c")
-    set(TAUC tau_cc.sh)
+    set(TAUC ${TAUCC}) # C sources: tau_cc.sh as located by find_program(TAUCC ...)
   elseif(${TEST_LANG} STREQUAL "cpp")
-    set(TAUC tau_cxx.sh)
+    set(TAUC ${TAUCXX}) # C++ sources: tau_cxx.sh as located by find_program(TAUCXX ...)
   else()
     message( FATAL_ERROR "Unknown test source file extension: ${TEST_LANG}")
   endif()

From 876eb1f5cdeef857d6f89caf626508899707ddfd Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Fri, 6 Dec 2024 09:44:13 -0500
Subject: [PATCH 022/135] Update environment script for new TAU installed on UO
 machines

---
 activate-salt-fm-env.sh | 7 ++++++-
 spack.yaml              | 3 +--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/activate-salt-fm-env.sh b/activate-salt-fm-env.sh
index bde6fc9..83bdac3 100644
--- a/activate-salt-fm-env.sh
+++ b/activate-salt-fm-env.sh
@@ -1,3 +1,9 @@
+#!/usr/bin/env bash
+export SALT_ROOT=/storage/packages/salt-fm
+export PATH="$SALT_ROOT/base/tools/bin:$PATH"
+export PATH="$SALT_ROOT/opt/tau/x86_64/bin:$PATH"
+module use /storage/packages/salt-fm/spack/share/spack/modules/linux-rhel8-x86_64
+module use /storage/packages/salt-fm/spack/share/spack/modules/linux-centos7-x86_64/
 echo "purging loaded modules"
 module purge
 echo "listing loaded modules:"
@@ -7,7 +13,6 @@ echo "loading llvm and gcc:"
 module load llvm/git.086d8e6bb5daf8de43880ba90258c49e0fabf2c9_19.1.4-zpacv56
 #module load mpich/4.2.3-ugxzfxf
 module load gcc/14.2.0-ttkqi3s
-#module load tau/master-l3jx42k
 echo "listing loaded modules:"
 module list
 echo "Finished"
diff --git a/spack.yaml b/spack.yaml
index 34bbd2a..b2f00ef 100644
--- a/spack.yaml
+++ b/spack.yaml
@@ -58,6 +58,5 @@ spack:
 
   specs:
   - llvm@git.086d8e6bb5daf8de43880ba90258c49e0fabf2c9=19.1.4 +flang +mlir openmp=project
-  #  -   tau@master%gcc@14.2.0~adios2+binutils~comm~craycnl~cuda~disable-no-pie~dyninst+elf+fortran~gasnet+io~level_zero+libdwarf+libunwind+mpi~ompt~opari~opencl+openmp+otf2+papi+pdt~phase~ppc64le~profileparam+pthreads~python~rocm~rocprofiler~rocprofv2~roctracer~scorep~shmem~sqlite~syscall~x86_64 build_system=generic arch=linux-rhel8-x86_64
-  - tau@master +mpi +openmp cflags=-Wno-error=implicit-function-declaration
   - mpich ~wrapperrpath
+  
\ No newline at end of file

From e11ca4a229179d09d49f4ae0db700dd8c2e7477a Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Fri, 6 Dec 2024 06:52:32 -0800
Subject: [PATCH 023/135] Update instructions in build&test script &
 environment setup script

---
 activate-salt-fm-env.sh |  4 ++++
 build_and_test.sh       | 13 +++++++++++++
 2 files changed, 17 insertions(+)
 create mode 100644 build_and_test.sh

diff --git a/activate-salt-fm-env.sh b/activate-salt-fm-env.sh
index 83bdac3..c4a3774 100644
--- a/activate-salt-fm-env.sh
+++ b/activate-salt-fm-env.sh
@@ -1,4 +1,8 @@
 #!/usr/bin/env bash
+# Sourec this file on UO machines to setup your SALT-FM development environment
+# `source activate-salt-fm-env.sh`
+# After this you can quickly configure, build, and test using `./build_and_test.sh`
+
 export SALT_ROOT=/storage/packages/salt-fm
 export PATH="$SALT_ROOT/base/tools/bin:$PATH"
 export PATH="$SALT_ROOT/opt/tau/x86_64/bin:$PATH"
diff --git a/build_and_test.sh b/build_and_test.sh
new file mode 100644
index 0000000..eccc51d
--- /dev/null
+++ b/build_and_test.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+# Quick script to configure, build and test SALT-FM
+# On Gilgamesh or other UO systems, you can load the SALT-FM stack/environment with:
+# `source activate-salt-fm-env.sh`
+
+set -o errexit
+set -o nounset
+set -o pipefile
+set -o verbose
+
+cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -Wdev -Wdeprecated -G Ninja -S . -B build
+cmake --build build --parallel 8 --verbose || cmake --build build --verbose
+( cd build && ctest --output-on-failure )

From 38434c19305b4a6b129e5f2f91b2c44370a1251f Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Fri, 6 Dec 2024 06:59:37 -0800
Subject: [PATCH 024/135] Fix typo and permissions of build_and_test.sh

---
 build_and_test.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 mode change 100644 => 100755 build_and_test.sh

diff --git a/build_and_test.sh b/build_and_test.sh
old mode 100644
new mode 100755
index eccc51d..fbdffd0
--- a/build_and_test.sh
+++ b/build_and_test.sh
@@ -5,7 +5,7 @@
 
 set -o errexit
 set -o nounset
-set -o pipefile
+set -o pipefail
 set -o verbose
 
 cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -Wdev -Wdeprecated -G Ninja -S . -B build

From cf7635733bf41e5d0fb2db12a0d52fa141444934 Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Fri, 6 Dec 2024 12:23:47 -0800
Subject: [PATCH 025/135] Experimenting with getting source position of parse
 tree nodes

---
 src/salt_instrument_flang_plugin.cpp | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index 623b37c..19d99f2 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -32,6 +32,15 @@ using namespace Fortran::frontend;
  */
 class SaltInstrumentAction : public PluginParseTreeAction {
   struct SaltInstrumentParseTreeVisitor {
+
+    explicit SaltInstrumentParseTreeVisitor(Fortran::parser::Parsing *parsing)
+      : parsing(parsing) {
+    }
+
+    // Pass in the parser object from the Action to the Visitor
+    // so that we can use it while processing parse tree nodes.
+    Fortran::parser::Parsing *parsing{nullptr};
+
     // Default empty visit functions for otherwise unhandled types.
     template <typename A> bool Pre(const A &) { return true; }
     template <typename A> void Post(const A &) {}
@@ -52,9 +61,15 @@ class SaltInstrumentAction : public PluginParseTreeAction {
       return true;
     }
 
+    // See https://github.com/llvm/llvm-project/blob/main/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
+    // for examples of getting source position for a parse tree node
     void Post(const Fortran::parser::ProgramStmt & program) {
+      const auto & pos = parsing->allCooked().GetSourcePositionRange(program.v.source);
       llvm::outs() << "Program: \t"
-                   <<  program.v.ToString() << "\n";
+                   <<  program.v.ToString()
+                   << "\t (" << pos->first.line << ", " << pos->first.column << ")"
+                   << "\t (" << pos->second.line << ", " << pos->second.column << ")"
+                   << "\n";
     }
 
     void Post(const Fortran::parser::FunctionStmt &f) {
@@ -84,8 +99,9 @@ class SaltInstrumentAction : public PluginParseTreeAction {
   void executeAction() override {
     llvm::outs() << "==== SALT Instrumentor Plugin starting ====\n";
 
-    SaltInstrumentParseTreeVisitor visitor;
-    Fortran::parser::Walk(getParsing().parseTree(), visitor);
+    Fortran::parser::Parsing & parsing = getParsing();
+    SaltInstrumentParseTreeVisitor visitor{&parsing};
+    Fortran::parser::Walk(parsing.parseTree(), visitor);
 
     llvm::outs() << "==== SALT Instrumentor Plugin finished ====\n";
   }

From 81a83083f44bf99475f46b4162b0f186033e97d4 Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Fri, 6 Dec 2024 13:12:57 -0800
Subject: [PATCH 026/135] Fix typo in comment in CMakeLists.txt

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 855ddc8..3b4f735 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -256,7 +256,7 @@ endif()
 if(MLIR_FOUND AND Flang_FOUND)
     message(STATUS "Found Flang -- will build Flang frontend plugin")
                 
-# Variables set i1n FlangConfig.cmake
+# Variables set in FlangConfig.cmake
     message(STATUS "FLANG_CMAKE_DIR: ${FLANG_CMAKE_DIR}")
     message(STATUS "FLANG_EXPORTED_TARGETS: ${FLANG_EXPORTED_TARGETS}")
     message(STATUS "FLANG_INCLUDE_DIRS: ${FLANG_INCLUDE_DIRS}")

From 83eba75d030476d40fd4b32632700d2df78b9555 Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Fri, 6 Dec 2024 13:13:33 -0800
Subject: [PATCH 027/135] Add simple Fortran examples from TAU to tests/fortran

---
 tests/fortran/cubes.f           | 15 +++++++++++++++
 tests/fortran/cubes.hand-inst.f | 22 ++++++++++++++++++++++
 tests/{ => fortran}/hello.f90   |  0
 tests/fortran/loop_test.f90     | 33 +++++++++++++++++++++++++++++++++
 4 files changed, 70 insertions(+)
 create mode 100644 tests/fortran/cubes.f
 create mode 100644 tests/fortran/cubes.hand-inst.f
 rename tests/{ => fortran}/hello.f90 (100%)
 create mode 100644 tests/fortran/loop_test.f90

diff --git a/tests/fortran/cubes.f b/tests/fortran/cubes.f
new file mode 100644
index 0000000..55226db
--- /dev/null
+++ b/tests/fortran/cubes.f
@@ -0,0 +1,15 @@
+cc34567 Cubes program
+      PROGRAM SUM_OF_CUBES 
+      INTEGER :: H, T, U 
+      ! This program prints all 3-digit numbers that 
+      ! equal the sum of the cubes of their digits. 
+      DO H = 1, 9 
+        DO T = 0, 9 
+          DO U = 0, 9 
+          IF (100*H + 10*T + U == H**3 + T**3 + U**3) THEN
+             PRINT "(3I1)", H, T, U 
+	  ENDIF
+          END DO 
+        END DO 
+      END DO 
+      END PROGRAM SUM_OF_CUBES
diff --git a/tests/fortran/cubes.hand-inst.f b/tests/fortran/cubes.hand-inst.f
new file mode 100644
index 0000000..e553c7a
--- /dev/null
+++ b/tests/fortran/cubes.hand-inst.f
@@ -0,0 +1,22 @@
+cc34567 Cubes program
+      PROGRAM SUM_OF_CUBES 
+       integer profiler(2) / 0, 0 /
+	save profiler
+      INTEGER :: H, T, U 
+        call TAU_PROFILE_INIT()
+        call TAU_PROFILE_TIMER(profiler, 'PROGRAM SUM_OF_CUBES')
+        call TAU_PROFILE_START(profiler)
+        call TAU_PROFILE_SET_NODE(0)
+      ! This program prints all 3-digit numbers that 
+      ! equal the sum of the cubes of their digits. 
+      DO H = 1, 9 
+        DO T = 0, 9 
+          DO U = 0, 9 
+          IF (100*H + 10*T + U == H**3 + T**3 + U**3) THEN
+             PRINT "(3I1)", H, T, U 
+	  ENDIF
+          END DO 
+        END DO 
+      END DO 
+      call TAU_PROFILE_STOP(profiler)
+      END PROGRAM SUM_OF_CUBES
diff --git a/tests/hello.f90 b/tests/fortran/hello.f90
similarity index 100%
rename from tests/hello.f90
rename to tests/fortran/hello.f90
diff --git a/tests/fortran/loop_test.f90 b/tests/fortran/loop_test.f90
new file mode 100644
index 0000000..647e103
--- /dev/null
+++ b/tests/fortran/loop_test.f90
@@ -0,0 +1,33 @@
+subroutine bar(arg)
+  integer arg
+
+  print *, "inside bar arg = ", arg
+end subroutine bar
+
+subroutine foo(iVal)
+  integer iVal
+  integer j, k
+! Do something here...
+  print *, "Iteration = ", iVal
+        do j = 1, 5
+          do k = 1, 2
+            print *, "j = ", j
+          end do
+        end do
+
+        do 10, i = 1, 3
+        call bar(i+iVal)
+10      continue
+        print *, "after calling bar in foo"
+      end
+
+program main
+  integer i
+
+  print *, "test program"
+
+  do 10, i = 1, 3
+    call foo(i)
+10  continue
+end program main
+

From e26ad59b0e9fe072fecaaa32d917944259ed7bf4 Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Fri, 6 Dec 2024 15:31:05 -0800
Subject: [PATCH 028/135] Find main program

---
 src/salt_instrument_flang_plugin.cpp | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index 19d99f2..1c33b8e 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -46,9 +46,11 @@ class SaltInstrumentAction : public PluginParseTreeAction {
     template <typename A> void Post(const A &) {}
 
     // Override all types that we want to visit.
+
     // Pre occurs when first visiting a node.
     // Post occurs when returning from the node's children.
     // See https://flang.llvm.org/docs/Parsing.html for information on the parse tree.
+
     // Parse tree types are defined in: include/flang/Parser/parse-tree.h
     // There are three types of parse tree nodes:
     // Wrappers, with a single data member, always named `v`.
@@ -56,17 +58,30 @@ class SaltInstrumentAction : public PluginParseTreeAction {
     // Discriminated unions, one of several types stored in data member named `u` of type std::variant.
     // Use std::get() to retrieve value from `t` or `u`
 
+    // See https://github.com/llvm/llvm-project/blob/main/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
+    // for examples of getting source position for a parse tree node
+
+    bool Pre(const Fortran::parser::MainProgram &) {
+      llvm::outs() << "Entering main program\n";
+      isInMainProgram_ = true;
+      return true;
+    }
+
+    void Post(const Fortran::parser::MainProgram & ) {
+      llvm::outs() << "Exiting main program: " << mainProgramName_ << "\n";
+      isInMainProgram_ = false;
+    }
+
     bool Pre(const Fortran::parser::FunctionSubprogram &) {
       isInSubprogram_ = true;
       return true;
     }
 
-    // See https://github.com/llvm/llvm-project/blob/main/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
-    // for examples of getting source position for a parse tree node
     void Post(const Fortran::parser::ProgramStmt & program) {
+      mainProgramName_ = program.v.ToString();
       const auto & pos = parsing->allCooked().GetSourcePositionRange(program.v.source);
       llvm::outs() << "Program: \t"
-                   <<  program.v.ToString()
+                   <<  mainProgramName_
                    << "\t (" << pos->first.line << ", " << pos->first.column << ")"
                    << "\t (" << pos->second.line << ", " << pos->second.column << ")"
                    << "\n";
@@ -94,6 +109,8 @@ class SaltInstrumentAction : public PluginParseTreeAction {
 
   private:
     bool isInSubprogram_{false};
+    bool isInMainProgram_{false};
+    std::string mainProgramName_;
   };
 
   void executeAction() override {

From 0a5a597503bf6db577547420f33f3b863a758f39 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Sun, 8 Dec 2024 12:05:52 -0500
Subject: [PATCH 029/135] Build flang/FM components & upgrade CI to LLVM 19 &
 latest salt-dev

---
 .github/workflows/CI.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml
index bd35f68..346ba57 100644
--- a/.github/workflows/CI.yaml
+++ b/.github/workflows/CI.yaml
@@ -4,7 +4,7 @@ jobs:
   build:
     runs-on: ubuntu-latest
     container:
-      image: paratools/salt-dev:1.1
+      image: paratools/salt-dev:1.3
     steps:
       - uses: actions/checkout@v3
         with:

From 4bfed0da12827c31fc0918216ac900654a22c50f Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Sun, 8 Dec 2024 14:58:21 -0500
Subject: [PATCH 030/135] See if we can bypass checking SSL cert on git
 checkout

---
 .github/workflows/CI.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml
index 346ba57..64d1d1f 100644
--- a/.github/workflows/CI.yaml
+++ b/.github/workflows/CI.yaml
@@ -1,5 +1,7 @@
 name: CI
 on: push
+env: 
+  GIT_SSL_NO_VERIFY: true
 jobs:
   build:
     runs-on: ubuntu-latest

From 8f55e15e7d2fb7b930c15513cefc88ec3177337f Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Mon, 9 Dec 2024 09:04:38 -0800
Subject: [PATCH 031/135] Update to use TAU module provided by Luke

---
 activate-salt-fm-env.sh | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/activate-salt-fm-env.sh b/activate-salt-fm-env.sh
index c4a3774..9759dbd 100644
--- a/activate-salt-fm-env.sh
+++ b/activate-salt-fm-env.sh
@@ -4,8 +4,8 @@
 # After this you can quickly configure, build, and test using `./build_and_test.sh`
 
 export SALT_ROOT=/storage/packages/salt-fm
-export PATH="$SALT_ROOT/base/tools/bin:$PATH"
-export PATH="$SALT_ROOT/opt/tau/x86_64/bin:$PATH"
+# export PATH="$SALT_ROOT/base/tools/bin:$PATH"
+module use /packages/salt-fm/modules
 module use /storage/packages/salt-fm/spack/share/spack/modules/linux-rhel8-x86_64
 module use /storage/packages/salt-fm/spack/share/spack/modules/linux-centos7-x86_64/
 echo "purging loaded modules"
@@ -17,6 +17,10 @@ echo "loading llvm and gcc:"
 module load llvm/git.086d8e6bb5daf8de43880ba90258c49e0fabf2c9_19.1.4-zpacv56
 #module load mpich/4.2.3-ugxzfxf
 module load gcc/14.2.0-ttkqi3s
+echo "loading ninja"
+module load ninja
+echo "loading tau"
+module load tau/2.34
 echo "listing loaded modules:"
 module list
 echo "Finished"

From a63963169ff8bc3a4425236fc341f89bccb14cc7 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Mon, 9 Dec 2024 12:14:52 -0500
Subject: [PATCH 032/135] Fixed swapped TAU CXX and C compilers in CMake tests

---
 CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3b4f735..b3a6bd5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -521,9 +521,9 @@ function(compile_instrumented test_src)
   endif()
 
   if(${TEST_LANG} STREQUAL "c")
-    set(TAUC ${TAUCXX})
-  elseif(${TEST_LANG} STREQUAL "cpp")
     set(TAUC ${TAUCC})
+  elseif(${TEST_LANG} STREQUAL "cpp")
+    set(TAUC ${TAUCXX})
   else()
     message( FATAL_ERROR "Unknown test source file extension: ${TEST_LANG}")
   endif()

From de3a4852642f59bc276186c712a1141129589de0 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Mon, 9 Dec 2024 16:54:26 -0500
Subject: [PATCH 033/135] Catch CTest errors when linking fails and compiling
 w/ clang

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index b3a6bd5..0fcf904 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -554,7 +554,7 @@ function(compile_instrumented test_src)
       ENVIRONMENT TAU_MAKEFILE=${TAU_${comp}_MAKEFILE}
       FIXTURES_REQUIRED clean_${lower_comp}_${TEST_NAME}_objects
       DEPENDS ${depends_on}
-      FAIL_REGULAR_EXPRESSION "[Dd]isabling instrumentation of source code;[Ss]witching to compiler-based instrumentation;[Cc]ompiling with [Nn]on-[Ii]nstrumented [Rr]egular [Cc]ode"
+      FAIL_REGULAR_EXPRESSION "[Dd]isabling instrumentation of source code;[Ss]witching to compiler-based instrumentation;[Cc]ompiling with [Nn]on-[Ii]nstrumented [Rr]egular [Cc]ode;[Ee]rror:"
     )
     add_test(NAME rm_old_${lower_comp}_${TEST_NAME}_profiles
       COMMAND

From 53df586d53592111f33b4b999f71333455e91c6b Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Mon, 9 Dec 2024 17:07:05 -0500
Subject: [PATCH 034/135] Improve output in build_and_test.sh when tests fail

---
 build_and_test.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/build_and_test.sh b/build_and_test.sh
index fbdffd0..d865ea4 100755
--- a/build_and_test.sh
+++ b/build_and_test.sh
@@ -10,4 +10,4 @@ set -o verbose
 
 cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -Wdev -Wdeprecated -G Ninja -S . -B build
 cmake --build build --parallel 8 --verbose || cmake --build build --verbose
-( cd build && ctest --output-on-failure )
+( cd build && ( ctest --output-on-failure || ctest --rerun-failed --verbose ) )

From f4a74b5d7a7256ad6cdc334115f9f9d1bcbd115f Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Mon, 9 Dec 2024 17:12:49 -0500
Subject: [PATCH 035/135] Add SALT-FM logo

---
 cmake/SALT-logo.txt | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/cmake/SALT-logo.txt b/cmake/SALT-logo.txt
index d717edd..47f8cfd 100644
--- a/cmake/SALT-logo.txt
+++ b/cmake/SALT-logo.txt
@@ -1,7 +1,7 @@
 
-███████╗ █████╗ ██╗  ████████╗
-██╔════╝██╔══██╗██║  ╚══██╔══╝
-███████╗███████║██║     ██║
-╚════██║██╔══██║██║     ██║
-███████║██║  ██║███████╗██║
-╚══════╝╚═╝  ╚═╝╚══════╝╚═╝
+███████╗ █████╗ ██╗  ████████╗   ███████╗███╗   ███╗
+██╔════╝██╔══██╗██║  ╚══██╔══╝   ██╔════╝████╗ ████║
+███████╗███████║██║     ██║█████╗█████╗  ██╔████╔██║
+╚════██║██╔══██║██║     ██║╚════╝██╔══╝  ██║╚██╔╝██║
+███████║██║  ██║███████╗██║      ██║     ██║ ╚═╝ ██║
+╚══════╝╚═╝  ╚═╝╚══════╝╚═╝      ╚═╝     ╚═╝     ╚═╝

From d3c5988cb561baa449ed80a7c5819d51142d1387 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Mon, 9 Dec 2024 17:15:09 -0500
Subject: [PATCH 036/135] Update SALT-FM version & cmake greeting

---
 CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0fcf904..e47635d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -53,12 +53,12 @@ check_out_of_source_build()
 # SALT project version
 # Later we can get this from `git describe`
 set (SALT_VERSION_MAJOR 0)
-set (SALT_VERSION_MINOR 1)
+set (SALT_VERSION_MINOR 2)
 
 # Print project logo, version and tag line
 file(READ ${CMAKE_CURRENT_LIST_DIR}/cmake/SALT-logo.txt SALT_LOGO)
 string(JOIN "\n         " SALT_GREETING "${SALT_LOGO}"
-  "SALT, v${SALT_VERSION_MAJOR}.${SALT_VERSION_MINOR}: An LLVM-based Source Analysis Toolkit for HPC")
+  "SALT-FM, v${SALT_VERSION_MAJOR}.${SALT_VERSION_MINOR}: An LLVM-based Source Analysis Toolkit for HPC")
 if(NOT isMultiConfig)
   string(APPEND SALT_GREETING "\n         Build type: ${CMAKE_BUILD_TYPE}")
 endif()

From 9b45b7145c1db46335d2c7e73049ac4ed6df3df7 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Tue, 10 Dec 2024 13:54:40 -0500
Subject: [PATCH 037/135] Make locating TAU more robust

---
 CMakeLists.txt | 68 +++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 53 insertions(+), 15 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e47635d..32c6e7a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -83,7 +83,7 @@ set(CMAKE_CXX_STANDARD 17)
 #-----------------------------
 # Create the main SALT project
 #-----------------------------
-project(SALT
+project(SALT-FM
   VERSION "${SALT_VERSION_MAJOR}.${SALT_VERSION_MINOR}"
   DESCRIPTION "An LLVM-based Source Analysis Tookit for HPC"
   HOMEPAGE_URL "https://github.com/ParaToolsInc/salt"
@@ -191,7 +191,6 @@ target_compile_definitions(SALT_LLVM_TOOLING INTERFACE ${LLVM_DEFINITIONS_LIST})
 target_compile_options(SALT_LLVM_TOOLING INTERFACE ${USE_RTTI} -Wall -Werror -Wpedantic)
 target_link_libraries(SALT_LLVM_TOOLING INTERFACE ${CLANG_LIBS} ${LLVM_LIBS})
 
-
 #---------------------------------
 # List the header and source files
 #---------------------------------
@@ -301,11 +300,16 @@ else()
     message(STATUS "Flang not found -- skipping Flang frontend plugin")
 endif()
 
+#---------------------
+# Find TAU locations for testing
+#---------------------
+
 # Check if TAU_ROOT is set as an environment variable and if not set it as a CMake cache variable to /usr/local
 # otherwise, use the value from the environment
+set(TAU_PATH_SUFFIXES x86_64 x86_64/bin craycnl craycnl/bin apple apple/bin)
 if(NOT DEFINED ENV{TAU_ROOT})
   find_program(TAU_EXEC tau_exec
-    PATH_SUFFIXES x86_64 x86_64/bin craycnl craycnl/bin apple apple/bin
+    PATH_SUFFIXES ${TAU_PATH_SUFFIXES}
   )
   if(NOT TAU_EXEC)
     message(FATAL_ERROR "TAU not found. Please set TAU_ROOT to the TAU installation directory.")
@@ -318,30 +322,64 @@ if(NOT DEFINED ENV{TAU_ROOT})
     set(TAU_ROOT $ENV{TAU_ROOT} CACHE PATH "TAU Root Directory")
 endif()
 
+# Find the TAU makefiles, executables, scripts, libraries, etc.
+message(STATUS "TAU_ROOT: ${TAU_ROOT}")
+
+find_program(TAU_EXEC tau_exec
+  PATHS ${TAU_ROOT}/x86_64 ${TAU_ROOT}/apple ${TAU_ROOT}/craycnl
+  PATH_SUFFIXES bin
+  REQUIRED
+)
+get_filename_component(TAU_ARCH_DIR ${TAU_EXEC} DIRECTORY)
+get_filename_component(TAU_ARCH_DIR ${TAU_ARCH_DIR} DIRECTORY)
+message(STATUS "TAU_ARCH_DIR: ${TAU_ARCH_DIR}")
+
 find_file(TAU_CLANG_MAKEFILE
   NAMES Makefile.tau-clang-pthread
-  PATHS ${TAU_ROOT}  PATH_SUFFIXES x86_64 x86_64/lib
+  PATHS ${TAU_ARCH_DIR}  PATH_SUFFIXES lib
   REQUIRED
 )
 find_file(TAU_GCC_MAKEFILE
   NAMES Makefile.tau-pthread Makefile.tau-pthread-pdt
-  PATHS ${TAU_ROOT}  PATH_SUFFIXES x86_64 x86_64/lib
+  PATHS ${TAU_ARCH_DIR}  PATH_SUFFIXES lib
   REQUIRED
 )
 find_program(TAUCC tau_cc.sh
-  PATHS ${TAU_ROOT}  PATH_SUFFIXES x86_64 x86_64/bin
+  PATHS ${TAU_ARCH_DIR}  PATH_SUFFIXES bin
   REQUIRED
 )
 find_program(TAUCXX tau_cxx.sh
-  PATHS ${TAU_ROOT}  PATH_SUFFIXES x86_64 x86_64/bin
+  PATHS ${TAU_ARCH_DIR}  PATH_SUFFIXES bin
   REQUIRED
 )
-find_program(TAU_EXEC tau_exec
-  PATHS ${TAU_ROOT}  PATH_SUFFIXES x86_64 x86_64/bin
+file(GLOB TAU_GCC_LIBUNWIND_DIR ${TAU_ARCH_DIR}/libunwind-*-gcc)
+if(NOT TAU_GCC_LIBUNWIND_DIR)
+  message(FATAL_ERROR "libunwind not found for TAU's gcc build")
+endif()
+find_path(TAU_GCC_LIBUNWIND_INCLUDE_DIR
+  NAMES include
+  PATHS ${TAU_GCC_LIBUNWIND_DIR}
+  REQUIRED
+)
+file(GLOB TAU_CLANG_LIBUNWIND_DIR ${TAU_ARCH_DIR}/libunwind-*-clang)
+if(NOT TAU_CLANG_LIBUNWIND_DIR)
+  message(FATAL_ERROR "libunwind not found for TAU's clang build")
+endif()
+find_path(TAU_CLANG_LIBUNWIND_INCLUDE_DIR
+  NAMES include
+  PATHS ${TAU_CLANG_LIBUNWIND_DIR}
+  REQUIRED
+)
+find_path(TAU_GCC_LIBDWARF_INCLUDE_DIR
+  NAMES include
+  PATHS ${TAU_ARCH_DIR}/libdwarf-gcc
+  REQUIRED
+)
+find_path(TAU_CLANG_LIBDWARF_INCLUDE_DIR
+  NAMES include
+  PATHS ${TAU_ARCH_DIR}/libdwarf-clang
   REQUIRED
 )
-
-
 
 #---------------
 # Tests
@@ -448,12 +486,12 @@ set(TAU_HEADER_LOCATIONS
   -I${TAU_ROOT}/include
 )
 set(TAU_CLANG_HEADER_LOCATIONS
-  -I${TAU_ROOT}/x86_64/libdwarf-clang/include
-  -I${TAU_ROOT}/x86_64/libunwind-1.6.2-clang/include
+  -I${TAU_CLANG_LIBDWARF_INCLUDE_DIR}
+  -I${TAU_CLANG_LIBUNWIND_INCLUDE_DIR}
 )
 set(TAU_GCC_HEADER_LOCATIONS
-  -I${TAU_ROOT}/x86_64/libdwarf-gcc/include
-  -I${TAU_ROOT}/x86_64/libunwind-1.6.2-gcc/include
+  -I${TAU_GCC_LIBDWARF_INCLUDE_DIR}
+  -I${TAU_GCC_LIBUNWIND_INCLUDE_DIR}
 )
 # End of section that note applies to
 ################

From 30137a8b949488636af0d917c5e706837a915f81 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Tue, 10 Dec 2024 16:08:23 -0500
Subject: [PATCH 038/135] Modernize some of the fortran examples

---
 tests/fortran/cubes.f           | 28 ++++++++++++++--------------
 tests/fortran/cubes.hand-inst.f | 32 ++++++++++++++++----------------
 tests/fortran/loop_test.f90     | 12 ++++++------
 3 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/tests/fortran/cubes.f b/tests/fortran/cubes.f
index 55226db..8592ee7 100644
--- a/tests/fortran/cubes.f
+++ b/tests/fortran/cubes.f
@@ -1,15 +1,15 @@
-cc34567 Cubes program
-      PROGRAM SUM_OF_CUBES 
-      INTEGER :: H, T, U 
-      ! This program prints all 3-digit numbers that 
+      program sum_of_cubes 
+      implicit none
+      integer :: h, t, u 
+      ! this program prints all 3-digit numbers that 
       ! equal the sum of the cubes of their digits. 
-      DO H = 1, 9 
-        DO T = 0, 9 
-          DO U = 0, 9 
-          IF (100*H + 10*T + U == H**3 + T**3 + U**3) THEN
-             PRINT "(3I1)", H, T, U 
-	  ENDIF
-          END DO 
-        END DO 
-      END DO 
-      END PROGRAM SUM_OF_CUBES
+      do h = 1, 9 
+        do t = 0, 9 
+          do u = 0, 9 
+          if (100*h + 10*t + u == h**3 + t**3 + u**3) then
+             print "(3I1)", h, t, u 
+	        endif
+          end do 
+        end do 
+      end do 
+      end program sum_of_cubes
diff --git a/tests/fortran/cubes.hand-inst.f b/tests/fortran/cubes.hand-inst.f
index e553c7a..8b337de 100644
--- a/tests/fortran/cubes.hand-inst.f
+++ b/tests/fortran/cubes.hand-inst.f
@@ -1,22 +1,22 @@
-cc34567 Cubes program
-      PROGRAM SUM_OF_CUBES 
-       integer profiler(2) / 0, 0 /
-	save profiler
-      INTEGER :: H, T, U 
+!c34567 Cubes program
+      program sum_of_cubes 
+        integer profiler(2) / 0, 0 /
+        save profiler
+        integer :: H, T, U 
         call TAU_PROFILE_INIT()
-        call TAU_PROFILE_TIMER(profiler, 'PROGRAM SUM_OF_CUBES')
+        call TAU_PROFILE_TIMER(profiler, 'program sum_of_cubes')
         call TAU_PROFILE_START(profiler)
         call TAU_PROFILE_SET_NODE(0)
       ! This program prints all 3-digit numbers that 
       ! equal the sum of the cubes of their digits. 
-      DO H = 1, 9 
-        DO T = 0, 9 
-          DO U = 0, 9 
-          IF (100*H + 10*T + U == H**3 + T**3 + U**3) THEN
-             PRINT "(3I1)", H, T, U 
-	  ENDIF
-          END DO 
-        END DO 
-      END DO 
+      do h = 1, 9 
+        do t = 0, 9 
+          do u = 0, 9 
+          if (100*h + 10*t + u == h**3 + t**3 + u**3) then
+             print "(3I1)", h, t, u 
+	        endif
+          end do 
+        end do 
+      end do
       call TAU_PROFILE_STOP(profiler)
-      END PROGRAM SUM_OF_CUBES
+      end program sum_of_cubes
diff --git a/tests/fortran/loop_test.f90 b/tests/fortran/loop_test.f90
index 647e103..9b10b0e 100644
--- a/tests/fortran/loop_test.f90
+++ b/tests/fortran/loop_test.f90
@@ -15,19 +15,19 @@ subroutine foo(iVal)
           end do
         end do
 
-        do 10, i = 1, 3
-        call bar(i+iVal)
-10      continue
+        do i = 1, 3
+          call bar(i+iVal)
+        end do
         print *, "after calling bar in foo"
-      end
+end subroutine foo
 
 program main
   integer i
 
   print *, "test program"
 
-  do 10, i = 1, 3
+  do i = 1, 3
     call foo(i)
-10  continue
+  end do
 end program main
 

From e830e19d0a50da6e4d926dd46f98c57d365074ba Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Tue, 10 Dec 2024 16:58:49 -0500
Subject: [PATCH 039/135] Update testing to allow || and use tau wrapper
 scripts

* CTest tests can be run in parallel
* The tests now use the SALT support built into the TAU wrapper scripts,
  rather than manually passing defines, libraries, etc.
* Tests for each compiler are in separate subdirectories of the build
  directory to allow tests to be run in parallel and so that
  output/artifacts can be kept and examined
---
 .github/workflows/CI.yaml |  2 +-
 CMakeLists.txt            | 92 ++++++++++-----------------------------
 build_and_test.sh         |  2 +-
 3 files changed, 25 insertions(+), 71 deletions(-)

diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml
index 64d1d1f..9278d3a 100644
--- a/.github/workflows/CI.yaml
+++ b/.github/workflows/CI.yaml
@@ -23,4 +23,4 @@ jobs:
           echo "Cores: $(nproc) "
           cmake --build build --parallel $(nproc) || cmake --build build --verbose
       - name: Run Tests
-        run: cd build && ctest --output-on-failure
+        run: cd build && ctest -j --output-on-failure || ctest --rerun-failed --verbose
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 32c6e7a..4f80f6e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -470,67 +470,13 @@ foreach(test_source IN LISTS TESTS_LIST)
   add_instrumentor_test(${test_source})
 endforeach()
 
-
-################
-# The following default paths are fragile and somewhat specific to
-# the salt-dev container.
-# For alternative TAU installs add -DTAU_ROOT=<dir> to the
-# cmake invication. Make sure TAU is built with these
-# configurations:  -pthread -cc=clang -c++=clang++ -bfd=download 
-# -unwind=download -dwarf=download -otf=download
-# and -pthread -bfd=download -unwind=download -libdwarf=download 
-# -otf=download
-
-
-set(TAU_HEADER_LOCATIONS
-  -I${TAU_ROOT}/include
-)
-set(TAU_CLANG_HEADER_LOCATIONS
-  -I${TAU_CLANG_LIBDWARF_INCLUDE_DIR}
-  -I${TAU_CLANG_LIBUNWIND_INCLUDE_DIR}
-)
-set(TAU_GCC_HEADER_LOCATIONS
-  -I${TAU_GCC_LIBDWARF_INCLUDE_DIR}
-  -I${TAU_GCC_LIBUNWIND_INCLUDE_DIR}
-)
-# End of section that note applies to
-################
-
-
-set(TAU_COMPILE_OPTIONS -fPIC -g)
-set(TAU_C_DEFINITIONS
-  -DEBS_CLOCK_RES=1
-  -DHAVE_GNU_DEMANGLE
-  -DHAVE_TR1_HASH_MAP
-  -DPROFILING_ON
-  -DPTHREADS
-  -DTAU_BFD
-  -DTAU_DOT_H_LESS_HEADERS
-  -DTAU_DWARF
-  -DTAU_ELF_BFD
-  -DTAU_LARGEFILE
-  -DTAU_LINUX_TIMERS
-  -DTAU_OTF2
-  -DTAU_NO_FORTRAN
-  -DTAU_PTHREAD_PRELOAD
-  -DTAU_SS_ALLOC_SUPPORT
-  -DTAU_STRSIGNAL_OK
-  -DTAU_TRACK_LD_LOADER
-  -DTAU_UNIFY
-  -DTAU_UNWIND
-  -DTAU_USE_LIBUNWIND
-  -DTAU_USE_TLS
-  -D_LARGEFILE64_SOURCE
-  # -Dpthread_create=tau_pthread_create
-  # -Dpthread_exit=tau_pthread_exit
-)
-
-set(TAU_C_CLANG_DEFINITIONS
-  -DTAU_CLANG
-)
-set(TAU_C_GCC_DEFINITIONS
-  -DTAU_GNU
-)
+set(compilers_to_test gcc clang)
+foreach(comp IN LISTS compilers_to_test)
+  set(lower_comp ${comp})
+  string(TOUPPER ${comp} comp)
+  add_test(NAME setup_${comp}_dir
+    COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_BINARY_DIR}/${comp})
+endforeach()
 
 function(compile_instrumented test_src)
   # This is the 2nd of 2 functions for adding tests.
@@ -542,7 +488,7 @@ function(compile_instrumented test_src)
   get_filename_component(TEST_BASE_NAME ${test_src} NAME_WE)
   set(TEST_NAME "${TEST_BASE_NAME}")
   # This next line depends on the previous function implementation
-  set(depends_on instrument_${TEST_BASE_NAME})
+  set(depends_on instrument_${TEST_BASE_NAME}) # Right now this is just to prevent tests from running at the same time from clobbering the instrumented source file
   get_filename_component(TEST_LANG ${test_src} LAST_EXT)
   string(REPLACE "." "" TEST_LANG ${TEST_LANG})
   set(extra_args ${ARGN})
@@ -566,26 +512,28 @@ function(compile_instrumented test_src)
     message( FATAL_ERROR "Unknown test source file extension: ${TEST_LANG}")
   endif()
 
-  set(test_path ${TEST_BASE_NAME}.inst.${TEST_LANG})
-  set(TAUC_OPTS -optVerbose -optLinkOnly -optNoMpi)
+  set(test_path ${CMAKE_SOURCE_DIR}/tests/${TEST_BASE_NAME}.${TEST_LANG})
+  set(TAUC_OPTS -optVerbose -optSaltInst -optSaltParser=$<TARGET_FILE:cparse-llvm> -optSaltConfigFile=${CMAKE_SOURCE_DIR}/config_files/tau_config.yaml)
   set(compile_opts ${TAU_COMPILE_OPTIONS})
-  set(compilers_to_test gcc clang)
   foreach(comp IN LISTS compilers_to_test)
     set(lower_comp ${comp})
     string(TOUPPER ${comp} comp)
-    set(defs ${TAU_C_DEFINITIONS} ${TAU_C_${comp}_DEFINITIONS})
-    set(includes ${TAU_HEADER_LOCATIONS} ${TAU_${comp}_HEADER_LOCATIONS})
+    # Fixture to cleanup old instrumented source, object files, and executables
     add_test(NAME rm_${lower_comp}_${TEST_NAME}_objects
       COMMAND
-      ${CMAKE_COMMAND} -E rm -rf ${TEST_BASE_NAME}.o ${TEST_BASE_NAME}.inst.o
+      ${CMAKE_COMMAND} -E rm -rf ${TEST_BASE_NAME}.o ${TEST_BASE_NAME}.inst.o ${TEST_BASE_NAME}.inst.${TEST_LANG}
+      WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${comp}
     )
     set_tests_properties(rm_${lower_comp}_${TEST_NAME}_objects
       PROPERTIES
       FIXTURES_SETUP clean_${lower_comp}_${TEST_NAME}_objects
+      DEPENDS setup_${comp}_dir
     )
+    # Test to actually instrument and build the test source using TAU compiler wrappers & slat parser
     add_test(NAME compile_${lower_comp}_${TEST_NAME}
       COMMAND
-      ${TAUC} ${TAUC_OPTS} ${includes} ${defs} ${compiler_opts} -o ${TEST_BASE_NAME}.${lower_comp} ${test_path}
+      ${TAUC} ${TAUC_OPTS} ${compiler_opts} -o ${TEST_BASE_NAME}.${lower_comp} ${test_path}
+      WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${lower_comp}
     )
     set_tests_properties(compile_${lower_comp}_${TEST_NAME}
       PROPERTIES
@@ -594,17 +542,22 @@ function(compile_instrumented test_src)
       DEPENDS ${depends_on}
       FAIL_REGULAR_EXPRESSION "[Dd]isabling instrumentation of source code;[Ss]witching to compiler-based instrumentation;[Cc]ompiling with [Nn]on-[Ii]nstrumented [Rr]egular [Cc]ode;[Ee]rror:"
     )
+    # Fixture to cleanup old profile directories
     add_test(NAME rm_old_${lower_comp}_${TEST_NAME}_profiles
       COMMAND
       ${CMAKE_COMMAND} -E rm -rf ${TEST_BASE_NAME}.${lower_comp}.d
+      WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${comp}
     )
     set_tests_properties(rm_old_${lower_comp}_${TEST_NAME}_profiles
       PROPERTIES
       FIXTURES_SETUP clean_${lower_comp}_${TEST_NAME}_profiles
+      DEPENDS setup_${comp}_dir
     )
+    # Profile w/ TAU and Verify profiles are created
     add_test(NAME run_${lower_comp}_${TEST_NAME}
       COMMAND
       ${TAU_EXEC} -T serial,pthread ./${TEST_BASE_NAME}.${lower_comp}
+      WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${comp}
     )
     set_tests_properties(run_${lower_comp}_${TEST_NAME}
       PROPERTIES
@@ -615,6 +568,7 @@ function(compile_instrumented test_src)
     )
     add_test(NAME check_${lower_comp}_${TEST_NAME}_profile
       COMMAND ${CMAKE_COMMAND} -E cat ./${TEST_BASE_NAME}.${lower_comp}.d/profile.0.0.0
+      WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${comp}
     )
     set_tests_properties(check_${lower_comp}_${TEST_NAME}_profile
       PROPERTIES
diff --git a/build_and_test.sh b/build_and_test.sh
index d865ea4..b4ebc8e 100755
--- a/build_and_test.sh
+++ b/build_and_test.sh
@@ -10,4 +10,4 @@ set -o verbose
 
 cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -Wdev -Wdeprecated -G Ninja -S . -B build
 cmake --build build --parallel 8 --verbose || cmake --build build --verbose
-( cd build && ( ctest --output-on-failure || ctest --rerun-failed --verbose ) )
+( cd build && ( ctest -j --output-on-failure || ctest --rerun-failed --verbose ) )

From b03a34f501e593b26955afccaf39348a2b8bca6a Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Tue, 10 Dec 2024 14:05:56 -0800
Subject: [PATCH 040/135] Experimenting with getting file name, opening output
 file

---
 src/salt_instrument_flang_plugin.cpp | 260 +++++++++++++++++----------
 1 file changed, 170 insertions(+), 90 deletions(-)

diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index 1c33b8e..4c12bde 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -31,97 +31,177 @@ using namespace Fortran::frontend;
  * Visits each node in the parse tree.
  */
 class SaltInstrumentAction : public PluginParseTreeAction {
-  struct SaltInstrumentParseTreeVisitor {
-
-    explicit SaltInstrumentParseTreeVisitor(Fortran::parser::Parsing *parsing)
-      : parsing(parsing) {
-    }
-
-    // Pass in the parser object from the Action to the Visitor
-    // so that we can use it while processing parse tree nodes.
-    Fortran::parser::Parsing *parsing{nullptr};
-
-    // Default empty visit functions for otherwise unhandled types.
-    template <typename A> bool Pre(const A &) { return true; }
-    template <typename A> void Post(const A &) {}
-
-    // Override all types that we want to visit.
-
-    // Pre occurs when first visiting a node.
-    // Post occurs when returning from the node's children.
-    // See https://flang.llvm.org/docs/Parsing.html for information on the parse tree.
-
-    // Parse tree types are defined in: include/flang/Parser/parse-tree.h
-    // There are three types of parse tree nodes:
-    // Wrappers, with a single data member, always named `v`.
-    // Tuples, encapsulating multiple values in a data member named `t` of type std::tuple.
-    // Discriminated unions, one of several types stored in data member named `u` of type std::variant.
-    // Use std::get() to retrieve value from `t` or `u`
-
-    // See https://github.com/llvm/llvm-project/blob/main/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
-    // for examples of getting source position for a parse tree node
-
-    bool Pre(const Fortran::parser::MainProgram &) {
-      llvm::outs() << "Entering main program\n";
-      isInMainProgram_ = true;
-      return true;
-    }
-
-    void Post(const Fortran::parser::MainProgram & ) {
-      llvm::outs() << "Exiting main program: " << mainProgramName_ << "\n";
-      isInMainProgram_ = false;
-    }
-
-    bool Pre(const Fortran::parser::FunctionSubprogram &) {
-      isInSubprogram_ = true;
-      return true;
-    }
-
-    void Post(const Fortran::parser::ProgramStmt & program) {
-      mainProgramName_ = program.v.ToString();
-      const auto & pos = parsing->allCooked().GetSourcePositionRange(program.v.source);
-      llvm::outs() << "Program: \t"
-                   <<  mainProgramName_
-                   << "\t (" << pos->first.line << ", " << pos->first.column << ")"
-                   << "\t (" << pos->second.line << ", " << pos->second.column << ")"
-                   << "\n";
-    }
-
-    void Post(const Fortran::parser::FunctionStmt &f) {
-      if (isInSubprogram_) {
-        llvm::outs() << "Function:\t"
-                     << std::get<Fortran::parser::Name>(f.t).ToString() << "\n";
-        isInSubprogram_ = false;
-      }
-    }
-
-    bool Pre(const Fortran::parser::SubroutineSubprogram &) {
-      isInSubprogram_ = true;
-      return true;
-    }
-    void Post(const Fortran::parser::SubroutineStmt &s) {
-      if (isInSubprogram_) {
-        llvm::outs() << "Subroutine:\t"
-                     << std::get<Fortran::parser::Name>(s.t).ToString() << "\n";
-        isInSubprogram_ = false;
-      }
+    enum class SaltInstrumentationPointType {
+        PROGRAM_BEGIN,
+        PROCEDURE_BEGIN,
+        PROCEDURE_END
+    };
+
+    struct SaltInstrumentationPoint {
+        SaltInstrumentationPoint(SaltInstrumentationPointType instrumentation_point_type,
+                                 int start_line,
+                                 const std::optional<std::string> &timer_name = std::nullopt)
+            : instrumentationPointType(instrumentation_point_type),
+              startLine(start_line),
+              timerName(timer_name) {
+        }
+
+
+        SaltInstrumentationPointType instrumentationPointType;
+        int startLine;
+        std::optional<std::string> timerName;
+    };
+
+    struct SaltInstrumentParseTreeVisitor {
+        explicit SaltInstrumentParseTreeVisitor(Fortran::parser::Parsing *parsing)
+            : parsing(parsing) {
+        }
+
+        /**
+         * Mark a line where a given type of instrumentation is needed.
+         * For PROGRAM_BEGIN and PROCEDURE_BEGIN, a timer name is needed.
+         * For PROCEDURE_END, a timer name is not needed.
+         * Instrumentation will be added after start_line.
+         */
+        void addInstrumentationPoint(SaltInstrumentationPointType instrumentation_point_type,
+                                     int start_line,
+                                     const std::optional<std::string> &timer_name = std::nullopt) {
+            instrumentationPoints_.emplace_back(
+                instrumentation_point_type, start_line, timer_name);
+        }
+
+        auto &getInstrumentationPoints() {
+            return instrumentationPoints_;
+        }
+
+        void setInputFileNameIfNeeded(const std::string &inputName) {
+            if (inputFileName_.empty()) {
+                inputFileName_ = inputName;
+            }
+        }
+
+         std::string getInputFileName() {
+            return inputFileName_;
+        }
+
+        // Default empty visit functions for otherwise unhandled types.
+        template<typename A>
+        static bool Pre(const A &) { return true; }
+
+        template<typename A>
+        static void Post(const A &) {
+        }
+
+        // Override all types that we want to visit.
+
+        // Pre occurs when first visiting a node.
+        // Post occurs when returning from the node's children.
+        // See https://flang.llvm.org/docs/Parsing.html for information on the parse tree.
+
+        // Parse tree types are defined in: include/flang/Parser/parse-tree.h
+        // There are three types of parse tree nodes:
+        // Wrappers, with a single data member, always named `v`.
+        // Tuples, encapsulating multiple values in a data member named `t` of type std::tuple.
+        // Discriminated unions, one of several types stored in data member named `u` of type std::variant.
+        // Use std::get() to retrieve value from `t` or `u`
+
+        // See https://github.com/llvm/llvm-project/blob/main/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
+        // for examples of getting source position for a parse tree node
+
+        bool Pre(const Fortran::parser::MainProgram &) {
+            llvm::outs() << "Entering main program\n";
+            isInMainProgram_ = true;
+            return true;
+        }
+
+        void Post(const Fortran::parser::MainProgram &) {
+            llvm::outs() << "Exiting main program: " << mainProgramName_ << "\n";
+            isInMainProgram_ = false;
+        }
+
+        void Post(const Fortran::parser::ProgramStmt &program) {
+            mainProgramName_ = program.v.ToString();
+            const auto &pos = parsing->allCooked().GetSourcePositionRange(program.v.source);
+            setInputFileNameIfNeeded(pos->first.sourceFile->path());
+            llvm::outs() << "Program: \t"
+                    << mainProgramName_
+                    << "\t" << inputFileName_
+                    << "\t (" << pos->first.line << ", " << pos->first.column << ")"
+                    << "\t (" << pos->second.line << ", " << pos->second.column << ")"
+                    << "\n";
+        }
+
+        void Post(const Fortran::parser::FunctionStmt &f) {
+            auto &name = std::get<Fortran::parser::Name>(f.t);
+            setInputFileNameIfNeeded(
+                parsing->allCooked().GetSourcePositionRange(name.source)->first.sourceFile->path());
+            if (isInSubprogram_) {
+                llvm::outs() << "Function:\t"
+                        << name.ToString() << "\n";
+                isInSubprogram_ = false;
+            }
+        }
+
+        bool Pre(const Fortran::parser::FunctionSubprogram &) {
+            isInSubprogram_ = true;
+            return true;
+        }
+
+        bool Pre(const Fortran::parser::SubroutineSubprogram &) {
+            isInSubprogram_ = true;
+            return true;
+        }
+
+        void Post(const Fortran::parser::SubroutineStmt &s) {
+            auto &name = std::get<Fortran::parser::Name>(s.t);
+            setInputFileNameIfNeeded(
+                parsing->allCooked().GetSourcePositionRange(name.source)->first.sourceFile->path());
+            if (isInSubprogram_) {
+                llvm::outs() << "Subroutine:\t"
+                        << name.ToString() << "\n";
+                isInSubprogram_ = false;
+            }
+        }
+
+        void Post(const Fortran::parser::EndProgramStmt & endProgram) {
+            setInputFileNameIfNeeded(
+                parsing->allCooked().GetSourcePositionRange(endProgram.v->source)->first.sourceFile->path());
+        }
+
+    private:
+        // Keeps track of current state of traversal
+        bool isInSubprogram_{false};
+        bool isInMainProgram_{false};
+        std::string mainProgramName_;
+        std::string inputFileName_;
+
+        std::vector<SaltInstrumentationPoint> instrumentationPoints_;
+
+        // Pass in the parser object from the Action to the Visitor
+        // so that we can use it while processing parse tree nodes.
+        Fortran::parser::Parsing *parsing{nullptr};
+    };
+
+    void executeAction() override {
+        llvm::outs() << "==== SALT Instrumentor Plugin starting ====\n";
+
+        Fortran::parser::Parsing &parsing = getParsing();
+        parsing.parseTree()
+        // TODO figure out the actual extension of the input and reuse in extension of output file
+        // currently we just use inst.f always
+        SaltInstrumentParseTreeVisitor visitor{&parsing};
+        Walk(parsing.parseTree(), visitor);
+
+        const std::string inputFile = visitor.getInputFileName();
+        llvm::outs() << "File: " << inputFile << "\n";
+        auto const extPos = inputFile.find_last_of('.');
+        const auto inputFileExt = inputFile.substr(extPos + 1);
+        llvm::outs() << inputFileExt << "\n";
+
+        auto outputFile = createOutputFile("inst.f");
+
+        llvm::outs() << "==== SALT Instrumentor Plugin finished ====\n";
     }
-
-  private:
-    bool isInSubprogram_{false};
-    bool isInMainProgram_{false};
-    std::string mainProgramName_;
-  };
-
-  void executeAction() override {
-    llvm::outs() << "==== SALT Instrumentor Plugin starting ====\n";
-
-    Fortran::parser::Parsing & parsing = getParsing();
-    SaltInstrumentParseTreeVisitor visitor{&parsing};
-    Fortran::parser::Walk(parsing.parseTree(), visitor);
-
-    llvm::outs() << "==== SALT Instrumentor Plugin finished ====\n";
-  }
 };
 
 static FrontendPluginRegistry::Add<SaltInstrumentAction> X(

From 022adfcbfbfd6e259f722d4f7a77af5a8cc83f57 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Tue, 10 Dec 2024 14:09:13 -0800
Subject: [PATCH 041/135] Fix bad working directory for tests

The case-insensitive macOS filesystem led me to miss this until it was tested on a UO machine
---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4f80f6e..40368de 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -533,7 +533,7 @@ function(compile_instrumented test_src)
     add_test(NAME compile_${lower_comp}_${TEST_NAME}
       COMMAND
       ${TAUC} ${TAUC_OPTS} ${compiler_opts} -o ${TEST_BASE_NAME}.${lower_comp} ${test_path}
-      WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${lower_comp}
+      WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${comp}
     )
     set_tests_properties(compile_${lower_comp}_${TEST_NAME}
       PROPERTIES

From 14dca1beaccfe6ec864d223a7fc9309f78a320c7 Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Tue, 10 Dec 2024 14:54:50 -0800
Subject: [PATCH 042/135] Get input file name, extension, open output inst file

---
 src/salt_instrument_flang_plugin.cpp | 79 +++++++++++++++++-----------
 1 file changed, 49 insertions(+), 30 deletions(-)

diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index 4c12bde..04faea9 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -74,16 +74,6 @@ class SaltInstrumentAction : public PluginParseTreeAction {
             return instrumentationPoints_;
         }
 
-        void setInputFileNameIfNeeded(const std::string &inputName) {
-            if (inputFileName_.empty()) {
-                inputFileName_ = inputName;
-            }
-        }
-
-         std::string getInputFileName() {
-            return inputFileName_;
-        }
-
         // Default empty visit functions for otherwise unhandled types.
         template<typename A>
         static bool Pre(const A &) { return true; }
@@ -122,10 +112,8 @@ class SaltInstrumentAction : public PluginParseTreeAction {
         void Post(const Fortran::parser::ProgramStmt &program) {
             mainProgramName_ = program.v.ToString();
             const auto &pos = parsing->allCooked().GetSourcePositionRange(program.v.source);
-            setInputFileNameIfNeeded(pos->first.sourceFile->path());
             llvm::outs() << "Program: \t"
                     << mainProgramName_
-                    << "\t" << inputFileName_
                     << "\t (" << pos->first.line << ", " << pos->first.column << ")"
                     << "\t (" << pos->second.line << ", " << pos->second.column << ")"
                     << "\n";
@@ -133,8 +121,6 @@ class SaltInstrumentAction : public PluginParseTreeAction {
 
         void Post(const Fortran::parser::FunctionStmt &f) {
             auto &name = std::get<Fortran::parser::Name>(f.t);
-            setInputFileNameIfNeeded(
-                parsing->allCooked().GetSourcePositionRange(name.source)->first.sourceFile->path());
             if (isInSubprogram_) {
                 llvm::outs() << "Function:\t"
                         << name.ToString() << "\n";
@@ -154,8 +140,6 @@ class SaltInstrumentAction : public PluginParseTreeAction {
 
         void Post(const Fortran::parser::SubroutineStmt &s) {
             auto &name = std::get<Fortran::parser::Name>(s.t);
-            setInputFileNameIfNeeded(
-                parsing->allCooked().GetSourcePositionRange(name.source)->first.sourceFile->path());
             if (isInSubprogram_) {
                 llvm::outs() << "Subroutine:\t"
                         << name.ToString() << "\n";
@@ -163,9 +147,8 @@ class SaltInstrumentAction : public PluginParseTreeAction {
             }
         }
 
-        void Post(const Fortran::parser::EndProgramStmt & endProgram) {
-            setInputFileNameIfNeeded(
-                parsing->allCooked().GetSourcePositionRange(endProgram.v->source)->first.sourceFile->path());
+        void Post(const Fortran::parser::EndProgramStmt &endProgram) {
+            (void) endProgram; //TODO handle endprogram
         }
 
     private:
@@ -173,7 +156,6 @@ class SaltInstrumentAction : public PluginParseTreeAction {
         bool isInSubprogram_{false};
         bool isInMainProgram_{false};
         std::string mainProgramName_;
-        std::string inputFileName_;
 
         std::vector<SaltInstrumentationPoint> instrumentationPoints_;
 
@@ -182,23 +164,60 @@ class SaltInstrumentAction : public PluginParseTreeAction {
         Fortran::parser::Parsing *parsing{nullptr};
     };
 
+    /**
+     * Get the source file represented by a given parse tree
+     *
+     * See function BuildRuntimeDerivedTypeTables() in
+     * flang/lib/Semantics/runtime-type-info.cpp for example
+     * of getting the source file name.
+     */
+    static std::optional<std::string> getInputFilePath(Fortran::parser::Parsing &parsing) {
+        const auto &allSources{parsing.allCooked().allSources()};
+        if (auto firstProv{allSources.GetFirstFileProvenance()}) {
+            if (const auto *srcFile{allSources.GetSourceFile(firstProv->start())}) {
+                return srcFile->path();
+            }
+        }
+        return std::nullopt;
+    }
+
+
+    /**
+     * This is the entry point for the plugin.
+     */
     void executeAction() override {
         llvm::outs() << "==== SALT Instrumentor Plugin starting ====\n";
 
+        // This is the object through which we access the parse tree
+        // and the source
         Fortran::parser::Parsing &parsing = getParsing();
-        parsing.parseTree()
-        // TODO figure out the actual extension of the input and reuse in extension of output file
-        // currently we just use inst.f always
+
+        // Get the path to the input file
+        const auto inputFilePath = getInputFilePath(parsing);
+        if (!inputFilePath) {
+            llvm::outs() << "ERROR: Unable to find input file name!\n";
+            std::exit(-1);
+        }
+        llvm::outs() << "Have input file: " << *inputFilePath << "\n";
+
+        // Get the extension of the input file
+        // For input file 'filename.ext' we will output to 'filename.inst.ext'
+        std::string inputFileExtension;
+        if (auto const extPos = inputFilePath->find_last_of('.'); extPos == std::string::npos) {
+            inputFileExtension = "f90"; // Default if for some reason file has no extension
+        } else {
+            inputFileExtension = inputFilePath->substr(extPos + 1); // Part of string past last '.'
+        }
+
+        // Open an output file for writing the instrumented code
+        const std::string outputFileExtension = "inst."s + inputFileExtension;
+        auto outputFile = createOutputFile(outputFileExtension);
+
+        // Walk the parse tree
         SaltInstrumentParseTreeVisitor visitor{&parsing};
         Walk(parsing.parseTree(), visitor);
 
-        const std::string inputFile = visitor.getInputFileName();
-        llvm::outs() << "File: " << inputFile << "\n";
-        auto const extPos = inputFile.find_last_of('.');
-        const auto inputFileExt = inputFile.substr(extPos + 1);
-        llvm::outs() << inputFileExt << "\n";
-
-        auto outputFile = createOutputFile("inst.f");
+        // TODO write the instrumented code
 
         llvm::outs() << "==== SALT Instrumentor Plugin finished ====\n";
     }

From f2ae1429a346ce40ef59f4de5a13c847b1d24697 Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Tue, 10 Dec 2024 15:10:57 -0800
Subject: [PATCH 043/135] Remove unnecessary leftovers from plugin
 experimentation

---
 src/salt_instrument_flang_plugin.cpp | 33 ----------------------------
 1 file changed, 33 deletions(-)

diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index 04faea9..96dcbe3 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -119,41 +119,8 @@ class SaltInstrumentAction : public PluginParseTreeAction {
                     << "\n";
         }
 
-        void Post(const Fortran::parser::FunctionStmt &f) {
-            auto &name = std::get<Fortran::parser::Name>(f.t);
-            if (isInSubprogram_) {
-                llvm::outs() << "Function:\t"
-                        << name.ToString() << "\n";
-                isInSubprogram_ = false;
-            }
-        }
-
-        bool Pre(const Fortran::parser::FunctionSubprogram &) {
-            isInSubprogram_ = true;
-            return true;
-        }
-
-        bool Pre(const Fortran::parser::SubroutineSubprogram &) {
-            isInSubprogram_ = true;
-            return true;
-        }
-
-        void Post(const Fortran::parser::SubroutineStmt &s) {
-            auto &name = std::get<Fortran::parser::Name>(s.t);
-            if (isInSubprogram_) {
-                llvm::outs() << "Subroutine:\t"
-                        << name.ToString() << "\n";
-                isInSubprogram_ = false;
-            }
-        }
-
-        void Post(const Fortran::parser::EndProgramStmt &endProgram) {
-            (void) endProgram; //TODO handle endprogram
-        }
-
     private:
         // Keeps track of current state of traversal
-        bool isInSubprogram_{false};
         bool isInMainProgram_{false};
         std::string mainProgramName_;
 

From 415727784e9dad1aa9340215f3392ea5e057f1e5 Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Tue, 10 Dec 2024 15:50:58 -0800
Subject: [PATCH 044/135] Reorganize parse tree visitor, mark if in main
 program or subprogram

---
 src/salt_instrument_flang_plugin.cpp | 45 ++++++++++++++++++++++------
 1 file changed, 36 insertions(+), 9 deletions(-)

diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index 96dcbe3..fce0a4e 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -99,36 +99,63 @@ class SaltInstrumentAction : public PluginParseTreeAction {
         // for examples of getting source position for a parse tree node
 
         bool Pre(const Fortran::parser::MainProgram &) {
-            llvm::outs() << "Entering main program\n";
             isInMainProgram_ = true;
             return true;
         }
 
         void Post(const Fortran::parser::MainProgram &) {
-            llvm::outs() << "Exiting main program: " << mainProgramName_ << "\n";
+            llvm::outs() << "Exit main program: " << mainProgramName_ << "\n";
             isInMainProgram_ = false;
         }
 
         void Post(const Fortran::parser::ProgramStmt &program) {
             mainProgramName_ = program.v.ToString();
-            const auto &pos = parsing->allCooked().GetSourcePositionRange(program.v.source);
-            llvm::outs() << "Program: \t"
-                    << mainProgramName_
-                    << "\t (" << pos->first.line << ", " << pos->first.column << ")"
-                    << "\t (" << pos->second.line << ", " << pos->second.column << ")"
-                    << "\n";
+            //const auto &pos = parsing->allCooked().GetSourcePositionRange(program.v.source);
+            llvm::outs() << "Enter main program: " << mainProgramName_ << "\n";
         }
 
+        bool Pre(const Fortran::parser::SubroutineStmt &subroutineStmt) {
+            subprogramName_ = std::get<Fortran::parser::Name>(subroutineStmt.t).ToString();
+            llvm::outs() << "Enter Subroutine: " << subprogramName_ << "\n";
+            return true;
+        }
+
+        void Post(const Fortran::parser::SubroutineSubprogram &) {
+            llvm::outs() << "Exit Subroutine: " << subprogramName_ << "\n";
+            subprogramName_.clear();
+        }
+
+        bool Pre(const Fortran::parser::FunctionStmt &functionStmt) {
+            subprogramName_ = std::get<Fortran::parser::Name>(functionStmt.t).ToString();
+            llvm::outs() << "Enter Function: " << subprogramName_ << "\n";
+            return true;
+        }
+
+        void Post(const Fortran::parser::FunctionSubprogram &) {
+            llvm::outs() << "Exit Function: " << subprogramName_ << "\n";
+            subprogramName_.clear();
+        }
+
+        bool Pre(const Fortran::parser::ExecutionPart & executionPart) {
+            (void)executionPart; // TODO handle execution part
+            // Need to get the FIRST and the LAST components
+            // Insert timer start before first component
+            // Insert timer end after last component
+            return true;
+        }
+
+
     private:
         // Keeps track of current state of traversal
         bool isInMainProgram_{false};
         std::string mainProgramName_;
+        std::string subprogramName_;
 
         std::vector<SaltInstrumentationPoint> instrumentationPoints_;
 
         // Pass in the parser object from the Action to the Visitor
         // so that we can use it while processing parse tree nodes.
-        Fortran::parser::Parsing *parsing{nullptr};
+        [[maybe_unused]] Fortran::parser::Parsing *parsing{nullptr};
     };
 
     /**

From c2a2afdcc322bc756c563d2015b81388fe1c1872 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Tue, 10 Dec 2024 19:37:03 -0500
Subject: [PATCH 045/135] Hacky fix for missing `/usr/local/bin/flang` and bad
 tau_cc.sh link line

This will be fixed in the next release of salt-dev
---
 .github/workflows/CI.yaml | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml
index 9278d3a..1f56d9d 100644
--- a/.github/workflows/CI.yaml
+++ b/.github/workflows/CI.yaml
@@ -11,6 +11,17 @@ jobs:
       - uses: actions/checkout@v3
         with:
           submodules: recursive
+      - name: patch TAU & link flang
+        run: |
+          sed -i 's/#FLANG_NO_MX_OPTS#//' /usr/local/x86_64/lib/Makefile.tau-clang-pthread || true
+          if [[ ! -e /usr/local/bin/flang ]]; then
+            echo "Linking flang-new as flang"
+            ln -s /usr/local/bin/flang-new /usr/local/bin/flang
+          else
+            ls -la /usr/local/bin/flang
+          fi
+          tau_f90.sh -show
+          tau_f90.sh --version
       - name: Print working directory
         run: |
             echo "Working directory: $(pwd)"

From 28740ca472a22dae5e458bd46a34ce98414b09fd Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Tue, 10 Dec 2024 19:41:55 -0500
Subject: [PATCH 046/135] Fix bad test command in CI workflow

---
 .github/workflows/CI.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml
index 1f56d9d..93b0abd 100644
--- a/.github/workflows/CI.yaml
+++ b/.github/workflows/CI.yaml
@@ -13,8 +13,8 @@ jobs:
           submodules: recursive
       - name: patch TAU & link flang
         run: |
-          sed -i 's/#FLANG_NO_MX_OPTS#//' /usr/local/x86_64/lib/Makefile.tau-clang-pthread || true
-          if [[ ! -e /usr/local/bin/flang ]]; then
+          sed -i 's/#FLANG_NO_MX_OPTS#//' /usr/local/x86_64/lib/Makefile.tau-clang-pthread || echo "sed command returned non-zero status"
+          if ! [ -e /usr/local/bin/flang ]; then
             echo "Linking flang-new as flang"
             ln -s /usr/local/bin/flang-new /usr/local/bin/flang
           else

From 2538264939a3b493051fcdc0b3b314500686cab2 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Tue, 10 Dec 2024 19:47:58 -0500
Subject: [PATCH 047/135] Set TAU_MAKEFILE so that we can see output of
 tau_cc.sh

---
 .github/workflows/CI.yaml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml
index 93b0abd..f3b873b 100644
--- a/.github/workflows/CI.yaml
+++ b/.github/workflows/CI.yaml
@@ -20,8 +20,12 @@ jobs:
           else
             ls -la /usr/local/bin/flang
           fi
+          export TAU_MAKEFILE=/usr/local/x86_64/lib/Makefile.tau-clang-pthread
           tau_f90.sh -show
           tau_f90.sh --version
+          tau_cxx.sh -show
+          tau_cxx.sh --version
+          unset TAU_MAKEFILE
       - name: Print working directory
         run: |
             echo "Working directory: $(pwd)"

From 0489ae4dc79bc780f07457cfe6e10a8aebe3a293 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Tue, 10 Dec 2024 20:17:51 -0500
Subject: [PATCH 048/135] Fix/debug docker image monkey patch

---
 .github/workflows/CI.yaml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml
index f3b873b..0eb939e 100644
--- a/.github/workflows/CI.yaml
+++ b/.github/workflows/CI.yaml
@@ -14,9 +14,11 @@ jobs:
       - name: patch TAU & link flang
         run: |
           sed -i 's/#FLANG_NO_MX_OPTS#//' /usr/local/x86_64/lib/Makefile.tau-clang-pthread || echo "sed command returned non-zero status"
+          FLANG_NEW="$(which flang-new)"
           if ! [ -e /usr/local/bin/flang ]; then
             echo "Linking flang-new as flang"
-            ln -s /usr/local/bin/flang-new /usr/local/bin/flang
+            ln -s "$FLANG_NEW" /usr/local/bin/flang
+            ls -la /usr/local/bin/flang
           else
             ls -la /usr/local/bin/flang
           fi

From 341b028b9652c4831bd7b4f4a59829bea66ff84b Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Tue, 10 Dec 2024 20:21:30 -0500
Subject: [PATCH 049/135] Add comment to CI.yaml to explain & remind about
 removing the hacky patch

---
 .github/workflows/CI.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml
index 0eb939e..b4c4069 100644
--- a/.github/workflows/CI.yaml
+++ b/.github/workflows/CI.yaml
@@ -11,7 +11,7 @@ jobs:
       - uses: actions/checkout@v3
         with:
           submodules: recursive
-      - name: patch TAU & link flang
+      - name: patch TAU & link flang # This can be deleted once the salt-dev image is updated
         run: |
           sed -i 's/#FLANG_NO_MX_OPTS#//' /usr/local/x86_64/lib/Makefile.tau-clang-pthread || echo "sed command returned non-zero status"
           FLANG_NEW="$(which flang-new)"

From 4fde5fbaceaad4e9175934fe6afe20db155355cb Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Tue, 10 Dec 2024 18:31:34 -0800
Subject: [PATCH 050/135] Trying to get location for ExecutionPartConstructs

---
 src/salt_instrument_flang_plugin.cpp | 72 ++++++++++++++++++++++++++--
 1 file changed, 69 insertions(+), 3 deletions(-)

diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index fce0a4e..c1b580a 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -19,10 +19,14 @@ limitations under the License.
 // See https://flang.llvm.org/docs/FlangDriver.html#frontend-driver-plugins
 // for documentation of the Flang frontend plugin interface
 
+#include <clang/Basic/SourceLocation.h>
+
 #include "flang/Frontend/FrontendActions.h"
 #include "flang/Frontend/FrontendPluginRegistry.h"
 #include "flang/Parser/dump-parse-tree.h"
 #include "flang/Parser/parsing.h"
+#include "flang/Parser/source.h"
+#include "flang/Common/indirection.h"
 
 using namespace Fortran::frontend;
 
@@ -74,6 +78,10 @@ class SaltInstrumentAction : public PluginParseTreeAction {
             return instrumentationPoints_;
         }
 
+        Fortran::parser::SourcePosition locationFromSource(const Fortran::parser::CharBlock &charBlock) const {
+            return parsing->allCooked().GetSourcePositionRange(charBlock)->first;
+        }
+
         // Default empty visit functions for otherwise unhandled types.
         template<typename A>
         static bool Pre(const A &) { return true; }
@@ -136,15 +144,73 @@ class SaltInstrumentAction : public PluginParseTreeAction {
             subprogramName_.clear();
         }
 
-        bool Pre(const Fortran::parser::ExecutionPart & executionPart) {
-            (void)executionPart; // TODO handle execution part
+        Fortran::parser::SourcePosition getLocation(const Fortran::parser::ExecutableConstruct &construct) {
+            return std::visit(
+                Fortran::common::visitors{
+                    [&](const auto &c) -> Fortran::parser::SourcePosition {
+                        return locationFromSource(c.source);
+                    }
+                }, construct.u);
+
+            /*
+            std::variant<Statement<ActionStmt>, common::Indirection<AssociateConstruct>,
+      common::Indirection<BlockConstruct>, common::Indirection<CaseConstruct>,
+      common::Indirection<ChangeTeamConstruct>,
+      common::Indirection<CriticalConstruct>,
+      Statement<common::Indirection<LabelDoStmt>>,
+      Statement<common::Indirection<EndDoStmt>>,
+      common::Indirection<DoConstruct>, common::Indirection<IfConstruct>,
+      common::Indirection<SelectRankConstruct>,
+      common::Indirection<SelectTypeConstruct>,
+      common::Indirection<WhereConstruct>, common::Indirection<ForallConstruct>,
+      common::Indirection<CompilerDirective>,
+      common::Indirection<OpenACCConstruct>,
+      common::Indirection<AccEndCombinedDirective>,
+      common::Indirection<OpenMPConstruct>,
+      common::Indirection<OmpEndLoopDirective>,
+      common::Indirection<CUFKernelDoConstruct>>
+      u;
+            */
+        }
+
+        Fortran::parser::SourcePosition getLocation(const Fortran::parser::ExecutionPartConstruct &construct) {
+            // Possibilities for ExecutionPartConstruct:
+            //   ExecutableConstruct
+            //   Statement<common::Indirection<FormatStmt>>
+            //   Statement<common::Indirection<EntryStmt>>
+            //   Statement<common::Indirection<DataStmt>>
+            //   Statement<common::Indirection<NamelistStmt>>
+            //   ErrorRecovery
+            return std::visit(
+                Fortran::common::visitors{
+                    [&](const Fortran::parser::ExecutableConstruct &c) -> Fortran::parser::SourcePosition {
+                        return getLocation(c);
+                    },
+                    [&](const auto &c) -> Fortran::parser::SourcePosition {
+                        return locationFromSource(c.source);
+                    },
+                    [&](const Fortran::parser::ErrorRecovery &c) -> Fortran::parser::SourcePosition {
+                        DIE("Should not encounter ErrorRecovery in parse tree");
+                    }
+                }, construct.u);
+        }
+
+        bool Pre(const Fortran::parser::ExecutionPart &executionPart) {
+            (void) executionPart; // TODO handle execution part
             // Need to get the FIRST and the LAST components
             // Insert timer start before first component
+            // Use main program insert if in main program, else subprogram insert
             // Insert timer end after last component
+
+            const Fortran::parser::Block &block = executionPart.v;
+            if (block.empty()) {
+                llvm::outs() << "WARNING: Execution part empty.\n";
+                return true;
+            }
+
             return true;
         }
 
-
     private:
         // Keeps track of current state of traversal
         bool isInMainProgram_{false};

From abfde4cec76f7c657192318b9a19c2614ec9b4bb Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Tue, 10 Dec 2024 19:58:04 -0800
Subject: [PATCH 051/135] Partial support for getting location for
 ExecutableConstruct

---
 src/salt_instrument_flang_plugin.cpp | 153 ++++++++++++++++++++++-----
 1 file changed, 126 insertions(+), 27 deletions(-)

diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index c1b580a..55f3743 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -144,43 +144,142 @@ class SaltInstrumentAction : public PluginParseTreeAction {
             subprogramName_.clear();
         }
 
-        Fortran::parser::SourcePosition getLocation(const Fortran::parser::ExecutableConstruct &construct) {
+        Fortran::parser::SourcePosition getLocation(const Fortran::parser::OpenMPDeclarativeConstruct &construct) {
+            // This function is based on the equivalent function in
+            // flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
+            return std::visit(
+                [&](const auto &o) -> Fortran::parser::SourcePosition {
+                    return locationFromSource(o.source);
+                },
+                construct.u);
+        }
+
+        Fortran::parser::SourcePosition getLocation(const Fortran::parser::OpenMPConstruct &construct) {
+            // This function is based on the equivalent function in
+            // flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
+            return std::visit(
+                Fortran::common::visitors{
+                    [&](const Fortran::parser::OpenMPStandaloneConstruct &c) -> Fortran::parser::SourcePosition {
+                        return locationFromSource(c.source);
+                    },
+                    // OpenMPSectionsConstruct, OpenMPLoopConstruct,
+                    // OpenMPBlockConstruct, OpenMPCriticalConstruct Get the source from
+                    // the directive field.
+                    [&](const auto &c) -> Fortran::parser::SourcePosition {
+                        const Fortran::parser::CharBlock &source{std::get<0>(c.t).source};
+                        return locationFromSource(source);
+                    },
+                    [&](const Fortran::parser::OpenMPAtomicConstruct &c) -> Fortran::parser::SourcePosition {
+                        return std::visit(
+                            [&](const auto &o) -> Fortran::parser::SourcePosition {
+                                const Fortran::parser::CharBlock &source{
+                                    std::get<Fortran::parser::Verbatim>(o.t).source
+                                };
+                                return locationFromSource(source);
+                            },
+                            c.u);
+                    },
+                    [&](const Fortran::parser::OpenMPSectionConstruct &c) -> Fortran::parser::SourcePosition {
+                        const Fortran::parser::CharBlock &source{c.source};
+                        return locationFromSource(source);
+                    },
+                },
+                construct.u);
+        }
+
+        Fortran::parser::SourcePosition getLocation(const Fortran::parser::OpenACCConstruct &construct) {
+            // This function is based on the equivalent function in
+            // flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
             return std::visit(
                 Fortran::common::visitors{
                     [&](const auto &c) -> Fortran::parser::SourcePosition {
                         return locationFromSource(c.source);
-                    }
+                    },
+                    [&](const Fortran::parser::OpenACCBlockConstruct &c) -> Fortran::parser::SourcePosition {
+                        return locationFromSource(std::get<Fortran::parser::AccBeginBlockDirective>(c.t).source);
+                    },
+                    [&](const Fortran::parser::OpenACCLoopConstruct &c) -> Fortran::parser::SourcePosition {
+                        return locationFromSource(std::get<Fortran::parser::AccBeginLoopDirective>(c.t).source);
+                    },
                 }, construct.u);
+        }
 
-            /*
-            std::variant<Statement<ActionStmt>, common::Indirection<AssociateConstruct>,
-      common::Indirection<BlockConstruct>, common::Indirection<CaseConstruct>,
-      common::Indirection<ChangeTeamConstruct>,
-      common::Indirection<CriticalConstruct>,
-      Statement<common::Indirection<LabelDoStmt>>,
-      Statement<common::Indirection<EndDoStmt>>,
-      common::Indirection<DoConstruct>, common::Indirection<IfConstruct>,
-      common::Indirection<SelectRankConstruct>,
-      common::Indirection<SelectTypeConstruct>,
-      common::Indirection<WhereConstruct>, common::Indirection<ForallConstruct>,
-      common::Indirection<CompilerDirective>,
-      common::Indirection<OpenACCConstruct>,
-      common::Indirection<AccEndCombinedDirective>,
-      common::Indirection<OpenMPConstruct>,
-      common::Indirection<OmpEndLoopDirective>,
-      common::Indirection<CUFKernelDoConstruct>>
-      u;
+        Fortran::parser::SourcePosition getLocation(const Fortran::parser::ExecutableConstruct &construct) {
+            /* Possibilities for ExecutableConstruct:
+                 Statement<ActionStmt>
+                 common::Indirection<AssociateConstruct>
+                 common::Indirection<BlockConstruct>
+                 common::Indirection<CaseConstruct>,
+                 common::Indirection<ChangeTeamConstruct>
+                 common::Indirection<CriticalConstruct>
+                 Statement<common::Indirection<LabelDoStmt>>
+                 Statement<common::Indirection<EndDoStmt>>
+                 common::Indirection<DoConstruct>
+                 common::Indirection<IfConstruct>,
+                 common::Indirection<SelectRankConstruct>,
+                 common::Indirection<SelectTypeConstruct>,
+                 common::Indirection<WhereConstruct>
+                 common::Indirection<ForallConstruct>,
+                 common::Indirection<CompilerDirective>,
+                 common::Indirection<OpenACCConstruct>,
+                 common::Indirection<AccEndCombinedDirective>,
+                 common::Indirection<OpenMPConstruct>,
+                 common::Indirection<OmpEndLoopDirective>,
+                 common::Indirection<CUFKernelDoConstruct>
             */
+            return std::visit(
+                Fortran::common::visitors{
+                    [&](const auto &c) -> Fortran::parser::SourcePosition {
+                        return locationFromSource(c.source);
+                    },
+                    [&](const Fortran::common::Indirection<Fortran::parser::CUFKernelDoConstruct> &c) ->
+                Fortran::parser::SourcePosition {
+                        return locationFromSource(std::get<0>(c.value().t).source);
+                    },
+                    [&](const Fortran::common::Indirection<Fortran::parser::OmpEndLoopDirective> &c) ->
+                Fortran::parser::SourcePosition {
+                        return locationFromSource(c.value().source);
+                    },
+                    [&](const Fortran::common::Indirection<Fortran::parser::OpenMPConstruct> &c) ->
+                Fortran::parser::SourcePosition {
+                        return getLocation(c.value());
+                    },
+                    [&](const Fortran::common::Indirection<Fortran::parser::AccEndCombinedDirective> &c) ->
+                Fortran::parser::SourcePosition {
+                        return locationFromSource(c.value().source);
+                    },
+                    [&](const Fortran::common::Indirection<Fortran::parser::OpenACCConstruct> &c) ->
+                Fortran::parser::SourcePosition {
+                        return getLocation(c.value());
+                    },
+                    [&](const Fortran::common::Indirection<Fortran::parser::CompilerDirective> & c)->
+                    Fortran::parser::SourcePosition {
+                        return locationFromSource(c.value().source);
+                    },
+                    [&](const Fortran::common::Indirection<Fortran::parser::ForallConstruct> &c) ->
+                Fortran::parser::SourcePosition {
+                        return locationFromSource(
+                            std::get<Fortran::parser::Statement<Fortran::parser::ForallConstructStmt> >(c.value().t).
+                            source);
+                    },
+                    [&](const Fortran::common::Indirection<Fortran::parser::WhereConstruct> &c) ->
+                Fortran::parser::SourcePosition {
+                        return locationFromSource(
+                            std::get<Fortran::parser::Statement<Fortran::parser::WhereConstructStmt> >(c.value().t).
+                            source);
+                    }
+                }, construct.u);
         }
 
         Fortran::parser::SourcePosition getLocation(const Fortran::parser::ExecutionPartConstruct &construct) {
-            // Possibilities for ExecutionPartConstruct:
-            //   ExecutableConstruct
-            //   Statement<common::Indirection<FormatStmt>>
-            //   Statement<common::Indirection<EntryStmt>>
-            //   Statement<common::Indirection<DataStmt>>
-            //   Statement<common::Indirection<NamelistStmt>>
-            //   ErrorRecovery
+            /* Possibilities for ExecutionPartConstruct:
+             *   ExecutableConstruct
+             *   Statement<common::Indirection<FormatStmt>>
+             *   Statement<common::Indirection<EntryStmt>>
+             *   Statement<common::Indirection<DataStmt>>
+             *   Statement<common::Indirection<NamelistStmt>>
+             *   ErrorRecovery
+             */
             return std::visit(
                 Fortran::common::visitors{
                     [&](const Fortran::parser::ExecutableConstruct &c) -> Fortran::parser::SourcePosition {

From 512b12cae37d127634649ec979311eae33cb6f7d Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Tue, 10 Dec 2024 20:24:35 -0800
Subject: [PATCH 052/135] All cases handled for GetLocation of
 ExecutableConstruct

---
 src/salt_instrument_flang_plugin.cpp | 47 ++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index 55f3743..f0dfcd9 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -267,6 +267,53 @@ class SaltInstrumentAction : public PluginParseTreeAction {
                         return locationFromSource(
                             std::get<Fortran::parser::Statement<Fortran::parser::WhereConstructStmt> >(c.value().t).
                             source);
+                    },
+                    [&](const Fortran::common::Indirection<Fortran::parser::SelectTypeConstruct> &c) ->
+                Fortran::parser::SourcePosition {
+                        return locationFromSource(
+                            std::get<Fortran::parser::Statement<Fortran::parser::SelectTypeStmt> >(c.value().t).source);
+                    },
+                    [&](const Fortran::common::Indirection<Fortran::parser::SelectRankConstruct> &c) ->
+                Fortran::parser::SourcePosition {
+                        return locationFromSource(
+                            std::get<Fortran::parser::Statement<Fortran::parser::SelectRankStmt> >(c.value().t).
+                            source);
+                    },
+                    [&](const Fortran::common::Indirection<Fortran::parser::IfConstruct> &c) ->
+                Fortran::parser::SourcePosition {
+                        return locationFromSource(
+                            std::get<Fortran::parser::Statement<Fortran::parser::IfThenStmt> >(c.value().t).source);
+                    },
+                    [&](const Fortran::common::Indirection<Fortran::parser::DoConstruct> &c) ->
+                Fortran::parser::SourcePosition {
+                        return locationFromSource(
+                            std::get<Fortran::parser::Statement<Fortran::parser::NonLabelDoStmt> >(c.value().t).source);
+                    },
+                    [&](const Fortran::common::Indirection<Fortran::parser::CriticalConstruct> &c) ->
+                Fortran::parser::SourcePosition {
+                        return locationFromSource(
+                            std::get<Fortran::parser::Statement<Fortran::parser::CriticalStmt> >(c.value().t).source);
+                    },
+                    [&](const Fortran::common::Indirection<Fortran::parser::ChangeTeamConstruct> &c) ->
+                Fortran::parser::SourcePosition {
+                       return locationFromSource(
+                           std::get<Fortran::parser::Statement<Fortran::parser::ChangeTeamStmt> >(c.value().t).source);
+                    },
+                    [&](const Fortran::common::Indirection<Fortran::parser::CaseConstruct> &c) ->
+                Fortran::parser::SourcePosition {
+                        return locationFromSource(
+                            std::get<Fortran::parser::Statement<Fortran::parser::SelectCaseStmt> >(c.value().t).source);
+                    },
+                    [&](const Fortran::common::Indirection<Fortran::parser::BlockConstruct> &c) ->
+                Fortran::parser::SourcePosition {
+                        return locationFromSource(
+                            std::get<Fortran::parser::Statement<Fortran::parser::BlockStmt> >(c.value().t).source);
+                    },
+                    [&](const Fortran::common::Indirection<Fortran::parser::AssociateConstruct> &c) ->
+                Fortran::parser::SourcePosition {
+                            return locationFromSource(
+                                std::get<Fortran::parser::Statement<Fortran::parser::AssociateStmt> >(c.value().t).
+                                source);
                     }
                 }, construct.u);
         }

From 1359a46631126ebaa28c4574b5003f5d03a23c9c Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Tue, 10 Dec 2024 20:34:42 -0800
Subject: [PATCH 053/135] Plugin prints line number of each
 ExecutionPartConstruct encountered

---
 src/salt_instrument_flang_plugin.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index f0dfcd9..a6254d2 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -354,6 +354,11 @@ class SaltInstrumentAction : public PluginParseTreeAction {
                 return true;
             }
 
+            for (const Fortran::parser::ExecutionPartConstruct & construct : block) {
+                Fortran::parser::SourcePosition loc{getLocation(construct)};
+                llvm::outs() << loc.line << "\n";
+            }
+
             return true;
         }
 

From fe07eb55e5b45062909b142c1aafc46cf9b56905 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Wed, 11 Dec 2024 11:09:21 -0500
Subject: [PATCH 054/135] Revert monkeypatch hacks to make CI work

Now that the docker image is set up correctly, remove the monkeypatch
hacks used to fix the docker image before running the tests
---
 .github/workflows/CI.yaml | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml
index b4c4069..9278d3a 100644
--- a/.github/workflows/CI.yaml
+++ b/.github/workflows/CI.yaml
@@ -11,23 +11,6 @@ jobs:
       - uses: actions/checkout@v3
         with:
           submodules: recursive
-      - name: patch TAU & link flang # This can be deleted once the salt-dev image is updated
-        run: |
-          sed -i 's/#FLANG_NO_MX_OPTS#//' /usr/local/x86_64/lib/Makefile.tau-clang-pthread || echo "sed command returned non-zero status"
-          FLANG_NEW="$(which flang-new)"
-          if ! [ -e /usr/local/bin/flang ]; then
-            echo "Linking flang-new as flang"
-            ln -s "$FLANG_NEW" /usr/local/bin/flang
-            ls -la /usr/local/bin/flang
-          else
-            ls -la /usr/local/bin/flang
-          fi
-          export TAU_MAKEFILE=/usr/local/x86_64/lib/Makefile.tau-clang-pthread
-          tau_f90.sh -show
-          tau_f90.sh --version
-          tau_cxx.sh -show
-          tau_cxx.sh --version
-          unset TAU_MAKEFILE
       - name: Print working directory
         run: |
             echo "Working directory: $(pwd)"

From 71bd8fb97ca67523d361672a11d764e44f6fe25f Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Wed, 11 Dec 2024 11:54:17 -0500
Subject: [PATCH 055/135] Try to improve debugging & output of CI

Also, fail CI when the initial build attempt or the initial testing
attempt fails, even if subsequent attempts pass.
---
 .github/workflows/CI.yaml | 31 +++++++++++++++++++++++++++----
 1 file changed, 27 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml
index 9278d3a..8d4dc99 100644
--- a/.github/workflows/CI.yaml
+++ b/.github/workflows/CI.yaml
@@ -1,7 +1,5 @@
 name: CI
 on: push
-env: 
-  GIT_SSL_NO_VERIFY: true
 jobs:
   build:
     runs-on: ubuntu-latest
@@ -19,8 +17,33 @@ jobs:
       - name: Configure SALT
         run: cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -Wdev -Wdeprecated -G Ninja -S . -B build
       - name: Compile SALT
+        id: build
         run: |
           echo "Cores: $(nproc) "
-          cmake --build build --parallel $(nproc) || cmake --build build --verbose
+          if cmake --build build --parallel $(nproc) ; then
+            echo "build_status=success" >> $GITHUB_OUTPUT
+          else
+            echo "build_status=failure" >> $GITHUB_OUTPUT
+            echo "::error::Initial build failed, re-attempting build with verbose output but failing CI"
+            echo "Initial build failed, re-attempting build with verbose output but failing CI"
+            if ! cmake --build build --verbose ; then
+              echo "Verbose build failed on second attempt"
+              echo "::error::Verbose build failed on second attempt"
+              exit 1
+            fi
+          fi
       - name: Run Tests
-        run: cd build && ctest -j --output-on-failure || ctest --rerun-failed --verbose
+        working-directory: build
+        run: |
+          if ! ctest -j "$(nproc)" --output-on-failure ; then # explicit level: a bare -j swallows the next option on CTest < 3.29
+            echo "::error::Tests failed, re-running tests with verbose output"
+            echo "Tests failed, re-running tests with verbose output"
+            if ! ctest --rerun-failed --verbose ; then
+              echo "Rerunning failed tests failed on second attempt"
+              echo "::error::Rerunning failed tests failed on second attempt"
+            fi
+            exit 1
+          fi
+      - name: Fail CI on build failure
+        if: ${{ steps.build.outputs.build_status == 'failure' }}
+        run: echo "Failing build" ; exit 1

From 1adaccbe1488dd8b437187a21c7851ac22f7c93b Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Wed, 11 Dec 2024 11:59:03 -0500
Subject: [PATCH 056/135] See if pulling from ghcr.io is faster than dockerhub

---
 .github/workflows/CI.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml
index 8d4dc99..f96c406 100644
--- a/.github/workflows/CI.yaml
+++ b/.github/workflows/CI.yaml
@@ -4,7 +4,7 @@ jobs:
   build:
     runs-on: ubuntu-latest
     container:
-      image: paratools/salt-dev:1.3
+      image: ghcr.io/paratoolsinc/salt-dev:1.3
     steps:
       - uses: actions/checkout@v3
         with:

From d022c364a4c103ac7abc5e2d5cf85170de83b685 Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Wed, 11 Dec 2024 15:54:05 -0800
Subject: [PATCH 057/135] Record instrumentation points

---
 src/salt_instrument_flang_plugin.cpp | 88 +++++++++++++++++++++-------
 tests/fortran/emptyprog.f90          |  2 +
 tests/fortran/funcsub.f90            | 33 +++++++++++
 tests/fortran/trivial.f90            |  1 +
 4 files changed, 104 insertions(+), 20 deletions(-)
 create mode 100644 tests/fortran/emptyprog.f90
 create mode 100644 tests/fortran/funcsub.f90
 create mode 100644 tests/fortran/trivial.f90

diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index a6254d2..cb51111 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -19,6 +19,13 @@ limitations under the License.
 // See https://flang.llvm.org/docs/FlangDriver.html#frontend-driver-plugins
 // for documentation of the Flang frontend plugin interface
 
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <variant>
+#include <optional>
+#include <tuple>
+
 #include <clang/Basic/SourceLocation.h>
 
 #include "flang/Frontend/FrontendActions.h"
@@ -30,15 +37,17 @@ limitations under the License.
 
 using namespace Fortran::frontend;
 
+// TODO Split declarations into a separate header file.
+
 /**
  * The main action of the Salt instrumentor.
  * Visits each node in the parse tree.
  */
 class SaltInstrumentAction : public PluginParseTreeAction {
     enum class SaltInstrumentationPointType {
-        PROGRAM_BEGIN,
-        PROCEDURE_BEGIN,
-        PROCEDURE_END
+        PROGRAM_BEGIN,    // Declare profiler, initialize TAU, set node, start timer
+        PROCEDURE_BEGIN,  // Declare profiler, start timer
+        PROCEDURE_END     // Stop timer
     };
 
     struct SaltInstrumentationPoint {
@@ -74,12 +83,23 @@ class SaltInstrumentAction : public PluginParseTreeAction {
                 instrumentation_point_type, start_line, timer_name);
         }
 
-        auto &getInstrumentationPoints() {
+        const auto & getInstrumentationPoints() const {
             return instrumentationPoints_;
         }
 
-        Fortran::parser::SourcePosition locationFromSource(const Fortran::parser::CharBlock &charBlock) const {
-            return parsing->allCooked().GetSourcePositionRange(charBlock)->first;
+        /**
+         * From a CharBlock object (generally held in the `source` field of a parse tree node),
+         * get the source position (file, line, column).
+         * If `end` is set, returns the ending position of the block.
+         * If `end` is not set (and by default), returns the starting position of the block.
+         */
+        [[nodiscard]] Fortran::parser::SourcePosition locationFromSource(
+            const Fortran::parser::CharBlock &charBlock, const bool end = false) const {
+            const auto & sourceRange{parsing->allCooked().GetSourcePositionRange(charBlock)};
+            if (end) {
+                return sourceRange->second;
+            }
+            return sourceRange->first;
         }
 
         // Default empty visit functions for otherwise unhandled types.
@@ -88,6 +108,7 @@ class SaltInstrumentAction : public PluginParseTreeAction {
 
         template<typename A>
         static void Post(const A &) {
+            // this space intentionally left blank
         }
 
         // Override all types that we want to visit.
@@ -144,6 +165,8 @@ class SaltInstrumentAction : public PluginParseTreeAction {
             subprogramName_.clear();
         }
 
+        // TODO split location-getting routines into a separate file
+
         Fortran::parser::SourcePosition getLocation(const Fortran::parser::OpenMPDeclarativeConstruct &construct) {
             // This function is based on the equivalent function in
             // flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
@@ -234,7 +257,7 @@ class SaltInstrumentAction : public PluginParseTreeAction {
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::CUFKernelDoConstruct> &c) ->
                 Fortran::parser::SourcePosition {
-                        return locationFromSource(std::get<0>(c.value().t).source);
+                        return locationFromSource(std::get<Fortran::parser::CUFKernelDoConstruct::Directive>(c.value().t).source);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::OmpEndLoopDirective> &c) ->
                 Fortran::parser::SourcePosition {
@@ -342,8 +365,7 @@ class SaltInstrumentAction : public PluginParseTreeAction {
         }
 
         bool Pre(const Fortran::parser::ExecutionPart &executionPart) {
-            (void) executionPart; // TODO handle execution part
-            // Need to get the FIRST and the LAST components
+            // TODO Need to get the FIRST and the LAST components
             // Insert timer start before first component
             // Use main program insert if in main program, else subprogram insert
             // Insert timer end after last component
@@ -351,12 +373,20 @@ class SaltInstrumentAction : public PluginParseTreeAction {
             const Fortran::parser::Block &block = executionPart.v;
             if (block.empty()) {
                 llvm::outs() << "WARNING: Execution part empty.\n";
-                return true;
-            }
-
-            for (const Fortran::parser::ExecutionPartConstruct & construct : block) {
-                Fortran::parser::SourcePosition loc{getLocation(construct)};
-                llvm::outs() << loc.line << "\n";
+            } else {
+                const Fortran::parser::SourcePosition startLoc{getLocation(block.front())};
+                const Fortran::parser::SourcePosition endLoc{getLocation(block.back())};
+                if (isInMainProgram_) {
+                    llvm::outs() << "Program begin \"" << mainProgramName_ << "\" at " << startLoc.line << "\n";
+                    addInstrumentationPoint(SaltInstrumentationPointType::PROGRAM_BEGIN, startLoc.line,
+                                            mainProgramName_);
+                } else{
+                    llvm::outs() << "Subprogram begin \"" << subprogramName_ << "\" at " << startLoc.line << "\n";
+                    addInstrumentationPoint(SaltInstrumentationPointType::PROCEDURE_BEGIN, startLoc.line,
+                                            subprogramName_);
+                }
+                llvm::outs() << "End at " << endLoc.line << "\n";
+                addInstrumentationPoint(SaltInstrumentationPointType::PROCEDURE_END, endLoc.line);
             }
 
             return true;
@@ -373,7 +403,7 @@ class SaltInstrumentAction : public PluginParseTreeAction {
         // Pass in the parser object from the Action to the Visitor
         // so that we can use it while processing parse tree nodes.
         [[maybe_unused]] Fortran::parser::Parsing *parsing{nullptr};
-    };
+    }; // SaltInstrumentParseTreeVisitor
 
     /**
      * Get the source file represented by a given parse tree
@@ -384,7 +414,7 @@ class SaltInstrumentAction : public PluginParseTreeAction {
      */
     static std::optional<std::string> getInputFilePath(Fortran::parser::Parsing &parsing) {
         const auto &allSources{parsing.allCooked().allSources()};
-        if (auto firstProv{allSources.GetFirstFileProvenance()}) {
+        if (const auto firstProv{allSources.GetFirstFileProvenance()}) {
             if (const auto *srcFile{allSources.GetSourceFile(firstProv->start())}) {
                 return srcFile->path();
             }
@@ -392,6 +422,21 @@ class SaltInstrumentAction : public PluginParseTreeAction {
         return std::nullopt;
     }
 
+    static void instrumentFile(const std::string &inputFilePath, llvm::raw_pwrite_stream &outputStream,
+                               const SaltInstrumentParseTreeVisitor &visitor) {
+        std::ifstream inputStream{inputFilePath};
+        if (!inputStream) {
+            llvm::errs() << "ERROR: Could not open input file" << inputFilePath << "\n";
+            std::exit(-2);
+        }
+        std::string line;
+        int lineNum{0};
+        while (std::getline(inputStream, line)) {
+            ++lineNum;
+            outputStream << line << "\n";
+        }
+        (void)lineNum;
+    }
 
     /**
      * This is the entry point for the plugin.
@@ -406,7 +451,7 @@ class SaltInstrumentAction : public PluginParseTreeAction {
         // Get the path to the input file
         const auto inputFilePath = getInputFilePath(parsing);
         if (!inputFilePath) {
-            llvm::outs() << "ERROR: Unable to find input file name!\n";
+            llvm::errs() << "ERROR: Unable to find input file name!\n";
             std::exit(-1);
         }
         llvm::outs() << "Have input file: " << *inputFilePath << "\n";
@@ -422,17 +467,20 @@ class SaltInstrumentAction : public PluginParseTreeAction {
 
         // Open an output file for writing the instrumented code
         const std::string outputFileExtension = "inst."s + inputFileExtension;
-        auto outputFile = createOutputFile(outputFileExtension);
+        const auto outputFileStream = createOutputFile(outputFileExtension);
 
         // Walk the parse tree
         SaltInstrumentParseTreeVisitor visitor{&parsing};
         Walk(parsing.parseTree(), visitor);
 
         // TODO write the instrumented code
+        instrumentFile(*inputFilePath, *outputFileStream, visitor);
+
+        outputFileStream->flush();
 
         llvm::outs() << "==== SALT Instrumentor Plugin finished ====\n";
     }
 };
 
-static FrontendPluginRegistry::Add<SaltInstrumentAction> X(
+[[maybe_unused]] static FrontendPluginRegistry::Add<SaltInstrumentAction> X(
     "salt-instrument", "Apply SALT Instrumentation");
diff --git a/tests/fortran/emptyprog.f90 b/tests/fortran/emptyprog.f90
new file mode 100644
index 0000000..8c5b796
--- /dev/null
+++ b/tests/fortran/emptyprog.f90
@@ -0,0 +1,2 @@
+program empty
+end program
diff --git a/tests/fortran/funcsub.f90 b/tests/fortran/funcsub.f90
new file mode 100644
index 0000000..52b5995
--- /dev/null
+++ b/tests/fortran/funcsub.f90
@@ -0,0 +1,33 @@
+function func(i) result(j)
+    integer, intent (in) :: i ! input
+    integer              :: j ! output
+
+    j = i**2 + i**3
+end function
+
+subroutine square_cube(i, isquare, icube)
+    integer, intent (in)  :: i              ! input
+    integer, intent (out) :: isquare, icube ! output
+
+    isquare = i**2
+    icube   = i**3
+end subroutine
+
+subroutine hello
+  print *, "Hello world"
+end subroutine
+
+program main
+    implicit none
+    external square_cube ! external subroutine
+    integer :: isq, icub
+    integer :: i
+    integer :: func
+
+    call square_cube(4, isq, icub)
+    print *, "i,i^2,i^3=", 4, isq, icub
+
+    i = 3
+    print *, "sum of the square and cube of", i, "is", func(i)
+end program
+
diff --git a/tests/fortran/trivial.f90 b/tests/fortran/trivial.f90
new file mode 100644
index 0000000..a6a9baf
--- /dev/null
+++ b/tests/fortran/trivial.f90
@@ -0,0 +1 @@
+end

From b9ee59599c991f039a32721d377744df78f414d6 Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Wed, 11 Dec 2024 20:08:48 -0800
Subject: [PATCH 058/135] Add comments at every instrumentation point.

The last thing to implement is to read the config file and use the
strings defined there in place of the comments.
---
 src/salt_instrument_flang_plugin.cpp | 79 ++++++++++++++++++----------
 1 file changed, 51 insertions(+), 28 deletions(-)

diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index cb51111..24088e9 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -43,28 +43,34 @@ using namespace Fortran::frontend;
  * The main action of the Salt instrumentor.
  * Visits each node in the parse tree.
  */
-class SaltInstrumentAction : public PluginParseTreeAction {
+class SaltInstrumentAction final : public PluginParseTreeAction {
     enum class SaltInstrumentationPointType {
-        PROGRAM_BEGIN,    // Declare profiler, initialize TAU, set node, start timer
-        PROCEDURE_BEGIN,  // Declare profiler, start timer
-        PROCEDURE_END     // Stop timer
+        PROGRAM_BEGIN, // Declare profiler, initialize TAU, set node, start timer
+        PROCEDURE_BEGIN, // Declare profiler, start timer
+        PROCEDURE_END // Stop timer
     };
 
     struct SaltInstrumentationPoint {
-        SaltInstrumentationPoint(SaltInstrumentationPointType instrumentation_point_type,
-                                 int start_line,
+        SaltInstrumentationPoint(const SaltInstrumentationPointType instrumentation_point_type,
+                                 const int start_line,
                                  const std::optional<std::string> &timer_name = std::nullopt)
             : instrumentationPointType(instrumentation_point_type),
               startLine(start_line),
               timerName(timer_name) {
         }
 
+        [[nodiscard]] bool instrumentBefore() const {
+            return instrumentationPointType == SaltInstrumentationPointType::PROGRAM_BEGIN || instrumentationPointType
+                   == SaltInstrumentationPointType::PROCEDURE_BEGIN;
+        }
+
 
         SaltInstrumentationPointType instrumentationPointType;
         int startLine;
         std::optional<std::string> timerName;
     };
 
+
     struct SaltInstrumentParseTreeVisitor {
         explicit SaltInstrumentParseTreeVisitor(Fortran::parser::Parsing *parsing)
             : parsing(parsing) {
@@ -83,7 +89,7 @@ class SaltInstrumentAction : public PluginParseTreeAction {
                 instrumentation_point_type, start_line, timer_name);
         }
 
-        const auto & getInstrumentationPoints() const {
+        [[nodiscard]] const auto &getInstrumentationPoints() const {
             return instrumentationPoints_;
         }
 
@@ -95,7 +101,7 @@ class SaltInstrumentAction : public PluginParseTreeAction {
          */
         [[nodiscard]] Fortran::parser::SourcePosition locationFromSource(
             const Fortran::parser::CharBlock &charBlock, const bool end = false) const {
-            const auto & sourceRange{parsing->allCooked().GetSourcePositionRange(charBlock)};
+            const auto &sourceRange{parsing->allCooked().GetSourcePositionRange(charBlock)};
             if (end) {
                 return sourceRange->second;
             }
@@ -257,7 +263,8 @@ class SaltInstrumentAction : public PluginParseTreeAction {
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::CUFKernelDoConstruct> &c) ->
                 Fortran::parser::SourcePosition {
-                        return locationFromSource(std::get<Fortran::parser::CUFKernelDoConstruct::Directive>(c.value().t).source);
+                        return locationFromSource(
+                            std::get<Fortran::parser::CUFKernelDoConstruct::Directive>(c.value().t).source);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::OmpEndLoopDirective> &c) ->
                 Fortran::parser::SourcePosition {
@@ -275,8 +282,8 @@ class SaltInstrumentAction : public PluginParseTreeAction {
                 Fortran::parser::SourcePosition {
                         return getLocation(c.value());
                     },
-                    [&](const Fortran::common::Indirection<Fortran::parser::CompilerDirective> & c)->
-                    Fortran::parser::SourcePosition {
+                    [&](const Fortran::common::Indirection<Fortran::parser::CompilerDirective> &c)->
+                Fortran::parser::SourcePosition {
                         return locationFromSource(c.value().source);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::ForallConstruct> &c) ->
@@ -319,8 +326,8 @@ class SaltInstrumentAction : public PluginParseTreeAction {
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::ChangeTeamConstruct> &c) ->
                 Fortran::parser::SourcePosition {
-                       return locationFromSource(
-                           std::get<Fortran::parser::Statement<Fortran::parser::ChangeTeamStmt> >(c.value().t).source);
+                        return locationFromSource(
+                            std::get<Fortran::parser::Statement<Fortran::parser::ChangeTeamStmt> >(c.value().t).source);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::CaseConstruct> &c) ->
                 Fortran::parser::SourcePosition {
@@ -334,9 +341,9 @@ class SaltInstrumentAction : public PluginParseTreeAction {
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::AssociateConstruct> &c) ->
                 Fortran::parser::SourcePosition {
-                            return locationFromSource(
-                                std::get<Fortran::parser::Statement<Fortran::parser::AssociateStmt> >(c.value().t).
-                                source);
+                        return locationFromSource(
+                            std::get<Fortran::parser::Statement<Fortran::parser::AssociateStmt> >(c.value().t).
+                            source);
                     }
                 }, construct.u);
         }
@@ -358,20 +365,14 @@ class SaltInstrumentAction : public PluginParseTreeAction {
                     [&](const auto &c) -> Fortran::parser::SourcePosition {
                         return locationFromSource(c.source);
                     },
-                    [&](const Fortran::parser::ErrorRecovery &c) -> Fortran::parser::SourcePosition {
+                    [&](const Fortran::parser::ErrorRecovery &) -> Fortran::parser::SourcePosition {
                         DIE("Should not encounter ErrorRecovery in parse tree");
                     }
                 }, construct.u);
         }
 
         bool Pre(const Fortran::parser::ExecutionPart &executionPart) {
-            // TODO Need to get the FIRST and the LAST components
-            // Insert timer start before first component
-            // Use main program insert if in main program, else subprogram insert
-            // Insert timer end after last component
-
-            const Fortran::parser::Block &block = executionPart.v;
-            if (block.empty()) {
+            if (const Fortran::parser::Block &block = executionPart.v; block.empty()) {
                 llvm::outs() << "WARNING: Execution part empty.\n";
             } else {
                 const Fortran::parser::SourcePosition startLoc{getLocation(block.front())};
@@ -380,7 +381,7 @@ class SaltInstrumentAction : public PluginParseTreeAction {
                     llvm::outs() << "Program begin \"" << mainProgramName_ << "\" at " << startLoc.line << "\n";
                     addInstrumentationPoint(SaltInstrumentationPointType::PROGRAM_BEGIN, startLoc.line,
                                             mainProgramName_);
-                } else{
+                } else {
                     llvm::outs() << "Subprogram begin \"" << subprogramName_ << "\" at " << startLoc.line << "\n";
                     addInstrumentationPoint(SaltInstrumentationPointType::PROCEDURE_BEGIN, startLoc.line,
                                             subprogramName_);
@@ -422,6 +423,19 @@ class SaltInstrumentAction : public PluginParseTreeAction {
         return std::nullopt;
     }
 
+    [[nodiscard]] static std::string getInstrumentationPointString(SaltInstrumentationPointType type) {
+        switch (type) {
+            case SaltInstrumentationPointType::PROCEDURE_BEGIN:
+                return "! PROCEDURE BEGIN";
+            case SaltInstrumentationPointType::PROGRAM_BEGIN:
+                return "! PROGRAM BEGIN";
+            case SaltInstrumentationPointType::PROCEDURE_END:
+                return "! PROCEDURE END";
+            default:
+                CRASH_NO_CASE;
+        }
+    }
+
     static void instrumentFile(const std::string &inputFilePath, llvm::raw_pwrite_stream &outputStream,
                                const SaltInstrumentParseTreeVisitor &visitor) {
         std::ifstream inputStream{inputFilePath};
@@ -431,11 +445,20 @@ class SaltInstrumentAction : public PluginParseTreeAction {
         }
         std::string line;
         int lineNum{0};
+        const auto &instPts{visitor.getInstrumentationPoints()};
+        auto instIter{instPts.cbegin()};
         while (std::getline(inputStream, line)) {
             ++lineNum;
+            if (instIter != instPts.cend() && instIter->startLine == lineNum && instIter->instrumentBefore()) {
+                outputStream << getInstrumentationPointString(instIter->instrumentationPointType) << "\n";
+                ++instIter;
+            }
             outputStream << line << "\n";
+            if (instIter != instPts.cend() && instIter->startLine == lineNum && !instIter->instrumentBefore()) {
+                outputStream << getInstrumentationPointString(instIter->instrumentationPointType) << "\n";
+                ++instIter;
+            }
         }
-        (void)lineNum;
     }
 
     /**
@@ -469,11 +492,11 @@ class SaltInstrumentAction : public PluginParseTreeAction {
         const std::string outputFileExtension = "inst."s + inputFileExtension;
         const auto outputFileStream = createOutputFile(outputFileExtension);
 
-        // Walk the parse tree
+        // Walk the parse tree -- marks nodes for instrumentation
         SaltInstrumentParseTreeVisitor visitor{&parsing};
         Walk(parsing.parseTree(), visitor);
 
-        // TODO write the instrumented code
+        // Use the instrumentation points stored in the Visitor to write the instrumented file.
         instrumentFile(*inputFilePath, *outputFileStream, visitor);
 
         outputFileStream->flush();

From 1aa4b5e96ec417a74320e9073ee27dd2570b91fc Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Wed, 11 Dec 2024 21:05:49 -0800
Subject: [PATCH 059/135] Fix getting end position of Constructs

Getting the end position of the statement that starts the construct just
gives you the line and column that ends that statement, e.g., for DO it
gives you the end of "DO". To get the end position of the construct
itself, we have to instead get the end position of the end statement,
stored in a different field of the tuple.
---
 src/salt_instrument_flang_plugin.cpp | 187 +++++++++++++++++++--------
 1 file changed, 136 insertions(+), 51 deletions(-)

diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index 24088e9..780e58e 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -100,7 +100,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
          * If `end` is not set (and by default), returns the starting position of the block.
          */
         [[nodiscard]] Fortran::parser::SourcePosition locationFromSource(
-            const Fortran::parser::CharBlock &charBlock, const bool end = false) const {
+            const Fortran::parser::CharBlock &charBlock, const bool end) const {
             const auto &sourceRange{parsing->allCooked().GetSourcePositionRange(charBlock)};
             if (end) {
                 return sourceRange->second;
@@ -173,30 +173,31 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
 
         // TODO split location-getting routines into a separate file
 
-        Fortran::parser::SourcePosition getLocation(const Fortran::parser::OpenMPDeclarativeConstruct &construct) {
+        Fortran::parser::SourcePosition getLocation(const Fortran::parser::OpenMPDeclarativeConstruct &construct,
+                                                    const bool end) {
             // This function is based on the equivalent function in
             // flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
             return std::visit(
                 [&](const auto &o) -> Fortran::parser::SourcePosition {
-                    return locationFromSource(o.source);
+                    return locationFromSource(o.source, end);
                 },
                 construct.u);
         }
 
-        Fortran::parser::SourcePosition getLocation(const Fortran::parser::OpenMPConstruct &construct) {
+        Fortran::parser::SourcePosition getLocation(const Fortran::parser::OpenMPConstruct &construct, const bool end) {
             // This function is based on the equivalent function in
             // flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
             return std::visit(
                 Fortran::common::visitors{
                     [&](const Fortran::parser::OpenMPStandaloneConstruct &c) -> Fortran::parser::SourcePosition {
-                        return locationFromSource(c.source);
+                        return locationFromSource(c.source, end);
                     },
                     // OpenMPSectionsConstruct, OpenMPLoopConstruct,
                     // OpenMPBlockConstruct, OpenMPCriticalConstruct Get the source from
                     // the directive field.
                     [&](const auto &c) -> Fortran::parser::SourcePosition {
                         const Fortran::parser::CharBlock &source{std::get<0>(c.t).source};
-                        return locationFromSource(source);
+                        return locationFromSource(source, end);
                     },
                     [&](const Fortran::parser::OpenMPAtomicConstruct &c) -> Fortran::parser::SourcePosition {
                         return std::visit(
@@ -204,151 +205,232 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                                 const Fortran::parser::CharBlock &source{
                                     std::get<Fortran::parser::Verbatim>(o.t).source
                                 };
-                                return locationFromSource(source);
+                                return locationFromSource(source, end);
                             },
                             c.u);
                     },
                     [&](const Fortran::parser::OpenMPSectionConstruct &c) -> Fortran::parser::SourcePosition {
                         const Fortran::parser::CharBlock &source{c.source};
-                        return locationFromSource(source);
+                        return locationFromSource(source, end);
                     },
                 },
                 construct.u);
         }
 
-        Fortran::parser::SourcePosition getLocation(const Fortran::parser::OpenACCConstruct &construct) {
+        Fortran::parser::SourcePosition
+        getLocation(const Fortran::parser::OpenACCConstruct &construct, const bool end) {
             // This function is based on the equivalent function in
             // flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
             return std::visit(
                 Fortran::common::visitors{
                     [&](const auto &c) -> Fortran::parser::SourcePosition {
-                        return locationFromSource(c.source);
+                        return locationFromSource(c.source, end);
                     },
                     [&](const Fortran::parser::OpenACCBlockConstruct &c) -> Fortran::parser::SourcePosition {
-                        return locationFromSource(std::get<Fortran::parser::AccBeginBlockDirective>(c.t).source);
+                        if (end) {
+                            return locationFromSource(std::get<Fortran::parser::AccEndBlockDirective>(c.t).source,
+                                                      end);
+                        }
+                        return locationFromSource(std::get<Fortran::parser::AccBeginBlockDirective>(c.t).source, end);
                     },
                     [&](const Fortran::parser::OpenACCLoopConstruct &c) -> Fortran::parser::SourcePosition {
-                        return locationFromSource(std::get<Fortran::parser::AccBeginLoopDirective>(c.t).source);
+                        // TODO handle end case (complicated because end statement and do construct are optional)
+                        return locationFromSource(std::get<Fortran::parser::AccBeginLoopDirective>(c.t).source, end);
                     },
                 }, construct.u);
         }
 
-        Fortran::parser::SourcePosition getLocation(const Fortran::parser::ExecutableConstruct &construct) {
+        Fortran::parser::SourcePosition getLocation(const Fortran::parser::ExecutableConstruct &construct,
+                                                    const bool end) {
             /* Possibilities for ExecutableConstruct:
                  Statement<ActionStmt>
                  common::Indirection<AssociateConstruct>
                  common::Indirection<BlockConstruct>
-                 common::Indirection<CaseConstruct>,
+                 common::Indirection<CaseConstruct>
                  common::Indirection<ChangeTeamConstruct>
                  common::Indirection<CriticalConstruct>
                  Statement<common::Indirection<LabelDoStmt>>
                  Statement<common::Indirection<EndDoStmt>>
-                 common::Indirection<DoConstruct
-                 common::Indirection<IfConstruct>,
-                 common::Indirection<SelectRankConstruct>,
-                 common::Indirection<SelectTypeConstruct>,
+                 common::Indirection<DoConstruct>
+                 common::Indirection<IfConstruct>
+                 common::Indirection<SelectRankConstruct>
+                 common::Indirection<SelectTypeConstruct>
                  common::Indirection<WhereConstruct>
-                 common::Indirection<ForallConstruct>,
-                 common::Indirection<CompilerDirective>,
-                 common::Indirection<OpenACCConstruct>,
-                 common::Indirection<AccEndCombinedDirective>,
-                 common::Indirection<OpenMPConstruct>,
-                 common::Indirection<OmpEndLoopDirective>,
+                 common::Indirection<ForallConstruct>
+                 common::Indirection<CompilerDirective>
+                 common::Indirection<OpenACCConstruct>
+                 common::Indirection<AccEndCombinedDirective>
+                 common::Indirection<OpenMPConstruct>
+                 common::Indirection<OmpEndLoopDirective>
                  common::Indirection<CUFKernelDoConstruct>
             */
             return std::visit(
                 Fortran::common::visitors{
                     [&](const auto &c) -> Fortran::parser::SourcePosition {
-                        return locationFromSource(c.source);
+                        return locationFromSource(c.source, end);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::CUFKernelDoConstruct> &c) ->
                 Fortran::parser::SourcePosition {
+                        if (end) {
+                            const auto &optionalConstruct = std::get<std::optional<Fortran::parser::DoConstruct> >(
+                                c.value().t);
+                            if (optionalConstruct.has_value()) {
+                                return locationFromSource(
+                                    std::get<Fortran::parser::Statement<Fortran::parser::EndDoStmt> >(
+                                        optionalConstruct.value().t).source, end);
+                            }
+                        }
                         return locationFromSource(
-                            std::get<Fortran::parser::CUFKernelDoConstruct::Directive>(c.value().t).source);
+                            std::get<Fortran::parser::CUFKernelDoConstruct::Directive>(c.value().t).source, end);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::OmpEndLoopDirective> &c) ->
                 Fortran::parser::SourcePosition {
-                        return locationFromSource(c.value().source);
+                        return locationFromSource(c.value().source, end);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::OpenMPConstruct> &c) ->
                 Fortran::parser::SourcePosition {
-                        return getLocation(c.value());
+                        return getLocation(c.value(), end);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::AccEndCombinedDirective> &c) ->
                 Fortran::parser::SourcePosition {
-                        return locationFromSource(c.value().source);
+                        return locationFromSource(c.value().source, end);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::OpenACCConstruct> &c) ->
                 Fortran::parser::SourcePosition {
-                        return getLocation(c.value());
+                        return getLocation(c.value(), end);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::CompilerDirective> &c)->
                 Fortran::parser::SourcePosition {
-                        return locationFromSource(c.value().source);
+                        return locationFromSource(c.value().source, end);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::ForallConstruct> &c) ->
                 Fortran::parser::SourcePosition {
+                        if (end) {
+                            return locationFromSource(
+                                std::get<Fortran::parser::Statement<Fortran::parser::EndForallStmt> >(c.value().t).
+                                source, end);
+                        }
                         return locationFromSource(
                             std::get<Fortran::parser::Statement<Fortran::parser::ForallConstructStmt> >(c.value().t).
-                            source);
+                            source, end);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::WhereConstruct> &c) ->
                 Fortran::parser::SourcePosition {
+                        if (end) {
+                            return locationFromSource(
+                                std::get<Fortran::parser::Statement<Fortran::parser::EndWhereStmt> >(c.value().t).
+                                source, end);
+                        }
                         return locationFromSource(
                             std::get<Fortran::parser::Statement<Fortran::parser::WhereConstructStmt> >(c.value().t).
-                            source);
+                            source, end);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::SelectTypeConstruct> &c) ->
                 Fortran::parser::SourcePosition {
+                        if (end) {
+                            return locationFromSource(
+                                std::get<Fortran::parser::Statement<Fortran::parser::EndSelectStmt> >(c.value().t).
+                                source, end);
+                        }
                         return locationFromSource(
-                            std::get<Fortran::parser::Statement<Fortran::parser::SelectTypeStmt> >(c.value().t).source);
+                            std::get<Fortran::parser::Statement<Fortran::parser::SelectTypeStmt> >(c.value().t).source,
+                            end);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::SelectRankConstruct> &c) ->
                 Fortran::parser::SourcePosition {
+                        if (end) {
+                            return locationFromSource(
+                                std::get<Fortran::parser::Statement<Fortran::parser::EndSelectStmt> >(c.value().t).
+                                source, end);
+                        }
                         return locationFromSource(
                             std::get<Fortran::parser::Statement<Fortran::parser::SelectRankStmt> >(c.value().t).
-                            source);
+                            source, end);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::IfConstruct> &c) ->
                 Fortran::parser::SourcePosition {
+                        if (end) {
+                            return locationFromSource(
+                                std::get<Fortran::parser::Statement<Fortran::parser::EndIfStmt> >(c.value().t).source,
+                                end);
+                        }
                         return locationFromSource(
-                            std::get<Fortran::parser::Statement<Fortran::parser::IfThenStmt> >(c.value().t).source);
+                            std::get<Fortran::parser::Statement<Fortran::parser::IfThenStmt> >(c.value().t).source,
+                            end);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::DoConstruct> &c) ->
                 Fortran::parser::SourcePosition {
+                        if (end) {
+                            return locationFromSource(
+                                std::get<Fortran::parser::Statement<Fortran::parser::EndDoStmt> >(c.value().t).source,
+                                end);
+                        }
                         return locationFromSource(
-                            std::get<Fortran::parser::Statement<Fortran::parser::NonLabelDoStmt> >(c.value().t).source);
+                            std::get<Fortran::parser::Statement<Fortran::parser::NonLabelDoStmt> >(c.value().t).source,
+                            end);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::CriticalConstruct> &c) ->
                 Fortran::parser::SourcePosition {
+                        if (end) {
+                            return locationFromSource(
+                                std::get<Fortran::parser::Statement<Fortran::parser::EndCriticalStmt> >(c.value().t).
+                                source,
+                                end);
+                        }
                         return locationFromSource(
-                            std::get<Fortran::parser::Statement<Fortran::parser::CriticalStmt> >(c.value().t).source);
+                            std::get<Fortran::parser::Statement<Fortran::parser::CriticalStmt> >(c.value().t).source,
+                            end);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::ChangeTeamConstruct> &c) ->
                 Fortran::parser::SourcePosition {
+                        if (end) {
+                            return locationFromSource(
+                                std::get<Fortran::parser::Statement<Fortran::parser::EndChangeTeamStmt> >(c.value().t).
+                                source,
+                                end);
+                        }
                         return locationFromSource(
-                            std::get<Fortran::parser::Statement<Fortran::parser::ChangeTeamStmt> >(c.value().t).source);
+                            std::get<Fortran::parser::Statement<Fortran::parser::ChangeTeamStmt> >(c.value().t).source,
+                            end);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::CaseConstruct> &c) ->
                 Fortran::parser::SourcePosition {
+                        if (end) {
+                            return locationFromSource(
+                                std::get<Fortran::parser::Statement<Fortran::parser::EndSelectStmt> >(c.value().t).
+                                source, end);
+                        }
                         return locationFromSource(
-                            std::get<Fortran::parser::Statement<Fortran::parser::SelectCaseStmt> >(c.value().t).source);
+                            std::get<Fortran::parser::Statement<Fortran::parser::SelectCaseStmt> >(c.value().t).source,
+                            end);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::BlockConstruct> &c) ->
                 Fortran::parser::SourcePosition {
+                        if (end) {
+                            return locationFromSource(
+                                std::get<Fortran::parser::Statement<Fortran::parser::EndBlockStmt> >(c.value().t).
+                                source,
+                                end);
+                        }
                         return locationFromSource(
-                            std::get<Fortran::parser::Statement<Fortran::parser::BlockStmt> >(c.value().t).source);
+                            std::get<Fortran::parser::Statement<Fortran::parser::BlockStmt> >(c.value().t).source, end);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::AssociateConstruct> &c) ->
                 Fortran::parser::SourcePosition {
+                        if (end) {
+                            return locationFromSource(
+                                std::get<Fortran::parser::Statement<Fortran::parser::EndAssociateStmt> >(c.value().t).
+                                source, end);
+                        }
                         return locationFromSource(
                             std::get<Fortran::parser::Statement<Fortran::parser::AssociateStmt> >(c.value().t).
-                            source);
+                            source, end);
                     }
                 }, construct.u);
         }
 
-        Fortran::parser::SourcePosition getLocation(const Fortran::parser::ExecutionPartConstruct &construct) {
+        Fortran::parser::SourcePosition getLocation(const Fortran::parser::ExecutionPartConstruct &construct,
+                                                    const bool end) {
             /* Possibilities for ExecutionPartConstruct:
              *   ExecutableConstruct
              *   Statement<common::Indirection<FormatStmt>>
@@ -360,10 +442,10 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
             return std::visit(
                 Fortran::common::visitors{
                     [&](const Fortran::parser::ExecutableConstruct &c) -> Fortran::parser::SourcePosition {
-                        return getLocation(c);
+                        return getLocation(c, end);
                     },
                     [&](const auto &c) -> Fortran::parser::SourcePosition {
-                        return locationFromSource(c.source);
+                        return locationFromSource(c.source, end);
                     },
                     [&](const Fortran::parser::ErrorRecovery &) -> Fortran::parser::SourcePosition {
                         DIE("Should not encounter ErrorRecovery in parse tree");
@@ -375,18 +457,21 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
             if (const Fortran::parser::Block &block = executionPart.v; block.empty()) {
                 llvm::outs() << "WARNING: Execution part empty.\n";
             } else {
-                const Fortran::parser::SourcePosition startLoc{getLocation(block.front())};
-                const Fortran::parser::SourcePosition endLoc{getLocation(block.back())};
+                llvm::outs() << "ExecutionPart num blocks: " << block.size() << "\n";
+                const Fortran::parser::SourcePosition startLoc{getLocation(block.front(), false)};
+                const Fortran::parser::SourcePosition endLoc{getLocation(block.back(), true)};
                 if (isInMainProgram_) {
-                    llvm::outs() << "Program begin \"" << mainProgramName_ << "\" at " << startLoc.line << "\n";
+                    llvm::outs() << "Program begin \"" << mainProgramName_ << "\" at " << startLoc.line << ", " <<
+                            startLoc.column << "\n";
                     addInstrumentationPoint(SaltInstrumentationPointType::PROGRAM_BEGIN, startLoc.line,
                                             mainProgramName_);
                 } else {
-                    llvm::outs() << "Subprogram begin \"" << subprogramName_ << "\" at " << startLoc.line << "\n";
+                    llvm::outs() << "Subprogram begin \"" << subprogramName_ << "\" at " << startLoc.line << ", " <<
+                            startLoc.column << "\n";
                     addInstrumentationPoint(SaltInstrumentationPointType::PROCEDURE_BEGIN, startLoc.line,
                                             subprogramName_);
                 }
-                llvm::outs() << "End at " << endLoc.line << "\n";
+                llvm::outs() << "End at " << endLoc.line << ", " << endLoc.column << "\n";
                 addInstrumentationPoint(SaltInstrumentationPointType::PROCEDURE_END, endLoc.line);
             }
 
@@ -423,7 +508,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         return std::nullopt;
     }
 
-    [[nodiscard]] static std::string getInstrumentationPointString(SaltInstrumentationPointType type) {
+    [[nodiscard]] static std::string getInstrumentationPointString(const SaltInstrumentationPointType type) {
         switch (type) {
             case SaltInstrumentationPointType::PROCEDURE_BEGIN:
                 return "! PROCEDURE BEGIN";

From e781b9a7809dad85ebcb66eead52c0c470be7578 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Thu, 12 Dec 2024 10:43:05 -0500
Subject: [PATCH 060/135] Add Sameer's myhi.f example

---
 tests/fortran/hello.f90 |  3 ++-
 tests/fortran/myhi.f    | 22 ++++++++++++++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)
 create mode 100644 tests/fortran/myhi.f

diff --git a/tests/fortran/hello.f90 b/tests/fortran/hello.f90
index 4d18401..0763445 100644
--- a/tests/fortran/hello.f90
+++ b/tests/fortran/hello.f90
@@ -1,3 +1,4 @@
-program hello 
+program hello
+  implicit none
   print *, "Hello world"
 end
diff --git a/tests/fortran/myhi.f b/tests/fortran/myhi.f
new file mode 100644
index 0000000..883e5c4
--- /dev/null
+++ b/tests/fortran/myhi.f
@@ -0,0 +1,22 @@
+cc hello.f
+cc --------
+cc-----------------------------------------------------------------------------
+
+      subroutine HELLOWORLD(iVal)
+        integer iVal
+
+cc Do something here...
+     print *, "Iteration = ", iVal
+cc       HelloWorld = iVal
+      end
+
+      program main
+        integer i
+
+
+      print *, "test program"
+
+        do 10, i = 1, 10
+        call HELLOWORLD(i)
+10      continue
+      end

From 5ca227ebe9ee894170a1adf82d3c7926a53b30fe Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Thu, 12 Dec 2024 09:44:31 -0800
Subject: [PATCH 061/135] Link fortran_config.yaml to tau_fortran_config.yaml

---
 config_files/fortran_config.yaml | 1 +
 1 file changed, 1 insertion(+)
 create mode 120000 config_files/fortran_config.yaml

diff --git a/config_files/fortran_config.yaml b/config_files/fortran_config.yaml
new file mode 120000
index 0000000..d7cf882
--- /dev/null
+++ b/config_files/fortran_config.yaml
@@ -0,0 +1 @@
+tau_fortran_config.yaml
\ No newline at end of file

From e70e67fb8d2a089068d356f0bfe911d38692562c Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Thu, 12 Dec 2024 14:33:42 -0500
Subject: [PATCH 062/135] Handle test directory creation better & pass
 debugging define on debug builds

---
 CMakeLists.txt | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 40368de..1d11e34 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -188,7 +188,7 @@ add_library(SALT_LLVM_TOOLING INTERFACE)
 target_compile_features(SALT_LLVM_TOOLING INTERFACE cxx_std_17)
 target_include_directories(SALT_LLVM_TOOLING INTERFACE ${LLVM_INCLUDE_DIRS})
 target_compile_definitions(SALT_LLVM_TOOLING INTERFACE ${LLVM_DEFINITIONS_LIST})
-target_compile_options(SALT_LLVM_TOOLING INTERFACE ${USE_RTTI} -Wall -Werror -Wpedantic)
+target_compile_options(SALT_LLVM_TOOLING INTERFACE ${USE_RTTI} -Wall -Wpedantic $<$<CONFIG:Debug>:-Wno-gnu-zero-variadic-macro-arguments>)
 target_link_libraries(SALT_LLVM_TOOLING INTERFACE ${CLANG_LIBS} ${LLVM_LIBS})
 
 #---------------------------------
@@ -234,7 +234,8 @@ target_compile_features(cparse-llvm PUBLIC cxx_std_17)
 target_link_libraries(cparse-llvm PUBLIC SALT_LLVM_TOOLING) # Inherit definitions, compile features, etc.
 # You can try adding -static
 target_link_options(cparse-llvm PUBLIC -Wl,--as-needed -Wl,--no-allow-shlib-undefined -Wl,--no-undefined)
-
+# Turn on debug output if a debug build is being built
+target_compile_definitions(cparse-llvm PUBLIC $<$<CONFIG:Debug>:DEBUG_NO_WAY>)
 # Install the target
 install(TARGETS cparse-llvm DESTINATION bin)
 
@@ -545,7 +546,7 @@ function(compile_instrumented test_src)
     # Fixture to cleanup old profile directories
     add_test(NAME rm_old_${lower_comp}_${TEST_NAME}_profiles
       COMMAND
-      ${CMAKE_COMMAND} -E rm -rf ${TEST_BASE_NAME}.${lower_comp}.d
+      ${CMAKE_COMMAND} -E rm -rf ${TEST_BASE_NAME}.d
       WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${comp}
     )
     set_tests_properties(rm_old_${lower_comp}_${TEST_NAME}_profiles
@@ -553,6 +554,16 @@ function(compile_instrumented test_src)
       FIXTURES_SETUP clean_${lower_comp}_${TEST_NAME}_profiles
       DEPENDS setup_${comp}_dir
     )
+    add_test(NAME mkdir_${TEST_BASE_NAME}.${lower_comp}.d
+      COMMAND
+      ${CMAKE_COMMAND} -E make_directory ${TEST_BASE_NAME}.d
+      WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${comp}
+    )
+    set_tests_properties(mkdir_${TEST_BASE_NAME}.${lower_comp}.d
+      PROPERTIES
+      FIXTURES_SETUP clean_${lower_comp}_${TEST_NAME}_profiles
+      DEPENDS rm_old_${lower_comp}_${TEST_NAME}_profiles
+    )
     # Profile w/ TAU and Verify profiles are created
     add_test(NAME run_${lower_comp}_${TEST_NAME}
       COMMAND
@@ -561,13 +572,13 @@ function(compile_instrumented test_src)
     )
     set_tests_properties(run_${lower_comp}_${TEST_NAME}
       PROPERTIES
-      ENVIRONMENT "TAU_MAKEFILE=${TAU_${comp}_MAKEFILE};PROFILEDIR=${TEST_BASE_NAME}.${lower_comp}.d"
+      ENVIRONMENT "TAU_MAKEFILE=${TAU_${comp}_MAKEFILE};PROFILEDIR=${TEST_BASE_NAME}.d"
       DEPENDS compile_${lower_comp}_${TEST_NAME}
       FIXTURES_REQUIRED clean_${lower_comp}_${TEST_NAME}_profiles
       FAIL_REGULAR_EXPRESSION "[Cc]ommand not found;[Ss]egmentation;[Ff]ault;[Ee]rror"
     )
     add_test(NAME check_${lower_comp}_${TEST_NAME}_profile
-      COMMAND ${CMAKE_COMMAND} -E cat ./${TEST_BASE_NAME}.${lower_comp}.d/profile.0.0.0
+      COMMAND ${CMAKE_COMMAND} -E cat ./${TEST_BASE_NAME}.d/profile.0.0.0
       WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${comp}
     )
     set_tests_properties(check_${lower_comp}_${TEST_NAME}_profile

From e8454c0785dcf2964b0eef76148571ae7e079560 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Thu, 12 Dec 2024 14:35:42 -0500
Subject: [PATCH 063/135] Fixes #21: TAU_SET_NODE always gets called from main

---
 CMakeLists.txt       |  4 ++--
 src/instrumentor.cpp | 12 +++++++++---
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1d11e34..0a258ac 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -463,7 +463,7 @@ function(add_instrumentor_test test_src)
   set_tests_properties(${TEST_NAME}_exists
     PROPERTIES
     DEPENDS ${TEST_NAME}
-    PASS_REGULAR_EXPRESSION "TAU_"
+    PASS_REGULAR_EXPRESSION "TAU_PROFILE_SET_NODE"
   )
 endfunction()
 
@@ -583,7 +583,7 @@ function(compile_instrumented test_src)
     )
     set_tests_properties(check_${lower_comp}_${TEST_NAME}_profile
       PROPERTIES
-      PASS_REGULAR_EXPRESSION "GROUP=\"TAU_USER"
+      PASS_REGULAR_EXPRESSION "GROUP=\"TAU_DEFAULT\""
       FAIL_REGULAR_EXPRESSION "addr=\<0x"
       DEPENDS run_${lower_comp}_${TEST_NAME}
     )
diff --git a/src/instrumentor.cpp b/src/instrumentor.cpp
index 9ef19ce..97a7212 100644
--- a/src/instrumentor.cpp
+++ b/src/instrumentor.cpp
@@ -85,7 +85,7 @@ void dump_inst_loc(inst_loc *loc)
     DPRINT("\tCol:                    %d\n", loc->col);
     DPRINT("\tKind:                 %s\n", loc_typ_strs[loc->kind]);
     DPRINT("\tRet type:         %s\n", loc->return_type);
-    DPRINT("\tName:                 %s\n", loc->func_name);
+    DPRINT("\tName:                 \"%s\"\n", loc->func_name);
     DPRINT("\tTimer:                    %s\n", loc->full_timer_name);
     DPRINT("\tHas args:         %s\n", loc->has_args ? "Yes" : "No");
     DPRINT("\tIs ret ptr:     %s\n", loc->is_return_ptr ? "Yes" : "No");
@@ -143,10 +143,11 @@ std::string ReplacePhrase(std::string str, std::string phrase, std::string to_re
 
 void make_begin_func_code(inst_loc *loc, std::string &code, ryml::Tree yaml_tree)
 {
-
+    /* dump the location */
+    /* dump_inst_loc(loc); */
     if (!loc->skip)
     {
-        if (strcmp(loc->func_name, "main") == 0 && loc->has_args)
+        if (strcmp(loc->func_name, "main") == 0 )
         {
             // Insert on main function
             for (ryml::NodeRef const& child : yaml_tree["main_insert"].children()) 
@@ -155,6 +156,11 @@ void make_begin_func_code(inst_loc *loc, std::string &code, ryml::Tree yaml_tree
                 ss << child.val();
                 std::string updated_str;
                 updated_str  = ReplacePhrase(ss.str(), "${full_timer_name}", loc->full_timer_name);
+                /* handle the case where main does NOT have arguments */
+                if (!loc->has_args)
+                {
+                    updated_str = ReplacePhrase(updated_str, "    TAU_INIT(&argc, &argv);", "/* TAU_INIT() skipped, no arguments */");
+                }
                 code += updated_str + "\n";
             }
         }

From 894f69ce053281aed2290cddb92bbe58fec82001 Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Thu, 12 Dec 2024 12:17:02 -0800
Subject: [PATCH 064/135] Read yaml config file specified by
 SALT_FORTRAN_CONFIG_FILE env var

---
 src/salt_instrument_flang_plugin.cpp | 63 +++++++++++++++++++++++-----
 1 file changed, 52 insertions(+), 11 deletions(-)

diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index 780e58e..2d25b3d 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -21,11 +21,18 @@ limitations under the License.
 
 #include <iostream>
 #include <fstream>
+#include <sstream>
 #include <string>
 #include <variant>
 #include <optional>
 #include <tuple>
 
+
+#define RYML_SINGLE_HDR_DEFINE_NOW
+#define RYML_SHARED
+
+#include <ryml_all.hpp>
+
 #include <clang/Basic/SourceLocation.h>
 
 #include "flang/Frontend/FrontendActions.h"
@@ -35,9 +42,13 @@ limitations under the License.
 #include "flang/Parser/source.h"
 #include "flang/Common/indirection.h"
 
+// TODO Split declarations into a separate header file.
+
+#define SALT_FORTRAN_CONFIG_FILE_VAR "SALT_FORTRAN_CONFIG_FILE"
+#define SALT_FORTRAN_CONFIG_DEFAULT_PATH "config_files/fortran_config.yaml"
+
 using namespace Fortran::frontend;
 
-// TODO Split declarations into a separate header file.
 
 /**
  * The main action of the Salt instrumentor.
@@ -173,8 +184,9 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
 
         // TODO split location-getting routines into a separate file
 
-        Fortran::parser::SourcePosition getLocation(const Fortran::parser::OpenMPDeclarativeConstruct &construct,
-                                                    const bool end) {
+        [[nodiscard]] Fortran::parser::SourcePosition getLocation(
+            const Fortran::parser::OpenMPDeclarativeConstruct &construct,
+            const bool end) {
             // This function is based on the equivalent function in
             // flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
             return std::visit(
@@ -184,7 +196,8 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                 construct.u);
         }
 
-        Fortran::parser::SourcePosition getLocation(const Fortran::parser::OpenMPConstruct &construct, const bool end) {
+        [[nodiscard]] Fortran::parser::SourcePosition getLocation(const Fortran::parser::OpenMPConstruct &construct,
+                                                                  const bool end) {
             // This function is based on the equivalent function in
             // flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
             return std::visit(
@@ -217,7 +230,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                 construct.u);
         }
 
-        Fortran::parser::SourcePosition
+        [[nodiscard]] Fortran::parser::SourcePosition
         getLocation(const Fortran::parser::OpenACCConstruct &construct, const bool end) {
             // This function is based on the equivalent function in
             // flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
@@ -240,8 +253,8 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                 }, construct.u);
         }
 
-        Fortran::parser::SourcePosition getLocation(const Fortran::parser::ExecutableConstruct &construct,
-                                                    const bool end) {
+        [[nodiscard]] Fortran::parser::SourcePosition getLocation(const Fortran::parser::ExecutableConstruct &construct,
+                                                                  const bool end) {
             /* Possibilities for ExecutableConstruct:
                  Statement<ActionStmt>
                  common::Indirection<AssociateConstruct>
@@ -429,8 +442,9 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                 }, construct.u);
         }
 
-        Fortran::parser::SourcePosition getLocation(const Fortran::parser::ExecutionPartConstruct &construct,
-                                                    const bool end) {
+        [[nodiscard]] Fortran::parser::SourcePosition getLocation(
+            const Fortran::parser::ExecutionPartConstruct &construct,
+            const bool end) {
             /* Possibilities for ExecutionPartConstruct:
              *   ExecutableConstruct
              *   Statement<common::Indirection<FormatStmt>>
@@ -488,7 +502,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
 
         // Pass in the parser object from the Action to the Visitor
         // so that we can use it while processing parse tree nodes.
-        [[maybe_unused]] Fortran::parser::Parsing *parsing{nullptr};
+        Fortran::parser::Parsing *parsing{nullptr};
     }; // SaltInstrumentParseTreeVisitor
 
     /**
@@ -498,7 +512,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
      * flang/lib/Semantics/runtime-type-info.cpp for example
      * of getting the source file name.
      */
-    static std::optional<std::string> getInputFilePath(Fortran::parser::Parsing &parsing) {
+    [[nodiscard]] static std::optional<std::string> getInputFilePath(Fortran::parser::Parsing &parsing) {
         const auto &allSources{parsing.allCooked().allSources()};
         if (const auto firstProv{allSources.GetFirstFileProvenance()}) {
             if (const auto *srcFile{allSources.GetSourceFile(firstProv->start())}) {
@@ -508,6 +522,8 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         return std::nullopt;
     }
 
+
+
     [[nodiscard]] static std::string getInstrumentationPointString(const SaltInstrumentationPointType type) {
         switch (type) {
             case SaltInstrumentationPointType::PROCEDURE_BEGIN:
@@ -546,6 +562,26 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         }
     }
 
+    [[nodiscard]] static std::string getConfigPath() {
+        if (const char *val = getenv(SALT_FORTRAN_CONFIG_FILE_VAR)) {
+            return std::string{val};
+        }
+        return SALT_FORTRAN_CONFIG_DEFAULT_PATH;
+    }
+
+    [[nodiscard]] static ryml::Tree getConfigYamlTree(const std::string &configPath) {
+        std::ifstream inputStream{configPath};
+        if (!inputStream) {
+            llvm::errs() << "ERROR: Could not open configuration file " << configPath << "\n"
+                    << "Set " SALT_FORTRAN_CONFIG_FILE_VAR " to path to desired configuration file.\n";
+            std::exit(-3);
+        }
+        std::stringstream configStream;
+        configStream << inputStream.rdbuf();
+        // TODO handle errors if config yaml doesn't parse
+        return ryml::parse_in_arena(ryml::to_csubstr(configStream.str()));
+    }
+
     /**
      * This is the entry point for the plugin.
      */
@@ -564,6 +600,11 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         }
         llvm::outs() << "Have input file: " << *inputFilePath << "\n";
 
+        const std::string configPath{getConfigPath()};
+        ryml::Tree yamlTree = getConfigYamlTree(configPath);
+        //TODO call read yaml func
+
+
         // Get the extension of the input file
         // For input file 'filename.ext' we will output to 'filename.inst.ext'
         std::string inputFileExtension;

From 7f71189619700e05b637a18f36f4d78b96b7acee Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Thu, 12 Dec 2024 15:57:20 -0500
Subject: [PATCH 065/135] Fix myhi.f

---
 tests/fortran/myhi.f | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/fortran/myhi.f b/tests/fortran/myhi.f
index 883e5c4..e200628 100644
--- a/tests/fortran/myhi.f
+++ b/tests/fortran/myhi.f
@@ -6,7 +6,7 @@ subroutine HELLOWORLD(iVal)
         integer iVal
 
 cc Do something here...
-     print *, "Iteration = ", iVal
+         print *, "Iteration = ", iVal
 cc       HelloWorld = iVal
       end
 

From 319c42f7c1b3196db77434af9656460dfead64d7 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Thu, 12 Dec 2024 15:59:19 -0500
Subject: [PATCH 066/135] Add script to wrap the running of the plugin

---
 src/fparse-llvm | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100755 src/fparse-llvm

diff --git a/src/fparse-llvm b/src/fparse-llvm
new file mode 100755
index 0000000..29e298c
--- /dev/null
+++ b/src/fparse-llvm
@@ -0,0 +1,25 @@
+#!/usr/bin/env bash
+
+# Copyright (C) 2024, ParaTools, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+set -o errexit
+set -o nounset
+set -o pipefail
+#set -o verbose
+#set -o xtrace
+
+# This script invokes an LLVM flang frontend plugin to parse and instrument Fortran code
+flang-new -fc1 -load ./libsalt-flang-plugin.so -plugin salt-instrument "${@}"

From 4beb1cb902ffeebc054ad4a072f8f230f540e800 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Thu, 12 Dec 2024 16:28:15 -0500
Subject: [PATCH 067/135] Run fparse-llvm instrumentor on Fortran test files

---
 CMakeLists.txt                      | 27 +++++++++++++++++++++++++++
 src/{fparse-llvm => fparse-llvm.in} |  0
 2 files changed, 27 insertions(+)
 rename src/{fparse-llvm => fparse-llvm.in} (100%)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0a258ac..da67055 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -297,6 +297,10 @@ if(MLIR_FOUND AND Flang_FOUND)
 
     install(TARGETS salt-flang-plugin DESTINATION lib)
 
+    configure_file(${CMAKE_SOURCE_DIR}/src/fparse-llvm.in ${CMAKE_BINARY_DIR}/fparse-llvm @ONLY)
+    install(PROGRAMS ${CMAKE_BINARY_DIR}/fparse-llvm
+      TYPE BIN)
+
 else()
     message(STATUS "Flang not found -- skipping Flang frontend plugin")
 endif()
@@ -593,3 +597,26 @@ endfunction()
 foreach(test_source IN LISTS TESTS_LIST)
   compile_instrumented(${test_source})
 endforeach()
+
+# Add some Fortran tests for SALT-FM
+set(FORTRAN_TESTS_SOURCES_LIST
+  myhi.f
+  cubes.f
+  emptyprog.f90
+  funcsub.f90
+  hello.f90
+  loop_test.f90
+  trivial.f90
+)
+
+foreach(test_source IN LISTS FORTRAN_TESTS_SOURCES_LIST)
+  add_test(NAME instrument_${test_source}
+    COMMAND ./fparse-llvm ${CMAKE_SOURCE_DIR}/tests/fortran/${test_source}
+    )
+  set_tests_properties(instrument_${test_source}
+    PROPERTIES
+    REQUIRED_FILES "${CMAKE_BINARY_DIR}/fparse-llvm"
+    ENVIRONMENT "SALT_FORTRAN_CONFIG_FILE=${CMAKE_SOURCE_DIR}/config_files/tau_fortran_config.yaml"
+    PASS_REGULAR_EXPRESSION "SALT Instrumentor Plugin finished"
+  )
+endforeach()
diff --git a/src/fparse-llvm b/src/fparse-llvm.in
similarity index 100%
rename from src/fparse-llvm
rename to src/fparse-llvm.in

From cf5499c388e018739b0a0fb0205e359833f31c2d Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Thu, 12 Dec 2024 14:02:34 -0800
Subject: [PATCH 068/135] Place instrumentation from config file into
 instrumented output

---
 src/salt_instrument_flang_plugin.cpp | 62 ++++++++++++++++++++--------
 1 file changed, 44 insertions(+), 18 deletions(-)

diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index 2d25b3d..6d35aad 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -26,6 +26,7 @@ limitations under the License.
 #include <variant>
 #include <optional>
 #include <tuple>
+#include <regex>
 
 
 #define RYML_SINGLE_HDR_DEFINE_NOW
@@ -47,6 +48,12 @@ limitations under the License.
 #define SALT_FORTRAN_CONFIG_FILE_VAR "SALT_FORTRAN_CONFIG_FILE"
 #define SALT_FORTRAN_CONFIG_DEFAULT_PATH "config_files/fortran_config.yaml"
 
+#define SALT_FORTRAN_PROGRAM_BEGIN_KEY "program_insert"
+#define SALT_FORTRAN_PROCEDURE_BEGIN_KEY "procedure_begin_insert"
+#define SALT_FORTRAN_PROCEDURE_END_KEY "procedure_end_insert"
+
+#define SALT_FORTRAN_TIMER_NAME_TEMPLATE R"(\$\{full_timer_name\})"
+
 using namespace Fortran::frontend;
 
 
@@ -61,6 +68,8 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         PROCEDURE_END // Stop timer
     };
 
+    typedef std::map<SaltInstrumentationPointType, const std::string> InstrumentationMap;
+
     struct SaltInstrumentationPoint {
         SaltInstrumentationPoint(const SaltInstrumentationPointType instrumentation_point_type,
                                  const int start_line,
@@ -523,22 +532,19 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
     }
 
 
-
-    [[nodiscard]] static std::string getInstrumentationPointString(const SaltInstrumentationPointType type) {
-        switch (type) {
-            case SaltInstrumentationPointType::PROCEDURE_BEGIN:
-                return "! PROCEDURE BEGIN";
-            case SaltInstrumentationPointType::PROGRAM_BEGIN:
-                return "! PROGRAM BEGIN";
-            case SaltInstrumentationPointType::PROCEDURE_END:
-                return "! PROCEDURE END";
-            default:
-                CRASH_NO_CASE;
+    [[nodiscard]] static std::string getInstrumentationPointString(const SaltInstrumentationPoint & instPt,
+                                                                   const InstrumentationMap &instMap) {
+        std::string instTemplate = instMap.at(instPt.instrumentationPointType);
+        if (instPt.timerName.has_value()) {
+            instTemplate = std::regex_replace(instTemplate, std::regex(SALT_FORTRAN_TIMER_NAME_TEMPLATE),
+                                              instPt.timerName.value());
         }
+        return instTemplate;
     }
 
     static void instrumentFile(const std::string &inputFilePath, llvm::raw_pwrite_stream &outputStream,
-                               const SaltInstrumentParseTreeVisitor &visitor) {
+                               const SaltInstrumentParseTreeVisitor &visitor,
+                               const InstrumentationMap & instMap) {
         std::ifstream inputStream{inputFilePath};
         if (!inputStream) {
             llvm::errs() << "ERROR: Could not open input file" << inputFilePath << "\n";
@@ -551,12 +557,12 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         while (std::getline(inputStream, line)) {
             ++lineNum;
             if (instIter != instPts.cend() && instIter->startLine == lineNum && instIter->instrumentBefore()) {
-                outputStream << getInstrumentationPointString(instIter->instrumentationPointType) << "\n";
+                outputStream << getInstrumentationPointString(*instIter, instMap) << "\n";
                 ++instIter;
             }
             outputStream << line << "\n";
             if (instIter != instPts.cend() && instIter->startLine == lineNum && !instIter->instrumentBefore()) {
-                outputStream << getInstrumentationPointString(instIter->instrumentationPointType) << "\n";
+                outputStream << getInstrumentationPointString(*instIter, instMap) << "\n";
                 ++instIter;
             }
         }
@@ -582,6 +588,27 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         return ryml::parse_in_arena(ryml::to_csubstr(configStream.str()));
     }
 
+    [[nodiscard]] static InstrumentationMap getInstrumentationMap(const ryml::Tree &tree) {
+        InstrumentationMap map;
+        std::stringstream ss;
+        // TODO validate yaml, print error if field missing
+        for (const ryml::NodeRef child: tree[SALT_FORTRAN_PROGRAM_BEGIN_KEY].children()) {
+            ss << child.val() << "\n";
+        }
+        map.emplace(SaltInstrumentationPointType::PROGRAM_BEGIN, ss.str());
+        ss.str(""s);
+        for (const ryml::NodeRef child: tree[SALT_FORTRAN_PROCEDURE_BEGIN_KEY].children()) {
+            ss << child.val() << "\n";
+        }
+        map.emplace(SaltInstrumentationPointType::PROCEDURE_BEGIN, ss.str());
+        ss.str(""s);
+        for (const ryml::NodeRef child: tree[SALT_FORTRAN_PROCEDURE_END_KEY].children()) {
+            ss << child.val() << "\n";
+        }
+        map.emplace(SaltInstrumentationPointType::PROCEDURE_END, ss.str());
+        return map;
+    }
+
     /**
      * This is the entry point for the plugin.
      */
@@ -601,9 +628,8 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         llvm::outs() << "Have input file: " << *inputFilePath << "\n";
 
         const std::string configPath{getConfigPath()};
-        ryml::Tree yamlTree = getConfigYamlTree(configPath);
-        //TODO call read yaml func
-
+        const ryml::Tree yamlTree = getConfigYamlTree(configPath);
+        const InstrumentationMap instMap = getInstrumentationMap(yamlTree);
 
         // Get the extension of the input file
         // For input file 'filename.ext' we will output to 'filename.inst.ext'
@@ -623,7 +649,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         Walk(parsing.parseTree(), visitor);
 
         // Use the instrumentation points stored in the Visitor to write the instrumented file.
-        instrumentFile(*inputFilePath, *outputFileStream, visitor);
+        instrumentFile(*inputFilePath, *outputFileStream, visitor, instMap);
 
         outputFileStream->flush();
 

From d3f210d79e03fffa9ee4fb18112acf26666a749c Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Thu, 12 Dec 2024 18:05:00 -0500
Subject: [PATCH 069/135] Add more sophisticated command line parsing for
 fparse-llvm script

---
 src/fparse-llvm.in | 71 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 70 insertions(+), 1 deletion(-)

diff --git a/src/fparse-llvm.in b/src/fparse-llvm.in
index 29e298c..5dd07f6 100755
--- a/src/fparse-llvm.in
+++ b/src/fparse-llvm.in
@@ -21,5 +21,74 @@ set -o pipefail
 #set -o verbose
 #set -o xtrace
 
+# Loop over the arguments and check for the output file -o flag and it's argument
+# If found, set the output file name to the argument following the -o flag and remove the -o flag and argument from the argument list
+# If not found, set the output file name to the first argument with a .inst extension
+args=()
+expecting_output_file=false
+show=false
+for arg in "$@"; do
+    echo "working on arg: $arg"
+    if $expecting_output_file; then
+        output_file="$arg"
+        expecting_output_file=false
+        shift || true
+        #echo "args remaining: $*"
+    elif [[ $arg == -o ]]; then
+        shift
+        expecting_output_file=true
+        #echo "args remaining: $*"
+    elif [[ $arg == -o* ]]; then
+        output_file="${arg#-o}"
+        shift || true
+        #echo "args remaining: $*"
+    elif [[ $arg == *.[Ff]90 || $arg == *.[Ff] || $arg == *.[Ff]03 ]]; then
+        input_file="$arg"
+        shift || true
+        #echo "args remaining: $*"
+    elif [[ $arg == -show ]]; then
+        show=true
+        shift || true
+        #echo "args remaining: $*"
+    elif [[ -n ${arg:-} ]]; then
+        args+=("${arg}")
+        shift || true
+        #echo "args remaining: $*"
+    fi
+done
+
+#echo "args: \"${args[*]}\""
+# print the argument list
+if [[ -z "${input_file:-}" ]]; then
+    input_file="${args[0]}"
+    args=("${args[@]:1}")
+fi
+
+echo "input file: ${input_file:-\"<None given>\" }"
+
+# If no output file is given, emit the output file in the current working directory
+if [[ -z "${output_file:-}" ]]; then
+    if [[ ${input_file} == *.* ]]; then
+      file_ext=".${input_file##*.}"
+    else
+        file_ext=""
+    fi
+    if [[ "${input_file}" == */* ]]; then
+        output_file="${input_file%.*}.inst${file_ext}"
+        output_file="$(pwd)/${output_file##*/}"
+    else
+        output_file="$(pwd)/${input_file%.*}.inst${file_ext}"
+    fi
+    
+fi
+echo "output file: ${output_file:-\"<None given>\" }"
+echo "Remaining Arguments: ${args[*]}"
+
 # This script invokes an LLVM flang frontend plugin to parse and instrument Fortran code
-flang-new -fc1 -load ./libsalt-flang-plugin.so -plugin salt-instrument "${@}"
+cmd=(flang-new -fc1 -load ./libsalt-flang-plugin.so -plugin salt-instrument "${input_file}" -o "${output_file}" "${args[@]}")
+if $show; then
+    echo "cmd: ${cmd[*]}"
+else
+    echo "Running: ${cmd[*]}"
+    "${cmd[@]}"
+fi

From 49760c242b14bf19e2315fbf67ebfd11dad554af Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Thu, 12 Dec 2024 20:49:41 -0500
Subject: [PATCH 070/135] Fix/cleanup some of the fortran tests

There were some sneaky tab characters that needed to be removed.
Also, there seems to be a bug in flang that causes Sameer's myhi.f
example to have some issues. Basically, you can't have a construct
ended by a continue statement as the last line of a subprogram or
program. Adding an unlabeled continue statement between the continue
that terminates the construct and the end statement corresponding to
the procedure or program fixes this.
---
 tests/fortran/cubes.f | 2 +-
 tests/fortran/myhi.f  | 7 ++++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/tests/fortran/cubes.f b/tests/fortran/cubes.f
index 8592ee7..41987cf 100644
--- a/tests/fortran/cubes.f
+++ b/tests/fortran/cubes.f
@@ -8,7 +8,7 @@ program sum_of_cubes
           do u = 0, 9 
           if (100*h + 10*t + u == h**3 + t**3 + u**3) then
              print "(3I1)", h, t, u 
-	        endif
+          endif
           end do 
         end do 
       end do 
diff --git a/tests/fortran/myhi.f b/tests/fortran/myhi.f
index e200628..e4c6a5b 100644
--- a/tests/fortran/myhi.f
+++ b/tests/fortran/myhi.f
@@ -16,7 +16,8 @@ program main
 
       print *, "test program"
 
-        do 10, i = 1, 10
-        call HELLOWORLD(i)
-10      continue
+      do 10, i = 1, 10
+          call HELLOWORLD(i)
+  10   continue
+      continue
       end

From e870cc4386d094dabfeb7db2ed74536dba49992e Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Thu, 12 Dec 2024 20:54:02 -0500
Subject: [PATCH 071/135] Make flang and gfortran happy about f77 continuation
 lines

GFortran doesn't like when f77 lines are terminated by a line
continuation. Flang is perfectly OK with this. As a workaround, you
can put the line continuation inside of the string literal. This works
for flang and gfortran for F77 and modern Fortran.
---
 config_files/tau_fortran_config.yaml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/config_files/tau_fortran_config.yaml b/config_files/tau_fortran_config.yaml
index b26122d..2d6d965 100644
--- a/config_files/tau_fortran_config.yaml
+++ b/config_files/tau_fortran_config.yaml
@@ -6,8 +6,8 @@ instrumentation: tauFortran
 program_insert:
   - "      integer, save :: tauProfileTimer(2) = [0, 0]"
   - "      call TAU_PROFILE_INIT()"
-  - "      call TAU_PROFILE_TIMER(tauProfileTimer, &"
-  - "        \"${full_timer_name}\")"
+  - "      call TAU_PROFILE_TIMER(tauProfileTimer, \"&"
+  - "     &${full_timer_name}\")"
   - "      call TAU_PROFILE_START(tauProfileTimer)"
   - "#ifndef TAU_MPI"
   - "      call  TAU_PROFILE_SET_NODE(0);"
@@ -16,8 +16,8 @@ program_insert:
 
 procedure_begin_insert:
   - "      integer, save :: tauProfileTimer(2) = [0, 0]"
-  - "      call TAU_PROFILE_TIMER(tauProfileTimer, &"
-  - "        \"${full_timer_name}\")"
+  - "      call TAU_PROFILE_TIMER(tauProfileTimer, \"&"
+  - "     &${full_timer_name}\")"
   - "      call TAU_PROFILE_START(tauProfileTimer)"
 
 procedure_end_insert:

From d2eba16c9908078dde023aad0f91eeecf9e1fff1 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Thu, 12 Dec 2024 20:56:36 -0500
Subject: [PATCH 072/135] Add a usage message, handle return codes better &
 smoke test fparse-llvm

---
 CMakeLists.txt     |  9 +++++++++
 src/fparse-llvm.in | 24 ++++++++++++++++++++++--
 2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index da67055..83d7b91 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -609,6 +609,15 @@ set(FORTRAN_TESTS_SOURCES_LIST
   trivial.f90
 )
 
+# Add a smoke test of the fparse-llvm script
+add_test(NAME fparse_llvm_smoke_test
+  COMMAND ${CMAKE_BINARY_DIR}/fparse-llvm -h)
+set_tests_properties(fparse_llvm_smoke_test
+  PROPERTIES
+  LABELS smoke
+  PASS_REGULAR_EXPRESSION "Usage"
+)
+
 foreach(test_source IN LISTS FORTRAN_TESTS_SOURCES_LIST)
   add_test(NAME instrument_${test_source}
     COMMAND ./fparse-llvm ${CMAKE_SOURCE_DIR}/tests/fortran/${test_source}
diff --git a/src/fparse-llvm.in b/src/fparse-llvm.in
index 5dd07f6..a92fa9a 100755
--- a/src/fparse-llvm.in
+++ b/src/fparse-llvm.in
@@ -21,6 +21,22 @@ set -o pipefail
 #set -o verbose
 #set -o xtrace
 
+# Add a help/usage message function
+function usage {
+    echo "Usage: $0 [-h] [-o output_file] [-show] input_file [args]"
+    echo "         -h: print this help message and exit"    
+    echo "         -o output_file: specify the output file name"
+    echo "         -show: print the command line without running it"
+    echo "          input_file: the Fortran source file to parse and instrument"
+    echo "          args: additional arguments to pass to the flang compiler (include flags etc.)"
+
+}
+
+if [[ $# -eq 0 ]]; then
+    usage
+    exit 1
+fi
+
 # Loop over the arguments and check for the output file -o flag and it's argument
 # If found, set the output file name to the argument following the -o flag and remove the -o flag and argument from the argument list
 # If not found, set the output file name to the first argument with a .inst extension
@@ -28,8 +44,11 @@ args=()
 expecting_output_file=false
 show=false
 for arg in "$@"; do
-    echo "working on arg: $arg"
-    if $expecting_output_file; then
+    #echo "working on arg: $arg"
+    if [[ $arg == -h ]]; then
+        usage
+        exit 0
+    elif $expecting_output_file; then
         output_file="$arg"
         expecting_output_file=false
         shift || true
@@ -91,4 +110,5 @@ if $show; then
 else
     echo "Running: ${cmd[*]}"
     "${cmd[@]}"
+    exit $?
 fi

From 684dc465680ff78c09aecf26bd5d774dd5b38473 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Thu, 12 Dec 2024 20:57:35 -0500
Subject: [PATCH 073/135] Add the first tests compiling instrumented Fortran &
 link against TAU

---
 CMakeLists.txt | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 83d7b91..dfd26bf 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -357,6 +357,10 @@ find_program(TAUCXX tau_cxx.sh
   PATHS ${TAU_ARCH_DIR}  PATH_SUFFIXES bin
   REQUIRED
 )
+find_program(TAUF90 tau_f90.sh
+  PATHS ${TAU_ARCH_DIR}  PATH_SUFFIXES bin
+  REQUIRED
+)
 file(GLOB TAU_GCC_LIBUNWIND_DIR ${TAU_ARCH_DIR}/libunwind-*-gcc)
 if(NOT TAU_GCC_LIBUNWIND_DIR)
   message(FATAL_ERROR "libunwind not found for TAU's gcc build")
@@ -629,3 +633,41 @@ foreach(test_source IN LISTS FORTRAN_TESTS_SOURCES_LIST)
     PASS_REGULAR_EXPRESSION "SALT Instrumentor Plugin finished"
   )
 endforeach()
+
+set(fortran_compilers_to_test gfortran flang-new)
+set(TAU_F90_OPTS -optVerbose -optLinkOnly)
+foreach(compiler IN LISTS fortran_compilers_to_test)
+  STRING(TOUPPER ${compiler} upper_comp)
+  add_test(NAME setup_${compiler}_dir
+    COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_BINARY_DIR}/${upper_comp})
+endforeach()
+
+foreach(test_source IN LISTS FORTRAN_TESTS_SOURCES_LIST)
+  # Get the name of the instrumented source file
+  get_filename_component(TEST_BASE_NAME ${test_source} NAME_WLE)
+  get_filename_component(TEST_LANG ${test_source} LAST_EXT)
+  set(TEST_INST_SOURCE ${TEST_BASE_NAME}.inst${TEST_LANG})
+
+  foreach(compiler IN LISTS fortran_compilers_to_test)
+    STRING(TOUPPER ${compiler} upper_comp)
+    if(${compiler} STREQUAL "gfortran")
+      set(mapped_comp GCC)
+    elseif(${compiler} STREQUAL "flang-new")
+      set(mapped_comp CLANG)
+    elseif(${compiler} STREQUAL "flang")
+      set(mapped_comp CLANG)
+    else()
+      message(FATAL_ERROR "Unknown compiler: ${compiler}")
+    endif()
+    add_test(NAME compile_${upper_comp}_${test_source}
+      COMMAND ${TAUF90} ${TAU_F90_OPTS} -o ${TEST_BASE_NAME} ${CMAKE_BINARY_DIR}/${TEST_INST_SOURCE}
+      WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${upper_comp}
+      )
+    set_tests_properties(compile_${upper_comp}_${test_source}
+      PROPERTIES
+      ENVIRONMENT "TAU_MAKEFILE=${TAU_${mapped_comp}_MAKEFILE}"
+      DEPENDS "instrument_${test_source};setup_${compiler}_dir"
+      FAIL_REGULAR_EXPRESSION "[Ee]rror"
+    )
+  endforeach()
+endforeach()
\ No newline at end of file

From 35da0ac8421bcd9c66cc95c4a19cba0f83f65840 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Thu, 12 Dec 2024 21:21:05 -0500
Subject: [PATCH 074/135] Add test to profile fortran test code and check
 profiles

---
 CMakeLists.txt | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index dfd26bf..bf3e8aa 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -669,5 +669,26 @@ foreach(test_source IN LISTS FORTRAN_TESTS_SOURCES_LIST)
       DEPENDS "instrument_${test_source};setup_${compiler}_dir"
       FAIL_REGULAR_EXPRESSION "[Ee]rror"
     )
+    # Profile with TAU and Verify profiles are created
+    add_test(NAME run_${upper_comp}_${test_source}
+      COMMAND ${TAU_EXEC} -T serial,pthread ./${TEST_BASE_NAME}
+      WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${upper_comp}
+    )
+    set_tests_properties(run_${upper_comp}_${test_source}
+      PROPERTIES
+      ENVIRONMENT "TAU_MAKEFILE=${TAU_${mapped_comp}_MAKEFILE};PROFILEDIR=${TEST_BASE_NAME}.d"
+      DEPENDS compile_${upper_comp}_${test_source}
+      FAIL_REGULAR_EXPRESSION "[Cc]ommand not found;[Ss]egmentation;[Ff]ault;[Ee]rror"
+    )
+    add_test(NAME check_${upper_comp}_${test_source}_profile
+      COMMAND ${CMAKE_COMMAND} -E cat ./${TEST_BASE_NAME}.d/profile.0.0.0
+      WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${upper_comp}
+    )
+    set_tests_properties(check_${upper_comp}_${test_source}_profile
+      PROPERTIES
+      PASS_REGULAR_EXPRESSION "GROUP=\"TAU_DEFAULT\""
+      FAIL_REGULAR_EXPRESSION "addr=\<0x"
+      DEPENDS run_${upper_comp}_${test_source}
+    )
   endforeach()
 endforeach()
\ No newline at end of file

From 6873bb1b325d230de83265daca94f5e1f969304b Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Fri, 13 Dec 2024 10:23:45 -0500
Subject: [PATCH 075/135] Emit capitalized F file extensions for instrumented
 code

---
 CMakeLists.txt                       | 16 +++++++++++++---
 src/fparse-llvm.in                   |  4 ++--
 src/salt_instrument_flang_plugin.cpp | 10 ++++++++--
 3 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index bf3e8aa..e3e077b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -640,34 +640,44 @@ foreach(compiler IN LISTS fortran_compilers_to_test)
   STRING(TOUPPER ${compiler} upper_comp)
   add_test(NAME setup_${compiler}_dir
     COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_BINARY_DIR}/${upper_comp})
+  set_tests_properties(setup_${compiler}_dir
+    PROPERTIES
+    FIXTURES_SETUP ${upper_comp}_dir
+  )
 endforeach()
 
 foreach(test_source IN LISTS FORTRAN_TESTS_SOURCES_LIST)
   # Get the name of the instrumented source file
   get_filename_component(TEST_BASE_NAME ${test_source} NAME_WLE)
   get_filename_component(TEST_LANG ${test_source} LAST_EXT)
+  # fparse-llvm is adding preprocessor directives and should emit uppercase file extensions (e.g., .F90)
+  string(TOUPPER ${TEST_LANG} TEST_LANG)
   set(TEST_INST_SOURCE ${TEST_BASE_NAME}.inst${TEST_LANG})
 
   foreach(compiler IN LISTS fortran_compilers_to_test)
     STRING(TOUPPER ${compiler} upper_comp)
     if(${compiler} STREQUAL "gfortran")
       set(mapped_comp GCC)
+      set(EXTRA_FLAGS -Wpedantic -Wextra -Wno-missing-include-dirs -Werror)
     elseif(${compiler} STREQUAL "flang-new")
       set(mapped_comp CLANG)
+      set(EXTRA_FLAGS -Werror)
     elseif(${compiler} STREQUAL "flang")
       set(mapped_comp CLANG)
+      set(EXTRA_FLAGS -Werror)
     else()
       message(FATAL_ERROR "Unknown compiler: ${compiler}")
     endif()
     add_test(NAME compile_${upper_comp}_${test_source}
-      COMMAND ${TAUF90} ${TAU_F90_OPTS} -o ${TEST_BASE_NAME} ${CMAKE_BINARY_DIR}/${TEST_INST_SOURCE}
+      COMMAND ${TAUF90} ${TAU_F90_OPTS} -o ${TEST_BASE_NAME} -Wall ${EXTRA_FLAGS} ${CMAKE_BINARY_DIR}/${TEST_INST_SOURCE}
       WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${upper_comp}
       )
     set_tests_properties(compile_${upper_comp}_${test_source}
       PROPERTIES
       ENVIRONMENT "TAU_MAKEFILE=${TAU_${mapped_comp}_MAKEFILE}"
-      DEPENDS "instrument_${test_source};setup_${compiler}_dir"
-      FAIL_REGULAR_EXPRESSION "[Ee]rror"
+      DEPENDS "instrument_${test_source}"
+      FIXTURES_REQUIRED ${upper_comp}_dir
+      FAIL_REGULAR_EXPRESSION "[^W][Ee]rror"
     )
     # Profile with TAU and Verify profiles are created
     add_test(NAME run_${upper_comp}_${test_source}
diff --git a/src/fparse-llvm.in b/src/fparse-llvm.in
index a92fa9a..bbecda5 100755
--- a/src/fparse-llvm.in
+++ b/src/fparse-llvm.in
@@ -93,10 +93,10 @@ if [[ -z "${output_file:-}" ]]; then
         file_ext=""
     fi
     if [[ "${input_file}" == */* ]]; then
-        output_file="${input_file%.*}.inst${file_ext}"
+        output_file="${input_file%.*}.inst${file_ext//f/F}"
         output_file="$(pwd)/${output_file##*/}"
     else
-        output_file="$(pwd)/${input_file%.*}.inst${file_ext}"
+        output_file="$(pwd)/${input_file%.*}.inst${file_ext//f/F}"
     fi
     
 fi
diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index 6d35aad..9daf83e 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -632,12 +632,18 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         const InstrumentationMap instMap = getInstrumentationMap(yamlTree);
 
         // Get the extension of the input file
-        // For input file 'filename.ext' we will output to 'filename.inst.ext'
+        // For input file 'filename.ext' we will output to 'filename.inst.Ext'
+        // Since we are adding preprocessor directives in the emitted code,
+        // the file extension should be capitalized.
         std::string inputFileExtension;
         if (auto const extPos = inputFilePath->find_last_of('.'); extPos == std::string::npos) {
-            inputFileExtension = "f90"; // Default if for some reason file has no extension
+            inputFileExtension = "F90"; // Default if for some reason file has no extension
         } else {
             inputFileExtension = inputFilePath->substr(extPos + 1); // Part of string past last '.'
+                // Capitalize the first character of inputFileExtension
+            if (!inputFileExtension.empty()) {
+                inputFileExtension[0] = std::toupper(inputFileExtension[0]);
+            }
         }
 
         // Open an output file for writing the instrumented code

From ab3ca29e05cc09238c583030db232fc49de766f1 Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Fri, 13 Dec 2024 15:43:28 -0800
Subject: [PATCH 076/135] Add file name, line numbers to timer names

Not yet handling column numbers, always 1.
End line is assumed to be line after last statement.
---
 src/salt_instrument_flang_plugin.cpp | 36 +++++++++++++++++++++++-----
 1 file changed, 30 insertions(+), 6 deletions(-)

diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index 9daf83e..ccaee15 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -44,6 +44,7 @@ limitations under the License.
 #include "flang/Common/indirection.h"
 
 // TODO Split declarations into a separate header file.
+// TODO Put debug output behind verbose flag
 
 #define SALT_FORTRAN_CONFIG_FILE_VAR "SALT_FORTRAN_CONFIG_FILE"
 #define SALT_FORTRAN_CONFIG_DEFAULT_PATH "config_files/fortran_config.yaml"
@@ -93,7 +94,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
 
     struct SaltInstrumentParseTreeVisitor {
         explicit SaltInstrumentParseTreeVisitor(Fortran::parser::Parsing *parsing)
-            : parsing(parsing) {
+            : mainProgramLine_(0), subProgramLine_(0), parsing(parsing) {
         }
 
         /**
@@ -165,12 +166,14 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
 
         void Post(const Fortran::parser::ProgramStmt &program) {
             mainProgramName_ = program.v.ToString();
-            //const auto &pos = parsing->allCooked().GetSourcePositionRange(program.v.source);
+            mainProgramLine_ = parsing->allCooked().GetSourcePositionRange(program.v.source)->first.line;
             llvm::outs() << "Enter main program: " << mainProgramName_ << "\n";
         }
 
         bool Pre(const Fortran::parser::SubroutineStmt &subroutineStmt) {
-            subprogramName_ = std::get<Fortran::parser::Name>(subroutineStmt.t).ToString();
+            const auto & name =std::get<Fortran::parser::Name>(subroutineStmt.t);
+            subprogramName_ = name.ToString();
+            subProgramLine_ = parsing->allCooked().GetSourcePositionRange(name.source)->first.line;
             llvm::outs() << "Enter Subroutine: " << subprogramName_ << "\n";
             return true;
         }
@@ -181,7 +184,9 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         }
 
         bool Pre(const Fortran::parser::FunctionStmt &functionStmt) {
-            subprogramName_ = std::get<Fortran::parser::Name>(functionStmt.t).ToString();
+            const auto &name = std::get<Fortran::parser::Name>(functionStmt.t);
+            subprogramName_ = name.ToString();
+            subProgramLine_ = parsing->allCooked().GetSourcePositionRange(name.source)->first.line;
             llvm::outs() << "Enter Function: " << subprogramName_ << "\n";
             return true;
         }
@@ -189,10 +194,14 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         void Post(const Fortran::parser::FunctionSubprogram &) {
             llvm::outs() << "Exit Function: " << subprogramName_ << "\n";
             subprogramName_.clear();
+            subProgramLine_ = 0;
         }
 
         // TODO split location-getting routines into a separate file
 
+        // TODO The source position functions can fail if no source position exists
+        //      Need to handle that case better.
+
         [[nodiscard]] Fortran::parser::SourcePosition getLocation(
             const Fortran::parser::OpenMPDeclarativeConstruct &construct,
             const bool end) {
@@ -483,16 +492,28 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                 llvm::outs() << "ExecutionPart num blocks: " << block.size() << "\n";
                 const Fortran::parser::SourcePosition startLoc{getLocation(block.front(), false)};
                 const Fortran::parser::SourcePosition endLoc{getLocation(block.back(), true)};
+                // TODO this assumes that the program end statement ends the next line after
+                //      the last statement, but there could be whitespace/comments. Need to actually
+                //      find the end statement. End statement may not have source position if name
+                //      not listed -- need to find workaround.
+                std::stringstream ss;
+                ss << (isInMainProgram_ ? mainProgramName_ : subprogramName_);
+                ss << " [{" << startLoc.sourceFile->path() << "} {";
+                ss << (isInMainProgram_ ? mainProgramLine_ : subProgramLine_);
+                ss << ",1}-{"; // TODO column number, first char of program/subroutine/function stmt
+                ss << endLoc.line + 1;
+                ss << ",1}]";  // TODO column number, last char of end stmt
+                const std::string timerName{ss.str()};
                 if (isInMainProgram_) {
                     llvm::outs() << "Program begin \"" << mainProgramName_ << "\" at " << startLoc.line << ", " <<
                             startLoc.column << "\n";
                     addInstrumentationPoint(SaltInstrumentationPointType::PROGRAM_BEGIN, startLoc.line,
-                                            mainProgramName_);
+                                            timerName);
                 } else {
                     llvm::outs() << "Subprogram begin \"" << subprogramName_ << "\" at " << startLoc.line << ", " <<
                             startLoc.column << "\n";
                     addInstrumentationPoint(SaltInstrumentationPointType::PROCEDURE_BEGIN, startLoc.line,
-                                            subprogramName_);
+                                            timerName);
                 }
                 llvm::outs() << "End at " << endLoc.line << ", " << endLoc.column << "\n";
                 addInstrumentationPoint(SaltInstrumentationPointType::PROCEDURE_END, endLoc.line);
@@ -505,7 +526,9 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         // Keeps track of current state of traversal
         bool isInMainProgram_{false};
         std::string mainProgramName_;
+        int mainProgramLine_;
         std::string subprogramName_;
+        int subProgramLine_;
 
         std::vector<SaltInstrumentationPoint> instrumentationPoints_;
 
@@ -627,6 +650,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         }
         llvm::outs() << "Have input file: " << *inputFilePath << "\n";
 
+        // Read and parse the yaml configuration file
         const std::string configPath{getConfigPath()};
         const ryml::Tree yamlTree = getConfigYamlTree(configPath);
         const InstrumentationMap instMap = getInstrumentationMap(yamlTree);

From cfbc5530fc45444ec16c5532d7fab72712c3cb19 Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Fri, 13 Dec 2024 15:59:36 -0800
Subject: [PATCH 077/135] Handle case where SALT_FORTRAN_CONFIG_FILE is set,
 but empty.

---
 src/salt_instrument_flang_plugin.cpp | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index ccaee15..3ef9bfc 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -592,8 +592,12 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
     }
 
     [[nodiscard]] static std::string getConfigPath() {
+        // If config path env var is set and non-empty, use that;
+        // otherwise use default.
         if (const char *val = getenv(SALT_FORTRAN_CONFIG_FILE_VAR)) {
-            return std::string{val};
+            if (const std::string configPath{val}; !configPath.empty()) {
+                return configPath;
+            }
         }
         return SALT_FORTRAN_CONFIG_DEFAULT_PATH;
     }
@@ -602,7 +606,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         std::ifstream inputStream{configPath};
         if (!inputStream) {
             llvm::errs() << "ERROR: Could not open configuration file " << configPath << "\n"
-                    << "Set " SALT_FORTRAN_CONFIG_FILE_VAR " to path to desired configuration file.\n";
+                    << "Set $" SALT_FORTRAN_CONFIG_FILE_VAR " to path to desired configuration file.\n";
             std::exit(-3);
         }
         std::stringstream configStream;

From f3c6b935eb58387c14393b6e3c59a4a73c041fdb Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Fri, 13 Dec 2024 16:02:45 -0800
Subject: [PATCH 078/135] Cast result of std::toupper to char

Oddly, std::toupper returns int, not char, which makes
clang-tidy complain about the implicit narrowing.
Make the narrowing explicit to silence the warning.
---
 src/salt_instrument_flang_plugin.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index 3ef9bfc..25952a1 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -670,7 +670,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
             inputFileExtension = inputFilePath->substr(extPos + 1); // Part of string past last '.'
                 // Capitalize the first character of inputFileExtension
             if (!inputFileExtension.empty()) {
-                inputFileExtension[0] = std::toupper(inputFileExtension[0]);
+                inputFileExtension[0] = static_cast<char>(std::toupper(inputFileExtension[0]));
             }
         }
 

From 7854547aa356a7bf37add09d661421e5e3f91c14 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Fri, 13 Dec 2024 21:30:21 -0500
Subject: [PATCH 079/135] Small cleanup to tau_fortran_config.yaml

---
 config_files/tau_fortran_config.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/config_files/tau_fortran_config.yaml b/config_files/tau_fortran_config.yaml
index 2d6d965..9f5414d 100644
--- a/config_files/tau_fortran_config.yaml
+++ b/config_files/tau_fortran_config.yaml
@@ -10,8 +10,8 @@ program_insert:
   - "     &${full_timer_name}\")"
   - "      call TAU_PROFILE_START(tauProfileTimer)"
   - "#ifndef TAU_MPI"
-  - "      call  TAU_PROFILE_SET_NODE(0);"
-  - "#endif ! TAU_MPI"
+  - "      call  TAU_PROFILE_SET_NODE(0)"
+  - "#endif"
 
 
 procedure_begin_insert:

From ce2f564758c6cd72414b5a332d468e26f396133a Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Sat, 14 Dec 2024 16:12:36 -0500
Subject: [PATCH 080/135] Split full timer name into 64 character chunks

Use string literal line continuations compatible with Fortran 77 and
modern free form Fortran.
This ensures old school 72 character line length limits are respected.
---
 config_files/tau_fortran_config.yaml |  8 ++++----
 src/salt_instrument_flang_plugin.cpp | 19 +++++++++++++++++--
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/config_files/tau_fortran_config.yaml b/config_files/tau_fortran_config.yaml
index 9f5414d..8e40eb9 100644
--- a/config_files/tau_fortran_config.yaml
+++ b/config_files/tau_fortran_config.yaml
@@ -6,8 +6,8 @@ instrumentation: tauFortran
 program_insert:
   - "      integer, save :: tauProfileTimer(2) = [0, 0]"
   - "      call TAU_PROFILE_INIT()"
-  - "      call TAU_PROFILE_TIMER(tauProfileTimer, \"&"
-  - "     &${full_timer_name}\")"
+  - "      call TAU_PROFILE_TIMER(tauProfileTimer, \"${full_timer_name}&"
+  - "     &\")"
   - "      call TAU_PROFILE_START(tauProfileTimer)"
   - "#ifndef TAU_MPI"
   - "      call  TAU_PROFILE_SET_NODE(0)"
@@ -16,8 +16,8 @@ program_insert:
 
 procedure_begin_insert:
   - "      integer, save :: tauProfileTimer(2) = [0, 0]"
-  - "      call TAU_PROFILE_TIMER(tauProfileTimer, \"&"
-  - "     &${full_timer_name}\")"
+  - "      call TAU_PROFILE_TIMER(tauProfileTimer, \"${full_timer_name}&"
+  - "     &\")"
   - "      call TAU_PROFILE_START(tauProfileTimer)"
 
 procedure_end_insert:
diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index 25952a1..ad47c5c 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -54,6 +54,8 @@ limitations under the License.
 #define SALT_FORTRAN_PROCEDURE_END_KEY "procedure_end_insert"
 
 #define SALT_FORTRAN_TIMER_NAME_TEMPLATE R"(\$\{full_timer_name\})"
+#define SALT_FORTRAN_STRING_SPLITTER "&\n     &"
+#define SALT_F77_LINE_LENGTH 64
 
 using namespace Fortran::frontend;
 
@@ -503,17 +505,30 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                 ss << ",1}-{"; // TODO column number, first char of program/subroutine/function stmt
                 ss << endLoc.line + 1;
                 ss << ",1}]";  // TODO column number, last char of end stmt
+
                 const std::string timerName{ss.str()};
+
+                // Split the timer name string so that each line fits within Fortran 77's 72 character limit,
+                // and use character string line continuation syntax compatible with Fortran 77 and modern
+                // Fortran.
+                std::stringstream ss2;
+                for (size_t i = 0; i < timerName.size(); i += SALT_F77_LINE_LENGTH) {
+                    ss2 << SALT_FORTRAN_STRING_SPLITTER;
+                    ss2 << timerName.substr(i, SALT_F77_LINE_LENGTH);
+                }
+
+                const std::string splitTimerName{ss2.str()};
+
                 if (isInMainProgram_) {
                     llvm::outs() << "Program begin \"" << mainProgramName_ << "\" at " << startLoc.line << ", " <<
                             startLoc.column << "\n";
                     addInstrumentationPoint(SaltInstrumentationPointType::PROGRAM_BEGIN, startLoc.line,
-                                            timerName);
+                                            splitTimerName);
                 } else {
                     llvm::outs() << "Subprogram begin \"" << subprogramName_ << "\" at " << startLoc.line << ", " <<
                             startLoc.column << "\n";
                     addInstrumentationPoint(SaltInstrumentationPointType::PROCEDURE_BEGIN, startLoc.line,
-                                            timerName);
+                                            splitTimerName);
                 }
                 llvm::outs() << "End at " << endLoc.line << ", " << endLoc.column << "\n";
                 addInstrumentationPoint(SaltInstrumentationPointType::PROCEDURE_END, endLoc.line);

From 51c594ff51341e1cf2cabd8ce8644e0521ac6134 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Sat, 14 Dec 2024 17:21:26 -0500
Subject: [PATCH 081/135] Add some logic to automatically find the config file

---
 src/fparse-llvm.in | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/src/fparse-llvm.in b/src/fparse-llvm.in
index bbecda5..28a32dd 100755
--- a/src/fparse-llvm.in
+++ b/src/fparse-llvm.in
@@ -21,6 +21,20 @@ set -o pipefail
 #set -o verbose
 #set -o xtrace
 
+readonly _VERSION=@SALT_VERSION_MAJOR@.@SALT_VERSION_MINOR@
+readonly _FORTRAN_CONFIG_FILE_BUILD=@CMAKE_SOURCE_DIR@/config_files/tau_fortran_config.yaml
+readonly _FORTRAN_CONFIG_FILE_INSTALL=@SALT_CONFIGFILES_INSTALL_DIR@/tau_fortran_config.yaml
+readonly _INSTALL_DEST=@CMAKE_INSTALL_PREFIX@
+
+# get the absolute path of this script
+readonly _SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# Check if the script is being run from the install directory
+if [[ -f "${_SCRIPT_DIR}/$0" && "${_SCRIPT_DIR}" == ${_INSTALL_DEST}* ]]; then
+    FORTRAN_CONFIG_FILE="${_FORTRAN_CONFIG_FILE_INSTALL}"
+else
+    FORTRAN_CONFIG_FILE="${_FORTRAN_CONFIG_FILE_BUILD}"
+fi
+
 # Add a help/usage message function
 function usage {
     echo "Usage: $0 [-h] [-o output_file] [-show] input_file [args]"
@@ -69,6 +83,10 @@ for arg in "$@"; do
         show=true
         shift || true
         #echo "args remaining: $*"
+    elif [[ $arg == --config_file=* ]]; then
+        FORTRAN_CONFIG_FILE="${arg#--config_file=}"
+        shift || true
+        #echo "args remaining: $*"
     elif [[ -n ${arg:-} ]]; then
         args+=("${arg}")
         shift || true
@@ -104,11 +122,19 @@ echo "output file: ${output_file:-\"<None given>\" }"
 echo "Remaining Arguments: ${args[*]}"
 
 # This script invokes an LLVM flang frontend plugin to parse and instrument Fortran code
-cmd=(flang-new -fc1 -load ./libsalt-flang-plugin.so -plugin salt-instrument "${input_file}" -o "${output_file}" "${args[@]}")
+cmd=(flang-new
+    -fc1
+    -load ./libsalt-flang-plugin.so
+    -plugin salt-instrument
+    "${input_file}" 
+    -o "${output_file}"
+    "${args[@]}")
 if $show; then
+    echo "SALT_FORTRAN_CONFIG_FILE=\"${FORTRAN_CONFIG_FILE}\""
     echo "cmd: ${cmd[*]}"
 else
+    echo "SALT_FORTRAN_CONFIG_FILE=\"${FORTRAN_CONFIG_FILE}\""
     echo "Running: ${cmd[*]}"
-    "${cmd[@]}"
+    SALT_FORTRAN_CONFIG_FILE="${FORTRAN_CONFIG_FILE}" "${cmd[@]}"
     exit $?
 fi

From 8599e95abe5a3564e925f297ede4eae8182727ac Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Sun, 15 Dec 2024 15:17:55 -0500
Subject: [PATCH 082/135] Unify the TAU configuration file: add a Fortran node

---
 config_files/tau_config.yaml | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/config_files/tau_config.yaml b/config_files/tau_config.yaml
index 4bb77fb..03bb209 100644
--- a/config_files/tau_config.yaml
+++ b/config_files/tau_config.yaml
@@ -22,3 +22,26 @@ function_begin_insert:
 function_end_insert:
   - "TAU_PROFILE_STOP(tautimer);"
 
+Fortran:
+# Config variables:
+#   ${full_timer_name}: "procedure_name [file_path {start}-{end}]"
+  instrumentation: tauFortran
+  program_insert:
+    - "      integer, save :: tauProfileTimer(2) = [0, 0]"
+    - "      call TAU_PROFILE_INIT()"
+    - "      call TAU_PROFILE_TIMER(tauProfileTimer, \"${full_timer_name}&"
+    - "     &\")"
+    - "      call TAU_PROFILE_START(tauProfileTimer)"
+    - "#ifndef TAU_MPI"
+    - "      call  TAU_PROFILE_SET_NODE(0)"
+    - "#endif"
+
+
+  procedure_begin_insert:
+    - "      integer, save :: tauProfileTimer(2) = [0, 0]"
+    - "      call TAU_PROFILE_TIMER(tauProfileTimer, \"${full_timer_name}&"
+    - "     &\")"
+    - "      call TAU_PROFILE_START(tauProfileTimer)"
+
+  procedure_end_insert:
+    - "      call TAU_PROFILE_STOP(tauProfileTimer)"
\ No newline at end of file

From e099a560dcf7a9683983196a0657876cb99a2dd0 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Sun, 15 Dec 2024 15:18:54 -0500
Subject: [PATCH 083/135] Fix a (buffer overflow?) bug

There was either a bug in instrumentor.cpp or there is still a bug in
ryml.
When processing the updated config file that includes C and Fortran,
ryml would throw a parse error and show text appended to the last line
of the config file (that's not actually there) in its error output.
---
 src/instrumentor.cpp | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/instrumentor.cpp b/src/instrumentor.cpp
index 97a7212..9093d1c 100644
--- a/src/instrumentor.cpp
+++ b/src/instrumentor.cpp
@@ -1126,9 +1126,16 @@ void instrumentor::instrument()
         ryml::Tree yaml_tree;
         if (FILE *config_file = fopen(configfile.c_str(), "r"))
         {
-            std::string contents = file_get_contents(config_file);
-            yaml_tree = ryml::parse_in_arena(ryml::to_csubstr(contents));
-            ryml::emit(yaml_tree, yaml_tree.root_id(), config_file);
+            std::ifstream inputStream{configfile.c_str()};
+            if (!inputStream) {
+                llvm::errs() << "ERROR: Could not open configuration file " << configfile.c_str() << "\n";
+                std::exit(-3);
+            }
+            std::stringstream configStream;
+            configStream << inputStream.rdbuf();
+            // TODO handle errors if config yaml doesn't parse
+            yaml_tree = ryml::parse_in_arena(ryml::to_csubstr(configStream.str()));
+
             fclose(config_file);
         }
         else

From 6c2699a23fd5be114902cf183743d19f07f369d6 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Sun, 15 Dec 2024 15:21:31 -0500
Subject: [PATCH 084/135] Put the Fortran instrumentation below a Fortran key
 in the config file

This will help allow the unification of the C/C++ and Fortran config
files.
Add some (probably insufficient) error handling for the returned yaml
nodes.
---
 src/salt_instrument_flang_plugin.cpp | 39 +++++++++++++++++++++++++---
 1 file changed, 35 insertions(+), 4 deletions(-)

diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index ad47c5c..c295cd4 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -49,6 +49,7 @@ limitations under the License.
 #define SALT_FORTRAN_CONFIG_FILE_VAR "SALT_FORTRAN_CONFIG_FILE"
 #define SALT_FORTRAN_CONFIG_DEFAULT_PATH "config_files/fortran_config.yaml"
 
+#define SALT_FORTRAN_KEY "Fortran"
 #define SALT_FORTRAN_PROGRAM_BEGIN_KEY "program_insert"
 #define SALT_FORTRAN_PROCEDURE_BEGIN_KEY "procedure_begin_insert"
 #define SALT_FORTRAN_PROCEDURE_END_KEY "procedure_end_insert"
@@ -633,21 +634,51 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
     [[nodiscard]] static InstrumentationMap getInstrumentationMap(const ryml::Tree &tree) {
         InstrumentationMap map;
         std::stringstream ss;
-        // TODO validate yaml, print error if field missing
-        for (const ryml::NodeRef child: tree[SALT_FORTRAN_PROGRAM_BEGIN_KEY].children()) {
+
+        // Access the "Fortran" node
+        ryml::NodeRef fortranNode = tree[SALT_FORTRAN_KEY];
+
+        // Validate that the "Fortran" node exists
+        if (!fortranNode.valid()) {
+            llvm::errs() << "ERROR: '" << SALT_FORTRAN_KEY << "' key not found in the configuration file.\n";
+            std::exit(-3);
+        }
+
+        // Access and process the "program_insert" node
+        ryml::NodeRef programBeginNode = fortranNode[SALT_FORTRAN_PROGRAM_BEGIN_KEY];
+        if (!programBeginNode.valid()) {
+            llvm::errs() << "ERROR: '" << SALT_FORTRAN_PROGRAM_BEGIN_KEY << "' key not found under 'Fortran'.\n";
+            std::exit(-3);
+        }
+        for (const ryml::NodeRef child : programBeginNode.children()) {
             ss << child.val() << "\n";
         }
         map.emplace(SaltInstrumentationPointType::PROGRAM_BEGIN, ss.str());
         ss.str(""s);
-        for (const ryml::NodeRef child: tree[SALT_FORTRAN_PROCEDURE_BEGIN_KEY].children()) {
+
+        // Access and process the "procedure_begin_insert" node
+        ryml::NodeRef procedureBeginNode = fortranNode[SALT_FORTRAN_PROCEDURE_BEGIN_KEY];
+        if (!procedureBeginNode.valid()) {
+            llvm::errs() << "ERROR: '" << SALT_FORTRAN_PROCEDURE_BEGIN_KEY << "' key not found under 'Fortran'.\n";
+            std::exit(-3);
+        }
+        for (const ryml::NodeRef child : procedureBeginNode.children()) {
             ss << child.val() << "\n";
         }
         map.emplace(SaltInstrumentationPointType::PROCEDURE_BEGIN, ss.str());
         ss.str(""s);
-        for (const ryml::NodeRef child: tree[SALT_FORTRAN_PROCEDURE_END_KEY].children()) {
+
+        // Access and process the "procedure_end_insert" node
+        ryml::NodeRef procedureEndNode = fortranNode[SALT_FORTRAN_PROCEDURE_END_KEY];
+        if (!procedureEndNode.valid()) {
+            llvm::errs() << "ERROR: '" << SALT_FORTRAN_PROCEDURE_END_KEY << "' key not found under 'Fortran'.\n";
+            std::exit(-3);
+        }
+        for (const ryml::NodeRef child : procedureEndNode.children()) {
             ss << child.val() << "\n";
         }
         map.emplace(SaltInstrumentationPointType::PROCEDURE_END, ss.str());
+
         return map;
     }
 

From c754fd125677cc842e0b18ae98581c34937a6c64 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Sun, 15 Dec 2024 15:31:40 -0500
Subject: [PATCH 085/135] Use a parent node in the config file for the Fortran
 instrumentation

---
 config_files/tau_fortran_config.yaml | 37 ++++++++++++++--------------
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/config_files/tau_fortran_config.yaml b/config_files/tau_fortran_config.yaml
index 8e40eb9..cc87b4a 100644
--- a/config_files/tau_fortran_config.yaml
+++ b/config_files/tau_fortran_config.yaml
@@ -1,24 +1,23 @@
+Fortran:
 # Config variables:
 #   ${full_timer_name}: "procedure_name [file_path {start}-{end}]"
+  instrumentation: tauFortran
+  program_insert:
+    - "      integer, save :: tauProfileTimer(2) = [0, 0]"
+    - "      call TAU_PROFILE_INIT()"
+    - "      call TAU_PROFILE_TIMER(tauProfileTimer, \"${full_timer_name}&"
+    - "     &\")"
+    - "      call TAU_PROFILE_START(tauProfileTimer)"
+    - "#ifndef TAU_MPI"
+    - "      call  TAU_PROFILE_SET_NODE(0)"
+    - "#endif"
 
-instrumentation: tauFortran
 
-program_insert:
-  - "      integer, save :: tauProfileTimer(2) = [0, 0]"
-  - "      call TAU_PROFILE_INIT()"
-  - "      call TAU_PROFILE_TIMER(tauProfileTimer, \"${full_timer_name}&"
-  - "     &\")"
-  - "      call TAU_PROFILE_START(tauProfileTimer)"
-  - "#ifndef TAU_MPI"
-  - "      call  TAU_PROFILE_SET_NODE(0)"
-  - "#endif"
+  procedure_begin_insert:
+    - "      integer, save :: tauProfileTimer(2) = [0, 0]"
+    - "      call TAU_PROFILE_TIMER(tauProfileTimer, \"${full_timer_name}&"
+    - "     &\")"
+    - "      call TAU_PROFILE_START(tauProfileTimer)"
 
-
-procedure_begin_insert:
-  - "      integer, save :: tauProfileTimer(2) = [0, 0]"
-  - "      call TAU_PROFILE_TIMER(tauProfileTimer, \"${full_timer_name}&"
-  - "     &\")"
-  - "      call TAU_PROFILE_START(tauProfileTimer)"
-
-procedure_end_insert:
-  - "      call TAU_PROFILE_STOP(tauProfileTimer)"
+  procedure_end_insert:
+    - "      call TAU_PROFILE_STOP(tauProfileTimer)"

From 601e2e96bfee048bab469285430f7cc2bc6458b7 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Sun, 15 Dec 2024 15:51:47 -0500
Subject: [PATCH 086/135] Unify TAU config files and remove old Fortran
 specific ones

---
 CMakeLists.txt                       |  2 +-
 config_files/fortran_config.yaml     |  1 -
 config_files/tau_fortran_config.yaml | 23 -----------------------
 src/fparse-llvm.in                   |  4 ++--
 src/salt_instrument_flang_plugin.cpp |  2 +-
 5 files changed, 4 insertions(+), 28 deletions(-)
 delete mode 120000 config_files/fortran_config.yaml
 delete mode 100644 config_files/tau_fortran_config.yaml

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e3e077b..5101802 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -629,7 +629,7 @@ foreach(test_source IN LISTS FORTRAN_TESTS_SOURCES_LIST)
   set_tests_properties(instrument_${test_source}
     PROPERTIES
     REQUIRED_FILES "${CMAKE_BINARY_DIR}/fparse-llvm"
-    ENVIRONMENT "SALT_FORTRAN_CONFIG_FILE=${CMAKE_SOURCE_DIR}/config_files/tau_fortran_config.yaml"
+    ENVIRONMENT "SALT_FORTRAN_CONFIG_FILE=${CMAKE_SOURCE_DIR}/config_files/tau_config.yaml"
     PASS_REGULAR_EXPRESSION "SALT Instrumentor Plugin finished"
   )
 endforeach()
diff --git a/config_files/fortran_config.yaml b/config_files/fortran_config.yaml
deleted file mode 120000
index d7cf882..0000000
--- a/config_files/fortran_config.yaml
+++ /dev/null
@@ -1 +0,0 @@
-tau_fortran_config.yaml
\ No newline at end of file
diff --git a/config_files/tau_fortran_config.yaml b/config_files/tau_fortran_config.yaml
deleted file mode 100644
index cc87b4a..0000000
--- a/config_files/tau_fortran_config.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
-Fortran:
-# Config variables:
-#   ${full_timer_name}: "procedure_name [file_path {start}-{end}]"
-  instrumentation: tauFortran
-  program_insert:
-    - "      integer, save :: tauProfileTimer(2) = [0, 0]"
-    - "      call TAU_PROFILE_INIT()"
-    - "      call TAU_PROFILE_TIMER(tauProfileTimer, \"${full_timer_name}&"
-    - "     &\")"
-    - "      call TAU_PROFILE_START(tauProfileTimer)"
-    - "#ifndef TAU_MPI"
-    - "      call  TAU_PROFILE_SET_NODE(0)"
-    - "#endif"
-
-
-  procedure_begin_insert:
-    - "      integer, save :: tauProfileTimer(2) = [0, 0]"
-    - "      call TAU_PROFILE_TIMER(tauProfileTimer, \"${full_timer_name}&"
-    - "     &\")"
-    - "      call TAU_PROFILE_START(tauProfileTimer)"
-
-  procedure_end_insert:
-    - "      call TAU_PROFILE_STOP(tauProfileTimer)"
diff --git a/src/fparse-llvm.in b/src/fparse-llvm.in
index 28a32dd..1c3328f 100755
--- a/src/fparse-llvm.in
+++ b/src/fparse-llvm.in
@@ -22,8 +22,8 @@ set -o pipefail
 #set -o xtrace
 
 readonly _VERSION=@SALT_VERSION_MAJOR@.@SALT_VERSION_MINOR@
-readonly _FORTRAN_CONFIG_FILE_BUILD=@CMAKE_SOURCE_DIR@/config_files/tau_fortran_config.yaml
-readonly _FORTRAN_CONFIG_FILE_INSTALL=@SALT_CONFIGFILES_INSTALL_DIR@/tau_fortran_config.yaml
+readonly _FORTRAN_CONFIG_FILE_BUILD=@CMAKE_SOURCE_DIR@/config_files/tau_config.yaml
+readonly _FORTRAN_CONFIG_FILE_INSTALL=@SALT_CONFIGFILES_INSTALL_DIR@/tau_config.yaml
 readonly _INSTALL_DEST=@CMAKE_INSTALL_PREFIX@
 
 # get the absolute path of this script
diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index c295cd4..37c52e9 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -47,7 +47,7 @@ limitations under the License.
 // TODO Put debug output behind verbose flag
 
 #define SALT_FORTRAN_CONFIG_FILE_VAR "SALT_FORTRAN_CONFIG_FILE"
-#define SALT_FORTRAN_CONFIG_DEFAULT_PATH "config_files/fortran_config.yaml"
+#define SALT_FORTRAN_CONFIG_DEFAULT_PATH "config_files/tau_config.yaml"
 
 #define SALT_FORTRAN_KEY "Fortran"
 #define SALT_FORTRAN_PROGRAM_BEGIN_KEY "program_insert"

From 7e5d16c4f2f164a22e2e70b17850dc604787122c Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Sun, 15 Dec 2024 17:40:56 -0500
Subject: [PATCH 087/135] Make fparse-llvm work with tau_f90.sh

More unification and tidying/cleanup is required
---
 src/fparse-llvm.in | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/src/fparse-llvm.in b/src/fparse-llvm.in
index 1c3328f..9150558 100755
--- a/src/fparse-llvm.in
+++ b/src/fparse-llvm.in
@@ -39,7 +39,7 @@ fi
 function usage {
     echo "Usage: $0 [-h] [-o output_file] [-show] input_file [args]"
     echo "         -h: print this help message and exit"    
-    echo "         -o output_file: specify the output file name"
+    echo "         --tau_output=output_file: specify the output file name"
     echo "         -show: print the command line without running it"
     echo "          input_file: the Fortran source file to parse and instrument"
     echo "          args: additional arguments to pass to the flang compiler (include flags etc.)"
@@ -54,11 +54,19 @@ fi
 # Loop over the arguments and check for the output file -o flag and it's argument
 # If found, set the output file name to the argument following the -o flag and remove the -o flag and argument from the argument list
 # If not found, set the output file name to the first argument with a .inst extension
+#echo "Passed command line arguments: $*"
+
 args=()
 expecting_output_file=false
+forward_remaining_args=false
 show=false
 for arg in "$@"; do
     #echo "working on arg: $arg"
+    if [[ $forward_remaining_args == true ]]; then
+        args+=("${@}")
+        #echo "args remaining: $*"
+        break
+    fi
     if [[ $arg == -h ]]; then
         usage
         exit 0
@@ -67,7 +75,7 @@ for arg in "$@"; do
         expecting_output_file=false
         shift || true
         #echo "args remaining: $*"
-    elif [[ $arg == -o ]]; then
+    elif [[ $arg == -o || $arg == --tau_output ]]; then
         shift
         expecting_output_file=true
         #echo "args remaining: $*"
@@ -75,6 +83,10 @@ for arg in "$@"; do
         output_file="${arg#-o}"
         shift || true
         #echo "args remaining: $*"
+    elif [[ $arg == --tau_output=* ]]; then
+        output_file="${arg#--tau_output=}"
+        shift || true
+        #echo "args remaining: $*"
     elif [[ $arg == *.[Ff]90 || $arg == *.[Ff] || $arg == *.[Ff]03 ]]; then
         input_file="$arg"
         shift || true
@@ -87,6 +99,10 @@ for arg in "$@"; do
         FORTRAN_CONFIG_FILE="${arg#--config_file=}"
         shift || true
         #echo "args remaining: $*"
+    elif [[ $arg == -- ]]; then
+        shift
+        forward_remaining_args=true
+        #echo "args remaining: $*"
     elif [[ -n ${arg:-} ]]; then
         args+=("${arg}")
         shift || true

From 3d9cc5fffca0917154e41c34ce511aa578c96c24 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Sun, 15 Dec 2024 17:43:08 -0500
Subject: [PATCH 088/135] Add matmult fortran test

(but don't run it in CI or normal testing)
---
 tests/fortran/matmult.f90 | 133 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 133 insertions(+)
 create mode 100644 tests/fortran/matmult.f90

diff --git a/tests/fortran/matmult.f90 b/tests/fortran/matmult.f90
new file mode 100644
index 0000000..e73db34
--- /dev/null
+++ b/tests/fortran/matmult.f90
@@ -0,0 +1,133 @@
+!**********************************************************************
+!     matmult.f90 - simple matrix multiply implementation 
+!************************************************************************
+      subroutine initialize(a, b, n)
+        double precision a(n,n)
+        double precision b(n,n)
+        integer n
+
+! first initialize the A matrix
+        do i = 1,n 
+          do j = 1,n 
+            a(j,i) = i 
+          end do
+        end do
+
+! then initialize the B matrix
+        do i = 1,n 
+          do j = 1,n 
+            b(j,i) = i 
+          end do
+        end do
+
+      end subroutine initialize
+      
+      subroutine multiply_matrices(answer, buffer, b, matsize)
+        double precision buffer(matsize), answer(matsize)
+        double precision b(matsize, matsize)
+        integer i, j
+! multiply the row with the column 
+
+        do i = 1,matsize 
+          answer(i) = 0.0 
+          do j = 1,matsize 
+            answer(i) = answer(i) + buffer(j)*b(j,i) 
+          end do
+        end do
+      end subroutine multiply_matrices
+
+      program main
+      include "mpif.h"
+
+      integer SIZE_OF_MATRIX
+      parameter (SIZE_OF_MATRIX = 1000) 
+! try changing this value to 2000 to get rid of transient effects 
+! at startup
+      double precision a(SIZE_OF_MATRIX,SIZE_OF_MATRIX) 
+      double precision b(SIZE_OF_MATRIX,SIZE_OF_MATRIX) 
+      double precision c(SIZE_OF_MATRIX,SIZE_OF_MATRIX) 
+      double precision buffer(SIZE_OF_MATRIX), answer(SIZE_OF_MATRIX)
+
+      integer myid, master, maxpe, ierr, status(MPI_STATUS_SIZE) 
+      integer i, j, numsent, sender 
+      integer answertype, row, flag
+      integer matsize
+
+      call MPI_INIT( ierr ) 
+      call MPI_COMM_RANK( MPI_COMM_WORLD, myid, ierr ) 
+      call MPI_COMM_SIZE( MPI_COMM_WORLD, maxpe, ierr ) 
+      print *, "Process ", myid, " of ", maxpe, " is active"
+
+      master = 0 
+      matsize = SIZE_OF_MATRIX 
+
+      if ( myid .eq. master ) then 
+! master initializes and then dispatches 
+! initialize a and b 
+        call initialize(a, b, matsize)
+        numsent = 0
+
+! send b to each other process 
+        do i = 1,matsize 
+          call MPI_BCAST(b(1,i), matsize, MPI_DOUBLE_PRECISION, master, &
+             MPI_COMM_WORLD, ierr) 
+        end do
+
+! send a row of a to each other process; tag with row number 
+        do i = 1,maxpe-1 
+          do j = 1,matsize 
+            buffer(j) = a(i,j) 
+          end do
+          call MPI_SEND(buffer, matsize, MPI_DOUBLE_PRECISION, i,       &
+             i, MPI_COMM_WORLD, ierr) 
+          numsent = numsent+1 
+        end do
+
+        do i = 1,matsize 
+          call MPI_RECV(answer, matsize, MPI_DOUBLE_PRECISION,          &
+           MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, status, ierr)
+          sender = status(MPI_SOURCE) 
+          answertype = status(MPI_TAG) 
+          do j = 1,matsize 
+            c(answertype,j) = answer(j) 
+          end do
+
+          if (numsent .lt. matsize) then 
+            do j = 1,matsize 
+              buffer(j) = a(numsent+1,j) 
+            end do
+            call MPI_SEND(buffer, matsize, MPI_DOUBLE_PRECISION, sender,&
+              numsent+1, MPI_COMM_WORLD, ierr) 
+            numsent = numsent+1 
+          else
+            buffer(1) = 1.0 
+            call MPI_SEND(buffer, 1, MPI_DOUBLE_PRECISION, sender, 0,      &
+                 MPI_COMM_WORLD, ierr) 
+          endif 
+        end do
+
+! print out one element of the answer
+        print *, "c(", matsize, ",", matsize, ") = ", c(matsize,matsize)
+      else 
+! workers receive B, then compute rows of C until done message 
+        do i = 1,matsize 
+          call MPI_BCAST(b(1,i), matsize, MPI_DOUBLE_PRECISION, master, &
+                 MPI_COMM_WORLD, ierr) 
+        end do
+        flag = 1
+        do while (flag .ne. 0)
+          call MPI_RECV(buffer, matsize, MPI_DOUBLE_PRECISION, master,  &
+            MPI_ANY_TAG, MPI_COMM_WORLD, status, ierr) 
+          row = status(MPI_TAG) 
+          flag = row
+          if (flag .ne. 0) then
+! multiply the matrices here using C(i,j) += sum (A(i,k)* B(k,j))
+            call multiply_matrices(answer, buffer, b, matsize)
+            call MPI_SEND(answer, matsize, MPI_DOUBLE_PRECISION, master,&
+               row, MPI_COMM_WORLD, ierr) 
+          endif 
+        end do
+      endif
+
+      call MPI_FINALIZE(ierr) 
+      end program main

From bdc3aa7fdbdd038003ccede34d6a07de6fdd9fc4 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Sun, 15 Dec 2024 17:52:46 -0500
Subject: [PATCH 089/135] Hack to get matmult working with tau_f90.sh

Still need to unify C and Fortran instrumentors
---
 build_and_test.sh  | 2 +-
 src/fparse-llvm.in | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/build_and_test.sh b/build_and_test.sh
index b4ebc8e..c608ea5 100755
--- a/build_and_test.sh
+++ b/build_and_test.sh
@@ -10,4 +10,4 @@ set -o verbose
 
 cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -Wdev -Wdeprecated -G Ninja -S . -B build
 cmake --build build --parallel 8 --verbose || cmake --build build --verbose
-( cd build && ( ctest -j --output-on-failure || ctest --rerun-failed --verbose ) )
+#( cd build && ( ctest -j --output-on-failure || ctest --rerun-failed --verbose ) )
diff --git a/src/fparse-llvm.in b/src/fparse-llvm.in
index 9150558..1b12f63 100755
--- a/src/fparse-llvm.in
+++ b/src/fparse-llvm.in
@@ -101,7 +101,10 @@ for arg in "$@"; do
         #echo "args remaining: $*"
     elif [[ $arg == -- ]]; then
         shift
-        forward_remaining_args=true
+        #forward_remaining_args=true
+        #echo "args remaining: $*"
+    elif [[ $arg == -fPIC ]]; then
+        shift || true
         #echo "args remaining: $*"
     elif [[ -n ${arg:-} ]]; then
         args+=("${arg}")

From 8e469e40b2d6c7bc439d8bcf154d4a359dfa8e12 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Sun, 15 Dec 2024 17:55:57 -0500
Subject: [PATCH 090/135] Revert build_and_test.sh script

---
 build_and_test.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/build_and_test.sh b/build_and_test.sh
index c608ea5..b4ebc8e 100755
--- a/build_and_test.sh
+++ b/build_and_test.sh
@@ -10,4 +10,4 @@ set -o verbose
 
 cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -Wdev -Wdeprecated -G Ninja -S . -B build
 cmake --build build --parallel 8 --verbose || cmake --build build --verbose
-#( cd build && ( ctest -j --output-on-failure || ctest --rerun-failed --verbose ) )
+( cd build && ( ctest -j --output-on-failure || ctest --rerun-failed --verbose ) )

From 9ee13c83b52ed9a2be4bb0098130120b49fd545b Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Sun, 15 Dec 2024 18:22:02 -0500
Subject: [PATCH 091/135] Additional hack to get the tau_f90.sh command line
 working

---
 src/fparse-llvm.in | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/fparse-llvm.in b/src/fparse-llvm.in
index 1b12f63..d8226b1 100755
--- a/src/fparse-llvm.in
+++ b/src/fparse-llvm.in
@@ -106,6 +106,9 @@ for arg in "$@"; do
     elif [[ $arg == -fPIC ]]; then
         shift || true
         #echo "args remaining: $*"
+    elif [[ $arg == -g ]]; then
+        shift || true
+        #echo "args remaining: $*"
     elif [[ -n ${arg:-} ]]; then
         args+=("${arg}")
         shift || true

From df7947ab0cf2612a35a8125c4cfc6d27162eb544 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Sun, 15 Dec 2024 16:08:36 -0800
Subject: [PATCH 092/135] fix tau-version used for testing

---
 activate-salt-fm-env.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/activate-salt-fm-env.sh b/activate-salt-fm-env.sh
index 9759dbd..71015e0 100644
--- a/activate-salt-fm-env.sh
+++ b/activate-salt-fm-env.sh
@@ -19,8 +19,8 @@ module load llvm/git.086d8e6bb5daf8de43880ba90258c49e0fabf2c9_19.1.4-zpacv56
 module load gcc/14.2.0-ttkqi3s
 echo "loading ninja"
 module load ninja
-echo "loading tau"
-module load tau/2.34
+#echo "loading tau"
+module load /packages/salt-fm/modules/tau/2.34
 echo "listing loaded modules:"
 module list
 echo "Finished"

From eea98508fc1151fff904435575bfd86e5b0ab5ac Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Mon, 16 Dec 2024 08:22:54 -0500
Subject: [PATCH 093/135] Fix issue where fparse-llvm can't find the plugin .so library

---
 src/fparse-llvm.in | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/fparse-llvm.in b/src/fparse-llvm.in
index d8226b1..12eac17 100755
--- a/src/fparse-llvm.in
+++ b/src/fparse-llvm.in
@@ -21,18 +21,23 @@ set -o pipefail
 #set -o verbose
 #set -o xtrace
 
+readonly _SALTFM_PLUGIN_SO=libsalt-flang-plugin.so
 readonly _VERSION=@SALT_VERSION_MAJOR@.@SALT_VERSION_MINOR@
 readonly _FORTRAN_CONFIG_FILE_BUILD=@CMAKE_SOURCE_DIR@/config_files/tau_config.yaml
 readonly _FORTRAN_CONFIG_FILE_INSTALL=@SALT_CONFIGFILES_INSTALL_DIR@/tau_config.yaml
 readonly _INSTALL_DEST=@CMAKE_INSTALL_PREFIX@
+readonly _SALT_PLUGIN_SO_BUILD=@CMAKE_BINARY_DIR@/${_SALTFM_PLUGIN_SO}
+readonly _SALT_PLUGIN_SO_INSTALL=@SALT_PLUGIN_INSTALL_DIR@/${_SALTFM_PLUGIN_SO}
 
 # get the absolute path of this script
 readonly _SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 # Check if the script is being run from the install directory
 if [[ -f "${_SCRIPT_DIR}/$0" && "${_SCRIPT_DIR}" == ${_INSTALL_DEST}* ]]; then
     FORTRAN_CONFIG_FILE="${_FORTRAN_CONFIG_FILE_INSTALL}"
+    SALT_PLUGIN_SO="${_SALT_PLUGIN_SO_INSTALL}"
 else
     FORTRAN_CONFIG_FILE="${_FORTRAN_CONFIG_FILE_BUILD}"
+    SALT_PLUGIN_SO="${_SALT_PLUGIN_SO_BUILD}"
 fi
 
 # Add a help/usage message function
@@ -146,7 +151,7 @@ echo "Remaining Arguments: ${args[*]}"
 # This script invokes an LLVM flang frontend plugin to parse and instrument Fortran code
 cmd=(flang-new
     -fc1
-    -load ./libsalt-flang-plugin.so
+    -load "${SALT_PLUGIN_SO}"
     -plugin salt-instrument
     "${input_file}" 
     -o "${output_file}"

From fc34baaf6f4cbd9619b5cf941972d11b52d408e2 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Mon, 16 Dec 2024 09:05:45 -0500
Subject: [PATCH 094/135] Sanitize more options passed to fparse-llvm

---
 src/fparse-llvm.in | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/src/fparse-llvm.in b/src/fparse-llvm.in
index 12eac17..ac398ca 100755
--- a/src/fparse-llvm.in
+++ b/src/fparse-llvm.in
@@ -104,6 +104,7 @@ for arg in "$@"; do
         FORTRAN_CONFIG_FILE="${arg#--config_file=}"
         shift || true
         #echo "args remaining: $*"
+    # Begin sanitizing options/flags that cause the frontend plugin to throw an error
     elif [[ $arg == -- ]]; then
         shift
         #forward_remaining_args=true
@@ -114,6 +115,18 @@ for arg in "$@"; do
     elif [[ $arg == -g ]]; then
         shift || true
         #echo "args remaining: $*"
+    elif [[ $arg == -c ]]; then
+        shift || true
+        #echo "args remaining: $*"
+    elif [[ $arg == -s ]]; then
+        shift || true
+        #echo "args remaining: $*"
+    elif [[ $arg == -shared || $arg == -static ]]; then
+        shift || true
+        #echo "args remaining: $*"
+    elif [[ $arg == -Wl,* ]]; then
+        shift || true
+        #echo "args remaining: $*"
     elif [[ -n ${arg:-} ]]; then
         args+=("${arg}")
         shift || true

From c61452b8cc731ac4262e8f2b9a4423acc22bf9e2 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Mon, 16 Dec 2024 13:57:32 -0500
Subject: [PATCH 095/135] Filter flags & arguments forwarded to flang-new -fc1
 ...

---
 src/fparse-llvm.in | 145 +++++++++++++++++++++++++++++++++++----------
 1 file changed, 113 insertions(+), 32 deletions(-)

diff --git a/src/fparse-llvm.in b/src/fparse-llvm.in
index ac398ca..9712a7c 100755
--- a/src/fparse-llvm.in
+++ b/src/fparse-llvm.in
@@ -56,22 +56,105 @@ if [[ $# -eq 0 ]]; then
     exit 1
 fi
 
+# Set a whitelist of flags that can be passed to the flang frontend plugin
+# This is where libtooling for flang could help
+# List derived from output of `flang-new -fc1 -help`
+declare -a -r _WHITELISTED_FLAGS=( 
+  -cpp
+  -dM
+  -E
+  -falternative-parameter-statement
+  -fbackslash
+  -fcolor-diagnostics
+  -ffixed-form
+  -ffree-form
+  -fimplicit-none
+  -flogical-abbreviations
+  -fno-reformat
+  -fopenacc
+  -fopenmp-is-target-device
+  -fopenmp-target-debug
+  -fopenmp
+  -fsyntax-only
+  -funderscoring
+  -fxor-operator
+  -help
+  -init-only
+  -nocpp
+  -pedantic
+  -pthread
+  -P
+  -save-temps
+  -S
+  -version
+  -w
+  "-ffixed-line-length=.*"
+  "-finput-charset=.*"
+  "-fopenmp-version=.*"
+  "-save-temps=.*"
+  "-std=.*"
+)
+_WHITELISTED_REGEX="(${_WHITELISTED_FLAGS[0]})"
+# Loop over the second element of _WHITELISTED_FLAGS to the end and add them to the regex
+for flag in "${_WHITELISTED_FLAGS[@]:1}"; do
+    _WHITELISTED_REGEX="${_WHITELISTED_REGEX}|(${flag})"
+done
+declare -r -a _BLACKLISTED_FLAGS=(
+    "-Wl,.*"
+    "--"
+)
+_BLACKLISTED_REGEX="(${_BLACKLISTED_FLAGS[0]})"
+for flag in "${_BLACKLISTED_FLAGS[@]:1}"; do
+    _BLACKLISTED_REGEX="${_BLACKLISTED_REGEX}|(${flag})"
+done
+declare -r -a _WHITELISTED_FLAGS_WARNING_ARG=(
+    "-W[^l].*"
+)
+_WHITELISTED_WARNING_REGEX="(${_WHITELISTED_FLAGS_WARNING_ARG[0]})"
+for flag in "${_WHITELISTED_FLAGS_WARNING_ARG[@]:1}"; do
+    _WHITELISTED_WARNING_REGEX="${_WHITELISTED_WARNING_REGEX}|(${flag})"
+done
+declare -r -a _WHITELISTED_FLAGS_MAYBE_SPACE_ARG=(
+    "-I"
+    "-J"
+    "-D"
+    "-U"
+)
+for flag in "${_WHITELISTED_FLAGS_MAYBE_SPACE_ARG[@]}"; do
+    _WHITELISTED_REGEX="${_WHITELISTED_REGEX}|(${flag}.+)"
+done
+
+declare -r -a _WHITELISTED_FLAGS_YES_SPACE_ARG=(
+    "-module-dir"
+    "-module-suffix"
+    "-x"
+)
+_WHITELISTED_OPT_ARG_REGEX="(${_WHITELISTED_FLAGS_YES_SPACE_ARG[0]})"
+for flag in "${_WHITELISTED_FLAGS_YES_SPACE_ARG[@]:1}"; do
+    _WHITELISTED_OPT_ARG_REGEX="${_WHITELISTED_OPT_ARG_REGEX}|(${flag})"
+done
+for flag in "${_WHITELISTED_FLAGS_MAYBE_SPACE_ARG[@]}"; do
+    _WHITELISTED_OPT_ARG_REGEX="${_WHITELISTED_OPT_ARG_REGEX}|(${flag})"
+done
+
+# echo "WHITELISTED_REGEX: ${_WHITELISTED_REGEX}"
+# echo "BLACKLISTED_REGEX: ${_BLACKLISTED_REGEX}"
+# echo "WHITELISTED_WARNING_REGEX: ${_WHITELISTED_WARNING_REGEX}"
+# echo "WHITELISTED_OPT_ARG_REGEX: ${_WHITELISTED_OPT_ARG_REGEX}"
+
+
 # Loop over the arguments and check for the output file -o flag and it's argument
 # If found, set the output file name to the argument following the -o flag and remove the -o flag and argument from the argument list
 # If not found, set the output file name to the first argument with a .inst extension
 #echo "Passed command line arguments: $*"
 
 args=()
+original_args=("$@")
 expecting_output_file=false
-forward_remaining_args=false
+expecting_arg_to_forward=false
 show=false
 for arg in "$@"; do
     #echo "working on arg: $arg"
-    if [[ $forward_remaining_args == true ]]; then
-        args+=("${@}")
-        #echo "args remaining: $*"
-        break
-    fi
     if [[ $arg == -h ]]; then
         usage
         exit 0
@@ -80,13 +163,14 @@ for arg in "$@"; do
         expecting_output_file=false
         shift || true
         #echo "args remaining: $*"
-    elif [[ $arg == -o || $arg == --tau_output ]]; then
+    elif $expecting_arg_to_forward; then
+        args+=("$arg")
+        expecting_arg_to_forward=false
         shift
-        expecting_output_file=true
         #echo "args remaining: $*"
-    elif [[ $arg == -o* ]]; then
-        output_file="${arg#-o}"
-        shift || true
+    elif [[ $arg == --tau_output ]]; then
+        shift
+        expecting_output_file=true
         #echo "args remaining: $*"
     elif [[ $arg == --tau_output=* ]]; then
         output_file="${arg#--tau_output=}"
@@ -105,32 +189,29 @@ for arg in "$@"; do
         shift || true
         #echo "args remaining: $*"
     # Begin sanitizing options/flags that cause the frontend plugin to throw an error
-    elif [[ $arg == -- ]]; then
-        shift
-        #forward_remaining_args=true
-        #echo "args remaining: $*"
-    elif [[ $arg == -fPIC ]]; then
-        shift || true
-        #echo "args remaining: $*"
-    elif [[ $arg == -g ]]; then
-        shift || true
-        #echo "args remaining: $*"
-    elif [[ $arg == -c ]]; then
+    elif [[ ${arg:-} =~ $_BLACKLISTED_REGEX ]]; then
         shift || true
-        #echo "args remaining: $*"
-    elif [[ $arg == -s ]]; then
+        echo "Removed blacklisted flag: $arg"
+    elif [[ "${arg:-}" =~ $_WHITELISTED_WARNING_REGEX ]]; then
+        # Only safe to do after ensuring -Wl,* blacklisted flag is removed
+        if [[ "${arg:-}" == -Werror ]]; then
+            args+=("$arg")
+        fi
+        # Flang frontend doesn't (yet) support -Wall, -Wextra, etc. only -Werror, so throw others away
         shift || true
-        #echo "args remaining: $*"
-    elif [[ $arg == -shared || $arg == -static ]]; then
+        #echo "Added whitelisted warning flag: $arg"
+    elif [[ ${arg:-} =~ ${_WHITELISTED_REGEX} ]]; then
+        args+=("$arg")
         shift || true
-        #echo "args remaining: $*"
-    elif [[ $arg == -Wl,* ]]; then
+        #echo "Added whitelisted flag: $arg"
+    elif [[ ${arg:-} =~ ${_WHITELISTED_OPT_ARG_REGEX} ]]; then
+        args+=("$arg")
+        expecting_arg_to_forward=true
         shift || true
-        #echo "args remaining: $*"
-    elif [[ -n ${arg:-} ]]; then
-        args+=("${arg}")
+        #echo "Added whitelisted flag with argument: $arg"
+    else
+        echo "Removed unknown flag: $arg"
         shift || true
-        #echo "args remaining: $*"
     fi
 done
 

From 49a1b74e57f5733edc2dcaf010f74ea1af874cc5 Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Mon, 16 Dec 2024 11:19:39 -0800
Subject: [PATCH 096/135] Instrument return statements.

- Splits handling of Pre and Post for ExecutionPart so that children
(where ReturnStmt is found) are visited before writing the end
instrumentation for the (sub)program. This ensures that instrumentation
points are added in order.

- Visit ExecutableConstruct and access ReturnStmt through it. This is
necessary to get the source location because the location is held in a
wrapper of ReturnStmt rather than in it directly.
---
 src/salt_instrument_flang_plugin.cpp | 115 ++++++++++++++++++---------
 1 file changed, 76 insertions(+), 39 deletions(-)

diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index 37c52e9..dcbb2ae 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -69,7 +69,8 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
     enum class SaltInstrumentationPointType {
         PROGRAM_BEGIN, // Declare profiler, initialize TAU, set node, start timer
         PROCEDURE_BEGIN, // Declare profiler, start timer
-        PROCEDURE_END // Stop timer
+        PROCEDURE_END, // Stop timer on the line after
+        RETURN_STMT //  Stop timer on the line before
     };
 
     typedef std::map<SaltInstrumentationPointType, const std::string> InstrumentationMap;
@@ -84,8 +85,9 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         }
 
         [[nodiscard]] bool instrumentBefore() const {
-            return instrumentationPointType == SaltInstrumentationPointType::PROGRAM_BEGIN || instrumentationPointType
-                   == SaltInstrumentationPointType::PROCEDURE_BEGIN;
+            return instrumentationPointType == SaltInstrumentationPointType::PROGRAM_BEGIN
+                    || instrumentationPointType == SaltInstrumentationPointType::PROCEDURE_BEGIN
+                    || instrumentationPointType == SaltInstrumentationPointType::RETURN_STMT;
         }
 
 
@@ -488,56 +490,87 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                 }, construct.u);
         }
 
+        // Split handling of ExecutionPart into two phases
+        // so that we insert Instrumentation Points in order
+        // even if we separately insert them in visitors for
+        // children of ExecutionPart.
         bool Pre(const Fortran::parser::ExecutionPart &executionPart) {
+            handleExecutionPart(executionPart, true);
+            return true;
+        }
+
+        void Post(const Fortran::parser::ExecutionPart &executionPart) {
+            handleExecutionPart(executionPart, false);
+        }
+
+        void handleExecutionPart(const Fortran::parser::ExecutionPart &executionPart, bool pre) {
             if (const Fortran::parser::Block &block = executionPart.v; block.empty()) {
                 llvm::outs() << "WARNING: Execution part empty.\n";
             } else {
-                llvm::outs() << "ExecutionPart num blocks: " << block.size() << "\n";
                 const Fortran::parser::SourcePosition startLoc{getLocation(block.front(), false)};
                 const Fortran::parser::SourcePosition endLoc{getLocation(block.back(), true)};
-                // TODO this assumes that the program end statement ends the next line after
-                //      the last statement, but there could be whitespace/comments. Need to actually
-                //      find the end statement. End statement may not have source position if name
-                //      not listed -- need to find workaround.
-                std::stringstream ss;
-                ss << (isInMainProgram_ ? mainProgramName_ : subprogramName_);
-                ss << " [{" << startLoc.sourceFile->path() << "} {";
-                ss << (isInMainProgram_ ? mainProgramLine_ : subProgramLine_);
-                ss << ",1}-{"; // TODO column number, first char of program/subroutine/function stmt
-                ss << endLoc.line + 1;
-                ss << ",1}]";  // TODO column number, last char of end stmt
-
-                const std::string timerName{ss.str()};
-
-                // Split the timername string so that it will fit between Fortran 77's 72 character limit,
-                // and use character string line continuation syntax compatible with Fortran 77 and modern
-                // Fortran.
-                std::stringstream ss2;
-                for (size_t i = 0; i < timerName.size(); i += SALT_F77_LINE_LENGTH) {
-                    ss2 << SALT_FORTRAN_STRING_SPLITTER;
-                    ss2 << timerName.substr(i, SALT_F77_LINE_LENGTH);
-                }
 
-                const std::string splitTimerName{ss2.str()};
+                // Insert the timer start in the Pre phase (when we first visit the node)
+                // and the timer stop in the Post phase (when we return after visiting the node's children).
+                if (pre) {
+                    // TODO this assumes that the program end statement ends the next line after
+                    //      the last statement, but there could be whitespace/comments. Need to actually
+                    //      find the end statement. End statement may not have source position if name
+                    //      not listed -- need to find workaround.
+                    std::stringstream ss;
+                    ss << (isInMainProgram_ ? mainProgramName_ : subprogramName_);
+                    ss << " [{" << startLoc.sourceFile->path() << "} {";
+                    ss << (isInMainProgram_ ? mainProgramLine_ : subProgramLine_);
+                    ss << ",1}-{"; // TODO column number, first char of program/subroutine/function stmt
+                    ss << endLoc.line + 1;
+                    ss << ",1}]";  // TODO column number, last char of end stmt
+
+                    const std::string timerName{ss.str()};
+
+                    // Split the timername string so that it will fit between Fortran 77's 72 character limit,
+                    // and use character string line continuation syntax compatible with Fortran 77 and modern
+                    // Fortran.
+                    std::stringstream ss2;
+                    for (size_t i = 0; i < timerName.size(); i += SALT_F77_LINE_LENGTH) {
+                        ss2 << SALT_FORTRAN_STRING_SPLITTER;
+                        ss2 << timerName.substr(i, SALT_F77_LINE_LENGTH);
+                    }
 
-                if (isInMainProgram_) {
-                    llvm::outs() << "Program begin \"" << mainProgramName_ << "\" at " << startLoc.line << ", " <<
-                            startLoc.column << "\n";
-                    addInstrumentationPoint(SaltInstrumentationPointType::PROGRAM_BEGIN, startLoc.line,
-                                            splitTimerName);
+                    const std::string splitTimerName{ss2.str()};
+
+                    if (isInMainProgram_) {
+                        llvm::outs() << "Program begin \"" << mainProgramName_ << "\" at " << startLoc.line << ", " <<
+                                startLoc.column << "\n";
+                        addInstrumentationPoint(SaltInstrumentationPointType::PROGRAM_BEGIN, startLoc.line,
+                                                splitTimerName);
+                    } else {
+                        llvm::outs() << "Subprogram begin \"" << subprogramName_ << "\" at " << startLoc.line << ", " <<
+                                startLoc.column << "\n";
+                        addInstrumentationPoint(SaltInstrumentationPointType::PROCEDURE_BEGIN, startLoc.line,
+                                                splitTimerName);
+                    }
                 } else {
-                    llvm::outs() << "Subprogram begin \"" << subprogramName_ << "\" at " << startLoc.line << ", " <<
-                            startLoc.column << "\n";
-                    addInstrumentationPoint(SaltInstrumentationPointType::PROCEDURE_BEGIN, startLoc.line,
-                                            splitTimerName);
+                    llvm::outs() << "End at " << endLoc.line << ", " << endLoc.column << "\n";
+                    addInstrumentationPoint(SaltInstrumentationPointType::PROCEDURE_END, endLoc.line);
                 }
-                llvm::outs() << "End at " << endLoc.line << ", " << endLoc.column << "\n";
-                addInstrumentationPoint(SaltInstrumentationPointType::PROCEDURE_END, endLoc.line);
             }
+        }
 
+        // A ReturnStmt does not have a source, so we instead need to get access to the wrapper Statement that does.
+        // Here we get the ReturnStmt through ExecutableConstruct -> Statement<ActionStmt> -> Indirection<ReturnStmt>
+        bool Pre(const Fortran::parser::ExecutableConstruct & execConstruct) {
+            if (const auto actionStmt = std::get_if<Fortran::parser::Statement<Fortran::parser::ActionStmt> >(
+                &execConstruct.u)) {
+                if (std::holds_alternative<Fortran::common::Indirection<Fortran::parser::ReturnStmt>>(actionStmt->statement.u)) {
+                    const Fortran::parser::SourcePosition returnPos{ locationFromSource(actionStmt->source, false)};
+                    llvm::outs() << "Return statement at " << returnPos.line << "\n";
+                    addInstrumentationPoint(SaltInstrumentationPointType::RETURN_STMT, returnPos.line);
+                }
+            }
             return true;
         }
 
+
     private:
         // Keeps track of current state of traversal
         bool isInMainProgram_{false};
@@ -573,9 +606,10 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
 
     [[nodiscard]] static std::string getInstrumentationPointString(const SaltInstrumentationPoint & instPt,
                                                                    const InstrumentationMap &instMap) {
+        static std::regex timerNameRegex{SALT_FORTRAN_TIMER_NAME_TEMPLATE};
         std::string instTemplate = instMap.at(instPt.instrumentationPointType);
         if (instPt.timerName.has_value()) {
-            instTemplate = std::regex_replace(instTemplate, std::regex(SALT_FORTRAN_TIMER_NAME_TEMPLATE),
+            instTemplate = std::regex_replace(instTemplate, timerNameRegex,
                                               instPt.timerName.value());
         }
         return instTemplate;
@@ -678,6 +712,9 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
             ss << child.val() << "\n";
         }
         map.emplace(SaltInstrumentationPointType::PROCEDURE_END, ss.str());
+        // The return statement uses the same text as procedure end,
+        // but is inserted before the line instead of after.
+        map.emplace(SaltInstrumentationPointType::RETURN_STMT, ss.str());
 
         return map;
     }

From fcc0e72c0c497d3c02a331f654b3173a516ad504 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Mon, 16 Dec 2024 13:30:17 -0800
Subject: [PATCH 097/135] Activate MPICH in UO salt-fm environment

---
 activate-salt-fm-env.sh | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/activate-salt-fm-env.sh b/activate-salt-fm-env.sh
index 71015e0..a9be431 100644
--- a/activate-salt-fm-env.sh
+++ b/activate-salt-fm-env.sh
@@ -12,10 +12,9 @@ echo "purging loaded modules"
 module purge
 echo "listing loaded modules:"
 module list
-#echo "loading llvm and mpich:"
-echo "loading llvm and gcc:"
+echo "loading llvm, gcc and mpich:"
 module load llvm/git.086d8e6bb5daf8de43880ba90258c49e0fabf2c9_19.1.4-zpacv56
-#module load mpich/4.2.3-ugxzfxf
+module load mpich/4.2.3-ugxzfxf
 module load gcc/14.2.0-ttkqi3s
 echo "loading ninja"
 module load ninja

From 44638009537678f2c0eaf319d96f71af24e59829 Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Tue, 17 Dec 2024 15:08:18 -0800
Subject: [PATCH 098/135] Crash if we try to use a nonexistent source location.

Previously we would get a garbage value/exhibit undefined behavior if
the source location was absent. Use checked value() to get value from
std::optional, which checks if the value exists and throws an exception
if not.
---
 src/salt_instrument_flang_plugin.cpp | 103 +++++++++++++++------------
 1 file changed, 58 insertions(+), 45 deletions(-)

diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index dcbb2ae..3bed23d 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -125,13 +125,15 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
          * If `end` is set, returns the ending position of the block.
          * If `end` is not set (and by default), returns the starting position of the block.
          */
-        [[nodiscard]] Fortran::parser::SourcePosition locationFromSource(
+        [[nodiscard]] std::optional<Fortran::parser::SourcePosition> locationFromSource(
             const Fortran::parser::CharBlock &charBlock, const bool end) const {
-            const auto &sourceRange{parsing->allCooked().GetSourcePositionRange(charBlock)};
-            if (end) {
-                return sourceRange->second;
+            if (const auto &sourceRange{parsing->allCooked().GetSourcePositionRange(charBlock)}; sourceRange.has_value()) {
+                if (end) {
+                    return sourceRange->second;
+                }
+                return sourceRange->first;
             }
-            return sourceRange->first;
+            return std::nullopt;
         }
 
         // Default empty visit functions for otherwise unhandled types.
@@ -207,37 +209,37 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         // TODO The source position functions can fail if no source position exists
         //      Need to handle that case better.
 
-        [[nodiscard]] Fortran::parser::SourcePosition getLocation(
+        [[nodiscard]] std::optional<Fortran::parser::SourcePosition> getLocation(
             const Fortran::parser::OpenMPDeclarativeConstruct &construct,
             const bool end) {
             // This function is based on the equivalent function in
             // flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
             return std::visit(
-                [&](const auto &o) -> Fortran::parser::SourcePosition {
+                [&](const auto &o) -> std::optional<Fortran::parser::SourcePosition> {
                     return locationFromSource(o.source, end);
                 },
                 construct.u);
         }
 
-        [[nodiscard]] Fortran::parser::SourcePosition getLocation(const Fortran::parser::OpenMPConstruct &construct,
+        [[nodiscard]] std::optional<Fortran::parser::SourcePosition> getLocation(const Fortran::parser::OpenMPConstruct &construct,
                                                                   const bool end) {
             // This function is based on the equivalent function in
             // flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
             return std::visit(
                 Fortran::common::visitors{
-                    [&](const Fortran::parser::OpenMPStandaloneConstruct &c) -> Fortran::parser::SourcePosition {
+                    [&](const Fortran::parser::OpenMPStandaloneConstruct &c) -> std::optional<Fortran::parser::SourcePosition> {
                         return locationFromSource(c.source, end);
                     },
                     // OpenMPSectionsConstruct, OpenMPLoopConstruct,
                     // OpenMPBlockConstruct, OpenMPCriticalConstruct Get the source from
                     // the directive field.
-                    [&](const auto &c) -> Fortran::parser::SourcePosition {
+                    [&](const auto &c) -> std::optional<Fortran::parser::SourcePosition> {
                         const Fortran::parser::CharBlock &source{std::get<0>(c.t).source};
                         return locationFromSource(source, end);
                     },
-                    [&](const Fortran::parser::OpenMPAtomicConstruct &c) -> Fortran::parser::SourcePosition {
+                    [&](const Fortran::parser::OpenMPAtomicConstruct &c) -> std::optional<Fortran::parser::SourcePosition> {
                         return std::visit(
-                            [&](const auto &o) -> Fortran::parser::SourcePosition {
+                            [&](const auto &o) -> std::optional<Fortran::parser::SourcePosition> {
                                 const Fortran::parser::CharBlock &source{
                                     std::get<Fortran::parser::Verbatim>(o.t).source
                                 };
@@ -245,7 +247,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                             },
                             c.u);
                     },
-                    [&](const Fortran::parser::OpenMPSectionConstruct &c) -> Fortran::parser::SourcePosition {
+                    [&](const Fortran::parser::OpenMPSectionConstruct &c) -> std::optional<Fortran::parser::SourcePosition> {
                         const Fortran::parser::CharBlock &source{c.source};
                         return locationFromSource(source, end);
                     },
@@ -253,30 +255,30 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                 construct.u);
         }
 
-        [[nodiscard]] Fortran::parser::SourcePosition
+        [[nodiscard]] std::optional<Fortran::parser::SourcePosition>
         getLocation(const Fortran::parser::OpenACCConstruct &construct, const bool end) {
             // This function is based on the equivalent function in
             // flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
             return std::visit(
                 Fortran::common::visitors{
-                    [&](const auto &c) -> Fortran::parser::SourcePosition {
+                    [&](const auto &c) -> std::optional<Fortran::parser::SourcePosition> {
                         return locationFromSource(c.source, end);
                     },
-                    [&](const Fortran::parser::OpenACCBlockConstruct &c) -> Fortran::parser::SourcePosition {
+                    [&](const Fortran::parser::OpenACCBlockConstruct &c) -> std::optional<Fortran::parser::SourcePosition> {
                         if (end) {
                             return locationFromSource(std::get<Fortran::parser::AccEndBlockDirective>(c.t).source,
                                                       end);
                         }
                         return locationFromSource(std::get<Fortran::parser::AccBeginBlockDirective>(c.t).source, end);
                     },
-                    [&](const Fortran::parser::OpenACCLoopConstruct &c) -> Fortran::parser::SourcePosition {
+                    [&](const Fortran::parser::OpenACCLoopConstruct &c) -> std::optional<Fortran::parser::SourcePosition> {
                         // TODO handle end case (complicated because end statement and do construct are optional)
                         return locationFromSource(std::get<Fortran::parser::AccBeginLoopDirective>(c.t).source, end);
                     },
                 }, construct.u);
         }
 
-        [[nodiscard]] Fortran::parser::SourcePosition getLocation(const Fortran::parser::ExecutableConstruct &construct,
+        [[nodiscard]] std::optional<Fortran::parser::SourcePosition> getLocation(const Fortran::parser::ExecutableConstruct &construct,
                                                                   const bool end) {
             /* Possibilities for ExecutableConstruct:
                  Statement<ActionStmt>
@@ -302,11 +304,11 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
             */
             return std::visit(
                 Fortran::common::visitors{
-                    [&](const auto &c) -> Fortran::parser::SourcePosition {
+                    [&](const auto &c) -> std::optional<Fortran::parser::SourcePosition> {
                         return locationFromSource(c.source, end);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::CUFKernelDoConstruct> &c) ->
-                Fortran::parser::SourcePosition {
+                std::optional<Fortran::parser::SourcePosition> {
                         if (end) {
                             const auto &optionalConstruct = std::get<std::optional<Fortran::parser::DoConstruct> >(
                                 c.value().t);
@@ -320,27 +322,27 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                             std::get<Fortran::parser::CUFKernelDoConstruct::Directive>(c.value().t).source, end);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::OmpEndLoopDirective> &c) ->
-                Fortran::parser::SourcePosition {
+                std::optional<Fortran::parser::SourcePosition> {
                         return locationFromSource(c.value().source, end);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::OpenMPConstruct> &c) ->
-                Fortran::parser::SourcePosition {
+                std::optional<Fortran::parser::SourcePosition> {
                         return getLocation(c.value(), end);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::AccEndCombinedDirective> &c) ->
-                Fortran::parser::SourcePosition {
+                std::optional<Fortran::parser::SourcePosition> {
                         return locationFromSource(c.value().source, end);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::OpenACCConstruct> &c) ->
-                Fortran::parser::SourcePosition {
+                std::optional<Fortran::parser::SourcePosition> {
                         return getLocation(c.value(), end);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::CompilerDirective> &c)->
-                Fortran::parser::SourcePosition {
+                std::optional<Fortran::parser::SourcePosition> {
                         return locationFromSource(c.value().source, end);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::ForallConstruct> &c) ->
-                Fortran::parser::SourcePosition {
+                std::optional<Fortran::parser::SourcePosition> {
                         if (end) {
                             return locationFromSource(
                                 std::get<Fortran::parser::Statement<Fortran::parser::EndForallStmt> >(c.value().t).
@@ -351,7 +353,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                             source, end);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::WhereConstruct> &c) ->
-                Fortran::parser::SourcePosition {
+                std::optional<Fortran::parser::SourcePosition> {
                         if (end) {
                             return locationFromSource(
                                 std::get<Fortran::parser::Statement<Fortran::parser::EndWhereStmt> >(c.value().t).
@@ -362,7 +364,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                             source, end);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::SelectTypeConstruct> &c) ->
-                Fortran::parser::SourcePosition {
+                std::optional<Fortran::parser::SourcePosition> {
                         if (end) {
                             return locationFromSource(
                                 std::get<Fortran::parser::Statement<Fortran::parser::EndSelectStmt> >(c.value().t).
@@ -373,7 +375,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                             end);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::SelectRankConstruct> &c) ->
-                Fortran::parser::SourcePosition {
+                std::optional<Fortran::parser::SourcePosition> {
                         if (end) {
                             return locationFromSource(
                                 std::get<Fortran::parser::Statement<Fortran::parser::EndSelectStmt> >(c.value().t).
@@ -384,7 +386,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                             source, end);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::IfConstruct> &c) ->
-                Fortran::parser::SourcePosition {
+                std::optional<Fortran::parser::SourcePosition> {
                         if (end) {
                             return locationFromSource(
                                 std::get<Fortran::parser::Statement<Fortran::parser::EndIfStmt> >(c.value().t).source,
@@ -395,7 +397,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                             end);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::DoConstruct> &c) ->
-                Fortran::parser::SourcePosition {
+                std::optional<Fortran::parser::SourcePosition> {
                         if (end) {
                             return locationFromSource(
                                 std::get<Fortran::parser::Statement<Fortran::parser::EndDoStmt> >(c.value().t).source,
@@ -406,7 +408,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                             end);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::CriticalConstruct> &c) ->
-                Fortran::parser::SourcePosition {
+                std::optional<Fortran::parser::SourcePosition> {
                         if (end) {
                             return locationFromSource(
                                 std::get<Fortran::parser::Statement<Fortran::parser::EndCriticalStmt> >(c.value().t).
@@ -418,7 +420,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                             end);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::ChangeTeamConstruct> &c) ->
-                Fortran::parser::SourcePosition {
+                std::optional<Fortran::parser::SourcePosition> {
                         if (end) {
                             return locationFromSource(
                                 std::get<Fortran::parser::Statement<Fortran::parser::EndChangeTeamStmt> >(c.value().t).
@@ -430,7 +432,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                             end);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::CaseConstruct> &c) ->
-                Fortran::parser::SourcePosition {
+                std::optional<Fortran::parser::SourcePosition> {
                         if (end) {
                             return locationFromSource(
                                 std::get<Fortran::parser::Statement<Fortran::parser::EndSelectStmt> >(c.value().t).
@@ -441,7 +443,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                             end);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::BlockConstruct> &c) ->
-                Fortran::parser::SourcePosition {
+                std::optional<Fortran::parser::SourcePosition> {
                         if (end) {
                             return locationFromSource(
                                 std::get<Fortran::parser::Statement<Fortran::parser::EndBlockStmt> >(c.value().t).
@@ -452,7 +454,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                             std::get<Fortran::parser::Statement<Fortran::parser::BlockStmt> >(c.value().t).source, end);
                     },
                     [&](const Fortran::common::Indirection<Fortran::parser::AssociateConstruct> &c) ->
-                Fortran::parser::SourcePosition {
+                std::optional<Fortran::parser::SourcePosition> {
                         if (end) {
                             return locationFromSource(
                                 std::get<Fortran::parser::Statement<Fortran::parser::EndAssociateStmt> >(c.value().t).
@@ -465,7 +467,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                 }, construct.u);
         }
 
-        [[nodiscard]] Fortran::parser::SourcePosition getLocation(
+        [[nodiscard]] std::optional<Fortran::parser::SourcePosition> getLocation(
             const Fortran::parser::ExecutionPartConstruct &construct,
             const bool end) {
             /* Possibilities for ExecutionPartConstruct:
@@ -478,13 +480,13 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
              */
             return std::visit(
                 Fortran::common::visitors{
-                    [&](const Fortran::parser::ExecutableConstruct &c) -> Fortran::parser::SourcePosition {
+                    [&](const Fortran::parser::ExecutableConstruct &c) -> std::optional<Fortran::parser::SourcePosition> {
                         return getLocation(c, end);
                     },
-                    [&](const auto &c) -> Fortran::parser::SourcePosition {
+                    [&](const auto &c) -> std::optional<Fortran::parser::SourcePosition> {
                         return locationFromSource(c.source, end);
                     },
-                    [&](const Fortran::parser::ErrorRecovery &) -> Fortran::parser::SourcePosition {
+                    [&](const Fortran::parser::ErrorRecovery &) -> std::optional<Fortran::parser::SourcePosition> {
                         DIE("Should not encounter ErrorRecovery in parse tree");
                     }
                 }, construct.u);
@@ -507,8 +509,18 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
             if (const Fortran::parser::Block &block = executionPart.v; block.empty()) {
                 llvm::outs() << "WARNING: Execution part empty.\n";
             } else {
-                const Fortran::parser::SourcePosition startLoc{getLocation(block.front(), false)};
-                const Fortran::parser::SourcePosition endLoc{getLocation(block.back(), true)};
+                const std::optional startLocOpt{getLocation(block.front(), false)};
+                const std::optional endLocOpt{getLocation(block.back(), true)};
+
+                if (!startLocOpt.has_value()) {
+                    llvm::errs() << "ERROR: execution part had no start source location!\n";
+                }
+                if (!endLocOpt.has_value()) {
+                    llvm::errs() << "ERROR: execution part had no end source location!\n";
+                }
+
+                const auto startLoc{startLocOpt.value()};
+                const auto endLoc{endLocOpt.value()};
 
                 // Insert the timer start in the Pre phase (when we first visit the node)
                 // and the timer stop in the Post phase (when we return after visiting the node's children).
@@ -562,9 +574,10 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
             if (const auto actionStmt = std::get_if<Fortran::parser::Statement<Fortran::parser::ActionStmt> >(
                 &execConstruct.u)) {
                 if (std::holds_alternative<Fortran::common::Indirection<Fortran::parser::ReturnStmt>>(actionStmt->statement.u)) {
-                    const Fortran::parser::SourcePosition returnPos{ locationFromSource(actionStmt->source, false)};
-                    llvm::outs() << "Return statement at " << returnPos.line << "\n";
-                    addInstrumentationPoint(SaltInstrumentationPointType::RETURN_STMT, returnPos.line);
+                    const std::optional returnPos{ locationFromSource(actionStmt->source, false)};
+                    const int returnLine{returnPos.value().line};
+                    llvm::outs() << "Return statement at " << returnLine << "\n";
+                    addInstrumentationPoint(SaltInstrumentationPointType::RETURN_STMT, returnLine);
                 }
             }
             return true;

From 5debb90eea0ec9fabaefbb957227dff08d89b2ca Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Tue, 17 Dec 2024 15:11:25 -0800
Subject: [PATCH 099/135] Break up new long lines created by std::optional

---
 src/salt_instrument_flang_plugin.cpp | 59 ++++++++++++++++------------
 1 file changed, 34 insertions(+), 25 deletions(-)

diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index 3bed23d..c06beb8 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -86,8 +86,8 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
 
         [[nodiscard]] bool instrumentBefore() const {
             return instrumentationPointType == SaltInstrumentationPointType::PROGRAM_BEGIN
-                    || instrumentationPointType == SaltInstrumentationPointType::PROCEDURE_BEGIN
-                    || instrumentationPointType == SaltInstrumentationPointType::RETURN_STMT;
+                   || instrumentationPointType == SaltInstrumentationPointType::PROCEDURE_BEGIN
+                   || instrumentationPointType == SaltInstrumentationPointType::RETURN_STMT;
         }
 
 
@@ -127,7 +127,8 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
          */
         [[nodiscard]] std::optional<Fortran::parser::SourcePosition> locationFromSource(
             const Fortran::parser::CharBlock &charBlock, const bool end) const {
-            if (const auto &sourceRange{parsing->allCooked().GetSourcePositionRange(charBlock)}; sourceRange.has_value()) {
+            if (const auto &sourceRange{parsing->allCooked().GetSourcePositionRange(charBlock)}; sourceRange.
+                has_value()) {
                 if (end) {
                     return sourceRange->second;
                 }
@@ -178,7 +179,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         }
 
         bool Pre(const Fortran::parser::SubroutineStmt &subroutineStmt) {
-            const auto & name =std::get<Fortran::parser::Name>(subroutineStmt.t);
+            const auto &name = std::get<Fortran::parser::Name>(subroutineStmt.t);
             subprogramName_ = name.ToString();
             subProgramLine_ = parsing->allCooked().GetSourcePositionRange(name.source)->first.line;
             llvm::outs() << "Enter Subroutine: " << subprogramName_ << "\n";
@@ -221,13 +222,15 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                 construct.u);
         }
 
-        [[nodiscard]] std::optional<Fortran::parser::SourcePosition> getLocation(const Fortran::parser::OpenMPConstruct &construct,
-                                                                  const bool end) {
+        [[nodiscard]] std::optional<Fortran::parser::SourcePosition> getLocation(
+            const Fortran::parser::OpenMPConstruct &construct,
+            const bool end) {
             // This function is based on the equivalent function in
             // flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
             return std::visit(
                 Fortran::common::visitors{
-                    [&](const Fortran::parser::OpenMPStandaloneConstruct &c) -> std::optional<Fortran::parser::SourcePosition> {
+                    [&](const Fortran::parser::OpenMPStandaloneConstruct &c) -> std::optional<
+                Fortran::parser::SourcePosition> {
                         return locationFromSource(c.source, end);
                     },
                     // OpenMPSectionsConstruct, OpenMPLoopConstruct,
@@ -237,7 +240,8 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                         const Fortran::parser::CharBlock &source{std::get<0>(c.t).source};
                         return locationFromSource(source, end);
                     },
-                    [&](const Fortran::parser::OpenMPAtomicConstruct &c) -> std::optional<Fortran::parser::SourcePosition> {
+                    [&](const Fortran::parser::OpenMPAtomicConstruct &c) -> std::optional<
+                Fortran::parser::SourcePosition> {
                         return std::visit(
                             [&](const auto &o) -> std::optional<Fortran::parser::SourcePosition> {
                                 const Fortran::parser::CharBlock &source{
@@ -247,7 +251,8 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                             },
                             c.u);
                     },
-                    [&](const Fortran::parser::OpenMPSectionConstruct &c) -> std::optional<Fortran::parser::SourcePosition> {
+                    [&](const Fortran::parser::OpenMPSectionConstruct &c) -> std::optional<
+                Fortran::parser::SourcePosition> {
                         const Fortran::parser::CharBlock &source{c.source};
                         return locationFromSource(source, end);
                     },
@@ -264,22 +269,25 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                     [&](const auto &c) -> std::optional<Fortran::parser::SourcePosition> {
                         return locationFromSource(c.source, end);
                     },
-                    [&](const Fortran::parser::OpenACCBlockConstruct &c) -> std::optional<Fortran::parser::SourcePosition> {
+                    [&](const Fortran::parser::OpenACCBlockConstruct &c) -> std::optional<
+                Fortran::parser::SourcePosition> {
                         if (end) {
                             return locationFromSource(std::get<Fortran::parser::AccEndBlockDirective>(c.t).source,
                                                       end);
                         }
                         return locationFromSource(std::get<Fortran::parser::AccBeginBlockDirective>(c.t).source, end);
                     },
-                    [&](const Fortran::parser::OpenACCLoopConstruct &c) -> std::optional<Fortran::parser::SourcePosition> {
+                    [&](const Fortran::parser::OpenACCLoopConstruct &c) -> std::optional<
+                Fortran::parser::SourcePosition> {
                         // TODO handle end case (complicated because end statement and do construct are optional)
                         return locationFromSource(std::get<Fortran::parser::AccBeginLoopDirective>(c.t).source, end);
                     },
                 }, construct.u);
         }
 
-        [[nodiscard]] std::optional<Fortran::parser::SourcePosition> getLocation(const Fortran::parser::ExecutableConstruct &construct,
-                                                                  const bool end) {
+        [[nodiscard]] std::optional<Fortran::parser::SourcePosition> getLocation(
+            const Fortran::parser::ExecutableConstruct &construct,
+            const bool end) {
             /* Possibilities for ExecutableConstruct:
                  Statement<ActionStmt>
                  common::Indirection<AssociateConstruct>
@@ -480,7 +488,8 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
              */
             return std::visit(
                 Fortran::common::visitors{
-                    [&](const Fortran::parser::ExecutableConstruct &c) -> std::optional<Fortran::parser::SourcePosition> {
+                    [&](const Fortran::parser::ExecutableConstruct &c) -> std::optional<
+                Fortran::parser::SourcePosition> {
                         return getLocation(c, end);
                     },
                     [&](const auto &c) -> std::optional<Fortran::parser::SourcePosition> {
@@ -535,7 +544,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                     ss << (isInMainProgram_ ? mainProgramLine_ : subProgramLine_);
                     ss << ",1}-{"; // TODO column number, first char of program/subroutine/function stmt
                     ss << endLoc.line + 1;
-                    ss << ",1}]";  // TODO column number, last char of end stmt
+                    ss << ",1}]"; // TODO column number, last char of end stmt
 
                     const std::string timerName{ss.str()};
 
@@ -570,11 +579,12 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
 
         // A ReturnStmt does not have a source, so we instead need to get access to the wrapper Statement that does.
         // Here we get the ReturnStmt through ExecutableConstruct -> Statement<ActionStmt> -> Indirection<ReturnStmt>
-        bool Pre(const Fortran::parser::ExecutableConstruct & execConstruct) {
+        bool Pre(const Fortran::parser::ExecutableConstruct &execConstruct) {
             if (const auto actionStmt = std::get_if<Fortran::parser::Statement<Fortran::parser::ActionStmt> >(
                 &execConstruct.u)) {
-                if (std::holds_alternative<Fortran::common::Indirection<Fortran::parser::ReturnStmt>>(actionStmt->statement.u)) {
-                    const std::optional returnPos{ locationFromSource(actionStmt->source, false)};
+                if (std::holds_alternative<Fortran::common::Indirection<Fortran::parser::ReturnStmt> >(
+                    actionStmt->statement.u)) {
+                    const std::optional returnPos{locationFromSource(actionStmt->source, false)};
                     const int returnLine{returnPos.value().line};
                     llvm::outs() << "Return statement at " << returnLine << "\n";
                     addInstrumentationPoint(SaltInstrumentationPointType::RETURN_STMT, returnLine);
@@ -583,7 +593,6 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
             return true;
         }
 
-
     private:
         // Keeps track of current state of traversal
         bool isInMainProgram_{false};
@@ -617,7 +626,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
     }
 
 
-    [[nodiscard]] static std::string getInstrumentationPointString(const SaltInstrumentationPoint & instPt,
+    [[nodiscard]] static std::string getInstrumentationPointString(const SaltInstrumentationPoint &instPt,
                                                                    const InstrumentationMap &instMap) {
         static std::regex timerNameRegex{SALT_FORTRAN_TIMER_NAME_TEMPLATE};
         std::string instTemplate = instMap.at(instPt.instrumentationPointType);
@@ -630,7 +639,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
 
     static void instrumentFile(const std::string &inputFilePath, llvm::raw_pwrite_stream &outputStream,
                                const SaltInstrumentParseTreeVisitor &visitor,
-                               const InstrumentationMap & instMap) {
+                               const InstrumentationMap &instMap) {
         std::ifstream inputStream{inputFilePath};
         if (!inputStream) {
             llvm::errs() << "ERROR: Could not open input file" << inputFilePath << "\n";
@@ -697,7 +706,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
             llvm::errs() << "ERROR: '" << SALT_FORTRAN_PROGRAM_BEGIN_KEY << "' key not found under 'Fortran'.\n";
             std::exit(-3);
         }
-        for (const ryml::NodeRef child : programBeginNode.children()) {
+        for (const ryml::NodeRef child: programBeginNode.children()) {
             ss << child.val() << "\n";
         }
         map.emplace(SaltInstrumentationPointType::PROGRAM_BEGIN, ss.str());
@@ -709,7 +718,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
             llvm::errs() << "ERROR: '" << SALT_FORTRAN_PROCEDURE_BEGIN_KEY << "' key not found under 'Fortran'.\n";
             std::exit(-3);
         }
-        for (const ryml::NodeRef child : procedureBeginNode.children()) {
+        for (const ryml::NodeRef child: procedureBeginNode.children()) {
             ss << child.val() << "\n";
         }
         map.emplace(SaltInstrumentationPointType::PROCEDURE_BEGIN, ss.str());
@@ -721,7 +730,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
             llvm::errs() << "ERROR: '" << SALT_FORTRAN_PROCEDURE_END_KEY << "' key not found under 'Fortran'.\n";
             std::exit(-3);
         }
-        for (const ryml::NodeRef child : procedureEndNode.children()) {
+        for (const ryml::NodeRef child: procedureEndNode.children()) {
             ss << child.val() << "\n";
         }
         map.emplace(SaltInstrumentationPointType::PROCEDURE_END, ss.str());
@@ -764,7 +773,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
             inputFileExtension = "F90"; // Default if for some reason file has no extension
         } else {
             inputFileExtension = inputFilePath->substr(extPos + 1); // Part of string past last '.'
-                // Capitalize the first character of inputFileExtension
+            // Capitalize the first character of inputFileExtension
             if (!inputFileExtension.empty()) {
                 inputFileExtension[0] = static_cast<char>(std::toupper(inputFileExtension[0]));
             }

From fa43fe93dfdc1899cdd38067d554c21c8b07c4d8 Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Tue, 17 Dec 2024 15:11:54 -0800
Subject: [PATCH 100/135] Make SaltInstrumentationPoint comparable by line
 number

---
 src/salt_instrument_flang_plugin.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index c06beb8..1232c54 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -90,6 +90,9 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                    || instrumentationPointType == SaltInstrumentationPointType::RETURN_STMT;
         }
 
+        bool operator<(const SaltInstrumentationPoint &other) const {
+            return startLine < other.startLine;
+        }
 
         SaltInstrumentationPointType instrumentationPointType;
         int startLine;

From 37fee15cb242acf7b52beb19b5c931d27c957657 Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Tue, 17 Dec 2024 15:44:09 -0800
Subject: [PATCH 101/135] Sanity check order of instrumentation points, handle
 missing end case

---
 src/salt_instrument_flang_plugin.cpp | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index 1232c54..a6a3c41 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -91,6 +91,9 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         }
 
         bool operator<(const SaltInstrumentationPoint &other) const {
+            if (startLine == other.startLine) {
+                return instrumentBefore();
+            }
             return startLine < other.startLine;
         }
 
@@ -210,9 +213,6 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
 
         // TODO split location-getting routines into a separate file
 
-        // TODO The source position functions can fail if no source position exists
-        //      Need to handle that case better.
-
         [[nodiscard]] std::optional<Fortran::parser::SourcePosition> getLocation(
             const Fortran::parser::OpenMPDeclarativeConstruct &construct,
             const bool end) {
@@ -282,7 +282,14 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                     },
                     [&](const Fortran::parser::OpenACCLoopConstruct &c) -> std::optional<
                 Fortran::parser::SourcePosition> {
-                        // TODO handle end case (complicated because end statement and do construct are optional)
+                        if (end) {
+                            if (const auto &maybeDo = std::get<std::optional<Fortran::parser::DoConstruct> >(c.t);
+                                maybeDo.has_value()) {
+                                return locationFromSource(
+                                    std::get<Fortran::parser::Statement<Fortran::parser::EndDoStmt> >(maybeDo.value().t).
+                                    source, end);
+                            }
+                        }
                         return locationFromSource(std::get<Fortran::parser::AccBeginLoopDirective>(c.t).source, end);
                     },
                 }, construct.u);
@@ -651,6 +658,12 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         std::string line;
         int lineNum{0};
         const auto &instPts{visitor.getInstrumentationPoints()};
+
+        // Sanity check: are instrumentation points in the right order?
+        if (!std::is_sorted(instPts.cbegin(), instPts.cend())) {
+            DIE("ERROR: Instrumentation points not sorted by line number!\n");
+        }
+
         auto instIter{instPts.cbegin()};
         while (std::getline(inputStream, line)) {
             ++lineNum;

From 8ae8087cf61c25f8e133e2e990f40513e807e9cc Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Wed, 18 Dec 2024 16:25:03 -0800
Subject: [PATCH 102/135] Add debug function to print instrumentation points

---
 src/salt_instrument_flang_plugin.cpp | 44 ++++++++++++++++++++++++++--
 1 file changed, 41 insertions(+), 3 deletions(-)

diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index a6a3c41..e06751c 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -27,6 +27,7 @@ limitations under the License.
 #include <optional>
 #include <tuple>
 #include <regex>
+#include <algorithm>
 
 
 #define RYML_SINGLE_HDR_DEFINE_NOW
@@ -73,7 +74,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         RETURN_STMT //  Stop timer on the line before
     };
 
-    typedef std::map<SaltInstrumentationPointType, const std::string> InstrumentationMap;
+    using InstrumentationMap = std::map<SaltInstrumentationPointType, const std::string>;
 
     struct SaltInstrumentationPoint {
         SaltInstrumentationPoint(const SaltInstrumentationPointType instrumentation_point_type,
@@ -92,11 +93,38 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
 
         bool operator<(const SaltInstrumentationPoint &other) const {
             if (startLine == other.startLine) {
-                return instrumentBefore();
+                if (instrumentBefore() && !other.instrumentBefore()) {
+                    return true;
+                }
+                return false;
             }
             return startLine < other.startLine;
         }
 
+        [[nodiscard]] std::string typeString() const {
+            switch (instrumentationPointType) {
+                case SaltInstrumentationPointType::PROGRAM_BEGIN:
+                    return "PROGRAM_BEGIN"s;
+                case SaltInstrumentationPointType::PROCEDURE_BEGIN:
+                    return "PROCEDURE_BEGIN"s;
+                case SaltInstrumentationPointType::PROCEDURE_END:
+                    return "PROCEDURE_END"s;
+                case SaltInstrumentationPointType::RETURN_STMT:
+                    return "RETURN_STMT"s;
+                default:
+                    CRASH_NO_CASE;
+            }
+        }
+
+        [[nodiscard]] std::string toString() const {
+            std::stringstream ss;
+            ss << startLine << "\t";
+            ss << (instrumentBefore() ? "before" : "after") << "\t";
+            ss << typeString() << "\t";
+            ss << "\"" << timerName.value_or("<no name>") << "\"";
+            return ss.str();
+        }
+
         SaltInstrumentationPointType instrumentationPointType;
         int startLine;
         std::optional<std::string> timerName;
@@ -125,6 +153,14 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
             return instrumentationPoints_;
         }
 
+        [[nodiscard]] std::string dumpInstrumentationPoints() const {
+            std::stringstream ss;
+            for (const auto & instPt : getInstrumentationPoints()) {
+                ss << instPt.toString() << "\n";
+            }
+            return ss.str();
+        }
+
         /**
          * From a CharBlock object (generally held in the `source` field of a parse tree node,
          * get the source position (file, line, column).
@@ -558,7 +594,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
 
                     const std::string timerName{ss.str()};
 
-                    // Split the timername string so that it will fit between Fortran 77's 72 character limit,
+                    // Split the timerName string so that it will fit between Fortran 77's 72-character limit,
                     // and use character string line continuation syntax compatible with Fortran 77 and modern
                     // Fortran.
                     std::stringstream ss2;
@@ -659,6 +695,8 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         int lineNum{0};
         const auto &instPts{visitor.getInstrumentationPoints()};
 
+        llvm::outs() << "Will perform instrumentation:\n" << visitor.dumpInstrumentationPoints();
+
         // Sanity check: are instrumentation points in the right order?
         if (!std::is_sorted(instPts.cbegin(), instPts.cend())) {
             DIE("ERROR: Instrumentation points not sorted by line number!\n");

From 2dc71598a228e39c9c36355baf4cbe4dacb51f90 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Thu, 19 Dec 2024 14:35:40 -0500
Subject: [PATCH 103/135] Add a test case to stress the instrumentor with
 interface blocks

The procedure in the main program below the contains statement gets
the wrong procedure name inserted in the instrumentation.
---
 tests/fortran/interface_test.f90 | 110 +++++++++++++++++++++++++++++++
 1 file changed, 110 insertions(+)
 create mode 100644 tests/fortran/interface_test.f90

diff --git a/tests/fortran/interface_test.f90 b/tests/fortran/interface_test.f90
new file mode 100644
index 0000000..191f2a3
--- /dev/null
+++ b/tests/fortran/interface_test.f90
@@ -0,0 +1,110 @@
+module mod_w_interfaces
+    use iso_fortran_env ! Check that we can import an intrinsic module
+    implicit none
+    interface add
+        module procedure add_int
+        module procedure add_real
+        module procedure add_complex
+        function add_r_mat(a, b) result(c)
+            real, dimension(:,:), intent(in) :: a, b
+            real, dimension(size(a,1), size(a,2)) :: c
+        end function
+        function add_r_vec(a, b) result(c)
+            real, dimension(:), intent(in) :: a, b
+            real, dimension(size(a)) :: c
+        end function add_r_vec
+    end interface add
+
+contains
+
+    function add_int(a, b) result(c)
+        integer, intent(in) :: a, b
+        integer :: c
+        c = a + b
+    end function add_int
+    function add_complex(a, b) result(c)
+        complex, intent(in) :: a, b
+        complex :: c
+        c = a + b
+    end function
+
+    function add_real(a, b) result(c)
+        real, intent(in) :: a, b
+        real :: c
+        c = a + b
+    end function add_real
+end module mod_w_interfaces
+
+program test_add_overloaded
+    use mod_w_interfaces, only: add
+    use iso_fortran_env, only: output_unit
+    implicit none
+
+    integer, parameter :: ai = 1, bi = 2
+    real, parameter :: ar = 1.0, br = 2.0
+    complex, parameter :: ac = (1.0, 0.0), bc = (2.0, 1.0)
+    integer :: ci
+    real :: cr
+    complex :: cc
+
+    print *, "Adding integers"
+    write(*, *) "testing write statement on output_unit"
+    write(output_unit, *) "testing write statement on output_unit"
+    ci = add(ai, bi)
+    cr = add(ar, br)
+    cc = add(ac, bc)
+
+    contains
+        function test_add(a,b) result(c)
+            use iso_fortran_env, only: output_unit
+            class(*), intent(in) :: a, b
+            class(*), allocatable :: c
+
+            interface other_interface
+                function times_vec(a, b) result(c)
+                    implicit none
+                    real, dimension(:), intent(in) :: a, b
+                    real, dimension(size(a)) :: c
+                end function
+                function multiply(a, b) result(c)
+                    implicit none
+                    class(*), intent(in) :: a, b
+                    class(*), allocatable :: c
+                end function multiply
+            end interface other_interface
+
+            write(output_unit, '(A)') "adding a and b"
+            if ( .not. same_type_as(a, b) ) then
+                print *, "Error: arguments must be of the same type"
+                return
+            else
+                allocate(c, mold=a)
+                select type(a)
+                type is (integer)
+                    write(output_unit, *) "a and b are integers"
+                    select type(b)
+                    type is (integer)
+                        write(output_unit, *) "a + b: ", a, '+', b
+                        c = add(a, b)
+                    end select
+                type is (real)
+                    write(output_unit, *) "a and b are reals"
+                    select type(b)
+                    type is (real)
+                        write(output_unit, *) "a + b: ", a, '+', b
+                        c = add(a, b)
+                    end select
+                type is (complex)
+                    write(output_unit, *) "a and b are complex"
+                    select type(b)
+                    type is (complex)
+                        write(output_unit, *) "a + b: ", a, '+', b
+                        c = add(a, b)
+                    end select
+                class default
+                    print *, "Error: unsupported type"
+                end select
+            end if
+        end function test_add
+
+end program test_add_overloaded

From aed21ae9ef43cf6adb726b01274cd0a0fec72a50 Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Thu, 19 Dec 2024 17:45:29 -0800
Subject: [PATCH 104/135] Fix instrumentation of subprograms with only return
 statement

Previously, the code assumed that a given line would have at most one
instrumentation point before it and at most one after it. However, in a
subprogram containing only a single return statement, that one line has
three instrumentation points. Fixed by handling arbitrarily many before
and after instrumentation points per line.
---
 src/salt_instrument_flang_plugin.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index e06751c..b6d863d 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -574,8 +574,8 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                     llvm::errs() << "ERROR: execution part had no end source location!\n";
                 }
 
-                const auto startLoc{startLocOpt.value()};
-                const auto endLoc{endLocOpt.value()};
+                const auto& startLoc{startLocOpt.value()};
+                const auto& endLoc{endLocOpt.value()};
 
                 // Insert the timer start in the Pre phase (when we first visit the node)
                 // and the timer stop in the Post phase (when we return after visiting the node's children).
@@ -705,12 +705,12 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         auto instIter{instPts.cbegin()};
         while (std::getline(inputStream, line)) {
             ++lineNum;
-            if (instIter != instPts.cend() && instIter->startLine == lineNum && instIter->instrumentBefore()) {
+            while (instIter != instPts.cend() && instIter->startLine == lineNum && instIter->instrumentBefore()) {
                 outputStream << getInstrumentationPointString(*instIter, instMap) << "\n";
                 ++instIter;
             }
             outputStream << line << "\n";
-            if (instIter != instPts.cend() && instIter->startLine == lineNum && !instIter->instrumentBefore()) {
+            while (instIter != instPts.cend() && instIter->startLine == lineNum && !instIter->instrumentBefore()) {
                 outputStream << getInstrumentationPointString(*instIter, instMap) << "\n";
                 ++instIter;
             }

From 2b7de35171e2b3555e59e60d9478e40ac6bf9f03 Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Thu, 19 Dec 2024 18:00:24 -0800
Subject: [PATCH 105/135] Add test of return-only subroutine

---
 CMakeLists.txt                | 3 ++-
 tests/fortran/return-only.f90 | 7 +++++++
 2 files changed, 9 insertions(+), 1 deletion(-)
 create mode 100644 tests/fortran/return-only.f90

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5101802..d6f1d21 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -611,6 +611,7 @@ set(FORTRAN_TESTS_SOURCES_LIST
   hello.f90
   loop_test.f90
   trivial.f90
+  return-only.f90
 )
 
 # Add a smoke test of the fparse-llvm script
@@ -701,4 +702,4 @@ foreach(test_source IN LISTS FORTRAN_TESTS_SOURCES_LIST)
       DEPENDS run_${upper_comp}_${test_source}
     )
   endforeach()
-endforeach()
\ No newline at end of file
+endforeach()
diff --git a/tests/fortran/return-only.f90 b/tests/fortran/return-only.f90
new file mode 100644
index 0000000..c185cca
--- /dev/null
+++ b/tests/fortran/return-only.f90
@@ -0,0 +1,7 @@
+      subroutine foo
+          return
+      end subroutine foo
+
+      program main
+          call foo
+      end program main

From 4d47584370250bd09e6655a1521866a141d51e8a Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Fri, 20 Dec 2024 12:40:17 -0500
Subject: [PATCH 106/135] Do not descend into interface blocks

There is never executable code in Fortran interface blocks (which can
end up being arbitrarily nested), so have the visitor skip descending
into child nodes.
---
 src/salt_instrument_flang_plugin.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index b6d863d..19a4210 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -204,6 +204,9 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         // See https://github.com/llvm/llvm-project/blob/main/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
         // for examples of getting source position for a parse tree node
 
+        // Never descend into InterfaceSpecification nodes, they can't contain executable statements.
+        bool Pre(const Fortran::parser::InterfaceSpecification &) { return false; }
+
         bool Pre(const Fortran::parser::MainProgram &) {
             isInMainProgram_ = true;
             return true;

From 88e4f1a4985c7c4f646ee528e69c52faa7049dbb Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Fri, 20 Dec 2024 16:10:12 -0800
Subject: [PATCH 107/135] Add instrumentation for an if statement containing
 return

Transform the if statement into an if-then-endif construct with the timer
stop and the return in its body.
---
 CMakeLists.txt                       |  1 +
 src/salt_instrument_flang_plugin.cpp | 96 ++++++++++++++++++++++------
 tests/fortran/if-stmt.f90            | 17 +++++
 3 files changed, 95 insertions(+), 19 deletions(-)
 create mode 100644 tests/fortran/if-stmt.f90

diff --git a/CMakeLists.txt b/CMakeLists.txt
index d6f1d21..35a861c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -612,6 +612,7 @@ set(FORTRAN_TESTS_SOURCES_LIST
   loop_test.f90
   trivial.f90
   return-only.f90
+  if-stmt.f90
 )
 
 # Add a smoke test of the fparse-llvm script
diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index 19a4210..b90f8d4 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -68,27 +68,34 @@ using namespace Fortran::frontend;
  */
 class SaltInstrumentAction final : public PluginParseTreeAction {
     enum class SaltInstrumentationPointType {
-        PROGRAM_BEGIN, // Declare profiler, initialize TAU, set node, start timer
-        PROCEDURE_BEGIN, // Declare profiler, start timer
-        PROCEDURE_END, // Stop timer on the line after
-        RETURN_STMT //  Stop timer on the line before
+        PROGRAM_BEGIN,     // Declare profiler, initialize TAU, set node, start timer
+        PROCEDURE_BEGIN,   // Declare profiler, start timer
+        PROCEDURE_END,     // Stop timer on the line after
+        RETURN_STMT,       // Stop timer on the line before
+        IF_RETURN          // Transform if to if-then-endif, stop timer before return
     };
 
     using InstrumentationMap = std::map<SaltInstrumentationPointType, const std::string>;
 
+    // TODO Refactor. The SaltInstrumentationPoint is getting complicated enough that this
+    // should be refactored to subclasses instead of having a bunch of fields that only
+    // sometimes apply.
     struct SaltInstrumentationPoint {
         SaltInstrumentationPoint(const SaltInstrumentationPointType instrumentation_point_type,
                                  const int start_line,
-                                 const std::optional<std::string> &timer_name = std::nullopt)
+                                 const std::optional<std::string> &timer_name = std::nullopt,
+                                 const int conditional_column = 0)
             : instrumentationPointType(instrumentation_point_type),
               startLine(start_line),
-              timerName(timer_name) {
+              timerName(timer_name),
+              conditionalColumn(conditional_column) {
         }
 
         [[nodiscard]] bool instrumentBefore() const {
             return instrumentationPointType == SaltInstrumentationPointType::PROGRAM_BEGIN
                    || instrumentationPointType == SaltInstrumentationPointType::PROCEDURE_BEGIN
-                   || instrumentationPointType == SaltInstrumentationPointType::RETURN_STMT;
+                   || instrumentationPointType == SaltInstrumentationPointType::RETURN_STMT
+                   || instrumentationPointType == SaltInstrumentationPointType::IF_RETURN;
         }
 
         bool operator<(const SaltInstrumentationPoint &other) const {
@@ -111,6 +118,8 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                     return "PROCEDURE_END"s;
                 case SaltInstrumentationPointType::RETURN_STMT:
                     return "RETURN_STMT"s;
+                case SaltInstrumentationPointType::IF_RETURN:
+                    return "IF_RETURN"s;
                 default:
                     CRASH_NO_CASE;
             }
@@ -122,12 +131,16 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
             ss << (instrumentBefore() ? "before" : "after") << "\t";
             ss << typeString() << "\t";
             ss << "\"" << timerName.value_or("<no name>") << "\"";
+            if (instrumentationPointType == SaltInstrumentationPointType::IF_RETURN) {
+                ss << "\t" << conditionalColumn;
+            }
             return ss.str();
         }
 
         SaltInstrumentationPointType instrumentationPointType;
         int startLine;
         std::optional<std::string> timerName;
+        int conditionalColumn;
     };
 
 
@@ -143,10 +156,11 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
          * Instrumentation will be added after start_line.
          */
         void addInstrumentationPoint(SaltInstrumentationPointType instrumentation_point_type,
-                                     int start_line,
-                                     const std::optional<std::string> &timer_name = std::nullopt) {
+                                     const int start_line,
+                                     const std::optional<std::string> &timer_name = std::nullopt,
+                                     const int conditional_column = 0) {
             instrumentationPoints_.emplace_back(
-                instrumentation_point_type, start_line, timer_name);
+                instrumentation_point_type, start_line, timer_name, conditional_column);
         }
 
         [[nodiscard]] const auto &getInstrumentationPoints() const {
@@ -155,7 +169,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
 
         [[nodiscard]] std::string dumpInstrumentationPoints() const {
             std::stringstream ss;
-            for (const auto & instPt : getInstrumentationPoints()) {
+            for (const auto &instPt: getInstrumentationPoints()) {
                 ss << instPt.toString() << "\n";
             }
             return ss.str();
@@ -204,9 +218,6 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         // See https://github.com/llvm/llvm-project/blob/main/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
         // for examples of getting source position for a parse tree node
 
-        // Never descend into InterfaceSpecification nodes, they can't contain executable statements.
-        bool Pre(const Fortran::parser::InterfaceSpecification &) { return false; }
-
         bool Pre(const Fortran::parser::MainProgram &) {
             isInMainProgram_ = true;
             return true;
@@ -325,7 +336,8 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                             if (const auto &maybeDo = std::get<std::optional<Fortran::parser::DoConstruct> >(c.t);
                                 maybeDo.has_value()) {
                                 return locationFromSource(
-                                    std::get<Fortran::parser::Statement<Fortran::parser::EndDoStmt> >(maybeDo.value().t).
+                                    std::get<Fortran::parser::Statement<Fortran::parser::EndDoStmt> >(maybeDo.value().t)
+                                    .
                                     source, end);
                             }
                         }
@@ -550,6 +562,29 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                 }, construct.u);
         }
 
+        bool Pre(const Fortran::parser::IfStmt &ifStmt) {
+            if (const auto &ifAction{
+                    std::get<Fortran::parser::UnlabeledStatement<Fortran::parser::ActionStmt> >(ifStmt.t)
+                };
+                std::holds_alternative<Fortran::common::Indirection<
+                    Fortran::parser::ReturnStmt> >(ifAction.statement.u)) {
+                const auto startPos{
+                    locationFromSource(std::get<Fortran::parser::ScalarLogicalExpr>(ifStmt.t).thing.thing.value().source,
+                                       false).value()
+                };
+                const auto endPos{
+                    locationFromSource(std::get<Fortran::parser::ScalarLogicalExpr>(ifStmt.t).thing.thing.value().source,
+                                       true).value()
+                };
+                llvm::outs() << "If-return, conditional: (" << startPos.line << "," << startPos.column << ") - "
+                             << "(" << endPos.line << "," << endPos.column << ")\n";
+                // TODO this assumes that the conditional fits on one list
+                // make more robust, test with more cases
+                addInstrumentationPoint(SaltInstrumentationPointType::IF_RETURN, startPos.line, std::nullopt, endPos.column);
+            }
+            return true;
+        }
+
         // Split handling of ExecutionPart into two phases
         // so that we insert Instrumentation Points in order
         // even if we separately insert them in visitors for
@@ -577,8 +612,8 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                     llvm::errs() << "ERROR: execution part had no end source location!\n";
                 }
 
-                const auto& startLoc{startLocOpt.value()};
-                const auto& endLoc{endLocOpt.value()};
+                const auto &startLoc{startLocOpt.value()};
+                const auto &endLoc{endLocOpt.value()};
 
                 // Insert the timer start in the Pre phase (when we first visit the node)
                 // and the timer stop in the Post phase (when we return after visiting the node's children).
@@ -706,13 +741,33 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         }
 
         auto instIter{instPts.cbegin()};
+        bool shouldOutputLine{true};
         while (std::getline(inputStream, line)) {
             ++lineNum;
+            shouldOutputLine = true;
             while (instIter != instPts.cend() && instIter->startLine == lineNum && instIter->instrumentBefore()) {
-                outputStream << getInstrumentationPointString(*instIter, instMap) << "\n";
+                // Need special case for if-return because it requires a more elaborate transformation
+                // than simply inserting lines
+                // TODO instead of special case have three kinds of instrumentation: before, after, and REPLACE
+                // TODO handle return <value> case
+                // TODO handle multi-line
+                // TODO handle line continuation if too long
+                if (instIter->instrumentationPointType == SaltInstrumentationPointType::IF_RETURN) {
+                   shouldOutputLine = false;
+                   line.erase(instIter->conditionalColumn);
+                   line.insert(instIter->conditionalColumn, " then");
+                   outputStream << line << "\n";
+                   outputStream << getInstrumentationPointString(*instIter, instMap) << "\n";
+                   outputStream << "      return\n";
+                   outputStream << "      endif\n";
+                } else {
+                    outputStream << getInstrumentationPointString(*instIter, instMap) << "\n";
+                }
                 ++instIter;
             }
-            outputStream << line << "\n";
+            if (shouldOutputLine) {
+                outputStream << line << "\n";
+            }
             while (instIter != instPts.cend() && instIter->startLine == lineNum && !instIter->instrumentBefore()) {
                 outputStream << getInstrumentationPointString(*instIter, instMap) << "\n";
                 ++instIter;
@@ -794,6 +849,9 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         // The return statement uses the same text as procedure end,
         // but is inserted before the line instead of after.
         map.emplace(SaltInstrumentationPointType::RETURN_STMT, ss.str());
+        // The if-return statement uses the same text as procedure end,
+        // but requires transformation to if-then-endif
+        map.emplace(SaltInstrumentationPointType::IF_RETURN, ss.str());
 
         return map;
     }
diff --git a/tests/fortran/if-stmt.f90 b/tests/fortran/if-stmt.f90
new file mode 100644
index 0000000..9a134ce
--- /dev/null
+++ b/tests/fortran/if-stmt.f90
@@ -0,0 +1,17 @@
+      subroutine print_message()
+          print *, "i was 1"
+      end subroutine print_message
+
+      subroutine if_with_return(i)
+          integer, intent (in)  :: i
+          if (i == 0) return
+          print *, "i was not zero"
+          if (i == 1) call print_message
+      end subroutine if_with_return
+
+      program main
+          implicit none
+          call if_with_return(0)
+          call if_with_return(1)
+          call if_with_return(2)
+      end program main

From f733e6914df17385dd7622fce68c74308db3cc8d Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Sat, 21 Dec 2024 08:28:35 -0500
Subject: [PATCH 108/135] Add back fix for not descending into Fortran
 interface spec blocks

---
 src/salt_instrument_flang_plugin.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index b90f8d4..83599b1 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -218,6 +218,9 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         // See https://github.com/llvm/llvm-project/blob/main/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
         // for examples of getting source position for a parse tree node
 
+        // Never descend into InterfaceSpecification nodes, they can't contain executable statements.
+        bool Pre(const Fortran::parser::InterfaceSpecification &) { return false; }
+
         bool Pre(const Fortran::parser::MainProgram &) {
             isInMainProgram_ = true;
             return true;

From 84a415830624e4bba9c618e7774feb855532af3b Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Fri, 3 Jan 2025 15:32:23 -0800
Subject: [PATCH 109/135] Read file specified in $SALT_FORTRAN_SELECT_FILE in
 Flang plugin

Read and parse select files. Not yet acting on the directives.
---
 CMakeLists.txt                       |  1 +
 include/selectfile.hpp               |  2 +-
 src/salt_instrument_flang_plugin.cpp | 73 ++++++++++++++++++++--------
 src/selectfile.cpp                   | 11 ++++-
 4 files changed, 64 insertions(+), 23 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 35a861c..06711b7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -284,6 +284,7 @@ if(MLIR_FOUND AND Flang_FOUND)
     list(TRANSFORM SALT_FLANG_PLUGIN_HEADER_FILES PREPEND "${CMAKE_SOURCE_DIR}/include/")
 
     set(SALT_FLANG_PLUGIN_SRCS
+    selectfile.cpp
     salt_instrument_flang_plugin.cpp
     )
     list(TRANSFORM SALT_FLANG_PLUGIN_SRCS PREPEND "${CMAKE_SOURCE_DIR}/src/")
diff --git a/include/selectfile.hpp b/include/selectfile.hpp
index 5b56759..a6b8124 100644
--- a/include/selectfile.hpp
+++ b/include/selectfile.hpp
@@ -72,6 +72,6 @@ extern std::list<std::string> fileincludelist;
 extern std::list<std::string> fileexcludelist;
 
 // void parseInstrumentationCommand(char *line, int lineno);
-void processInstrumentationRequests(const char *fname);
+bool processInstrumentationRequests(const char *fname);
 
 #endif
diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index 83599b1..b844c0b 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -44,12 +44,16 @@ limitations under the License.
 #include "flang/Parser/source.h"
 #include "flang/Common/indirection.h"
 
+#include "selectfile.hpp"
+
 // TODO Split declarations into a separate header file.
 // TODO Put debug output behind verbose flag
 
 #define SALT_FORTRAN_CONFIG_FILE_VAR "SALT_FORTRAN_CONFIG_FILE"
 #define SALT_FORTRAN_CONFIG_DEFAULT_PATH "config_files/tau_config.yaml"
 
+#define SALT_FORTRAN_SELECT_FILE_VAR "SALT_FORTRAN_SELECT_FILE"
+
 #define SALT_FORTRAN_KEY "Fortran"
 #define SALT_FORTRAN_PROGRAM_BEGIN_KEY "program_insert"
 #define SALT_FORTRAN_PROCEDURE_BEGIN_KEY "procedure_begin_insert"
@@ -68,11 +72,11 @@ using namespace Fortran::frontend;
  */
 class SaltInstrumentAction final : public PluginParseTreeAction {
     enum class SaltInstrumentationPointType {
-        PROGRAM_BEGIN,     // Declare profiler, initialize TAU, set node, start timer
-        PROCEDURE_BEGIN,   // Declare profiler, start timer
-        PROCEDURE_END,     // Stop timer on the line after
-        RETURN_STMT,       // Stop timer on the line before
-        IF_RETURN          // Transform if to if-then-endif, stop timer before return
+        PROGRAM_BEGIN, // Declare profiler, initialize TAU, set node, start timer
+        PROCEDURE_BEGIN, // Declare profiler, start timer
+        PROCEDURE_END, // Stop timer on the line after
+        RETURN_STMT, // Stop timer on the line before
+        IF_RETURN // Transform if to if-then-endif, stop timer before return
     };
 
     using InstrumentationMap = std::map<SaltInstrumentationPointType, const std::string>;
@@ -219,7 +223,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         // for examples of getting source position for a parse tree node
 
         // Never descend into InterfaceSpecification nodes, they can't contain executable statements.
-        bool Pre(const Fortran::parser::InterfaceSpecification &) { return false; }
+        static bool Pre(const Fortran::parser::InterfaceSpecification &) { return false; }
 
         bool Pre(const Fortran::parser::MainProgram &) {
             isInMainProgram_ = true;
@@ -572,18 +576,21 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                 std::holds_alternative<Fortran::common::Indirection<
                     Fortran::parser::ReturnStmt> >(ifAction.statement.u)) {
                 const auto startPos{
-                    locationFromSource(std::get<Fortran::parser::ScalarLogicalExpr>(ifStmt.t).thing.thing.value().source,
-                                       false).value()
+                    locationFromSource(
+                        std::get<Fortran::parser::ScalarLogicalExpr>(ifStmt.t).thing.thing.value().source,
+                        false).value()
                 };
                 const auto endPos{
-                    locationFromSource(std::get<Fortran::parser::ScalarLogicalExpr>(ifStmt.t).thing.thing.value().source,
-                                       true).value()
+                    locationFromSource(
+                        std::get<Fortran::parser::ScalarLogicalExpr>(ifStmt.t).thing.thing.value().source,
+                        true).value()
                 };
                 llvm::outs() << "If-return, conditional: (" << startPos.line << "," << startPos.column << ") - "
-                             << "(" << endPos.line << "," << endPos.column << ")\n";
+                        << "(" << endPos.line << "," << endPos.column << ")\n";
                 // TODO this assumes that the conditional fits on one list
                 // make more robust, test with more cases
-                addInstrumentationPoint(SaltInstrumentationPointType::IF_RETURN, startPos.line, std::nullopt, endPos.column);
+                addInstrumentationPoint(SaltInstrumentationPointType::IF_RETURN, startPos.line, std::nullopt,
+                                        endPos.column);
             }
             return true;
         }
@@ -744,7 +751,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         }
 
         auto instIter{instPts.cbegin()};
-        bool shouldOutputLine{true};
+        bool shouldOutputLine{};
         while (std::getline(inputStream, line)) {
             ++lineNum;
             shouldOutputLine = true;
@@ -756,13 +763,13 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                 // TODO handle multi-line
                 // TODO handle line continuation if too long
                 if (instIter->instrumentationPointType == SaltInstrumentationPointType::IF_RETURN) {
-                   shouldOutputLine = false;
-                   line.erase(instIter->conditionalColumn);
-                   line.insert(instIter->conditionalColumn, " then");
-                   outputStream << line << "\n";
-                   outputStream << getInstrumentationPointString(*instIter, instMap) << "\n";
-                   outputStream << "      return\n";
-                   outputStream << "      endif\n";
+                    shouldOutputLine = false;
+                    line.erase(instIter->conditionalColumn);
+                    line.insert(instIter->conditionalColumn, " then");
+                    outputStream << line << "\n";
+                    outputStream << getInstrumentationPointString(*instIter, instMap) << "\n";
+                    outputStream << "      return\n";
+                    outputStream << "      endif\n";
                 } else {
                     outputStream << getInstrumentationPointString(*instIter, instMap) << "\n";
                 }
@@ -789,6 +796,15 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         return SALT_FORTRAN_CONFIG_DEFAULT_PATH;
     }
 
+    [[nodiscard]] static std::optional<std::string> getSelectFilePath() {
+        if (const char *val = getenv(SALT_FORTRAN_SELECT_FILE_VAR)) {
+            if (std::string selectFile{val}; !selectFile.empty()) {
+                return selectFile;
+            }
+        }
+        return std::nullopt;
+    }
+
     [[nodiscard]] static ryml::Tree getConfigYamlTree(const std::string &configPath) {
         std::ifstream inputStream{configPath};
         if (!inputStream) {
@@ -882,6 +898,23 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         const ryml::Tree yamlTree = getConfigYamlTree(configPath);
         const InstrumentationMap instMap = getInstrumentationMap(yamlTree);
 
+        if (const auto selectPath{getSelectFilePath()}; selectPath.has_value()) {
+            if (processInstrumentationRequests(selectPath->c_str())) {
+                const auto printStr = [&](const auto &a) { llvm::outs() << a << "\n"; };
+                llvm::outs() << "File include list:\n";
+                std::for_each(fileincludelist.cbegin(), fileincludelist.cend(), printStr);
+                llvm::outs() << "File exclude list:\n";
+                std::for_each(fileexcludelist.cbegin(), fileexcludelist.cend(), printStr);
+                llvm::outs() << "Include list:\n";
+                std::for_each(includelist.cbegin(), includelist.cend(), printStr);
+                llvm::outs() << "Exclude list:\n";
+                std::for_each(excludelist.cbegin(), excludelist.cend(), printStr);
+            } else {
+                llvm::errs() << "ERROR: Unable to read selective instrumentation file at " << selectPath << "\n";
+                std::exit(-4);
+            }
+        }
+
         // Get the extension of the input file
         // For input file 'filename.ext' we will output to 'filename.inst.Ext'
         // Since we are adding preprocessor directives in the emitted code,
diff --git a/src/selectfile.cpp b/src/selectfile.cpp
index 49a4e8b..aa790dc 100644
--- a/src/selectfile.cpp
+++ b/src/selectfile.cpp
@@ -9,6 +9,10 @@
 #include "selectfile.hpp"
 #include "dprint.hpp"
 
+// TODO support all selective instrumentation types
+// TODO refactor this to class instead of using global variables
+// TODO modernize C++
+
 std::list<std::string> excludelist;
 std::list<std::string> includelist;
 std::list<std::string> fileincludelist;
@@ -684,7 +688,7 @@ void parseError(const char *message, char *line, int lineno, int column)
 
 #define SALT_UNUSED(expr) do { (void)(expr); } while (0)
 
-void processInstrumentationRequests(const char *fname)
+bool processInstrumentationRequests(const char *fname)
 {
 
   std::ifstream input(fname);
@@ -695,7 +699,8 @@ void processInstrumentationRequests(const char *fname)
 
 
   if (!input) {
-    std::cerr << "ERROR: Cannot open file: " << fname << std::endl;
+    std::cerr << "ERROR: Cannot open selective instrumentation file: " << fname << std::endl;
+    return false;
   }
 
 
@@ -883,4 +888,6 @@ void processInstrumentationRequests(const char *fname)
 
   DPRINT0("fileexcludelist\n");
   dump_list(fileexcludelist);
+
+  return true;
 }

From afd4130f9b1d1233ee2c3eb398d0851e7724e9df Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Thu, 2 Jan 2025 16:42:09 -0500
Subject: [PATCH 110/135] Create structure in the build dir & for installation

---
 CMakeLists.txt     | 35 +++++++++++++++++++++++++----------
 src/fparse-llvm.in | 15 +++------------
 2 files changed, 28 insertions(+), 22 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 35a861c..1109b98 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -66,7 +66,6 @@ string(APPEND SALT_GREETING "\n")
 string(PREPEND SALT_GREETING "\n")
 message(STATUS "${SALT_GREETING}")
 
-
 #---------------------------------------------------------------------------------
 # Get & print diagnostics about flags passed to CMake and build/source directories
 #---------------------------------------------------------------------------------
@@ -89,6 +88,8 @@ project(SALT-FM
   HOMEPAGE_URL "https://github.com/ParaToolsInc/salt"
   LANGUAGES CXX C)
 
+include(GNUInstallDirs)
+
 #----------
 # Find LLVM
 #----------
@@ -237,8 +238,19 @@ target_link_options(cparse-llvm PUBLIC -Wl,--as-needed -Wl,--no-allow-shlib-unde
 # Turn on debug output if a debug build is being built
 target_compile_definitions(cparse-llvm PUBLIC $<$<CONFIG:Debug>:DEBUG_NO_WAY>)
 # Install the target
-install(TARGETS cparse-llvm DESTINATION bin)
-
+install(TARGETS cparse-llvm DESTINATION ${CMAKE_INSTALL_BINDIR})
+set_target_properties(cparse-llvm PROPERTIES 
+  RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}")
+
+#------------------------------------------------------
+# Handle config files in build directory & installation
+#------------------------------------------------------
+# Copy ${CMAKE_SOURCE_DIR}/config_files to ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_DATADIR}/saltfm/config_files
+# and install them to ${CMAKE_INSTALL_DATADIR}/saltfm/config_files
+file(COPY ${CMAKE_SOURCE_DIR}/config_files
+  DESTINATION ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_DATADIR}/saltfm)
+install(DIRECTORY ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_DATADIR}/saltfm
+  DESTINATION ${CMAKE_INSTALL_DATADIR})
 
 #---------------------
 # Flang Frontend library
@@ -294,12 +306,15 @@ if(MLIR_FOUND AND Flang_FOUND)
     target_compile_features(salt-flang-plugin PUBLIC cxx_std_17)
     target_link_libraries(salt-flang-plugin PUBLIC SALT_FLANG_FRONTEND)
     target_link_options(salt-flang-plugin PUBLIC -Wl,--as-needed -Wl,-undefined -Wl,dynamic_lookup)
+    set_target_properties(salt-flang-plugin PROPERTIES
+      ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR}"
+      LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR}")
 
-    install(TARGETS salt-flang-plugin DESTINATION lib)
+    install(TARGETS salt-flang-plugin DESTINATION ${CMAKE_INSTALL_LIBDIR})
 
-    configure_file(${CMAKE_SOURCE_DIR}/src/fparse-llvm.in ${CMAKE_BINARY_DIR}/fparse-llvm @ONLY)
-    install(PROGRAMS ${CMAKE_BINARY_DIR}/fparse-llvm
-      TYPE BIN)
+    configure_file(${CMAKE_SOURCE_DIR}/src/fparse-llvm.in ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}/fparse-llvm @ONLY)
+    install(PROGRAMS ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}/fparse-llvm
+      TYPE BIN) # TYPE BIN installs into CMAKE_INSTALL_BINDIR
 
 else()
     message(STATUS "Flang not found -- skipping Flang frontend plugin")
@@ -617,7 +632,7 @@ set(FORTRAN_TESTS_SOURCES_LIST
 
 # Add a smoke test of the fparse-llvm script
 add_test(NAME fparse_llvm_smoke_test
-  COMMAND ${CMAKE_BINARY_DIR}/fparse-llvm -h)
+  COMMAND ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}/fparse-llvm -h)
 set_tests_properties(fparse_llvm_smoke_test
   PROPERTIES
   LABELS smoke
@@ -626,11 +641,11 @@ set_tests_properties(fparse_llvm_smoke_test
 
 foreach(test_source IN LISTS FORTRAN_TESTS_SOURCES_LIST)
   add_test(NAME instrument_${test_source}
-    COMMAND ./fparse-llvm ${CMAKE_SOURCE_DIR}/tests/fortran/${test_source}
+    COMMAND ./${CMAKE_INSTALL_BINDIR}/fparse-llvm ${CMAKE_SOURCE_DIR}/tests/fortran/${test_source}
     )
   set_tests_properties(instrument_${test_source}
     PROPERTIES
-    REQUIRED_FILES "${CMAKE_BINARY_DIR}/fparse-llvm"
+    REQUIRED_FILES "${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}/fparse-llvm"
     ENVIRONMENT "SALT_FORTRAN_CONFIG_FILE=${CMAKE_SOURCE_DIR}/config_files/tau_config.yaml"
     PASS_REGULAR_EXPRESSION "SALT Instrumentor Plugin finished"
   )
diff --git a/src/fparse-llvm.in b/src/fparse-llvm.in
index 9712a7c..4b2dbcd 100755
--- a/src/fparse-llvm.in
+++ b/src/fparse-llvm.in
@@ -23,22 +23,13 @@ set -o pipefail
 
 readonly _SALTFM_PLUGIN_SO=libsalt-flang-plugin.so
 readonly _VERSION=@SALT_VERSION_MAJOR@.@SALT_VERSION_MINOR@
-readonly _FORTRAN_CONFIG_FILE_BUILD=@CMAKE_SOURCE_DIR@/config_files/tau_config.yaml
-readonly _FORTRAN_CONFIG_FILE_INSTALL=@SALT_CONFIGFILES_INSTALL_DIR@/tau_config.yaml
-readonly _INSTALL_DEST=@CMAKE_INSTALL_PREFIX@
-readonly _SALT_PLUGIN_SO_BUILD=@CMAKE_BINARY_DIR@/${_SALTFM_PLUGIN_SO}
-readonly _SALT_PLUGIN_SO_INSTALL=@SALT_PLUGIN_INSTALL_DIR@/${_SALTFM_PLUGIN_SO}
 
 # get the absolute path of this script
 readonly _SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 # Check if the script is being run from the install directory
-if [[ -f "${_SCRIPT_DIR}/$0" && "${_SCRIPT_DIR}" == ${_INSTALL_DEST}* ]]; then
-    FORTRAN_CONFIG_FILE="${_FORTRAN_CONFIG_FILE_INSTALL}"
-    SALT_PLUGIN_SO="${_SALT_PLUGIN_SO_INSTALL}"
-else
-    FORTRAN_CONFIG_FILE="${_FORTRAN_CONFIG_FILE_BUILD}"
-    SALT_PLUGIN_SO="${_SALT_PLUGIN_SO_BUILD}"
-fi
+
+SALT_PLUGIN_SO="${_SCRIPT_DIR}/../@CMAKE_INSTALL_LIBDIR@/${_SALTFM_PLUGIN_SO}"
+FORTRAN_CONFIG_FILE="${_SCRIPT_DIR}/../@CMAKE_INSTALL_DATADIR@/saltfm/config_files/tau_config.yaml"
 
 # Add a help/usage message function
 function usage {

From 2d79c7afdd58e33a2c4ed4891c0f7f03c44e5ce5 Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Mon, 6 Jan 2025 16:34:57 -0800
Subject: [PATCH 111/135] Support FILE_INCLUDE_LIST and FILE_EXCLUDE_LIST

Still need to add routine-level include/exclude, and tests for selective
instrumentation.
---
 src/salt_instrument_flang_plugin.cpp | 92 ++++++++++++++++++++++++----
 1 file changed, 81 insertions(+), 11 deletions(-)

diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index b844c0b..6ddac51 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -28,6 +28,7 @@ limitations under the License.
 #include <tuple>
 #include <regex>
 #include <algorithm>
+#include <filesystem>
 
 
 #define RYML_SINGLE_HDR_DEFINE_NOW
@@ -149,8 +150,8 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
 
 
     struct SaltInstrumentParseTreeVisitor {
-        explicit SaltInstrumentParseTreeVisitor(Fortran::parser::Parsing *parsing)
-            : mainProgramLine_(0), subProgramLine_(0), parsing(parsing) {
+        explicit SaltInstrumentParseTreeVisitor(Fortran::parser::Parsing *parsing, const bool skipInstrument = false)
+            : mainProgramLine_(0), subProgramLine_(0), skipInstrument_(skipInstrument), parsing(parsing) {
         }
 
         /**
@@ -163,8 +164,10 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                                      const int start_line,
                                      const std::optional<std::string> &timer_name = std::nullopt,
                                      const int conditional_column = 0) {
-            instrumentationPoints_.emplace_back(
-                instrumentation_point_type, start_line, timer_name, conditional_column);
+            if (!skipInstrument_) {
+                instrumentationPoints_.emplace_back(
+                    instrumentation_point_type, start_line, timer_name, conditional_column);
+            }
         }
 
         [[nodiscard]] const auto &getInstrumentationPoints() const {
@@ -695,6 +698,8 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         std::string subprogramName_;
         int subProgramLine_;
 
+        bool skipInstrument_;
+
         std::vector<SaltInstrumentationPoint> instrumentationPoints_;
 
         // Pass in the parser object from the Action to the Visitor
@@ -875,6 +880,59 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         return map;
     }
 
+    [[nodiscard]] static std::string convertToRegexForm(const std::string &globString) {
+        // Convert lines in shell glob format (where "*" means zero or more characters)
+        // to regex version (where ".*" means zero or more characters).
+        static std::regex starRegex{R"(\*)"};
+        return std::regex_replace(globString, starRegex, ".*");
+    }
+
+    [[nodiscard]] static bool shouldInstrumentFile(const std::filesystem::path &filePath) {
+        // Check if this file should be instrumented.
+        // It should if:
+        //   - No file include or file exclude list is specified
+        //   - An exclude list is present and the file is not in it
+        //   - An include list is present and the file is in it
+
+        if (fileincludelist.empty() && fileexcludelist.empty()) {
+            return true;
+        }
+
+        bool fileInExcludeList{false};
+        const auto filePart{filePath.filename()};
+        if (!fileexcludelist.empty()) {
+            for (const auto &excludeEntry: fileexcludelist) {
+                if (const std::regex excludeRegex{convertToRegexForm(excludeEntry)}; std::regex_search(
+                    filePart.string(), excludeRegex)) {
+                    fileInExcludeList = true;
+                    break;
+                }
+            }
+        }
+        if (fileInExcludeList) {
+            return false;
+        }
+        bool fileInIncludeList{false};
+        if (!fileincludelist.empty()) {
+            for (const auto &includeEntry: fileincludelist) {
+                if (const std::regex includeRegex{convertToRegexForm(includeEntry)}; std::regex_search(
+                    filePart.string(), includeRegex)) {
+                    fileInIncludeList = true;
+                    break;
+                }
+            }
+        }
+
+        if (!fileincludelist.empty()) {
+            if (fileInIncludeList) {
+                return true;
+            }
+            return false;
+        }
+
+        return true;
+    }
+
     /**
      * This is the entry point for the plugin.
      */
@@ -886,12 +944,14 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         Fortran::parser::Parsing &parsing = getParsing();
 
         // Get the path to the input file
-        const auto inputFilePath = getInputFilePath(parsing);
-        if (!inputFilePath) {
+        const auto inputFilePathStr = getInputFilePath(parsing);
+        if (!inputFilePathStr) {
             llvm::errs() << "ERROR: Unable to find input file name!\n";
             std::exit(-1);
         }
-        llvm::outs() << "Have input file: " << *inputFilePath << "\n";
+        llvm::outs() << "Have input file: " << *inputFilePathStr << "\n";
+
+        const std::filesystem::path inputFilePath{inputFilePathStr.value()};
 
         // Read and parse the yaml configuration file
         const std::string configPath{getConfigPath()};
@@ -915,31 +975,41 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
             }
         }
 
+
         // Get the extension of the input file
         // For input file 'filename.ext' we will output to 'filename.inst.Ext'
         // Since we are adding preprocessor directives in the emitted code,
         // the file extension should be capitalized.
         std::string inputFileExtension;
-        if (auto const extPos = inputFilePath->find_last_of('.'); extPos == std::string::npos) {
+        if (auto const extPos = inputFilePath.string().find_last_of('.'); extPos == std::string::npos) {
             inputFileExtension = "F90"; // Default if for some reason file has no extension
         } else {
-            inputFileExtension = inputFilePath->substr(extPos + 1); // Part of string past last '.'
+            inputFileExtension = inputFilePath.string().substr(extPos + 1); // Part of string past last '.'
             // Capitalize the first character of inputFileExtension
             if (!inputFileExtension.empty()) {
                 inputFileExtension[0] = static_cast<char>(std::toupper(inputFileExtension[0]));
             }
         }
 
+
         // Open an output file for writing the instrumented code
         const std::string outputFileExtension = "inst."s + inputFileExtension;
         const auto outputFileStream = createOutputFile(outputFileExtension);
 
+        // If visitor has skipInstrument set, no instrumentation points are added
+        // so the file is output into the .inst file unchanged.
+        bool skipInstrument{false};
+        if (!shouldInstrumentFile(inputFilePath)) {
+            llvm::outs() << "Skipping instrumentation of " << inputFilePath
+                    << " due to selective instrumentation.\n";
+            skipInstrument = true;
+        }
         // Walk the parse tree -- marks nodes for instrumentation
-        SaltInstrumentParseTreeVisitor visitor{&parsing};
+        SaltInstrumentParseTreeVisitor visitor{&parsing, skipInstrument};
         Walk(parsing.parseTree(), visitor);
 
         // Use the instrumentation points stored in the Visitor to write the instrumented file.
-        instrumentFile(*inputFilePath, *outputFileStream, visitor, instMap);
+        instrumentFile(inputFilePath, *outputFileStream, visitor, instMap);
 
         outputFileStream->flush();
 

From 2671585adfecff41c01b1f2ffb59c9de3507b724 Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Tue, 7 Jan 2025 15:49:32 -0800
Subject: [PATCH 112/135] Support routine-level include/exclude in selective
 instrumentation.

---
 src/salt_instrument_flang_plugin.cpp | 106 +++++++++++++++++++++------
 1 file changed, 82 insertions(+), 24 deletions(-)

diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index 6ddac51..13e476f 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -1,5 +1,5 @@
 /*
-Copyright (C) 2024, ParaTools, Inc.
+Copyright (C) 2024-2025, ParaTools, Inc.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -151,7 +151,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
 
     struct SaltInstrumentParseTreeVisitor {
         explicit SaltInstrumentParseTreeVisitor(Fortran::parser::Parsing *parsing, const bool skipInstrument = false)
-            : mainProgramLine_(0), subProgramLine_(0), skipInstrument_(skipInstrument), parsing(parsing) {
+            : mainProgramLine_(0), subProgramLine_(0), skipInstrumentFile_(skipInstrument), parsing(parsing) {
         }
 
         /**
@@ -164,7 +164,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                                      const int start_line,
                                      const std::optional<std::string> &timer_name = std::nullopt,
                                      const int conditional_column = 0) {
-            if (!skipInstrument_) {
+            if (!skipInstrumentFile_ && !skipInstrumentSubprogram_) {
                 instrumentationPoints_.emplace_back(
                     instrumentation_point_type, start_line, timer_name, conditional_column);
             }
@@ -200,6 +200,55 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
             return std::nullopt;
         }
 
+        [[nodiscard]] static std::string convertWildcardToRegexForm(const std::string &wildString) {
+            // Escape all regex special characters
+            static const std::regex metacharacters(R"([\.\^\$\+\(\)\[\]\{\}\|\?\*])");
+            const std::string escapedString{std::regex_replace(wildString, metacharacters, R"(\$&)")};
+            // Convert lines in TAU select file format (where "#" means zero or more characters)
+            // to regex version (where ".*" means zero or more characters).
+            // "#" is used for wildcard in routine names in TAU selective instrumentation files
+            // because "*" can be used in C/C++ function identifiers as part of pointer types.
+            static const std::regex hashRegex{R"(#)"};
+            return std::regex_replace(escapedString, hashRegex, ".*");
+        }
+
+        [[nodiscard]] static bool shouldInstrumentSubprogram(const std::string &subprogramName) {
+            // Check if this subprogram should be instrumented.
+            // It should if:
+            //   - No include or exclude list is specified
+            //   - An exclude list is present and the subprogram is not in it
+            //   - An include list is present and the subprogram is in it (and not on the exclude list)
+
+            if (includelist.empty() && excludelist.empty()) {
+                return true;
+            }
+
+            for (const auto &excludeEntry: excludelist) {
+                if (const std::regex excludeRegex{convertWildcardToRegexForm(excludeEntry)}; std::regex_search(
+                    subprogramName, excludeRegex)) {
+                    return false;
+                }
+            }
+
+            bool subprogramInIncludeList{false};
+            for (const auto &includeEntry: includelist) {
+                if (const std::regex includeRegex{convertWildcardToRegexForm(includeEntry)}; std::regex_search(
+                    subprogramName, includeRegex)) {
+                    subprogramInIncludeList = true;
+                    break;
+                }
+            }
+
+            if (!includelist.empty()) {
+                if (subprogramInIncludeList) {
+                    return true;
+                }
+                return false;
+            }
+
+            return true;
+        }
+
         // Default empty visit functions for otherwise unhandled types.
         template<typename A>
         static bool Pre(const A &) { return true; }
@@ -249,11 +298,17 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
             subprogramName_ = name.ToString();
             subProgramLine_ = parsing->allCooked().GetSourcePositionRange(name.source)->first.line;
             llvm::outs() << "Enter Subroutine: " << subprogramName_ << "\n";
+            if (!shouldInstrumentSubprogram(subprogramName_)) {
+                llvm::outs() << "Skipping instrumentation of " << subprogramName_ <<
+                        " due to selective instrumentation\n";
+                skipInstrumentSubprogram_ = true;
+            }
             return true;
         }
 
         void Post(const Fortran::parser::SubroutineSubprogram &) {
             llvm::outs() << "Exit Subroutine: " << subprogramName_ << "\n";
+            skipInstrumentSubprogram_ = false;
             subprogramName_.clear();
         }
 
@@ -262,11 +317,17 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
             subprogramName_ = name.ToString();
             subProgramLine_ = parsing->allCooked().GetSourcePositionRange(name.source)->first.line;
             llvm::outs() << "Enter Function: " << subprogramName_ << "\n";
+            if (!shouldInstrumentSubprogram(subprogramName_)) {
+                llvm::outs() << "Skipping instrumentation of " << subprogramName_ <<
+                        " due to selective instrumentation\n";
+                skipInstrumentSubprogram_ = true;
+            }
             return true;
         }
 
         void Post(const Fortran::parser::FunctionSubprogram &) {
             llvm::outs() << "Exit Function: " << subprogramName_ << "\n";
+            skipInstrumentSubprogram_ = false;
             subprogramName_.clear();
             subProgramLine_ = 0;
         }
@@ -698,7 +759,8 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         std::string subprogramName_;
         int subProgramLine_;
 
-        bool skipInstrument_;
+        bool skipInstrumentFile_;
+        bool skipInstrumentSubprogram_{false};
 
         std::vector<SaltInstrumentationPoint> instrumentationPoints_;
 
@@ -880,11 +942,15 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
         return map;
     }
 
-    [[nodiscard]] static std::string convertToRegexForm(const std::string &globString) {
+    [[nodiscard]] static std::string convertGlobToRegexForm(const std::string &globString) {
         // Convert lines in shell glob format (where "*" means zero or more characters)
         // to regex version (where ".*" means zero or more characters).
+        // This is used for files in TAU selective instrumentation files.
         static std::regex starRegex{R"(\*)"};
-        return std::regex_replace(globString, starRegex, ".*");
+        // Escape regex metacharacters first ("*" excluded) so the "." of each ".*" stays a wildcard.
+        static const std::regex metacharacters(R"([\.\^\$\+\(\)\[\]\{\}\|\?])");
+        const std::string escapedString{std::regex_replace(globString, metacharacters, R"(\$&)")};
+        return std::regex_replace(escapedString, starRegex, ".*");
     }
 
     [[nodiscard]] static bool shouldInstrumentFile(const std::filesystem::path &filePath) {
@@ -898,28 +964,20 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
             return true;
         }
 
-        bool fileInExcludeList{false};
         const auto filePart{filePath.filename()};
-        if (!fileexcludelist.empty()) {
-            for (const auto &excludeEntry: fileexcludelist) {
-                if (const std::regex excludeRegex{convertToRegexForm(excludeEntry)}; std::regex_search(
-                    filePart.string(), excludeRegex)) {
-                    fileInExcludeList = true;
-                    break;
-                }
+        for (const auto &excludeEntry: fileexcludelist) {
+            if (const std::regex excludeRegex{convertGlobToRegexForm(excludeEntry)}; std::regex_search(
+                filePart.string(), excludeRegex)) {
+                return false;
             }
         }
-        if (fileInExcludeList) {
-            return false;
-        }
+
         bool fileInIncludeList{false};
-        if (!fileincludelist.empty()) {
-            for (const auto &includeEntry: fileincludelist) {
-                if (const std::regex includeRegex{convertToRegexForm(includeEntry)}; std::regex_search(
-                    filePart.string(), includeRegex)) {
-                    fileInIncludeList = true;
-                    break;
-                }
+        for (const auto &includeEntry: fileincludelist) {
+            if (const std::regex includeRegex{convertGlobToRegexForm(includeEntry)}; std::regex_search(
+                filePart.string(), includeRegex)) {
+                fileInIncludeList = true;
+                break;
             }
         }
 

From 1d04ceb8c0fa2ab8fd5d708962b39d8a8ecac604 Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Tue, 14 Jan 2025 15:35:16 -0800
Subject: [PATCH 113/135] Split source location functions into separate file.

The source location routines could be used for purposes other than
instrumentation, so this commit moves them to a separate file and adds a
header so they can be used outside the instrumentor. The parser is now
passed as an argument so that they don't need to be inside the visitor
class.
---
 CMakeLists.txt                       |   2 +
 include/flang_source_location.hpp    |  90 +++++++
 src/flang_source_location.cpp        | 336 +++++++++++++++++++++++++
 src/salt_instrument_flang_plugin.cpp | 353 +--------------------------
 4 files changed, 433 insertions(+), 348 deletions(-)
 create mode 100644 include/flang_source_location.hpp
 create mode 100644 src/flang_source_location.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a89ce1f..3bf91d4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -292,11 +292,13 @@ if(MLIR_FOUND AND Flang_FOUND)
     set(SALT_FLANG_PLUGIN_HEADER_FILES
     selectfile.hpp
     tau_datatypes.h
+    flang_source_location.hpp
     )
     list(TRANSFORM SALT_FLANG_PLUGIN_HEADER_FILES PREPEND "${CMAKE_SOURCE_DIR}/include/")
 
     set(SALT_FLANG_PLUGIN_SRCS
     selectfile.cpp
+    flang_source_location.cpp
     salt_instrument_flang_plugin.cpp
     )
     list(TRANSFORM SALT_FLANG_PLUGIN_SRCS PREPEND "${CMAKE_SOURCE_DIR}/src/")
diff --git a/include/flang_source_location.hpp b/include/flang_source_location.hpp
new file mode 100644
index 0000000..728ad62
--- /dev/null
+++ b/include/flang_source_location.hpp
@@ -0,0 +1,90 @@
+/* Copyright (C) 2025, ParaTools, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#ifndef FLANG_SOURCE_LOCATION_H
+#define FLANG_SOURCE_LOCATION_H
+
+
+#include <optional>
+
+#include "flang/Parser/char-block.h"
+#include "flang/Parser/parsing.h"
+#include "flang/Parser/source.h"
+
+namespace salt::fortran {
+    /**
+     * From a CharBlock object (generally held in the `source` field of a parse tree node),
+     * get the source position (file, line, column).
+     * If `end` is set, returns the ending position of the block.
+     * If `end` is not set, returns the starting position of the block.
+     */
+    [[nodiscard]] std::optional<Fortran::parser::SourcePosition> locationFromSource(
+        const Fortran::parser::Parsing *parsing,
+        const Fortran::parser::CharBlock &charBlock,
+        bool end);
+
+    /**
+    * Gets the location (if present) associated with an OpenMPDeclarativeConstruct.
+    * If `end` is set, returns the ending position of the block.
+    * If `end` is not set, returns the starting position of the block.
+    */
+    [[nodiscard]] std::optional<Fortran::parser::SourcePosition> getLocation(
+        const Fortran::parser::Parsing *parsing,
+        const Fortran::parser::OpenMPDeclarativeConstruct &construct,
+        bool end);
+
+    /**
+    * Gets the location (if present) associated with an OpenMPConstruct.
+    * If `end` is set, returns the ending position of the block.
+    * If `end` is not set, returns the starting position of the block.
+    */
+    [[nodiscard]] std::optional<Fortran::parser::SourcePosition> getLocation(
+        const Fortran::parser::Parsing *parsing,
+        const Fortran::parser::OpenMPConstruct &construct,
+        bool end);
+
+    /**
+    * Gets the location (if present) associated with an OpenACCConstruct.
+    * If `end` is set, returns the ending position of the block.
+    * If `end` is not set, returns the starting position of the block.
+    */
+    [[nodiscard]] std::optional<Fortran::parser::SourcePosition> getLocation(
+        const Fortran::parser::Parsing *parsing,
+        const Fortran::parser::OpenACCConstruct &construct,
+        bool end);
+
+    /**
+    * Gets the location (if present) associated with an ExecutableConstruct.
+    * If `end` is set, returns the ending position of the block.
+    * If `end` is not set, returns the starting position of the block.
+    */
+    [[nodiscard]] std::optional<Fortran::parser::SourcePosition> getLocation(
+        const Fortran::parser::Parsing *parsing,
+        const Fortran::parser::ExecutableConstruct &construct,
+        bool end);
+
+    /**
+    * Gets the location (if present) associated with an ExecutionPartConstruct.
+    * If `end` is set, returns the ending position of the block.
+    * If `end` is not set, returns the starting position of the block.
+    */
+    [[nodiscard]] std::optional<Fortran::parser::SourcePosition> getLocation(
+        const Fortran::parser::Parsing *parsing,
+        const Fortran::parser::ExecutionPartConstruct &construct,
+        bool end);
+
+}
+
+#endif //FLANG_SOURCE_LOCATION_H
diff --git a/src/flang_source_location.cpp b/src/flang_source_location.cpp
new file mode 100644
index 0000000..15194ee
--- /dev/null
+++ b/src/flang_source_location.cpp
@@ -0,0 +1,336 @@
+/* Copyright (C) 2025, ParaTools, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "flang/Parser/char-block.h"
+#include "flang/Parser/parsing.h"
+#include "flang/Parser/source.h"
+
+#include "flang_source_location.hpp"
+
+[[nodiscard]] std::optional<Fortran::parser::SourcePosition> salt::fortran::locationFromSource(
+    const Fortran::parser::Parsing *parsing, const Fortran::parser::CharBlock &charBlock, const bool end) {
+    // Ask the cooked sources for the position range covered by `charBlock`.
+    const auto &positions{parsing->allCooked().GetSourcePositionRange(charBlock)};
+    if (!positions.has_value()) {
+        return std::nullopt;
+    }
+    // `second` is returned when the end was requested, `first` otherwise.
+    if (end) { return positions->second; }
+    return positions->first;
+}
+
+[[nodiscard]] std::optional<Fortran::parser::SourcePosition> salt::fortran::getLocation(
+    const Fortran::parser::Parsing *parsing,
+    const Fortran::parser::OpenMPDeclarativeConstruct &construct,
+    const bool end) {
+    // Modelled on the equivalent function in flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp.
+    // Every alternative held in `construct.u` is resolved through its own `source` member.
+    const auto resolve = [parsing, end](const auto &alternative)
+        -> std::optional<Fortran::parser::SourcePosition> {
+        return locationFromSource(parsing, alternative.source, end);
+    };
+    return std::visit(resolve, construct.u);
+}
+
+[[nodiscard]] std::optional<Fortran::parser::SourcePosition> salt::fortran::getLocation(
+    const Fortran::parser::Parsing *parsing,
+    const Fortran::parser::OpenMPConstruct &construct,
+    const bool end) {
+    // Based on the equivalent function in flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp.
+    // NOTE(review): `end` only picks the end of the same source block used for the start - confirm intended.
+    return std::visit(
+        Fortran::common::visitors{
+            [&](const Fortran::parser::OpenMPStandaloneConstruct &c) -> std::optional<
+        Fortran::parser::SourcePosition> {
+                return locationFromSource(parsing, c.source, end);
+            },
+            // Generic fallback for the alternatives without a dedicated lambda (e.g.
+            // OpenMPSectionsConstruct, OpenMPLoopConstruct, OpenMPBlockConstruct,
+            // OpenMPCriticalConstruct): use the source of the first element of the tuple `t`.
+            [&](const auto &c) -> std::optional<Fortran::parser::SourcePosition> {
+                const Fortran::parser::CharBlock &source{std::get<0>(c.t).source};
+                return locationFromSource(parsing, source, end);
+            },
+            [&](const Fortran::parser::OpenMPAtomicConstruct &c) -> std::optional<
+        Fortran::parser::SourcePosition> {
+                return std::visit(
+                    [&](const auto &o) -> std::optional<Fortran::parser::SourcePosition> {
+                        const Fortran::parser::CharBlock &source{
+                            std::get<Fortran::parser::Verbatim>(o.t).source
+                        };
+                        return locationFromSource(parsing, source, end);
+                    },
+                    c.u);
+            },
+            [&](const Fortran::parser::OpenMPSectionConstruct &c) -> std::optional<
+        Fortran::parser::SourcePosition> {
+                const Fortran::parser::CharBlock &source{c.source};
+                return locationFromSource(parsing, source, end);
+            },
+        },
+        construct.u);
+}
+
+[[nodiscard]] std::optional<Fortran::parser::SourcePosition> salt::fortran::getLocation(
+    const Fortran::parser::Parsing *parsing,
+    const Fortran::parser::OpenACCConstruct &construct, const bool end) {
+    // Based on the equivalent function in flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp.
+    // With `end` set, a loop construct without a DoConstruct falls back to its begin directive's source.
+    return std::visit(
+        Fortran::common::visitors{
+            [&](const auto &c) -> std::optional<Fortran::parser::SourcePosition> {
+                return locationFromSource(parsing, c.source, end);
+            },
+            [&](const Fortran::parser::OpenACCBlockConstruct &c) -> std::optional<
+        Fortran::parser::SourcePosition> {
+                if (end) {
+                    return locationFromSource(parsing, std::get<Fortran::parser::AccEndBlockDirective>(c.t).source,
+                                              end);
+                }
+                return locationFromSource(parsing, std::get<Fortran::parser::AccBeginBlockDirective>(c.t).source, end);
+            },
+            [&](const Fortran::parser::OpenACCLoopConstruct &c) -> std::optional<
+        Fortran::parser::SourcePosition> {
+                if (end) {
+                    if (const auto &maybeDo = std::get<std::optional<Fortran::parser::DoConstruct> >(c.t);
+                        maybeDo.has_value()) {
+                        return locationFromSource(parsing,
+                            std::get<Fortran::parser::Statement<Fortran::parser::EndDoStmt> >(maybeDo.value().t)
+                            .
+                            source, end);
+                    }
+                }
+                return locationFromSource(parsing, std::get<Fortran::parser::AccBeginLoopDirective>(c.t).source, end);
+            },
+        }, construct.u);
+}
+
+[[nodiscard]] std::optional<Fortran::parser::SourcePosition> salt::fortran::getLocation(
+    const Fortran::parser::Parsing *parsing,
+    const Fortran::parser::ExecutableConstruct &construct,
+    const bool end) {
+    /* Possibilities for ExecutableConstruct (those without a dedicated lambda below use the generic `.source` one):
+         Statement<ActionStmt>
+         common::Indirection<AssociateConstruct>
+         common::Indirection<BlockConstruct>
+         common::Indirection<CaseConstruct>
+         common::Indirection<ChangeTeamConstruct>
+         common::Indirection<CriticalConstruct>
+         Statement<common::Indirection<LabelDoStmt>>
+         Statement<common::Indirection<EndDoStmt>>
+         common::Indirection<DoConstruct>
+         common::Indirection<IfConstruct>
+         common::Indirection<SelectRankConstruct>
+         common::Indirection<SelectTypeConstruct>
+         common::Indirection<WhereConstruct>
+         common::Indirection<ForallConstruct>
+         common::Indirection<CompilerDirective>
+         common::Indirection<OpenACCConstruct>
+         common::Indirection<AccEndCombinedDirective>
+         common::Indirection<OpenMPConstruct>
+         common::Indirection<OmpEndLoopDirective>
+         common::Indirection<CUFKernelDoConstruct>
+    */
+    return std::visit(
+        Fortran::common::visitors{
+            [&](const auto &c) -> std::optional<Fortran::parser::SourcePosition> {
+                return locationFromSource(parsing, c.source, end);
+            },
+            [&](const Fortran::common::Indirection<Fortran::parser::CUFKernelDoConstruct> &c) ->
+        std::optional<Fortran::parser::SourcePosition> {
+                if (end) {
+                    const auto &optionalConstruct = std::get<std::optional<Fortran::parser::DoConstruct> >(
+                        c.value().t);
+                    if (optionalConstruct.has_value()) {
+                        return locationFromSource(parsing,
+                            std::get<Fortran::parser::Statement<Fortran::parser::EndDoStmt> >(
+                                optionalConstruct.value().t).source, end);
+                    }
+                }
+                return locationFromSource(parsing,
+                    std::get<Fortran::parser::CUFKernelDoConstruct::Directive>(c.value().t).source, end);
+            },
+            [&](const Fortran::common::Indirection<Fortran::parser::OmpEndLoopDirective> &c) ->
+        std::optional<Fortran::parser::SourcePosition> {
+                return locationFromSource(parsing, c.value().source, end);
+            },
+            [&](const Fortran::common::Indirection<Fortran::parser::OpenMPConstruct> &c) ->
+        std::optional<Fortran::parser::SourcePosition> {
+                return getLocation(parsing, c.value(), end);
+            },
+            [&](const Fortran::common::Indirection<Fortran::parser::AccEndCombinedDirective> &c) ->
+        std::optional<Fortran::parser::SourcePosition> {
+                return locationFromSource(parsing, c.value().source, end);
+            },
+            [&](const Fortran::common::Indirection<Fortran::parser::OpenACCConstruct> &c) ->
+        std::optional<Fortran::parser::SourcePosition> {
+                return getLocation(parsing, c.value(), end);
+            },
+            [&](const Fortran::common::Indirection<Fortran::parser::CompilerDirective> &c)->
+        std::optional<Fortran::parser::SourcePosition> {
+                return locationFromSource(parsing, c.value().source, end);
+            },
+            [&](const Fortran::common::Indirection<Fortran::parser::ForallConstruct> &c) ->
+        std::optional<Fortran::parser::SourcePosition> {
+                if (end) {
+                    return locationFromSource(parsing,
+                        std::get<Fortran::parser::Statement<Fortran::parser::EndForallStmt> >(c.value().t).
+                        source, end);
+                }
+                return locationFromSource(parsing,
+                    std::get<Fortran::parser::Statement<Fortran::parser::ForallConstructStmt> >(c.value().t).
+                    source, end);
+            },
+            [&](const Fortran::common::Indirection<Fortran::parser::WhereConstruct> &c) ->
+        std::optional<Fortran::parser::SourcePosition> {
+                if (end) {
+                    return locationFromSource(parsing,
+                        std::get<Fortran::parser::Statement<Fortran::parser::EndWhereStmt> >(c.value().t).
+                        source, end);
+                }
+                return locationFromSource(parsing,
+                    std::get<Fortran::parser::Statement<Fortran::parser::WhereConstructStmt> >(c.value().t).
+                    source, end);
+            },
+            [&](const Fortran::common::Indirection<Fortran::parser::SelectTypeConstruct> &c) ->
+        std::optional<Fortran::parser::SourcePosition> {
+                if (end) {
+                    return locationFromSource(parsing,
+                        std::get<Fortran::parser::Statement<Fortran::parser::EndSelectStmt> >(c.value().t).
+                        source, end);
+                }
+                return locationFromSource(parsing,
+                    std::get<Fortran::parser::Statement<Fortran::parser::SelectTypeStmt> >(c.value().t).source,
+                    end);
+            },
+            [&](const Fortran::common::Indirection<Fortran::parser::SelectRankConstruct> &c) ->
+        std::optional<Fortran::parser::SourcePosition> {
+                if (end) {
+                    return locationFromSource(parsing,
+                        std::get<Fortran::parser::Statement<Fortran::parser::EndSelectStmt> >(c.value().t).
+                        source, end);
+                }
+                return locationFromSource(parsing,
+                    std::get<Fortran::parser::Statement<Fortran::parser::SelectRankStmt> >(c.value().t).
+                    source, end);
+            },
+            [&](const Fortran::common::Indirection<Fortran::parser::IfConstruct> &c) ->
+        std::optional<Fortran::parser::SourcePosition> {
+                if (end) {
+                    return locationFromSource(parsing,
+                        std::get<Fortran::parser::Statement<Fortran::parser::EndIfStmt> >(c.value().t).source,
+                        end);
+                }
+                return locationFromSource(parsing,
+                    std::get<Fortran::parser::Statement<Fortran::parser::IfThenStmt> >(c.value().t).source,
+                    end);
+            },
+            [&](const Fortran::common::Indirection<Fortran::parser::DoConstruct> &c) ->
+        std::optional<Fortran::parser::SourcePosition> {
+                if (end) {
+                    return locationFromSource(parsing,
+                        std::get<Fortran::parser::Statement<Fortran::parser::EndDoStmt> >(c.value().t).source,
+                        end);
+                }
+                return locationFromSource(parsing,
+                    std::get<Fortran::parser::Statement<Fortran::parser::NonLabelDoStmt> >(c.value().t).source,
+                    end);
+            },
+            [&](const Fortran::common::Indirection<Fortran::parser::CriticalConstruct> &c) ->
+        std::optional<Fortran::parser::SourcePosition> {
+                if (end) {
+                    return locationFromSource(parsing,
+                        std::get<Fortran::parser::Statement<Fortran::parser::EndCriticalStmt> >(c.value().t).
+                        source,
+                        end);
+                }
+                return locationFromSource(parsing,
+                    std::get<Fortran::parser::Statement<Fortran::parser::CriticalStmt> >(c.value().t).source,
+                    end);
+            },
+            [&](const Fortran::common::Indirection<Fortran::parser::ChangeTeamConstruct> &c) ->
+        std::optional<Fortran::parser::SourcePosition> {
+                if (end) {
+                    return locationFromSource(parsing,
+                        std::get<Fortran::parser::Statement<Fortran::parser::EndChangeTeamStmt> >(c.value().t).
+                        source,
+                        end);
+                }
+                return locationFromSource(parsing,
+                    std::get<Fortran::parser::Statement<Fortran::parser::ChangeTeamStmt> >(c.value().t).source,
+                    end);
+            },
+            [&](const Fortran::common::Indirection<Fortran::parser::CaseConstruct> &c) ->
+        std::optional<Fortran::parser::SourcePosition> {
+                if (end) {
+                    return locationFromSource(parsing,
+                        std::get<Fortran::parser::Statement<Fortran::parser::EndSelectStmt> >(c.value().t).
+                        source, end);
+                }
+                return locationFromSource(parsing,
+                    std::get<Fortran::parser::Statement<Fortran::parser::SelectCaseStmt> >(c.value().t).source,
+                    end);
+            },
+            [&](const Fortran::common::Indirection<Fortran::parser::BlockConstruct> &c) ->
+        std::optional<Fortran::parser::SourcePosition> {
+                if (end) {
+                    return locationFromSource(parsing,
+                        std::get<Fortran::parser::Statement<Fortran::parser::EndBlockStmt> >(c.value().t).
+                        source,
+                        end);
+                }
+                return locationFromSource(parsing,
+                    std::get<Fortran::parser::Statement<Fortran::parser::BlockStmt> >(c.value().t).source, end);
+            },
+            [&](const Fortran::common::Indirection<Fortran::parser::AssociateConstruct> &c) ->
+        std::optional<Fortran::parser::SourcePosition> {
+                if (end) {
+                    return locationFromSource(parsing,
+                        std::get<Fortran::parser::Statement<Fortran::parser::EndAssociateStmt> >(c.value().t).
+                        source, end);
+                }
+                return locationFromSource(parsing,
+                    std::get<Fortran::parser::Statement<Fortran::parser::AssociateStmt> >(c.value().t).
+                    source, end);
+            }
+        }, construct.u);
+}
+
+[[nodiscard]] std::optional<Fortran::parser::SourcePosition> salt::fortran::getLocation(
+    const Fortran::parser::Parsing *parsing,
+    const Fortran::parser::ExecutionPartConstruct &construct,
+    const bool end) {
+    /* Possibilities for ExecutionPartConstruct, and how each one is handled below:
+     *   ExecutableConstruct                           -> forwarded to the ExecutableConstruct overload
+     *   Statement<common::Indirection<FormatStmt>>    -> generic lambda, uses `.source`
+     *   Statement<common::Indirection<EntryStmt>>     -> generic lambda, uses `.source`
+     *   Statement<common::Indirection<DataStmt>>      -> generic lambda, uses `.source`
+     *   Statement<common::Indirection<NamelistStmt>>  -> generic lambda, uses `.source`
+     *   ErrorRecovery                                 -> DIE(), not expected in the parse tree
+     */
+    return std::visit(
+        Fortran::common::visitors{
+            [&](const Fortran::parser::ExecutableConstruct &c) -> std::optional<
+        Fortran::parser::SourcePosition> {
+                return getLocation(parsing, c, end);
+            },
+            [&](const auto &c) -> std::optional<Fortran::parser::SourcePosition> {
+                return locationFromSource(parsing, c.source, end);
+            },
+            [&](const Fortran::parser::ErrorRecovery &) -> std::optional<Fortran::parser::SourcePosition> {
+                DIE("Should not encounter ErrorRecovery in parse tree");
+            }
+        }, construct.u);
+}
diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index 13e476f..4c32c06 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -46,6 +46,7 @@ limitations under the License.
 #include "flang/Common/indirection.h"
 
 #include "selectfile.hpp"
+#include "flang_source_location.hpp"
 
 // TODO Split declarations into a separate header file.
 // TODO Put debug output behind verbose flag
@@ -65,6 +66,7 @@ limitations under the License.
 #define SALT_F77_LINE_LENGTH 64
 
 using namespace Fortran::frontend;
+using namespace salt::fortran;
 
 
 /**
@@ -182,24 +184,6 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
             return ss.str();
         }
 
-        /**
-         * From a CharBlock object (generally held in the `source` field of a parse tree node,
-         * get the source position (file, line, column).
-         * If `end` is set, returns the ending position of the block.
-         * If `end` is not set (and by default), returns the starting position of the block.
-         */
-        [[nodiscard]] std::optional<Fortran::parser::SourcePosition> locationFromSource(
-            const Fortran::parser::CharBlock &charBlock, const bool end) const {
-            if (const auto &sourceRange{parsing->allCooked().GetSourcePositionRange(charBlock)}; sourceRange.
-                has_value()) {
-                if (end) {
-                    return sourceRange->second;
-                }
-                return sourceRange->first;
-            }
-            return std::nullopt;
-        }
-
         [[nodiscard]] static std::string convertWildcardToRegexForm(const std::string &wildString) {
             // Escape all regex special characters
             static const std::regex metacharacters(R"([\.\^\$\+\(\)\[\]\{\}\|\?\*])");
@@ -332,333 +316,6 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
             subProgramLine_ = 0;
         }
 
-        // TODO split location-getting routines into a separate file
-
-        [[nodiscard]] std::optional<Fortran::parser::SourcePosition> getLocation(
-            const Fortran::parser::OpenMPDeclarativeConstruct &construct,
-            const bool end) {
-            // This function is based on the equivalent function in
-            // flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
-            return std::visit(
-                [&](const auto &o) -> std::optional<Fortran::parser::SourcePosition> {
-                    return locationFromSource(o.source, end);
-                },
-                construct.u);
-        }
-
-        [[nodiscard]] std::optional<Fortran::parser::SourcePosition> getLocation(
-            const Fortran::parser::OpenMPConstruct &construct,
-            const bool end) {
-            // This function is based on the equivalent function in
-            // flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
-            return std::visit(
-                Fortran::common::visitors{
-                    [&](const Fortran::parser::OpenMPStandaloneConstruct &c) -> std::optional<
-                Fortran::parser::SourcePosition> {
-                        return locationFromSource(c.source, end);
-                    },
-                    // OpenMPSectionsConstruct, OpenMPLoopConstruct,
-                    // OpenMPBlockConstruct, OpenMPCriticalConstruct Get the source from
-                    // the directive field.
-                    [&](const auto &c) -> std::optional<Fortran::parser::SourcePosition> {
-                        const Fortran::parser::CharBlock &source{std::get<0>(c.t).source};
-                        return locationFromSource(source, end);
-                    },
-                    [&](const Fortran::parser::OpenMPAtomicConstruct &c) -> std::optional<
-                Fortran::parser::SourcePosition> {
-                        return std::visit(
-                            [&](const auto &o) -> std::optional<Fortran::parser::SourcePosition> {
-                                const Fortran::parser::CharBlock &source{
-                                    std::get<Fortran::parser::Verbatim>(o.t).source
-                                };
-                                return locationFromSource(source, end);
-                            },
-                            c.u);
-                    },
-                    [&](const Fortran::parser::OpenMPSectionConstruct &c) -> std::optional<
-                Fortran::parser::SourcePosition> {
-                        const Fortran::parser::CharBlock &source{c.source};
-                        return locationFromSource(source, end);
-                    },
-                },
-                construct.u);
-        }
-
-        [[nodiscard]] std::optional<Fortran::parser::SourcePosition>
-        getLocation(const Fortran::parser::OpenACCConstruct &construct, const bool end) {
-            // This function is based on the equivalent function in
-            // flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
-            return std::visit(
-                Fortran::common::visitors{
-                    [&](const auto &c) -> std::optional<Fortran::parser::SourcePosition> {
-                        return locationFromSource(c.source, end);
-                    },
-                    [&](const Fortran::parser::OpenACCBlockConstruct &c) -> std::optional<
-                Fortran::parser::SourcePosition> {
-                        if (end) {
-                            return locationFromSource(std::get<Fortran::parser::AccEndBlockDirective>(c.t).source,
-                                                      end);
-                        }
-                        return locationFromSource(std::get<Fortran::parser::AccBeginBlockDirective>(c.t).source, end);
-                    },
-                    [&](const Fortran::parser::OpenACCLoopConstruct &c) -> std::optional<
-                Fortran::parser::SourcePosition> {
-                        if (end) {
-                            if (const auto &maybeDo = std::get<std::optional<Fortran::parser::DoConstruct> >(c.t);
-                                maybeDo.has_value()) {
-                                return locationFromSource(
-                                    std::get<Fortran::parser::Statement<Fortran::parser::EndDoStmt> >(maybeDo.value().t)
-                                    .
-                                    source, end);
-                            }
-                        }
-                        return locationFromSource(std::get<Fortran::parser::AccBeginLoopDirective>(c.t).source, end);
-                    },
-                }, construct.u);
-        }
-
-        [[nodiscard]] std::optional<Fortran::parser::SourcePosition> getLocation(
-            const Fortran::parser::ExecutableConstruct &construct,
-            const bool end) {
-            /* Possibilities for ExecutableConstruct:
-                 Statement<ActionStmt>
-                 common::Indirection<AssociateConstruct>
-                 common::Indirection<BlockConstruct>
-                 common::Indirection<CaseConstruct>
-                 common::Indirection<ChangeTeamConstruct>
-                 common::Indirection<CriticalConstruct>
-                 Statement<common::Indirection<LabelDoStmt>>
-                 Statement<common::Indirection<EndDoStmt>>
-                 common::Indirection<DoConstruct>
-                 common::Indirection<IfConstruct>
-                 common::Indirection<SelectRankConstruct>
-                 common::Indirection<SelectTypeConstruct>
-                 common::Indirection<WhereConstruct>
-                 common::Indirection<ForallConstruct>
-                 common::Indirection<CompilerDirective>
-                 common::Indirection<OpenACCConstruct>
-                 common::Indirection<AccEndCombinedDirective>
-                 common::Indirection<OpenMPConstruct>
-                 common::Indirection<OmpEndLoopDirective>
-                 common::Indirection<CUFKernelDoConstruct>
-            */
-            return std::visit(
-                Fortran::common::visitors{
-                    [&](const auto &c) -> std::optional<Fortran::parser::SourcePosition> {
-                        return locationFromSource(c.source, end);
-                    },
-                    [&](const Fortran::common::Indirection<Fortran::parser::CUFKernelDoConstruct> &c) ->
-                std::optional<Fortran::parser::SourcePosition> {
-                        if (end) {
-                            const auto &optionalConstruct = std::get<std::optional<Fortran::parser::DoConstruct> >(
-                                c.value().t);
-                            if (optionalConstruct.has_value()) {
-                                return locationFromSource(
-                                    std::get<Fortran::parser::Statement<Fortran::parser::EndDoStmt> >(
-                                        optionalConstruct.value().t).source, end);
-                            }
-                        }
-                        return locationFromSource(
-                            std::get<Fortran::parser::CUFKernelDoConstruct::Directive>(c.value().t).source, end);
-                    },
-                    [&](const Fortran::common::Indirection<Fortran::parser::OmpEndLoopDirective> &c) ->
-                std::optional<Fortran::parser::SourcePosition> {
-                        return locationFromSource(c.value().source, end);
-                    },
-                    [&](const Fortran::common::Indirection<Fortran::parser::OpenMPConstruct> &c) ->
-                std::optional<Fortran::parser::SourcePosition> {
-                        return getLocation(c.value(), end);
-                    },
-                    [&](const Fortran::common::Indirection<Fortran::parser::AccEndCombinedDirective> &c) ->
-                std::optional<Fortran::parser::SourcePosition> {
-                        return locationFromSource(c.value().source, end);
-                    },
-                    [&](const Fortran::common::Indirection<Fortran::parser::OpenACCConstruct> &c) ->
-                std::optional<Fortran::parser::SourcePosition> {
-                        return getLocation(c.value(), end);
-                    },
-                    [&](const Fortran::common::Indirection<Fortran::parser::CompilerDirective> &c)->
-                std::optional<Fortran::parser::SourcePosition> {
-                        return locationFromSource(c.value().source, end);
-                    },
-                    [&](const Fortran::common::Indirection<Fortran::parser::ForallConstruct> &c) ->
-                std::optional<Fortran::parser::SourcePosition> {
-                        if (end) {
-                            return locationFromSource(
-                                std::get<Fortran::parser::Statement<Fortran::parser::EndForallStmt> >(c.value().t).
-                                source, end);
-                        }
-                        return locationFromSource(
-                            std::get<Fortran::parser::Statement<Fortran::parser::ForallConstructStmt> >(c.value().t).
-                            source, end);
-                    },
-                    [&](const Fortran::common::Indirection<Fortran::parser::WhereConstruct> &c) ->
-                std::optional<Fortran::parser::SourcePosition> {
-                        if (end) {
-                            return locationFromSource(
-                                std::get<Fortran::parser::Statement<Fortran::parser::EndWhereStmt> >(c.value().t).
-                                source, end);
-                        }
-                        return locationFromSource(
-                            std::get<Fortran::parser::Statement<Fortran::parser::WhereConstructStmt> >(c.value().t).
-                            source, end);
-                    },
-                    [&](const Fortran::common::Indirection<Fortran::parser::SelectTypeConstruct> &c) ->
-                std::optional<Fortran::parser::SourcePosition> {
-                        if (end) {
-                            return locationFromSource(
-                                std::get<Fortran::parser::Statement<Fortran::parser::EndSelectStmt> >(c.value().t).
-                                source, end);
-                        }
-                        return locationFromSource(
-                            std::get<Fortran::parser::Statement<Fortran::parser::SelectTypeStmt> >(c.value().t).source,
-                            end);
-                    },
-                    [&](const Fortran::common::Indirection<Fortran::parser::SelectRankConstruct> &c) ->
-                std::optional<Fortran::parser::SourcePosition> {
-                        if (end) {
-                            return locationFromSource(
-                                std::get<Fortran::parser::Statement<Fortran::parser::EndSelectStmt> >(c.value().t).
-                                source, end);
-                        }
-                        return locationFromSource(
-                            std::get<Fortran::parser::Statement<Fortran::parser::SelectRankStmt> >(c.value().t).
-                            source, end);
-                    },
-                    [&](const Fortran::common::Indirection<Fortran::parser::IfConstruct> &c) ->
-                std::optional<Fortran::parser::SourcePosition> {
-                        if (end) {
-                            return locationFromSource(
-                                std::get<Fortran::parser::Statement<Fortran::parser::EndIfStmt> >(c.value().t).source,
-                                end);
-                        }
-                        return locationFromSource(
-                            std::get<Fortran::parser::Statement<Fortran::parser::IfThenStmt> >(c.value().t).source,
-                            end);
-                    },
-                    [&](const Fortran::common::Indirection<Fortran::parser::DoConstruct> &c) ->
-                std::optional<Fortran::parser::SourcePosition> {
-                        if (end) {
-                            return locationFromSource(
-                                std::get<Fortran::parser::Statement<Fortran::parser::EndDoStmt> >(c.value().t).source,
-                                end);
-                        }
-                        return locationFromSource(
-                            std::get<Fortran::parser::Statement<Fortran::parser::NonLabelDoStmt> >(c.value().t).source,
-                            end);
-                    },
-                    [&](const Fortran::common::Indirection<Fortran::parser::CriticalConstruct> &c) ->
-                std::optional<Fortran::parser::SourcePosition> {
-                        if (end) {
-                            return locationFromSource(
-                                std::get<Fortran::parser::Statement<Fortran::parser::EndCriticalStmt> >(c.value().t).
-                                source,
-                                end);
-                        }
-                        return locationFromSource(
-                            std::get<Fortran::parser::Statement<Fortran::parser::CriticalStmt> >(c.value().t).source,
-                            end);
-                    },
-                    [&](const Fortran::common::Indirection<Fortran::parser::ChangeTeamConstruct> &c) ->
-                std::optional<Fortran::parser::SourcePosition> {
-                        if (end) {
-                            return locationFromSource(
-                                std::get<Fortran::parser::Statement<Fortran::parser::EndChangeTeamStmt> >(c.value().t).
-                                source,
-                                end);
-                        }
-                        return locationFromSource(
-                            std::get<Fortran::parser::Statement<Fortran::parser::ChangeTeamStmt> >(c.value().t).source,
-                            end);
-                    },
-                    [&](const Fortran::common::Indirection<Fortran::parser::CaseConstruct> &c) ->
-                std::optional<Fortran::parser::SourcePosition> {
-                        if (end) {
-                            return locationFromSource(
-                                std::get<Fortran::parser::Statement<Fortran::parser::EndSelectStmt> >(c.value().t).
-                                source, end);
-                        }
-                        return locationFromSource(
-                            std::get<Fortran::parser::Statement<Fortran::parser::SelectCaseStmt> >(c.value().t).source,
-                            end);
-                    },
-                    [&](const Fortran::common::Indirection<Fortran::parser::BlockConstruct> &c) ->
-                std::optional<Fortran::parser::SourcePosition> {
-                        if (end) {
-                            return locationFromSource(
-                                std::get<Fortran::parser::Statement<Fortran::parser::EndBlockStmt> >(c.value().t).
-                                source,
-                                end);
-                        }
-                        return locationFromSource(
-                            std::get<Fortran::parser::Statement<Fortran::parser::BlockStmt> >(c.value().t).source, end);
-                    },
-                    [&](const Fortran::common::Indirection<Fortran::parser::AssociateConstruct> &c) ->
-                std::optional<Fortran::parser::SourcePosition> {
-                        if (end) {
-                            return locationFromSource(
-                                std::get<Fortran::parser::Statement<Fortran::parser::EndAssociateStmt> >(c.value().t).
-                                source, end);
-                        }
-                        return locationFromSource(
-                            std::get<Fortran::parser::Statement<Fortran::parser::AssociateStmt> >(c.value().t).
-                            source, end);
-                    }
-                }, construct.u);
-        }
-
-        [[nodiscard]] std::optional<Fortran::parser::SourcePosition> getLocation(
-            const Fortran::parser::ExecutionPartConstruct &construct,
-            const bool end) {
-            /* Possibilities for ExecutionPartConstruct:
-             *   ExecutableConstruct
-             *   Statement<common::Indirection<FormatStmt>>
-             *   Statement<common::Indirection<EntryStmt>>
-             *   Statement<common::Indirection<DataStmt>>
-             *   Statement<common::Indirection<NamelistStmt>>
-             *   ErrorRecovery
-             */
-            return std::visit(
-                Fortran::common::visitors{
-                    [&](const Fortran::parser::ExecutableConstruct &c) -> std::optional<
-                Fortran::parser::SourcePosition> {
-                        return getLocation(c, end);
-                    },
-                    [&](const auto &c) -> std::optional<Fortran::parser::SourcePosition> {
-                        return locationFromSource(c.source, end);
-                    },
-                    [&](const Fortran::parser::ErrorRecovery &) -> std::optional<Fortran::parser::SourcePosition> {
-                        DIE("Should not encounter ErrorRecovery in parse tree");
-                    }
-                }, construct.u);
-        }
-
-        bool Pre(const Fortran::parser::IfStmt &ifStmt) {
-            if (const auto &ifAction{
-                    std::get<Fortran::parser::UnlabeledStatement<Fortran::parser::ActionStmt> >(ifStmt.t)
-                };
-                std::holds_alternative<Fortran::common::Indirection<
-                    Fortran::parser::ReturnStmt> >(ifAction.statement.u)) {
-                const auto startPos{
-                    locationFromSource(
-                        std::get<Fortran::parser::ScalarLogicalExpr>(ifStmt.t).thing.thing.value().source,
-                        false).value()
-                };
-                const auto endPos{
-                    locationFromSource(
-                        std::get<Fortran::parser::ScalarLogicalExpr>(ifStmt.t).thing.thing.value().source,
-                        true).value()
-                };
-                llvm::outs() << "If-return, conditional: (" << startPos.line << "," << startPos.column << ") - "
-                        << "(" << endPos.line << "," << endPos.column << ")\n";
-                // TODO this assumes that the conditional fits on one list
-                // make more robust, test with more cases
-                addInstrumentationPoint(SaltInstrumentationPointType::IF_RETURN, startPos.line, std::nullopt,
-                                        endPos.column);
-            }
-            return true;
-        }
-
         // Split handling of ExecutionPart into two phases
         // so that we insert Instrumentation Points in order
         // even if we separately insert them in visitors for
@@ -676,8 +333,8 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
             if (const Fortran::parser::Block &block = executionPart.v; block.empty()) {
                 llvm::outs() << "WARNING: Execution part empty.\n";
             } else {
-                const std::optional startLocOpt{getLocation(block.front(), false)};
-                const std::optional endLocOpt{getLocation(block.back(), true)};
+                const std::optional startLocOpt{getLocation(parsing, block.front(), false)};
+                const std::optional endLocOpt{getLocation(parsing, block.back(), true)};
 
                 if (!startLocOpt.has_value()) {
                     llvm::errs() << "ERROR: execution part had no start source location!\n";
@@ -742,7 +399,7 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
                 &execConstruct.u)) {
                 if (std::holds_alternative<Fortran::common::Indirection<Fortran::parser::ReturnStmt> >(
                     actionStmt->statement.u)) {
-                    const std::optional returnPos{locationFromSource(actionStmt->source, false)};
+                    const std::optional returnPos{locationFromSource(parsing, actionStmt->source, false)};
                     const int returnLine{returnPos.value().line};
                     llvm::outs() << "Return statement at " << returnLine << "\n";
                     addInstrumentationPoint(SaltInstrumentationPointType::RETURN_STMT, returnLine);

From 66c4eaa8445a7b58f1a4ba48adb5da8b96013ff1 Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Tue, 14 Jan 2025 15:48:21 -0800
Subject: [PATCH 114/135] Fix if-return instrumentation

---
 src/salt_instrument_flang_plugin.cpp | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index 4c32c06..1131edc 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -408,6 +408,32 @@ class SaltInstrumentAction final : public PluginParseTreeAction {
             return true;
         }
 
+        bool Pre(const Fortran::parser::IfStmt &ifStmt) {
+            if (const auto &ifAction{
+                    std::get<Fortran::parser::UnlabeledStatement<Fortran::parser::ActionStmt> >(ifStmt.t)
+                };
+                std::holds_alternative<Fortran::common::Indirection<
+                    Fortran::parser::ReturnStmt> >(ifAction.statement.u)) {
+                const auto startPos{
+                    locationFromSource(parsing,
+                        std::get<Fortran::parser::ScalarLogicalExpr>(ifStmt.t).thing.thing.value().source,
+                        false).value()
+                };
+                const auto endPos{
+                    locationFromSource(parsing,
+                        std::get<Fortran::parser::ScalarLogicalExpr>(ifStmt.t).thing.thing.value().source,
+                        true).value()
+                };
+                llvm::outs() << "If-return, conditional: (" << startPos.line << "," << startPos.column << ") - "
+                        << "(" << endPos.line << "," << endPos.column << ")\n";
+                // TODO this assumes that the conditional fits on one list
+                // make more robust, test with more cases
+                addInstrumentationPoint(SaltInstrumentationPointType::IF_RETURN, startPos.line, std::nullopt,
+                                        endPos.column);
+            }
+            return true;
+        }
+
     private:
         // Keeps track of current state of traversal
         bool isInMainProgram_{false};

From de02a21975688b955150a295707fd2e992f7ff4f Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Wed, 15 Jan 2025 19:56:25 -0800
Subject: [PATCH 115/135] First stage of instrumentation point rewrite

New classes added, but not yet hooked up to the instrumentor.
---
 CMakeLists.txt                              |   9 +-
 include/flang_instrumentation_constants.hpp |  39 +++++
 include/flang_instrumentation_point.hpp     | 167 ++++++++++++++++++++
 src/flang_instrumentation_point.cpp         | 116 ++++++++++++++
 src/salt_instrument_flang_plugin.cpp        |  15 +-
 5 files changed, 330 insertions(+), 16 deletions(-)
 create mode 100644 include/flang_instrumentation_constants.hpp
 create mode 100644 include/flang_instrumentation_point.hpp
 create mode 100644 src/flang_instrumentation_point.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3bf91d4..d21f09c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -202,8 +202,7 @@ set(SALT_HEADER_FILES
   dprint.hpp
   ryml_all.hpp
   selectfile.hpp
-  tau_datatypes.h
-  tooling.hpp
+  instrumentor.hpp
 )
 
 list(TRANSFORM SALT_HEADER_FILES PREPEND "${CMAKE_SOURCE_DIR}/include/")
@@ -230,6 +229,7 @@ list(TRANSFORM CPARSE_LLVM_SRCS PREPEND "${CMAKE_SOURCE_DIR}/src/")
 # If we refactor into a library that executables link against it will simplify this and reduce repitition
 add_executable(cparse-llvm)
 target_sources(cparse-llvm PUBLIC ${CPARSE_LLVM_SRCS})
+target_sources(cparse-llvm PUBLIC FILE_SET headers TYPE HEADERS FILES ${SALT_HEADER_FILES})
 target_include_directories(cparse-llvm PUBLIC "${CMAKE_SOURCE_DIR}/include" "${CMAKE_BINARY_DIR}/include")
 target_compile_features(cparse-llvm PUBLIC cxx_std_17)
 target_link_libraries(cparse-llvm PUBLIC SALT_LLVM_TOOLING) # Inherit definitions, compile features, etc.
@@ -291,20 +291,23 @@ if(MLIR_FOUND AND Flang_FOUND)
 
     set(SALT_FLANG_PLUGIN_HEADER_FILES
     selectfile.hpp
-    tau_datatypes.h
     flang_source_location.hpp
+    flang_instrumentation_constants.hpp
+    flang_instrumentation_point.hpp
     )
     list(TRANSFORM SALT_FLANG_PLUGIN_HEADER_FILES PREPEND "${CMAKE_SOURCE_DIR}/include/")
 
     set(SALT_FLANG_PLUGIN_SRCS
     selectfile.cpp
     flang_source_location.cpp
+    flang_instrumentation_point.cpp
     salt_instrument_flang_plugin.cpp
     )
     list(TRANSFORM SALT_FLANG_PLUGIN_SRCS PREPEND "${CMAKE_SOURCE_DIR}/src/")
 
     add_library(salt-flang-plugin SHARED)
     target_sources(salt-flang-plugin PUBLIC ${SALT_FLANG_PLUGIN_SRCS})
+    target_sources(salt-flang-plugin PUBLIC FILE_SET headers TYPE HEADERS FILES ${SALT_FLANG_PLUGIN_HEADER_FILES})
     target_include_directories(salt-flang-plugin PUBLIC "${CMAKE_SOURCE_DIR}/include" "${CMAKE_BINARY_DIR}/include" )
     target_compile_features(salt-flang-plugin PUBLIC cxx_std_17)
     target_link_libraries(salt-flang-plugin PUBLIC SALT_FLANG_FRONTEND)
diff --git a/include/flang_instrumentation_constants.hpp b/include/flang_instrumentation_constants.hpp
new file mode 100644
index 0000000..67b01ce
--- /dev/null
+++ b/include/flang_instrumentation_constants.hpp
@@ -0,0 +1,39 @@
+/* Copyright (C) 2025, ParaTools, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#ifndef FLANG_INSTRUMENTATION_CONSTANTS_HPP
+#define FLANG_INSTRUMENTATION_CONSTANTS_HPP
+
+// Configuration file environment variable
+#define SALT_FORTRAN_CONFIG_FILE_VAR "SALT_FORTRAN_CONFIG_FILE"
+#define SALT_FORTRAN_CONFIG_DEFAULT_PATH "config_files/tau_config.yaml"
+
+// Selective instrumentation environment variable
+#define SALT_FORTRAN_SELECT_FILE_VAR "SALT_FORTRAN_SELECT_FILE"
+
+// Configuration file YAML keys
+#define SALT_FORTRAN_KEY "Fortran"
+#define SALT_FORTRAN_PROGRAM_BEGIN_KEY "program_insert"
+#define SALT_FORTRAN_PROCEDURE_BEGIN_KEY "procedure_begin_insert"
+#define SALT_FORTRAN_PROCEDURE_END_KEY "procedure_end_insert"
+
+// Configuration file template replacement strings
+#define SALT_FORTRAN_TIMER_NAME_TEMPLATE R"(\$\{full_timer_name\})"
+
+// Fortran line splitting
+#define SALT_FORTRAN_STRING_SPLITTER "&\n     &"
+#define SALT_F77_LINE_LENGTH 64
+
+#endif //FLANG_INSTRUMENTATION_CONSTANTS_HPP
diff --git a/include/flang_instrumentation_point.hpp b/include/flang_instrumentation_point.hpp
new file mode 100644
index 0000000..d432644
--- /dev/null
+++ b/include/flang_instrumentation_point.hpp
@@ -0,0 +1,167 @@
+/* Copyright (C) 2025, ParaTools, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#ifndef FLANG_INSTRUMENTATION_POINT_HPP
+#define FLANG_INSTRUMENTATION_POINT_HPP
+
+#include <string>
+#include <map>
+#include <utility>
+
+namespace salt::fortran {
+    enum class InstrumentationPointType {
+        PROGRAM_BEGIN, // Declare profiler, initialize TAU, set node, start timer
+        PROCEDURE_BEGIN, // Declare profiler, start timer
+        PROCEDURE_END, // Stop timer on the line after
+        RETURN_STMT, // Stop timer on the line before
+        IF_RETURN // Transform if to if-then-endif, stop timer before return
+    };
+
+    enum class InstrumentationLocation {
+        BEFORE,
+        AFTER,
+        REPLACE
+    };
+
+    using InstrumentationMap = std::map<InstrumentationPointType, const std::string>;
+
+    class InstrumentationPoint {
+    public:
+        InstrumentationPoint(const InstrumentationPointType type, const int line,
+                             const InstrumentationLocation location) : instrumentationType_(type), line_(line),
+                                                                       location_(location) {
+        }
+
+        virtual ~InstrumentationPoint() = default;
+
+        [[nodiscard]] InstrumentationPointType instrumentationType() const {
+            return instrumentationType_;
+        }
+
+        [[nodiscard]] int line() const {
+            return line_;
+        }
+
+        [[nodiscard]] InstrumentationLocation location() const {
+            return location_;
+        }
+
+        [[nodiscard]] bool instrumentBefore() const {
+            return location() == InstrumentationLocation::BEFORE;
+        }
+
+        bool operator<(const InstrumentationPoint &other) const {
+            if (line() == other.line()) {
+                if (instrumentBefore() && !other.instrumentBefore()) {
+                    return true;
+                }
+                return false;
+            }
+            return line() < other.line();
+        }
+
+        [[nodiscard]] std::string typeString() const;
+
+        [[nodiscard]] std::string locationString() const;
+
+        [[nodiscard]] virtual std::string toString() const;
+
+        [[nodiscard]] virtual std::string instrumentationString(const InstrumentationMap &instMap,
+                                                                const std::string &lineText) const;
+
+    private:
+        const InstrumentationPointType instrumentationType_;
+        const int line_;
+        const InstrumentationLocation location_;
+    };
+
+    class ProgramBeginInstrumentationPoint final : public InstrumentationPoint {
+    public:
+        ProgramBeginInstrumentationPoint(const int line, std::string timerName) : InstrumentationPoint(
+                InstrumentationPointType::PROGRAM_BEGIN, line, InstrumentationLocation::BEFORE),
+            timerName_(std::move(timerName)) {
+        }
+
+        [[nodiscard]] std::string timerName() const {
+            return timerName_;
+        }
+
+        [[nodiscard]] std::string toString() const override;
+
+        [[nodiscard]] std::string instrumentationString(const InstrumentationMap &instMap,
+                                                        const std::string &lineText) const override;
+
+    private:
+        const std::string timerName_;
+    };
+
+    class ProcedureBeginInstrumentationPoint final : public InstrumentationPoint {
+    public:
+        ProcedureBeginInstrumentationPoint(const int line, std::string timerName) : InstrumentationPoint(
+                InstrumentationPointType::PROCEDURE_BEGIN,
+                line,
+                InstrumentationLocation::BEFORE),
+            timerName_(std::move(timerName)) {
+        }
+
+        [[nodiscard]] std::string timerName() const {
+            return timerName_;
+        }
+
+        [[nodiscard]] std::string toString() const override;
+
+        [[nodiscard]] std::string instrumentationString(const InstrumentationMap &instMap,
+                                                        const std::string &lineText) const override;
+
+    private:
+        const std::string timerName_;
+    };
+
+    class ProcedureEndInstrumentationPoint final : public InstrumentationPoint {
+    public:
+        explicit ProcedureEndInstrumentationPoint(const int line) : InstrumentationPoint(
+            InstrumentationPointType::PROCEDURE_END, line, InstrumentationLocation::AFTER) {
+        }
+    };
+
+    class ReturnStmtInstrumentationPoint final : public InstrumentationPoint {
+    public:
+        explicit ReturnStmtInstrumentationPoint(const int line) : InstrumentationPoint(
+            InstrumentationPointType::RETURN_STMT, line, InstrumentationLocation::BEFORE) {
+        }
+    };
+
+    class IfReturnStmtInstrumentationPoint final : public InstrumentationPoint {
+    public:
+        explicit IfReturnStmtInstrumentationPoint(const int line, const int conditionalColumn) : InstrumentationPoint(
+                InstrumentationPointType::IF_RETURN, line, InstrumentationLocation::REPLACE),
+            conditionalColumn_(conditionalColumn) {
+        }
+
+        [[nodiscard]] int conditionalColumn() const {
+            return conditionalColumn_;
+        }
+
+        [[nodiscard]] std::string toString() const override;
+
+        [[nodiscard]] std::string instrumentationString(const InstrumentationMap &instMap,
+                                                        const std::string &lineText) const override;
+
+    private:
+        const int conditionalColumn_;
+    };
+}
+
+#endif //FLANG_INSTRUMENTATION_POINT_HPP
diff --git a/src/flang_instrumentation_point.cpp b/src/flang_instrumentation_point.cpp
new file mode 100644
index 0000000..64d6e49
--- /dev/null
+++ b/src/flang_instrumentation_point.cpp
@@ -0,0 +1,116 @@
+/* Copyright (C) 2025, ParaTools, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+
+#include <string>
+#include <sstream>
+#include <regex>
+
+#include "flang/Common/idioms.h"
+
+#include "flang_instrumentation_point.hpp"
+#include "flang_instrumentation_constants.hpp"
+
+
+using namespace std::string_literals;
+
+std::string salt::fortran::InstrumentationPoint::typeString() const {
+    switch (instrumentationType()) {
+        case InstrumentationPointType::PROGRAM_BEGIN:
+            return "PROGRAM_BEGIN"s;
+        case InstrumentationPointType::PROCEDURE_BEGIN:
+            return "PROCEDURE_BEGIN"s;
+        case InstrumentationPointType::PROCEDURE_END:
+            return "PROCEDURE_END"s;
+        case InstrumentationPointType::RETURN_STMT:
+            return "RETURN_STMT"s;
+        case InstrumentationPointType::IF_RETURN:
+            return "IF_RETURN"s;
+        default:
+            CRASH_NO_CASE;
+    }
+}
+
+std::string salt::fortran::InstrumentationPoint::locationString() const {
+    switch (location()) {
+        case InstrumentationLocation::BEFORE:
+            return "BEFORE"s;
+        case InstrumentationLocation::AFTER:
+            return "AFTER"s;
+        case InstrumentationLocation::REPLACE:
+            return "REPLACE"s;
+        default:
+            CRASH_NO_CASE;
+    }
+}
+
+std::string salt::fortran::InstrumentationPoint::toString() const {
+    std::stringstream ss;
+    ss << line() << "\t";
+    ss << locationString() << "\t";
+    ss << typeString() << "\t";
+    return ss.str();
+}
+
+std::string salt::fortran::InstrumentationPoint::instrumentationString(const InstrumentationMap &instMap,
+                                                                       [[maybe_unused]] const std::string &lineText)
+const {
+    return instMap.at(instrumentationType());
+}
+
+std::string salt::fortran::ProgramBeginInstrumentationPoint::toString() const {
+    std::stringstream ss;
+    ss << InstrumentationPoint::toString();
+    ss << "\"" << timerName() << "\"\t";
+    return ss.str();
+}
+
+std::string salt::fortran::ProgramBeginInstrumentationPoint::instrumentationString(
+    const InstrumentationMap &instMap, [[maybe_unused]] const std::string &lineText) const {
+    static std::regex timerNameRegex{SALT_FORTRAN_TIMER_NAME_TEMPLATE};
+    const std::string instTemplate{InstrumentationPoint::instrumentationString(instMap, lineText)};
+    return std::regex_replace(instTemplate, timerNameRegex, timerName());
+}
+
+std::string salt::fortran::ProcedureBeginInstrumentationPoint::toString() const {
+    std::stringstream ss;
+    ss << InstrumentationPoint::toString();
+    ss << timerName() << "\t";
+    return ss.str();
+}
+
+std::string salt::fortran::ProcedureBeginInstrumentationPoint::instrumentationString(
+    const InstrumentationMap &instMap, [[maybe_unused]] const std::string &lineText) const {
+    static std::regex timerNameRegex{SALT_FORTRAN_TIMER_NAME_TEMPLATE};
+    const std::string instTemplate{InstrumentationPoint::instrumentationString(instMap, lineText)};
+    return std::regex_replace(instTemplate, timerNameRegex, timerName());
+}
+
+std::string salt::fortran::IfReturnStmtInstrumentationPoint::toString() const {
+    std::stringstream ss;
+    ss << InstrumentationPoint::toString();
+    ss << conditionalColumn() << "\t";
+    return ss.str();
+}
+
+std::string salt::fortran::IfReturnStmtInstrumentationPoint::instrumentationString(
+    const InstrumentationMap &instMap, const std::string &lineText) const {
+    std::stringstream ss;
+    ss << lineText.substr(0, conditionalColumn()) << " then\n";
+    ss << InstrumentationPoint::instrumentationString(instMap, lineText) << "\n";
+    ss << "      return\n";
+    ss << "      endif\n";
+    return ss.str();
+}
diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
index 1131edc..0de44af 100644
--- a/src/salt_instrument_flang_plugin.cpp
+++ b/src/salt_instrument_flang_plugin.cpp
@@ -45,25 +45,14 @@ limitations under the License.
 #include "flang/Parser/source.h"
 #include "flang/Common/indirection.h"
 
+#include "flang_instrumentation_constants.hpp"
 #include "selectfile.hpp"
 #include "flang_source_location.hpp"
+#include "flang_instrumentation_point.hpp"
 
 // TODO Split declarations into a separate header file.
 // TODO Put debug output behind verbose flag
 
-#define SALT_FORTRAN_CONFIG_FILE_VAR "SALT_FORTRAN_CONFIG_FILE"
-#define SALT_FORTRAN_CONFIG_DEFAULT_PATH "config_files/tau_config.yaml"
-
-#define SALT_FORTRAN_SELECT_FILE_VAR "SALT_FORTRAN_SELECT_FILE"
-
-#define SALT_FORTRAN_KEY "Fortran"
-#define SALT_FORTRAN_PROGRAM_BEGIN_KEY "program_insert"
-#define SALT_FORTRAN_PROCEDURE_BEGIN_KEY "procedure_begin_insert"
-#define SALT_FORTRAN_PROCEDURE_END_KEY "procedure_end_insert"
-
-#define SALT_FORTRAN_TIMER_NAME_TEMPLATE R"(\$\{full_timer_name\})"
-#define SALT_FORTRAN_STRING_SPLITTER "&\n     &"
-#define SALT_F77_LINE_LENGTH 64
 
 using namespace Fortran::frontend;
 using namespace salt::fortran;

From 6630ef91739a29ae0efb97e105edad6776da61a1 Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Thu, 16 Jan 2025 14:41:35 -0800
Subject: [PATCH 116/135] Remove special cases from instrumentation code

Instead of having a special case for If-Return constructs, there are now
three locations: BEFORE, REPLACE, and AFTER. Instrumentation points for
If-Return now use REPLACE to replace a line of code.
---
 CMakeLists.txt                       |   2 +-
 src/flang_instrumentation_point.cpp  |   3 +-
 src/flang_salt_instrument_plugin.cpp | 686 ++++++++++++++++++++++++
 src/salt_instrument_flang_plugin.cpp | 751 ---------------------------
 4 files changed, 689 insertions(+), 753 deletions(-)
 create mode 100644 src/flang_salt_instrument_plugin.cpp
 delete mode 100644 src/salt_instrument_flang_plugin.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index d21f09c..fa17b74 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -301,7 +301,7 @@ if(MLIR_FOUND AND Flang_FOUND)
     selectfile.cpp
     flang_source_location.cpp
     flang_instrumentation_point.cpp
-    salt_instrument_flang_plugin.cpp
+    flang_salt_instrument_plugin.cpp
     )
     list(TRANSFORM SALT_FLANG_PLUGIN_SRCS PREPEND "${CMAKE_SOURCE_DIR}/src/")
 
diff --git a/src/flang_instrumentation_point.cpp b/src/flang_instrumentation_point.cpp
index 64d6e49..f88e8db 100644
--- a/src/flang_instrumentation_point.cpp
+++ b/src/flang_instrumentation_point.cpp
@@ -20,6 +20,7 @@ limitations under the License.
 
 #include "flang/Common/idioms.h"
 
+#include "dprint.hpp"
 #include "flang_instrumentation_point.hpp"
 #include "flang_instrumentation_constants.hpp"
 
@@ -87,7 +88,7 @@ std::string salt::fortran::ProgramBeginInstrumentationPoint::instrumentationStri
 std::string salt::fortran::ProcedureBeginInstrumentationPoint::toString() const {
     std::stringstream ss;
     ss << InstrumentationPoint::toString();
-    ss << timerName() << "\t";
+    ss << "\"" << timerName() << "\"\t";
     return ss.str();
 }
 
diff --git a/src/flang_salt_instrument_plugin.cpp b/src/flang_salt_instrument_plugin.cpp
new file mode 100644
index 0000000..50e9957
--- /dev/null
+++ b/src/flang_salt_instrument_plugin.cpp
@@ -0,0 +1,686 @@
+/*
+Copyright (C) 2024-2025, ParaTools, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+/* SALT-FM Flang Fortran Instrumentor Plugin */
+
+// See https://flang.llvm.org/docs/FlangDriver.html#frontend-driver-plugins
+// for documentation of the Flang frontend plugin interface
+
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <variant>
+#include <optional>
+#include <tuple>
+#include <regex>
+#include <algorithm>
+#include <filesystem>
+
+
+#define RYML_SINGLE_HDR_DEFINE_NOW
+#define RYML_SHARED
+
+#include <ryml_all.hpp>
+
+#include <clang/Basic/SourceLocation.h>
+
+#include "flang/Frontend/FrontendActions.h"
+#include "flang/Frontend/FrontendPluginRegistry.h"
+#include "flang/Parser/dump-parse-tree.h"
+#include "flang/Parser/parsing.h"
+#include "flang/Parser/source.h"
+#include "flang/Common/indirection.h"
+
+#include "flang_instrumentation_constants.hpp"
+#include "selectfile.hpp"
+#include "flang_source_location.hpp"
+#include "flang_instrumentation_point.hpp"
+
+// TODO Put debug output behind verbose flag
+
+using namespace std::string_literals;
+using namespace Fortran::frontend;
+
+/**
+ * The main action of the Salt instrumentor.
+ * Visits each node in the parse tree.
+ */
+namespace salt::fortran {
+
+    class SaltInstrumentAction final : public PluginParseTreeAction {
+        struct SaltInstrumentParseTreeVisitor {
+            explicit SaltInstrumentParseTreeVisitor(Fortran::parser::Parsing *parsing,
+                                                    const bool skipInstrument = false)
+                : mainProgramLine_(0), subProgramLine_(0), skipInstrumentFile_(skipInstrument), parsing(parsing) {
+            }
+
+            bool shouldInstrument() const {
+                return !skipInstrumentFile_ && !skipInstrumentSubprogram_;
+            }
+
+            void addProgramBeginInstrumentation(const int start_line, const std::string &timer_name) {
+                if (shouldInstrument()) {
+                    instrumentationPoints_.emplace_back(
+                        std::make_unique<ProgramBeginInstrumentationPoint>(start_line, timer_name));
+                }
+            }
+
+            void addProcedureBeginInstrumentation(const int start_line, const std::string &timer_name) {
+                if (shouldInstrument()) {
+                    instrumentationPoints_.emplace_back(
+                        std::make_unique<ProcedureBeginInstrumentationPoint>(start_line, timer_name));
+                }
+            }
+
+            void addProcedureEndInstrumentation(const int end_line) {
+                if (shouldInstrument()) {
+                    instrumentationPoints_.emplace_back(std::make_unique<ProcedureEndInstrumentationPoint>(end_line));
+                }
+            }
+
+            void addReturnStmtInstrumentation(const int end_line) {
+                if (shouldInstrument()) {
+                    instrumentationPoints_.emplace_back(std::make_unique<ReturnStmtInstrumentationPoint>(end_line));
+                }
+            }
+
+            void addIfReturnStmtInstrumentation(const int end_line, const int conditional_column) {
+                if (shouldInstrument()) {
+                    instrumentationPoints_.emplace_back(
+                        std::make_unique<IfReturnStmtInstrumentationPoint>(end_line, conditional_column));
+                }
+            }
+
+            [[nodiscard]] const auto &getInstrumentationPoints() const {
+                return instrumentationPoints_;
+            }
+
+            [[nodiscard]] std::string dumpInstrumentationPoints() const {
+                std::stringstream ss;
+                for (const auto &instPt: getInstrumentationPoints()) {
+                    ss << instPt->toString() << "\n";
+                }
+                return ss.str();
+            }
+
+            [[nodiscard]] static std::string convertWildcardToRegexForm(const std::string &wildString) {
+                // Escape all regex special characters
+                static const std::regex metacharacters(R"([\.\^\$\+\(\)\[\]\{\}\|\?\*])");
+                const std::string escapedString{std::regex_replace(wildString, metacharacters, R"(\$&)")};
+                // Convert lines in TAU select file format (where "#" means zero or more characters)
+                // to regex version (where ".*" means zero or more characters).
+                // "#" is used for wildcard in routine names in TAU selective instrumentation files
+                // because "*" can be used in C/C++ function identifiers as part of pointer types.
+                static const std::regex hashRegex{R"(#)"};
+                return std::regex_replace(escapedString, hashRegex, ".*");
+            }
+
+            [[nodiscard]] static bool shouldInstrumentSubprogram(const std::string &subprogramName) {
+                // Check if this subprogram should be instrumented.
+                // It should if:
+                //   - No include or exclude list is specified
+                //   - An exclude list is present and the subprogram is not in it
+                //   - An include list is present and the subprogram is in it (and not on the exclude list)
+
+                if (includelist.empty() && excludelist.empty()) {
+                    return true;
+                }
+
+                for (const auto &excludeEntry: excludelist) {
+                    if (const std::regex excludeRegex{convertWildcardToRegexForm(excludeEntry)}; std::regex_search(
+                        subprogramName, excludeRegex)) {
+                        return false;
+                    }
+                }
+
+                bool subprogramInIncludeList{false};
+                for (const auto &includeEntry: includelist) {
+                    if (const std::regex includeRegex{convertWildcardToRegexForm(includeEntry)}; std::regex_search(
+                        subprogramName, includeRegex)) {
+                        subprogramInIncludeList = true;
+                        break;
+                    }
+                }
+
+                if (!includelist.empty()) {
+                    if (subprogramInIncludeList) {
+                        return true;
+                    }
+                    return false;
+                }
+
+                return true;
+            }
+
+            // Default empty visit functions for otherwise unhandled types.
+            template<typename A>
+            static bool Pre(const A &) { return true; }
+
+            template<typename A>
+            static void Post(const A &) {
+                // this space intentionally left blank
+            }
+
+            // Override all types that we want to visit.
+
+            // Pre occurs when first visiting a node.
+            // Post occurs when returning from the node's children.
+            // See https://flang.llvm.org/docs/Parsing.html for information on the parse tree.
+
+            // Parse tree types are defined in: include/flang/Parser/parse-tree.h
+            // There are three types of parse tree nodes:
+            // Wrappers, with a single data member, always named `v`.
+            // Tuples, encapsulating multiple values in a data member named `t` of type std::tuple.
+            // Discriminated unions, one of several types stored in data member named `u` of type std::variant.
+            // Use std::get() to retrieve value from `t` or `u`
+
+            // See https://github.com/llvm/llvm-project/blob/main/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
+            // for examples of getting source position for a parse tree node
+
+            // Never descend into InterfaceSpecification nodes, they can't contain executable statements.
+            static bool Pre(const Fortran::parser::InterfaceSpecification &) { return false; }
+
+            bool Pre(const Fortran::parser::MainProgram &) {
+                isInMainProgram_ = true;
+                return true;
+            }
+
+            void Post(const Fortran::parser::MainProgram &) {
+                llvm::outs() << "Exit main program: " << mainProgramName_ << "\n";
+                isInMainProgram_ = false;
+            }
+
+            void Post(const Fortran::parser::ProgramStmt &program) {
+                mainProgramName_ = program.v.ToString();
+                mainProgramLine_ = parsing->allCooked().GetSourcePositionRange(program.v.source)->first.line;
+                llvm::outs() << "Enter main program: " << mainProgramName_ << "\n";
+            }
+
+            bool Pre(const Fortran::parser::SubroutineStmt &subroutineStmt) {
+                const auto &name = std::get<Fortran::parser::Name>(subroutineStmt.t);
+                subprogramName_ = name.ToString();
+                subProgramLine_ = parsing->allCooked().GetSourcePositionRange(name.source)->first.line;
+                llvm::outs() << "Enter Subroutine: " << subprogramName_ << "\n";
+                if (!shouldInstrumentSubprogram(subprogramName_)) {
+                    llvm::outs() << "Skipping instrumentation of " << subprogramName_ <<
+                            " due to selective instrumentation\n";
+                    skipInstrumentSubprogram_ = true;
+                }
+                return true;
+            }
+
+            void Post(const Fortran::parser::SubroutineSubprogram &) {
+                llvm::outs() << "Exit Subroutine: " << subprogramName_ << "\n";
+                skipInstrumentSubprogram_ = false;
+                subprogramName_.clear();
+            }
+
+            bool Pre(const Fortran::parser::FunctionStmt &functionStmt) {
+                const auto &name = std::get<Fortran::parser::Name>(functionStmt.t);
+                subprogramName_ = name.ToString();
+                subProgramLine_ = parsing->allCooked().GetSourcePositionRange(name.source)->first.line;
+                llvm::outs() << "Enter Function: " << subprogramName_ << "\n";
+                if (!shouldInstrumentSubprogram(subprogramName_)) {
+                    llvm::outs() << "Skipping instrumentation of " << subprogramName_ <<
+                            " due to selective instrumentation\n";
+                    skipInstrumentSubprogram_ = true;
+                }
+                return true;
+            }
+
+            void Post(const Fortran::parser::FunctionSubprogram &) {
+                llvm::outs() << "Exit Function: " << subprogramName_ << "\n";
+                skipInstrumentSubprogram_ = false;
+                subprogramName_.clear();
+                subProgramLine_ = 0;
+            }
+
+            // Split handling of ExecutionPart into two phases
+            // so that we insert Instrumentation Points in order
+            // even if we separately insert them in visitors for
+            // children of ExecutionPart.
+            bool Pre(const Fortran::parser::ExecutionPart &executionPart) {
+                handleExecutionPart(executionPart, true);
+                return true;
+            }
+
+            void Post(const Fortran::parser::ExecutionPart &executionPart) {
+                handleExecutionPart(executionPart, false);
+            }
+
+            void handleExecutionPart(const Fortran::parser::ExecutionPart &executionPart, bool pre) {
+                if (const Fortran::parser::Block &block = executionPart.v; block.empty()) {
+                    llvm::outs() << "WARNING: Execution part empty.\n";
+                } else {
+                    const std::optional startLocOpt{getLocation(parsing, block.front(), false)};
+                    const std::optional endLocOpt{getLocation(parsing, block.back(), true)};
+
+                    if (!startLocOpt.has_value()) {
+                        llvm::errs() << "ERROR: execution part had no start source location!\n";
+                    }
+                    if (!endLocOpt.has_value()) {
+                        llvm::errs() << "ERROR: execution part had no end source location!\n";
+                    }
+                    if (!startLocOpt.has_value() || !endLocOpt.has_value()) {
+                        std::exit(-5); // Stop cleanly instead of calling value() on an empty optional.
+                    }
+                    const auto &startLoc{startLocOpt.value()};
+                    const auto &endLoc{endLocOpt.value()};
+
+                    // Insert the timer start in the Pre phase (when we first visit the node)
+                    // and the timer stop in the Post phase (when we return after visiting the node's children).
+                    if (pre) {
+                        // TODO this assumes that the program end statement ends the next line after
+                        //      the last statement, but there could be whitespace/comments. Need to actually
+                        //      find the end statement. End statement may not have source position if name
+                        //      not listed -- need to find workaround.
+                        std::stringstream ss;
+                        ss << (isInMainProgram_ ? mainProgramName_ : subprogramName_);
+                        ss << " [{" << startLoc.sourceFile->path() << "} {";
+                        ss << (isInMainProgram_ ? mainProgramLine_ : subProgramLine_);
+                        ss << ",1}-{"; // TODO column number, first char of program/subroutine/function stmt
+                        ss << endLoc.line + 1;
+                        ss << ",1}]"; // TODO column number, last char of end stmt
+
+                        const std::string timerName{ss.str()};
+
+                        // Split the timerName string so that it will fit between Fortran 77's 72-character limit,
+                        // and use character string line continuation syntax compatible with Fortran 77 and modern
+                        // Fortran.
+                        std::stringstream ss2;
+                        for (size_t i = 0; i < timerName.size(); i += SALT_F77_LINE_LENGTH) {
+                            ss2 << SALT_FORTRAN_STRING_SPLITTER;
+                            ss2 << timerName.substr(i, SALT_F77_LINE_LENGTH);
+                        }
+
+                        const std::string splitTimerName{ss2.str()};
+
+                        if (isInMainProgram_) {
+                            llvm::outs() << "Program begin \"" << mainProgramName_ << "\" at " << startLoc.line << ", "
+                                    << startLoc.column << "\n";
+                            addProgramBeginInstrumentation(startLoc.line, splitTimerName);
+                        } else {
+                            llvm::outs() << "Subprogram begin \"" << subprogramName_ << "\" at " << startLoc.line
+                                    << ", " << startLoc.column << "\n";
+                            addProcedureBeginInstrumentation(startLoc.line, splitTimerName);
+                        }
+                    } else {
+                        llvm::outs() << "End at " << endLoc.line << ", " << endLoc.column << "\n";
+                        addProcedureEndInstrumentation(endLoc.line);
+                    }
+                }
+            }
+
+            // A ReturnStmt does not have a source, so we instead need to get access to the wrapper Statement that does.
+            // Here we get the ReturnStmt through ExecutableConstruct -> Statement<ActionStmt> -> Indirection<ReturnStmt>
+            bool Pre(const Fortran::parser::ExecutableConstruct &execConstruct) {
+                if (const auto actionStmt = std::get_if<Fortran::parser::Statement<Fortran::parser::ActionStmt> >(
+                    &execConstruct.u)) {
+                    if (std::holds_alternative<Fortran::common::Indirection<Fortran::parser::ReturnStmt> >(
+                        actionStmt->statement.u)) {
+                        const std::optional returnPos{locationFromSource(parsing, actionStmt->source, false)};
+                        const int returnLine{returnPos.value().line};
+                        llvm::outs() << "Return statement at " << returnLine << "\n";
+                        addReturnStmtInstrumentation(returnLine);
+                    }
+                }
+                return true;
+            }
+
+            bool Pre(const Fortran::parser::IfStmt &ifStmt) {
+                if (const auto &ifAction{
+                        std::get<Fortran::parser::UnlabeledStatement<Fortran::parser::ActionStmt> >(ifStmt.t)
+                    };
+                    std::holds_alternative<Fortran::common::Indirection<
+                        Fortran::parser::ReturnStmt> >(ifAction.statement.u)) {
+                    const auto startPos{
+                        locationFromSource(parsing,
+                                           std::get<Fortran::parser::ScalarLogicalExpr>(ifStmt.t).thing.thing.value().
+                                           source,
+                                           false).value()
+                    };
+                    const auto endPos{
+                        locationFromSource(parsing,
+                                           std::get<Fortran::parser::ScalarLogicalExpr>(ifStmt.t).thing.thing.value().
+                                           source,
+                                           true).value()
+                    };
+                    llvm::outs() << "If-return, conditional: (" << startPos.line << "," << startPos.column << ") - "
+                            << "(" << endPos.line << "," << endPos.column << ")\n";
+                    // TODO handle return <value> case
+                    // TODO handle multi-line
+                    // TODO handle line continuation if too long
+                    addIfReturnStmtInstrumentation(startPos.line, endPos.column);
+                }
+                return true;
+            }
+
+        private:
+            // Keeps track of current state of traversal
+            bool isInMainProgram_{false};
+            std::string mainProgramName_;
+            int mainProgramLine_;
+            std::string subprogramName_;
+            int subProgramLine_;
+
+            bool skipInstrumentFile_;
+            bool skipInstrumentSubprogram_{false};
+
+            std::vector<std::unique_ptr<const InstrumentationPoint> > instrumentationPoints_;
+
+            // Pass in the parser object from the Action to the Visitor
+            // so that we can use it while processing parse tree nodes.
+            Fortran::parser::Parsing *parsing{nullptr};
+        }; // SaltInstrumentParseTreeVisitor
+
+        /**
+         * Get the source file represented by a given parse tree
+         *
+         * See function BuildRuntimeDerivedTypeTables() in
+         * flang/lib/Semantics/runtime-type-info.cpp for example
+         * of getting the source file name.
+         */
+        [[nodiscard]] static std::optional<std::string> getInputFilePath(Fortran::parser::Parsing &parsing) {
+            const auto &allSources{parsing.allCooked().allSources()};
+            if (const auto firstProv{allSources.GetFirstFileProvenance()}) {
+                if (const auto *srcFile{allSources.GetSourceFile(firstProv->start())}) {
+                    return srcFile->path();
+                }
+            }
+            return std::nullopt;
+        }
+
+        static void instrumentFile(const std::string &inputFilePath, llvm::raw_pwrite_stream &outputStream,
+                                   const SaltInstrumentParseTreeVisitor &visitor,
+                                   const InstrumentationMap &instMap) {
+            std::ifstream inputStream{inputFilePath};
+            if (!inputStream) {
+                llvm::errs() << "ERROR: Could not open input file " << inputFilePath << "\n";
+                std::exit(-2);
+            }
+            std::string lineText;
+            int lineNum{0};
+            const auto &instPts{visitor.getInstrumentationPoints()};
+
+            llvm::outs() << "Will perform instrumentation:\n" << visitor.dumpInstrumentationPoints();
+
+            // Sanity check: are instrumentation points in the right order?
+            if (!std::is_sorted(instPts.cbegin(), instPts.cend(), [&](const auto &p1, const auto &p2) {
+                return *p1 < *p2;
+            })) {
+                DIE("ERROR: Instrumentation points not sorted by line number!\n");
+            }
+
+            auto instIter{instPts.cbegin()};
+            while (std::getline(inputStream, lineText)) {
+                ++lineNum;
+
+                // First, process instrumentation points that come BEFORE this line.
+                while (instIter != instPts.cend() && (*instIter)->line() == lineNum && (*instIter)->location() ==
+                       InstrumentationLocation::BEFORE) {
+                    outputStream << (*instIter)->instrumentationString(instMap, lineText) << "\n";
+                    ++instIter;
+                }
+
+                // Then, process instrumentation points that REPLACE this line.
+                bool shouldOutputLine{true};
+                while (instIter != instPts.cend() && (*instIter)->line() == lineNum && (*instIter)->location() ==
+                       InstrumentationLocation::REPLACE) {
+                    outputStream << (*instIter)->instrumentationString(instMap, lineText) << "\n";
+                    shouldOutputLine = false;
+                    ++instIter;
+                }
+
+                // Output the current line, if not replaced.
+                if (shouldOutputLine) {
+                    outputStream << lineText << "\n";
+                }
+
+                // Finally, process instrumentation points that come AFTER this line.
+                while (instIter != instPts.cend() && (*instIter)->line() == lineNum && (*instIter)->location() ==
+                       InstrumentationLocation::AFTER) {
+                    outputStream << (*instIter)->instrumentationString(instMap, lineText) << "\n";
+                    ++instIter;
+                }
+            }
+        }
+
+        [[nodiscard]] static std::string getConfigPath() {
+            // If config path env var is set and non-empty, use that;
+            // otherwise use default.
+            if (const char *val = getenv(SALT_FORTRAN_CONFIG_FILE_VAR)) {
+                if (const std::string configPath{val}; !configPath.empty()) {
+                    return configPath;
+                }
+            }
+            return SALT_FORTRAN_CONFIG_DEFAULT_PATH;
+        }
+
+        [[nodiscard]] static std::optional<std::string> getSelectFilePath() {
+            if (const char *val = getenv(SALT_FORTRAN_SELECT_FILE_VAR)) {
+                if (std::string selectFile{val}; !selectFile.empty()) {
+                    return selectFile;
+                }
+            }
+            return std::nullopt;
+        }
+
+        [[nodiscard]] static ryml::Tree getConfigYamlTree(const std::string &configPath) {
+            std::ifstream inputStream{configPath};
+            if (!inputStream) {
+                llvm::errs() << "ERROR: Could not open configuration file " << configPath << "\n"
+                        << "Set $" SALT_FORTRAN_CONFIG_FILE_VAR " to path to desired configuration file.\n";
+                std::exit(-3);
+            }
+            std::stringstream configStream;
+            configStream << inputStream.rdbuf();
+            // TODO handle errors if config yaml doesn't parse
+            return ryml::parse_in_arena(ryml::to_csubstr(configStream.str()));
+        }
+
+        [[nodiscard]] static InstrumentationMap getInstrumentationMap(const ryml::Tree &tree) {
+            InstrumentationMap map;
+            std::stringstream ss;
+
+            // Access the "Fortran" node
+            ryml::NodeRef fortranNode = tree[SALT_FORTRAN_KEY];
+
+            // Validate that the "Fortran" node exists
+            if (!fortranNode.valid()) {
+                llvm::errs() << "ERROR: '" << SALT_FORTRAN_KEY << "' key not found in the configuration file.\n";
+                std::exit(-3);
+            }
+
+            // Access and process the "program_begin_insert" node
+            ryml::NodeRef programBeginNode = fortranNode[SALT_FORTRAN_PROGRAM_BEGIN_KEY];
+            if (!programBeginNode.valid()) {
+                llvm::errs() << "ERROR: '" << SALT_FORTRAN_PROGRAM_BEGIN_KEY << "' key not found under 'Fortran'.\n";
+                std::exit(-3);
+            }
+            for (const ryml::NodeRef child: programBeginNode.children()) {
+                ss << child.val() << "\n";
+            }
+            map.emplace(InstrumentationPointType::PROGRAM_BEGIN, ss.str());
+            ss.str(""s);
+
+            // Access and process the "procedure_begin_insert" node
+            ryml::NodeRef procedureBeginNode = fortranNode[SALT_FORTRAN_PROCEDURE_BEGIN_KEY];
+            if (!procedureBeginNode.valid()) {
+                llvm::errs() << "ERROR: '" << SALT_FORTRAN_PROCEDURE_BEGIN_KEY << "' key not found under 'Fortran'.\n";
+                std::exit(-3);
+            }
+            for (const ryml::NodeRef child: procedureBeginNode.children()) {
+                ss << child.val() << "\n";
+            }
+            map.emplace(InstrumentationPointType::PROCEDURE_BEGIN, ss.str());
+            ss.str(""s);
+
+            // Access and process the "procedure_end_insert" node
+            ryml::NodeRef procedureEndNode = fortranNode[SALT_FORTRAN_PROCEDURE_END_KEY];
+            if (!procedureEndNode.valid()) {
+                llvm::errs() << "ERROR: '" << SALT_FORTRAN_PROCEDURE_END_KEY << "' key not found under 'Fortran'.\n";
+                std::exit(-3);
+            }
+            for (const ryml::NodeRef child: procedureEndNode.children()) {
+                ss << child.val() << "\n";
+            }
+            map.emplace(InstrumentationPointType::PROCEDURE_END, ss.str());
+            // The return statement uses the same text as procedure end,
+            // but is inserted before the line instead of after.
+            map.emplace(InstrumentationPointType::RETURN_STMT, ss.str());
+            // The if-return statement uses the same text as procedure end,
+            // but requires transformation to if-then-endif
+            map.emplace(InstrumentationPointType::IF_RETURN, ss.str());
+
+            return map;
+        }
+
+        [[nodiscard]] static std::string convertGlobToRegexForm(const std::string &globString) {
+            // Convert a shell glob (where "*" means zero or more characters) to the
+            // equivalent regex (where ".*" does); used for file names in TAU select files.
+            // Escape the other regex metacharacters FIRST: escaping after the "*" expansion
+            // would also escape the "." of every ".*" and silently break the wildcard.
+            static const std::regex metacharacters(R"([\.\^\$\+\(\)\[\]\{\}\|\?])");
+            const std::string escapedString{std::regex_replace(globString, metacharacters, R"(\$&)")};
+            static const std::regex starRegex{R"(\*)"};
+            return std::regex_replace(escapedString, starRegex, ".*");
+        }
+
+        [[nodiscard]] static bool shouldInstrumentFile(const std::filesystem::path &filePath) {
+            // Check if this file should be instrumented.
+            // It should if:
+            //   - No file include or file exclude list is specified
+            //   - An exclude list is present and the file is not in it
+            //   - An include list is present and the file is in it
+
+            if (fileincludelist.empty() && fileexcludelist.empty()) {
+                return true;
+            }
+
+            const auto filePart{filePath.filename()};
+            for (const auto &excludeEntry: fileexcludelist) {
+                if (const std::regex excludeRegex{convertGlobToRegexForm(excludeEntry)}; std::regex_search(
+                    filePart.string(), excludeRegex)) {
+                    return false;
+                }
+            }
+
+            bool fileInIncludeList{false};
+            for (const auto &includeEntry: fileincludelist) {
+                if (const std::regex includeRegex{convertGlobToRegexForm(includeEntry)}; std::regex_search(
+                    filePart.string(), includeRegex)) {
+                    fileInIncludeList = true;
+                    break;
+                }
+            }
+
+            if (!fileincludelist.empty()) {
+                if (fileInIncludeList) {
+                    return true;
+                }
+                return false;
+            }
+
+            return true;
+        }
+
+        /**
+         * This is the entry point for the plugin.
+         */
+        void executeAction() override {
+            llvm::outs() << "==== SALT Instrumentor Plugin starting ====\n";
+
+            // This is the object through which we access the parse tree
+            // and the source
+            Fortran::parser::Parsing &parsing = getParsing();
+
+            // Get the path to the input file
+            const auto inputFilePathStr = getInputFilePath(parsing);
+            if (!inputFilePathStr) {
+                llvm::errs() << "ERROR: Unable to find input file name!\n";
+                std::exit(-1);
+            }
+            llvm::outs() << "Have input file: " << *inputFilePathStr << "\n";
+
+            const std::filesystem::path inputFilePath{inputFilePathStr.value()};
+
+            // Read and parse the yaml configuration file
+            const std::string configPath{getConfigPath()};
+            const ryml::Tree yamlTree = getConfigYamlTree(configPath);
+            const InstrumentationMap instMap = getInstrumentationMap(yamlTree);
+
+            if (const auto selectPath{getSelectFilePath()}; selectPath.has_value()) {
+                if (processInstrumentationRequests(selectPath->c_str())) {
+                    const auto printStr = [&](const auto &a) { llvm::outs() << a << "\n"; };
+                    llvm::outs() << "File include list:\n";
+                    std::for_each(fileincludelist.cbegin(), fileincludelist.cend(), printStr);
+                    llvm::outs() << "File exclude list:\n";
+                    std::for_each(fileexcludelist.cbegin(), fileexcludelist.cend(), printStr);
+                    llvm::outs() << "Include list:\n";
+                    std::for_each(includelist.cbegin(), includelist.cend(), printStr);
+                    llvm::outs() << "Exclude list:\n";
+                    std::for_each(excludelist.cbegin(), excludelist.cend(), printStr);
+                } else {
+                    llvm::errs() << "ERROR: Unable to read selective instrumentation file at " << selectPath << "\n";
+                    std::exit(-4);
+                }
+            }
+
+            // Get the extension of the input file
+            // For input file 'filename.ext' we will output to 'filename.inst.Ext'
+            // Since we are adding preprocessor directives in the emitted code,
+            // the file extension should be capitalized.
+            std::string inputFileExtension;
+            if (auto const extPos = inputFilePath.string().find_last_of('.'); extPos == std::string::npos) {
+                inputFileExtension = "F90"; // Default if for some reason file has no extension
+            } else {
+                inputFileExtension = inputFilePath.string().substr(extPos + 1); // Part of string past last '.'
+                // Capitalize the first character of inputFileExtension
+                if (!inputFileExtension.empty()) {
+                    inputFileExtension[0] = static_cast<char>(std::toupper(inputFileExtension[0]));
+                }
+            }
+
+            // Open an output file for writing the instrumented code
+            const std::string outputFileExtension = "inst."s + inputFileExtension;
+            const auto outputFileStream = createOutputFile(outputFileExtension);
+
+            // If visitor has skipInstrument set, no instrumentation points are added
+            // so the file is output into the .inst file unchanged.
+            bool skipInstrument{false};
+            if (!shouldInstrumentFile(inputFilePath)) {
+                llvm::outs() << "Skipping instrumentation of " << inputFilePath
+                        << " due to selective instrumentation.\n";
+                skipInstrument = true;
+            }
+            // Walk the parse tree -- marks nodes for instrumentation
+            SaltInstrumentParseTreeVisitor visitor{&parsing, skipInstrument};
+            Walk(parsing.parseTree(), visitor);
+
+            // Use the instrumentation points stored in the Visitor to write the instrumented file.
+            instrumentFile(inputFilePath, *outputFileStream, visitor, instMap);
+
+            outputFileStream->flush();
+
+            llvm::outs() << "==== SALT Instrumentor Plugin finished ====\n";
+        }
+    };
+
+}
+
+[[maybe_unused]] static FrontendPluginRegistry::Add<salt::fortran::SaltInstrumentAction> X(
+    "salt-instrument", "Apply SALT Instrumentation");
diff --git a/src/salt_instrument_flang_plugin.cpp b/src/salt_instrument_flang_plugin.cpp
deleted file mode 100644
index 0de44af..0000000
--- a/src/salt_instrument_flang_plugin.cpp
+++ /dev/null
@@ -1,751 +0,0 @@
-/*
-Copyright (C) 2024-2025, ParaTools, Inc.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-        http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-/* SALT-FM Flang Fortran Instrumentor Plugin */
-
-// See https://flang.llvm.org/docs/FlangDriver.html#frontend-driver-plugins
-// for documentation of the Flang frontend plugin interface
-
-#include <iostream>
-#include <fstream>
-#include <sstream>
-#include <string>
-#include <variant>
-#include <optional>
-#include <tuple>
-#include <regex>
-#include <algorithm>
-#include <filesystem>
-
-
-#define RYML_SINGLE_HDR_DEFINE_NOW
-#define RYML_SHARED
-
-#include <ryml_all.hpp>
-
-#include <clang/Basic/SourceLocation.h>
-
-#include "flang/Frontend/FrontendActions.h"
-#include "flang/Frontend/FrontendPluginRegistry.h"
-#include "flang/Parser/dump-parse-tree.h"
-#include "flang/Parser/parsing.h"
-#include "flang/Parser/source.h"
-#include "flang/Common/indirection.h"
-
-#include "flang_instrumentation_constants.hpp"
-#include "selectfile.hpp"
-#include "flang_source_location.hpp"
-#include "flang_instrumentation_point.hpp"
-
-// TODO Split declarations into a separate header file.
-// TODO Put debug output behind verbose flag
-
-
-using namespace Fortran::frontend;
-using namespace salt::fortran;
-
-
-/**
- * The main action of the Salt instrumentor.
- * Visits each node in the parse tree.
- */
-class SaltInstrumentAction final : public PluginParseTreeAction {
-    enum class SaltInstrumentationPointType {
-        PROGRAM_BEGIN, // Declare profiler, initialize TAU, set node, start timer
-        PROCEDURE_BEGIN, // Declare profiler, start timer
-        PROCEDURE_END, // Stop timer on the line after
-        RETURN_STMT, // Stop timer on the line before
-        IF_RETURN // Transform if to if-then-endif, stop timer before return
-    };
-
-    using InstrumentationMap = std::map<SaltInstrumentationPointType, const std::string>;
-
-    // TODO Refactor. The SaltInstrumentationPoint is getting complicated enough that this
-    // should be refactored to subclasses instead of having a bunch of fields that only
-    // sometimes apply.
-    struct SaltInstrumentationPoint {
-        SaltInstrumentationPoint(const SaltInstrumentationPointType instrumentation_point_type,
-                                 const int start_line,
-                                 const std::optional<std::string> &timer_name = std::nullopt,
-                                 const int conditional_column = 0)
-            : instrumentationPointType(instrumentation_point_type),
-              startLine(start_line),
-              timerName(timer_name),
-              conditionalColumn(conditional_column) {
-        }
-
-        [[nodiscard]] bool instrumentBefore() const {
-            return instrumentationPointType == SaltInstrumentationPointType::PROGRAM_BEGIN
-                   || instrumentationPointType == SaltInstrumentationPointType::PROCEDURE_BEGIN
-                   || instrumentationPointType == SaltInstrumentationPointType::RETURN_STMT
-                   || instrumentationPointType == SaltInstrumentationPointType::IF_RETURN;
-        }
-
-        bool operator<(const SaltInstrumentationPoint &other) const {
-            if (startLine == other.startLine) {
-                if (instrumentBefore() && !other.instrumentBefore()) {
-                    return true;
-                }
-                return false;
-            }
-            return startLine < other.startLine;
-        }
-
-        [[nodiscard]] std::string typeString() const {
-            switch (instrumentationPointType) {
-                case SaltInstrumentationPointType::PROGRAM_BEGIN:
-                    return "PROGRAM_BEGIN"s;
-                case SaltInstrumentationPointType::PROCEDURE_BEGIN:
-                    return "PROCEDURE_BEGIN"s;
-                case SaltInstrumentationPointType::PROCEDURE_END:
-                    return "PROCEDURE_END"s;
-                case SaltInstrumentationPointType::RETURN_STMT:
-                    return "RETURN_STMT"s;
-                case SaltInstrumentationPointType::IF_RETURN:
-                    return "IF_RETURN"s;
-                default:
-                    CRASH_NO_CASE;
-            }
-        }
-
-        [[nodiscard]] std::string toString() const {
-            std::stringstream ss;
-            ss << startLine << "\t";
-            ss << (instrumentBefore() ? "before" : "after") << "\t";
-            ss << typeString() << "\t";
-            ss << "\"" << timerName.value_or("<no name>") << "\"";
-            if (instrumentationPointType == SaltInstrumentationPointType::IF_RETURN) {
-                ss << "\t" << conditionalColumn;
-            }
-            return ss.str();
-        }
-
-        SaltInstrumentationPointType instrumentationPointType;
-        int startLine;
-        std::optional<std::string> timerName;
-        int conditionalColumn;
-    };
-
-
-    struct SaltInstrumentParseTreeVisitor {
-        explicit SaltInstrumentParseTreeVisitor(Fortran::parser::Parsing *parsing, const bool skipInstrument = false)
-            : mainProgramLine_(0), subProgramLine_(0), skipInstrumentFile_(skipInstrument), parsing(parsing) {
-        }
-
-        /**
-         * Mark a line where a given type of instrumentation is needed.
-         * For PROGRAM_BEGIN and PROCEDURE_BEGIN, a timer name is needed.
-         * For PROCEDURE_END, a timer name is not needed.
-         * Instrumentation will be added after start_line.
-         */
-        void addInstrumentationPoint(SaltInstrumentationPointType instrumentation_point_type,
-                                     const int start_line,
-                                     const std::optional<std::string> &timer_name = std::nullopt,
-                                     const int conditional_column = 0) {
-            if (!skipInstrumentFile_ && !skipInstrumentSubprogram_) {
-                instrumentationPoints_.emplace_back(
-                    instrumentation_point_type, start_line, timer_name, conditional_column);
-            }
-        }
-
-        [[nodiscard]] const auto &getInstrumentationPoints() const {
-            return instrumentationPoints_;
-        }
-
-        [[nodiscard]] std::string dumpInstrumentationPoints() const {
-            std::stringstream ss;
-            for (const auto &instPt: getInstrumentationPoints()) {
-                ss << instPt.toString() << "\n";
-            }
-            return ss.str();
-        }
-
-        [[nodiscard]] static std::string convertWildcardToRegexForm(const std::string &wildString) {
-            // Escape all regex special characters
-            static const std::regex metacharacters(R"([\.\^\$\+\(\)\[\]\{\}\|\?\*])");
-            const std::string escapedString{std::regex_replace(wildString, metacharacters, R"(\$&)")};
-            // Convert lines in TAU select file format (where "#" means zero or more characters)
-            // to regex version (where ".*" means zero or more characters).
-            // "#" is used for wildcard in routine names in TAU selective instrumentation files
-            // because "*" can be used in C/C++ function identifiers as part of pointer types.
-            static const std::regex hashRegex{R"(#)"};
-            return std::regex_replace(escapedString, hashRegex, ".*");
-        }
-
-        [[nodiscard]] static bool shouldInstrumentSubprogram(const std::string &subprogramName) {
-            // Check if this subprogram should be instrumented.
-            // It should if:
-            //   - No include or exclude list is specified
-            //   - An exclude list is present and the subprogram is not in it
-            //   - An include list is present and the subprogram is in it (and not on the exclude list)
-
-            if (includelist.empty() && excludelist.empty()) {
-                return true;
-            }
-
-            for (const auto &excludeEntry: excludelist) {
-                if (const std::regex excludeRegex{convertWildcardToRegexForm(excludeEntry)}; std::regex_search(
-                    subprogramName, excludeRegex)) {
-                    return false;
-                }
-            }
-
-            bool subprogramInIncludeList{false};
-            for (const auto &includeEntry: includelist) {
-                if (const std::regex includeRegex{convertWildcardToRegexForm(includeEntry)}; std::regex_search(
-                    subprogramName, includeRegex)) {
-                    subprogramInIncludeList = true;
-                    break;
-                }
-            }
-
-            if (!includelist.empty()) {
-                if (subprogramInIncludeList) {
-                    return true;
-                }
-                return false;
-            }
-
-            return true;
-        }
-
-        // Default empty visit functions for otherwise unhandled types.
-        template<typename A>
-        static bool Pre(const A &) { return true; }
-
-        template<typename A>
-        static void Post(const A &) {
-            // this space intentionally left blank
-        }
-
-        // Override all types that we want to visit.
-
-        // Pre occurs when first visiting a node.
-        // Post occurs when returning from the node's children.
-        // See https://flang.llvm.org/docs/Parsing.html for information on the parse tree.
-
-        // Parse tree types are defined in: include/flang/Parser/parse-tree.h
-        // There are three types of parse tree nodes:
-        // Wrappers, with a single data member, always named `v`.
-        // Tuples, encapsulating multiple values in a data member named `t` of type std::tuple.
-        // Discriminated unions, one of several types stored in data member named `u` of type std::variant.
-        // Use std::get() to retrieve value from `t` or `u`
-
-        // See https://github.com/llvm/llvm-project/blob/main/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
-        // for examples of getting source position for a parse tree node
-
-        // Never descend into InterfaceSpecification nodes, they can't contain executable statements.
-        static bool Pre(const Fortran::parser::InterfaceSpecification &) { return false; }
-
-        bool Pre(const Fortran::parser::MainProgram &) {
-            isInMainProgram_ = true;
-            return true;
-        }
-
-        void Post(const Fortran::parser::MainProgram &) {
-            llvm::outs() << "Exit main program: " << mainProgramName_ << "\n";
-            isInMainProgram_ = false;
-        }
-
-        void Post(const Fortran::parser::ProgramStmt &program) {
-            mainProgramName_ = program.v.ToString();
-            mainProgramLine_ = parsing->allCooked().GetSourcePositionRange(program.v.source)->first.line;
-            llvm::outs() << "Enter main program: " << mainProgramName_ << "\n";
-        }
-
-        bool Pre(const Fortran::parser::SubroutineStmt &subroutineStmt) {
-            const auto &name = std::get<Fortran::parser::Name>(subroutineStmt.t);
-            subprogramName_ = name.ToString();
-            subProgramLine_ = parsing->allCooked().GetSourcePositionRange(name.source)->first.line;
-            llvm::outs() << "Enter Subroutine: " << subprogramName_ << "\n";
-            if (!shouldInstrumentSubprogram(subprogramName_)) {
-                llvm::outs() << "Skipping instrumentation of " << subprogramName_ <<
-                        " due to selective instrumentation\n";
-                skipInstrumentSubprogram_ = true;
-            }
-            return true;
-        }
-
-        void Post(const Fortran::parser::SubroutineSubprogram &) {
-            llvm::outs() << "Exit Subroutine: " << subprogramName_ << "\n";
-            skipInstrumentSubprogram_ = false;
-            subprogramName_.clear();
-        }
-
-        bool Pre(const Fortran::parser::FunctionStmt &functionStmt) {
-            const auto &name = std::get<Fortran::parser::Name>(functionStmt.t);
-            subprogramName_ = name.ToString();
-            subProgramLine_ = parsing->allCooked().GetSourcePositionRange(name.source)->first.line;
-            llvm::outs() << "Enter Function: " << subprogramName_ << "\n";
-            if (!shouldInstrumentSubprogram(subprogramName_)) {
-                llvm::outs() << "Skipping instrumentation of " << subprogramName_ <<
-                        " due to selective instrumentation\n";
-                skipInstrumentSubprogram_ = true;
-            }
-            return true;
-        }
-
-        void Post(const Fortran::parser::FunctionSubprogram &) {
-            llvm::outs() << "Exit Function: " << subprogramName_ << "\n";
-            skipInstrumentSubprogram_ = false;
-            subprogramName_.clear();
-            subProgramLine_ = 0;
-        }
-
-        // Split handling of ExecutionPart into two phases
-        // so that we insert Instrumentation Points in order
-        // even if we separately insert them in visitors for
-        // children of ExecutionPart.
-        bool Pre(const Fortran::parser::ExecutionPart &executionPart) {
-            handleExecutionPart(executionPart, true);
-            return true;
-        }
-
-        void Post(const Fortran::parser::ExecutionPart &executionPart) {
-            handleExecutionPart(executionPart, false);
-        }
-
-        void handleExecutionPart(const Fortran::parser::ExecutionPart &executionPart, bool pre) {
-            if (const Fortran::parser::Block &block = executionPart.v; block.empty()) {
-                llvm::outs() << "WARNING: Execution part empty.\n";
-            } else {
-                const std::optional startLocOpt{getLocation(parsing, block.front(), false)};
-                const std::optional endLocOpt{getLocation(parsing, block.back(), true)};
-
-                if (!startLocOpt.has_value()) {
-                    llvm::errs() << "ERROR: execution part had no start source location!\n";
-                }
-                if (!endLocOpt.has_value()) {
-                    llvm::errs() << "ERROR: execution part had no end source location!\n";
-                }
-
-                const auto &startLoc{startLocOpt.value()};
-                const auto &endLoc{endLocOpt.value()};
-
-                // Insert the timer start in the Pre phase (when we first visit the node)
-                // and the timer stop in the Post phase (when we return after visiting the node's children).
-                if (pre) {
-                    // TODO this assumes that the program end statement ends the next line after
-                    //      the last statement, but there could be whitespace/comments. Need to actually
-                    //      find the end statement. End statement may not have source position if name
-                    //      not listed -- need to find workaround.
-                    std::stringstream ss;
-                    ss << (isInMainProgram_ ? mainProgramName_ : subprogramName_);
-                    ss << " [{" << startLoc.sourceFile->path() << "} {";
-                    ss << (isInMainProgram_ ? mainProgramLine_ : subProgramLine_);
-                    ss << ",1}-{"; // TODO column number, first char of program/subroutine/function stmt
-                    ss << endLoc.line + 1;
-                    ss << ",1}]"; // TODO column number, last char of end stmt
-
-                    const std::string timerName{ss.str()};
-
-                    // Split the timerName string so that it will fit between Fortran 77's 72-character limit,
-                    // and use character string line continuation syntax compatible with Fortran 77 and modern
-                    // Fortran.
-                    std::stringstream ss2;
-                    for (size_t i = 0; i < timerName.size(); i += SALT_F77_LINE_LENGTH) {
-                        ss2 << SALT_FORTRAN_STRING_SPLITTER;
-                        ss2 << timerName.substr(i, SALT_F77_LINE_LENGTH);
-                    }
-
-                    const std::string splitTimerName{ss2.str()};
-
-                    if (isInMainProgram_) {
-                        llvm::outs() << "Program begin \"" << mainProgramName_ << "\" at " << startLoc.line << ", " <<
-                                startLoc.column << "\n";
-                        addInstrumentationPoint(SaltInstrumentationPointType::PROGRAM_BEGIN, startLoc.line,
-                                                splitTimerName);
-                    } else {
-                        llvm::outs() << "Subprogram begin \"" << subprogramName_ << "\" at " << startLoc.line << ", " <<
-                                startLoc.column << "\n";
-                        addInstrumentationPoint(SaltInstrumentationPointType::PROCEDURE_BEGIN, startLoc.line,
-                                                splitTimerName);
-                    }
-                } else {
-                    llvm::outs() << "End at " << endLoc.line << ", " << endLoc.column << "\n";
-                    addInstrumentationPoint(SaltInstrumentationPointType::PROCEDURE_END, endLoc.line);
-                }
-            }
-        }
-
-        // A ReturnStmt does not have a source, so we instead need to get access to the wrapper Statement that does.
-        // Here we get the ReturnStmt through ExecutableConstruct -> Statement<ActionStmt> -> Indirection<ReturnStmt>
-        bool Pre(const Fortran::parser::ExecutableConstruct &execConstruct) {
-            if (const auto actionStmt = std::get_if<Fortran::parser::Statement<Fortran::parser::ActionStmt> >(
-                &execConstruct.u)) {
-                if (std::holds_alternative<Fortran::common::Indirection<Fortran::parser::ReturnStmt> >(
-                    actionStmt->statement.u)) {
-                    const std::optional returnPos{locationFromSource(parsing, actionStmt->source, false)};
-                    const int returnLine{returnPos.value().line};
-                    llvm::outs() << "Return statement at " << returnLine << "\n";
-                    addInstrumentationPoint(SaltInstrumentationPointType::RETURN_STMT, returnLine);
-                }
-            }
-            return true;
-        }
-
-        bool Pre(const Fortran::parser::IfStmt &ifStmt) {
-            if (const auto &ifAction{
-                    std::get<Fortran::parser::UnlabeledStatement<Fortran::parser::ActionStmt> >(ifStmt.t)
-                };
-                std::holds_alternative<Fortran::common::Indirection<
-                    Fortran::parser::ReturnStmt> >(ifAction.statement.u)) {
-                const auto startPos{
-                    locationFromSource(parsing,
-                        std::get<Fortran::parser::ScalarLogicalExpr>(ifStmt.t).thing.thing.value().source,
-                        false).value()
-                };
-                const auto endPos{
-                    locationFromSource(parsing,
-                        std::get<Fortran::parser::ScalarLogicalExpr>(ifStmt.t).thing.thing.value().source,
-                        true).value()
-                };
-                llvm::outs() << "If-return, conditional: (" << startPos.line << "," << startPos.column << ") - "
-                        << "(" << endPos.line << "," << endPos.column << ")\n";
-                // TODO this assumes that the conditional fits on one list
-                // make more robust, test with more cases
-                addInstrumentationPoint(SaltInstrumentationPointType::IF_RETURN, startPos.line, std::nullopt,
-                                        endPos.column);
-            }
-            return true;
-        }
-
-    private:
-        // Keeps track of current state of traversal
-        bool isInMainProgram_{false};
-        std::string mainProgramName_;
-        int mainProgramLine_;
-        std::string subprogramName_;
-        int subProgramLine_;
-
-        bool skipInstrumentFile_;
-        bool skipInstrumentSubprogram_{false};
-
-        std::vector<SaltInstrumentationPoint> instrumentationPoints_;
-
-        // Pass in the parser object from the Action to the Visitor
-        // so that we can use it while processing parse tree nodes.
-        Fortran::parser::Parsing *parsing{nullptr};
-    }; // SaltInstrumentParseTreeVisitor
-
-    /**
-     * Get the source file represented by a given parse tree
-     *
-     * See function BuildRuntimeDerivedTypeTables() in
-     * flang/lib/Semantics/runtime-type-info.cpp for example
-     * of getting the source file name.
-     */
-    [[nodiscard]] static std::optional<std::string> getInputFilePath(Fortran::parser::Parsing &parsing) {
-        const auto &allSources{parsing.allCooked().allSources()};
-        if (const auto firstProv{allSources.GetFirstFileProvenance()}) {
-            if (const auto *srcFile{allSources.GetSourceFile(firstProv->start())}) {
-                return srcFile->path();
-            }
-        }
-        return std::nullopt;
-    }
-
-
-    [[nodiscard]] static std::string getInstrumentationPointString(const SaltInstrumentationPoint &instPt,
-                                                                   const InstrumentationMap &instMap) {
-        static std::regex timerNameRegex{SALT_FORTRAN_TIMER_NAME_TEMPLATE};
-        std::string instTemplate = instMap.at(instPt.instrumentationPointType);
-        if (instPt.timerName.has_value()) {
-            instTemplate = std::regex_replace(instTemplate, timerNameRegex,
-                                              instPt.timerName.value());
-        }
-        return instTemplate;
-    }
-
-    static void instrumentFile(const std::string &inputFilePath, llvm::raw_pwrite_stream &outputStream,
-                               const SaltInstrumentParseTreeVisitor &visitor,
-                               const InstrumentationMap &instMap) {
-        std::ifstream inputStream{inputFilePath};
-        if (!inputStream) {
-            llvm::errs() << "ERROR: Could not open input file" << inputFilePath << "\n";
-            std::exit(-2);
-        }
-        std::string line;
-        int lineNum{0};
-        const auto &instPts{visitor.getInstrumentationPoints()};
-
-        llvm::outs() << "Will perform instrumentation:\n" << visitor.dumpInstrumentationPoints();
-
-        // Sanity check: are instrumentation points in the right order?
-        if (!std::is_sorted(instPts.cbegin(), instPts.cend())) {
-            DIE("ERROR: Instrumentation points not sorted by line number!\n");
-        }
-
-        auto instIter{instPts.cbegin()};
-        bool shouldOutputLine{};
-        while (std::getline(inputStream, line)) {
-            ++lineNum;
-            shouldOutputLine = true;
-            while (instIter != instPts.cend() && instIter->startLine == lineNum && instIter->instrumentBefore()) {
-                // Need special case for if-return because it requires a more elaborate transformation
-                // than simply inserting lines
-                // TODO instead of special case have three kinds of instrumentation: before, after, and REPLACE
-                // TODO handle return <value> case
-                // TODO handle multi-line
-                // TODO handle line continuation if too long
-                if (instIter->instrumentationPointType == SaltInstrumentationPointType::IF_RETURN) {
-                    shouldOutputLine = false;
-                    line.erase(instIter->conditionalColumn);
-                    line.insert(instIter->conditionalColumn, " then");
-                    outputStream << line << "\n";
-                    outputStream << getInstrumentationPointString(*instIter, instMap) << "\n";
-                    outputStream << "      return\n";
-                    outputStream << "      endif\n";
-                } else {
-                    outputStream << getInstrumentationPointString(*instIter, instMap) << "\n";
-                }
-                ++instIter;
-            }
-            if (shouldOutputLine) {
-                outputStream << line << "\n";
-            }
-            while (instIter != instPts.cend() && instIter->startLine == lineNum && !instIter->instrumentBefore()) {
-                outputStream << getInstrumentationPointString(*instIter, instMap) << "\n";
-                ++instIter;
-            }
-        }
-    }
-
-    [[nodiscard]] static std::string getConfigPath() {
-        // If config path env var is set and non-empty, use that;
-        // otherwise use default.
-        if (const char *val = getenv(SALT_FORTRAN_CONFIG_FILE_VAR)) {
-            if (const std::string configPath{val}; !configPath.empty()) {
-                return configPath;
-            }
-        }
-        return SALT_FORTRAN_CONFIG_DEFAULT_PATH;
-    }
-
-    [[nodiscard]] static std::optional<std::string> getSelectFilePath() {
-        if (const char *val = getenv(SALT_FORTRAN_SELECT_FILE_VAR)) {
-            if (std::string selectFile{val}; !selectFile.empty()) {
-                return selectFile;
-            }
-        }
-        return std::nullopt;
-    }
-
-    [[nodiscard]] static ryml::Tree getConfigYamlTree(const std::string &configPath) {
-        std::ifstream inputStream{configPath};
-        if (!inputStream) {
-            llvm::errs() << "ERROR: Could not open configuration file " << configPath << "\n"
-                    << "Set $" SALT_FORTRAN_CONFIG_FILE_VAR " to path to desired configuration file.\n";
-            std::exit(-3);
-        }
-        std::stringstream configStream;
-        configStream << inputStream.rdbuf();
-        // TODO handle errors if config yaml doesn't parse
-        return ryml::parse_in_arena(ryml::to_csubstr(configStream.str()));
-    }
-
-    [[nodiscard]] static InstrumentationMap getInstrumentationMap(const ryml::Tree &tree) {
-        InstrumentationMap map;
-        std::stringstream ss;
-
-        // Access the "Fortran" node
-        ryml::NodeRef fortranNode = tree[SALT_FORTRAN_KEY];
-
-        // Validate that the "Fortran" node exists
-        if (!fortranNode.valid()) {
-            llvm::errs() << "ERROR: '" << SALT_FORTRAN_KEY << "' key not found in the configuration file.\n";
-            std::exit(-3);
-        }
-
-        // Access and process the "program_begin_insert" node
-        ryml::NodeRef programBeginNode = fortranNode[SALT_FORTRAN_PROGRAM_BEGIN_KEY];
-        if (!programBeginNode.valid()) {
-            llvm::errs() << "ERROR: '" << SALT_FORTRAN_PROGRAM_BEGIN_KEY << "' key not found under 'Fortran'.\n";
-            std::exit(-3);
-        }
-        for (const ryml::NodeRef child: programBeginNode.children()) {
-            ss << child.val() << "\n";
-        }
-        map.emplace(SaltInstrumentationPointType::PROGRAM_BEGIN, ss.str());
-        ss.str(""s);
-
-        // Access and process the "procedure_begin_insert" node
-        ryml::NodeRef procedureBeginNode = fortranNode[SALT_FORTRAN_PROCEDURE_BEGIN_KEY];
-        if (!procedureBeginNode.valid()) {
-            llvm::errs() << "ERROR: '" << SALT_FORTRAN_PROCEDURE_BEGIN_KEY << "' key not found under 'Fortran'.\n";
-            std::exit(-3);
-        }
-        for (const ryml::NodeRef child: procedureBeginNode.children()) {
-            ss << child.val() << "\n";
-        }
-        map.emplace(SaltInstrumentationPointType::PROCEDURE_BEGIN, ss.str());
-        ss.str(""s);
-
-        // Access and process the "procedure_end_insert" node
-        ryml::NodeRef procedureEndNode = fortranNode[SALT_FORTRAN_PROCEDURE_END_KEY];
-        if (!procedureEndNode.valid()) {
-            llvm::errs() << "ERROR: '" << SALT_FORTRAN_PROCEDURE_END_KEY << "' key not found under 'Fortran'.\n";
-            std::exit(-3);
-        }
-        for (const ryml::NodeRef child: procedureEndNode.children()) {
-            ss << child.val() << "\n";
-        }
-        map.emplace(SaltInstrumentationPointType::PROCEDURE_END, ss.str());
-        // The return statement uses the same text as procedure end,
-        // but is inserted before the line instead of after.
-        map.emplace(SaltInstrumentationPointType::RETURN_STMT, ss.str());
-        // The if-return statement uses the same text as procedure end,
-        // but requires transformation to if-then-endif
-        map.emplace(SaltInstrumentationPointType::IF_RETURN, ss.str());
-
-        return map;
-    }
-
-    [[nodiscard]] static std::string convertGlobToRegexForm(const std::string &globString) {
-        // Convert lines in shell glob format (where "*" means zero or more characters)
-        // to regex version (where ".*" means zero or more characters).
-        // This is used for files in TAU selective instrumentation files.
-        static std::regex starRegex{R"(\*)"};
-        const std::string starString{std::regex_replace(globString, starRegex, ".*")};
-        // Escape all special regex characters except for "*" which was previously handled.
-        static const std::regex metacharacters(R"([\.\^\$\+\(\)\[\]\{\}\|\?])");
-        return std::regex_replace(starString, metacharacters, R"(\$&)");
-    }
-
-    [[nodiscard]] static bool shouldInstrumentFile(const std::filesystem::path &filePath) {
-        // Check if this file should be instrumented.
-        // It should if:
-        //   - No file include or file exclude list is specified
-        //   - An exclude list is present and the file is not in it
-        //   - An include list is present and the file is in it
-
-        if (fileincludelist.empty() && fileexcludelist.empty()) {
-            return true;
-        }
-
-        const auto filePart{filePath.filename()};
-        for (const auto &excludeEntry: fileexcludelist) {
-            if (const std::regex excludeRegex{convertGlobToRegexForm(excludeEntry)}; std::regex_search(
-                filePart.string(), excludeRegex)) {
-                return false;
-            }
-        }
-
-        bool fileInIncludeList{false};
-        for (const auto &includeEntry: fileincludelist) {
-            if (const std::regex includeRegex{convertGlobToRegexForm(includeEntry)}; std::regex_search(
-                filePart.string(), includeRegex)) {
-                fileInIncludeList = true;
-                break;
-            }
-        }
-
-        if (!fileincludelist.empty()) {
-            if (fileInIncludeList) {
-                return true;
-            }
-            return false;
-        }
-
-        return true;
-    }
-
-    /**
-     * This is the entry point for the plugin.
-     */
-    void executeAction() override {
-        llvm::outs() << "==== SALT Instrumentor Plugin starting ====\n";
-
-        // This is the object through which we access the parse tree
-        // and the source
-        Fortran::parser::Parsing &parsing = getParsing();
-
-        // Get the path to the input file
-        const auto inputFilePathStr = getInputFilePath(parsing);
-        if (!inputFilePathStr) {
-            llvm::errs() << "ERROR: Unable to find input file name!\n";
-            std::exit(-1);
-        }
-        llvm::outs() << "Have input file: " << *inputFilePathStr << "\n";
-
-        const std::filesystem::path inputFilePath{inputFilePathStr.value()};
-
-        // Read and parse the yaml configuration file
-        const std::string configPath{getConfigPath()};
-        const ryml::Tree yamlTree = getConfigYamlTree(configPath);
-        const InstrumentationMap instMap = getInstrumentationMap(yamlTree);
-
-        if (const auto selectPath{getSelectFilePath()}; selectPath.has_value()) {
-            if (processInstrumentationRequests(selectPath->c_str())) {
-                const auto printStr = [&](const auto &a) { llvm::outs() << a << "\n"; };
-                llvm::outs() << "File include list:\n";
-                std::for_each(fileincludelist.cbegin(), fileincludelist.cend(), printStr);
-                llvm::outs() << "File exclude list:\n";
-                std::for_each(fileexcludelist.cbegin(), fileexcludelist.cend(), printStr);
-                llvm::outs() << "Include list:\n";
-                std::for_each(includelist.cbegin(), includelist.cend(), printStr);
-                llvm::outs() << "Exclude list:\n";
-                std::for_each(excludelist.cbegin(), excludelist.cend(), printStr);
-            } else {
-                llvm::errs() << "ERROR: Unable to read selective instrumentation file at " << selectPath << "\n";
-                std::exit(-4);
-            }
-        }
-
-
-        // Get the extension of the input file
-        // For input file 'filename.ext' we will output to 'filename.inst.Ext'
-        // Since we are adding preprocessor directives in the emitted code,
-        // the file extension should be capitalized.
-        std::string inputFileExtension;
-        if (auto const extPos = inputFilePath.string().find_last_of('.'); extPos == std::string::npos) {
-            inputFileExtension = "F90"; // Default if for some reason file has no extension
-        } else {
-            inputFileExtension = inputFilePath.string().substr(extPos + 1); // Part of string past last '.'
-            // Capitalize the first character of inputFileExtension
-            if (!inputFileExtension.empty()) {
-                inputFileExtension[0] = static_cast<char>(std::toupper(inputFileExtension[0]));
-            }
-        }
-
-
-        // Open an output file for writing the instrumented code
-        const std::string outputFileExtension = "inst."s + inputFileExtension;
-        const auto outputFileStream = createOutputFile(outputFileExtension);
-
-        // If visitor has skipInstrument set, no instrumentation points are added
-        // so the file is output into the .inst file unchanged.
-        bool skipInstrument{false};
-        if (!shouldInstrumentFile(inputFilePath)) {
-            llvm::outs() << "Skipping instrumentation of " << inputFilePath
-                    << " due to selective instrumentation.\n";
-            skipInstrument = true;
-        }
-        // Walk the parse tree -- marks nodes for instrumentation
-        SaltInstrumentParseTreeVisitor visitor{&parsing, skipInstrument};
-        Walk(parsing.parseTree(), visitor);
-
-        // Use the instrumentation points stored in the Visitor to write the instrumented file.
-        instrumentFile(inputFilePath, *outputFileStream, visitor, instMap);
-
-        outputFileStream->flush();
-
-        llvm::outs() << "==== SALT Instrumentor Plugin finished ====\n";
-    }
-};
-
-[[maybe_unused]] static FrontendPluginRegistry::Add<SaltInstrumentAction> X(
-    "salt-instrument", "Apply SALT Instrumentation");

From d0e1f94db6376ee59fdb90bdace622bd3a49c587 Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Thu, 16 Jan 2025 15:30:29 -0800
Subject: [PATCH 117/135] Add SALT_FORTRAN_VERBOSE env var

Only outputs on error unless SALT_FORTRAN_VERBOSE is set to non-zero
value. Set SALT_FORTRAN_VERBOSE in tests so that check for output
succeeds.
---
 CMakeLists.txt                              |  4 +-
 include/dprint.hpp                          | 12 ++++
 include/flang_instrumentation_constants.hpp |  3 +
 src/dprint.cpp                              | 29 +++++++++
 src/flang_salt_instrument_plugin.cpp        | 70 ++++++++++++---------
 5 files changed, 87 insertions(+), 31 deletions(-)
 create mode 100644 src/dprint.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index fa17b74..760b86e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -290,6 +290,7 @@ if(MLIR_FOUND AND Flang_FOUND)
     endif ()
 
     set(SALT_FLANG_PLUGIN_HEADER_FILES
+    dprint.hpp
     selectfile.hpp
     flang_source_location.hpp
     flang_instrumentation_constants.hpp
@@ -298,6 +299,7 @@ if(MLIR_FOUND AND Flang_FOUND)
     list(TRANSFORM SALT_FLANG_PLUGIN_HEADER_FILES PREPEND "${CMAKE_SOURCE_DIR}/include/")
 
     set(SALT_FLANG_PLUGIN_SRCS
+    dprint.cpp
     selectfile.cpp
     flang_source_location.cpp
     flang_instrumentation_point.cpp
@@ -652,7 +654,7 @@ foreach(test_source IN LISTS FORTRAN_TESTS_SOURCES_LIST)
   set_tests_properties(instrument_${test_source}
     PROPERTIES
     REQUIRED_FILES "${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}/fparse-llvm"
-    ENVIRONMENT "SALT_FORTRAN_CONFIG_FILE=${CMAKE_SOURCE_DIR}/config_files/tau_config.yaml"
+    ENVIRONMENT "SALT_FORTRAN_CONFIG_FILE=${CMAKE_SOURCE_DIR}/config_files/tau_config.yaml;SALT_FORTRAN_VERBOSE=1"
     PASS_REGULAR_EXPRESSION "SALT Instrumentor Plugin finished"
   )
 endforeach()
diff --git a/include/dprint.hpp b/include/dprint.hpp
index 8fa4c92..d2aaf80 100644
--- a/include/dprint.hpp
+++ b/include/dprint.hpp
@@ -1,3 +1,14 @@
+#ifndef DPRINT_HPP
+#define DPRINT_HPP
+
+#include "llvm/Support/raw_ostream.h"
+
+namespace salt {
+    void enableVerbose();
+
+    llvm::raw_ostream &verboseStream();
+}
+
 #ifdef DEBUG_NO_WAY
 #define DPRINT(__fmt, ...) printf(__fmt, ##__VA_ARGS__)
 #define DPRINT0(__fmt) printf(__fmt)
@@ -6,3 +17,4 @@
 #define DPRINT0(__fmt)
 #endif
 
+#endif //DPRINT_HPP
diff --git a/include/flang_instrumentation_constants.hpp b/include/flang_instrumentation_constants.hpp
index 67b01ce..2841601 100644
--- a/include/flang_instrumentation_constants.hpp
+++ b/include/flang_instrumentation_constants.hpp
@@ -16,6 +16,9 @@ limitations under the License.
 #ifndef FLANG_INSTRUMENTATION_CONSTANTS_HPP
 #define FLANG_INSTRUMENTATION_CONSTANTS_HPP
 
+// Verbose flag environment variable
+#define SALT_FORTRAN_VERBOSE_VAR "SALT_FORTRAN_VERBOSE"
+
 // Configuration file environment variable
 #define SALT_FORTRAN_CONFIG_FILE_VAR "SALT_FORTRAN_CONFIG_FILE"
 #define SALT_FORTRAN_CONFIG_DEFAULT_PATH "config_files/tau_config.yaml"
diff --git a/src/dprint.cpp b/src/dprint.cpp
new file mode 100644
index 0000000..9d2915c
--- /dev/null
+++ b/src/dprint.cpp
@@ -0,0 +1,29 @@
+/* Copyright (C) 2025, ParaTools, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "dprint.hpp"
+
+static bool verboseEnabled{false};
+
+void salt::enableVerbose() {
+    verboseEnabled = true;
+}
+
+llvm::raw_ostream & salt::verboseStream() {
+    if (verboseEnabled) {
+        return llvm::outs();
+    }
+    return llvm::nulls();
+}
diff --git a/src/flang_salt_instrument_plugin.cpp b/src/flang_salt_instrument_plugin.cpp
index 50e9957..7903b8c 100644
--- a/src/flang_salt_instrument_plugin.cpp
+++ b/src/flang_salt_instrument_plugin.cpp
@@ -34,6 +34,7 @@ limitations under the License.
 #define RYML_SINGLE_HDR_DEFINE_NOW
 #define RYML_SHARED
 
+#include <dprint.hpp>
 #include <ryml_all.hpp>
 
 #include <clang/Basic/SourceLocation.h>
@@ -60,7 +61,6 @@ using namespace Fortran::frontend;
  * Visits each node in the parse tree.
  */
 namespace salt::fortran {
-
     class SaltInstrumentAction final : public PluginParseTreeAction {
         struct SaltInstrumentParseTreeVisitor {
             explicit SaltInstrumentParseTreeVisitor(Fortran::parser::Parsing *parsing,
@@ -200,23 +200,23 @@ namespace salt::fortran {
             }
 
             void Post(const Fortran::parser::MainProgram &) {
-                llvm::outs() << "Exit main program: " << mainProgramName_ << "\n";
+                verboseStream() << "Exit main program: " << mainProgramName_ << "\n";
                 isInMainProgram_ = false;
             }
 
             void Post(const Fortran::parser::ProgramStmt &program) {
                 mainProgramName_ = program.v.ToString();
                 mainProgramLine_ = parsing->allCooked().GetSourcePositionRange(program.v.source)->first.line;
-                llvm::outs() << "Enter main program: " << mainProgramName_ << "\n";
+                verboseStream() << "Enter main program: " << mainProgramName_ << "\n";
             }
 
             bool Pre(const Fortran::parser::SubroutineStmt &subroutineStmt) {
                 const auto &name = std::get<Fortran::parser::Name>(subroutineStmt.t);
                 subprogramName_ = name.ToString();
                 subProgramLine_ = parsing->allCooked().GetSourcePositionRange(name.source)->first.line;
-                llvm::outs() << "Enter Subroutine: " << subprogramName_ << "\n";
+                verboseStream() << "Enter Subroutine: " << subprogramName_ << "\n";
                 if (!shouldInstrumentSubprogram(subprogramName_)) {
-                    llvm::outs() << "Skipping instrumentation of " << subprogramName_ <<
+                    verboseStream() << "Skipping instrumentation of " << subprogramName_ <<
                             " due to selective instrumentation\n";
                     skipInstrumentSubprogram_ = true;
                 }
@@ -224,7 +224,7 @@ namespace salt::fortran {
             }
 
             void Post(const Fortran::parser::SubroutineSubprogram &) {
-                llvm::outs() << "Exit Subroutine: " << subprogramName_ << "\n";
+                verboseStream() << "Exit Subroutine: " << subprogramName_ << "\n";
                 skipInstrumentSubprogram_ = false;
                 subprogramName_.clear();
             }
@@ -233,9 +233,9 @@ namespace salt::fortran {
                 const auto &name = std::get<Fortran::parser::Name>(functionStmt.t);
                 subprogramName_ = name.ToString();
                 subProgramLine_ = parsing->allCooked().GetSourcePositionRange(name.source)->first.line;
-                llvm::outs() << "Enter Function: " << subprogramName_ << "\n";
+                verboseStream() << "Enter Function: " << subprogramName_ << "\n";
                 if (!shouldInstrumentSubprogram(subprogramName_)) {
-                    llvm::outs() << "Skipping instrumentation of " << subprogramName_ <<
+                    verboseStream() << "Skipping instrumentation of " << subprogramName_ <<
                             " due to selective instrumentation\n";
                     skipInstrumentSubprogram_ = true;
                 }
@@ -243,7 +243,7 @@ namespace salt::fortran {
             }
 
             void Post(const Fortran::parser::FunctionSubprogram &) {
-                llvm::outs() << "Exit Function: " << subprogramName_ << "\n";
+                verboseStream() << "Exit Function: " << subprogramName_ << "\n";
                 skipInstrumentSubprogram_ = false;
                 subprogramName_.clear();
                 subProgramLine_ = 0;
@@ -264,7 +264,7 @@ namespace salt::fortran {
 
             void handleExecutionPart(const Fortran::parser::ExecutionPart &executionPart, bool pre) {
                 if (const Fortran::parser::Block &block = executionPart.v; block.empty()) {
-                    llvm::outs() << "WARNING: Execution part empty.\n";
+                    verboseStream() << "WARNING: Execution part empty.\n";
                 } else {
                     const std::optional startLocOpt{getLocation(parsing, block.front(), false)};
                     const std::optional endLocOpt{getLocation(parsing, block.back(), true)};
@@ -308,18 +308,18 @@ namespace salt::fortran {
                         const std::string splitTimerName{ss2.str()};
 
                         if (isInMainProgram_) {
-                            llvm::outs() << "Program begin \"" << mainProgramName_ << "\" at " << startLoc.line << ", "
+                            verboseStream() << "Program begin \"" << mainProgramName_ << "\" at " << startLoc.line << ", "
                                     <<
                                     startLoc.column << "\n";
                             addProgramBeginInstrumentation(startLoc.line, splitTimerName);
                         } else {
-                            llvm::outs() << "Subprogram begin \"" << subprogramName_ << "\" at " << startLoc.line <<
+                            verboseStream() << "Subprogram begin \"" << subprogramName_ << "\" at " << startLoc.line <<
                                     ", " <<
                                     startLoc.column << "\n";
                             addProcedureBeginInstrumentation(startLoc.line, splitTimerName);
                         }
                     } else {
-                        llvm::outs() << "End at " << endLoc.line << ", " << endLoc.column << "\n";
+                        verboseStream() << "End at " << endLoc.line << ", " << endLoc.column << "\n";
                         addProcedureEndInstrumentation(endLoc.line);
                     }
                 }
@@ -334,7 +334,7 @@ namespace salt::fortran {
                         actionStmt->statement.u)) {
                         const std::optional returnPos{locationFromSource(parsing, actionStmt->source, false)};
                         const int returnLine{returnPos.value().line};
-                        llvm::outs() << "Return statement at " << returnLine << "\n";
+                        verboseStream() << "Return statement at " << returnLine << "\n";
                         addReturnStmtInstrumentation(returnLine);
                     }
                 }
@@ -359,7 +359,7 @@ namespace salt::fortran {
                                            source,
                                            true).value()
                     };
-                    llvm::outs() << "If-return, conditional: (" << startPos.line << "," << startPos.column << ") - "
+                    verboseStream() << "If-return, conditional: (" << startPos.line << "," << startPos.column << ") - "
                             << "(" << endPos.line << "," << endPos.column << ")\n";
                     // TODO handle return <value> case
                     // TODO handle multi-line
@@ -416,7 +416,7 @@ namespace salt::fortran {
             int lineNum{0};
             const auto &instPts{visitor.getInstrumentationPoints()};
 
-            llvm::outs() << "Will perform instrumentation:\n" << visitor.dumpInstrumentationPoints();
+            verboseStream() << "Will perform instrumentation:\n" << visitor.dumpInstrumentationPoints();
 
             // Sanity check: are instrumentation points in the right order?
             if (!std::is_sorted(instPts.cbegin(), instPts.cend(), [&](const auto &p1, const auto &p2) {
@@ -598,11 +598,29 @@ namespace salt::fortran {
             return true;
         }
 
+        static void dumpSelectiveRequests() {
+            const auto printStr = [&](const auto &a) { verboseStream() << a << "\n"; };
+            verboseStream() << "File include list:\n";
+            std::for_each(fileincludelist.cbegin(), fileincludelist.cend(), printStr);
+            verboseStream() << "File exclude list:\n";
+            std::for_each(fileexcludelist.cbegin(), fileexcludelist.cend(), printStr);
+            verboseStream() << "Include list:\n";
+            std::for_each(includelist.cbegin(), includelist.cend(), printStr);
+            verboseStream() << "Exclude list:\n";
+            std::for_each(excludelist.cbegin(), excludelist.cend(), printStr);
+        }
+
         /**
          * This is the entry point for the plugin.
          */
         void executeAction() override {
-            llvm::outs() << "==== SALT Instrumentor Plugin starting ====\n";
+            if (const char *val = getenv(SALT_FORTRAN_VERBOSE_VAR)) {
+                if (const std::string verboseFlag{val}; !verboseFlag.empty() && verboseFlag != "0"s) {
+                    enableVerbose();
+                }
+            }
+
+            verboseStream() << "==== SALT Instrumentor Plugin starting ====\n";
 
             // This is the object through which we access the parse tree
             // and the source
@@ -614,7 +632,8 @@ namespace salt::fortran {
                 llvm::errs() << "ERROR: Unable to find input file name!\n";
                 std::exit(-1);
             }
-            llvm::outs() << "Have input file: " << *inputFilePathStr << "\n";
+
+            verboseStream() << "Have input file: " << *inputFilePathStr << "\n";
 
             const std::filesystem::path inputFilePath{inputFilePathStr.value()};
 
@@ -625,15 +644,7 @@ namespace salt::fortran {
 
             if (const auto selectPath{getSelectFilePath()}; selectPath.has_value()) {
                 if (processInstrumentationRequests(selectPath->c_str())) {
-                    const auto printStr = [&](const auto &a) { llvm::outs() << a << "\n"; };
-                    llvm::outs() << "File include list:\n";
-                    std::for_each(fileincludelist.cbegin(), fileincludelist.cend(), printStr);
-                    llvm::outs() << "File exclude list:\n";
-                    std::for_each(fileexcludelist.cbegin(), fileexcludelist.cend(), printStr);
-                    llvm::outs() << "Include list:\n";
-                    std::for_each(includelist.cbegin(), includelist.cend(), printStr);
-                    llvm::outs() << "Exclude list:\n";
-                    std::for_each(excludelist.cbegin(), excludelist.cend(), printStr);
+                    dumpSelectiveRequests();
                 } else {
                     llvm::errs() << "ERROR: Unable to read selective instrumentation file at " << selectPath << "\n";
                     std::exit(-4);
@@ -663,7 +674,7 @@ namespace salt::fortran {
             // so the file is output into the .inst file unchanged.
             bool skipInstrument{false};
             if (!shouldInstrumentFile(inputFilePath)) {
-                llvm::outs() << "Skipping instrumentation of " << inputFilePath
+                verboseStream() << "Skipping instrumentation of " << inputFilePath
                         << " due to selective instrumentation.\n";
                 skipInstrument = true;
             }
@@ -676,10 +687,9 @@ namespace salt::fortran {
 
             outputFileStream->flush();
 
-            llvm::outs() << "==== SALT Instrumentor Plugin finished ====\n";
+            verboseStream() << "==== SALT Instrumentor Plugin finished ====\n";
         }
     };
-
 }
 
 [[maybe_unused]] static FrontendPluginRegistry::Add<salt::fortran::SaltInstrumentAction> X(

From fb0e4e3e89028a855319c64750b00c96be0ab5b0 Mon Sep 17 00:00:00 2001
From: Nicholas Chaimov <nchaimov@paratools.com>
Date: Thu, 16 Jan 2025 16:01:42 -0800
Subject: [PATCH 118/135] Insert #line directives to map back to original
 source

---
 src/flang_salt_instrument_plugin.cpp | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/src/flang_salt_instrument_plugin.cpp b/src/flang_salt_instrument_plugin.cpp
index 7903b8c..01541da 100644
--- a/src/flang_salt_instrument_plugin.cpp
+++ b/src/flang_salt_instrument_plugin.cpp
@@ -308,7 +308,8 @@ namespace salt::fortran {
                         const std::string splitTimerName{ss2.str()};
 
                         if (isInMainProgram_) {
-                            verboseStream() << "Program begin \"" << mainProgramName_ << "\" at " << startLoc.line << ", "
+                            verboseStream() << "Program begin \"" << mainProgramName_ << "\" at " << startLoc.line <<
+                                    ", "
                                     <<
                                     startLoc.column << "\n";
                             addProgramBeginInstrumentation(startLoc.line, splitTimerName);
@@ -404,6 +405,10 @@ namespace salt::fortran {
             return std::nullopt;
         }
 
+        static std::string lineDirective(const int line, const std::string &file) {
+            return "#line " + std::to_string(line) + " \"" + file + "\"";
+        }
+
         static void instrumentFile(const std::string &inputFilePath, llvm::raw_pwrite_stream &outputStream,
                                    const SaltInstrumentParseTreeVisitor &visitor,
                                    const InstrumentationMap &instMap) {
@@ -425,26 +430,39 @@ namespace salt::fortran {
                 DIE("ERROR: Instrumentation points not sorted by line number!\n");
             }
 
+            outputStream << lineDirective(1, inputFilePath) << "\n";
+
             auto instIter{instPts.cbegin()};
             while (std::getline(inputStream, lineText)) {
+                bool lineWasInstrumentedBefore{false};
+                bool lineWasInstrumentedAfter{false};
+                bool shouldOutputLine{true};
+
                 ++lineNum;
 
                 // First, process instrumentation points that come BEFORE this line.
                 while (instIter != instPts.cend() && (*instIter)->line() == lineNum && (*instIter)->location() ==
                        InstrumentationLocation::BEFORE) {
                     outputStream << (*instIter)->instrumentationString(instMap, lineText) << "\n";
+                    lineWasInstrumentedBefore = true;
                     ++instIter;
                 }
 
                 // Then, process instrumentation points that REPLACE this line.
-                bool shouldOutputLine{true};
                 while (instIter != instPts.cend() && (*instIter)->line() == lineNum && (*instIter)->location() ==
                        InstrumentationLocation::REPLACE) {
+                    outputStream << lineDirective(lineNum, inputFilePath) << "\n";
                     outputStream << (*instIter)->instrumentationString(instMap, lineText) << "\n";
+                    lineWasInstrumentedAfter = true;
                     shouldOutputLine = false;
                     ++instIter;
                 }
 
+                // If line was instrumented, output a #line directive
+                if (lineWasInstrumentedBefore) {
+                    outputStream << lineDirective(lineNum, inputFilePath) << "\n";
+                }
+
                 // Output the current line, if not replaced.
                 if (shouldOutputLine) {
                     outputStream << lineText << "\n";
@@ -454,8 +472,14 @@ namespace salt::fortran {
                 while (instIter != instPts.cend() && (*instIter)->line() == lineNum && (*instIter)->location() ==
                        InstrumentationLocation::AFTER) {
                     outputStream << (*instIter)->instrumentationString(instMap, lineText) << "\n";
+                    lineWasInstrumentedAfter = true;
                     ++instIter;
                 }
+
+                // If line was instrumented, output a #line directive
+                if (lineWasInstrumentedAfter) {
+                    outputStream << lineDirective(lineNum+1, inputFilePath) << "\n";
+                }
             }
         }
 

From 32c97560bb3d6dac1e8da360d2445caeb682e7b4 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Fri, 24 Jan 2025 11:01:46 -0500
Subject: [PATCH 119/135] Update cparse-llvm to look for config files relative
 to binary location

---
 CMakeLists.txt     | 41 +++++++++++++++-----------------------
 src/fparse-llvm.in | 30 ++++++++++++++++++++++++++--
 src/frontend.cpp   | 49 ++++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 89 insertions(+), 31 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 760b86e..f488aeb 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -82,7 +82,7 @@ set(CMAKE_CXX_STANDARD 17)
 #-----------------------------
 # Create the main SALT project
 #-----------------------------
-project(SALT-FM
+project(saltfm
   VERSION "${SALT_VERSION_MAJOR}.${SALT_VERSION_MINOR}"
   DESCRIPTION "An LLVM-based Source Analysis Tookit for HPC"
   HOMEPAGE_URL "https://github.com/ParaToolsInc/salt"
@@ -151,11 +151,6 @@ list(JOIN CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES "\", \"-I" CLANG_HEADER_INCLUDES)
 set(CLANG_HEADER_INCLUDES "\"-I${CLANG_HEADER_INCLUDES}\"")
 message(STATUS "CLANG_HEADER_INCLUDES: ${CLANG_HEADER_INCLUDES}")
 message(STATUS "NUM_CLANG_HEADER_INCLUDES: ${NUM_CLANG_HEADER_INCLUDES}")
-configure_file(
-  "${CMAKE_SOURCE_DIR}/include/clang_header_includes.h.in" "${CMAKE_BINARY_DIR}/include/clang_header_includes.h"
-  @ONLY)
-
-
 
 #------------------------------------------
 # Specify clang and system libraries needed
@@ -206,12 +201,16 @@ set(SALT_HEADER_FILES
 )
 
 list(TRANSFORM SALT_HEADER_FILES PREPEND "${CMAKE_SOURCE_DIR}/include/")
-list(APPEND SALT_HEADER_FILES "${CMAKE_BINARY_DIR}/include/clang_header_includes.h")
-set_source_files_properties(
-  "${CMAKE_BINARY_DIR}/include/clang_header_includes.h"
-  PROPERTIES
-  GENERATED TRUE
-)
+foreach(HEADER clang_header_includes.h frontend.hpp)
+  configure_file(
+    "${CMAKE_SOURCE_DIR}/include/${HEADER}.in" "${CMAKE_BINARY_DIR}/include/${HEADER}" @ONLY)
+  list(APPEND SALT_HEADER_FILES "${CMAKE_BINARY_DIR}/include/${HEADER}")
+  set_source_files_properties(
+    "${CMAKE_BINARY_DIR}/include/${HEADER}"
+    PROPERTIES
+    GENERATED TRUE
+  )
+endforeach()
 
 set(CPARSE_LLVM_SRCS
   frontend.cpp
@@ -245,11 +244,12 @@ set_target_properties(cparse-llvm PROPERTIES
 #------------------------------------------------------
 # Handle config files in build directory & installation
 #------------------------------------------------------
-# Copy ${CMAKE_SOURCE_DIR}/config_files to ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_DATADIR}/saltfm/config_files
-# and install them to ${CMAKE_INSTALL_DATADIR}/saltfm/config_files
+# Copy ${CMAKE_SOURCE_DIR}/config_files to
+# ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_DATADIR}/${CMAKE_PROJECT_NAME}/config_files
+# and install them to ${CMAKE_INSTALL_DATADIR}/${CMAKE_PROJECT_NAME}/config_files
 file(COPY ${CMAKE_SOURCE_DIR}/config_files
-  DESTINATION ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_DATADIR}/saltfm)
-install(DIRECTORY ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_DATADIR}/saltfm
+  DESTINATION ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_DATADIR}/${CMAKE_PROJECT_NAME})
+install(DIRECTORY ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_DATADIR}/${CMAKE_PROJECT_NAME}
   DESTINATION ${CMAKE_INSTALL_DATADIR})
 
 #---------------------
@@ -454,14 +454,6 @@ set_tests_properties(salt_parser_smoke_test
   PASS_REGULAR_EXPRESSION "OVERVIEW"
 )
 
-# Test fixtures to copy and cleanup config files
-add_test(NAME setup_inputs
-  COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_SOURCE_DIR}/config_files ./config_files)
-add_test(NAME cleanup_inputs
-  COMMAND ${CMAKE_COMMAND} -E rm -rf ./config_files)
-set_tests_properties(setup_inputs PROPERTIES FIXTURES_SETUP Configs)
-set_tests_properties(cleanup_inputs PROPERTIES FIXTURES_CLEANUP Configs)
-
 # Function to map tests_list to source files and setup parser tests
 function(add_instrumentor_test test_src)
   # This is 1 of 2 functions for adding tests
@@ -484,7 +476,6 @@ function(add_instrumentor_test test_src)
     COMMAND $<TARGET_FILE:cparse-llvm> ${CMAKE_SOURCE_DIR}/tests/${test_src})
   set_tests_properties(${TEST_NAME}
     PROPERTIES
-    FIXTURES_REQUIRED Configs
     REQUIRED_FILES "${CMAKE_SOURCE_DIR}/tests/${test_src}"
     PASS_REGULAR_EXPRESSION "[Ii]nstrumentation:"
   )
diff --git a/src/fparse-llvm.in b/src/fparse-llvm.in
index 4b2dbcd..13239de 100755
--- a/src/fparse-llvm.in
+++ b/src/fparse-llvm.in
@@ -26,10 +26,36 @@ readonly _VERSION=@SALT_VERSION_MAJOR@.@SALT_VERSION_MINOR@
 
 # get the absolute path of this script
 readonly _SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-# Check if the script is being run from the install directory
 
 SALT_PLUGIN_SO="${_SCRIPT_DIR}/../@CMAKE_INSTALL_LIBDIR@/${_SALTFM_PLUGIN_SO}"
-FORTRAN_CONFIG_FILE="${_SCRIPT_DIR}/../@CMAKE_INSTALL_DATADIR@/saltfm/config_files/tau_config.yaml"
+FORTRAN_CONFIG_FILE="${_SCRIPT_DIR}/../@CMAKE_INSTALL_DATADIR@/@CMAKE_PROJECT_NAME@/config_files/config.yaml"
+
+read -r -d '' _USAGE <<EOF || true
+OVERVIEW: Tool for adding TAU instrumentation to source files.
+Note that this will only instrument the first source file given.
+USAGE: $0 [options] <source0> [... <sourceN>]
+
+OPTIONS:
+
+Generic Options:
+
+  --help                       - Display available options (--help-hidden for more)
+  --help-list                  - Display list of available options (--help-list-hidden for more)
+  --version                    - Display the version of this program
+
+TAU instrumentor options:
+To pass options directly to the compiler, use
+        tooling [options] <source> -- [compiler options]
+
+  --config_file=<filename>     - Specify path to SALT configuration YAML file
+  --extra-arg=<string>         - Additional argument to append to the compiler command line
+  --extra-arg-before=<string>  - Additional argument to prepend to the compiler command line
+  -p <string>                  - Build path
+  --tau_instrument_inline      - Instrument inlined functions (default: false)
+  --tau_output=<filename>      - Specify name of output instrumented file
+  --tau_select_file=<filename> - Provide a selective instrumentation specification file
+  --tau_use_cxx_api            - Use TAU's C++ instrumentation API
+EOF
 
 # Add a help/usage message function
 function usage {
diff --git a/src/frontend.cpp b/src/frontend.cpp
index f6a2a1c..ebc5f27 100644
--- a/src/frontend.cpp
+++ b/src/frontend.cpp
@@ -1,4 +1,10 @@
+#ifdef __linux__
+#include <unistd.h>
+#elif __APPLE__
+#include <mach-o/dyld.h>
+#endif
 #include "instrumentor.hpp"
+#include "frontend.hpp"
 #include "clang/AST/ASTConsumer.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/ASTTypeTraits.h"
@@ -26,11 +32,46 @@ using namespace clang;
 llvm::cl::opt<std::string> outputfile("tau_output", llvm::cl::desc("Specify name of output instrumented file"),
                                       llvm::cl::value_desc("filename"), llvm::cl::cat(MyToolCategory));
 
+std::string getExecutablePath() {
+    char buffer[1024];
+    std::string path;
 
-std::string getEnvCfgFile()
-{
-    char * val = getenv( "SALT_CONFIG_FILE" );
-    return val == NULL ? std::string("config_files/config.yaml") : std::string(val);
+    #ifdef __linux__
+    ssize_t len = readlink("/proc/self/exe", buffer, sizeof(buffer) - 1);
+    if (len != -1) {
+        buffer[len] = '\0';
+        path = std::string(buffer);
+    }
+    #elif __APPLE__
+    uint32_t size = sizeof(buffer);
+    if (_NSGetExecutablePath(buffer, &size) == 0) {
+        path = std::string(buffer);
+    } else {
+        char *pathBuffer = new char[size];
+        if (_NSGetExecutablePath(pathBuffer, &size) == 0) {
+            path = std::string(pathBuffer);
+        }
+        delete[] pathBuffer;
+    }
+    #endif
+
+    // Remove the executable name from the path
+    size_t pos = path.find_last_of("/\\");
+    if (pos != std::string::npos) {
+        path = path.substr(0, pos);
+    }
+
+    return path;
+}
+
+std::string getEnvCfgFile() {
+    char *val = getenv("SALT_CONFIG_FILE");
+    if (val == NULL) {
+        std::string execPath = getExecutablePath();
+        return execPath + "/" + SALT_DEFAULT_CONFIG_FILE;
+    } else {
+        return std::string(val);
+    }
 }
 
 

From cf508a597b0826ec712e130e5708945ecc20e253 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Fri, 24 Jan 2025 16:14:41 -0500
Subject: [PATCH 120/135] Add configured header file

---
 include/frontend.hpp.in | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 include/frontend.hpp.in

diff --git a/include/frontend.hpp.in b/include/frontend.hpp.in
new file mode 100644
index 0000000..18fe9d0
--- /dev/null
+++ b/include/frontend.hpp.in
@@ -0,0 +1,2 @@
+// Define constants needed for the frontend
+#define SALT_DEFAULT_CONFIG_FILE "../@CMAKE_INSTALL_DATADIR@/@CMAKE_PROJECT_NAME@/config_files/config.yaml"
\ No newline at end of file

From 08643c38342a14f7ed3f1bf4574f62ee8fc62f18 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Fri, 24 Jan 2025 17:02:24 -0500
Subject: [PATCH 121/135] Update fparse-llvm to match cparse-llvm & use sif

Ensure that the selective instrumentation file can get passed to the
flang front end plugin and that the arguments to the wrapper script
match those in cparse-llvm (where applicable)
---
 CMakeLists.txt     |  4 ++--
 src/fparse-llvm.in | 52 ++++++++++++++++++----------------------------
 2 files changed, 22 insertions(+), 34 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index f488aeb..4a3b61c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -631,11 +631,11 @@ set(FORTRAN_TESTS_SOURCES_LIST
 
 # Add a smoke test of the fparse-llvm script
 add_test(NAME fparse_llvm_smoke_test
-  COMMAND ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}/fparse-llvm -h)
+  COMMAND ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}/fparse-llvm --help)
 set_tests_properties(fparse_llvm_smoke_test
   PROPERTIES
   LABELS smoke
-  PASS_REGULAR_EXPRESSION "Usage"
+  PASS_REGULAR_EXPRESSION "USAGE"
 )
 
 foreach(test_source IN LISTS FORTRAN_TESTS_SOURCES_LIST)
diff --git a/src/fparse-llvm.in b/src/fparse-llvm.in
index 13239de..7f10b21 100755
--- a/src/fparse-llvm.in
+++ b/src/fparse-llvm.in
@@ -25,7 +25,8 @@ readonly _SALTFM_PLUGIN_SO=libsalt-flang-plugin.so
 readonly _VERSION=@SALT_VERSION_MAJOR@.@SALT_VERSION_MINOR@
 
 # get the absolute path of this script
-readonly _SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+_SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+readonly _SCRIPT_DIR
 
 SALT_PLUGIN_SO="${_SCRIPT_DIR}/../@CMAKE_INSTALL_LIBDIR@/${_SALTFM_PLUGIN_SO}"
 FORTRAN_CONFIG_FILE="${_SCRIPT_DIR}/../@CMAKE_INSTALL_DATADIR@/@CMAKE_PROJECT_NAME@/config_files/config.yaml"
@@ -39,33 +40,20 @@ OPTIONS:
 
 Generic Options:
 
-  --help                       - Display available options (--help-hidden for more)
-  --help-list                  - Display list of available options (--help-list-hidden for more)
+  --help                       - Display available options
   --version                    - Display the version of this program
 
 TAU instrumentor options:
-To pass options directly to the compiler, use
-        tooling [options] <source> -- [compiler options]
 
   --config_file=<filename>     - Specify path to SALT configuration YAML file
-  --extra-arg=<string>         - Additional argument to append to the compiler command line
-  --extra-arg-before=<string>  - Additional argument to prepend to the compiler command line
-  -p <string>                  - Build path
-  --tau_instrument_inline      - Instrument inlined functions (default: false)
   --tau_output=<filename>      - Specify name of output instrumented file
   --tau_select_file=<filename> - Provide a selective instrumentation specification file
-  --tau_use_cxx_api            - Use TAU's C++ instrumentation API
+  --show                       - Print the command line that would be executed by the wrapper script
 EOF
 
 # Add a help/usage message function
 function usage {
-    echo "Usage: $0 [-h] [-o output_file] [-show] input_file [args]"
-    echo "         -h: print this help message and exit"    
-    echo "         --tau_output=output_file: specify the output file name"
-    echo "         -show: print the command line without running it"
-    echo "          input_file: the Fortran source file to parse and instrument"
-    echo "          args: additional arguments to pass to the flang compiler (include flags etc.)"
-
+    echo "$_USAGE"
 }
 
 if [[ $# -eq 0 ]]; then
@@ -166,38 +154,35 @@ done
 #echo "Passed command line arguments: $*"
 
 args=()
-original_args=("$@")
-expecting_output_file=false
 expecting_arg_to_forward=false
 show=false
 for arg in "$@"; do
     #echo "working on arg: $arg"
-    if [[ $arg == -h ]]; then
+    if [[ $arg == --help ]]; then
         usage
         exit 0
-    elif $expecting_output_file; then
-        output_file="$arg"
-        expecting_output_file=false
-        shift || true
-        #echo "args remaining: $*"
+    elif [[ $arg == --version ]]; then
+        echo "SALT FM Version: ${_VERSION}"
+        flang-new -version
+        exit 0
     elif $expecting_arg_to_forward; then
         args+=("$arg")
         expecting_arg_to_forward=false
         shift
         #echo "args remaining: $*"
-    elif [[ $arg == --tau_output ]]; then
-        shift
-        expecting_output_file=true
-        #echo "args remaining: $*"
     elif [[ $arg == --tau_output=* ]]; then
         output_file="${arg#--tau_output=}"
         shift || true
         #echo "args remaining: $*"
+    elif [[ $arg == --tau_select_file=* ]]; then
+        select_file="${arg#--tau_select_file=}"
+        shift || true
+        #echo "args remaining: $*"
     elif [[ $arg == *.[Ff]90 || $arg == *.[Ff] || $arg == *.[Ff]03 ]]; then
         input_file="$arg"
         shift || true
         #echo "args remaining: $*"
-    elif [[ $arg == -show ]]; then
+    elif [[ $arg == --show ]]; then
         show=true
         shift || true
         #echo "args remaining: $*"
@@ -214,7 +199,7 @@ for arg in "$@"; do
         if [[ "${arg:-}" == -Werror ]]; then
             args+=("$arg")
         fi
-        # Flang frontend oesn't (yet) support -Wall, -Wextra, etc. only -Werror, so throw others away
+        # Flang frontend doesn't (yet) support -Wall, -Wextra, etc. only -Werror, so throw others away
         shift || true
         #echo "Added whitelisted warning flag: $arg"
     elif [[ ${arg:-} =~ ${_WHITELISTED_REGEX} ]]; then
@@ -256,6 +241,7 @@ if [[ -z "${output_file:-}" ]]; then
     fi
     
 fi
+
 echo "output file: ${output_file:-\"<None given>\" }"
 echo "Remaining Arguments: ${args[*]}"
 
@@ -269,10 +255,12 @@ cmd=(flang-new
     "${args[@]}")
 if $show; then
     echo "SALT_FORTRAN_CONFIG_FILE=\"${FORTRAN_CONFIG_FILE}\""
+    echo "SALT_FORTRAN_SELECT_FILE=\"${select_file:-}\""
     echo "cmd: ${cmd[*]}"
 else
     echo "SALT_FORTRAN_CONFIG_FILE=\"${FORTRAN_CONFIG_FILE}\""
+    echo "SALT_FORTRAN_SELECT_FILE=\"${select_file:-}\""
     echo "Running: ${cmd[*]}"
-    SALT_FORTRAN_CONFIG_FILE="${FORTRAN_CONFIG_FILE}" "${cmd[@]}"
+    SALT_FORTRAN_SELECT_FILE="${select_file:-}" SALT_FORTRAN_CONFIG_FILE="${FORTRAN_CONFIG_FILE}" "${cmd[@]}"
     exit $?
 fi

From 94be054151768cf04f06a5fa88ea8905e661b20f Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Fri, 24 Jan 2025 17:15:11 -0500
Subject: [PATCH 122/135] Add back alias for --help

---
 src/fparse-llvm.in | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/fparse-llvm.in b/src/fparse-llvm.in
index 7f10b21..27f5274 100755
--- a/src/fparse-llvm.in
+++ b/src/fparse-llvm.in
@@ -40,6 +40,7 @@ OPTIONS:
 
 Generic Options:
 
+  -h                           - Alias for --help
   --help                       - Display available options
   --version                    - Display the version of this program
 
@@ -158,7 +159,7 @@ expecting_arg_to_forward=false
 show=false
 for arg in "$@"; do
     #echo "working on arg: $arg"
-    if [[ $arg == --help ]]; then
+    if [[ $arg == --help || $arg == -h ]]; then
         usage
         exit 0
     elif [[ $arg == --version ]]; then

From 4a927f33384e667c1381ca2e6ea51ddea7d0976f Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Mon, 27 Jan 2025 15:36:03 -0500
Subject: [PATCH 123/135] Add wrapper script to delegate to instrumentor based
 on language

saltfm now decides whether to call cparse-llvm or fparse-llvm to
instrument the code, based on the file extension or the --lang option.
---
 CMakeLists.txt     |  12 ++--
 src/fparse-llvm.in |  14 +++--
 src/saltfm.in      | 150 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 167 insertions(+), 9 deletions(-)
 create mode 100755 src/saltfm.in

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4a3b61c..71cb91e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -328,6 +328,10 @@ else()
     message(STATUS "Flang not found -- skipping Flang frontend plugin")
 endif()
 
+configure_file(${CMAKE_SOURCE_DIR}/src/saltfm.in ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}/saltfm @ONLY)
+install(PROGRAMS ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}/saltfm
+  TYPE BIN) # TYPE BIN installs into CMAKE_INSTALL_BINDIR
+
 #---------------------
 # Find TAU locations for testing
 #---------------------
@@ -473,7 +477,7 @@ function(add_instrumentor_test test_src)
     set(TEST_NAME ${ARGV1})
   endif()
   add_test(NAME ${TEST_NAME}
-    COMMAND $<TARGET_FILE:cparse-llvm> ${CMAKE_SOURCE_DIR}/tests/${test_src})
+    COMMAND ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}/saltfm ${CMAKE_SOURCE_DIR}/tests/${test_src})
   set_tests_properties(${TEST_NAME}
     PROPERTIES
     REQUIRED_FILES "${CMAKE_SOURCE_DIR}/tests/${test_src}"
@@ -536,7 +540,7 @@ function(compile_instrumented test_src)
   endif()
 
   set(test_path ${CMAKE_SOURCE_DIR}/tests/${TEST_BASE_NAME}.${TEST_LANG})
-  set(TAUC_OPTS -optVerbose -optSaltInst -optSaltParser=$<TARGET_FILE:cparse-llvm> -optSaltConfigFile=${CMAKE_SOURCE_DIR}/config_files/tau_config.yaml)
+  set(TAUC_OPTS -optVerbose -optSaltInst -optSaltParser=${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}/saltfm -optSaltConfigFile=${CMAKE_SOURCE_DIR}/config_files/tau_config.yaml)
   set(compile_opts ${TAU_COMPILE_OPTIONS})
   foreach(comp IN LISTS compilers_to_test)
     set(lower_comp ${comp})
@@ -640,11 +644,11 @@ set_tests_properties(fparse_llvm_smoke_test
 
 foreach(test_source IN LISTS FORTRAN_TESTS_SOURCES_LIST)
   add_test(NAME instrument_${test_source}
-    COMMAND ./${CMAKE_INSTALL_BINDIR}/fparse-llvm ${CMAKE_SOURCE_DIR}/tests/fortran/${test_source}
+    COMMAND ./${CMAKE_INSTALL_BINDIR}/saltfm ${CMAKE_SOURCE_DIR}/tests/fortran/${test_source}
     )
   set_tests_properties(instrument_${test_source}
     PROPERTIES
-    REQUIRED_FILES "${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}/fparse-llvm"
+    REQUIRED_FILES "${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}/fparse-llvm;${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}/saltfm"
     ENVIRONMENT "SALT_FORTRAN_CONFIG_FILE=${CMAKE_SOURCE_DIR}/config_files/tau_config.yaml;SALT_FORTRAN_VERBOSE=1"
     PASS_REGULAR_EXPRESSION "SALT Instrumentor Plugin finished"
   )
diff --git a/src/fparse-llvm.in b/src/fparse-llvm.in
index 27f5274..231eebc 100755
--- a/src/fparse-llvm.in
+++ b/src/fparse-llvm.in
@@ -164,7 +164,7 @@ for arg in "$@"; do
         exit 0
     elif [[ $arg == --version ]]; then
         echo "SALT FM Version: ${_VERSION}"
-        flang-new -version
+        flang-new --version
         exit 0
     elif $expecting_arg_to_forward; then
         args+=("$arg")
@@ -221,20 +221,24 @@ done
 #echo "args: \"${args[*]}\""
 # print the argument list
 if [[ -z "${input_file:-}" ]]; then
-    input_file="${args[0]}"
-    args=("${args[@]:1}")
+    if [[ ${#args[@]} -gt 0 ]]; then
+        # An input file has not been recognized.
+        # Try to take the first argument as the input file
+        input_file="${args[0]}"
+        args=("${args[@]:1}")
+    fi
 fi
 
 echo "input file: ${input_file:-\"<None given>\" }"
 
 # If no output file is given, emit the output file in the current working directory
 if [[ -z "${output_file:-}" ]]; then
-    if [[ ${input_file} == *.* ]]; then
+    if [[ ${input_file:-none} == *.* ]]; then
       file_ext=".${input_file##*.}"
     else
         file_ext=""
     fi
-    if [[ "${input_file}" == */* ]]; then
+    if [[ "${input_file:-none}" == */* ]]; then
         output_file="${input_file%.*}.inst${file_ext//f/F}"
         output_file="$(pwd)/${output_file##*/}"
     else
diff --git a/src/saltfm.in b/src/saltfm.in
new file mode 100755
index 0000000..6f62b57
--- /dev/null
+++ b/src/saltfm.in
@@ -0,0 +1,150 @@
+#!/usr/bin/env bash
+# Copyright (C) 2025, ParaTools, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script determines whether to call the SALTFM C/C++ or Fortran instrumentor
+# based on the file extension of the input source file or the --lang option.
+
+set -o errexit
+set -o nounset
+set -o pipefail
+# Shell tracing is a debugging aid only; uncomment to trace this wrapper
+#set -o xtrace
+
+readonly _VERSION=@SALT_VERSION_MAJOR@.@SALT_VERSION_MINOR@
+# get the absolute path of this script
+_SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+readonly _SCRIPT_DIR
+
+read -r -d '' _USAGE <<EOF || true
+OVERVIEW: Tool for adding TAU instrumentation to source files.
+Note that this will only instrument the first source file given.
+USAGE: $0 [options] <source0> [... <sourceN>]
+
+OPTIONS:
+
+Generic SALTFM Options:
+
+  -h                           - Alias for --help
+  --help                       - Display available options (--help-hidden for more)
+  --help-fortran               - Display Fortran instrumentor options and usage information
+  --help-c                     - Display C/C++ instrumentor options and usage information
+  --help-cxx                   - Display C/C++ instrumentor options and usage information
+  --version                    - Display the version of this program
+  --lang=<string>              - Specify the language of the input source file (default: auto-detect)
+  --show                       - Print the command line that would be executed by the outer wrapper script
+
+TAU instrumentor options:
+
+  --config_file=<filename>     - Specify path to SALT configuration YAML file
+  --tau_instrument_inline      - Instrument inlined functions (default: false)
+  --tau_output=<filename>      - Specify name of output instrumented file
+  --tau_select_file=<filename> - Provide a selective instrumentation specification file
+  --tau_use_cxx_api            - Use TAU's C++ instrumentation API
+EOF
+
+# Add a help/usage message function
+function usage {
+    echo "$_USAGE"
+}
+
+if [[ $# -eq 0 ]]; then
+    usage
+    exit 1
+fi
+
+# Scan the arguments: select the instrumentor and rebuild the list to forward
+args=()
+for arg in "$@"; do
+    # Wrapper-only options are consumed here; everything else is forwarded
+    if [[ $arg == --lang=* ]]; then
+        lang="${arg#--lang=}"
+        case "${lang}" in
+            fortran|Fortran)
+                force_lang=fortran
+                tool=fparse-llvm
+            ;;
+            c|C|cxx|CXX|c++|C++)
+                force_lang=c
+                tool=cparse-llvm
+            ;;
+            *)
+                echo "Invalid language specified: $lang"
+                exit 1
+            ;;
+        esac
+    elif [[ $arg == --help || $arg == -h ]]; then
+        if [[ -n "${force_lang:-}" ]]; then
+            "${_SCRIPT_DIR}/${tool}" --help
+        else
+            usage
+        fi
+        exit 0
+    elif [[ $arg == --help-fortran ]]; then
+        "${_SCRIPT_DIR}/fparse-llvm" --help
+        exit 0
+    elif [[ $arg == --help-c || $arg == --help-cxx ]]; then
+        "${_SCRIPT_DIR}/cparse-llvm" --help
+        exit 0
+    elif [[ $arg == --version ]]; then
+        echo "SALT FM Version: ${_VERSION}"
+        if [[ -n "${tool:-}" ]]; then
+            "${_SCRIPT_DIR}/${tool}" --version
+        fi
+        exit 0
+    elif [[ $arg == *.[Ff]90 || $arg == *.[Ff] || $arg == *.[Ff]03 ]]; then
+        args+=("$arg")
+        if [[ -z "${tool:-}" ]]; then
+            tool=fparse-llvm
+        fi
+    elif [[ $arg == *.[cC] || $arg == *.[cC][cC] || $arg == *.cpp || $arg == *.CPP || $arg == *.cxx || $arg == *.CXX || $arg == *.c++ || $arg == *.C++ ]]; then
+        args+=("$arg")
+        if [[ -z "${tool:-}" ]]; then
+            tool=cparse-llvm
+        fi
+    elif [[ $arg == --show ]]; then
+        show=true
+    else
+        args+=("$arg")
+    fi
+done
+
+if [[ -z "${tool:-}" ]]; then
+    echo "No source files provided, language not detected."
+    exit 1
+fi
+
+# Check if the tool exists
+if [[ ! -f "${_SCRIPT_DIR}/${tool}" ]]; then
+    echo "Tool not found: ${_SCRIPT_DIR}/${tool}"
+    exit 1
+fi
+
+# Print the command line that would be executed by the outer wrapper script
+# if the --show flag is set
+if [[ "${show:-false}" == "true" ]]; then
+    if [[ "${tool}" == "fparse-llvm" ]]; then
+        echo "$tool --show ${args[*]-}"
+        # fparse-llvm is also a wrapper script with a --show flag
+        # so print the command line that would be executed by the inner wrapper script
+        # if the language is Fortran
+        "${_SCRIPT_DIR}/fparse-llvm" --show ${args[@]+"${args[@]}"}
+    else
+        echo "$tool ${args[*]-}"
+    fi
+    exit 0
+fi
+
+# Run the tool; the ${args[@]+...} form keeps an empty list safe under nounset on bash < 4.4
+"${_SCRIPT_DIR}/${tool}" ${args[@]+"${args[@]}"}

From afc037c5b3d2fdcce83297f696ff1749d666489b Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Mon, 27 Jan 2025 16:02:09 -0500
Subject: [PATCH 124/135] Upgrade rapidyaml (ryml) to 0.7.2

---
 include/ryml_all.hpp                 | 37464 ++++++++++++++++---------
 src/flang_salt_instrument_plugin.cpp |    22 +-
 2 files changed, 24055 insertions(+), 13431 deletions(-)

diff --git a/include/ryml_all.hpp b/include/ryml_all.hpp
index 4ee0046..c611a0b 100644
--- a/include/ryml_all.hpp
+++ b/include/ryml_all.hpp
@@ -10,14 +10,25 @@
 // This is an amalgamated single-header version of the library.
 //
 // INSTRUCTIONS:
-//   - Include at will in any header of your project
-//   - In one (and only one) of your project source files,
-//     #define RYML_SINGLE_HDR_DEFINE_NOW and then include this header.
-//     This will enable the function and class definitions in
-//     the header file.
-//   - To compile into a shared library, just define the
-//     preprocessor symbol RYML_SHARED . This will take
-//     care of symbol export/import.
+//
+//   - Include at will in any header of your project. Because the
+//     amalgamated header file is large, to speed up compilation of
+//     your project, protect the include with its include guard
+//     `_RYML_SINGLE_HEADER_AMALGAMATED_HPP_`, ie like this:
+//     ```
+//     #ifndef _RYML_SINGLE_HEADER_AMALGAMATED_HPP_
+//     #include <ryml_all.hpp>
+//     #endif
+//     ```
+//
+//   - In one (and only one) of your project source files, #define
+//     RYML_SINGLE_HDR_DEFINE_NOW and then include this header. This will enable
+//     the function and class definitions in the header file.
+//
+//   - To compile into a shared library, define the preprocessor symbol
+//     RYML_SHARED before including the header. This will take care of
+//     symbol export/import.
+//
 //
 
 
@@ -343,20 +354,22 @@ C4_FOR_EACH(PRN_STRUCT_OFFSETS, a, b, c);
 #   else
 #       error "Unknown Apple platform"
 #   endif
-#elif defined(__linux)
+#elif defined(__linux__) || defined(__linux)
 #   define C4_UNIX
 #   define C4_LINUX
-#elif defined(__unix)
+#elif defined(__unix__) || defined(__unix)
 #   define C4_UNIX
 #elif defined(__arm__) || defined(__aarch64__)
 #   define C4_ARM
+#elif defined(__xtensa__) || defined(__XTENSA__)
+#   define C4_XTENSA
 #elif defined(SWIG)
 #   define C4_SWIG
 #else
 #   error "unknown platform"
 #endif
 
-#if defined(__posix) || defined(__unix__) || defined(__linux)
+#if defined(__posix) || defined(C4_UNIX) || defined(C4_LINUX)
 #   define C4_POSIX
 #endif
 
@@ -387,96 +400,100 @@ C4_FOR_EACH(PRN_STRUCT_OFFSETS, a, b, c);
 // see http://code.qt.io/cgit/qt/qtbase.git/tree/src/corelib/global/qprocessordetection.h
 
 #ifdef __ORDER_LITTLE_ENDIAN__
-    #define _C4EL __ORDER_LITTLE_ENDIAN__
+#   define _C4EL __ORDER_LITTLE_ENDIAN__
 #else
-    #define _C4EL 1234
+#   define _C4EL 1234
 #endif
 
 #ifdef __ORDER_BIG_ENDIAN__
-    #define _C4EB __ORDER_BIG_ENDIAN__
+#   define _C4EB __ORDER_BIG_ENDIAN__
 #else
-    #define _C4EB 4321
+#   define _C4EB 4321
 #endif
 
 // mixed byte order (eg, PowerPC or ia64)
 #define _C4EM 1111
 
 #if defined(__x86_64) || defined(__x86_64__) || defined(__amd64) || defined(_M_X64)
-    #define C4_CPU_X86_64
-    #define C4_WORDSIZE 8
-    #define C4_BYTE_ORDER _C4EL
+#    define C4_CPU_X86_64
+#    define C4_WORDSIZE 8
+#    define C4_BYTE_ORDER _C4EL
 
 #elif defined(__i386) || defined(__i386__) || defined(_M_IX86)
-    #define C4_CPU_X86
-    #define C4_WORDSIZE 4
-    #define C4_BYTE_ORDER _C4EL
+#    define C4_CPU_X86
+#    define C4_WORDSIZE 4
+#    define C4_BYTE_ORDER _C4EL
 
 #elif defined(__arm__) || defined(_M_ARM) \
     || defined(__TARGET_ARCH_ARM) || defined(__aarch64__) || defined(_M_ARM64)
-   #if defined(__aarch64__) || defined(_M_ARM64)
-       #define C4_CPU_ARM64
-       #define C4_CPU_ARMV8
-       #define C4_WORDSIZE 8
-   #else
-       #define C4_CPU_ARM
-       #define C4_WORDSIZE 4
-       #if defined(__ARM_ARCH_8__) || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM >= 8)
-           #define C4_CPU_ARMV8
-       #elif defined(__ARM_ARCH_7__) || defined(_ARM_ARCH_7)    \
+#   if defined(__aarch64__) || defined(_M_ARM64)
+#       define C4_CPU_ARM64
+#       define C4_CPU_ARMV8
+#       define C4_WORDSIZE 8
+#   else
+#       define C4_CPU_ARM
+#       define C4_WORDSIZE 4
+#       if defined(__ARM_ARCH_8__) || defined(__ARM_ARCH_8A__)  \
+        || (defined(__ARCH_ARM) && __ARCH_ARM >= 8) \
+        || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM >= 8)
+#           define C4_CPU_ARMV8
+#       elif defined(__ARM_ARCH_7__) || defined(_ARM_ARCH_7)    \
         || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) \
         || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) \
+        || defined(__ARM_ARCH_7EM__) \
         || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM >= 7) \
         || (defined(_M_ARM) && _M_ARM >= 7)
-           #define C4_CPU_ARMV7
-       #elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
+#           define C4_CPU_ARMV7
+#       elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
         || defined(__ARM_ARCH_6T2__) || defined(__ARM_ARCH_6Z__) \
         || defined(__ARM_ARCH_6K__)  || defined(__ARM_ARCH_6ZK__) \
-        || defined(__ARM_ARCH_6M__) \
+        || defined(__ARM_ARCH_6M__) || defined(__ARM_ARCH_6KZ__) \
         || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM >= 6)
-           #define C4_CPU_ARMV6
-       #elif defined(__ARM_ARCH_5TEJ__) \
+#           define C4_CPU_ARMV6
+#       elif defined(__ARM_ARCH_5TEJ__) \
+        || defined(__ARM_ARCH_5TE__) \
         || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM >= 5)
-           #define C4_CPU_ARMV5
-       #elif defined(__ARM_ARCH_4T__) \
+#           define C4_CPU_ARMV5
+#       elif defined(__ARM_ARCH_4T__) \
         || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM >= 4)
-           #define C4_CPU_ARMV4
-       #else
-           #error "unknown CPU architecture: ARM"
-       #endif
-   #endif
-   #if defined(__ARMEL__) || defined(__LITTLE_ENDIAN__) || defined(__AARCH64EL__) \
+#           define C4_CPU_ARMV4
+#       else
+#           error "unknown CPU architecture: ARM"
+#       endif
+#   endif
+#   if defined(__ARMEL__) || defined(__LITTLE_ENDIAN__) || defined(__AARCH64EL__) \
        || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) \
        || defined(_MSC_VER) // winarm64 does not provide any of the above macros,
                             // but advises little-endianess:
                             // https://docs.microsoft.com/en-us/cpp/build/overview-of-arm-abi-conventions?view=msvc-170
                             // So if it is visual studio compiling, we'll assume little endian.
-       #define C4_BYTE_ORDER _C4EL
-   #elif defined(__ARMEB__) || defined(__BIG_ENDIAN__) || defined(__AARCH64EB__) \
+#       define C4_BYTE_ORDER _C4EL
+#   elif defined(__ARMEB__) || defined(__BIG_ENDIAN__) || defined(__AARCH64EB__) \
        || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))
-       #define C4_BYTE_ORDER _C4EB
-   #elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_PDP_ENDIAN__)
-       #define C4_BYTE_ORDER _C4EM
-   #else
-       #error "unknown endianness"
-   #endif
+#       define C4_BYTE_ORDER _C4EB
+#   elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_PDP_ENDIAN__)
+#       define C4_BYTE_ORDER _C4EM
+#   else
+#       error "unknown endianness"
+#   endif
 
 #elif defined(__ia64) || defined(__ia64__) || defined(_M_IA64)
-   #define C4_CPU_IA64
-   #define C4_WORDSIZE 8
-   #define C4_BYTE_ORDER _C4EM
+#   define C4_CPU_IA64
+#   define C4_WORDSIZE 8
+#   define C4_BYTE_ORDER _C4EM
    // itanium is bi-endian - check byte order below
 
 #elif defined(__ppc__) || defined(__ppc) || defined(__powerpc__)       \
     || defined(_ARCH_COM) || defined(_ARCH_PWR) || defined(_ARCH_PPC)  \
     || defined(_M_MPPC) || defined(_M_PPC)
-   #if defined(__ppc64__) || defined(__powerpc64__) || defined(__64BIT__)
-       #define C4_CPU_PPC64
-       #define C4_WORDSIZE 8
-   #else
-       #define C4_CPU_PPC
-       #define C4_WORDSIZE 4
-   #endif
-   #define C4_BYTE_ORDER _C4EM
+#   if defined(__ppc64__) || defined(__powerpc64__) || defined(__64BIT__)
+#       define C4_CPU_PPC64
+#       define C4_WORDSIZE 8
+#   else
+#       define C4_CPU_PPC
+#       define C4_WORDSIZE 4
+#   endif
+#   define C4_BYTE_ORDER _C4EM
    // ppc is bi-endian - check byte order below
 
 #elif defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH_)
@@ -484,25 +501,45 @@ C4_FOR_EACH(PRN_STRUCT_OFFSETS, a, b, c);
 #   define C4_WORDSIZE 8
 #   define C4_BYTE_ORDER _C4EB
 
+#elif defined(__xtensa__) || defined(__XTENSA__)
+#   define C4_CPU_XTENSA
+#   define C4_WORDSIZE 4
+// not sure about this...
+#   if defined(__XTENSA_EL__) || defined(__xtensa_el__)
+#       define C4_BYTE_ORDER _C4EL
+#   else
+#       define C4_BYTE_ORDER _C4EB
+#   endif
+
 #elif defined(__riscv)
-   #if __riscv_xlen == 64
-       #define C4_CPU_RISCV64
-       #define C4_WORDSIZE 8
-   #else
-       #define C4_CPU_RISCV32
-       #define C4_WORDSIZE 4
-   #endif
-   #define C4_BYTE_ORDER _C4EL
+#   if __riscv_xlen == 64
+#       define C4_CPU_RISCV64
+#       define C4_WORDSIZE 8
+#   else
+#       define C4_CPU_RISCV32
+#       define C4_WORDSIZE 4
+#   endif
+#   define C4_BYTE_ORDER _C4EL
 
 #elif defined(__EMSCRIPTEN__)
 #   define C4_BYTE_ORDER _C4EL
 #   define C4_WORDSIZE 4
 
+#elif defined(__loongarch__)
+#   if defined(__loongarch64)
+#       define C4_CPU_LOONGARCH64
+#       define C4_WORDSIZE 8
+#   else
+#       define C4_CPU_LOONGARCH
+#       define C4_WORDSIZE 4
+#   endif
+#   define C4_BYTE_ORDER _C4EL
+
 #elif defined(SWIG)
-   #error "please define CPU architecture macros when compiling with swig"
+#   error "please define CPU architecture macros when compiling with swig"
 
 #else
-   #error "unknown CPU architecture"
+#   error "unknown CPU architecture"
 #endif
 
 #define C4_LITTLE_ENDIAN (C4_BYTE_ORDER == _C4EL)
@@ -514,6 +551,101 @@ C4_FOR_EACH(PRN_STRUCT_OFFSETS, a, b, c);
 
 // (end https://github.com/biojppm/c4core/src/c4/cpu.hpp)
 
+// (amalgamate) these includes are needed to work around
+// conditional includes in the gcc4.8 shim
+#include <cstdint>
+#include <type_traits>
+#include <cstring>
+
+
+
+
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/c4/gcc-4.8.hpp
+// https://github.com/biojppm/c4core/src/c4/gcc-4.8.hpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
+
+#ifndef _C4_GCC_4_8_HPP_
+#define _C4_GCC_4_8_HPP_
+
+#if __GNUC__ == 4 && __GNUC_MINOR__ >= 8
+/* STL polyfills for old GNU compilers */
+
+_Pragma("GCC diagnostic ignored \"-Wshadow\"")
+_Pragma("GCC diagnostic ignored \"-Wmissing-field-initializers\"")
+
+#if __cplusplus
+//included above:
+//#include <cstdint>
+//included above:
+//#include <type_traits>
+
+namespace std {
+
+template<typename _Tp>
+struct is_trivially_copyable : public integral_constant<bool,
+    is_destructible<_Tp>::value && __has_trivial_destructor(_Tp) &&
+    (__has_trivial_constructor(_Tp) || __has_trivial_copy(_Tp) || __has_trivial_assign(_Tp))>
+{ };
+
+template<typename _Tp>
+using is_trivially_copy_constructible = has_trivial_copy_constructor<_Tp>;
+
+template<typename _Tp>
+using is_trivially_default_constructible = has_trivial_default_constructor<_Tp>;
+
+template<typename _Tp>
+using is_trivially_copy_assignable = has_trivial_copy_assign<_Tp>;
+
+/* not supported */
+template<typename _Tp>
+struct is_trivially_move_constructible : false_type
+{ };
+
+/* not supported */
+template<typename _Tp>
+struct is_trivially_move_assignable : false_type
+{ };
+
+inline void *align(size_t __align, size_t __size, void*& __ptr, size_t& __space) noexcept
+{
+    if (__space < __size)
+        return nullptr;
+    const auto __intptr = reinterpret_cast<uintptr_t>(__ptr);
+    const auto __aligned = (__intptr - 1u + __align) & -__align;
+    const auto __diff = __aligned - __intptr;
+    if (__diff > (__space - __size))
+        return nullptr;
+    else
+    {
+        __space -= __diff;
+        return __ptr = reinterpret_cast<void*>(__aligned);
+    }
+}
+
+#if __GNUC__ == 4 && __GNUC_MINOR__ == 8
+typedef long double max_align_t ;
+#endif
+
+}
+#else // __cplusplus
+
+//included above:
+//#include <string.h>
+// see https://sourceware.org/bugzilla/show_bug.cgi?id=25399 (ubuntu gcc-4.8)
+#define memset(s, c, count) __builtin_memset(s, c, count)
+
+#endif // __cplusplus
+
+#endif // __GNUC__ == 4 && __GNUC_MINOR__ >= 8
+
+#endif // _C4_GCC_4_8_HPP_
+
+
+// (end https://github.com/biojppm/c4core/src/c4/gcc-4.8.hpp)
+
 
 
 //********************************************************************************
@@ -555,7 +687,7 @@ C4_FOR_EACH(PRN_STRUCT_OFFSETS, a, b, c);
 /** @see http://sourceforge.net/p/predef/wiki/Compilers/ for a list of compiler identifier macros */
 /** @see https://msdn.microsoft.com/en-us/library/b0084kay.aspx for VS2013 predefined macros */
 
-#if defined(_MSC_VER)// && (defined(C4_WIN) || defined(C4_XBOX) || defined(C4_UE4))
+#if defined(_MSC_VER) && !defined(__clang__)
 #   define C4_MSVC
 #   define C4_MSVC_VERSION_2022 17
 #   define C4_MSVC_VERSION_2019 16
@@ -567,7 +699,7 @@ C4_FOR_EACH(PRN_STRUCT_OFFSETS, a, b, c);
 #       define C4_MSVC_VERSION C4_MSVC_VERSION_2022  // visual studio 2022
 #       define C4_MSVC_2022
 #   elif _MSC_VER >= 1920
-#       define C4_MSVC_VERSION C_4MSVC_VERSION_2019  // visual studio 2019
+#       define C4_MSVC_VERSION C4_MSVC_VERSION_2019  // visual studio 2019
 #       define C4_MSVC_2019
 #   elif _MSC_VER >= 1910
 #       define C4_MSVC_VERSION C4_MSVC_VERSION_2017  // visual studio 2017
@@ -624,6 +756,9 @@ C4_FOR_EACH(PRN_STRUCT_OFFSETS, a, b, c);
 #           define C4_CLANG_VERSION __apple_build_version__
 #       endif
 #   elif defined(__GNUC__)
+#       ifdef __MINGW32__
+#           define C4_MINGW
+#       endif
 #       define C4_GCC
 #       if defined(__GNUC_PATCHLEVEL__)
 #           define C4_GCC_VERSION C4_VERSION_ENCODED(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__)
@@ -656,98 +791,6 @@ C4_FOR_EACH(PRN_STRUCT_OFFSETS, a, b, c);
 
 // (end https://github.com/biojppm/c4core/src/c4/compiler.hpp)
 
-// these includes are needed to work around conditional
-// includes in the gcc4.8 shim
-#include <cstdint>
-#include <type_traits>
-#include <cstring>
-
-
-
-
-//********************************************************************************
-//--------------------------------------------------------------------------------
-// cmake/compat/c4/gcc-4.8.hpp
-// https://github.com/biojppm/c4core/cmake/compat/c4/gcc-4.8.hpp
-//--------------------------------------------------------------------------------
-//********************************************************************************
-
-#ifndef _C4_COMPAT_GCC_4_8_HPP_
-#define _C4_COMPAT_GCC_4_8_HPP_
-
-#if __GNUC__ == 4 && __GNUC_MINOR__ >= 8
-/* STL polyfills for old GNU compilers */
-
-_Pragma("GCC diagnostic ignored \"-Wshadow\"")
-_Pragma("GCC diagnostic ignored \"-Wmissing-field-initializers\"")
-
-#if __cplusplus
-//included above:
-//#include <cstdint>
-//included above:
-//#include <type_traits>
-
-namespace std {
-
-template<typename _Tp>
-struct is_trivially_copyable : public integral_constant<bool,
-    is_destructible<_Tp>::value && __has_trivial_destructor(_Tp) &&
-    (__has_trivial_constructor(_Tp) || __has_trivial_copy(_Tp) || __has_trivial_assign(_Tp))>
-{ };
-
-template<typename _Tp>
-using is_trivially_copy_constructible = has_trivial_copy_constructor<_Tp>;
-
-template<typename _Tp>
-using is_trivially_default_constructible = has_trivial_default_constructor<_Tp>;
-
-template<typename _Tp>
-using is_trivially_copy_assignable = has_trivial_copy_assign<_Tp>;
-
-/* not supported */
-template<typename _Tp>
-struct is_trivially_move_constructible : false_type
-{ };
-
-/* not supported */
-template<typename _Tp>
-struct is_trivially_move_assignable : false_type
-{ };
-
-inline void *align(size_t __align, size_t __size, void*& __ptr, size_t& __space) noexcept
-{
-    if (__space < __size)
-        return nullptr;
-    const auto __intptr = reinterpret_cast<uintptr_t>(__ptr);
-    const auto __aligned = (__intptr - 1u + __align) & -__align;
-    const auto __diff = __aligned - __intptr;
-    if (__diff > (__space - __size))
-        return nullptr;
-    else
-    {
-        __space -= __diff;
-        return __ptr = reinterpret_cast<void*>(__aligned);
-    }
-}
-typedef long double max_align_t ;
-
-}
-#else // __cplusplus
-
-//included above:
-//#include <string.h>
-// see https://sourceware.org/bugzilla/show_bug.cgi?id=25399 (ubuntu gcc-4.8)
-#define memset(s, c, count) __builtin_memset(s, c, count)
-
-#endif // __cplusplus
-
-#endif // __GNUC__ == 4 && __GNUC_MINOR__ >= 8
-
-#endif // _C4_COMPAT_GCC_4_8_HPP_
-
-
-// (end https://github.com/biojppm/c4core/cmake/compat/c4/gcc-4.8.hpp)
-
 
 
 //********************************************************************************
@@ -783,8 +826,8 @@ typedef long double max_align_t ;
 /* Detect C++ standard.
  * @see http://stackoverflow.com/a/7132549/5875572 */
 #ifndef C4_CPP
-#   ifdef _MSC_VER
-#       if _MSC_VER >= 1910  // >VS2015: VS2017, VS2019
+#   if defined(_MSC_VER) && !defined(__clang__)
+#       if _MSC_VER >= 1910  // >VS2015: VS2017, VS2019, VS2022
 #           if (!defined(_MSVC_LANG))
 #               error _MSVC not defined
 #           endif
@@ -881,33 +924,27 @@ typedef long double max_align_t ;
 #endif
 
 /** lifted from this answer: http://stackoverflow.com/a/20170989/5875572 */
-#ifndef _MSC_VER
-#  if __cplusplus < 201103
+#if defined(_MSC_VER) && !defined(__clang__)
+#  if _MSC_VER < 1900
 #    define C4_CONSTEXPR11
 #    define C4_CONSTEXPR14
-//#    define C4_NOEXCEPT
-#  elif __cplusplus == 201103
+#  elif _MSC_VER < 2000
 #    define C4_CONSTEXPR11 constexpr
 #    define C4_CONSTEXPR14
-//#    define C4_NOEXCEPT noexcept
 #  else
 #    define C4_CONSTEXPR11 constexpr
 #    define C4_CONSTEXPR14 constexpr
-//#    define C4_NOEXCEPT noexcept
 #  endif
-#else  // _MSC_VER
-#  if _MSC_VER < 1900
+#else
+#  if __cplusplus < 201103
 #    define C4_CONSTEXPR11
 #    define C4_CONSTEXPR14
-//#    define C4_NOEXCEPT
-#  elif _MSC_VER < 2000
+#  elif __cplusplus == 201103
 #    define C4_CONSTEXPR11 constexpr
 #    define C4_CONSTEXPR14
-//#    define C4_NOEXCEPT noexcept
 #  else
 #    define C4_CONSTEXPR11 constexpr
 #    define C4_CONSTEXPR14 constexpr
-//#    define C4_NOEXCEPT noexcept
 #  endif
 #endif  // _MSC_VER
 
@@ -920,6 +957,42 @@ typedef long double max_align_t ;
 #define C4_INLINE_CONSTEXPR inline constexpr
 #endif
 
+#if defined(_MSC_VER) && !defined(__clang__)
+#  if (defined(_CPPUNWIND) && (_CPPUNWIND == 1))
+#    define C4_EXCEPTIONS
+#  endif
+#else
+#  if defined(__EXCEPTIONS) || defined(__cpp_exceptions)
+#    define C4_EXCEPTIONS
+#  endif
+#endif
+
+#ifdef C4_EXCEPTIONS
+#  define C4_IF_EXCEPTIONS_(exc_code, setjmp_code) exc_code
+#  define C4_IF_EXCEPTIONS(exc_code, setjmp_code) do { exc_code } while(0)
+#else
+#  define C4_IF_EXCEPTIONS_(exc_code, setjmp_code) setjmp_code
+#  define C4_IF_EXCEPTIONS(exc_code, setjmp_code) do { setjmp_code } while(0)
+#endif
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#  if defined(_CPPRTTI)
+#    define C4_RTTI
+#  endif
+#else
+#  if defined(__GXX_RTTI)
+#    define C4_RTTI
+#  endif
+#endif
+
+#ifdef C4_RTTI
+#  define C4_IF_RTTI_(code_rtti, code_no_rtti) code_rtti
+#  define C4_IF_RTTI(code_rtti, code_no_rtti) do { code_rtti } while(0)
+#else
+#  define C4_IF_RTTI_(code_rtti, code_no_rtti) code_no_rtti
+#  define C4_IF_RTTI(code_rtti, code_no_rtti) do { code_no_rtti } while(0)
+#endif
+
 
 //------------------------------------------------------------
 
@@ -938,7 +1011,7 @@ typedef long double max_align_t ;
 //------------------------------------------------------------
 
 #ifndef C4_API
-#   if defined(_MSC_VER)
+#   if defined(_MSC_VER) && !defined(__clang__)
 #       if defined(C4_EXPORT)
 #           define C4_API __declspec(dllexport)
 #       elif defined(C4_IMPORT)
@@ -951,7 +1024,33 @@ typedef long double max_align_t ;
 #   endif
 #endif
 
-#ifndef _MSC_VER  ///< @todo assuming gcc-like compiler. check it is actually so.
+#if defined(_MSC_VER) && !defined(__clang__)
+#   define C4_RESTRICT __restrict
+#   define C4_RESTRICT_FN __declspec(restrict)
+#   define C4_NO_INLINE __declspec(noinline)
+#   define C4_ALWAYS_INLINE inline __forceinline
+/** these are not available in VS AFAIK */
+#   define C4_CONST
+#   define C4_PURE
+#   define C4_FLATTEN
+#   define C4_HOT         /** @todo */
+#   define C4_COLD        /** @todo */
+#   define C4_ASSUME(...) __assume(__VA_ARGS__)
+#   define C4_EXPECT(x, y) x /** @todo */
+#   define C4_LIKELY(x)   x
+#   define C4_UNLIKELY(x) x
+#   define C4_UNREACHABLE() _c4_msvc_unreachable()
+#   define C4_ATTR_FORMAT(...) /** */
+#   define C4_NORETURN [[noreturn]]
+#   if _MSC_VER >= 1700 // VS2012
+#       define C4_NODISCARD _Check_return_
+#   else
+#       define C4_NODISCARD
+#   endif
+[[noreturn]] __forceinline void _c4_msvc_unreachable() { __assume(false); } ///< https://stackoverflow.com/questions/60802864/emulating-gccs-builtin-unreachable-in-visual-studio
+#   define C4_UNREACHABLE_AFTER_ERR() /* */
+#else
+    ///< @todo assuming gcc-like compiler. check it is actually so.
 /** for function attributes in GCC,
  * @see https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#Common-Function-Attributes */
 /** for __builtin functions in GCC,
@@ -960,6 +1059,8 @@ typedef long double max_align_t ;
 #   define C4_RESTRICT_FN __attribute__((restrict))
 #   define C4_NO_INLINE __attribute__((noinline))
 #   define C4_ALWAYS_INLINE inline __attribute__((always_inline))
+#   define C4_CONST __attribute__((const))
+#   define C4_PURE __attribute__((pure))
 /** force inlining of every callee function */
 #   define C4_FLATTEN __atribute__((flatten))
 /** mark a function as hot, ie as having a visible impact in CPU time
@@ -975,29 +1076,58 @@ typedef long double max_align_t ;
 #   define C4_UNREACHABLE() __builtin_unreachable()
 #   define C4_ATTR_FORMAT(...) //__attribute__((format (__VA_ARGS__))) ///< @see https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#Common-Function-Attributes
 #   define C4_NORETURN __attribute__((noreturn))
-#else
-#   define C4_RESTRICT __restrict
-#   define C4_RESTRICT_FN __declspec(restrict)
-#   define C4_NO_INLINE __declspec(noinline)
-#   define C4_ALWAYS_INLINE inline __forceinline
-/** these are not available in VS AFAIK */
-#   define C4_FLATTEN
-#   define C4_HOT         /** @todo */
-#   define C4_COLD        /** @todo */
-#   define C4_EXPECT(x, y) x /** @todo */
-#   define C4_LIKELY(x)   x /** @todo */
-#   define C4_UNLIKELY(x) x /** @todo */
-#   define C4_UNREACHABLE() /** @todo */
-#   define C4_ATTR_FORMAT(...) /** */
-#   define C4_NORETURN /** @todo */
+#   define C4_NODISCARD __attribute__((warn_unused_result))
+#   define C4_UNREACHABLE_AFTER_ERR() C4_UNREACHABLE()
+// C4_ASSUME
+// see https://stackoverflow.com/questions/63493968/reproducing-clangs-builtin-assume-for-gcc
+// preferred option: C++ standard attribute
+#   ifdef __has_cpp_attribute
+#       if __has_cpp_attribute(assume) >= 202207L
+#           define C4_ASSUME(...) [[assume(__VA_ARGS__)]]
+#       endif
+#   endif
+// first fallback: compiler intrinsics/attributes for assumptions
+#   ifndef C4_ASSUME
+#       if defined(__clang__)
+#           define C4_ASSUME(...) __builtin_assume(__VA_ARGS__)
+#       elif defined(__GNUC__)
+#       if __GNUC__ >= 13
+#           define C4_ASSUME(...) __attribute__((__assume__(__VA_ARGS__)))
+#       endif
+#       endif
+#   endif
+// second fallback: possibly evaluating uses of unreachable()
+// Set this to 1 if you want to allow assumptions to possibly evaluate.
+#   ifndef C4_ASSUME_ALLOW_EVAL
+#       define C4_ASSUME_ALLOW_EVAL 0
+#   endif
+#   if !defined(C4_ASSUME) && (C4_ASSUME_ALLOW_EVAL)
+#       define C4_ASSUME(...) do { if (!bool(__VA_ARGS__)) C4_UNREACHABLE(); } while(0)
+#   endif
+// last fallback: define macro as doing nothing
+#   ifndef C4_ASSUME
+#       define C4_ASSUME(...)
+#   endif
+#endif
+
+
+#if C4_CPP >= 14
+#   define C4_DEPRECATED(msg) [[deprecated(msg)]]
+#else
+#   if defined(_MSC_VER)
+#       define C4_DEPRECATED(msg) __declspec(deprecated(msg))
+#   else // defined(__GNUC__) || defined(__clang__)
+#       define C4_DEPRECATED(msg) __attribute__((deprecated(msg)))
+#   endif
 #endif
 
-#ifndef _MSC_VER
+
+#ifdef _MSC_VER
 #   define C4_FUNC __FUNCTION__
-#   define C4_PRETTY_FUNC __PRETTY_FUNCTION__
+#   define C4_PRETTY_FUNC __FUNCSIG__
 #else /// @todo assuming gcc-like compiler. check it is actually so.
 #   define C4_FUNC __FUNCTION__
-#   define C4_PRETTY_FUNC __FUNCSIG__
+#   define C4_PRETTY_FUNC __PRETTY_FUNCTION__
 #endif
 
 /** prevent compiler warnings about a specific var being unused */
@@ -1027,10 +1157,10 @@ void use_char_pointer(char const volatile*);
 
 /** @def C4_KEEP_EMPTY_LOOP prevent an empty loop from being optimized out.
  * @see http://stackoverflow.com/a/7084193/5875572 */
-#ifndef _MSC_VER
-#   define C4_KEEP_EMPTY_LOOP { asm(""); }
-#else
+#if defined(_MSC_VER) && !defined(__clang__)
 #   define C4_KEEP_EMPTY_LOOP { char c; C4_DONT_OPTIMIZE(c); }
+#else
+#   define C4_KEEP_EMPTY_LOOP { asm(""); }
 #endif
 
 /** @def C4_VA_LIST_REUSE_MUST_COPY
@@ -1119,6 +1249,13 @@ using ssize_t = typename std::make_signed<size_t>::type;
 
 // some tag types
 
+#if !defined(__clang__) && defined(__GNUC__)
+#pragma GCC diagnostic push
+#if __GNUC__ >= 6
+#pragma GCC diagnostic ignored "-Wunused-const-variable"
+#endif
+#endif
+
 /** a tag type for initializing the containers with variadic arguments a la
  * initializer_list, minus the initializer_list overload problems.
  */
@@ -1136,6 +1273,10 @@ struct varargs_t {};
 /** @see with_capacity_t */
 constexpr const varargs_t varargs{};
 
+#if !defined(__clang__) && defined(__GNUC__)
+#pragma GCC diagnostic pop
+#endif
+
 
 //--------------------------------------------------
 
@@ -1919,7 +2060,7 @@ struct fail_type__ {};
 #endif // _DOXYGEN_
 
 
-#ifdef NDEBUG
+#if defined(NDEBUG) || defined(C4_NO_DEBUG_BREAK)
 #   define C4_DEBUG_BREAK()
 #else
 #   ifdef __clang__
@@ -2044,7 +2185,8 @@ struct ScopedErrorSettings
 /** source location */
 struct srcloc;
 
-C4CORE_EXPORT void handle_error(srcloc s, const char *fmt, ...);
+// watchout: for VS the [[noreturn]] needs to come before other annotations like C4CORE_EXPORT
+[[noreturn]] C4CORE_EXPORT void handle_error(srcloc s, const char *fmt, ...);
 C4CORE_EXPORT void handle_warning(srcloc s, const char *fmt, ...);
 
 
@@ -2213,12 +2355,12 @@ struct srcloc
 // Common error conditions
 
 #define C4_NOT_IMPLEMENTED() C4_ERROR("NOT IMPLEMENTED")
-#define C4_NOT_IMPLEMENTED_MSG(/*msg, */...) C4_ERROR("NOT IMPLEMENTED: " ## __VA_ARGS__)
+#define C4_NOT_IMPLEMENTED_MSG(/*msg, */...) C4_ERROR("NOT IMPLEMENTED: " __VA_ARGS__)
 #define C4_NOT_IMPLEMENTED_IF(condition) do { if(C4_UNLIKELY(condition)) { C4_ERROR("NOT IMPLEMENTED"); } } while(0)
-#define C4_NOT_IMPLEMENTED_IF_MSG(condition, /*msg, */...) do { if(C4_UNLIKELY(condition)) { C4_ERROR("NOT IMPLEMENTED: " ## __VA_ARGS__); } } while(0)
+#define C4_NOT_IMPLEMENTED_IF_MSG(condition, /*msg, */...) do { if(C4_UNLIKELY(condition)) { C4_ERROR("NOT IMPLEMENTED: " __VA_ARGS__); } } while(0)
 
 #define C4_NEVER_REACH() do { C4_ERROR("never reach this point"); C4_UNREACHABLE(); } while(0)
-#define C4_NEVER_REACH_MSG(/*msg, */...) do { C4_ERROR("never reach this point: " ## __VA_ARGS__); C4_UNREACHABLE(); } while(0)
+#define C4_NEVER_REACH_MSG(/*msg, */...) do { C4_ERROR("never reach this point: " __VA_ARGS__); C4_UNREACHABLE(); } while(0)
 
 
 
@@ -2226,19 +2368,17 @@ struct srcloc
 // helpers for warning suppression
 // idea adapted from https://github.com/onqtam/doctest/
 
+// TODO: add C4_MESSAGE() https://stackoverflow.com/questions/18252351/custom-preprocessor-macro-for-a-conditional-pragma-message-xxx?rq=1
+
 
 #ifdef C4_MSVC
 #define C4_SUPPRESS_WARNING_MSVC_PUSH __pragma(warning(push))
 #define C4_SUPPRESS_WARNING_MSVC(w)  __pragma(warning(disable : w))
 #define C4_SUPPRESS_WARNING_MSVC_POP __pragma(warning(pop))
-#define C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(w)   \
-    C4_SUPPRESS_WARNING_MSVC_PUSH               \
-    C4_SUPPRESS_WARNING_MSVC(w)
 #else // C4_MSVC
 #define C4_SUPPRESS_WARNING_MSVC_PUSH
 #define C4_SUPPRESS_WARNING_MSVC(w)
 #define C4_SUPPRESS_WARNING_MSVC_POP
-#define C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(w)
 #endif // C4_MSVC
 
 
@@ -2247,14 +2387,10 @@ struct srcloc
 #define C4_SUPPRESS_WARNING_CLANG_PUSH _Pragma("clang diagnostic push")
 #define C4_SUPPRESS_WARNING_CLANG(w) C4_PRAGMA_TO_STR(clang diagnostic ignored w)
 #define C4_SUPPRESS_WARNING_CLANG_POP _Pragma("clang diagnostic pop")
-#define C4_SUPPRESS_WARNING_CLANG_WITH_PUSH(w)  \
-    C4_SUPPRESS_WARNING_CLANG_PUSH              \
-    C4_SUPPRESS_WARNING_CLANG(w)
 #else // C4_CLANG
 #define C4_SUPPRESS_WARNING_CLANG_PUSH
 #define C4_SUPPRESS_WARNING_CLANG(w)
 #define C4_SUPPRESS_WARNING_CLANG_POP
-#define C4_SUPPRESS_WARNING_CLANG_WITH_PUSH(w)
 #endif // C4_CLANG
 
 
@@ -2263,17 +2399,26 @@ struct srcloc
 #define C4_SUPPRESS_WARNING_GCC_PUSH _Pragma("GCC diagnostic push")
 #define C4_SUPPRESS_WARNING_GCC(w) C4_PRAGMA_TO_STR(GCC diagnostic ignored w)
 #define C4_SUPPRESS_WARNING_GCC_POP _Pragma("GCC diagnostic pop")
-#define C4_SUPPRESS_WARNING_GCC_WITH_PUSH(w)    \
-    C4_SUPPRESS_WARNING_GCC_PUSH                \
-    C4_SUPPRESS_WARNING_GCC(w)
 #else // C4_GCC
 #define C4_SUPPRESS_WARNING_GCC_PUSH
 #define C4_SUPPRESS_WARNING_GCC(w)
 #define C4_SUPPRESS_WARNING_GCC_POP
-#define C4_SUPPRESS_WARNING_GCC_WITH_PUSH(w)
 #endif // C4_GCC
 
 
+#define C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(w)   \
+    C4_SUPPRESS_WARNING_MSVC_PUSH               \
+    C4_SUPPRESS_WARNING_MSVC(w)
+
+#define C4_SUPPRESS_WARNING_CLANG_WITH_PUSH(w)  \
+    C4_SUPPRESS_WARNING_CLANG_PUSH              \
+    C4_SUPPRESS_WARNING_CLANG(w)
+
+#define C4_SUPPRESS_WARNING_GCC_WITH_PUSH(w)    \
+    C4_SUPPRESS_WARNING_GCC_PUSH                \
+    C4_SUPPRESS_WARNING_GCC(w)
+
+
 #define C4_SUPPRESS_WARNING_GCC_CLANG_PUSH \
     C4_SUPPRESS_WARNING_GCC_PUSH     \
     C4_SUPPRESS_WARNING_CLANG_PUSH
@@ -2347,7 +2492,7 @@ struct srcloc
 //included above:
 //#include <string.h>
 
-#if (defined(__GNUC__) && __GNUC_MAJOR >= 10) || defined(__has_builtin)
+#if (defined(__GNUC__) && __GNUC__ >= 10) || defined(__has_builtin)
 #define _C4_USE_LSB_INTRINSIC(which) __has_builtin(which)
 #define _C4_USE_MSB_INTRINSIC(which) __has_builtin(which)
 #elif defined(C4_MSVC)
@@ -2364,6 +2509,8 @@ struct srcloc
 
 namespace c4 {
 
+C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast")
+
 /** set the given memory to zero */
 C4_ALWAYS_INLINE void mem_zero(void* mem, size_t num_bytes)
 {
@@ -2382,7 +2529,11 @@ C4_ALWAYS_INLINE void mem_zero(T* mem)
     memset(mem, 0, sizeof(T));
 }
 
-bool mem_overlaps(void const* a, void const* b, size_t sza, size_t szb);
+C4_ALWAYS_INLINE C4_CONST bool mem_overlaps(void const* a, void const* b, size_t sza, size_t szb)
+{
+    // thanks @timwynants
+    return (((const char*)b + szb) > a && b < ((const char*)a+sza));
+}
 
 void mem_repeat(void* dest, void const* pattern, size_t pattern_size, size_t num_times);
 
@@ -2392,9 +2543,9 @@ void mem_repeat(void* dest, void const* pattern, size_t pattern_size, size_t num
 //-----------------------------------------------------------------------------
 
 template<class T>
-bool is_aligned(T *ptr, size_t alignment=alignof(T))
+C4_ALWAYS_INLINE C4_CONST bool is_aligned(T *ptr, uintptr_t alignment=alignof(T))
 {
-    return (uintptr_t(ptr) & (alignment - 1)) == 0u;
+    return (uintptr_t(ptr) & (alignment - uintptr_t(1))) == uintptr_t(0);
 }
 
 
@@ -2793,6 +2944,118 @@ struct msb11
 #undef _C4_USE_LSB_INTRINSIC
 #undef _C4_USE_MSB_INTRINSIC
 
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+
+// there is an implicit conversion below; it happens when E or B are
+// narrower than int, and thus any operation will upcast the result to
+// int, and then downcast to assign
+C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wconversion")
+
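+/** integer power for a signed integral exponent: multiplies by base when the exponent is
+ * non-negative, otherwise divides by base |exponent| times; constexpr-14 because of the local variables */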
+template<class B, class E>
+C4_CONSTEXPR14 C4_CONST auto ipow(B base, E exponent) noexcept -> typename std::enable_if<std::is_signed<E>::value, B>::type
+{
+    C4_STATIC_ASSERT(std::is_integral<E>::value);
+    B r = B(1);
+    if(exponent >= 0)
+    {
+        for(E e = 0; e < exponent; ++e)
+            r *= base;
+    }
+    else
+    {
+        exponent *= E(-1);
+        for(E e = 0; e < exponent; ++e)
+            r /= base;
+    }
+    return r;
+}
+
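+/** integer power for a signed integral exponent, with the base given as a template argument
+ * of type B; a negative exponent divides instead of multiplying; constexpr-14 because of the local variables */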
+template<class B, B base, class E>
+C4_CONSTEXPR14 C4_CONST auto ipow(E exponent) noexcept -> typename std::enable_if<std::is_signed<E>::value, B>::type
+{
+    C4_STATIC_ASSERT(std::is_integral<E>::value);
+    B r = B(1);
+    if(exponent >= 0)
+    {
+        for(E e = 0; e < exponent; ++e)
+            r *= base;
+    }
+    else
+    {
+        exponent *= E(-1);
+        for(E e = 0; e < exponent; ++e)
+            r /= base;
+    }
+    return r;
+}
+
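+/** integer power for a signed integral exponent, with the base given as a template argument
+ * of type Base and converted to B before use; constexpr-14 because of the local variables */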
+template<class B, class Base, Base base, class E>
+C4_CONSTEXPR14 C4_CONST auto ipow(E exponent) noexcept -> typename std::enable_if<std::is_signed<E>::value, B>::type
+{
+    C4_STATIC_ASSERT(std::is_integral<E>::value);
+    B r = B(1);
+    B bbase = B(base);
+    if(exponent >= 0)
+    {
+        for(E e = 0; e < exponent; ++e)
+            r *= bbase;
+    }
+    else
+    {
+        exponent *= E(-1);
+        for(E e = 0; e < exponent; ++e)
+            r /= bbase;
+    }
+    return r;
+}
+
+/** integer power for a non-signed integral exponent: multiplies by base exponent times;
+ * this function is constexpr-14 because of the local variables */
+template<class B, class E>
+C4_CONSTEXPR14 C4_CONST auto ipow(B base, E exponent) noexcept -> typename std::enable_if<!std::is_signed<E>::value, B>::type
+{
+    C4_STATIC_ASSERT(std::is_integral<E>::value);
+    B r = B(1);
+    for(E e = 0; e < exponent; ++e)
+        r *= base;
+    return r;
+}
+
+/** integer power for a non-signed integral exponent, with the base given as a template
+ * argument of type B; constexpr-14 because of the local variables */
+template<class B, B base, class E>
+C4_CONSTEXPR14 C4_CONST auto ipow(E exponent) noexcept -> typename std::enable_if<!std::is_signed<E>::value, B>::type
+{
+    C4_STATIC_ASSERT(std::is_integral<E>::value);
+    B r = B(1);
+    for(E e = 0; e < exponent; ++e)
+        r *= base;
+    return r;
+}
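+/** integer power for a non-signed integral exponent, with the base given as a template
+ * argument of type Base and converted to B before use; constexpr-14 because of the local variables */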
+template<class B, class Base, Base base, class E>
+C4_CONSTEXPR14 C4_CONST auto ipow(E exponent) noexcept -> typename std::enable_if<!std::is_signed<E>::value, B>::type
+{
+    C4_STATIC_ASSERT(std::is_integral<E>::value);
+    B r = B(1);
+    B bbase = B(base);
+    for(E e = 0; e < exponent; ++e)
+        r *= bbase;
+    return r;
+}
+
+C4_SUPPRESS_WARNING_GCC_CLANG_POP
+
+
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
@@ -2990,6 +3253,8 @@ struct tight_pair<First, Second, tpc_second_empty> : public Second
 template<class First, class Second>
 using tight_pair = detail::tight_pair<First, Second, detail::tpc_which_case<First,Second>()>;
 
+C4_SUPPRESS_WARNING_GCC_CLANG_POP
+
 } // namespace c4
 
 #endif /* _C4_MEMORY_UTIL_HPP_ */
@@ -3641,6 +3906,8 @@ struct ScopedMemoryResourceCounts
 
 namespace c4 {
 
+C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast")
+
 /** default-construct an object, trivial version */
 template <class U> C4_ALWAYS_INLINE typename std::enable_if<std::is_trivially_default_constructible<U>::value, void>::type
 construct(U *ptr) noexcept
@@ -4082,9 +4349,9 @@ destroy_room(U *dst, U const* src, I n, I room, I pos)
     }
 }
 
-} // namespace c4
+C4_SUPPRESS_WARNING_GCC_CLANG_POP
 
-#undef _C4REQUIRE
+} // namespace c4
 
 #endif /* _C4_CTOR_DTOR_HPP_ */
 
@@ -4139,6 +4406,8 @@ destroy_room(U *dst, U const* src, I n, I room, I pos)
 
 namespace c4 {
 
+C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast")
+
 namespace detail {
 template<class T> inline size_t size_for      (size_t num_objs) noexcept { return num_objs * sizeof(T); }
 template<       > inline size_t size_for<void>(size_t num_objs) noexcept { return num_objs;             }
@@ -4515,6 +4784,8 @@ template<class T, size_t N=16, size_t Alignment=alignof(T)> using small_allocato
 /** @ingroup allocators */
 template<class T, size_t N=16, size_t Alignment=alignof(T)> using small_allocator_mr = SmallAllocator<T, N, Alignment, MemRes>;
 
+C4_SUPPRESS_WARNING_GCC_CLANG_POP
+
 } // namespace c4
 
 #endif /* _C4_ALLOCATOR_HPP_ */
@@ -4786,6 +5057,8 @@ C4_CONSTEXPR14 inline size_t hash_bytes(const char (&str)[N]) noexcept
 
 namespace c4 {
 
+C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast")
+
 /** @todo this would be so much easier with calls to numeric_limits::max()... */
 template<class SizeOut, class SizeIn>
 struct is_narrower_size : std::conditional
@@ -4827,7 +5100,7 @@ szconv(SizeIn sz) noexcept
 template<class SizeOut, class SizeIn>
 C4_ALWAYS_INLINE
 typename std::enable_if<is_narrower_size<SizeOut, SizeIn>::value, SizeOut>::type
-szconv(SizeIn sz) C4_NOEXCEPT_X
+szconv(SizeIn sz)
 {
     C4_XASSERT(sz >= 0);
     C4_XASSERT_MSG((SizeIn)sz <= (SizeIn)std::numeric_limits<SizeOut>::max(), "size conversion overflow: in=%zu", (size_t)sz);
@@ -4835,6 +5108,8 @@ szconv(SizeIn sz) C4_NOEXCEPT_X
     return szo;
 }
 
+C4_SUPPRESS_WARNING_GCC_CLANG_POP
+
 } // namespace c4
 
 #endif /* _C4_SZCONV_HPP_ */
@@ -4874,32 +5149,49 @@ szconv(SizeIn sz) C4_NOEXCEPT_X
 
 namespace c4 {
 
+template<class T>
+struct blob_;
+
+namespace detail {
+template<class T> struct is_blob_type : std::integral_constant<bool, false> {};
+template<class T> struct is_blob_type<blob_<T>> : std::integral_constant<bool, true> {};
+template<class T> struct is_blob_value_type : std::integral_constant<bool, (std::is_fundamental<T>::value || std::is_trivially_copyable<T>::value)> {};
+} // namespace detail
+
 template<class T>
 struct blob_
 {
+    static_assert(std::is_same<T, byte>::value || std::is_same<T, cbyte>::value, "must be either byte or cbyte");
+    static_assert(sizeof(T) == 1u, "must be either byte or cbyte");
+
+public:
+
     T *    buf;
     size_t len;
 
-    C4_ALWAYS_INLINE blob_() noexcept : buf(), len() {}
+public:
 
+    C4_ALWAYS_INLINE blob_() noexcept = default;
     C4_ALWAYS_INLINE blob_(blob_ const& that) noexcept = default;
     C4_ALWAYS_INLINE blob_(blob_     && that) noexcept = default;
     C4_ALWAYS_INLINE blob_& operator=(blob_     && that) noexcept = default;
     C4_ALWAYS_INLINE blob_& operator=(blob_ const& that) noexcept = default;
 
-    // need to sfinae out copy constructors! (why? isn't the above sufficient?)
-    #define _C4_REQUIRE_NOT_SAME class=typename std::enable_if<( ! std::is_same<U, blob_>::value) && ( ! std::is_pointer<U>::value), T>::type
-    template<class U, _C4_REQUIRE_NOT_SAME> C4_ALWAYS_INLINE blob_(U &var) noexcept : buf(reinterpret_cast<T*>(&var)), len(sizeof(U)) {}
-    template<class U, _C4_REQUIRE_NOT_SAME> C4_ALWAYS_INLINE blob_& operator= (U &var) noexcept { buf = reinterpret_cast<T*>(&var); len = sizeof(U); return *this; }
-    #undef _C4_REQUIRE_NOT_SAME
-
-    template<class U, size_t N> C4_ALWAYS_INLINE blob_(U (&arr)[N]) noexcept : buf(reinterpret_cast<T*>(arr)), len(sizeof(U) * N) {}
-    template<class U, size_t N> C4_ALWAYS_INLINE blob_& operator= (U (&arr)[N]) noexcept { buf = reinterpret_cast<T*>(arr); len = sizeof(U) * N; return *this; }
+    template<class U, class=typename std::enable_if<std::is_const<T>::value && std::is_same<typename std::add_const<U>::type, T>::value, U>::type> C4_ALWAYS_INLINE blob_(blob_<U> const& that) noexcept : buf(that.buf), len(that.len) {}
+    template<class U, class=typename std::enable_if<std::is_const<T>::value && std::is_same<typename std::add_const<U>::type, T>::value, U>::type> C4_ALWAYS_INLINE blob_(blob_<U>     && that) noexcept : buf(that.buf), len(that.len) {}
+    template<class U, class=typename std::enable_if<std::is_const<T>::value && std::is_same<typename std::add_const<U>::type, T>::value, U>::type> C4_ALWAYS_INLINE blob_& operator=(blob_<U>     && that) noexcept { buf = that.buf; len = that.len; return *this; }
+    template<class U, class=typename std::enable_if<std::is_const<T>::value && std::is_same<typename std::add_const<U>::type, T>::value, U>::type> C4_ALWAYS_INLINE blob_& operator=(blob_<U> const& that) noexcept { buf = that.buf; len = that.len; return *this; }
 
-    template<class U>
-    C4_ALWAYS_INLINE blob_(U          *ptr, size_t n) noexcept : buf(reinterpret_cast<T*>(ptr)), len(sizeof(U) * n) { C4_ASSERT(is_aligned(ptr)); }
     C4_ALWAYS_INLINE blob_(void       *ptr, size_t n) noexcept : buf(reinterpret_cast<T*>(ptr)), len(n) {}
     C4_ALWAYS_INLINE blob_(void const *ptr, size_t n) noexcept : buf(reinterpret_cast<T*>(ptr)), len(n) {}
+
+    #define _C4_REQUIRE_BLOBTYPE(ty) class=typename std::enable_if<((!detail::is_blob_type<ty>::value) && (detail::is_blob_value_type<ty>::value)), T>::type
+    template<class U, _C4_REQUIRE_BLOBTYPE(U)> C4_ALWAYS_INLINE blob_(U &var) noexcept : buf(reinterpret_cast<T*>(&var)), len(sizeof(U)) {}
+    template<class U, _C4_REQUIRE_BLOBTYPE(U)> C4_ALWAYS_INLINE blob_(U *ptr, size_t n) noexcept : buf(reinterpret_cast<T*>(ptr)), len(sizeof(U) * n) { C4_ASSERT(is_aligned(ptr)); }
+    template<class U, _C4_REQUIRE_BLOBTYPE(U)> C4_ALWAYS_INLINE blob_& operator= (U &var) noexcept { buf = reinterpret_cast<T*>(&var); len = sizeof(U); return *this; }
+    template<class U, size_t N, _C4_REQUIRE_BLOBTYPE(U)> C4_ALWAYS_INLINE blob_(U (&arr)[N]) noexcept : buf(reinterpret_cast<T*>(arr)), len(sizeof(U) * N) {}
+    template<class U, size_t N, _C4_REQUIRE_BLOBTYPE(U)> C4_ALWAYS_INLINE blob_& operator= (U (&arr)[N]) noexcept { buf = reinterpret_cast<T*>(arr); len = sizeof(U) * N; return *this; }
+    #undef _C4_REQUIRE_BLOBTYPE
 };
 
 /** an immutable binary blob */
@@ -4997,22 +5289,26 @@ using substr = C4CORE_EXPORT basic_substring<char>;
 
 #ifdef __clang__
 #   pragma clang diagnostic push
+#   pragma clang diagnostic ignored "-Wold-style-cast"
 #elif defined(__GNUC__)
 #   pragma GCC diagnostic push
 #   pragma GCC diagnostic ignored "-Wtype-limits" // disable warnings on size_t>=0, used heavily in assertions below. These assertions are a preparation step for providing the index type as a template parameter.
 #   pragma GCC diagnostic ignored "-Wuseless-cast"
+#   pragma GCC diagnostic ignored "-Wold-style-cast"
 #endif
 
 
 namespace c4 {
 
+/** @defgroup doc_substr Substring: read/write string views
+ * @{ */
 
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 
+/** @cond dev */
 namespace detail {
-
 template<typename C>
 static inline void _do_reverse(C *C4_RESTRICT first, C *C4_RESTRICT last)
 {
@@ -5023,36 +5319,33 @@ static inline void _do_reverse(C *C4_RESTRICT first, C *C4_RESTRICT last)
         *first++ = tmp;
     }
 }
-
 } // namespace detail
-
+/** @endcond */
 
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 
+/** @cond dev */
 // utility macros to deuglify SFINAE code; undefined after the class.
 // https://stackoverflow.com/questions/43051882/how-to-disable-a-class-member-funrtion-for-certain-template-types
 #define C4_REQUIRE_RW(ret_type) \
     template <typename U=C> \
     typename std::enable_if< ! std::is_const<U>::value, ret_type>::type
-// non-const-to-const
-#define C4_NC2C(ty) \
-    typename std::enable_if<std::is_const<C>::value && ( ! std::is_const<ty>::value), ty>::type
+/** @endcond */
 
 
 /** a non-owning string-view, consisting of a character pointer
  * and a length.
  *
  * @note The pointer is explicitly restricted.
- * @note Because of a C++ limitation, there cannot coexist overloads for
- * constructing from a char[N] and a char*; the latter will always be chosen
- * by the compiler. To construct an object of this type, call to_substr() or
- * to_csubstr(). For a more detailed explanation on why the overloads cannot
- * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html
  *
- * @see to_substr()
- * @see to_csubstr()
+ * @see a [quickstart
+ * sample](https://rapidyaml.readthedocs.io/latest/doxygen/group__doc__quickstart.html#ga43e253da0692c13967019446809c1113)
+ * in rapidyaml's documentation.
+ *
+ * @see @ref substr and @ref to_substr()
+ * @see @ref csubstr and @ref to_csubstr()
  */
 template<class C>
 struct C4CORE_EXPORT basic_substring
@@ -5084,7 +5377,11 @@ struct C4CORE_EXPORT basic_substring
     enum : size_t { npos = (size_t)-1, NONE = (size_t)-1 };
 
     /// convert automatically to substring of const C
-    operator ro_substr () const { ro_substr s(str, len); return s; }
+    template<class U=C>
+    C4_ALWAYS_INLINE operator typename std::enable_if<!std::is_const<U>::value, ro_substr const&>::type () const noexcept
+    {
+        return *(ro_substr const*)this; // don't call the str+len ctor because it does a check
+    }
 
     /** @} */
 
@@ -5093,15 +5390,17 @@ struct C4CORE_EXPORT basic_substring
     /** @name Default construction and assignment */
     /** @{ */
 
-    constexpr basic_substring() : str(nullptr), len(0) {}
+    C4_ALWAYS_INLINE constexpr basic_substring() noexcept : str(), len() {}
+
+    C4_ALWAYS_INLINE basic_substring(basic_substring const&) noexcept = default;
+    C4_ALWAYS_INLINE basic_substring(basic_substring     &&) noexcept = default;
+    C4_ALWAYS_INLINE basic_substring(std::nullptr_t) noexcept : str(nullptr), len(0) {}
 
-    constexpr basic_substring(basic_substring const&) = default;
-    constexpr basic_substring(basic_substring     &&) = default;
-    constexpr basic_substring(std::nullptr_t) : str(nullptr), len(0) {}
+    C4_ALWAYS_INLINE basic_substring& operator= (basic_substring const&) noexcept = default;
+    C4_ALWAYS_INLINE basic_substring& operator= (basic_substring     &&) noexcept = default;
+    C4_ALWAYS_INLINE basic_substring& operator= (std::nullptr_t) noexcept { str = nullptr; len = 0; return *this; }
 
-    basic_substring& operator= (basic_substring const&) = default;
-    basic_substring& operator= (basic_substring     &&) = default;
-    basic_substring& operator= (std::nullptr_t) { str = nullptr; len = 0; return *this; }
+    C4_ALWAYS_INLINE void clear() noexcept { str = nullptr; len = 0; }
 
     /** @} */
 
@@ -5110,62 +5409,60 @@ struct C4CORE_EXPORT basic_substring
     /** @name Construction and assignment from characters with the same type */
     /** @{ */
 
-    //basic_substring(C *s_) : str(s_), len(s_ ? strlen(s_) : 0) {}
-    /** the overload for receiving a single C* pointer will always
-     * hide the array[N] overload. So it is disabled. If you want to
-     * construct a substr from a single pointer containing a C-style string,
-     * you can call c4::to_substr()/c4::to_csubstr().
-     * @see c4::to_substr()
-     * @see c4::to_csubstr() */
+    /** Construct from an array.
+     * @warning the input string need not be zero terminated, but the
+     * length is taken as if the string was zero terminated */
     template<size_t N>
-    constexpr basic_substring(C (&s_)[N]) noexcept : str(s_), len(N-1) {}
-    basic_substring(C *s_, size_t len_) : str(s_), len(len_) { C4_ASSERT(str || !len_); }
-    basic_substring(C *beg_, C *end_) : str(beg_), len(static_cast<size_t>(end_ - beg_)) { C4_ASSERT(end_ >= beg_); }
-
-    //basic_substring& operator= (C *s_) { this->assign(s_); return *this; }
+    C4_ALWAYS_INLINE constexpr basic_substring(C (&s_)[N]) noexcept : str(s_), len(N-1) {}
+    /** Construct from a pointer and length.
+     * @warning the input string need not be zero terminated. */
+    C4_ALWAYS_INLINE basic_substring(C *s_, size_t len_) noexcept : str(s_), len(len_) { C4_ASSERT(str || !len_); }
+    /** Construct from two pointers.
+     * @warning the end pointer MUST BE larger than or equal to the begin pointer
+     * @warning the input string need not be zero terminated */
+    C4_ALWAYS_INLINE basic_substring(C *beg_, C *end_) noexcept : str(beg_), len(static_cast<size_t>(end_ - beg_)) { C4_ASSERT(end_ >= beg_); }
+    /** Construct from a C-string (zero-terminated string)
+     * @warning the input string MUST BE zero terminated.
+     * @warning will call strlen()
+     * @note this overload uses SFINAE to prevent it from overriding the array ctor
+     * @see For a more detailed explanation on why the plain overloads cannot
+     * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */
+    template<class U, typename std::enable_if<std::is_same<U, C*>::value || std::is_same<U, NCC_*>::value, int>::type=0>
+    C4_ALWAYS_INLINE basic_substring(U s_) noexcept : str(s_), len(s_ ? strlen(s_) : 0) {}
+
+    /** Assign from an array.
+     * @warning the input string need not be zero terminated, but the
+     * length is taken as if the string was zero terminated */
     template<size_t N>
-    basic_substring& operator= (C (&s_)[N]) { this->assign<N>(s_); return *this; }
-
-    //void assign(C *s_) { str = (s_); len = (s_ ? strlen(s_) : 0); }
-    /** the overload for receiving a single C* pointer will always
-     * hide the array[N] overload. So it is disabled. If you want to
-     * construct a substr from a single pointer containing a C-style string,
-     * you can call c4::to_substr()/c4::to_csubstr().
-     * @see c4::to_substr()
-     * @see c4::to_csubstr() */
+    C4_ALWAYS_INLINE void assign(C (&s_)[N]) noexcept { str = (s_); len = (N-1); }
+    /** Assign from a pointer and length.
+     * @warning the input string need not be zero terminated. */
+    C4_ALWAYS_INLINE void assign(C *s_, size_t len_) noexcept { str = s_; len = len_; C4_ASSERT(str || !len_); }
+    /** Assign from two pointers.
+     * @warning the end pointer MUST BE larger than or equal to the begin pointer
+     * @warning the input string need not be zero terminated. */
+    C4_ALWAYS_INLINE void assign(C *beg_, C *end_) noexcept { C4_ASSERT(end_ >= beg_); str = (beg_); len = static_cast<size_t>(end_ - beg_); }
+    /** Assign from a C-string (zero-terminated string)
+     * @warning the input string must be zero terminated.
+     * @warning will call strlen()
+     * @note this overload uses SFINAE to prevent it from overriding the array assign() overload
+     * @see For a more detailed explanation on why the plain overloads cannot
+     * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */
+    template<class U, typename std::enable_if<std::is_same<U, C*>::value || std::is_same<U, NCC_*>::value, int>::type=0>
+    C4_ALWAYS_INLINE void assign(U s_) noexcept { str = (s_); len = (s_ ? strlen(s_) : 0); }
+
+    /** Assign from an array.
+     * @warning the input string need not be zero terminated. */
     template<size_t N>
-    void assign(C (&s_)[N]) { str = (s_); len = (N-1); }
-    void assign(C *s_, size_t len_) { str = s_; len = len_; C4_ASSERT(str || !len_); }
-    void assign(C *beg_, C *end_) { C4_ASSERT(end_ >= beg_); str = (beg_); len = (end_ - beg_); }
-
-    void clear() { str = nullptr; len = 0; }
-
-    /** @} */
-
-public:
-
-    /** @name Construction from non-const characters */
-    /** @{ */
-
-    // when the char type is const, allow construction and assignment from non-const chars
-
-    /** only available when the char type is const */
-    template<size_t N, class U=NCC_> explicit basic_substring(C4_NC2C(U) (&s_)[N]) { str = s_; len = N-1; }
-    /** only available when the char type is const */
-    template<          class U=NCC_>          basic_substring(C4_NC2C(U) *s_, size_t len_) { str = s_; len = len_; }
-    /** only available when the char type is const */
-    template<          class U=NCC_>          basic_substring(C4_NC2C(U) *beg_, C4_NC2C(U) *end_) { C4_ASSERT(end_ >= beg_); str = beg_; len = end_ - beg_;  }
-
-    /** only available when the char type is const */
-    template<size_t N, class U=NCC_> void assign(C4_NC2C(U) (&s_)[N]) { str = s_; len = N-1; }
-    /** only available when the char type is const */
-    template<          class U=NCC_> void assign(C4_NC2C(U) *s_, size_t len_) { str = s_; len = len_; }
-    /** only available when the char type is const */
-    template<          class U=NCC_> void assign(C4_NC2C(U) *beg_, C4_NC2C(U) *end_) { C4_ASSERT(end_ >= beg_); str = beg_; len = end_ - beg_;  }
-
-    /** only available when the char type is const */
-    template<size_t N, class U=NCC_>
-    basic_substring& operator=(C4_NC2C(U) (&s_)[N]) { str = s_; len = N-1; return *this; }
+    C4_ALWAYS_INLINE basic_substring& operator= (C (&s_)[N]) noexcept { str = (s_); len = (N-1); return *this; }
+    /** Assign from a C-string (zero-terminated string)
+     * @warning the input string MUST BE zero terminated.
+     * @warning will call strlen()
+     * @note this overload uses SFINAE to prevent it from overriding the array operator= overload
+     * @see For a more detailed explanation on why the plain overloads cannot
+     * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */
+    template<class U, typename std::enable_if<std::is_same<U, C*>::value || std::is_same<U, NCC_*>::value, int>::type=0>
+    C4_ALWAYS_INLINE basic_substring& operator= (U s_) noexcept { str = s_; len = s_ ? strlen(s_) : 0; return *this; }
 
     /** @} */
 
@@ -5174,28 +5471,28 @@ struct C4CORE_EXPORT basic_substring
     /** @name Standard accessor methods */
     /** @{ */
 
-    bool   has_str()   const { return ! empty() && str[0] != C(0); }
-    bool   empty()     const { return (len == 0 || str == nullptr); }
-    bool   not_empty() const { return (len != 0 && str != nullptr); }
-    size_t size()      const { return len; }
+    C4_ALWAYS_INLINE C4_PURE bool   has_str()   const noexcept { return ! empty() && str[0] != C(0); }
+    C4_ALWAYS_INLINE C4_PURE bool   empty()     const noexcept { return (len == 0 || str == nullptr); }
+    C4_ALWAYS_INLINE C4_PURE bool   not_empty() const noexcept { return (len != 0 && str != nullptr); }
+    C4_ALWAYS_INLINE C4_PURE size_t size()      const noexcept { return len; }
 
-    iterator begin() { return str; }
-    iterator end  () { return str + len; }
+    C4_ALWAYS_INLINE C4_PURE iterator begin() noexcept { return str; }
+    C4_ALWAYS_INLINE C4_PURE iterator end  () noexcept { return str + len; }
 
-    const_iterator begin() const { return str; }
-    const_iterator end  () const { return str + len; }
+    C4_ALWAYS_INLINE C4_PURE const_iterator begin() const noexcept { return str; }
+    C4_ALWAYS_INLINE C4_PURE const_iterator end  () const noexcept { return str + len; }
 
-    C      * data()       { return str; }
-    C const* data() const { return str; }
+    C4_ALWAYS_INLINE C4_PURE C      * data()       noexcept { return str; }
+    C4_ALWAYS_INLINE C4_PURE C const* data() const noexcept { return str; }
 
-    inline C      & operator[] (size_t i)       { C4_ASSERT(i >= 0 && i < len); return str[i]; }
-    inline C const& operator[] (size_t i) const { C4_ASSERT(i >= 0 && i < len); return str[i]; }
+    C4_ALWAYS_INLINE C4_PURE C      & operator[] (size_t i)       noexcept { C4_ASSERT(i >= 0 && i < len); return str[i]; }
+    C4_ALWAYS_INLINE C4_PURE C const& operator[] (size_t i) const noexcept { C4_ASSERT(i >= 0 && i < len); return str[i]; }
 
-    inline C      & front()       { C4_ASSERT(len > 0 && str != nullptr); return *str; }
-    inline C const& front() const { C4_ASSERT(len > 0 && str != nullptr); return *str; }
+    C4_ALWAYS_INLINE C4_PURE C      & front()       noexcept { C4_ASSERT(len > 0 && str != nullptr); return *str; }
+    C4_ALWAYS_INLINE C4_PURE C const& front() const noexcept { C4_ASSERT(len > 0 && str != nullptr); return *str; }
 
-    inline C      & back()       { C4_ASSERT(len > 0 && str != nullptr); return *(str + len - 1); }
-    inline C const& back() const { C4_ASSERT(len > 0 && str != nullptr); return *(str + len - 1); }
+    C4_ALWAYS_INLINE C4_PURE C      & back()       noexcept { C4_ASSERT(len > 0 && str != nullptr); return *(str + len - 1); }
+    C4_ALWAYS_INLINE C4_PURE C const& back() const noexcept { C4_ASSERT(len > 0 && str != nullptr); return *(str + len - 1); }
 
     /** @} */
 
@@ -5204,28 +5501,35 @@ struct C4CORE_EXPORT basic_substring
     /** @name Comparison methods */
     /** @{ */
 
-    int compare(C const c) const
+    C4_PURE int compare(C const c) const noexcept
     {
         C4_XASSERT((str != nullptr) || len == 0);
-        if( ! len)
+        if(C4_LIKELY(str != nullptr && len > 0))
+            return (*str != c) ? *str - c : (static_cast<int>(len) - 1);
+        else
             return -1;
-        if(*str == c)
-            return static_cast<int>(len - 1);
-        return *str - c;
     }
 
-    int compare(const char *that, size_t sz) const
+    C4_PURE int compare(const char *C4_RESTRICT that, size_t sz) const noexcept
     {
         C4_XASSERT(that || sz  == 0);
         C4_XASSERT(str  || len == 0);
         if(C4_LIKELY(str && that))
         {
-            int ret = strncmp(str, that, len < sz ? len : sz);
-            if(ret == 0 && len != sz)
-                ret = len < sz ? -1 : 1;
-            return ret;
+            {
+                const size_t min = len < sz ? len : sz;
+                for(size_t i = 0; i < min; ++i)
+                    if(str[i] != that[i])
+                        return str[i] < that[i] ? -1 : 1;
+            }
+            if(len < sz)
+                return -1;
+            else if(len == sz)
+                return 0;
+            else
+                return 1;
         }
-        if((!str && !that) || (len == sz))
+        else if(len == sz)
         {
             C4_XASSERT(len == 0 && sz == 0);
             return 0;
@@ -5233,31 +5537,31 @@ struct C4CORE_EXPORT basic_substring
         return len < sz ? -1 : 1;
     }
 
-    C4_ALWAYS_INLINE int compare(ro_substr const that) const { return this->compare(that.str, that.len); }
+    C4_ALWAYS_INLINE C4_PURE int compare(ro_substr const that) const noexcept { return this->compare(that.str, that.len); }
 
-    C4_ALWAYS_INLINE bool operator== (std::nullptr_t) const { return str == nullptr || len == 0; }
-    C4_ALWAYS_INLINE bool operator!= (std::nullptr_t) const { return str != nullptr || len == 0; }
+    C4_ALWAYS_INLINE C4_PURE bool operator== (std::nullptr_t) const noexcept { return str == nullptr; }
+    C4_ALWAYS_INLINE C4_PURE bool operator!= (std::nullptr_t) const noexcept { return str != nullptr; }
 
-    C4_ALWAYS_INLINE bool operator== (C const c) const { return this->compare(c) == 0; }
-    C4_ALWAYS_INLINE bool operator!= (C const c) const { return this->compare(c) != 0; }
-    C4_ALWAYS_INLINE bool operator<  (C const c) const { return this->compare(c) <  0; }
-    C4_ALWAYS_INLINE bool operator>  (C const c) const { return this->compare(c) >  0; }
-    C4_ALWAYS_INLINE bool operator<= (C const c) const { return this->compare(c) <= 0; }
-    C4_ALWAYS_INLINE bool operator>= (C const c) const { return this->compare(c) >= 0; }
+    C4_ALWAYS_INLINE C4_PURE bool operator== (C const c) const noexcept { return this->compare(c) == 0; }
+    C4_ALWAYS_INLINE C4_PURE bool operator!= (C const c) const noexcept { return this->compare(c) != 0; }
+    C4_ALWAYS_INLINE C4_PURE bool operator<  (C const c) const noexcept { return this->compare(c) <  0; }
+    C4_ALWAYS_INLINE C4_PURE bool operator>  (C const c) const noexcept { return this->compare(c) >  0; }
+    C4_ALWAYS_INLINE C4_PURE bool operator<= (C const c) const noexcept { return this->compare(c) <= 0; }
+    C4_ALWAYS_INLINE C4_PURE bool operator>= (C const c) const noexcept { return this->compare(c) >= 0; }
 
-    template<class U> C4_ALWAYS_INLINE bool operator== (basic_substring<U> const that) const { return this->compare(that) == 0; }
-    template<class U> C4_ALWAYS_INLINE bool operator!= (basic_substring<U> const that) const { return this->compare(that) != 0; }
-    template<class U> C4_ALWAYS_INLINE bool operator<  (basic_substring<U> const that) const { return this->compare(that) <  0; }
-    template<class U> C4_ALWAYS_INLINE bool operator>  (basic_substring<U> const that) const { return this->compare(that) >  0; }
-    template<class U> C4_ALWAYS_INLINE bool operator<= (basic_substring<U> const that) const { return this->compare(that) <= 0; }
-    template<class U> C4_ALWAYS_INLINE bool operator>= (basic_substring<U> const that) const { return this->compare(that) >= 0; }
+    template<class U> C4_ALWAYS_INLINE C4_PURE bool operator== (basic_substring<U> const that) const noexcept { return this->compare(that) == 0; }
+    template<class U> C4_ALWAYS_INLINE C4_PURE bool operator!= (basic_substring<U> const that) const noexcept { return this->compare(that) != 0; }
+    template<class U> C4_ALWAYS_INLINE C4_PURE bool operator<  (basic_substring<U> const that) const noexcept { return this->compare(that) <  0; }
+    template<class U> C4_ALWAYS_INLINE C4_PURE bool operator>  (basic_substring<U> const that) const noexcept { return this->compare(that) >  0; }
+    template<class U> C4_ALWAYS_INLINE C4_PURE bool operator<= (basic_substring<U> const that) const noexcept { return this->compare(that) <= 0; }
+    template<class U> C4_ALWAYS_INLINE C4_PURE bool operator>= (basic_substring<U> const that) const noexcept { return this->compare(that) >= 0; }
 
-    template<size_t N> C4_ALWAYS_INLINE bool operator== (const char (&that)[N]) const { return this->compare(that, N-1) == 0; }
-    template<size_t N> C4_ALWAYS_INLINE bool operator!= (const char (&that)[N]) const { return this->compare(that, N-1) != 0; }
-    template<size_t N> C4_ALWAYS_INLINE bool operator<  (const char (&that)[N]) const { return this->compare(that, N-1) <  0; }
-    template<size_t N> C4_ALWAYS_INLINE bool operator>  (const char (&that)[N]) const { return this->compare(that, N-1) >  0; }
-    template<size_t N> C4_ALWAYS_INLINE bool operator<= (const char (&that)[N]) const { return this->compare(that, N-1) <= 0; }
-    template<size_t N> C4_ALWAYS_INLINE bool operator>= (const char (&that)[N]) const { return this->compare(that, N-1) >= 0; }
+    template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator== (const char (&that)[N]) const noexcept { return this->compare(that, N-1) == 0; }
+    template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator!= (const char (&that)[N]) const noexcept { return this->compare(that, N-1) != 0; }
+    template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator<  (const char (&that)[N]) const noexcept { return this->compare(that, N-1) <  0; }
+    template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator>  (const char (&that)[N]) const noexcept { return this->compare(that, N-1) >  0; }
+    template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator<= (const char (&that)[N]) const noexcept { return this->compare(that, N-1) <= 0; }
+    template<size_t N> C4_ALWAYS_INLINE C4_PURE bool operator>= (const char (&that)[N]) const noexcept { return this->compare(that, N-1) >= 0; }
 
     /** @} */
 
@@ -5267,39 +5571,38 @@ struct C4CORE_EXPORT basic_substring
     /** @{ */
 
     /** true if *this is a substring of that (ie, from the same buffer) */
-    inline bool is_sub(ro_substr const that) const
+    C4_ALWAYS_INLINE C4_PURE bool is_sub(ro_substr const that) const noexcept
     {
         return that.is_super(*this);
     }
 
     /** true if that is a substring of *this (ie, from the same buffer) */
-    inline bool is_super(ro_substr const that) const
+    C4_ALWAYS_INLINE C4_PURE bool is_super(ro_substr const that) const noexcept
     {
-        if(C4_UNLIKELY(len == 0))
-        {
+        if(C4_LIKELY(len > 0))
+            return that.str >= str && that.str+that.len <= str+len;
+        else
             return that.len == 0 && that.str == str && str != nullptr;
-        }
-        return that.begin() >= begin() && that.end() <= end();
     }
 
     /** true if there is overlap of at least one element between that and *this */
-    inline bool overlaps(ro_substr const that) const
+    C4_ALWAYS_INLINE C4_PURE bool overlaps(ro_substr const that) const noexcept
     {
         // thanks @timwynants
-        return (that.end() > begin() && that.begin() < end());
+        return that.str+that.len > str && that.str < str+len;
     }
 
 public:
 
     /** return [first,len[ */
-    basic_substring sub(size_t first) const
+    C4_ALWAYS_INLINE C4_PURE basic_substring sub(size_t first) const noexcept
     {
         C4_ASSERT(first >= 0 && first <= len);
         return basic_substring(str + first, len - first);
     }
 
     /** return [first,first+num[. If num==npos, return [first,len[ */
-    basic_substring sub(size_t first, size_t num) const
+    C4_ALWAYS_INLINE C4_PURE basic_substring sub(size_t first, size_t num) const noexcept
     {
         C4_ASSERT(first >= 0 && first <= len);
         C4_ASSERT((num >= 0 && num <= len) || (num == npos));
@@ -5309,7 +5612,7 @@ struct C4CORE_EXPORT basic_substring
     }
 
     /** return [first,last[. If last==npos, return [first,len[ */
-    basic_substring range(size_t first, size_t last=npos) const
+    C4_ALWAYS_INLINE C4_PURE basic_substring range(size_t first, size_t last=npos) const noexcept
     {
         C4_ASSERT(first >= 0 && first <= len);
         last = last != npos ? last : len;
@@ -5318,24 +5621,26 @@ struct C4CORE_EXPORT basic_substring
         return basic_substring(str + first, last - first);
     }
 
-    /** return [0,num[*/
-    basic_substring first(size_t num) const
+    /** return the first @p num elements: [0,num[*/
+    C4_ALWAYS_INLINE C4_PURE basic_substring first(size_t num) const noexcept
     {
-        return sub(0, num);
+        C4_ASSERT(num <= len || num == npos);
+        return basic_substring(str, num != npos ? num : len);
     }
 
-    /** return [len-num,len[*/
-    basic_substring last(size_t num) const
+    /** return the last @p num elements: [len-num,len[*/
+    C4_ALWAYS_INLINE C4_PURE basic_substring last(size_t num) const noexcept
     {
-        if(num == npos)
-            return *this;
-        return sub(len - num);
+        C4_ASSERT(num <= len || num == npos);
+        return num != npos ?
+            basic_substring(str + len - num, num) :
+            *this;
     }
 
     /** offset from the ends: return [left,len-right[ ; ie, trim a
         number of characters from the left and right. This is
         equivalent to python's negative list indices. */
-    basic_substring offs(size_t left, size_t right) const
+    C4_ALWAYS_INLINE C4_PURE basic_substring offs(size_t left, size_t right) const noexcept
     {
         C4_ASSERT(left  >= 0 && left  <= len);
         C4_ASSERT(right >= 0 && right <= len);
@@ -5343,27 +5648,47 @@ struct C4CORE_EXPORT basic_substring
         return basic_substring(str + left, len - right - left);
     }
 
-    /** return [0, pos+include_pos[ */
-    basic_substring left_of(size_t pos, bool include_pos=false) const
+    /** return [0, pos[ . Same as .first(pos), but provided for compatibility with .right_of() */
+    C4_ALWAYS_INLINE C4_PURE basic_substring left_of(size_t pos) const noexcept
     {
-        if(pos == npos)
-            return *this;
-        return first(pos + include_pos);
+        C4_ASSERT(pos <= len || pos == npos);
+        return (pos != npos) ?
+            basic_substring(str, pos) :
+            *this;
+    }
+
+    /** return [0, pos+include_pos[ . Same as .first(pos+include_pos), but provided for compatibility with .right_of() */
+    C4_ALWAYS_INLINE C4_PURE basic_substring left_of(size_t pos, bool include_pos) const noexcept
+    {
+        C4_ASSERT(pos <= len || pos == npos);
+        return (pos != npos) ?
+            basic_substring(str, pos+include_pos) :
+            *this;
+    }
+
+    /** return [pos+1, len[ */
+    C4_ALWAYS_INLINE C4_PURE basic_substring right_of(size_t pos) const noexcept
+    {
+        C4_ASSERT(pos <= len || pos == npos);
+        return (pos != npos) ?
+            basic_substring(str + (pos + 1), len - (pos + 1)) :
+            basic_substring(str + len, size_t(0));
     }
 
     /** return [pos+!include_pos, len[ */
-    basic_substring right_of(size_t pos, bool include_pos=false) const
+    C4_ALWAYS_INLINE C4_PURE basic_substring right_of(size_t pos, bool include_pos) const noexcept
     {
-        if(pos == npos)
-            return sub(len, 0);
-        return sub(pos + !include_pos);
+        C4_ASSERT(pos <= len || pos == npos);
+        return (pos != npos) ?
+            basic_substring(str + (pos + !include_pos), len - (pos + !include_pos)) :
+            basic_substring(str + len, size_t(0));
     }
 
 public:
 
     /** given @p subs a substring of the current string, get the
      * portion of the current string to the left of it */
-    basic_substring left_of(ro_substr const subs) const
+    C4_ALWAYS_INLINE C4_PURE basic_substring left_of(ro_substr const subs) const noexcept
     {
         C4_ASSERT(is_super(subs) || subs.empty());
         auto ssb = subs.begin();
@@ -5377,7 +5702,7 @@ struct C4CORE_EXPORT basic_substring
 
     /** given @p subs a substring of the current string, get the
      * portion of the current string to the right of it */
-    basic_substring right_of(ro_substr const subs) const
+    C4_ALWAYS_INLINE C4_PURE basic_substring right_of(ro_substr const subs) const noexcept
     {
         C4_ASSERT(is_super(subs) || subs.empty());
         auto sse = subs.end();
@@ -5793,7 +6118,17 @@ struct C4CORE_EXPORT basic_substring
 
 public:
 
-    size_t first_not_of(const C c, size_t start=0) const
+    size_t first_not_of(const C c) const
+    {
+        for(size_t i = 0; i < len; ++i)
+        {
+            if(str[i] != c)
+                return i;
+        }
+        return npos;
+    }
+
+    size_t first_not_of(const C c, size_t start) const
     {
         C4_ASSERT((start >= 0 && start <= len) || (start == len && len == 0));
         for(size_t i = start; i < len; ++i)
@@ -5804,7 +6139,17 @@ struct C4CORE_EXPORT basic_substring
         return npos;
     }
 
-    size_t last_not_of(const C c, size_t start=npos) const
+    size_t last_not_of(const C c) const
+    {
+        for(size_t i = len-1; i != size_t(-1); --i)
+        {
+            if(str[i] != c)
+                return i;
+        }
+        return npos;
+    }
+
+    size_t last_not_of(const C c, size_t start) const
     {
         C4_ASSERT(start == npos || (start >= 0 && start <= len));
         if(start == npos)
@@ -5817,7 +6162,28 @@ struct C4CORE_EXPORT basic_substring
         return npos;
     }
 
-    size_t first_not_of(ro_substr chars, size_t start=0) const
+    size_t first_not_of(ro_substr chars) const
+    {
+        for(size_t i = 0; i < len; ++i)
+        {
+            bool gotit = true;
+            for(size_t j = 0; j < chars.len; ++j)
+            {
+                if(str[i] == chars.str[j])
+                {
+                    gotit = false;
+                    break;
+                }
+            }
+            if(gotit)
+            {
+                return i;
+            }
+        }
+        return npos;
+    }
+
+    size_t first_not_of(ro_substr chars, size_t start) const
     {
         C4_ASSERT((start >= 0 && start <= len) || (start == len && len == 0));
         for(size_t i = start; i < len; ++i)
@@ -5839,7 +6205,28 @@ struct C4CORE_EXPORT basic_substring
         return npos;
     }
 
-    size_t last_not_of(ro_substr chars, size_t start=npos) const
+    size_t last_not_of(ro_substr chars) const
+    {
+        for(size_t i = len-1; i != size_t(-1); --i)
+        {
+            bool gotit = true;
+            for(size_t j = 0; j < chars.len; ++j)
+            {
+                if(str[i] == chars.str[j])
+                {
+                    gotit = false;
+                    break;
+                }
+            }
+            if(gotit)
+            {
+                return i;
+            }
+        }
+        return npos;
+    }
+
+    size_t last_not_of(ro_substr chars, size_t start) const
     {
         C4_ASSERT(start == npos || (start >= 0 && start <= len));
         if(start == npos)
@@ -6022,7 +6409,7 @@ struct C4CORE_EXPORT basic_substring
             return ne;
         if(ne.str[0] == '-')
             return first(0);
-        size_t skip_start = (ne.str[0] == '+') ? 1 : 0;
+        size_t skip_start = size_t(ne.str[0] == '+');
         return ne._first_integral_span(skip_start);
     }
 
@@ -6032,62 +6419,71 @@ struct C4CORE_EXPORT basic_substring
         basic_substring ne = first_non_empty_span();
         if(ne.empty())
             return ne;
-        size_t skip_start = (ne.str[0] == '+' || ne.str[0] == '-') ? 1 : 0;
+        size_t skip_start = size_t(ne.str[0] == '+' || ne.str[0] == '-');
         return ne._first_integral_span(skip_start);
     }
 
     basic_substring _first_integral_span(size_t skip_start) const
     {
         C4_ASSERT(!empty());
-        if(skip_start == len) {
+        if(skip_start == len)
             return first(0);
-        }
         C4_ASSERT(skip_start < len);
-        if(first_of_any("0x", "0X")) // hexadecimal
-        {
-            skip_start += 2;
-            if(len == skip_start)
-                return first(0);
-            for(size_t i = skip_start; i < len; ++i)
-            {
-                if( ! _is_hex_char(str[i]))
-                    return _is_delim_char(str[i]) ? first(i) : first(0);
-            }
-        }
-        else if(first_of_any("0o", "0O")) // octal
+        if(len >= skip_start + 3)
         {
-            skip_start += 2;
-            if(len == skip_start)
-                return first(0);
-            for(size_t i = skip_start; i < len; ++i)
+            if(str[skip_start] != '0')
             {
-                char c = str[i];
-                if(c < '0' || c > '7')
-                    return _is_delim_char(str[i]) ? first(i) : first(0);
+                for(size_t i = skip_start; i < len; ++i)
+                {
+                    char c = str[i];
+                    if(c < '0' || c > '9')
+                        return i > skip_start && _is_delim_char(c) ? first(i) : first(0);
+                }
             }
-        }
-        else if(first_of_any("0b", "0B")) // binary
-        {
-            skip_start += 2;
-            if(len == skip_start)
-                return first(0);
-            for(size_t i = skip_start; i < len; ++i)
+            else
             {
-                char c = str[i];
-                if(c != '0' && c != '1')
-                    return _is_delim_char(c) ? first(i) : first(0);
+                char next = str[skip_start + 1];
+                if(next == 'x' || next == 'X')
+                {
+                    skip_start += 2;
+                    for(size_t i = skip_start; i < len; ++i)
+                    {
+                        const char c = str[i];
+                        if( ! _is_hex_char(c))
+                            return i > skip_start && _is_delim_char(c) ? first(i) : first(0);
+                    }
+                    return *this;
+                }
+                else if(next == 'b' || next == 'B')
+                {
+                    skip_start += 2;
+                    for(size_t i = skip_start; i < len; ++i)
+                    {
+                        const char c = str[i];
+                        if(c != '0' && c != '1')
+                            return i > skip_start && _is_delim_char(c) ? first(i) : first(0);
+                    }
+                    return *this;
+                }
+                else if(next == 'o' || next == 'O')
+                {
+                    skip_start += 2;
+                    for(size_t i = skip_start; i < len; ++i)
+                    {
+                        const char c = str[i];
+                        if(c < '0' || c > '7')
+                            return i > skip_start && _is_delim_char(c) ? first(i) : first(0);
+                    }
+                    return *this;
+                }
             }
         }
-        else // otherwise, decimal
+        // must be a decimal, or it is not a number
+        for(size_t i = skip_start; i < len; ++i)
         {
-            if(len == skip_start)
-                return first(0);
-            for(size_t i = skip_start; i < len; ++i)
-            {
-                char c = str[i];
-                if(c < '0' || c > '9')
-                    return _is_delim_char(c) ? first(i) : first(0);
-            }
+            const char c = str[i];
+            if(c < '0' || c > '9')
+                return i > skip_start && _is_delim_char(c) ? first(i) : first(0);
         }
         return *this;
     }
@@ -6098,125 +6494,433 @@ struct C4CORE_EXPORT basic_substring
         basic_substring ne = first_non_empty_span();
         if(ne.empty())
             return ne;
-        size_t skip_start = (ne.str[0] == '+' || ne.str[0] == '-') ? 1 : 0;
-        if(ne.first_of_any("0x", "0X")) // hexadecimal
-        {
-            skip_start += 2;
-            if(ne.len == skip_start)
-                return ne.first(0);
-            for(size_t i = skip_start; i < ne.len; ++i)
+        const size_t skip_start = (ne.str[0] == '+' || ne.str[0] == '-');
+        C4_ASSERT(skip_start == 0 || skip_start == 1);
+        // if we have at least three chars after the leading sign, it
+        // can be decimal, or hex, or bin or oct. Ex:
+        // non-decimal: 0x0, 0b0, 0o0
+        // decimal: 1.0, 10., 1e1, 100, inf, nan, infinity
+        if(ne.len >= skip_start+3)
+        {
+            // if it does not have leading 0, it must be decimal, or it is not a real
+            if(ne.str[skip_start] != '0')
             {
-                char c = ne.str[i];
-                if(( ! _is_hex_char(c)) && c != '.' && c != 'p' && c != 'P')
+                if(ne.str[skip_start] == 'i') // is it infinity or inf?
                 {
-                    if(c == '-' || c == '+')
-                    {
-                        // we can also have a sign for the exponent
-                        if(i > 1 && (ne[i-1] == 'p' || ne[i-1] == 'P'))
-                        {
-                            continue;
-                        }
-                    }
-                    return _is_delim_char(c) ? ne.first(i) : ne.first(0);
+                    basic_substring word = ne._word_follows(skip_start + 1, "nfinity");
+                    if(word.len)
+                        return word;
+                    return ne._word_follows(skip_start + 1, "nf");
                 }
-            }
-        }
-        else if(ne.first_of_any("0b", "0B")) // binary
-        {
-            skip_start += 2;
-            if(ne.len == skip_start)
-                return ne.first(0);
-            for(size_t i = skip_start; i < ne.len; ++i)
-            {
-                char c = ne.str[i];
-                if(c != '0' && c != '1' && c != '.')
+                else if(ne.str[skip_start] == 'n') // is it nan?
                 {
-                    return _is_delim_char(c) ? ne.first(i) : ne.first(0);
+                    return ne._word_follows(skip_start + 1, "an");
                 }
-            }
-        }
-        else if(ne.first_of_any("0o", "0O")) // octal
-        {
-            skip_start += 2;
-            if(ne.len == skip_start)
-                return ne.first(0);
-            for(size_t i = skip_start; i < ne.len; ++i)
-            {
-                char c = ne.str[i];
-                if((c < '0' || c > '7') && c != '.')
+                else // must be a decimal, or it is not a real
                 {
-                    return _is_delim_char(c) ? ne.first(i) : ne.first(0);
+                    return ne._first_real_span_dec(skip_start);
                 }
             }
-        }
-        else // assume decimal
-        {
-            if(ne.len == skip_start)
-                return ne.first(0);
-            for(size_t i = skip_start; i < ne.len; ++i)
+            else // starts with 0. is it 0x, 0b or 0o?
             {
-                char c = ne.str[i];
-                if((c < '0' || c > '9') && (c != '.' && c != 'e' && c != 'E'))
-                {
-                    if(c == '-' || c == '+')
-                    {
-                        // we can also have a sign for the exponent
-                        if(i > 1 && (ne[i-1] == 'e' || ne[i-1] == 'E'))
-                        {
-                            continue;
-                        }
-                    }
-                    else if(i == skip_start)
-                    {
-                        if(c == 'i')
-                        {
-                            if(ne.len >= skip_start + 8 && ne.sub(skip_start, 8) == "infinity")
-                                return _is_delim_char(ne.str[skip_start + 8]) ? ne.first(skip_start + 8) : ne.first(0);
-                            else if(ne.len >= skip_start + 3 && ne.sub(skip_start, 3) == "inf")
-                                return _is_delim_char(ne.str[skip_start + 3]) ? ne.first(skip_start + 3) : ne.first(0);
-                            else
-                                return ne.first(0);
-                        }
-                        else if(c == 'n')
-                        {
-                            if(ne.len >= skip_start + 3 && ne.sub(skip_start, 3) == "nan")
-                                return _is_delim_char(ne.str[skip_start + 3]) ? ne.first(skip_start + 3) : ne.first(0);
-                            else
-                                return ne.first(0);
-                        }
-                        else
-                        {
-                            return ne.first(0);
-                        }
-                    }
-                    else
-                    {
-                        return _is_delim_char(c) ? ne.first(i) : ne.first(0);
-                    }
-                }
+                const char next = ne.str[skip_start + 1];
+                // hexadecimal
+                if(next == 'x' || next == 'X')
+                    return ne._first_real_span_hex(skip_start + 2);
+                // binary
+                else if(next == 'b' || next == 'B')
+                    return ne._first_real_span_bin(skip_start + 2);
+                // octal
+                else if(next == 'o' || next == 'O')
+                    return ne._first_real_span_oct(skip_start + 2);
+                // none of the above. may still be a decimal.
+                else
+                    return ne._first_real_span_dec(skip_start); // do not skip the 0.
             }
         }
-        return ne;
+        // less than 3 chars after the leading sign. It is either a
+        // decimal or it is not a real. (cannot be any of 0x0, etc).
+        return ne._first_real_span_dec(skip_start);
     }
 
     /** true if the character is a delimiter character *at the end* */
-    static constexpr C4_ALWAYS_INLINE bool _is_delim_char(char c) noexcept
+    static constexpr C4_ALWAYS_INLINE C4_CONST bool _is_delim_char(char c) noexcept
     {
-        return c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '\0'
+        return c == ' ' || c == '\n'
             || c == ']' || c == ')'  || c == '}'
-            || c == ',' || c == ';';
+            || c == ',' || c == ';' || c == '\r' || c == '\t' || c == '\0';
     }
 
     /** true if the character is in [0-9a-fA-F] */
-    static constexpr C4_ALWAYS_INLINE bool _is_hex_char(char c) noexcept
+    static constexpr C4_ALWAYS_INLINE C4_CONST bool _is_hex_char(char c) noexcept
     {
         return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
     }
 
-    /** true if the character is in [0-9a-fA-F] */
-    static constexpr C4_ALWAYS_INLINE bool _is_oct_char(char c) noexcept
+    C4_NO_INLINE C4_PURE basic_substring _word_follows(size_t pos, csubstr word) const noexcept
+    {
+        size_t posend = pos + word.len;
+        if(len >= posend && sub(pos, word.len) == word)
+            if(len == posend || _is_delim_char(str[posend]))
+                return first(posend);
+        return first(0);
+    }
+
+    // this function is declared inside the class to avoid a VS error with __declspec(dllimport)
+    C4_NO_INLINE C4_PURE basic_substring _first_real_span_dec(size_t pos) const noexcept
+    {
+        bool intchars = false;
+        bool fracchars = false;
+        bool powchars;
+        // integral part
+        for( ; pos < len; ++pos)
+        {
+            const char c = str[pos];
+            if(c >= '0' && c <= '9')
+            {
+                intchars = true;
+            }
+            else if(c == '.')
+            {
+                ++pos;
+                goto fractional_part_dec;
+            }
+            else if(c == 'e' || c == 'E')
+            {
+                ++pos;
+                goto power_part_dec;
+            }
+            else if(_is_delim_char(c))
+            {
+                return intchars ? first(pos) : first(0);
+            }
+            else
+            {
+                return first(0);
+            }
+        }
+        // no . or e were found; this is either an integral number
+        // or not a number at all
+        return intchars ?
+            *this :
+            first(0);
+    fractional_part_dec:
+        C4_ASSERT(pos > 0);
+        C4_ASSERT(str[pos - 1] == '.');
+        for( ; pos < len; ++pos)
+        {
+            const char c = str[pos];
+            if(c >= '0' && c <= '9')
+            {
+                fracchars = true;
+            }
+            else if(c == 'e' || c == 'E')
+            {
+                ++pos;
+                goto power_part_dec;
+            }
+            else if(_is_delim_char(c))
+            {
+                return intchars || fracchars ? first(pos) : first(0);
+            }
+            else
+            {
+                return first(0);
+            }
+        }
+        return intchars || fracchars ?
+            *this :
+            first(0);
+    power_part_dec:
+        C4_ASSERT(pos > 0);
+        C4_ASSERT(str[pos - 1] == 'e' || str[pos - 1] == 'E');
+        // either digits, or +, or - are expected here, followed by more digits.
+        if((len == pos) || ((!intchars) && (!fracchars)))
+            return first(0);
+        if(str[pos] == '-' || str[pos] == '+')
+            ++pos; // skip the sign
+        powchars = false;
+        for( ; pos < len; ++pos)
+        {
+            const char c = str[pos];
+            if(c >= '0' && c <= '9')
+                powchars = true;
+            else if(powchars && _is_delim_char(c))
+                return first(pos);
+            else
+                return first(0);
+        }
+        return powchars ? *this : first(0);
+    }
+
+    // this function is declared inside the class to avoid a VS error with __declspec(dllimport)
+    C4_NO_INLINE C4_PURE basic_substring _first_real_span_hex(size_t pos) const noexcept
+    {
+        bool intchars = false;
+        bool fracchars = false;
+        bool powchars;
+        // integral part
+        for( ; pos < len; ++pos)
+        {
+            const char c = str[pos];
+            if(_is_hex_char(c))
+            {
+                intchars = true;
+            }
+            else if(c == '.')
+            {
+                ++pos;
+                goto fractional_part_hex;
+            }
+            else if(c == 'p' || c == 'P')
+            {
+                ++pos;
+                goto power_part_hex;
+            }
+            else if(_is_delim_char(c))
+            {
+                return intchars ? first(pos) : first(0);
+            }
+            else
+            {
+                return first(0);
+            }
+        }
+        // no . or p were found; this is either an integral number
+        // or not a number at all
+        return intchars ?
+            *this :
+            first(0);
+    fractional_part_hex:
+        C4_ASSERT(pos > 0);
+        C4_ASSERT(str[pos - 1] == '.');
+        for( ; pos < len; ++pos)
+        {
+            const char c = str[pos];
+            if(_is_hex_char(c))
+            {
+                fracchars = true;
+            }
+            else if(c == 'p' || c == 'P')
+            {
+                ++pos;
+                goto power_part_hex;
+            }
+            else if(_is_delim_char(c))
+            {
+                return intchars || fracchars ? first(pos) : first(0);
+            }
+            else
+            {
+                return first(0);
+            }
+        }
+        return intchars || fracchars ?
+            *this :
+            first(0);
+    power_part_hex:
+        C4_ASSERT(pos > 0);
+        C4_ASSERT(str[pos - 1] == 'p' || str[pos - 1] == 'P');
+        // either a + or a - is expected here, followed by more chars.
+        // also, using (pos+1) in this check will cause an early
+        // return when no more chars follow the sign.
+        if(len <= (pos+1) || (str[pos] != '+' && str[pos] != '-') || ((!intchars) && (!fracchars)))
+            return first(0);
+        ++pos; // this was the sign.
+        // ... so the (pos+1) ensures that we enter the loop and
+        // hence that there exist chars in the power part
+        powchars = false;
+        for( ; pos < len; ++pos)
+        {
+            const char c = str[pos];
+            if(c >= '0' && c <= '9')
+                powchars = true;
+            else if(powchars && _is_delim_char(c))
+                return first(pos);
+            else
+                return first(0);
+        }
+        return *this;
+    }
+
+    // this function is declared inside the class to avoid a VS error with __declspec(dllimport)
+    C4_NO_INLINE C4_PURE basic_substring _first_real_span_bin(size_t pos) const noexcept
+    {
+        bool intchars = false;
+        bool fracchars = false;
+        bool powchars;
+        // integral part
+        for( ; pos < len; ++pos)
+        {
+            const char c = str[pos];
+            if(c == '0' || c == '1')
+            {
+                intchars = true;
+            }
+            else if(c == '.')
+            {
+                ++pos;
+                goto fractional_part_bin;
+            }
+            else if(c == 'p' || c == 'P')
+            {
+                ++pos;
+                goto power_part_bin;
+            }
+            else if(_is_delim_char(c))
+            {
+                return intchars ? first(pos) : first(0);
+            }
+            else
+            {
+                return first(0);
+            }
+        }
+        // no . or p were found; this is either an integral number
+        // or not a number at all
+        return intchars ?
+            *this :
+            first(0);
+    fractional_part_bin:
+        C4_ASSERT(pos > 0);
+        C4_ASSERT(str[pos - 1] == '.');
+        for( ; pos < len; ++pos)
+        {
+            const char c = str[pos];
+            if(c == '0' || c == '1')
+            {
+                fracchars = true;
+            }
+            else if(c == 'p' || c == 'P')
+            {
+                ++pos;
+                goto power_part_bin;
+            }
+            else if(_is_delim_char(c))
+            {
+                return intchars || fracchars ? first(pos) : first(0);
+            }
+            else
+            {
+                return first(0);
+            }
+        }
+        return intchars || fracchars ?
+            *this :
+            first(0);
+    power_part_bin:
+        C4_ASSERT(pos > 0);
+        C4_ASSERT(str[pos - 1] == 'p' || str[pos - 1] == 'P');
+        // either a + or a - is expected here, followed by more chars.
+        // also, using (pos+1) in this check will cause an early
+        // return when no more chars follow the sign.
+        if(len <= (pos+1) || (str[pos] != '+' && str[pos] != '-') || ((!intchars) && (!fracchars)))
+            return first(0);
+        ++pos; // this was the sign.
+        // ... so the (pos+1) ensures that we enter the loop and
+        // hence that there exist chars in the power part
+        powchars = false;
+        for( ; pos < len; ++pos)
+        {
+            const char c = str[pos];
+            if(c >= '0' && c <= '9')
+                powchars = true;
+            else if(powchars && _is_delim_char(c))
+                return first(pos);
+            else
+                return first(0);
+        }
+        return *this;
+    }
+
+    // this function is declared inside the class to avoid a VS error with __declspec(dllimport)
+    C4_NO_INLINE C4_PURE basic_substring _first_real_span_oct(size_t pos) const noexcept
     {
-        return (c >= '0' && c <= '7');
+        bool intchars = false;
+        bool fracchars = false;
+        bool powchars;
+        // integral part
+        for( ; pos < len; ++pos)
+        {
+            const char c = str[pos];
+            if(c >= '0' && c <= '7')
+            {
+                intchars = true;
+            }
+            else if(c == '.')
+            {
+                ++pos;
+                goto fractional_part_oct;
+            }
+            else if(c == 'p' || c == 'P')
+            {
+                ++pos;
+                goto power_part_oct;
+            }
+            else if(_is_delim_char(c))
+            {
+                return intchars ? first(pos) : first(0);
+            }
+            else
+            {
+                return first(0);
+            }
+        }
+        // no . or p were found; this is either an integral number
+        // or not a number at all
+        return intchars ?
+            *this :
+            first(0);
+    fractional_part_oct:
+        C4_ASSERT(pos > 0);
+        C4_ASSERT(str[pos - 1] == '.');
+        for( ; pos < len; ++pos)
+        {
+            const char c = str[pos];
+            if(c >= '0' && c <= '7')
+            {
+                fracchars = true;
+            }
+            else if(c == 'p' || c == 'P')
+            {
+                ++pos;
+                goto power_part_oct;
+            }
+            else if(_is_delim_char(c))
+            {
+                return intchars || fracchars ? first(pos) : first(0);
+            }
+            else
+            {
+                return first(0);
+            }
+        }
+        return intchars || fracchars ?
+            *this :
+            first(0);
+    power_part_oct:
+        C4_ASSERT(pos > 0);
+        C4_ASSERT(str[pos - 1] == 'p' || str[pos - 1] == 'P');
+        // either a + or a - is expected here, followed by more chars.
+        // also, using (pos+1) in this check will cause an early
+        // return when no more chars follow the sign.
+        if(len <= (pos+1) || (str[pos] != '+' && str[pos] != '-') || ((!intchars) && (!fracchars)))
+            return first(0);
+        ++pos; // this was the sign.
+        // ... so the (pos+1) ensures that we enter the loop and
+        // hence that there exist chars in the power part
+        powchars = false;
+        for( ; pos < len; ++pos)
+        {
+            const char c = str[pos];
+            if(c >= '0' && c <= '9')
+                powchars = true;
+            else if(powchars && _is_delim_char(c))
+                return first(pos);
+            else
+                return first(0);
+        }
+        return *this;
     }
 
     /** @} */
@@ -6234,7 +6938,7 @@ struct C4CORE_EXPORT basic_substring
     {
         if(C4_LIKELY(*start_pos < len))
         {
-            for(size_t i = *start_pos, e = len; i < e; i++)
+            for(size_t i = *start_pos; i < len; i++)
             {
                 if(str[i] == sep)
                 {
@@ -6250,13 +6954,13 @@ struct C4CORE_EXPORT basic_substring
         else
         {
             bool valid = len > 0 && (*start_pos == len);
-            if(valid && !empty() && str[len-1] == sep)
+            if(valid && str && str[len-1] == sep)
             {
-                out->assign(str + len, (size_t)0); // the cast is needed to prevent overload ambiguity
+                out->assign(str + len, size_t(0)); // the cast is needed to prevent overload ambiguity
             }
             else
             {
-                out->assign(str + len + 1, (size_t)0); // the cast is needed to prevent overload ambiguity
+                out->assign(str + len + 1, size_t(0)); // the cast is needed to prevent overload ambiguity
             }
             *start_pos = len + 1;
             return valid;
@@ -6587,7 +7291,11 @@ struct C4CORE_EXPORT basic_substring
         num = num != npos ? num : len - ifirst;
         num = num < that.len ? num : that.len;
         C4_ASSERT(ifirst + num >= 0 && ifirst + num <= len);
-        memcpy(str + sizeof(C) * ifirst, that.str, sizeof(C) * num);
+        // calling memcpy with null strings is undefined behavior
+        // and will wreak havoc in calling code's branches.
+        // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637
+        if(num)
+            memcpy(str + sizeof(C) * ifirst, that.str, sizeof(C) * num);
     }
 
 public:
@@ -6684,7 +7392,7 @@ struct C4CORE_EXPORT basic_substring
     }
 
     /** replace @p pattern with @p repl, and write the result into
-     * @dst. pattern and repl don't need equal sizes.
+     * @p dst. pattern and repl don't need equal sizes.
      *
      * @return the required size for dst. No overflow occurs if
      * dst.len is smaller than the required size; this can be used to
@@ -6705,7 +7413,7 @@ struct C4CORE_EXPORT basic_substring
             {                                                           \
                 C4_ASSERT((last) >= (first));                           \
                 size_t num = static_cast<size_t>((last) - (first));     \
-                if(sz + num <= dst.len)                                 \
+                if(num > 0 && sz + num <= dst.len)                      \
                 {                                                       \
                     memcpy(dst.str + sz, first, num * sizeof(C));       \
                 }                                                       \
@@ -6736,102 +7444,83 @@ struct C4CORE_EXPORT basic_substring
 
 
 #undef C4_REQUIRE_RW
-#undef C4_REQUIRE_RO
-#undef C4_NC2C
 
 
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 
-/** Because of a C++ limitation, substr cannot provide simultaneous
- * overloads for constructing from a char[N] and a char*; the latter
- * will always be chosen by the compiler. So this specialization is
- * provided to simplify obtaining a substr from a char*. Being a
- * function has the advantage of highlighting the strlen() cost.
- *
- * @see to_csubstr
- * @see For a more detailed explanation on why the overloads cannot
- * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */
-inline substr to_substr(char *s)
-{
-    return substr(s, s ? strlen(s) : 0);
-}
-
-/** Because of a C++ limitation, substr cannot provide simultaneous
- * overloads for constructing from a char[N] and a char*; the latter
- * will always be chosen by the compiler. So this specialization is
- * provided to simplify obtaining a substr from a char*. Being a
- * function has the advantage of highlighting the strlen() cost.
- *
- * @see to_substr
- * @see For a more detailed explanation on why the overloads cannot
- * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */
-inline csubstr to_csubstr(char *s)
-{
-    return csubstr(s, s ? strlen(s) : 0);
-}
 
-/** Because of a C++ limitation, substr cannot provide simultaneous
- * overloads for constructing from a const char[N] and a const char*;
- * the latter will always be chosen by the compiler. So this
- * specialization is provided to simplify obtaining a substr from a
- * char*. Being a function has the advantage of highlighting the
- * strlen() cost.
+/** @defgroup doc_substr_adapters substr adapters
  *
- * @overload to_csubstr
- * @see to_substr
- * @see For a more detailed explanation on why the overloads cannot
- * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */
-inline csubstr to_csubstr(const char *s)
-{
-    return csubstr(s, s ? strlen(s) : 0);
-}
+ * to_substr() and to_csubstr() are used in generic code like
+ * format(), and allow adding construction of substrings from new
+ * types like containers.
+ * @{ */
 
 
 /** neutral version for use in generic code */
-inline csubstr to_csubstr(csubstr s)
-{
-    return s;
-}
-
+C4_ALWAYS_INLINE substr to_substr(substr s) noexcept { return s; }
 /** neutral version for use in generic code */
-inline csubstr to_csubstr(substr s)
-{
-    return s;
-}
-
+C4_ALWAYS_INLINE csubstr to_csubstr(substr s) noexcept { return s; }
 /** neutral version for use in generic code */
-inline substr to_substr(substr s)
-{
-    return s;
-}
+C4_ALWAYS_INLINE csubstr to_csubstr(csubstr s) noexcept { return s; }
+
+
+template<size_t N>
+C4_ALWAYS_INLINE substr
+to_substr(char (&s)[N]) noexcept { substr ss(s, N-1); return ss; }
+template<size_t N>
+C4_ALWAYS_INLINE csubstr
+to_csubstr(const char (&s)[N]) noexcept { csubstr ss(s, N-1); return ss; }
+
+
+/** @note this overload uses SFINAE to prevent it from overriding the array overload
+ * @see For a more detailed explanation on why the plain overloads cannot
+ * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */
+template<class U>
+C4_ALWAYS_INLINE typename std::enable_if<std::is_same<U, char*>::value, substr>::type
+to_substr(U s) noexcept { substr ss(s); return ss; }
+/** @note this overload uses SFINAE to prevent it from overriding the array overload
+ * @see For a more detailed explanation on why the plain overloads cannot
+ * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */
+template<class U>
+C4_ALWAYS_INLINE typename std::enable_if<std::is_same<U, const char*>::value || std::is_same<U, char*>::value, csubstr>::type
+to_csubstr(U s) noexcept { csubstr ss(s); return ss; }
+
+
+/** @} */
 
 
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 
-template<typename C, size_t N> inline bool operator== (const C (&s)[N], basic_substring<C> const that) { return that.compare(s) == 0; }
-template<typename C, size_t N> inline bool operator!= (const C (&s)[N], basic_substring<C> const that) { return that.compare(s) != 0; }
-template<typename C, size_t N> inline bool operator<  (const C (&s)[N], basic_substring<C> const that) { return that.compare(s) >  0; }
-template<typename C, size_t N> inline bool operator>  (const C (&s)[N], basic_substring<C> const that) { return that.compare(s) <  0; }
-template<typename C, size_t N> inline bool operator<= (const C (&s)[N], basic_substring<C> const that) { return that.compare(s) >= 0; }
-template<typename C, size_t N> inline bool operator>= (const C (&s)[N], basic_substring<C> const that) { return that.compare(s) <= 0; }
+/** @defgroup doc_substr_cmp substr comparison operators
+ * @{ */
+
+template<typename C, size_t N> inline bool operator== (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) == 0; }
+template<typename C, size_t N> inline bool operator!= (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) != 0; }
+template<typename C, size_t N> inline bool operator<  (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) >  0; }
+template<typename C, size_t N> inline bool operator>  (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) <  0; }
+template<typename C, size_t N> inline bool operator<= (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) >= 0; }
+template<typename C, size_t N> inline bool operator>= (const char (&s)[N], basic_substring<C> const that) noexcept { return that.compare(s, N-1) <= 0; }
 
-template<typename C> inline bool operator== (C const c, basic_substring<C> const that) { return that.compare(c) == 0; }
-template<typename C> inline bool operator!= (C const c, basic_substring<C> const that) { return that.compare(c) != 0; }
-template<typename C> inline bool operator<  (C const c, basic_substring<C> const that) { return that.compare(c) >  0; }
-template<typename C> inline bool operator>  (C const c, basic_substring<C> const that) { return that.compare(c) <  0; }
-template<typename C> inline bool operator<= (C const c, basic_substring<C> const that) { return that.compare(c) >= 0; }
-template<typename C> inline bool operator>= (C const c, basic_substring<C> const that) { return that.compare(c) <= 0; }
+template<typename C> inline bool operator== (const char c, basic_substring<C> const that) noexcept { return that.compare(c) == 0; }
+template<typename C> inline bool operator!= (const char c, basic_substring<C> const that) noexcept { return that.compare(c) != 0; }
+template<typename C> inline bool operator<  (const char c, basic_substring<C> const that) noexcept { return that.compare(c) >  0; }
+template<typename C> inline bool operator>  (const char c, basic_substring<C> const that) noexcept { return that.compare(c) <  0; }
+template<typename C> inline bool operator<= (const char c, basic_substring<C> const that) noexcept { return that.compare(c) >= 0; }
+template<typename C> inline bool operator>= (const char c, basic_substring<C> const that) noexcept { return that.compare(c) <= 0; }
+
+/** @} */
 
 
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 
-/** @define C4_SUBSTR_NO_OSTREAM_LSHIFT doctest does not deal well with
+/* C4_SUBSTR_NO_OSTREAM_LSHIFT doctest does not deal well with
  * template operator<<
  * @see https://github.com/onqtam/doctest/pull/431 */
 #ifndef C4_SUBSTR_NO_OSTREAM_LSHIFT
@@ -6866,6 +7555,8 @@ inline OStream& operator<< (OStream& os, basic_substring<C> s)
 #endif
 #endif // !C4_SUBSTR_NO_OSTREAM_LSHIFT
 
+/** @} */
+
 } // namespace c4
 
 
@@ -6892,29 +7583,40 @@ inline OStream& operator<< (OStream& os, basic_substring<C> s)
 #ifndef _C4_EXT_FAST_FLOAT_HPP_
 #define _C4_EXT_FAST_FLOAT_HPP_
 
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && !defined(__clang__)
 #   pragma warning(push)
+#   pragma warning(disable: 4365) // '=': conversion from 'const _Ty' to 'fast_float::limb', signed/unsigned mismatch
 #   pragma warning(disable: 4996) // snprintf/scanf: this function or variable may be unsafe
 #elif defined(__clang__) || defined(__APPLE_CC__) || defined(_LIBCPP_VERSION)
 #   pragma clang diagnostic push
-#   if (defined(__clang_major__) && _clang_major__ >= 9) || defined(__APPLE_CC__)
+#   if (defined(__clang_major__) && (__clang_major__ >= 9)) || defined(__APPLE_CC__)
 #       pragma clang diagnostic ignored "-Wfortify-source"
 #   endif
 #   pragma clang diagnostic ignored "-Wshift-count-overflow"
+#   pragma clang diagnostic ignored "-Wold-style-cast"
 #elif defined(__GNUC__)
 #   pragma GCC diagnostic push
+#   pragma GCC diagnostic ignored "-Wnarrowing"
+#   pragma GCC diagnostic ignored "-Wconversion"
 #   pragma GCC diagnostic ignored "-Wuseless-cast"
+#   pragma GCC diagnostic ignored "-Wold-style-cast"
 #endif
 
 // fast_float by Daniel Lemire
 // fast_float by João Paulo Magalhaes
 //
+//
 // with contributions from Eugene Golushkov
 // with contributions from Maksim Kita
 // with contributions from Marcin Wojdyr
 // with contributions from Neal Richardson
 // with contributions from Tim Paine
 // with contributions from Fabio Pellacini
+// with contributions from Lénárd Szolnoki
+// with contributions from Jan Pharago
+// with contributions from Maya Warrier
+// with contributions from Taha Khokhar
+//
 //
 // MIT License Notice
 //
@@ -6947,92 +7649,120 @@ inline OStream& operator<< (OStream& os, basic_substring<C> s)
 //    DEALINGS IN THE SOFTWARE.
 //
 
-#ifndef FASTFLOAT_FAST_FLOAT_H
-#define FASTFLOAT_FAST_FLOAT_H
+#ifndef FASTFLOAT_CONSTEXPR_FEATURE_DETECT_H
+#define FASTFLOAT_CONSTEXPR_FEATURE_DETECT_H
+
+#ifdef __has_include
+#if __has_include(<version>)
+#include <version>
+#endif
+#endif
+
+// Testing for https://wg21.link/N3652, adopted in C++14
+#if __cpp_constexpr >= 201304
+#define FASTFLOAT_CONSTEXPR14 constexpr
+#else
+#define FASTFLOAT_CONSTEXPR14
+#endif
+
+#if defined(__cpp_lib_bit_cast) && __cpp_lib_bit_cast >= 201806L
+#define FASTFLOAT_HAS_BIT_CAST 1
+#else
+#define FASTFLOAT_HAS_BIT_CAST 0
+#endif
 
+#if defined(__cpp_lib_is_constant_evaluated) && __cpp_lib_is_constant_evaluated >= 201811L
+#define FASTFLOAT_HAS_IS_CONSTANT_EVALUATED 1
+#else
+#define FASTFLOAT_HAS_IS_CONSTANT_EVALUATED 0
+#endif
+
+// Testing for relevant C++20 constexpr library features
+#if FASTFLOAT_HAS_IS_CONSTANT_EVALUATED \
+    && FASTFLOAT_HAS_BIT_CAST \
+    && __cpp_lib_constexpr_algorithms >= 201806L /*For std::copy and std::fill*/
+#define FASTFLOAT_CONSTEXPR20 constexpr
+#define FASTFLOAT_IS_CONSTEXPR 1
+#else
+#define FASTFLOAT_CONSTEXPR20
+#define FASTFLOAT_IS_CONSTEXPR 0
+#endif
+
+#endif // FASTFLOAT_CONSTEXPR_FEATURE_DETECT_H
+
+#ifndef FASTFLOAT_FLOAT_COMMON_H
+#define FASTFLOAT_FLOAT_COMMON_H
+
+#include <cfloat>
+//included above:
+//#include <cstdint>
+#include <cassert>
+//included above:
+//#include <cstring>
+//included above:
+//#include <type_traits>
 #include <system_error>
+#ifdef __has_include
+  #if __has_include(<stdfloat>) && (__cplusplus > 202002L || _MSVC_LANG > 202002L)
+    #include <stdfloat>
+  #endif
+#endif
 
 namespace fast_float {
+
+#define FASTFLOAT_JSONFMT (1 << 5)
+#define FASTFLOAT_FORTRANFMT (1 << 6)
+
 enum chars_format {
-    scientific = 1<<0,
-    fixed = 1<<2,
-    hex = 1<<3,
-    general = fixed | scientific
+  scientific = 1 << 0,
+  fixed = 1 << 2,
+  hex = 1 << 3,
+  no_infnan = 1 << 4,
+  // RFC 8259: https://datatracker.ietf.org/doc/html/rfc8259#section-6
+  json = FASTFLOAT_JSONFMT | fixed | scientific | no_infnan,
+  // Extension of RFC 8259 where, e.g., "inf" and "nan" are allowed.
+  json_or_infnan = FASTFLOAT_JSONFMT | fixed | scientific,
+  fortran = FASTFLOAT_FORTRANFMT | fixed | scientific,
+  general = fixed | scientific
 };
 
-
-struct from_chars_result {
-  const char *ptr;
+template <typename UC>
+struct from_chars_result_t {
+  UC const* ptr;
   std::errc ec;
 };
+using from_chars_result = from_chars_result_t<char>;
 
-struct parse_options {
-  constexpr explicit parse_options(chars_format fmt = chars_format::general,
-                         char dot = '.')
+template <typename UC>
+struct parse_options_t {
+  constexpr explicit parse_options_t(chars_format fmt = chars_format::general,
+    UC dot = UC('.'))
     : format(fmt), decimal_point(dot) {}
 
   /** Which number formats are accepted */
   chars_format format;
   /** The character used as decimal point */
-  char decimal_point;
+  UC decimal_point;
 };
-
-/**
- * This function parses the character sequence [first,last) for a number. It parses floating-point numbers expecting
- * a locale-indepent format equivalent to what is used by std::strtod in the default ("C") locale.
- * The resulting floating-point value is the closest floating-point values (using either float or double),
- * using the "round to even" convention for values that would otherwise fall right in-between two values.
- * That is, we provide exact parsing according to the IEEE standard.
- *
- * Given a successful parse, the pointer (`ptr`) in the returned value is set to point right after the
- * parsed number, and the `value` referenced is set to the parsed value. In case of error, the returned
- * `ec` contains a representative error, otherwise the default (`std::errc()`) value is stored.
- *
- * The implementation does not throw and does not allocate memory (e.g., with `new` or `malloc`).
- *
- * Like the C++17 standard, the `fast_float::from_chars` functions take an optional last argument of
- * the type `fast_float::chars_format`. It is a bitset value: we check whether
- * `fmt & fast_float::chars_format::fixed` and `fmt & fast_float::chars_format::scientific` are set
- * to determine whether we allowe the fixed point and scientific notation respectively.
- * The default is  `fast_float::chars_format::general` which allows both `fixed` and `scientific`.
- */
-template<typename T>
-from_chars_result from_chars(const char *first, const char *last,
-                             T &value, chars_format fmt = chars_format::general)  noexcept;
-
-/**
- * Like from_chars, but accepts an `options` argument to govern number parsing.
- */
-template<typename T>
-from_chars_result from_chars_advanced(const char *first, const char *last,
-                                      T &value, parse_options options)  noexcept;
+using parse_options = parse_options_t<char>;
 
 }
-#endif // FASTFLOAT_FAST_FLOAT_H
-
-#ifndef FASTFLOAT_FLOAT_COMMON_H
-#define FASTFLOAT_FLOAT_COMMON_H
 
-#include <cfloat>
-//included above:
-//#include <cstdint>
-#include <cassert>
-//included above:
-//#include <cstring>
-//included above:
-//#include <type_traits>
+#if FASTFLOAT_HAS_BIT_CAST
+#include <bit>
+#endif
 
 #if (defined(__x86_64) || defined(__x86_64__) || defined(_M_X64)   \
        || defined(__amd64) || defined(__aarch64__) || defined(_M_ARM64) \
        || defined(__MINGW64__)                                          \
        || defined(__s390x__)                                            \
        || (defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || defined(__PPC64LE__)) \
-       || defined(__EMSCRIPTEN__))
-#define FASTFLOAT_64BIT
+       || defined(__loongarch64) )
+#define FASTFLOAT_64BIT 1
 #elif (defined(__i386) || defined(__i386__) || defined(_M_IX86)   \
-     || defined(__arm__) || defined(_M_ARM)                   \
-     || defined(__MINGW32__))
-#define FASTFLOAT_32BIT
+     || defined(__arm__) || defined(_M_ARM) || defined(__ppc__)   \
+     || defined(__MINGW32__) || defined(__EMSCRIPTEN__))
+#define FASTFLOAT_32BIT 1
 #else
   // Need to check incrementally, since SIZE_MAX is a size_t, avoid overflow.
   // We can never tell the register width, but the SIZE_MAX is a good approximation.
@@ -7040,9 +7770,9 @@ from_chars_result from_chars_advanced(const char *first, const char *last,
   #if SIZE_MAX == 0xffff
     #error Unknown platform (16-bit, unsupported)
   #elif SIZE_MAX == 0xffffffff
-    #define FASTFLOAT_32BIT
+    #define FASTFLOAT_32BIT 1
   #elif SIZE_MAX == 0xffffffffffffffff
-    #define FASTFLOAT_64BIT
+    #define FASTFLOAT_64BIT 1
   #else
     #error Unknown platform (not 32-bit, not 64-bit?)
   #endif
@@ -7066,8 +7796,14 @@ from_chars_result from_chars_advanced(const char *first, const char *last,
 #include <machine/endian.h>
 #elif defined(sun) || defined(__sun)
 #include <sys/byteorder.h>
+#elif defined(__MVS__)
+#include <sys/endian.h>
 #else
+#ifdef __has_include
+#if __has_include(<endian.h>)
 #include <endian.h>
+#endif //__has_include(<endian.h>)
+#endif //__has_include
 #endif
 #
 #ifndef __BYTE_ORDER__
@@ -7087,6 +7823,38 @@ from_chars_result from_chars_advanced(const char *first, const char *last,
 #endif
 #endif
 
+#if defined(__SSE2__) || \
+  (defined(FASTFLOAT_VISUAL_STUDIO) && \
+    (defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP == 2)))
+#define FASTFLOAT_SSE2 1
+#endif
+
+#if defined(__aarch64__) || defined(_M_ARM64)
+#define FASTFLOAT_NEON 1
+#endif
+
+#if defined(FASTFLOAT_SSE2) || defined(FASTFLOAT_NEON)
+#define FASTFLOAT_HAS_SIMD 1
+#endif
+
+#if defined(__GNUC__)
+// disable -Wcast-align=strict (GCC only)
+#define FASTFLOAT_SIMD_DISABLE_WARNINGS \
+  _Pragma("GCC diagnostic push") \
+  _Pragma("GCC diagnostic ignored \"-Wcast-align\"")
+#else
+#define FASTFLOAT_SIMD_DISABLE_WARNINGS
+#endif
+
+#if defined(__GNUC__)
+#define FASTFLOAT_SIMD_RESTORE_WARNINGS \
+  _Pragma("GCC diagnostic pop")
+#else
+#define FASTFLOAT_SIMD_RESTORE_WARNINGS
+#endif
+
+
+
 #ifdef FASTFLOAT_VISUAL_STUDIO
 #define fastfloat_really_inline __forceinline
 #else
@@ -7094,26 +7862,57 @@ from_chars_result from_chars_advanced(const char *first, const char *last,
 #endif
 
 #ifndef FASTFLOAT_ASSERT
-#define FASTFLOAT_ASSERT(x)  { if (!(x)) abort(); }
+#define FASTFLOAT_ASSERT(x)  { ((void)(x)); }
 #endif
 
 #ifndef FASTFLOAT_DEBUG_ASSERT
-//included above:
-//#include <cassert>
-#define FASTFLOAT_DEBUG_ASSERT(x) assert(x)
+#define FASTFLOAT_DEBUG_ASSERT(x) { ((void)(x)); }
 #endif
 
 // rust style `try!()` macro, or `?` operator
 #define FASTFLOAT_TRY(x) { if (!(x)) return false; }
 
+#define FASTFLOAT_ENABLE_IF(...) typename std::enable_if<(__VA_ARGS__), int>::type
+
+
 namespace fast_float {
 
+fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() {
+#if FASTFLOAT_HAS_IS_CONSTANT_EVALUATED
+  return std::is_constant_evaluated();
+#else
+  return false;
+#endif
+}
+
+template <typename T>
+fastfloat_really_inline constexpr bool is_supported_float_type() {
+  return std::is_same<T, float>::value || std::is_same<T, double>::value
+#if __STDCPP_FLOAT32_T__
+    || std::is_same<T, std::float32_t>::value
+#endif
+#if __STDCPP_FLOAT64_T__
+    || std::is_same<T, std::float64_t>::value
+#endif
+  ;
+}
+
+template <typename UC>
+fastfloat_really_inline constexpr bool is_supported_char_type() {
+  return
+    std::is_same<UC, char>::value ||
+    std::is_same<UC, wchar_t>::value ||
+    std::is_same<UC, char16_t>::value ||
+    std::is_same<UC, char32_t>::value;
+}
+
 // Compares two ASCII strings in a case insensitive manner.
-inline bool fastfloat_strncasecmp(const char *input1, const char *input2,
-                                  size_t length) {
+template <typename UC>
+inline FASTFLOAT_CONSTEXPR14 bool
+fastfloat_strncasecmp(UC const * input1, UC const * input2, size_t length) {
   char running_diff{0};
-  for (size_t i = 0; i < length; i++) {
-    running_diff |= (input1[i] ^ input2[i]);
+  for (size_t i = 0; i < length; ++i) {
+    running_diff |= (char(input1[i]) ^ char(input2[i]));
   }
   return (running_diff == 0) || (running_diff == 32);
 }
@@ -7127,14 +7926,14 @@ template <typename T>
 struct span {
   const T* ptr;
   size_t length;
-  span(const T* _ptr, size_t _length) : ptr(_ptr), length(_length) {}
-  span() : ptr(nullptr), length(0) {}
+  constexpr span(const T* _ptr, size_t _length) : ptr(_ptr), length(_length) {}
+  constexpr span() : ptr(nullptr), length(0) {}
 
   constexpr size_t len() const noexcept {
     return length;
   }
 
-  const T& operator[](size_t index) const noexcept {
+  FASTFLOAT_CONSTEXPR14 const T& operator[](size_t index) const noexcept {
     FASTFLOAT_DEBUG_ASSERT(index < length);
     return ptr[index];
   }
@@ -7143,13 +7942,29 @@ struct span {
 struct value128 {
   uint64_t low;
   uint64_t high;
-  value128(uint64_t _low, uint64_t _high) : low(_low), high(_high) {}
-  value128() : low(0), high(0) {}
+  constexpr value128(uint64_t _low, uint64_t _high) : low(_low), high(_high) {}
+  constexpr value128() : low(0), high(0) {}
 };
 
+/* Helper C++14 constexpr generic implementation of leading_zeroes */
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14
+int leading_zeroes_generic(uint64_t input_num, int last_bit = 0) {
+    if(input_num & uint64_t(0xffffffff00000000)) { input_num >>= 32; last_bit |= 32; }
+    if(input_num & uint64_t(        0xffff0000)) { input_num >>= 16; last_bit |= 16; }
+    if(input_num & uint64_t(            0xff00)) { input_num >>=  8; last_bit |=  8; }
+    if(input_num & uint64_t(              0xf0)) { input_num >>=  4; last_bit |=  4; }
+    if(input_num & uint64_t(               0xc)) { input_num >>=  2; last_bit |=  2; }
+    if(input_num & uint64_t(               0x2)) { /* input_num >>=  1; */ last_bit |=  1; }
+    return 63 - last_bit;
+}
+
 /* result might be undefined when input_num is zero */
-fastfloat_really_inline int leading_zeroes(uint64_t input_num) {
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
+int leading_zeroes(uint64_t input_num) {
   assert(input_num > 0);
+  if (cpp20_and_in_constexpr()) {
+    return leading_zeroes_generic(input_num);
+  }
 #ifdef FASTFLOAT_VISUAL_STUDIO
   #if defined(_M_X64) || defined(_M_ARM64)
   unsigned long leading_zero = 0;
@@ -7158,61 +7973,65 @@ fastfloat_really_inline int leading_zeroes(uint64_t input_num) {
   _BitScanReverse64(&leading_zero, input_num);
   return (int)(63 - leading_zero);
   #else
-  int last_bit = 0;
-  if(input_num & uint64_t(0xffffffff00000000)) input_num >>= 32, last_bit |= 32;
-  if(input_num & uint64_t(        0xffff0000)) input_num >>= 16, last_bit |= 16;
-  if(input_num & uint64_t(            0xff00)) input_num >>=  8, last_bit |=  8;
-  if(input_num & uint64_t(              0xf0)) input_num >>=  4, last_bit |=  4;
-  if(input_num & uint64_t(               0xc)) input_num >>=  2, last_bit |=  2;
-  if(input_num & uint64_t(               0x2)) input_num >>=  1, last_bit |=  1;
-  return 63 - last_bit;
+  return leading_zeroes_generic(input_num);
   #endif
 #else
   return __builtin_clzll(input_num);
 #endif
 }
 
-#ifdef FASTFLOAT_32BIT
-
 // slow emulation routine for 32-bit
-fastfloat_really_inline uint64_t emulu(uint32_t x, uint32_t y) {
+fastfloat_really_inline constexpr uint64_t emulu(uint32_t x, uint32_t y) {
     return x * (uint64_t)y;
 }
 
-// slow emulation routine for 32-bit
-#if !defined(__MINGW64__)
-fastfloat_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd,
-                                          uint64_t *hi) {
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14
+uint64_t umul128_generic(uint64_t ab, uint64_t cd, uint64_t *hi) {
   uint64_t ad = emulu((uint32_t)(ab >> 32), (uint32_t)cd);
   uint64_t bd = emulu((uint32_t)ab, (uint32_t)cd);
   uint64_t adbc = ad + emulu((uint32_t)ab, (uint32_t)(cd >> 32));
-  uint64_t adbc_carry = !!(adbc < ad);
+  uint64_t adbc_carry = (uint64_t)(adbc < ad);
   uint64_t lo = bd + (adbc << 32);
   *hi = emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) +
-        (adbc_carry << 32) + !!(lo < bd);
+        (adbc_carry << 32) + (uint64_t)(lo < bd);
   return lo;
 }
+
+#ifdef FASTFLOAT_32BIT
+
+// slow emulation routine for 32-bit
+#if !defined(__MINGW64__)
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14
+uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) {
+  return umul128_generic(ab, cd, hi);
+}
 #endif // !__MINGW64__
 
 #endif // FASTFLOAT_32BIT
 
 
 // compute 64-bit a*b
-fastfloat_really_inline value128 full_multiplication(uint64_t a,
-                                                     uint64_t b) {
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
+value128 full_multiplication(uint64_t a, uint64_t b) {
+  if (cpp20_and_in_constexpr()) {
+    value128 answer;
+    answer.low = umul128_generic(a, b, &answer.high);
+    return answer;
+  }
   value128 answer;
-#ifdef _M_ARM64
+#if defined(_M_ARM64) && !defined(__MINGW32__)
   // ARM64 has native support for 64-bit multiplications, no need to emulate
+  // But MinGW on ARM64 doesn't have native support for 64-bit multiplications
   answer.high = __umulh(a, b);
   answer.low = a * b;
 #elif defined(FASTFLOAT_32BIT) || (defined(_WIN64) && !defined(__clang__))
   answer.low = _umul128(a, b, &answer.high); // _umul128 not available on ARM64
-#elif defined(FASTFLOAT_64BIT)
+#elif defined(FASTFLOAT_64BIT) && defined(__SIZEOF_INT128__)
   __uint128_t r = ((__uint128_t)a) * b;
   answer.low = uint64_t(r);
   answer.high = uint64_t(r >> 64);
 #else
-  #error Not implemented
+  answer.low = umul128_generic(a, b, &answer.high);
 #endif
   return answer;
 }
@@ -7221,10 +8040,10 @@ struct adjusted_mantissa {
   uint64_t mantissa{0};
   int32_t power2{0}; // a negative value indicates an invalid result
   adjusted_mantissa() = default;
-  bool operator==(const adjusted_mantissa &o) const {
+  constexpr bool operator==(const adjusted_mantissa &o) const {
     return mantissa == o.mantissa && power2 == o.power2;
   }
-  bool operator!=(const adjusted_mantissa &o) const {
+  constexpr bool operator!=(const adjusted_mantissa &o) const {
     return mantissa != o.mantissa || power2 != o.power2;
   }
 };
@@ -7232,24 +8051,25 @@ struct adjusted_mantissa {
 // Bias so we can get the real exponent with an invalid adjusted_mantissa.
 constexpr static int32_t invalid_am_bias = -0x8000;
 
-constexpr static double powers_of_ten_double[] = {
-    1e0,  1e1,  1e2,  1e3,  1e4,  1e5,  1e6,  1e7,  1e8,  1e9,  1e10, 1e11,
-    1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22};
-constexpr static float powers_of_ten_float[] = {1e0, 1e1, 1e2, 1e3, 1e4, 1e5,
-                                                1e6, 1e7, 1e8, 1e9, 1e10};
+// used for binary_format_lookup_tables<T>::max_mantissa
+constexpr uint64_t constant_55555 = 5 * 5 * 5 * 5 * 5;
 
-template <typename T> struct binary_format {
+template <typename T, typename U = void>
+struct binary_format_lookup_tables;
+
+template <typename T> struct binary_format : binary_format_lookup_tables<T> {
   using equiv_uint = typename std::conditional<sizeof(T) == 4, uint32_t, uint64_t>::type;
 
   static inline constexpr int mantissa_explicit_bits();
   static inline constexpr int minimum_exponent();
   static inline constexpr int infinite_power();
   static inline constexpr int sign_index();
-  static inline constexpr int min_exponent_fast_path();
+  static inline constexpr int min_exponent_fast_path(); // used when fegetround() == FE_TONEAREST
   static inline constexpr int max_exponent_fast_path();
   static inline constexpr int max_exponent_round_to_even();
   static inline constexpr int min_exponent_round_to_even();
-  static inline constexpr uint64_t max_mantissa_fast_path();
+  static inline constexpr uint64_t max_mantissa_fast_path(int64_t power);
+  static inline constexpr uint64_t max_mantissa_fast_path(); // used when fegetround() == FE_TONEAREST
   static inline constexpr int largest_power_of_ten();
   static inline constexpr int smallest_power_of_ten();
   static inline constexpr T exact_power_of_ten(int64_t power);
@@ -7259,6 +8079,91 @@ template <typename T> struct binary_format {
   static inline constexpr equiv_uint hidden_bit_mask();
 };
 
+template <typename U>
+struct binary_format_lookup_tables<double, U> {
+  static constexpr double powers_of_ten[] = {
+      1e0,  1e1,  1e2,  1e3,  1e4,  1e5,  1e6,  1e7,  1e8,  1e9,  1e10, 1e11,
+      1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22};
+
+  // Largest integer value v so that (5**index * v) <= 1<<53.
+  // 0x10000000000000 == 1 << 53
+  static constexpr uint64_t max_mantissa[] = {
+      0x10000000000000,
+      0x10000000000000 / 5,
+      0x10000000000000 / (5 * 5),
+      0x10000000000000 / (5 * 5 * 5),
+      0x10000000000000 / (5 * 5 * 5 * 5),
+      0x10000000000000 / (constant_55555),
+      0x10000000000000 / (constant_55555 * 5),
+      0x10000000000000 / (constant_55555 * 5 * 5),
+      0x10000000000000 / (constant_55555 * 5 * 5 * 5),
+      0x10000000000000 / (constant_55555 * 5 * 5 * 5 * 5),
+      0x10000000000000 / (constant_55555 * constant_55555),
+      0x10000000000000 / (constant_55555 * constant_55555 * 5),
+      0x10000000000000 / (constant_55555 * constant_55555 * 5 * 5),
+      0x10000000000000 / (constant_55555 * constant_55555 * 5 * 5 * 5),
+      0x10000000000000 / (constant_55555 * constant_55555 * constant_55555),
+      0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * 5),
+      0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * 5 * 5),
+      0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * 5 * 5 * 5),
+      0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * 5 * 5 * 5 * 5),
+      0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * constant_55555),
+      0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * constant_55555 * 5),
+      0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * constant_55555 * 5 * 5),
+      0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * constant_55555 * 5 * 5 * 5),
+      0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * constant_55555 * 5 * 5 * 5 * 5)};
+};
+
+template <typename U>
+constexpr double binary_format_lookup_tables<double, U>::powers_of_ten[];
+
+template <typename U>
+constexpr uint64_t binary_format_lookup_tables<double, U>::max_mantissa[];
+
+template <typename U>
+struct binary_format_lookup_tables<float, U> {
+  static constexpr float powers_of_ten[] = {1e0f, 1e1f, 1e2f, 1e3f, 1e4f, 1e5f,
+                                     1e6f, 1e7f, 1e8f, 1e9f, 1e10f};
+
+  // Largest integer value v so that (5**index * v) <= 1<<24.
+  // 0x1000000 == 1<<24
+  static constexpr uint64_t max_mantissa[] = {
+        0x1000000,
+        0x1000000 / 5,
+        0x1000000 / (5 * 5),
+        0x1000000 / (5 * 5 * 5),
+        0x1000000 / (5 * 5 * 5 * 5),
+        0x1000000 / (constant_55555),
+        0x1000000 / (constant_55555 * 5),
+        0x1000000 / (constant_55555 * 5 * 5),
+        0x1000000 / (constant_55555 * 5 * 5 * 5),
+        0x1000000 / (constant_55555 * 5 * 5 * 5 * 5),
+        0x1000000 / (constant_55555 * constant_55555),
+        0x1000000 / (constant_55555 * constant_55555 * 5)};
+};
+
+template <typename U>
+constexpr float binary_format_lookup_tables<float, U>::powers_of_ten[];
+
+template <typename U>
+constexpr uint64_t binary_format_lookup_tables<float, U>::max_mantissa[];
+
+template <> inline constexpr int binary_format<double>::min_exponent_fast_path() {
+#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0)
+  return 0;
+#else
+  return -22;
+#endif
+}
+
+template <> inline constexpr int binary_format<float>::min_exponent_fast_path() {
+#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0)
+  return 0;
+#else
+  return -10;
+#endif
+}
+
 template <> inline constexpr int binary_format<double>::mantissa_explicit_bits() {
   return 52;
 }
@@ -7299,21 +8204,6 @@ template <> inline constexpr int binary_format<float>::infinite_power() {
 template <> inline constexpr int binary_format<double>::sign_index() { return 63; }
 template <> inline constexpr int binary_format<float>::sign_index() { return 31; }
 
-template <> inline constexpr int binary_format<double>::min_exponent_fast_path() {
-#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0)
-  return 0;
-#else
-  return -22;
-#endif
-}
-template <> inline constexpr int binary_format<float>::min_exponent_fast_path() {
-#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0)
-  return 0;
-#else
-  return -10;
-#endif
-}
-
 template <> inline constexpr int binary_format<double>::max_exponent_fast_path() {
   return 22;
 }
@@ -7324,18 +8214,33 @@ template <> inline constexpr int binary_format<float>::max_exponent_fast_path()
 template <> inline constexpr uint64_t binary_format<double>::max_mantissa_fast_path() {
   return uint64_t(2) << mantissa_explicit_bits();
 }
+template <> inline constexpr uint64_t binary_format<double>::max_mantissa_fast_path(int64_t power) {
+  // caller is responsible to ensure that
+  // power >= 0 && power <= 22
+  //
+  // Work around clang bug https://godbolt.org/z/zedh7rrhc
+  return (void)max_mantissa[0], max_mantissa[power];
+}
 template <> inline constexpr uint64_t binary_format<float>::max_mantissa_fast_path() {
   return uint64_t(2) << mantissa_explicit_bits();
 }
+template <> inline constexpr uint64_t binary_format<float>::max_mantissa_fast_path(int64_t power) {
+  // caller is responsible to ensure that
+  // power >= 0 && power <= 10
+  //
+  // Work around clang bug https://godbolt.org/z/zedh7rrhc
+  return (void)max_mantissa[0], max_mantissa[power];
+}
 
 template <>
 inline constexpr double binary_format<double>::exact_power_of_ten(int64_t power) {
-  return powers_of_ten_double[power];
+  // Work around clang bug https://godbolt.org/z/zedh7rrhc
+  return (void)powers_of_ten[0], powers_of_ten[power];
 }
 template <>
 inline constexpr float binary_format<float>::exact_power_of_ten(int64_t power) {
-
-  return powers_of_ten_float[power];
+  // Work around clang bug https://godbolt.org/z/zedh7rrhc
+  return (void)powers_of_ten[0], powers_of_ten[power];
 }
 
 
@@ -7354,7 +8259,7 @@ inline constexpr int binary_format<double>::smallest_power_of_ten() {
 }
 template <>
 inline constexpr int binary_format<float>::smallest_power_of_ten() {
-  return -65;
+  return -64;
 }
 
 template <> inline constexpr size_t binary_format<double>::max_digits() {
@@ -7392,27 +8297,217 @@ template <> inline constexpr binary_format<double>::equiv_uint
 }
 
 template<typename T>
-fastfloat_really_inline void to_float(bool negative, adjusted_mantissa am, T &value) {
-  uint64_t word = am.mantissa;
-  word |= uint64_t(am.power2) << binary_format<T>::mantissa_explicit_bits();
-  word = negative
-  ? word | (uint64_t(1) << binary_format<T>::sign_index()) : word;
-#if FASTFLOAT_IS_BIG_ENDIAN == 1
-   if (std::is_same<T, float>::value) {
-     ::memcpy(&value, (char *)&word + 4, sizeof(T)); // extract value at offset 4-7 if float on big-endian
-   } else {
-     ::memcpy(&value, &word, sizeof(T));
-   }
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
+void to_float(bool negative, adjusted_mantissa am, T &value) {
+  using fastfloat_uint = typename binary_format<T>::equiv_uint;
+  fastfloat_uint word = (fastfloat_uint)am.mantissa;
+  word |= fastfloat_uint(am.power2) << binary_format<T>::mantissa_explicit_bits();
+  word |= fastfloat_uint(negative) << binary_format<T>::sign_index();
+#if FASTFLOAT_HAS_BIT_CAST
+  value = std::bit_cast<T>(word);
 #else
-   // For little-endian systems:
-   ::memcpy(&value, &word, sizeof(T));
+  ::memcpy(&value, &word, sizeof(T));
+#endif
+}
+
+#ifdef FASTFLOAT_SKIP_WHITE_SPACE // disabled by default
+template <typename = void>
+struct space_lut {
+  static constexpr bool value[] = {
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+};
+
+template <typename T>
+constexpr bool space_lut<T>::value[];
+
+inline constexpr bool is_space(uint8_t c) { return space_lut<>::value[c]; }
 #endif
+
+template<typename UC>
+static constexpr uint64_t int_cmp_zeros()
+{
+    static_assert((sizeof(UC) == 1) || (sizeof(UC) == 2) || (sizeof(UC) == 4), "Unsupported character size");
+    return (sizeof(UC) == 1) ? 0x3030303030303030 : (sizeof(UC) == 2) ? (uint64_t(UC('0')) << 48 | uint64_t(UC('0')) << 32 | uint64_t(UC('0')) << 16 | UC('0')) : (uint64_t(UC('0')) << 32 | UC('0'));
+}
+template<typename UC>
+static constexpr int int_cmp_len()
+{
+    return sizeof(uint64_t) / sizeof(UC);
+}
+template<typename UC>
+static constexpr UC const * str_const_nan()
+{
+    return nullptr;
+}
+template<>
+constexpr char const * str_const_nan<char>()
+{
+    return "nan";
+}
+template<>
+constexpr wchar_t const * str_const_nan<wchar_t>()
+{
+    return L"nan";
+}
+template<>
+constexpr char16_t const * str_const_nan<char16_t>()
+{
+    return u"nan";
+}
+template<>
+constexpr char32_t const * str_const_nan<char32_t>()
+{
+    return U"nan";
 }
+template<typename UC>
+static constexpr UC const * str_const_inf()
+{
+    return nullptr;
+}
+template<>
+constexpr char const * str_const_inf<char>()
+{
+    return "infinity";
+}
+template<>
+constexpr wchar_t const * str_const_inf<wchar_t>()
+{
+    return L"infinity";
+}
+template<>
+constexpr char16_t const * str_const_inf<char16_t>()
+{
+    return u"infinity";
+}
+template<>
+constexpr char32_t const * str_const_inf<char32_t>()
+{
+    return U"infinity";
+}
+
+
+template <typename = void>
+struct int_luts {
+  static constexpr uint8_t chdigit[] = {
+    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 255, 255, 255, 255, 255, 255,
+    255, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+    25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 255, 255, 255, 255, 255,
+    255, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+    25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255
+  };
+
+  static constexpr size_t maxdigits_u64[] = {
+    64, 41, 32, 28, 25, 23, 22, 21,
+    20, 19, 18, 18, 17, 17, 16, 16,
+    16, 16, 15, 15, 15, 15, 14, 14,
+    14, 14, 14, 14, 14, 13, 13, 13,
+    13, 13, 13
+  };
+
+  static constexpr uint64_t min_safe_u64[] = {
+    9223372036854775808ull, 12157665459056928801ull, 4611686018427387904, 7450580596923828125, 4738381338321616896,
+    3909821048582988049, 9223372036854775808ull, 12157665459056928801ull, 10000000000000000000ull, 5559917313492231481,
+    2218611106740436992, 8650415919381337933, 2177953337809371136, 6568408355712890625, 1152921504606846976, 
+    2862423051509815793, 6746640616477458432, 15181127029874798299ull, 1638400000000000000, 3243919932521508681,
+    6221821273427820544, 11592836324538749809ull, 876488338465357824, 1490116119384765625, 2481152873203736576,
+    4052555153018976267, 6502111422497947648, 10260628712958602189ull, 15943230000000000000ull, 787662783788549761,
+    1152921504606846976, 1667889514952984961, 2386420683693101056, 3379220508056640625, 4738381338321616896
+  };
+};
+
+template <typename T>
+constexpr uint8_t int_luts<T>::chdigit[];
+
+template <typename T>
+constexpr size_t int_luts<T>::maxdigits_u64[];
+
+template <typename T>
+constexpr uint64_t int_luts<T>::min_safe_u64[];
+
+template <typename UC>
+fastfloat_really_inline
+constexpr uint8_t ch_to_digit(UC c) { return int_luts<>::chdigit[static_cast<unsigned char>(c)]; }
+
+fastfloat_really_inline
+constexpr size_t max_digits_u64(int base) { return int_luts<>::maxdigits_u64[base - 2]; }
+
+// If a u64 is exactly max_digits_u64() in length, this is
+// the value below which it has definitely overflowed. 
+fastfloat_really_inline
+constexpr uint64_t min_safe_u64(int base) { return int_luts<>::min_safe_u64[base - 2]; }
 
 } // namespace fast_float
 
 #endif
 
+
+#ifndef FASTFLOAT_FAST_FLOAT_H
+#define FASTFLOAT_FAST_FLOAT_H
+
+
+namespace fast_float {
+/**
+ * This function parses the character sequence [first,last) for a number. It parses floating-point numbers expecting
+ * a locale-indepent format equivalent to what is used by std::strtod in the default ("C") locale.
+ * The resulting floating-point value is the closest floating-point values (using either float or double),
+ * using the "round to even" convention for values that would otherwise fall right in-between two values.
+ * That is, we provide exact parsing according to the IEEE standard.
+ *
+ * Given a successful parse, the pointer (`ptr`) in the returned value is set to point right after the
+ * parsed number, and the `value` referenced is set to the parsed value. In case of error, the returned
+ * `ec` contains a representative error, otherwise the default (`std::errc()`) value is stored.
+ *
+ * The implementation does not throw and does not allocate memory (e.g., with `new` or `malloc`).
+ *
+ * Like the C++17 standard, the `fast_float::from_chars` functions take an optional last argument of
+ * the type `fast_float::chars_format`. It is a bitset value: we check whether
+ * `fmt & fast_float::chars_format::fixed` and `fmt & fast_float::chars_format::scientific` are set
+ * to determine whether we allow the fixed point and scientific notation respectively.
+ * The default is  `fast_float::chars_format::general` which allows both `fixed` and `scientific`.
+ */
+template<typename T, typename UC = char, typename = FASTFLOAT_ENABLE_IF(is_supported_float_type<T>())>
+FASTFLOAT_CONSTEXPR20
+from_chars_result_t<UC> from_chars(UC const * first, UC const * last,
+                             T &value, chars_format fmt = chars_format::general)  noexcept;
+
+/**
+ * Like from_chars, but accepts an `options` argument to govern number parsing.
+ */
+template<typename T, typename UC = char>
+FASTFLOAT_CONSTEXPR20
+from_chars_result_t<UC> from_chars_advanced(UC const * first, UC const * last,
+                                      T &value, parse_options_t<UC> options)  noexcept;
+/**
+* from_chars for integer types.
+*/
+template <typename T, typename UC = char, typename = FASTFLOAT_ENABLE_IF(!is_supported_float_type<T>())>
+FASTFLOAT_CONSTEXPR20
+from_chars_result_t<UC> from_chars(UC const * first, UC const * last, T& value, int base = 10) noexcept;
+
+} // namespace fast_float
+#endif // FASTFLOAT_FAST_FLOAT_H
+
 #ifndef FASTFLOAT_ASCII_NUMBER_H
 #define FASTFLOAT_ASCII_NUMBER_H
 
@@ -7423,15 +8518,39 @@ fastfloat_really_inline void to_float(bool negative, adjusted_mantissa am, T &va
 //included above:
 //#include <cstring>
 #include <iterator>
+//included above:
+//#include <limits>
+//included above:
+//#include <type_traits>
+
+
+#ifdef FASTFLOAT_SSE2
+#include <emmintrin.h>
+#endif
 
+#ifdef FASTFLOAT_NEON
+#include <arm_neon.h>
+#endif
 
 namespace fast_float {
 
+template <typename UC>
+fastfloat_really_inline constexpr bool has_simd_opt() {
+#ifdef FASTFLOAT_HAS_SIMD
+  return std::is_same<UC, char16_t>::value;
+#else
+  return false;
+#endif
+}
+
 // Next function can be micro-optimized, but compilers are entirely
 // able to optimize it well.
-fastfloat_really_inline bool is_integer(char c)  noexcept  { return c >= '0' && c <= '9'; }
+template <typename UC>
+fastfloat_really_inline constexpr bool is_integer(UC c) noexcept {
+  return !(c > UC('9') || c < UC('0'));
+}
 
-fastfloat_really_inline uint64_t byteswap(uint64_t val) {
+fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) {
   return (val & 0xFF00000000000000) >> 56
     | (val & 0x00FF000000000000) >> 40
     | (val & 0x0000FF0000000000) >> 24
@@ -7442,7 +8561,18 @@ fastfloat_really_inline uint64_t byteswap(uint64_t val) {
     | (val & 0x00000000000000FF) << 56;
 }
 
-fastfloat_really_inline uint64_t read_u64(const char *chars) {
+// Read 8 UC into a u64. Truncates UC if not char.
+template <typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
+uint64_t read8_to_u64(const UC *chars) {
+  if (cpp20_and_in_constexpr() || !std::is_same<UC, char>::value) {
+    uint64_t val = 0;
+    for(int i = 0; i < 8; ++i) {
+      val |= uint64_t(uint8_t(*chars)) << (i*8);
+      ++chars;
+    }
+    return val;
+  }
   uint64_t val;
   ::memcpy(&val, chars, sizeof(uint64_t));
 #if FASTFLOAT_IS_BIG_ENDIAN == 1
@@ -7452,16 +8582,64 @@ fastfloat_really_inline uint64_t read_u64(const char *chars) {
   return val;
 }
 
-fastfloat_really_inline void write_u64(uint8_t *chars, uint64_t val) {
-#if FASTFLOAT_IS_BIG_ENDIAN == 1
-  // Need to read as-if the number was in little-endian order.
-  val = byteswap(val);
+#ifdef FASTFLOAT_SSE2
+
+fastfloat_really_inline
+uint64_t simd_read8_to_u64(const __m128i data) {
+FASTFLOAT_SIMD_DISABLE_WARNINGS
+  const __m128i packed = _mm_packus_epi16(data, data);
+#ifdef FASTFLOAT_64BIT
+  return uint64_t(_mm_cvtsi128_si64(packed));
+#else
+  uint64_t value;
+  // Visual Studio + older versions of GCC don't support _mm_storeu_si64
+  _mm_storel_epi64(reinterpret_cast<__m128i*>(&value), packed);
+  return value;
+#endif
+FASTFLOAT_SIMD_RESTORE_WARNINGS
+}
+
+fastfloat_really_inline
+uint64_t simd_read8_to_u64(const char16_t* chars) {
+FASTFLOAT_SIMD_DISABLE_WARNINGS
+  return simd_read8_to_u64(_mm_loadu_si128(reinterpret_cast<const __m128i*>(chars)));
+FASTFLOAT_SIMD_RESTORE_WARNINGS
+}
+
+#elif defined(FASTFLOAT_NEON)
+
+
+fastfloat_really_inline
+uint64_t simd_read8_to_u64(const uint16x8_t data) {
+FASTFLOAT_SIMD_DISABLE_WARNINGS
+  uint8x8_t utf8_packed = vmovn_u16(data);
+  return vget_lane_u64(vreinterpret_u64_u8(utf8_packed), 0);
+FASTFLOAT_SIMD_RESTORE_WARNINGS
+}
+
+fastfloat_really_inline
+uint64_t simd_read8_to_u64(const char16_t* chars) {
+FASTFLOAT_SIMD_DISABLE_WARNINGS
+  return simd_read8_to_u64(vld1q_u16(reinterpret_cast<const uint16_t*>(chars)));
+FASTFLOAT_SIMD_RESTORE_WARNINGS
+}
+
+#endif // FASTFLOAT_SSE2
+
+// MSVC SFINAE is broken pre-VS2017
+#if defined(_MSC_VER) && _MSC_VER <= 1900
+template <typename UC>
+#else
+template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opt<UC>()) = 0>
 #endif
-  ::memcpy(chars, &val, sizeof(uint64_t));
+// dummy for compile
+uint64_t simd_read8_to_u64(UC const*) {
+  return 0;
 }
 
 // credit  @aqrit
-fastfloat_really_inline uint32_t  parse_eight_digits_unrolled(uint64_t val) {
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14
+uint32_t parse_eight_digits_unrolled(uint64_t val) {
   const uint64_t mask = 0x000000FF000000FF;
   const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
   const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
@@ -7471,104 +8649,217 @@ fastfloat_really_inline uint32_t  parse_eight_digits_unrolled(uint64_t val) {
   return uint32_t(val);
 }
 
-fastfloat_really_inline uint32_t parse_eight_digits_unrolled(const char *chars)  noexcept  {
-  return parse_eight_digits_unrolled(read_u64(chars));
+
+// Call this if chars are definitely 8 digits.
+template <typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
+uint32_t parse_eight_digits_unrolled(UC const * chars)  noexcept {
+  if (cpp20_and_in_constexpr() || !has_simd_opt<UC>()) {
+    return parse_eight_digits_unrolled(read8_to_u64(chars)); // truncation okay
+  }
+  return parse_eight_digits_unrolled(simd_read8_to_u64(chars));
 }
 
+
 // credit @aqrit
-fastfloat_really_inline bool is_made_of_eight_digits_fast(uint64_t val)  noexcept  {
+fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val)  noexcept {
   return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) &
      0x8080808080808080));
 }
 
-fastfloat_really_inline bool is_made_of_eight_digits_fast(const char *chars)  noexcept  {
-  return is_made_of_eight_digits_fast(read_u64(chars));
-}
 
-typedef span<const char> byte_span;
+#ifdef FASTFLOAT_HAS_SIMD
 
-struct parsed_number_string {
-  int64_t exponent{0};
-  uint64_t mantissa{0};
-  const char *lastmatch{nullptr};
-  bool negative{false};
-  bool valid{false};
-  bool too_many_digits{false};
-  // contains the range of the significant digits
-  byte_span integer{};  // non-nullable
-  byte_span fraction{}; // nullable
-};
+// Call this if chars might not be 8 digits.
+// Using this style (instead of is_made_of_eight_digits_fast() then parse_eight_digits_unrolled())
+// ensures we don't load SIMD registers twice.
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
+bool simd_parse_if_eight_digits_unrolled(const char16_t* chars, uint64_t& i) noexcept {
+  if (cpp20_and_in_constexpr()) {
+    return false;
+  }   
+#ifdef FASTFLOAT_SSE2
+FASTFLOAT_SIMD_DISABLE_WARNINGS
+  const __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i*>(chars));
+
+  // (x - '0') <= 9
+  // http://0x80.pl/articles/simd-parsing-int-sequences.html
+  const __m128i t0 = _mm_add_epi16(data, _mm_set1_epi16(32720));
+  const __m128i t1 = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-32759));
+
+  if (_mm_movemask_epi8(t1) == 0) {
+    i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data));
+    return true;
+  }
+  else return false;
+FASTFLOAT_SIMD_RESTORE_WARNINGS
+#elif defined(FASTFLOAT_NEON)
+FASTFLOAT_SIMD_DISABLE_WARNINGS
+  const uint16x8_t data = vld1q_u16(reinterpret_cast<const uint16_t*>(chars));
+  
+  // (x - '0') <= 9
+  // http://0x80.pl/articles/simd-parsing-int-sequences.html
+  const uint16x8_t t0 = vsubq_u16(data, vmovq_n_u16('0'));
+  const uint16x8_t mask = vcltq_u16(t0, vmovq_n_u16('9' - '0' + 1));
+
+  if (vminvq_u16(mask) == 0xFFFF) {
+    i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data));
+    return true;
+  }
+  else return false;
+FASTFLOAT_SIMD_RESTORE_WARNINGS
+#else
+  (void)chars; (void)i;
+  return false;
+#endif // FASTFLOAT_SSE2
+}
+
+#endif // FASTFLOAT_HAS_SIMD
+
+// MSVC SFINAE is broken pre-VS2017
+#if defined(_MSC_VER) && _MSC_VER <= 1900
+template <typename UC>
+#else
+template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opt<UC>()) = 0>
+#endif
+// dummy for compile
+bool simd_parse_if_eight_digits_unrolled(UC const*, uint64_t&) {
+  return 0;
+}
+
+
+template <typename UC, FASTFLOAT_ENABLE_IF(!std::is_same<UC, char>::value) = 0>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
+void loop_parse_if_eight_digits(const UC*& p, const UC* const pend, uint64_t& i) {
+  if (!has_simd_opt<UC>()) {
+    return;
+  }
+  while ((std::distance(p, pend) >= 8) && simd_parse_if_eight_digits_unrolled(p, i)) { // in rare cases, this will overflow, but that's ok
+    p += 8;
+  }
+}
+
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
+void loop_parse_if_eight_digits(const char*& p, const char* const pend, uint64_t& i) {
+  // optimizes better than parse_if_eight_digits_unrolled() for UC = char.
+  while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(read8_to_u64(p))) {
+    i = i * 100000000 + parse_eight_digits_unrolled(read8_to_u64(p)); // in rare cases, this will overflow, but that's ok
+    p += 8;
+  }
+}
+
+template <typename UC>
+struct parsed_number_string_t {
+  int64_t exponent{0};
+  uint64_t mantissa{0};
+  UC const * lastmatch{nullptr};
+  bool negative{false};
+  bool valid{false};
+  bool too_many_digits{false};
+  // contains the range of the significant digits
+  span<const UC> integer{};  // non-nullable
+  span<const UC> fraction{}; // nullable
+};
+
+using byte_span = span<const char>;
+using parsed_number_string = parsed_number_string_t<char>;
 
 // Assuming that you use no more than 19 digits, this will
 // parse an ASCII string.
-fastfloat_really_inline
-parsed_number_string parse_number_string(const char *p, const char *pend, parse_options options) noexcept {
-  const chars_format fmt = options.format;
-  const char decimal_point = options.decimal_point;
+template <typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
+parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, parse_options_t<UC> options) noexcept {
+  chars_format const fmt = options.format;
+  UC const decimal_point = options.decimal_point;
 
-  parsed_number_string answer;
+  parsed_number_string_t<UC> answer;
   answer.valid = false;
   answer.too_many_digits = false;
-  answer.negative = (*p == '-');
-  if (*p == '-') { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here
+  answer.negative = (*p == UC('-'));
+#ifdef FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default
+  if ((*p == UC('-')) || (!(fmt & FASTFLOAT_JSONFMT) && *p == UC('+'))) {
+#else
+  if (*p == UC('-')) { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here
+#endif
     ++p;
     if (p == pend) {
       return answer;
     }
-    if (!is_integer(*p) && (*p != decimal_point)) { // a sign must be followed by an integer or the dot
-      return answer;
+    if (fmt & FASTFLOAT_JSONFMT) {
+      if (!is_integer(*p)) { // a sign must be followed by an integer
+        return answer;
+      }    
+    } else {
+      if (!is_integer(*p) && (*p != decimal_point)) { // a sign must be followed by an integer or the dot
+        return answer;
+      }
     }
   }
-  const char *const start_digits = p;
+  UC const * const start_digits = p;
 
   uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad)
 
-  while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) {
-    i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok
-    p += 8;
-  }
   while ((p != pend) && is_integer(*p)) {
     // a multiplication by 10 is cheaper than an arbitrary integer
     // multiplication
     i = 10 * i +
-        uint64_t(*p - '0'); // might overflow, we will handle the overflow later
+        uint64_t(*p - UC('0')); // might overflow, we will handle the overflow later
     ++p;
   }
-  const char *const end_of_integer_part = p;
+  UC const * const end_of_integer_part = p;
   int64_t digit_count = int64_t(end_of_integer_part - start_digits);
-  answer.integer = byte_span(start_digits, size_t(digit_count));
+  answer.integer = span<const UC>(start_digits, size_t(digit_count));
+  if (fmt & FASTFLOAT_JSONFMT) {
+    // at least 1 digit in integer part, without leading zeros
+    if (digit_count == 0 || (start_digits[0] == UC('0') && digit_count > 1)) {
+      return answer;
+    }
+  }
+
   int64_t exponent = 0;
-  if ((p != pend) && (*p == decimal_point)) {
+  const bool has_decimal_point = (p != pend) && (*p == decimal_point);
+  if (has_decimal_point) {
     ++p;
-    const char* before = p;
+    UC const * before = p;
     // can occur at most twice without overflowing, but let it occur more, since
     // for integers with many digits, digit parsing is the primary bottleneck.
-    while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) {
-      i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok
-      p += 8;
-    }
+    loop_parse_if_eight_digits(p, pend, i);
+
     while ((p != pend) && is_integer(*p)) {
-      uint8_t digit = uint8_t(*p - '0');
+      uint8_t digit = uint8_t(*p - UC('0'));
       ++p;
       i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
     }
     exponent = before - p;
-    answer.fraction = byte_span(before, size_t(p - before));
+    answer.fraction = span<const UC>(before, size_t(p - before));
     digit_count -= exponent;
   }
-  // we must have encountered at least one integer!
-  if (digit_count == 0) {
+  if (fmt & FASTFLOAT_JSONFMT) {
+    // at least 1 digit in fractional part
+    if (has_decimal_point && exponent == 0) {
+      return answer;
+    }
+  } 
+  else if (digit_count == 0) { // we must have encountered at least one integer!
     return answer;
   }
   int64_t exp_number = 0;            // explicit exponential part
-  if ((fmt & chars_format::scientific) && (p != pend) && (('e' == *p) || ('E' == *p))) {
-    const char * location_of_e = p;
-    ++p;
+  if ( ((fmt & chars_format::scientific) &&
+        (p != pend) &&
+        ((UC('e') == *p) || (UC('E') == *p)))
+       ||
+       ((fmt & FASTFLOAT_FORTRANFMT) &&
+        (p != pend) &&
+        ((UC('+') == *p) || (UC('-') == *p) || (UC('d') == *p) || (UC('D') == *p)))) {
+    UC const * location_of_e = p;
+    if ((UC('e') == *p) || (UC('E') == *p) || (UC('d') == *p) || (UC('D') == *p)) {
+      ++p;
+    }
     bool neg_exp = false;
-    if ((p != pend) && ('-' == *p)) {
+    if ((p != pend) && (UC('-') == *p)) {
       neg_exp = true;
       ++p;
-    } else if ((p != pend) && ('+' == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1)
+    } else if ((p != pend) && (UC('+') == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1)
       ++p;
     }
     if ((p == pend) || !is_integer(*p)) {
@@ -7580,7 +8871,7 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
       p = location_of_e;
     } else {
       while ((p != pend) && is_integer(*p)) {
-        uint8_t digit = uint8_t(*p - '0');
+        uint8_t digit = uint8_t(*p - UC('0'));
         if (exp_number < 0x10000000) {
           exp_number = 10 * exp_number + digit;
         }
@@ -7606,11 +8897,12 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
     // We have to handle the case where we have 0.0000somenumber.
     // We need to be mindful of the case where we only have zeroes...
     // E.g., 0.000000000...000.
-    const char *start = start_digits;
-    while ((start != pend) && (*start == '0' || *start == decimal_point)) {
-      if(*start == '0') { digit_count --; }
+    UC const * start = start_digits;
+    while ((start != pend) && (*start == UC('0') || *start == decimal_point)) {
+      if(*start == UC('0')) { digit_count --; }
       start++;
     }
+
     if (digit_count > 19) {
       answer.too_many_digits = true;
       // Let us start again, this time, avoiding overflows.
@@ -7618,22 +8910,23 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
       // pre-tokenized spans from above.
       i = 0;
       p = answer.integer.ptr;
-      const char* int_end = p + answer.integer.len();
-      const uint64_t minimal_nineteen_digit_integer{1000000000000000000};
-      while((i < minimal_nineteen_digit_integer) && (p != int_end)) {
-        i = i * 10 + uint64_t(*p - '0');
+      UC const* int_end = p + answer.integer.len();
+      const uint64_t minimal_nineteen_digit_integer{ 1000000000000000000 };
+      while ((i < minimal_nineteen_digit_integer) && (p != int_end)) {
+        i = i * 10 + uint64_t(*p - UC('0'));
         ++p;
       }
       if (i >= minimal_nineteen_digit_integer) { // We have a big integers
         exponent = end_of_integer_part - p + exp_number;
-      } else { // We have a value with a fractional component.
-          p = answer.fraction.ptr;
-          const char* frac_end = p + answer.fraction.len();
-          while((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
-            i = i * 10 + uint64_t(*p - '0');
-            ++p;
-          }
-          exponent = answer.fraction.ptr - p + exp_number;
+      }
+      else { // We have a value with a fractional component.
+        p = answer.fraction.ptr;
+        UC const* frac_end = p + answer.fraction.len();
+        while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
+          i = i * 10 + uint64_t(*p - UC('0'));
+          ++p;
+        }
+        exponent = answer.fraction.ptr - p + exp_number;
       }
       // We have now corrected both exponent and i, to a truncated value
     }
@@ -7643,6 +8936,107 @@ parsed_number_string parse_number_string(const char *p, const char *pend, parse_
   return answer;
 }
 
+template <typename T, typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
+from_chars_result_t<UC> parse_int_string(UC const* p, UC const* pend, T& value, int base) {
+  from_chars_result_t<UC> answer;
+  
+  UC const* const first = p;
+
+  bool negative = (*p == UC('-'));
+  if (!std::is_signed<T>::value && negative) {
+    answer.ec = std::errc::invalid_argument;
+    answer.ptr = first;
+    return answer;
+  }
+#ifdef FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default
+  if ((*p == UC('-')) || (*p == UC('+'))) {
+#else
+  if (*p == UC('-')) {
+#endif
+    ++p;
+  }
+
+  UC const* const start_num = p;
+
+  while (p!= pend && *p == UC('0')) {
+    ++p; 
+  }
+
+  const bool has_leading_zeros = p > start_num;
+
+  UC const* const start_digits = p;
+
+  uint64_t i = 0;
+  if (base == 10) {
+    loop_parse_if_eight_digits(p, pend, i); // use SIMD if possible
+  }
+  while (p != pend) {
+    uint8_t digit = ch_to_digit(*p);
+    if (digit >= base) {
+      break;
+    }
+    i = uint64_t(base) * i + digit; // might overflow, check this later
+    p++; 
+  }
+  
+  size_t digit_count = size_t(p - start_digits);
+
+  if (digit_count == 0) {
+    if (has_leading_zeros) {
+      value = 0;
+      answer.ec = std::errc();
+      answer.ptr = p;
+    }
+    else {
+      answer.ec = std::errc::invalid_argument;
+      answer.ptr = first;
+    }
+    return answer; 
+  }
+
+  answer.ptr = p;
+
+  // check u64 overflow
+  size_t max_digits = max_digits_u64(base);
+  if (digit_count > max_digits) {
+    answer.ec = std::errc::result_out_of_range;
+    return answer;
+  }
+  // this check can be eliminated for all other types, but they will all require a max_digits(base) equivalent
+  if (digit_count == max_digits && i < min_safe_u64(base)) {
+    answer.ec = std::errc::result_out_of_range;
+    return answer;
+  }
+
+  // check other types overflow
+  if (!std::is_same<T, uint64_t>::value) {
+    if (i > uint64_t(std::numeric_limits<T>::max()) + uint64_t(negative)) {
+      answer.ec = std::errc::result_out_of_range;
+      return answer;
+    }
+  }
+
+  if (negative) {
+#ifdef FASTFLOAT_VISUAL_STUDIO
+#pragma warning(push)
+#pragma warning(disable: 4146) 
+#endif
+    // this weird workaround is required because:
+    // - converting unsigned to signed when its value is greater than signed max is UB pre-C++23.
+    // - reinterpret_casting (~i + 1) would work, but it is not constexpr
+    // this is always optimized into a neg instruction (note: T is an integer type)
+    value = T(-std::numeric_limits<T>::max() - T(i - uint64_t(std::numeric_limits<T>::max())));
+#ifdef FASTFLOAT_VISUAL_STUDIO
+#pragma warning(pop)
+#endif
+  }
+  else { value = T(i); }
+
+  answer.ec = std::errc();
+  return answer;
+}
+
 } // namespace fast_float
 
 #endif
@@ -7667,11 +9061,11 @@ namespace fast_float {
  */
 
 /**
- * The smallest non-zero float (binary64) is 2^−1074.
+ * The smallest non-zero float (binary64) is 2^-1074.
  * We take as input numbers of the form w x 10^q where w < 2^64.
  * We have that w * 10^-343  <  2^(64-344) 5^-343 < 2^-1076.
  * However, we have that
- * (2^64-1) * 10^-342 =  (2^64-1) * 2^-342 * 5^-342 > 2^−1074.
+ * (2^64-1) * 10^-342 =  (2^64-1) * 2^-342 * 5^-342 > 2^-1074.
  * Thus it is possible for a number of the form w * 10^-342 where
  * w is a 64-bit value to be a non-zero floating-point number.
  *********
@@ -7686,665 +9080,666 @@ constexpr static int smallest_power_of_five = binary_format<double>::smallest_po
 constexpr static int largest_power_of_five = binary_format<double>::largest_power_of_ten();
 constexpr static int number_of_entries = 2 * (largest_power_of_five - smallest_power_of_five + 1);
 // Powers of five from 5^-342 all the way to 5^308 rounded toward one.
-static const uint64_t power_of_five_128[number_of_entries];
+constexpr static uint64_t power_of_five_128[number_of_entries] = {
+    0xeef453d6923bd65a,0x113faa2906a13b3f,
+    0x9558b4661b6565f8,0x4ac7ca59a424c507,
+    0xbaaee17fa23ebf76,0x5d79bcf00d2df649,
+    0xe95a99df8ace6f53,0xf4d82c2c107973dc,
+    0x91d8a02bb6c10594,0x79071b9b8a4be869,
+    0xb64ec836a47146f9,0x9748e2826cdee284,
+    0xe3e27a444d8d98b7,0xfd1b1b2308169b25,
+    0x8e6d8c6ab0787f72,0xfe30f0f5e50e20f7,
+    0xb208ef855c969f4f,0xbdbd2d335e51a935,
+    0xde8b2b66b3bc4723,0xad2c788035e61382,
+    0x8b16fb203055ac76,0x4c3bcb5021afcc31,
+    0xaddcb9e83c6b1793,0xdf4abe242a1bbf3d,
+    0xd953e8624b85dd78,0xd71d6dad34a2af0d,
+    0x87d4713d6f33aa6b,0x8672648c40e5ad68,
+    0xa9c98d8ccb009506,0x680efdaf511f18c2,
+    0xd43bf0effdc0ba48,0x212bd1b2566def2,
+    0x84a57695fe98746d,0x14bb630f7604b57,
+    0xa5ced43b7e3e9188,0x419ea3bd35385e2d,
+    0xcf42894a5dce35ea,0x52064cac828675b9,
+    0x818995ce7aa0e1b2,0x7343efebd1940993,
+    0xa1ebfb4219491a1f,0x1014ebe6c5f90bf8,
+    0xca66fa129f9b60a6,0xd41a26e077774ef6,
+    0xfd00b897478238d0,0x8920b098955522b4,
+    0x9e20735e8cb16382,0x55b46e5f5d5535b0,
+    0xc5a890362fddbc62,0xeb2189f734aa831d,
+    0xf712b443bbd52b7b,0xa5e9ec7501d523e4,
+    0x9a6bb0aa55653b2d,0x47b233c92125366e,
+    0xc1069cd4eabe89f8,0x999ec0bb696e840a,
+    0xf148440a256e2c76,0xc00670ea43ca250d,
+    0x96cd2a865764dbca,0x380406926a5e5728,
+    0xbc807527ed3e12bc,0xc605083704f5ecf2,
+    0xeba09271e88d976b,0xf7864a44c633682e,
+    0x93445b8731587ea3,0x7ab3ee6afbe0211d,
+    0xb8157268fdae9e4c,0x5960ea05bad82964,
+    0xe61acf033d1a45df,0x6fb92487298e33bd,
+    0x8fd0c16206306bab,0xa5d3b6d479f8e056,
+    0xb3c4f1ba87bc8696,0x8f48a4899877186c,
+    0xe0b62e2929aba83c,0x331acdabfe94de87,
+    0x8c71dcd9ba0b4925,0x9ff0c08b7f1d0b14,
+    0xaf8e5410288e1b6f,0x7ecf0ae5ee44dd9,
+    0xdb71e91432b1a24a,0xc9e82cd9f69d6150,
+    0x892731ac9faf056e,0xbe311c083a225cd2,
+    0xab70fe17c79ac6ca,0x6dbd630a48aaf406,
+    0xd64d3d9db981787d,0x92cbbccdad5b108,
+    0x85f0468293f0eb4e,0x25bbf56008c58ea5,
+    0xa76c582338ed2621,0xaf2af2b80af6f24e,
+    0xd1476e2c07286faa,0x1af5af660db4aee1,
+    0x82cca4db847945ca,0x50d98d9fc890ed4d,
+    0xa37fce126597973c,0xe50ff107bab528a0,
+    0xcc5fc196fefd7d0c,0x1e53ed49a96272c8,
+    0xff77b1fcbebcdc4f,0x25e8e89c13bb0f7a,
+    0x9faacf3df73609b1,0x77b191618c54e9ac,
+    0xc795830d75038c1d,0xd59df5b9ef6a2417,
+    0xf97ae3d0d2446f25,0x4b0573286b44ad1d,
+    0x9becce62836ac577,0x4ee367f9430aec32,
+    0xc2e801fb244576d5,0x229c41f793cda73f,
+    0xf3a20279ed56d48a,0x6b43527578c1110f,
+    0x9845418c345644d6,0x830a13896b78aaa9,
+    0xbe5691ef416bd60c,0x23cc986bc656d553,
+    0xedec366b11c6cb8f,0x2cbfbe86b7ec8aa8,
+    0x94b3a202eb1c3f39,0x7bf7d71432f3d6a9,
+    0xb9e08a83a5e34f07,0xdaf5ccd93fb0cc53,
+    0xe858ad248f5c22c9,0xd1b3400f8f9cff68,
+    0x91376c36d99995be,0x23100809b9c21fa1,
+    0xb58547448ffffb2d,0xabd40a0c2832a78a,
+    0xe2e69915b3fff9f9,0x16c90c8f323f516c,
+    0x8dd01fad907ffc3b,0xae3da7d97f6792e3,
+    0xb1442798f49ffb4a,0x99cd11cfdf41779c,
+    0xdd95317f31c7fa1d,0x40405643d711d583,
+    0x8a7d3eef7f1cfc52,0x482835ea666b2572,
+    0xad1c8eab5ee43b66,0xda3243650005eecf,
+    0xd863b256369d4a40,0x90bed43e40076a82,
+    0x873e4f75e2224e68,0x5a7744a6e804a291,
+    0xa90de3535aaae202,0x711515d0a205cb36,
+    0xd3515c2831559a83,0xd5a5b44ca873e03,
+    0x8412d9991ed58091,0xe858790afe9486c2,
+    0xa5178fff668ae0b6,0x626e974dbe39a872,
+    0xce5d73ff402d98e3,0xfb0a3d212dc8128f,
+    0x80fa687f881c7f8e,0x7ce66634bc9d0b99,
+    0xa139029f6a239f72,0x1c1fffc1ebc44e80,
+    0xc987434744ac874e,0xa327ffb266b56220,
+    0xfbe9141915d7a922,0x4bf1ff9f0062baa8,
+    0x9d71ac8fada6c9b5,0x6f773fc3603db4a9,
+    0xc4ce17b399107c22,0xcb550fb4384d21d3,
+    0xf6019da07f549b2b,0x7e2a53a146606a48,
+    0x99c102844f94e0fb,0x2eda7444cbfc426d,
+    0xc0314325637a1939,0xfa911155fefb5308,
+    0xf03d93eebc589f88,0x793555ab7eba27ca,
+    0x96267c7535b763b5,0x4bc1558b2f3458de,
+    0xbbb01b9283253ca2,0x9eb1aaedfb016f16,
+    0xea9c227723ee8bcb,0x465e15a979c1cadc,
+    0x92a1958a7675175f,0xbfacd89ec191ec9,
+    0xb749faed14125d36,0xcef980ec671f667b,
+    0xe51c79a85916f484,0x82b7e12780e7401a,
+    0x8f31cc0937ae58d2,0xd1b2ecb8b0908810,
+    0xb2fe3f0b8599ef07,0x861fa7e6dcb4aa15,
+    0xdfbdcece67006ac9,0x67a791e093e1d49a,
+    0x8bd6a141006042bd,0xe0c8bb2c5c6d24e0,
+    0xaecc49914078536d,0x58fae9f773886e18,
+    0xda7f5bf590966848,0xaf39a475506a899e,
+    0x888f99797a5e012d,0x6d8406c952429603,
+    0xaab37fd7d8f58178,0xc8e5087ba6d33b83,
+    0xd5605fcdcf32e1d6,0xfb1e4a9a90880a64,
+    0x855c3be0a17fcd26,0x5cf2eea09a55067f,
+    0xa6b34ad8c9dfc06f,0xf42faa48c0ea481e,
+    0xd0601d8efc57b08b,0xf13b94daf124da26,
+    0x823c12795db6ce57,0x76c53d08d6b70858,
+    0xa2cb1717b52481ed,0x54768c4b0c64ca6e,
+    0xcb7ddcdda26da268,0xa9942f5dcf7dfd09,
+    0xfe5d54150b090b02,0xd3f93b35435d7c4c,
+    0x9efa548d26e5a6e1,0xc47bc5014a1a6daf,
+    0xc6b8e9b0709f109a,0x359ab6419ca1091b,
+    0xf867241c8cc6d4c0,0xc30163d203c94b62,
+    0x9b407691d7fc44f8,0x79e0de63425dcf1d,
+    0xc21094364dfb5636,0x985915fc12f542e4,
+    0xf294b943e17a2bc4,0x3e6f5b7b17b2939d,
+    0x979cf3ca6cec5b5a,0xa705992ceecf9c42,
+    0xbd8430bd08277231,0x50c6ff782a838353,
+    0xece53cec4a314ebd,0xa4f8bf5635246428,
+    0x940f4613ae5ed136,0x871b7795e136be99,
+    0xb913179899f68584,0x28e2557b59846e3f,
+    0xe757dd7ec07426e5,0x331aeada2fe589cf,
+    0x9096ea6f3848984f,0x3ff0d2c85def7621,
+    0xb4bca50b065abe63,0xfed077a756b53a9,
+    0xe1ebce4dc7f16dfb,0xd3e8495912c62894,
+    0x8d3360f09cf6e4bd,0x64712dd7abbbd95c,
+    0xb080392cc4349dec,0xbd8d794d96aacfb3,
+    0xdca04777f541c567,0xecf0d7a0fc5583a0,
+    0x89e42caaf9491b60,0xf41686c49db57244,
+    0xac5d37d5b79b6239,0x311c2875c522ced5,
+    0xd77485cb25823ac7,0x7d633293366b828b,
+    0x86a8d39ef77164bc,0xae5dff9c02033197,
+    0xa8530886b54dbdeb,0xd9f57f830283fdfc,
+    0xd267caa862a12d66,0xd072df63c324fd7b,
+    0x8380dea93da4bc60,0x4247cb9e59f71e6d,
+    0xa46116538d0deb78,0x52d9be85f074e608,
+    0xcd795be870516656,0x67902e276c921f8b,
+    0x806bd9714632dff6,0xba1cd8a3db53b6,
+    0xa086cfcd97bf97f3,0x80e8a40eccd228a4,
+    0xc8a883c0fdaf7df0,0x6122cd128006b2cd,
+    0xfad2a4b13d1b5d6c,0x796b805720085f81,
+    0x9cc3a6eec6311a63,0xcbe3303674053bb0,
+    0xc3f490aa77bd60fc,0xbedbfc4411068a9c,
+    0xf4f1b4d515acb93b,0xee92fb5515482d44,
+    0x991711052d8bf3c5,0x751bdd152d4d1c4a,
+    0xbf5cd54678eef0b6,0xd262d45a78a0635d,
+    0xef340a98172aace4,0x86fb897116c87c34,
+    0x9580869f0e7aac0e,0xd45d35e6ae3d4da0,
+    0xbae0a846d2195712,0x8974836059cca109,
+    0xe998d258869facd7,0x2bd1a438703fc94b,
+    0x91ff83775423cc06,0x7b6306a34627ddcf,
+    0xb67f6455292cbf08,0x1a3bc84c17b1d542,
+    0xe41f3d6a7377eeca,0x20caba5f1d9e4a93,
+    0x8e938662882af53e,0x547eb47b7282ee9c,
+    0xb23867fb2a35b28d,0xe99e619a4f23aa43,
+    0xdec681f9f4c31f31,0x6405fa00e2ec94d4,
+    0x8b3c113c38f9f37e,0xde83bc408dd3dd04,
+    0xae0b158b4738705e,0x9624ab50b148d445,
+    0xd98ddaee19068c76,0x3badd624dd9b0957,
+    0x87f8a8d4cfa417c9,0xe54ca5d70a80e5d6,
+    0xa9f6d30a038d1dbc,0x5e9fcf4ccd211f4c,
+    0xd47487cc8470652b,0x7647c3200069671f,
+    0x84c8d4dfd2c63f3b,0x29ecd9f40041e073,
+    0xa5fb0a17c777cf09,0xf468107100525890,
+    0xcf79cc9db955c2cc,0x7182148d4066eeb4,
+    0x81ac1fe293d599bf,0xc6f14cd848405530,
+    0xa21727db38cb002f,0xb8ada00e5a506a7c,
+    0xca9cf1d206fdc03b,0xa6d90811f0e4851c,
+    0xfd442e4688bd304a,0x908f4a166d1da663,
+    0x9e4a9cec15763e2e,0x9a598e4e043287fe,
+    0xc5dd44271ad3cdba,0x40eff1e1853f29fd,
+    0xf7549530e188c128,0xd12bee59e68ef47c,
+    0x9a94dd3e8cf578b9,0x82bb74f8301958ce,
+    0xc13a148e3032d6e7,0xe36a52363c1faf01,
+    0xf18899b1bc3f8ca1,0xdc44e6c3cb279ac1,
+    0x96f5600f15a7b7e5,0x29ab103a5ef8c0b9,
+    0xbcb2b812db11a5de,0x7415d448f6b6f0e7,
+    0xebdf661791d60f56,0x111b495b3464ad21,
+    0x936b9fcebb25c995,0xcab10dd900beec34,
+    0xb84687c269ef3bfb,0x3d5d514f40eea742,
+    0xe65829b3046b0afa,0xcb4a5a3112a5112,
+    0x8ff71a0fe2c2e6dc,0x47f0e785eaba72ab,
+    0xb3f4e093db73a093,0x59ed216765690f56,
+    0xe0f218b8d25088b8,0x306869c13ec3532c,
+    0x8c974f7383725573,0x1e414218c73a13fb,
+    0xafbd2350644eeacf,0xe5d1929ef90898fa,
+    0xdbac6c247d62a583,0xdf45f746b74abf39,
+    0x894bc396ce5da772,0x6b8bba8c328eb783,
+    0xab9eb47c81f5114f,0x66ea92f3f326564,
+    0xd686619ba27255a2,0xc80a537b0efefebd,
+    0x8613fd0145877585,0xbd06742ce95f5f36,
+    0xa798fc4196e952e7,0x2c48113823b73704,
+    0xd17f3b51fca3a7a0,0xf75a15862ca504c5,
+    0x82ef85133de648c4,0x9a984d73dbe722fb,
+    0xa3ab66580d5fdaf5,0xc13e60d0d2e0ebba,
+    0xcc963fee10b7d1b3,0x318df905079926a8,
+    0xffbbcfe994e5c61f,0xfdf17746497f7052,
+    0x9fd561f1fd0f9bd3,0xfeb6ea8bedefa633,
+    0xc7caba6e7c5382c8,0xfe64a52ee96b8fc0,
+    0xf9bd690a1b68637b,0x3dfdce7aa3c673b0,
+    0x9c1661a651213e2d,0x6bea10ca65c084e,
+    0xc31bfa0fe5698db8,0x486e494fcff30a62,
+    0xf3e2f893dec3f126,0x5a89dba3c3efccfa,
+    0x986ddb5c6b3a76b7,0xf89629465a75e01c,
+    0xbe89523386091465,0xf6bbb397f1135823,
+    0xee2ba6c0678b597f,0x746aa07ded582e2c,
+    0x94db483840b717ef,0xa8c2a44eb4571cdc,
+    0xba121a4650e4ddeb,0x92f34d62616ce413,
+    0xe896a0d7e51e1566,0x77b020baf9c81d17,
+    0x915e2486ef32cd60,0xace1474dc1d122e,
+    0xb5b5ada8aaff80b8,0xd819992132456ba,
+    0xe3231912d5bf60e6,0x10e1fff697ed6c69,
+    0x8df5efabc5979c8f,0xca8d3ffa1ef463c1,
+    0xb1736b96b6fd83b3,0xbd308ff8a6b17cb2,
+    0xddd0467c64bce4a0,0xac7cb3f6d05ddbde,
+    0x8aa22c0dbef60ee4,0x6bcdf07a423aa96b,
+    0xad4ab7112eb3929d,0x86c16c98d2c953c6,
+    0xd89d64d57a607744,0xe871c7bf077ba8b7,
+    0x87625f056c7c4a8b,0x11471cd764ad4972,
+    0xa93af6c6c79b5d2d,0xd598e40d3dd89bcf,
+    0xd389b47879823479,0x4aff1d108d4ec2c3,
+    0x843610cb4bf160cb,0xcedf722a585139ba,
+    0xa54394fe1eedb8fe,0xc2974eb4ee658828,
+    0xce947a3da6a9273e,0x733d226229feea32,
+    0x811ccc668829b887,0x806357d5a3f525f,
+    0xa163ff802a3426a8,0xca07c2dcb0cf26f7,
+    0xc9bcff6034c13052,0xfc89b393dd02f0b5,
+    0xfc2c3f3841f17c67,0xbbac2078d443ace2,
+    0x9d9ba7832936edc0,0xd54b944b84aa4c0d,
+    0xc5029163f384a931,0xa9e795e65d4df11,
+    0xf64335bcf065d37d,0x4d4617b5ff4a16d5,
+    0x99ea0196163fa42e,0x504bced1bf8e4e45,
+    0xc06481fb9bcf8d39,0xe45ec2862f71e1d6,
+    0xf07da27a82c37088,0x5d767327bb4e5a4c,
+    0x964e858c91ba2655,0x3a6a07f8d510f86f,
+    0xbbe226efb628afea,0x890489f70a55368b,
+    0xeadab0aba3b2dbe5,0x2b45ac74ccea842e,
+    0x92c8ae6b464fc96f,0x3b0b8bc90012929d,
+    0xb77ada0617e3bbcb,0x9ce6ebb40173744,
+    0xe55990879ddcaabd,0xcc420a6a101d0515,
+    0x8f57fa54c2a9eab6,0x9fa946824a12232d,
+    0xb32df8e9f3546564,0x47939822dc96abf9,
+    0xdff9772470297ebd,0x59787e2b93bc56f7,
+    0x8bfbea76c619ef36,0x57eb4edb3c55b65a,
+    0xaefae51477a06b03,0xede622920b6b23f1,
+    0xdab99e59958885c4,0xe95fab368e45eced,
+    0x88b402f7fd75539b,0x11dbcb0218ebb414,
+    0xaae103b5fcd2a881,0xd652bdc29f26a119,
+    0xd59944a37c0752a2,0x4be76d3346f0495f,
+    0x857fcae62d8493a5,0x6f70a4400c562ddb,
+    0xa6dfbd9fb8e5b88e,0xcb4ccd500f6bb952,
+    0xd097ad07a71f26b2,0x7e2000a41346a7a7,
+    0x825ecc24c873782f,0x8ed400668c0c28c8,
+    0xa2f67f2dfa90563b,0x728900802f0f32fa,
+    0xcbb41ef979346bca,0x4f2b40a03ad2ffb9,
+    0xfea126b7d78186bc,0xe2f610c84987bfa8,
+    0x9f24b832e6b0f436,0xdd9ca7d2df4d7c9,
+    0xc6ede63fa05d3143,0x91503d1c79720dbb,
+    0xf8a95fcf88747d94,0x75a44c6397ce912a,
+    0x9b69dbe1b548ce7c,0xc986afbe3ee11aba,
+    0xc24452da229b021b,0xfbe85badce996168,
+    0xf2d56790ab41c2a2,0xfae27299423fb9c3,
+    0x97c560ba6b0919a5,0xdccd879fc967d41a,
+    0xbdb6b8e905cb600f,0x5400e987bbc1c920,
+    0xed246723473e3813,0x290123e9aab23b68,
+    0x9436c0760c86e30b,0xf9a0b6720aaf6521,
+    0xb94470938fa89bce,0xf808e40e8d5b3e69,
+    0xe7958cb87392c2c2,0xb60b1d1230b20e04,
+    0x90bd77f3483bb9b9,0xb1c6f22b5e6f48c2,
+    0xb4ecd5f01a4aa828,0x1e38aeb6360b1af3,
+    0xe2280b6c20dd5232,0x25c6da63c38de1b0,
+    0x8d590723948a535f,0x579c487e5a38ad0e,
+    0xb0af48ec79ace837,0x2d835a9df0c6d851,
+    0xdcdb1b2798182244,0xf8e431456cf88e65,
+    0x8a08f0f8bf0f156b,0x1b8e9ecb641b58ff,
+    0xac8b2d36eed2dac5,0xe272467e3d222f3f,
+    0xd7adf884aa879177,0x5b0ed81dcc6abb0f,
+    0x86ccbb52ea94baea,0x98e947129fc2b4e9,
+    0xa87fea27a539e9a5,0x3f2398d747b36224,
+    0xd29fe4b18e88640e,0x8eec7f0d19a03aad,
+    0x83a3eeeef9153e89,0x1953cf68300424ac,
+    0xa48ceaaab75a8e2b,0x5fa8c3423c052dd7,
+    0xcdb02555653131b6,0x3792f412cb06794d,
+    0x808e17555f3ebf11,0xe2bbd88bbee40bd0,
+    0xa0b19d2ab70e6ed6,0x5b6aceaeae9d0ec4,
+    0xc8de047564d20a8b,0xf245825a5a445275,
+    0xfb158592be068d2e,0xeed6e2f0f0d56712,
+    0x9ced737bb6c4183d,0x55464dd69685606b,
+    0xc428d05aa4751e4c,0xaa97e14c3c26b886,
+    0xf53304714d9265df,0xd53dd99f4b3066a8,
+    0x993fe2c6d07b7fab,0xe546a8038efe4029,
+    0xbf8fdb78849a5f96,0xde98520472bdd033,
+    0xef73d256a5c0f77c,0x963e66858f6d4440,
+    0x95a8637627989aad,0xdde7001379a44aa8,
+    0xbb127c53b17ec159,0x5560c018580d5d52,
+    0xe9d71b689dde71af,0xaab8f01e6e10b4a6,
+    0x9226712162ab070d,0xcab3961304ca70e8,
+    0xb6b00d69bb55c8d1,0x3d607b97c5fd0d22,
+    0xe45c10c42a2b3b05,0x8cb89a7db77c506a,
+    0x8eb98a7a9a5b04e3,0x77f3608e92adb242,
+    0xb267ed1940f1c61c,0x55f038b237591ed3,
+    0xdf01e85f912e37a3,0x6b6c46dec52f6688,
+    0x8b61313bbabce2c6,0x2323ac4b3b3da015,
+    0xae397d8aa96c1b77,0xabec975e0a0d081a,
+    0xd9c7dced53c72255,0x96e7bd358c904a21,
+    0x881cea14545c7575,0x7e50d64177da2e54,
+    0xaa242499697392d2,0xdde50bd1d5d0b9e9,
+    0xd4ad2dbfc3d07787,0x955e4ec64b44e864,
+    0x84ec3c97da624ab4,0xbd5af13bef0b113e,
+    0xa6274bbdd0fadd61,0xecb1ad8aeacdd58e,
+    0xcfb11ead453994ba,0x67de18eda5814af2,
+    0x81ceb32c4b43fcf4,0x80eacf948770ced7,
+    0xa2425ff75e14fc31,0xa1258379a94d028d,
+    0xcad2f7f5359a3b3e,0x96ee45813a04330,
+    0xfd87b5f28300ca0d,0x8bca9d6e188853fc,
+    0x9e74d1b791e07e48,0x775ea264cf55347e,
+    0xc612062576589dda,0x95364afe032a819e,
+    0xf79687aed3eec551,0x3a83ddbd83f52205,
+    0x9abe14cd44753b52,0xc4926a9672793543,
+    0xc16d9a0095928a27,0x75b7053c0f178294,
+    0xf1c90080baf72cb1,0x5324c68b12dd6339,
+    0x971da05074da7bee,0xd3f6fc16ebca5e04,
+    0xbce5086492111aea,0x88f4bb1ca6bcf585,
+    0xec1e4a7db69561a5,0x2b31e9e3d06c32e6,
+    0x9392ee8e921d5d07,0x3aff322e62439fd0,
+    0xb877aa3236a4b449,0x9befeb9fad487c3,
+    0xe69594bec44de15b,0x4c2ebe687989a9b4,
+    0x901d7cf73ab0acd9,0xf9d37014bf60a11,
+    0xb424dc35095cd80f,0x538484c19ef38c95,
+    0xe12e13424bb40e13,0x2865a5f206b06fba,
+    0x8cbccc096f5088cb,0xf93f87b7442e45d4,
+    0xafebff0bcb24aafe,0xf78f69a51539d749,
+    0xdbe6fecebdedd5be,0xb573440e5a884d1c,
+    0x89705f4136b4a597,0x31680a88f8953031,
+    0xabcc77118461cefc,0xfdc20d2b36ba7c3e,
+    0xd6bf94d5e57a42bc,0x3d32907604691b4d,
+    0x8637bd05af6c69b5,0xa63f9a49c2c1b110,
+    0xa7c5ac471b478423,0xfcf80dc33721d54,
+    0xd1b71758e219652b,0xd3c36113404ea4a9,
+    0x83126e978d4fdf3b,0x645a1cac083126ea,
+    0xa3d70a3d70a3d70a,0x3d70a3d70a3d70a4,
+    0xcccccccccccccccc,0xcccccccccccccccd,
+    0x8000000000000000,0x0,
+    0xa000000000000000,0x0,
+    0xc800000000000000,0x0,
+    0xfa00000000000000,0x0,
+    0x9c40000000000000,0x0,
+    0xc350000000000000,0x0,
+    0xf424000000000000,0x0,
+    0x9896800000000000,0x0,
+    0xbebc200000000000,0x0,
+    0xee6b280000000000,0x0,
+    0x9502f90000000000,0x0,
+    0xba43b74000000000,0x0,
+    0xe8d4a51000000000,0x0,
+    0x9184e72a00000000,0x0,
+    0xb5e620f480000000,0x0,
+    0xe35fa931a0000000,0x0,
+    0x8e1bc9bf04000000,0x0,
+    0xb1a2bc2ec5000000,0x0,
+    0xde0b6b3a76400000,0x0,
+    0x8ac7230489e80000,0x0,
+    0xad78ebc5ac620000,0x0,
+    0xd8d726b7177a8000,0x0,
+    0x878678326eac9000,0x0,
+    0xa968163f0a57b400,0x0,
+    0xd3c21bcecceda100,0x0,
+    0x84595161401484a0,0x0,
+    0xa56fa5b99019a5c8,0x0,
+    0xcecb8f27f4200f3a,0x0,
+    0x813f3978f8940984,0x4000000000000000,
+    0xa18f07d736b90be5,0x5000000000000000,
+    0xc9f2c9cd04674ede,0xa400000000000000,
+    0xfc6f7c4045812296,0x4d00000000000000,
+    0x9dc5ada82b70b59d,0xf020000000000000,
+    0xc5371912364ce305,0x6c28000000000000,
+    0xf684df56c3e01bc6,0xc732000000000000,
+    0x9a130b963a6c115c,0x3c7f400000000000,
+    0xc097ce7bc90715b3,0x4b9f100000000000,
+    0xf0bdc21abb48db20,0x1e86d40000000000,
+    0x96769950b50d88f4,0x1314448000000000,
+    0xbc143fa4e250eb31,0x17d955a000000000,
+    0xeb194f8e1ae525fd,0x5dcfab0800000000,
+    0x92efd1b8d0cf37be,0x5aa1cae500000000,
+    0xb7abc627050305ad,0xf14a3d9e40000000,
+    0xe596b7b0c643c719,0x6d9ccd05d0000000,
+    0x8f7e32ce7bea5c6f,0xe4820023a2000000,
+    0xb35dbf821ae4f38b,0xdda2802c8a800000,
+    0xe0352f62a19e306e,0xd50b2037ad200000,
+    0x8c213d9da502de45,0x4526f422cc340000,
+    0xaf298d050e4395d6,0x9670b12b7f410000,
+    0xdaf3f04651d47b4c,0x3c0cdd765f114000,
+    0x88d8762bf324cd0f,0xa5880a69fb6ac800,
+    0xab0e93b6efee0053,0x8eea0d047a457a00,
+    0xd5d238a4abe98068,0x72a4904598d6d880,
+    0x85a36366eb71f041,0x47a6da2b7f864750,
+    0xa70c3c40a64e6c51,0x999090b65f67d924,
+    0xd0cf4b50cfe20765,0xfff4b4e3f741cf6d,
+    0x82818f1281ed449f,0xbff8f10e7a8921a4,
+    0xa321f2d7226895c7,0xaff72d52192b6a0d,
+    0xcbea6f8ceb02bb39,0x9bf4f8a69f764490,
+    0xfee50b7025c36a08,0x2f236d04753d5b4,
+    0x9f4f2726179a2245,0x1d762422c946590,
+    0xc722f0ef9d80aad6,0x424d3ad2b7b97ef5,
+    0xf8ebad2b84e0d58b,0xd2e0898765a7deb2,
+    0x9b934c3b330c8577,0x63cc55f49f88eb2f,
+    0xc2781f49ffcfa6d5,0x3cbf6b71c76b25fb,
+    0xf316271c7fc3908a,0x8bef464e3945ef7a,
+    0x97edd871cfda3a56,0x97758bf0e3cbb5ac,
+    0xbde94e8e43d0c8ec,0x3d52eeed1cbea317,
+    0xed63a231d4c4fb27,0x4ca7aaa863ee4bdd,
+    0x945e455f24fb1cf8,0x8fe8caa93e74ef6a,
+    0xb975d6b6ee39e436,0xb3e2fd538e122b44,
+    0xe7d34c64a9c85d44,0x60dbbca87196b616,
+    0x90e40fbeea1d3a4a,0xbc8955e946fe31cd,
+    0xb51d13aea4a488dd,0x6babab6398bdbe41,
+    0xe264589a4dcdab14,0xc696963c7eed2dd1,
+    0x8d7eb76070a08aec,0xfc1e1de5cf543ca2,
+    0xb0de65388cc8ada8,0x3b25a55f43294bcb,
+    0xdd15fe86affad912,0x49ef0eb713f39ebe,
+    0x8a2dbf142dfcc7ab,0x6e3569326c784337,
+    0xacb92ed9397bf996,0x49c2c37f07965404,
+    0xd7e77a8f87daf7fb,0xdc33745ec97be906,
+    0x86f0ac99b4e8dafd,0x69a028bb3ded71a3,
+    0xa8acd7c0222311bc,0xc40832ea0d68ce0c,
+    0xd2d80db02aabd62b,0xf50a3fa490c30190,
+    0x83c7088e1aab65db,0x792667c6da79e0fa,
+    0xa4b8cab1a1563f52,0x577001b891185938,
+    0xcde6fd5e09abcf26,0xed4c0226b55e6f86,
+    0x80b05e5ac60b6178,0x544f8158315b05b4,
+    0xa0dc75f1778e39d6,0x696361ae3db1c721,
+    0xc913936dd571c84c,0x3bc3a19cd1e38e9,
+    0xfb5878494ace3a5f,0x4ab48a04065c723,
+    0x9d174b2dcec0e47b,0x62eb0d64283f9c76,
+    0xc45d1df942711d9a,0x3ba5d0bd324f8394,
+    0xf5746577930d6500,0xca8f44ec7ee36479,
+    0x9968bf6abbe85f20,0x7e998b13cf4e1ecb,
+    0xbfc2ef456ae276e8,0x9e3fedd8c321a67e,
+    0xefb3ab16c59b14a2,0xc5cfe94ef3ea101e,
+    0x95d04aee3b80ece5,0xbba1f1d158724a12,
+    0xbb445da9ca61281f,0x2a8a6e45ae8edc97,
+    0xea1575143cf97226,0xf52d09d71a3293bd,
+    0x924d692ca61be758,0x593c2626705f9c56,
+    0xb6e0c377cfa2e12e,0x6f8b2fb00c77836c,
+    0xe498f455c38b997a,0xb6dfb9c0f956447,
+    0x8edf98b59a373fec,0x4724bd4189bd5eac,
+    0xb2977ee300c50fe7,0x58edec91ec2cb657,
+    0xdf3d5e9bc0f653e1,0x2f2967b66737e3ed,
+    0x8b865b215899f46c,0xbd79e0d20082ee74,
+    0xae67f1e9aec07187,0xecd8590680a3aa11,
+    0xda01ee641a708de9,0xe80e6f4820cc9495,
+    0x884134fe908658b2,0x3109058d147fdcdd,
+    0xaa51823e34a7eede,0xbd4b46f0599fd415,
+    0xd4e5e2cdc1d1ea96,0x6c9e18ac7007c91a,
+    0x850fadc09923329e,0x3e2cf6bc604ddb0,
+    0xa6539930bf6bff45,0x84db8346b786151c,
+    0xcfe87f7cef46ff16,0xe612641865679a63,
+    0x81f14fae158c5f6e,0x4fcb7e8f3f60c07e,
+    0xa26da3999aef7749,0xe3be5e330f38f09d,
+    0xcb090c8001ab551c,0x5cadf5bfd3072cc5,
+    0xfdcb4fa002162a63,0x73d9732fc7c8f7f6,
+    0x9e9f11c4014dda7e,0x2867e7fddcdd9afa,
+    0xc646d63501a1511d,0xb281e1fd541501b8,
+    0xf7d88bc24209a565,0x1f225a7ca91a4226,
+    0x9ae757596946075f,0x3375788de9b06958,
+    0xc1a12d2fc3978937,0x52d6b1641c83ae,
+    0xf209787bb47d6b84,0xc0678c5dbd23a49a,
+    0x9745eb4d50ce6332,0xf840b7ba963646e0,
+    0xbd176620a501fbff,0xb650e5a93bc3d898,
+    0xec5d3fa8ce427aff,0xa3e51f138ab4cebe,
+    0x93ba47c980e98cdf,0xc66f336c36b10137,
+    0xb8a8d9bbe123f017,0xb80b0047445d4184,
+    0xe6d3102ad96cec1d,0xa60dc059157491e5,
+    0x9043ea1ac7e41392,0x87c89837ad68db2f,
+    0xb454e4a179dd1877,0x29babe4598c311fb,
+    0xe16a1dc9d8545e94,0xf4296dd6fef3d67a,
+    0x8ce2529e2734bb1d,0x1899e4a65f58660c,
+    0xb01ae745b101e9e4,0x5ec05dcff72e7f8f,
+    0xdc21a1171d42645d,0x76707543f4fa1f73,
+    0x899504ae72497eba,0x6a06494a791c53a8,
+    0xabfa45da0edbde69,0x487db9d17636892,
+    0xd6f8d7509292d603,0x45a9d2845d3c42b6,
+    0x865b86925b9bc5c2,0xb8a2392ba45a9b2,
+    0xa7f26836f282b732,0x8e6cac7768d7141e,
+    0xd1ef0244af2364ff,0x3207d795430cd926,
+    0x8335616aed761f1f,0x7f44e6bd49e807b8,
+    0xa402b9c5a8d3a6e7,0x5f16206c9c6209a6,
+    0xcd036837130890a1,0x36dba887c37a8c0f,
+    0x802221226be55a64,0xc2494954da2c9789,
+    0xa02aa96b06deb0fd,0xf2db9baa10b7bd6c,
+    0xc83553c5c8965d3d,0x6f92829494e5acc7,
+    0xfa42a8b73abbf48c,0xcb772339ba1f17f9,
+    0x9c69a97284b578d7,0xff2a760414536efb,
+    0xc38413cf25e2d70d,0xfef5138519684aba,
+    0xf46518c2ef5b8cd1,0x7eb258665fc25d69,
+    0x98bf2f79d5993802,0xef2f773ffbd97a61,
+    0xbeeefb584aff8603,0xaafb550ffacfd8fa,
+    0xeeaaba2e5dbf6784,0x95ba2a53f983cf38,
+    0x952ab45cfa97a0b2,0xdd945a747bf26183,
+    0xba756174393d88df,0x94f971119aeef9e4,
+    0xe912b9d1478ceb17,0x7a37cd5601aab85d,
+    0x91abb422ccb812ee,0xac62e055c10ab33a,
+    0xb616a12b7fe617aa,0x577b986b314d6009,
+    0xe39c49765fdf9d94,0xed5a7e85fda0b80b,
+    0x8e41ade9fbebc27d,0x14588f13be847307,
+    0xb1d219647ae6b31c,0x596eb2d8ae258fc8,
+    0xde469fbd99a05fe3,0x6fca5f8ed9aef3bb,
+    0x8aec23d680043bee,0x25de7bb9480d5854,
+    0xada72ccc20054ae9,0xaf561aa79a10ae6a,
+    0xd910f7ff28069da4,0x1b2ba1518094da04,
+    0x87aa9aff79042286,0x90fb44d2f05d0842,
+    0xa99541bf57452b28,0x353a1607ac744a53,
+    0xd3fa922f2d1675f2,0x42889b8997915ce8,
+    0x847c9b5d7c2e09b7,0x69956135febada11,
+    0xa59bc234db398c25,0x43fab9837e699095,
+    0xcf02b2c21207ef2e,0x94f967e45e03f4bb,
+    0x8161afb94b44f57d,0x1d1be0eebac278f5,
+    0xa1ba1ba79e1632dc,0x6462d92a69731732,
+    0xca28a291859bbf93,0x7d7b8f7503cfdcfe,
+    0xfcb2cb35e702af78,0x5cda735244c3d43e,
+    0x9defbf01b061adab,0x3a0888136afa64a7,
+    0xc56baec21c7a1916,0x88aaa1845b8fdd0,
+    0xf6c69a72a3989f5b,0x8aad549e57273d45,
+    0x9a3c2087a63f6399,0x36ac54e2f678864b,
+    0xc0cb28a98fcf3c7f,0x84576a1bb416a7dd,
+    0xf0fdf2d3f3c30b9f,0x656d44a2a11c51d5,
+    0x969eb7c47859e743,0x9f644ae5a4b1b325,
+    0xbc4665b596706114,0x873d5d9f0dde1fee,
+    0xeb57ff22fc0c7959,0xa90cb506d155a7ea,
+    0x9316ff75dd87cbd8,0x9a7f12442d588f2,
+    0xb7dcbf5354e9bece,0xc11ed6d538aeb2f,
+    0xe5d3ef282a242e81,0x8f1668c8a86da5fa,
+    0x8fa475791a569d10,0xf96e017d694487bc,
+    0xb38d92d760ec4455,0x37c981dcc395a9ac,
+    0xe070f78d3927556a,0x85bbe253f47b1417,
+    0x8c469ab843b89562,0x93956d7478ccec8e,
+    0xaf58416654a6babb,0x387ac8d1970027b2,
+    0xdb2e51bfe9d0696a,0x6997b05fcc0319e,
+    0x88fcf317f22241e2,0x441fece3bdf81f03,
+    0xab3c2fddeeaad25a,0xd527e81cad7626c3,
+    0xd60b3bd56a5586f1,0x8a71e223d8d3b074,
+    0x85c7056562757456,0xf6872d5667844e49,
+    0xa738c6bebb12d16c,0xb428f8ac016561db,
+    0xd106f86e69d785c7,0xe13336d701beba52,
+    0x82a45b450226b39c,0xecc0024661173473,
+    0xa34d721642b06084,0x27f002d7f95d0190,
+    0xcc20ce9bd35c78a5,0x31ec038df7b441f4,
+    0xff290242c83396ce,0x7e67047175a15271,
+    0x9f79a169bd203e41,0xf0062c6e984d386,
+    0xc75809c42c684dd1,0x52c07b78a3e60868,
+    0xf92e0c3537826145,0xa7709a56ccdf8a82,
+    0x9bbcc7a142b17ccb,0x88a66076400bb691,
+    0xc2abf989935ddbfe,0x6acff893d00ea435,
+    0xf356f7ebf83552fe,0x583f6b8c4124d43,
+    0x98165af37b2153de,0xc3727a337a8b704a,
+    0xbe1bf1b059e9a8d6,0x744f18c0592e4c5c,
+    0xeda2ee1c7064130c,0x1162def06f79df73,
+    0x9485d4d1c63e8be7,0x8addcb5645ac2ba8,
+    0xb9a74a0637ce2ee1,0x6d953e2bd7173692,
+    0xe8111c87c5c1ba99,0xc8fa8db6ccdd0437,
+    0x910ab1d4db9914a0,0x1d9c9892400a22a2,
+    0xb54d5e4a127f59c8,0x2503beb6d00cab4b,
+    0xe2a0b5dc971f303a,0x2e44ae64840fd61d,
+    0x8da471a9de737e24,0x5ceaecfed289e5d2,
+    0xb10d8e1456105dad,0x7425a83e872c5f47,
+    0xdd50f1996b947518,0xd12f124e28f77719,
+    0x8a5296ffe33cc92f,0x82bd6b70d99aaa6f,
+    0xace73cbfdc0bfb7b,0x636cc64d1001550b,
+    0xd8210befd30efa5a,0x3c47f7e05401aa4e,
+    0x8714a775e3e95c78,0x65acfaec34810a71,
+    0xa8d9d1535ce3b396,0x7f1839a741a14d0d,
+    0xd31045a8341ca07c,0x1ede48111209a050,
+    0x83ea2b892091e44d,0x934aed0aab460432,
+    0xa4e4b66b68b65d60,0xf81da84d5617853f,
+    0xce1de40642e3f4b9,0x36251260ab9d668e,
+    0x80d2ae83e9ce78f3,0xc1d72b7c6b426019,
+    0xa1075a24e4421730,0xb24cf65b8612f81f,
+    0xc94930ae1d529cfc,0xdee033f26797b627,
+    0xfb9b7cd9a4a7443c,0x169840ef017da3b1,
+    0x9d412e0806e88aa5,0x8e1f289560ee864e,
+    0xc491798a08a2ad4e,0xf1a6f2bab92a27e2,
+    0xf5b5d7ec8acb58a2,0xae10af696774b1db,
+    0x9991a6f3d6bf1765,0xacca6da1e0a8ef29,
+    0xbff610b0cc6edd3f,0x17fd090a58d32af3,
+    0xeff394dcff8a948e,0xddfc4b4cef07f5b0,
+    0x95f83d0a1fb69cd9,0x4abdaf101564f98e,
+    0xbb764c4ca7a4440f,0x9d6d1ad41abe37f1,
+    0xea53df5fd18d5513,0x84c86189216dc5ed,
+    0x92746b9be2f8552c,0x32fd3cf5b4e49bb4,
+    0xb7118682dbb66a77,0x3fbc8c33221dc2a1,
+    0xe4d5e82392a40515,0xfabaf3feaa5334a,
+    0x8f05b1163ba6832d,0x29cb4d87f2a7400e,
+    0xb2c71d5bca9023f8,0x743e20e9ef511012,
+    0xdf78e4b2bd342cf6,0x914da9246b255416,
+    0x8bab8eefb6409c1a,0x1ad089b6c2f7548e,
+    0xae9672aba3d0c320,0xa184ac2473b529b1,
+    0xda3c0f568cc4f3e8,0xc9e5d72d90a2741e,
+    0x8865899617fb1871,0x7e2fa67c7a658892,
+    0xaa7eebfb9df9de8d,0xddbb901b98feeab7,
+    0xd51ea6fa85785631,0x552a74227f3ea565,
+    0x8533285c936b35de,0xd53a88958f87275f,
+    0xa67ff273b8460356,0x8a892abaf368f137,
+    0xd01fef10a657842c,0x2d2b7569b0432d85,
+    0x8213f56a67f6b29b,0x9c3b29620e29fc73,
+    0xa298f2c501f45f42,0x8349f3ba91b47b8f,
+    0xcb3f2f7642717713,0x241c70a936219a73,
+    0xfe0efb53d30dd4d7,0xed238cd383aa0110,
+    0x9ec95d1463e8a506,0xf4363804324a40aa,
+    0xc67bb4597ce2ce48,0xb143c6053edcd0d5,
+    0xf81aa16fdc1b81da,0xdd94b7868e94050a,
+    0x9b10a4e5e9913128,0xca7cf2b4191c8326,
+    0xc1d4ce1f63f57d72,0xfd1c2f611f63a3f0,
+    0xf24a01a73cf2dccf,0xbc633b39673c8cec,
+    0x976e41088617ca01,0xd5be0503e085d813,
+    0xbd49d14aa79dbc82,0x4b2d8644d8a74e18,
+    0xec9c459d51852ba2,0xddf8e7d60ed1219e,
+    0x93e1ab8252f33b45,0xcabb90e5c942b503,
+    0xb8da1662e7b00a17,0x3d6a751f3b936243,
+    0xe7109bfba19c0c9d,0xcc512670a783ad4,
+    0x906a617d450187e2,0x27fb2b80668b24c5,
+    0xb484f9dc9641e9da,0xb1f9f660802dedf6,
+    0xe1a63853bbd26451,0x5e7873f8a0396973,
+    0x8d07e33455637eb2,0xdb0b487b6423e1e8,
+    0xb049dc016abc5e5f,0x91ce1a9a3d2cda62,
+    0xdc5c5301c56b75f7,0x7641a140cc7810fb,
+    0x89b9b3e11b6329ba,0xa9e904c87fcb0a9d,
+    0xac2820d9623bf429,0x546345fa9fbdcd44,
+    0xd732290fbacaf133,0xa97c177947ad4095,
+    0x867f59a9d4bed6c0,0x49ed8eabcccc485d,
+    0xa81f301449ee8c70,0x5c68f256bfff5a74,
+    0xd226fc195c6a2f8c,0x73832eec6fff3111,
+    0x83585d8fd9c25db7,0xc831fd53c5ff7eab,
+    0xa42e74f3d032f525,0xba3e7ca8b77f5e55,
+    0xcd3a1230c43fb26f,0x28ce1bd2e55f35eb,
+    0x80444b5e7aa7cf85,0x7980d163cf5b81b3,
+    0xa0555e361951c366,0xd7e105bcc332621f,
+    0xc86ab5c39fa63440,0x8dd9472bf3fefaa7,
+    0xfa856334878fc150,0xb14f98f6f0feb951,
+    0x9c935e00d4b9d8d2,0x6ed1bf9a569f33d3,
+    0xc3b8358109e84f07,0xa862f80ec4700c8,
+    0xf4a642e14c6262c8,0xcd27bb612758c0fa,
+    0x98e7e9cccfbd7dbd,0x8038d51cb897789c,
+    0xbf21e44003acdd2c,0xe0470a63e6bd56c3,
+    0xeeea5d5004981478,0x1858ccfce06cac74,
+    0x95527a5202df0ccb,0xf37801e0c43ebc8,
+    0xbaa718e68396cffd,0xd30560258f54e6ba,
+    0xe950df20247c83fd,0x47c6b82ef32a2069,
+    0x91d28b7416cdd27e,0x4cdc331d57fa5441,
+    0xb6472e511c81471d,0xe0133fe4adf8e952,
+    0xe3d8f9e563a198e5,0x58180fddd97723a6,
+    0x8e679c2f5e44ff8f,0x570f09eaa7ea7648,};
 };
 
 template <class unused>
-const uint64_t powers_template<unused>::power_of_five_128[number_of_entries] = {
-        0xeef453d6923bd65a,0x113faa2906a13b3f,
-        0x9558b4661b6565f8,0x4ac7ca59a424c507,
-        0xbaaee17fa23ebf76,0x5d79bcf00d2df649,
-        0xe95a99df8ace6f53,0xf4d82c2c107973dc,
-        0x91d8a02bb6c10594,0x79071b9b8a4be869,
-        0xb64ec836a47146f9,0x9748e2826cdee284,
-        0xe3e27a444d8d98b7,0xfd1b1b2308169b25,
-        0x8e6d8c6ab0787f72,0xfe30f0f5e50e20f7,
-        0xb208ef855c969f4f,0xbdbd2d335e51a935,
-        0xde8b2b66b3bc4723,0xad2c788035e61382,
-        0x8b16fb203055ac76,0x4c3bcb5021afcc31,
-        0xaddcb9e83c6b1793,0xdf4abe242a1bbf3d,
-        0xd953e8624b85dd78,0xd71d6dad34a2af0d,
-        0x87d4713d6f33aa6b,0x8672648c40e5ad68,
-        0xa9c98d8ccb009506,0x680efdaf511f18c2,
-        0xd43bf0effdc0ba48,0x212bd1b2566def2,
-        0x84a57695fe98746d,0x14bb630f7604b57,
-        0xa5ced43b7e3e9188,0x419ea3bd35385e2d,
-        0xcf42894a5dce35ea,0x52064cac828675b9,
-        0x818995ce7aa0e1b2,0x7343efebd1940993,
-        0xa1ebfb4219491a1f,0x1014ebe6c5f90bf8,
-        0xca66fa129f9b60a6,0xd41a26e077774ef6,
-        0xfd00b897478238d0,0x8920b098955522b4,
-        0x9e20735e8cb16382,0x55b46e5f5d5535b0,
-        0xc5a890362fddbc62,0xeb2189f734aa831d,
-        0xf712b443bbd52b7b,0xa5e9ec7501d523e4,
-        0x9a6bb0aa55653b2d,0x47b233c92125366e,
-        0xc1069cd4eabe89f8,0x999ec0bb696e840a,
-        0xf148440a256e2c76,0xc00670ea43ca250d,
-        0x96cd2a865764dbca,0x380406926a5e5728,
-        0xbc807527ed3e12bc,0xc605083704f5ecf2,
-        0xeba09271e88d976b,0xf7864a44c633682e,
-        0x93445b8731587ea3,0x7ab3ee6afbe0211d,
-        0xb8157268fdae9e4c,0x5960ea05bad82964,
-        0xe61acf033d1a45df,0x6fb92487298e33bd,
-        0x8fd0c16206306bab,0xa5d3b6d479f8e056,
-        0xb3c4f1ba87bc8696,0x8f48a4899877186c,
-        0xe0b62e2929aba83c,0x331acdabfe94de87,
-        0x8c71dcd9ba0b4925,0x9ff0c08b7f1d0b14,
-        0xaf8e5410288e1b6f,0x7ecf0ae5ee44dd9,
-        0xdb71e91432b1a24a,0xc9e82cd9f69d6150,
-        0x892731ac9faf056e,0xbe311c083a225cd2,
-        0xab70fe17c79ac6ca,0x6dbd630a48aaf406,
-        0xd64d3d9db981787d,0x92cbbccdad5b108,
-        0x85f0468293f0eb4e,0x25bbf56008c58ea5,
-        0xa76c582338ed2621,0xaf2af2b80af6f24e,
-        0xd1476e2c07286faa,0x1af5af660db4aee1,
-        0x82cca4db847945ca,0x50d98d9fc890ed4d,
-        0xa37fce126597973c,0xe50ff107bab528a0,
-        0xcc5fc196fefd7d0c,0x1e53ed49a96272c8,
-        0xff77b1fcbebcdc4f,0x25e8e89c13bb0f7a,
-        0x9faacf3df73609b1,0x77b191618c54e9ac,
-        0xc795830d75038c1d,0xd59df5b9ef6a2417,
-        0xf97ae3d0d2446f25,0x4b0573286b44ad1d,
-        0x9becce62836ac577,0x4ee367f9430aec32,
-        0xc2e801fb244576d5,0x229c41f793cda73f,
-        0xf3a20279ed56d48a,0x6b43527578c1110f,
-        0x9845418c345644d6,0x830a13896b78aaa9,
-        0xbe5691ef416bd60c,0x23cc986bc656d553,
-        0xedec366b11c6cb8f,0x2cbfbe86b7ec8aa8,
-        0x94b3a202eb1c3f39,0x7bf7d71432f3d6a9,
-        0xb9e08a83a5e34f07,0xdaf5ccd93fb0cc53,
-        0xe858ad248f5c22c9,0xd1b3400f8f9cff68,
-        0x91376c36d99995be,0x23100809b9c21fa1,
-        0xb58547448ffffb2d,0xabd40a0c2832a78a,
-        0xe2e69915b3fff9f9,0x16c90c8f323f516c,
-        0x8dd01fad907ffc3b,0xae3da7d97f6792e3,
-        0xb1442798f49ffb4a,0x99cd11cfdf41779c,
-        0xdd95317f31c7fa1d,0x40405643d711d583,
-        0x8a7d3eef7f1cfc52,0x482835ea666b2572,
-        0xad1c8eab5ee43b66,0xda3243650005eecf,
-        0xd863b256369d4a40,0x90bed43e40076a82,
-        0x873e4f75e2224e68,0x5a7744a6e804a291,
-        0xa90de3535aaae202,0x711515d0a205cb36,
-        0xd3515c2831559a83,0xd5a5b44ca873e03,
-        0x8412d9991ed58091,0xe858790afe9486c2,
-        0xa5178fff668ae0b6,0x626e974dbe39a872,
-        0xce5d73ff402d98e3,0xfb0a3d212dc8128f,
-        0x80fa687f881c7f8e,0x7ce66634bc9d0b99,
-        0xa139029f6a239f72,0x1c1fffc1ebc44e80,
-        0xc987434744ac874e,0xa327ffb266b56220,
-        0xfbe9141915d7a922,0x4bf1ff9f0062baa8,
-        0x9d71ac8fada6c9b5,0x6f773fc3603db4a9,
-        0xc4ce17b399107c22,0xcb550fb4384d21d3,
-        0xf6019da07f549b2b,0x7e2a53a146606a48,
-        0x99c102844f94e0fb,0x2eda7444cbfc426d,
-        0xc0314325637a1939,0xfa911155fefb5308,
-        0xf03d93eebc589f88,0x793555ab7eba27ca,
-        0x96267c7535b763b5,0x4bc1558b2f3458de,
-        0xbbb01b9283253ca2,0x9eb1aaedfb016f16,
-        0xea9c227723ee8bcb,0x465e15a979c1cadc,
-        0x92a1958a7675175f,0xbfacd89ec191ec9,
-        0xb749faed14125d36,0xcef980ec671f667b,
-        0xe51c79a85916f484,0x82b7e12780e7401a,
-        0x8f31cc0937ae58d2,0xd1b2ecb8b0908810,
-        0xb2fe3f0b8599ef07,0x861fa7e6dcb4aa15,
-        0xdfbdcece67006ac9,0x67a791e093e1d49a,
-        0x8bd6a141006042bd,0xe0c8bb2c5c6d24e0,
-        0xaecc49914078536d,0x58fae9f773886e18,
-        0xda7f5bf590966848,0xaf39a475506a899e,
-        0x888f99797a5e012d,0x6d8406c952429603,
-        0xaab37fd7d8f58178,0xc8e5087ba6d33b83,
-        0xd5605fcdcf32e1d6,0xfb1e4a9a90880a64,
-        0x855c3be0a17fcd26,0x5cf2eea09a55067f,
-        0xa6b34ad8c9dfc06f,0xf42faa48c0ea481e,
-        0xd0601d8efc57b08b,0xf13b94daf124da26,
-        0x823c12795db6ce57,0x76c53d08d6b70858,
-        0xa2cb1717b52481ed,0x54768c4b0c64ca6e,
-        0xcb7ddcdda26da268,0xa9942f5dcf7dfd09,
-        0xfe5d54150b090b02,0xd3f93b35435d7c4c,
-        0x9efa548d26e5a6e1,0xc47bc5014a1a6daf,
-        0xc6b8e9b0709f109a,0x359ab6419ca1091b,
-        0xf867241c8cc6d4c0,0xc30163d203c94b62,
-        0x9b407691d7fc44f8,0x79e0de63425dcf1d,
-        0xc21094364dfb5636,0x985915fc12f542e4,
-        0xf294b943e17a2bc4,0x3e6f5b7b17b2939d,
-        0x979cf3ca6cec5b5a,0xa705992ceecf9c42,
-        0xbd8430bd08277231,0x50c6ff782a838353,
-        0xece53cec4a314ebd,0xa4f8bf5635246428,
-        0x940f4613ae5ed136,0x871b7795e136be99,
-        0xb913179899f68584,0x28e2557b59846e3f,
-        0xe757dd7ec07426e5,0x331aeada2fe589cf,
-        0x9096ea6f3848984f,0x3ff0d2c85def7621,
-        0xb4bca50b065abe63,0xfed077a756b53a9,
-        0xe1ebce4dc7f16dfb,0xd3e8495912c62894,
-        0x8d3360f09cf6e4bd,0x64712dd7abbbd95c,
-        0xb080392cc4349dec,0xbd8d794d96aacfb3,
-        0xdca04777f541c567,0xecf0d7a0fc5583a0,
-        0x89e42caaf9491b60,0xf41686c49db57244,
-        0xac5d37d5b79b6239,0x311c2875c522ced5,
-        0xd77485cb25823ac7,0x7d633293366b828b,
-        0x86a8d39ef77164bc,0xae5dff9c02033197,
-        0xa8530886b54dbdeb,0xd9f57f830283fdfc,
-        0xd267caa862a12d66,0xd072df63c324fd7b,
-        0x8380dea93da4bc60,0x4247cb9e59f71e6d,
-        0xa46116538d0deb78,0x52d9be85f074e608,
-        0xcd795be870516656,0x67902e276c921f8b,
-        0x806bd9714632dff6,0xba1cd8a3db53b6,
-        0xa086cfcd97bf97f3,0x80e8a40eccd228a4,
-        0xc8a883c0fdaf7df0,0x6122cd128006b2cd,
-        0xfad2a4b13d1b5d6c,0x796b805720085f81,
-        0x9cc3a6eec6311a63,0xcbe3303674053bb0,
-        0xc3f490aa77bd60fc,0xbedbfc4411068a9c,
-        0xf4f1b4d515acb93b,0xee92fb5515482d44,
-        0x991711052d8bf3c5,0x751bdd152d4d1c4a,
-        0xbf5cd54678eef0b6,0xd262d45a78a0635d,
-        0xef340a98172aace4,0x86fb897116c87c34,
-        0x9580869f0e7aac0e,0xd45d35e6ae3d4da0,
-        0xbae0a846d2195712,0x8974836059cca109,
-        0xe998d258869facd7,0x2bd1a438703fc94b,
-        0x91ff83775423cc06,0x7b6306a34627ddcf,
-        0xb67f6455292cbf08,0x1a3bc84c17b1d542,
-        0xe41f3d6a7377eeca,0x20caba5f1d9e4a93,
-        0x8e938662882af53e,0x547eb47b7282ee9c,
-        0xb23867fb2a35b28d,0xe99e619a4f23aa43,
-        0xdec681f9f4c31f31,0x6405fa00e2ec94d4,
-        0x8b3c113c38f9f37e,0xde83bc408dd3dd04,
-        0xae0b158b4738705e,0x9624ab50b148d445,
-        0xd98ddaee19068c76,0x3badd624dd9b0957,
-        0x87f8a8d4cfa417c9,0xe54ca5d70a80e5d6,
-        0xa9f6d30a038d1dbc,0x5e9fcf4ccd211f4c,
-        0xd47487cc8470652b,0x7647c3200069671f,
-        0x84c8d4dfd2c63f3b,0x29ecd9f40041e073,
-        0xa5fb0a17c777cf09,0xf468107100525890,
-        0xcf79cc9db955c2cc,0x7182148d4066eeb4,
-        0x81ac1fe293d599bf,0xc6f14cd848405530,
-        0xa21727db38cb002f,0xb8ada00e5a506a7c,
-        0xca9cf1d206fdc03b,0xa6d90811f0e4851c,
-        0xfd442e4688bd304a,0x908f4a166d1da663,
-        0x9e4a9cec15763e2e,0x9a598e4e043287fe,
-        0xc5dd44271ad3cdba,0x40eff1e1853f29fd,
-        0xf7549530e188c128,0xd12bee59e68ef47c,
-        0x9a94dd3e8cf578b9,0x82bb74f8301958ce,
-        0xc13a148e3032d6e7,0xe36a52363c1faf01,
-        0xf18899b1bc3f8ca1,0xdc44e6c3cb279ac1,
-        0x96f5600f15a7b7e5,0x29ab103a5ef8c0b9,
-        0xbcb2b812db11a5de,0x7415d448f6b6f0e7,
-        0xebdf661791d60f56,0x111b495b3464ad21,
-        0x936b9fcebb25c995,0xcab10dd900beec34,
-        0xb84687c269ef3bfb,0x3d5d514f40eea742,
-        0xe65829b3046b0afa,0xcb4a5a3112a5112,
-        0x8ff71a0fe2c2e6dc,0x47f0e785eaba72ab,
-        0xb3f4e093db73a093,0x59ed216765690f56,
-        0xe0f218b8d25088b8,0x306869c13ec3532c,
-        0x8c974f7383725573,0x1e414218c73a13fb,
-        0xafbd2350644eeacf,0xe5d1929ef90898fa,
-        0xdbac6c247d62a583,0xdf45f746b74abf39,
-        0x894bc396ce5da772,0x6b8bba8c328eb783,
-        0xab9eb47c81f5114f,0x66ea92f3f326564,
-        0xd686619ba27255a2,0xc80a537b0efefebd,
-        0x8613fd0145877585,0xbd06742ce95f5f36,
-        0xa798fc4196e952e7,0x2c48113823b73704,
-        0xd17f3b51fca3a7a0,0xf75a15862ca504c5,
-        0x82ef85133de648c4,0x9a984d73dbe722fb,
-        0xa3ab66580d5fdaf5,0xc13e60d0d2e0ebba,
-        0xcc963fee10b7d1b3,0x318df905079926a8,
-        0xffbbcfe994e5c61f,0xfdf17746497f7052,
-        0x9fd561f1fd0f9bd3,0xfeb6ea8bedefa633,
-        0xc7caba6e7c5382c8,0xfe64a52ee96b8fc0,
-        0xf9bd690a1b68637b,0x3dfdce7aa3c673b0,
-        0x9c1661a651213e2d,0x6bea10ca65c084e,
-        0xc31bfa0fe5698db8,0x486e494fcff30a62,
-        0xf3e2f893dec3f126,0x5a89dba3c3efccfa,
-        0x986ddb5c6b3a76b7,0xf89629465a75e01c,
-        0xbe89523386091465,0xf6bbb397f1135823,
-        0xee2ba6c0678b597f,0x746aa07ded582e2c,
-        0x94db483840b717ef,0xa8c2a44eb4571cdc,
-        0xba121a4650e4ddeb,0x92f34d62616ce413,
-        0xe896a0d7e51e1566,0x77b020baf9c81d17,
-        0x915e2486ef32cd60,0xace1474dc1d122e,
-        0xb5b5ada8aaff80b8,0xd819992132456ba,
-        0xe3231912d5bf60e6,0x10e1fff697ed6c69,
-        0x8df5efabc5979c8f,0xca8d3ffa1ef463c1,
-        0xb1736b96b6fd83b3,0xbd308ff8a6b17cb2,
-        0xddd0467c64bce4a0,0xac7cb3f6d05ddbde,
-        0x8aa22c0dbef60ee4,0x6bcdf07a423aa96b,
-        0xad4ab7112eb3929d,0x86c16c98d2c953c6,
-        0xd89d64d57a607744,0xe871c7bf077ba8b7,
-        0x87625f056c7c4a8b,0x11471cd764ad4972,
-        0xa93af6c6c79b5d2d,0xd598e40d3dd89bcf,
-        0xd389b47879823479,0x4aff1d108d4ec2c3,
-        0x843610cb4bf160cb,0xcedf722a585139ba,
-        0xa54394fe1eedb8fe,0xc2974eb4ee658828,
-        0xce947a3da6a9273e,0x733d226229feea32,
-        0x811ccc668829b887,0x806357d5a3f525f,
-        0xa163ff802a3426a8,0xca07c2dcb0cf26f7,
-        0xc9bcff6034c13052,0xfc89b393dd02f0b5,
-        0xfc2c3f3841f17c67,0xbbac2078d443ace2,
-        0x9d9ba7832936edc0,0xd54b944b84aa4c0d,
-        0xc5029163f384a931,0xa9e795e65d4df11,
-        0xf64335bcf065d37d,0x4d4617b5ff4a16d5,
-        0x99ea0196163fa42e,0x504bced1bf8e4e45,
-        0xc06481fb9bcf8d39,0xe45ec2862f71e1d6,
-        0xf07da27a82c37088,0x5d767327bb4e5a4c,
-        0x964e858c91ba2655,0x3a6a07f8d510f86f,
-        0xbbe226efb628afea,0x890489f70a55368b,
-        0xeadab0aba3b2dbe5,0x2b45ac74ccea842e,
-        0x92c8ae6b464fc96f,0x3b0b8bc90012929d,
-        0xb77ada0617e3bbcb,0x9ce6ebb40173744,
-        0xe55990879ddcaabd,0xcc420a6a101d0515,
-        0x8f57fa54c2a9eab6,0x9fa946824a12232d,
-        0xb32df8e9f3546564,0x47939822dc96abf9,
-        0xdff9772470297ebd,0x59787e2b93bc56f7,
-        0x8bfbea76c619ef36,0x57eb4edb3c55b65a,
-        0xaefae51477a06b03,0xede622920b6b23f1,
-        0xdab99e59958885c4,0xe95fab368e45eced,
-        0x88b402f7fd75539b,0x11dbcb0218ebb414,
-        0xaae103b5fcd2a881,0xd652bdc29f26a119,
-        0xd59944a37c0752a2,0x4be76d3346f0495f,
-        0x857fcae62d8493a5,0x6f70a4400c562ddb,
-        0xa6dfbd9fb8e5b88e,0xcb4ccd500f6bb952,
-        0xd097ad07a71f26b2,0x7e2000a41346a7a7,
-        0x825ecc24c873782f,0x8ed400668c0c28c8,
-        0xa2f67f2dfa90563b,0x728900802f0f32fa,
-        0xcbb41ef979346bca,0x4f2b40a03ad2ffb9,
-        0xfea126b7d78186bc,0xe2f610c84987bfa8,
-        0x9f24b832e6b0f436,0xdd9ca7d2df4d7c9,
-        0xc6ede63fa05d3143,0x91503d1c79720dbb,
-        0xf8a95fcf88747d94,0x75a44c6397ce912a,
-        0x9b69dbe1b548ce7c,0xc986afbe3ee11aba,
-        0xc24452da229b021b,0xfbe85badce996168,
-        0xf2d56790ab41c2a2,0xfae27299423fb9c3,
-        0x97c560ba6b0919a5,0xdccd879fc967d41a,
-        0xbdb6b8e905cb600f,0x5400e987bbc1c920,
-        0xed246723473e3813,0x290123e9aab23b68,
-        0x9436c0760c86e30b,0xf9a0b6720aaf6521,
-        0xb94470938fa89bce,0xf808e40e8d5b3e69,
-        0xe7958cb87392c2c2,0xb60b1d1230b20e04,
-        0x90bd77f3483bb9b9,0xb1c6f22b5e6f48c2,
-        0xb4ecd5f01a4aa828,0x1e38aeb6360b1af3,
-        0xe2280b6c20dd5232,0x25c6da63c38de1b0,
-        0x8d590723948a535f,0x579c487e5a38ad0e,
-        0xb0af48ec79ace837,0x2d835a9df0c6d851,
-        0xdcdb1b2798182244,0xf8e431456cf88e65,
-        0x8a08f0f8bf0f156b,0x1b8e9ecb641b58ff,
-        0xac8b2d36eed2dac5,0xe272467e3d222f3f,
-        0xd7adf884aa879177,0x5b0ed81dcc6abb0f,
-        0x86ccbb52ea94baea,0x98e947129fc2b4e9,
-        0xa87fea27a539e9a5,0x3f2398d747b36224,
-        0xd29fe4b18e88640e,0x8eec7f0d19a03aad,
-        0x83a3eeeef9153e89,0x1953cf68300424ac,
-        0xa48ceaaab75a8e2b,0x5fa8c3423c052dd7,
-        0xcdb02555653131b6,0x3792f412cb06794d,
-        0x808e17555f3ebf11,0xe2bbd88bbee40bd0,
-        0xa0b19d2ab70e6ed6,0x5b6aceaeae9d0ec4,
-        0xc8de047564d20a8b,0xf245825a5a445275,
-        0xfb158592be068d2e,0xeed6e2f0f0d56712,
-        0x9ced737bb6c4183d,0x55464dd69685606b,
-        0xc428d05aa4751e4c,0xaa97e14c3c26b886,
-        0xf53304714d9265df,0xd53dd99f4b3066a8,
-        0x993fe2c6d07b7fab,0xe546a8038efe4029,
-        0xbf8fdb78849a5f96,0xde98520472bdd033,
-        0xef73d256a5c0f77c,0x963e66858f6d4440,
-        0x95a8637627989aad,0xdde7001379a44aa8,
-        0xbb127c53b17ec159,0x5560c018580d5d52,
-        0xe9d71b689dde71af,0xaab8f01e6e10b4a6,
-        0x9226712162ab070d,0xcab3961304ca70e8,
-        0xb6b00d69bb55c8d1,0x3d607b97c5fd0d22,
-        0xe45c10c42a2b3b05,0x8cb89a7db77c506a,
-        0x8eb98a7a9a5b04e3,0x77f3608e92adb242,
-        0xb267ed1940f1c61c,0x55f038b237591ed3,
-        0xdf01e85f912e37a3,0x6b6c46dec52f6688,
-        0x8b61313bbabce2c6,0x2323ac4b3b3da015,
-        0xae397d8aa96c1b77,0xabec975e0a0d081a,
-        0xd9c7dced53c72255,0x96e7bd358c904a21,
-        0x881cea14545c7575,0x7e50d64177da2e54,
-        0xaa242499697392d2,0xdde50bd1d5d0b9e9,
-        0xd4ad2dbfc3d07787,0x955e4ec64b44e864,
-        0x84ec3c97da624ab4,0xbd5af13bef0b113e,
-        0xa6274bbdd0fadd61,0xecb1ad8aeacdd58e,
-        0xcfb11ead453994ba,0x67de18eda5814af2,
-        0x81ceb32c4b43fcf4,0x80eacf948770ced7,
-        0xa2425ff75e14fc31,0xa1258379a94d028d,
-        0xcad2f7f5359a3b3e,0x96ee45813a04330,
-        0xfd87b5f28300ca0d,0x8bca9d6e188853fc,
-        0x9e74d1b791e07e48,0x775ea264cf55347e,
-        0xc612062576589dda,0x95364afe032a819e,
-        0xf79687aed3eec551,0x3a83ddbd83f52205,
-        0x9abe14cd44753b52,0xc4926a9672793543,
-        0xc16d9a0095928a27,0x75b7053c0f178294,
-        0xf1c90080baf72cb1,0x5324c68b12dd6339,
-        0x971da05074da7bee,0xd3f6fc16ebca5e04,
-        0xbce5086492111aea,0x88f4bb1ca6bcf585,
-        0xec1e4a7db69561a5,0x2b31e9e3d06c32e6,
-        0x9392ee8e921d5d07,0x3aff322e62439fd0,
-        0xb877aa3236a4b449,0x9befeb9fad487c3,
-        0xe69594bec44de15b,0x4c2ebe687989a9b4,
-        0x901d7cf73ab0acd9,0xf9d37014bf60a11,
-        0xb424dc35095cd80f,0x538484c19ef38c95,
-        0xe12e13424bb40e13,0x2865a5f206b06fba,
-        0x8cbccc096f5088cb,0xf93f87b7442e45d4,
-        0xafebff0bcb24aafe,0xf78f69a51539d749,
-        0xdbe6fecebdedd5be,0xb573440e5a884d1c,
-        0x89705f4136b4a597,0x31680a88f8953031,
-        0xabcc77118461cefc,0xfdc20d2b36ba7c3e,
-        0xd6bf94d5e57a42bc,0x3d32907604691b4d,
-        0x8637bd05af6c69b5,0xa63f9a49c2c1b110,
-        0xa7c5ac471b478423,0xfcf80dc33721d54,
-        0xd1b71758e219652b,0xd3c36113404ea4a9,
-        0x83126e978d4fdf3b,0x645a1cac083126ea,
-        0xa3d70a3d70a3d70a,0x3d70a3d70a3d70a4,
-        0xcccccccccccccccc,0xcccccccccccccccd,
-        0x8000000000000000,0x0,
-        0xa000000000000000,0x0,
-        0xc800000000000000,0x0,
-        0xfa00000000000000,0x0,
-        0x9c40000000000000,0x0,
-        0xc350000000000000,0x0,
-        0xf424000000000000,0x0,
-        0x9896800000000000,0x0,
-        0xbebc200000000000,0x0,
-        0xee6b280000000000,0x0,
-        0x9502f90000000000,0x0,
-        0xba43b74000000000,0x0,
-        0xe8d4a51000000000,0x0,
-        0x9184e72a00000000,0x0,
-        0xb5e620f480000000,0x0,
-        0xe35fa931a0000000,0x0,
-        0x8e1bc9bf04000000,0x0,
-        0xb1a2bc2ec5000000,0x0,
-        0xde0b6b3a76400000,0x0,
-        0x8ac7230489e80000,0x0,
-        0xad78ebc5ac620000,0x0,
-        0xd8d726b7177a8000,0x0,
-        0x878678326eac9000,0x0,
-        0xa968163f0a57b400,0x0,
-        0xd3c21bcecceda100,0x0,
-        0x84595161401484a0,0x0,
-        0xa56fa5b99019a5c8,0x0,
-        0xcecb8f27f4200f3a,0x0,
-        0x813f3978f8940984,0x4000000000000000,
-        0xa18f07d736b90be5,0x5000000000000000,
-        0xc9f2c9cd04674ede,0xa400000000000000,
-        0xfc6f7c4045812296,0x4d00000000000000,
-        0x9dc5ada82b70b59d,0xf020000000000000,
-        0xc5371912364ce305,0x6c28000000000000,
-        0xf684df56c3e01bc6,0xc732000000000000,
-        0x9a130b963a6c115c,0x3c7f400000000000,
-        0xc097ce7bc90715b3,0x4b9f100000000000,
-        0xf0bdc21abb48db20,0x1e86d40000000000,
-        0x96769950b50d88f4,0x1314448000000000,
-        0xbc143fa4e250eb31,0x17d955a000000000,
-        0xeb194f8e1ae525fd,0x5dcfab0800000000,
-        0x92efd1b8d0cf37be,0x5aa1cae500000000,
-        0xb7abc627050305ad,0xf14a3d9e40000000,
-        0xe596b7b0c643c719,0x6d9ccd05d0000000,
-        0x8f7e32ce7bea5c6f,0xe4820023a2000000,
-        0xb35dbf821ae4f38b,0xdda2802c8a800000,
-        0xe0352f62a19e306e,0xd50b2037ad200000,
-        0x8c213d9da502de45,0x4526f422cc340000,
-        0xaf298d050e4395d6,0x9670b12b7f410000,
-        0xdaf3f04651d47b4c,0x3c0cdd765f114000,
-        0x88d8762bf324cd0f,0xa5880a69fb6ac800,
-        0xab0e93b6efee0053,0x8eea0d047a457a00,
-        0xd5d238a4abe98068,0x72a4904598d6d880,
-        0x85a36366eb71f041,0x47a6da2b7f864750,
-        0xa70c3c40a64e6c51,0x999090b65f67d924,
-        0xd0cf4b50cfe20765,0xfff4b4e3f741cf6d,
-        0x82818f1281ed449f,0xbff8f10e7a8921a4,
-        0xa321f2d7226895c7,0xaff72d52192b6a0d,
-        0xcbea6f8ceb02bb39,0x9bf4f8a69f764490,
-        0xfee50b7025c36a08,0x2f236d04753d5b4,
-        0x9f4f2726179a2245,0x1d762422c946590,
-        0xc722f0ef9d80aad6,0x424d3ad2b7b97ef5,
-        0xf8ebad2b84e0d58b,0xd2e0898765a7deb2,
-        0x9b934c3b330c8577,0x63cc55f49f88eb2f,
-        0xc2781f49ffcfa6d5,0x3cbf6b71c76b25fb,
-        0xf316271c7fc3908a,0x8bef464e3945ef7a,
-        0x97edd871cfda3a56,0x97758bf0e3cbb5ac,
-        0xbde94e8e43d0c8ec,0x3d52eeed1cbea317,
-        0xed63a231d4c4fb27,0x4ca7aaa863ee4bdd,
-        0x945e455f24fb1cf8,0x8fe8caa93e74ef6a,
-        0xb975d6b6ee39e436,0xb3e2fd538e122b44,
-        0xe7d34c64a9c85d44,0x60dbbca87196b616,
-        0x90e40fbeea1d3a4a,0xbc8955e946fe31cd,
-        0xb51d13aea4a488dd,0x6babab6398bdbe41,
-        0xe264589a4dcdab14,0xc696963c7eed2dd1,
-        0x8d7eb76070a08aec,0xfc1e1de5cf543ca2,
-        0xb0de65388cc8ada8,0x3b25a55f43294bcb,
-        0xdd15fe86affad912,0x49ef0eb713f39ebe,
-        0x8a2dbf142dfcc7ab,0x6e3569326c784337,
-        0xacb92ed9397bf996,0x49c2c37f07965404,
-        0xd7e77a8f87daf7fb,0xdc33745ec97be906,
-        0x86f0ac99b4e8dafd,0x69a028bb3ded71a3,
-        0xa8acd7c0222311bc,0xc40832ea0d68ce0c,
-        0xd2d80db02aabd62b,0xf50a3fa490c30190,
-        0x83c7088e1aab65db,0x792667c6da79e0fa,
-        0xa4b8cab1a1563f52,0x577001b891185938,
-        0xcde6fd5e09abcf26,0xed4c0226b55e6f86,
-        0x80b05e5ac60b6178,0x544f8158315b05b4,
-        0xa0dc75f1778e39d6,0x696361ae3db1c721,
-        0xc913936dd571c84c,0x3bc3a19cd1e38e9,
-        0xfb5878494ace3a5f,0x4ab48a04065c723,
-        0x9d174b2dcec0e47b,0x62eb0d64283f9c76,
-        0xc45d1df942711d9a,0x3ba5d0bd324f8394,
-        0xf5746577930d6500,0xca8f44ec7ee36479,
-        0x9968bf6abbe85f20,0x7e998b13cf4e1ecb,
-        0xbfc2ef456ae276e8,0x9e3fedd8c321a67e,
-        0xefb3ab16c59b14a2,0xc5cfe94ef3ea101e,
-        0x95d04aee3b80ece5,0xbba1f1d158724a12,
-        0xbb445da9ca61281f,0x2a8a6e45ae8edc97,
-        0xea1575143cf97226,0xf52d09d71a3293bd,
-        0x924d692ca61be758,0x593c2626705f9c56,
-        0xb6e0c377cfa2e12e,0x6f8b2fb00c77836c,
-        0xe498f455c38b997a,0xb6dfb9c0f956447,
-        0x8edf98b59a373fec,0x4724bd4189bd5eac,
-        0xb2977ee300c50fe7,0x58edec91ec2cb657,
-        0xdf3d5e9bc0f653e1,0x2f2967b66737e3ed,
-        0x8b865b215899f46c,0xbd79e0d20082ee74,
-        0xae67f1e9aec07187,0xecd8590680a3aa11,
-        0xda01ee641a708de9,0xe80e6f4820cc9495,
-        0x884134fe908658b2,0x3109058d147fdcdd,
-        0xaa51823e34a7eede,0xbd4b46f0599fd415,
-        0xd4e5e2cdc1d1ea96,0x6c9e18ac7007c91a,
-        0x850fadc09923329e,0x3e2cf6bc604ddb0,
-        0xa6539930bf6bff45,0x84db8346b786151c,
-        0xcfe87f7cef46ff16,0xe612641865679a63,
-        0x81f14fae158c5f6e,0x4fcb7e8f3f60c07e,
-        0xa26da3999aef7749,0xe3be5e330f38f09d,
-        0xcb090c8001ab551c,0x5cadf5bfd3072cc5,
-        0xfdcb4fa002162a63,0x73d9732fc7c8f7f6,
-        0x9e9f11c4014dda7e,0x2867e7fddcdd9afa,
-        0xc646d63501a1511d,0xb281e1fd541501b8,
-        0xf7d88bc24209a565,0x1f225a7ca91a4226,
-        0x9ae757596946075f,0x3375788de9b06958,
-        0xc1a12d2fc3978937,0x52d6b1641c83ae,
-        0xf209787bb47d6b84,0xc0678c5dbd23a49a,
-        0x9745eb4d50ce6332,0xf840b7ba963646e0,
-        0xbd176620a501fbff,0xb650e5a93bc3d898,
-        0xec5d3fa8ce427aff,0xa3e51f138ab4cebe,
-        0x93ba47c980e98cdf,0xc66f336c36b10137,
-        0xb8a8d9bbe123f017,0xb80b0047445d4184,
-        0xe6d3102ad96cec1d,0xa60dc059157491e5,
-        0x9043ea1ac7e41392,0x87c89837ad68db2f,
-        0xb454e4a179dd1877,0x29babe4598c311fb,
-        0xe16a1dc9d8545e94,0xf4296dd6fef3d67a,
-        0x8ce2529e2734bb1d,0x1899e4a65f58660c,
-        0xb01ae745b101e9e4,0x5ec05dcff72e7f8f,
-        0xdc21a1171d42645d,0x76707543f4fa1f73,
-        0x899504ae72497eba,0x6a06494a791c53a8,
-        0xabfa45da0edbde69,0x487db9d17636892,
-        0xd6f8d7509292d603,0x45a9d2845d3c42b6,
-        0x865b86925b9bc5c2,0xb8a2392ba45a9b2,
-        0xa7f26836f282b732,0x8e6cac7768d7141e,
-        0xd1ef0244af2364ff,0x3207d795430cd926,
-        0x8335616aed761f1f,0x7f44e6bd49e807b8,
-        0xa402b9c5a8d3a6e7,0x5f16206c9c6209a6,
-        0xcd036837130890a1,0x36dba887c37a8c0f,
-        0x802221226be55a64,0xc2494954da2c9789,
-        0xa02aa96b06deb0fd,0xf2db9baa10b7bd6c,
-        0xc83553c5c8965d3d,0x6f92829494e5acc7,
-        0xfa42a8b73abbf48c,0xcb772339ba1f17f9,
-        0x9c69a97284b578d7,0xff2a760414536efb,
-        0xc38413cf25e2d70d,0xfef5138519684aba,
-        0xf46518c2ef5b8cd1,0x7eb258665fc25d69,
-        0x98bf2f79d5993802,0xef2f773ffbd97a61,
-        0xbeeefb584aff8603,0xaafb550ffacfd8fa,
-        0xeeaaba2e5dbf6784,0x95ba2a53f983cf38,
-        0x952ab45cfa97a0b2,0xdd945a747bf26183,
-        0xba756174393d88df,0x94f971119aeef9e4,
-        0xe912b9d1478ceb17,0x7a37cd5601aab85d,
-        0x91abb422ccb812ee,0xac62e055c10ab33a,
-        0xb616a12b7fe617aa,0x577b986b314d6009,
-        0xe39c49765fdf9d94,0xed5a7e85fda0b80b,
-        0x8e41ade9fbebc27d,0x14588f13be847307,
-        0xb1d219647ae6b31c,0x596eb2d8ae258fc8,
-        0xde469fbd99a05fe3,0x6fca5f8ed9aef3bb,
-        0x8aec23d680043bee,0x25de7bb9480d5854,
-        0xada72ccc20054ae9,0xaf561aa79a10ae6a,
-        0xd910f7ff28069da4,0x1b2ba1518094da04,
-        0x87aa9aff79042286,0x90fb44d2f05d0842,
-        0xa99541bf57452b28,0x353a1607ac744a53,
-        0xd3fa922f2d1675f2,0x42889b8997915ce8,
-        0x847c9b5d7c2e09b7,0x69956135febada11,
-        0xa59bc234db398c25,0x43fab9837e699095,
-        0xcf02b2c21207ef2e,0x94f967e45e03f4bb,
-        0x8161afb94b44f57d,0x1d1be0eebac278f5,
-        0xa1ba1ba79e1632dc,0x6462d92a69731732,
-        0xca28a291859bbf93,0x7d7b8f7503cfdcfe,
-        0xfcb2cb35e702af78,0x5cda735244c3d43e,
-        0x9defbf01b061adab,0x3a0888136afa64a7,
-        0xc56baec21c7a1916,0x88aaa1845b8fdd0,
-        0xf6c69a72a3989f5b,0x8aad549e57273d45,
-        0x9a3c2087a63f6399,0x36ac54e2f678864b,
-        0xc0cb28a98fcf3c7f,0x84576a1bb416a7dd,
-        0xf0fdf2d3f3c30b9f,0x656d44a2a11c51d5,
-        0x969eb7c47859e743,0x9f644ae5a4b1b325,
-        0xbc4665b596706114,0x873d5d9f0dde1fee,
-        0xeb57ff22fc0c7959,0xa90cb506d155a7ea,
-        0x9316ff75dd87cbd8,0x9a7f12442d588f2,
-        0xb7dcbf5354e9bece,0xc11ed6d538aeb2f,
-        0xe5d3ef282a242e81,0x8f1668c8a86da5fa,
-        0x8fa475791a569d10,0xf96e017d694487bc,
-        0xb38d92d760ec4455,0x37c981dcc395a9ac,
-        0xe070f78d3927556a,0x85bbe253f47b1417,
-        0x8c469ab843b89562,0x93956d7478ccec8e,
-        0xaf58416654a6babb,0x387ac8d1970027b2,
-        0xdb2e51bfe9d0696a,0x6997b05fcc0319e,
-        0x88fcf317f22241e2,0x441fece3bdf81f03,
-        0xab3c2fddeeaad25a,0xd527e81cad7626c3,
-        0xd60b3bd56a5586f1,0x8a71e223d8d3b074,
-        0x85c7056562757456,0xf6872d5667844e49,
-        0xa738c6bebb12d16c,0xb428f8ac016561db,
-        0xd106f86e69d785c7,0xe13336d701beba52,
-        0x82a45b450226b39c,0xecc0024661173473,
-        0xa34d721642b06084,0x27f002d7f95d0190,
-        0xcc20ce9bd35c78a5,0x31ec038df7b441f4,
-        0xff290242c83396ce,0x7e67047175a15271,
-        0x9f79a169bd203e41,0xf0062c6e984d386,
-        0xc75809c42c684dd1,0x52c07b78a3e60868,
-        0xf92e0c3537826145,0xa7709a56ccdf8a82,
-        0x9bbcc7a142b17ccb,0x88a66076400bb691,
-        0xc2abf989935ddbfe,0x6acff893d00ea435,
-        0xf356f7ebf83552fe,0x583f6b8c4124d43,
-        0x98165af37b2153de,0xc3727a337a8b704a,
-        0xbe1bf1b059e9a8d6,0x744f18c0592e4c5c,
-        0xeda2ee1c7064130c,0x1162def06f79df73,
-        0x9485d4d1c63e8be7,0x8addcb5645ac2ba8,
-        0xb9a74a0637ce2ee1,0x6d953e2bd7173692,
-        0xe8111c87c5c1ba99,0xc8fa8db6ccdd0437,
-        0x910ab1d4db9914a0,0x1d9c9892400a22a2,
-        0xb54d5e4a127f59c8,0x2503beb6d00cab4b,
-        0xe2a0b5dc971f303a,0x2e44ae64840fd61d,
-        0x8da471a9de737e24,0x5ceaecfed289e5d2,
-        0xb10d8e1456105dad,0x7425a83e872c5f47,
-        0xdd50f1996b947518,0xd12f124e28f77719,
-        0x8a5296ffe33cc92f,0x82bd6b70d99aaa6f,
-        0xace73cbfdc0bfb7b,0x636cc64d1001550b,
-        0xd8210befd30efa5a,0x3c47f7e05401aa4e,
-        0x8714a775e3e95c78,0x65acfaec34810a71,
-        0xa8d9d1535ce3b396,0x7f1839a741a14d0d,
-        0xd31045a8341ca07c,0x1ede48111209a050,
-        0x83ea2b892091e44d,0x934aed0aab460432,
-        0xa4e4b66b68b65d60,0xf81da84d5617853f,
-        0xce1de40642e3f4b9,0x36251260ab9d668e,
-        0x80d2ae83e9ce78f3,0xc1d72b7c6b426019,
-        0xa1075a24e4421730,0xb24cf65b8612f81f,
-        0xc94930ae1d529cfc,0xdee033f26797b627,
-        0xfb9b7cd9a4a7443c,0x169840ef017da3b1,
-        0x9d412e0806e88aa5,0x8e1f289560ee864e,
-        0xc491798a08a2ad4e,0xf1a6f2bab92a27e2,
-        0xf5b5d7ec8acb58a2,0xae10af696774b1db,
-        0x9991a6f3d6bf1765,0xacca6da1e0a8ef29,
-        0xbff610b0cc6edd3f,0x17fd090a58d32af3,
-        0xeff394dcff8a948e,0xddfc4b4cef07f5b0,
-        0x95f83d0a1fb69cd9,0x4abdaf101564f98e,
-        0xbb764c4ca7a4440f,0x9d6d1ad41abe37f1,
-        0xea53df5fd18d5513,0x84c86189216dc5ed,
-        0x92746b9be2f8552c,0x32fd3cf5b4e49bb4,
-        0xb7118682dbb66a77,0x3fbc8c33221dc2a1,
-        0xe4d5e82392a40515,0xfabaf3feaa5334a,
-        0x8f05b1163ba6832d,0x29cb4d87f2a7400e,
-        0xb2c71d5bca9023f8,0x743e20e9ef511012,
-        0xdf78e4b2bd342cf6,0x914da9246b255416,
-        0x8bab8eefb6409c1a,0x1ad089b6c2f7548e,
-        0xae9672aba3d0c320,0xa184ac2473b529b1,
-        0xda3c0f568cc4f3e8,0xc9e5d72d90a2741e,
-        0x8865899617fb1871,0x7e2fa67c7a658892,
-        0xaa7eebfb9df9de8d,0xddbb901b98feeab7,
-        0xd51ea6fa85785631,0x552a74227f3ea565,
-        0x8533285c936b35de,0xd53a88958f87275f,
-        0xa67ff273b8460356,0x8a892abaf368f137,
-        0xd01fef10a657842c,0x2d2b7569b0432d85,
-        0x8213f56a67f6b29b,0x9c3b29620e29fc73,
-        0xa298f2c501f45f42,0x8349f3ba91b47b8f,
-        0xcb3f2f7642717713,0x241c70a936219a73,
-        0xfe0efb53d30dd4d7,0xed238cd383aa0110,
-        0x9ec95d1463e8a506,0xf4363804324a40aa,
-        0xc67bb4597ce2ce48,0xb143c6053edcd0d5,
-        0xf81aa16fdc1b81da,0xdd94b7868e94050a,
-        0x9b10a4e5e9913128,0xca7cf2b4191c8326,
-        0xc1d4ce1f63f57d72,0xfd1c2f611f63a3f0,
-        0xf24a01a73cf2dccf,0xbc633b39673c8cec,
-        0x976e41088617ca01,0xd5be0503e085d813,
-        0xbd49d14aa79dbc82,0x4b2d8644d8a74e18,
-        0xec9c459d51852ba2,0xddf8e7d60ed1219e,
-        0x93e1ab8252f33b45,0xcabb90e5c942b503,
-        0xb8da1662e7b00a17,0x3d6a751f3b936243,
-        0xe7109bfba19c0c9d,0xcc512670a783ad4,
-        0x906a617d450187e2,0x27fb2b80668b24c5,
-        0xb484f9dc9641e9da,0xb1f9f660802dedf6,
-        0xe1a63853bbd26451,0x5e7873f8a0396973,
-        0x8d07e33455637eb2,0xdb0b487b6423e1e8,
-        0xb049dc016abc5e5f,0x91ce1a9a3d2cda62,
-        0xdc5c5301c56b75f7,0x7641a140cc7810fb,
-        0x89b9b3e11b6329ba,0xa9e904c87fcb0a9d,
-        0xac2820d9623bf429,0x546345fa9fbdcd44,
-        0xd732290fbacaf133,0xa97c177947ad4095,
-        0x867f59a9d4bed6c0,0x49ed8eabcccc485d,
-        0xa81f301449ee8c70,0x5c68f256bfff5a74,
-        0xd226fc195c6a2f8c,0x73832eec6fff3111,
-        0x83585d8fd9c25db7,0xc831fd53c5ff7eab,
-        0xa42e74f3d032f525,0xba3e7ca8b77f5e55,
-        0xcd3a1230c43fb26f,0x28ce1bd2e55f35eb,
-        0x80444b5e7aa7cf85,0x7980d163cf5b81b3,
-        0xa0555e361951c366,0xd7e105bcc332621f,
-        0xc86ab5c39fa63440,0x8dd9472bf3fefaa7,
-        0xfa856334878fc150,0xb14f98f6f0feb951,
-        0x9c935e00d4b9d8d2,0x6ed1bf9a569f33d3,
-        0xc3b8358109e84f07,0xa862f80ec4700c8,
-        0xf4a642e14c6262c8,0xcd27bb612758c0fa,
-        0x98e7e9cccfbd7dbd,0x8038d51cb897789c,
-        0xbf21e44003acdd2c,0xe0470a63e6bd56c3,
-        0xeeea5d5004981478,0x1858ccfce06cac74,
-        0x95527a5202df0ccb,0xf37801e0c43ebc8,
-        0xbaa718e68396cffd,0xd30560258f54e6ba,
-        0xe950df20247c83fd,0x47c6b82ef32a2069,
-        0x91d28b7416cdd27e,0x4cdc331d57fa5441,
-        0xb6472e511c81471d,0xe0133fe4adf8e952,
-        0xe3d8f9e563a198e5,0x58180fddd97723a6,
-        0x8e679c2f5e44ff8f,0x570f09eaa7ea7648,};
+constexpr uint64_t powers_template<unused>::power_of_five_128[number_of_entries];
+
 using powers = powers_template<>;
 
-}
+} // namespace fast_float
 
 #endif
 
@@ -8368,7 +9763,7 @@ namespace fast_float {
 // low part corresponding to the least significant bits.
 //
 template <int bit_precision>
-fastfloat_really_inline
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 value128 compute_product_approximation(int64_t q, uint64_t w) {
   const int index = 2 * int(q - powers::smallest_power_of_five);
   // For small values of q, e.g., q in [0,27], the answer is always exact because
@@ -8399,9 +9794,9 @@ namespace detail {
  * where
  *   p = log(5**q)/log(2) = q * log(5)/log(2)
  *
- * For negative values of q in (-400,0), we have that 
+ * For negative values of q in (-400,0), we have that
  *  f = (((152170 + 65536) * q ) >> 16);
- * is equal to 
+ * is equal to
  *   -ceil(p) + q
  * where
  *   p = log(5**-q)/log(2) = -q * log(5)/log(2)
@@ -8414,7 +9809,7 @@ namespace detail {
 // create an adjusted mantissa, biased by the invalid power2
 // for significant digits already multiplied by 10 ** q.
 template <typename binary>
-fastfloat_really_inline
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14
 adjusted_mantissa compute_error_scaled(int64_t q, uint64_t w, int lz) noexcept  {
   int hilz = int(w >> 63) ^ 1;
   adjusted_mantissa answer;
@@ -8427,7 +9822,7 @@ adjusted_mantissa compute_error_scaled(int64_t q, uint64_t w, int lz) noexcept
 // w * 10 ** q, without rounding the representation up.
 // the power2 in the exponent will be adjusted by invalid_am_bias.
 template <typename binary>
-fastfloat_really_inline
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 adjusted_mantissa compute_error(int64_t q, uint64_t w)  noexcept  {
   int lz = leading_zeroes(w);
   w <<= lz;
@@ -8441,7 +9836,7 @@ adjusted_mantissa compute_error(int64_t q, uint64_t w)  noexcept  {
 // return an adjusted_mantissa with a negative power of 2: the caller should recompute
 // in such cases.
 template <typename binary>
-fastfloat_really_inline
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 adjusted_mantissa compute_float(int64_t q, uint64_t w)  noexcept  {
   adjusted_mantissa answer;
   if ((w == 0) || (q < binary::smallest_power_of_ten())) {
@@ -8468,23 +9863,19 @@ adjusted_mantissa compute_float(int64_t q, uint64_t w)  noexcept  {
   // 3. We might lose a bit due to the "upperbit" routine (result too small, requiring a shift)
 
   value128 product = compute_product_approximation<binary::mantissa_explicit_bits() + 3>(q, w);
-  if(product.low == 0xFFFFFFFFFFFFFFFF) { //  could guard it further
-    // In some very rare cases, this could happen, in which case we might need a more accurate
-    // computation that what we can provide cheaply. This is very, very unlikely.
-    //
-    const bool inside_safe_exponent = (q >= -27) && (q <= 55); // always good because 5**q <2**128 when q>=0, 
-    // and otherwise, for q<0, we have 5**-q<2**64 and the 128-bit reciprocal allows for exact computation.
-    if(!inside_safe_exponent) {
-      return compute_error_scaled<binary>(q, product.high, lz);
-    }
-  }
+  // The computed 'product' is always sufficient.
+  // Mathematical proof:
+  // Noble Mushtak and Daniel Lemire, Fast Number Parsing Without Fallback (to appear)
+  // See script/mushtak_lemire.py
+
   // The "compute_product_approximation" function can be slightly slower than a branchless approach:
   // value128 product = compute_product(q, w);
   // but in practice, we can win big with the compute_product_approximation if its additional branch
   // is easily predicted. Which is best is data specific.
   int upperbit = int(product.high >> 63);
+  int shift = upperbit + 64 - binary::mantissa_explicit_bits() - 3;
 
-  answer.mantissa = product.high >> (upperbit + 64 - binary::mantissa_explicit_bits() - 3);
+  answer.mantissa = product.high >> shift;
 
   answer.power2 = int32_t(detail::power(int32_t(q)) + upperbit - lz - binary::minimum_exponent());
   if (answer.power2 <= 0) { // we have a subnormal?
@@ -8520,7 +9911,7 @@ adjusted_mantissa compute_float(int64_t q, uint64_t w)  noexcept  {
     // To be in-between two floats we need that in doing
     //   answer.mantissa = product.high >> (upperbit + 64 - binary::mantissa_explicit_bits() - 3);
     // ... we dropped out only zeroes. But if this happened, then we can go back!!!
-    if((answer.mantissa  << (upperbit + 64 - binary::mantissa_explicit_bits() - 3)) ==  product.high) {
+    if((answer.mantissa  << shift) ==  product.high) {
       answer.mantissa &= ~uint64_t(1);          // flip it so that we do not round up
     }
   }
@@ -8565,7 +9956,7 @@ namespace fast_float {
 // we might have platforms where `CHAR_BIT` is not 8, so let's avoid
 // doing `8 * sizeof(limb)`.
 #if defined(FASTFLOAT_64BIT) && !defined(__sparc)
-#define FASTFLOAT_64BIT_LIMB
+#define FASTFLOAT_64BIT_LIMB 1
 typedef uint64_t limb;
 constexpr size_t limb_bits = 64;
 #else
@@ -8598,27 +9989,27 @@ struct stackvec {
   stackvec &operator=(stackvec &&other) = delete;
 
   // create stack vector from existing limb span.
-  stackvec(limb_span s) {
+  FASTFLOAT_CONSTEXPR20 stackvec(limb_span s) {
     FASTFLOAT_ASSERT(try_extend(s));
   }
 
-  limb& operator[](size_t index) noexcept {
+  FASTFLOAT_CONSTEXPR14 limb& operator[](size_t index) noexcept {
     FASTFLOAT_DEBUG_ASSERT(index < length);
     return data[index];
   }
-  const limb& operator[](size_t index) const noexcept {
+  FASTFLOAT_CONSTEXPR14 const limb& operator[](size_t index) const noexcept {
     FASTFLOAT_DEBUG_ASSERT(index < length);
     return data[index];
   }
   // index from the end of the container
-  const limb& rindex(size_t index) const noexcept {
+  FASTFLOAT_CONSTEXPR14 const limb& rindex(size_t index) const noexcept {
     FASTFLOAT_DEBUG_ASSERT(index < length);
     size_t rindex = length - index - 1;
     return data[rindex];
   }
 
   // set the length, without bounds checking.
-  void set_len(size_t len) noexcept {
+  FASTFLOAT_CONSTEXPR14 void set_len(size_t len) noexcept {
     length = uint16_t(len);
   }
   constexpr size_t len() const noexcept {
@@ -8631,12 +10022,12 @@ struct stackvec {
     return size;
   }
   // append item to vector, without bounds checking
-  void push_unchecked(limb value) noexcept {
+  FASTFLOAT_CONSTEXPR14 void push_unchecked(limb value) noexcept {
     data[length] = value;
     length++;
   }
   // append item to vector, returning if item was added
-  bool try_push(limb value) noexcept {
+  FASTFLOAT_CONSTEXPR14 bool try_push(limb value) noexcept {
     if (len() < capacity()) {
       push_unchecked(value);
       return true;
@@ -8645,13 +10036,13 @@ struct stackvec {
     }
   }
   // add items to the vector, from a span, without bounds checking
-  void extend_unchecked(limb_span s) noexcept {
+  FASTFLOAT_CONSTEXPR20 void extend_unchecked(limb_span s) noexcept {
     limb* ptr = data + length;
-    ::memcpy((void*)ptr, (const void*)s.ptr, sizeof(limb) * s.len());
+    std::copy_n(s.ptr, s.len(), ptr);
     set_len(len() + s.len());
   }
   // try to add items to the vector, returning if items were added
-  bool try_extend(limb_span s) noexcept {
+  FASTFLOAT_CONSTEXPR20 bool try_extend(limb_span s) noexcept {
     if (len() + s.len() <= capacity()) {
       extend_unchecked(s);
       return true;
@@ -8662,6 +10053,7 @@ struct stackvec {
   // resize the vector, without bounds checking
   // if the new size is longer than the vector, assign value to each
   // appended item.
+  FASTFLOAT_CONSTEXPR20
   void resize_unchecked(size_t new_len, limb value) noexcept {
     if (new_len > len()) {
       size_t count = new_len - len();
@@ -8674,7 +10066,7 @@ struct stackvec {
     }
   }
   // try to resize the vector, returning if the vector was resized.
-  bool try_resize(size_t new_len, limb value) noexcept {
+  FASTFLOAT_CONSTEXPR20 bool try_resize(size_t new_len, limb value) noexcept {
     if (new_len > capacity()) {
       return false;
     } else {
@@ -8685,7 +10077,7 @@ struct stackvec {
   // check if any limbs are non-zero after the given index.
   // this needs to be done in reverse order, since the index
   // is relative to the most significant limbs.
-  bool nonzero(size_t index) const noexcept {
+  FASTFLOAT_CONSTEXPR14 bool nonzero(size_t index) const noexcept {
     while (index < len()) {
       if (rindex(index) != 0) {
         return true;
@@ -8695,27 +10087,27 @@ struct stackvec {
     return false;
   }
   // normalize the big integer, so most-significant zero limbs are removed.
-  void normalize() noexcept {
+  FASTFLOAT_CONSTEXPR14 void normalize() noexcept {
     while (len() > 0 && rindex(0) == 0) {
       length--;
     }
   }
 };
 
-fastfloat_really_inline
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14
 uint64_t empty_hi64(bool& truncated) noexcept {
   truncated = false;
   return 0;
 }
 
-fastfloat_really_inline
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 uint64_t uint64_hi64(uint64_t r0, bool& truncated) noexcept {
   truncated = false;
   int shl = leading_zeroes(r0);
   return r0 << shl;
 }
 
-fastfloat_really_inline
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 uint64_t uint64_hi64(uint64_t r0, uint64_t r1, bool& truncated) noexcept {
   int shl = leading_zeroes(r0);
   if (shl == 0) {
@@ -8728,19 +10120,19 @@ uint64_t uint64_hi64(uint64_t r0, uint64_t r1, bool& truncated) noexcept {
   }
 }
 
-fastfloat_really_inline
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 uint64_t uint32_hi64(uint32_t r0, bool& truncated) noexcept {
   return uint64_hi64(r0, truncated);
 }
 
-fastfloat_really_inline
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 uint64_t uint32_hi64(uint32_t r0, uint32_t r1, bool& truncated) noexcept {
   uint64_t x0 = r0;
   uint64_t x1 = r1;
   return uint64_hi64((x0 << 32) | x1, truncated);
 }
 
-fastfloat_really_inline
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 uint64_t uint32_hi64(uint32_t r0, uint32_t r1, uint32_t r2, bool& truncated) noexcept {
   uint64_t x0 = r0;
   uint64_t x1 = r1;
@@ -8752,15 +10144,16 @@ uint64_t uint32_hi64(uint32_t r0, uint32_t r1, uint32_t r2, bool& truncated) noe
 // we want an efficient operation. for msvc, where
 // we don't have built-in intrinsics, this is still
 // pretty fast.
-fastfloat_really_inline
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 limb scalar_add(limb x, limb y, bool& overflow) noexcept {
   limb z;
-
 // gcc and clang
 #if defined(__has_builtin)
   #if __has_builtin(__builtin_add_overflow)
-    overflow = __builtin_add_overflow(x, y, &z);
-    return z;
+    if (!cpp20_and_in_constexpr()) {
+      overflow = __builtin_add_overflow(x, y, &z);
+      return z;
+    }
   #endif
 #endif
 
@@ -8771,7 +10164,7 @@ limb scalar_add(limb x, limb y, bool& overflow) noexcept {
 }
 
 // multiply two small integers, getting both the high and low bits.
-fastfloat_really_inline
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 limb scalar_mul(limb x, limb y, limb& carry) noexcept {
 #ifdef FASTFLOAT_64BIT_LIMB
   #if defined(__SIZEOF_INT128__)
@@ -8799,7 +10192,8 @@ limb scalar_mul(limb x, limb y, limb& carry) noexcept {
 // add scalar value to bigint starting from offset.
 // used in grade school multiplication
 template <uint16_t size>
-inline bool small_add_from(stackvec<size>& vec, limb y, size_t start) noexcept {
+inline FASTFLOAT_CONSTEXPR20
+bool small_add_from(stackvec<size>& vec, limb y, size_t start) noexcept {
   size_t index = start;
   limb carry = y;
   bool overflow;
@@ -8816,13 +10210,15 @@ inline bool small_add_from(stackvec<size>& vec, limb y, size_t start) noexcept {
 
 // add scalar value to bigint.
 template <uint16_t size>
-fastfloat_really_inline bool small_add(stackvec<size>& vec, limb y) noexcept {
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
+bool small_add(stackvec<size>& vec, limb y) noexcept {
   return small_add_from(vec, y, 0);
 }
 
 // multiply bigint by scalar value.
 template <uint16_t size>
-inline bool small_mul(stackvec<size>& vec, limb y) noexcept {
+inline FASTFLOAT_CONSTEXPR20
+bool small_mul(stackvec<size>& vec, limb y) noexcept {
   limb carry = 0;
   for (size_t index = 0; index < vec.len(); index++) {
     vec[index] = scalar_mul(vec[index], y, carry);
@@ -8836,6 +10232,7 @@ inline bool small_mul(stackvec<size>& vec, limb y) noexcept {
 // add bigint to bigint starting from index.
 // used in grade school multiplication
 template <uint16_t size>
+FASTFLOAT_CONSTEXPR20
 bool large_add_from(stackvec<size>& x, limb_span y, size_t start) noexcept {
   // the effective x buffer is from `xstart..x.len()`, so exit early
   // if we can't get that current range.
@@ -8866,12 +10263,14 @@ bool large_add_from(stackvec<size>& x, limb_span y, size_t start) noexcept {
 
 // add bigint to bigint.
 template <uint16_t size>
-fastfloat_really_inline bool large_add_from(stackvec<size>& x, limb_span y) noexcept {
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
+bool large_add_from(stackvec<size>& x, limb_span y) noexcept {
   return large_add_from(x, y, 0);
 }
 
 // grade-school multiplication algorithm
 template <uint16_t size>
+FASTFLOAT_CONSTEXPR20
 bool long_mul(stackvec<size>& x, limb_span y) noexcept {
   limb_span xs = limb_span(x.data, x.len());
   stackvec<size> z(xs);
@@ -8900,6 +10299,7 @@ bool long_mul(stackvec<size>& x, limb_span y) noexcept {
 
 // grade-school multiplication algorithm
 template <uint16_t size>
+FASTFLOAT_CONSTEXPR20
 bool large_mul(stackvec<size>& x, limb_span y) noexcept {
   if (y.len() == 1) {
     FASTFLOAT_TRY(small_mul(x, y[0]));
@@ -8909,21 +10309,52 @@ bool large_mul(stackvec<size>& x, limb_span y) noexcept {
   return true;
 }
 
+template <typename = void>
+struct pow5_tables {
+  static constexpr uint32_t large_step = 135;
+  static constexpr uint64_t small_power_of_5[] = {
+    1UL, 5UL, 25UL, 125UL, 625UL, 3125UL, 15625UL, 78125UL, 390625UL,
+    1953125UL, 9765625UL, 48828125UL, 244140625UL, 1220703125UL,
+    6103515625UL, 30517578125UL, 152587890625UL, 762939453125UL,
+    3814697265625UL, 19073486328125UL, 95367431640625UL, 476837158203125UL,
+    2384185791015625UL, 11920928955078125UL, 59604644775390625UL,
+    298023223876953125UL, 1490116119384765625UL, 7450580596923828125UL,
+  };
+#ifdef FASTFLOAT_64BIT_LIMB
+  constexpr static limb large_power_of_5[] = {
+    1414648277510068013UL, 9180637584431281687UL, 4539964771860779200UL,
+    10482974169319127550UL, 198276706040285095UL};
+#else
+  constexpr static limb large_power_of_5[] = {
+    4279965485U, 329373468U, 4020270615U, 2137533757U, 4287402176U,
+    1057042919U, 1071430142U, 2440757623U, 381945767U, 46164893U};
+#endif
+};
+
+template <typename T>
+constexpr uint32_t pow5_tables<T>::large_step;
+
+template <typename T>
+constexpr uint64_t pow5_tables<T>::small_power_of_5[];
+
+template <typename T>
+constexpr limb pow5_tables<T>::large_power_of_5[];
+
 // big integer type. implements a small subset of big integer
 // arithmetic, using simple algorithms since asymptotically
 // faster algorithms are slower for a small number of limbs.
 // all operations assume the big-integer is normalized.
-struct bigint {
+struct bigint : pow5_tables<> {
   // storage of the limbs, in little-endian order.
   stackvec<bigint_limbs> vec;
 
-  bigint(): vec() {}
+  FASTFLOAT_CONSTEXPR20 bigint(): vec() {}
   bigint(const bigint &) = delete;
   bigint &operator=(const bigint &) = delete;
   bigint(bigint &&) = delete;
   bigint &operator=(bigint &&other) = delete;
 
-  bigint(uint64_t value): vec() {
+  FASTFLOAT_CONSTEXPR20 bigint(uint64_t value): vec() {
 #ifdef FASTFLOAT_64BIT_LIMB
     vec.push_unchecked(value);
 #else
@@ -8935,7 +10366,7 @@ struct bigint {
 
   // get the high 64 bits from the vector, and if bits were truncated.
   // this is to get the significant digits for the float.
-  uint64_t hi64(bool& truncated) const noexcept {
+  FASTFLOAT_CONSTEXPR20 uint64_t hi64(bool& truncated) const noexcept {
 #ifdef FASTFLOAT_64BIT_LIMB
     if (vec.len() == 0) {
       return empty_hi64(truncated);
@@ -8967,7 +10398,7 @@ struct bigint {
   // positive, this is larger, otherwise they are equal.
   // the limbs are stored in little-endian order, so we
   // must compare the limbs in ever order.
-  int compare(const bigint& other) const noexcept {
+  FASTFLOAT_CONSTEXPR20 int compare(const bigint& other) const noexcept {
     if (vec.len() > other.vec.len()) {
       return 1;
     } else if (vec.len() < other.vec.len()) {
@@ -8988,7 +10419,7 @@ struct bigint {
 
   // shift left each limb n bits, carrying over to the new limb
   // returns true if we were able to shift all the digits.
-  bool shl_bits(size_t n) noexcept {
+  FASTFLOAT_CONSTEXPR20 bool shl_bits(size_t n) noexcept {
     // Internally, for each item, we shift left by n, and add the previous
     // right shifted limb-bits.
     // For example, we transform (for u8) shifted left 2, to:
@@ -9014,7 +10445,7 @@ struct bigint {
   }
 
   // move the limbs left by `n` limbs.
-  bool shl_limbs(size_t n) noexcept {
+  FASTFLOAT_CONSTEXPR20 bool shl_limbs(size_t n) noexcept {
     FASTFLOAT_DEBUG_ASSERT(n != 0);
     if (n + vec.len() > vec.capacity()) {
       return false;
@@ -9022,7 +10453,7 @@ struct bigint {
       // move limbs
       limb* dst = vec.data + n;
       const limb* src = vec.data;
-      ::memmove(dst, src, sizeof(limb) * vec.len());
+      std::copy_backward(src, src + vec.len(), dst + vec.len());
       // fill in empty limbs
       limb* first = vec.data;
       limb* last = first + n;
@@ -9035,7 +10466,7 @@ struct bigint {
   }
 
   // move the limbs left by `n` bits.
-  bool shl(size_t n) noexcept {
+  FASTFLOAT_CONSTEXPR20 bool shl(size_t n) noexcept {
     size_t rem = n % limb_bits;
     size_t div = n / limb_bits;
     if (rem != 0) {
@@ -9048,7 +10479,7 @@ struct bigint {
   }
 
   // get the number of leading zeros in the bigint.
-  int ctlz() const noexcept {
+  FASTFLOAT_CONSTEXPR20 int ctlz() const noexcept {
     if (vec.is_empty()) {
       return 0;
     } else {
@@ -9063,45 +10494,27 @@ struct bigint {
   }
 
   // get the number of bits in the bigint.
-  int bit_length() const noexcept {
+  FASTFLOAT_CONSTEXPR20 int bit_length() const noexcept {
     int lz = ctlz();
     return int(limb_bits * vec.len()) - lz;
   }
 
-  bool mul(limb y) noexcept {
+  FASTFLOAT_CONSTEXPR20 bool mul(limb y) noexcept {
     return small_mul(vec, y);
   }
 
-  bool add(limb y) noexcept {
+  FASTFLOAT_CONSTEXPR20 bool add(limb y) noexcept {
     return small_add(vec, y);
   }
 
   // multiply as if by 2 raised to a power.
-  bool pow2(uint32_t exp) noexcept {
+  FASTFLOAT_CONSTEXPR20 bool pow2(uint32_t exp) noexcept {
     return shl(exp);
   }
 
   // multiply as if by 5 raised to a power.
-  bool pow5(uint32_t exp) noexcept {
+  FASTFLOAT_CONSTEXPR20 bool pow5(uint32_t exp) noexcept {
     // multiply by a power of 5
-    static constexpr uint32_t large_step = 135;
-    static constexpr uint64_t small_power_of_5[] = {
-      1UL, 5UL, 25UL, 125UL, 625UL, 3125UL, 15625UL, 78125UL, 390625UL,
-      1953125UL, 9765625UL, 48828125UL, 244140625UL, 1220703125UL,
-      6103515625UL, 30517578125UL, 152587890625UL, 762939453125UL,
-      3814697265625UL, 19073486328125UL, 95367431640625UL, 476837158203125UL,
-      2384185791015625UL, 11920928955078125UL, 59604644775390625UL,
-      298023223876953125UL, 1490116119384765625UL, 7450580596923828125UL,
-    };
-#ifdef FASTFLOAT_64BIT_LIMB
-    constexpr static limb large_power_of_5[] = {
-      1414648277510068013UL, 9180637584431281687UL, 4539964771860779200UL,
-      10482974169319127550UL, 198276706040285095UL};
-#else
-    constexpr static limb large_power_of_5[] = {
-      4279965485U, 329373468U, 4020270615U, 2137533757U, 4287402176U,
-      1057042919U, 1071430142U, 2440757623U, 381945767U, 46164893U};
-#endif
     size_t large_length = sizeof(large_power_of_5) / sizeof(limb);
     limb_span large = limb_span(large_power_of_5, large_length);
     while (exp >= large_step) {
@@ -9120,14 +10533,19 @@ struct bigint {
       exp -= small_step;
     }
     if (exp != 0) {
-      FASTFLOAT_TRY(small_mul(vec, limb(small_power_of_5[exp])));
+      // Work around clang bug https://godbolt.org/z/zedh7rrhc
+      // This is similar to https://github.com/llvm/llvm-project/issues/47746,
+      // except the workaround described there doesn't work here
+      FASTFLOAT_TRY(
+        small_mul(vec, limb(((void)small_power_of_5[0], small_power_of_5[exp])))
+      );
     }
 
     return true;
   }
 
   // multiply as if by 10 raised to a power.
-  bool pow10(uint32_t exp) noexcept {
+  FASTFLOAT_CONSTEXPR20 bool pow10(uint32_t exp) noexcept {
     FASTFLOAT_TRY(pow5(exp));
     return pow2(exp);
   }
@@ -9137,11 +10555,11 @@ struct bigint {
 
 #endif
 
-#ifndef FASTFLOAT_ASCII_NUMBER_H
-#define FASTFLOAT_ASCII_NUMBER_H
+#ifndef FASTFLOAT_DIGIT_COMPARISON_H
+#define FASTFLOAT_DIGIT_COMPARISON_H
 
 //included above:
-//#include <cctype>
+//#include <algorithm>
 //included above:
 //#include <cstdint>
 //included above:
@@ -9152,323 +10570,97 @@ struct bigint {
 
 namespace fast_float {
 
-// Next function can be micro-optimized, but compilers are entirely
-// able to optimize it well.
-fastfloat_really_inline bool is_integer(char c)  noexcept  { return c >= '0' && c <= '9'; }
+// 1e0 to 1e19
+constexpr static uint64_t powers_of_ten_uint64[] = {
+    1UL, 10UL, 100UL, 1000UL, 10000UL, 100000UL, 1000000UL, 10000000UL, 100000000UL,
+    1000000000UL, 10000000000UL, 100000000000UL, 1000000000000UL, 10000000000000UL,
+    100000000000000UL, 1000000000000000UL, 10000000000000000UL, 100000000000000000UL,
+    1000000000000000000UL, 10000000000000000000UL};
 
-fastfloat_really_inline uint64_t byteswap(uint64_t val) {
-  return (val & 0xFF00000000000000) >> 56
-    | (val & 0x00FF000000000000) >> 40
-    | (val & 0x0000FF0000000000) >> 24
-    | (val & 0x000000FF00000000) >> 8
-    | (val & 0x00000000FF000000) << 8
-    | (val & 0x0000000000FF0000) << 24
-    | (val & 0x000000000000FF00) << 40
-    | (val & 0x00000000000000FF) << 56;
+// calculate the exponent, in scientific notation, of the number.
+// this algorithm is not even close to optimized, but it has no practical
+// effect on performance: in order to have a faster algorithm, we'd need
+// to slow down performance for faster algorithms, and this is still fast.
+template <typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14
+int32_t scientific_exponent(parsed_number_string_t<UC> & num) noexcept {
+  uint64_t mantissa = num.mantissa;
+  int32_t exponent = int32_t(num.exponent);
+  while (mantissa >= 10000) {
+    mantissa /= 10000;
+    exponent += 4;
+  }
+  while (mantissa >= 100) {
+    mantissa /= 100;
+    exponent += 2;
+  }
+  while (mantissa >= 10) {
+    mantissa /= 10;
+    exponent += 1;
+  }
+  return exponent;
 }
 
-fastfloat_really_inline uint64_t read_u64(const char *chars) {
-  uint64_t val;
-  ::memcpy(&val, chars, sizeof(uint64_t));
-#if FASTFLOAT_IS_BIG_ENDIAN == 1
-  // Need to read as-if the number was in little-endian order.
-  val = byteswap(val);
-#endif
-  return val;
-}
+// this converts a native floating-point number to an extended-precision float.
+template <typename T>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
+adjusted_mantissa to_extended(T value) noexcept {
+  using equiv_uint = typename binary_format<T>::equiv_uint;
+  constexpr equiv_uint exponent_mask = binary_format<T>::exponent_mask();
+  constexpr equiv_uint mantissa_mask = binary_format<T>::mantissa_mask();
+  constexpr equiv_uint hidden_bit_mask = binary_format<T>::hidden_bit_mask();
 
-fastfloat_really_inline void write_u64(uint8_t *chars, uint64_t val) {
-#if FASTFLOAT_IS_BIG_ENDIAN == 1
-  // Need to read as-if the number was in little-endian order.
-  val = byteswap(val);
+  adjusted_mantissa am;
+  int32_t bias = binary_format<T>::mantissa_explicit_bits() - binary_format<T>::minimum_exponent();
+  equiv_uint bits;
+#if FASTFLOAT_HAS_BIT_CAST
+  bits = std::bit_cast<equiv_uint>(value);
+#else
+  ::memcpy(&bits, &value, sizeof(T));
 #endif
-  ::memcpy(chars, &val, sizeof(uint64_t));
-}
-
-// credit  @aqrit
-fastfloat_really_inline uint32_t  parse_eight_digits_unrolled(uint64_t val) {
-  const uint64_t mask = 0x000000FF000000FF;
-  const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32)
-  const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32)
-  val -= 0x3030303030303030;
-  val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
-  val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32;
-  return uint32_t(val);
-}
+  if ((bits & exponent_mask) == 0) {
+    // denormal
+    am.power2 = 1 - bias;
+    am.mantissa = bits & mantissa_mask;
+  } else {
+    // normal
+    am.power2 = int32_t((bits & exponent_mask) >> binary_format<T>::mantissa_explicit_bits());
+    am.power2 -= bias;
+    am.mantissa = (bits & mantissa_mask) | hidden_bit_mask;
+  }
 
-fastfloat_really_inline uint32_t parse_eight_digits_unrolled(const char *chars)  noexcept  {
-  return parse_eight_digits_unrolled(read_u64(chars));
+  return am;
 }
 
-// credit @aqrit
-fastfloat_really_inline bool is_made_of_eight_digits_fast(uint64_t val)  noexcept  {
-  return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) &
-     0x8080808080808080));
+// get the extended precision value of the halfway point between b and b+u.
+// we are given a native float that represents b, so we need to adjust it
+// halfway between b and b+u.
+template <typename T>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
+adjusted_mantissa to_extended_halfway(T value) noexcept {
+  adjusted_mantissa am = to_extended(value);
+  am.mantissa <<= 1;
+  am.mantissa += 1;
+  am.power2 -= 1;
+  return am;
 }
 
-fastfloat_really_inline bool is_made_of_eight_digits_fast(const char *chars)  noexcept  {
-  return is_made_of_eight_digits_fast(read_u64(chars));
-}
+// round an extended-precision float to the nearest machine float.
+template <typename T, typename callback>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14
+void round(adjusted_mantissa& am, callback cb) noexcept {
+  int32_t mantissa_shift = 64 - binary_format<T>::mantissa_explicit_bits() - 1;
+  if (-am.power2 >= mantissa_shift) {
+    // have a denormal float
+    int32_t shift = -am.power2 + 1;
+    cb(am, std::min<int32_t>(shift, 64));
+    // check for round-up: if rounding-nearest carried us to the hidden bit.
+    am.power2 = (am.mantissa < (uint64_t(1) << binary_format<T>::mantissa_explicit_bits())) ? 0 : 1;
+    return;
+  }
 
-typedef span<const char> byte_span;
-
-struct parsed_number_string {
-  int64_t exponent{0};
-  uint64_t mantissa{0};
-  const char *lastmatch{nullptr};
-  bool negative{false};
-  bool valid{false};
-  bool too_many_digits{false};
-  // contains the range of the significant digits
-  byte_span integer{};  // non-nullable
-  byte_span fraction{}; // nullable
-};
-
-// Assuming that you use no more than 19 digits, this will
-// parse an ASCII string.
-fastfloat_really_inline
-parsed_number_string parse_number_string(const char *p, const char *pend, parse_options options) noexcept {
-  const chars_format fmt = options.format;
-  const char decimal_point = options.decimal_point;
-
-  parsed_number_string answer;
-  answer.valid = false;
-  answer.too_many_digits = false;
-  answer.negative = (*p == '-');
-  if (*p == '-') { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here
-    ++p;
-    if (p == pend) {
-      return answer;
-    }
-    if (!is_integer(*p) && (*p != decimal_point)) { // a sign must be followed by an integer or the dot
-      return answer;
-    }
-  }
-  const char *const start_digits = p;
-
-  uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad)
-
-  while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) {
-    i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok
-    p += 8;
-  }
-  while ((p != pend) && is_integer(*p)) {
-    // a multiplication by 10 is cheaper than an arbitrary integer
-    // multiplication
-    i = 10 * i +
-        uint64_t(*p - '0'); // might overflow, we will handle the overflow later
-    ++p;
-  }
-  const char *const end_of_integer_part = p;
-  int64_t digit_count = int64_t(end_of_integer_part - start_digits);
-  answer.integer = byte_span(start_digits, size_t(digit_count));
-  int64_t exponent = 0;
-  if ((p != pend) && (*p == decimal_point)) {
-    ++p;
-    const char* before = p;
-    // can occur at most twice without overflowing, but let it occur more, since
-    // for integers with many digits, digit parsing is the primary bottleneck.
-    while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) {
-      i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok
-      p += 8;
-    }
-    while ((p != pend) && is_integer(*p)) {
-      uint8_t digit = uint8_t(*p - '0');
-      ++p;
-      i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
-    }
-    exponent = before - p;
-    answer.fraction = byte_span(before, size_t(p - before));
-    digit_count -= exponent;
-  }
-  // we must have encountered at least one integer!
-  if (digit_count == 0) {
-    return answer;
-  }
-  int64_t exp_number = 0;            // explicit exponential part
-  if ((fmt & chars_format::scientific) && (p != pend) && (('e' == *p) || ('E' == *p))) {
-    const char * location_of_e = p;
-    ++p;
-    bool neg_exp = false;
-    if ((p != pend) && ('-' == *p)) {
-      neg_exp = true;
-      ++p;
-    } else if ((p != pend) && ('+' == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1)
-      ++p;
-    }
-    if ((p == pend) || !is_integer(*p)) {
-      if(!(fmt & chars_format::fixed)) {
-        // We are in error.
-        return answer;
-      }
-      // Otherwise, we will be ignoring the 'e'.
-      p = location_of_e;
-    } else {
-      while ((p != pend) && is_integer(*p)) {
-        uint8_t digit = uint8_t(*p - '0');
-        if (exp_number < 0x10000000) {
-          exp_number = 10 * exp_number + digit;
-        }
-        ++p;
-      }
-      if(neg_exp) { exp_number = - exp_number; }
-      exponent += exp_number;
-    }
-  } else {
-    // If it scientific and not fixed, we have to bail out.
-    if((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) { return answer; }
-  }
-  answer.lastmatch = p;
-  answer.valid = true;
-
-  // If we frequently had to deal with long strings of digits,
-  // we could extend our code by using a 128-bit integer instead
-  // of a 64-bit integer. However, this is uncommon.
-  //
-  // We can deal with up to 19 digits.
-  if (digit_count > 19) { // this is uncommon
-    // It is possible that the integer had an overflow.
-    // We have to handle the case where we have 0.0000somenumber.
-    // We need to be mindful of the case where we only have zeroes...
-    // E.g., 0.000000000...000.
-    const char *start = start_digits;
-    while ((start != pend) && (*start == '0' || *start == decimal_point)) {
-      if(*start == '0') { digit_count --; }
-      start++;
-    }
-    if (digit_count > 19) {
-      answer.too_many_digits = true;
-      // Let us start again, this time, avoiding overflows.
-      // We don't need to check if is_integer, since we use the
-      // pre-tokenized spans from above.
-      i = 0;
-      p = answer.integer.ptr;
-      const char* int_end = p + answer.integer.len();
-      const uint64_t minimal_nineteen_digit_integer{1000000000000000000};
-      while((i < minimal_nineteen_digit_integer) && (p != int_end)) {
-        i = i * 10 + uint64_t(*p - '0');
-        ++p;
-      }
-      if (i >= minimal_nineteen_digit_integer) { // We have a big integers
-        exponent = end_of_integer_part - p + exp_number;
-      } else { // We have a value with a fractional component.
-          p = answer.fraction.ptr;
-          const char* frac_end = p + answer.fraction.len();
-          while((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
-            i = i * 10 + uint64_t(*p - '0');
-            ++p;
-          }
-          exponent = answer.fraction.ptr - p + exp_number;
-      }
-      // We have now corrected both exponent and i, to a truncated value
-    }
-  }
-  answer.exponent = exponent;
-  answer.mantissa = i;
-  return answer;
-}
-
-} // namespace fast_float
-
-#endif
-
-#ifndef FASTFLOAT_DIGIT_COMPARISON_H
-#define FASTFLOAT_DIGIT_COMPARISON_H
-
-//included above:
-//#include <algorithm>
-//included above:
-//#include <cstdint>
-//included above:
-//#include <cstring>
-//included above:
-//#include <iterator>
-
-
-namespace fast_float {
-
-// 1e0 to 1e19
-constexpr static uint64_t powers_of_ten_uint64[] = {
-    1UL, 10UL, 100UL, 1000UL, 10000UL, 100000UL, 1000000UL, 10000000UL, 100000000UL,
-    1000000000UL, 10000000000UL, 100000000000UL, 1000000000000UL, 10000000000000UL,
-    100000000000000UL, 1000000000000000UL, 10000000000000000UL, 100000000000000000UL,
-    1000000000000000000UL, 10000000000000000000UL};
-
-// calculate the exponent, in scientific notation, of the number.
-// this algorithm is not even close to optimized, but it has no practical
-// effect on performance: in order to have a faster algorithm, we'd need
-// to slow down performance for faster algorithms, and this is still fast.
-fastfloat_really_inline int32_t scientific_exponent(parsed_number_string& num) noexcept {
-  uint64_t mantissa = num.mantissa;
-  int32_t exponent = int32_t(num.exponent);
-  while (mantissa >= 10000) {
-    mantissa /= 10000;
-    exponent += 4;
-  }
-  while (mantissa >= 100) {
-    mantissa /= 100;
-    exponent += 2;
-  }
-  while (mantissa >= 10) {
-    mantissa /= 10;
-    exponent += 1;
-  }
-  return exponent;
-}
-
-// this converts a native floating-point number to an extended-precision float.
-template <typename T>
-fastfloat_really_inline adjusted_mantissa to_extended(T value) noexcept {
-  using equiv_uint = typename binary_format<T>::equiv_uint;
-  constexpr equiv_uint exponent_mask = binary_format<T>::exponent_mask();
-  constexpr equiv_uint mantissa_mask = binary_format<T>::mantissa_mask();
-  constexpr equiv_uint hidden_bit_mask = binary_format<T>::hidden_bit_mask();
-
-  adjusted_mantissa am;
-  int32_t bias = binary_format<T>::mantissa_explicit_bits() - binary_format<T>::minimum_exponent();
-  equiv_uint bits;
-  ::memcpy(&bits, &value, sizeof(T));
-  if ((bits & exponent_mask) == 0) {
-    // denormal
-    am.power2 = 1 - bias;
-    am.mantissa = bits & mantissa_mask;
-  } else {
-    // normal
-    am.power2 = int32_t((bits & exponent_mask) >> binary_format<T>::mantissa_explicit_bits());
-    am.power2 -= bias;
-    am.mantissa = (bits & mantissa_mask) | hidden_bit_mask;
-  }
-
-  return am;
-}
-
-// get the extended precision value of the halfway point between b and b+u.
-// we are given a native float that represents b, so we need to adjust it
-// halfway between b and b+u.
-template <typename T>
-fastfloat_really_inline adjusted_mantissa to_extended_halfway(T value) noexcept {
-  adjusted_mantissa am = to_extended(value);
-  am.mantissa <<= 1;
-  am.mantissa += 1;
-  am.power2 -= 1;
-  return am;
-}
-
-// round an extended-precision float to the nearest machine float.
-template <typename T, typename callback>
-fastfloat_really_inline void round(adjusted_mantissa& am, callback cb) noexcept {
-  int32_t mantissa_shift = 64 - binary_format<T>::mantissa_explicit_bits() - 1;
-  if (-am.power2 >= mantissa_shift) {
-    // have a denormal float
-    int32_t shift = -am.power2 + 1;
-    cb(am, std::min<int32_t>(shift, 64));
-    // check for round-up: if rounding-nearest carried us to the hidden bit.
-    am.power2 = (am.mantissa < (uint64_t(1) << binary_format<T>::mantissa_explicit_bits())) ? 0 : 1;
-    return;
-  }
-
-  // have a normal float, use the default shift.
-  cb(am, mantissa_shift);
+  // have a normal float, use the default shift.
+  cb(am, mantissa_shift);
 
   // check for carry
   if (am.mantissa >= (uint64_t(2) << binary_format<T>::mantissa_explicit_bits())) {
@@ -9485,23 +10677,19 @@ fastfloat_really_inline void round(adjusted_mantissa& am, callback cb) noexcept
 }
 
 template <typename callback>
-fastfloat_really_inline
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14
 void round_nearest_tie_even(adjusted_mantissa& am, int32_t shift, callback cb) noexcept {
-  uint64_t mask;
-  uint64_t halfway;
-  if (shift == 64) {
-    mask = UINT64_MAX;
-  } else {
-    mask = (uint64_t(1) << shift) - 1;
-  }
-  if (shift == 0) {
-    halfway = 0;
-  } else {
-    halfway = uint64_t(1) << (shift - 1);
-  }
+  const uint64_t mask
+  = (shift == 64)
+    ? UINT64_MAX
+    : (uint64_t(1) << shift) - 1;
+  const uint64_t halfway
+  = (shift == 0)
+    ? 0
+    : uint64_t(1) << (shift - 1);
   uint64_t truncated_bits = am.mantissa & mask;
-  uint64_t is_above = truncated_bits > halfway;
-  uint64_t is_halfway = truncated_bits == halfway;
+  bool is_above = truncated_bits > halfway;
+  bool is_halfway = truncated_bits == halfway;
 
   // shift digits into position
   if (shift == 64) {
@@ -9515,7 +10703,8 @@ void round_nearest_tie_even(adjusted_mantissa& am, int32_t shift, callback cb) n
   am.mantissa += uint64_t(cb(is_odd, is_halfway, is_above));
 }
 
-fastfloat_really_inline void round_down(adjusted_mantissa& am, int32_t shift) noexcept {
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14
+void round_down(adjusted_mantissa& am, int32_t shift) noexcept {
   if (shift == 64) {
     am.mantissa = 0;
   } else {
@@ -9523,18 +10712,19 @@ fastfloat_really_inline void round_down(adjusted_mantissa& am, int32_t shift) no
   }
   am.power2 += shift;
 }
-
-fastfloat_really_inline void skip_zeros(const char*& first, const char* last) noexcept {
+template <typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
+void skip_zeros(UC const * & first, UC const * last) noexcept {
   uint64_t val;
-  while (std::distance(first, last) >= 8) {
+  while (!cpp20_and_in_constexpr() && std::distance(first, last) >= int_cmp_len<UC>()) {
     ::memcpy(&val, first, sizeof(uint64_t));
-    if (val != 0x3030303030303030) {
+    if (val != int_cmp_zeros<UC>()) {
       break;
     }
-    first += 8;
+    first += int_cmp_len<UC>();
   }
   while (first != last) {
-    if (*first != '0') {
+    if (*first != UC('0')) {
       break;
     }
     first++;
@@ -9543,52 +10733,59 @@ fastfloat_really_inline void skip_zeros(const char*& first, const char* last) no
 
 // determine if any non-zero digits were truncated.
 // all characters must be valid digits.
-fastfloat_really_inline bool is_truncated(const char* first, const char* last) noexcept {
+template <typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
+bool is_truncated(UC const * first, UC const * last) noexcept {
   // do 8-bit optimizations, can just compare to 8 literal 0s.
   uint64_t val;
-  while (std::distance(first, last) >= 8) {
+  while (!cpp20_and_in_constexpr() && std::distance(first, last) >= int_cmp_len<UC>()) {
     ::memcpy(&val, first, sizeof(uint64_t));
-    if (val != 0x3030303030303030) {
+    if (val != int_cmp_zeros<UC>()) {
       return true;
     }
-    first += 8;
+    first += int_cmp_len<UC>();
   }
   while (first != last) {
-    if (*first != '0') {
+    if (*first != UC('0')) {
       return true;
     }
-    first++;
+    ++first;
   }
   return false;
 }
-
-fastfloat_really_inline bool is_truncated(byte_span s) noexcept {
+template <typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
+bool is_truncated(span<const UC> s) noexcept {
   return is_truncated(s.ptr, s.ptr + s.len());
 }
 
-fastfloat_really_inline
-void parse_eight_digits(const char*& p, limb& value, size_t& counter, size_t& count) noexcept {
+
+template <typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
+void parse_eight_digits(const UC*& p, limb& value, size_t& counter, size_t& count) noexcept {
   value = value * 100000000 + parse_eight_digits_unrolled(p);
   p += 8;
   counter += 8;
   count += 8;
 }
 
-fastfloat_really_inline
-void parse_one_digit(const char*& p, limb& value, size_t& counter, size_t& count) noexcept {
-  value = value * 10 + limb(*p - '0');
+template <typename UC>
+fastfloat_really_inline FASTFLOAT_CONSTEXPR14
+void parse_one_digit(UC const *& p, limb& value, size_t& counter, size_t& count) noexcept {
+  value = value * 10 + limb(*p - UC('0'));
   p++;
   counter++;
   count++;
 }
 
-fastfloat_really_inline
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
 void add_native(bigint& big, limb power, limb value) noexcept {
   big.mul(power);
   big.add(value);
 }
 
-fastfloat_really_inline void round_up_bigint(bigint& big, size_t& count) noexcept {
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20
+void round_up_bigint(bigint& big, size_t& count) noexcept {
   // need to round-up the digits, but need to avoid rounding
   // ....9999 to ...10000, which could cause a false halfway point.
   add_native(big, 10, 1);
@@ -9596,7 +10793,9 @@ fastfloat_really_inline void round_up_bigint(bigint& big, size_t& count) noexcep
 }
 
 // parse the significant digits into a big integer
-inline void parse_mantissa(bigint& result, parsed_number_string& num, size_t max_digits, size_t& digits) noexcept {
+template <typename UC>
+inline FASTFLOAT_CONSTEXPR20
+void parse_mantissa(bigint& result, parsed_number_string_t<UC>& num, size_t max_digits, size_t& digits) noexcept {
   // try to minimize the number of big integer and scalar multiplication.
   // therefore, try to parse 8 digits at a time, and multiply by the largest
   // scalar value (9 or 19 digits) for each step.
@@ -9610,8 +10809,8 @@ inline void parse_mantissa(bigint& result, parsed_number_string& num, size_t max
 #endif
 
   // process all integer digits.
-  const char* p = num.integer.ptr;
-  const char* pend = p + num.integer.len();
+  UC const * p = num.integer.ptr;
+  UC const * pend = p + num.integer.len();
   skip_zeros(p, pend);
   // process all digits, in increments of step per loop
   while (p != pend) {
@@ -9676,7 +10875,8 @@ inline void parse_mantissa(bigint& result, parsed_number_string& num, size_t max
 }
 
 template <typename T>
-inline adjusted_mantissa positive_digit_comp(bigint& bigmant, int32_t exponent) noexcept {
+inline FASTFLOAT_CONSTEXPR20
+adjusted_mantissa positive_digit_comp(bigint& bigmant, int32_t exponent) noexcept {
   FASTFLOAT_ASSERT(bigmant.pow10(uint32_t(exponent)));
   adjusted_mantissa answer;
   bool truncated;
@@ -9699,7 +10899,8 @@ inline adjusted_mantissa positive_digit_comp(bigint& bigmant, int32_t exponent)
 // we then need to scale by `2^(f- e)`, and then the two significant digits
 // are of the same magnitude.
 template <typename T>
-inline adjusted_mantissa negative_digit_comp(bigint& bigmant, adjusted_mantissa am, int32_t exponent) noexcept {
+inline FASTFLOAT_CONSTEXPR20
+adjusted_mantissa negative_digit_comp(bigint& bigmant, adjusted_mantissa am, int32_t exponent) noexcept {
   bigint& real_digits = bigmant;
   int32_t real_exp = exponent;
 
@@ -9758,8 +10959,9 @@ inline adjusted_mantissa negative_digit_comp(bigint& bigmant, adjusted_mantissa
 // `b` as a big-integer type, scaled to the same binary exponent as
 // the actual digits. we then compare the big integer representations
 // of both, and use that to direct rounding.
-template <typename T>
-inline adjusted_mantissa digit_comp(parsed_number_string& num, adjusted_mantissa am) noexcept {
+template <typename T, typename UC>
+inline FASTFLOAT_CONSTEXPR20
+adjusted_mantissa digit_comp(parsed_number_string_t<UC>& num, adjusted_mantissa am) noexcept {
   // remove the invalid exponent bias
   am.power2 -= invalid_am_bias;
 
@@ -9793,7 +10995,6 @@ inline adjusted_mantissa digit_comp(parsed_number_string& num, adjusted_mantissa
 //#include <limits>
 //included above:
 //#include <system_error>
-
 namespace fast_float {
 
 
@@ -9803,35 +11004,41 @@ namespace detail {
  * The case comparisons could be made much faster given that we know that the
  * strings a null-free and fixed.
  **/
-template <typename T>
-from_chars_result parse_infnan(const char *first, const char *last, T &value)  noexcept  {
-  from_chars_result answer;
+template <typename T, typename UC>
+from_chars_result_t<UC> FASTFLOAT_CONSTEXPR14
+parse_infnan(UC const * first, UC const * last, T &value)  noexcept  {
+  from_chars_result_t<UC> answer{};
   answer.ptr = first;
   answer.ec = std::errc(); // be optimistic
   bool minusSign = false;
-  if (*first == '-') { // assume first < last, so dereference without checks; C++17 20.19.3.(7.1) explicitly forbids '+' here
+  if (*first == UC('-')) { // assume first < last, so dereference without checks; C++17 20.19.3.(7.1) explicitly forbids '+' here
       minusSign = true;
       ++first;
   }
+#ifdef FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default
+  if (*first == UC('+')) {
+      ++first;
+  }
+#endif
   if (last - first >= 3) {
-    if (fastfloat_strncasecmp(first, "nan", 3)) {
+    if (fastfloat_strncasecmp(first, str_const_nan<UC>(), 3)) {
       answer.ptr = (first += 3);
       value = minusSign ? -std::numeric_limits<T>::quiet_NaN() : std::numeric_limits<T>::quiet_NaN();
       // Check for possible nan(n-char-seq-opt), C++17 20.19.3.7, C11 7.20.1.3.3. At least MSVC produces nan(ind) and nan(snan).
-      if(first != last && *first == '(') {
-        for(const char* ptr = first + 1; ptr != last; ++ptr) {
-          if (*ptr == ')') {
+      if(first != last && *first == UC('(')) {
+        for(UC const * ptr = first + 1; ptr != last; ++ptr) {
+          if (*ptr == UC(')')) {
             answer.ptr = ptr + 1; // valid nan(n-char-seq-opt)
             break;
           }
-          else if(!(('a' <= *ptr && *ptr <= 'z') || ('A' <= *ptr && *ptr <= 'Z') || ('0' <= *ptr && *ptr <= '9') || *ptr == '_'))
+          else if(!((UC('a') <= *ptr && *ptr <= UC('z')) || (UC('A') <= *ptr && *ptr <= UC('Z')) || (UC('0') <= *ptr && *ptr <= UC('9')) || *ptr == UC('_')))
             break; // forbidden char, not nan(n-char-seq-opt)
         }
       }
       return answer;
     }
-    if (fastfloat_strncasecmp(first, "inf", 3)) {
-      if ((last - first >= 8) && fastfloat_strncasecmp(first + 3, "inity", 5)) {
+    if (fastfloat_strncasecmp(first, str_const_inf<UC>(), 3)) {
+      if ((last - first >= 8) && fastfloat_strncasecmp(first + 3, str_const_inf<UC>() + 3, 5)) {
         answer.ptr = first + 8;
       } else {
         answer.ptr = first + 3;
@@ -9844,40 +11051,183 @@ from_chars_result parse_infnan(const char *first, const char *last, T &value)  n
   return answer;
 }
 
-} // namespace detail
-
-template<typename T>
-from_chars_result from_chars(const char *first, const char *last,
-                             T &value, chars_format fmt /*= chars_format::general*/)  noexcept  {
-  return from_chars_advanced(first, last, value, parse_options{fmt});
+/**
+ * Returns true if the floating-point rounding mode is to 'nearest'.
+ * It is the default on most systems. This function is meant to be inexpensive.
+ * Credit : @mwalcott3
+ */
+fastfloat_really_inline bool rounds_to_nearest() noexcept {
+  // https://lemire.me/blog/2020/06/26/gcc-not-nearest/
+#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0)
+  return false;
+#endif
+  // See
+  // A fast function to check your floating-point rounding mode
+  // https://lemire.me/blog/2022/11/16/a-fast-function-to-check-your-floating-point-rounding-mode/
+  //
+  // This function is meant to be equivalent to :
+  // prior: #include <cfenv>
+  //  return fegetround() == FE_TONEAREST;
+  // However, it is expected to be much faster than the fegetround()
+  // function call.
+  //
+  // The volatile keyword prevents the compiler from computing the function
+  // at compile-time.
+  // There might be other ways to prevent compile-time optimizations (e.g., asm).
+  // The value does not need to be std::numeric_limits<float>::min(), any small
+  // value so that 1 + x should round to 1 would do (after accounting for excess
+  // precision, as in 387 instructions).
+  static volatile float fmin = std::numeric_limits<float>::min();
+  float fmini = fmin; // we copy it so that it gets loaded at most once.
+  //
+  // Explanation:
+  // Only when fegetround() == FE_TONEAREST do we have that
+  // fmin + 1.0f == 1.0f - fmin.
+  //
+  // FE_UPWARD:
+  //  fmin + 1.0f > 1
+  //  1.0f - fmin == 1
+  //
+  // FE_DOWNWARD or  FE_TOWARDZERO:
+  //  fmin + 1.0f == 1
+  //  1.0f - fmin < 1
+  //
+  // Note: This may fail to be accurate if fast-math has been
+  // enabled, as rounding conventions may not apply.
+  #ifdef FASTFLOAT_VISUAL_STUDIO
+  #   pragma warning(push)
+  //  todo: is there a VS warning?
+  //  see https://stackoverflow.com/questions/46079446/is-there-a-warning-for-floating-point-equality-checking-in-visual-studio-2013
+  #elif defined(__clang__)
+  #   pragma clang diagnostic push
+  #   pragma clang diagnostic ignored "-Wfloat-equal"
+  #elif defined(__GNUC__)
+  #   pragma GCC diagnostic push
+  #   pragma GCC diagnostic ignored "-Wfloat-equal"
+  #endif
+  return (fmini + 1.0f == 1.0f - fmini);
+  #ifdef FASTFLOAT_VISUAL_STUDIO
+  #   pragma warning(pop)
+  #elif defined(__clang__)
+  #   pragma clang diagnostic pop
+  #elif defined(__GNUC__)
+  #   pragma GCC diagnostic pop
+  #endif
 }
 
-template<typename T>
-from_chars_result from_chars_advanced(const char *first, const char *last,
-                                      T &value, parse_options options)  noexcept  {
+} // namespace detail
 
-  static_assert (std::is_same<T, double>::value || std::is_same<T, float>::value, "only float and double are supported");
+template <typename T>
+struct from_chars_caller
+{
+  template <typename UC>
+  FASTFLOAT_CONSTEXPR20
+  static from_chars_result_t<UC> call(UC const * first, UC const * last,
+                                      T &value, parse_options_t<UC> options)  noexcept {
+    return from_chars_advanced(first, last, value, options);
+  }
+};
 
+#if __STDCPP_FLOAT32_T__ == 1
+template <>
+struct from_chars_caller<std::float32_t>
+{
+  template <typename UC>
+  FASTFLOAT_CONSTEXPR20
+  static from_chars_result_t<UC> call(UC const * first, UC const * last,
+                                      std::float32_t &value, parse_options_t<UC> options) noexcept{
+    // if std::float32_t is defined, and we are in C++23 mode; macro set for float32;
+    // set value to float due to equivalence between float and float32_t
+    float val;
+    auto ret = from_chars_advanced(first, last, val, options);
+    value = val;
+    return ret;
+  }
+};
+#endif
 
-  from_chars_result answer;
-  if (first == last) {
-    answer.ec = std::errc::invalid_argument;
-    answer.ptr = first;
-    return answer;
-  }
-  parsed_number_string pns = parse_number_string(first, last, options);
-  if (!pns.valid) {
-    return detail::parse_infnan(first, last, value);
+#if __STDCPP_FLOAT64_T__ == 1
+template <>
+struct from_chars_caller<std::float64_t>
+{
+  template <typename UC>
+  FASTFLOAT_CONSTEXPR20
+  static from_chars_result_t<UC> call(UC const * first, UC const * last,
+                                      std::float64_t &value, parse_options_t<UC> options) noexcept{
+    // if std::float64_t is defined, and we are in C++23 mode; macro set for float64;
+    // set value as double due to equivalence between double and float64_t
+    double val;
+    auto ret = from_chars_advanced(first, last, val, options);
+    value = val;
+    return ret;
   }
+};
+#endif
+
+
+template<typename T, typename UC, typename>
+FASTFLOAT_CONSTEXPR20
+from_chars_result_t<UC> from_chars(UC const * first, UC const * last,
+                             T &value, chars_format fmt /*= chars_format::general*/)  noexcept  {
+  return from_chars_caller<T>::call(first, last, value, parse_options_t<UC>(fmt));
+}
+
+/**
+ * This function overload takes a parsed_number_string_t structure that was created and populated
+ * either by the from_chars_advanced function taking a chars range and parsing options,
+ * or by another custom parsing function implemented by the user.
+ */
+template<typename T, typename UC>
+FASTFLOAT_CONSTEXPR20
+from_chars_result_t<UC> from_chars_advanced(parsed_number_string_t<UC>& pns,
+                                      T &value)  noexcept  {
+
+  static_assert (is_supported_float_type<T>(), "only some floating-point types are supported");
+  static_assert (is_supported_char_type<UC>(), "only char, wchar_t, char16_t and char32_t are supported");
+
+  from_chars_result_t<UC> answer;
+
   answer.ec = std::errc(); // be optimistic
   answer.ptr = pns.lastmatch;
-  // Next is Clinger's fast path.
-  if (binary_format<T>::min_exponent_fast_path() <= pns.exponent && pns.exponent <= binary_format<T>::max_exponent_fast_path() && pns.mantissa <=binary_format<T>::max_mantissa_fast_path() && !pns.too_many_digits) {
-    value = T(pns.mantissa);
-    if (pns.exponent < 0) { value = value / binary_format<T>::exact_power_of_ten(-pns.exponent); }
-    else { value = value * binary_format<T>::exact_power_of_ten(pns.exponent); }
-    if (pns.negative) { value = -value; }
-    return answer;
+  // The implementation of the Clinger's fast path is convoluted because
+  // we want round-to-nearest in all cases, irrespective of the rounding mode
+  // selected on the thread.
+  // We proceed optimistically, assuming that detail::rounds_to_nearest() returns
+  // true.
+  if (binary_format<T>::min_exponent_fast_path() <= pns.exponent && pns.exponent <= binary_format<T>::max_exponent_fast_path() && !pns.too_many_digits) {
+    // Unfortunately, the conventional Clinger's fast path is only possible
+    // when the system rounds to the nearest float.
+    //
+    // We expect the next branch to almost always be selected.
+    // We could check it first (before the previous branch), but
+    // there might be performance advantages at having the check
+    // be last.
+    if(!cpp20_and_in_constexpr() && detail::rounds_to_nearest())  {
+      // We have that fegetround() == FE_TONEAREST.
+      // Next is Clinger's fast path.
+      if (pns.mantissa <=binary_format<T>::max_mantissa_fast_path()) {
+        value = T(pns.mantissa);
+        if (pns.exponent < 0) { value = value / binary_format<T>::exact_power_of_ten(-pns.exponent); }
+        else { value = value * binary_format<T>::exact_power_of_ten(pns.exponent); }
+        if (pns.negative) { value = -value; }
+        return answer;
+      }
+    } else {
+      // We do not have that fegetround() == FE_TONEAREST.
+      // Next is a modified Clinger's fast path, inspired by Jakub Jelínek's proposal
+      if (pns.exponent >= 0 && pns.mantissa <=binary_format<T>::max_mantissa_fast_path(pns.exponent)) {
+#if defined(__clang__) || defined(FASTFLOAT_32BIT)
+        // Clang may map 0 to -0.0 when fegetround() == FE_DOWNWARD
+        if(pns.mantissa == 0) {
+          value = pns.negative ? T(-0.) : T(0.);
+          return answer;
+        }
+#endif
+        value = T(pns.mantissa) * binary_format<T>::exact_power_of_ten(pns.exponent);
+        if (pns.negative) { value = -value; }
+        return answer;
+      }
+    }
   }
   adjusted_mantissa am = compute_float<binary_format<T>>(pns.exponent, pns.mantissa);
   if(pns.too_many_digits && am.power2 >= 0) {
@@ -9889,9 +11239,67 @@ from_chars_result from_chars_advanced(const char *first, const char *last,
   // then we need to go the long way around again. This is very uncommon.
   if(am.power2 < 0) { am = digit_comp<T>(pns, am); }
   to_float(pns.negative, am, value);
+  // Test for over/underflow.
+  if ((pns.mantissa != 0 && am.mantissa == 0 && am.power2 == 0) || am.power2 == binary_format<T>::infinite_power()) {
+    answer.ec = std::errc::result_out_of_range;
+  }
   return answer;
 }
 
+template<typename T, typename UC>
+FASTFLOAT_CONSTEXPR20
+from_chars_result_t<UC> from_chars_advanced(UC const * first, UC const * last,
+                                      T &value, parse_options_t<UC> options)  noexcept  {
+
+  static_assert (is_supported_float_type<T>(), "only some floating-point types are supported");
+  static_assert (is_supported_char_type<UC>(), "only char, wchar_t, char16_t and char32_t are supported");
+
+  from_chars_result_t<UC> answer;
+#ifdef FASTFLOAT_SKIP_WHITE_SPACE  // disabled by default
+  while ((first != last) && fast_float::is_space(uint8_t(*first))) {
+    first++;
+  }
+#endif
+  if (first == last) {
+    answer.ec = std::errc::invalid_argument;
+    answer.ptr = first;
+    return answer;
+  }
+  parsed_number_string_t<UC> pns = parse_number_string<UC>(first, last, options);
+  if (!pns.valid) {
+    if (options.format & chars_format::no_infnan) {
+      answer.ec = std::errc::invalid_argument;
+      answer.ptr = first;
+      return answer;
+    } else {
+      return detail::parse_infnan(first, last, value);
+    }
+  }
+
+  // call overload that takes parsed_number_string_t directly.
+  return from_chars_advanced(pns, value);
+}
+
+
+template <typename T, typename UC, typename>
+FASTFLOAT_CONSTEXPR20
+from_chars_result_t<UC> from_chars(UC const* first, UC const* last, T& value, int base) noexcept {
+  static_assert (is_supported_char_type<UC>(), "only char, wchar_t, char16_t and char32_t are supported");
+
+  from_chars_result_t<UC> answer;
+#ifdef FASTFLOAT_SKIP_WHITE_SPACE  // disabled by default
+  while ((first != last) && fast_float::is_space(uint8_t(*first))) {
+    first++;
+  }
+#endif
+  if (first == last || base < 2 || base > 36) {
+    answer.ec = std::errc::invalid_argument;
+    answer.ptr = first;
+    return answer;
+  }
+  return parse_int_string(first, last, value, base);
+}
+
 } // namespace fast_float
 
 #endif
@@ -9928,10 +11336,23 @@ from_chars_result from_chars_advanced(const char *first, const char *last,
 
 // forward declarations for std::vector
 #if defined(__GLIBCXX__) || defined(__GLIBCPP__) || defined(_MSC_VER)
+#if defined(_MSC_VER)
+__pragma(warning(push))
+__pragma(warning(disable : 4643))
+#endif
 namespace std {
 template<typename> class allocator;
+#ifdef _GLIBCXX_DEBUG
+inline namespace __debug {
+template<typename T, typename Alloc> class vector;
+}
+#else
 template<typename T, typename Alloc> class vector;
+#endif
 } // namespace std
+#if defined(_MSC_VER)
+__pragma(warning(pop))
+#endif
 #elif defined(_LIBCPP_ABI_NAMESPACE)
 namespace std {
 inline namespace _LIBCPP_ABI_NAMESPACE {
@@ -10017,12 +11438,21 @@ template<class Alloc> bool from_chars(c4::csubstr buf, std::vector<char, Alloc>
 #elif defined(_LIBCPP_VERSION) || defined(__APPLE_CC__)
 #include <iosfwd>  // use the fwd header in stdlibc++
 #elif defined(_MSC_VER)
+// amalgamate: removed include of
+// https://github.com/biojppm/c4core/src/c4/error.hpp
+//#include "c4/error.hpp"
+#if !defined(C4_ERROR_HPP_) && !defined(_C4_ERROR_HPP_)
+#error "amalgamate: file c4/error.hpp must have been included at this point"
+#endif /* C4_ERROR_HPP_ */
+
 //! @todo is there a fwd header in msvc?
 namespace std {
+C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(4643) // Forward declaring 'char_traits' in namespace std is not permitted by the C++ Standard.
 template<typename> struct char_traits;
 template<typename> class allocator;
 template<typename _CharT, typename _Traits, typename _Alloc> class basic_string;
 using string = basic_string<char, char_traits<char>, allocator<char>>;
+C4_SUPPRESS_WARNING_MSVC_POP
 } /* namespace std */
 #else
 #error "unknown standard library"
@@ -10030,8 +11460,8 @@ using string = basic_string<char, char_traits<char>, allocator<char>>;
 
 namespace c4 {
 
-c4::substr to_substr(std::string &s);
-c4::csubstr to_csubstr(std::string const& s);
+c4::substr to_substr(std::string &s) noexcept;
+c4::csubstr to_csubstr(std::string const& s) noexcept;
 
 bool operator== (c4::csubstr ss, std::string const& s);
 bool operator!= (c4::csubstr ss, std::string const& s);
@@ -10093,6 +11523,13 @@ bool from_chars(c4::csubstr buf, std::string * s);
 
 // (end https://github.com/biojppm/c4core/src/c4/std/std_fwd.hpp)
 
+// (amalgamate) this include is needed to work around
+// conditional includes in charconv.hpp
+#if (defined(_MSVC_LANG) && (_MSVC_LANG >= 201703L)) || (__cplusplus >= 201703L)
+#include <charconv>
+#endif
+
+
 
 
 //********************************************************************************
@@ -10107,35 +11544,6 @@ bool from_chars(c4::csubstr buf, std::string * s);
 
 /** @file charconv.hpp Lightweight generic type-safe wrappers for
  * converting individual values to/from strings.
- *
- * These are the main functions:
- *
- * @code{.cpp}
- * // Convert the given value, writing into the string.
- * // The resulting string will NOT be null-terminated.
- * // Return the number of characters needed.
- * // This function is safe to call when the string is too small -
- * // no writes will occur beyond the string's last character.
- * template<class T> size_t c4::to_chars(substr buf, T const& C4_RESTRICT val);
- *
- *
- * // Convert the given value to a string using to_chars(), and
- * // return the resulting string, up to and including the last
- * // written character.
- * template<class T> substr c4::to_chars_sub(substr buf, T const& C4_RESTRICT val);
- *
- *
- * // Read a value from the string, which must be
- * // trimmed to the value (ie, no leading/trailing whitespace).
- * // return true if the conversion succeeded.
- * template<class T> bool c4::from_chars(csubstr buf, T * C4_RESTRICT val);
- *
- *
- * // Read the first valid sequence of characters from the string,
- * // skipping leading whitespace, and convert it using from_chars().
- * // Return the number of characters read for converting.
- * template<class T> size_t c4::from_chars_first(csubstr buf, T * C4_RESTRICT val);
- * @endcode
  */
 
 // amalgamate: removed include of
@@ -10193,44 +11601,62 @@ bool from_chars(c4::csubstr buf, std::string * s);
 
 
 #ifndef C4CORE_NO_FAST_FLOAT
-    C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wsign-conversion")
-    C4_SUPPRESS_WARNING_GCC("-Warray-bounds")
-#if __GNUC__ >= 5
-    C4_SUPPRESS_WARNING_GCC("-Wshift-count-overflow")
-#endif
-// amalgamate: removed include of
-// https://github.com/biojppm/c4core/src/c4/ext/fast_float.hpp
-//#   include "c4/ext/fast_float.hpp"
-#if !defined(C4_EXT_FAST_FLOAT_HPP_) && !defined(_C4_EXT_FAST_FLOAT_HPP_)
-#error "amalgamate: file c4/ext/fast_float.hpp must have been included at this point"
-#endif /* C4_EXT_FAST_FLOAT_HPP_ */
-
-    C4_SUPPRESS_WARNING_GCC_POP
-#   define C4CORE_HAVE_FAST_FLOAT 1
-#   define C4CORE_HAVE_STD_FROMCHARS 0
 #   if (C4_CPP >= 17)
 #       if defined(_MSC_VER)
-#           if (C4_MSVC_VERSION >= C4_MSVC_VERSION_2019)
-#               include <charconv>
+#           if (C4_MSVC_VERSION >= C4_MSVC_VERSION_2019) // VS2017 and lower do not have these macros
+//included above:
+//#               include <charconv>
 #               define C4CORE_HAVE_STD_TOCHARS 1
+#               define C4CORE_HAVE_STD_FROMCHARS 0 // prefer fast_float with MSVC
+#               define C4CORE_HAVE_FAST_FLOAT 1
 #           else
 #               define C4CORE_HAVE_STD_TOCHARS 0
+#               define C4CORE_HAVE_STD_FROMCHARS 0
+#               define C4CORE_HAVE_FAST_FLOAT 1
 #           endif
-#       else  // VS2017 and lower do not have these macros
-#           if __has_include(<charconv>) && __cpp_lib_to_chars
-#               define C4CORE_HAVE_STD_TOCHARS 1
+#       else
+#           if __has_include(<charconv>)
 //included above:
 //#               include <charconv>
+#               if defined(__cpp_lib_to_chars)
+#                   define C4CORE_HAVE_STD_TOCHARS 1
+#                   define C4CORE_HAVE_STD_FROMCHARS 0 // glibc uses fast_float internally
+#                   define C4CORE_HAVE_FAST_FLOAT 1
+#               else
+#                   define C4CORE_HAVE_STD_TOCHARS 0
+#                   define C4CORE_HAVE_STD_FROMCHARS 0
+#                   define C4CORE_HAVE_FAST_FLOAT 1
+#               endif
 #           else
 #               define C4CORE_HAVE_STD_TOCHARS 0
+#               define C4CORE_HAVE_STD_FROMCHARS 0
+#               define C4CORE_HAVE_FAST_FLOAT 1
 #           endif
 #       endif
 #   else
 #       define C4CORE_HAVE_STD_TOCHARS 0
+#       define C4CORE_HAVE_STD_FROMCHARS 0
+#       define C4CORE_HAVE_FAST_FLOAT 1
+#   endif
+#   if C4CORE_HAVE_FAST_FLOAT
+        C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wsign-conversion")
+        C4_SUPPRESS_WARNING_GCC("-Warray-bounds")
+#       if defined(__GNUC__) && __GNUC__ >= 5
+            C4_SUPPRESS_WARNING_GCC("-Wshift-count-overflow")
+#       endif
+// amalgamate: removed include of
+// https://github.com/biojppm/c4core/src/c4/ext/fast_float.hpp
+//#       include "c4/ext/fast_float.hpp"
+#if !defined(C4_EXT_FAST_FLOAT_HPP_) && !defined(_C4_EXT_FAST_FLOAT_HPP_)
+#error "amalgamate: file c4/ext/fast_float.hpp must have been included at this point"
+#endif /* C4_EXT_FAST_FLOAT_HPP_ */
+
+        C4_SUPPRESS_WARNING_GCC_POP
 #   endif
 #elif (C4_CPP >= 17)
+#   define C4CORE_HAVE_FAST_FLOAT 0
 #   if defined(_MSC_VER)
-#       if (C4_MSVC_VERSION >= C4_MSVC_VERSION_2019)
+#       if (C4_MSVC_VERSION >= C4_MSVC_VERSION_2019) // VS2017 and lower do not have these macros
 //included above:
 //#           include <charconv>
 #           define C4CORE_HAVE_STD_TOCHARS 1
@@ -10239,12 +11665,17 @@ bool from_chars(c4::csubstr buf, std::string * s);
 #           define C4CORE_HAVE_STD_TOCHARS 0
 #           define C4CORE_HAVE_STD_FROMCHARS 0
 #       endif
-#   else  // VS2017 and lower do not have these macros
-#       if __has_include(<charconv>) && __cpp_lib_to_chars
-#           define C4CORE_HAVE_STD_TOCHARS 1
-#           define C4CORE_HAVE_STD_FROMCHARS 1
+#   else
+#       if __has_include(<charconv>)
 //included above:
 //#           include <charconv>
+#           if defined(__cpp_lib_to_chars)
+#               define C4CORE_HAVE_STD_TOCHARS 1
+#               define C4CORE_HAVE_STD_FROMCHARS 1 // glibc uses fast_float internally
+#           else
+#               define C4CORE_HAVE_STD_TOCHARS 0
+#               define C4CORE_HAVE_STD_FROMCHARS 0
+#           endif
 #       else
 #           define C4CORE_HAVE_STD_TOCHARS 0
 #           define C4CORE_HAVE_STD_FROMCHARS 0
@@ -10253,81 +11684,124 @@ bool from_chars(c4::csubstr buf, std::string * s);
 #else
 #   define C4CORE_HAVE_STD_TOCHARS 0
 #   define C4CORE_HAVE_STD_FROMCHARS 0
+#   define C4CORE_HAVE_FAST_FLOAT 0
 #endif
 
 
-#if !C4CORE_HAVE_STD_FROMCHARS && !defined(C4CORE_HAVE_FAST_FLOAT)
+#if !C4CORE_HAVE_STD_FROMCHARS
 #include <cstdio>
 #endif
 
 
-#ifdef _MSC_VER
+#if defined(_MSC_VER)
 #   pragma warning(push)
+#   pragma warning(disable: 4996) // snprintf/scanf: this function or variable may be unsafe
 #   if C4_MSVC_VERSION != C4_MSVC_VERSION_2017
 #       pragma warning(disable: 4800) //'int': forcing value to bool 'true' or 'false' (performance warning)
 #   endif
-#   pragma warning(disable: 4996) // snprintf/scanf: this function or variable may be unsafe
-#elif defined(__clang__)
+#endif
+
+#if defined(__clang__)
 #   pragma clang diagnostic push
 #   pragma clang diagnostic ignored "-Wtautological-constant-out-of-range-compare"
 #   pragma clang diagnostic ignored "-Wformat-nonliteral"
 #   pragma clang diagnostic ignored "-Wdouble-promotion" // implicit conversion increases floating-point precision
+#   pragma clang diagnostic ignored "-Wold-style-cast"
 #elif defined(__GNUC__)
 #   pragma GCC diagnostic push
 #   pragma GCC diagnostic ignored "-Wformat-nonliteral"
 #   pragma GCC diagnostic ignored "-Wdouble-promotion" // implicit conversion increases floating-point precision
 #   pragma GCC diagnostic ignored "-Wuseless-cast"
+#   pragma GCC diagnostic ignored "-Wold-style-cast"
+#endif
+
+#if defined(__clang__)
+#define C4_NO_UBSAN_IOVRFLW __attribute__((no_sanitize("signed-integer-overflow")))
+#elif defined(__GNUC__)
+#if __GNUC__ > 7
+#define C4_NO_UBSAN_IOVRFLW __attribute__((no_sanitize("signed-integer-overflow")))
+#else
+#define C4_NO_UBSAN_IOVRFLW
+#endif
+#else
+#define C4_NO_UBSAN_IOVRFLW
 #endif
 
 
 namespace c4 {
 
-typedef enum : uint8_t {
+/** @defgroup doc_charconv Charconv utilities
+ *
+ * Lightweight, very fast generic type-safe wrappers for converting
+ * individual values to/from strings. These are the main generic
+ * functions:
+ *   - @ref doc_to_chars and its alias @ref doc_xtoa: implemented by calling @ref itoa()/@ref utoa()/@ref ftoa()/@ref dtoa() (or generically @ref xtoa())
+ *   - @ref doc_from_chars and its alias @ref doc_atox: implemented by calling @ref atoi()/@ref atou()/@ref atof()/@ref atod() (or generically @ref atox())
+ *   - @ref to_chars_sub()
+ *   - @ref from_chars_first()
+ *   - @ref xtoa()/@ref atox() are implemented in terms of @ref write_dec()/@ref read_dec() et al (see @ref doc_write/@ref doc_read())
+ *
+ * And also some modest brag is in order: these functions are really
+ * fast: faster even than C++17 `std::from_chars()` and
+ * `std::to_chars()`, and many dozens of times faster than the
+ * iostream abominations.
+ *
+ * For example, here are some benchmark comparisons for @ref
+ * doc_from_chars (link leads to the main project README, where these
+ * results are shown more systematically).
+ *
+ * <table>
+ * <caption id="atox-i64-results">atox,int64_t</caption>
+ * <tr><th>g++12, linux <th>Visual Studio 2019
+ * <tr><td> \image html linux-x86_64-gxx12.1-Release-c4core-bm-charconv-atox-mega_bytes_per_second-i64.png <td> \image html windows-x86_64-vs2019-Release-c4core-bm-charconv-atox-mega_bytes_per_second-i64.png
+ * </table>
+ *
+ * <table>
+ * <caption id="xtoa-i64-results">xtoa,int64_t</caption>
+ * <tr><th>g++12, linux <th>Visual Studio 2019
+ * <tr><td> \image html linux-x86_64-gxx12.1-Release-c4core-bm-charconv-xtoa-mega_bytes_per_second-i64.png <td> \image html windows-x86_64-vs2019-Release-c4core-bm-charconv-xtoa-mega_bytes_per_second-i64.png
+ * </table>
+ *
+ * To parse floating point, c4core uses
+ * [fastfloat](https://github.com/fastfloat/fast_float), which is
+ * extremely fast, by an even larger factor:
+ *
+ * <table>
+ * <caption id="atox-float-results">atox,float</caption>
+ * <tr><th>g++12, linux <th>Visual Studio 2019
+ * <tr><td> \image html linux-x86_64-gxx12.1-Release-c4core-bm-charconv-atof-mega_bytes_per_second-float.png <td> \image html windows-x86_64-vs2019-Release-c4core-bm-charconv-atof-mega_bytes_per_second-float.png
+ * </table>
+ *
+ * @{
+ */
+
+#if C4CORE_HAVE_STD_TOCHARS
+/** @warning Use only the symbol. Do not rely on the type or naked value of this enum. */
+typedef enum : std::underlying_type<std::chars_format>::type {
     /** print the real number in floating point format (like %f) */
-    FTOA_FLOAT = 0,
+    FTOA_FLOAT = static_cast<std::underlying_type<std::chars_format>::type>(std::chars_format::fixed),
     /** print the real number in scientific format (like %e) */
-    FTOA_SCIENT = 1,
+    FTOA_SCIENT = static_cast<std::underlying_type<std::chars_format>::type>(std::chars_format::scientific),
     /** print the real number in flexible format (like %g) */
-    FTOA_FLEX = 2,
+    FTOA_FLEX = static_cast<std::underlying_type<std::chars_format>::type>(std::chars_format::general),
     /** print the real number in hexadecimal format (like %a) */
-    FTOA_HEXA = 3,
-    _FTOA_COUNT
+    FTOA_HEXA = static_cast<std::underlying_type<std::chars_format>::type>(std::chars_format::hex),
 } RealFormat_e;
+#else
+/** @warning Use only the symbol. Do not rely on the type or naked value of this enum. */
+typedef enum : char {
+    /** print the real number in floating point format (like %f) */
+    FTOA_FLOAT = 'f',
+    /** print the real number in scientific format (like %e) */
+    FTOA_SCIENT = 'e',
+    /** print the real number in flexible format (like %g) */
+    FTOA_FLEX = 'g',
+    /** print the real number in hexadecimal format (like %a) */
+    FTOA_HEXA = 'a',
+} RealFormat_e;
+#endif
 
-
-inline C4_CONSTEXPR14 char to_c_fmt(RealFormat_e f)
-{
-    constexpr const char fmt[] = {
-        'f',  // FTOA_FLOAT
-        'e',  // FTOA_SCIENT
-        'g',  // FTOA_FLEX
-        'a',  // FTOA_HEXA
-    };
-    C4_STATIC_ASSERT(C4_COUNTOF(fmt) == _FTOA_COUNT);
-    #if C4_CPP > 14
-    C4_ASSERT(f < _FTOA_COUNT);
-    #endif
-    return fmt[f];
-}
-
-
-#if C4CORE_HAVE_STD_TOCHARS
-inline C4_CONSTEXPR14 std::chars_format to_std_fmt(RealFormat_e f)
-{
-    constexpr const std::chars_format fmt[] = {
-        std::chars_format::fixed,       // FTOA_FLOAT
-        std::chars_format::scientific,  // FTOA_SCIENT
-        std::chars_format::general,     // FTOA_FLEX
-        std::chars_format::hex,         // FTOA_HEXA
-    };
-    C4_STATIC_ASSERT(C4_COUNTOF(fmt) == _FTOA_COUNT);
-    #if C4_CPP >= 14
-    C4_ASSERT(f < _FTOA_COUNT);
-    #endif
-    return fmt[f];
-}
-#endif // C4CORE_HAVE_STD_TOCHARS
-
+/** @cond dev */
 /** in some platforms, int,unsigned int
  *  are not any of int8_t...int64_t and
  *  long,unsigned long are not any of uint8_t...uint64_t */
@@ -10351,6 +11825,7 @@ struct is_fixed_length
         value = value_i || value_u
     };
 };
+/** @endcond */
 
 
 //-----------------------------------------------------------------------------
@@ -10369,6 +11844,7 @@ struct is_fixed_length
 #   endif
 #endif
 
+/** @cond dev */
 namespace detail {
 
 /* python command to get the values below:
@@ -10528,25 +12004,30 @@ template<> struct charconv_digits_<8u, false>
 };
 } // namespace detail
 
+// Helper macros, undefined below
+#define _c4append(c) { if(C4_LIKELY(pos < buf.len)) { buf.str[pos++] = static_cast<char>(c); } else { ++pos; } }
+#define _c4appendhex(i) { if(C4_LIKELY(pos < buf.len)) { buf.str[pos++] = hexchars[i]; } else { ++pos; } }
+
+/** @endcond */
+
 
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 
-// Helper macros, undefined below
-#define _c4append(c) { if(C4_LIKELY(pos < buf.len)) { buf.str[pos++] = static_cast<char>(c); } else { ++pos; } }
-#define _c4appendhex(i) { if(C4_LIKELY(pos < buf.len)) { buf.str[pos++] = hexchars[i]; } else { ++pos; } }
 
-/** @name digits_dec return the number of digits required to encode a
- * decimal number.
+/** @defgroup doc_digits Get number of digits
  *
  * @note At first sight this code may look heavily branchy and
  * therefore inefficient. However, measurements revealed this to be
  * the fastest among the alternatives.
  *
- * @see https://github.com/biojppm/c4core/pull/77 */
-/** @{ */
+ * @see https://github.com/biojppm/c4core/pull/77
+ *
+ * @{
+ */
 
+/** decimal digits for 8 bit integers */
 template<class T>
 C4_CONSTEXPR14 C4_ALWAYS_INLINE
 auto digits_dec(T v) noexcept
@@ -10557,6 +12038,7 @@ auto digits_dec(T v) noexcept
     return ((v >= 100) ? 3u : ((v >= 10) ? 2u : 1u));
 }
 
+/** decimal digits for 16 bit integers */
 template<class T>
 C4_CONSTEXPR14 C4_ALWAYS_INLINE
 auto digits_dec(T v) noexcept
@@ -10567,6 +12049,7 @@ auto digits_dec(T v) noexcept
     return ((v >= 10000) ? 5u : (v >= 1000) ? 4u : (v >= 100) ? 3u : (v >= 10) ? 2u : 1u);
 }
 
+/** decimal digits for 32 bit integers */
 template<class T>
 C4_CONSTEXPR14 C4_ALWAYS_INLINE
 auto digits_dec(T v) noexcept
@@ -10579,6 +12062,7 @@ auto digits_dec(T v) noexcept
             (v >= 1000) ? 4u : (v >= 100) ? 3u : (v >= 10) ? 2u : 1u);
 }
 
+/** decimal digits for 64 bit integers */
 template<class T>
 C4_CONSTEXPR14 C4_ALWAYS_INLINE
 auto digits_dec(T v) noexcept
@@ -10626,9 +12110,8 @@ auto digits_dec(T v) noexcept
         return (v >= 10) ? 2u : 1u;
 }
 
-/** @} */
-
 
+/** return the number of digits required to encode an hexadecimal number. */
 template<class T>
 C4_CONSTEXPR14 C4_ALWAYS_INLINE unsigned digits_hex(T v) noexcept
 {
@@ -10637,6 +12120,7 @@ C4_CONSTEXPR14 C4_ALWAYS_INLINE unsigned digits_hex(T v) noexcept
     return v ? 1u + (msb((typename std::make_unsigned<T>::type)v) >> 2u) : 1u;
 }
 
+/** return the number of digits required to encode a binary number. */
 template<class T>
 C4_CONSTEXPR14 C4_ALWAYS_INLINE unsigned digits_bin(T v) noexcept
 {
@@ -10645,6 +12129,7 @@ C4_CONSTEXPR14 C4_ALWAYS_INLINE unsigned digits_bin(T v) noexcept
     return v ? 1u + msb((typename std::make_unsigned<T>::type)v) : 1u;
 }
 
+/** return the number of digits required to encode an octal number. */
 template<class T>
 C4_CONSTEXPR14 C4_ALWAYS_INLINE unsigned digits_oct(T v_) noexcept
 {
@@ -10675,11 +12160,14 @@ C4_CONSTEXPR14 C4_ALWAYS_INLINE unsigned digits_oct(T v_) noexcept
 	}
 }
 
+/** @} */
+
 
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 
+/** @cond dev */
 namespace detail {
 C4_INLINE_CONSTEXPR const char hexchars[] = "0123456789abcdef";
 C4_INLINE_CONSTEXPR const char digits0099[] =
@@ -10689,7 +12177,23 @@ C4_INLINE_CONSTEXPR const char digits0099[] =
     "6061626364656667686970717273747576777879"
     "8081828384858687888990919293949596979899";
 } // namespace detail
+/** @endcond */
+
+C4_SUPPRESS_WARNING_GCC_PUSH
+C4_SUPPRESS_WARNING_GCC("-Warray-bounds")  // gcc has false positives here
+#if (defined(__GNUC__) && (__GNUC__ >= 7))
+C4_SUPPRESS_WARNING_GCC("-Wstringop-overflow")  // gcc has false positives here
+#endif
 
+/** @defgroup doc_write_unchecked Write with known number of digits
+ *
+ * Writes a value without checking the buffer length with regards to
+ * the required number of digits to encode the value. It is the
+ * responsibility of the caller to ensure that the provided number of
+ * digits is enough to write the given value. Notwithstanding the
+ * name, assertions are liberally performed, so this code is safe.
+ *
+ * @{ */
 
 template<class T>
 C4_HOT C4_ALWAYS_INLINE
@@ -10702,7 +12206,8 @@ void write_dec_unchecked(substr buf, T v, unsigned digits_v) noexcept
     // in bm_xtoa: checkoncelog_singlediv_write2
     while(v >= T(100))
     {
-        const T quo = v / T(100);
+        T quo = v;
+        quo /= T(100);
         const auto num = (v - quo * T(100)) << 1u;
         v = quo;
         buf.str[--digits_v] = detail::digits0099[num + 1];
@@ -10770,23 +12275,34 @@ void write_bin_unchecked(substr buf, T v, unsigned digits_v) noexcept
     C4_ASSERT(digits_v == 0);
 }
 
+/** @} */ // write_unchecked
+
+
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+
+/** @defgroup doc_write Write a value
+ *
+ * Writes a value and returns the required number of characters; the
+ * unpadded overloads write nothing when the buffer is too small.
+ *
+ * @{ */
 
 /** write an integer to a string in decimal format. This is the
  * lowest level (and the fastest) function to do this task.
  * @note does not accept negative numbers
- *
- * @return the number of characters required for the string, if the
- * buffer is large enough to accomodate the largest number of this
- * type. Otherwise it returns the latter. This allows reporting the
- * size of a successful write, or the size needed for any number of
- * this type. */
+ * @note the resulting string is NOT zero-terminated.
+ * @note it is ok to call this with an empty or too-small buffer;
+ * no writes will occur, and the required size will be returned
+ * @return the number of characters required for the buffer. */
 template<class T>
 C4_ALWAYS_INLINE size_t write_dec(substr buf, T v) noexcept
 {
     C4_STATIC_ASSERT(std::is_integral<T>::value);
     C4_ASSERT(v >= 0);
     unsigned digits = digits_dec(v);
-    if(C4_LIKELY(buf.len >= digits)) // VS does not have likely, so put the happy branch first
+    if(C4_LIKELY(buf.len >= digits))
         write_dec_unchecked(buf, v, digits);
     return digits;
 }
@@ -10794,18 +12310,18 @@ C4_ALWAYS_INLINE size_t write_dec(substr buf, T v) noexcept
 /** write an integer to a string in hexadecimal format. This is the
  * lowest level (and the fastest) function to do this task.
  * @note does not accept negative numbers
- * @return the number of characters required for the string, if the
- * buffer is large enough to accomodate the largest number of this
- * type. Otherwise it returns the latter. This allows reporting the
- * size of a successful write, or the size needed for any number of
- * this type. */
+ * @note does not prefix with 0x
+ * @note the resulting string is NOT zero-terminated.
+ * @note it is ok to call this with an empty or too-small buffer;
+ * no writes will occur, and the required size will be returned
+ * @return the number of characters required for the buffer. */
 template<class T>
 C4_ALWAYS_INLINE size_t write_hex(substr buf, T v) noexcept
 {
     C4_STATIC_ASSERT(std::is_integral<T>::value);
     C4_ASSERT(v >= 0);
     unsigned digits = digits_hex(v);
-    if(C4_LIKELY(buf.len >= digits)) // VS does not have likely, so put the happy branch first
+    if(C4_LIKELY(buf.len >= digits))
         write_hex_unchecked(buf, v, digits);
     return digits;
 }
@@ -10814,18 +12330,17 @@ C4_ALWAYS_INLINE size_t write_hex(substr buf, T v) noexcept
  * lowest level (and the fastest) function to do this task.
  * @note does not accept negative numbers
  * @note does not prefix with 0o
- * @return the number of characters required for the string, if the
- * buffer is large enough to accomodate the largest number of this
- * type. Otherwise it returns the latter. This allows reporting the
- * size of a successful write, or the size needed for any number of
- * this type. */
+ * @note the resulting string is NOT zero-terminated.
+ * @note it is ok to call this with an empty or too-small buffer;
+ * no writes will occur, and the required size will be returned
+ * @return the number of characters required for the buffer. */
 template<class T>
 C4_ALWAYS_INLINE size_t write_oct(substr buf, T v) noexcept
 {
     C4_STATIC_ASSERT(std::is_integral<T>::value);
     C4_ASSERT(v >= 0);
     unsigned digits = digits_oct(v);
-    if(C4_LIKELY(buf.len >= digits)) // VS does not have likely, so put the happy branch first
+    if(C4_LIKELY(buf.len >= digits))
         write_oct_unchecked(buf, v, digits);
     return digits;
 }
@@ -10834,11 +12349,10 @@ C4_ALWAYS_INLINE size_t write_oct(substr buf, T v) noexcept
  * lowest level (and the fastest) function to do this task.
  * @note does not accept negative numbers
  * @note does not prefix with 0b
- * @return the number of characters required for the string, if the
- * buffer is large enough to accomodate the largest number of this
- * type. Otherwise it returns the latter. This allows reporting the
- * size of a successful write, or the size needed for any number of
- * this type. */
+ * @note the resulting string is NOT zero-terminated.
+ * @note it is ok to call this with an empty or too-small buffer;
+ * no writes will occur, and the required size will be returned
+ * @return the number of characters required for the buffer. */
 template<class T>
 C4_ALWAYS_INLINE size_t write_bin(substr buf, T v) noexcept
 {
@@ -10852,6 +12366,7 @@ C4_ALWAYS_INLINE size_t write_bin(substr buf, T v) noexcept
 }
 
 
+/** @cond dev */
 namespace detail {
 template<class U> using NumberWriter = size_t (*)(substr, U);
 template<class T, NumberWriter<T> writer>
@@ -10870,11 +12385,12 @@ size_t write_num_digits(substr buf, T v, size_t num_digits) noexcept
     return num_digits;
 }
 } // namespace detail
+/** @endcond */
 
 
 /** same as c4::write_dec(), but pad with zeroes on the left
  * such that the resulting string is @p num_digits wide.
- * If the given number is wider than num_digits, then the number prevails. */
+ * If the given number requires more than num_digits, then the number prevails. */
 template<class T>
 C4_ALWAYS_INLINE size_t write_dec(substr buf, T val, size_t num_digits) noexcept
 {
@@ -10883,7 +12399,7 @@ C4_ALWAYS_INLINE size_t write_dec(substr buf, T val, size_t num_digits) noexcept
 
 /** same as c4::write_hex(), but pad with zeroes on the left
  * such that the resulting string is @p num_digits wide.
- * If the given number is wider than num_digits, then the number prevails. */
+ * If the given number requires more than num_digits, then the number prevails. */
 template<class T>
 C4_ALWAYS_INLINE size_t write_hex(substr buf, T val, size_t num_digits) noexcept
 {
@@ -10892,7 +12408,7 @@ C4_ALWAYS_INLINE size_t write_hex(substr buf, T val, size_t num_digits) noexcept
 
 /** same as c4::write_bin(), but pad with zeroes on the left
  * such that the resulting string is @p num_digits wide.
- * If the given number is wider than num_digits, then the number prevails. */
+ * If the given number requires more than num_digits, then the number prevails. */
 template<class T>
 C4_ALWAYS_INLINE size_t write_bin(substr buf, T val, size_t num_digits) noexcept
 {
@@ -10901,25 +12417,43 @@ C4_ALWAYS_INLINE size_t write_bin(substr buf, T val, size_t num_digits) noexcept
 
 /** same as c4::write_oct(), but pad with zeroes on the left
  * such that the resulting string is @p num_digits wide.
- * If the given number is wider than num_digits, then the number prevails. */
+ * If the given number requires more than num_digits, then the number prevails. */
 template<class T>
 C4_ALWAYS_INLINE size_t write_oct(substr buf, T val, size_t num_digits) noexcept
 {
     return detail::write_num_digits<T, &write_oct<T>>(buf, val, num_digits);
 }
 
+/** @} */ // write
+
+C4_SUPPRESS_WARNING_GCC_POP
+
 
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 
+
+C4_SUPPRESS_WARNING_MSVC_PUSH
+C4_SUPPRESS_WARNING_MSVC(4365) // '=': conversion from 'int' to 'I', signed/unsigned mismatch
+
+/** @defgroup doc_read Read a value
+ *
+ * @{ */
+
 /** read a decimal integer from a string. This is the
  * lowest level (and the fastest) function to do this task.
  * @note does not accept negative numbers
  * @note The string must be trimmed. Whitespace is not accepted.
  * @note the string must not be empty
- * @return true if the conversion was successful */
+ * @note there is no check for overflow; the value wraps around
+ * in a way similar to the standard C/C++ overflow behavior.
+ * For example, `read_dec<int8_t>("128", &val)` returns true
+ * and val will be set to -128 because 127 is the max i8 value.
+ * @see overflows<T>() to find out if a number string overflows a type range
+ * @return true if the conversion was successful (no overflow check) */
 template<class I>
+C4_NO_UBSAN_IOVRFLW
 C4_ALWAYS_INLINE bool read_dec(csubstr s, I *C4_RESTRICT v) noexcept
 {
     C4_STATIC_ASSERT(std::is_integral<I>::value);
@@ -10940,8 +12474,14 @@ C4_ALWAYS_INLINE bool read_dec(csubstr s, I *C4_RESTRICT v) noexcept
  * @note does not accept leading 0x or 0X
  * @note the string must not be empty
  * @note the string must be trimmed. Whitespace is not accepted.
- * @return true if the conversion was successful */
+ * @note there is no check for overflow; the value wraps around
+ * in a way similar to the standard C/C++ overflow behavior.
+ * For example, `read_hex<int8_t>("80", &val)` returns true
+ * and val will be set to -128 because 7f is the max i8 value.
+ * @see overflows<T>() to find out if a number string overflows a type range
+ * @return true if the conversion was successful (no overflow check) */
 template<class I>
+C4_NO_UBSAN_IOVRFLW
 C4_ALWAYS_INLINE bool read_hex(csubstr s, I *C4_RESTRICT v) noexcept
 {
     C4_STATIC_ASSERT(std::is_integral<I>::value);
@@ -10969,8 +12509,14 @@ C4_ALWAYS_INLINE bool read_hex(csubstr s, I *C4_RESTRICT v) noexcept
  * @note does not accept leading 0b or 0B
  * @note the string must not be empty
  * @note the string must be trimmed. Whitespace is not accepted.
- * @return true if the conversion was successful */
+ * @note there is no check for overflow; the value wraps around
+ * in a way similar to the standard C/C++ overflow behavior.
+ * For example, `read_bin<int8_t>("10000000", &val)` returns true
+ * and val will be set to -128 because 1111111 is the max i8 value.
+ * @see overflows<T>() to find out if a number string overflows a type range
+ * @return true if the conversion was successful (no overflow check) */
 template<class I>
+C4_NO_UBSAN_IOVRFLW
 C4_ALWAYS_INLINE bool read_bin(csubstr s, I *C4_RESTRICT v) noexcept
 {
     C4_STATIC_ASSERT(std::is_integral<I>::value);
@@ -10993,8 +12539,14 @@ C4_ALWAYS_INLINE bool read_bin(csubstr s, I *C4_RESTRICT v) noexcept
  * @note does not accept leading 0o or 0O
  * @note the string must not be empty
  * @note the string must be trimmed. Whitespace is not accepted.
- * @return true if the conversion was successful */
+ * @note there is no check for overflow; the value wraps around
+ * in a way similar to the standard C/C++ overflow behavior.
+ * For example, `read_oct<int8_t>("200", &val)` returns true
+ * and val will be set to -128 because 177 is the max i8 value.
+ * @see overflows<T>() to find out if a number string overflows a type range
+ * @return true if the conversion was successful (no overflow check) */
 template<class I>
+C4_NO_UBSAN_IOVRFLW
 C4_ALWAYS_INLINE bool read_oct(csubstr s, I *C4_RESTRICT v) noexcept
 {
     C4_STATIC_ASSERT(std::is_integral<I>::value);
@@ -11009,11 +12561,18 @@ C4_ALWAYS_INLINE bool read_oct(csubstr s, I *C4_RESTRICT v) noexcept
     return true;
 }
 
+/** @} */
+
+C4_SUPPRESS_WARNING_MSVC_POP
+
 
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 
+C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wswitch-default")
+
+/** @cond dev */
 namespace detail {
 inline size_t _itoa2buf(substr buf, size_t pos, csubstr val) noexcept
 {
@@ -11102,7 +12661,7 @@ C4_NO_INLINE size_t _itoa2buf(substr buf, I radix, size_t num_digits) noexcept
         buf.str[pos++] = 'x';
         pos = _itoa2bufwithdigits(buf, pos, num_digits, digits_type::min_value_hex());
         break;
-    case I( 2):
+    case I(2):
         // add 3 to account for -0b
         needed_digits = num_digits+3 > digits_type::maxdigits_bin ? num_digits+3 : digits_type::maxdigits_bin;
         if(C4_UNLIKELY(buf.len < needed_digits))
@@ -11112,7 +12671,7 @@ C4_NO_INLINE size_t _itoa2buf(substr buf, I radix, size_t num_digits) noexcept
         buf.str[pos++] = 'b';
         pos = _itoa2bufwithdigits(buf, pos, num_digits, digits_type::min_value_bin());
         break;
-    case I( 8):
+    case I(8):
         // add 3 to account for -0o
         needed_digits = num_digits+3 > digits_type::maxdigits_oct ? num_digits+3 : digits_type::maxdigits_oct;
         if(C4_UNLIKELY(buf.len < needed_digits))
@@ -11126,16 +12685,18 @@ C4_NO_INLINE size_t _itoa2buf(substr buf, I radix, size_t num_digits) noexcept
     return pos;
 }
 } // namespace detail
+/** @endcond */
+
 
+/** @defgroup doc_itoa itoa: signed to chars
+ *
+ * @{ */
 
 /** convert an integral signed decimal to a string.
- * The resulting string is NOT zero-terminated.
- * Writing stops at the buffer's end.
- * @return the number of characters required for the string, if the
- * buffer is large enough to accomodate the largest number of this
- * type. Otherwise it returns the latter. This allows reporting the
- * size of a successful write, or the size needed for any number of
- * this type. */
+ * @note the resulting string is NOT zero-terminated.
+ * @note it is ok to call this with an empty or too-small buffer;
+ * no writes will occur, and the needed size will be returned
+ * @return the number of characters required for the buffer. */
 template<class T>
 C4_ALWAYS_INLINE size_t itoa(substr buf, T v) noexcept
 {
@@ -11164,13 +12725,10 @@ C4_ALWAYS_INLINE size_t itoa(substr buf, T v) noexcept
 /** convert an integral signed integer to a string, using a specific
  * radix. The radix must be 2, 8, 10 or 16.
  *
- * The resulting string is NOT zero-terminated.
- * Writing stops at the buffer's end.
- * @return the number of characters required for the string, if the
- * buffer is large enough to accomodate the largest number of this
- * type. Otherwise it returns the latter. This allows reporting the
- * size of a successful write, or the size needed for any number of
- * this type. */
+ * @note the resulting string is NOT zero-terminated.
+ * @note it is ok to call this with an empty or too-small buffer;
+ * no writes will occur, and the needed size will be returned
+ * @return the number of characters required for the buffer. */
 template<class T>
 C4_ALWAYS_INLINE size_t itoa(substr buf, T v, T radix) noexcept
 {
@@ -11241,16 +12799,13 @@ C4_ALWAYS_INLINE size_t itoa(substr buf, T v, T radix) noexcept
 
 
 /** same as c4::itoa(), but pad with zeroes on the left such that the
- * resulting string is @p num_digits wide, not account for radix
- * prefix (0x,0o,0b). The @p radix must be 2, 8, 10 or 16.  The
- * resulting string is NOT zero-terminated.  Writing stops at the
- * buffer's end.
+ * resulting string is @p num_digits wide, not accounting for radix
+ * prefix (0x,0o,0b). The @p radix must be 2, 8, 10 or 16.
  *
- * @return the number of characters required for the string, if the
- * buffer is large enough to accomodate the largest number of this
- * type. Otherwise it returns the latter. This allows reporting the
- * size of a successful write, or the size needed for any number of
- * this type. */
+ * @note the resulting string is NOT zero-terminated.
+ * @note it is ok to call this with an empty or too-small buffer;
+ * no writes will occur, and the needed size will be returned
+ * @return the number of characters required for the buffer. */
 template<class T>
 C4_ALWAYS_INLINE size_t itoa(substr buf, T v, T radix, size_t num_digits) noexcept
 {
@@ -11320,19 +12875,23 @@ C4_ALWAYS_INLINE size_t itoa(substr buf, T v, T radix, size_t num_digits) noexce
     return detail::_itoa2buf<T>(buf, radix, num_digits);
 }
 
+/** @} */
+
 
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 
+/** @defgroup doc_utoa utoa: unsigned to chars
+ *
+ * @{ */
+
 /** convert an integral unsigned decimal to a string.
- * The resulting string is NOT zero-terminated.
- * Writing stops at the buffer's end.
- * @return the number of characters required for the string, if the
- * buffer is large enough to accomodate the largest number of this
- * type. Otherwise it returns the latter. This allows reporting the
- * size of a successful write, or the size needed for any number of
- * this type. */
+ *
+ * @note the resulting string is NOT zero-terminated.
+ * @note it is ok to call this with an empty or too-small buffer;
+ * no writes will occur, and the needed size will be returned
+ * @return the number of characters required for the buffer. */
 template<class T>
 C4_ALWAYS_INLINE size_t utoa(substr buf, T v) noexcept
 {
@@ -11341,14 +12900,13 @@ C4_ALWAYS_INLINE size_t utoa(substr buf, T v) noexcept
     return write_dec(buf, v);
 }
 
-/** convert an integral unsigned integer to a string, using a specific radix. The radix must be 2, 8, 10 or 16.
- * The resulting string is NOT zero-terminated.
- * Writing stops at the buffer's end.
- * @return the number of characters required for the string, if the
- * buffer is large enough to accomodate the largest number of this
- * type. Otherwise it returns the latter. This allows reporting the
- * size of a successful write, or the size needed for any number of
- * this type. */
+/** convert an integral unsigned integer to a string, using a specific
+ * radix. The radix must be 2, 8, 10 or 16.
+ *
+ * @note the resulting string is NOT zero-terminated.
+ * @note it is ok to call this with an empty or too-small buffer;
+ * no writes will occur, and the needed size will be returned
+ * @return the number of characters required for the buffer. */
 template<class T>
 C4_ALWAYS_INLINE size_t utoa(substr buf, T v, T radix) noexcept
 {
@@ -11398,11 +12956,12 @@ C4_ALWAYS_INLINE size_t utoa(substr buf, T v, T radix) noexcept
 
 /** same as c4::utoa(), but pad with zeroes on the left such that the
  * resulting string is @p num_digits wide. The @p radix must be 2,
- * 8, 10 or 16.  The resulting string is NOT zero-terminated. Writing
- * occurs only if the buffer is large enough to contain the largest
- * value of the type or @p num_digits if it is larger.
+ * 8, 10 or 16.
  *
- * @return the number of characters required for the string */
+ * @note the resulting string is NOT zero-terminated.
+ * @note it is ok to call this with an empty or too-small buffer;
+ * no writes will occur, and the needed size will be returned
+ * @return the number of characters required for the buffer. */
 template<class T>
 C4_ALWAYS_INLINE size_t utoa(substr buf, T v, T radix, size_t num_digits) noexcept
 {
@@ -11450,12 +13009,19 @@ C4_ALWAYS_INLINE size_t utoa(substr buf, T v, T radix, size_t num_digits) noexce
     }
     return total_digits;
 }
+C4_SUPPRESS_WARNING_GCC_POP
+
+/** @} */
 
 
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 
+/** @defgroup doc_atoi atoi: chars to signed
+ *
+ * @{ */
+
 /** Convert a trimmed string to a signed integral value. The input
  * string can be formatted as decimal, binary (prefix 0b or 0B), octal
  * (prefix 0o or 0O) or hexadecimal (prefix 0x or 0X). Strings with
@@ -11466,13 +13032,18 @@ C4_ALWAYS_INLINE size_t utoa(substr buf, T v, T radix, size_t num_digits) noexce
  *
  * @return true if the conversion was successful.
  *
+ * @note a positive sign is not accepted, i.e. the string must not
+ * start with '+'
+ *
  * @note overflow is not detected: the return status is true even if
  * the conversion would return a value outside of the type's range, in
- * which case the result will wrap around the type's range.
- * This is similar to native behavior.
+ * which case the result will wrap around the type's range.  This is
+ * similar to native behavior. See @ref doc_overflows and @ref
+ * doc_overflow_checked for overflow checking utilities.
  *
  * @see atoi_first() if the string is not trimmed to the value to read. */
 template<class T>
+C4_NO_UBSAN_IOVRFLW
 C4_ALWAYS_INLINE bool atoi(csubstr str, T * C4_RESTRICT v) noexcept
 {
     C4_STATIC_ASSERT(std::is_integral<T>::value);
@@ -11481,13 +13052,14 @@ C4_ALWAYS_INLINE bool atoi(csubstr str, T * C4_RESTRICT v) noexcept
     if(C4_UNLIKELY(str.len == 0))
         return false;
 
+    C4_ASSERT(str.str[0] != '+');
+
     T sign = 1;
     size_t start = 0;
     if(str.str[0] == '-')
     {
-        if(C4_UNLIKELY(str.len == 1))
+        if(C4_UNLIKELY(str.len == ++start))
             return false;
-        ++start;
         sign = -1;
     }
 
@@ -11536,8 +13108,16 @@ C4_ALWAYS_INLINE size_t atoi_first(csubstr str, T * C4_RESTRICT v)
     return csubstr::npos;
 }
 
+/** @} */
+
 
 //-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+
+/** @defgroup doc_atou atou: chars to unsigned
+ *
+ * @{ */
 
 /** Convert a trimmed string to an unsigned integral value. The string can be
  * formatted as decimal, binary (prefix 0b or 0B), octal (prefix 0o or 0O)
@@ -11548,7 +13128,9 @@ C4_ALWAYS_INLINE size_t atoi_first(csubstr str, T * C4_RESTRICT v)
  *
  * @note overflow is not detected: the return status is true even if
  * the conversion would return a value outside of the type's range, in
- * which case the result will wrap around the type's range.
+ * which case the result will wrap around the type's range. See @ref
+ * doc_overflows and @ref doc_overflow_checked for overflow checking
+ * utilities.
  *
  * @note If the string has a minus character, the return status
  * will be false.
@@ -11608,6 +13190,8 @@ C4_ALWAYS_INLINE size_t atou_first(csubstr str, T *v)
 }
 
 
+/** @} */
+
 #ifdef _MSC_VER
 #   pragma warning(pop)
 #elif defined(__clang__)
@@ -11620,6 +13204,8 @@ C4_ALWAYS_INLINE size_t atou_first(csubstr str, T *v)
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
+
+/** @cond dev */
 namespace detail {
 inline bool check_overflow(csubstr str, csubstr limit) noexcept
 {
@@ -11638,15 +13224,22 @@ inline bool check_overflow(csubstr str, csubstr limit) noexcept
         return str.len > limit.len;
 }
 } // namespace detail
+/** @endcond */
+
 
+/** @defgroup doc_overflows overflows: does a number string overflow a type
+ *
+ * @{ */
 
-/** Test if the following string would overflow when converted to associated
- * types.
+/** Test if the following string would overflow when converted to
+ * associated integral types; this function is dispatched with SFINAE
+ * to handle signed and unsigned types differently.
  * @return true if number will overflow, false if it fits (or doesn't parse)
+ * @see doc_overflow_checked for format specifiers to enforce no-overflow reads
  */
 template<class T>
 auto overflows(csubstr str) noexcept
-    -> typename std::enable_if<std::is_unsigned<T>::value, bool>::type 
+    -> typename std::enable_if<std::is_unsigned<T>::value, bool>::type
 {
     C4_STATIC_ASSERT(std::is_integral<T>::value);
 
@@ -11704,13 +13297,16 @@ auto overflows(csubstr str) noexcept
 }
 
 
-/** Test if the following string would overflow when converted to associated
- * types.
+/** Test if the following string would overflow when converted to
+ * associated integral types; this function is dispatched with SFINAE
+ * to handle signed and unsigned types differently.
+ *
  * @return true if number will overflow, false if it fits (or doesn't parse)
+ * @see doc_overflow_checked for format specifiers to enforce no-overflow reads
  */
 template<class T>
 auto overflows(csubstr str)
-    -> typename std::enable_if<std::is_signed<T>::value, bool>::type 
+    -> typename std::enable_if<std::is_signed<T>::value, bool>::type
 {
     C4_STATIC_ASSERT(std::is_integral<T>::value);
     if(C4_UNLIKELY(str.len == 0))
@@ -11803,25 +13399,29 @@ auto overflows(csubstr str)
         return detail::check_overflow(str, detail::charconv_digits<T>::max_value_dec());
 }
 
+/** @} */
+
 
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 
+/** @cond dev */
 namespace detail {
 
 
+#if (!C4CORE_HAVE_STD_FROMCHARS)
 /** @see http://www.exploringbinary.com/ for many good examples on float-str conversion */
 template<size_t N>
 void get_real_format_str(char (& C4_RESTRICT fmt)[N], int precision, RealFormat_e formatting, const char* length_modifier="")
 {
     int iret;
     if(precision == -1)
-        iret = snprintf(fmt, sizeof(fmt), "%%%s%c", length_modifier, to_c_fmt(formatting));
+        iret = snprintf(fmt, sizeof(fmt), "%%%s%c", length_modifier, formatting);
     else if(precision == 0)
-        iret = snprintf(fmt, sizeof(fmt), "%%.%s%c", length_modifier, to_c_fmt(formatting));
+        iret = snprintf(fmt, sizeof(fmt), "%%.%s%c", length_modifier, formatting);
     else
-        iret = snprintf(fmt, sizeof(fmt), "%%.%d%s%c", precision, length_modifier, to_c_fmt(formatting));
+        iret = snprintf(fmt, sizeof(fmt), "%%.%d%s%c", precision, length_modifier, formatting);
     C4_ASSERT(iret >= 2 && size_t(iret) < sizeof(fmt));
     C4_UNUSED(iret);
 }
@@ -11866,8 +13466,10 @@ size_t print_one(substr str, const char* full_fmt, T v)
     return ret;
 #endif
 }
+#endif // (!C4CORE_HAVE_STD_FROMCHARS)
+
 
-#if !C4CORE_HAVE_STD_FROMCHARS && !defined(C4CORE_HAVE_FAST_FLOAT)
+#if (!C4CORE_HAVE_STD_FROMCHARS) && (!C4CORE_HAVE_FAST_FLOAT)
 /** scans a string using the given type format, while at the same time
  * allowing non-null-terminated strings AND guaranteeing that the given
  * string length is strictly respected, so that no buffer overflows
@@ -11904,24 +13506,28 @@ inline size_t scan_one(csubstr str, const char *type_fmt, T *v)
     C4_ASSERT(num_chars >= 0);
     return (size_t)(num_chars);
 }
-#endif
+#endif // (!C4CORE_HAVE_STD_FROMCHARS) && (!C4CORE_HAVE_FAST_FLOAT)
 
 
 #if C4CORE_HAVE_STD_TOCHARS
 template<class T>
-size_t rtoa(substr buf, T v, int precision=-1, RealFormat_e formatting=FTOA_FLEX)
+C4_ALWAYS_INLINE size_t rtoa(substr buf, T v, int precision=-1, RealFormat_e formatting=FTOA_FLEX) noexcept
 {
     std::to_chars_result result;
     size_t pos = 0;
     if(formatting == FTOA_HEXA)
     {
-        _c4append('0');
-        _c4append('x');
+        if(buf.len > size_t(2))
+        {
+            buf.str[0] = '0';
+            buf.str[1] = 'x';
+        }
+        pos += size_t(2);
     }
     if(precision == -1)
-        result = std::to_chars(buf.str + pos, buf.str + buf.len, v, to_std_fmt(formatting));
+        result = std::to_chars(buf.str + pos, buf.str + buf.len, v, (std::chars_format)formatting);
     else
-        result = std::to_chars(buf.str + pos, buf.str + buf.len, v, to_std_fmt(formatting), precision);
+        result = std::to_chars(buf.str + pos, buf.str + buf.len, v, (std::chars_format)formatting, precision);
     if(result.ec == std::errc())
     {
         // all good, no errors.
@@ -11945,18 +13551,106 @@ size_t rtoa(substr buf, T v, int precision=-1, RealFormat_e formatting=FTOA_FLEX
 }
 #endif // C4CORE_HAVE_STD_TOCHARS
 
+
+#if C4CORE_HAVE_FAST_FLOAT
+/** Parse a hexadecimal real number without sign or 0x prefix: integer
+ * digits, optional '.' and fraction digits, optional 'p'/'P' exponent.
+ * @p val is reset to zero before parsing. NOTE(review): the exponent is
+ * applied via ipow<T,int16_t,16> - confirm base 16 (vs 2) is intended.
+ * @return true if the string was fully parsed */
+template<class T>
+C4_ALWAYS_INLINE bool scan_rhex(csubstr s, T *C4_RESTRICT val) noexcept
+{
+    C4_ASSERT(s.len > 0);
+    C4_ASSERT(s.str[0] != '-' && s.str[0] != '+');
+    C4_ASSERT(!s.begins_with("0x") && !s.begins_with("0X"));
+    *val = T(0); // the result is accumulated in *val, so start from zero
+    size_t pos = 0;
+    // integer part
+    for( ; pos < s.len; ++pos)
+    {
+        const char c = s.str[pos];
+        if(c >= '0' && c <= '9')
+            *val = *val * T(16) + T(c - '0');
+        else if(c >= 'a' && c <= 'f')
+            *val = *val * T(16) + T(10 + (c - 'a')); // 'a' is worth 10
+        else if(c >= 'A' && c <= 'F')
+            *val = *val * T(16) + T(10 + (c - 'A')); // 'A' is worth 10
+        else if(c == '.')
+        {
+            ++pos;
+            break; // follow on to the fractional part
+        }
+        else if(c == 'p' || c == 'P')
+        {
+            ++pos;
+            goto power; // no fractional part given, jump to power
+        }
+        else
+            return false; // not a hexadecimal digit, '.' or 'p'
+    }
+    // fractional part
+    {
+        // 0.0625 == 1/16 == value of first digit after the hexadecimal point
+        for(T digit = T(0.0625); pos < s.len; ++pos, digit /= T(16))
+        {
+            const char c = s.str[pos];
+            if(c >= '0' && c <= '9')
+                *val += digit * T(c - '0');
+            else if(c >= 'a' && c <= 'f')
+                *val += digit * T(10 + (c - 'a')); // 'a' is worth 10
+            else if(c >= 'A' && c <= 'F')
+                *val += digit * T(10 + (c - 'A')); // 'A' is worth 10
+            else if(c == 'p' || c == 'P')
+            {
+                ++pos;
+                goto power; // fractional part finished, jump to power
+            }
+            else
+                return false; // not a hexadecimal digit or 'p'
+        }
+    }
+    return true;
+power:
+    if(C4_LIKELY(pos < s.len))
+    {
+        if(s.str[pos] == '+') // atoi() cannot handle a leading '+'
+            ++pos;
+        if(C4_LIKELY(pos < s.len))
+        {
+            int16_t powval = {};
+            if(C4_LIKELY(atoi(s.sub(pos), &powval)))
+            {
+                *val *= ipow<T, int16_t, 16>(powval);
+                return true;
+            }
+        }
+    }
+    return false;
+}
+#endif
+
 } // namespace detail
+/** @endcond */
 
 
 #undef _c4appendhex
 #undef _c4append
 
 
-/** Convert a single-precision real number to string.
- * The string will in general be NOT null-terminated.
- * For FTOA_FLEX, \p precision is the number of significand digits. Otherwise
- * \p precision is the number of decimals. */
-inline size_t ftoa(substr str, float v, int precision=-1, RealFormat_e formatting=FTOA_FLEX)
+/** @defgroup doc_ftoa ftoa: float32 to chars
+ *
+ * @{ */
+
+/** Convert a single-precision real number to string.  The string will
+ * in general be NOT null-terminated.  For FTOA_FLEX, \p precision is
+ * the number of significand digits. Otherwise \p precision is the
+ * number of decimals. It is safe to call this function with an empty
+ * or too-small buffer.
+ *
+ * @return the size of the buffer needed to write the number
+ */
+C4_ALWAYS_INLINE size_t ftoa(substr str, float v, int precision=-1, RealFormat_e formatting=FTOA_FLEX) noexcept
 {
 #if C4CORE_HAVE_STD_TOCHARS
     return detail::rtoa(str, v, precision, formatting);
@@ -11967,15 +13661,22 @@ inline size_t ftoa(substr str, float v, int precision=-1, RealFormat_e formattin
 #endif
 }
 
+/** @} */
+
+
+/** @defgroup doc_dtoa dtoa: float64 to chars
+ *
+ * @{ */
 
-/** Convert a double-precision real number to string.
- * The string will in general be NOT null-terminated.
- * For FTOA_FLEX, \p precision is the number of significand digits. Otherwise
- * \p precision is the number of decimals.
+/** Convert a double-precision real number to string.  The string will
+ * in general be NOT null-terminated.  For FTOA_FLEX, \p precision is
+ * the number of significand digits. Otherwise \p precision is the
+ * number of decimals. It is safe to call this function with an empty
+ * or too-small buffer.
  *
- * @return the number of characters written.
+ * @return the size of the buffer needed to write the number
  */
-inline size_t dtoa(substr str, double v, int precision=-1, RealFormat_e formatting=FTOA_FLEX)
+C4_ALWAYS_INLINE size_t dtoa(substr str, double v, int precision=-1, RealFormat_e formatting=FTOA_FLEX) noexcept
 {
 #if C4CORE_HAVE_STD_TOCHARS
     return detail::rtoa(str, v, precision, formatting);
@@ -11986,6 +13687,12 @@ inline size_t dtoa(substr str, double v, int precision=-1, RealFormat_e formatti
 #endif
 }
 
+/** @} */
+
+
+/** @defgroup doc_atof atof: chars to float32
+ *
+ * @{ */
 
 /** Convert a string to a single precision real number.
  * The input string must be trimmed to the value, ie
@@ -11993,63 +13700,101 @@ inline size_t dtoa(substr str, double v, int precision=-1, RealFormat_e formatti
  * @return true iff the conversion succeeded
  * @see atof_first() if the string is not trimmed
  */
-inline bool atof(csubstr str, float * C4_RESTRICT v) noexcept
+C4_ALWAYS_INLINE bool atof(csubstr str, float * C4_RESTRICT v) noexcept
 {
+    C4_ASSERT(str.len > 0);
     C4_ASSERT(str.triml(" \r\t\n").len == str.len);
 #if C4CORE_HAVE_FAST_FLOAT
-    fast_float::from_chars_result result;
-    result = fast_float::from_chars(str.str, str.str + str.len, *v);
-    return result.ec == std::errc();
+    // fastfloat cannot parse hexadecimal floats
+    bool isneg = (str.str[0] == '-');
+    csubstr rem = str.sub(isneg || str.str[0] == '+');
+    if(!(rem.len >= 2 && (rem.str[0] == '0' && (rem.str[1] == 'x' || rem.str[1] == 'X'))))
+    {
+        fast_float::from_chars_result result;
+        result = fast_float::from_chars(str.str, str.str + str.len, *v);
+        return result.ec == std::errc();
+    }
+    else if(detail::scan_rhex(rem.sub(2), v))
+    {
+        *v *= isneg ? -1.f : 1.f;
+        return true;
+    }
+    return false;
 #elif C4CORE_HAVE_STD_FROMCHARS
     std::from_chars_result result;
     result = std::from_chars(str.str, str.str + str.len, *v);
     return result.ec == std::errc();
 #else
-    size_t ret = detail::scan_one(str, "f", v);
-    return ret != csubstr::npos;
+    csubstr rem = str.sub(str.str[0] == '-' || str.str[0] == '+');
+    if(!(rem.len >= 2 && (rem.str[0] == '0' && (rem.str[1] == 'x' || rem.str[1] == 'X'))))
+        return detail::scan_one(str, "f", v) != csubstr::npos;
+    else
+        return detail::scan_one(str, "a", v) != csubstr::npos;
 #endif
 }
 
 
+/** Convert a string to a single precision real number.
+ * Leading whitespace is skipped until valid characters are found.
+ * @return the number of characters read from the string, or npos if
+ * conversion was not successful or if the string was empty */
+inline size_t atof_first(csubstr str, float * C4_RESTRICT v) noexcept
+{
+    csubstr trimmed = str.first_real_span();
+    if(trimmed.len == 0)
+        return csubstr::npos;
+    if(atof(trimmed, v))
+        return static_cast<size_t>(trimmed.end() - str.begin());
+    return csubstr::npos;
+}
+
+/** @} */
+
+
+/** @defgroup doc_atod atod: chars to float64
+ *
+ * @{ */
+
 /** Convert a string to a double precision real number.
  * The input string must be trimmed to the value, ie
  * no leading or trailing whitespace can be present.
  * @return true iff the conversion succeeded
  * @see atod_first() if the string is not trimmed
  */
-inline bool atod(csubstr str, double * C4_RESTRICT v) noexcept
+C4_ALWAYS_INLINE bool atod(csubstr str, double * C4_RESTRICT v) noexcept
 {
+    C4_ASSERT(str.len > 0);
     C4_ASSERT(str.triml(" \r\t\n").len == str.len);
 #if C4CORE_HAVE_FAST_FLOAT
-    fast_float::from_chars_result result;
-    result = fast_float::from_chars(str.str, str.str + str.len, *v);
-    return result.ec == std::errc();
+    // fastfloat cannot parse hexadecimal floats
+    bool isneg = (str.str[0] == '-');
+    csubstr rem = str.sub(isneg || str.str[0] == '+');
+    if(!(rem.len >= 2 && (rem.str[0] == '0' && (rem.str[1] == 'x' || rem.str[1] == 'X'))))
+    {
+        fast_float::from_chars_result result;
+        result = fast_float::from_chars(str.str, str.str + str.len, *v);
+        return result.ec == std::errc();
+    }
+    else if(detail::scan_rhex(rem.sub(2), v))
+    {
+        *v *= isneg ? -1. : 1.;
+        return true;
+    }
+    return false;
 #elif C4CORE_HAVE_STD_FROMCHARS
     std::from_chars_result result;
     result = std::from_chars(str.str, str.str + str.len, *v);
     return result.ec == std::errc();
 #else
-    size_t ret = detail::scan_one(str, "lf", v);
-    return ret != csubstr::npos;
+    csubstr rem = str.sub(str.str[0] == '-' || str.str[0] == '+');
+    if(!(rem.len >= 2 && (rem.str[0] == '0' && (rem.str[1] == 'x' || rem.str[1] == 'X'))))
+        return detail::scan_one(str, "lf", v) != csubstr::npos;
+    else
+        return detail::scan_one(str, "la", v) != csubstr::npos;
 #endif
 }
 
 
-/** Convert a string to a single precision real number.
- * Leading whitespace is skipped until valid characters are found.
- * @return the number of characters read from the string, or npos if
- * conversion was not successful or if the string was empty */
-inline size_t atof_first(csubstr str, float * C4_RESTRICT v) noexcept
-{
-    csubstr trimmed = str.first_real_span();
-    if(trimmed.len == 0)
-        return csubstr::npos;
-    if(atof(trimmed, v))
-        return static_cast<size_t>(trimmed.end() - str.begin());
-    return csubstr::npos;
-}
-
-
 /** Convert a string to a double precision real number.
  * Leading whitespace is skipped until valid characters are found.
  * @return the number of characters read from the string, or npos if
@@ -12064,12 +13809,28 @@ inline size_t atod_first(csubstr str, double * C4_RESTRICT v) noexcept
     return csubstr::npos;
 }
 
+/** @} */
+
 
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 // generic versions
 
+/** @cond dev */
+// on some platforms, (unsigned) int and (unsigned) long
+// are not any of the fixed length types above
+#define _C4_IF_NOT_FIXED_LENGTH_I(T, ty) C4_ALWAYS_INLINE typename std::enable_if<std::  is_signed<T>::value && !is_fixed_length<T>::value_i, ty>
+#define _C4_IF_NOT_FIXED_LENGTH_U(T, ty) C4_ALWAYS_INLINE typename std::enable_if<std::is_unsigned<T>::value && !is_fixed_length<T>::value_u, ty>
+/** @endcond*/
+
+
+/** @defgroup doc_xtoa xtoa: generic value to chars
+ *
+ * Dispatches to the most appropriate and efficient conversion
+ * function
+ *
+ * @{ */
 C4_ALWAYS_INLINE size_t xtoa(substr s,  uint8_t v) noexcept { return write_dec(s, v); }
 C4_ALWAYS_INLINE size_t xtoa(substr s, uint16_t v) noexcept { return write_dec(s, v); }
 C4_ALWAYS_INLINE size_t xtoa(substr s, uint32_t v) noexcept { return write_dec(s, v); }
@@ -12099,6 +13860,23 @@ C4_ALWAYS_INLINE size_t xtoa(substr s,  int16_t v,  int16_t radix, size_t num_di
 C4_ALWAYS_INLINE size_t xtoa(substr s,  int32_t v,  int32_t radix, size_t num_digits) noexcept { return itoa(s, v, radix, num_digits); }
 C4_ALWAYS_INLINE size_t xtoa(substr s,  int64_t v,  int64_t radix, size_t num_digits) noexcept { return itoa(s, v, radix, num_digits); }
 
+C4_ALWAYS_INLINE size_t xtoa(substr s,  float v, int precision, RealFormat_e formatting=FTOA_FLEX) noexcept { return ftoa(s, v, precision, formatting); }
+C4_ALWAYS_INLINE size_t xtoa(substr s, double v, int precision, RealFormat_e formatting=FTOA_FLEX) noexcept { return dtoa(s, v, precision, formatting); }
+
+template <class T> _C4_IF_NOT_FIXED_LENGTH_I(T, size_t)::type xtoa(substr buf, T v) noexcept { return itoa(buf, v); }
+template <class T> _C4_IF_NOT_FIXED_LENGTH_U(T, size_t)::type xtoa(substr buf, T v) noexcept { return write_dec(buf, v); }
+template <class T>
+C4_ALWAYS_INLINE size_t xtoa(substr s, T *v) noexcept { return itoa(s, (intptr_t)v, (intptr_t)16); }
+
+/** @} */
+
+/** @defgroup doc_atox atox: generic chars to value
+ *
+ * Dispatches to the most appropriate and efficient conversion
+ * function
+ *
+ * @{ */
+
 C4_ALWAYS_INLINE bool atox(csubstr s,  uint8_t *C4_RESTRICT v) noexcept { return atou(s, v); }
 C4_ALWAYS_INLINE bool atox(csubstr s, uint16_t *C4_RESTRICT v) noexcept { return atou(s, v); }
 C4_ALWAYS_INLINE bool atox(csubstr s, uint32_t *C4_RESTRICT v) noexcept { return atou(s, v); }
@@ -12110,6 +13888,35 @@ C4_ALWAYS_INLINE bool atox(csubstr s,  int64_t *C4_RESTRICT v) noexcept { return
 C4_ALWAYS_INLINE bool atox(csubstr s,    float *C4_RESTRICT v) noexcept { return atof(s, v); }
 C4_ALWAYS_INLINE bool atox(csubstr s,   double *C4_RESTRICT v) noexcept { return atod(s, v); }
 
+template <class T> _C4_IF_NOT_FIXED_LENGTH_I(T, bool  )::type atox(csubstr buf, T *C4_RESTRICT v) noexcept { return atoi(buf, v); }
+template <class T> _C4_IF_NOT_FIXED_LENGTH_U(T, bool  )::type atox(csubstr buf, T *C4_RESTRICT v) noexcept { return atou(buf, v); }
+template <class T>
+C4_ALWAYS_INLINE bool atox(csubstr s, T **v) noexcept { intptr_t tmp; bool ret = atox(s, &tmp); if(ret) { *v = (T*)tmp; } return ret; }
+
+/** @} */
+
+
+/** @defgroup doc_to_chars to_chars: generalized chars to value
+ *
+ * Convert the given value, writing into the string.  The resulting
+ * string will NOT be null-terminated.  Return the number of
+ * characters needed.  This function is safe to call when the string
+ * is too small - no writes will occur beyond the string's last
+ * character.
+ *
+ * Dispatches to the most appropriate and efficient conversion
+ * function.
+ *
+ * @see write_dec, doc_utoa, doc_itoa, doc_ftoa, doc_dtoa
+ *
+ * @warning When serializing floating point values (float or double),
+ * be aware that because it uses defaults, to_chars() may cause a
+ * truncation of the precision. To enforce a particular precision, use
+ * for example @ref c4::fmt::real, or call directly @ref c4::ftoa or
+ * @ref c4::dtoa.
+ *
+ * @{ */
+
 C4_ALWAYS_INLINE size_t to_chars(substr buf,  uint8_t v) noexcept { return write_dec(buf, v); }
 C4_ALWAYS_INLINE size_t to_chars(substr buf, uint16_t v) noexcept { return write_dec(buf, v); }
 C4_ALWAYS_INLINE size_t to_chars(substr buf, uint32_t v) noexcept { return write_dec(buf, v); }
@@ -12121,6 +13928,30 @@ C4_ALWAYS_INLINE size_t to_chars(substr buf,  int64_t v) noexcept { return itoa(
 C4_ALWAYS_INLINE size_t to_chars(substr buf,    float v) noexcept { return ftoa(buf, v); }
 C4_ALWAYS_INLINE size_t to_chars(substr buf,   double v) noexcept { return dtoa(buf, v); }
 
+template <class T> _C4_IF_NOT_FIXED_LENGTH_I(T, size_t)::type to_chars(substr buf, T v) noexcept { return itoa(buf, v); }
+template <class T> _C4_IF_NOT_FIXED_LENGTH_U(T, size_t)::type to_chars(substr buf, T v) noexcept { return write_dec(buf, v); }
+template <class T>
+C4_ALWAYS_INLINE size_t to_chars(substr s, T *v) noexcept { return itoa(s, (intptr_t)v, (intptr_t)16); }
+
+/** @} */
+
+
+/** @defgroup doc_from_chars from_chars: generalized chars to value
+ *
+ * Read a value from the string, which must be trimmed to the value
+ * (ie, no leading/trailing whitespace).  Returns true if the
+ * conversion succeeded.  There is no check for overflow; the value
+ * wraps around in a way similar to the standard C/C++ overflow
+ * behavior. For example, from_chars<int8_t>("128", &val) returns true
+ * and val will be set to 0. See @ref doc_overflows and @ref
+ * doc_overflow_checked for facilities enforcing no-overflow.
+ *
+ * Dispatches to the most appropriate and efficient conversion
+ * function
+ *
+ * @see doc_from_chars_first, atou, atoi, atof, atod
+ * @{ */
+
 C4_ALWAYS_INLINE bool from_chars(csubstr buf,  uint8_t *C4_RESTRICT v) noexcept { return atou(buf, v); }
 C4_ALWAYS_INLINE bool from_chars(csubstr buf, uint16_t *C4_RESTRICT v) noexcept { return atou(buf, v); }
 C4_ALWAYS_INLINE bool from_chars(csubstr buf, uint32_t *C4_RESTRICT v) noexcept { return atou(buf, v); }
@@ -12132,6 +13963,23 @@ C4_ALWAYS_INLINE bool from_chars(csubstr buf,  int64_t *C4_RESTRICT v) noexcept
 C4_ALWAYS_INLINE bool from_chars(csubstr buf,    float *C4_RESTRICT v) noexcept { return atof(buf, v); }
 C4_ALWAYS_INLINE bool from_chars(csubstr buf,   double *C4_RESTRICT v) noexcept { return atod(buf, v); }
 
+template <class T> _C4_IF_NOT_FIXED_LENGTH_I(T, bool  )::type from_chars(csubstr buf, T *C4_RESTRICT v) noexcept { return atoi(buf, v); }
+template <class T> _C4_IF_NOT_FIXED_LENGTH_U(T, bool  )::type from_chars(csubstr buf, T *C4_RESTRICT v) noexcept { return atou(buf, v); }
+template <class T>
+C4_ALWAYS_INLINE bool from_chars(csubstr buf, T **v) noexcept { intptr_t tmp; bool ret = from_chars(buf, &tmp); if(ret) { *v = (T*)tmp; } return ret; }
+
+/** @defgroup doc_from_chars_first from_chars_first: generalized chars to value
+ *
+ * Read the first valid sequence of characters from the string,
+ * skipping leading whitespace, and convert it using @ref doc_from_chars .
+ * Return the number of characters read for converting.
+ *
+ * Dispatches to the most appropriate and efficient conversion
+ * function.
+ *
+ * @see atou_first, atoi_first, atof_first, atod_first
+ * @{ */
+
 C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf,  uint8_t *C4_RESTRICT v) noexcept { return atou_first(buf, v); }
 C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf, uint16_t *C4_RESTRICT v) noexcept { return atou_first(buf, v); }
 C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf, uint32_t *C4_RESTRICT v) noexcept { return atou_first(buf, v); }
@@ -12143,41 +13991,17 @@ C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf,  int64_t *C4_RESTRICT v) n
 C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf,    float *C4_RESTRICT v) noexcept { return atof_first(buf, v); }
 C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf,   double *C4_RESTRICT v) noexcept { return atod_first(buf, v); }
 
-
-//-----------------------------------------------------------------------------
-// on some platforms, (unsigned) int and (unsigned) long
-// are not any of the fixed length types above
-
-#define _C4_IF_NOT_FIXED_LENGTH_I(T, ty) C4_ALWAYS_INLINE typename std::enable_if<std::  is_signed<T>::value && !is_fixed_length<T>::value_i, ty>
-#define _C4_IF_NOT_FIXED_LENGTH_U(T, ty) C4_ALWAYS_INLINE typename std::enable_if<std::is_unsigned<T>::value && !is_fixed_length<T>::value_u, ty>
-
-template <class T> _C4_IF_NOT_FIXED_LENGTH_I(T, size_t)::type xtoa(substr buf, T v) noexcept { return itoa(buf, v); }
-template <class T> _C4_IF_NOT_FIXED_LENGTH_U(T, size_t)::type xtoa(substr buf, T v) noexcept { return write_dec(buf, v); }
-
-template <class T> _C4_IF_NOT_FIXED_LENGTH_I(T, bool  )::type atox(csubstr buf, T *C4_RESTRICT v) noexcept { return atoi(buf, v); }
-template <class T> _C4_IF_NOT_FIXED_LENGTH_U(T, bool  )::type atox(csubstr buf, T *C4_RESTRICT v) noexcept { return atou(buf, v); }
-
-template <class T> _C4_IF_NOT_FIXED_LENGTH_I(T, size_t)::type to_chars(substr buf, T v) noexcept { return itoa(buf, v); }
-template <class T> _C4_IF_NOT_FIXED_LENGTH_U(T, size_t)::type to_chars(substr buf, T v) noexcept { return write_dec(buf, v); }
-
-template <class T> _C4_IF_NOT_FIXED_LENGTH_I(T, bool  )::type from_chars(csubstr buf, T *C4_RESTRICT v) noexcept { return atoi(buf, v); }
-template <class T> _C4_IF_NOT_FIXED_LENGTH_U(T, bool  )::type from_chars(csubstr buf, T *C4_RESTRICT v) noexcept { return atou(buf, v); }
-
 template <class T> _C4_IF_NOT_FIXED_LENGTH_I(T, size_t)::type from_chars_first(csubstr buf, T *C4_RESTRICT v) noexcept { return atoi_first(buf, v); }
 template <class T> _C4_IF_NOT_FIXED_LENGTH_U(T, size_t)::type from_chars_first(csubstr buf, T *C4_RESTRICT v) noexcept { return atou_first(buf, v); }
+template <class T> // pointer overload: parses an integer address; yields the inner overload's result (npos on failure, leaving *v untouched)
+C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf, T **v) noexcept { intptr_t tmp; size_t ret = from_chars_first(buf, &tmp); if(ret != csubstr::npos) { *v = (T*)tmp; } return ret; }
 
-#undef _C4_IF_NOT_FIXED_LENGTH_I
-#undef _C4_IF_NOT_FIXED_LENGTH_U
-
+/** @} */
 
-//-----------------------------------------------------------------------------
-// for pointers
+/** @} */
 
-template <class T> C4_ALWAYS_INLINE size_t xtoa(substr s, T *v) noexcept { return itoa(s, (intptr_t)v, (intptr_t)16); }
-template <class T> C4_ALWAYS_INLINE bool   atox(csubstr s, T **v) noexcept { intptr_t tmp; bool ret = atox(s, &tmp); if(ret) { *v = (T*)tmp; } return ret; }
-template <class T> C4_ALWAYS_INLINE size_t to_chars(substr s, T *v) noexcept { return itoa(s, (intptr_t)v, (intptr_t)16); }
-template <class T> C4_ALWAYS_INLINE bool   from_chars(csubstr buf, T **v) noexcept { intptr_t tmp; bool ret = from_chars(buf, &tmp); if(ret) { *v = (T*)tmp; } return ret; }
-template <class T> C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf, T **v) noexcept { intptr_t tmp; bool ret = from_chars_first(buf, &tmp); if(ret) { *v = (T*)tmp; } return ret; }
+#undef _C4_IF_NOT_FIXED_LENGTH_I
+#undef _C4_IF_NOT_FIXED_LENGTH_U
 
 
 //-----------------------------------------------------------------------------
@@ -12186,7 +14010,10 @@ template <class T> C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf, T **v)
 /** call to_chars() and return a substr consisting of the
  * written portion of the input buffer. Ie, same as to_chars(),
  * but return a substr instead of a size_t.
- *
+ * Convert the given value to a string using to_chars(), and
+ * return the resulting string, up to and including the last
+ * written character.
+ * @ingroup doc_to_chars
  * @see to_chars() */
 template<class T>
 C4_ALWAYS_INLINE substr to_chars_sub(substr buf, T const& C4_RESTRICT v) noexcept
@@ -12200,12 +14027,14 @@ C4_ALWAYS_INLINE substr to_chars_sub(substr buf, T const& C4_RESTRICT v) noexcep
 //-----------------------------------------------------------------------------
 // bool implementation
 
+/** @ingroup doc_to_chars */
 C4_ALWAYS_INLINE size_t to_chars(substr buf, bool v) noexcept
 {
     int val = v;
     return to_chars(buf, val);
 }
 
+/** @ingroup doc_from_chars */
 inline bool from_chars(csubstr buf, bool * C4_RESTRICT v) noexcept
 {
     if(buf == '0')
@@ -12250,6 +14079,7 @@ inline bool from_chars(csubstr buf, bool * C4_RESTRICT v) noexcept
     return ret;
 }
 
+/** @ingroup doc_from_chars_first */
 inline size_t from_chars_first(csubstr buf, bool * C4_RESTRICT v) noexcept
 {
     csubstr trimmed = buf.first_non_empty_span();
@@ -12262,28 +14092,36 @@ inline size_t from_chars_first(csubstr buf, bool * C4_RESTRICT v) noexcept
 //-----------------------------------------------------------------------------
 // single-char implementation
 
+/** @ingroup doc_to_chars */
 inline size_t to_chars(substr buf, char v) noexcept
 {
     if(buf.len > 0)
-        buf[0] = v;
+    {
+        C4_XASSERT(buf.str);
+        buf.str[0] = v;
+    }
     return 1;
 }
 
 /** extract a single character from a substring
- * @note to extract a string instead and not just a single character, use the csubstr overload */
+ * @note to extract a string instead and not just a single character, use the csubstr overload
+ * @ingroup doc_from_chars
+ * */
 inline bool from_chars(csubstr buf, char * C4_RESTRICT v) noexcept
 {
     if(buf.len != 1)
         return false;
-    *v = buf[0];
+    C4_XASSERT(buf.str);
+    *v = buf.str[0];
     return true;
 }
 
+/** @ingroup doc_from_chars_first */
 inline size_t from_chars_first(csubstr buf, char * C4_RESTRICT v) noexcept
 {
     if(buf.len < 1)
         return csubstr::npos;
-    *v = buf[0];
+    *v = buf.str[0];
     return 1;
 }
 
@@ -12291,20 +14129,31 @@ inline size_t from_chars_first(csubstr buf, char * C4_RESTRICT v) noexcept
 //-----------------------------------------------------------------------------
 // csubstr implementation
 
+/** @ingroup doc_to_chars */
 inline size_t to_chars(substr buf, csubstr v) noexcept
 {
     C4_ASSERT(!buf.overlaps(v));
     size_t len = buf.len < v.len ? buf.len : v.len;
-    memcpy(buf.str, v.str, len);
+    // calling memcpy with null strings is undefined behavior
+    // and will wreak havoc in calling code's branches.
+    // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637
+    if(len)
+    {
+        C4_ASSERT(buf.str != nullptr);
+        C4_ASSERT(v.str != nullptr);
+        memcpy(buf.str, v.str, len);
+    }
     return v.len;
 }
 
+/** @ingroup doc_from_chars */
 inline bool from_chars(csubstr buf, csubstr *C4_RESTRICT v) noexcept
 {
     *v = buf;
     return true;
 }
 
+/** @ingroup doc_from_chars_first */
 inline size_t from_chars_first(substr buf, csubstr * C4_RESTRICT v) noexcept
 {
     csubstr trimmed = buf.first_non_empty_span();
@@ -12318,27 +14167,46 @@ inline size_t from_chars_first(substr buf, csubstr * C4_RESTRICT v) noexcept
 //-----------------------------------------------------------------------------
 // substr
 
+/** @ingroup doc_to_chars */
 inline size_t to_chars(substr buf, substr v) noexcept
 {
     C4_ASSERT(!buf.overlaps(v));
     size_t len = buf.len < v.len ? buf.len : v.len;
-    memcpy(buf.str, v.str, len);
+    // calling memcpy with null strings is undefined behavior
+    // and will wreak havoc in calling code's branches.
+    // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637
+    if(len)
+    {
+        C4_ASSERT(buf.str != nullptr);
+        C4_ASSERT(v.str != nullptr);
+        memcpy(buf.str, v.str, len);
+    }
     return v.len;
 }
 
+/** @ingroup doc_from_chars */
 inline bool from_chars(csubstr buf, substr * C4_RESTRICT v) noexcept
 {
     C4_ASSERT(!buf.overlaps(*v));
-    if(buf.len <= v->len)
+    // is the destination buffer wide enough?
+    if(v->len >= buf.len)
     {
-        memcpy(v->str, buf.str, buf.len);
+        // calling memcpy with null strings is undefined behavior
+        // and will wreak havoc in calling code's branches.
+        // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637
+        if(buf.len)
+        {
+            C4_ASSERT(buf.str != nullptr);
+            C4_ASSERT(v->str != nullptr);
+            memcpy(v->str, buf.str, buf.len);
+        }
         v->len = buf.len;
         return true;
     }
-    memcpy(v->str, buf.str, v->len);
     return false;
 }
 
+/** @ingroup doc_from_chars_first */
 inline size_t from_chars_first(csubstr buf, substr * C4_RESTRICT v) noexcept
 {
     csubstr trimmed = buf.first_non_empty_span();
@@ -12346,7 +14214,15 @@ inline size_t from_chars_first(csubstr buf, substr * C4_RESTRICT v) noexcept
     if(C4_UNLIKELY(trimmed.len == 0))
         return csubstr::npos;
     size_t len = trimmed.len > v->len ? v->len : trimmed.len;
-    memcpy(v->str, trimmed.str, len);
+    // calling memcpy with null strings is undefined behavior
+    // and will wreak havoc in calling code's branches.
+    // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637
+    if(len)
+    {
+        C4_ASSERT(buf.str != nullptr);
+        C4_ASSERT(v->str != nullptr);
+        memcpy(v->str, trimmed.str, len);
+    }
     if(C4_UNLIKELY(trimmed.len > v->len))
         return csubstr::npos;
     return static_cast<size_t>(trimmed.end() - buf.begin());
@@ -12355,6 +14231,7 @@ inline size_t from_chars_first(csubstr buf, substr * C4_RESTRICT v) noexcept
 
 //-----------------------------------------------------------------------------
 
+/** @ingroup doc_to_chars */
 template<size_t N>
 inline size_t to_chars(substr buf, const char (& C4_RESTRICT v)[N]) noexcept
 {
@@ -12362,16 +14239,21 @@ inline size_t to_chars(substr buf, const char (& C4_RESTRICT v)[N]) noexcept
     return to_chars(buf, sp);
 }
 
+/** @ingroup doc_to_chars */
 inline size_t to_chars(substr buf, const char * C4_RESTRICT v) noexcept
 {
     return to_chars(buf, to_csubstr(v));
 }
 
+/** @} */
+
 } // namespace c4
 
 #ifdef _MSC_VER
 #   pragma warning(pop)
-#elif defined(__clang__)
+#endif
+
+#if defined(__clang__)
 #   pragma clang diagnostic pop
 #elif defined(__GNUC__)
 #   pragma GCC diagnostic pop
@@ -12469,8 +14351,29 @@ size_t decode_code_point(uint8_t *C4_RESTRICT buf, size_t buflen, const uint32_t
 #   pragma GCC diagnostic ignored "-Wuseless-cast"
 #endif
 
+/** @defgroup doc_format_utils Format utilities
+ *
+ * @brief Provides generic and type-safe formatting/scanning utilities
+ * built on top of @ref doc_to_chars and @ref doc_from_chars,
+ * forwarding the arguments to these functions, which in turn use the
+ * @ref doc_charconv utilities. Like @ref doc_charconv, the formatting
+ * facilities are very efficient and many times faster than printf().
+ *
+ * @see [a formatting sample in rapidyaml's docs](https://rapidyaml.readthedocs.io/latest/doxygen/group__doc__quickstart.html#gac2425b515eb552589708cfff70c52b14)
+ * */
+
+/** @defgroup doc_format_specifiers Format specifiers
+ *
+ * @brief Format specifiers are tag types and functions that are used
+ * together with @ref doc_to_chars and @ref doc_from_chars
+ *
+ * @see [a formatting sample in rapidyaml's docs](https://rapidyaml.readthedocs.io/latest/doxygen/group__doc__quickstart.html#gac2425b515eb552589708cfff70c52b14)
+ * @ingroup doc_format_utils */
+
 namespace c4 {
 
+/** @addtogroup doc_format_utils
+ * @{ */
 
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
@@ -12479,6 +14382,12 @@ namespace c4 {
 
 namespace fmt {
 
+/** @addtogroup doc_format_specifiers
+ * @{ */
+
+/** @defgroup doc_boolean_specifiers boolean specifiers
+ * @{ */
+
 /** write a variable as an alphabetic boolean, ie as either true or false
  * @param strict_read */
 template<class T>
@@ -12495,9 +14404,15 @@ boolalpha_<T> boolalpha(T const& val, bool strict_read=false)
     return boolalpha_<T>(val, strict_read);
 }
 
+/** @} */
+
+/** @} */
+
 } // namespace fmt
 
-/** write a variable as an alphabetic boolean, ie as either true or false */
+/** write a variable as an alphabetic boolean, ie as either true or
+ * false
+ * @ingroup doc_to_chars */
 template<class T>
 inline size_t to_chars(substr buf, fmt::boolalpha_<T> fmt)
 {
@@ -12513,10 +14428,17 @@ inline size_t to_chars(substr buf, fmt::boolalpha_<T> fmt)
 
 namespace fmt {
 
+/** @addtogroup doc_format_specifiers
+ * @{ */
+
+/** @defgroup doc_integer_specifiers Integer specifiers
+ * @{ */
+
 /** format an integral type with a custom radix */
 template<typename T>
 struct integral_
 {
+    C4_STATIC_ASSERT(std::is_integral<T>::value);
     T val;
     T radix;
     C4_ALWAYS_INLINE integral_(T val_, T radix_) : val(val_), radix(radix_) {}
@@ -12526,12 +14448,14 @@ struct integral_
 template<typename T>
 struct integral_padded_
 {
+    C4_STATIC_ASSERT(std::is_integral<T>::value);
     T val;
     T radix;
     size_t num_digits;
     C4_ALWAYS_INLINE integral_padded_(T val_, T radix_, size_t nd) : val(val_), radix(radix_), num_digits(nd) {}
 };
 
+
 /** format an integral type with a custom radix */
 template<class T>
 C4_ALWAYS_INLINE integral_<T> integral(T val, T radix=10)
@@ -12550,47 +14474,19 @@ C4_ALWAYS_INLINE integral_<intptr_t> integral(std::nullptr_t, T radix=10)
 {
     return integral_<intptr_t>(intptr_t(0), static_cast<intptr_t>(radix));
 }
-/** pad the argument with zeroes on the left, with decimal radix */
+
+
+/** format the pointer as an hexadecimal value */
 template<class T>
-C4_ALWAYS_INLINE integral_padded_<T> zpad(T val, size_t num_digits)
+inline integral_<intptr_t> hex(T * v)
 {
-    return integral_padded_<T>(val, T(10), num_digits);
+    return integral_<intptr_t>(reinterpret_cast<intptr_t>(v), intptr_t(16));
 }
-/** pad the argument with zeroes on the left */
+/** format the pointer as an hexadecimal value */
 template<class T>
-C4_ALWAYS_INLINE integral_padded_<T> zpad(integral_<T> val, size_t num_digits)
+inline integral_<intptr_t> hex(T const* v)
 {
-    return integral_padded_<T>(val.val, val.radix, num_digits);
-}
-/** pad the argument with zeroes on the left */
-C4_ALWAYS_INLINE integral_padded_<intptr_t> zpad(std::nullptr_t, size_t num_digits)
-{
-    return integral_padded_<intptr_t>(0, 16, num_digits);
-}
-/** pad the argument with zeroes on the left */
-template<class T>
-C4_ALWAYS_INLINE integral_padded_<intptr_t> zpad(T const* val, size_t num_digits)
-{
-    return integral_padded_<intptr_t>(reinterpret_cast<intptr_t>(val), 16, num_digits);
-}
-template<class T>
-C4_ALWAYS_INLINE integral_padded_<intptr_t> zpad(T * val, size_t num_digits)
-{
-    return integral_padded_<intptr_t>(reinterpret_cast<intptr_t>(val), 16, num_digits);
-}
-
-
-/** format the pointer as an hexadecimal value */
-template<class T>
-inline integral_<intptr_t> hex(T * v)
-{
-    return integral_<intptr_t>(reinterpret_cast<intptr_t>(v), intptr_t(16));
-}
-/** format the pointer as an hexadecimal value */
-template<class T>
-inline integral_<intptr_t> hex(T const* v)
-{
-    return integral_<intptr_t>(reinterpret_cast<intptr_t>(v), intptr_t(16));
+    return integral_<intptr_t>(reinterpret_cast<intptr_t>(v), intptr_t(16));
 }
 /** format null as an hexadecimal value
  * @overload hex */
@@ -12658,6 +14554,46 @@ inline integral_<T> bin(T v)
     return integral_<T>(v, T(2));
 }
 
+/** @} */ // integer_specifiers
+
+
+/** @defgroup doc_zpad Pad the number with zeroes on the left
+ * @{ */
+
+/** pad the argument with zeroes on the left, with decimal radix */
+template<class T>
+C4_ALWAYS_INLINE integral_padded_<T> zpad(T val, size_t num_digits)
+{
+    return integral_padded_<T>(val, T(10), num_digits);
+}
+/** pad the argument with zeroes on the left */
+template<class T>
+C4_ALWAYS_INLINE integral_padded_<T> zpad(integral_<T> val, size_t num_digits)
+{
+    return integral_padded_<T>(val.val, val.radix, num_digits);
+}
+/** pad the argument with zeroes on the left */
+C4_ALWAYS_INLINE integral_padded_<intptr_t> zpad(std::nullptr_t, size_t num_digits)
+{
+    return integral_padded_<intptr_t>(0, 16, num_digits);
+}
+/** pad the argument with zeroes on the left */
+template<class T>
+C4_ALWAYS_INLINE integral_padded_<intptr_t> zpad(T const* val, size_t num_digits)
+{
+    return integral_padded_<intptr_t>(reinterpret_cast<intptr_t>(val), 16, num_digits);
+}
+template<class T>
+C4_ALWAYS_INLINE integral_padded_<intptr_t> zpad(T * val, size_t num_digits)
+{
+    return integral_padded_<intptr_t>(reinterpret_cast<intptr_t>(val), 16, num_digits);
+}
+
+/** @} */ // zpad
+
+
+/** @defgroup doc_overflow_checked Check read for overflow
+ * @{ */
 
 template<class T>
 struct overflow_checked_
@@ -12672,9 +14608,15 @@ C4_ALWAYS_INLINE overflow_checked_<T> overflow_checked(T &val)
    return overflow_checked_<T>(val);
 }
 
+/** @} */ // overflow_checked
+
+/** @} */ // format_specifiers
+
+
 } // namespace fmt
 
-/** format an integral_ signed type */
+/** format an integer signed type
+ * @ingroup doc_to_chars */
 template<typename T>
 C4_ALWAYS_INLINE
 typename std::enable_if<std::is_signed<T>::value, size_t>::type
@@ -12682,7 +14624,8 @@ to_chars(substr buf, fmt::integral_<T> fmt)
 {
     return itoa(buf, fmt.val, fmt.radix);
 }
-/** format an integral_ signed type, pad with zeroes */
+/** format an integer signed type, pad with zeroes
+ * @ingroup doc_to_chars */
 template<typename T>
 C4_ALWAYS_INLINE
 typename std::enable_if<std::is_signed<T>::value, size_t>::type
@@ -12691,7 +14634,8 @@ to_chars(substr buf, fmt::integral_padded_<T> fmt)
     return itoa(buf, fmt.val, fmt.radix, fmt.num_digits);
 }
 
-/** format an integral_ unsigned type */
+/** format an integer unsigned type
+ * @ingroup doc_to_chars */
 template<typename T>
 C4_ALWAYS_INLINE
 typename std::enable_if<std::is_unsigned<T>::value, size_t>::type
@@ -12699,7 +14643,8 @@ to_chars(substr buf, fmt::integral_<T> fmt)
 {
     return utoa(buf, fmt.val, fmt.radix);
 }
-/** format an integral_ unsigned type, pad with zeroes */
+/** format an integer unsigned type, pad with zeroes
+ * @ingroup doc_to_chars */
 template<typename T>
 C4_ALWAYS_INLINE
 typename std::enable_if<std::is_unsigned<T>::value, size_t>::type
@@ -12708,6 +14653,8 @@ to_chars(substr buf, fmt::integral_padded_<T> fmt)
     return utoa(buf, fmt.val, fmt.radix, fmt.num_digits);
 }
 
+/** read an integer type, detecting overflow (returns false on overflow)
+ * @ingroup doc_from_chars */
 template<class T>
 C4_ALWAYS_INLINE bool from_chars(csubstr s, fmt::overflow_checked_<T> wrapper)
 {
@@ -12715,6 +14662,15 @@ C4_ALWAYS_INLINE bool from_chars(csubstr s, fmt::overflow_checked_<T> wrapper)
         return atox(s, wrapper.val);
     return false;
 }
+/** read an integer type via a pointer to the wrapper; returns false on overflow
+ * @ingroup doc_from_chars */
+template<class T>
+C4_ALWAYS_INLINE bool from_chars(csubstr s, fmt::overflow_checked_<T> *wrapper)
+{
+    if(C4_LIKELY(!overflows<T>(s))) // parse only when no overflow is detected
+        return atox(s, wrapper->val);
+    return false; // overflow: wrapper->val is not written
+}
 
 
 //-----------------------------------------------------------------------------
@@ -12724,6 +14680,12 @@ C4_ALWAYS_INLINE bool from_chars(csubstr s, fmt::overflow_checked_<T> wrapper)
 
 namespace fmt {
 
+/** @addtogroup doc_format_specifiers
+ * @{ */
+
+/** @defgroup doc_real_specifiers Real specifiers
+ * @{ */
+
 template<class T>
 struct real_
 {
@@ -12739,9 +14701,15 @@ real_<T> real(T val, int precision, RealFormat_e fmt=FTOA_FLOAT)
     return real_<T>(val, precision, fmt);
 }
 
+/** @} */ // real_specifiers
+
+/** @} */ // format_specifiers
+
 } // namespace fmt
 
+/** @ingroup doc_to_chars */
 inline size_t to_chars(substr buf, fmt::real_< float> fmt) { return ftoa(buf, fmt.val, fmt.precision, fmt.fmt); }
+/** @ingroup doc_to_chars */
 inline size_t to_chars(substr buf, fmt::real_<double> fmt) { return dtoa(buf, fmt.val, fmt.precision, fmt.fmt); }
 
 
@@ -12752,6 +14720,12 @@ inline size_t to_chars(substr buf, fmt::real_<double> fmt) { return dtoa(buf, fm
 
 namespace fmt {
 
+/** @addtogroup doc_format_specifiers
+ * @{ */
+
+/** @defgroup doc_raw_binary_specifiers Raw binary data
+ * @{ */
+
 /** @see blob_ */
 template<class T>
 struct raw_wrapper_ : public blob_<T>
@@ -12809,26 +14783,35 @@ inline raw_wrapper raw(T & C4_RESTRICT data, size_t alignment=alignof(T))
     return raw_wrapper(blob(data), alignment);
 }
 
+/** @} */ // raw_binary_specifiers
+
+/** @} */ // format_specifiers
+
 } // namespace fmt
 
 
-/** write a variable in raw binary format, using memcpy */
+/** write a variable in raw binary format, using memcpy
+ * @ingroup doc_to_chars */
 C4CORE_EXPORT size_t to_chars(substr buf, fmt::const_raw_wrapper r);
 
-/** read a variable in raw binary format, using memcpy */
+/** read a variable in raw binary format, using memcpy
+ * @ingroup doc_from_chars */
 C4CORE_EXPORT bool from_chars(csubstr buf, fmt::raw_wrapper *r);
-/** read a variable in raw binary format, using memcpy */
+/** read a variable in raw binary format, using memcpy
+ * @ingroup doc_from_chars */
 inline bool from_chars(csubstr buf, fmt::raw_wrapper r)
 {
     return from_chars(buf, &r);
 }
 
-/** read a variable in raw binary format, using memcpy */
+/** read a variable in raw binary format, using memcpy
+ * @ingroup doc_from_chars_first */
 inline size_t from_chars_first(csubstr buf, fmt::raw_wrapper *r)
 {
     return from_chars(buf, r);
 }
-/** read a variable in raw binary format, using memcpy */
+/** read a variable in raw binary format, using memcpy
+ * @ingroup doc_from_chars_first */
 inline size_t from_chars_first(csubstr buf, fmt::raw_wrapper r)
 {
     return from_chars(buf, &r);
@@ -12842,6 +14825,12 @@ inline size_t from_chars_first(csubstr buf, fmt::raw_wrapper r)
 
 namespace fmt {
 
+/** @addtogroup doc_format_specifiers
+ * @{ */
+
+/** @defgroup doc_alignment_specifiers Alignment specifiers
+ * @{ */
+
 template<class T>
 struct left_
 {
@@ -12874,9 +14863,14 @@ right_<T> right(T val, size_t width, char padchar=' ')
     return right_<T>(val, width, padchar);
 }
 
+/** @} */ // alignment_specifiers
+
+/** @} */ // format_specifiers
+
 } // namespace fmt
 
 
+/** @ingroup doc_to_chars */
 template<class T>
 size_t to_chars(substr buf, fmt::left_<T> const& C4_RESTRICT align)
 {
@@ -12888,6 +14882,7 @@ size_t to_chars(substr buf, fmt::left_<T> const& C4_RESTRICT align)
     return align.width;
 }
 
+/** @ingroup doc_to_chars */
 template<class T>
 size_t to_chars(substr buf, fmt::right_<T> const& C4_RESTRICT align)
 {
@@ -12905,13 +14900,16 @@ size_t to_chars(substr buf, fmt::right_<T> const& C4_RESTRICT align)
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 
-/// @cond dev
+/** @defgroup doc_cat cat: concatenate arguments to string
+ * @{ */
+
+/** @cond dev */
 // terminates the variadic recursion
 inline size_t cat(substr /*buf*/)
 {
     return 0;
 }
-/// @endcond
+/** @endcond */
 
 
 /** serialize the arguments, concatenating them to the given fixed-size buffer.
@@ -12939,16 +14937,22 @@ substr cat_sub(substr buf, Args && ...args)
     return {buf.str, sz <= buf.len ? sz : buf.len};
 }
 
+/** @} */
+
 
 //-----------------------------------------------------------------------------
 
-/// @cond dev
+
+/** @defgroup doc_uncat uncat: read concatenated arguments from string
+ * @{ */
+
+/** @cond dev */
 // terminates the variadic recursion
 inline size_t uncat(csubstr /*buf*/)
 {
     return 0;
 }
-/// @endcond
+/** @endcond */
 
 
 /** deserialize the arguments from the given buffer.
@@ -12969,16 +14973,22 @@ size_t uncat(csubstr buf, Arg & C4_RESTRICT a, Args & C4_RESTRICT ...more)
     return out + num;
 }
 
+/** @} */
+
 
 
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 
-namespace detail {
 
+/** @defgroup doc_catsep catsep: cat arguments to string with separator
+ * @{ */
+
+/** @cond dev */
+namespace detail {
 template<class Sep>
-inline size_t catsep_more(substr /*buf*/, Sep const& C4_RESTRICT /*sep*/)
+C4_ALWAYS_INLINE size_t catsep_more(substr /*buf*/, Sep const& C4_RESTRICT /*sep*/)
 {
     return 0;
 }
@@ -12986,7 +14996,8 @@ inline size_t catsep_more(substr /*buf*/, Sep const& C4_RESTRICT /*sep*/)
 template<class Sep, class Arg, class... Args>
 size_t catsep_more(substr buf, Sep const& C4_RESTRICT sep, Arg const& C4_RESTRICT a, Args const& C4_RESTRICT ...more)
 {
-    size_t ret = to_chars(buf, sep), num = ret;
+    size_t ret = to_chars(buf, sep);
+    size_t num = ret;
     buf  = buf.len >= ret ? buf.sub(ret) : substr{};
     ret  = to_chars(buf, a);
     num += ret;
@@ -12996,6 +15007,7 @@ size_t catsep_more(substr buf, Sep const& C4_RESTRICT sep, Arg const& C4_RESTRIC
     return num;
 }
 
+
 template<class Sep>
 inline size_t uncatsep_more(csubstr /*buf*/, Sep & /*sep*/)
 {
@@ -13005,7 +15017,8 @@ inline size_t uncatsep_more(csubstr /*buf*/, Sep & /*sep*/)
 template<class Sep, class Arg, class... Args>
 size_t uncatsep_more(csubstr buf, Sep & C4_RESTRICT sep, Arg & C4_RESTRICT a, Args & C4_RESTRICT ...more)
 {
-    size_t ret = from_chars_first(buf, &sep), num = ret;
+    size_t ret = from_chars_first(buf, &sep);
+    size_t num = ret;
     if(C4_UNLIKELY(ret == csubstr::npos))
         return csubstr::npos;
     buf  = buf.len >= ret ? buf.sub(ret) : substr{};
@@ -13023,6 +15036,13 @@ size_t uncatsep_more(csubstr buf, Sep & C4_RESTRICT sep, Arg & C4_RESTRICT a, Ar
 
 } // namespace detail
 
+template<class Sep>
+size_t catsep(substr /*buf*/, Sep const& C4_RESTRICT /*sep*/)
+{
+    return 0; // only a separator and no arguments: nothing is written
+}
+/** @endcond */
+
 
 /** serialize the arguments, concatenating them to the given fixed-size
  * buffer, using a separator between each argument.
@@ -13051,6 +15071,23 @@ substr catsep_sub(substr buf, Args && ...args)
     return {buf.str, sz <= buf.len ? sz : buf.len};
 }
 
+/** @} */
+
+
+
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+
+/** @defgroup doc_uncatsep uncatsep: deserialize the separated arguments from a string
+ * @{ */
+
+/** deserialize the arguments from the given buffer.
+ *
+ * @return the number of characters read from the buffer, or csubstr::npos
+ *   if a conversion was not successful.
+ * @see c4::cat(). c4::uncat() is the inverse of c4::cat(). */
+
 /** deserialize the arguments from the given buffer, using a separator.
  *
  * @return the number of characters read from the buffer, or csubstr::npos
@@ -13070,11 +15107,16 @@ size_t uncatsep(csubstr buf, Sep & C4_RESTRICT sep, Arg & C4_RESTRICT a, Args &
     return num;
 }
 
+/** @} */
+
 
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 
+/** @defgroup doc_format format: formatted string interpolation
+ * @{ */
+
 /// @cond dev
 // terminates the variadic recursion
 inline size_t format(substr buf, csubstr fmt)
@@ -13127,9 +15169,14 @@ substr format_sub(substr buf, csubstr fmt, Args const& C4_RESTRICT ...args)
     return {buf.str, sz <= buf.len ? sz : buf.len};
 }
 
+/** @} */
+
 
 //-----------------------------------------------------------------------------
 
+/** @defgroup doc_unformat unformat: formatted read from string
+ * @{ */
+
 /// @cond dev
 // terminates the variadic recursion
 inline size_t unformat(csubstr /*buf*/, csubstr fmt)
@@ -13164,26 +15211,18 @@ size_t unformat(csubstr buf, csubstr fmt, Arg & C4_RESTRICT a, Args & C4_RESTRIC
     return out;
 }
 
+/** @} */
+
 
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 
-/** a tag type for marking append to container
- * @see c4::catrs() */
-struct append_t {};
-
-/** a tag variable
- * @see c4::catrs() */
-constexpr const append_t append = {};
-
-
-//-----------------------------------------------------------------------------
-
-/** like c4::cat(), but receives a container, and resizes it as needed to contain
- * the result. The container is overwritten. To append to it, use the append
- * overload.
- * @see c4::cat() */
+/** cat+resize: like c4::cat(), but receives a container, and resizes
+ * it as needed to contain the result. The container is
+ * overwritten. To append to it, use the append overload.
+ * @see c4::cat()
+ * @ingroup doc_cat */
 template<class CharOwningContainer, class... Args>
 inline void catrs(CharOwningContainer * C4_RESTRICT cont, Args const& C4_RESTRICT ...args)
 {
@@ -13195,9 +15234,10 @@ inline void catrs(CharOwningContainer * C4_RESTRICT cont, Args const& C4_RESTRIC
         goto retry;
 }
 
-/** like c4::cat(), but creates and returns a new container sized as needed to contain
- * the result.
- * @see c4::cat() */
+/** cat+resize: like c4::cat(), but creates and returns a new
+ * container sized as needed to contain the result.
+ * @see c4::cat()
+ * @ingroup doc_cat */
 template<class CharOwningContainer, class... Args>
 inline CharOwningContainer catrs(Args const& C4_RESTRICT ...args)
 {
@@ -13206,13 +15246,16 @@ inline CharOwningContainer catrs(Args const& C4_RESTRICT ...args)
     return cont;
 }
 
-/** like c4::cat(), but receives a container, and appends to it instead of
- * overwriting it. The container is resized as needed to contain the result.
+/** cat+resize+append: like c4::cat(), but receives a container, and
+ * appends to it instead of overwriting it. The container is resized
+ * as needed to contain the result.
+ *
  * @return the region newly appended to the original container
  * @see c4::cat()
- * @see c4::catrs() */
+ * @see c4::catrs()
+ * @ingroup doc_cat */
 template<class CharOwningContainer, class... Args>
-inline csubstr catrs(append_t, CharOwningContainer * C4_RESTRICT cont, Args const& C4_RESTRICT ...args)
+inline csubstr catrs_append(CharOwningContainer * C4_RESTRICT cont, Args const& C4_RESTRICT ...args)
 {
     const size_t pos = cont->size();
 retry:
@@ -13227,19 +15270,12 @@ inline csubstr catrs(append_t, CharOwningContainer * C4_RESTRICT cont, Args cons
 
 //-----------------------------------------------------------------------------
 
-/// @cond dev
-// terminates the recursion
-template<class CharOwningContainer, class Sep, class... Args>
-inline void catseprs(CharOwningContainer * C4_RESTRICT, Sep const& C4_RESTRICT)
-{
-    return;
-}
-/// @end cond
-
-
-/** like c4::catsep(), but receives a container, and resizes it as needed to contain the result.
- * The container is overwritten. To append to the container use the append overload.
- * @see c4::catsep() */
+/** catsep+resize: like c4::catsep(), but receives a container, and
+ * resizes it as needed to contain the result.  The container is
+ * overwritten. To append to the container use the append overload.
+ *
+ * @see c4::catsep()
+ * @ingroup doc_catsep */
 template<class CharOwningContainer, class Sep, class... Args>
 inline void catseprs(CharOwningContainer * C4_RESTRICT cont, Sep const& C4_RESTRICT sep, Args const& C4_RESTRICT ...args)
 {
@@ -13251,8 +15287,11 @@ inline void catseprs(CharOwningContainer * C4_RESTRICT cont, Sep const& C4_RESTR
         goto retry;
 }
 
-/** like c4::catsep(), but create a new container with the result.
- * @return the requested container */
+/** catsep+resize: like c4::catsep(), but create a new container with
+ * the result.
+ *
+ * @return the requested container
+ * @ingroup doc_catsep */
 template<class CharOwningContainer, class Sep, class... Args>
 inline CharOwningContainer catseprs(Sep const& C4_RESTRICT sep, Args const& C4_RESTRICT ...args)
 {
@@ -13262,22 +15301,15 @@ inline CharOwningContainer catseprs(Sep const& C4_RESTRICT sep, Args const& C4_R
 }
 
 
-/// @cond dev
-// terminates the recursion
-template<class CharOwningContainer, class Sep, class... Args>
-inline csubstr catseprs(append_t, CharOwningContainer * C4_RESTRICT, Sep const& C4_RESTRICT)
-{
-    csubstr s;
-    return s;
-}
-/// @endcond
-
-/** like catsep(), but receives a container, and appends the arguments, resizing the
- * container as needed to contain the result. The buffer is appended to.
+/** catsep+resize+append: like catsep(), but receives a container, and
+ * appends the arguments, resizing the container as needed to contain
+ * the result. The buffer is appended to.
+ *
  * @return a csubstr of the appended part
- * @ingroup formatting_functions */
+ * @ingroup formatting_functions
+ * @ingroup doc_catsep */
 template<class CharOwningContainer, class Sep, class... Args>
-inline csubstr catseprs(append_t, CharOwningContainer * C4_RESTRICT cont, Sep const& C4_RESTRICT sep, Args const& C4_RESTRICT ...args)
+inline csubstr catseprs_append(CharOwningContainer * C4_RESTRICT cont, Sep const& C4_RESTRICT sep, Args const& C4_RESTRICT ...args)
 {
     const size_t pos = cont->size();
 retry:
@@ -13292,10 +15324,12 @@ inline csubstr catseprs(append_t, CharOwningContainer * C4_RESTRICT cont, Sep co
 
 //-----------------------------------------------------------------------------
 
-/** like c4::format(), but receives a container, and resizes it as needed
- * to contain the result.  The container is overwritten. To append to
- * the container use the append overload.
- * @see c4::format() */
+/** format+resize: like c4::format(), but receives a container, and
+ * resizes it as needed to contain the result.  The container is
+ * overwritten. To append to the container use the append overload.
+ *
+ * @see c4::format()
+ * @ingroup doc_format */
 template<class CharOwningContainer, class... Args>
 inline void formatrs(CharOwningContainer * C4_RESTRICT cont, csubstr fmt, Args const& C4_RESTRICT ...args)
 {
@@ -13307,8 +15341,11 @@ inline void formatrs(CharOwningContainer * C4_RESTRICT cont, csubstr fmt, Args c
         goto retry;
 }
 
-/** like c4::format(), but create a new container with the result.
- * @return the requested container */
+/** format+resize: like c4::format(), but create a new container with
+ * the result.
+ *
+ * @return the requested container
+ * @ingroup doc_format */
 template<class CharOwningContainer, class... Args>
 inline CharOwningContainer formatrs(csubstr fmt, Args const& C4_RESTRICT ...args)
 {
@@ -13317,13 +15354,14 @@ inline CharOwningContainer formatrs(csubstr fmt, Args const& C4_RESTRICT ...args
     return cont;
 }
 
-/** like format(), but receives a container, and appends the
+/** format+resize+append: like format(), but receives a container, and appends the
  * arguments, resizing the container as needed to contain the
  * result. The buffer is appended to.
  * @return the region newly appended to the original container
- * @ingroup formatting_functions */
+ * @ingroup formatting_functions
+ * @ingroup doc_format */
 template<class CharOwningContainer, class... Args>
-inline csubstr formatrs(append_t, CharOwningContainer * C4_RESTRICT cont, csubstr fmt, Args const& C4_RESTRICT ...args)
+inline csubstr formatrs_append(CharOwningContainer * C4_RESTRICT cont, csubstr fmt, Args const& C4_RESTRICT ...args)
 {
     const size_t pos = cont->size();
 retry:
@@ -13335,6 +15373,8 @@ inline csubstr formatrs(append_t, CharOwningContainer * C4_RESTRICT cont, csubst
     return to_csubstr(*cont).range(pos, cont->size());
 }
 
+/** @} */
+
 } // namespace c4
 
 #ifdef _MSC_VER
@@ -13372,6 +15412,8 @@ inline csubstr formatrs(append_t, CharOwningContainer * C4_RESTRICT cont, csubst
 
 namespace c4 {
 
+C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast")
+
 
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
@@ -13732,6 +15774,8 @@ C4_ALWAYS_INLINE DumpResults catsep_dump_resume(DumperFn &&dumpfn, substr buf, S
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 
+/// @cond dev
+
 /** take the function pointer as a function argument */
 template<class DumperFn>
 C4_ALWAYS_INLINE size_t format_dump(DumperFn &&dumpfn, substr buf, csubstr fmt)
@@ -13743,7 +15787,7 @@ C4_ALWAYS_INLINE size_t format_dump(DumperFn &&dumpfn, substr buf, csubstr fmt)
     return 0u;
 }
 
-/** take the function pointer as a function argument */
+/** take the function pointer as a template argument */
 template<DumperPfn dumpfn>
 C4_ALWAYS_INLINE size_t format_dump(substr buf, csubstr fmt)
 {
@@ -13754,9 +15798,12 @@ C4_ALWAYS_INLINE size_t format_dump(substr buf, csubstr fmt)
     return 0u;
 }
 
+/// @endcond
+
+
 /** take the function pointer as a function argument */
 template<class DumperFn, class Arg, class... Args>
-size_t format_dump(DumperFn &&dumpfn, substr buf, csubstr fmt, Arg const& C4_RESTRICT a, Args const& C4_RESTRICT ...more)
+C4_NO_INLINE size_t format_dump(DumperFn &&dumpfn, substr buf, csubstr fmt, Arg const& C4_RESTRICT a, Args const& C4_RESTRICT ...more)
 {
     // we can dump without using buf
     // but we'll only dump if the buffer is ok
@@ -13779,7 +15826,7 @@ size_t format_dump(DumperFn &&dumpfn, substr buf, csubstr fmt, Arg const& C4_RES
 
 /** take the function pointer as a template argument */
 template<DumperPfn dumpfn, class Arg, class... Args>
-size_t format_dump(substr buf, csubstr fmt, Arg const& C4_RESTRICT a, Args const& C4_RESTRICT ...more)
+C4_NO_INLINE size_t format_dump(substr buf, csubstr fmt, Arg const& C4_RESTRICT a, Args const& C4_RESTRICT ...more)
 {
     // we can dump without using buf
     // but we'll only dump if the buffer is ok
@@ -13939,6 +15986,7 @@ C4_ALWAYS_INLINE DumpResults format_dump_resume(DumperFn &&dumpfn, substr buf, c
     return detail::format_dump_resume(0u, dumpfn, DumpResults{}, buf, fmt, more...);
 }
 
+C4_SUPPRESS_WARNING_GCC_CLANG_POP
 
 } // namespace c4
 
@@ -13976,6 +16024,8 @@ C4_ALWAYS_INLINE DumpResults format_dump_resume(DumperFn &&dumpfn, substr buf, c
 
 namespace c4 {
 
+C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast")
+
 //! taken from http://stackoverflow.com/questions/15586163/c11-type-trait-to-differentiate-between-enum-class-and-regular-enum
 template<typename Enum>
 using is_scoped_enum = std::integral_constant<bool, std::is_enum<Enum>::value && !std::is_convertible<Enum, int>::value>;
@@ -14103,7 +16153,6 @@ size_t eoffs(EnumOffsetType which)
     }
     default:
         C4_ERROR("unknown offset type %d", (int)which);
-        return 0;
     }
 }
 
@@ -14237,6 +16286,8 @@ const char* EnumSymbols<Enum>::Sym::name_offs(EnumOffsetType t) const
     return name + eoffs<Enum>(t);
 }
 
+C4_SUPPRESS_WARNING_GCC_CLANG_POP
+
 } // namespace c4
 
 #endif // _C4_ENUM_HPP_
@@ -14278,12 +16329,17 @@ const char* EnumSymbols<Enum>::Sym::name_offs(EnumOffsetType t) const
 #endif /* C4_FORMAT_HPP_ */
 
 
-#ifdef _MSC_VER
+#if defined(_MSC_VER)
 #   pragma warning(push)
 #   pragma warning(disable : 4996) // 'strncpy', fopen, etc: This function or variable may be unsafe
-#elif defined(__clang__)
+#endif
+
+#if defined(__clang__)
+#   pragma clang diagnostic push
+#   pragma clang diagnostic ignored "-Wold-style-cast"
 #elif defined(__GNUC__)
 #   pragma GCC diagnostic push
+#   pragma GCC diagnostic ignored "-Wold-style-cast"
 #   if __GNUC__ >= 8
 #       pragma GCC diagnostic ignored "-Wstringop-truncation"
 #       pragma GCC diagnostic ignored "-Wstringop-overflow"
@@ -14505,7 +16561,7 @@ typename std::underlying_type<Enum>::type str2bm_read_one(const char *str, size_
         C4_CHECK_MSG(p != nullptr, "no valid enum pair name for '%.*s'", (int)sz, str);
         return static_cast<I>(p->value);
     }
-    I tmp;
+    I tmp{0};
     size_t len = uncat(csubstr(str, sz), tmp);
     C4_CHECK_MSG(len != csubstr::npos, "could not read string as an integral type: '%.*s'", (int)sz, str);
     return tmp;
@@ -14591,7 +16647,10 @@ typename std::underlying_type<Enum>::type str2bm(const char *str)
 
 #ifdef _MSC_VER
 #   pragma warning(pop)
-#elif defined(__clang__)
+#endif
+
+#if defined(__clang__)
+#   pragma clang diagnostic pop
 #elif defined(__GNUC__)
 #   pragma GCC diagnostic pop
 #endif
@@ -14642,6 +16701,8 @@ typename std::underlying_type<Enum>::type str2bm(const char *str)
 
 namespace c4 {
 
+C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast")
+
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
@@ -14709,8 +16770,8 @@ class span_crtp
     C4_ALWAYS_INLINE const_iterator cend() const noexcept { return _c4cptr + _c4csz; }
 
     C4_ALWAYS_INLINE       reverse_iterator  rbegin()       noexcept { return reverse_iterator(_c4ptr + _c4sz); }
-    C4_ALWAYS_INLINE const_reverse_iterator  rbegin() const noexcept { return reverse_iterator(_c4cptr + _c4sz); }
-    C4_ALWAYS_INLINE const_reverse_iterator crbegin() const noexcept { return reverse_iterator(_c4cptr + _c4sz); }
+    C4_ALWAYS_INLINE const_reverse_iterator  rbegin() const noexcept { return reverse_iterator(_c4cptr + _c4csz); }
+    C4_ALWAYS_INLINE const_reverse_iterator crbegin() const noexcept { return reverse_iterator(_c4cptr + _c4csz); }
 
     C4_ALWAYS_INLINE       reverse_iterator  rend()       noexcept { return const_reverse_iterator(_c4ptr); }
     C4_ALWAYS_INLINE const_reverse_iterator  rend() const noexcept { return const_reverse_iterator(_c4cptr); }
@@ -15141,6 +17202,7 @@ class spanrsl : public span_crtp<T, I, spanrsl<T, I>>
 };
 template<class T, class I=C4_SIZE_TYPE> using cspanrsl = spanrsl<const T, I>;
 
+C4_SUPPRESS_WARNING_GCC_CLANG_POP
 
 } // namespace c4
 
@@ -15171,6 +17233,13 @@ template<class T, class I=C4_SIZE_TYPE> using cspanrsl = spanrsl<const T, I>;
 #error "amalgamate: file c4/span.hpp must have been included at this point"
 #endif /* C4_SPAN_HPP_ */
 
+// amalgamate: removed include of
+// https://github.com/biojppm/c4core/src/c4/compiler.hpp
+//#include "c4/compiler.hpp"
+#if !defined(C4_COMPILER_HPP_) && !defined(_C4_COMPILER_HPP_)
+#error "amalgamate: file c4/compiler.hpp must have been included at this point"
+#endif /* C4_COMPILER_HPP_ */
+
 
 /// @cond dev
 struct _c4t
@@ -15203,31 +17272,32 @@ C4_CONSTEXPR14 cspan<char> type_name()
 {
     const _c4t p = _c4tn<T>();
 
-#if (0) // _C4_THIS_IS_A_DEBUG_SCAFFOLD
+#if (0) // enable this to debug and find the offsets
     for(size_t index = 0; index < p.sz; ++index)
-    {
         printf(" %2c", p.str[index]);
-    }
     printf("\n");
     for(size_t index = 0; index < p.sz; ++index)
-    {
-        printf(" %2d", (int)index);
-    }
+        printf(" %2zu", index);
     printf("\n");
 #endif
 
 #if defined(_MSC_VER)
 #   if defined(__clang__) // Visual Studio has the clang toolset
+#   if (_MSC_VER >= 1930) // do not use this: defined(C4_MSVC_2022)
+    // ..............................xxx.
+    // _c4t __cdecl _c4tn(void) [T = int]
+    enum : size_t { tstart = 30, tend = 1};
+#   else
     // example:
     // ..........................xxx.
     // _c4t __cdecl _c4tn() [T = int]
     enum : size_t { tstart = 26, tend = 1};
-
+#   endif
 #   elif defined(C4_MSVC_2015) || defined(C4_MSVC_2017) || defined(C4_MSVC_2019) || defined(C4_MSVC_2022)
     // Note: subtract 7 at the end because the function terminates with ">(void)" in VS2015+
     cspan<char>::size_type tstart = 26, tend = 7;
 
-    const char *s = p.str + tstart; // look at the start
+    const char *C4_RESTRICT s = p.str + tstart; // look at the start
 
     // we're not using strcmp() or memcmp() to spare the #include
 
@@ -15328,27 +17398,62 @@ C4_CONSTEXPR14 C4_ALWAYS_INLINE cspan<char> type_name(T const&)
 
 namespace c4 {
 
+/** @defgroup doc_base64 Base64 encoding/decoding
+ * @see https://en.wikipedia.org/wiki/Base64
+ * @see https://www.base64encode.org/
+ * @{ */
+
 /** check that the given buffer is a valid base64 encoding
  * @see https://en.wikipedia.org/wiki/Base64 */
-bool base64_valid(csubstr encoded);
+C4CORE_EXPORT bool base64_valid(csubstr encoded);
+
 
 /** base64-encode binary data.
  * @param encoded [out] output buffer for encoded data
  * @param data [in] the input buffer with the binary data
- * @return the number of bytes needed to return the output. No writes occur beyond the end of the output buffer.
+ *
+ * @return the number of bytes needed to return the output (ie the
+ * required size for @p encoded). No writes occur beyond the end of
+ * the output buffer, so it is safe to do a speculative call where the
+ * encoded buffer is empty, or maybe too small. The caller should
+ * ensure that the returned size does not exceed the size of the
+ * encoded buffer.
+ *
+ * @note the result depends on endianness. If transfer between
+ * little/big endian systems is desired, the caller should normalize
+ * @p data before encoding.
+ *
  * @see https://en.wikipedia.org/wiki/Base64 */
-size_t base64_encode(substr encoded, cblob data);
+C4CORE_EXPORT size_t base64_encode(substr encoded, cblob data);
+
 
 /** decode the base64 encoding in the given buffer
  * @param encoded [in] the encoded base64
  * @param data [out] the output buffer
- * @return the number of bytes needed to return the output.. No writes occur beyond the end of the output buffer.
+ *
+ * @return the number of bytes needed to return the output (ie the
+ * required size for @p data). No writes occur beyond the end of the
+ * output buffer, so it is safe to do a speculative call where the
+ * data buffer is empty, or maybe too small. The caller should ensure
+ * that the returned size does not exceed the size of the data buffer.
+ *
+ * @note the result depends on endianness. If transfer between
+ * little/big endian systems is desired, the caller should normalize
+ * @p data after decoding.
+ *
  * @see https://en.wikipedia.org/wiki/Base64 */
-size_t base64_decode(csubstr encoded, blob data);
+C4CORE_EXPORT size_t base64_decode(csubstr encoded, blob data);
 
+/** @} */ // base64
 
 namespace fmt {
 
+/** @addtogroup doc_format_specifiers
+ * @{ */
+
+/** @defgroup doc_base64_fmt Base64
+ * @{ */
+
 template<typename CharOrConstChar>
 struct base64_wrapper_
 {
@@ -15356,7 +17461,9 @@ struct base64_wrapper_
     base64_wrapper_() : data() {}
     base64_wrapper_(blob_<CharOrConstChar> blob) : data(blob) {}
 };
+/** a tag type to mark a read-only payload to be encoded as base64 */
 using const_base64_wrapper = base64_wrapper_<cbyte>;
+/** a tag type to mark a writable buffer receiving data decoded from base64 */
 using base64_wrapper = base64_wrapper_<byte>;
 
 
@@ -15395,16 +17502,22 @@ C4_ALWAYS_INLINE base64_wrapper base64(substr s)
     return base64_wrapper(blob(s.str, s.len));
 }
 
+/** @} */ // base64_fmt
+
+/** @} */ // format_specifiers
+
 } // namespace fmt
 
 
-/** write a variable in base64 format */
+/** write a variable in base64 format
+ * @ingroup doc_to_chars */
 inline size_t to_chars(substr buf, fmt::const_base64_wrapper b)
 {
     return base64_encode(buf, b.data);
 }
 
-/** read a variable in base64 format */
+/** read a variable in base64 format
+ * @ingroup doc_from_chars */
 inline size_t from_chars(csubstr buf, fmt::base64_wrapper *b)
 {
     return base64_decode(buf, b->data);
@@ -15448,18 +17561,32 @@ namespace c4 {
 
 //-----------------------------------------------------------------------------
 
-/** get a writeable view to an existing std::string */
-inline c4::substr to_substr(std::string &s)
+/** get a writeable view to an existing std::string.
+ * When the string is empty, the returned view will be pointing
+ * at the character with value '\0', but the size will be zero.
+ * @see https://en.cppreference.com/w/cpp/string/basic_string/operator_at
+ */
+C4_ALWAYS_INLINE c4::substr to_substr(std::string &s) noexcept
 {
-    char* data = ! s.empty() ? &s[0] : nullptr;
-    return c4::substr(data, s.size());
+    #if C4_CPP < 11
+    #error this function will have undefined behavior
+    #endif
+    // since c++11 it is legal to call s[s.size()].
+    return c4::substr(&s[0], s.size());
 }
 
-/** get a readonly view to an existing std::string */
-inline c4::csubstr to_csubstr(std::string const& s)
+/** get a readonly view to an existing std::string.
+ * When the string is empty, the returned view will be pointing
+ * at the character with value '\0', but the size will be zero.
+ * @see https://en.cppreference.com/w/cpp/string/basic_string/operator_at
+ */
+C4_ALWAYS_INLINE c4::csubstr to_csubstr(std::string const& s) noexcept
 {
-    const char* data = ! s.empty() ? &s[0] : nullptr;
-    return c4::csubstr(data, s.size());
+    #if C4_CPP < 11
+    #error this function will have undefined behavior
+    #endif
+    // since c++11 it is legal to call s[s.size()].
+    return c4::csubstr(&s[0], s.size());
 }
 
 //-----------------------------------------------------------------------------
@@ -15485,7 +17612,15 @@ inline size_t to_chars(c4::substr buf, std::string const& s)
 {
     C4_ASSERT(!buf.overlaps(to_csubstr(s)));
     size_t len = buf.len < s.size() ? buf.len : s.size();
-    memcpy(buf.str, s.data(), len);
+    // calling memcpy with null strings is undefined behavior
+    // and will wreak havoc in calling code's branches.
+    // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637
+    if(len)
+    {
+        C4_ASSERT(s.data() != nullptr);
+        C4_ASSERT(buf.str != nullptr);
+        memcpy(buf.str, s.data(), len);
+    }
     return s.size(); // return the number of needed chars
 }
 
@@ -15494,7 +17629,14 @@ inline bool from_chars(c4::csubstr buf, std::string * s)
 {
     s->resize(buf.len);
     C4_ASSERT(!buf.overlaps(to_csubstr(*s)));
-    memcpy(&(*s)[0], buf.str, buf.len);
+    // calling memcpy with null strings is undefined behavior
+    // and will wreak havoc in calling code's branches.
+    // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637
+    if(buf.len)
+    {
+        C4_ASSERT(buf.str != nullptr);
+        memcpy(&(*s)[0], buf.str, buf.len);
+    }
     return true;
 }
 
@@ -15509,28 +17651,124 @@ inline bool from_chars(c4::csubstr buf, std::string * s)
 
 //********************************************************************************
 //--------------------------------------------------------------------------------
-// src/c4/std/vector.hpp
-// https://github.com/biojppm/c4core/src/c4/std/vector.hpp
+// src/c4/std/string_view.hpp
+// https://github.com/biojppm/c4core/src/c4/std/string_view.hpp
 //--------------------------------------------------------------------------------
 //********************************************************************************
 
-#ifndef _C4_STD_VECTOR_HPP_
-#define _C4_STD_VECTOR_HPP_
+#ifndef _C4_STD_STRING_VIEW_HPP_
+#define _C4_STD_STRING_VIEW_HPP_
 
-/** @file vector.hpp provides conversion and comparison facilities
- * from/between std::vector<char> to c4::substr and c4::csubstr.
- * @todo add to_span() and friends
- */
+/** @file string_view.hpp */
 
 #ifndef C4CORE_SINGLE_HEADER
 // amalgamate: removed include of
-// https://github.com/biojppm/c4core/src/c4/substr.hpp
-//#include "c4/substr.hpp"
-#if !defined(C4_SUBSTR_HPP_) && !defined(_C4_SUBSTR_HPP_)
-#error "amalgamate: file c4/substr.hpp must have been included at this point"
-#endif /* C4_SUBSTR_HPP_ */
-
-#endif
+// https://github.com/biojppm/c4core/src/c4/language.hpp
+//#include "c4/language.hpp"
+#if !defined(C4_LANGUAGE_HPP_) && !defined(_C4_LANGUAGE_HPP_)
+#error "amalgamate: file c4/language.hpp must have been included at this point"
+#endif /* C4_LANGUAGE_HPP_ */
+
+#endif
+
+#if (C4_CPP >= 17 && defined(__cpp_lib_string_view)) || defined(__DOXYGEN__)
+
+#ifndef C4CORE_SINGLE_HEADER
+// amalgamate: removed include of
+// https://github.com/biojppm/c4core/src/c4/substr.hpp
+//#include "c4/substr.hpp"
+#if !defined(C4_SUBSTR_HPP_) && !defined(_C4_SUBSTR_HPP_)
+#error "amalgamate: file c4/substr.hpp must have been included at this point"
+#endif /* C4_SUBSTR_HPP_ */
+
+#endif
+
+#include <string_view>
+
+
+namespace c4 {
+
+//-----------------------------------------------------------------------------
+
+/** create a csubstr from an existing std::string_view. */
+C4_ALWAYS_INLINE c4::csubstr to_csubstr(std::string_view s) noexcept
+{
+    return c4::csubstr(s.data(), s.size());
+}
+
+
+//-----------------------------------------------------------------------------
+
+C4_ALWAYS_INLINE bool operator== (c4::csubstr ss, std::string_view s) { return ss.compare(s.data(), s.size()) == 0; }
+C4_ALWAYS_INLINE bool operator!= (c4::csubstr ss, std::string_view s) { return ss.compare(s.data(), s.size()) != 0; }
+C4_ALWAYS_INLINE bool operator>= (c4::csubstr ss, std::string_view s) { return ss.compare(s.data(), s.size()) >= 0; }
+C4_ALWAYS_INLINE bool operator>  (c4::csubstr ss, std::string_view s) { return ss.compare(s.data(), s.size()) >  0; }
+C4_ALWAYS_INLINE bool operator<= (c4::csubstr ss, std::string_view s) { return ss.compare(s.data(), s.size()) <= 0; }
+C4_ALWAYS_INLINE bool operator<  (c4::csubstr ss, std::string_view s) { return ss.compare(s.data(), s.size()) <  0; }
+
+C4_ALWAYS_INLINE bool operator== (std::string_view s, c4::csubstr ss) { return ss.compare(s.data(), s.size()) == 0; }
+C4_ALWAYS_INLINE bool operator!= (std::string_view s, c4::csubstr ss) { return ss.compare(s.data(), s.size()) != 0; }
+C4_ALWAYS_INLINE bool operator<= (std::string_view s, c4::csubstr ss) { return ss.compare(s.data(), s.size()) >= 0; }
+C4_ALWAYS_INLINE bool operator<  (std::string_view s, c4::csubstr ss) { return ss.compare(s.data(), s.size()) >  0; }
+C4_ALWAYS_INLINE bool operator>= (std::string_view s, c4::csubstr ss) { return ss.compare(s.data(), s.size()) <= 0; }
+C4_ALWAYS_INLINE bool operator>  (std::string_view s, c4::csubstr ss) { return ss.compare(s.data(), s.size()) <  0; }
+
+
+//-----------------------------------------------------------------------------
+
+/** copy an std::string_view to a writeable substr */
+inline size_t to_chars(c4::substr buf, std::string_view s)
+{
+    C4_ASSERT(!buf.overlaps(to_csubstr(s)));
+    size_t sz = s.size();
+    size_t len = buf.len < sz ? buf.len : sz;
+    // calling memcpy with null strings is undefined behavior
+    // and will wreak havoc in calling code's branches.
+    // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637
+    if(len)
+    {
+        C4_ASSERT(s.data() != nullptr);
+        C4_ASSERT(buf.str != nullptr);
+        memcpy(buf.str, s.data(), len);
+    }
+    return sz; // return the number of needed chars
+}
+
+} // namespace c4
+
+#endif // (C4_CPP >= 17 && defined(__cpp_lib_string_view)) || defined(__DOXYGEN__)
+
+#endif // _C4_STD_STRING_VIEW_HPP_
+
+
+// (end https://github.com/biojppm/c4core/src/c4/std/string_view.hpp)
+
+
+
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/c4/std/vector.hpp
+// https://github.com/biojppm/c4core/src/c4/std/vector.hpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
+
+#ifndef _C4_STD_VECTOR_HPP_
+#define _C4_STD_VECTOR_HPP_
+
+/** @file vector.hpp provides conversion and comparison facilities
+ * from/between std::vector<char> to c4::substr and c4::csubstr.
+ * @todo add to_span() and friends
+ */
+
+#ifndef C4CORE_SINGLE_HEADER
+// amalgamate: removed include of
+// https://github.com/biojppm/c4core/src/c4/substr.hpp
+//#include "c4/substr.hpp"
+#if !defined(C4_SUBSTR_HPP_) && !defined(_C4_SUBSTR_HPP_)
+#error "amalgamate: file c4/substr.hpp must have been included at this point"
+#endif /* C4_SUBSTR_HPP_ */
+
+#endif
 
 #include <vector>
 
@@ -15579,7 +17817,13 @@ inline size_t to_chars(c4::substr buf, std::vector<char, Alloc> const& s)
 {
     C4_ASSERT(!buf.overlaps(to_csubstr(s)));
     size_t len = buf.len < s.size() ? buf.len : s.size();
-    memcpy(buf.str, s.data(), len);
+    // calling memcpy with null strings is undefined behavior
+    // and will wreak havoc in calling code's branches.
+    // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637
+    if(len > 0)
+    {
+        memcpy(buf.str, s.data(), len);
+    }
     return s.size(); // return the number of needed chars
 }
 
@@ -15589,7 +17833,13 @@ inline bool from_chars(c4::csubstr buf, std::vector<char, Alloc> * s)
 {
     s->resize(buf.len);
     C4_ASSERT(!buf.overlaps(to_csubstr(*s)));
-    memcpy(&(*s)[0], buf.str, buf.len);
+    // calling memcpy with null strings is undefined behavior
+    // and will wreak havoc in calling code's branches.
+    // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637
+    if(buf.len > 0)
+    {
+        memcpy(&(*s)[0], buf.str, buf.len);
+    }
     return true;
 }
 
@@ -15825,10 +18075,17 @@ inline size_t unformat(csubstr buf, csubstr fmt, std::tuple< Types... > & tp)
 #include <random>
 
 
+#ifdef __clang__
+#   pragma clang diagnostic push
+#   pragma clang diagnostic ignored "-Wold-style-cast"
+#elif defined(__GNUC__)
+#   pragma GCC diagnostic push
+#   pragma GCC diagnostic ignored "-Wold-style-cast"
+#endif
+
 namespace c4 {
 namespace rng {
 
-
 class splitmix
 {
 public:
@@ -16004,6 +18261,12 @@ inline bool operator!=(pcg const &lhs, pcg const &rhs)
 } // namespace rng
 } // namespace c4
 
+#ifdef __clang__
+#   pragma clang diagnostic pop
+#elif defined(__GNUC__)
+#   pragma GCC diagnostic pop
+#endif
+
 #endif /* AG_RANDOM_H */
 
 
@@ -16051,6 +18314,8 @@ inline bool operator!=(pcg const &lhs, pcg const &rhs)
 //included above:
 //#include <utility>
 #include <functional>
+//included above:
+//#include <cstdlib>
 
 namespace stdext {
 
@@ -16103,7 +18368,15 @@ template<typename R, typename... Args> struct vtable
 
     explicit constexpr vtable() noexcept :
         invoke_ptr{ [](storage_ptr_t, Args&&...) -> R
-            { throw std::bad_function_call(); }
+            {
+                // Throw only when exceptions are enabled: MSVC reports this through
+                // _CPPUNWIND, other compilers through __EXCEPTIONS/__cpp_exceptions.
+                #if (defined(_MSC_VER) && defined(_CPPUNWIND) && (_CPPUNWIND == 1)) || defined(__EXCEPTIONS) || defined(__cpp_exceptions)
+                throw std::bad_function_call();
+                #else
+                std::abort();
+                #endif
+            }
         },
         copy_ptr{ [](storage_ptr_t, storage_ptr_t) noexcept -> void {} },
         move_ptr{ [](storage_ptr_t, storage_ptr_t) noexcept -> void {} },
@@ -16433,9 +18706,11 @@ void foo() {} // to avoid empty file warning from the linker
 #ifdef __clang__
 #   pragma clang diagnostic push
 #   pragma clang diagnostic ignored "-Wformat-nonliteral"
+#   pragma clang diagnostic ignored "-Wold-style-cast"
 #elif defined(__GNUC__)
 #   pragma GCC diagnostic push
 #   pragma GCC diagnostic ignored "-Wformat-nonliteral"
+#   pragma GCC diagnostic ignored "-Wold-style-cast"
 #endif
 
 namespace c4 {
@@ -16464,13 +18739,19 @@ size_t to_chars(substr buf, fmt::const_raw_wrapper r)
 
 bool from_chars(csubstr buf, fmt::raw_wrapper *r)
 {
+    C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wcast-qual")
     void * vptr = (void*)buf.str;
+    C4_SUPPRESS_WARNING_GCC_POP
     size_t space = buf.len;
     auto ptr = (decltype(buf.str)) std::align(r->alignment, r->len, vptr, space);
     C4_CHECK(ptr != nullptr);
     C4_CHECK(ptr >= buf.begin() && ptr <= buf.end());
-    //size_t dim = (ptr - buf.str) + r->len;
+    C4_SUPPRESS_WARNING_GCC_PUSH
+    #if defined(__GNUC__) && __GNUC__ > 9
+    C4_SUPPRESS_WARNING_GCC("-Wanalyzer-null-argument")
+    #endif
     memcpy(r->buf, ptr, r->len);
+    C4_SUPPRESS_WARNING_GCC_POP
     return true;
 }
 
@@ -16515,26 +18796,6 @@ bool from_chars(csubstr buf, fmt::raw_wrapper *r)
 
 namespace c4 {
 
-/** returns true if the memory overlaps */
-bool mem_overlaps(void const* a, void const* b, size_t sza, size_t szb)
-{
-    if(a < b)
-    {
-        if(size_t(a) + sza > size_t(b))
-            return true;
-    }
-    else if(a > b)
-    {
-        if(size_t(b) + szb > size_t(a))
-            return true;
-    }
-    else if(a == b)
-    {
-        if(sza != 0 && szb != 0)
-            return true;
-    }
-    return false;
-}
 
 /** Fills 'dest' with the first 'pattern_size' bytes at 'pattern', 'num_times'. */
 void mem_repeat(void* dest, void const* pattern, size_t pattern_size, size_t num_times)
@@ -16542,7 +18803,7 @@ void mem_repeat(void* dest, void const* pattern, size_t pattern_size, size_t num
     if(C4_UNLIKELY(num_times == 0))
         return;
     C4_ASSERT( ! mem_overlaps(dest, pattern, num_times*pattern_size, pattern_size));
-    char *begin = (char*)dest;
+    char *begin = static_cast<char*>(dest);
     char *end   = begin + num_times * pattern_size;
     // copy the pattern once
     ::memcpy(begin, pattern, pattern_size);
@@ -16560,6 +18821,7 @@ void mem_repeat(void* dest, void const* pattern, size_t pattern_size, size_t num
     }
 }
 
+
 } // namespace c4
 
 #endif /* C4CORE_SINGLE_HDR_DEFINE_NOW */
@@ -16640,6 +18902,8 @@ constexpr const size_t char_traits< wchar_t >::num_whitespace_chars;
 
 namespace c4 {
 
+C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast")
+
 namespace detail {
 
 
@@ -16662,38 +18926,38 @@ void afree_impl(void *ptr)
 
 void* aalloc_impl(size_t size, size_t alignment)
 {
+    // alignment must be nonzero and a power of 2
+    C4_CHECK(alignment > 0 && (alignment & (alignment - 1u)) == 0);
+    // NOTE: alignment needs to be sized in multiples of sizeof(void*)
+    if(C4_UNLIKELY(alignment < sizeof(void*)))
+        alignment = sizeof(void*);
+    static_assert((sizeof(void*) & (sizeof(void*)-1u)) == 0, "sizeof(void*) must be a power of 2");
+    C4_CHECK(((alignment & (sizeof(void*) - 1u))) == 0u);
     void *mem;
 #if defined(C4_WIN) || defined(C4_XBOX)
     mem = ::_aligned_malloc(size, alignment);
     C4_CHECK(mem != nullptr || size == 0);
-#elif defined(C4_ARM)
-    // https://stackoverflow.com/questions/53614538/undefined-reference-to-posix-memalign-in-arm-gcc
-    // https://electronics.stackexchange.com/questions/467382/e2-studio-undefined-reference-to-posix-memalign/467753
-    mem = memalign(alignment, size);
-    C4_CHECK(mem != nullptr || size == 0);
 #elif defined(C4_POSIX) || defined(C4_IOS) || defined(C4_MACOS)
-    // NOTE: alignment needs to be sized in multiples of sizeof(void*)
-    size_t amult = alignment;
-    if(C4_UNLIKELY(alignment < sizeof(void*)))
-    {
-        amult = sizeof(void*);
-    }
-    int ret = ::posix_memalign(&mem, amult, size);
+    int ret = ::posix_memalign(&mem, alignment, size);
     if(C4_UNLIKELY(ret))
     {
-        if(ret == EINVAL)
-        {
-            C4_ERROR("The alignment argument %zu was not a power of two, "
-                     "or was not a multiple of sizeof(void*)", alignment);
-        }
-        else if(ret == ENOMEM)
+        C4_ASSERT(ret != EINVAL); // this was already handled above
+        if(ret == ENOMEM)
         {
             C4_ERROR("There was insufficient memory to fulfill the "
                      "allocation request of %zu bytes (alignment=%lu)", size, size);
         }
         return nullptr;
     }
+#elif defined(C4_ARM) || defined(C4_ANDROID)
+    // https://stackoverflow.com/questions/53614538/undefined-reference-to-posix-memalign-in-arm-gcc
+    // https://electronics.stackexchange.com/questions/467382/e2-studio-undefined-reference-to-posix-memalign/467753
+    mem = memalign(alignment, size);
+    C4_CHECK(mem != nullptr || size == 0);
 #else
+    (void)size;
+    (void)alignment;
+    mem = nullptr;
     C4_NOT_IMPLEMENTED_MSG("need to implement an aligned allocation for this platform");
 #endif
     C4_ASSERT_MSG((uintptr_t(mem) & (alignment-1)) == 0, "address %p is not aligned to %zu boundary", mem, alignment);
@@ -16828,7 +19092,6 @@ void* MemoryResourceLinear::do_allocate(size_t sz, size_t alignment, void *hint)
     if(m_pos + sz > m_size)
     {
         C4_ERROR("out of memory");
-        return nullptr;
     }
     void *mem = m_mem + m_pos;
     size_t space = m_size - m_pos;
@@ -16843,7 +19106,6 @@ void* MemoryResourceLinear::do_allocate(size_t sz, size_t alignment, void *hint)
     else
     {
         C4_ERROR("could not align memory");
-        mem = nullptr;
     }
     return mem;
 }
@@ -16902,6 +19164,8 @@ void* MemoryResourceLinear::do_reallocate(void* ptr, size_t oldsz, size_t newsz,
  *
  * */
 
+C4_SUPPRESS_WARNING_GCC_CLANG_POP
+
 } // namespace c4
 
 
@@ -16995,6 +19259,8 @@ void operator delete[](void *p, size_t, std::nothrow_t)
 
 namespace c4 {
 
+C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast")
+
 size_t decode_code_point(uint8_t *C4_RESTRICT buf, size_t buflen, const uint32_t code)
 {
     C4_UNUSED(buflen);
@@ -17045,6 +19311,8 @@ substr decode_code_point(substr out, csubstr code_point)
     return out.first(ret);
 }
 
+C4_SUPPRESS_WARNING_GCC_CLANG_POP
+
 } // namespace c4
 
 #endif /* C4CORE_SINGLE_HDR_DEFINE_NOW */
@@ -17073,10 +19341,12 @@ substr decode_code_point(substr out, csubstr code_point)
 #ifdef __clang__
 #   pragma clang diagnostic push
 #   pragma clang diagnostic ignored "-Wchar-subscripts" // array subscript is of type 'char'
+#   pragma clang diagnostic ignored "-Wold-style-cast"
 #elif defined(__GNUC__)
 #   pragma GCC diagnostic push
 #   pragma GCC diagnostic ignored "-Wchar-subscripts"
 #   pragma GCC diagnostic ignored "-Wtype-limits"
+#   pragma GCC diagnostic ignored "-Wold-style-cast"
 #endif
 
 namespace c4 {
@@ -17164,7 +19434,8 @@ void base64_test_tables()
 
 bool base64_valid(csubstr encoded)
 {
-    if(encoded.len % 4) return false;
+    if(encoded.len & 3u) // (encoded.len % 4u)
+        return false;
     for(const char c : encoded)
     {
         if(c < 0/* || c >= 128*/)
@@ -17186,10 +19457,9 @@ size_t base64_encode(substr buf, cblob data)
          C4_XASSERT((char_idx) < sizeof(detail::base64_sextet_to_char_));\
          c4append_(detail::base64_sextet_to_char_[(char_idx)]);\
     }
-
     size_t rem, pos = 0;
     constexpr const uint32_t sextet_mask = uint32_t(1 << 6) - 1;
-    const unsigned char *C4_RESTRICT d = (unsigned char *) data.buf; // cast to unsigned to avoid wrapping high-bits
+    const unsigned char *C4_RESTRICT d = (const unsigned char *) data.buf; // cast to unsigned to avoid wrapping high-bits
     for(rem = data.len; rem >= 3; rem -= 3, d += 3)
     {
         const uint32_t val = ((uint32_t(d[0]) << 16) | (uint32_t(d[1]) << 8) | (uint32_t(d[2])));
@@ -17231,9 +19501,8 @@ size_t base64_decode(csubstr encoded, blob data)
         C4_XASSERT(size_t(c) < sizeof(detail::base64_char_to_sextet_));\
         val |= static_cast<uint32_t>(detail::base64_char_to_sextet_[(c)]) << ((shift) * 6);\
     }
-
     C4_ASSERT(base64_valid(encoded));
-    C4_CHECK(encoded.len % 4 == 0);
+    C4_CHECK((encoded.len & 3u) == 0);
     size_t wpos = 0;  // the write position
     const char *C4_RESTRICT d = encoded.str;
     constexpr const uint32_t full_byte = 0xff;
@@ -17564,9 +19833,11 @@ size_t base64_decode(csubstr encoded, blob data)
 #ifdef __clang__
 #   pragma clang diagnostic push
 #   pragma clang diagnostic ignored "-Wformat-nonliteral"
+#   pragma clang diagnostic ignored "-Wold-style-cast"
 #elif defined(__GNUC__)
 #   pragma GCC diagnostic push
 #   pragma GCC diagnostic ignored "-Wformat-nonliteral"
+#   pragma GCC diagnostic ignored "-Wold-style-cast"
 #endif
 
 
@@ -17576,6 +19847,7 @@ namespace c4 {
 static error_flags         s_error_flags = ON_ERROR_DEFAULTS;
 static error_callback_type s_error_callback = nullptr;
 
+
 //-----------------------------------------------------------------------------
 
 error_flags get_error_flags()
@@ -17597,6 +19869,7 @@ void set_error_callback(error_callback_type cb)
     s_error_callback = cb;
 }
 
+
 //-----------------------------------------------------------------------------
 
 void handle_error(srcloc where, const char *fmt, ...)
@@ -17629,23 +19902,24 @@ void handle_error(srcloc where, const char *fmt, ...)
     {
         if(s_error_callback)
         {
-            s_error_callback(buf, msglen/*ss.c_strp(), ss.tellp()*/);
+            s_error_callback(buf, msglen);
         }
     }
 
-    if(s_error_flags & ON_ERROR_ABORT)
+    if(s_error_flags & ON_ERROR_THROW)
     {
-        abort();
+#if defined(C4_EXCEPTIONS_ENABLED) && defined(C4_ERROR_THROWS_EXCEPTION)
+        throw std::runtime_error(buf);
+#endif
     }
 
-    if(s_error_flags & ON_ERROR_THROW)
+    if(s_error_flags & ON_ERROR_ABORT)
     {
-#if defined(C4_EXCEPTIONS_ENABLED) && defined(C4_ERROR_THROWS_EXCEPTION)
-        throw Exception(buf);
-#else
         abort();
-#endif
     }
+
+    abort(); // abort anyway, in case nothing was set
+    C4_UNREACHABLE_AFTER_ERR();
 }
 
 //-----------------------------------------------------------------------------
@@ -17653,20 +19927,19 @@ void handle_error(srcloc where, const char *fmt, ...)
 void handle_warning(srcloc where, const char *fmt, ...)
 {
     va_list args;
-    char buf[1024]; //sstream<c4::string> ss;
+    char buf[1024];
     va_start(args, fmt);
     vsnprintf(buf, sizeof(buf), fmt, args);
     va_end(args);
     C4_LOGF_WARN("\n");
 #if defined(C4_ERROR_SHOWS_FILELINE) && defined(C4_ERROR_SHOWS_FUNC)
-    C4_LOGF_WARN("%s:%d: WARNING: %s\n", where.file, where.line, buf/*ss.c_strp()*/);
+    C4_LOGF_WARN("%s:%d: WARNING: %s\n", where.file, where.line, buf);
     C4_LOGF_WARN("%s:%d: WARNING: here: %s\n", where.file, where.line, where.func);
 #elif defined(C4_ERROR_SHOWS_FILELINE)
-    C4_LOGF_WARN("%s:%d: WARNING: %s\n", where.file, where.line, buf/*ss.c_strp()*/);
+    C4_LOGF_WARN("%s:%d: WARNING: %s\n", where.file, where.line, buf);
 #elif ! defined(C4_ERROR_SHOWS_FUNC)
-    C4_LOGF_WARN("WARNING: %s\n", buf/*ss.c_strp()*/);
+    C4_LOGF_WARN("WARNING: %s\n", buf);
 #endif
-    //c4::log.flush();
 }
 
 //-----------------------------------------------------------------------------
@@ -17678,33 +19951,38 @@ bool is_debugger_attached()
     if(first_call)
     {
         first_call = false;
+        C4_SUPPRESS_WARNING_GCC_PUSH
+        #if defined(__GNUC__) && __GNUC__ > 9
+        C4_SUPPRESS_WARNING_GCC("-Wanalyzer-fd-leak")
+        #endif
         //! @see http://stackoverflow.com/questions/3596781/how-to-detect-if-the-current-process-is-being-run-by-gdb
         //! (this answer: http://stackoverflow.com/a/24969863/3968589 )
         char buf[1024] = "";
-
         int status_fd = open("/proc/self/status", O_RDONLY);
         if (status_fd == -1)
         {
             return 0;
         }
-
-        ssize_t num_read = ::read(status_fd, buf, sizeof(buf));
-
-        if (num_read > 0)
+        else
         {
-            static const char TracerPid[] = "TracerPid:";
-            char *tracer_pid;
-
-            if(num_read < 1024)
-            {
-                buf[num_read] = 0;
-            }
-            tracer_pid = strstr(buf, TracerPid);
-            if (tracer_pid)
+            ssize_t num_read = ::read(status_fd, buf, sizeof(buf) - 1); // keep the last byte free so buf is always NUL-terminated for strstr()
+            if (num_read > 0)
             {
-                first_call_result = !!::atoi(tracer_pid + sizeof(TracerPid) - 1);
+                static const char TracerPid[] = "TracerPid:";
+                char *tracer_pid;
+                if(num_read < 1024)
+                {
+                    buf[num_read] = 0;
+                }
+                tracer_pid = strstr(buf, TracerPid);
+                if (tracer_pid)
+                {
+                    first_call_result = !!::atoi(tracer_pid + sizeof(TracerPid) - 1);
+                }
             }
+            close(status_fd);
         }
+        C4_SUPPRESS_WARNING_GCC_POP
     }
     return first_call_result;
 #elif defined(C4_PS4)
@@ -17738,6 +20016,7 @@ bool is_debugger_attached()
     size = sizeof(info);
     junk = sysctl(mib, sizeof(mib) / sizeof(*mib), &info, &size, NULL, 0);
     assert(junk == 0);
+    (void)junk;
 
     // We're being debugged if the P_TRACED flag is set.
     return ((info.kp_proc.p_flag & P_TRACED) != 0);
@@ -17799,6 +20078,93 @@ bool is_debugger_attached()
 
 
 
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/c4/yml/fwd.hpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/fwd.hpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
+
+#ifndef _C4_YML_FWD_HPP_
+#define _C4_YML_FWD_HPP_
+
+/** @file fwd.hpp forward declarations */
+
+namespace c4 {
+namespace yml {
+
+struct NodeScalar;
+struct NodeInit;
+struct NodeData;
+struct NodeType;
+class NodeRef;
+class ConstNodeRef;
+class Tree;
+struct ReferenceResolver;
+template<class EventHandler> class ParseEngine;
+struct EventHandlerTree;
+using Parser = ParseEngine<EventHandlerTree>;
+
+} // namespace yml
+} // namespace c4
+
+#endif /* _C4_YML_FWD_HPP_ */
+
+
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/fwd.hpp)
+
+
+
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/c4/yml/version.hpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/version.hpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
+
+#ifndef _C4_YML_VERSION_HPP_
+#define _C4_YML_VERSION_HPP_
+
+/** @file version.hpp */
+
+#define RYML_VERSION "0.7.2"
+#define RYML_VERSION_MAJOR 0
+#define RYML_VERSION_MINOR 7
+#define RYML_VERSION_PATCH 2
+
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/substr.hpp
+//#include <c4/substr.hpp>
+#if !defined(C4_SUBSTR_HPP_) && !defined(_C4_SUBSTR_HPP_)
+#error "amalgamate: file c4/substr.hpp must have been included at this point"
+#endif /* C4_SUBSTR_HPP_ */
+
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/export.hpp
+//#include <c4/yml/export.hpp>
+#if !defined(C4_YML_EXPORT_HPP_) && !defined(_C4_YML_EXPORT_HPP_)
+#error "amalgamate: file c4/yml/export.hpp must have been included at this point"
+#endif /* C4_YML_EXPORT_HPP_ */
+
+
+namespace c4 {
+namespace yml {
+
+RYML_EXPORT csubstr version();
+RYML_EXPORT int version_major();
+RYML_EXPORT int version_minor();
+RYML_EXPORT int version_patch();
+
+} // namespace yml
+} // namespace c4
+
+#endif /* _C4_YML_VERSION_HPP_ */
+
+
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/version.hpp)
+
+
+
 //********************************************************************************
 //--------------------------------------------------------------------------------
 // src/c4/yml/common.hpp
@@ -17809,6 +20175,8 @@ bool is_debugger_attached()
 #ifndef _C4_YML_COMMON_HPP_
 #define _C4_YML_COMMON_HPP_
 
+/** @file common.hpp Common utilities and infrastructure used by ryml. */
+
 //included above:
 //#include <cstddef>
 // amalgamate: removed include of
@@ -17818,6 +20186,13 @@ bool is_debugger_attached()
 #error "amalgamate: file c4/substr.hpp must have been included at this point"
 #endif /* C4_SUBSTR_HPP_ */
 
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/dump.hpp
+//#include <c4/dump.hpp>
+#if !defined(C4_DUMP_HPP_) && !defined(_C4_DUMP_HPP_)
+#error "amalgamate: file c4/dump.hpp must have been included at this point"
+#endif /* C4_DUMP_HPP_ */
+
 // amalgamate: removed include of
 // https://github.com/biojppm/rapidyaml/src/c4/yml/export.hpp
 //#include <c4/yml/export.hpp>
@@ -17826,51 +20201,226 @@ bool is_debugger_attached()
 #endif /* C4_YML_EXPORT_HPP_ */
 
 
-
-#ifndef RYML_USE_ASSERT
-#   define RYML_USE_ASSERT C4_USE_ASSERT
+#if defined(C4_MSVC) || defined(C4_MINGW)
+//included above:
+//#include <malloc.h>
+#else
+#include <alloca.h>
 #endif
 
 
-#if RYML_USE_ASSERT
-#   define RYML_ASSERT(cond) RYML_CHECK(cond)
-#   define RYML_ASSERT_MSG(cond, msg) RYML_CHECK_MSG(cond, msg)
-#else
-#   define RYML_ASSERT(cond)
-#   define RYML_ASSERT_MSG(cond, msg)
+
+//-----------------------------------------------------------------------------
+
+#ifndef RYML_ERRMSG_SIZE
+/// size for the error message buffer
+#define RYML_ERRMSG_SIZE (1024)
 #endif
 
+#ifndef RYML_LOGBUF_SIZE
+/// size for the buffer used to format individual values to string
+/// while preparing an error message. This is only used for formatting
+/// individual values in the message; final messages will be larger
+/// than this value (see @ref RYML_ERRMSG_SIZE). This is also used for
+/// the detailed debug log messages when RYML_DBG is defined.
+#define RYML_LOGBUF_SIZE (256)
+#endif
 
-#define RYML_CHECK(cond)                                                \
-    do {                                                                \
-        if(!(cond))                                                     \
-        {                                                               \
-            C4_DEBUG_BREAK();                                           \
-            c4::yml::error("check failed: " #cond, c4::yml::Location(__FILE__, __LINE__, 0)); \
-        }                                                               \
-    } while(0)
+#ifndef RYML_LOGBUF_SIZE_MAX
+/// size for the fallback larger log buffer. When @ref
+/// RYML_LOGBUF_SIZE is not large enough to convert a value to string,
+/// then temporary stack memory is allocated up to
+/// RYML_LOGBUF_SIZE_MAX. This limit is in place to prevent a stack
+/// overflow. If the printed value requires more than
+/// RYML_LOGBUF_SIZE_MAX, the value is silently skipped.
+#define RYML_LOGBUF_SIZE_MAX (1024)
+#endif
+
+#ifndef RYML_LOCATIONS_SMALL_THRESHOLD
+/// threshold at which a location search will revert from linear to
+/// binary search.
+#define RYML_LOCATIONS_SMALL_THRESHOLD (30)
+#endif
+
+
+//-----------------------------------------------------------------------------
+// Specify groups to have a predefined topic order in doxygen:
+
+/** @defgroup doc_quickstart Quickstart
+ *
+ * Example code for every feature.
+ */
+
+/** @defgroup doc_parse Parse utilities
+ * @see sample::sample_parse_in_place
+ * @see sample::sample_parse_in_arena
+ * @see sample::sample_parse_file
+ * @see sample::sample_parse_reuse_tree
+ * @see sample::sample_parse_reuse_parser
+ * @see sample::sample_parse_reuse_tree_and_parser
+ * @see sample::sample_location_tracking
+ */
+
+/** @defgroup doc_emit Emit utilities
+ *
+ * Utilities to emit YAML and JSON, either to a memory buffer or to a
+ * file or ostream-like class.
+ *
+ * @see sample::sample_emit_to_container
+ * @see sample::sample_emit_to_stream
+ * @see sample::sample_emit_to_file
+ * @see sample::sample_emit_nested_node
+ * @see sample::sample_emit_style
+ */
+
+/** @defgroup doc_node_type Node types
+ */
+
+/** @defgroup doc_tree Tree utilities
+ * @see sample::sample_quick_overview
+ * @see sample::sample_iterate_trees
+ * @see sample::sample_create_trees
+ * @see sample::sample_tree_arena
+ *
+ * @see sample::sample_static_trees
+ * @see sample::sample_location_tracking
+ *
+ * @see sample::sample_docs
+ * @see sample::sample_anchors_and_aliases
+ * @see sample::sample_tags
+ */
+
+/** @defgroup doc_node_classes Node classes
+ *
+ * High-level node classes.
+ *
+ * @see sample::sample_quick_overview
+ * @see sample::sample_iterate_trees
+ * @see sample::sample_create_trees
+ * @see sample::sample_tree_arena
+ */
+
+/** @defgroup doc_callbacks Callbacks for errors and allocation
+ *
+ * Functions called by ryml to allocate/free memory and to report
+ * errors.
+ *
+ * @see sample::sample_error_handler
+ * @see sample::sample_global_allocator
+ * @see sample::sample_per_tree_allocator
+ */
+
+/** @defgroup doc_serialization Serialization/deserialization
+ *
+ * Contains information on how to serialize and deserialize
+ * fundamental types, user scalar types, user container types and
+ * interop with std scalar/container types.
+ *
+ */
+
+/** @defgroup doc_ref_utils Anchor/Reference utilities
+ *
+ * @see sample::sample_anchors_and_aliases
+ * */
+
+/** @defgroup doc_tag_utils Tag utilities
+ * @see sample::sample_tags
+ */
+
+/** @defgroup doc_preprocessors Preprocessors
+ *
+ * Functions for preprocessing YAML prior to parsing.
+ */
+
+
+//-----------------------------------------------------------------------------
+
+// document macros for doxygen
+#ifdef __DOXYGEN__ // defined in Doxyfile::PREDEFINED
+
+/** define this macro with a boolean value to enable/disable
+ * assertions to check preconditions and assumptions throughout the
+ * codebase; this causes a slowdown of the code, and larger code
+ * size. By default, this macro is defined unless NDEBUG is defined
+ * (see C4_USE_ASSERT); as a result, by default this macro is truthy
+ * only in debug builds. */
+#   define RYML_USE_ASSERT
+
+/** (Undefined by default) Define this macro to disable ryml's default
+ * implementation of the callback functions; see @ref c4::yml::Callbacks  */
+#   define RYML_NO_DEFAULT_CALLBACKS
+
+/** (Undefined by default) When this macro is defined (and
+ * @ref RYML_NO_DEFAULT_CALLBACKS is not defined), the default error
+ * handler will throw C++ exceptions of type `std::runtime_error`. */
+#   define RYML_DEFAULT_CALLBACK_USES_EXCEPTIONS
+
+/** Conditionally expands to `noexcept` when @ref RYML_USE_ASSERT is 0 and
+ * is empty otherwise. The user is unable to override this macro. */
+#   define RYML_NOEXCEPT
+
+#endif
+
+
+//-----------------------------------------------------------------------------
+
+
+/** @cond dev*/
+#ifndef RYML_USE_ASSERT
+#   define RYML_USE_ASSERT C4_USE_ASSERT
+#endif
+
+#if RYML_USE_ASSERT
+#   define RYML_ASSERT(cond) RYML_CHECK(cond)
+#   define RYML_ASSERT_MSG(cond, msg) RYML_CHECK_MSG(cond, msg)
+#   define _RYML_CB_ASSERT(cb, cond) _RYML_CB_CHECK((cb), (cond))
+#   define _RYML_CB_ASSERT_(cb, cond, loc) _RYML_CB_CHECK((cb), (cond), (loc))
+#   define RYML_NOEXCEPT
+#else
+#   define RYML_ASSERT(cond)
+#   define RYML_ASSERT_MSG(cond, msg)
+#   define _RYML_CB_ASSERT(cb, cond)
+#   define _RYML_CB_ASSERT_(cb, cond, loc)
+#   define RYML_NOEXCEPT noexcept
+#endif
+
+#define RYML_DEPRECATED(msg) C4_DEPRECATED(msg)
+
+#define RYML_CHECK(cond)                                                \
+    do {                                                                \
+        if(C4_UNLIKELY(!(cond)))                                        \
+        {                                                               \
+            RYML_DEBUG_BREAK();                                         \
+            c4::yml::error("check failed: " #cond, c4::yml::Location(__FILE__, __LINE__, 0)); \
+            C4_UNREACHABLE_AFTER_ERR();                                 \
+        }                                                               \
+    } while(0)
 
 #define RYML_CHECK_MSG(cond, msg)                                       \
     do                                                                  \
     {                                                                   \
-        if(!(cond))                                                     \
+        if(C4_UNLIKELY(!(cond)))                                        \
         {                                                               \
-            C4_DEBUG_BREAK();                                           \
+            RYML_DEBUG_BREAK();                                         \
             c4::yml::error(msg ": check failed: " #cond, c4::yml::Location(__FILE__, __LINE__, 0)); \
+            C4_UNREACHABLE_AFTER_ERR();                                 \
         }                                                               \
     } while(0)
 
-
-#if C4_CPP >= 14
-#   define RYML_DEPRECATED(msg) [[deprecated(msg)]]
+#if defined(RYML_DBG) && !defined(NDEBUG) && !defined(C4_NO_DEBUG_BREAK)
+#   define RYML_DEBUG_BREAK()                               \
+    do {                                                    \
+        if(c4::get_error_flags() & c4::ON_ERROR_DEBUGBREAK) \
+        {                                                   \
+            C4_DEBUG_BREAK();                               \
+        }                                                   \
+    } while(0)
 #else
-#   if defined(_MSC_VER)
-#       define RYML_DEPRECATED(msg) __declspec(deprecated)
-#   else // defined(__GNUC__) || defined(__clang__)
-#       define RYML_DEPRECATED(msg) __attribute__((deprecated))
-#   endif
+#   define RYML_DEBUG_BREAK()
 #endif
 
+/** @endcond */
+
 
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
@@ -17879,11 +20429,35 @@ bool is_debugger_attached()
 namespace c4 {
 namespace yml {
 
-enum : size_t {
-    /** a null position */
-    npos = size_t(-1),
+C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast")
+
+
+#ifndef RYML_ID_TYPE
+/** The type of a node id in the YAML tree. In the future, the default
+ * will likely change to int32_t, which was observed to be faster.
+ * @see id_type */
+#define RYML_ID_TYPE size_t
+#endif
+
+
+/** The type of a node id in the YAML tree; to override the default
+ * type, define the macro @ref RYML_ID_TYPE to a suitable integer
+ * type. */
+using id_type = RYML_ID_TYPE;
+static_assert(std::is_integral<id_type>::value, "id_type must be an integer type");
+
+
+C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wuseless-cast")
+enum : id_type {
     /** an index to none */
-    NONE = size_t(-1)
+    NONE = id_type(-1),
+};
+C4_SUPPRESS_WARNING_GCC_CLANG_POP
+
+
+enum : size_t {
+    /** a null string position */
+    npos = size_t(-1)
 };
 
 
@@ -17901,72 +20475,96 @@ struct RYML_EXPORT LineCol
     //! column
     size_t col;
 
-    LineCol() : offset(), line(), col() {}
+    LineCol() = default;
     //! construct from line and column
     LineCol(size_t l, size_t c) : offset(0), line(l), col(c) {}
     //! construct from offset, line and column
     LineCol(size_t o, size_t l, size_t c) : offset(o), line(l), col(c) {}
 };
+static_assert(std::is_trivial<LineCol>::value, "LineCol not trivial");
+static_assert(std::is_standard_layout<LineCol>::value, "LineCol not standard layout");
 
 
 //! a source file position
-struct RYML_EXPORT Location : public LineCol
+struct RYML_EXPORT Location
 {
+    //! number of bytes from the beginning of the source buffer
+    size_t offset;
+    //! line
+    size_t line;
+    //! column
+    size_t col;
+    //! file name
     csubstr name;
 
-    operator bool () const { return !name.empty() || line != 0 || offset != 0; }
+    operator bool () const { return !name.empty() || line != 0 || offset != 0 || col != 0; }
+    operator LineCol const& () const { return reinterpret_cast<LineCol const&>(*this); }
 
-    Location() : LineCol(), name() {}
-    Location(                         size_t l, size_t c) : LineCol{   l, c}, name( ) {}
-    Location(    csubstr n,           size_t l, size_t c) : LineCol{   l, c}, name(n) {}
-    Location(    csubstr n, size_t b, size_t l, size_t c) : LineCol{b, l, c}, name(n) {}
-    Location(const char *n,           size_t l, size_t c) : LineCol{   l, c}, name(to_csubstr(n)) {}
-    Location(const char *n, size_t b, size_t l, size_t c) : LineCol{b, l, c}, name(to_csubstr(n)) {}
+    Location() = default;
+    Location(                         size_t l, size_t c) : offset( ), line(l), col(c), name( ) {}
+    Location(               size_t b, size_t l, size_t c) : offset(b), line(l), col(c), name( ) {}
+    Location(    csubstr n,           size_t l, size_t c) : offset( ), line(l), col(c), name(n) {}
+    Location(    csubstr n, size_t b, size_t l, size_t c) : offset(b), line(l), col(c), name(n) {}
+    Location(const char *n,           size_t l, size_t c) : offset( ), line(l), col(c), name(to_csubstr(n)) {}
+    Location(const char *n, size_t b, size_t l, size_t c) : offset(b), line(l), col(c), name(to_csubstr(n)) {}
 };
+static_assert(std::is_standard_layout<Location>::value, "Location not standard layout");
 
 
 //-----------------------------------------------------------------------------
 
-/** the type of the function used to report errors. This function must
- * interrupt execution, either by raising an exception or calling
- * std::abort().
+/** @addtogroup doc_callbacks
  *
- * @warning the error callback must never return: it must either abort
- * or throw an exception. Otherwise, the parser will enter into an
- * infinite loop, or the program may crash. */
-using pfn_error = void (*)(const char* msg, size_t msg_len, Location location, void *user_data);
-/** the type of the function used to allocate memory */
-using pfn_allocate = void* (*)(size_t len, void* hint, void *user_data);
-/** the type of the function used to free memory */
-using pfn_free = void (*)(void* mem, size_t size, void *user_data);
+ * @{ */
 
-/** trigger an error: call the current error callback. */
-RYML_EXPORT void error(const char *msg, size_t msg_len, Location loc);
-/** @overload error */
-inline void error(const char *msg, size_t msg_len)
-{
-    error(msg, msg_len, Location{});
-}
-/** @overload error */
-template<size_t N>
-inline void error(const char (&msg)[N], Location loc)
-{
-    error(msg, N-1, loc);
-}
-/** @overload error */
-template<size_t N>
-inline void error(const char (&msg)[N])
-{
-    error(msg, N-1, Location{});
-}
+struct Callbacks;
 
-//-----------------------------------------------------------------------------
 
-/** a c-style callbacks class
+/** set the global callbacks for the library; after a call to this
+ * function, these callbacks will be used by newly created objects
+ * (unless they are copying older objects with different
+ * callbacks). If @ref RYML_NO_DEFAULT_CALLBACKS is defined, it is
+ * mandatory to call this function prior to using any other library
+ * facility.
+ *
+ * @warning This function is NOT thread-safe.
+ *
+ * @warning the error callback must never return: see @ref pfn_error
+ * for more details */
+RYML_EXPORT void set_callbacks(Callbacks const& c);
+
+/** get the global callbacks
+ * @warning This function is not thread-safe. */
+RYML_EXPORT Callbacks const& get_callbacks();
+
+/** set the global callbacks back to their defaults
+ * @warning This function is not thread-safe. */
+RYML_EXPORT void reset_callbacks();
+
+
+/** the type of the function used to report errors
  *
- * @warning the error callback must never return: it must either abort
- * or throw an exception. Otherwise, the parser will enter into an
- * infinite loop, or the program may crash. */
+ * @warning When given by the user, this function MUST interrupt
+ * execution, typically by either throwing an exception, or using
+ * `std::longjmp()` ([see
+ * documentation](https://en.cppreference.com/w/cpp/utility/program/setjmp))
+ * or by calling `std::abort()`. If the function returns, the parser
+ * will enter into an infinite loop, or the program may crash. */
+using pfn_error = void (*) (const char* msg, size_t msg_len, Location location, void *user_data);
+
+
+/** the type of the function used to allocate memory; ryml will only
+ * allocate memory through this callback. */
+using pfn_allocate = void* (*)(size_t len, void* hint, void *user_data);
+
+
+/** the type of the function used to free memory; ryml will only free
+ * memory through this callback. */
+using pfn_free = void (*)(void* mem, size_t size, void *user_data);
+
+
+/** a c-style callbacks class. Can be used globally by the library
+ * and/or locally by @ref Tree and @ref Parser objects. */
 struct RYML_EXPORT Callbacks
 {
     void *       m_user_data;
@@ -17974,8 +20572,32 @@ struct RYML_EXPORT Callbacks
     pfn_free     m_free;
     pfn_error    m_error;
 
+    /** Construct an object with the default callbacks. If
+     * @ref RYML_NO_DEFAULT_CALLBACKS is defined, the object will have null
+     * members.*/
     Callbacks();
-    Callbacks(void *user_data, pfn_allocate alloc, pfn_free free, pfn_error error_);
+
+    /** Construct an object with the given callbacks.
+     *
+     * @param user_data Data to be forwarded in every call to a callback.
+     *
+     * @param alloc A pointer to an allocate function. Unless
+     *        @ref RYML_NO_DEFAULT_CALLBACKS is defined, when this
+     *        parameter is null, will fall back to ryml's default
+     *        alloc implementation.
+     *
+     * @param free A pointer to a free function. Unless
+     *        @ref RYML_NO_DEFAULT_CALLBACKS is defined, when this
+     *        parameter is null, will fall back to ryml's default free
+     *        implementation.
+     *
+     * @param error A pointer to an error function, which must never
+     *        return (see @ref pfn_error). Unless
+     *        @ref RYML_NO_DEFAULT_CALLBACKS is defined, when this
+     *        parameter is null, will fall back to ryml's default
+     *        error implementation.
+     */
+    Callbacks(void *user_data, pfn_allocate alloc, pfn_free free, pfn_error error);
 
     bool operator!= (Callbacks const& that) const { return !operator==(that); }
     bool operator== (Callbacks const& that) const
@@ -17987,42 +20609,60 @@ struct RYML_EXPORT Callbacks
     }
 };
 
-/** set the global callbacks.
- *
- * @warning the error callback must never return: it must either abort
- * or throw an exception. Otherwise, the parser will enter into an
- * infinite loop, or the program may crash. */
-RYML_EXPORT void set_callbacks(Callbacks const& c);
-/// get the global callbacks
-RYML_EXPORT Callbacks const& get_callbacks();
-/// set the global callbacks back to their defaults
-RYML_EXPORT void reset_callbacks();
+
+/** @} */
+
+
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
 
 /// @cond dev
+
+// BEWARE! MSVC requires that [[noreturn]] appears before RYML_EXPORT
+[[noreturn]] RYML_EXPORT void error(Callbacks const& cb, const char *msg, size_t msg_len, Location loc);
+[[noreturn]] RYML_EXPORT void error(const char *msg, size_t msg_len, Location loc);
+
+[[noreturn]] inline void error(const char *msg, size_t msg_len)
+{
+    error(msg, msg_len, Location{});
+}
+template<size_t N>
+[[noreturn]] inline void error(const char (&msg)[N], Location loc)
+{
+    error(msg, N-1, loc);
+}
+template<size_t N>
+[[noreturn]] inline void error(const char (&msg)[N])
+{
+    error(msg, N-1, Location{});
+}
+
 #define _RYML_CB_ERR(cb, msg_literal)                                   \
+    _RYML_CB_ERR_(cb, msg_literal, c4::yml::Location(__FILE__, 0, __LINE__, 0))
+#define _RYML_CB_CHECK(cb, cond)                                        \
+    _RYML_CB_CHECK_(cb, cond, c4::yml::Location(__FILE__, 0, __LINE__, 0))
+#define _RYML_CB_ERR_(cb, msg_literal, loc)                             \
 do                                                                      \
 {                                                                       \
     const char msg[] = msg_literal;                                     \
-    C4_DEBUG_BREAK();                                                   \
-    (cb).m_error(msg, sizeof(msg), c4::yml::Location(__FILE__, 0, __LINE__, 0), (cb).m_user_data); \
+    RYML_DEBUG_BREAK();                                                 \
+    c4::yml::error((cb), msg, sizeof(msg)-1, loc);                      \
+    C4_UNREACHABLE_AFTER_ERR();                                         \
 } while(0)
-#define _RYML_CB_CHECK(cb, cond)                                        \
+#define _RYML_CB_CHECK_(cb, cond, loc)                                  \
     do                                                                  \
     {                                                                   \
-        if(!(cond))                                                     \
+        if(C4_UNLIKELY(!(cond)))                                        \
         {                                                               \
             const char msg[] = "check failed: " #cond;                  \
-            C4_DEBUG_BREAK();                                           \
-            (cb).m_error(msg, sizeof(msg), c4::yml::Location(__FILE__, 0, __LINE__, 0), (cb).m_user_data); \
+            RYML_DEBUG_BREAK();                                         \
+            c4::yml::error((cb), msg, sizeof(msg)-1, loc);              \
+            C4_UNREACHABLE_AFTER_ERR();                                 \
         }                                                               \
     } while(0)
-#ifdef RYML_USE_ASSERT
-#define _RYML_CB_ASSERT(cb, cond) _RYML_CB_CHECK((cb), (cond))
-#else
-#define _RYML_CB_ASSERT(cb, cond) do {} while(0)
-#endif
 #define _RYML_CB_ALLOC_HINT(cb, T, num, hint) (T*) (cb).m_allocate((num) * sizeof(T), (hint), (cb).m_user_data)
-#define _RYML_CB_ALLOC(cb, T, num) _RYML_CB_ALLOC_HINT((cb), (T), (num), nullptr)
+#define _RYML_CB_ALLOC(cb, T, num) _RYML_CB_ALLOC_HINT((cb), T, (num), nullptr)
 #define _RYML_CB_FREE(cb, buf, T, num)                              \
     do {                                                            \
         (cb).m_free((buf), (num) * sizeof(T), (cb).m_user_data);    \
@@ -18031,7 +20671,50 @@ do                                                                      \
 
 
 
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+
+typedef enum {
+    BLOCK_LITERAL, //!< keep newlines (|)
+    BLOCK_FOLD     //!< replace newline with single space (>)
+} BlockStyle_e;
+
+typedef enum {
+    CHOMP_CLIP,    //!< single newline at end (default)
+    CHOMP_STRIP,   //!< no newline at end     (-)
+    CHOMP_KEEP     //!< all newlines from end (+)
+} BlockChomp_e;
+
+
+/** Abstracts the fact that a scalar filter result may not fit in the
+ * intended memory. */
+struct FilterResult
+{
+    C4_ALWAYS_INLINE bool valid() const noexcept { return str.str != nullptr; }
+    C4_ALWAYS_INLINE size_t required_len() const noexcept { return str.len; }
+    C4_ALWAYS_INLINE csubstr get() { RYML_ASSERT(valid()); return str; }
+    csubstr str;
+};
+/** Abstracts the fact that a scalar filter result may not fit in the
+ * intended memory; unlike FilterResult, the required length is kept in reqlen. */
+struct FilterResultExtending
+{
+    C4_ALWAYS_INLINE bool valid() const noexcept { return str.str != nullptr; }
+    C4_ALWAYS_INLINE size_t required_len() const noexcept { return reqlen; }
+    C4_ALWAYS_INLINE csubstr get() { RYML_ASSERT(valid()); return str; }
+    csubstr str;
+    size_t reqlen;
+};
+
+
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+
+
 namespace detail {
+// is there a better way to do this?
 template<int8_t signedval, uint8_t unsignedval>
 struct _charconstant_t
     : public std::conditional<std::is_signed<char>::value,
@@ -18047,23 +20730,29 @@ struct _SubstrWriter
 {
     substr buf;
     size_t pos;
-    _SubstrWriter(substr buf_, size_t pos_=0) : buf(buf_), pos(pos_) {}
+    _SubstrWriter(substr buf_, size_t pos_=0) : buf(buf_), pos(pos_) { C4_ASSERT(buf.str); }
     void append(csubstr s)
     {
         C4_ASSERT(!s.overlaps(buf));
-        if(pos + s.len <= buf.len)
+        C4_ASSERT(s.str || !s.len);
+        if(s.len && pos + s.len <= buf.len)
+        {
+            C4_ASSERT(s.str);
             memcpy(buf.str + pos, s.str, s.len);
+        }
         pos += s.len;
     }
     void append(char c)
     {
+        C4_ASSERT(buf.str);
         if(pos < buf.len)
             buf.str[pos] = c;
         ++pos;
     }
     void append_n(char c, size_t numtimes)
     {
-        if(pos + numtimes < buf.len)
+        C4_ASSERT(buf.str); // the writer must wrap a non-null buffer
+        if(numtimes && pos + numtimes <= buf.len) // write only when the whole run fits; an exact fit is allowed, as in append(csubstr)
             memset(buf.str + pos, c, numtimes);
         pos += numtimes;
     }
@@ -18078,8 +20767,72 @@ struct _SubstrWriter
 };
 } // namespace detail
 
+
+namespace detail {
+// dumpfn is a function abstracting prints to terminal (or to string).
+template<class DumpFn, class ...Args>
+C4_NO_INLINE void _dump(DumpFn &&dumpfn, csubstr fmt, Args&& ...args)
+{
+    DumpResults results;
+    // try writing everything:
+    {
+        // buffer for converting individual arguments. it is defined
+        // in a child scope to free it in case the buffer is too small
+        // for any of the arguments.
+        char writebuf[RYML_LOGBUF_SIZE];
+        results = format_dump_resume(std::forward<DumpFn>(dumpfn), writebuf, fmt, std::forward<Args>(args)...);
+    }
+    // if any of the arguments failed to fit the buffer, allocate a
+    // larger buffer (up to a limit) and resume writing.
+    //
+    // results.bufsize is set to the size of the largest element
+    // serialized. Eg int(1) will require 1 byte.
+    if(C4_UNLIKELY(results.bufsize > RYML_LOGBUF_SIZE))
+    {
+        const size_t bufsize = results.bufsize <= RYML_LOGBUF_SIZE_MAX ? results.bufsize : RYML_LOGBUF_SIZE_MAX;
+        #ifdef C4_MSVC
+        substr largerbuf = {static_cast<char*>(_alloca(bufsize)), bufsize};
+        #else
+        substr largerbuf = {static_cast<char*>(alloca(bufsize)), bufsize};
+        #endif
+        results = format_dump_resume(std::forward<DumpFn>(dumpfn), results, largerbuf, fmt, std::forward<Args>(args)...);
+    }
+}
+template<class ...Args>
+C4_NORETURN C4_NO_INLINE void _report_err(Callbacks const& C4_RESTRICT callbacks, csubstr fmt, Args const& C4_RESTRICT ...args)
+{
+    char errmsg[RYML_ERRMSG_SIZE] = {0};
+    detail::_SubstrWriter writer(errmsg);
+    auto dumpfn = [&writer](csubstr s){ writer.append(s); };
+    _dump(dumpfn, fmt, args...);
+    writer.append('\n');
+    const size_t len = writer.pos < RYML_ERRMSG_SIZE ? writer.pos : RYML_ERRMSG_SIZE;
+    callbacks.m_error(errmsg, len, {}, callbacks.m_user_data);
+    C4_UNREACHABLE_AFTER_ERR();
+}
+} // namespace detail
+
+
+inline csubstr _c4prc(const char &C4_RESTRICT c) // pass by reference!
+{
+    switch(c)
+    {
+    case '\n': return csubstr("\\n");
+    case '\t': return csubstr("\\t");
+    case '\0': return csubstr("\\0");
+    case '\r': return csubstr("\\r");
+    case '\f': return csubstr("\\f");
+    case '\b': return csubstr("\\b");
+    case '\v': return csubstr("\\v");
+    case '\a': return csubstr("\\a");
+    default: return csubstr(&c, 1);
+    }
+}
+
 /// @endcond
 
+C4_SUPPRESS_WARNING_GCC_POP
+
 } // namespace yml
 } // namespace c4
 
@@ -18092,28 +20845,13 @@ struct _SubstrWriter
 
 //********************************************************************************
 //--------------------------------------------------------------------------------
-// src/c4/yml/tree.hpp
-// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp
+// src/c4/yml/node_type.hpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/node_type.hpp
 //--------------------------------------------------------------------------------
 //********************************************************************************
 
-#ifndef _C4_YML_TREE_HPP_
-#define _C4_YML_TREE_HPP_
-
-
-// amalgamate: removed include of
-// https://github.com/biojppm/rapidyaml/src/c4/error.hpp
-//#include "c4/error.hpp"
-#if !defined(C4_ERROR_HPP_) && !defined(_C4_ERROR_HPP_)
-#error "amalgamate: file c4/error.hpp must have been included at this point"
-#endif /* C4_ERROR_HPP_ */
-
-// amalgamate: removed include of
-// https://github.com/biojppm/rapidyaml/src/c4/types.hpp
-//#include "c4/types.hpp"
-#if !defined(C4_TYPES_HPP_) && !defined(_C4_TYPES_HPP_)
-#error "amalgamate: file c4/types.hpp must have been included at this point"
-#endif /* C4_TYPES_HPP_ */
+#ifndef C4_YML_NODE_TYPE_HPP_
+#define C4_YML_NODE_TYPE_HPP_
 
 #ifndef _C4_YML_COMMON_HPP_
 // amalgamate: removed include of
@@ -18125,204 +20863,113 @@ struct _SubstrWriter
 
 #endif
 
-// amalgamate: removed include of
-// https://github.com/biojppm/rapidyaml/src/c4/charconv.hpp
-//#include <c4/charconv.hpp>
-#if !defined(C4_CHARCONV_HPP_) && !defined(_C4_CHARCONV_HPP_)
-#error "amalgamate: file c4/charconv.hpp must have been included at this point"
-#endif /* C4_CHARCONV_HPP_ */
-
-//included above:
-//#include <cmath>
-//included above:
-//#include <limits>
-
-
-C4_SUPPRESS_WARNING_MSVC_PUSH
-C4_SUPPRESS_WARNING_MSVC(4251) // needs to have dll-interface to be used by clients of struct
-C4_SUPPRESS_WARNING_MSVC(4296) // expression is always 'boolean_value'
-C4_SUPPRESS_WARNING_GCC_CLANG_PUSH
-C4_SUPPRESS_WARNING_GCC("-Wtype-limits")
-
+C4_SUPPRESS_WARNING_MSVC_PUSH
+C4_SUPPRESS_WARNING_GCC_CLANG_PUSH
+C4_SUPPRESS_WARNING_GCC_CLANG("-Wold-style-cast")
 
 namespace c4 {
 namespace yml {
 
-struct NodeScalar;
-struct NodeInit;
-struct NodeData;
-class NodeRef;
-class Tree;
-
-
-/** encode a floating point value to a string. */
-template<class T>
-size_t to_chars_float(substr buf, T val)
-{
-    C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wfloat-equal");
-    static_assert(std::is_floating_point<T>::value, "must be floating point");
-    if(C4_UNLIKELY(std::isnan(val)))
-        return to_chars(buf, csubstr(".nan"));
-    else if(C4_UNLIKELY(val == std::numeric_limits<T>::infinity()))
-        return to_chars(buf, csubstr(".inf"));
-    else if(C4_UNLIKELY(val == -std::numeric_limits<T>::infinity()))
-        return to_chars(buf, csubstr("-.inf"));
-    return to_chars(buf, val);
-    C4_SUPPRESS_WARNING_GCC_CLANG_POP
-}
-
-
-/** decode a floating point from string. Accepts special values: .nan,
- * .inf, -.inf */
-template<class T>
-bool from_chars_float(csubstr buf, T *C4_RESTRICT val)
-{
-    static_assert(std::is_floating_point<T>::value, "must be floating point");
-    if(C4_LIKELY(from_chars(buf, val)))
-    {
-        return true;
-    }
-    else if(C4_UNLIKELY(buf == ".nan" || buf == ".NaN" || buf == ".NAN"))
-    {
-        *val = std::numeric_limits<T>::quiet_NaN();
-        return true;
-    }
-    else if(C4_UNLIKELY(buf == ".inf" || buf == ".Inf" || buf == ".INF"))
-    {
-        *val = std::numeric_limits<T>::infinity();
-        return true;
-    }
-    else if(C4_UNLIKELY(buf == "-.inf" || buf == "-.Inf" || buf == "-.INF"))
-    {
-        *val = -std::numeric_limits<T>::infinity();
-        return true;
-    }
-    else
-    {
-        return false;
-    }
-}
-
-
-//-----------------------------------------------------------------------------
-//-----------------------------------------------------------------------------
-//-----------------------------------------------------------------------------
-
-/** the integral type necessary to cover all the bits marking node tags */
-using tag_bits = uint16_t;
-
-/** a bit mask for marking tags for types */
-typedef enum : tag_bits {
-    // container types
-    TAG_NONE      =  0,
-    TAG_MAP       =  1, /**< !!map   Unordered set of key: value pairs without duplicates. @see https://yaml.org/type/map.html */
-    TAG_OMAP      =  2, /**< !!omap  Ordered sequence of key: value pairs without duplicates. @see https://yaml.org/type/omap.html */
-    TAG_PAIRS     =  3, /**< !!pairs Ordered sequence of key: value pairs allowing duplicates. @see https://yaml.org/type/pairs.html */
-    TAG_SET       =  4, /**< !!set   Unordered set of non-equal values. @see https://yaml.org/type/set.html */
-    TAG_SEQ       =  5, /**< !!seq   Sequence of arbitrary values. @see https://yaml.org/type/seq.html */
-    // scalar types
-    TAG_BINARY    =  6, /**< !!binary A sequence of zero or more octets (8 bit values). @see https://yaml.org/type/binary.html */
-    TAG_BOOL      =  7, /**< !!bool   Mathematical Booleans. @see https://yaml.org/type/bool.html */
-    TAG_FLOAT     =  8, /**< !!float  Floating-point approximation to real numbers. https://yaml.org/type/float.html */
-    TAG_INT       =  9, /**< !!float  Mathematical integers. https://yaml.org/type/int.html */
-    TAG_MERGE     = 10, /**< !!merge  Specify one or more mapping to be merged with the current one. https://yaml.org/type/merge.html */
-    TAG_NULL      = 11, /**< !!null   Devoid of value. https://yaml.org/type/null.html */
-    TAG_STR       = 12, /**< !!str    A sequence of zero or more Unicode characters. https://yaml.org/type/str.html */
-    TAG_TIMESTAMP = 13, /**< !!timestamp A point in time https://yaml.org/type/timestamp.html */
-    TAG_VALUE     = 14, /**< !!value  Specify the default value of a mapping https://yaml.org/type/value.html */
-    TAG_YAML      = 15, /**< !!yaml   Specify the default value of a mapping https://yaml.org/type/yaml.html */
-} YamlTag_e;
-
-YamlTag_e to_tag(csubstr tag);
-csubstr from_tag(YamlTag_e tag);
-csubstr from_tag_long(YamlTag_e tag);
-csubstr normalize_tag(csubstr tag);
-csubstr normalize_tag_long(csubstr tag);
-
-struct TagDirective
-{
-    /** Eg `!e!` in `%TAG !e! tag:example.com,2000:app/` */
-    csubstr handle;
-    /** Eg `tag:example.com,2000:app/` in `%TAG !e! tag:example.com,2000:app/` */
-    csubstr prefix;
-    /** The next node to which this tag directive applies */
-    size_t next_node_id;
-};
-
-#ifndef RYML_MAX_TAG_DIRECTIVES
-/** the maximum number of tag directives in a Tree */
-#define RYML_MAX_TAG_DIRECTIVES 4
-#endif
-
-
+/** @addtogroup doc_node_type
+ *
+ * @{
+ */
 
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 
 
-/** the integral type necessary to cover all the bits marking node types */
-using type_bits = uint64_t;
+/** the integral type necessary to cover all the bits for NodeType_e */
+using type_bits = uint32_t;
 
 
-/** a bit mask for marking node types */
+/** a bit mask for marking node types and styles */
 typedef enum : type_bits {
-    // a convenience define, undefined below
-    #define c4bit(v) (type_bits(1) << v)
-    NOTYPE  = 0,            ///< no node type is set
-    VAL     = c4bit(0),     ///< a leaf node, has a (possibly empty) value
-    KEY     = c4bit(1),     ///< is member of a map, must have non-empty key
-    MAP     = c4bit(2),     ///< a map: a parent of keyvals
-    SEQ     = c4bit(3),     ///< a seq: a parent of vals
-    DOC     = c4bit(4),     ///< a document
-    STREAM  = c4bit(5)|SEQ, ///< a stream: a seq of docs
-    KEYREF  = c4bit(6),     ///< a *reference: the key references an &anchor
-    VALREF  = c4bit(7),     ///< a *reference: the val references an &anchor
-    KEYANCH = c4bit(8),     ///< the key has an &anchor
-    VALANCH = c4bit(9),     ///< the val has an &anchor
-    KEYTAG  = c4bit(10),    ///< the key has an explicit tag/type
-    VALTAG  = c4bit(11),    ///< the val has an explicit tag/type
-    _TYMASK = c4bit(12)-1,  // all the bits up to here
-    VALQUO  = c4bit(12),    ///< the val is quoted by '', "", > or |
-    KEYQUO  = c4bit(13),    ///< the key is quoted by '', "", > or |
+    #define __(v) (type_bits(1) << v) // a convenience define, undefined below
+    NOTYPE  = 0,         ///< no node type or style is set
+    KEY     = __(0),     ///< is member of a map, must have non-empty key
+    VAL     = __(1),     ///< a scalar: has a scalar (ie string) value, possibly empty. must be a leaf node, and cannot be MAP or SEQ
+    MAP     = __(2),     ///< a map: a parent of KEYVAL/KEYSEQ/KEYMAP nodes
+    SEQ     = __(3),     ///< a seq: a parent of VAL/SEQ/MAP nodes
+    DOC     = __(4),     ///< a document
+    STREAM  = __(5)|SEQ, ///< a stream: a seq of docs
+    KEYREF  = __(6),     ///< a *reference: the key references an &anchor
+    VALREF  = __(7),     ///< a *reference: the val references an &anchor
+    KEYANCH = __(8),     ///< the key has an &anchor
+    VALANCH = __(9),     ///< the val has an &anchor
+    KEYTAG  = __(10),    ///< the key has a tag
+    VALTAG  = __(11),    ///< the val has a tag
+    _TYMASK = __(12)-1,  ///< all the bits up to here
+    //
+    // unfiltered flags:
+    //
+    KEY_UNFILT  = __(12), ///< the key scalar was left unfiltered; the parser was set not to filter. @see ParserOptions
+    VAL_UNFILT  = __(13), ///< the val scalar was left unfiltered; the parser was set not to filter. @see ParserOptions
+    //
+    // style flags:
+    //
+    FLOW_SL     = __(14), ///< mark container with single-line flow style (seqs as '[val1,val2]', maps as '{key: val,key2: val2}')
+    FLOW_ML     = __(15), ///< (NOT IMPLEMENTED, work in progress) mark container with multi-line flow style (seqs as '[\n  val1,\n  val2\n]', maps as '{\n  key: val,\n  key2: val2\n}')
+    BLOCK       = __(16), ///< mark container with block style (seqs as '- val\n', maps as 'key: val')
+    KEY_LITERAL = __(17), ///< mark key scalar as multiline, block literal |
+    VAL_LITERAL = __(18), ///< mark val scalar as multiline, block literal |
+    KEY_FOLDED  = __(19), ///< mark key scalar as multiline, block folded >
+    VAL_FOLDED  = __(20), ///< mark val scalar as multiline, block folded >
+    KEY_SQUO    = __(21), ///< mark key scalar as single quoted '
+    VAL_SQUO    = __(22), ///< mark val scalar as single quoted '
+    KEY_DQUO    = __(23), ///< mark key scalar as double quoted "
+    VAL_DQUO    = __(24), ///< mark val scalar as double quoted "
+    KEY_PLAIN   = __(25), ///< mark key scalar as plain scalar (unquoted, even when multiline)
+    VAL_PLAIN   = __(26), ///< mark val scalar as plain scalar (unquoted, even when multiline)
+    //
+    // type combination masks:
+    //
     KEYVAL  = KEY|VAL,
     KEYSEQ  = KEY|SEQ,
     KEYMAP  = KEY|MAP,
     DOCMAP  = DOC|MAP,
     DOCSEQ  = DOC|SEQ,
     DOCVAL  = DOC|VAL,
-    // these flags are from a work in progress and should not be used yet
-    _WIP_STYLE_FLOW_SL = c4bit(14), ///< mark container with single-line flow format (seqs as '[val1,val2], maps as '{key: val, key2: val2}')
-    _WIP_STYLE_FLOW_ML = c4bit(15), ///< mark container with multi-line flow format (seqs as '[val1,\nval2], maps as '{key: val,\nkey2: val2}')
-    _WIP_STYLE_BLOCK   = c4bit(16), ///< mark container with block format (seqs as '- val\n', maps as 'key: val')
-    _WIP_KEY_LITERAL   = c4bit(17), ///< mark key scalar as multiline, block literal |
-    _WIP_VAL_LITERAL   = c4bit(18), ///< mark val scalar as multiline, block literal |
-    _WIP_KEY_FOLDED    = c4bit(19), ///< mark key scalar as multiline, block folded >
-    _WIP_VAL_FOLDED    = c4bit(20), ///< mark val scalar as multiline, block folded >
-    _WIP_KEY_SQUO      = c4bit(21), ///< mark key scalar as single quoted
-    _WIP_VAL_SQUO      = c4bit(22), ///< mark val scalar as single quoted
-    _WIP_KEY_DQUO      = c4bit(23), ///< mark key scalar as double quoted
-    _WIP_VAL_DQUO      = c4bit(24), ///< mark val scalar as double quoted
-    _WIP_KEY_PLAIN     = c4bit(25), ///< mark key scalar as plain scalar (unquoted, even when multiline)
-    _WIP_VAL_PLAIN     = c4bit(26), ///< mark val scalar as plain scalar (unquoted, even when multiline)
-    _WIP_KEY_STYLE     = _WIP_KEY_LITERAL|_WIP_KEY_FOLDED|_WIP_KEY_SQUO|_WIP_KEY_DQUO|_WIP_KEY_PLAIN,
-    _WIP_VAL_STYLE     = _WIP_VAL_LITERAL|_WIP_VAL_FOLDED|_WIP_VAL_SQUO|_WIP_VAL_DQUO|_WIP_VAL_PLAIN,
-    _WIP_KEY_FT_NL     = c4bit(27), ///< features: mark key scalar as having \n in its contents
-    _WIP_VAL_FT_NL     = c4bit(28), ///< features: mark val scalar as having \n in its contents
-    _WIP_KEY_FT_SQ     = c4bit(29), ///< features: mark key scalar as having single quotes in its contents
-    _WIP_VAL_FT_SQ     = c4bit(30), ///< features: mark val scalar as having single quotes in its contents
-    _WIP_KEY_FT_DQ     = c4bit(31), ///< features: mark key scalar as having double quotes in its contents
-    _WIP_VAL_FT_DQ     = c4bit(32), ///< features: mark val scalar as having double quotes in its contents
-    #undef c4bit
+    //
+    // style combination masks:
+    //
+    SCALAR_LITERAL = KEY_LITERAL|VAL_LITERAL,
+    SCALAR_FOLDED  = KEY_FOLDED|VAL_FOLDED,
+    SCALAR_SQUO    = KEY_SQUO|VAL_SQUO,
+    SCALAR_DQUO    = KEY_DQUO|VAL_DQUO,
+    SCALAR_PLAIN   = KEY_PLAIN|VAL_PLAIN,
+    KEYQUO         = KEY_SQUO|KEY_DQUO|KEY_FOLDED|KEY_LITERAL, ///< key style is one of ', ", > or |
+    VALQUO         = VAL_SQUO|VAL_DQUO|VAL_FOLDED|VAL_LITERAL, ///< val style is one of ', ", > or |
+    KEY_STYLE      = KEY_LITERAL|KEY_FOLDED|KEY_SQUO|KEY_DQUO|KEY_PLAIN, ///< mask of all the scalar styles for key (not container styles!)
+    VAL_STYLE      = VAL_LITERAL|VAL_FOLDED|VAL_SQUO|VAL_DQUO|VAL_PLAIN, ///< mask of all the scalar styles for val (not container styles!)
+    SCALAR_STYLE   = KEY_STYLE|VAL_STYLE,
+    CONTAINER_STYLE_FLOW  = FLOW_SL|FLOW_ML,
+    CONTAINER_STYLE_BLOCK = BLOCK,
+    CONTAINER_STYLE       = FLOW_SL|FLOW_ML|BLOCK,
+    STYLE          = SCALAR_STYLE | CONTAINER_STYLE,
+    //
+    // mixed masks
+    _KEYMASK = KEY | KEYQUO | KEYANCH | KEYREF | KEYTAG,
+    _VALMASK = VAL | VALQUO | VALANCH | VALREF | VALTAG,
+    #undef __
 } NodeType_e;
 
+constexpr C4_ALWAYS_INLINE C4_CONST NodeType_e operator|  (NodeType_e lhs, NodeType_e rhs) noexcept { return (NodeType_e)(((type_bits)lhs) | ((type_bits)rhs)); }
+constexpr C4_ALWAYS_INLINE C4_CONST NodeType_e operator&  (NodeType_e lhs, NodeType_e rhs) noexcept { return (NodeType_e)(((type_bits)lhs) & ((type_bits)rhs)); }
+constexpr C4_ALWAYS_INLINE C4_CONST NodeType_e operator>> (NodeType_e bits, uint32_t n) noexcept { return (NodeType_e)(((type_bits)bits) >> n); }
+constexpr C4_ALWAYS_INLINE C4_CONST NodeType_e operator<< (NodeType_e bits, uint32_t n) noexcept { return (NodeType_e)(((type_bits)bits) << n); }
+constexpr C4_ALWAYS_INLINE C4_CONST NodeType_e operator~  (NodeType_e bits) noexcept { return (NodeType_e)(~(type_bits)bits); }
+C4_ALWAYS_INLINE NodeType_e& operator&= (NodeType_e &subject, NodeType_e bits) noexcept { subject = (NodeType_e)((type_bits)subject & (type_bits)bits); return subject; }
+C4_ALWAYS_INLINE NodeType_e& operator|= (NodeType_e &subject, NodeType_e bits) noexcept { subject = (NodeType_e)((type_bits)subject | (type_bits)bits); return subject; }
+
 
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 
 /** wraps a NodeType_e element with some syntactic sugar and predicates */
-struct NodeType
+struct RYML_EXPORT NodeType
 {
 public:
 
@@ -18330,2130 +20977,1842 @@ struct NodeType
 
 public:
 
-    C4_ALWAYS_INLINE operator NodeType_e      & C4_RESTRICT ()       { return type; }
-    C4_ALWAYS_INLINE operator NodeType_e const& C4_RESTRICT () const { return type; }
+    C4_ALWAYS_INLINE NodeType() noexcept : type(NOTYPE) {}
+    C4_ALWAYS_INLINE NodeType(NodeType_e t) noexcept : type(t) {}
+    C4_ALWAYS_INLINE NodeType(type_bits t) noexcept : type((NodeType_e)t) {}
 
-    C4_ALWAYS_INLINE NodeType() : type(NOTYPE) {}
-    C4_ALWAYS_INLINE NodeType(NodeType_e t) : type(t) {}
-    C4_ALWAYS_INLINE NodeType(type_bits t) : type((NodeType_e)t) {}
+    C4_ALWAYS_INLINE bool has_any(NodeType_e t) const noexcept { return (type & t) != 0u; }
+    C4_ALWAYS_INLINE bool has_all(NodeType_e t) const noexcept { return (type & t) == t; }
+    C4_ALWAYS_INLINE bool has_none(NodeType_e t) const noexcept { return (type & t) == 0; }
 
-    C4_ALWAYS_INLINE const char *type_str() const { return type_str(type); }
-    static const char* type_str(NodeType_e t);
+    C4_ALWAYS_INLINE void set(NodeType_e t) noexcept { type = t; }
+    C4_ALWAYS_INLINE void add(NodeType_e t) noexcept { type = (type|t); }
+    C4_ALWAYS_INLINE void rem(NodeType_e t) noexcept { type = (type & ~t); }
+    C4_ALWAYS_INLINE void addrem(NodeType_e bits_to_add, NodeType_e bits_to_remove) noexcept { type |= bits_to_add; type &= ~bits_to_remove; }
 
-    C4_ALWAYS_INLINE void set(NodeType_e t) { type = t; }
-    C4_ALWAYS_INLINE void set(type_bits  t) { type = (NodeType_e)t; }
-
-    C4_ALWAYS_INLINE void add(NodeType_e t) { type = (NodeType_e)(type|t); }
-    C4_ALWAYS_INLINE void add(type_bits  t) { type = (NodeType_e)(type|t); }
-
-    C4_ALWAYS_INLINE void rem(NodeType_e t) { type = (NodeType_e)(type & ~t); }
-    C4_ALWAYS_INLINE void rem(type_bits  t) { type = (NodeType_e)(type & ~t); }
-
-    C4_ALWAYS_INLINE void clear() { type = NOTYPE; }
+    C4_ALWAYS_INLINE void clear() noexcept { type = NOTYPE; }
 
 public:
 
-    #if defined(__clang__)
-    #   pragma clang diagnostic push
-    #   pragma clang diagnostic ignored "-Wnull-dereference"
-    #elif defined(__GNUC__)
-    #   pragma GCC diagnostic push
-    #   if __GNUC__ >= 6
-    #       pragma GCC diagnostic ignored "-Wnull-dereference"
-    #   endif
-    #endif
-
-    C4_ALWAYS_INLINE bool is_stream() const { return ((type & STREAM) == STREAM) != 0; }
-    C4_ALWAYS_INLINE bool is_doc() const { return (type & DOC) != 0; }
-    C4_ALWAYS_INLINE bool is_container() const { return (type & (MAP|SEQ|STREAM)) != 0; }
-    C4_ALWAYS_INLINE bool is_map() const { return (type & MAP) != 0; }
-    C4_ALWAYS_INLINE bool is_seq() const { return (type & SEQ) != 0; }
-    C4_ALWAYS_INLINE bool has_key() const { return (type & KEY) != 0; }
-    C4_ALWAYS_INLINE bool has_val() const { return (type & VAL) != 0; }
-    C4_ALWAYS_INLINE bool is_val() const { return (type & KEYVAL) == VAL; }
-    C4_ALWAYS_INLINE bool is_keyval() const { return (type & KEYVAL) == KEYVAL; }
-    C4_ALWAYS_INLINE bool has_key_tag() const { return (type & (KEY|KEYTAG)) == (KEY|KEYTAG); }
-    C4_ALWAYS_INLINE bool has_val_tag() const { return ((type & VALTAG) && (type & (VAL|MAP|SEQ))); }
-    C4_ALWAYS_INLINE bool has_key_anchor() const { return (type & (KEY|KEYANCH)) == (KEY|KEYANCH); }
-    C4_ALWAYS_INLINE bool is_key_anchor() const { return (type & (KEY|KEYANCH)) == (KEY|KEYANCH); }
-    C4_ALWAYS_INLINE bool has_val_anchor() const { return (type & VALANCH) != 0 && (type & (VAL|SEQ|MAP)) != 0; }
-    C4_ALWAYS_INLINE bool is_val_anchor() const { return (type & VALANCH) != 0 && (type & (VAL|SEQ|MAP)) != 0; }
-    C4_ALWAYS_INLINE bool has_anchor() const { return (type & (KEYANCH|VALANCH)) != 0; }
-    C4_ALWAYS_INLINE bool is_anchor() const { return (type & (KEYANCH|VALANCH)) != 0; }
-    C4_ALWAYS_INLINE bool is_key_ref() const { return (type & KEYREF) != 0; }
-    C4_ALWAYS_INLINE bool is_val_ref() const { return (type & VALREF) != 0; }
-    C4_ALWAYS_INLINE bool is_ref() const { return (type & (KEYREF|VALREF)) != 0; }
-    C4_ALWAYS_INLINE bool is_anchor_or_ref() const { return (type & (KEYANCH|VALANCH|KEYREF|VALREF)) != 0; }
-    C4_ALWAYS_INLINE bool is_key_quoted() const { return (type & (KEY|KEYQUO)) == (KEY|KEYQUO); }
-    C4_ALWAYS_INLINE bool is_val_quoted() const { return (type & (VAL|VALQUO)) == (VAL|VALQUO); }
-    C4_ALWAYS_INLINE bool is_quoted() const { return (type & (KEY|KEYQUO)) == (KEY|KEYQUO) || (type & (VAL|VALQUO)) == (VAL|VALQUO); }
-
-    // these predicates are a work in progress and subject to change. Don't use yet.
-    C4_ALWAYS_INLINE bool default_block() const { return (type & (_WIP_STYLE_BLOCK|_WIP_STYLE_FLOW_ML|_WIP_STYLE_FLOW_SL)) == 0; }
-    C4_ALWAYS_INLINE bool marked_block() const { return (type & (_WIP_STYLE_BLOCK)) != 0; }
-    C4_ALWAYS_INLINE bool marked_flow_sl() const { return (type & (_WIP_STYLE_FLOW_SL)) != 0; }
-    C4_ALWAYS_INLINE bool marked_flow_ml() const { return (type & (_WIP_STYLE_FLOW_ML)) != 0; }
-    C4_ALWAYS_INLINE bool marked_flow() const { return (type & (_WIP_STYLE_FLOW_ML|_WIP_STYLE_FLOW_SL)) != 0; }
-    C4_ALWAYS_INLINE bool key_marked_literal() const { return (type & (_WIP_KEY_LITERAL)) != 0; }
-    C4_ALWAYS_INLINE bool val_marked_literal() const { return (type & (_WIP_VAL_LITERAL)) != 0; }
-    C4_ALWAYS_INLINE bool key_marked_folded() const { return (type & (_WIP_KEY_FOLDED)) != 0; }
-    C4_ALWAYS_INLINE bool val_marked_folded() const { return (type & (_WIP_VAL_FOLDED)) != 0; }
-    C4_ALWAYS_INLINE bool key_marked_squo() const { return (type & (_WIP_KEY_SQUO)) != 0; }
-    C4_ALWAYS_INLINE bool val_marked_squo() const { return (type & (_WIP_VAL_SQUO)) != 0; }
-    C4_ALWAYS_INLINE bool key_marked_dquo() const { return (type & (_WIP_KEY_DQUO)) != 0; }
-    C4_ALWAYS_INLINE bool val_marked_dquo() const { return (type & (_WIP_VAL_DQUO)) != 0; }
-    C4_ALWAYS_INLINE bool key_marked_plain() const { return (type & (_WIP_KEY_PLAIN)) != 0; }
-    C4_ALWAYS_INLINE bool val_marked_plain() const { return (type & (_WIP_VAL_PLAIN)) != 0; }
-
-    #if defined(__clang__)
-    #   pragma clang diagnostic pop
-    #elif defined(__GNUC__)
-    #   pragma GCC diagnostic pop
-    #endif
-
-};
-
-
-//-----------------------------------------------------------------------------
-//-----------------------------------------------------------------------------
-//-----------------------------------------------------------------------------
-
-/** a node scalar is a csubstr, which may be tagged and anchored. */
-struct NodeScalar
-{
-    csubstr tag;
-    csubstr scalar;
-    csubstr anchor;
+    C4_ALWAYS_INLINE operator NodeType_e      & C4_RESTRICT ()       noexcept { return type; }
+    C4_ALWAYS_INLINE operator NodeType_e const& C4_RESTRICT () const noexcept { return type; }
 
 public:
 
-    /// initialize as an empty scalar
-    inline NodeScalar() noexcept : tag(), scalar(), anchor() {}
+    /** @name node type string conversion
+     * @{ */
 
-    /// initialize as an untagged scalar
-    template<size_t N>
-    inline NodeScalar(const char (&s)[N]) noexcept : tag(), scalar(s), anchor() {}
-    inline NodeScalar(csubstr      s    ) noexcept : tag(), scalar(s), anchor() {}
+    /** return a preset string based on the node type */
+    C4_ALWAYS_INLINE const char *type_str() const noexcept { return type_str(type); }
+    /** return a preset string based on the node type */
+    static const char* type_str(NodeType_e t) noexcept;
 
-    /// initialize as a tagged scalar
-    template<size_t N, size_t M>
-    inline NodeScalar(const char (&t)[N], const char (&s)[N]) noexcept : tag(t), scalar(s), anchor() {}
-    inline NodeScalar(csubstr      t    , csubstr      s    ) noexcept : tag(t), scalar(s), anchor() {}
+    /** fill a buffer with the node type flags. If the buffer is too small, returns {null, len} */
+    C4_ALWAYS_INLINE csubstr type_str(substr buf) const noexcept { return type_str(buf, type); }
+    /** fill a buffer with the node type flags. If the buffer is too small, returns {null, len} */
+    static csubstr type_str(substr buf, NodeType_e t) noexcept;
 
 public:
 
-    ~NodeScalar() noexcept = default;
-    NodeScalar(NodeScalar &&) noexcept = default;
-    NodeScalar(NodeScalar const&) noexcept = default;
-    NodeScalar& operator= (NodeScalar &&) noexcept = default;
-    NodeScalar& operator= (NodeScalar const&) noexcept = default;
+    /** @name node type queries
+     * @{ */
+
+    C4_ALWAYS_INLINE bool is_notype()         const noexcept { return type == NOTYPE; }
+    C4_ALWAYS_INLINE bool is_stream()         const noexcept { return ((type & STREAM) == STREAM) != 0; }
+    C4_ALWAYS_INLINE bool is_doc()            const noexcept { return (type & DOC) != 0; }
+    C4_ALWAYS_INLINE bool is_container()      const noexcept { return (type & (MAP|SEQ|STREAM)) != 0; }
+    C4_ALWAYS_INLINE bool is_map()            const noexcept { return (type & MAP) != 0; }
+    C4_ALWAYS_INLINE bool is_seq()            const noexcept { return (type & SEQ) != 0; }
+    C4_ALWAYS_INLINE bool has_key()           const noexcept { return (type & KEY) != 0; }
+    C4_ALWAYS_INLINE bool has_val()           const noexcept { return (type & VAL) != 0; }
+    C4_ALWAYS_INLINE bool is_val()            const noexcept { return (type & KEYVAL) == VAL; }
+    C4_ALWAYS_INLINE bool is_keyval()         const noexcept { return (type & KEYVAL) == KEYVAL; }
+    C4_ALWAYS_INLINE bool has_key_tag()       const noexcept { return (type & KEYTAG) != 0; }
+    C4_ALWAYS_INLINE bool has_val_tag()       const noexcept { return (type & VALTAG) != 0; }
+    C4_ALWAYS_INLINE bool has_key_anchor()    const noexcept { return (type & KEYANCH) != 0; }
+    C4_ALWAYS_INLINE bool has_val_anchor()    const noexcept { return (type & VALANCH) != 0; }
+    C4_ALWAYS_INLINE bool has_anchor()        const noexcept { return (type & (KEYANCH|VALANCH)) != 0; }
+    C4_ALWAYS_INLINE bool is_key_ref()        const noexcept { return (type & KEYREF) != 0; }
+    C4_ALWAYS_INLINE bool is_val_ref()        const noexcept { return (type & VALREF) != 0; }
+    C4_ALWAYS_INLINE bool is_ref()            const noexcept { return (type & (KEYREF|VALREF)) != 0; }
+
+    C4_ALWAYS_INLINE bool is_key_unfiltered() const noexcept { return (type & (KEY_UNFILT)) != 0; }
+    C4_ALWAYS_INLINE bool is_val_unfiltered() const noexcept { return (type & (VAL_UNFILT)) != 0; }
+
+    RYML_DEPRECATED("use has_key_anchor()")    bool is_key_anchor() const noexcept { return has_key_anchor(); }
+    RYML_DEPRECATED("use has_val_anchor()")    bool is_val_anchor() const noexcept { return has_val_anchor(); }
+    RYML_DEPRECATED("use has_anchor()")        bool is_anchor() const noexcept { return has_anchor(); }
+    RYML_DEPRECATED("use has_anchor() || is_ref()") bool is_anchor_or_ref() const noexcept { return has_anchor() || is_ref(); }
+    /** @} */
 
 public:
 
-    bool empty() const noexcept { return tag.empty() && scalar.empty() && anchor.empty(); }
+    /** @name container+scalar style queries
+     * @{ */
+
+    C4_ALWAYS_INLINE bool is_container_styled() const noexcept { return (type & (CONTAINER_STYLE)) != 0; }
+    C4_ALWAYS_INLINE bool is_block() const noexcept { return (type & (BLOCK)) != 0; }
+    C4_ALWAYS_INLINE bool is_flow_sl() const noexcept { return (type & (FLOW_SL)) != 0; }
+    C4_ALWAYS_INLINE bool is_flow_ml() const noexcept { return (type & (FLOW_ML)) != 0; }
+    C4_ALWAYS_INLINE bool is_flow() const noexcept { return (type & (FLOW_ML|FLOW_SL)) != 0; }
+
+    C4_ALWAYS_INLINE bool is_key_styled() const noexcept { return (type & (KEY_STYLE)) != 0; }
+    C4_ALWAYS_INLINE bool is_val_styled() const noexcept { return (type & (VAL_STYLE)) != 0; }
+    C4_ALWAYS_INLINE bool is_key_literal() const noexcept { return (type & (KEY_LITERAL)) != 0; }
+    C4_ALWAYS_INLINE bool is_val_literal() const noexcept { return (type & (VAL_LITERAL)) != 0; }
+    C4_ALWAYS_INLINE bool is_key_folded() const noexcept { return (type & (KEY_FOLDED)) != 0; }
+    C4_ALWAYS_INLINE bool is_val_folded() const noexcept { return (type & (VAL_FOLDED)) != 0; }
+    C4_ALWAYS_INLINE bool is_key_squo() const noexcept { return (type & (KEY_SQUO)) != 0; }
+    C4_ALWAYS_INLINE bool is_val_squo() const noexcept { return (type & (VAL_SQUO)) != 0; }
+    C4_ALWAYS_INLINE bool is_key_dquo() const noexcept { return (type & (KEY_DQUO)) != 0; }
+    C4_ALWAYS_INLINE bool is_val_dquo() const noexcept { return (type & (VAL_DQUO)) != 0; }
+    C4_ALWAYS_INLINE bool is_key_plain() const noexcept { return (type & (KEY_PLAIN)) != 0; }
+    C4_ALWAYS_INLINE bool is_val_plain() const noexcept { return (type & (VAL_PLAIN)) != 0; }
+    C4_ALWAYS_INLINE bool is_key_quoted() const noexcept { return (type & KEYQUO) != 0; }
+    C4_ALWAYS_INLINE bool is_val_quoted() const noexcept { return (type & VALQUO) != 0; }
+    C4_ALWAYS_INLINE bool is_quoted() const noexcept { return (type & (KEYQUO|VALQUO)) != 0; }
+
+    C4_ALWAYS_INLINE void set_container_style(NodeType_e style) noexcept { type = ((style & CONTAINER_STYLE) | (type & ~CONTAINER_STYLE)); }
+    C4_ALWAYS_INLINE void set_key_style(NodeType_e style) noexcept { type = ((style & KEY_STYLE) | (type & ~KEY_STYLE)); }
+    C4_ALWAYS_INLINE void set_val_style(NodeType_e style) noexcept { type = ((style & VAL_STYLE) | (type & ~VAL_STYLE)); }
 
-    void clear() noexcept { tag.clear(); scalar.clear(); anchor.clear(); }
+    /** @} */
 
-    void set_ref_maybe_replacing_scalar(csubstr ref, bool has_scalar) noexcept
-    {
-        csubstr trimmed = ref.begins_with('*') ? ref.sub(1) : ref;
-        anchor = trimmed;
-        if((!has_scalar) || !scalar.ends_with(trimmed))
-            scalar = ref;
-    }
 };
-C4_MUST_BE_TRIVIAL_COPY(NodeScalar);
 
 
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 
-/** convenience class to initialize nodes */
-struct NodeInit
-{
+/** @name scalar style helpers
+ * @{ */
 
-    NodeType   type;
-    NodeScalar key;
-    NodeScalar val;
+/** choose a YAML emitting style based on the scalar's contents */
+RYML_EXPORT NodeType_e scalar_style_choose(csubstr scalar) noexcept;
 
-public:
+/** choose a json style based on the scalar's contents */
+RYML_EXPORT NodeType_e scalar_style_json_choose(csubstr scalar) noexcept;
 
-    /// initialize as an empty node
-    NodeInit() : type(NOTYPE), key(), val() {}
-    /// initialize as a typed node
-    NodeInit(NodeType_e t) : type(t), key(), val() {}
-    /// initialize as a sequence member
-    NodeInit(NodeScalar const& v) : type(VAL), key(), val(v) { _add_flags(); }
-    /// initialize as a mapping member
-    NodeInit(              NodeScalar const& k, NodeScalar const& v) : type(KEYVAL), key(k.tag, k.scalar), val(v.tag, v.scalar) { _add_flags(); }
-    /// initialize as a mapping member with explicit type
-    NodeInit(NodeType_e t, NodeScalar const& k, NodeScalar const& v) : type(t     ), key(k.tag, k.scalar), val(v.tag, v.scalar) { _add_flags(); }
-    /// initialize as a mapping member with explicit type (eg SEQ or MAP)
-    NodeInit(NodeType_e t, NodeScalar const& k                     ) : type(t     ), key(k.tag, k.scalar), val(               ) { _add_flags(KEY); }
+/** query whether a scalar can be encoded using single quotes.
+ * It may not be possible, notably when there is leading
+ * whitespace after a newline. */
+RYML_EXPORT bool scalar_style_query_squo(csubstr s) noexcept;
 
-public:
+/** query whether a scalar can be encoded using plain style (no
+ * quotes, not a literal/folded block scalar). */
+RYML_EXPORT bool scalar_style_query_plain(csubstr s) noexcept;
 
-    void clear()
-    {
-        type.clear();
-        key.clear();
-        val.clear();
-    }
+/** YAML-sense query of nullity. Returns true if the scalar points
+ * to `nullptr` or compares equal to one of the exact spellings
+ * `"~"`,`"null"`,`"Null"`,`"NULL"`; no other spelling is tested. */
+RYML_EXPORT inline C4_NO_INLINE bool scalar_is_null(csubstr s) noexcept
+{
+    return s.str == nullptr ||
+        s == "~" ||
+        s == "null" ||
+        s == "Null" ||
+        s == "NULL";
+}
 
-    void _add_flags(type_bits more_flags=0)
-    {
-        type = (type|more_flags);
-        if( ! key.tag.empty())
-            type = (type|KEYTAG);
-        if( ! val.tag.empty())
-            type = (type|VALTAG);
-        if( ! key.anchor.empty())
-            type = (type|KEYANCH);
-        if( ! val.anchor.empty())
-            type = (type|VALANCH);
-    }
+/** @} */
 
-    bool _check() const
-    {
-        // key cannot be empty
-        RYML_ASSERT(key.scalar.empty() == ((type & KEY) == 0));
-        // key tag cannot be empty
-        RYML_ASSERT(key.tag.empty() == ((type & KEYTAG) == 0));
-        // val may be empty even though VAL is set. But when VAL is not set, val must be empty
-        RYML_ASSERT(((type & VAL) != 0) || val.scalar.empty());
-        // val tag cannot be empty
-        RYML_ASSERT(val.tag.empty() == ((type & VALTAG) == 0));
-        return true;
-    }
-};
 
+/** @} */
 
-//-----------------------------------------------------------------------------
-//-----------------------------------------------------------------------------
-//-----------------------------------------------------------------------------
+} // namespace yml
+} // namespace c4
 
-/** contains the data for each YAML node. */
-struct NodeData
-{
-    NodeType   m_type;
+C4_SUPPRESS_WARNING_MSVC_POP
+C4_SUPPRESS_WARNING_GCC_CLANG_POP
 
-    NodeScalar m_key;
-    NodeScalar m_val;
+#endif /* C4_YML_NODE_TYPE_HPP_ */
 
-    size_t     m_parent;
-    size_t     m_first_child;
-    size_t     m_last_child;
-    size_t     m_next_sibling;
-    size_t     m_prev_sibling;
-};
-C4_MUST_BE_TRIVIAL_COPY(NodeData);
 
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/node_type.hpp)
 
-//-----------------------------------------------------------------------------
-//-----------------------------------------------------------------------------
-//-----------------------------------------------------------------------------
 
-class RYML_EXPORT Tree
-{
-public:
 
-    /** @name construction and assignment */
-    /** @{ */
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/c4/yml/tag.hpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/tag.hpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
 
-    Tree() : Tree(get_callbacks()) {}
-    Tree(Callbacks const& cb);
-    Tree(size_t node_capacity, size_t arena_capacity=0) : Tree(node_capacity, arena_capacity, get_callbacks()) {}
-    Tree(size_t node_capacity, size_t arena_capacity, Callbacks const& cb);
+#ifndef _C4_YML_TAG_HPP_
+#define _C4_YML_TAG_HPP_
 
-    ~Tree();
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/common.hpp
+//#include <c4/yml/common.hpp>
+#if !defined(C4_YML_COMMON_HPP_) && !defined(_C4_YML_COMMON_HPP_)
+#error "amalgamate: file c4/yml/common.hpp must have been included at this point"
+#endif /* C4_YML_COMMON_HPP_ */
 
-    Tree(Tree const& that) noexcept;
-    Tree(Tree     && that) noexcept;
 
-    Tree& operator= (Tree const& that) noexcept;
-    Tree& operator= (Tree     && that) noexcept;
+namespace c4 {
+namespace yml {
 
-    /** @} */
+class Tree;
 
-public:
+/** @addtogroup doc_tag_utils
+ *
+ * @{
+ */
 
-    /** @name memory and sizing */
-    /** @{ */
 
-    void reserve(size_t node_capacity);
+#ifndef RYML_MAX_TAG_DIRECTIVES
+/** the maximum number of tag directives in a Tree */
+#define RYML_MAX_TAG_DIRECTIVES 4
+#endif
 
-    /** clear the tree and zero every node
-     * @note does NOT clear the arena
-     * @see clear_arena() */
-    void clear();
-    inline void clear_arena() { m_arena_pos = 0; }
+/** the integral type necessary to cover all the bits marking node tags */
+using tag_bits = uint16_t;
 
-    inline bool   empty() const { return m_size == 0; }
+/** identifiers for the standard YAML type tags (sequential values, not independent bit flags) */
+typedef enum : tag_bits {
+    TAG_NONE      =  0,
+    // container types
+    TAG_MAP       =  1, /**< !!map   Unordered set of key: value pairs without duplicates. @see https://yaml.org/type/map.html */
+    TAG_OMAP      =  2, /**< !!omap  Ordered sequence of key: value pairs without duplicates. @see https://yaml.org/type/omap.html */
+    TAG_PAIRS     =  3, /**< !!pairs Ordered sequence of key: value pairs allowing duplicates. @see https://yaml.org/type/pairs.html */
+    TAG_SET       =  4, /**< !!set   Unordered set of non-equal values. @see https://yaml.org/type/set.html */
+    TAG_SEQ       =  5, /**< !!seq   Sequence of arbitrary values. @see https://yaml.org/type/seq.html */
+    // scalar types
+    TAG_BINARY    =  6, /**< !!binary A sequence of zero or more octets (8 bit values). @see https://yaml.org/type/binary.html */
+    TAG_BOOL      =  7, /**< !!bool   Mathematical Booleans. @see https://yaml.org/type/bool.html */
+    TAG_FLOAT     =  8, /**< !!float  Floating-point approximation to real numbers. https://yaml.org/type/float.html */
+    TAG_INT       =  9, /**< !!int    Mathematical integers. https://yaml.org/type/int.html */
+    TAG_MERGE     = 10, /**< !!merge  Specify one or more mappings to be merged with the current one. https://yaml.org/type/merge.html */
+    TAG_NULL      = 11, /**< !!null   Devoid of value. https://yaml.org/type/null.html */
+    TAG_STR       = 12, /**< !!str    A sequence of zero or more Unicode characters. https://yaml.org/type/str.html */
+    TAG_TIMESTAMP = 13, /**< !!timestamp A point in time https://yaml.org/type/timestamp.html */
+    TAG_VALUE     = 14, /**< !!value  Specify the default value of a mapping https://yaml.org/type/value.html */
+    TAG_YAML      = 15, /**< !!yaml   Keys for encoding YAML in YAML https://yaml.org/type/yaml.html */
+} YamlTag_e;
 
-    inline size_t size () const { return m_size; }
-    inline size_t capacity() const { return m_cap; }
-    inline size_t slack() const { RYML_ASSERT(m_cap >= m_size); return m_cap - m_size; }
+RYML_EXPORT YamlTag_e to_tag(csubstr tag);
+RYML_EXPORT csubstr from_tag(YamlTag_e tag);
+RYML_EXPORT csubstr from_tag_long(YamlTag_e tag);
+RYML_EXPORT csubstr normalize_tag(csubstr tag);
+RYML_EXPORT csubstr normalize_tag_long(csubstr tag);
+RYML_EXPORT csubstr normalize_tag_long(csubstr tag, substr output);
 
-    inline size_t arena_size() const { return m_arena_pos; }
-    inline size_t arena_capacity() const { return m_arena.len; }
-    inline size_t arena_slack() const { RYML_ASSERT(m_arena.len >= m_arena_pos); return m_arena.len - m_arena_pos; }
+RYML_EXPORT bool is_custom_tag(csubstr tag);
 
-    Callbacks const& callbacks() const { return m_callbacks; }
-    void callbacks(Callbacks const& cb) { m_callbacks = cb; }
 
-    /** @} */
+struct RYML_EXPORT TagDirective
+{
+    /** Eg `!e!` in `%TAG !e! tag:example.com,2000:app/` */
+    csubstr handle;
+    /** Eg `tag:example.com,2000:app/` in `%TAG !e! tag:example.com,2000:app/` */
+    csubstr prefix;
+    /** The next node to which this tag directive applies */
+    id_type next_node_id;
 
-public:
+    bool create_from_str(csubstr directive_); ///< leaves next_node_id unfilled
+    bool create_from_str(csubstr directive_, Tree *tree);
+    size_t transform(csubstr tag, substr output, Callbacks const& callbacks) const;
+};
 
-    /** @name node getters */
-    /** @{ */
+struct RYML_EXPORT TagDirectiveRange
+{
+    TagDirective const* C4_RESTRICT b;
+    TagDirective const* C4_RESTRICT e;
+    C4_ALWAYS_INLINE TagDirective const* begin() const noexcept { return b; }
+    C4_ALWAYS_INLINE TagDirective const* end() const noexcept { return e; }
+};
 
-    //! get the index of a node belonging to this tree.
-    //! @p n can be nullptr, in which case a
-    size_t id(NodeData const* n) const
-    {
-        if( ! n)
-        {
-            return NONE;
-        }
-        RYML_ASSERT(n >= m_buf && n < m_buf + m_cap);
-        return static_cast<size_t>(n - m_buf);
-    }
+/** @} */
 
-    //! get a pointer to a node's NodeData.
-    //! i can be NONE, in which case a nullptr is returned
-    inline NodeData *get(size_t i)
-    {
-        if(i == NONE)
-            return nullptr;
-        RYML_ASSERT(i >= 0 && i < m_cap);
-        return m_buf + i;
-    }
-    //! get a pointer to a node's NodeData.
-    //! i can be NONE, in which case a nullptr is returned.
-    inline NodeData const *get(size_t i) const
-    {
-        if(i == NONE)
-            return nullptr;
-        RYML_ASSERT(i >= 0 && i < m_cap);
-        return m_buf + i;
-    }
+} // namespace yml
+} // namespace c4
 
-    //! An if-less form of get() that demands a valid node index.
-    //! This function is implementation only; use at your own risk.
-    inline NodeData       * _p(size_t i)       { RYML_ASSERT(i != NONE && i >= 0 && i < m_cap); return m_buf + i; }
-    //! An if-less form of get() that demands a valid node index.
-    //! This function is implementation only; use at your own risk.
-    inline NodeData const * _p(size_t i) const { RYML_ASSERT(i != NONE && i >= 0 && i < m_cap); return m_buf + i; }
+#endif /* _C4_YML_TAG_HPP_ */
 
-    //! Get the id of the root node
-    size_t root_id()       { if(m_cap == 0) { reserve(16); } RYML_ASSERT(m_cap > 0 && m_size > 0); return 0; }
-    //! Get the id of the root node
-    size_t root_id() const {                                 RYML_ASSERT(m_cap > 0 && m_size > 0); return 0; }
 
-    //! Get a NodeRef of a node by id
-    NodeRef       ref(size_t id);
-    //! Get a NodeRef of a node by id
-    NodeRef const ref(size_t id) const;
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/tag.hpp)
 
-    //! Get the root as a NodeRef
-    NodeRef       rootref();
-    //! Get the root as a NodeRef
-    NodeRef const rootref() const;
 
-    //! find a root child by name, return it as a NodeRef
-    //! @note requires the root to be a map.
-    NodeRef       operator[] (csubstr key);
-    //! find a root child by name, return it as a NodeRef
-    //! @note requires the root to be a map.
-    NodeRef const operator[] (csubstr key) const;
 
-    //! find a root child by index: return the root node's @p i-th child as a NodeRef
-    //! @note @i is NOT the node id, but the child's position
-    NodeRef       operator[] (size_t i);
-    //! find a root child by index: return the root node's @p i-th child as a NodeRef
-    //! @note @i is NOT the node id, but the child's position
-    NodeRef const operator[] (size_t i) const;
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/c4/yml/tree.hpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
 
-    //! get the i-th document of the stream
-    //! @note @i is NOT the node id, but the doc position within the stream
-    NodeRef       docref(size_t i);
-    //! get the i-th document of the stream
-    //! @note @i is NOT the node id, but the doc position within the stream
-    NodeRef const docref(size_t i) const;
+#ifndef _C4_YML_TREE_HPP_
+#define _C4_YML_TREE_HPP_
 
-    /** @} */
+/** @file tree.hpp */
 
-public:
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/error.hpp
+//#include "c4/error.hpp"
+#if !defined(C4_ERROR_HPP_) && !defined(_C4_ERROR_HPP_)
+#error "amalgamate: file c4/error.hpp must have been included at this point"
+#endif /* C4_ERROR_HPP_ */
 
-    /** @name node property getters */
-    /** @{ */
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/types.hpp
+//#include "c4/types.hpp"
+#if !defined(C4_TYPES_HPP_) && !defined(_C4_TYPES_HPP_)
+#error "amalgamate: file c4/types.hpp must have been included at this point"
+#endif /* C4_TYPES_HPP_ */
 
-    NodeType type(size_t node) const { return _p(node)->m_type; }
-    const char* type_str(size_t node) const { return NodeType::type_str(_p(node)->m_type); }
+#ifndef _C4_YML_FWD_HPP_
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/fwd.hpp
+//#include "c4/yml/fwd.hpp"
+#if !defined(C4_YML_FWD_HPP_) && !defined(_C4_YML_FWD_HPP_)
+#error "amalgamate: file c4/yml/fwd.hpp must have been included at this point"
+#endif /* C4_YML_FWD_HPP_ */
 
-    csubstr    const& key       (size_t node) const { RYML_ASSERT(has_key(node)); return _p(node)->m_key.scalar; }
-    csubstr    const& key_tag   (size_t node) const { RYML_ASSERT(has_key_tag(node)); return _p(node)->m_key.tag; }
-    csubstr    const& key_ref   (size_t node) const { RYML_ASSERT(is_key_ref(node) && ! has_key_anchor(node)); return _p(node)->m_key.anchor; }
-    csubstr    const& key_anchor(size_t node) const { RYML_ASSERT( ! is_key_ref(node) && has_key_anchor(node)); return _p(node)->m_key.anchor; }
-    NodeScalar const& keysc     (size_t node) const { RYML_ASSERT(has_key(node)); return _p(node)->m_key; }
+#endif
+#ifndef _C4_YML_COMMON_HPP_
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/common.hpp
+//#include "c4/yml/common.hpp"
+#if !defined(C4_YML_COMMON_HPP_) && !defined(_C4_YML_COMMON_HPP_)
+#error "amalgamate: file c4/yml/common.hpp must have been included at this point"
+#endif /* C4_YML_COMMON_HPP_ */
 
-    csubstr    const& val       (size_t node) const { RYML_ASSERT(has_val(node)); return _p(node)->m_val.scalar; }
-    csubstr    const& val_tag   (size_t node) const { RYML_ASSERT(has_val_tag(node)); return _p(node)->m_val.tag; }
-    csubstr    const& val_ref   (size_t node) const { RYML_ASSERT(is_val_ref(node) && ! has_val_anchor(node)); return _p(node)->m_val.anchor; }
-    csubstr    const& val_anchor(size_t node) const { RYML_ASSERT( ! is_val_ref(node) && has_val_anchor(node)); return _p(node)->m_val.anchor; }
-    NodeScalar const& valsc     (size_t node) const { RYML_ASSERT(has_val(node)); return _p(node)->m_val; }
+#endif
+#ifndef C4_YML_NODE_TYPE_HPP_
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/node_type.hpp
+//#include "c4/yml/node_type.hpp"
+#if !defined(C4_YML_NODE_TYPE_HPP_) && !defined(_C4_YML_NODE_TYPE_HPP_)
+#error "amalgamate: file c4/yml/node_type.hpp must have been included at this point"
+#endif /* C4_YML_NODE_TYPE_HPP_ */
 
-    bool key_is_null(size_t node) const { RYML_ASSERT(has_key(node)); if(is_key_quoted(node)) return false; csubstr s = _p(node)->m_key.scalar; return s == nullptr || s == "~" || s == "null" || s == "Null" || s == "NULL"; }
-    bool val_is_null(size_t node) const { RYML_ASSERT(has_val(node)); if(is_val_quoted(node)) return false; csubstr s = _p(node)->m_val.scalar; return s == nullptr || s == "~" || s == "null" || s == "Null" || s == "NULL"; }
+#endif
+#ifndef _C4_YML_TAG_HPP_
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/tag.hpp
+//#include "c4/yml/tag.hpp"
+#if !defined(C4_YML_TAG_HPP_) && !defined(_C4_YML_TAG_HPP_)
+#error "amalgamate: file c4/yml/tag.hpp must have been included at this point"
+#endif /* C4_YML_TAG_HPP_ */
 
-    /** @} */
+#endif
+#ifndef _C4_CHARCONV_HPP_
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/charconv.hpp
+//#include <c4/charconv.hpp>
+#if !defined(C4_CHARCONV_HPP_) && !defined(_C4_CHARCONV_HPP_)
+#error "amalgamate: file c4/charconv.hpp must have been included at this point"
+#endif /* C4_CHARCONV_HPP_ */
 
-public:
+#endif
 
-    /** @name node type predicates */
-    /** @{ */
+//included above:
+//#include <cmath>
+//included above:
+//#include <limits>
 
-    C4_ALWAYS_INLINE bool is_stream(size_t node) const { return _p(node)->m_type.is_stream(); }
-    C4_ALWAYS_INLINE bool is_doc(size_t node) const { return _p(node)->m_type.is_doc(); }
-    C4_ALWAYS_INLINE bool is_container(size_t node) const { return _p(node)->m_type.is_container(); }
-    C4_ALWAYS_INLINE bool is_map(size_t node) const { return _p(node)->m_type.is_map(); }
-    C4_ALWAYS_INLINE bool is_seq(size_t node) const { return _p(node)->m_type.is_seq(); }
-    C4_ALWAYS_INLINE bool has_key(size_t node) const { return _p(node)->m_type.has_key(); }
-    C4_ALWAYS_INLINE bool has_val(size_t node) const { return _p(node)->m_type.has_val(); }
-    C4_ALWAYS_INLINE bool is_val(size_t node) const { return _p(node)->m_type.is_val(); }
-    C4_ALWAYS_INLINE bool is_keyval(size_t node) const { return _p(node)->m_type.is_keyval(); }
-    C4_ALWAYS_INLINE bool has_key_tag(size_t node) const { return _p(node)->m_type.has_key_tag(); }
-    C4_ALWAYS_INLINE bool has_val_tag(size_t node) const { return _p(node)->m_type.has_val_tag(); }
-    C4_ALWAYS_INLINE bool has_key_anchor(size_t node) const { return _p(node)->m_type.has_key_anchor(); }
-    C4_ALWAYS_INLINE bool is_key_anchor(size_t node) const { return _p(node)->m_type.is_key_anchor(); }
-    C4_ALWAYS_INLINE bool has_val_anchor(size_t node) const { return _p(node)->m_type.has_val_anchor(); }
-    C4_ALWAYS_INLINE bool is_val_anchor(size_t node) const { return _p(node)->m_type.is_val_anchor(); }
-    C4_ALWAYS_INLINE bool has_anchor(size_t node) const { return _p(node)->m_type.has_anchor(); }
-    C4_ALWAYS_INLINE bool is_anchor(size_t node) const { return _p(node)->m_type.is_anchor(); }
-    C4_ALWAYS_INLINE bool is_key_ref(size_t node) const { return _p(node)->m_type.is_key_ref(); }
-    C4_ALWAYS_INLINE bool is_val_ref(size_t node) const { return _p(node)->m_type.is_val_ref(); }
-    C4_ALWAYS_INLINE bool is_ref(size_t node) const { return _p(node)->m_type.is_ref(); }
-    C4_ALWAYS_INLINE bool is_anchor_or_ref(size_t node) const { return _p(node)->m_type.is_anchor_or_ref(); }
-    C4_ALWAYS_INLINE bool is_key_quoted(size_t node) const { return _p(node)->m_type.is_key_quoted(); }
-    C4_ALWAYS_INLINE bool is_val_quoted(size_t node) const { return _p(node)->m_type.is_val_quoted(); }
-    C4_ALWAYS_INLINE bool is_quoted(size_t node) const { return _p(node)->m_type.is_quoted(); }
-
-    C4_ALWAYS_INLINE bool parent_is_seq(size_t node) const { RYML_ASSERT(has_parent(node)); return is_seq(_p(node)->m_parent); }
-    C4_ALWAYS_INLINE bool parent_is_map(size_t node) const { RYML_ASSERT(has_parent(node)); return is_map(_p(node)->m_parent); }
 
-    /** true when key and val are empty, and has no children */
-    bool empty(size_t node) const { return ! has_children(node) && _p(node)->m_key.empty() && (( ! (_p(node)->m_type & VAL)) || _p(node)->m_val.empty()); }
-    /** true when the node has an anchor named a */
-    bool has_anchor(size_t node, csubstr a) const { return _p(node)->m_key.anchor == a || _p(node)->m_val.anchor == a; }
+C4_SUPPRESS_WARNING_MSVC_PUSH
+C4_SUPPRESS_WARNING_MSVC(4251) // needs to have dll-interface to be used by clients of struct
+C4_SUPPRESS_WARNING_MSVC(4296) // expression is always 'boolean_value'
+C4_SUPPRESS_WARNING_GCC_CLANG_PUSH
+C4_SUPPRESS_WARNING_GCC_CLANG("-Wold-style-cast")
+C4_SUPPRESS_WARNING_GCC("-Wuseless-cast")
+C4_SUPPRESS_WARNING_GCC("-Wtype-limits")
 
-    /** @} */
 
-public:
+namespace c4 {
+namespace yml {
 
-    /** @name hierarchy predicates */
-    /** @{ */
+/** encode a floating point value to a string: NaN is written as ".nan" and +/-infinity as ".inf" / "-.inf"; any other value is forwarded to to_chars(). */
+template<class T>
+size_t to_chars_float(substr buf, T val)
+{
+    C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wfloat-equal");
+    static_assert(std::is_floating_point<T>::value, "must be floating point");
+    if(C4_UNLIKELY(std::isnan(val)))
+        return to_chars(buf, csubstr(".nan"));
+    else if(C4_UNLIKELY(val == std::numeric_limits<T>::infinity()))
+        return to_chars(buf, csubstr(".inf"));
+    else if(C4_UNLIKELY(val == -std::numeric_limits<T>::infinity()))
+        return to_chars(buf, csubstr("-.inf"));
+    return to_chars(buf, val);
+    C4_SUPPRESS_WARNING_GCC_CLANG_POP
+}
 
-    bool is_root(size_t node) const { RYML_ASSERT(_p(node)->m_parent != NONE || node == 0); return _p(node)->m_parent == NONE; }
 
-    bool has_parent(size_t node) const { return _p(node)->m_parent != NONE; }
+/** decode a floating point from string. Tries from_chars() first; failing that, accepts a leading '+'
+ * and the special values .nan/.NaN/.NAN, .inf/.Inf/.INF, -.inf/-.Inf/-.INF. @return true if the string was accepted */
+template<class T>
+bool from_chars_float(csubstr buf, T *C4_RESTRICT val)
+{
+    static_assert(std::is_floating_point<T>::value, "must be floating point");
+    if(C4_LIKELY(from_chars(buf, val)))
+    {
+        return true;
+    }
+    else if(C4_UNLIKELY(buf.begins_with('+')))
+    {
+        return from_chars(buf.sub(1), val);
+    }
+    else if(C4_UNLIKELY(buf == ".nan" || buf == ".NaN" || buf == ".NAN"))
+    {
+        *val = std::numeric_limits<T>::quiet_NaN();
+        return true;
+    }
+    else if(C4_UNLIKELY(buf == ".inf" || buf == ".Inf" || buf == ".INF"))
+    {
+        *val = std::numeric_limits<T>::infinity();
+        return true;
+    }
+    else if(C4_UNLIKELY(buf == "-.inf" || buf == "-.Inf" || buf == "-.INF"))
+    {
+        *val = -std::numeric_limits<T>::infinity();
+        return true;
+    }
+    else
+    {
+        return false;
+    }
+}
 
-    bool has_child(size_t node, csubstr key) const { return find_child(node, key) != npos; }
-    bool has_child(size_t node, size_t ch) const { return child_pos(node, ch) != npos; }
-    bool has_children(size_t node) const { return _p(node)->m_first_child != NONE; }
 
-    bool has_sibling(size_t node, size_t sib) const { return is_root(node) ? sib==node : child_pos(_p(node)->m_parent, sib) != npos; }
-    bool has_sibling(size_t node, csubstr key) const { return find_sibling(node, key) != npos; }
-    /** counts with *this */
-    bool has_siblings(size_t /*node*/) const { return true; }
-    /** does not count with *this */
-    bool has_other_siblings(size_t node) const { return is_root(node) ? false : (_p(_p(node)->m_parent)->m_first_child != _p(_p(node)->m_parent)->m_last_child); }
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
 
-    /** @} */
 
-public:
+/** @addtogroup doc_tree
+ *
+ * @{
+ */
 
-    /** @name hierarchy getters */
-    /** @{ */
+/** a node scalar is a csubstr, which may be tagged and anchored. */
+struct NodeScalar
+{
+    csubstr tag;
+    csubstr scalar;
+    csubstr anchor;
 
-    size_t parent(size_t node) const { return _p(node)->m_parent; }
+public:
 
-    size_t prev_sibling(size_t node) const { return _p(node)->m_prev_sibling; }
-    size_t next_sibling(size_t node) const { return _p(node)->m_next_sibling; }
+    /// initialize as an empty scalar
+    inline NodeScalar() noexcept : tag(), scalar(), anchor() {}
 
-    /** O(#num_children) */
-    size_t num_children(size_t node) const;
-    size_t child_pos(size_t node, size_t ch) const;
-    size_t first_child(size_t node) const { return _p(node)->m_first_child; }
-    size_t last_child(size_t node) const { return _p(node)->m_last_child; }
-    size_t child(size_t node, size_t pos) const;
-    size_t find_child(size_t node, csubstr const& key) const;
+    /// initialize as an untagged scalar
+    template<size_t N>
+    inline NodeScalar(const char (&s)[N]) noexcept : tag(), scalar(s), anchor() {}
+    inline NodeScalar(csubstr      s    ) noexcept : tag(), scalar(s), anchor() {}
 
-    /** O(#num_siblings) */
-    /** counts with this */
-    size_t num_siblings(size_t node) const { return is_root(node) ? 1 : num_children(_p(node)->m_parent); }
-    /** does not count with this */
-    size_t num_other_siblings(size_t node) const { size_t ns = num_siblings(node); RYML_ASSERT(ns > 0); return ns-1; }
-    size_t sibling_pos(size_t node, size_t sib) const { RYML_ASSERT( ! is_root(node) || node == root_id()); return child_pos(_p(node)->m_parent, sib); }
-    size_t first_sibling(size_t node) const { return is_root(node) ? node : _p(_p(node)->m_parent)->m_first_child; }
-    size_t last_sibling(size_t node) const { return is_root(node) ? node : _p(_p(node)->m_parent)->m_last_child; }
-    size_t sibling(size_t node, size_t pos) const { return child(_p(node)->m_parent, pos); }
-    size_t find_sibling(size_t node, csubstr const& key) const { return find_child(_p(node)->m_parent, key); }
+    /// initialize as a tagged scalar: @p t is the tag and @p s the scalar (the two array extents are deduced independently)
+    template<size_t N, size_t M>
+    inline NodeScalar(const char (&t)[N], const char (&s)[M]) noexcept : tag(t), scalar(s), anchor() {}
+    inline NodeScalar(csubstr      t    , csubstr      s    ) noexcept : tag(t), scalar(s), anchor() {}
 
-    size_t doc(size_t i) const { size_t rid = root_id(); RYML_ASSERT(is_stream(rid)); return child(rid, i); } //!< gets the @p i document node index. requires that the root node is a stream.
+public:
 
-    /** @} */
+    ~NodeScalar() noexcept = default;
+    NodeScalar(NodeScalar &&) noexcept = default;
+    NodeScalar(NodeScalar const&) noexcept = default;
+    NodeScalar& operator= (NodeScalar &&) noexcept = default;
+    NodeScalar& operator= (NodeScalar const&) noexcept = default;
 
 public:
 
-    /** @name node modifiers */
-    /** @{ */
+    bool empty() const noexcept { return tag.empty() && scalar.empty() && anchor.empty(); }
 
-    void to_keyval(size_t node, csubstr key, csubstr val, type_bits more_flags=0);
-    void to_map(size_t node, csubstr key, type_bits more_flags=0);
-    void to_seq(size_t node, csubstr key, type_bits more_flags=0);
-    void to_val(size_t node, csubstr val, type_bits more_flags=0);
-    void to_map(size_t node, type_bits more_flags=0);
-    void to_seq(size_t node, type_bits more_flags=0);
-    void to_doc(size_t node, type_bits more_flags=0);
-    void to_stream(size_t node, type_bits more_flags=0);
+    void clear() noexcept { tag.clear(); scalar.clear(); anchor.clear(); }
 
-    void set_key(size_t node, csubstr key) { RYML_ASSERT(has_key(node)); _p(node)->m_key.scalar = key; }
-    void set_val(size_t node, csubstr val) { RYML_ASSERT(has_val(node)); _p(node)->m_val.scalar = val; }
+    void set_ref_maybe_replacing_scalar(csubstr ref, bool has_scalar) RYML_NOEXCEPT // record ref as this scalar's reference name
+    {
+        csubstr trimmed = ref.begins_with('*') ? ref.sub(1) : ref; // drop one leading '*', if present
+        anchor = trimmed; // the name (without '*') is kept in the anchor member
+        if((!has_scalar) || !scalar.ends_with(trimmed)) // an existing scalar is kept only when it already ends with the name
+            scalar = ref; // otherwise the scalar becomes the ref text exactly as given
+    }
+};
+C4_MUST_BE_TRIVIAL_COPY(NodeScalar);
 
-    void set_key_tag(size_t node, csubstr tag) { RYML_ASSERT(has_key(node)); _p(node)->m_key.tag = tag; _add_flags(node, KEYTAG); }
-    void set_val_tag(size_t node, csubstr tag) { RYML_ASSERT(has_val(node) || is_container(node)); _p(node)->m_val.tag = tag; _add_flags(node, VALTAG); }
 
-    void set_key_anchor(size_t node, csubstr anchor) { RYML_ASSERT( ! is_key_ref(node)); _p(node)->m_key.anchor = anchor.triml('&'); _add_flags(node, KEYANCH); }
-    void set_val_anchor(size_t node, csubstr anchor) { RYML_ASSERT( ! is_val_ref(node)); _p(node)->m_val.anchor = anchor.triml('&'); _add_flags(node, VALANCH); }
-    void set_key_ref   (size_t node, csubstr ref   ) { RYML_ASSERT( ! has_key_anchor(node)); NodeData* C4_RESTRICT n = _p(node); n->m_key.set_ref_maybe_replacing_scalar(ref, n->m_type.has_key()); _add_flags(node, KEY|KEYREF); }
-    void set_val_ref   (size_t node, csubstr ref   ) { RYML_ASSERT( ! has_val_anchor(node)); NodeData* C4_RESTRICT n = _p(node); n->m_val.set_ref_maybe_replacing_scalar(ref, n->m_type.has_val()); _add_flags(node, VAL|VALREF); }
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
 
-    void rem_key_anchor(size_t node) { _p(node)->m_key.anchor.clear(); _rem_flags(node, KEYANCH); }
-    void rem_val_anchor(size_t node) { _p(node)->m_val.anchor.clear(); _rem_flags(node, VALANCH); }
-    void rem_key_ref   (size_t node) { _p(node)->m_key.anchor.clear(); _rem_flags(node, KEYREF); }
-    void rem_val_ref   (size_t node) { _p(node)->m_val.anchor.clear(); _rem_flags(node, VALREF); }
-    void rem_anchor_ref(size_t node) { _p(node)->m_key.anchor.clear(); _p(node)->m_val.anchor.clear(); _rem_flags(node, KEYANCH|VALANCH|KEYREF|VALREF); }
+/** convenience class to initialize nodes */
+struct NodeInit
+{
 
-    /** @} */
+    NodeType   type;
+    NodeScalar key;
+    NodeScalar val;
 
 public:
 
-    /** @name tree modifiers */
-    /** @{ */
-
-    /** reorder the tree in memory so that all the nodes are stored
-     * in a linear sequence when visited in depth-first order.
-     * This will invalidate existing ids, since the node id is its
-     * position in the node array. */
-    void reorder();
-
-    /** Resolve references (aliases <- anchors) in the tree.
-     *
-     * Dereferencing is opt-in; after parsing, Tree::resolve()
-     * has to be called explicitly for obtaining resolved references in the
-     * tree. This method will resolve all references and substitute the
-     * anchored values in place of the reference.
-     *
-     * This method first does a full traversal of the tree to gather all
-     * anchors and references in a separate collection, then it goes through
-     * that collection to locate the names, which it does by obeying the YAML
-     * standard diktat that "an alias node refers to the most recent node in
-     * the serialization having the specified anchor"
-     *
-     * So, depending on the number of anchor/alias nodes, this is a
-     * potentially expensive operation, with a best-case linear complexity
-     * (from the initial traversal). This potential cost is the reason for
-     * requiring an explicit call.
-     */
-    void resolve();
-
-    /** @} */
+    /// initialize as an empty node
+    NodeInit() : type(NOTYPE), key(), val() {}
+    /// initialize as a typed node
+    NodeInit(NodeType_e t) : type(t), key(), val() {}
+    /// initialize as a sequence member
+    NodeInit(NodeScalar const& v) : type(VAL), key(), val(v) { _add_flags(); }
+    /// initialize as a sequence member with explicit type
+    NodeInit(NodeScalar const& v, NodeType_e t) : type(t|VAL), key(), val(v) { _add_flags(); }
+    /// initialize as a mapping member
+    NodeInit(              NodeScalar const& k, NodeScalar const& v) : type(KEYVAL), key(k), val(v) { _add_flags(); }
+    /// initialize as a mapping member with explicit type
+    NodeInit(NodeType_e t, NodeScalar const& k, NodeScalar const& v) : type(t), key(k), val(v) { _add_flags(); }
+    /// initialize as a mapping member with explicit type (eg for SEQ or MAP)
+    NodeInit(NodeType_e t, NodeScalar const& k                     ) : type(t), key(k), val( ) { _add_flags(KEY); }
 
 public:
 
-    /** @name tag directives */
-    /** @{ */
-
-    void resolve_tags();
-
-    size_t num_tag_directives() const;
-    size_t add_tag_directive(TagDirective const& td);
-    void clear_tag_directives();
-
-    size_t resolve_tag(substr output, csubstr tag, size_t node_id) const;
-    csubstr resolve_tag_sub(substr output, csubstr tag, size_t node_id) const
+    void clear()
     {
-        size_t needed = resolve_tag(output, tag, node_id);
-        return needed <= output.len ? output.first(needed) : output;
+        type.clear();
+        key.clear();
+        val.clear();
     }
 
-    using tag_directive_const_iterator = TagDirective const*;
-    tag_directive_const_iterator begin_tag_directives() const { return m_tag_directives; }
-    tag_directive_const_iterator end_tag_directives() const { return m_tag_directives + num_tag_directives(); }
-
-    struct TagDirectiveProxy
+    void _add_flags(type_bits more_flags=0)
     {
-        tag_directive_const_iterator b, e;
-        tag_directive_const_iterator begin() const { return b; }
-        tag_directive_const_iterator end() const { return e; }
-    };
-
-    TagDirectiveProxy tag_directives() const { return TagDirectiveProxy{begin_tag_directives(), end_tag_directives()}; }
-
-    /** @} */
-
-public:
-
-    /** @name modifying hierarchy */
-    /** @{ */
+        type = (type|more_flags);
+        if( ! key.tag.empty())
+            type = (type|KEYTAG);
+        if( ! val.tag.empty())
+            type = (type|VALTAG);
+        if( ! key.anchor.empty())
+            type = (type|KEYANCH);
+        if( ! val.anchor.empty())
+            type = (type|VALANCH);
+    }
 
-    /** create and insert a new child of "parent". insert after the (to-be)
-     * sibling "after", which must be a child of "parent". To insert as the
-     * first child, set after to NONE */
-    inline size_t insert_child(size_t parent, size_t after)
+    bool _check() const // asserts that the flags in type agree with the key/val contents; always returns true
     {
-        RYML_ASSERT(parent != NONE);
-        RYML_ASSERT(is_container(parent) || is_root(parent));
-        RYML_ASSERT(after == NONE || has_child(parent, after));
-        size_t child = _claim();
-        _set_hierarchy(child, parent, after);
-        return child;
+        // the key scalar must be non-empty if and only if KEY is set
+        RYML_ASSERT(key.scalar.empty() == ((type & KEY) == 0));
+        // the key tag must be non-empty if and only if KEYTAG is set
+        RYML_ASSERT(key.tag.empty() == ((type & KEYTAG) == 0));
+        // val may be empty even though VAL is set. But when VAL is not set, val must be empty
+        RYML_ASSERT(((type & VAL) != 0) || val.scalar.empty());
+        // the val tag must be non-empty if and only if VALTAG is set
+        RYML_ASSERT(val.tag.empty() == ((type & VALTAG) == 0));
+        return true;
     }
-    inline size_t prepend_child(size_t parent) { return insert_child(parent, NONE); }
-    inline size_t  append_child(size_t parent) { return insert_child(parent, last_child(parent)); }
+};
 
-public:
 
-    #if defined(__clang__)
-    #   pragma clang diagnostic push
-    #   pragma clang diagnostic ignored "-Wnull-dereference"
-    #elif defined(__GNUC__)
-    #   pragma GCC diagnostic push
-    #   if __GNUC__ >= 6
-    #       pragma GCC diagnostic ignored "-Wnull-dereference"
-    #   endif
-    #endif
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
 
-    //! create and insert a new sibling of n. insert after "after"
-    inline size_t insert_sibling(size_t node, size_t after)
-    {
-        RYML_ASSERT(node != NONE);
-        RYML_ASSERT( ! is_root(node));
-        RYML_ASSERT(parent(node) != NONE);
-        RYML_ASSERT(after == NONE || (has_sibling(node, after) && has_sibling(after, node)));
-        RYML_ASSERT(get(node) != nullptr);
-        return insert_child(get(node)->m_parent, after);
-    }
-    inline size_t prepend_sibling(size_t node) { return insert_sibling(node, NONE); }
-    inline size_t  append_sibling(size_t node) { return insert_sibling(node, last_sibling(node)); }
+/** contains the data for each YAML node. */
+struct NodeData
+{
+    NodeType   m_type;
 
-public:
+    NodeScalar m_key;
+    NodeScalar m_val;
 
-    /** remove an entire branch at once: ie remove the children and the node itself */
-    inline void remove(size_t node)
-    {
-        remove_children(node);
-        _release(node);
-    }
+    id_type    m_parent;
+    id_type    m_first_child;
+    id_type    m_last_child;
+    id_type    m_next_sibling;
+    id_type    m_prev_sibling;
+};
+C4_MUST_BE_TRIVIAL_COPY(NodeData);
 
-    /** remove all the node's children, but keep the node itself */
-    void remove_children(size_t node);
 
-    /** change the @p type of the node to one of MAP, SEQ or VAL.  @p
-     * type must have one and only one of MAP,SEQ,VAL; @p type may
-     * possibly have KEY, but if it does, then the @p node must also
-     * have KEY. Changing to the same type is a no-op. Otherwise,
-     * changing to a different type will initialize the node with an
-     * empty value of the desired type: changing to VAL will
-     * initialize with a null scalar (~), changing to MAP will
-     * initialize with an empty map ({}), and changing to SEQ will
-     * initialize with an empty seq ([]). */
-    bool change_type(size_t node, NodeType type);
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
 
-    bool change_type(size_t node, type_bits type)
-    {
-        return change_type(node, (NodeType)type);
-    }
+class RYML_EXPORT Tree
+{
+public:
 
-    #if defined(__clang__)
-    #   pragma clang diagnostic pop
-    #elif defined(__GNUC__)
-    #   pragma GCC diagnostic pop
-    #endif
+    /** @name construction and assignment */
+    /** @{ */
 
-public:
+    Tree() : Tree(get_callbacks()) {}
+    Tree(Callbacks const& cb);
+    Tree(id_type node_capacity, size_t arena_capacity=0) : Tree(node_capacity, arena_capacity, get_callbacks()) {}
+    Tree(id_type node_capacity, size_t arena_capacity, Callbacks const& cb);
 
-    /** change the node's position in the parent */
-    void move(size_t node, size_t after);
+    ~Tree();
 
-    /** change the node's parent and position */
-    void move(size_t node, size_t new_parent, size_t after);
+    Tree(Tree const& that);
+    Tree(Tree     && that) noexcept;
 
-    /** change the node's parent and position to a different tree
-     * @return the index of the new node in the destination tree */
-    size_t move(Tree * src, size_t node, size_t new_parent, size_t after);
+    Tree& operator= (Tree const& that);
+    Tree& operator= (Tree     && that) RYML_NOEXCEPT;
 
-    /** ensure the first node is a stream. Eg, change this tree
-     *
-     *  DOCMAP
-     *    MAP
-     *      KEYVAL
-     *      KEYVAL
-     *    SEQ
-     *      VAL
-     *
-     * to
-     *
-     *  STREAM
-     *    DOCMAP
-     *      MAP
-     *        KEYVAL
-     *        KEYVAL
-     *      SEQ
-     *        VAL
-     *
-     * If the root is already a stream, this is a no-op.
-     */
-    void set_root_as_stream();
+    /** @} */
 
 public:
 
-    /** recursively duplicate a node from this tree into a new parent,
-     * placing it after one of its children
-     * @return the index of the copy */
-    size_t duplicate(size_t node, size_t new_parent, size_t after);
-    /** recursively duplicate a node from a different tree into a new parent,
-     * placing it after one of its children
-     * @return the index of the copy */
-    size_t duplicate(Tree const* src, size_t node, size_t new_parent, size_t after);
+    /** @name memory and sizing */
+    /** @{ */
 
-    /** recursively duplicate the node's children (but not the node)
-     * @return the index of the last duplicated child */
-    size_t duplicate_children(size_t node, size_t parent, size_t after);
-    /** recursively duplicate the node's children (but not the node), where
-     * the node is from a different tree
-     * @return the index of the last duplicated child */
-    size_t duplicate_children(Tree const* src, size_t node, size_t parent, size_t after);
+    void reserve(id_type node_capacity);
 
-    void duplicate_contents(size_t node, size_t where);
-    void duplicate_contents(Tree const* src, size_t node, size_t where);
+    /** clear the tree and zero every node
+     * @note does NOT clear the arena
+     * @see clear_arena() */
+    void clear();
+    inline void clear_arena() { m_arena_pos = 0; }
 
-    /** duplicate the node's children (but not the node) in a new parent, but
-     * omit repetitions where a duplicated node has the same key (in maps) or
-     * value (in seqs). If one of the duplicated children has the same key
-     * (in maps) or value (in seqs) as one of the parent's children, the one
-     * that is placed closest to the end will prevail. */
-    size_t duplicate_children_no_rep(size_t node, size_t parent, size_t after);
-    size_t duplicate_children_no_rep(Tree const* src, size_t node, size_t parent, size_t after);
+    inline bool   empty() const { return m_size == 0; }
 
-public:
+    inline id_type size() const { return m_size; }
+    inline id_type capacity() const { return m_cap; }
+    inline id_type slack() const { RYML_ASSERT(m_cap >= m_size); return m_cap - m_size; }
 
-    void merge_with(Tree const* src, size_t src_node=NONE, size_t dst_root=NONE);
+    Callbacks const& callbacks() const { return m_callbacks; }
+    void callbacks(Callbacks const& cb) { m_callbacks = cb; }
 
     /** @} */
 
 public:
 
-    /** @name internal string arena */
+    /** @name node getters */
     /** @{ */
 
-    /** get the current size of the tree's internal arena */
-    size_t arena_pos() const { return m_arena_pos; }
-
-    /** get the current arena */
-    substr arena() const { return m_arena.first(m_arena_pos); }
-
-    /** return true if the given substring is part of the tree's string arena */
-    bool in_arena(csubstr s) const
+    //! get the index of a node belonging to this tree.
+    //! @p n can be nullptr, in which case NONE is returned
+    id_type id(NodeData const* n) const
     {
-        return m_arena.is_super(s);
+        if( ! n)
+            return NONE;
+        _RYML_CB_ASSERT(m_callbacks, n >= m_buf && n < m_buf + m_cap);
+        return static_cast<id_type>(n - m_buf);
     }
 
-    /** serialize the given non-floating-point variable to the tree's arena, growing it as
-     * needed to accomodate the serialization.
-     * @note Growing the arena may cause relocation of the entire
-     * existing arena, and thus change the contents of individual nodes.
-     * @see alloc_arena() */
-    template<class T>
-    typename std::enable_if<!std::is_floating_point<T>::value, csubstr>::type
-    to_arena(T const& C4_RESTRICT a)
+    //! get a pointer to a node's NodeData.
+    //! @p node can be NONE, in which case a nullptr is returned
+    inline NodeData *get(id_type node)
     {
-        substr rem(m_arena.sub(m_arena_pos));
-        size_t num = to_chars(rem, a);
-        if(num > rem.len)
-        {
-            rem = _grow_arena(num);
-            num = to_chars(rem, a);
-            RYML_ASSERT(num <= rem.len);
-        }
-        rem = _request_span(num);
-        return rem;
+        if(node == NONE)
+            return nullptr;
+        _RYML_CB_ASSERT(m_callbacks, node >= 0 && node < m_cap);
+        return m_buf + node;
     }
-
-    /** serialize the given floating-point variable to the tree's arena, growing it as
-     * needed to accomodate the serialization.
-     * @note Growing the arena may cause relocation of the entire
-     * existing arena, and thus change the contents of individual nodes.
-     * @see alloc_arena() */
-    template<class T>
-    typename std::enable_if<std::is_floating_point<T>::value, csubstr>::type
-    to_arena(T const& C4_RESTRICT a)
+    //! get a pointer to a node's NodeData.
+    //! @p node can be NONE, in which case a nullptr is returned.
+    inline NodeData const *get(id_type node) const
     {
-        substr rem(m_arena.sub(m_arena_pos));
-        size_t num = to_chars_float(rem, a);
-        if(num > rem.len)
-        {
-            rem = _grow_arena(num);
-            num = to_chars_float(rem, a);
-            RYML_ASSERT(num <= rem.len);
-        }
-        rem = _request_span(num);
-        return rem;
+        if(node == NONE)
+            return nullptr;
+        _RYML_CB_ASSERT(m_callbacks, node >= 0 && node < m_cap);
+        return m_buf + node;
     }
 
-    /** copy the given substr to the tree's arena, growing it by the required size
-     * @note Growing the arena may cause relocation of the entire
-     * existing arena, and thus change the contents of individual nodes.
-     * @see alloc_arena() */
-    substr copy_to_arena(csubstr s)
-    {
-        substr cp = alloc_arena(s.len);
-        RYML_ASSERT(cp.len == s.len);
-        RYML_ASSERT(!s.overlaps(cp));
-        #if (!defined(__clang__)) && (defined(__GNUC__) && __GNUC__ >= 10)
-        C4_SUPPRESS_WARNING_GCC_PUSH
-        C4_SUPPRESS_WARNING_GCC("-Wstringop-overflow=") // no need for terminating \0
-        C4_SUPPRESS_WARNING_GCC( "-Wrestrict") // there's an assert to ensure no violation of restrict behavior
-        #endif
-        memcpy(cp.str, s.str, s.len);
-        #if (!defined(__clang__)) && (defined(__GNUC__) && __GNUC__ >= 10)
-        C4_SUPPRESS_WARNING_GCC_POP
-        #endif
-        return cp;
-    }
+    //! An if-less form of get() that demands a valid node index.
+    //! This function is implementation only; use at your own risk.
+    inline NodeData       * _p(id_type node)       { _RYML_CB_ASSERT(m_callbacks, node != NONE && node >= 0 && node < m_cap); return m_buf + node; }
+    //! An if-less form of get() that demands a valid node index.
+    //! This function is implementation only; use at your own risk.
+    inline NodeData const * _p(id_type node) const { _RYML_CB_ASSERT(m_callbacks, node != NONE && node >= 0 && node < m_cap); return m_buf + node; }
 
-    /** grow the tree's string arena by the given size and return a substr
-     * of the added portion
-     * @note Growing the arena may cause relocation of the entire
-     * existing arena, and thus change the contents of individual nodes. */
-    substr alloc_arena(size_t sz)
-    {
-        if(sz > arena_slack())
-            _grow_arena(sz - arena_slack());
-        substr s = _request_span(sz);
-        return s;
-    }
+    //! Get the id of the root node
+    id_type root_id()       { if(m_cap == 0) { reserve(16); } _RYML_CB_ASSERT(m_callbacks, m_cap > 0 && m_size > 0); return 0; }
+    //! Get the id of the root node
+    id_type root_id() const {                                 _RYML_CB_ASSERT(m_callbacks, m_cap > 0 && m_size > 0); return 0; }
 
-    /** ensure the tree's internal string arena is at least the given capacity
-     * @note Growing the arena may cause relocation of the entire
-     * existing arena, and thus change the contents of individual nodes. */
-    void reserve_arena(size_t arena_cap)
-    {
-        if(arena_cap > m_arena.len)
-        {
-            substr buf;
-            buf.str = (char*) m_callbacks.m_allocate(arena_cap, m_arena.str, m_callbacks.m_user_data);
-            buf.len = arena_cap;
-            if(m_arena.str)
-            {
-                RYML_ASSERT(m_arena.len >= 0);
-                _relocate(buf); // does a memcpy and changes nodes using the arena
-                m_callbacks.m_free(m_arena.str, m_arena.len, m_callbacks.m_user_data);
-            }
-            m_arena = buf;
-        }
-    }
+    //! Get a NodeRef of a node by id
+    NodeRef      ref(id_type node);
+    //! Get a ConstNodeRef of a node by id
+    ConstNodeRef ref(id_type node) const;
+    //! Get a ConstNodeRef of a node by id
+    ConstNodeRef cref(id_type node) const;
 
-    /** @} */
+    //! Get the root as a NodeRef
+    NodeRef      rootref();
+    //! Get the root as a ConstNodeRef
+    ConstNodeRef rootref() const;
+    //! Get the root as a ConstNodeRef
+    ConstNodeRef crootref() const;
 
-private:
+    //! get the i-th document of the stream
+    //! @note @p i is NOT the node id, but the doc position within the stream
+    NodeRef      docref(id_type i);
+    //! get the i-th document of the stream
+    //! @note @p i is NOT the node id, but the doc position within the stream
+    ConstNodeRef docref(id_type i) const;
+    //! get the i-th document of the stream
+    //! @note @p i is NOT the node id, but the doc position within the stream
+    ConstNodeRef cdocref(id_type i) const;
 
-    substr _grow_arena(size_t more)
-    {
-        size_t cap = m_arena_pos + more;
-        cap = cap < 2 * m_arena.len ? 2 * m_arena.len : cap;
-        cap = cap < 64 ? 64 : cap;
-        reserve_arena(cap);
-        return m_arena.sub(m_arena_pos);
-    }
+    //! find a root child by name, return it as a NodeRef
+    //! @note requires the root to be a map.
+    NodeRef      operator[] (csubstr key);
+    //! find a root child by name, return it as a ConstNodeRef
+    //! @note requires the root to be a map.
+    ConstNodeRef operator[] (csubstr key) const;
 
-    substr _request_span(size_t sz)
-    {
-        substr s;
-        s = m_arena.sub(m_arena_pos, sz);
-        m_arena_pos += sz;
-        return s;
-    }
+    //! find a root child by index: return the root node's @p i-th child as a NodeRef
+    //! @note @p i is NOT the node id, but the child's position
+    NodeRef      operator[] (id_type i);
+    //! find a root child by index: return the root node's @p i-th child as a ConstNodeRef
+    //! @note @p i is NOT the node id, but the child's position
+    ConstNodeRef operator[] (id_type i) const;
 
-    substr _relocated(csubstr s, substr next_arena) const
-    {
-        RYML_ASSERT(m_arena.is_super(s));
-        RYML_ASSERT(m_arena.sub(0, m_arena_pos).is_super(s));
-        auto pos = (s.str - m_arena.str);
-        substr r(next_arena.str + pos, s.len);
-        RYML_ASSERT(r.str - next_arena.str == pos);
-        RYML_ASSERT(next_arena.sub(0, m_arena_pos).is_super(r));
-        return r;
-    }
+    /** @} */
 
 public:
 
-    /** @name lookup */
+    /** @name node property getters */
     /** @{ */
 
-    struct lookup_result
-    {
-        size_t  target;
-        size_t  closest;
-        size_t  path_pos;
-        csubstr path;
-
-        inline operator bool() const { return target != NONE; }
-
-        lookup_result() : target(NONE), closest(NONE), path_pos(0), path() {}
-        lookup_result(csubstr path_, size_t start) : target(NONE), closest(start), path_pos(0), path(path_) {}
-
-        /** get the part ot the input path that was resolved */
-        csubstr resolved() const;
-        /** get the part ot the input path that was unresolved */
-        csubstr unresolved() const;
-    };
-
-    /** for example foo.bar[0].baz */
-    lookup_result lookup_path(csubstr path, size_t start=NONE) const;
+    NodeType type(id_type node) const { return _p(node)->m_type; }
+    const char* type_str(id_type node) const { return NodeType::type_str(_p(node)->m_type); }
 
-    /** defaulted lookup: lookup @p path; if the lookup fails, recursively modify
-     * the tree so that the corresponding lookup_path() would return the
-     * default value.
-     * @see lookup_path() */
-    size_t lookup_path_or_modify(csubstr default_value, csubstr path, size_t start=NONE);
+    csubstr    const& key       (id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_key(node)); return _p(node)->m_key.scalar; }
+    csubstr    const& key_tag   (id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_key_tag(node)); return _p(node)->m_key.tag; }
+    csubstr    const& key_ref   (id_type node) const { _RYML_CB_ASSERT(m_callbacks, is_key_ref(node)); return _p(node)->m_key.anchor; }
+    csubstr    const& key_anchor(id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_key_anchor(node)); return _p(node)->m_key.anchor; }
+    NodeScalar const& keysc     (id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_key(node)); return _p(node)->m_key; }
 
-    /** defaulted lookup: lookup @p path; if the lookup fails, recursively modify
-     * the tree so that the corresponding lookup_path() would return the
-     * branch @p src_node (from the tree @p src).
-     * @see lookup_path() */
-    size_t lookup_path_or_modify(Tree const *src, size_t src_node, csubstr path, size_t start=NONE);
+    csubstr    const& val       (id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_val(node)); return _p(node)->m_val.scalar; }
+    csubstr    const& val_tag   (id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_val_tag(node)); return _p(node)->m_val.tag; }
+    csubstr    const& val_ref   (id_type node) const { _RYML_CB_ASSERT(m_callbacks, is_val_ref(node)); return _p(node)->m_val.anchor; }
+    csubstr    const& val_anchor(id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_val_anchor(node)); return _p(node)->m_val.anchor; }
+    NodeScalar const& valsc     (id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_val(node)); return _p(node)->m_val; }
 
     /** @} */
 
-private:
+public:
 
-    struct _lookup_path_token
-    {
-        csubstr value;
-        NodeType type;
-        _lookup_path_token() : value(), type() {}
-        _lookup_path_token(csubstr v, NodeType t) : value(v), type(t) {}
-        inline operator bool() const { return type != NOTYPE; }
-        bool is_index() const { return value.begins_with('[') && value.ends_with(']'); }
-    };
+    /** @name node type predicates */
+    /** @{ */
 
-    size_t _lookup_path_or_create(csubstr path, size_t start);
+    C4_ALWAYS_INLINE bool type_has_any(id_type node, NodeType_e bits) const { return _p(node)->m_type.has_any(bits); }
+    C4_ALWAYS_INLINE bool type_has_all(id_type node, NodeType_e bits) const { return _p(node)->m_type.has_all(bits); }
+    C4_ALWAYS_INLINE bool type_has_none(id_type node, NodeType_e bits) const { return _p(node)->m_type.has_none(bits); }
+
+    C4_ALWAYS_INLINE bool is_stream(id_type node) const { return _p(node)->m_type.is_stream(); }
+    C4_ALWAYS_INLINE bool is_doc(id_type node) const { return _p(node)->m_type.is_doc(); }
+    C4_ALWAYS_INLINE bool is_container(id_type node) const { return _p(node)->m_type.is_container(); }
+    C4_ALWAYS_INLINE bool is_map(id_type node) const { return _p(node)->m_type.is_map(); }
+    C4_ALWAYS_INLINE bool is_seq(id_type node) const { return _p(node)->m_type.is_seq(); }
+    C4_ALWAYS_INLINE bool has_key(id_type node) const { return _p(node)->m_type.has_key(); }
+    C4_ALWAYS_INLINE bool has_val(id_type node) const { return _p(node)->m_type.has_val(); }
+    C4_ALWAYS_INLINE bool is_val(id_type node) const { return _p(node)->m_type.is_val(); }
+    C4_ALWAYS_INLINE bool is_keyval(id_type node) const { return _p(node)->m_type.is_keyval(); }
+    C4_ALWAYS_INLINE bool has_key_tag(id_type node) const { return _p(node)->m_type.has_key_tag(); }
+    C4_ALWAYS_INLINE bool has_val_tag(id_type node) const { return _p(node)->m_type.has_val_tag(); }
+    C4_ALWAYS_INLINE bool has_key_anchor(id_type node) const { return _p(node)->m_type.has_key_anchor(); }
+    C4_ALWAYS_INLINE bool has_val_anchor(id_type node) const { return _p(node)->m_type.has_val_anchor(); }
+    C4_ALWAYS_INLINE bool has_anchor(id_type node) const { return _p(node)->m_type.has_anchor(); }
+    C4_ALWAYS_INLINE bool is_key_ref(id_type node) const { return _p(node)->m_type.is_key_ref(); }
+    C4_ALWAYS_INLINE bool is_val_ref(id_type node) const { return _p(node)->m_type.is_val_ref(); }
+    C4_ALWAYS_INLINE bool is_ref(id_type node) const { return _p(node)->m_type.is_ref(); }
+
+    C4_ALWAYS_INLINE bool parent_is_seq(id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_parent(node)); return is_seq(_p(node)->m_parent); }
+    C4_ALWAYS_INLINE bool parent_is_map(id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_parent(node)); return is_map(_p(node)->m_parent); }
 
-    void   _lookup_path       (lookup_result *r) const;
-    void   _lookup_path_modify(lookup_result *r);
+    /** true when the node has an anchor named a */
+    C4_ALWAYS_INLINE bool has_anchor(id_type node, csubstr a) const { return _p(node)->m_key.anchor == a || _p(node)->m_val.anchor == a; }
+
+    /** true if the node key does not have any KEYQUO flags, and its scalar verifies scalar_is_null().
+     * @warning the node must verify .has_key() (asserted) (ie must be a member of a map)
+     * @see https://github.com/biojppm/rapidyaml/issues/413 */
+    C4_ALWAYS_INLINE bool key_is_null(id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_key(node)); NodeData const* C4_RESTRICT n = _p(node); return !n->m_type.is_key_quoted() && scalar_is_null(n->m_key.scalar); }
+    /** true if the node val does not have any VALQUO flags, and its scalar verifies scalar_is_null().
+     * @warning the node must verify .has_val() (asserted) (ie must be a scalar / must not be a container)
+     * @see https://github.com/biojppm/rapidyaml/issues/413 */
+    C4_ALWAYS_INLINE bool val_is_null(id_type node) const { _RYML_CB_ASSERT(m_callbacks, has_val(node)); NodeData const* C4_RESTRICT n = _p(node); return !n->m_type.is_val_quoted() && scalar_is_null(n->m_val.scalar); }
+
+    /// true if the key was a scalar requiring filtering and was left
+    /// unfiltered during the parsing (see ParserOptions)
+    C4_ALWAYS_INLINE bool is_key_unfiltered(id_type node) const { return _p(node)->m_type.is_key_unfiltered(); }
+    /// true if the val was a scalar requiring filtering and was left
+    /// unfiltered during the parsing (see ParserOptions)
+    C4_ALWAYS_INLINE bool is_val_unfiltered(id_type node) const { return _p(node)->m_type.is_val_unfiltered(); }
+
+    RYML_DEPRECATED("use has_key_anchor()")    bool is_key_anchor(id_type node) const { return _p(node)->m_type.has_key_anchor(); }
+    RYML_DEPRECATED("use has_val_anchor()")    bool is_val_anchor(id_type node) const { return _p(node)->m_type.has_val_anchor(); }
+    RYML_DEPRECATED("use has_anchor()")        bool is_anchor(id_type node) const { return _p(node)->m_type.has_anchor(); }
+    RYML_DEPRECATED("use has_anchor_or_ref()") bool is_anchor_or_ref(id_type node) const { return _p(node)->m_type.has_anchor() || _p(node)->m_type.is_ref(); }
 
-    size_t _next_node       (lookup_result *r, _lookup_path_token *parent) const;
-    size_t _next_node_modify(lookup_result *r, _lookup_path_token *parent);
+    /** @} */
 
-    void   _advance(lookup_result *r, size_t more) const;
+public:
 
-    _lookup_path_token _next_token(lookup_result *r, _lookup_path_token const& parent) const;
+    /** @name hierarchy predicates */
+    /** @{ */
 
-private:
+    bool is_root(id_type node) const { _RYML_CB_ASSERT(m_callbacks, _p(node)->m_parent != NONE || node == 0); return _p(node)->m_parent == NONE; }
 
-    void _clear();
-    void _free();
-    void _copy(Tree const& that);
-    void _move(Tree      & that);
+    bool has_parent(id_type node) const { return _p(node)->m_parent != NONE; }
 
-    void _relocate(substr next_arena);
+    /** true when key and val are empty, and has no children */
+    bool empty(id_type node) const { return ! has_children(node) && _p(node)->m_key.empty() && (( ! (_p(node)->m_type & VAL)) || _p(node)->m_val.empty()); }
 
-public:
+    /** true if @p node has a child with id @p ch */
+    bool has_child(id_type node, id_type ch) const { return _p(ch)->m_parent == node; }
+    /** true if @p node has a child with key @p key */
+    bool has_child(id_type node, csubstr key) const { return find_child(node, key) != NONE; }
+    /** true if @p node has any children */
+    bool has_children(id_type node) const { return _p(node)->m_first_child != NONE; }
 
-    #if ! RYML_USE_ASSERT
-    C4_ALWAYS_INLINE void _check_next_flags(size_t, type_bits) {}
-    #else
-    void _check_next_flags(size_t node, type_bits f)
+    /** true if @p node has a sibling with id @p sib */
+    bool has_sibling(id_type node, id_type sib) const { return _p(node)->m_parent == _p(sib)->m_parent; }
+    /** true if one of the node's siblings has the given key */
+    bool has_sibling(id_type node, csubstr key) const { return find_sibling(node, key) != NONE; }
+    /** true if node is not a single child */
+    bool has_other_siblings(id_type node) const
     {
-        auto n = _p(node);
-        type_bits o = n->m_type; // old
-        C4_UNUSED(o);
-        if(f & MAP)
-        {
-            RYML_ASSERT_MSG((f & SEQ) == 0, "cannot mark simultaneously as map and seq");
-            RYML_ASSERT_MSG((f & VAL) == 0, "cannot mark simultaneously as map and val");
-            RYML_ASSERT_MSG((o & SEQ) == 0, "cannot turn a seq into a map; clear first");
-            RYML_ASSERT_MSG((o & VAL) == 0, "cannot turn a val into a map; clear first");
-        }
-        else if(f & SEQ)
-        {
-            RYML_ASSERT_MSG((f & MAP) == 0, "cannot mark simultaneously as seq and map");
-            RYML_ASSERT_MSG((f & VAL) == 0, "cannot mark simultaneously as seq and val");
-            RYML_ASSERT_MSG((o & MAP) == 0, "cannot turn a map into a seq; clear first");
-            RYML_ASSERT_MSG((o & VAL) == 0, "cannot turn a val into a seq; clear first");
-        }
-        if(f & KEY)
-        {
-            RYML_ASSERT(!is_root(node));
-            auto pid = parent(node); C4_UNUSED(pid);
-            RYML_ASSERT(is_map(pid));
-        }
-        if((f & VAL) && !is_root(node))
+        NodeData const *n = _p(node);
+        if(C4_LIKELY(n->m_parent != NONE))
         {
-            auto pid = parent(node); C4_UNUSED(pid);
-            RYML_ASSERT(is_map(pid) || is_seq(pid));
+            n = _p(n->m_parent);
+            return n->m_first_child != n->m_last_child;
         }
+        return false;
     }
-    #endif
 
-    inline void _set_flags(size_t node, NodeType_e f) { _check_next_flags(node, f); _p(node)->m_type = f; }
-    inline void _set_flags(size_t node, type_bits  f) { _check_next_flags(node, f); _p(node)->m_type = f; }
+    RYML_DEPRECATED("use has_other_siblings()") bool has_siblings(id_type /*node*/) const { return true; }
 
-    inline void _add_flags(size_t node, NodeType_e f) { NodeData *d = _p(node); type_bits fb = f |  d->m_type; _check_next_flags(node, fb); d->m_type = (NodeType_e) fb; }
-    inline void _add_flags(size_t node, type_bits  f) { NodeData *d = _p(node);                f |= d->m_type; _check_next_flags(node,  f); d->m_type = f; }
+    /** @} */
 
-    inline void _rem_flags(size_t node, NodeType_e f) { NodeData *d = _p(node); type_bits fb = d->m_type & ~f; _check_next_flags(node, fb); d->m_type = (NodeType_e) fb; }
-    inline void _rem_flags(size_t node, type_bits  f) { NodeData *d = _p(node);            f = d->m_type & ~f; _check_next_flags(node,  f); d->m_type = f; }
+public:
 
-    void _set_key(size_t node, csubstr key, type_bits more_flags=0)
-    {
-        _p(node)->m_key.scalar = key;
-        _add_flags(node, KEY|more_flags);
-    }
-    void _set_key(size_t node, NodeScalar const& key, type_bits more_flags=0)
-    {
-        _p(node)->m_key = key;
-        _add_flags(node, KEY|more_flags);
-    }
+    /** @name hierarchy getters */
+    /** @{ */
 
-    void _set_val(size_t node, csubstr val, type_bits more_flags=0)
-    {
-        RYML_ASSERT(num_children(node) == 0);
-        RYML_ASSERT(!is_seq(node) && !is_map(node));
-        _p(node)->m_val.scalar = val;
-        _add_flags(node, VAL|more_flags);
-    }
-    void _set_val(size_t node, NodeScalar const& val, type_bits more_flags=0)
-    {
-        RYML_ASSERT(num_children(node) == 0);
-        RYML_ASSERT( ! is_container(node));
-        _p(node)->m_val = val;
-        _add_flags(node, VAL|more_flags);
-    }
+    id_type parent(id_type node) const { return _p(node)->m_parent; }
 
-    void _set(size_t node, NodeInit const& i)
-    {
-        RYML_ASSERT(i._check());
-        NodeData *n = _p(node);
-        RYML_ASSERT(n->m_key.scalar.empty() || i.key.scalar.empty() || i.key.scalar == n->m_key.scalar);
-        _add_flags(node, i.type);
-        if(n->m_key.scalar.empty())
-        {
-            if( ! i.key.scalar.empty())
-            {
-                _set_key(node, i.key.scalar);
-            }
-        }
-        n->m_key.tag = i.key.tag;
-        n->m_val = i.val;
-    }
+    id_type prev_sibling(id_type node) const { return _p(node)->m_prev_sibling; }
+    id_type next_sibling(id_type node) const { return _p(node)->m_next_sibling; }
 
-    void _set_parent_as_container_if_needed(size_t in)
-    {
-        NodeData const* n = _p(in);
-        size_t ip = parent(in);
-        if(ip != NONE)
-        {
-            if( ! (is_seq(ip) || is_map(ip)))
-            {
-                if((in == first_child(ip)) && (in == last_child(ip)))
-                {
-                    if( ! n->m_key.empty() || has_key(in))
-                    {
-                        _add_flags(ip, MAP);
-                    }
-                    else
-                    {
-                        _add_flags(ip, SEQ);
-                    }
-                }
-            }
-        }
-    }
+    /** O(#num_children) */
+    id_type num_children(id_type node) const;
+    id_type child_pos(id_type node, id_type ch) const;
+    id_type first_child(id_type node) const { return _p(node)->m_first_child; }
+    id_type last_child(id_type node) const { return _p(node)->m_last_child; }
+    id_type child(id_type node, id_type pos) const;
+    id_type find_child(id_type node, csubstr const& key) const;
 
-    void _seq2map(size_t node)
-    {
-        RYML_ASSERT(is_seq(node));
-        for(size_t i = first_child(node); i != NONE; i = next_sibling(i))
-        {
-            NodeData *C4_RESTRICT ch = _p(i);
-            if(ch->m_type.is_keyval())
-                continue;
-            ch->m_type.add(KEY);
-            ch->m_key = ch->m_val;
-        }
-        auto *C4_RESTRICT n = _p(node);
-        n->m_type.rem(SEQ);
-        n->m_type.add(MAP);
-    }
+    /** O(#num_siblings) */
+    /** number of siblings of @p node, including the node itself */
+    id_type num_siblings(id_type node) const { return is_root(node) ? 1 : num_children(_p(node)->m_parent); }
+    /** number of siblings of @p node, excluding the node itself */
+    id_type num_other_siblings(id_type node) const { id_type ns = num_siblings(node); _RYML_CB_ASSERT(m_callbacks, ns > 0); return ns-1; }
+    id_type sibling_pos(id_type node, id_type sib) const { _RYML_CB_ASSERT(m_callbacks,  ! is_root(node) || node == root_id()); return child_pos(_p(node)->m_parent, sib); }
+    id_type first_sibling(id_type node) const { return is_root(node) ? node : _p(_p(node)->m_parent)->m_first_child; }
+    id_type last_sibling(id_type node) const { return is_root(node) ? node : _p(_p(node)->m_parent)->m_last_child; }
+    id_type sibling(id_type node, id_type pos) const { return child(_p(node)->m_parent, pos); }
+    id_type find_sibling(id_type node, csubstr const& key) const { return find_child(_p(node)->m_parent, key); }
 
-    size_t _do_reorder(size_t *node, size_t count);
+    id_type doc(id_type i) const { id_type rid = root_id(); _RYML_CB_ASSERT(m_callbacks, is_stream(rid)); return child(rid, i); } //!< gets the @p i document node index. requires that the root node is a stream.
 
-    void _swap(size_t n_, size_t m_);
-    void _swap_props(size_t n_, size_t m_);
-    void _swap_hierarchy(size_t n_, size_t m_);
-    void _copy_hierarchy(size_t dst_, size_t src_);
+    id_type depth_asc(id_type node) const; /**< O(log(num_tree_nodes)) get the ascending depth of the node: number of levels between root and node */
+    id_type depth_desc(id_type node) const; /**< O(num_tree_nodes) get the descending depth of the node: number of levels between node and deepest child */
 
-    void _copy_props(size_t dst_, size_t src_)
-    {
-        auto      & C4_RESTRICT dst = *_p(dst_);
-        auto const& C4_RESTRICT src = *_p(src_);
-        dst.m_type = src.m_type;
-        dst.m_key  = src.m_key;
-        dst.m_val  = src.m_val;
-    }
+    /** @} */
 
-    void _copy_props_wo_key(size_t dst_, size_t src_)
-    {
-        auto      & C4_RESTRICT dst = *_p(dst_);
-        auto const& C4_RESTRICT src = *_p(src_);
-        dst.m_type = src.m_type;
-        dst.m_val  = src.m_val;
-    }
+public:
 
-    void _copy_props(size_t dst_, Tree const* that_tree, size_t src_)
-    {
-        auto      & C4_RESTRICT dst = *_p(dst_);
-        auto const& C4_RESTRICT src = *that_tree->_p(src_);
-        dst.m_type = src.m_type;
-        dst.m_key  = src.m_key;
-        dst.m_val  = src.m_val;
-    }
+    /** @name node style predicates and modifiers. see the corresponding predicate in NodeType */
+    /** @{ */
 
-    void _copy_props_wo_key(size_t dst_, Tree const* that_tree, size_t src_)
-    {
-        auto      & C4_RESTRICT dst = *_p(dst_);
-        auto const& C4_RESTRICT src = *that_tree->_p(src_);
-        dst.m_type = src.m_type;
-        dst.m_val  = src.m_val;
-    }
+    C4_ALWAYS_INLINE bool is_container_styled(id_type node) const { return _p(node)->m_type.is_container_styled(); }
+    C4_ALWAYS_INLINE bool is_block(id_type node) const { return _p(node)->m_type.is_block(); }
+    C4_ALWAYS_INLINE bool is_flow_sl(id_type node) const { return _p(node)->m_type.is_flow_sl(); }
+    C4_ALWAYS_INLINE bool is_flow_ml(id_type node) const { return _p(node)->m_type.is_flow_ml(); }
+    C4_ALWAYS_INLINE bool is_flow(id_type node) const { return _p(node)->m_type.is_flow(); }
+
+    C4_ALWAYS_INLINE bool is_key_styled(id_type node) const { return _p(node)->m_type.is_key_styled(); }
+    C4_ALWAYS_INLINE bool is_val_styled(id_type node) const { return _p(node)->m_type.is_val_styled(); }
+    C4_ALWAYS_INLINE bool is_key_literal(id_type node) const { return _p(node)->m_type.is_key_literal(); }
+    C4_ALWAYS_INLINE bool is_val_literal(id_type node) const { return _p(node)->m_type.is_val_literal(); }
+    C4_ALWAYS_INLINE bool is_key_folded(id_type node) const { return _p(node)->m_type.is_key_folded(); }
+    C4_ALWAYS_INLINE bool is_val_folded(id_type node) const { return _p(node)->m_type.is_val_folded(); }
+    C4_ALWAYS_INLINE bool is_key_squo(id_type node) const { return _p(node)->m_type.is_key_squo(); }
+    C4_ALWAYS_INLINE bool is_val_squo(id_type node) const { return _p(node)->m_type.is_val_squo(); }
+    C4_ALWAYS_INLINE bool is_key_dquo(id_type node) const { return _p(node)->m_type.is_key_dquo(); }
+    C4_ALWAYS_INLINE bool is_val_dquo(id_type node) const { return _p(node)->m_type.is_val_dquo(); }
+    C4_ALWAYS_INLINE bool is_key_plain(id_type node) const { return _p(node)->m_type.is_key_plain(); }
+    C4_ALWAYS_INLINE bool is_val_plain(id_type node) const { return _p(node)->m_type.is_val_plain(); }
+    C4_ALWAYS_INLINE bool is_key_quoted(id_type node) const { return _p(node)->m_type.is_key_quoted(); }
+    C4_ALWAYS_INLINE bool is_val_quoted(id_type node) const { return _p(node)->m_type.is_val_quoted(); }
+    C4_ALWAYS_INLINE bool is_quoted(id_type node) const { return _p(node)->m_type.is_quoted(); }
+
+    C4_ALWAYS_INLINE void set_container_style(id_type node, NodeType_e style) { _RYML_CB_ASSERT(m_callbacks, is_container(node)); _p(node)->m_type.set_container_style(style); }
+    C4_ALWAYS_INLINE void set_key_style(id_type node, NodeType_e style) { _RYML_CB_ASSERT(m_callbacks, has_key(node)); _p(node)->m_type.set_key_style(style); }
+    C4_ALWAYS_INLINE void set_val_style(id_type node, NodeType_e style) { _RYML_CB_ASSERT(m_callbacks, has_val(node)); _p(node)->m_type.set_val_style(style); }
 
-    inline void _clear_type(size_t node)
-    {
-        _p(node)->m_type = NOTYPE;
-    }
+    /** @} */
 
-    inline void _clear(size_t node)
-    {
-        auto *C4_RESTRICT n = _p(node);
-        n->m_type = NOTYPE;
-        n->m_key.clear();
-        n->m_val.clear();
-        n->m_parent = NONE;
-        n->m_first_child = NONE;
-        n->m_last_child = NONE;
-    }
+public:
 
-    inline void _clear_key(size_t node)
-    {
-        _p(node)->m_key.clear();
-        _rem_flags(node, KEY);
-    }
+    /** @name node type modifiers */
+    /** @{ */
 
-    inline void _clear_val(size_t node)
-    {
-        _p(node)->m_key.clear();
-        _rem_flags(node, VAL);
-    }
+    void to_keyval(id_type node, csubstr key, csubstr val, type_bits more_flags=0);
+    void to_map(id_type node, csubstr key, type_bits more_flags=0);
+    void to_seq(id_type node, csubstr key, type_bits more_flags=0);
+    void to_val(id_type node, csubstr val, type_bits more_flags=0);
+    void to_map(id_type node, type_bits more_flags=0);
+    void to_seq(id_type node, type_bits more_flags=0);
+    void to_doc(id_type node, type_bits more_flags=0);
+    void to_stream(id_type node, type_bits more_flags=0);
 
-private:
+    void set_key(id_type node, csubstr key) { _RYML_CB_ASSERT(m_callbacks, has_key(node)); _p(node)->m_key.scalar = key; }
+    void set_val(id_type node, csubstr val) { _RYML_CB_ASSERT(m_callbacks, has_val(node)); _p(node)->m_val.scalar = val; }
 
-    void _clear_range(size_t first, size_t num);
+    void set_key_tag(id_type node, csubstr tag) { _RYML_CB_ASSERT(m_callbacks, has_key(node)); _p(node)->m_key.tag = tag; _add_flags(node, KEYTAG); }
+    void set_val_tag(id_type node, csubstr tag) { _RYML_CB_ASSERT(m_callbacks, has_val(node) || is_container(node)); _p(node)->m_val.tag = tag; _add_flags(node, VALTAG); }
 
-    size_t _claim();
-    void   _claim_root();
-    void   _release(size_t node);
-    void   _free_list_add(size_t node);
-    void   _free_list_rem(size_t node);
+    void set_key_anchor(id_type node, csubstr anchor) { _RYML_CB_ASSERT(m_callbacks,  ! is_key_ref(node)); _p(node)->m_key.anchor = anchor.triml('&'); _add_flags(node, KEYANCH); }
+    void set_val_anchor(id_type node, csubstr anchor) { _RYML_CB_ASSERT(m_callbacks,  ! is_val_ref(node)); _p(node)->m_val.anchor = anchor.triml('&'); _add_flags(node, VALANCH); }
+    void set_key_ref   (id_type node, csubstr ref   ) { _RYML_CB_ASSERT(m_callbacks,  ! has_key_anchor(node)); NodeData* C4_RESTRICT n = _p(node); n->m_key.set_ref_maybe_replacing_scalar(ref, n->m_type.has_key()); _add_flags(node, KEY|KEYREF); }
+    void set_val_ref   (id_type node, csubstr ref   ) { _RYML_CB_ASSERT(m_callbacks,  ! has_val_anchor(node)); NodeData* C4_RESTRICT n = _p(node); n->m_val.set_ref_maybe_replacing_scalar(ref, n->m_type.has_val()); _add_flags(node, VAL|VALREF); }
+
+    void rem_key_anchor(id_type node) { _p(node)->m_key.anchor.clear(); _rem_flags(node, KEYANCH); }
+    void rem_val_anchor(id_type node) { _p(node)->m_val.anchor.clear(); _rem_flags(node, VALANCH); }
+    void rem_key_ref   (id_type node) { _p(node)->m_key.anchor.clear(); _rem_flags(node, KEYREF); }
+    void rem_val_ref   (id_type node) { _p(node)->m_val.anchor.clear(); _rem_flags(node, VALREF); }
+    void rem_anchor_ref(id_type node) { _p(node)->m_key.anchor.clear(); _p(node)->m_val.anchor.clear(); _rem_flags(node, KEYANCH|VALANCH|KEYREF|VALREF); }
 
-    void _set_hierarchy(size_t node, size_t parent, size_t after_sibling);
-    void _rem_hierarchy(size_t node);
+    /** @} */
 
 public:
 
-    // members are exposed, but you should NOT access them directly
+    /** @name tree modifiers */
+    /** @{ */
 
-    NodeData * m_buf;
-    size_t m_cap;
+    /** reorder the tree in memory so that all the nodes are stored
+     * in a linear sequence when visited in depth-first order.
+     * This will invalidate existing ids, since the node id is its
+     * position in the tree's node array. */
+    void reorder();
 
-    size_t m_size;
+    /** Resolve references (aliases <- anchors) in the tree.
+     *
+     * Dereferencing is opt-in; after parsing, Tree::resolve() has to
+     * be called explicitly for obtaining resolved references in the
+     * tree. This method will use @ref ReferenceResolver::resolve()
+     * to resolve all references and substitute the anchored values in
+     * place of the reference.
+     *
+     * This method first does a full traversal of the tree to gather all
+     * anchors and references in a separate collection, then it goes through
+     * that collection to locate the names, which it does by obeying the YAML
+     * standard diktat that "an alias node refers to the most recent node in
+     * the serialization having the specified anchor"
+     *
+     * So, depending on the number of anchor/alias nodes, this is a
+     * potentially expensive operation, with a best-case linear complexity
+     * (from the initial traversal). This potential cost is the reason for
+     * requiring an explicit call.
+     *
+     * @see ReferenceResolver::resolve()
+     */
+    void resolve(ReferenceResolver *C4_RESTRICT rr);
 
-    size_t m_free_head;
-    size_t m_free_tail;
+    /** Resolve references using a throw-away resolver. */
+    void resolve();
 
-    substr m_arena;
-    size_t m_arena_pos;
+    /** @} */
 
-    Callbacks m_callbacks;
+public:
 
-    TagDirective m_tag_directives[RYML_MAX_TAG_DIRECTIVES];
+    /** @name tag directives */
+    /** @{ */
 
-};
+    void resolve_tags();
+    void normalize_tags();
+    void normalize_tags_long();
 
-} // namespace yml
-} // namespace c4
+    id_type num_tag_directives() const;
+    bool add_tag_directive(csubstr directive);
+    id_type add_tag_directive(TagDirective const& td);
+    void clear_tag_directives();
 
+    /** resolve the given tag, appearing at node_id. Write the result into output.
+     * @return the number of characters required for the resolved tag */
+    size_t resolve_tag(substr output, csubstr tag, id_type node_id) const;
+    csubstr resolve_tag_sub(substr output, csubstr tag, id_type node_id) const
+    {
+        size_t needed = resolve_tag(output, tag, node_id);
+        return needed <= output.len ? output.first(needed) : output;
+    }
 
-C4_SUPPRESS_WARNING_MSVC_POP
-C4_SUPPRESS_WARNING_GCC_CLANG_POP
+    TagDirective const* begin_tag_directives() const { return m_tag_directives; }
+    TagDirective const* end_tag_directives() const { return m_tag_directives + num_tag_directives(); }
+    c4::yml::TagDirectiveRange tag_directives() const { return c4::yml::TagDirectiveRange{begin_tag_directives(), end_tag_directives()}; }
 
+    RYML_DEPRECATED("use c4::yml::tag_directive_const_iterator") typedef TagDirective const* tag_directive_const_iterator;
+    RYML_DEPRECATED("use c4::yml::TagDirectiveRange") typedef c4::yml::TagDirectiveRange TagDirectiveProxy;
 
-#endif /* _C4_YML_TREE_HPP_ */
+    /** @} */
 
+public:
 
-// (end https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp)
+    /** @name modifying hierarchy */
+    /** @{ */
 
+    /** create and insert a new child of @p parent. insert after the (to-be)
+     * sibling @p after, which must be a child of @p parent. To insert as the
+     * first child, set after to NONE */
+    C4_ALWAYS_INLINE id_type insert_child(id_type parent, id_type after)
+    {
+        _RYML_CB_ASSERT(m_callbacks, parent != NONE);
+        _RYML_CB_ASSERT(m_callbacks, is_container(parent) || is_root(parent));
+        _RYML_CB_ASSERT(m_callbacks, after == NONE || (_p(after)->m_parent == parent));
+        id_type child = _claim();
+        _set_hierarchy(child, parent, after);
+        return child;
+    }
+    /** create and insert a node as the first child of @p parent */
+    C4_ALWAYS_INLINE id_type prepend_child(id_type parent) { return insert_child(parent, NONE); }
+    /** create and insert a node as the last child of @p parent */
+    C4_ALWAYS_INLINE id_type append_child(id_type parent) { return insert_child(parent, _p(parent)->m_last_child); }
+    C4_ALWAYS_INLINE id_type _append_child__unprotected(id_type parent)
+    {
+        id_type child = _claim();
+        _set_hierarchy(child, parent, _p(parent)->m_last_child);
+        return child;
+    }
 
+public:
 
-//********************************************************************************
-//--------------------------------------------------------------------------------
-// src/c4/yml/node.hpp
-// https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp
-//--------------------------------------------------------------------------------
-//********************************************************************************
+    #if defined(__clang__)
+    #   pragma clang diagnostic push
+    #   pragma clang diagnostic ignored "-Wnull-dereference"
+    #elif defined(__GNUC__)
+    #   pragma GCC diagnostic push
+    #   if __GNUC__ >= 6
+    #       pragma GCC diagnostic ignored "-Wnull-dereference"
+    #   endif
+    #endif
 
-#ifndef _C4_YML_NODE_HPP_
-#define _C4_YML_NODE_HPP_
+    //! create and insert a new sibling of @p node, placed after the sibling @p after
+    C4_ALWAYS_INLINE id_type insert_sibling(id_type node, id_type after)
+    {
+        return insert_child(_p(node)->m_parent, after);
+    }
+    /** create and insert a node as the first sibling of @p node */
+    C4_ALWAYS_INLINE id_type prepend_sibling(id_type node) { return prepend_child(_p(node)->m_parent); }
+    C4_ALWAYS_INLINE id_type  append_sibling(id_type node) { return append_child(_p(node)->m_parent); }
 
-/** @file node.hpp
- * @see NodeRef */
+public:
 
-//included above:
-//#include <cstddef>
+    /** remove an entire branch at once: ie remove the children and the node itself */
+    inline void remove(id_type node)
+    {
+        remove_children(node);
+        _release(node);
+    }
 
-// amalgamate: removed include of
-// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp
-//#include "c4/yml/tree.hpp"
-#if !defined(C4_YML_TREE_HPP_) && !defined(_C4_YML_TREE_HPP_)
-#error "amalgamate: file c4/yml/tree.hpp must have been included at this point"
-#endif /* C4_YML_TREE_HPP_ */
+    /** remove all the node's children, but keep the node itself */
+    void remove_children(id_type node);
 
-// amalgamate: removed include of
-// https://github.com/biojppm/rapidyaml/src/c4/base64.hpp
-//#include "c4/base64.hpp"
-#if !defined(C4_BASE64_HPP_) && !defined(_C4_BASE64_HPP_)
-#error "amalgamate: file c4/base64.hpp must have been included at this point"
-#endif /* C4_BASE64_HPP_ */
+    /** change the @p type of the node to one of MAP, SEQ or VAL.  @p
+     * type must have one and only one of MAP,SEQ,VAL; @p type may
+     * possibly have KEY, but if it does, then the @p node must also
+     * have KEY. Changing to the same type is a no-op. Otherwise,
+     * changing to a different type will initialize the node with an
+     * empty value of the desired type: changing to VAL will
+     * initialize with a null scalar (~), changing to MAP will
+     * initialize with an empty map ({}), and changing to SEQ will
+     * initialize with an empty seq ([]). */
+    bool change_type(id_type node, NodeType type);
 
+    bool change_type(id_type node, type_bits type)
+    {
+        return change_type(node, (NodeType)type);
+    }
 
-#ifdef __GNUC__
-#   pragma GCC diagnostic push
-#   pragma GCC diagnostic ignored "-Wtype-limits"
-#endif
+    #if defined(__clang__)
+    #   pragma clang diagnostic pop
+    #elif defined(__GNUC__)
+    #   pragma GCC diagnostic pop
+    #endif
 
-#if defined(_MSC_VER)
-#   pragma warning(push)
-#   pragma warning(disable: 4251/*needs to have dll-interface to be used by clients of struct*/)
-#   pragma warning(disable: 4296/*expression is always 'boolean_value'*/)
-#endif
+public:
 
-namespace c4 {
-namespace yml {
+    /** change the node's position in the parent */
+    void move(id_type node, id_type after);
 
-template<class K> struct Key { K & k; };
-template<> struct Key<fmt::const_base64_wrapper> { fmt::const_base64_wrapper wrapper; };
-template<> struct Key<fmt::base64_wrapper> { fmt::base64_wrapper wrapper; };
+    /** change the node's parent and position */
+    void move(id_type node, id_type new_parent, id_type after);
 
-template<class K> C4_ALWAYS_INLINE Key<K> key(K & k) { return Key<K>{k}; }
-C4_ALWAYS_INLINE Key<fmt::const_base64_wrapper> key(fmt::const_base64_wrapper w) { return {w}; }
-C4_ALWAYS_INLINE Key<fmt::base64_wrapper> key(fmt::base64_wrapper w) { return {w}; }
+    /** change the node's parent and position to a different tree
+     * @return the index of the new node in the destination tree */
+    id_type move(Tree * src, id_type node, id_type new_parent, id_type after);
 
-template<class T> void write(NodeRef *n, T const& v);
+    /** ensure the first node is a stream. Eg, change this tree
+     *
+     *  DOCMAP
+     *    MAP
+     *      KEYVAL
+     *      KEYVAL
+     *    SEQ
+     *      VAL
+     *
+     * to
+     *
+     *  STREAM
+     *    DOCMAP
+     *      MAP
+     *        KEYVAL
+     *        KEYVAL
+     *      SEQ
+     *        VAL
+     *
+     * If the root is already a stream, this is a no-op.
+     */
+    void set_root_as_stream();
 
-template<class T>
-typename std::enable_if< ! std::is_floating_point<T>::value, bool>::type
-read(NodeRef const& n, T *v);
+public:
 
-template<class T>
-typename std::enable_if<   std::is_floating_point<T>::value, bool>::type
-read(NodeRef const& n, T *v);
+    /** recursively duplicate a node from this tree into a new parent,
+     * placing it after one of its children
+     * @return the index of the copy */
+    id_type duplicate(id_type node, id_type new_parent, id_type after);
+    /** recursively duplicate a node from a different tree into a new parent,
+     * placing it after one of its children
+     * @return the index of the copy */
+    id_type duplicate(Tree const* src, id_type node, id_type new_parent, id_type after);
 
+    /** recursively duplicate the node's children (but not the node)
+     * @return the index of the last duplicated child */
+    id_type duplicate_children(id_type node, id_type parent, id_type after);
+    /** recursively duplicate the node's children (but not the node), where
+     * the node is from a different tree
+     * @return the index of the last duplicated child */
+    id_type duplicate_children(Tree const* src, id_type node, id_type parent, id_type after);
 
-//-----------------------------------------------------------------------------
-//-----------------------------------------------------------------------------
-//-----------------------------------------------------------------------------
+    void duplicate_contents(id_type node, id_type where);
+    void duplicate_contents(Tree const* src, id_type node, id_type where);
 
-/** a reference to a node in an existing yaml tree, offering a more
- * convenient API than the index-based API used in the tree. */
-class RYML_EXPORT NodeRef
-{
-private:
+    /** duplicate the node's children (but not the node) in a new parent, but
+     * omit repetitions where a duplicated node has the same key (in maps) or
+     * value (in seqs). If one of the duplicated children has the same key
+     * (in maps) or value (in seqs) as one of the parent's children, the one
+     * that is placed closest to the end will prevail. */
+    id_type duplicate_children_no_rep(id_type node, id_type parent, id_type after);
+    id_type duplicate_children_no_rep(Tree const* src, id_type node, id_type parent, id_type after);
 
-    // require valid: a helper macro, undefined at the end
-    #define _C4RV() RYML_ASSERT(valid() && !is_seed())
+public:
 
-    Tree *C4_RESTRICT m_tree;
-    size_t m_id;
+    void merge_with(Tree const* src, id_type src_node=NONE, id_type dst_root=NONE);
 
-    /** This member is used to enable lazy operator[] writing. When a child
-     * with a key or index is not found, m_id is set to the id of the parent
-     * and the asked-for key or index are stored in this member until a write
-     * does happen. Then it is given as key or index for creating the child.
-     * When a key is used, the csubstr stores it (so the csubstr's string is
-     * non-null and the csubstr's size is different from NONE). When an index is
-     * used instead, the csubstr's string is set to null, and only the csubstr's
-     * size is set to a value different from NONE. Otherwise, when operator[]
-     * does find the child then this member is empty: the string is null and
-     * the size is NONE. */
-    csubstr m_seed;
+    /** @} */
 
 public:
 
-    /** @name node construction */
+    /** @name internal string arena */
     /** @{ */
 
-    NodeRef() : m_tree(nullptr), m_id(NONE), m_seed() { _clear_seed(); }
-    NodeRef(Tree &t) : m_tree(&t), m_id(t .root_id()), m_seed() { _clear_seed(); }
-    NodeRef(Tree *t) : m_tree(t ), m_id(t->root_id()), m_seed() { _clear_seed(); }
-    NodeRef(Tree *t, size_t id) : m_tree(t), m_id(id), m_seed() { _clear_seed(); }
-    NodeRef(Tree *t, size_t id, size_t seed_pos) : m_tree(t), m_id(id), m_seed() { m_seed.str = nullptr; m_seed.len = seed_pos; }
-    NodeRef(Tree *t, size_t id, csubstr  seed_key) : m_tree(t), m_id(id), m_seed(seed_key) {}
-    NodeRef(std::nullptr_t) : m_tree(nullptr), m_id(NONE), m_seed() {}
-
-    NodeRef(NodeRef const&) = default;
-    NodeRef(NodeRef     &&) = default;
+    /** get the current size of the tree's internal arena */
+    RYML_DEPRECATED("use arena_size() instead") size_t arena_pos() const { return m_arena_pos; }
+    /** get the current size of the tree's internal arena */
+    inline size_t arena_size() const { return m_arena_pos; }
+    /** get the current capacity of the tree's internal arena */
+    inline size_t arena_capacity() const { return m_arena.len; }
+    /** get the current slack of the tree's internal arena */
+    inline size_t arena_slack() const { _RYML_CB_ASSERT(m_callbacks, m_arena.len >= m_arena_pos); return m_arena.len - m_arena_pos; }
 
-    NodeRef& operator= (NodeRef const&) = default;
-    NodeRef& operator= (NodeRef     &&) = default;
+    /** get the current arena */
+    csubstr arena() const { return m_arena.first(m_arena_pos); }
+    /** get the current arena */
+    substr arena() { return m_arena.first(m_arena_pos); }
 
-    /** @} */
+    /** return true if the given substring is part of the tree's string arena */
+    bool in_arena(csubstr s) const
+    {
+        return m_arena.is_super(s);
+    }
 
-public:
+    /** serialize the given floating-point variable to the tree's
+     * arena, growing it as needed to accommodate the serialization.
+     *
+     * @note Growing the arena may cause relocation of the entire
+     * existing arena, and thus change the contents of individual
+     * nodes, and thus cost O(numnodes)+O(arenasize). To avoid this
+     * cost, ensure that the arena is reserved to an appropriate size
+     * using .reserve_arena()
+     *
+     * @see alloc_arena() */
+    template<class T>
+    typename std::enable_if<std::is_floating_point<T>::value, csubstr>::type
+    to_arena(T const& C4_RESTRICT a)
+    {
+        substr rem(m_arena.sub(m_arena_pos));
+        size_t num = to_chars_float(rem, a);
+        if(num > rem.len)
+        {
+            rem = _grow_arena(num);
+            num = to_chars_float(rem, a);
+            _RYML_CB_ASSERT(m_callbacks, num <= rem.len);
+        }
+        rem = _request_span(num);
+        return rem;
+    }
 
-    inline Tree      * tree()       { return m_tree; }
-    inline Tree const* tree() const { return m_tree; }
+    /** serialize the given non-floating-point variable to the tree's
+     * arena, growing it as needed to accommodate the serialization.
+     *
+     * @note Growing the arena may cause relocation of the entire
+     * existing arena, and thus change the contents of individual
+     * nodes, and thus cost O(numnodes)+O(arenasize). To avoid this
+     * cost, ensure that the arena is reserved to an appropriate size
+     * using .reserve_arena()
+     *
+     * @see alloc_arena() */
+    template<class T>
+    typename std::enable_if<!std::is_floating_point<T>::value, csubstr>::type
+    to_arena(T const& C4_RESTRICT a)
+    {
+        substr rem(m_arena.sub(m_arena_pos));
+        size_t num = to_chars(rem, a);
+        if(num > rem.len)
+        {
+            rem = _grow_arena(num);
+            num = to_chars(rem, a);
+            _RYML_CB_ASSERT(m_callbacks, num <= rem.len);
+        }
+        rem = _request_span(num);
+        return rem;
+    }
 
-    inline size_t id() const { return m_id; }
+    /** serialize the given csubstr to the tree's arena, growing the
+     * arena as needed to accommodate the serialization.
+     *
+     * @note Growing the arena may cause relocation of the entire
+     * existing arena, and thus change the contents of individual
+     * nodes, and thus cost O(numnodes)+O(arenasize). To avoid this
+     * cost, ensure that the arena is reserved to an appropriate size
+     * using .reserve_arena()
+     *
+     * @see alloc_arena() */
+    csubstr to_arena(csubstr a)
+    {
+        if(a.len > 0)
+        {
+            substr rem(m_arena.sub(m_arena_pos));
+            size_t num = to_chars(rem, a);
+            if(num > rem.len)
+            {
+                rem = _grow_arena(num);
+                num = to_chars(rem, a);
+                _RYML_CB_ASSERT(m_callbacks, num <= rem.len);
+            }
+            return _request_span(num);
+        }
+        else
+        {
+            if(a.str == nullptr)
+            {
+                return csubstr{};
+            }
+            else if(m_arena.str == nullptr)
+            {
+                // Arena is empty and we want to store a non-null
+                // zero-length string.
+                // Even though the string has zero length, we need
+                // some "memory" to store a non-nullptr string
+                _grow_arena(1);
+            }
+            return _request_span(0);
+        }
+    }
+    C4_ALWAYS_INLINE csubstr to_arena(const char *s)
+    {
+        return to_arena(to_csubstr(s));
+    }
+    C4_ALWAYS_INLINE csubstr to_arena(std::nullptr_t)
+    {
+        return csubstr{};
+    }
 
-    inline NodeData      * get()       { return m_tree->get(m_id); }
-    inline NodeData const* get() const { return m_tree->get(m_id); }
+    /** copy the given substr to the tree's arena, growing it by the
+     * required size
+     *
+     * @note Growing the arena may cause relocation of the entire
+     * existing arena, and thus change the contents of individual
+     * nodes, and thus cost O(numnodes)+O(arenasize). To avoid this
+     * cost, ensure that the arena is reserved to an appropriate size
+     * using .reserve_arena()
+     *
+     * @see alloc_arena() */
+    substr copy_to_arena(csubstr s)
+    {
+        substr cp = alloc_arena(s.len);
+        _RYML_CB_ASSERT(m_callbacks, cp.len == s.len);
+        _RYML_CB_ASSERT(m_callbacks, !s.overlaps(cp));
+        #if (!defined(__clang__)) && (defined(__GNUC__) && __GNUC__ >= 10)
+        C4_SUPPRESS_WARNING_GCC_PUSH
+        C4_SUPPRESS_WARNING_GCC("-Wstringop-overflow=") // no need for terminating \0
+        C4_SUPPRESS_WARNING_GCC( "-Wrestrict") // there's an assert to ensure no violation of restrict behavior
+        #endif
+        if(s.len)
+            memcpy(cp.str, s.str, s.len);
+        #if (!defined(__clang__)) && (defined(__GNUC__) && __GNUC__ >= 10)
+        C4_SUPPRESS_WARNING_GCC_POP
+        #endif
+        return cp;
+    }
 
-    inline bool operator== (NodeRef const& that) const { _C4RV(); RYML_ASSERT(that.valid() && !that.is_seed()); RYML_ASSERT(that.m_tree == m_tree); return m_id == that.m_id; }
-    inline bool operator!= (NodeRef const& that) const { return ! this->operator==(that); }
+    /** grow the tree's string arena by the given size and return a substr
+     * of the added portion
+     *
+     * @note Growing the arena may cause relocation of the entire
+     * existing arena, and thus change the contents of individual
+     * nodes, and thus cost O(numnodes)+O(arenasize). To avoid this
+     * cost, ensure that the arena is reserved to an appropriate size
+     * using .reserve_arena().
+     *
+     * @see reserve_arena() */
+    substr alloc_arena(size_t sz)
+    {
+        if(sz > arena_slack())
+            _grow_arena(sz - arena_slack());
+        substr s = _request_span(sz);
+        return s;
+    }
 
-    inline bool operator== (std::nullptr_t) const { return m_tree == nullptr || m_id == NONE || is_seed(); }
-    inline bool operator!= (std::nullptr_t) const { return ! this->operator== (nullptr); }
+    /** ensure the tree's internal string arena is at least the given capacity
+     * @warning This operation may be expensive, with a potential complexity of O(numNodes)+O(arenasize).
+     * @warning Growing the arena may cause relocation of the entire
+     * existing arena, and thus change the contents of individual nodes. */
+    void reserve_arena(size_t arena_cap)
+    {
+        if(arena_cap > m_arena.len)
+        {
+            substr buf;
+            buf.str = (char*) m_callbacks.m_allocate(arena_cap, m_arena.str, m_callbacks.m_user_data);
+            buf.len = arena_cap;
+            if(m_arena.str)
+            {
+                _RYML_CB_ASSERT(m_callbacks, m_arena.len >= 0);
+                _relocate(buf); // does a memcpy and changes nodes using the arena
+                m_callbacks.m_free(m_arena.str, m_arena.len, m_callbacks.m_user_data);
+            }
+            m_arena = buf;
+        }
+    }
 
-    inline bool operator== (csubstr val) const { _C4RV(); RYML_ASSERT(has_val()); return m_tree->val(m_id) == val; }
-    inline bool operator!= (csubstr val) const { _C4RV(); RYML_ASSERT(has_val()); return m_tree->val(m_id) != val; }
+    /** @} */
 
-    //inline operator bool () const { return m_tree == nullptr || m_id == NONE || is_seed(); }
+private:
 
-public:
+    substr _grow_arena(size_t more)
+    {
+        size_t cap = m_arena.len + more;
+        cap = cap < 2 * m_arena.len ? 2 * m_arena.len : cap;
+        cap = cap < 64 ? 64 : cap;
+        reserve_arena(cap);
+        return m_arena.sub(m_arena_pos);
+    }
 
-    inline bool valid() const { return m_tree != nullptr && m_id != NONE; }
-    inline bool is_seed() const { return m_seed.str != nullptr || m_seed.len != NONE; }
+    substr _request_span(size_t sz)
+    {
+        _RYML_CB_ASSERT(m_callbacks, m_arena_pos + sz <= m_arena.len);
+        substr s;
+        s = m_arena.sub(m_arena_pos, sz);
+        m_arena_pos += sz;
+        return s;
+    }
 
-    inline void _clear_seed() { /*do this manually or an assert is triggered*/ m_seed.str = nullptr; m_seed.len = NONE; }
+    substr _relocated(csubstr s, substr next_arena) const
+    {
+        _RYML_CB_ASSERT(m_callbacks, m_arena.is_super(s));
+        _RYML_CB_ASSERT(m_callbacks, m_arena.sub(0, m_arena_pos).is_super(s));
+        auto pos = (s.str - m_arena.str); // this is larger than 0 based on the assertions above
+        substr r(next_arena.str + pos, s.len);
+        _RYML_CB_ASSERT(m_callbacks, r.str - next_arena.str == pos);
+        _RYML_CB_ASSERT(m_callbacks, next_arena.sub(0, m_arena_pos).is_super(r));
+        return r;
+    }
 
 public:
 
-    /** @name node property getters */
+    /** @name lookup */
     /** @{ */
 
-    inline NodeType     type() const { _C4RV(); return m_tree->type(m_id); }
-    inline const char*  type_str() const { _C4RV(); RYML_ASSERT(valid() && ! is_seed()); return m_tree->type_str(m_id); }
-
-    inline csubstr    key()        const { _C4RV(); return m_tree->key(m_id); }
-    inline csubstr    key_tag()    const { _C4RV(); return m_tree->key_tag(m_id); }
-    inline csubstr    key_ref()    const { _C4RV(); return m_tree->key_ref(m_id); }
-    inline csubstr    key_anchor() const { _C4RV(); return m_tree->key_anchor(m_id); }
-    inline NodeScalar keysc()      const { _C4RV(); return m_tree->keysc(m_id); }
-
-    inline csubstr    val()        const { _C4RV(); return m_tree->val(m_id); }
-    inline csubstr    val_tag()    const { _C4RV(); return m_tree->val_tag(m_id); }
-    inline csubstr    val_ref()    const { _C4RV(); return m_tree->val_ref(m_id); }
-    inline csubstr    val_anchor() const { _C4RV(); return m_tree->val_anchor(m_id); }
-    inline NodeScalar valsc()      const { _C4RV(); return m_tree->valsc(m_id); }
+    struct lookup_result
+    {
+        id_type  target;
+        id_type  closest;
+        size_t  path_pos;
+        csubstr path;
 
-    inline bool key_is_null() const { _C4RV(); return m_tree->key_is_null(m_id); }
-    inline bool val_is_null() const { _C4RV(); return m_tree->val_is_null(m_id); }
+        inline operator bool() const { return target != NONE; }
 
-    /** decode the base64-encoded key deserialize and assign the
-     * decoded blob to the given buffer/
-     * @return the size of base64-decoded blob */
-    size_t deserialize_key(fmt::base64_wrapper v) const;
-    /** decode the base64-encoded key deserialize and assign the
-     * decoded blob to the given buffer/
-     * @return the size of base64-decoded blob */
-    size_t deserialize_val(fmt::base64_wrapper v) const;
+        lookup_result() : target(NONE), closest(NONE), path_pos(0), path() {}
+        lookup_result(csubstr path_, id_type start) : target(NONE), closest(start), path_pos(0), path(path_) {}
 
-    /** @} */
+        /** get the part of the input path that was resolved */
+        csubstr resolved() const;
+        /** get the part of the input path that was unresolved */
+        csubstr unresolved() const;
+    };
 
-public:
+    /** for example foo.bar[0].baz */
+    lookup_result lookup_path(csubstr path, id_type start=NONE) const;
 
-    /** @name node property predicates */
-    /** @{ */
+    /** defaulted lookup: lookup @p path; if the lookup fails, recursively modify
+     * the tree so that the corresponding lookup_path() would return the
+     * default value.
+     * @see lookup_path() */
+    id_type lookup_path_or_modify(csubstr default_value, csubstr path, id_type start=NONE);
 
-    C4_ALWAYS_INLINE bool is_stream()        const { _C4RV(); return m_tree->is_stream(m_id); }
-    C4_ALWAYS_INLINE bool is_doc()           const { _C4RV(); return m_tree->is_doc(m_id); }
-    C4_ALWAYS_INLINE bool is_container()     const { _C4RV(); return m_tree->is_container(m_id); }
-    C4_ALWAYS_INLINE bool is_map()           const { _C4RV(); return m_tree->is_map(m_id); }
-    C4_ALWAYS_INLINE bool is_seq()           const { _C4RV(); return m_tree->is_seq(m_id); }
-    C4_ALWAYS_INLINE bool has_val()          const { _C4RV(); return m_tree->has_val(m_id); }
-    C4_ALWAYS_INLINE bool has_key()          const { _C4RV(); return m_tree->has_key(m_id); }
-    C4_ALWAYS_INLINE bool is_val()           const { _C4RV(); return m_tree->is_val(m_id); }
-    C4_ALWAYS_INLINE bool is_keyval()        const { _C4RV(); return m_tree->is_keyval(m_id); }
-    C4_ALWAYS_INLINE bool has_key_tag()      const { _C4RV(); return m_tree->has_key_tag(m_id); }
-    C4_ALWAYS_INLINE bool has_val_tag()      const { _C4RV(); return m_tree->has_val_tag(m_id); }
-    C4_ALWAYS_INLINE bool has_key_anchor()   const { _C4RV(); return m_tree->has_key_anchor(m_id); }
-    C4_ALWAYS_INLINE bool is_key_anchor()    const { _C4RV(); return m_tree->is_key_anchor(m_id); }
-    C4_ALWAYS_INLINE bool has_val_anchor()   const { _C4RV(); return m_tree->has_val_anchor(m_id); }
-    C4_ALWAYS_INLINE bool is_val_anchor()    const { _C4RV(); return m_tree->is_val_anchor(m_id); }
-    C4_ALWAYS_INLINE bool has_anchor()       const { _C4RV(); return m_tree->has_anchor(m_id); }
-    C4_ALWAYS_INLINE bool is_anchor()        const { _C4RV(); return m_tree->is_anchor(m_id); }
-    C4_ALWAYS_INLINE bool is_key_ref()       const { _C4RV(); return m_tree->is_key_ref(m_id); }
-    C4_ALWAYS_INLINE bool is_val_ref()       const { _C4RV(); return m_tree->is_val_ref(m_id); }
-    C4_ALWAYS_INLINE bool is_ref()           const { _C4RV(); return m_tree->is_ref(m_id); }
-    C4_ALWAYS_INLINE bool is_anchor_or_ref() const { _C4RV(); return m_tree->is_anchor_or_ref(m_id); }
-    C4_ALWAYS_INLINE bool is_key_quoted()    const { _C4RV(); return m_tree->is_key_quoted(m_id); }
-    C4_ALWAYS_INLINE bool is_val_quoted()    const { _C4RV(); return m_tree->is_val_quoted(m_id); }
-    C4_ALWAYS_INLINE bool is_quoted()        const { _C4RV(); return m_tree->is_quoted(m_id); }
-
-    C4_ALWAYS_INLINE bool parent_is_seq()    const { _C4RV(); return m_tree->parent_is_seq(m_id); }
-    C4_ALWAYS_INLINE bool parent_is_map()    const { _C4RV(); return m_tree->parent_is_map(m_id); }
-
-    /** true when name and value are empty, and has no children */
-    C4_ALWAYS_INLINE bool empty() const { _C4RV(); return m_tree->empty(m_id); }
+    /** defaulted lookup: lookup @p path; if the lookup fails, recursively modify
+     * the tree so that the corresponding lookup_path() would return the
+     * branch @p src_node (from the tree @p src).
+     * @see lookup_path() */
+    id_type lookup_path_or_modify(Tree const *src, id_type src_node, csubstr path, id_type start=NONE);
 
     /** @} */
 
-public:
+private:
 
-    /** @name hierarchy predicates */
-    /** @{ */
+    struct _lookup_path_token
+    {
+        csubstr value;
+        NodeType type;
+        _lookup_path_token() : value(), type() {}
+        _lookup_path_token(csubstr v, NodeType t) : value(v), type(t) {}
+        inline operator bool() const { return type != NOTYPE; }
+        bool is_index() const { return value.begins_with('[') && value.ends_with(']'); }
+    };
 
-    inline bool is_root()    const { _C4RV(); return m_tree->is_root(m_id); }
-    inline bool has_parent() const { _C4RV(); return m_tree->has_parent(m_id); }
+    id_type _lookup_path_or_create(csubstr path, id_type start);
 
-    inline bool has_child(NodeRef const& ch) const { _C4RV(); return m_tree->has_child(m_id, ch.m_id); }
-    inline bool has_child(csubstr name) const { _C4RV();  return m_tree->has_child(m_id, name); }
-    inline bool has_children() const { _C4RV(); return m_tree->has_children(m_id); }
+    void   _lookup_path       (lookup_result *r) const;
+    void   _lookup_path_modify(lookup_result *r);
 
-    inline bool has_sibling(NodeRef const& n) const { _C4RV(); return m_tree->has_sibling(m_id, n.m_id); }
-    inline bool has_sibling(csubstr name) const { _C4RV();  return m_tree->has_sibling(m_id, name); }
-    /** counts with this */
-    inline bool has_siblings() const { _C4RV(); return m_tree->has_siblings(m_id); }
-    /** does not count with this */
-    inline bool has_other_siblings() const { _C4RV(); return m_tree->has_other_siblings(m_id); }
+    id_type _next_node       (lookup_result *r, _lookup_path_token *parent) const;
+    id_type _next_node_modify(lookup_result *r, _lookup_path_token *parent);
 
-    /** @} */
+    void   _advance(lookup_result *r, size_t more) const;
 
-public:
+    _lookup_path_token _next_token(lookup_result *r, _lookup_path_token const& parent) const;
 
-    /** @name hierarchy getters */
-    /** @{ */
+private:
 
-    NodeRef       parent()       { _C4RV(); return {m_tree, m_tree->parent(m_id)}; }
-    NodeRef const parent() const { _C4RV(); return {m_tree, m_tree->parent(m_id)}; }
+    void _clear();
+    void _free();
+    void _copy(Tree const& that);
+    void _move(Tree      & that) noexcept;
 
-    NodeRef       prev_sibling()       { _C4RV(); return {m_tree, m_tree->prev_sibling(m_id)}; }
-    NodeRef const prev_sibling() const { _C4RV(); return {m_tree, m_tree->prev_sibling(m_id)}; }
+    void _relocate(substr next_arena);
 
-    NodeRef       next_sibling()       { _C4RV(); return {m_tree, m_tree->next_sibling(m_id)}; }
-    NodeRef const next_sibling() const { _C4RV(); return {m_tree, m_tree->next_sibling(m_id)}; }
+public:
 
-    /** O(#num_children) */
-    size_t  num_children() const { _C4RV(); return m_tree->num_children(m_id); }
-    size_t  child_pos(NodeRef const& n) const { _C4RV(); return m_tree->child_pos(m_id, n.m_id); }
-    NodeRef       first_child()       { _C4RV(); return {m_tree, m_tree->first_child(m_id)}; }
-    NodeRef const first_child() const { _C4RV(); return {m_tree, m_tree->first_child(m_id)}; }
-    NodeRef       last_child ()       { _C4RV(); return {m_tree, m_tree->last_child (m_id)}; }
-    NodeRef const last_child () const { _C4RV(); return {m_tree, m_tree->last_child (m_id)}; }
-    NodeRef       child(size_t pos)       { _C4RV(); return {m_tree, m_tree->child(m_id, pos)}; }
-    NodeRef const child(size_t pos) const { _C4RV(); return {m_tree, m_tree->child(m_id, pos)}; }
-    NodeRef       find_child(csubstr name)       { _C4RV(); return {m_tree, m_tree->find_child(m_id, name)}; }
-    NodeRef const find_child(csubstr name) const { _C4RV(); return {m_tree, m_tree->find_child(m_id, name)}; }
+    /** @cond dev*/
 
-    /** O(#num_siblings) */
-    size_t  num_siblings() const { _C4RV(); return m_tree->num_siblings(m_id); }
-    size_t  num_other_siblings() const { _C4RV(); return m_tree->num_other_siblings(m_id); }
-    size_t  sibling_pos(NodeRef const& n) const { _C4RV(); return m_tree->child_pos(m_tree->parent(m_id), n.m_id); }
-    NodeRef       first_sibling()       { _C4RV(); return {m_tree, m_tree->first_sibling(m_id)}; }
-    NodeRef const first_sibling() const { _C4RV(); return {m_tree, m_tree->first_sibling(m_id)}; }
-    NodeRef       last_sibling ()       { _C4RV(); return {m_tree, m_tree->last_sibling(m_id)}; }
-    NodeRef const last_sibling () const { _C4RV(); return {m_tree, m_tree->last_sibling(m_id)}; }
-    NodeRef       sibling(size_t pos)       { _C4RV(); return {m_tree, m_tree->sibling(m_id, pos)}; }
-    NodeRef const sibling(size_t pos) const { _C4RV(); return {m_tree, m_tree->sibling(m_id, pos)}; }
-    NodeRef       find_sibling(csubstr name)       { _C4RV(); return {m_tree, m_tree->find_sibling(m_id, name)}; }
-    NodeRef const find_sibling(csubstr name) const { _C4RV(); return {m_tree, m_tree->find_sibling(m_id, name)}; }
-
-    NodeRef       doc(size_t num)       { _C4RV(); return {m_tree, m_tree->doc(num)}; }
-    NodeRef const doc(size_t num) const { _C4RV(); return {m_tree, m_tree->doc(num)}; }
+    #if ! RYML_USE_ASSERT
+    C4_ALWAYS_INLINE void _check_next_flags(id_type, type_bits) {}
+    #else
+    void _check_next_flags(id_type node, type_bits f)
+    {
+        auto n = _p(node);
+        type_bits o = n->m_type; // old
+        C4_UNUSED(o);
+        if(f & MAP)
+        {
+            RYML_ASSERT_MSG((f & SEQ) == 0, "cannot mark simultaneously as map and seq");
+            RYML_ASSERT_MSG((f & VAL) == 0, "cannot mark simultaneously as map and val");
+            RYML_ASSERT_MSG((o & SEQ) == 0, "cannot turn a seq into a map; clear first");
+            RYML_ASSERT_MSG((o & VAL) == 0, "cannot turn a val into a map; clear first");
+        }
+        else if(f & SEQ)
+        {
+            RYML_ASSERT_MSG((f & MAP) == 0, "cannot mark simultaneously as seq and map");
+            RYML_ASSERT_MSG((f & VAL) == 0, "cannot mark simultaneously as seq and val");
+            RYML_ASSERT_MSG((o & MAP) == 0, "cannot turn a map into a seq; clear first");
+            RYML_ASSERT_MSG((o & VAL) == 0, "cannot turn a val into a seq; clear first");
+        }
+        if(f & KEY)
+        {
+            _RYML_CB_ASSERT(m_callbacks, !is_root(node));
+            auto pid = parent(node); C4_UNUSED(pid);
+            _RYML_CB_ASSERT(m_callbacks, is_map(pid));
+        }
+        if((f & VAL) && !is_root(node))
+        {
+            auto pid = parent(node); C4_UNUSED(pid);
+            _RYML_CB_ASSERT(m_callbacks, is_map(pid) || is_seq(pid));
+        }
+    }
+    #endif
 
-    /** @} */
+    inline void _set_flags(id_type node, NodeType_e f) { _check_next_flags(node, f); _p(node)->m_type = f; }
+    inline void _set_flags(id_type node, type_bits  f) { _check_next_flags(node, f); _p(node)->m_type = f; }
 
-public:
+    inline void _add_flags(id_type node, NodeType_e f) { NodeData *d = _p(node); type_bits fb = f |  d->m_type; _check_next_flags(node, fb); d->m_type = (NodeType_e) fb; }
+    inline void _add_flags(id_type node, type_bits  f) { NodeData *d = _p(node);                f |= d->m_type; _check_next_flags(node,  f); d->m_type = f; }
 
-    /** @name node modifiers */
-    /** @{ */
+    inline void _rem_flags(id_type node, NodeType_e f) { NodeData *d = _p(node); type_bits fb = d->m_type & ~f; _check_next_flags(node, fb); d->m_type = (NodeType_e) fb; }
+    inline void _rem_flags(id_type node, type_bits  f) { NodeData *d = _p(node);            f = d->m_type & ~f; _check_next_flags(node,  f); d->m_type = f; }
 
-    void change_type(NodeType t) { _C4RV(); m_tree->change_type(m_id, t); }
-    void set_type(NodeType t) { _C4RV(); m_tree->_set_flags(m_id, t); }
-    void set_key(csubstr key) { _C4RV(); m_tree->_set_key(m_id, key); }
-    void set_val(csubstr val) { _C4RV(); m_tree->_set_val(m_id, val); }
-    void set_key_tag(csubstr key_tag) { _C4RV(); m_tree->set_key_tag(m_id, key_tag); }
-    void set_val_tag(csubstr val_tag) { _C4RV(); m_tree->set_val_tag(m_id, val_tag); }
-    void set_key_anchor(csubstr key_anchor) { _C4RV(); m_tree->set_key_anchor(m_id, key_anchor); }
-    void set_val_anchor(csubstr val_anchor) { _C4RV(); m_tree->set_val_anchor(m_id, val_anchor); }
-    void set_key_ref(csubstr key_ref) { _C4RV(); m_tree->set_key_ref(m_id, key_ref); }
-    void set_val_ref(csubstr val_ref) { _C4RV(); m_tree->set_val_ref(m_id, val_ref); }
+    void _set_key(id_type node, csubstr key, type_bits more_flags=0) // assign only the key scalar, then flag the node as KEY
+    {
+        _p(node)->m_key.scalar = key;
+        _add_flags(node, KEY|more_flags); // more_flags is OR'ed in together with KEY
+    }
+    void _set_key(id_type node, NodeScalar const& key, type_bits more_flags=0) // overload replacing the whole m_key, not just its scalar
+    {
+        _p(node)->m_key = key;
+        _add_flags(node, KEY|more_flags); // more_flags is OR'ed in together with KEY
+    }
 
-    template<class T>
-    size_t set_key_serialized(T const& C4_RESTRICT k)
+    void _set_val(id_type node, csubstr val, type_bits more_flags=0) // assign only the val scalar, then flag the node as VAL
     {
-        _C4RV();
-        csubstr s = m_tree->to_arena(k);
-        m_tree->_set_key(m_id, s);
-        return s.len;
+        _RYML_CB_ASSERT(m_callbacks, num_children(node) == 0); // asserted precondition: the node has no children
+        _RYML_CB_ASSERT(m_callbacks, !is_seq(node) && !is_map(node)); // asserted precondition: the node is neither a seq nor a map
+        _p(node)->m_val.scalar = val;
+        _add_flags(node, VAL|more_flags); // more_flags is OR'ed in together with VAL
     }
-    template<class T>
-    size_t set_val_serialized(T const& C4_RESTRICT v)
+    void _set_val(id_type node, NodeScalar const& val, type_bits more_flags=0) // overload replacing the whole m_val, not just its scalar
     {
-        _C4RV();
-        csubstr s = m_tree->to_arena(v);
-        m_tree->_set_val(m_id, s);
-        return s.len;
+        _RYML_CB_ASSERT(m_callbacks, num_children(node) == 0); // asserted precondition: the node has no children
+        _RYML_CB_ASSERT(m_callbacks,  ! is_container(node)); // asserted precondition: the node is not a container
+        _p(node)->m_val = val;
+        _add_flags(node, VAL|more_flags); // more_flags is OR'ed in together with VAL
     }
 
-    /** encode a blob as base64, then assign the result to the node's key
-     * @return the size of base64-encoded blob */
-    size_t set_key_serialized(fmt::const_base64_wrapper w);
-    /** encode a blob as base64, then assign the result to the node's val
-     * @return the size of base64-encoded blob */
-    size_t set_val_serialized(fmt::const_base64_wrapper w);
-
-public:
-
-    inline void clear()
-    {
-        if(is_seed())
-            return;
-        m_tree->remove_children(m_id);
-        m_tree->_clear(m_id);
-    }
-
-    inline void clear_key()
+    void _set(id_type node, NodeInit const& i) // apply the type, key and val carried by i to the node
     {
-        if(is_seed())
-            return;
-        m_tree->_clear_key(m_id);
+        _RYML_CB_ASSERT(m_callbacks, i._check());
+        NodeData *n = _p(node);
+        _RYML_CB_ASSERT(m_callbacks, n->m_key.scalar.empty() || i.key.scalar.empty() || i.key.scalar == n->m_key.scalar); // asserted: an existing key scalar is never replaced by a different one
+        _add_flags(node, i.type); // i.type is OR'ed into the current type, not assigned
+        if(n->m_key.scalar.empty()) // the key scalar is only set when the node has none yet
+        {
+            if( ! i.key.scalar.empty())
+            {
+                _set_key(node, i.key.scalar);
+            }
+        }
+        n->m_key.tag = i.key.tag; // the key tag is overwritten unconditionally
+        n->m_val = i.val; // the whole val is overwritten unconditionally
     }
 
-    inline void clear_val()
+    void _set_parent_as_container_if_needed(id_type in) // if 'in' is the only child of a parent that is neither seq nor map, flag that parent as MAP or SEQ
     {
-        if(is_seed())
-            return;
-        m_tree->_clear_val(m_id);
+        NodeData const* n = _p(in);
+        id_type ip = parent(in);
+        if(ip != NONE) // nothing to do for a node without parent
+        {
+            if( ! (is_seq(ip) || is_map(ip))) // parent already a container: leave it alone
+            {
+                if((in == first_child(ip)) && (in == last_child(ip))) // 'in' is both first and last child, i.e. the only child
+                {
+                    if( ! n->m_key.empty() || has_key(in)) // a child carrying a key turns the parent into a MAP
+                    {
+                        _add_flags(ip, MAP);
+                    }
+                    else
+                    {
+                        _add_flags(ip, SEQ); // a key-less child turns the parent into a SEQ
+                    }
+                }
+            }
+        }
     }
 
-    inline void clear_children()
+    void _seq2map(id_type node) // turn a SEQ node into a MAP; children that are not keyval get KEY and a key copied from their val
     {
-        if(is_seed())
-            return;
-        m_tree->remove_children(m_id);
+        _RYML_CB_ASSERT(m_callbacks, is_seq(node));
+        for(id_type i = first_child(node); i != NONE; i = next_sibling(i))
+        {
+            NodeData *C4_RESTRICT ch = _p(i);
+            if(ch->m_type.is_keyval()) // child is already keyval: leave it untouched
+                continue;
+            ch->m_type.add(KEY);
+            ch->m_key = ch->m_val; // the child's val is reused as its key
+        }
+        auto *C4_RESTRICT n = _p(node);
+        n->m_type.rem(SEQ); // swap the container flag on the node itself
+        n->m_type.add(MAP);
     }
 
-    /** @} */
-
-public:
+    id_type _do_reorder(id_type *node, id_type count);
 
-    /** hierarchy getters */
-    /** @{ */
+    void _swap(id_type n_, id_type m_);
+    void _swap_props(id_type n_, id_type m_);
+    void _swap_hierarchy(id_type n_, id_type m_);
+    void _copy_hierarchy(id_type dst_, id_type src_);
 
-    /** O(num_children) */
-    NodeRef operator[] (csubstr k)
+    inline void _copy_props(id_type dst_, id_type src_) // same-tree convenience overload
     {
-        RYML_ASSERT( ! is_seed());
-        RYML_ASSERT(valid());
-        size_t ch = m_tree->find_child(m_id, k);
-        NodeRef r = ch != NONE ? NodeRef(m_tree, ch) : NodeRef(m_tree, m_id, k);
-        return r;
+        _copy_props(dst_, this, src_); // forwards to the overload taking a source tree, passing this tree as source
     }
 
-    /** O(num_children) */
-    NodeRef const operator[] (csubstr k) const
+    inline void _copy_props_wo_key(id_type dst_, id_type src_) // same-tree convenience overload
     {
-        RYML_ASSERT( ! is_seed());
-        RYML_ASSERT(valid());
-        size_t ch = m_tree->find_child(m_id, k);
-        RYML_ASSERT(ch != NONE);
-        NodeRef const r(m_tree, ch);
-        return r;
+        _copy_props_wo_key(dst_, this, src_); // forwards to the overload taking a source tree, passing this tree as source
     }
 
-    /** O(num_children) */
-    NodeRef operator[] (size_t pos)
+    void _copy_props(id_type dst_, Tree const* that_tree, id_type src_) // copy type, key and val from node src_ of that_tree into node dst_ of this tree
     {
-        RYML_ASSERT( ! is_seed());
-        RYML_ASSERT(valid());
-        size_t ch = m_tree->child(m_id, pos);
-        NodeRef r = ch != NONE ? NodeRef(m_tree, ch) : NodeRef(m_tree, m_id, pos);
-        return r;
+        auto      & C4_RESTRICT dst = *_p(dst_);
+        auto const& C4_RESTRICT src = *that_tree->_p(src_); // the source node is looked up in that_tree, not in this tree
+        dst.m_type = src.m_type;
+        dst.m_key  = src.m_key;
+        dst.m_val  = src.m_val;
     }
 
-    /** O(num_children) */
-    NodeRef const operator[] (size_t pos) const
+    void _copy_props(id_type dst_, Tree const* that_tree, id_type src_, type_bits src_mask) // like the unmasked overload, but only the type bits selected by src_mask are taken from src
     {
-        RYML_ASSERT( ! is_seed());
-        RYML_ASSERT(valid());
-        size_t ch = m_tree->child(m_id, pos);
-        RYML_ASSERT(ch != NONE);
-        NodeRef const r(m_tree, ch);
-        return r;
+        auto      & C4_RESTRICT dst = *_p(dst_);
+        auto const& C4_RESTRICT src = *that_tree->_p(src_);
+        dst.m_type = (src.m_type & src_mask) | (dst.m_type & ~src_mask); // bits inside src_mask come from src; all other bits are kept from dst
+        dst.m_key  = src.m_key; // key and val are copied whole, regardless of the mask
+        dst.m_val  = src.m_val;
     }
 
-    /** @} */
-
-public:
-
-    /** node modification */
-    /** @{ */
-
-    void create() { _apply_seed(); }
-
-    inline void operator= (NodeType_e t)
+    void _copy_props_wo_key(id_type dst_, Tree const* that_tree, id_type src_) // copy type and val from src, leaving dst's key and its _KEYMASK type bits alone
     {
-        _apply_seed();
-        m_tree->_add_flags(m_id, t);
+        auto      & C4_RESTRICT dst = *_p(dst_);
+        auto const& C4_RESTRICT src = *that_tree->_p(src_);
+        dst.m_type = (src.m_type & ~_KEYMASK) | (dst.m_type & _KEYMASK); // src supplies the bits outside _KEYMASK; dst keeps its own _KEYMASK bits
+        dst.m_val  = src.m_val; // only the val is copied; dst.m_key is not assigned here
     }
 
-    inline void operator|= (NodeType_e t)
+    void _copy_props_wo_key(id_type dst_, Tree const* that_tree, id_type src_, type_bits src_mask) // masked variant: copies val and merges type bits; dst.m_key is not assigned
     {
-        _apply_seed();
-        m_tree->_add_flags(m_id, t);
+        auto      & C4_RESTRICT dst = *_p(dst_);
+        auto const& C4_RESTRICT src = *that_tree->_p(src_);
+        dst.m_type = (src.m_type & ((~_KEYMASK)|src_mask)) | (dst.m_type & (_KEYMASK|~src_mask)); // src gives bits outside _KEYMASK or inside src_mask; dst gives bits inside _KEYMASK or outside src_mask. NOTE(review): the two selections are not complementary, so some bits are OR'ed from both sides - confirm intended
+        dst.m_val  = src.m_val;
     }
 
-    inline void operator= (NodeInit const& v)
+    inline void _clear_type(id_type node) // reset only the node's type
     {
-        _apply_seed();
-        _apply(v);
+        _p(node)->m_type = NOTYPE; // written directly, without going through _set_flags/_check_next_flags
     }
 
-    inline void operator= (NodeScalar const& v)
+    inline void _clear(id_type node) // reset the node's type, key, val, parent link and first/last child links
     {
-        _apply_seed();
-        _apply(v);
+        auto *C4_RESTRICT n = _p(node);
+        n->m_type = NOTYPE;
+        n->m_key.clear();
+        n->m_val.clear();
+        n->m_parent = NONE; // only the link fields of this node are reset; other nodes are not modified here
+        n->m_first_child = NONE;
+        n->m_last_child = NONE;
     }
 
-    inline void operator= (csubstr v)
+    inline void _clear_key(id_type node) // clear the node's key and drop its KEY flag
     {
-        _apply_seed();
-        _apply(v);
+        _p(node)->m_key.clear();
+        _rem_flags(node, KEY); // only the KEY bit is removed from the type
     }
 
-    template<size_t N>
-    inline void operator= (const char (&v)[N])
+    inline void _clear_val(id_type node) // clear the node's val and drop its VAL flag
     {
-        _apply_seed();
-        csubstr sv;
-        sv.assign<N>(v);
-        _apply(sv);
+        _p(node)->m_val.clear();
+        _rem_flags(node, VAL); // only the VAL bit is removed from the type
     }
 
-    /** @} */
-
-public:
+    /** @endcond */
 
-    /** serialize a variable to the arena */
-    template<class T>
-    inline csubstr to_arena(T const& C4_RESTRICT s) const
-    {
-        _C4RV();
-        return m_tree->to_arena(s);
-    }
+private:
 
-    /** serialize a variable, then assign the result to the node's val */
-    inline NodeRef& operator<< (csubstr s)
-    {
-        // this overload is needed to prevent ambiguity (there's also
-        // operator<< for writing a substr to a stream)
-        _apply_seed();
-        write(this, s);
-        RYML_ASSERT(val() == s);
-        return *this;
-    }
+    void _clear_range(id_type first, id_type num);
 
-    template<class T>
-    inline NodeRef& operator<< (T const& C4_RESTRICT v)
-    {
-        _apply_seed();
-        write(this, v);
-        return *this;
-    }
+public:
+    id_type _claim();
+private:
+    void   _claim_root();
+    void   _release(id_type node);
+    void   _free_list_add(id_type node);
+    void   _free_list_rem(id_type node);
 
-    template<class T>
-    inline NodeRef const& operator>> (T &v) const
-    {
-        RYML_ASSERT( ! is_seed());
-        RYML_ASSERT(valid());
-        RYML_ASSERT(get() != nullptr);
-        if( ! read(*this, &v))
-        {
-            c4::yml::error("could not deserialize value");
-        }
-        return *this;
-    }
+    void _set_hierarchy(id_type node, id_type parent, id_type after_sibling);
+    void _rem_hierarchy(id_type node);
 
 public:
 
-    /** serialize a variable, then assign the result to the node's key */
-    template<class T>
-    inline NodeRef& operator<< (Key<const T> const& C4_RESTRICT v)
-    {
-        _apply_seed();
-        set_key_serialized(v.k);
-        return *this;
-    }
+    // members are exposed, but you should NOT access them directly
 
-    /** serialize a variable, then assign the result to the node's key */
-    template<class T>
-    inline NodeRef& operator<< (Key<T> const& C4_RESTRICT v)
-    {
-        _apply_seed();
-        set_key_serialized(v.k);
-        return *this;
-    }
+    NodeData *m_buf;
+    id_type   m_cap;
 
-    /** deserialize the node's key to the given variable */
-    template<class T>
-    inline NodeRef const& operator>> (Key<T> v) const
-    {
-        RYML_ASSERT( ! is_seed());
-        RYML_ASSERT(valid());
-        RYML_ASSERT(get() != nullptr);
-        from_chars(key(), &v.k);
-        return *this;
-    }
+    id_type m_size;
 
-public:
+    id_type m_free_head;
+    id_type m_free_tail;
 
-    NodeRef& operator<< (Key<fmt::const_base64_wrapper> w)
-    {
-        set_key_serialized(w.wrapper);
-        return *this;
-    }
+    substr m_arena;
+    size_t m_arena_pos;
 
-    NodeRef& operator<< (fmt::const_base64_wrapper w)
-    {
-        set_val_serialized(w);
-        return *this;
-    }
+    Callbacks m_callbacks;
 
-    NodeRef const& operator>> (Key<fmt::base64_wrapper> w) const
-    {
-        deserialize_key(w.wrapper);
-        return *this;
-    }
+    TagDirective m_tag_directives[RYML_MAX_TAG_DIRECTIVES];
 
-    NodeRef const& operator>> (fmt::base64_wrapper w) const
-    {
-        deserialize_val(w);
-        return *this;
-    }
+};
 
-public:
+/** @} */
 
-    template<class T>
-    void get_if(csubstr name, T *var) const
-    {
-        auto ch = find_child(name);
-        if(ch.valid())
-        {
-            ch >> *var;
-        }
-    }
+} // namespace yml
+} // namespace c4
 
-    template<class T>
-    void get_if(csubstr name, T *var, T fallback) const
-    {
-        auto ch = find_child(name);
-        if(ch.valid())
-        {
-            ch >> *var;
-        }
-        else
-        {
-            *var = fallback;
-        }
-    }
 
-private:
+C4_SUPPRESS_WARNING_MSVC_POP
+C4_SUPPRESS_WARNING_GCC_CLANG_POP
 
-    void _apply_seed()
-    {
-        if(m_seed.str) // we have a seed key: use it to create the new child
-        {
-            //RYML_ASSERT(i.key.scalar.empty() || m_key == i.key.scalar || m_key.empty());
-            m_id = m_tree->append_child(m_id);
-            m_tree->_set_key(m_id, m_seed);
-            m_seed.str = nullptr;
-            m_seed.len = NONE;
-        }
-        else if(m_seed.len != NONE) // we have a seed index: create a child at that position
-        {
-            RYML_ASSERT(m_tree->num_children(m_id) == m_seed.len);
-            m_id = m_tree->append_child(m_id);
-            m_seed.str = nullptr;
-            m_seed.len = NONE;
-        }
-        else
-        {
-            RYML_ASSERT(valid());
-        }
-    }
 
-    inline void _apply(csubstr v)
-    {
-        m_tree->_set_val(m_id, v);
-    }
+#endif /* _C4_YML_TREE_HPP_ */
 
-    inline void _apply(NodeScalar const& v)
-    {
-        m_tree->_set_val(m_id, v);
-    }
 
-    inline void _apply(NodeInit const& i)
-    {
-        m_tree->_set(m_id, i);
-    }
-
-public:
-
-    inline NodeRef insert_child(NodeRef after)
-    {
-        _C4RV();
-        RYML_ASSERT(after.m_tree == m_tree);
-        NodeRef r(m_tree, m_tree->insert_child(m_id, after.m_id));
-        return r;
-    }
-
-    inline NodeRef insert_child(NodeInit const& i, NodeRef after)
-    {
-        _C4RV();
-        RYML_ASSERT(after.m_tree == m_tree);
-        NodeRef r(m_tree, m_tree->insert_child(m_id, after.m_id));
-        r._apply(i);
-        return r;
-    }
-
-    inline NodeRef prepend_child()
-    {
-        _C4RV();
-        NodeRef r(m_tree, m_tree->insert_child(m_id, NONE));
-        return r;
-    }
-
-    inline NodeRef prepend_child(NodeInit const& i)
-    {
-        _C4RV();
-        NodeRef r(m_tree, m_tree->insert_child(m_id, NONE));
-        r._apply(i);
-        return r;
-    }
-
-    inline NodeRef append_child()
-    {
-        _C4RV();
-        NodeRef r(m_tree, m_tree->append_child(m_id));
-        return r;
-    }
-
-    inline NodeRef append_child(NodeInit const& i)
-    {
-        _C4RV();
-        NodeRef r(m_tree, m_tree->append_child(m_id));
-        r._apply(i);
-        return r;
-    }
-
-public:
-
-    inline NodeRef insert_sibling(NodeRef const after)
-    {
-        _C4RV();
-        RYML_ASSERT(after.m_tree == m_tree);
-        NodeRef r(m_tree, m_tree->insert_sibling(m_id, after.m_id));
-        return r;
-    }
-
-    inline NodeRef insert_sibling(NodeInit const& i, NodeRef const after)
-    {
-        _C4RV();
-        RYML_ASSERT(after.m_tree == m_tree);
-        NodeRef r(m_tree, m_tree->insert_sibling(m_id, after.m_id));
-        r._apply(i);
-        return r;
-    }
-
-    inline NodeRef prepend_sibling()
-    {
-        _C4RV();
-        NodeRef r(m_tree, m_tree->prepend_sibling(m_id));
-        return r;
-    }
-
-    inline NodeRef prepend_sibling(NodeInit const& i)
-    {
-        _C4RV();
-        NodeRef r(m_tree, m_tree->prepend_sibling(m_id));
-        r._apply(i);
-        return r;
-    }
-
-    inline NodeRef append_sibling()
-    {
-        _C4RV();
-        NodeRef r(m_tree, m_tree->append_sibling(m_id));
-        return r;
-    }
-
-    inline NodeRef append_sibling(NodeInit const& i)
-    {
-        _C4RV();
-        NodeRef r(m_tree, m_tree->append_sibling(m_id));
-        r._apply(i);
-        return r;
-    }
-
-public:
-
-    inline void remove_child(NodeRef & child)
-    {
-        _C4RV();
-        RYML_ASSERT(has_child(child));
-        RYML_ASSERT(child.parent().id() == id());
-        m_tree->remove(child.id());
-        child.clear();
-    }
-
-    //! remove the nth child of this node
-    inline void remove_child(size_t pos)
-    {
-        _C4RV();
-        RYML_ASSERT(pos >= 0 && pos < num_children());
-        size_t child = m_tree->child(m_id, pos);
-        RYML_ASSERT(child != NONE);
-        m_tree->remove(child);
-    }
-
-    //! remove a child by name
-    inline void remove_child(csubstr key)
-    {
-        _C4RV();
-        size_t child = m_tree->find_child(m_id, key);
-        RYML_ASSERT(child != NONE);
-        m_tree->remove(child);
-    }
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp)
 
-public:
 
-    /** change the node's position within its parent */
-    inline void move(NodeRef const after)
-    {
-        _C4RV();
-        m_tree->move(m_id, after.m_id);
-    }
 
-    /** move the node to a different parent, which may belong to a different
-     * tree. When this is the case, then this node's tree pointer is reset to
-     * the tree of the parent node. */
-    inline void move(NodeRef const parent, NodeRef const after)
-    {
-        _C4RV();
-        RYML_ASSERT(parent.m_tree == after.m_tree);
-        if(parent.m_tree == m_tree)
-        {
-            m_tree->move(m_id, parent.m_id, after.m_id);
-        }
-        else
-        {
-            parent.m_tree->move(m_tree, m_id, parent.m_id, after.m_id);
-            m_tree = parent.m_tree;
-        }
-    }
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/c4/yml/node.hpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
 
-    inline NodeRef duplicate(NodeRef const parent, NodeRef const after) const
-    {
-        _C4RV();
-        RYML_ASSERT(parent.m_tree == after.m_tree);
-        if(parent.m_tree == m_tree)
-        {
-            size_t dup = m_tree->duplicate(m_id, parent.m_id, after.m_id);
-            NodeRef r(m_tree, dup);
-            return r;
-        }
-        else
-        {
-            size_t dup = parent.m_tree->duplicate(m_tree, m_id, parent.m_id, after.m_id);
-            NodeRef r(parent.m_tree, dup);
-            return r;
-        }
-    }
+#ifndef _C4_YML_NODE_HPP_
+#define _C4_YML_NODE_HPP_
 
-    inline void duplicate_children(NodeRef const parent, NodeRef const after) const
-    {
-        _C4RV();
-        RYML_ASSERT(parent.m_tree == after.m_tree);
-        if(parent.m_tree == m_tree)
-        {
-            m_tree->duplicate_children(m_id, parent.m_id, after.m_id);
-        }
-        else
-        {
-            parent.m_tree->duplicate_children(m_tree, m_id, parent.m_id, after.m_id);
-        }
-    }
+/** @file node.hpp Node classes */
 
-private:
+//included above:
+//#include <cstddef>
 
-    template<class Nd>
-    struct child_iterator
-    {
-        Tree * m_tree;
-        size_t m_child_id;
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp
+//#include "c4/yml/tree.hpp"
+#if !defined(C4_YML_TREE_HPP_) && !defined(_C4_YML_TREE_HPP_)
+#error "amalgamate: file c4/yml/tree.hpp must have been included at this point"
+#endif /* C4_YML_TREE_HPP_ */
 
-        using value_type = NodeRef;
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/base64.hpp
+//#include "c4/base64.hpp"
+#if !defined(C4_BASE64_HPP_) && !defined(_C4_BASE64_HPP_)
+#error "amalgamate: file c4/base64.hpp must have been included at this point"
+#endif /* C4_BASE64_HPP_ */
 
-        child_iterator(Tree * t, size_t id) : m_tree(t), m_child_id(id) {}
 
-        child_iterator& operator++ () { RYML_ASSERT(m_child_id != NONE); m_child_id = m_tree->next_sibling(m_child_id); return *this; }
-        child_iterator& operator-- () { RYML_ASSERT(m_child_id != NONE); m_child_id = m_tree->prev_sibling(m_child_id); return *this; }
+#ifdef __clang__
+#   pragma clang diagnostic push
+#   pragma clang diagnostic ignored "-Wtype-limits"
+#   pragma clang diagnostic ignored "-Wold-style-cast"
+#elif defined(__GNUC__)
+#   pragma GCC diagnostic push
+#   pragma GCC diagnostic ignored "-Wtype-limits"
+#   pragma GCC diagnostic ignored "-Wold-style-cast"
+#   pragma GCC diagnostic ignored "-Wuseless-cast"
+#elif defined(_MSC_VER)
+#   pragma warning(push)
+#   pragma warning(disable: 4251/*needs to have dll-interface to be used by clients of struct*/)
+#   pragma warning(disable: 4296/*expression is always 'boolean_value'*/)
+#endif
 
-        Nd operator*  () const { return Nd(m_tree, m_child_id); }
-        Nd operator-> () const { return Nd(m_tree, m_child_id); }
+namespace c4 {
+namespace yml {
 
-        bool operator!= (child_iterator that) const { RYML_ASSERT(m_tree == that.m_tree); return m_child_id != that.m_child_id; }
-        bool operator== (child_iterator that) const { RYML_ASSERT(m_tree == that.m_tree); return m_child_id == that.m_child_id; }
-    };
+/** @addtogroup doc_node_classes
+ *
+ * @{
+ */
 
-public:
 
-    using       iterator = child_iterator<      NodeRef>;
-    using const_iterator = child_iterator<const NodeRef>;
+/** @defgroup doc_serialization_helpers Serialization helpers
+ *
+ * @{
+ */
+template<class K> struct Key { K & k; }; // generic form: holds a reference to the wrapped object, not a copy
+template<> struct Key<fmt::const_base64_wrapper> { fmt::const_base64_wrapper wrapper; }; // specialization: holds the base64 wrapper by value
+template<> struct Key<fmt::base64_wrapper> { fmt::base64_wrapper wrapper; }; // specialization: holds the base64 wrapper by value
 
-    inline iterator begin() { return iterator(m_tree, m_tree->first_child(m_id)); }
-    inline iterator end  () { return iterator(m_tree, NONE); }
+template<class K> C4_ALWAYS_INLINE Key<K> key(K & k) { return Key<K>{k}; } // build a Key<K> referring to k
+C4_ALWAYS_INLINE Key<fmt::const_base64_wrapper> key(fmt::const_base64_wrapper w) { return {w}; } // build a Key holding the const base64 wrapper by value
+C4_ALWAYS_INLINE Key<fmt::base64_wrapper> key(fmt::base64_wrapper w) { return {w}; } // build a Key holding the base64 wrapper by value
 
-    inline const_iterator begin() const { return const_iterator(m_tree, m_tree->first_child(m_id)); }
-    inline const_iterator end  () const { return const_iterator(m_tree, NONE); }
+template<class T> void write(NodeRef *n, T const& v);
 
-private:
+template<class T>
+typename std::enable_if< ! std::is_floating_point<T>::value, bool>::type
+read(NodeRef const& n, T *v);
 
-    template<class Nd>
-    struct children_view_
-    {
-        using n_iterator = child_iterator<Nd>;
+template<class T>
+typename std::enable_if<   std::is_floating_point<T>::value, bool>::type
+read(NodeRef const& n, T *v);
 
-        n_iterator b, e;
+/** @} */
 
-        inline children_view_(n_iterator const& b_, n_iterator const& e_) : b(b_), e(e_) {}
 
-        inline n_iterator begin() const { return b; }
-        inline n_iterator end  () const { return e; }
-    };
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
 
-public:
+// forward decls
+class NodeRef;
+class ConstNodeRef;
 
-    using       children_view = children_view_<      NodeRef>;
-    using const_children_view = children_view_<const NodeRef>;
 
-          children_view children()       { return       children_view(begin(), end()); }
-    const_children_view children() const { return const_children_view(begin(), end()); }
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
 
-    #if defined(__clang__)
-    #   pragma clang diagnostic push
-    #   pragma clang diagnostic ignored "-Wnull-dereference"
-    #elif defined(__GNUC__)
-    #   pragma GCC diagnostic push
-    #   if __GNUC__ >= 6
-    #       pragma GCC diagnostic ignored "-Wnull-dereference"
-    #   endif
-    #endif
+/** @cond dev */
+namespace detail {
 
-          children_view siblings()       { if(is_root()) { return       children_view(end(), end()); } else { size_t p = get()->m_parent; return       children_view(iterator(m_tree, m_tree->get(p)->m_first_child), iterator(m_tree, NONE)); } }
-    const_children_view siblings() const { if(is_root()) { return const_children_view(end(), end()); } else { size_t p = get()->m_parent; return const_children_view(const_iterator(m_tree, m_tree->get(p)->m_first_child), const_iterator(m_tree, NONE)); } }
+template<class NodeRefType>
+struct child_iterator // walks sibling node ids within one tree, yielding NodeRefType handles
+{
+    using value_type = NodeRefType;
+    using tree_type = typename NodeRefType::tree_type; // the tree type is taken from the node reference type
 
-    #if defined(__clang__)
-    #   pragma clang diagnostic pop
-    #elif defined(__GNUC__)
-    #   pragma GCC diagnostic pop
-    #endif
+    tree_type * C4_RESTRICT m_tree;
+    id_type m_child_id; // id of the current node; NONE is used by the asserts below as the invalid position
 
-public:
+    child_iterator(tree_type * t, id_type id) : m_tree(t), m_child_id(id) {}
 
-    /** visit every child node calling fn(node) */
-    template<class Visitor> bool visit(Visitor fn, size_t indentation_level=0, bool skip_root=true);
-    /** visit every child node calling fn(node) */
-    template<class Visitor> bool visit(Visitor fn, size_t indentation_level=0, bool skip_root=true) const;
+    child_iterator& operator++ () { RYML_ASSERT(m_child_id != NONE); m_child_id = m_tree->next_sibling(m_child_id); return *this; } // step to the next sibling; asserts the current id is not NONE
+    child_iterator& operator-- () { RYML_ASSERT(m_child_id != NONE); m_child_id = m_tree->prev_sibling(m_child_id); return *this; } // step to the previous sibling; asserts the current id is not NONE
 
-    /** visit every child node calling fn(node, level) */
-    template<class Visitor> bool visit_stacked(Visitor fn, size_t indentation_level=0, bool skip_root=true);
-    /** visit every child node calling fn(node, level) */
-    template<class Visitor> bool visit_stacked(Visitor fn, size_t indentation_level=0, bool skip_root=true) const;
+    NodeRefType operator*  () const { return NodeRefType(m_tree, m_child_id); } // builds a fresh NodeRefType for the current id on each call
+    NodeRefType operator-> () const { return NodeRefType(m_tree, m_child_id); } // returns a NodeRefType by value, same as operator*
 
-#undef _C4RV
+    bool operator!= (child_iterator that) const { RYML_ASSERT(m_tree == that.m_tree); return m_child_id != that.m_child_id; } // compares ids only; asserts both iterators refer to the same tree
+    bool operator== (child_iterator that) const { RYML_ASSERT(m_tree == that.m_tree); return m_child_id == that.m_child_id; } // compares ids only; asserts both iterators refer to the same tree
 };
 
-//-----------------------------------------------------------------------------
-template<class T>
-inline void write(NodeRef *n, T const& v)
-{
-    n->set_val_serialized(v);
-}
-
-template<class T>
-typename std::enable_if< ! std::is_floating_point<T>::value, bool>::type
-inline read(NodeRef const& n, T *v)
+template<class NodeRefType>
+struct children_view_ // pairs a begin and an end child_iterator so the range can be used in a range-based for
 {
-    return from_chars(n.val(), v);
-}
+    using n_iterator = child_iterator<NodeRefType>;
 
-template<class T>
-typename std::enable_if< std::is_floating_point<T>::value, bool>::type
-inline read(NodeRef const& n, T *v)
-{
-    return from_chars_float(n.val(), v);
-}
+    n_iterator b, e; // begin and end of the range, stored by value
 
+    inline children_view_(n_iterator const& C4_RESTRICT b_,
+                          n_iterator const& C4_RESTRICT e_) : b(b_), e(e_) {} // copies both iterators into the view
 
-//-----------------------------------------------------------------------------
-template<class Visitor>
-bool NodeRef::visit(Visitor fn, size_t indentation_level, bool skip_root)
-{
-    return const_cast<NodeRef const*>(this)->visit(fn, indentation_level, skip_root);
-}
+    inline n_iterator begin() const { return b; } // returns a copy of the stored begin iterator
+    inline n_iterator end  () const { return e; } // returns a copy of the stored end iterator
+};
 
-template<class Visitor>
-bool NodeRef::visit(Visitor fn, size_t indentation_level, bool skip_root) const
+template<class NodeRefType, class Visitor>
+bool _visit(NodeRefType &node, Visitor fn, id_type indentation_level, bool skip_root=false)
 {
-    size_t increment = 0;
-    if( ! (is_root() && skip_root))
+    id_type increment = 0;
+    if( ! (node.is_root() && skip_root))
     {
-        if(fn(this, indentation_level))
-        {
+        if(fn(node, indentation_level))
             return true;
-        }
         ++increment;
     }
-    if(has_children())
+    if(node.has_children())
     {
-        for(auto ch : children())
+        for(auto ch : node.children())
         {
-            if(ch.visit(fn, indentation_level + increment)) // no need to forward skip_root as it won't be root
+            if(_visit(ch, fn, indentation_level + increment, false)) // no need to forward skip_root as it won't be root
             {
                 return true;
             }
@@ -20462,515 +22821,803 @@ bool NodeRef::visit(Visitor fn, size_t indentation_level, bool skip_root) const
     return false;
 }
 
-
-template<class Visitor>
-bool NodeRef::visit_stacked(Visitor fn, size_t indentation_level, bool skip_root)
-{
-    return const_cast< NodeRef const* >(this)->visit_stacked(fn, indentation_level, skip_root);
-}
-
-template<class Visitor>
-bool NodeRef::visit_stacked(Visitor fn, size_t indentation_level, bool skip_root) const
+template<class NodeRefType, class Visitor>
+bool _visit_stacked(NodeRefType &node, Visitor fn, id_type indentation_level, bool skip_root=false)
 {
-    size_t increment = 0;
-    if( ! (is_root() && skip_root))
+    id_type increment = 0;
+    if( ! (node.is_root() && skip_root))
     {
-        if(fn(this, indentation_level))
+        if(fn(node, indentation_level))
         {
             return true;
         }
         ++increment;
     }
-    if(has_children())
+    if(node.has_children())
     {
-        fn.push(this, indentation_level);
-        for(auto ch : children())
+        fn.push(node, indentation_level);
+        for(auto ch : node.children())
         {
-            if(ch.visit(fn, indentation_level + increment)) // no need to forward skip_root as it won't be root
+            if(_visit_stacked(ch, fn, indentation_level + increment, false)) // no need to forward skip_root as it won't be root
             {
-                fn.pop(this, indentation_level);
+                fn.pop(node, indentation_level);
                 return true;
             }
         }
-        fn.pop(this, indentation_level);
+        fn.pop(node, indentation_level);
     }
     return false;
 }
 
-} // namespace yml
-} // namespace c4
+template<class Impl, class ConstImpl>
+struct RoNodeMethods;
+} // detail
+/** @endcond */
 
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
 
-#if defined(_MSC_VER)
-#   pragma warning(pop)
-#endif
 
-#ifdef __GNUC__
-#   pragma GCC diagnostic pop
-#endif
+/** a CRTP base providing read-only methods for @ref ConstNodeRef and @ref NodeRef */
+namespace detail {
+template<class Impl, class ConstImpl>
+struct RoNodeMethods
+{
+    C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wcast-align")
+    /** @cond dev */
+    // helper CRTP macros, undefined at the end
+    #define tree_ ((ConstImpl const* C4_RESTRICT)this)->m_tree
+    #define id_ ((ConstImpl const* C4_RESTRICT)this)->m_id
+    #define tree__ ((Impl const* C4_RESTRICT)this)->m_tree
+    #define id__ ((Impl const* C4_RESTRICT)this)->m_id
+    // require readable: this is a precondition for reading from the
+    // tree using this object.
+    #define _C4RR()                                       \
+        RYML_ASSERT(tree_ != nullptr);                    \
+        _RYML_CB_ASSERT(tree_->m_callbacks, id_ != NONE); \
+        _RYML_CB_ASSERT(tree_->m_callbacks, (((Impl const* C4_RESTRICT)this)->readable()))
+    // a SFINAE beautifier to enable a function only if the
+    // implementation is mutable
+    #define _C4_IF_MUTABLE(ty) typename std::enable_if<!std::is_same<U, ConstImpl>::value, ty>::type
+    /** @endcond */
 
-#endif /* _C4_YML_NODE_HPP_ */
+public:
 
+    /** @name node property getters */
+    /** @{ */
 
-// (end https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp)
+    /** returns the data or null when the id is NONE */
+    C4_ALWAYS_INLINE NodeData const* get() const RYML_NOEXCEPT { return ((Impl const*)this)->readable() ? tree_->get(id_) : nullptr; }
+    /** returns the data or null when the id is NONE */
+    template<class U=Impl>
+    C4_ALWAYS_INLINE auto get() RYML_NOEXCEPT -> _C4_IF_MUTABLE(NodeData*) { return ((Impl const*)this)->readable() ? tree__->get(id__) : nullptr; }
 
+    C4_ALWAYS_INLINE NodeType    type()     const RYML_NOEXCEPT { _C4RR(); return tree_->type(id_); }     /**< Forward to @ref Tree::type_str(). Node must be readable. */
+    C4_ALWAYS_INLINE const char* type_str() const RYML_NOEXCEPT { _C4RR(); return tree_->type_str(id_); } /**< Forward to @ref Tree::type_str(). Node must be readable. */
 
+    C4_ALWAYS_INLINE csubstr key()        const RYML_NOEXCEPT { _C4RR(); return tree_->key(id_); }        /**< Forward to @ref Tree::key(). Node must be readable. */
+    C4_ALWAYS_INLINE csubstr key_tag()    const RYML_NOEXCEPT { _C4RR(); return tree_->key_tag(id_); }    /**< Forward to @ref Tree::key_tag(). Node must be readable. */
+    C4_ALWAYS_INLINE csubstr key_ref()    const RYML_NOEXCEPT { _C4RR(); return tree_->key_ref(id_); }    /**< Forward to @ref Tree::key_ref(). Node must be readable. */
+    C4_ALWAYS_INLINE csubstr key_anchor() const RYML_NOEXCEPT { _C4RR(); return tree_->key_anchor(id_); } /**< Forward to @ref Tree::key_anchor(). Node must be readable. */
 
-//********************************************************************************
-//--------------------------------------------------------------------------------
-// src/c4/yml/writer.hpp
-// https://github.com/biojppm/rapidyaml/src/c4/yml/writer.hpp
-//--------------------------------------------------------------------------------
-//********************************************************************************
+    C4_ALWAYS_INLINE csubstr val()        const RYML_NOEXCEPT { _C4RR(); return tree_->val(id_); }        /**< Forward to @ref Tree::val(). Node must be readable. */
+    C4_ALWAYS_INLINE csubstr val_tag()    const RYML_NOEXCEPT { _C4RR(); return tree_->val_tag(id_); }    /**< Forward to @ref Tree::val_tag(). Node must be readable. */
+    C4_ALWAYS_INLINE csubstr val_ref()    const RYML_NOEXCEPT { _C4RR(); return tree_->val_ref(id_); }    /**< Forward to @ref Tree::val_ref(). Node must be readable. */
+    C4_ALWAYS_INLINE csubstr val_anchor() const RYML_NOEXCEPT { _C4RR(); return tree_->val_anchor(id_); } /**< Forward to @ref Tree::val_anchor(). Node must be readable. */
 
-#ifndef _C4_YML_WRITER_HPP_
-#define _C4_YML_WRITER_HPP_
+    C4_ALWAYS_INLINE NodeScalar const& keysc() const RYML_NOEXCEPT { _C4RR(); return tree_->keysc(id_); } /**< Forward to @ref Tree::keysc(). Node must be readable. */
+    C4_ALWAYS_INLINE NodeScalar const& valsc() const RYML_NOEXCEPT { _C4RR(); return tree_->valsc(id_); } /**< Forward to @ref Tree::valsc(). Node must be readable. */
 
-#ifndef _C4_YML_COMMON_HPP_
-#include "./common.hpp"
-#endif
+    C4_ALWAYS_INLINE bool key_is_null() const RYML_NOEXCEPT { _C4RR(); return tree_->key_is_null(id_); } /**< Forward to @ref Tree::key_is_null(). Node must be readable. */
+    C4_ALWAYS_INLINE bool val_is_null() const RYML_NOEXCEPT { _C4RR(); return tree_->val_is_null(id_); } /**< Forward to @ref Tree::val_is_null(). Node must be readable. */
 
-// amalgamate: removed include of
-// https://github.com/biojppm/rapidyaml/src/c4/substr.hpp
-//#include <c4/substr.hpp>
-#if !defined(C4_SUBSTR_HPP_) && !defined(_C4_SUBSTR_HPP_)
-#error "amalgamate: file c4/substr.hpp must have been included at this point"
-#endif /* C4_SUBSTR_HPP_ */
+    C4_ALWAYS_INLINE bool is_key_unfiltered() const noexcept { _C4RR(); return tree_->is_key_unfiltered(id_); } /**< Forward to @ref Tree::is_key_unfiltered(). Node must be readable. */
+    C4_ALWAYS_INLINE bool is_val_unfiltered() const noexcept { _C4RR(); return tree_->is_val_unfiltered(id_); } /**< Forward to @ref Tree::is_val_unfiltered(). Node must be readable. */
 
-//included above:
-//#include <stdio.h>  // fwrite(), fputc()
-//included above:
-//#include <string.h> // memcpy()
+    /** @} */
 
+public:
 
-namespace c4 {
-namespace yml {
+    /** @name node type predicates */
+    /** @{ */
 
+    C4_ALWAYS_INLINE bool empty()            const RYML_NOEXCEPT { _C4RR(); return tree_->empty(id_); } /**< Forward to @ref Tree::empty(). Node must be readable. */
+    C4_ALWAYS_INLINE bool is_stream()        const RYML_NOEXCEPT { _C4RR(); return tree_->is_stream(id_); } /**< Forward to @ref Tree::is_stream(). Node must be readable. */
+    C4_ALWAYS_INLINE bool is_doc()           const RYML_NOEXCEPT { _C4RR(); return tree_->is_doc(id_); } /**< Forward to @ref Tree::is_doc(). Node must be readable. */
+    C4_ALWAYS_INLINE bool is_container()     const RYML_NOEXCEPT { _C4RR(); return tree_->is_container(id_); } /**< Forward to @ref Tree::is_container(). Node must be readable. */
+    C4_ALWAYS_INLINE bool is_map()           const RYML_NOEXCEPT { _C4RR(); return tree_->is_map(id_); } /**< Forward to @ref Tree::is_map(). Node must be readable. */
+    C4_ALWAYS_INLINE bool is_seq()           const RYML_NOEXCEPT { _C4RR(); return tree_->is_seq(id_); } /**< Forward to @ref Tree::is_seq(). Node must be readable. */
+    C4_ALWAYS_INLINE bool has_val()          const RYML_NOEXCEPT { _C4RR(); return tree_->has_val(id_); } /**< Forward to @ref Tree::has_val(). Node must be readable. */
+    C4_ALWAYS_INLINE bool has_key()          const RYML_NOEXCEPT { _C4RR(); return tree_->has_key(id_); } /**< Forward to @ref Tree::has_key(). Node must be readable. */
+    C4_ALWAYS_INLINE bool is_val()           const RYML_NOEXCEPT { _C4RR(); return tree_->is_val(id_); } /**< Forward to @ref Tree::is_val(). Node must be readable. */
+    C4_ALWAYS_INLINE bool is_keyval()        const RYML_NOEXCEPT { _C4RR(); return tree_->is_keyval(id_); } /**< Forward to @ref Tree::is_keyval(). Node must be readable. */
+    C4_ALWAYS_INLINE bool has_key_tag()      const RYML_NOEXCEPT { _C4RR(); return tree_->has_key_tag(id_); } /**< Forward to @ref Tree::has_key_tag(). Node must be readable. */
+    C4_ALWAYS_INLINE bool has_val_tag()      const RYML_NOEXCEPT { _C4RR(); return tree_->has_val_tag(id_); } /**< Forward to @ref Tree::has_val_tag(). Node must be readable. */
+    C4_ALWAYS_INLINE bool has_key_anchor()   const RYML_NOEXCEPT { _C4RR(); return tree_->has_key_anchor(id_); } /**< Forward to @ref Tree::has_key_anchor(). Node must be readable. */
+    C4_ALWAYS_INLINE bool has_val_anchor()   const RYML_NOEXCEPT { _C4RR(); return tree_->has_val_anchor(id_); } /**< Forward to @ref Tree::has_val_anchor(). Node must be readable. */
+    C4_ALWAYS_INLINE bool has_anchor()       const RYML_NOEXCEPT { _C4RR(); return tree_->has_anchor(id_); } /**< Forward to @ref Tree::has_anchor(). Node must be readable. */
+    C4_ALWAYS_INLINE bool is_key_ref()       const RYML_NOEXCEPT { _C4RR(); return tree_->is_key_ref(id_); } /**< Forward to @ref Tree::is_key_ref(). Node must be readable. */
+    C4_ALWAYS_INLINE bool is_val_ref()       const RYML_NOEXCEPT { _C4RR(); return tree_->is_val_ref(id_); } /**< Forward to @ref Tree::is_val_ref(). Node must be readable. */
+    C4_ALWAYS_INLINE bool is_ref()           const RYML_NOEXCEPT { _C4RR(); return tree_->is_ref(id_); } /**< Forward to @ref Tree::is_ref(). Node must be readable. */
+    C4_ALWAYS_INLINE bool parent_is_seq()    const RYML_NOEXCEPT { _C4RR(); return tree_->parent_is_seq(id_); } /**< Forward to @ref Tree::parent_is_seq(). Node must be readable. */
+    C4_ALWAYS_INLINE bool parent_is_map()    const RYML_NOEXCEPT { _C4RR(); return tree_->parent_is_map(id_); } /**< Forward to @ref Tree::parent_is_map(). Node must be readable. */
+
+    RYML_DEPRECATED("use has_key_anchor()")  bool is_key_anchor() const noexcept { _C4RR(); return tree_->has_key_anchor(id_); }
+    RYML_DEPRECATED("use has_val_anchor()")  bool is_val_hanchor() const noexcept { _C4RR(); return tree_->has_val_anchor(id_); }
+    RYML_DEPRECATED("use has_anchor()")      bool is_anchor()     const noexcept { _C4RR(); return tree_->has_anchor(id_); }
+    RYML_DEPRECATED("use has_anchor() || is_ref()") bool is_anchor_or_ref() const noexcept { _C4RR(); return tree_->is_anchor_or_ref(id_); }
 
-/** Repeat-Character: a character to be written a number of times. */
-struct RepC
-{
-    char c;
-    size_t num_times;
-};
-inline RepC indent_to(size_t num_levels)
-{
-    return {' ', size_t(2) * num_levels};
-}
+    /** @} */
 
+public:
 
-//-----------------------------------------------------------------------------
-//-----------------------------------------------------------------------------
-//-----------------------------------------------------------------------------
-/** A writer that outputs to a file. Defaults to stdout. */
-struct WriterFile
-{
-    FILE * m_file;
-    size_t m_pos;
+    /** @name node container+scalar style predicates */
+    /** @{ */
 
-    WriterFile(FILE *f = nullptr) : m_file(f ? f : stdout), m_pos(0) {}
+    // documentation to the right -->
+
+    C4_ALWAYS_INLINE bool type_has_any(NodeType_e bits)  const RYML_NOEXCEPT { _C4RR(); return tree_->type_has_any(id_, bits); }  /**< Forward to @ref Tree::type_has_any(). Node must be readable. */
+    C4_ALWAYS_INLINE bool type_has_all(NodeType_e bits)  const RYML_NOEXCEPT { _C4RR(); return tree_->type_has_all(id_, bits); }  /**< Forward to @ref Tree::type_has_all(). Node must be readable. */
+    C4_ALWAYS_INLINE bool type_has_none(NodeType_e bits) const RYML_NOEXCEPT { _C4RR(); return tree_->type_has_none(id_, bits); } /**< Forward to @ref Tree::type_has_none(). Node must be readable. */
+
+    C4_ALWAYS_INLINE bool is_container_styled() const RYML_NOEXCEPT { _C4RR(); return tree_->is_container_styled(id_); } /**< Forward to @ref Tree::is_container_styled(). Node must be readable. */
+    C4_ALWAYS_INLINE bool is_block()            const RYML_NOEXCEPT { _C4RR(); return tree_->is_block(id_); }   /**< Forward to @ref Tree::is_block(). Node must be readable. */
+    C4_ALWAYS_INLINE bool is_flow_sl()          const RYML_NOEXCEPT { _C4RR(); return tree_->is_flow_sl(id_); } /**< Forward to @ref Tree::is_flow_sl(). Node must be readable. */
+    C4_ALWAYS_INLINE bool is_flow_ml()          const RYML_NOEXCEPT { _C4RR(); return tree_->is_flow_ml(id_); } /**< Forward to @ref Tree::is_flow_ml(). Node must be readable. */
+    C4_ALWAYS_INLINE bool is_flow()             const RYML_NOEXCEPT { _C4RR(); return tree_->is_flow(id_); }    /**< Forward to @ref Tree::is_flow(). Node must be readable. */
+
+    C4_ALWAYS_INLINE bool is_key_styled()       const RYML_NOEXCEPT { _C4RR(); return tree_->is_key_styled(id_); }  /**< Forward to @ref Tree::is_key_styled(). Node must be readable. */
+    C4_ALWAYS_INLINE bool is_val_styled()       const RYML_NOEXCEPT { _C4RR(); return tree_->is_val_styled(id_); }  /**< Forward to @ref Tree::is_val_styled(). Node must be readable. */
+    C4_ALWAYS_INLINE bool is_key_literal()      const RYML_NOEXCEPT { _C4RR(); return tree_->is_key_literal(id_); } /**< Forward to @ref Tree::is_key_literal(). Node must be readable. */
+    C4_ALWAYS_INLINE bool is_val_literal()      const RYML_NOEXCEPT { _C4RR(); return tree_->is_val_literal(id_); } /**< Forward to @ref Tree::is_val_literal(). Node must be readable. */
+    C4_ALWAYS_INLINE bool is_key_folded()       const RYML_NOEXCEPT { _C4RR(); return tree_->is_key_folded(id_); }  /**< Forward to @ref Tree::is_key_folded(). Node must be readable. */
+    C4_ALWAYS_INLINE bool is_val_folded()       const RYML_NOEXCEPT { _C4RR(); return tree_->is_val_folded(id_); }  /**< Forward to @ref Tree::is_val_folded(). Node must be readable. */
+    C4_ALWAYS_INLINE bool is_key_squo()         const RYML_NOEXCEPT { _C4RR(); return tree_->is_key_squo(id_); }    /**< Forward to @ref Tree::is_key_squo(). Node must be readable. */
+    C4_ALWAYS_INLINE bool is_val_squo()         const RYML_NOEXCEPT { _C4RR(); return tree_->is_val_squo(id_); }    /**< Forward to @ref Tree::is_val_squo(). Node must be readable. */
+    C4_ALWAYS_INLINE bool is_key_dquo()         const RYML_NOEXCEPT { _C4RR(); return tree_->is_key_dquo(id_); }    /**< Forward to @ref Tree::is_key_dquo(). Node must be readable. */
+    C4_ALWAYS_INLINE bool is_val_dquo()         const RYML_NOEXCEPT { _C4RR(); return tree_->is_val_dquo(id_); }    /**< Forward to @ref Tree::is_val_dquo(). Node must be readable. */
+    C4_ALWAYS_INLINE bool is_key_plain()        const RYML_NOEXCEPT { _C4RR(); return tree_->is_key_plain(id_); }   /**< Forward to @ref Tree::is_key_plain(). Node must be readable. */
+    C4_ALWAYS_INLINE bool is_val_plain()        const RYML_NOEXCEPT { _C4RR(); return tree_->is_val_plain(id_); }   /**< Forward to @ref Tree::is_val_plain(). Node must be readable. */
+    C4_ALWAYS_INLINE bool is_key_quoted()       const RYML_NOEXCEPT { _C4RR(); return tree_->is_key_quoted(id_); }  /**< Forward to @ref Tree::is_key_quoted(). Node must be readable. */
+    C4_ALWAYS_INLINE bool is_val_quoted()       const RYML_NOEXCEPT { _C4RR(); return tree_->is_val_quoted(id_); }  /**< Forward to @ref Tree::is_val_quoted(). Node must be readable. */
+    C4_ALWAYS_INLINE bool is_quoted()           const RYML_NOEXCEPT { _C4RR(); return tree_->is_quoted(id_); }      /**< Forward to @ref Tree::is_quoted(). Node must be readable. */
 
-    inline substr _get(bool /*error_on_excess*/)
+    /** @} */
+
+public:
+
+    /** @name hierarchy predicates */
+    /** @{ */
+
+    // documentation to the right -->
+
+    C4_ALWAYS_INLINE bool is_root()    const RYML_NOEXCEPT { _C4RR(); return tree_->is_root(id_); } /**< Forward to @ref Tree::is_root(). Node must be readable. */
+    C4_ALWAYS_INLINE bool has_parent() const RYML_NOEXCEPT { _C4RR(); return tree_->has_parent(id_); } /**< Forward to @ref Tree::has_parent()  Node must be readable. */
+
+    C4_ALWAYS_INLINE bool has_child(ConstImpl const& n) const RYML_NOEXCEPT { _C4RR(); return n.readable() ? tree_->has_child(id_, n.m_id) : false; } /**< Forward to @ref Tree::has_child(). Node must be readable. */
+    C4_ALWAYS_INLINE bool has_child(id_type node) const RYML_NOEXCEPT { _C4RR(); return tree_->has_child(id_, node); } /**< Forward to @ref Tree::has_child(). Node must be readable. */
+    C4_ALWAYS_INLINE bool has_child(csubstr name) const RYML_NOEXCEPT { _C4RR(); return tree_->has_child(id_, name); } /**< Forward to @ref Tree::has_child(). Node must be readable. */
+    C4_ALWAYS_INLINE bool has_children() const RYML_NOEXCEPT { _C4RR(); return tree_->has_children(id_); } /**< Forward to @ref Tree::has_child(). Node must be readable. */
+
+    C4_ALWAYS_INLINE bool has_sibling(ConstImpl const& n) const RYML_NOEXCEPT { _C4RR(); return n.readable() ? tree_->has_sibling(id_, n.m_id) : false; } /**< Forward to @ref Tree::has_sibling(). Node must be readable. */
+    C4_ALWAYS_INLINE bool has_sibling(id_type node) const RYML_NOEXCEPT { _C4RR(); return tree_->has_sibling(id_, node); } /**< Forward to @ref Tree::has_sibling(). Node must be readable. */
+    C4_ALWAYS_INLINE bool has_sibling(csubstr name) const RYML_NOEXCEPT { _C4RR(); return tree_->has_sibling(id_, name); } /**< Forward to @ref Tree::has_sibling(). Node must be readable. */
+    C4_ALWAYS_INLINE bool has_other_siblings() const RYML_NOEXCEPT { _C4RR(); return tree_->has_other_siblings(id_); }  /**< Forward to @ref Tree::has_sibling(). Node must be readable. */
+
+    RYML_DEPRECATED("use has_other_siblings()") bool has_siblings() const RYML_NOEXCEPT { _C4RR(); return tree_->has_siblings(id_); }
+
+    /** @} */
+
+public:
+
+    /** @name hierarchy getters */
+    /** @{ */
+
+    // documentation to the right -->
+
+    template<class U=Impl>
+    C4_ALWAYS_INLINE auto doc(id_type i) RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { RYML_ASSERT(tree_); return {tree__, tree__->doc(i)}; } /**< Forward to @ref Tree::doc(). Node must be readable. */
+    /** succeeds even when the node may have invalid or seed id */
+    C4_ALWAYS_INLINE ConstImpl doc(id_type i) const RYML_NOEXCEPT { RYML_ASSERT(tree_); return {tree_, tree_->doc(i)}; }                /**< Forward to @ref Tree::doc(). Node must be readable. */
+
+    template<class U=Impl>
+    C4_ALWAYS_INLINE auto parent() RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->parent(id__)}; } /**< Forward to @ref Tree::parent(). Node must be readable. */
+    C4_ALWAYS_INLINE ConstImpl parent() const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->parent(id_)}; }                 /**< Forward to @ref Tree::parent(). Node must be readable. */
+
+    template<class U=Impl>
+    C4_ALWAYS_INLINE auto first_child() RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->first_child(id__)}; }  /**< Forward to @ref Tree::first_child(). Node must be readable. */
+    C4_ALWAYS_INLINE ConstImpl first_child() const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->first_child(id_)}; }                  /**< Forward to @ref Tree::first_child(). Node must be readable. */
+
+    template<class U=Impl>
+    C4_ALWAYS_INLINE auto last_child() RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->last_child(id__)}; }  /**< Forward to @ref Tree::last_child(). Node must be readable. */
+    C4_ALWAYS_INLINE ConstImpl last_child () const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->last_child (id_)}; }                /**< Forward to @ref Tree::last_child(). Node must be readable. */
+
+    template<class U=Impl>
+    C4_ALWAYS_INLINE auto child(id_type pos) RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->child(id__, pos)}; }  /**< Forward to @ref Tree::child(). Node must be readable. */
+    C4_ALWAYS_INLINE ConstImpl child(id_type pos) const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->child(id_, pos)}; }                  /**< Forward to @ref Tree::child(). Node must be readable. */
+
+    template<class U=Impl>
+    C4_ALWAYS_INLINE auto find_child(csubstr name)  RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->find_child(id__, name)}; }  /**< Forward to @ref Tree::first_child(). Node must be readable. */
+    C4_ALWAYS_INLINE ConstImpl find_child(csubstr name) const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->find_child(id_, name)}; }                   /**< Forward to @ref Tree::first_child(). Node must be readable. */
+
+    template<class U=Impl>
+    C4_ALWAYS_INLINE auto prev_sibling() RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->prev_sibling(id__)}; }  /**< Forward to @ref Tree::prev_sibling(). Node must be readable. */
+    C4_ALWAYS_INLINE ConstImpl prev_sibling() const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->prev_sibling(id_)}; }                  /**< Forward to @ref Tree::prev_sibling(). Node must be readable. */
+
+    template<class U=Impl>
+    C4_ALWAYS_INLINE auto next_sibling() RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->next_sibling(id__)}; }  /**< Forward to @ref Tree::next_sibling(). Node must be readable. */
+    C4_ALWAYS_INLINE ConstImpl next_sibling() const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->next_sibling(id_)}; }                  /**< Forward to @ref Tree::next_sibling(). Node must be readable. */
+
+    template<class U=Impl>
+    C4_ALWAYS_INLINE auto first_sibling() RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->first_sibling(id__)}; }  /**< Forward to @ref Tree::first_sibling(). Node must be readable. */
+    C4_ALWAYS_INLINE ConstImpl first_sibling() const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->first_sibling(id_)}; }                  /**< Forward to @ref Tree::first_sibling(). Node must be readable. */
+
+    template<class U=Impl>
+    C4_ALWAYS_INLINE auto last_sibling() RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->last_sibling(id__)}; }  /**< Forward to @ref Tree::last_sibling(). Node must be readable. */
+    C4_ALWAYS_INLINE ConstImpl last_sibling () const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->last_sibling(id_)}; }                 /**< Forward to @ref Tree::last_sibling(). Node must be readable. */
+
+    template<class U=Impl>
+    C4_ALWAYS_INLINE auto sibling(id_type pos) RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->sibling(id__, pos)}; }  /**< Forward to @ref Tree::sibling(). Node must be readable. */
+    C4_ALWAYS_INLINE ConstImpl sibling(id_type pos) const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->sibling(id_, pos)}; }                  /**< Forward to @ref Tree::sibling(). Node must be readable. */
+
+    template<class U=Impl>
+    C4_ALWAYS_INLINE auto find_sibling(csubstr name) RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl) { _C4RR(); return {tree__, tree__->find_sibling(id__, name)}; }  /**< Forward to @ref Tree::find_sibling(). Node must be readable. */
+    C4_ALWAYS_INLINE ConstImpl find_sibling(csubstr name) const RYML_NOEXCEPT { _C4RR(); return {tree_, tree_->find_sibling(id_, name)}; }                  /**< Forward to @ref Tree::find_sibling(). Node must be readable. */
+
+    C4_ALWAYS_INLINE id_type num_children() const RYML_NOEXCEPT { _C4RR(); return tree_->num_children(id_); } /**< O(num_children). Forward to @ref Tree::num_children(). */
+    C4_ALWAYS_INLINE id_type num_siblings() const RYML_NOEXCEPT { _C4RR(); return tree_->num_siblings(id_); } /**< O(num_children). Forward to @ref Tree::num_siblings(). */
+    C4_ALWAYS_INLINE id_type num_other_siblings() const RYML_NOEXCEPT { _C4RR(); return tree_->num_other_siblings(id_); } /**< O(num_siblings). Forward to @ref Tree::num_other_siblings(). */
+    C4_ALWAYS_INLINE id_type child_pos(ConstImpl const& n) const RYML_NOEXCEPT { _C4RR(); _RYML_CB_ASSERT(tree_->m_callbacks, n.readable()); return tree_->child_pos(id_, n.m_id); } /**< O(num_children). Forward to @ref Tree::child_pos(). */
+    C4_ALWAYS_INLINE id_type sibling_pos(ConstImpl const& n) const RYML_NOEXCEPT { _C4RR(); _RYML_CB_ASSERT(tree_->callbacks(), n.readable()); return tree_->child_pos(tree_->parent(id_), n.m_id); } /**< O(num_siblings). Forward to @ref Tree::sibling_pos(). */
+
+    C4_ALWAYS_INLINE id_type depth_asc() const RYML_NOEXCEPT { _C4RR(); return tree_->depth_asc(id_); } /** O(log(num_nodes)). Forward to Tree::depth_asc(). Node must be readable. */
+    C4_ALWAYS_INLINE id_type depth_desc() const RYML_NOEXCEPT { _C4RR(); return tree_->depth_desc(id_); } /** O(num_nodes). Forward to Tree::depth_desc(). Node must be readable. */
+
+    /** @} */
+
+public:
+
+    /** @name square_brackets
+     * operator[] */
+    /** @{ */
+
+    /** Find child by key; complexity is O(num_children).
+     *
+     * Returns the requested node, or an object in seed state if no
+     * such child is found (see @ref NodeRef for an explanation of
+     * what is seed state). When the object is in seed state, using it
+     * to read from the tree is UB. The seed node can be used to write
+     * to the tree provided that its create() method is called prior
+     * to writing, which happens in most modifying methods in
+     * NodeRef. It is the caller's responsibility to verify that the
+     * returned node is readable before subsequently using it to read
+     * from the tree.
+     *
+     * @warning the calling object must be readable. This precondition
+     * is asserted. The assertion is performed only if @ref
+     * RYML_USE_ASSERT is set to true. As with the non-const overload,
+     * it is UB to call this method if the node is not readable.
+     *
+     * @see https://github.com/biojppm/rapidyaml/issues/389 */
+    template<class U=Impl>
+    C4_ALWAYS_INLINE auto operator[] (csubstr key) RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl)
     {
-        substr sp;
-        sp.str = nullptr;
-        sp.len = m_pos;
-        return sp;
+        _C4RR();
+        id_type ch = tree__->find_child(id__, key);
+        return ch != NONE ? Impl(tree__, ch) : Impl(tree__, id__, key);
     }
 
-    template<size_t N>
-    inline void _do_write(const char (&a)[N])
+    /** Find child by position; complexity is O(pos).
+     *
+     * Returns the requested node, or an object in seed state if no
+     * such child is found (see @ref NodeRef for an explanation of
+     * what is seed state). When the object is in seed state, using it
+     * to read from the tree is UB. The seed node can be used to write
+     * to the tree provided that its create() method is called prior
+     * to writing, which happens in most modifying methods in
+     * NodeRef. It is the caller's responsibility to verify that the
+     * returned node is readable before subsequently using it to read
+     * from the tree.
+     *
+     * @warning the calling object must be readable. This precondition
+     * is asserted. The assertion is performed only if @ref
+     * RYML_USE_ASSERT is set to true. As with the non-const overload,
+     * it is UB to call this method if the node is not readable.
+     *
+     * @see https://github.com/biojppm/rapidyaml/issues/389 */
+    template<class U=Impl>
+    C4_ALWAYS_INLINE auto operator[] (id_type pos) RYML_NOEXCEPT -> _C4_IF_MUTABLE(Impl)
     {
-        fwrite(a, sizeof(char), N - 1, m_file);
-        m_pos += N - 1;
+        _C4RR();
+        id_type ch = tree__->child(id__, pos);
+        return ch != NONE ? Impl(tree__, ch) : Impl(tree__, id__, pos);
     }
 
-    inline void _do_write(csubstr sp)
+    /** Find a child by key; complexity is O(num_children).
+     *
+     * Behaves similar to the non-const overload, but further asserts
+     * that the returned node is readable (because it can never be in
+     * a seed state). The assertion is performed only if @ref
+     * RYML_USE_ASSERT is set to true. As with the non-const overload,
+     * it is UB to use the return value if it is not valid.
+     *
+     * @see https://github.com/biojppm/rapidyaml/issues/389  */
+    C4_ALWAYS_INLINE ConstImpl operator[] (csubstr key) const RYML_NOEXCEPT
     {
-        #if defined(__clang__)
-        #   pragma clang diagnostic push
-        #   pragma GCC diagnostic ignored "-Wsign-conversion"
-        #elif defined(__GNUC__)
-        #   pragma GCC diagnostic push
-        #   pragma GCC diagnostic ignored "-Wsign-conversion"
-        #endif
-        if(sp.empty()) return;
-        fwrite(sp.str, sizeof(csubstr::char_type), sp.len, m_file);
-        m_pos += sp.len;
-        #if defined(__clang__)
-        #   pragma clang diagnostic pop
-        #elif defined(__GNUC__)
-        #   pragma GCC diagnostic pop
-        #endif
+        _C4RR();
+        id_type ch = tree_->find_child(id_, key);
+        _RYML_CB_ASSERT(tree_->m_callbacks, ch != NONE);
+        return {tree_, ch};
     }
 
-    inline void _do_write(const char c)
+    /** Find a child by position; complexity is O(pos).
+     *
+     * Behaves similar to the non-const overload, but further asserts
+     * that the returned node is readable (because it can never be in
+     * a seed state). This assertion is performed only if @ref
+     * RYML_USE_ASSERT is set to true. As with the non-const overload,
+     * it is UB to use the return value if it is not valid.
+     *
+     * @see https://github.com/biojppm/rapidyaml/issues/389  */
+    C4_ALWAYS_INLINE ConstImpl operator[] (id_type pos) const RYML_NOEXCEPT
     {
-        fputc(c, m_file);
-        ++m_pos;
+        _C4RR();
+        id_type ch = tree_->child(id_, pos);
+        _RYML_CB_ASSERT(tree_->m_callbacks, ch != NONE);
+        return {tree_, ch};
     }
 
-    inline void _do_write(RepC const rc)
+    /** @} */
+
+public:
+
+    /** @name at
+     *
+     * These functions are the analogue to operator[], with the
+     * difference that they emit an error instead of an
+     * assertion. That is, if any of the pre or post conditions is
+     * violated, an error is always emitted (resulting in a call to
+     * the error callback).
+     *
+     * @{ */
+
+    /** Find child by key; complexity is O(num_children).
+     *
+     * Returns the requested node, or an object in seed state if no
+     * such child is found (see @ref NodeRef for an explanation of
+     * what is seed state). When the object is in seed state, using it
+     * to read from the tree is UB. The seed node can be subsequently
+     * used to write to the tree provided that its create() method is
+     * called prior to writing, which happens inside most mutating
+     * methods in NodeRef. It is the caller's responsibility to verify
+     * that the returned node is readable before subsequently using it
+     * to read from the tree.
+     *
+     * @warning This method will call the error callback (regardless
+     * of build type or of the value of RYML_USE_ASSERT) whenever any
+     * of the following preconditions is violated: a) the object is
+     * valid (points at a tree and a node), b) the calling object must
+     * be readable (must not be in seed state), c) the calling object
+     * must be pointing at a MAP node. The preconditions are similar
+     * to the non-const operator[](csubstr), but instead of using
+     * assertions, this function directly checks those conditions and
+     * calls the error callback if any of the checks fail.
+     *
+     * @note since it is valid behavior for the returned node to be in
+     * seed state, the error callback is not invoked when this
+     * happens. */
+    template<class U=Impl>
+    C4_ALWAYS_INLINE auto at(csubstr key) -> _C4_IF_MUTABLE(Impl)
     {
-        for(size_t i = 0; i < rc.num_times; ++i)
-        {
-            fputc(rc.c, m_file);
-        }
-        m_pos += rc.num_times;
+        RYML_CHECK(tree_ != nullptr);
+        _RYML_CB_CHECK(tree_->m_callbacks, (id_ >= 0 && id_ < tree_->capacity()));
+        _RYML_CB_CHECK(tree_->m_callbacks, ((Impl const*)this)->readable());
+        _RYML_CB_CHECK(tree_->m_callbacks, tree_->is_map(id_));
+        id_type ch = tree__->find_child(id__, key);
+        return ch != NONE ? Impl(tree__, ch) : Impl(tree__, id__, key);
     }
-};
 
+    /** Find child by position; complexity is O(pos).
+     *
+     * Returns the requested node, or an object in seed state if no
+     * such child is found (see @ref NodeRef for an explanation of
+     * what is seed state). When the object is in seed state, using it
+     * to read from the tree is UB. The seed node can be used to write
+     * to the tree provided that its create() method is called prior
+     * to writing, which happens in most modifying methods in
+     * NodeRef. It is the caller's responsibility to verify that the
+     * returned node is readable before subsequently using it to read
+     * from the tree.
+     *
+     * @warning This method will call the error callback (regardless
+     * of build type or of the value of RYML_USE_ASSERT) whenever any
+     * of the following preconditions is violated: a) the object is
+     * valid (points at a tree and a node), b) the calling object must
+     * be readable (must not be in seed state), c) the calling object
+     * must be pointing at a container node, d) pos must be smaller
+     * than the tree's capacity. Unlike the non-const
+     * operator[](id_type), which uses assertions, this function directly
+     * checks those conditions and calls the error callback on failure.
+     *
+     * @note since it is valid behavior for the returned node to be in
+     * seed state, the error callback is not invoked when this
+     * happens. */
+    template<class U=Impl>
+    C4_ALWAYS_INLINE auto at(id_type pos) -> _C4_IF_MUTABLE(Impl)
+    {
+        RYML_CHECK(tree_ != nullptr);
+        const id_type cap = tree_->capacity();
+        _RYML_CB_CHECK(tree_->m_callbacks, (id_ >= 0 && id_ < cap));
+        _RYML_CB_CHECK(tree_->m_callbacks, (pos >= 0 && pos < cap));
+        _RYML_CB_CHECK(tree_->m_callbacks, ((Impl const*)this)->readable());
+        _RYML_CB_CHECK(tree_->m_callbacks, tree_->is_container(id_));
+        id_type ch = tree__->child(id__, pos);
+        return ch != NONE ? Impl(tree__, ch) : Impl(tree__, id__, pos);
+    }
+
+    /** Get a child by name, with error checking; complexity is
+     * O(num_children).
+     *
+     * Behaves as operator[](csubstr) const, but always raises an
+     * error (even when RYML_USE_ASSERT is set to false) when the
+     * returned node does not exist, or when this node is not
+     * readable, or when it is not a map. This behaviour is similar to
+     * std::vector::at(), but the error consists in calling the error
+     * callback instead of directly raising an exception. */
+    ConstImpl at(csubstr key) const
+    {
+        RYML_CHECK(tree_ != nullptr);
+        _RYML_CB_CHECK(tree_->m_callbacks, (id_ >= 0 && id_ < tree_->capacity()));
+        _RYML_CB_CHECK(tree_->m_callbacks, ((Impl const*)this)->readable());
+        _RYML_CB_CHECK(tree_->m_callbacks, tree_->is_map(id_));
+        id_type ch = tree_->find_child(id_, key);
+        _RYML_CB_CHECK(tree_->m_callbacks, ch != NONE);
+        return {tree_, ch};
+    }
+
+    /** Get a child by position, with error checking; complexity is
+     * O(pos).
+     *
+     * Behaves as operator[](id_type) const, but always raises an error
+     * (even when RYML_USE_ASSERT is set to false) when the returned
+     * node does not exist, or when this node is not readable, or when
+     * it is not a container. This behaviour is similar to
+     * std::vector::at(), but the error consists in calling the error
+     * callback instead of directly raising an exception. */
+    ConstImpl at(id_type pos) const
+    {
+        RYML_CHECK(tree_ != nullptr);
+        const id_type cap = tree_->capacity();
+        _RYML_CB_CHECK(tree_->m_callbacks, (id_ >= 0 && id_ < cap));
+        _RYML_CB_CHECK(tree_->m_callbacks, (pos >= 0 && pos < cap));
+        _RYML_CB_CHECK(tree_->m_callbacks, ((Impl const*)this)->readable());
+        _RYML_CB_CHECK(tree_->m_callbacks, tree_->is_container(id_));
+        const id_type ch = tree_->child(id_, pos);
+        _RYML_CB_CHECK(tree_->m_callbacks, ch != NONE);
+        return {tree_, ch};
+    }
 
-//-----------------------------------------------------------------------------
-//-----------------------------------------------------------------------------
-//-----------------------------------------------------------------------------
-/** A writer that outputs to an STL-like ostream. */
-template<class OStream>
-struct WriterOStream
-{
-    OStream& m_stream;
-    size_t   m_pos;
+    /** @} */
 
-    WriterOStream(OStream &s) : m_stream(s), m_pos(0) {}
+public:
 
-    inline substr _get(bool /*error_on_excess*/)
+    /** @name deserialization */
+    /** @{ */
+
+    /** deserialize the node's val to the given variable, forwarding
+     * to the user-overrideable @ref read() function. */
+    template<class T>
+    ConstImpl const& operator>> (T &v) const
     {
-        substr sp;
-        sp.str = nullptr;
-        sp.len = m_pos;
-        return sp;
+        _C4RR();
+        if( ! read((ConstImpl const&)*this, &v))
+            _RYML_CB_ERR(tree_->m_callbacks, "could not deserialize value");
+        return *((ConstImpl const*)this);
     }
 
-    template<size_t N>
-    inline void _do_write(const char (&a)[N])
+    /** deserialize the node's key to the given variable, forwarding
+     * to the user-overrideable @ref read() function; use @ref key()
+     * to disambiguate; for example: `node >> ryml::key(var)` */
+    template<class T>
+    ConstImpl const& operator>> (Key<T> v) const
     {
-        m_stream.write(a, N - 1);
-        m_pos += N - 1;
+        _C4RR();
+        if(key().empty() || ! from_chars(key(), &v.k))
+            _RYML_CB_ERR(tree_->m_callbacks, "could not deserialize key");
+        return *((ConstImpl const*)this);
     }
 
-    inline void _do_write(csubstr sp)
+    /** deserialize the node's key as base64. lightweight wrapper over @ref deserialize_key() */
+    ConstImpl const& operator>> (Key<fmt::base64_wrapper> w) const
     {
-        #if defined(__clang__)
-        #   pragma clang diagnostic push
-        #   pragma GCC diagnostic ignored "-Wsign-conversion"
-        #elif defined(__GNUC__)
-        #   pragma GCC diagnostic push
-        #   pragma GCC diagnostic ignored "-Wsign-conversion"
-        #endif
-        if(sp.empty()) return;
-        m_stream.write(sp.str, sp.len);
-        m_pos += sp.len;
-        #if defined(__clang__)
-        #   pragma clang diagnostic pop
-        #elif defined(__GNUC__)
-        #   pragma GCC diagnostic pop
-        #endif
+        deserialize_key(w.wrapper);
+        return *((ConstImpl const*)this);
     }
 
-    inline void _do_write(const char c)
+    /** deserialize the node's val as base64. lightweight wrapper over @ref deserialize_val() */
+    ConstImpl const& operator>> (fmt::base64_wrapper w) const
     {
-        m_stream.put(c);
-        ++m_pos;
+        deserialize_val(w);
+        return *((ConstImpl const*)this);
     }
 
-    inline void _do_write(RepC const rc)
+    /** decode the base64-encoded key and assign the
+     * decoded blob to the given buffer.
+     * @return the size of base64-decoded blob */
+    size_t deserialize_key(fmt::base64_wrapper v) const
     {
-        for(size_t i = 0; i < rc.num_times; ++i)
-        {
-            m_stream.put(rc.c);
-        }
-        m_pos += rc.num_times;
+        _C4RR();
+        return from_chars(key(), &v);
     }
-};
-
-
-//-----------------------------------------------------------------------------
-//-----------------------------------------------------------------------------
-//-----------------------------------------------------------------------------
-/** a writer to a substr */
-struct WriterBuf
-{
-    substr m_buf;
-    size_t m_pos;
+    /** decode the base64-encoded val and assign the
+     * decoded blob to the given buffer.
+     * @return the size of base64-decoded blob */
+    size_t deserialize_val(fmt::base64_wrapper v) const
+    {
+        _C4RR();
+        return from_chars(val(), &v);
+    }
 
-    WriterBuf(substr sp) : m_buf(sp), m_pos(0) {}
+    /** look for a child by name, if it exists assign to var. return
+     * true if the child existed. */
+    template<class T>
+    bool get_if(csubstr name, T *var) const
+    {
+        _C4RR();
+        ConstImpl ch = find_child(name);
+        if(!ch.readable())
+            return false;
+        ch >> *var;
+        return true;
+    }
 
-    inline substr _get(bool error_on_excess)
+    /** look for a child by name, if it exists assign to var,
+     * otherwise default to fallback. return true if the child
+     * existed. */
+    template<class T>
+    bool get_if(csubstr name, T *var, T const& fallback) const
     {
-        if(m_pos <= m_buf.len)
+        _C4RR();
+        ConstImpl ch = find_child(name);
+        if(ch.readable())
         {
-            return m_buf.first(m_pos);
+            ch >> *var;
+            return true;
         }
-        if(error_on_excess)
+        else
         {
-            c4::yml::error("not enough space in the given buffer");
+            *var = fallback;
+            return false;
         }
-        substr sp;
-        sp.str = nullptr;
-        sp.len = m_pos;
-        return sp;
     }
 
-    template<size_t N>
-    inline void _do_write(const char (&a)[N])
-    {
-        RYML_ASSERT( ! m_buf.overlaps(a));
-        if(m_pos + N-1 <= m_buf.len)
-        {
-            memcpy(&(m_buf[m_pos]), a, N-1);
-        }
-        m_pos += N-1;
-    }
+    /** @} */
 
-    inline void _do_write(csubstr sp)
+public:
+
+    #if defined(__clang__)
+    #   pragma clang diagnostic push
+    #   pragma clang diagnostic ignored "-Wnull-dereference"
+    #elif defined(__GNUC__)
+    #   pragma GCC diagnostic push
+    #   if __GNUC__ >= 6
+    #       pragma GCC diagnostic ignored "-Wnull-dereference"
+    #   endif
+    #endif
+
+    /** @name iteration */
+    /** @{ */
+
+    using iterator = detail::child_iterator<Impl>;
+    using const_iterator = detail::child_iterator<ConstImpl>;
+    using children_view = detail::children_view_<Impl>;
+    using const_children_view = detail::children_view_<ConstImpl>;
+
+    /** get an iterator to the first child */
+    template<class U=Impl>
+    C4_ALWAYS_INLINE auto begin() RYML_NOEXCEPT -> _C4_IF_MUTABLE(iterator) { _C4RR(); return iterator(tree__, tree__->first_child(id__)); }
+    /** get an iterator to the first child */
+    C4_ALWAYS_INLINE const_iterator begin() const RYML_NOEXCEPT { _C4RR(); return const_iterator(tree_, tree_->first_child(id_)); }
+    /** get an iterator to the first child */
+    C4_ALWAYS_INLINE const_iterator cbegin() const RYML_NOEXCEPT { _C4RR(); return const_iterator(tree_, tree_->first_child(id_)); }
+
+    /** get an iterator to after the last child */
+    template<class U=Impl>
+    C4_ALWAYS_INLINE auto end() RYML_NOEXCEPT -> _C4_IF_MUTABLE(iterator) { _C4RR(); return iterator(tree__, NONE); }
+    /** get an iterator to after the last child */
+    C4_ALWAYS_INLINE const_iterator end() const RYML_NOEXCEPT { _C4RR(); return const_iterator(tree_, NONE); }
+    /** get a const iterator to after the last child (same position as end() const) */
+    C4_ALWAYS_INLINE const_iterator cend() const RYML_NOEXCEPT { _C4RR(); return const_iterator(tree_, NONE); }
+
+    /** get an iterable view over children */
+    template<class U=Impl>
+    C4_ALWAYS_INLINE auto children() RYML_NOEXCEPT -> _C4_IF_MUTABLE(children_view) { _C4RR(); return children_view(begin(), end()); }
+    /** get an iterable view over children */
+    C4_ALWAYS_INLINE const_children_view children() const RYML_NOEXCEPT { _C4RR(); return const_children_view(begin(), end()); }
+    /** get an iterable view over children */
+    C4_ALWAYS_INLINE const_children_view cchildren() const RYML_NOEXCEPT { _C4RR(); return const_children_view(begin(), end()); }
+
+    /** get an iterable view over all siblings (including the calling node) */
+    template<class U=Impl>
+    C4_ALWAYS_INLINE auto siblings() RYML_NOEXCEPT -> _C4_IF_MUTABLE(children_view)
+    {
+        _C4RR();
+        NodeData const *nd = tree__->get(id__);
+        return (nd->m_parent != NONE) ? // does it have a parent?
+            children_view(iterator(tree__, tree_->get(nd->m_parent)->m_first_child), iterator(tree__, NONE))
+            :
+            children_view(end(), end());
+    }
+    /** get an iterable view over all siblings (including the calling node) */
+    C4_ALWAYS_INLINE const_children_view siblings() const RYML_NOEXCEPT
+    {
+        _C4RR();
+        NodeData const *nd = tree_->get(id_);
+        return (nd->m_parent != NONE) ? // does it have a parent?
+            const_children_view(const_iterator(tree_, tree_->get(nd->m_parent)->m_first_child), const_iterator(tree_, NONE))
+            :
+            const_children_view(end(), end());
+    }
+    /** get an iterable view over all siblings (including the calling node) */
+    C4_ALWAYS_INLINE const_children_view csiblings() const RYML_NOEXCEPT { return siblings(); }
+
+    /** visit every child node calling fn(node) */
+    template<class Visitor>
+    bool visit(Visitor fn, id_type indentation_level=0, bool skip_root=true) const RYML_NOEXCEPT
     {
-        if(sp.empty()) return;
-        RYML_ASSERT( ! sp.overlaps(m_buf));
-        if(m_pos + sp.len <= m_buf.len)
-        {
-            memcpy(&(m_buf[m_pos]), sp.str, sp.len);
-        }
-        m_pos += sp.len;
+        _C4RR();
+        return detail::_visit(*(ConstImpl const*)this, fn, indentation_level, skip_root);
     }
-
-    inline void _do_write(const char c)
+    /** visit every child node calling fn(node) */
+    template<class Visitor, class U=Impl>
+    auto visit(Visitor fn, id_type indentation_level=0, bool skip_root=true) RYML_NOEXCEPT
+        -> _C4_IF_MUTABLE(bool)
     {
-        if(m_pos + 1 <= m_buf.len)
-        {
-            m_buf[m_pos] = c;
-        }
-        ++m_pos;
+        _C4RR();
+        return detail::_visit(*(Impl*)this, fn, indentation_level, skip_root);
     }
 
-    inline void _do_write(RepC const rc)
+    /** visit every child node calling fn(node, level) */
+    template<class Visitor>
+    bool visit_stacked(Visitor fn, id_type indentation_level=0, bool skip_root=true) const RYML_NOEXCEPT
     {
-        if(m_pos + rc.num_times <= m_buf.len)
-        {
-            for(size_t i = 0; i < rc.num_times; ++i)
-            {
-                m_buf[m_pos + i] = rc.c;
-            }
-        }
-        m_pos += rc.num_times;
+        _C4RR();
+        return detail::_visit_stacked(*(ConstImpl const*)this, fn, indentation_level, skip_root);
+    }
+    /** visit every child node calling fn(node, level) */
+    template<class Visitor, class U=Impl>
+    auto visit_stacked(Visitor fn, id_type indentation_level=0, bool skip_root=true) RYML_NOEXCEPT
+        -> _C4_IF_MUTABLE(bool)
+    {
+        _C4RR();
+        return detail::_visit_stacked(*(Impl*)this, fn, indentation_level, skip_root);
     }
-};
 
+    /** @} */
 
-} // namespace yml
-} // namespace c4
+    #if defined(__clang__)
+    #   pragma clang diagnostic pop
+    #elif defined(__GNUC__)
+    #   pragma GCC diagnostic pop
+    #endif
 
-#endif /* _C4_YML_WRITER_HPP_ */
+    #undef _C4_IF_MUTABLE
+    #undef _C4RR
+    #undef tree_
+    #undef tree__
+    #undef id_
+    #undef id__
 
+    C4_SUPPRESS_WARNING_GCC_CLANG_POP
+};
+} // detail
 
-// (end https://github.com/biojppm/rapidyaml/src/c4/yml/writer.hpp)
 
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+/** Holds a pointer to an existing tree, and a node id. It can be used
+ * only to read from the tree.
+ *
+ * @warning The lifetime of the tree must be larger than that of this
+ * object. It is up to the user to ensure that this happens. */
+class RYML_EXPORT ConstNodeRef : public detail::RoNodeMethods<ConstNodeRef, ConstNodeRef>
+{
+public:
 
+    using tree_type = Tree const;
 
-//********************************************************************************
-//--------------------------------------------------------------------------------
-// src/c4/yml/detail/parser_dbg.hpp
-// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/parser_dbg.hpp
-//--------------------------------------------------------------------------------
-//********************************************************************************
+public:
 
-#ifndef _C4_YML_DETAIL_PARSER_DBG_HPP_
-#define _C4_YML_DETAIL_PARSER_DBG_HPP_
+    Tree const* C4_RESTRICT m_tree;
+    id_type m_id;
 
-#ifndef _C4_YML_COMMON_HPP_
-#include "../common.hpp"
-#endif
-//included above:
-//#include <cstdio>
+    friend NodeRef;
+    friend struct detail::RoNodeMethods<ConstNodeRef, ConstNodeRef>;
 
-//-----------------------------------------------------------------------------
-// some debugging scaffolds
+public:
 
-#if defined(_MSC_VER)
-#   pragma warning(push)
-#   pragma warning(disable: 4068/*unknown pragma*/)
-#endif
+    /** @name construction */
+    /** @{ */
 
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wunknown-pragmas"
-//#pragma GCC diagnostic ignored "-Wpragma-system-header-outside-header"
-#pragma GCC system_header
+    ConstNodeRef() noexcept : m_tree(nullptr), m_id(NONE) {}
+    ConstNodeRef(Tree const &t) noexcept : m_tree(&t), m_id(t .root_id()) {}
+    ConstNodeRef(Tree const *t) noexcept : m_tree(t ), m_id(t->root_id()) {}
+    ConstNodeRef(Tree const *t, id_type id) noexcept : m_tree(t), m_id(id) {}
+    ConstNodeRef(std::nullptr_t) noexcept : m_tree(nullptr), m_id(NONE) {}
 
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Werror"
-#pragma clang diagnostic ignored "-Wgnu-zero-variadic-macro-arguments"
+    ConstNodeRef(ConstNodeRef const&) noexcept = default;
+    ConstNodeRef(ConstNodeRef     &&) noexcept = default;
 
-// some debugging scaffolds
-#ifdef RYML_DBG
-// amalgamate: removed include of
-// https://github.com/biojppm/rapidyaml/src/c4/dump.hpp
-//#include <c4/dump.hpp>
-#if !defined(C4_DUMP_HPP_) && !defined(_C4_DUMP_HPP_)
-#error "amalgamate: file c4/dump.hpp must have been included at this point"
-#endif /* C4_DUMP_HPP_ */
+    ConstNodeRef(NodeRef const&) noexcept;
+    ConstNodeRef(NodeRef     &&) noexcept;
 
-namespace c4 {
-inline void _dbg_dumper(csubstr s) { fwrite(s.str, 1, s.len, stdout); };
-template<class ...Args>
-void _dbg_printf(c4::csubstr fmt, Args&& ...args)
-{
-    static char writebuf[256];
-    auto results = c4::format_dump_resume<&_dbg_dumper>(writebuf, fmt, std::forward<Args>(args)...);
-    // resume writing if the results failed to fit the buffer
-    if(C4_UNLIKELY(results.bufsize > sizeof(writebuf))) // bufsize will be that of the largest element serialized. Eg int(1), will require 1 byte.
-    {
-        results = format_dump_resume<&_dbg_dumper>(results, writebuf, fmt, std::forward<Args>(args)...);
-        if(C4_UNLIKELY(results.bufsize > sizeof(writebuf)))
-        {
-            results = format_dump_resume<&_dbg_dumper>(results, writebuf, fmt, std::forward<Args>(args)...);
-        }
-    }
-}
-} // namespace c4
+    /** @} */
 
-#   define _c4dbgt(fmt, ...)   this->_dbg ("{}:{}: "   fmt     , __FILE__, __LINE__, ## __VA_ARGS__)
-#   define _c4dbgpf(fmt, ...)  _dbg_printf("{}:{}: "   fmt "\n", __FILE__, __LINE__, ## __VA_ARGS__)
-#   define _c4dbgp(msg)        _dbg_printf("{}:{}: "   msg "\n", __FILE__, __LINE__                )
-#   define _c4dbgq(msg)        _dbg_printf(msg "\n")
-#   define _c4err(fmt, ...)   \
-    do { if(c4::is_debugger_attached()) { C4_DEBUG_BREAK(); } \
-         this->_err("ERROR:\n" "{}:{}: " fmt, __FILE__, __LINE__, ## __VA_ARGS__); } while(0)
-#else
-#   define _c4dbgt(fmt, ...)
-#   define _c4dbgpf(fmt, ...)
-#   define _c4dbgp(msg)
-#   define _c4dbgq(msg)
-#   define _c4err(fmt, ...)   \
-    do { if(c4::is_debugger_attached()) { C4_DEBUG_BREAK(); } \
-         this->_err("ERROR: " fmt, ## __VA_ARGS__); } while(0)
-#endif
+public:
 
-#define _c4prsp(sp) sp
-#define _c4presc(s) __c4presc(s.str, s.len)
-inline c4::csubstr _c4prc(const char &C4_RESTRICT c)
-{
-    switch(c)
-    {
-    case '\n': return c4::csubstr("\\n");
-    case '\t': return c4::csubstr("\\t");
-    case '\0': return c4::csubstr("\\0");
-    case '\r': return c4::csubstr("\\r");
-    case '\f': return c4::csubstr("\\f");
-    case '\b': return c4::csubstr("\\b");
-    case '\v': return c4::csubstr("\\v");
-    case '\a': return c4::csubstr("\\a");
-    default: return c4::csubstr(&c, 1);
-    }
-}
-inline void __c4presc(const char *s, size_t len)
-{
-    size_t prev = 0;
-    for(size_t i = 0; i < len; ++i)
-    {
-        switch(s[i])
-        {
-        case '\n'  : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('n'); putchar('\n'); prev = i+1; break;
-        case '\t'  : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('t'); prev = i+1; break;
-        case '\0'  : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('0'); prev = i+1; break;
-        case '\r'  : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('r'); prev = i+1; break;
-        case '\f'  : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('f'); prev = i+1; break;
-        case '\b'  : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('b'); prev = i+1; break;
-        case '\v'  : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('v'); prev = i+1; break;
-        case '\a'  : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('a'); prev = i+1; break;
-        case '\x1b': fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('e'); prev = i+1; break;
-        case -0x3e/*0xc2u*/:
-            if(i+1 < len)
-            {
-                if(s[i+1] == -0x60/*0xa0u*/)
-                {
-                    fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('_'); prev = i+2; ++i;
-                }
-                else if(s[i+1] == -0x7b/*0x85u*/)
-                {
-                    fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('N'); prev = i+2; ++i;
-                }
-                break;
-            }
-        case -0x1e/*0xe2u*/:
-            if(i+2 < len && s[i+1] == -0x80/*0x80u*/)
-            {
-                if(s[i+2] == -0x58/*0xa8u*/)
-                {
-                    fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('L'); prev = i+3; i += 2;
-                }
-                else if(s[i+2] == -0x57/*0xa9u*/)
-                {
-                    fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('P'); prev = i+3; i += 2;
-                }
-                break;
-            }
-        }
-    }
-    fwrite(s + prev, 1, len - prev, stdout);
-}
+    /** @name assignment */
+    /** @{ */
 
-#pragma clang diagnostic pop
-#pragma GCC diagnostic pop
+    ConstNodeRef& operator= (std::nullptr_t) noexcept { m_tree = nullptr; m_id = NONE; return *this; }
 
-#if defined(_MSC_VER)
-#   pragma warning(pop)
-#endif
+    ConstNodeRef& operator= (ConstNodeRef const&) noexcept = default;
+    ConstNodeRef& operator= (ConstNodeRef     &&) noexcept = default;
 
+    ConstNodeRef& operator= (NodeRef const&) noexcept;
+    ConstNodeRef& operator= (NodeRef     &&) noexcept;
 
-#endif /* _C4_YML_DETAIL_PARSER_DBG_HPP_ */
 
+    /** @} */
 
-// (end https://github.com/biojppm/rapidyaml/src/c4/yml/detail/parser_dbg.hpp)
+public:
 
-#define C4_YML_EMIT_DEF_HPP_
+    /** @name state queries
+     *
+     * see @ref NodeRef for an explanation on what these states mean */
+    /** @{ */
 
+    C4_ALWAYS_INLINE bool invalid() const noexcept { return (!m_tree) || (m_id == NONE); }
+    /** because a ConstNodeRef cannot be used to write to the tree,
+     * readable() has the same meaning as !invalid() */
+    C4_ALWAYS_INLINE bool readable() const noexcept { return m_tree != nullptr && m_id != NONE; }
+    /** because a ConstNodeRef cannot be used to write to the tree, it can never be a seed.
+     * This method is provided for API equivalence between ConstNodeRef and NodeRef. */
+    constexpr static C4_ALWAYS_INLINE bool is_seed() noexcept { return false; }
 
+    RYML_DEPRECATED("use one of readable(), is_seed() or !invalid()") bool valid() const noexcept { return m_tree != nullptr && m_id != NONE; }
 
-//********************************************************************************
-//--------------------------------------------------------------------------------
-// src/c4/yml/emit.hpp
-// https://github.com/biojppm/rapidyaml/src/c4/yml/emit.hpp
-//--------------------------------------------------------------------------------
-//********************************************************************************
+    /** @} */
 
-#ifndef _C4_YML_EMIT_HPP_
-#define _C4_YML_EMIT_HPP_
+public:
 
-#ifndef _C4_YML_WRITER_HPP_
-#include "./writer.hpp"
-#endif
+    /** @name member getters */
+    /** @{ */
 
-#ifndef _C4_YML_TREE_HPP_
-#include "./tree.hpp"
-#endif
+    C4_ALWAYS_INLINE Tree const* tree() const noexcept { return m_tree; }
+    C4_ALWAYS_INLINE id_type id() const noexcept { return m_id; }
 
-#ifndef _C4_YML_NODE_HPP_
-#include "./node.hpp"
-#endif
+    /** @} */
 
-namespace c4 {
-namespace yml {
+public:
 
-template<class Writer> class Emitter;
+    /** @name comparisons */
+    /** @{ */
 
-template<class OStream>
-using EmitterOStream = Emitter<WriterOStream<OStream>>;
-using EmitterFile = Emitter<WriterFile>;
-using EmitterBuf  = Emitter<WriterBuf>;
+    C4_ALWAYS_INLINE bool operator== (ConstNodeRef const& that) const RYML_NOEXCEPT { return that.m_tree == m_tree && m_id == that.m_id; }
+    C4_ALWAYS_INLINE bool operator!= (ConstNodeRef const& that) const RYML_NOEXCEPT { return ! this->operator== (that); }
 
-typedef enum {
-    EMIT_YAML = 0,
-    EMIT_JSON = 1
-} EmitType_e;
+    /** @cond dev */
+    RYML_DEPRECATED("use invalid()")  bool operator== (std::nullptr_t) const noexcept { return m_tree == nullptr || m_id == NONE; }
+    RYML_DEPRECATED("use !invalid()") bool operator!= (std::nullptr_t) const noexcept { return !(m_tree == nullptr || m_id == NONE); }
 
+    RYML_DEPRECATED("use (this->val() == s)") bool operator== (csubstr s) const RYML_NOEXCEPT { RYML_ASSERT(m_tree); _RYML_CB_ASSERT(m_tree->m_callbacks, m_id != NONE); return m_tree->val(m_id) == s; }
+    RYML_DEPRECATED("use (this->val() != s)") bool operator!= (csubstr s) const RYML_NOEXCEPT { RYML_ASSERT(m_tree); _RYML_CB_ASSERT(m_tree->m_callbacks, m_id != NONE); return m_tree->val(m_id) != s; }
+    /** @endcond */
+
+    /** @} */
 
-/** mark a tree or node to be emitted as json */
-struct as_json
-{
-    Tree const* tree;
-    size_t node;
-    as_json(Tree const& t) : tree(&t), node(t.empty() ? NONE : t.root_id()) {}
-    as_json(Tree const& t, size_t id) : tree(&t), node(id) {}
-    as_json(NodeRef const& n) : tree(n.tree()), node(n.id()) {}
 };
 
 
@@ -20978,5880 +23625,14156 @@ struct as_json
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 
-template<class Writer>
-class Emitter : public Writer
+/** A reference to a node in an existing yaml tree, offering a more
+ * convenient API than the index-based API used in the tree.
+ *
+ * Unlike its immutable ConstNodeRef peer, a NodeRef can be used to
+ * mutate the tree, both by writing to existing nodes and by creating
+ * new nodes to subsequently write to. Semantically, a NodeRef
+ * object can be in one of three states:
+ *
+ * ```text
+ * invalid  := not pointing at anything
+ * readable := points at an existing tree/node
+ * seed     := points at an existing tree, and the node
+ *             may come to exist, if we write to it.
+ * ```
+ *
+ * So both `readable` and `seed` are states where the node is also `valid`.
+ *
+ * ```cpp
+ * Tree t = parse_in_arena("{a: b}");
+ * NodeRef invalid; // not pointing at anything.
+ * NodeRef readable = t["a"]; // also valid, because "a" exists
+ * NodeRef seed = t["none"]; // also valid, but is seed because "none" is not in the map
+ * ```
+ *
+ * When the object is in seed state, using it to read from the tree is
+ * UB. The seed node can be used to write to the tree, provided that
+ * its create() method is called prior to writing, which happens in
+ * most modifying methods in NodeRef.
+ *
+ * It is the owner's responsibility to verify that an existing
+ * node is readable before subsequently using it to read from the
+ * tree.
+ *
+ * @warning The lifetime of the tree must be larger than that of this
+ * object. It is up to the user to ensure that this happens.
+ */
+class RYML_EXPORT NodeRef : public detail::RoNodeMethods<NodeRef, ConstNodeRef>
 {
 public:
 
-    using Writer::Writer;
-
-    /** emit!
-     *
-     * When writing to a buffer, returns a substr of the emitted YAML.
-     * If the given buffer has insufficient space, the returned span will
-     * be null and its size will be the needed space. No writes are done
-     * after the end of the buffer.
-     *
-     * When writing to a file, the returned substr will be null, but its
-     * length will be set to the number of bytes written. */
-    substr emit(EmitType_e type, Tree const& t, size_t id, bool error_on_excess);
-    /** emit starting at the root node */
-    substr emit(EmitType_e type, Tree const& t, bool error_on_excess=true);
-    /** emit the given node */
-    substr emit(EmitType_e type, NodeRef const& n, bool error_on_excess=true);
+    using tree_type = Tree;
+    using base_type = detail::RoNodeMethods<NodeRef, ConstNodeRef>;
 
 private:
 
-    Tree const* C4_RESTRICT m_tree;
+    Tree *C4_RESTRICT m_tree;
+    id_type m_id;
 
-    void _emit_yaml(size_t id);
-    void _do_visit_flow_sl(size_t id, size_t ilevel=0);
-    void _do_visit_flow_ml(size_t id, size_t ilevel=0, size_t do_indent=1);
-    void _do_visit_block(size_t id, size_t ilevel=0, size_t do_indent=1);
-    void _do_visit_block_container(size_t id, size_t next_level, size_t do_indent);
-    void _do_visit_json(size_t id);
+    /** This member is used to enable lazy operator[] writing. When a child
+     * with a key or index is not found, m_id is set to the id of the parent
+     * and the asked-for key or index are stored in this member until a write
+     * does happen. Then it is given as key or index for creating the child.
+     * When a key is used, the csubstr stores it (so the csubstr's string is
+     * non-null and the csubstr's size is different from NONE). When an index is
+     * used instead, the csubstr's string is set to null, and only the csubstr's
+     * size is set to a value different from NONE. Otherwise, when operator[]
+     * does find the child then this member is empty: the string is null and
+     * the size is NONE. */
+    csubstr m_seed;
 
-private:
+    friend ConstNodeRef;
+    friend struct detail::RoNodeMethods<NodeRef, ConstNodeRef>;
 
-    void _write(NodeScalar const& C4_RESTRICT sc, NodeType flags, size_t level);
-    void _write_json(NodeScalar const& C4_RESTRICT sc, NodeType flags);
+    // require valid: a helper macro, undefined at the end
+    #define _C4RR()                                                         \
+        RYML_ASSERT(m_tree != nullptr);                                     \
+        _RYML_CB_ASSERT(m_tree->m_callbacks, m_id != NONE && !is_seed())
+    // require id: a helper macro, undefined at the end
+    #define _C4RID()                                                        \
+        RYML_ASSERT(m_tree != nullptr);                                     \
+        _RYML_CB_ASSERT(m_tree->m_callbacks, m_id != NONE)
 
-    void _write_doc(size_t id);
-    void _write_scalar(csubstr s, bool was_quoted);
-    void _write_scalar_json(csubstr s, bool as_key, bool was_quoted);
-    void _write_scalar_literal(csubstr s, size_t level, bool as_key, bool explicit_indentation=false);
-    void _write_scalar_folded(csubstr s, size_t level, bool as_key);
-    void _write_scalar_squo(csubstr s, size_t level);
-    void _write_scalar_dquo(csubstr s, size_t level);
-    void _write_scalar_plain(csubstr s, size_t level);
+public:
 
-    void _write_tag(csubstr tag)
-    {
-        if(!tag.begins_with('!'))
-            this->Writer::_do_write('!');
-        this->Writer::_do_write(tag);
-    }
+    /** @name construction */
+    /** @{ */
 
-    enum : type_bits {
-        _keysc =  (KEY|KEYREF|KEYANCH|KEYQUO|_WIP_KEY_STYLE) | ~(VAL|VALREF|VALANCH|VALQUO|_WIP_VAL_STYLE),
-        _valsc = ~(KEY|KEYREF|KEYANCH|KEYQUO|_WIP_KEY_STYLE) |  (VAL|VALREF|VALANCH|VALQUO|_WIP_VAL_STYLE),
-        _keysc_json =  (KEY)  | ~(VAL),
-        _valsc_json = ~(KEY)  |  (VAL),
-    };
+    NodeRef() noexcept : m_tree(nullptr), m_id(NONE), m_seed() { _clear_seed(); }
+    NodeRef(Tree &t) noexcept : m_tree(&t), m_id(t .root_id()), m_seed() { _clear_seed(); }
+    NodeRef(Tree *t) noexcept : m_tree(t ), m_id(t->root_id()), m_seed() { _clear_seed(); }
+    NodeRef(Tree *t, id_type id) noexcept : m_tree(t), m_id(id), m_seed() { _clear_seed(); }
+    NodeRef(Tree *t, id_type id, id_type seed_pos) noexcept : m_tree(t), m_id(id), m_seed() { m_seed.str = nullptr; m_seed.len = (size_t)seed_pos; }
+    NodeRef(Tree *t, id_type id, csubstr  seed_key) noexcept : m_tree(t), m_id(id), m_seed(seed_key) {}
+    NodeRef(std::nullptr_t) noexcept : m_tree(nullptr), m_id(NONE), m_seed() { _clear_seed(); } // invalid ref; seed is put in the cleared state, same as NodeRef()
 
-    C4_ALWAYS_INLINE void _writek(size_t id, size_t level) { _write(m_tree->keysc(id), m_tree->_p(id)->m_type.type & ~_valsc, level); }
-    C4_ALWAYS_INLINE void _writev(size_t id, size_t level) { _write(m_tree->valsc(id), m_tree->_p(id)->m_type.type & ~_keysc, level); }
+    inline void _clear_seed() noexcept { /*do the following manually or an assert is triggered; the length sentinel is (size_t)NONE, the same value tested by is_seed()/readable() and written by _apply_seed(): */ m_seed.str = nullptr; m_seed.len = (size_t)NONE; }
 
-    C4_ALWAYS_INLINE void _writek_json(size_t id) { _write_json(m_tree->keysc(id), m_tree->_p(id)->m_type.type & ~(VAL)); }
-    C4_ALWAYS_INLINE void _writev_json(size_t id) { _write_json(m_tree->valsc(id), m_tree->_p(id)->m_type.type & ~(KEY)); }
+    /** @} */
 
-};
+public:
 
+    /** @name assignment */
+    /** @{ */
 
-//-----------------------------------------------------------------------------
-//-----------------------------------------------------------------------------
-//-----------------------------------------------------------------------------
+    NodeRef(NodeRef const&) noexcept = default;
+    NodeRef(NodeRef     &&) noexcept = default;
 
-/** emit YAML to the given file. A null file defaults to stdout.
- * Return the number of bytes written. */
-inline size_t emit(Tree const& t, size_t id, FILE *f)
-{
-    EmitterFile em(f);
-    return em.emit(EMIT_YAML, t, id, /*error_on_excess*/true).len;
-}
-/** emit JSON to the given file. A null file defaults to stdout.
- * Return the number of bytes written. */
-inline size_t emit_json(Tree const& t, size_t id, FILE *f)
-{
-    EmitterFile em(f);
-    return em.emit(EMIT_JSON, t, id, /*error_on_excess*/true).len;
-}
+    NodeRef& operator= (NodeRef const&) noexcept = default;
+    NodeRef& operator= (NodeRef     &&) noexcept = default;
 
+    /** @} */
 
-/** emit YAML to the given file. A null file defaults to stdout.
- * Return the number of bytes written.
- * @overload */
-inline size_t emit(Tree const& t, FILE *f=nullptr)
-{
-    EmitterFile em(f);
-    return em.emit(EMIT_YAML, t, /*error_on_excess*/true).len;
-}
+public:
 
-/** emit JSON to the given file. A null file defaults to stdout.
- * Return the number of bytes written.
- * @overload */
-inline size_t emit_json(Tree const& t, FILE *f=nullptr)
-{
-    EmitterFile em(f);
-    return em.emit(EMIT_JSON, t, /*error_on_excess*/true).len;
-}
+    /** @name state_queries
+     * @{ */
 
+    /** true if the object is not referring to any existing or seed node. @see the doc for @ref NodeRef */
+    inline bool invalid() const noexcept { return m_tree == nullptr || m_id == NONE; }
+    /** true if the object is not invalid and in seed state. @see the doc for @ref NodeRef */
+    inline bool is_seed() const noexcept { return !invalid() && (m_seed.str != nullptr || m_seed.len != (size_t)NONE); } // seed: a key (str set) or an index (len set) is pending
+    /** true if the object is not invalid and not in seed state. @see the doc for @ref NodeRef */
+    inline bool readable() const noexcept { return !invalid() && (m_seed.str == nullptr && m_seed.len == (size_t)NONE); } // readable: valid and no pending seed key/index
 
-/** emit YAML to the given file. A null file defaults to stdout.
- * Return the number of bytes written.
- * @overload */
-inline size_t emit(NodeRef const& r, FILE *f=nullptr)
-{
-    EmitterFile em(f);
-    return em.emit(EMIT_YAML, r, /*error_on_excess*/true).len;
-}
+    RYML_DEPRECATED("use one of readable(), is_seed() or !invalid()") inline bool valid() const { return m_tree != nullptr && m_id != NONE; }
 
-/** emit JSON to the given file. A null file defaults to stdout.
- * Return the number of bytes written.
- * @overload */
-inline size_t emit_json(NodeRef const& r, FILE *f=nullptr)
-{
-    EmitterFile em(f);
-    return em.emit(EMIT_JSON, r, /*error_on_excess*/true).len;
-}
+    /** @} */
 
+public:
 
-//-----------------------------------------------------------------------------
+    /** @name comparisons */
+    /** @{ */
 
-/** emit YAML to an STL-like ostream */
-template<class OStream>
-inline OStream& operator<< (OStream& s, Tree const& t)
-{
-    EmitterOStream<OStream> em(s);
-    em.emit(EMIT_YAML, t);
-    return s;
-}
+    bool operator== (NodeRef const& that) const
+    {
+        if(m_tree == that.m_tree && m_id == that.m_id)
+        {
+            bool seed = is_seed();
+            if(seed == that.is_seed())
+            {
+                if(seed)
+                {
+                    return (m_seed.len == that.m_seed.len)
+                        && (m_seed.str == that.m_seed.str
+                            || m_seed == that.m_seed); // do strcmp only in the last resort
+                }
+                return true;
+            }
+        }
+        return false;
+    }
+    inline bool operator!= (NodeRef const& that) const { return ! this->operator==(that); }
 
-/** emit YAML to an STL-like ostream
- * @overload */
-template<class OStream>
-inline OStream& operator<< (OStream& s, NodeRef const& n)
-{
-    EmitterOStream<OStream> em(s);
-    em.emit(EMIT_YAML, n);
-    return s;
-}
+    inline bool operator== (ConstNodeRef const& that) const { return m_tree == that.m_tree && m_id == that.m_id && !is_seed(); }
+    inline bool operator!= (ConstNodeRef const& that) const { return ! this->operator==(that); }
 
-/** emit json to an STL-like stream */
-template<class OStream>
-inline OStream& operator<< (OStream& s, as_json const& j)
-{
-    EmitterOStream<OStream> em(s);
-    em.emit(EMIT_JSON, *j.tree, j.node, true);
-    return s;
-}
+    /** @cond dev */
+    RYML_DEPRECATED("use !readable()") bool operator== (std::nullptr_t) const { return m_tree == nullptr || m_id == NONE || is_seed(); }
+    RYML_DEPRECATED("use readable()")  bool operator!= (std::nullptr_t) const { return !(m_tree == nullptr || m_id == NONE || is_seed()); }
 
+    RYML_DEPRECATED("use `this->val() == s`") bool operator== (csubstr s) const { _C4RR(); _RYML_CB_ASSERT(m_tree->m_callbacks, has_val()); return m_tree->val(m_id) == s; }
+    RYML_DEPRECATED("use `this->val() != s`") bool operator!= (csubstr s) const { _C4RR(); _RYML_CB_ASSERT(m_tree->m_callbacks, has_val()); return m_tree->val(m_id) != s; }
+    /** @endcond */
 
-//-----------------------------------------------------------------------------
+public:
 
+    /** @name node_property_getters
+     * @{ */
 
-/** emit YAML to the given buffer. Return a substr trimmed to the emitted YAML.
- * @param error_on_excess Raise an error if the space in the buffer is insufficient.
- * @overload */
-inline substr emit(Tree const& t, size_t id, substr buf, bool error_on_excess=true)
-{
-    EmitterBuf em(buf);
-    return em.emit(EMIT_YAML, t, id, error_on_excess);
-}
+    C4_ALWAYS_INLINE Tree * tree() noexcept { return m_tree; }
+    C4_ALWAYS_INLINE Tree const* tree() const noexcept { return m_tree; }
 
-/** emit JSON to the given buffer. Return a substr trimmed to the emitted JSON.
- * @param error_on_excess Raise an error if the space in the buffer is insufficient.
- * @overload */
-inline substr emit_json(Tree const& t, size_t id, substr buf, bool error_on_excess=true)
-{
-    EmitterBuf em(buf);
-    return em.emit(EMIT_JSON, t, id, error_on_excess);
-}
+    C4_ALWAYS_INLINE id_type id() const noexcept { return m_id; }
 
+    /** @} */
 
-/** emit YAML to the given buffer. Return a substr trimmed to the emitted YAML.
- * @param error_on_excess Raise an error if the space in the buffer is insufficient.
- * @overload */
-inline substr emit(Tree const& t, substr buf, bool error_on_excess=true)
-{
-    EmitterBuf em(buf);
-    return em.emit(EMIT_YAML, t, error_on_excess);
-}
+public:
 
-/** emit JSON to the given buffer. Return a substr trimmed to the emitted JSON.
- * @param error_on_excess Raise an error if the space in the buffer is insufficient.
- * @overload */
-inline substr emit_json(Tree const& t, substr buf, bool error_on_excess=true)
-{
-    EmitterBuf em(buf);
-    return em.emit(EMIT_JSON, t, error_on_excess);
-}
+    /** @name node_modifiers */
+    /** @{ */
 
+    void create() { _apply_seed(); }
 
-/** emit YAML to the given buffer. Return a substr trimmed to the emitted YAML.
- * @param error_on_excess Raise an error if the space in the buffer is insufficient.
- * @overload
- */
-inline substr emit(NodeRef const& r, substr buf, bool error_on_excess=true)
-{
-    EmitterBuf em(buf);
-    return em.emit(EMIT_YAML, r, error_on_excess);
-}
+    void change_type(NodeType t) { _C4RR(); m_tree->change_type(m_id, t); }
 
-/** emit JSON to the given buffer. Return a substr trimmed to the emitted JSON.
- * @param error_on_excess Raise an error if the space in the buffer is insufficient.
- * @overload
- */
-inline substr emit_json(NodeRef const& r, substr buf, bool error_on_excess=true)
-{
-    EmitterBuf em(buf);
-    return em.emit(EMIT_JSON, r, error_on_excess);
-}
+    void set_type(NodeType t) { _apply_seed(); m_tree->_set_flags(m_id, t); }
+    void set_key(csubstr key) { _apply_seed(); m_tree->_set_key(m_id, key); }
+    void set_val(csubstr val) { _apply_seed(); m_tree->_set_val(m_id, val); }
+    void set_key_tag(csubstr key_tag) { _apply_seed(); m_tree->set_key_tag(m_id, key_tag); }
+    void set_val_tag(csubstr val_tag) { _apply_seed(); m_tree->set_val_tag(m_id, val_tag); }
+    void set_key_anchor(csubstr key_anchor) { _apply_seed(); m_tree->set_key_anchor(m_id, key_anchor); }
+    void set_val_anchor(csubstr val_anchor) { _apply_seed(); m_tree->set_val_anchor(m_id, val_anchor); }
+    void set_key_ref(csubstr key_ref) { _apply_seed(); m_tree->set_key_ref(m_id, key_ref); }
+    void set_val_ref(csubstr val_ref) { _apply_seed(); m_tree->set_val_ref(m_id, val_ref); }
 
+    void set_container_style(NodeType_e style) { _C4RR(); m_tree->set_container_style(m_id, style); }
+    void set_key_style(NodeType_e style) { _C4RR(); m_tree->set_key_style(m_id, style); }
+    void set_val_style(NodeType_e style) { _C4RR(); m_tree->set_val_style(m_id, style); }
 
-//-----------------------------------------------------------------------------
+public:
 
-/** emit+resize: emit YAML to the given std::string/std::vector-like
- * container, resizing it as needed to fit the emitted YAML. */
-template<class CharOwningContainer>
-substr emitrs(Tree const& t, size_t id, CharOwningContainer * cont)
-{
-    substr buf = to_substr(*cont);
-    substr ret = emit(t, id, buf, /*error_on_excess*/false);
-    if(ret.str == nullptr && ret.len > 0)
+    inline void clear()
     {
-        cont->resize(ret.len);
-        buf = to_substr(*cont);
-        ret = emit(t, id, buf, /*error_on_excess*/true);
+        if(is_seed())
+            return;
+        m_tree->remove_children(m_id);
+        m_tree->_clear(m_id);
     }
-    return ret;
-}
 
-/** emit+resize: emit JSON to the given std::string/std::vector-like
- * container, resizing it as needed to fit the emitted JSON. */
-template<class CharOwningContainer>
-substr emitrs_json(Tree const& t, size_t id, CharOwningContainer * cont)
-{
-    substr buf = to_substr(*cont);
-    substr ret = emit_json(t, id, buf, /*error_on_excess*/false);
-    if(ret.str == nullptr && ret.len > 0)
+    inline void clear_key()
     {
-        cont->resize(ret.len);
-        buf = to_substr(*cont);
-        ret = emit_json(t, id, buf, /*error_on_excess*/true);
+        if(is_seed())
+            return;
+        m_tree->_clear_key(m_id);
     }
-    return ret;
-}
 
+    inline void clear_val()
+    {
+        if(is_seed())
+            return;
+        m_tree->_clear_val(m_id);
+    }
 
-/** emit+resize: emit YAML to the given std::string/std::vector-like
- * container, resizing it as needed to fit the emitted YAML. */
-template<class CharOwningContainer>
-CharOwningContainer emitrs(Tree const& t, size_t id)
-{
-    CharOwningContainer c;
-    emitrs(t, id, &c);
-    return c;
-}
+    inline void clear_children()
+    {
+        if(is_seed())
+            return;
+        m_tree->remove_children(m_id);
+    }
 
-/** emit+resize: emit JSON to the given std::string/std::vector-like container,
- * resizing it as needed to fit the emitted JSON. */
-template<class CharOwningContainer>
-CharOwningContainer emitrs_json(Tree const& t, size_t id)
-{
-    CharOwningContainer c;
-    emitrs_json(t, id, &c);
-    return c;
-}
+    inline void operator= (NodeType_e t)
+    {
+        _apply_seed();
+        m_tree->_add_flags(m_id, t);
+    }
 
+    inline void operator|= (NodeType_e t)
+    {
+        _apply_seed();
+        m_tree->_add_flags(m_id, t);
+    }
 
-/** emit+resize: YAML to the given std::string/std::vector-like container,
- * resizing it as needed to fit the emitted YAML. */
-template<class CharOwningContainer>
-substr emitrs(Tree const& t, CharOwningContainer * cont)
-{
-    if(t.empty())
-        return {};
-    return emitrs(t, t.root_id(), cont);
-}
+    inline void operator= (NodeInit const& v)
+    {
+        _apply_seed();
+        _apply(v);
+    }
 
-/** emit+resize: JSON to the given std::string/std::vector-like container,
- * resizing it as needed to fit the emitted JSON. */
-template<class CharOwningContainer>
-substr emitrs_json(Tree const& t, CharOwningContainer * cont)
-{
-    if(t.empty())
-        return {};
-    return emitrs_json(t, t.root_id(), cont);
-}
+    inline void operator= (NodeScalar const& v)
+    {
+        _apply_seed();
+        _apply(v);
+    }
 
+    inline void operator= (std::nullptr_t)
+    {
+        _apply_seed();
+        _apply(csubstr{});
+    }
 
-/** emit+resize: YAML to the given std::string/std::vector-like container,
- * resizing it as needed to fit the emitted YAML. */
-template<class CharOwningContainer>
-CharOwningContainer emitrs(Tree const& t)
-{
-    CharOwningContainer c;
-    if(t.empty())
-        return c;
-    emitrs(t, t.root_id(), &c);
-    return c;
-}
+    inline void operator= (csubstr v)
+    {
+        _apply_seed();
+        _apply(v);
+    }
 
-/** emit+resize: JSON to the given std::string/std::vector-like container,
- * resizing it as needed to fit the emitted JSON. */
-template<class CharOwningContainer>
-CharOwningContainer emitrs_json(Tree const& t)
-{
-    CharOwningContainer c;
-    if(t.empty())
-        return c;
-    emitrs_json(t, t.root_id(), &c);
-    return c;
-}
+    template<size_t N>
+    inline void operator= (const char (&v)[N])
+    {
+        _apply_seed();
+        csubstr sv;
+        sv.assign<N>(v);
+        _apply(sv);
+    }
 
+    /** @} */
 
-/** emit+resize: YAML to the given std::string/std::vector-like container,
- * resizing it as needed to fit the emitted YAML. */
-template<class CharOwningContainer>
-substr emitrs(NodeRef const& n, CharOwningContainer * cont)
-{
-    _RYML_CB_CHECK(n.tree()->callbacks(), n.valid());
-    return emitrs(*n.tree(), n.id(), cont);
-}
+public:
 
-/** emit+resize: JSON to the given std::string/std::vector-like container,
- * resizing it as needed to fit the emitted JSON. */
-template<class CharOwningContainer>
-substr emitrs_json(NodeRef const& n, CharOwningContainer * cont)
-{
-    _RYML_CB_CHECK(n.tree()->callbacks(), n.valid());
-    return emitrs_json(*n.tree(), n.id(), cont);
-}
+    /** @name serialization */
+    /** @{ */
 
+    /** serialize a variable to the arena */
+    template<class T>
+    inline csubstr to_arena(T const& C4_RESTRICT s)
+    {
+        RYML_ASSERT(m_tree); // no need for valid or readable
+        return m_tree->to_arena(s);
+    }
 
-/** emit+resize: YAML to the given std::string/std::vector-like container,
- * resizing it as needed to fit the emitted YAML. */
-template<class CharOwningContainer>
-CharOwningContainer emitrs(NodeRef const& n)
-{
-    _RYML_CB_CHECK(n.tree()->callbacks(), n.valid());
-    CharOwningContainer c;
-    emitrs(*n.tree(), n.id(), &c);
-    return c;
-}
+    template<class T>
+    size_t set_key_serialized(T const& C4_RESTRICT k)
+    {
+        _apply_seed();
+        csubstr s = m_tree->to_arena(k);
+        m_tree->_set_key(m_id, s);
+        return s.len;
+    }
+    size_t set_key_serialized(std::nullptr_t)
+    {
+        _apply_seed();
+        m_tree->_set_key(m_id, csubstr{});
+        return 0;
+    }
 
-/** emit+resize: JSON to the given std::string/std::vector-like container,
- * resizing it as needed to fit the emitted JSON. */
-template<class CharOwningContainer>
-CharOwningContainer emitrs_json(NodeRef const& n)
-{
-    _RYML_CB_CHECK(n.tree()->callbacks(), n.valid());
-    CharOwningContainer c;
-    emitrs_json(*n.tree(), n.id(), &c);
-    return c;
-}
+    template<class T>
+    size_t set_val_serialized(T const& C4_RESTRICT v)
+    {
+        _apply_seed();
+        csubstr s = m_tree->to_arena(v);
+        m_tree->_set_val(m_id, s);
+        return s.len;
+    }
+    size_t set_val_serialized(std::nullptr_t)
+    {
+        _apply_seed();
+        m_tree->_set_val(m_id, csubstr{});
+        return 0;
+    }
 
-} // namespace yml
-} // namespace c4
-
-// amalgamate: removed include of
-// https://github.com/biojppm/rapidyaml/src/c4/yml/emit.def.hpp
-//#include "c4/yml/emit.def.hpp"
-#if !defined(C4_YML_EMIT_DEF_HPP_) && !defined(_C4_YML_EMIT_DEF_HPP_)
-#error "amalgamate: file c4/yml/emit.def.hpp must have been included at this point"
-#endif /* C4_YML_EMIT_DEF_HPP_ */
-
-
-#endif /* _C4_YML_EMIT_HPP_ */
-
-
-// (end https://github.com/biojppm/rapidyaml/src/c4/yml/emit.hpp)
-
-
-
-//********************************************************************************
-//--------------------------------------------------------------------------------
-// src/c4/yml/emit.def.hpp
-// https://github.com/biojppm/rapidyaml/src/c4/yml/emit.def.hpp
-//--------------------------------------------------------------------------------
-//********************************************************************************
-
-#ifndef _C4_YML_EMIT_DEF_HPP_
-#define _C4_YML_EMIT_DEF_HPP_
+    /** encode a blob as base64 into the tree's arena, then assign the
+     * result to the node's key
+     * @return the size of base64-encoded blob */
+    size_t set_key_serialized(fmt::const_base64_wrapper w);
+    /** encode a blob as base64 into the tree's arena, then assign the
+     * result to the node's val
+     * @return the size of base64-encoded blob */
+    size_t set_val_serialized(fmt::const_base64_wrapper w);
 
-#ifndef _C4_YML_EMIT_HPP_
-// amalgamate: removed include of
-// https://github.com/biojppm/rapidyaml/src/c4/yml/emit.hpp
-//#include "c4/yml/emit.hpp"
-#if !defined(C4_YML_EMIT_HPP_) && !defined(_C4_YML_EMIT_HPP_)
-#error "amalgamate: file c4/yml/emit.hpp must have been included at this point"
-#endif /* C4_YML_EMIT_HPP_ */
+    /** serialize a variable, then assign the result to the node's val */
+    inline NodeRef& operator<< (csubstr s)
+    {
+        // this overload is needed to prevent ambiguity (there's also
+        // operator<< for writing a substr to a stream)
+        _apply_seed();
+        write(this, s);
+        _RYML_CB_ASSERT(m_tree->m_callbacks, val() == s);
+        return *this;
+    }
 
-#endif
+    template<class T>
+    inline NodeRef& operator<< (T const& C4_RESTRICT v)
+    {
+        _apply_seed();
+        write(this, v);
+        return *this;
+    }
 
-namespace c4 {
-namespace yml {
+    /** serialize a variable, then assign the result to the node's key */
+    template<class T>
+    inline NodeRef& operator<< (Key<const T> const& C4_RESTRICT v)
+    {
+        _apply_seed();
+        set_key_serialized(v.k);
+        return *this;
+    }
 
-template<class Writer>
-substr Emitter<Writer>::emit(EmitType_e type, Tree const& t, size_t id, bool error_on_excess)
-{
-    if(t.empty())
+    /** serialize a variable, then assign the result to the node's key */
+    template<class T>
+    inline NodeRef& operator<< (Key<T> const& C4_RESTRICT v)
     {
-        _RYML_CB_ASSERT(t.callbacks(), id == NONE);
-        return {};
+        _apply_seed();
+        set_key_serialized(v.k);
+        return *this;
     }
-    _RYML_CB_CHECK(t.callbacks(), id < t.size());
-    m_tree = &t;
-    if(type == EMIT_YAML)
-        _emit_yaml(id);
-    else if(type == EMIT_JSON)
-        _do_visit_json(id);
-    else
-        _RYML_CB_ERR(m_tree->callbacks(), "unknown emit type");
-    return this->Writer::_get(error_on_excess);
-}
 
-template<class Writer>
-substr Emitter<Writer>::emit(EmitType_e type, Tree const& t, bool error_on_excess)
-{
-    if(t.empty())
-        return {};
-    return emit(type, t, t.root_id(), error_on_excess);
-}
+    NodeRef& operator<< (Key<fmt::const_base64_wrapper> w)
+    {
+        set_key_serialized(w.wrapper);
+        return *this;
+    }
 
-template<class Writer>
-substr Emitter<Writer>::emit(EmitType_e type, NodeRef const& n, bool error_on_excess)
-{
-    _RYML_CB_CHECK(n.tree()->callbacks(), n.valid());
-    return emit(type, *n.tree(), n.id(), error_on_excess);
-}
+    NodeRef& operator<< (fmt::const_base64_wrapper w)
+    {
+        set_val_serialized(w);
+        return *this;
+    }
 
+    /** @} */
 
-//-----------------------------------------------------------------------------
+private:
 
-template<class Writer>
-void Emitter<Writer>::_emit_yaml(size_t id)
-{
-    // save branches in the visitor by doing the initial stream/doc
-    // logic here, sparing the need to check stream/val/keyval inside
-    // the visitor functions
-    auto dispatch = [this](size_t node){
-        NodeType ty = m_tree->type(node);
-        if(ty.marked_flow_sl())
-            _do_visit_flow_sl(node, 0);
-        else if(ty.marked_flow_ml())
-            _do_visit_flow_ml(node, 0);
-        else
-        {
-            _do_visit_block(node, 0);
-        }
-    };
-    if(!m_tree->is_root(id))
+    void _apply_seed()
     {
-        if(m_tree->is_container(id) && !m_tree->type(id).marked_flow())
+        _C4RID();
+        if(m_seed.str) // we have a seed key: use it to create the new child
         {
-            size_t ilevel = 0;
-            if(m_tree->has_key(id))
-            {
-                this->Writer::_do_write(m_tree->key(id));
-                this->Writer::_do_write(":\n");
-                ++ilevel;
-            }
-            _do_visit_block_container(id, ilevel, ilevel);
-            return;
+            m_id = m_tree->append_child(m_id);
+            m_tree->_set_key(m_id, m_seed);
+            m_seed.str = nullptr;
+            m_seed.len = (size_t)NONE;
         }
-    }
-
-    auto *btd = m_tree->tag_directives().b;
-    auto *etd = m_tree->tag_directives().e;
-    auto write_tag_directives = [&btd, etd, this](size_t next_node){
-        auto end = btd;
-        while(end < etd)
+        else if(m_seed.len != (size_t)NONE) // we have a seed index: create a child at that position
         {
-            if(end->next_node_id > next_node)
-                break;
-            ++end;
+            _RYML_CB_ASSERT(m_tree->m_callbacks, (size_t)m_tree->num_children(m_id) == m_seed.len);
+            m_id = m_tree->append_child(m_id);
+            m_seed.str = nullptr;
+            m_seed.len = (size_t)NONE;
         }
-        for( ; btd != end; ++btd)
+        else
         {
-            if(next_node != m_tree->first_child(m_tree->parent(next_node)))
-                this->Writer::_do_write("...\n");
-            this->Writer::_do_write("%TAG ");
-            this->Writer::_do_write(btd->handle);
-            this->Writer::_do_write(' ');
-            this->Writer::_do_write(btd->prefix);
-            this->Writer::_do_write('\n');
+            _RYML_CB_ASSERT(m_tree->m_callbacks, readable());
         }
-    };
-    if(m_tree->is_stream(id))
+    }
+
+    inline void _apply(csubstr v)
     {
-        if(m_tree->first_child(id) != NONE)
-            write_tag_directives(m_tree->first_child(id));
-        for(size_t child = m_tree->first_child(id); child != NONE; child = m_tree->next_sibling(child))
-        {
-            dispatch(child);
-            if(m_tree->next_sibling(child) != NONE)
-                write_tag_directives(m_tree->next_sibling(child));
-        }
+        m_tree->_set_val(m_id, v);
     }
-    else if(m_tree->is_container(id))
+
+    inline void _apply(NodeScalar const& v)
     {
-        dispatch(id);
+        m_tree->_set_val(m_id, v);
     }
-    else if(m_tree->is_doc(id))
+
+    inline void _apply(NodeInit const& i)
     {
-        _RYML_CB_ASSERT(m_tree->callbacks(), !m_tree->is_container(id)); // checked above
-        _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_val(id)); // so it must be a val
-        _write_doc(id);
+        m_tree->_set(m_id, i);
     }
-    else if(m_tree->is_keyval(id))
+
+public:
+
+    /** @name modification of hierarchy */
+    /** @{ */
+
+    inline NodeRef insert_child(NodeRef after)
     {
-        _writek(id, 0);
-        this->Writer::_do_write(": ");
-        _writev(id, 0);
-        if(!m_tree->type(id).marked_flow())
-            this->Writer::_do_write('\n');
+        _C4RR();
+        _RYML_CB_ASSERT(m_tree->m_callbacks, after.m_tree == m_tree);
+        NodeRef r(m_tree, m_tree->insert_child(m_id, after.m_id));
+        return r;
     }
-    else if(m_tree->is_val(id))
+
+    inline NodeRef insert_child(NodeInit const& i, NodeRef after)
     {
-        //this->Writer::_do_write("- ");
-        _writev(id, 0);
-        if(!m_tree->type(id).marked_flow())
-            this->Writer::_do_write('\n');
+        _C4RR();
+        _RYML_CB_ASSERT(m_tree->m_callbacks, after.m_tree == m_tree);
+        NodeRef r(m_tree, m_tree->insert_child(m_id, after.m_id));
+        r._apply(i);
+        return r;
     }
-    else if(m_tree->type(id) == NOTYPE)
+
+    inline NodeRef prepend_child()
     {
-        ;
+        _C4RR();
+        NodeRef r(m_tree, m_tree->insert_child(m_id, NONE));
+        return r;
     }
-    else
+
+    inline NodeRef prepend_child(NodeInit const& i)
     {
-        _RYML_CB_ERR(m_tree->callbacks(), "unknown type");
+        _C4RR();
+        NodeRef r(m_tree, m_tree->insert_child(m_id, NONE));
+        r._apply(i);
+        return r;
     }
-}
 
-template<class Writer>
-void Emitter<Writer>::_write_doc(size_t id)
-{
-    RYML_ASSERT(m_tree->is_doc(id));
-    if(!m_tree->is_root(id))
+    inline NodeRef append_child()
     {
-        RYML_ASSERT(m_tree->is_stream(m_tree->parent(id)));
-        this->Writer::_do_write("---");
+        _C4RR();
+        NodeRef r(m_tree, m_tree->append_child(m_id));
+        return r;
     }
-    if(!m_tree->has_val(id)) // this is more frequent
+
+    inline NodeRef append_child(NodeInit const& i)
     {
-        if(m_tree->has_val_tag(id))
-        {
-            if(!m_tree->is_root(id))
-                this->Writer::_do_write(' ');
-            _write_tag(m_tree->val_tag(id));
-        }
-        if(m_tree->has_val_anchor(id))
-        {
-            if(!m_tree->is_root(id))
-                this->Writer::_do_write(' ');
-            this->Writer::_do_write('&');
-            this->Writer::_do_write(m_tree->val_anchor(id));
-        }
+        _C4RR();
+        NodeRef r(m_tree, m_tree->append_child(m_id));
+        r._apply(i);
+        return r;
     }
-    else // docval
+
+    inline NodeRef insert_sibling(ConstNodeRef const& after)
     {
-        RYML_ASSERT(m_tree->has_val(id));
-        RYML_ASSERT(!m_tree->has_key(id));
-        if(!m_tree->is_root(id))
-            this->Writer::_do_write(' ');
-        _writev(id, 0);
+        _C4RR();
+        _RYML_CB_ASSERT(m_tree->m_callbacks, after.m_tree == m_tree);
+        NodeRef r(m_tree, m_tree->insert_sibling(m_id, after.m_id));
+        return r;
     }
-    this->Writer::_do_write('\n');
-}
 
-template<class Writer>
-void Emitter<Writer>::_do_visit_flow_sl(size_t node, size_t ilevel)
-{
-    RYML_ASSERT(!m_tree->is_stream(node));
-    RYML_ASSERT(m_tree->is_container(node) || m_tree->is_doc(node));
-    RYML_ASSERT(m_tree->is_root(node) || (m_tree->parent_is_map(node) || m_tree->parent_is_seq(node)));
+    inline NodeRef insert_sibling(NodeInit const& i, ConstNodeRef const& after)
+    {
+        _C4RR();
+        _RYML_CB_ASSERT(m_tree->m_callbacks, after.m_tree == m_tree);
+        NodeRef r(m_tree, m_tree->insert_sibling(m_id, after.m_id));
+        r._apply(i);
+        return r;
+    }
 
-    if(m_tree->is_doc(node))
+    inline NodeRef prepend_sibling()
     {
-        _write_doc(node);
-        if(!m_tree->has_children(node))
-            return;
+        _C4RR();
+        NodeRef r(m_tree, m_tree->prepend_sibling(m_id));
+        return r;
     }
-    else if(m_tree->is_container(node))
+
+    inline NodeRef prepend_sibling(NodeInit const& i)
     {
-        RYML_ASSERT(m_tree->is_map(node) || m_tree->is_seq(node));
+        _C4RR();
+        NodeRef r(m_tree, m_tree->prepend_sibling(m_id));
+        r._apply(i);
+        return r;
+    }
 
-        bool spc = false; // write a space
-
-        if(m_tree->has_key(node))
-        {
-            _writek(node, ilevel);
-            this->Writer::_do_write(':');
-            spc = true;
-        }
-
-        if(m_tree->has_val_tag(node))
-        {
-            if(spc)
-                this->Writer::_do_write(' ');
-            _write_tag(m_tree->val_tag(node));
-            spc = true;
-        }
-
-        if(m_tree->has_val_anchor(node))
-        {
-            if(spc)
-                this->Writer::_do_write(' ');
-            this->Writer::_do_write('&');
-            this->Writer::_do_write(m_tree->val_anchor(node));
-            spc = true;
-        }
+    inline NodeRef append_sibling()
+    {
+        _C4RR();
+        NodeRef r(m_tree, m_tree->append_sibling(m_id));
+        return r;
+    }
 
-        if(spc)
-            this->Writer::_do_write(' ');
+    inline NodeRef append_sibling(NodeInit const& i)
+    {
+        _C4RR();
+        NodeRef r(m_tree, m_tree->append_sibling(m_id));
+        r._apply(i);
+        return r;
+    }
 
-        if(m_tree->is_map(node))
-        {
-            this->Writer::_do_write('{');
-        }
-        else
-        {
-            _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_seq(node));
-            this->Writer::_do_write('[');
-        }
-    } // container
+public:
 
-    for(size_t child = m_tree->first_child(node), count = 0; child != NONE; child = m_tree->next_sibling(child))
+    inline void remove_child(NodeRef & child)
     {
-        if(count++)
-            this->Writer::_do_write(',');
-        if(m_tree->is_keyval(child))
-        {
-            _writek(child, ilevel);
-            this->Writer::_do_write(": ");
-            _writev(child, ilevel);
-        }
-        else if(m_tree->is_val(child))
-        {
-            _writev(child, ilevel);
-        }
-        else
-        {
-            // with single-line flow, we can never go back to block
-            _do_visit_flow_sl(child, ilevel + 1);
-        }
+        _C4RR();
+        _RYML_CB_ASSERT(m_tree->m_callbacks, has_child(child));
+        _RYML_CB_ASSERT(m_tree->m_callbacks, child.parent().id() == id());
+        m_tree->remove(child.id());
+        child.clear();
     }
 
-    if(m_tree->is_map(node))
+    //! remove the nth child of this node
+    inline void remove_child(id_type pos)
     {
-        this->Writer::_do_write('}');
+        _C4RR();
+        _RYML_CB_ASSERT(m_tree->m_callbacks, pos >= 0 && pos < num_children());
+        id_type child = m_tree->child(m_id, pos);
+        _RYML_CB_ASSERT(m_tree->m_callbacks, child != NONE);
+        m_tree->remove(child);
     }
-    else if(m_tree->is_seq(node))
+
+    //! remove a child by name
+    inline void remove_child(csubstr key)
     {
-        this->Writer::_do_write(']');
+        _C4RR();
+        id_type child = m_tree->find_child(m_id, key);
+        _RYML_CB_ASSERT(m_tree->m_callbacks, child != NONE);
+        m_tree->remove(child);
     }
-}
 
-template<class Writer>
-void Emitter<Writer>::_do_visit_flow_ml(size_t id, size_t ilevel, size_t do_indent)
-{
-    C4_UNUSED(id);
-    C4_UNUSED(ilevel);
-    C4_UNUSED(do_indent);
-    RYML_CHECK(false/*not implemented*/);
-}
+public:
 
-template<class Writer>
-void Emitter<Writer>::_do_visit_block_container(size_t node, size_t next_level, size_t do_indent)
-{
-    RepC ind = indent_to(do_indent * next_level);
+    /** change the node's position within its parent, placing it after
+     * @p after. To move to the first position in the parent, simply
+     * pass an empty or default-constructed reference like this:
+     * `n.move({})`. */
+    inline void move(ConstNodeRef const& after)
+    {
+        _C4RR();
+        m_tree->move(m_id, after.m_id);
+    }
 
-    if(m_tree->is_seq(node))
+    /** move the node to a different @p parent (which may belong to a
+     * different tree), placing it after @p after. When the
+     * destination parent is in a new tree, then this node's tree
+     * pointer is reset to the tree of the parent node. */
+    inline void move(NodeRef const& parent, ConstNodeRef const& after)
     {
-        for(size_t child = m_tree->first_child(node); child != NONE; child = m_tree->next_sibling(child))
+        _C4RR();
+        if(parent.m_tree == m_tree)
         {
-            _RYML_CB_ASSERT(m_tree->callbacks(), !m_tree->has_key(child));
-            if(m_tree->is_val(child))
-            {
-                this->Writer::_do_write(ind);
-                this->Writer::_do_write("- ");
-                _writev(child, next_level);
-                this->Writer::_do_write('\n');
-            }
-            else
-            {
-                _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_container(child));
-                NodeType ty = m_tree->type(child);
-                if(ty.marked_flow_sl())
-                {
-                    this->Writer::_do_write(ind);
-                    this->Writer::_do_write("- ");
-                    _do_visit_flow_sl(child, 0u);
-                    this->Writer::_do_write('\n');
-                }
-                else if(ty.marked_flow_ml())
-                {
-                    this->Writer::_do_write(ind);
-                    this->Writer::_do_write("- ");
-                    _do_visit_flow_ml(child, next_level, do_indent);
-                    this->Writer::_do_write('\n');
-                }
-                else
-                {
-                    _do_visit_block(child, next_level, do_indent);
-                }
-            }
-            do_indent = true;
-            ind = indent_to(do_indent * next_level);
+            m_tree->move(m_id, parent.m_id, after.m_id);
         }
-    }
-    else // map
-    {
-        _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_map(node));
-        for(size_t ich = m_tree->first_child(node); ich != NONE; ich = m_tree->next_sibling(ich))
+        else
         {
-            _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->has_key(ich));
-            if(m_tree->is_keyval(ich))
-            {
-                this->Writer::_do_write(ind);
-                _writek(ich, next_level);
-                this->Writer::_do_write(": ");
-                _writev(ich, next_level);
-                this->Writer::_do_write('\n');
-            }
-            else
-            {
-                _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_container(ich));
-                NodeType ty = m_tree->type(ich);
-                if(ty.marked_flow_sl())
-                {
-                    this->Writer::_do_write(ind);
-                    _do_visit_flow_sl(ich, 0u);
-                    this->Writer::_do_write('\n');
-                }
-                else if(ty.marked_flow_ml())
-                {
-                    this->Writer::_do_write(ind);
-                    _do_visit_flow_ml(ich, 0u);
-                    this->Writer::_do_write('\n');
-                }
-                else
-                {
-                    _do_visit_block(ich, next_level, do_indent);
-                }
-            }
-            do_indent = true;
-            ind = indent_to(do_indent * next_level);
+            parent.m_tree->move(m_tree, m_id, parent.m_id, after.m_id);
+            m_tree = parent.m_tree;
         }
     }
-}
-
-template<class Writer>
-void Emitter<Writer>::_do_visit_block(size_t node, size_t ilevel, size_t do_indent)
-{
-    RYML_ASSERT(!m_tree->is_stream(node));
-    RYML_ASSERT(m_tree->is_container(node) || m_tree->is_doc(node));
-    RYML_ASSERT(m_tree->is_root(node) || (m_tree->parent_is_map(node) || m_tree->parent_is_seq(node)));
-    RepC ind = indent_to(do_indent * ilevel);
 
-    if(m_tree->is_doc(node))
+    /** duplicate the current node somewhere within its parent, and
+     * place it after the node @p after. To place into the first
+     * position of the parent, simply pass an empty or
+     * default-constructed reference like this: `n.duplicate({})`. */
+    inline NodeRef duplicate(ConstNodeRef const& after) const
     {
-        _write_doc(node);
-        if(!m_tree->has_children(node))
-            return;
+        _C4RR();
+        _RYML_CB_ASSERT(m_tree->m_callbacks, m_tree == after.m_tree || after.m_id == NONE);
+        id_type dup = m_tree->duplicate(m_id, m_tree->parent(m_id), after.m_id);
+        NodeRef r(m_tree, dup);
+        return r;
     }
-    else if(m_tree->is_container(node))
-    {
-        RYML_ASSERT(m_tree->is_map(node) || m_tree->is_seq(node));
-
-        bool spc = false; // write a space
-        bool nl = false;  // write a newline
-
-        if(m_tree->has_key(node))
-        {
-            this->Writer::_do_write(ind);
-            _writek(node, ilevel);
-            this->Writer::_do_write(':');
-            spc = true;
-        }
-        else if(!m_tree->is_root(node))
-        {
-            this->Writer::_do_write(ind);
-            this->Writer::_do_write('-');
-            spc = true;
-        }
 
-        if(m_tree->has_val_tag(node))
+    /** duplicate the current node somewhere into a different @p parent
+     * (possibly from a different tree), and place it after the node
+     * @p after. To place into the first position of the parent,
+     * simply pass an empty or default-constructed reference like
+     * this: `n.duplicate(parent, {})`. */
+    inline NodeRef duplicate(NodeRef const& parent, ConstNodeRef const& after) const
+    {
+        _C4RR();
+        _RYML_CB_ASSERT(m_tree->m_callbacks, parent.m_tree == after.m_tree || after.m_id == NONE);
+        if(parent.m_tree == m_tree)
         {
-            if(spc)
-                this->Writer::_do_write(' ');
-            _write_tag(m_tree->val_tag(node));
-            spc = true;
-            nl = true;
+            id_type dup = m_tree->duplicate(m_id, parent.m_id, after.m_id);
+            NodeRef r(m_tree, dup);
+            return r;
         }
-
-        if(m_tree->has_val_anchor(node))
+        else
         {
-            if(spc)
-                this->Writer::_do_write(' ');
-            this->Writer::_do_write('&');
-            this->Writer::_do_write(m_tree->val_anchor(node));
-            spc = true;
-            nl = true;
+            id_type dup = parent.m_tree->duplicate(m_tree, m_id, parent.m_id, after.m_id);
+            NodeRef r(parent.m_tree, dup);
+            return r;
         }
+    }
 
-        if(m_tree->has_children(node))
+    inline void duplicate_children(NodeRef const& parent, ConstNodeRef const& after) const
+    {
+        _C4RR();
+        _RYML_CB_ASSERT(m_tree->m_callbacks, parent.m_tree == after.m_tree || after.m_id == NONE); // an empty `after` is accepted, as in duplicate()
+        if(parent.m_tree == m_tree)
         {
-            if(m_tree->has_key(node))
-                nl = true;
-            else
-                if(!m_tree->is_root(node) && !nl)
-                    spc = true;
+            m_tree->duplicate_children(m_id, parent.m_id, after.m_id);
         }
         else
         {
-            if(m_tree->is_seq(node))
-                this->Writer::_do_write(" []\n");
-            else if(m_tree->is_map(node))
-                this->Writer::_do_write(" {}\n");
-            return;
+            parent.m_tree->duplicate_children(m_tree, m_id, parent.m_id, after.m_id);
         }
+    }
 
-        if(spc && !nl)
-            this->Writer::_do_write(' ');
+    /** @} */
 
-        do_indent = 0;
-        if(nl)
-        {
-            this->Writer::_do_write('\n');
-            do_indent = 1;
-        }
-    } // container
+#undef _C4RR
+#undef _C4RID
+};
 
-    size_t next_level = ilevel + 1;
-    if(m_tree->is_root(node) || m_tree->is_doc(node))
-        next_level = ilevel; // do not indent at top level
 
-    _do_visit_block_container(node, next_level, do_indent);
+//-----------------------------------------------------------------------------
+
+inline ConstNodeRef::ConstNodeRef(NodeRef const& that) noexcept
+    : m_tree(that.m_tree)
+    , m_id(!that.is_seed() ? that.id() : (id_type)NONE)
+{
 }
 
-template<class Writer>
-void Emitter<Writer>::_do_visit_json(size_t id)
+inline ConstNodeRef::ConstNodeRef(NodeRef && that) noexcept
+    : m_tree(that.m_tree)
+    , m_id(!that.is_seed() ? that.id() : (id_type)NONE)
 {
-    _RYML_CB_CHECK(m_tree->callbacks(), !m_tree->is_stream(id)); // JSON does not have streams
-    if(m_tree->is_keyval(id))
-    {
-        _writek_json(id);
-        this->Writer::_do_write(": ");
-        _writev_json(id);
-    }
-    else if(m_tree->is_val(id))
-    {
-        _writev_json(id);
-    }
-    else if(m_tree->is_container(id))
-    {
-        if(m_tree->has_key(id))
-        {
-            _writek_json(id);
-            this->Writer::_do_write(": ");
-        }
-        if(m_tree->is_seq(id))
-            this->Writer::_do_write('[');
-        else if(m_tree->is_map(id))
-            this->Writer::_do_write('{');
-    }  // container
+}
 
-    for(size_t ich = m_tree->first_child(id); ich != NONE; ich = m_tree->next_sibling(ich))
-    {
-        if(ich != m_tree->first_child(id))
-            this->Writer::_do_write(',');
-        _do_visit_json(ich);
-    }
 
-    if(m_tree->is_seq(id))
-        this->Writer::_do_write(']');
-    else if(m_tree->is_map(id))
-        this->Writer::_do_write('}');
+inline ConstNodeRef& ConstNodeRef::operator= (NodeRef const& that) noexcept
+{
+    m_tree = (that.m_tree);
+    m_id = (!that.is_seed() ? that.id() : (id_type)NONE);
+    return *this;
 }
 
-template<class Writer>
-void Emitter<Writer>::_write(NodeScalar const& C4_RESTRICT sc, NodeType flags, size_t ilevel)
+inline ConstNodeRef& ConstNodeRef::operator= (NodeRef && that) noexcept
 {
-    if( ! sc.tag.empty())
+    m_tree = (that.m_tree);
+    m_id = (!that.is_seed() ? that.id() : (id_type)NONE);
+    return *this;
+}
+
+
+//-----------------------------------------------------------------------------
+
+/** @addtogroup doc_serialization_helpers
+ *
+ * @{
+ */
+
+template<class T>
+inline void write(NodeRef *n, T const& v)
+{
+    n->set_val_serialized(v);
+}
+
+namespace detail {
+// SFINAE overloads for skipping leading + which cannot be read by the charconv functions
+template<class T>
+C4_ALWAYS_INLINE auto read_skip_plus(csubstr val, T *v)
+    -> typename std::enable_if<std::is_arithmetic<T>::value, bool>::type
+{
+    if(val.begins_with('+'))
+        val = val.sub(1);
+    return from_chars(val, v);
+}
+template<class T>
+C4_ALWAYS_INLINE auto read_skip_plus(csubstr val, T *v)
+    -> typename std::enable_if< ! std::is_arithmetic<T>::value, bool>::type
+{
+    return from_chars(val, v);
+}
+} // namespace detail
+
+/** convert the val of a scalar node to a particular type, by
+ * forwarding its val to @ref from_chars<T>(). The full string is
+ * used.
+ * @return false if the conversion failed */
+template<class T>
+inline auto read(NodeRef const& n, T *v)
+    -> typename std::enable_if< ! std::is_floating_point<T>::value, bool>::type
+{
+    csubstr val = n.val();
+    if(val.empty())
+        return false;
+    return detail::read_skip_plus(val, v);
+}
+/** convert the val of a scalar node to a particular type, by
+ * forwarding its val to @ref from_chars<T>(). The full string is
+ * used.
+ * @return false if the conversion failed */
+template<class T>
+inline auto read(ConstNodeRef const& n, T *v)
+    -> typename std::enable_if< ! std::is_floating_point<T>::value, bool>::type
+{
+    csubstr val = n.val();
+    if(val.empty())
+        return false;
+    return detail::read_skip_plus(val, v);
+}
+
+/** convert the val of a scalar node to a floating point type, by
+ * forwarding its val to @ref from_chars_float<T>().
+ *
+ * @return false if the conversion failed
+ *
+ * @warning Unlike non-floating types, only the leading part of the
+ * string that may constitute a number is processed. This happens
+ * because the float parsing is delegated to fast_float, which is
+ * implemented that way. Consequently, for example, all of `"34"`,
+ * `"34 "`, `"34hg"` and `"34 gh"` will be read as 34. If you are not sure
+ * about the contents of the data, you can use
+ * csubstr::first_real_span() to check before calling `>>`, for
+ * example like this:
+ *
+ * ```cpp
+ * csubstr val = node.val();
+ * if(val.first_real_span() == val)
+ *     node >> v;
+ * else
+ *     ERROR("not a real")
+ * ```
+ */
+template<class T>
+typename std::enable_if<std::is_floating_point<T>::value, bool>::type
+inline read(NodeRef const& n, T *v)
+{
+    csubstr val = n.val();
+    if(val.empty())
+        return false;
+    return from_chars_float(val, v);
+}
+/** convert the val of a scalar node to a floating point type, by
+ * forwarding its val to @ref from_chars_float<T>().
+ *
+ * @return false if the conversion failed
+ *
+ * @warning Unlike non-floating types, only the leading part of the
+ * string that may constitute a number is processed. This happens
+ * because the float parsing is delegated to fast_float, which is
+ * implemented that way. Consequently, for example, all of `"34"`,
+ * `"34 "`, `"34hg"` and `"34 gh"` will be read as 34. If you are not sure
+ * about the contents of the data, you can use
+ * csubstr::first_real_span() to check before calling `>>`, for
+ * example like this:
+ *
+ * ```cpp
+ * csubstr val = node.val();
+ * if(val.first_real_span() == val)
+ *     node >> v;
+ * else
+ *     ERROR("not a real")
+ * ```
+ */
+template<class T>
+typename std::enable_if<std::is_floating_point<T>::value, bool>::type
+inline read(ConstNodeRef const& n, T *v)
+{
+    csubstr val = n.val();
+    if(val.empty())
+        return false;
+    return from_chars_float(val, v);
+}
+
+/** @} */
+
+/** @} */
+
+
+} // namespace yml
+} // namespace c4
+
+
+
+#ifdef __clang__
+#   pragma clang diagnostic pop
+#elif defined(__GNUC__)
+#   pragma GCC diagnostic pop
+#elif defined(_MSC_VER)
+#   pragma warning(pop)
+#endif
+
+#endif /* _C4_YML_NODE_HPP_ */
+
+
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp)
+
+
+
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/c4/yml/writer.hpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/writer.hpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
+
+#ifndef _C4_YML_WRITER_HPP_
+#define _C4_YML_WRITER_HPP_
+
+#ifndef _C4_YML_COMMON_HPP_
+#include "./common.hpp"
+#endif
+
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/substr.hpp
+//#include <c4/substr.hpp>
+#if !defined(C4_SUBSTR_HPP_) && !defined(_C4_SUBSTR_HPP_)
+#error "amalgamate: file c4/substr.hpp must have been included at this point"
+#endif /* C4_SUBSTR_HPP_ */
+
+//included above:
+//#include <stdio.h>  // fwrite(), fputc()
+//included above:
+//#include <string.h> // memcpy()
+
+
+namespace c4 {
+namespace yml {
+
+/** @addtogroup doc_emit
+ * @{
+ */
+
+/** @defgroup doc_writers Writer objects to use with an Emitter
+ * @see Emitter
+ * @{
+ */
+
+
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+/** A writer that outputs to a file. Defaults to stdout. */
+struct WriterFile
+{
+    FILE * m_file;
+    size_t m_pos;
+
+    WriterFile(FILE *f = nullptr) : m_file(f ? f : stdout), m_pos(0) {}
+
+    inline substr _get(bool /*error_on_excess*/)
     {
-        _write_tag(sc.tag);
-        this->Writer::_do_write(' ');
+        substr sp;
+        sp.str = nullptr;
+        sp.len = m_pos;
+        return sp;
     }
-    if(flags.has_anchor())
+
+    template<size_t N>
+    inline void _do_write(const char (&a)[N])
     {
-        RYML_ASSERT(flags.is_ref() != flags.has_anchor());
-        RYML_ASSERT( ! sc.anchor.empty());
-        this->Writer::_do_write('&');
-        this->Writer::_do_write(sc.anchor);
-        this->Writer::_do_write(' ');
+        fwrite(a, sizeof(char), N - 1, m_file);
+        m_pos += N - 1;
     }
-    else if(flags.is_ref())
+
+    inline void _do_write(csubstr sp)
     {
-        if(sc.anchor != "<<")
-            this->Writer::_do_write('*');
-        this->Writer::_do_write(sc.anchor);
-        return;
+        #if defined(__clang__)
+        #   pragma clang diagnostic push
+        #   pragma GCC diagnostic ignored "-Wsign-conversion"
+        #elif defined(__GNUC__)
+        #   pragma GCC diagnostic push
+        #   pragma GCC diagnostic ignored "-Wsign-conversion"
+        #endif
+        if(sp.empty()) return;
+        fwrite(sp.str, sizeof(csubstr::char_type), sp.len, m_file);
+        m_pos += sp.len;
+        #if defined(__clang__)
+        #   pragma clang diagnostic pop
+        #elif defined(__GNUC__)
+        #   pragma GCC diagnostic pop
+        #endif
     }
 
-    // ensure the style flags only have one of KEY or VAL
-    _RYML_CB_ASSERT(m_tree->callbacks(), ((flags & (_WIP_KEY_STYLE|_WIP_VAL_STYLE)) == 0) || (((flags&_WIP_KEY_STYLE) == 0) != ((flags&_WIP_VAL_STYLE) == 0)));
-
-    auto style_marks = flags & (_WIP_KEY_STYLE|_WIP_VAL_STYLE);
-    if(style_marks & (_WIP_KEY_LITERAL|_WIP_VAL_LITERAL))
+    inline void _do_write(const char c)
     {
-        _write_scalar_literal(sc.scalar, ilevel, flags.has_key());
+        fputc(c, m_file);
+        ++m_pos;
     }
-    else if(style_marks & (_WIP_KEY_FOLDED|_WIP_VAL_FOLDED))
+
+    inline void _do_write(const char c, size_t num_times)
     {
-        _write_scalar_folded(sc.scalar, ilevel, flags.has_key());
+        for(size_t i = 0; i < num_times; ++i)
+            fputc(c, m_file);
+        m_pos += num_times;
     }
-    else if(style_marks & (_WIP_KEY_SQUO|_WIP_VAL_SQUO))
+};
+
+
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+/** A writer that outputs to an STL-like ostream. */
+template<class OStream>
+struct WriterOStream
+{
+    OStream& m_stream;
+    size_t   m_pos;
+
+    WriterOStream(OStream &s) : m_stream(s), m_pos(0) {}
+
+    inline substr _get(bool /*error_on_excess*/)
     {
-        _write_scalar_squo(sc.scalar, ilevel);
+        substr sp;
+        sp.str = nullptr;
+        sp.len = m_pos;
+        return sp;
     }
-    else if(style_marks & (_WIP_KEY_DQUO|_WIP_VAL_DQUO))
+
+    template<size_t N>
+    inline void _do_write(const char (&a)[N])
     {
-        _write_scalar_dquo(sc.scalar, ilevel);
+        m_stream.write(a, N - 1);
+        m_pos += N - 1;
     }
-    else if(style_marks & (_WIP_KEY_PLAIN|_WIP_VAL_PLAIN))
+
+    inline void _do_write(csubstr sp)
     {
-        _write_scalar_plain(sc.scalar, ilevel);
+        #if defined(__clang__)
+        #   pragma clang diagnostic push
+        #   pragma GCC diagnostic ignored "-Wsign-conversion"
+        #elif defined(__GNUC__)
+        #   pragma GCC diagnostic push
+        #   pragma GCC diagnostic ignored "-Wsign-conversion"
+        #endif
+        if(sp.empty()) return;
+        m_stream.write(sp.str, sp.len);
+        m_pos += sp.len;
+        #if defined(__clang__)
+        #   pragma clang diagnostic pop
+        #elif defined(__GNUC__)
+        #   pragma GCC diagnostic pop
+        #endif
     }
-    else if(!style_marks)
+
+    inline void _do_write(const char c)
     {
-        size_t first_non_nl = sc.scalar.first_not_of('\n');
-        bool all_newlines = first_non_nl == npos;
-        bool has_leading_ws = (!all_newlines) && sc.scalar.sub(first_non_nl).begins_with_any(" \t");
-        bool do_literal = ((!sc.scalar.empty() && all_newlines) || (has_leading_ws && !sc.scalar.trim(' ').empty()));
-        if(do_literal)
-        {
-            _write_scalar_literal(sc.scalar, ilevel, flags.has_key(), /*explicit_indentation*/has_leading_ws);
-        }
-        else
-        {
-            for(size_t i = 0; i < sc.scalar.len; ++i)
-            {
-                if(sc.scalar.str[i] == '\n')
-                {
-                    _write_scalar_literal(sc.scalar, ilevel, flags.has_key(), /*explicit_indentation*/has_leading_ws);
-                    goto wrote_special;
-                }
-                // todo: check for escaped characters requiring double quotes
-            }
-            _write_scalar(sc.scalar, flags.is_quoted());
-        wrote_special:
-            ;
-        }
+        m_stream.put(c);
+        ++m_pos;
     }
-    else
+
+    inline void _do_write(const char c, size_t num_times)
     {
-        _RYML_CB_ERR(m_tree->callbacks(), "not implemented");
+        for(size_t i = 0; i < num_times; ++i)
+            m_stream.put(c);
+        m_pos += num_times;
     }
-}
-template<class Writer>
-void Emitter<Writer>::_write_json(NodeScalar const& C4_RESTRICT sc, NodeType flags)
-{
-    if(C4_UNLIKELY( ! sc.tag.empty()))
-        _RYML_CB_ERR(m_tree->callbacks(), "JSON does not have tags");
-    if(C4_UNLIKELY(flags.has_anchor()))
-        _RYML_CB_ERR(m_tree->callbacks(), "JSON does not have anchors");
-    _write_scalar_json(sc.scalar, flags.has_key(), flags.is_quoted());
-}
+};
 
-#define _rymlindent_nextline() for(size_t lv = 0; lv < ilevel+1; ++lv) { this->Writer::_do_write(' '); this->Writer::_do_write(' '); }
 
-template<class Writer>
-void Emitter<Writer>::_write_scalar_literal(csubstr s, size_t ilevel, bool explicit_key, bool explicit_indentation)
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+/** a writer to a substr */
+struct WriterBuf
 {
-    if(explicit_key)
-        this->Writer::_do_write("? ");
-    csubstr trimmed = s.trimr("\n\r");
-    size_t numnewlines_at_end = s.len - trimmed.len - s.sub(trimmed.len).count('\r');
-    //
-    if(!explicit_indentation)
-        this->Writer::_do_write('|');
-    else
-        this->Writer::_do_write("|2");
-    //
-    if(numnewlines_at_end > 1 || (trimmed.len == 0 && s.len > 0)/*only newlines*/)
-        this->Writer::_do_write("+\n");
-    else if(numnewlines_at_end == 1)
-        this->Writer::_do_write('\n');
-    else
-        this->Writer::_do_write("-\n");
-    //
-    if(trimmed.len)
-    {
-        size_t pos = 0; // tracks the last character that was already written
-        for(size_t i = 0; i < trimmed.len; ++i)
-        {
-            if(trimmed[i] != '\n')
-                continue;
-            // write everything up to this point
-            csubstr since_pos = trimmed.range(pos, i+1); // include the newline
-            _rymlindent_nextline()
-            this->Writer::_do_write(since_pos);
-            pos = i+1; // already written
-        }
-        if(pos < trimmed.len)
-        {
-            _rymlindent_nextline()
-            this->Writer::_do_write(trimmed.sub(pos));
-        }
-        if(numnewlines_at_end)
-        {
-            this->Writer::_do_write('\n');
-            --numnewlines_at_end;
-        }
-    }
-    for(size_t i = 0; i < numnewlines_at_end; ++i)
-    {
-        _rymlindent_nextline()
-        if(i+1 < numnewlines_at_end || explicit_key)
-            this->Writer::_do_write('\n');
-    }
-    if(explicit_key && !numnewlines_at_end)
-        this->Writer::_do_write('\n');
-}
+    substr m_buf;
+    size_t m_pos;
 
-template<class Writer>
-void Emitter<Writer>::_write_scalar_folded(csubstr s, size_t ilevel, bool explicit_key)
-{
-    if(explicit_key)
-    {
-        this->Writer::_do_write("? ");
-    }
-    RYML_ASSERT(s.find("\r") == csubstr::npos);
-    csubstr trimmed = s.trimr('\n');
-    size_t numnewlines_at_end = s.len - trimmed.len;
-    if(numnewlines_at_end == 0)
-    {
-        this->Writer::_do_write(">-\n");
-    }
-    else if(numnewlines_at_end == 1)
-    {
-        this->Writer::_do_write(">\n");
-    }
-    else if(numnewlines_at_end > 1)
-    {
-        this->Writer::_do_write(">+\n");
-    }
-    if(trimmed.len)
-    {
-        size_t pos = 0; // tracks the last character that was already written
-        for(size_t i = 0; i < trimmed.len; ++i)
-        {
-            if(trimmed[i] != '\n')
-                continue;
-            // write everything up to this point
-            csubstr since_pos = trimmed.range(pos, i+1); // include the newline
-            pos = i+1; // because of the newline
-            _rymlindent_nextline()
-            this->Writer::_do_write(since_pos);
-            this->Writer::_do_write('\n'); // write the newline twice
-        }
-        if(pos < trimmed.len)
-        {
-            _rymlindent_nextline()
-            this->Writer::_do_write(trimmed.sub(pos));
-        }
-        if(numnewlines_at_end)
-        {
-            this->Writer::_do_write('\n');
-            --numnewlines_at_end;
-        }
-    }
-    for(size_t i = 0; i < numnewlines_at_end; ++i)
-    {
-        _rymlindent_nextline()
-        if(i+1 < numnewlines_at_end || explicit_key)
-            this->Writer::_do_write('\n');
-    }
-    if(explicit_key && !numnewlines_at_end)
-        this->Writer::_do_write('\n');
-}
+    WriterBuf(substr sp) : m_buf(sp), m_pos(0) {}
 
-template<class Writer>
-void Emitter<Writer>::_write_scalar_squo(csubstr s, size_t ilevel)
-{
-    size_t pos = 0; // tracks the last character that was already written
-    this->Writer::_do_write('\'');
-    for(size_t i = 0; i < s.len; ++i)
+    inline substr _get(bool error_on_excess)
     {
-        if(s[i] == '\n')
+        if(m_pos <= m_buf.len)
         {
-            csubstr sub = s.range(pos, i+1);
-            this->Writer::_do_write(sub);  // write everything up to (including) this char
-            this->Writer::_do_write('\n'); // write the character again
-            if(i + 1 < s.len)
-                _rymlindent_nextline()     // indent the next line
-            pos = i+1;
+            return m_buf.first(m_pos);
         }
-        else if(s[i] == '\'')
+        if(error_on_excess)
         {
-            csubstr sub = s.range(pos, i+1);
-            this->Writer::_do_write(sub); // write everything up to (including) this char
-            this->Writer::_do_write('\''); // write the character again
-            pos = i+1;
+            c4::yml::error("not enough space in the given buffer");
         }
+        substr sp;
+        sp.str = nullptr;
+        sp.len = m_pos;
+        return sp;
     }
-    // write missing characters at the end of the string
-    if(pos < s.len)
-        this->Writer::_do_write(s.sub(pos));
-    this->Writer::_do_write('\'');
-}
 
-template<class Writer>
-void Emitter<Writer>::_write_scalar_dquo(csubstr s, size_t ilevel)
-{
-    size_t pos = 0; // tracks the last character that was already written
-    this->Writer::_do_write('"');
-    for(size_t i = 0; i < s.len; ++i)
+    template<size_t N>
+    inline void _do_write(const char (&a)[N])
     {
-        const char curr = s.str[i];
-        if(curr == '"' || curr == '\\')
-        {
-            csubstr sub = s.range(pos, i);
-            this->Writer::_do_write(sub);  // write everything up to (excluding) this char
-            this->Writer::_do_write('\\'); // write the escape
-            this->Writer::_do_write(curr); // write the char
-            pos = i+1;
-        }
-        else if(s[i] == '\n')
-        {
-            csubstr sub = s.range(pos, i+1);
-            this->Writer::_do_write(sub);  // write everything up to (including) this newline
-            this->Writer::_do_write('\n'); // write the newline again
-            if(i + 1 < s.len)
-                _rymlindent_nextline()     // indent the next line
-            pos = i+1;
-            if(i+1 < s.len) // escape leading whitespace after the newline
-            {
-                const char next = s.str[i+1];
-                if(next == ' ' || next == '\t')
-                    this->Writer::_do_write('\\');
-            }
-        }
-        else if(curr == ' ' || curr == '\t')
-        {
-            // escape trailing whitespace before a newline
-            size_t next = s.first_not_of(" \t\r", i);
-            if(next != npos && s[next] == '\n')
-            {
-                csubstr sub = s.range(pos, i);
-                this->Writer::_do_write(sub);  // write everything up to (excluding) this char
-                this->Writer::_do_write('\\'); // escape the whitespace
-                pos = i;
-            }
-        }
-        else if(C4_UNLIKELY(curr == '\r'))
+        RYML_ASSERT( ! m_buf.overlaps(a));
+        if(m_pos + N-1 <= m_buf.len)
         {
-            csubstr sub = s.range(pos, i);
-            this->Writer::_do_write(sub);  // write everything up to (excluding) this char
-            this->Writer::_do_write("\\r"); // write the escaped char
-            pos = i+1;
+            memcpy(&(m_buf[m_pos]), a, N-1);
         }
+        m_pos += N-1;
     }
-    // write missing characters at the end of the string
-    if(pos < s.len)
-    {
-        csubstr sub = s.sub(pos);
-        this->Writer::_do_write(sub);
-    }
-    this->Writer::_do_write('"');
-}
 
-template<class Writer>
-void Emitter<Writer>::_write_scalar_plain(csubstr s, size_t ilevel)
-{
-    size_t pos = 0; // tracks the last character that was already written
-    for(size_t i = 0; i < s.len; ++i)
+    inline void _do_write(csubstr sp)
     {
-        const char curr = s.str[i];
-        if(curr == '\n')
+        if(sp.empty()) return;
+        RYML_ASSERT( ! sp.overlaps(m_buf));
+        if(m_pos + sp.len <= m_buf.len)
         {
-            csubstr sub = s.range(pos, i+1);
-            this->Writer::_do_write(sub);  // write everything up to (including) this newline
-            this->Writer::_do_write('\n'); // write the newline again
-            if(i + 1 < s.len)
-                _rymlindent_nextline()     // indent the next line
-            pos = i+1;
+            memcpy(&(m_buf[m_pos]), sp.str, sp.len);
         }
+        m_pos += sp.len;
     }
-    // write missing characters at the end of the string
-    if(pos < s.len)
+
+    inline void _do_write(const char c)
     {
-        csubstr sub = s.sub(pos);
-        this->Writer::_do_write(sub);
+        if(m_pos + 1 <= m_buf.len)
+            m_buf[m_pos] = c;
+        ++m_pos;
     }
-}
-
-#undef _rymlindent_nextline
 
-template<class Writer>
-void Emitter<Writer>::_write_scalar(csubstr s, bool was_quoted)
-{
-    // this block of code needed to be moved to before the needs_quotes
-    // assignment to work around a g++ optimizer bug where (s.str != nullptr)
-    // was evaluated as true even if s.str was actually a nullptr (!!!)
-    if(s.len == size_t(0))
+    inline void _do_write(const char c, size_t num_times)
     {
-        if(was_quoted)
-            this->Writer::_do_write("''");
-        return;
+        if(m_pos + num_times <= m_buf.len)
+            for(size_t i = 0; i < num_times; ++i)
+                m_buf[m_pos + i] = c;
+        m_pos += num_times;
     }
+};
 
-    const bool needs_quotes = (
-        was_quoted
-        ||
-        (
-            ( ! s.is_number())
-            &&
-            (
-                // has leading whitespace
-                s.begins_with_any(" \n\t\r")
-                ||
-                // looks like reference or anchor or would be treated as a directive
-                s.begins_with_any("*&%")
-                ||
-                s.begins_with("<<")
-                ||
-                // has trailing whitespace
-                s.ends_with_any(" \n\t\r")
-                ||
-                // has special chars
-                (s.first_of("#:-?,\n{}[]'\"") != npos)
-            )
-        )
-    );
+/** @} */
+
+/** @} */
 
-    if( ! needs_quotes)
-    {
-        this->Writer::_do_write(s);
-    }
-    else
-    {
-        const bool has_dquotes = s.first_of( '"') != npos;
-        const bool has_squotes = s.first_of('\'') != npos;
-        if(!has_squotes && has_dquotes)
-        {
-            this->Writer::_do_write('\'');
-            this->Writer::_do_write(s);
-            this->Writer::_do_write('\'');
-        }
-        else if(has_squotes && !has_dquotes)
-        {
-            RYML_ASSERT(s.count('\n') == 0);
-            this->Writer::_do_write('"');
-            this->Writer::_do_write(s);
-            this->Writer::_do_write('"');
-        }
-        else
-        {
-            _write_scalar_squo(s, /*FIXME FIXME FIXME*/0);
-        }
-    }
-}
-template<class Writer>
-void Emitter<Writer>::_write_scalar_json(csubstr s, bool as_key, bool was_quoted)
-{
-    if(was_quoted)
-    {
-        this->Writer::_do_write('"');
-        this->Writer::_do_write(s);
-        this->Writer::_do_write('"');
-    }
-    // json only allows strings as keys
-    else if(!as_key && (s.is_number() || s == "true" || s == "null" || s == "false"))
-    {
-        this->Writer::_do_write(s);
-    }
-    else
-    {
-        size_t pos = 0;
-        this->Writer::_do_write('"');
-        for(size_t i = 0; i < s.len; ++i)
-        {
-            if(s[i] == '"')
-            {
-                if(i > 0)
-                {
-                    csubstr sub = s.range(pos, i);
-                    this->Writer::_do_write(sub);
-                }
-                pos = i + 1;
-                this->Writer::_do_write("\\\"");
-            }
-        }
-        if(pos < s.len)
-        {
-            csubstr sub = s.sub(pos);
-            this->Writer::_do_write(sub);
-        }
-        this->Writer::_do_write('"');
-    }
-}
 
 } // namespace yml
 } // namespace c4
 
-#endif /* _C4_YML_EMIT_DEF_HPP_ */
+#endif /* _C4_YML_WRITER_HPP_ */
 
 
-// (end https://github.com/biojppm/rapidyaml/src/c4/yml/emit.def.hpp)
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/writer.hpp)
 
 
 
 //********************************************************************************
 //--------------------------------------------------------------------------------
-// src/c4/yml/detail/stack.hpp
-// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/stack.hpp
+// src/c4/yml/detail/parser_dbg.hpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/parser_dbg.hpp
 //--------------------------------------------------------------------------------
 //********************************************************************************
 
-#ifndef _C4_YML_DETAIL_STACK_HPP_
-#define _C4_YML_DETAIL_STACK_HPP_
+#ifndef _C4_YML_DETAIL_PARSER_DBG_HPP_
+#define _C4_YML_DETAIL_PARSER_DBG_HPP_
 
 #ifndef _C4_YML_COMMON_HPP_
-//included above:
-//#include "../common.hpp"
+#include "../common.hpp"
 #endif
 
 #ifdef RYML_DBG
 //included above:
-//#   include <type_traits>
+//#include <cstdio>
 #endif
 
-//included above:
-//#include <string.h>
 
-namespace c4 {
-namespace yml {
-namespace detail {
+//-----------------------------------------------------------------------------
+// some debugging scaffolds
 
-/** A lightweight contiguous stack with SSO. This avoids a dependency on std. */
-template<class T, size_t N=16>
-class stack
-{
-    static_assert(std::is_trivially_copyable<T>::value, "T must be trivially copyable");
-    static_assert(std::is_trivially_destructible<T>::value, "T must be trivially destructible");
+#if defined(_MSC_VER)
+#   pragma warning(push)
+#   pragma warning(disable: 4068/*unknown pragma*/)
+#endif
 
-    enum : size_t { sso_size = N };
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunknown-pragmas"
+//#pragma GCC diagnostic ignored "-Wpragma-system-header-outside-header"
+#pragma GCC system_header
 
-public:
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Werror"
+#pragma clang diagnostic ignored "-Wgnu-zero-variadic-macro-arguments"
 
-    T         m_buf[N];
-    T *       m_stack;
-    size_t    m_size;
-    size_t    m_capacity;
-    Callbacks m_callbacks;
 
-public:
+#ifndef RYML_DBG
+#   define _c4err(fmt, ...)   \
+    this->_err("ERROR: " fmt, ## __VA_ARGS__)
+#   define _c4dbgt(fmt, ...)
+#   define _c4dbgpf(fmt, ...)
+#   define _c4dbgpf_(fmt, ...)
+#   define _c4dbgp(msg)
+#   define _c4dbgp_(msg)
+#   define _c4dbgq(msg)
+#   define _c4presc(...)
+#   define _c4prscalar(msg, scalar, keep_newlines)
+#else
+#   define _c4err(fmt, ...)   \
+    do { RYML_DEBUG_BREAK(); this->_err("ERROR:\n" "{}:{}: " fmt, __FILE__, __LINE__, ## __VA_ARGS__); } while(0)
+#   define _c4dbgt(fmt, ...)   do { if(_dbg_enabled()) {                \
+                               this->_dbg ("{}:{}: "   fmt     , __FILE__, __LINE__, ## __VA_ARGS__); } } while(0)
+#   define _c4dbgpf(fmt, ...)  _dbg_printf("{}:{}: "   fmt "\n", __FILE__, __LINE__, ## __VA_ARGS__)
+#   define _c4dbgpf_(fmt, ...) _dbg_printf("{}:{}: "   fmt     , __FILE__, __LINE__, ## __VA_ARGS__)
+#   define _c4dbgp(msg)        _dbg_printf("{}:{}: "   msg "\n", __FILE__, __LINE__                )
+#   define _c4dbgp_(msg)       _dbg_printf("{}:{}: "   msg     , __FILE__, __LINE__                )
+#   define _c4dbgq(msg)        _dbg_printf(msg "\n")
+#   define _c4presc(...)       do { if(_dbg_enabled()) __c4presc(__VA_ARGS__); } while(0)
+#   define _c4prscalar(msg, scalar, keep_newlines)                  \
+    do { /* debug-print msg, scalar length and escaped scalar */    \
+        _c4dbgpf_("{}: [{}]~~~", msg, (scalar).len);                \
+        if(_dbg_enabled()) {                                        \
+            __c4presc((scalar).str, (scalar).len, (keep_newlines)); \
+        }                                                           \
+        _c4dbgq("~~~");                                             \
+    } while(0)
+#endif // RYML_DBG
 
-    constexpr static bool is_contiguous() { return true; }
 
-    stack(Callbacks const& cb)
-        : m_buf()
-        , m_stack(m_buf)
-        , m_size(0)
-        , m_capacity(N)
-        , m_callbacks(cb) {}
-    stack() : stack(get_callbacks()) {}
-    ~stack()
-    {
-        _free();
-    }
+//-----------------------------------------------------------------------------
 
-    stack(stack const& that) noexcept : stack(that.m_callbacks)
-    {
-        resize(that.m_size);
-        _cp(&that);
-    }
+#ifdef RYML_DBG
 
-    stack(stack &&that) noexcept : stack(that.m_callbacks)
-    {
-        _mv(&that);
-    }
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/dump.hpp
+//#include <c4/dump.hpp>
+#if !defined(C4_DUMP_HPP_) && !defined(_C4_DUMP_HPP_)
+#error "amalgamate: file c4/dump.hpp must have been included at this point"
+#endif /* C4_DUMP_HPP_ */
 
-    stack& operator= (stack const& that) noexcept
+namespace c4 {
+inline bool& _dbg_enabled() { static bool enabled = true; return enabled; } // global on/off switch for debug printing; starts enabled
+inline void _dbg_set_enabled(bool yes) { _dbg_enabled() = yes; } // set the switch returned by _dbg_enabled()
+inline void _dbg_dumper(csubstr s) // dumper callback handed to c4::format_dump(): writes s to stdout
+{
+    if(s.str) // nothing to write for a null string
+        fwrite(s.str, 1, s.len, stdout);
+}
+inline substr _dbg_buf() noexcept // scratch buffer used by _dbg_printf() for formatting
+{
+    static char writebuf[2048]; // one buffer shared by every caller
+    return writebuf;
+}
+template<class ...Args>
+C4_NO_INLINE void _dbg_printf(c4::csubstr fmt, Args const& ...args)
+{
+    if(_dbg_enabled())
     {
-        _cb(that.m_callbacks);
-        resize(that.m_size);
-        _cp(&that);
-        return *this;
+        substr buf = _dbg_buf();
+        const size_t needed_size = c4::format_dump(&_dbg_dumper, buf, fmt, args...);
+        C4_CHECK(needed_size <= buf.len);
     }
-
-    stack& operator= (stack &&that) noexcept
+}
+inline void __c4presc(const char *s, size_t len, bool keep_newlines=false) // debug-print s[0,len), showing special characters as escape sequences
+{
+    RYML_ASSERT(s || !len);
+    size_t prev = 0; // start of the pending run of ordinary characters not yet printed
+    for(size_t i = 0; i < len; ++i)
     {
-        _cb(that.m_callbacks);
-        _mv(&that);
-        return *this;
+        switch(s[i])
+        {
+        case '\n'  : _dbg_printf("{}{}{}", csubstr(s+prev, i-prev), csubstr("\\n"), csubstr(keep_newlines ? "\n":"")); prev = i+1; break;
+        case '\t'  : _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\t")); prev = i+1; break;
+        case '\0'  : _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\0")); prev = i+1; break;
+        case '\r'  : _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\r")); prev = i+1; break;
+        case '\f'  : _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\f")); prev = i+1; break;
+        case '\b'  : _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\b")); prev = i+1; break;
+        case '\v'  : _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\v")); prev = i+1; break;
+        case '\a'  : _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\a")); prev = i+1; break;
+        case '\x1b': _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\x1b")); prev = i+1; break;
+        case -0x3e/*0xc2u*/: // lead byte of the 2-byte sequences C2 A0 and C2 85; NOTE(review): negative labels assume plain char is signed
+            if(i+1 < len)
+            {
+                if(s[i+1] == -0x60/*0xa0u*/)
+                {
+                    _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\_")); prev = i+2; ++i; // consume both bytes so the continuation byte is not printed raw
+                }
+                else if(s[i+1] == -0x7b/*0x85u*/)
+                {
+                    _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\N")); prev = i+2; ++i; // consume both bytes
+                }
+            }
+            break;
+        case -0x1e/*0xe2u*/: // lead byte of the 3-byte sequences E2 80 A8 and E2 80 A9
+            if(i+2 < len && s[i+1] == -0x80/*0x80u*/)
+            {
+                if(s[i+2] == -0x58/*0xa8u*/)
+                {
+                    _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\L")); prev = i+3; i += 2; // consume all three bytes
+                }
+                else if(s[i+2] == -0x57/*0xa9u*/)
+                {
+                    _dbg_printf("{}{}", csubstr(s+prev, i-prev), csubstr("\\P")); prev = i+3; i += 2; // consume all three bytes
+                }
+            }
+            break;
+        }
     }
+    if(len > prev) // flush whatever ordinary characters remain after the last escape
+        _dbg_printf("{}", csubstr(s+prev, len-prev));
+}
+inline void __c4presc(csubstr s, bool keep_newlines=false)
+{
+    __c4presc(s.str, s.len, keep_newlines);
+}
+} // namespace c4
 
-public:
+#endif // RYML_DBG
 
-    size_t size() const { return m_size; }
-    size_t empty() const { return m_size == 0; }
-    size_t capacity() const { return m_capacity; }
+#pragma clang diagnostic pop
+#pragma GCC diagnostic pop
 
-    void clear()
-    {
-        m_size = 0;
-    }
+#if defined(_MSC_VER)
+#   pragma warning(pop)
+#endif
 
-    void resize(size_t sz)
-    {
-        reserve(sz);
-        m_size = sz;
-    }
+#endif /* _C4_YML_DETAIL_PARSER_DBG_HPP_ */
 
-    void reserve(size_t sz);
 
-    void push(T const& C4_RESTRICT n)
-    {
-        RYML_ASSERT((const char*)&n + sizeof(T) < (const char*)m_stack || &n > m_stack + m_capacity);
-        if(m_size == m_capacity)
-        {
-            size_t cap = m_capacity == 0 ? N : 2 * m_capacity;
-            reserve(cap);
-        }
-        m_stack[m_size] = n;
-        ++m_size;
-    }
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/detail/parser_dbg.hpp)
 
-    void push_top()
-    {
-        RYML_ASSERT(m_size > 0);
-        if(m_size == m_capacity)
-        {
-            size_t cap = m_capacity == 0 ? N : 2 * m_capacity;
-            reserve(cap);
-        }
-        m_stack[m_size] = m_stack[m_size - 1];
-        ++m_size;
-    }
+#define C4_YML_EMIT_DEF_HPP_
 
-    T const& C4_RESTRICT pop()
-    {
-        RYML_ASSERT(m_size > 0);
-        --m_size;
-        return m_stack[m_size];
-    }
 
-    C4_ALWAYS_INLINE T const& C4_RESTRICT top() const { RYML_ASSERT(m_size > 0); return m_stack[m_size - 1]; }
-    C4_ALWAYS_INLINE T      & C4_RESTRICT top()       { RYML_ASSERT(m_size > 0); return m_stack[m_size - 1]; }
 
-    C4_ALWAYS_INLINE T const& C4_RESTRICT bottom() const { RYML_ASSERT(m_size > 0); return m_stack[0]; }
-    C4_ALWAYS_INLINE T      & C4_RESTRICT bottom()       { RYML_ASSERT(m_size > 0); return m_stack[0]; }
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/c4/yml/emit.hpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/emit.hpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
 
-    C4_ALWAYS_INLINE T const& C4_RESTRICT top(size_t i) const { RYML_ASSERT(i < m_size); return m_stack[m_size - 1 - i]; }
-    C4_ALWAYS_INLINE T      & C4_RESTRICT top(size_t i)       { RYML_ASSERT(i < m_size); return m_stack[m_size - 1 - i]; }
+#ifndef _C4_YML_EMIT_HPP_
+#define _C4_YML_EMIT_HPP_
 
-    C4_ALWAYS_INLINE T const& C4_RESTRICT bottom(size_t i) const { RYML_ASSERT(i < m_size); return m_stack[i]; }
-    C4_ALWAYS_INLINE T      & C4_RESTRICT bottom(size_t i)       { RYML_ASSERT(i < m_size); return m_stack[i]; }
+/** @file emit.hpp Utilities to emit YAML and JSON. */
 
-    C4_ALWAYS_INLINE T const& C4_RESTRICT operator[](size_t i) const { RYML_ASSERT(i < m_size); return m_stack[i]; }
-    C4_ALWAYS_INLINE T      & C4_RESTRICT operator[](size_t i)       { RYML_ASSERT(i < m_size); return m_stack[i]; }
+#ifndef _C4_YML_WRITER_HPP_
+#include "./writer.hpp"
+#endif
 
-public:
+#ifndef _C4_YML_TREE_HPP_
+#include "./tree.hpp"
+#endif
 
-    using       iterator = T       *;
-    using const_iterator = T const *;
+#ifndef _C4_YML_NODE_HPP_
+#include "./node.hpp"
+#endif
 
-    iterator begin() { return m_stack; }
-    iterator end  () { return m_stack + m_size; }
+#define RYML_DEPRECATE_EMIT                                             \
+    RYML_DEPRECATED("use emit_yaml() instead. "                         \
+                    "See https://github.com/biojppm/rapidyaml/issues/120")
+#define RYML_DEPRECATE_EMITRS                                           \
+    RYML_DEPRECATED("use emitrs_yaml() instead. "                       \
+                    "See https://github.com/biojppm/rapidyaml/issues/120")
+
+#ifdef emit
+#error "emit is defined, likely from a Qt include. "                    \
+    "This will cause a compilation error. "                             \
+    "See https://github.com/biojppm/rapidyaml/issues/120"
+#endif
 
-    const_iterator begin() const { return (const_iterator)m_stack; }
-    const_iterator end  () const { return (const_iterator)m_stack + m_size; }
 
-public:
-    void _free();
-    void _cp(stack const* C4_RESTRICT that);
-    void _mv(stack * that);
-    void _cb(Callbacks const& cb);
-};
+C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast")
 
 
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 
-template<class T, size_t N>
-void stack<T, N>::reserve(size_t sz)
-{
-    if(sz <= m_size)
-        return;
-    if(sz <= N)
-    {
-        m_stack = m_buf;
-        m_capacity = N;
-        return;
-    }
-    T *buf = (T*) m_callbacks.m_allocate(sz * sizeof(T), m_stack, m_callbacks.m_user_data);
-    memcpy(buf, m_stack, m_size * sizeof(T));
-    if(m_stack != m_buf)
-    {
-        m_callbacks.m_free(m_stack, m_capacity * sizeof(T), m_callbacks.m_user_data);
-    }
-    m_stack = buf;
-    m_capacity = sz;
+namespace c4 {
+namespace yml {
+
+/** @addtogroup doc_emit
+ *
+ * @{
+ */
+
+// fwd declarations
+template<class Writer> class Emitter;
+template<class OStream>
+using EmitterOStream = Emitter<WriterOStream<OStream>>;
+using EmitterFile = Emitter<WriterFile>;
+using EmitterBuf  = Emitter<WriterBuf>;
+
+namespace detail {
+inline bool is_set_(ConstNodeRef n) { return n.tree() && (n.id() != NONE); }
 }
 
 
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 
-template<class T, size_t N>
-void stack<T, N>::_free()
-{
-    RYML_ASSERT(m_stack != nullptr); // this structure cannot be memset() to zero
-    if(m_stack != m_buf)
-    {
-        m_callbacks.m_free(m_stack, m_capacity * sizeof(T), m_callbacks.m_user_data);
-        m_stack = m_buf;
-        m_size = N;
-        m_capacity = N;
-    }
-    else
-    {
-        RYML_ASSERT(m_capacity == N);
-    }
-}
+/** Specifies the type of content to emit */
+typedef enum {
+    EMIT_YAML = 0, ///< emit YAML
+    EMIT_JSON = 1  ///< emit JSON
+} EmitType_e;
 
 
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 
-template<class T, size_t N>
-void stack<T, N>::_cp(stack const* C4_RESTRICT that)
+/** A lightweight object containing options to be used when emitting. */
+struct EmitOptions
 {
-    if(that->m_stack != that->m_buf)
-    {
-        RYML_ASSERT(that->m_capacity > N);
-        RYML_ASSERT(that->m_size <= that->m_capacity);
-    }
-    else
+    typedef enum : uint32_t {
+        DEFAULT_FLAGS = 0,
+        JSON_ERR_ON_TAG = 1 << 0,
+        JSON_ERR_ON_ANCHOR = 1 << 1,
+        _JSON_ERR_MASK = JSON_ERR_ON_TAG|JSON_ERR_ON_ANCHOR,
+    } EmitOptionFlags_e;
+
+public:
+
+    /** @name option flags
+     *
+     * @{ */
+    C4_ALWAYS_INLINE EmitOptionFlags_e json_error_flags() const noexcept { return m_option_flags; }
+    EmitOptions& json_error_flags(EmitOptionFlags_e d) noexcept { m_option_flags = (EmitOptionFlags_e)(d & _JSON_ERR_MASK); return *this; }
+    /** @} */
+
+public:
+
+    /** @name max depth for the emitted tree
+     *
+     * This makes the emitter fail when emitting trees exceeding the
+     * max_depth.
+     *
+     * @{ */
+    C4_ALWAYS_INLINE id_type max_depth() const noexcept { return m_max_depth; }
+    EmitOptions& max_depth(id_type d) noexcept { m_max_depth = d; return *this; }
+    static constexpr const id_type max_depth_default = 64;
+    /** @} */
+
+public:
+
+    bool operator== (const EmitOptions& that) const noexcept
     {
-        RYML_ASSERT(that->m_capacity <= N);
-        RYML_ASSERT(that->m_size <= that->m_capacity);
+        return m_max_depth == that.m_max_depth &&
+            m_option_flags == that.m_option_flags;
     }
-    memcpy(m_stack, that->m_stack, that->m_size * sizeof(T));
-    m_size = that->m_size;
-    m_capacity = that->m_size < N ? N : that->m_size;
-    m_callbacks = that->m_callbacks;
-}
+
+private:
+
+    /** @cond dev */
+    id_type m_max_depth{max_depth_default};
+    EmitOptionFlags_e m_option_flags{DEFAULT_FLAGS};
+    /** @endcond */
+};
 
 
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 
-template<class T, size_t N>
-void stack<T, N>::_mv(stack * that)
+/** A stateful emitter, for use with a writer such as @ref WriterBuf,
+ * @ref WriterFile, or @ref WriterOStream */
+template<class Writer>
+class Emitter : public Writer
 {
-    if(that->m_stack != that->m_buf)
-    {
-        RYML_ASSERT(that->m_capacity > N);
-        RYML_ASSERT(that->m_size <= that->m_capacity);
-        m_stack = that->m_stack;
-    }
-    else
-    {
-        RYML_ASSERT(that->m_capacity <= N);
-        RYML_ASSERT(that->m_size <= that->m_capacity);
-        memcpy(m_buf, that->m_buf, that->m_size * sizeof(T));
-        m_stack = m_buf;
-    }
-    m_size = that->m_size;
-    m_capacity = that->m_capacity;
-    m_callbacks = that->m_callbacks;
-    // make sure no deallocation happens on destruction
-    RYML_ASSERT(that->m_stack != m_buf);
-    that->m_stack = that->m_buf;
-    that->m_capacity = N;
-    that->m_size = 0;
-}
+public:
 
+    /** Construct the emitter and its internal Writer state, using default emit options.
+     * @param args arguments to be forwarded to the constructor of the writer.
+     * */
+    template<class ...Args>
+    Emitter(Args &&...args) : Writer(std::forward<Args>(args)...), m_tree(), m_opts(), m_flow(false) {}
 
-//-----------------------------------------------------------------------------
+    /** Construct the emitter and its internal Writer state.
+     *
+     * @param opts EmitOptions
+     * @param args arguments to be forwarded to the constructor of the writer.
+     * */
+    template<class ...Args>
+    Emitter(EmitOptions const& opts, Args &&...args) : Writer(std::forward<Args>(args)...), m_tree(), m_opts(opts), m_flow(false) {}
 
-template<class T, size_t N>
-void stack<T, N>::_cb(Callbacks const& cb)
-{
-    if(cb != m_callbacks)
+    /** emit!
+     *
+     * When writing to a buffer, returns a substr of the emitted YAML.
+     * If the given buffer has insufficient space, the returned substr
+     * will be null and its size will be the needed space. Whatever
+     * the size of the buffer, it is guaranteed that no writes are
+     * done past its end.
+     *
+     * When writing to a file, the returned substr will be null, but its
+     * length will be set to the number of bytes written.
+     *
+     * @param type specify what to emit
+     * @param t the tree to emit
+     * @param id the id of the node to emit
+     * @param error_on_excess when true, an error is raised when the
+     *        output buffer is too small for the emitted YAML/JSON
+     * */
+    substr emit_as(EmitType_e type, Tree const& t, id_type id, bool error_on_excess);
+    /** emit starting at the root node */
+    substr emit_as(EmitType_e type, Tree const& t, bool error_on_excess=true)
     {
-        _free();
-        m_callbacks = cb;
+        if(t.empty())
+            return {};
+        return this->emit_as(type, t, t.root_id(), error_on_excess);
+    }
+    /** emit starting at the given node */
+    substr emit_as(EmitType_e type, ConstNodeRef const& n, bool error_on_excess=true)
+    {
+        if(!detail::is_set_(n))
+            return {};
+        _RYML_CB_CHECK(n.tree()->callbacks(), n.readable());
+        return this->emit_as(type, *n.tree(), n.id(), error_on_excess);
     }
-}
-
-} // namespace detail
-} // namespace yml
-} // namespace c4
 
-#endif /* _C4_YML_DETAIL_STACK_HPP_ */
+public:
 
+    /** get the emit options for this object */
+    EmitOptions const& options() const noexcept { return m_opts; }
 
-// (end https://github.com/biojppm/rapidyaml/src/c4/yml/detail/stack.hpp)
+    /** set the max depth for emitted trees (to prevent a stack overflow) */
+    void max_depth(id_type max_depth) noexcept { m_opts.max_depth(max_depth); }
+    /** get the max depth for emitted trees (to prevent a stack overflow) */
+    id_type max_depth() const noexcept { return m_opts.max_depth(); }
 
+private:
 
+    Tree const* C4_RESTRICT m_tree;
+    EmitOptions m_opts;
+    bool m_flow;
 
-//********************************************************************************
-//--------------------------------------------------------------------------------
-// src/c4/yml/parse.hpp
-// https://github.com/biojppm/rapidyaml/src/c4/yml/parse.hpp
-//--------------------------------------------------------------------------------
-//********************************************************************************
+private:
 
-#ifndef _C4_YML_PARSE_HPP_
-#define _C4_YML_PARSE_HPP_
+    void _emit_yaml(id_type id);
+    void _do_visit_flow_sl(id_type id, id_type depth, id_type ilevel=0);
+    void _do_visit_flow_ml(id_type id, id_type depth, id_type ilevel=0, id_type do_indent=1);
+    void _do_visit_block(id_type id, id_type depth, id_type ilevel=0, id_type do_indent=1);
+    void _do_visit_block_container(id_type id, id_type depth, id_type next_level, bool do_indent);
+    void _do_visit_json(id_type id, id_type depth);
 
-#ifndef _C4_YML_TREE_HPP_
-// amalgamate: removed include of
-// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp
-//#include "c4/yml/tree.hpp"
-#if !defined(C4_YML_TREE_HPP_) && !defined(_C4_YML_TREE_HPP_)
-#error "amalgamate: file c4/yml/tree.hpp must have been included at this point"
-#endif /* C4_YML_TREE_HPP_ */
+private:
 
-#endif
+    void _write(NodeScalar const& C4_RESTRICT sc, NodeType flags, id_type level);
+    void _write_json(NodeScalar const& C4_RESTRICT sc, NodeType flags);
 
-#ifndef _C4_YML_NODE_HPP_
-// amalgamate: removed include of
-// https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp
-//#include "c4/yml/node.hpp"
-#if !defined(C4_YML_NODE_HPP_) && !defined(_C4_YML_NODE_HPP_)
-#error "amalgamate: file c4/yml/node.hpp must have been included at this point"
-#endif /* C4_YML_NODE_HPP_ */
+    void _write_doc(id_type id);
+    void _write_scalar_json_dquo(csubstr s);
+    void _write_scalar_literal(csubstr s, id_type level, bool as_key);
+    void _write_scalar_folded(csubstr s, id_type level, bool as_key);
+    void _write_scalar_squo(csubstr s, id_type level);
+    void _write_scalar_dquo(csubstr s, id_type level);
+    void _write_scalar_plain(csubstr s, id_type level);
 
-#endif
+    size_t _write_escaped_newlines(csubstr s, size_t i);
+    size_t _write_indented_block(csubstr s, size_t i, id_type level);
 
-#ifndef _C4_YML_DETAIL_STACK_HPP_
-// amalgamate: removed include of
-// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/stack.hpp
-//#include "c4/yml/detail/stack.hpp"
-#if !defined(C4_YML_DETAIL_STACK_HPP_) && !defined(_C4_YML_DETAIL_STACK_HPP_)
-#error "amalgamate: file c4/yml/detail/stack.hpp must have been included at this point"
-#endif /* C4_YML_DETAIL_STACK_HPP_ */
+    void _write_tag(csubstr tag)
+    {
+        if(!tag.begins_with('!'))
+            this->Writer::_do_write('!');
+        this->Writer::_do_write(tag);
+    }
 
-#endif
+    enum : type_bits {
+        _keysc =  (KEY|KEYREF|KEYANCH|KEYQUO|KEY_STYLE) | ~(VAL|VALREF|VALANCH|VALQUO|VAL_STYLE) | CONTAINER_STYLE,
+        _valsc = ~(KEY|KEYREF|KEYANCH|KEYQUO|KEY_STYLE) |  (VAL|VALREF|VALANCH|VALQUO|VAL_STYLE) | CONTAINER_STYLE,
+        _keysc_json =  (KEY)  | ~(VAL),
+        _valsc_json = ~(KEY)  |  (VAL),
+    };
 
-//included above:
-//#include <stdarg.h>
+    C4_ALWAYS_INLINE void _writek(id_type id, id_type level) { _write(m_tree->keysc(id), (m_tree->_p(id)->m_type.type & ~_valsc), level); }
+    C4_ALWAYS_INLINE void _writev(id_type id, id_type level) { _write(m_tree->valsc(id), (m_tree->_p(id)->m_type.type & ~_keysc), level); }
 
-#if defined(_MSC_VER)
-#   pragma warning(push)
-#   pragma warning(disable: 4251/*needs to have dll-interface to be used by clients of struct*/)
-#endif
+    C4_ALWAYS_INLINE void _writek_json(id_type id) { _write_json(m_tree->keysc(id), m_tree->_p(id)->m_type.type & ~(VAL)); }
+    C4_ALWAYS_INLINE void _writev_json(id_type id) { _write_json(m_tree->valsc(id), m_tree->_p(id)->m_type.type & ~(KEY)); }
 
-namespace c4 {
-namespace yml {
+    void _indent(id_type level, bool enabled)
+    {
+        if(enabled)
+            this->Writer::_do_write(' ', 2u * (size_t)level);
+    }
+    void _indent(id_type level)
+    {
+        if(!m_flow)
+            this->Writer::_do_write(' ', 2u * (size_t)level);
+    }
+};
 
 
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
-class RYML_EXPORT Parser
-{
-public:
-
-    /** @name construction and assignment */
-    /** @{ */
-
-    Parser() : Parser(get_callbacks()) {}
-    Parser(Callbacks const& cb);
-    ~Parser();
 
-    Parser(Parser &&);
-    Parser(Parser const&);
-    Parser& operator=(Parser &&);
-    Parser& operator=(Parser const&);
+/** @defgroup doc_emit_to_file Emit to file
+ *
+ * @{
+ */
 
-    /** @} */
 
-public:
+// emit from tree and node id -----------------------
 
-    /** @name modifiers */
-    /** @{ */
+/** (1) emit YAML to the given file, starting at the given node. A null
+ * file defaults to stdout. Return the number of bytes written. */
+inline size_t emit_yaml(Tree const& t, id_type id, EmitOptions const& opts, FILE *f)
+{
+    EmitterFile em(opts, f);
+    return em.emit_as(EMIT_YAML, t, id, /*error_on_excess*/true).len;
+}
+/** (2) like (1), but use default emit options */
+inline size_t emit_yaml(Tree const& t, id_type id, FILE *f)
+{
+    EmitterFile em(f);
+    return em.emit_as(EMIT_YAML, t, id, /*error_on_excess*/true).len;
+}
+/** (1) emit JSON to the given file, starting at the given node. A null
+ * file defaults to stdout.  Return the number of bytes written. */
+inline size_t emit_json(Tree const& t, id_type id, EmitOptions const& opts, FILE *f)
+{
+    EmitterFile em(opts, f);
+    return em.emit_as(EMIT_JSON, t, id, /*error_on_excess*/true).len;
+}
+/** (2) like (1), but use default emit options */
+inline size_t emit_json(Tree const& t, id_type id, FILE *f)
+{
+    EmitterFile em(f);
+    return em.emit_as(EMIT_JSON, t, id, /*error_on_excess*/true).len;
+}
 
-    /** Reserve a certain capacity for the parsing stack.
-     * This should be larger than the expected depth of the parsed
-     * YAML tree.
-     *
-     * The parsing stack is the only (potential) heap memory used by
-     * the parser.
-     *
-     * If the requested capacity is below the default
-     * stack size of 16, the memory is used directly in the parser
-     * object; otherwise it will be allocated from the heap.
-     *
-     * @note this reserves memory only for the parser itself; all the
-     * allocations for the parsed tree will go through the tree's
-     * allocator.
-     *
-     * @note the tree and the arena can (and should) also be reserved. */
-    void reserve_stack(size_t capacity)
-    {
-        m_stack.reserve(capacity);
-    }
 
-    /** Reserve a certain capacity for the array used to track node
-     * locations in the source buffer. */
-    void reserve_locations(size_t num_source_lines)
-    {
-        _resize_locations(num_source_lines);
-    }
+// emit from root -------------------------
 
-    /** Reserve a certain capacity for the character arena used to
-     * filter scalars. */
-    void reserve_filter_arena(size_t num_characters)
-    {
-        _resize_filter_arena(num_characters);
-    }
+/** (1) emit YAML to the given file, starting at the root node. A null file defaults to stdout.
+ * Return the number of bytes written. */
+inline size_t emit_yaml(Tree const& t, EmitOptions const& opts, FILE *f=nullptr)
+{
+    EmitterFile em(opts, f);
+    return em.emit_as(EMIT_YAML, t, /*error_on_excess*/true).len;
+}
+/** (2) like (1), but use default emit options */
+inline size_t emit_yaml(Tree const& t, FILE *f=nullptr)
+{
+    EmitterFile em(f);
+    return em.emit_as(EMIT_YAML, t, /*error_on_excess*/true).len;
+}
+/** (1) emit JSON to the given file. A null file defaults to stdout.
+ * Return the number of bytes written. */
+inline size_t emit_json(Tree const& t, EmitOptions const& opts, FILE *f=nullptr)
+{
+    EmitterFile em(opts, f);
+    return em.emit_as(EMIT_JSON, t, /*error_on_excess*/true).len;
+}
+/** (2) like (1), but use default emit options */
+inline size_t emit_json(Tree const& t, FILE *f=nullptr)
+{
+    EmitterFile em(f);
+    return em.emit_as(EMIT_JSON, t, /*error_on_excess*/true).len;
+}
 
-    /** @} */
 
-public:
+// emit from ConstNodeRef ------------------------
 
-    /** @name getters and modifiers */
-    /** @{ */
+/** (1) emit YAML to the given file. A null file defaults to stdout.
+ * Return the number of bytes written. */
+inline size_t emit_yaml(ConstNodeRef const& r, EmitOptions const& opts, FILE *f=nullptr)
+{
+    if(!detail::is_set_(r))
+        return {};
+    EmitterFile em(opts, f);
+    return em.emit_as(EMIT_YAML, r, /*error_on_excess*/true).len;
+}
+/** (2) like (1), but use default emit options */
+inline size_t emit_yaml(ConstNodeRef const& r, FILE *f=nullptr)
+{
+    if(!detail::is_set_(r))
+        return {};
+    EmitterFile em(f);
+    return em.emit_as(EMIT_YAML, r, /*error_on_excess*/true).len;
+}
+/** (1) emit JSON to the given file. A null file defaults to stdout.
+ * Return the number of bytes written. */
+inline size_t emit_json(ConstNodeRef const& r, EmitOptions const& opts, FILE *f=nullptr)
+{
+    if(!detail::is_set_(r))
+        return {};
+    EmitterFile em(opts, f);
+    return em.emit_as(EMIT_JSON, r, /*error_on_excess*/true).len;
+}
+/** (2) like (1), but use default emit options */
+inline size_t emit_json(ConstNodeRef const& r, FILE *f=nullptr)
+{
+    if(!detail::is_set_(r))
+        return {};
+    EmitterFile em(f);
+    return em.emit_as(EMIT_JSON, r, /*error_on_excess*/true).len;
+}
 
-    /** Get the current callbacks in the parser. */
-    Callbacks callbacks() const { return m_stack.m_callbacks; }
+/** @} */
 
-    /** Get the name of the latest file parsed by this object. */
-    csubstr filename() const { return m_file; }
 
-    /** Get the latest YAML buffer parsed by this object. */
-    csubstr source() const { return m_buf; }
+//-----------------------------------------------------------------------------
 
-    size_t stack_capacity() const { return m_stack.capacity(); }
-    size_t locations_capacity() const { return m_newline_offsets_capacity; }
-    size_t filter_arena_capacity() const { return m_filter_arena.len; }
+/** @defgroup doc_emit_to_ostream Emit to an STL-like ostream
+ *
+ * @{
+ */
 
-    /** @} */
+/** emit YAML to an STL-like ostream */
+template<class OStream>
+inline OStream& operator<< (OStream& s, Tree const& t)
+{
+    EmitterOStream<OStream> em(s);
+    em.emit_as(EMIT_YAML, t);
+    return s;
+}
 
-public:
+/** emit YAML to an STL-like ostream
+ * @overload */
+template<class OStream>
+inline OStream& operator<< (OStream& s, ConstNodeRef const& n)
+{
+    if(!detail::is_set_(n))
+        return s;
+    EmitterOStream<OStream> em(s);
+    em.emit_as(EMIT_YAML, n);
+    return s;
+}
 
-    /** @name parse_in_place */
-    /** @{ */
+/** mark a tree or node to be emitted as json when using @ref
+ * operator<<, with options. For example:
+ *
+ * ```cpp
+ * Tree t = parse_in_arena("{foo: bar}");
+ * std::cout << t; // emits YAML
+ * std::cout << as_json(t); // emits JSON
+ * std::cout << as_json(t, EmitOptions().max_depth(10)); // emits JSON with a max tree depth
+ * ```
+ *
+ * @see @ref operator<< */
+struct as_json
+{
+    Tree const* tree;
+    size_t node;
+    EmitOptions options;
+    as_json(Tree const& t, EmitOptions const& opts={}) : tree(&t), node(t.empty() ? NONE : t.root_id()), options(opts)  {}
+    as_json(Tree const& t, size_t id, EmitOptions const& opts={}) : tree(&t), node(id), options(opts)  {}
+    as_json(ConstNodeRef const& n, EmitOptions const& opts={}) : tree(n.tree()), node(n.id()), options(opts) {}
+};
 
-    /** Create a new tree and parse into its root.
-     * The tree is created with the callbacks currently in the parser. */
-    Tree parse_in_place(csubstr filename, substr src)
-    {
-        Tree t(callbacks());
-        t.reserve(_estimate_capacity(src));
-        this->parse_in_place(filename, src, &t, t.root_id());
-        return t;
-    }
+/** mark a tree or node to be emitted as yaml when using @ref
+ * operator<< . For example:
+ *
+ * ```cpp
+ * Tree t = parse_in_arena("{foo: bar}");
+ * std::cout << t; // emits YAML
+ * std::cout << as_yaml(t); // emits YAML, same as above
+ * std::cout << as_yaml(t, EmitOptions().max_depth(10)); // emits YAML with a max tree depth
+ * ```
+ *
+ * @see @ref operator<< */
+struct as_yaml
+{
+    Tree const* tree;
+    size_t node;
+    EmitOptions options;
+    as_yaml(Tree const& t, EmitOptions const& opts={}) : tree(&t), node(t.empty() ? NONE : t.root_id()), options(opts)  {}
+    as_yaml(Tree const& t, size_t id, EmitOptions const& opts={}) : tree(&t), node(id), options(opts)  {}
+    as_yaml(ConstNodeRef const& n, EmitOptions const& opts={}) : tree(n.tree()), node(n.id()), options(opts) {}
+};
 
-    /** Parse into an existing tree, starting at its root node.
-     * The callbacks in the tree are kept, and used to allocate
-     * the tree members, if any allocation is required. */
-    void parse_in_place(csubstr filename, substr src, Tree *t)
-    {
-        this->parse_in_place(filename, src, t, t->root_id());
-    }
+/** emit json to an STL-like stream */
+template<class OStream>
+inline OStream& operator<< (OStream& s, as_json const& j)
+{
+    if(!j.tree || j.node == NONE)
+        return s;
+    EmitterOStream<OStream> em(j.options, s);
+    em.emit_as(EMIT_JSON, *j.tree, j.node, true);
+    return s;
+}
 
-    /** Parse into an existing node.
-     * The callbacks in the tree are kept, and used to allocate
-     * the tree members, if any allocation is required. */
-    void parse_in_place(csubstr filename, substr src, Tree *t, size_t node_id);
-    //   ^^^^^^^^^^^^^ this is the workhorse overload; everything else is syntactic candy
+/** emit yaml to an STL-like stream */
+template<class OStream>
+inline OStream& operator<< (OStream& s, as_yaml const& y)
+{
+    if(!y.tree || y.node == NONE)
+        return s;
+    EmitterOStream<OStream> em(y.options, s);
+    em.emit_as(EMIT_YAML, *y.tree, y.node, true);
+    return s;
+}
 
-    /** Parse into an existing node.
-     * The callbacks in the tree are kept, and used to allocate
-     * the tree members, if any allocation is required. */
-    void parse_in_place(csubstr filename, substr src, NodeRef node)
-    {
-        this->parse_in_place(filename, src, node.tree(), node.id());
-    }
+/** @} */
 
-    RYML_DEPRECATED("use parse_in_place() instead") Tree parse(csubstr filename, substr src) { return parse_in_place(filename, src); }
-    RYML_DEPRECATED("use parse_in_place() instead") void parse(csubstr filename, substr src, Tree *t) { parse_in_place(filename, src, t); }
-    RYML_DEPRECATED("use parse_in_place() instead") void parse(csubstr filename, substr src, Tree *t, size_t node_id) { parse_in_place(filename, src, t, node_id); }
-    RYML_DEPRECATED("use parse_in_place() instead") void parse(csubstr filename, substr src, NodeRef node) { parse_in_place(filename, src, node); }
 
-    /** @} */
+//-----------------------------------------------------------------------------
 
-public:
+/** @defgroup doc_emit_to_buffer Emit to memory buffer
+ *
+ * @{
+ */
 
-    /** @name parse_in_arena: copy the YAML source buffer to the
-     * tree's arena, then parse the copy in situ
-     *
-     * @note overloads receiving a substr YAML buffer are intentionally
-     * left undefined, such that calling parse_in_arena() with a substr
-     * will cause a linker error. This is to prevent an accidental
-     * copy of the source buffer to the tree's arena, because substr
-     * is implicitly convertible to csubstr. If you really intend to parse
-     * a mutable buffer in the tree's arena, convert it first to immutable
-     * by assigning the substr to a csubstr prior to calling parse_in_arena().
-     * This is not needed for parse_in_place() because csubstr is not
-     * implicitly convertible to substr. */
-    /** @{ */
+// emit from tree and node id -----------------------
 
-    // READ THE NOTE ABOVE!
-    #define RYML_DONT_PARSE_SUBSTR_IN_ARENA "Do not pass a (mutable) substr to parse_in_arena(); if you have a substr, it should be parsed in place. Consider using parse_in_place() instead, or convert the buffer to csubstr prior to calling. This function is deliberately left undefined and will cause a compiler error."
-    RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_in_arena(csubstr filename, substr csrc);
-    RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr csrc, Tree *t);
-    RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr csrc, Tree *t, size_t node_id);
-    RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr csrc, NodeRef node);
+/** (1) emit YAML to the given buffer. Return a substr trimmed to the emitted YAML.
+ * @param t the tree to emit.
+ * @param id the node where to start emitting.
+ * @param opts emit options.
+ * @param buf the output buffer.
+ * @param error_on_excess Raise an error if the space in the buffer is insufficient.
+ * @return a substr trimmed to the result in the output buffer. If the buffer is
+ * insufficient (when error_on_excess is false), the string pointer of the
+ * result will be set to null, and the length will report the required
+ * buffer size. */
+inline substr emit_yaml(Tree const& t, id_type id, EmitOptions const& opts, substr buf, bool error_on_excess=true)
+{
+    EmitterBuf em(opts, buf);
+    return em.emit_as(EMIT_YAML, t, id, error_on_excess);
+}
+/** (2) like (1), but use default emit options */
+inline substr emit_yaml(Tree const& t, id_type id, substr buf, bool error_on_excess=true)
+{
+    EmitterBuf em(buf);
+    return em.emit_as(EMIT_YAML, t, id, error_on_excess);
+}
+/** (1) emit JSON to the given buffer. Return a substr trimmed to the emitted JSON.
+ * @param t the tree to emit.
+ * @param id the node where to start emitting.
+ * @param opts emit options.
+ * @param buf the output buffer.
+ * @param error_on_excess Raise an error if the space in the buffer is insufficient.
+ * @return a substr trimmed to the result in the output buffer. If the buffer is
+ * insufficient (when error_on_excess is false), the string pointer of the
+ * result will be set to null, and the length will report the required
+ * buffer size. */
+inline substr emit_json(Tree const& t, id_type id, EmitOptions const& opts, substr buf, bool error_on_excess=true)
+{
+    EmitterBuf em(opts, buf);
+    return em.emit_as(EMIT_JSON, t, id, error_on_excess);
+}
+/** (2) like (1), but use default emit options */
+inline substr emit_json(Tree const& t, id_type id, substr buf, bool error_on_excess=true)
+{
+    EmitterBuf em(buf);
+    return em.emit_as(EMIT_JSON, t, id, error_on_excess);
+}
 
-    /** Create a new tree and parse into its root.
-     * The immutable YAML source is first copied to the tree's arena,
-     * and parsed from there.
-     * The callbacks in the tree are kept, and used to allocate
-     * the tree members, if any allocation is required. */
-    Tree parse_in_arena(csubstr filename, csubstr csrc)
-    {
-        Tree t(callbacks());
-        substr src = t.copy_to_arena(csrc);
-        t.reserve(_estimate_capacity(csrc));
-        this->parse_in_place(filename, src, &t, t.root_id());
-        return t;
-    }
 
-    /** Parse into an existing tree, starting at its root node.
-     * The immutable YAML source is first copied to the tree's arena,
-     * and parsed from there.
-     * The callbacks in the tree are kept, and used to allocate
-     * the tree members, if any allocation is required. */
-    void parse_in_arena(csubstr filename, csubstr csrc, Tree *t)
-    {
-        substr src = t->copy_to_arena(csrc);
-        this->parse_in_place(filename, src, t, t->root_id());
-    }
+// emit from root -------------------------
 
-    /** Parse into a specific node in an existing tree.
-     * The immutable YAML source is first copied to the tree's arena,
-     * and parsed from there.
-     * The callbacks in the tree are kept, and used to allocate
-     * the tree members, if any allocation is required. */
-    void parse_in_arena(csubstr filename, csubstr csrc, Tree *t, size_t node_id)
-    {
-        substr src = t->copy_to_arena(csrc);
-        this->parse_in_place(filename, src, t, node_id);
-    }
-
-    /** Parse into a specific node in an existing tree.
-     * The immutable YAML source is first copied to the tree's arena,
-     * and parsed from there.
-     * The callbacks in the tree are kept, and used to allocate
-     * the tree members, if any allocation is required. */
-    void parse_in_arena(csubstr filename, csubstr csrc, NodeRef node)
-    {
-        substr src = node.tree()->copy_to_arena(csrc);
-        this->parse_in_place(filename, src, node.tree(), node.id());
-    }
-
-    RYML_DEPRECATED("use parse_in_arena() instead") Tree parse(csubstr filename, csubstr csrc) { return parse_in_arena(filename, csrc); }
-    RYML_DEPRECATED("use parse_in_arena() instead") void parse(csubstr filename, csubstr csrc, Tree *t) { parse_in_arena(filename, csrc, t); }
-    RYML_DEPRECATED("use parse_in_arena() instead") void parse(csubstr filename, csubstr csrc, Tree *t, size_t node_id) { parse_in_arena(filename, csrc, t, node_id); }
-    RYML_DEPRECATED("use parse_in_arena() instead") void parse(csubstr filename, csubstr csrc, NodeRef node) { parse_in_arena(filename, csrc, node); }
-
-    /** @} */
+/** (1) emit YAML to the given buffer. Return a substr trimmed to the emitted YAML.
+ * @param t the tree; will be emitted from the root node.
+ * @param opts emit options.
+ * @param buf the output buffer.
+ * @param error_on_excess Raise an error if the space in the buffer is insufficient.
+ * @return a substr trimmed to the result in the output buffer. If the buffer is insufficient (when error_on_excess
+ * is false), the string pointer of the result will be set to null, and the length will report the required buffer size. */
+inline substr emit_yaml(Tree const& t, EmitOptions const& opts, substr buf, bool error_on_excess=true)
+{
+    EmitterBuf em(opts, buf);
+    return em.emit_as(EMIT_YAML, t, error_on_excess);
+}
+/** (2) like (1), but use default emit options */
+inline substr emit_yaml(Tree const& t, substr buf, bool error_on_excess=true)
+{
+    EmitterBuf em(buf);
+    return em.emit_as(EMIT_YAML, t, error_on_excess);
+}
+/** (1) emit JSON to the given buffer. Return a substr trimmed to the emitted JSON.
+ * @param t the tree; will be emitted from the root node.
+ * @param opts emit options.
+ * @param buf the output buffer.
+ * @param error_on_excess Raise an error if the space in the buffer is insufficient.
+ * @return a substr trimmed to the result in the output buffer. If the buffer is insufficient (when error_on_excess
+ * is false), the string pointer of the result will be set to null, and the length will report the required buffer size. */
+inline substr emit_json(Tree const& t, EmitOptions const& opts, substr buf, bool error_on_excess=true)
+{
+    EmitterBuf em(opts, buf);
+    return em.emit_as(EMIT_JSON, t, error_on_excess);
+}
+/** (2) like (1), but use default emit options */
+inline substr emit_json(Tree const& t, substr buf, bool error_on_excess=true)
+{
+    EmitterBuf em(buf);
+    return em.emit_as(EMIT_JSON, t, error_on_excess);
+}
 
-public:
 
-    /** @name locations */
-    /** @{ */
+// emit from ConstNodeRef ------------------------
 
-    /** Get the location of a node of the last tree to be parsed by this parser. */
-    Location location(Tree const& tree, size_t node_id) const;
-    /** Get the location of a node of the last tree to be parsed by this parser. */
-    Location location(NodeRef node) const;
-    /** Get the string starting at a particular location, to the end
-     * of the parsed source buffer. */
-    csubstr location_contents(Location const& loc) const;
-    /** Given a pointer to a buffer position, get the location. @p val
-     * must be pointing to somewhere in the source buffer that was
-     * last parsed by this object. */
-    Location val_location(const char *val) const;
+/** (1) emit YAML to the given buffer. Return a substr trimmed to the emitted YAML.
+ * @param r the starting node.
+ * @param opts emit options.
+ * @param buf the output buffer.
+ * @param error_on_excess Raise an error if the space in the buffer is insufficient.
+ * @return a substr trimmed to the result in the output buffer. If the buffer is
+ * insufficient (when error_on_excess is false), the string pointer of the
+ * result will be set to null, and the length will report the required buffer size. */
+inline substr emit_yaml(ConstNodeRef const& r, EmitOptions const& opts, substr buf, bool error_on_excess=true)
+{
+    if(!detail::is_set_(r))
+        return {};
+    EmitterBuf em(opts, buf);
+    return em.emit_as(EMIT_YAML, r, error_on_excess);
+}
+/** (2) like (1), but use default emit options */
+inline substr emit_yaml(ConstNodeRef const& r, substr buf, bool error_on_excess=true)
+{
+    if(!detail::is_set_(r))
+        return {};
+    EmitterBuf em(buf);
+    return em.emit_as(EMIT_YAML, r, error_on_excess);
+}
+/** (1) emit JSON to the given buffer. Return a substr trimmed to the emitted JSON.
+ * @param r the starting node.
+ * @param opts emit options.
+ * @param buf the output buffer.
+ * @param error_on_excess Raise an error if the space in the buffer is insufficient.
+ * @return a substr trimmed to the result in the output buffer. If the buffer is
+ * insufficient (when error_on_excess is false), the string pointer of the
+ * result will be set to null, and the length will report the required buffer size. */
+inline substr emit_json(ConstNodeRef const& r, EmitOptions const& opts, substr buf, bool error_on_excess=true)
+{
+    if(!detail::is_set_(r))
+        return {};
+    EmitterBuf em(opts, buf);
+    return em.emit_as(EMIT_JSON, r, error_on_excess);
+}
+/** (2) like (1), but use default emit options */
+inline substr emit_json(ConstNodeRef const& r, substr buf, bool error_on_excess=true)
+{
+    if(!detail::is_set_(r))
+        return {};
+    EmitterBuf em(buf);
+    return em.emit_as(EMIT_JSON, r, error_on_excess);
+}
 
-    /** @} */
 
-private:
+//-----------------------------------------------------------------------------
 
-    typedef enum {
-        BLOCK_LITERAL, //!< keep newlines (|)
-        BLOCK_FOLD     //!< replace newline with single space (>)
-    } BlockStyle_e;
+/** @defgroup doc_emit_to_container Emit to resizeable container
+ *
+ * @{
+ */
 
-    typedef enum {
-        CHOMP_CLIP,    //!< single newline at end (default)
-        CHOMP_STRIP,   //!< no newline at end     (-)
-        CHOMP_KEEP     //!< all newlines from end (+)
-    } BlockChomp_e;
+// emit from tree and node id ---------------------------
 
-private:
+/** (1) emit+resize: emit YAML to the given `std::string`/`std::vector`-like
+ * container, resizing it as needed to fit the emitted YAML. If @p append is
+ * set to true, the emitted YAML is appended at the end of the container.
+ *
+ * @return a substr trimmed to the emitted YAML (excluding the initial contents, when appending) */
+template<class CharOwningContainer>
+substr emitrs_yaml(Tree const& t, id_type id, EmitOptions const& opts, CharOwningContainer * cont, bool append=false)
+{
+    size_t startpos = append ? cont->size() : 0u;
+    cont->resize(cont->capacity()); // otherwise the first emit would be certain to fail
+    substr buf = to_substr(*cont).sub(startpos);
+    substr ret = emit_yaml(t, id, opts, buf, /*error_on_excess*/false);
+    if(ret.str == nullptr && ret.len > 0)
+    {
+        cont->resize(startpos + ret.len);
+        buf = to_substr(*cont).sub(startpos);
+        ret = emit_yaml(t, id, opts, buf, /*error_on_excess*/true);
+    }
+    else
+    {
+        cont->resize(startpos + ret.len);
+    }
+    return ret;
+}
+/** (2) like (1), but use default emit options */
+template<class CharOwningContainer>
+substr emitrs_yaml(Tree const& t, id_type id, CharOwningContainer * cont, bool append=false)
+{
+    return emitrs_yaml(t, id, EmitOptions{}, cont, append);
+}
+/** (1) emit+resize: emit JSON to the given `std::string`/`std::vector`-like
+ * container, resizing it as needed to fit the emitted JSON. If @p append is
+ * set to true, the emitted JSON is appended at the end of the container.
+ *
+ * @return a substr trimmed to the emitted JSON (excluding the initial contents, when appending) */
+template<class CharOwningContainer>
+substr emitrs_json(Tree const& t, id_type id, EmitOptions const& opts, CharOwningContainer * cont, bool append=false)
+{
+    const size_t startpos = append ? cont->size() : 0u;
+    cont->resize(cont->capacity()); // otherwise the first emit would be certain to fail
+    substr buf = to_substr(*cont).sub(startpos);
+    // first pass must not raise on overflow: a null str with nonzero len reports the required size
+    substr ret = emit_json(t, id, opts, buf, /*error_on_excess*/false);
+    if(ret.str == nullptr && ret.len > 0)
+    {
+        cont->resize(startpos + ret.len);
+        buf = to_substr(*cont).sub(startpos);
+        ret = emit_json(t, id, opts, buf, /*error_on_excess*/true);
+    }
+    else
+    {
+        cont->resize(startpos + ret.len);
+    }
+    return ret;
+}
+/** (2) like (1), but use default emit options */
+template<class CharOwningContainer>
+substr emitrs_json(Tree const& t, id_type id, CharOwningContainer * cont, bool append=false)
+{
+    return emitrs_json(t, id, EmitOptions{}, cont, append);
+}
 
-    using flag_t = int;
 
-    static size_t _estimate_capacity(csubstr src) { size_t c = _count_nlines(src); c = c >= 16 ? c : 16; return c; }
+/** (3) emit+resize: YAML to a newly-created `std::string`/`std::vector`-like container. */
+template<class CharOwningContainer>
+CharOwningContainer emitrs_yaml(Tree const& t, id_type id, EmitOptions const& opts={})
+{
+    CharOwningContainer c;
+    emitrs_yaml(t, id, opts, &c);
+    return c;
+}
+/** (3) emit+resize: JSON to a newly-created `std::string`/`std::vector`-like container. */
+template<class CharOwningContainer>
+CharOwningContainer emitrs_json(Tree const& t, id_type id, EmitOptions const& opts={})
+{
+    CharOwningContainer c;
+    emitrs_json(t, id, opts, &c);
+    return c;
+}
 
-    void  _reset();
 
-    bool  _finished_file() const;
-    bool  _finished_line() const;
+// emit from root -------------------------
 
-    csubstr _peek_next_line(size_t pos=npos) const;
-    bool    _advance_to_peeked();
-    void    _scan_line();
+/** (1) emit+resize: YAML to the given `std::string`/`std::vector`-like
+ * container, resizing it as needed to fit the emitted YAML.
+ * @return a substr trimmed to the new emitted contents. */
+template<class CharOwningContainer>
+substr emitrs_yaml(Tree const& t, EmitOptions const& opts, CharOwningContainer * cont, bool append=false)
+{
+    if(t.empty())
+        return {};
+    return emitrs_yaml(t, t.root_id(), opts, cont, append);
+}
+/** (2) like (1), but use default emit options */
+template<class CharOwningContainer>
+substr emitrs_yaml(Tree const& t, CharOwningContainer * cont, bool append=false)
+{
+    if(t.empty())
+        return {};
+    return emitrs_yaml(t, t.root_id(), EmitOptions{}, cont, append);
+}
+/** (1) emit+resize: JSON to the given `std::string`/`std::vector`-like
+ * container, resizing it as needed to fit the emitted JSON.
+ * @return a substr trimmed to the new emitted contents. */
+template<class CharOwningContainer>
+substr emitrs_json(Tree const& t, EmitOptions const& opts, CharOwningContainer * cont, bool append=false)
+{
+    if(t.empty())
+        return {};
+    return emitrs_json(t, t.root_id(), opts, cont, append);
+}
+/** (2) like (1), but use default emit options */
+template<class CharOwningContainer>
+substr emitrs_json(Tree const& t, CharOwningContainer * cont, bool append=false)
+{
+    if(t.empty())
+        return {};
+    return emitrs_json(t, t.root_id(), EmitOptions{}, cont, append);
+}
 
-    csubstr _slurp_doc_scalar();
 
-    /**
-     * @param [out] quoted
-     * Will only be written to if this method returns true.
-     * Will be set to true if the scanned scalar was quoted, by '', "", > or |.
-     */
-    bool    _scan_scalar(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted);
-
-    csubstr _scan_comment();
-    csubstr _scan_squot_scalar();
-    csubstr _scan_dquot_scalar();
-    csubstr _scan_block();
-    substr  _scan_plain_scalar_blck(csubstr currscalar, csubstr peeked_line, size_t indentation);
-    substr  _scan_plain_scalar_flow(csubstr currscalar, csubstr peeked_line);
-    substr  _scan_complex_key(csubstr currscalar, csubstr peeked_line);
-    csubstr _scan_to_next_nonempty_line(size_t indentation);
-    csubstr _extend_scanned_scalar(csubstr currscalar);
-
-    csubstr _filter_squot_scalar(const substr s);
-    csubstr _filter_dquot_scalar(substr s);
-    csubstr _filter_plain_scalar(substr s, size_t indentation);
-    csubstr _filter_block_scalar(substr s, BlockStyle_e style, BlockChomp_e chomp, size_t indentation);
-    template<bool backslash_is_escape, bool keep_trailing_whitespace>
-    bool    _filter_nl(substr scalar, size_t *C4_RESTRICT pos, size_t *C4_RESTRICT filter_arena_pos, size_t indentation);
-    template<bool keep_trailing_whitespace>
-    void    _filter_ws(substr scalar, size_t *C4_RESTRICT pos, size_t *C4_RESTRICT filter_arena_pos);
-    bool    _apply_chomp(substr buf, size_t *C4_RESTRICT pos, BlockChomp_e chomp);
-
-    void  _handle_finished_file();
-    void  _handle_line();
-
-    bool  _handle_indentation();
-
-    bool  _handle_unk();
-    bool  _handle_map_flow();
-    bool  _handle_map_blck();
-    bool  _handle_seq_flow();
-    bool  _handle_seq_blck();
-    bool  _handle_top();
-    bool  _handle_types();
-    bool  _handle_key_anchors_and_refs();
-    bool  _handle_val_anchors_and_refs();
-    void  _move_val_tag_to_key_tag();
-    void  _move_key_tag_to_val_tag();
-    void  _move_key_tag2_to_key_tag();
-    void  _move_val_anchor_to_key_anchor();
-    void  _move_key_anchor_to_val_anchor();
-
-    void  _push_level(bool explicit_flow_chars = false);
-    void  _pop_level();
-
-    void  _start_unk(bool as_child=true);
-
-    void  _start_map(bool as_child=true);
-    void  _start_map_unk(bool as_child);
-    void  _stop_map();
-
-    void  _start_seq(bool as_child=true);
-    void  _stop_seq();
-
-    void  _start_seqimap();
-    void  _stop_seqimap();
-
-    void  _start_doc(bool as_child=true);
-    void  _stop_doc();
-    void  _start_new_doc(csubstr rem);
-    void  _end_stream();
+/** (3) emit+resize: YAML from the tree root to a newly-created `std::string`/`std::vector`-like container, returned by value (left default-constructed when the tree is empty). `CharOwningContainer` appears only in the return type, so callers must name it explicitly, e.g. `emitrs_yaml<std::string>(t)`. */
+template<class CharOwningContainer>
+CharOwningContainer emitrs_yaml(Tree const& t, EmitOptions const& opts={})
+{
+    CharOwningContainer c; // the container that is returned
+    if(t.empty()) // nothing to emit: hand back the default-constructed container
+        return c;
+    emitrs_yaml(t, t.root_id(), opts, &c); // the substr returned by this overload is not needed here
+    return c;
+}
+/** (3) emit+resize: JSON from the tree root to a newly-created `std::string`/`std::vector`-like container, returned by value (left default-constructed when the tree is empty). `CharOwningContainer` appears only in the return type, so callers must name it explicitly, e.g. `emitrs_json<std::string>(t)`. */
+template<class CharOwningContainer>
+CharOwningContainer emitrs_json(Tree const& t, EmitOptions const& opts={})
+{
+    CharOwningContainer c; // the container that is returned
+    if(t.empty()) // nothing to emit: hand back the default-constructed container
+        return c;
+    emitrs_json(t, t.root_id(), opts, &c); // the substr returned by this overload is not needed here
+    return c;
+}
 
-    NodeData* _append_val(csubstr val, flag_t quoted=false);
-    NodeData* _append_key_val(csubstr val, flag_t val_quoted=false);
-    bool  _rval_dash_start_or_continue_seq();
 
-    void  _store_scalar(csubstr s, flag_t is_quoted);
-    csubstr _consume_scalar();
-    void  _move_scalar_from_top();
+// emit from ConstNodeRef ------------------------
 
-    inline NodeData* _append_val_null(const char *str) { _RYML_CB_ASSERT(m_stack.m_callbacks, str >= m_buf.begin() && str <= m_buf.end()); return _append_val({str, size_t(0)}); }
-    inline NodeData* _append_key_val_null(const char *str) { _RYML_CB_ASSERT(m_stack.m_callbacks, str >= m_buf.begin() && str <= m_buf.end()); return _append_key_val({str, size_t(0)}); }
-    inline void      _store_scalar_null(const char *str) {  _RYML_CB_ASSERT(m_stack.m_callbacks, str >= m_buf.begin() && str <= m_buf.end()); _store_scalar({str, size_t(0)}, false); }
 
-    void  _set_indentation(size_t behind);
-    void  _save_indentation(size_t behind=0);
-    bool  _maybe_set_indentation_from_anchor_or_tag();
+/** (1) emit+resize: YAML for node `n` to the given `std::string`/`std::vector`-like container,
+ * resizing it as needed to fit the emitted YAML. Forwards to the (tree, id, opts, cont, append) overload.
+ * @return a substr trimmed to the new emitted contents, or an empty substr when `detail::is_set_(n)` is false */
+template<class CharOwningContainer>
+substr emitrs_yaml(ConstNodeRef const& n, EmitOptions const& opts, CharOwningContainer * cont, bool append=false)
+{
+    if(!detail::is_set_(n)) // presumably: n does not refer to a node -- TODO confirm against detail::is_set_
+        return {};
+    _RYML_CB_CHECK(n.tree()->callbacks(), n.readable()); // NOTE(review): presumably reports an error via the tree's callbacks when n is not readable -- confirm
+    return emitrs_yaml(*n.tree(), n.id(), opts, cont, append);
+}
+/** (2) like (1), but with default-constructed EmitOptions; returns an empty substr when `detail::is_set_(n)` is false */
+template<class CharOwningContainer>
+substr emitrs_yaml(ConstNodeRef const& n, CharOwningContainer * cont, bool append=false)
+{
+    if(!detail::is_set_(n)) // presumably: n does not refer to a node -- TODO confirm against detail::is_set_
+        return {};
+    _RYML_CB_CHECK(n.tree()->callbacks(), n.readable()); // NOTE(review): presumably reports an error via the tree's callbacks when n is not readable -- confirm
+    return emitrs_yaml(*n.tree(), n.id(), EmitOptions{}, cont, append); // forward to the (tree, id, opts, cont, append) overload
+}
+/** (1) emit+resize: JSON for node `n` to the given `std::string`/`std::vector`-like container,
+ * resizing it as needed to fit the emitted JSON. Forwards to the (tree, id, opts, cont, append) overload.
+ * @return a substr trimmed to the new emitted contents, or an empty substr when `detail::is_set_(n)` is false */
+template<class CharOwningContainer>
+substr emitrs_json(ConstNodeRef const& n, EmitOptions const& opts, CharOwningContainer * cont, bool append=false)
+{
+    if(!detail::is_set_(n)) // presumably: n does not refer to a node -- TODO confirm against detail::is_set_
+        return {};
+    _RYML_CB_CHECK(n.tree()->callbacks(), n.readable()); // NOTE(review): presumably reports an error via the tree's callbacks when n is not readable -- confirm
+    return emitrs_json(*n.tree(), n.id(), opts, cont, append);
+}
+/** (2) like (1), but with default-constructed EmitOptions; returns an empty substr when `detail::is_set_(n)` is false */
+template<class CharOwningContainer>
+substr emitrs_json(ConstNodeRef const& n, CharOwningContainer * cont, bool append=false)
+{
+    if(!detail::is_set_(n)) // presumably: n does not refer to a node -- TODO confirm against detail::is_set_
+        return {};
+    _RYML_CB_CHECK(n.tree()->callbacks(), n.readable()); // NOTE(review): presumably reports an error via the tree's callbacks when n is not readable -- confirm
+    return emitrs_json(*n.tree(), n.id(), EmitOptions{}, cont, append); // forward to the (tree, id, opts, cont, append) overload
+}
 
-    void  _write_key_anchor(size_t node_id);
-    void  _write_val_anchor(size_t node_id);
 
-    void _handle_directive(csubstr directive);
+/** (3) emit+resize: YAML for node `n` to a newly-created `std::string`/`std::vector`-like container, returned by value (value-initialized when `detail::is_set_(n)` is false). `CharOwningContainer` appears only in the return type, so callers must name it explicitly. */
+template<class CharOwningContainer>
+CharOwningContainer emitrs_yaml(ConstNodeRef const& n, EmitOptions const& opts={})
+{
+    if(!detail::is_set_(n)) // presumably: n does not refer to a node -- TODO confirm against detail::is_set_
+        return {}; // value-initialized container
+    _RYML_CB_CHECK(n.tree()->callbacks(), n.readable()); // NOTE(review): presumably reports an error via the tree's callbacks when n is not readable -- confirm
+    CharOwningContainer c; // the container that is returned
+    emitrs_yaml(*n.tree(), n.id(), opts, &c); // the substr returned by this overload is not needed here
+    return c;
+}
+/** (3) emit+resize: JSON for node `n` to a newly-created `std::string`/`std::vector`-like container, returned by value (value-initialized when `detail::is_set_(n)` is false). `CharOwningContainer` appears only in the return type, so callers must name it explicitly. */
+template<class CharOwningContainer>
+CharOwningContainer emitrs_json(ConstNodeRef const& n, EmitOptions const& opts={})
+{
+    if(!detail::is_set_(n)) // presumably: n does not refer to a node -- TODO confirm against detail::is_set_
+        return {}; // value-initialized container
+    _RYML_CB_CHECK(n.tree()->callbacks(), n.readable()); // NOTE(review): presumably reports an error via the tree's callbacks when n is not readable -- confirm
+    CharOwningContainer c; // the container that is returned
+    emitrs_json(*n.tree(), n.id(), opts, &c); // the substr returned by this overload is not needed here
+    return c;
+}
 
-    void _skipchars(char c);
-    template<size_t N>
-    void _skipchars(const char (&chars)[N]);
 
-private:
+/** @} */
 
-    static size_t _count_nlines(csubstr src);
 
-private:
+//-----------------------------------------------------------------------------
 
-    typedef enum : flag_t {
-        RTOP = 0x01 <<  0,   ///< reading at top level
-        RUNK = 0x01 <<  1,   ///< reading an unknown: must determine whether scalar, map or seq
-        RMAP = 0x01 <<  2,   ///< reading a map
-        RSEQ = 0x01 <<  3,   ///< reading a seq
-        FLOW = 0x01 <<  4,   ///< reading is inside explicit flow chars: [] or {}
-        QMRK = 0x01 <<  5,   ///< reading an explicit key (`? key`)
-        RKEY = 0x01 <<  6,   ///< reading a scalar as key
-        RVAL = 0x01 <<  7,   ///< reading a scalar as val
-        RNXT = 0x01 <<  8,   ///< read next val or keyval
-        SSCL = 0x01 <<  9,   ///< there's a stored scalar
-        QSCL = 0x01 << 10,   ///< stored scalar was quoted
-        RSET = 0x01 << 11,   ///< the (implicit) map being read is a !!set. @see https://yaml.org/type/set.html
-        NDOC = 0x01 << 12,   ///< no document mode. a document has ended and another has not started yet.
-        //! reading an implicit map nested in an explicit seq.
-        //! eg, {key: [key2: value2, key3: value3]}
-        //! is parsed as {key: [{key2: value2}, {key3: value3}]}
-        RSEQIMAP = 0x01 << 13,
-    } State_e;
+/** @cond dev */
 
-    struct LineContents
-    {
-        csubstr  full;        ///< the full line, including newlines on the right
-        csubstr  stripped;    ///< the stripped line, excluding newlines on the right
-        csubstr  rem;         ///< the stripped line remainder; initially starts at the first non-space character
-        size_t   indentation; ///< the number of spaces on the beginning of the line
+RYML_DEPRECATE_EMIT inline size_t emit(Tree const& t, id_type id, FILE *f) // legacy name: forwards unchanged to emit_yaml(t, id, f); RYML_DEPRECATE_EMIT is defined outside this chunk (presumably a deprecation marker)
+{
+    return emit_yaml(t, id, f);
+}
+RYML_DEPRECATE_EMIT inline size_t emit(Tree const& t, FILE *f=nullptr) // legacy name: forwards unchanged to emit_yaml(t, f)
+{
+    return emit_yaml(t, f);
+}
+RYML_DEPRECATE_EMIT inline size_t emit(ConstNodeRef const& r, FILE *f=nullptr) // legacy name: forwards unchanged to emit_yaml(r, f)
+{
+    return emit_yaml(r, f);
+}
 
-        LineContents() : full(), stripped(), rem(), indentation() {}
+RYML_DEPRECATE_EMIT inline substr emit(Tree const& t, id_type id, substr buf, bool error_on_excess=true) // legacy name: forwards unchanged to emit_yaml(t, id, buf, error_on_excess)
+{
+    return emit_yaml(t, id, buf, error_on_excess);
+}
+RYML_DEPRECATE_EMIT inline substr emit(Tree const& t, substr buf, bool error_on_excess=true) // legacy name: forwards unchanged to emit_yaml(t, buf, error_on_excess)
+{
+    return emit_yaml(t, buf, error_on_excess);
+}
+RYML_DEPRECATE_EMIT inline substr emit(ConstNodeRef const& r, substr buf, bool error_on_excess=true) // legacy name: forwards unchanged to emit_yaml(r, buf, error_on_excess)
+{
+    return emit_yaml(r, buf, error_on_excess);
+}
 
-        void reset_with_next_line(csubstr buf, size_t pos);
+template<class CharOwningContainer> // legacy emitrs() overload set: each overload forwards to the emitrs_yaml() call with the same arguments
+RYML_DEPRECATE_EMITRS substr emitrs(Tree const& t, id_type id, CharOwningContainer * cont)
+{
+    return emitrs_yaml(t, id, cont); // emit into the caller's container
+}
+template<class CharOwningContainer>
+RYML_DEPRECATE_EMITRS CharOwningContainer emitrs(Tree const& t, id_type id)
+{
+    return emitrs_yaml<CharOwningContainer>(t, id); // container type is not deducible, so it is passed explicitly
+}
+template<class CharOwningContainer>
+RYML_DEPRECATE_EMITRS substr emitrs(Tree const& t, CharOwningContainer * cont)
+{
+    return emitrs_yaml(t, cont); // emit into the caller's container
+}
+template<class CharOwningContainer>
+RYML_DEPRECATE_EMITRS CharOwningContainer emitrs(Tree const& t)
+{
+    return emitrs_yaml<CharOwningContainer>(t); // container type is not deducible, so it is passed explicitly
+}
+template<class CharOwningContainer>
+RYML_DEPRECATE_EMITRS substr emitrs(ConstNodeRef const& n, CharOwningContainer * cont)
+{
+    return emitrs_yaml(n, cont); // emit into the caller's container
+}
+template<class CharOwningContainer>
+RYML_DEPRECATE_EMITRS CharOwningContainer emitrs(ConstNodeRef const& n)
+{
+    return emitrs_yaml<CharOwningContainer>(n); // container type is not deducible, so it is passed explicitly
+}
+/** @endcond */
 
-        void reset(csubstr full_, csubstr stripped_)
-        {
-            full = full_;
-            stripped = stripped_;
-            rem = stripped_;
-            // find the first column where the character is not a space
-            indentation = full.first_not_of(' ');
-        }
 
-        size_t current_col() const
-        {
-            return current_col(rem);
-        }
+} // namespace yml
+} // namespace c4
 
-        size_t current_col(csubstr s) const
-        {
-            RYML_ASSERT(s.str >= full.str);
-            RYML_ASSERT(full.is_super(s));
-            size_t col = static_cast<size_t>(s.str - full.str);
-            return col;
-        }
-    };
+C4_SUPPRESS_WARNING_GCC_CLANG_POP
 
-    struct State
-    {
-        flag_t       flags;
-        size_t       level;
-        size_t       node_id; // don't hold a pointer to the node as it will be relocated during tree resizes
-        csubstr      scalar;
-        size_t       scalar_col; // the column where the scalar (or its quotes) begin
+#undef RYML_DEPRECATE_EMIT
+#undef RYML_DEPRECATE_EMITRS
 
-        Location     pos;
-        LineContents line_contents;
-        size_t       indref;
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/emit.def.hpp
+//#include "c4/yml/emit.def.hpp"
+#if !defined(C4_YML_EMIT_DEF_HPP_) && !defined(_C4_YML_EMIT_DEF_HPP_)
+#error "amalgamate: file c4/yml/emit.def.hpp must have been included at this point"
+#endif /* C4_YML_EMIT_DEF_HPP_ */
 
-        State() : flags(), level(), node_id(), scalar(), scalar_col(), pos(), line_contents(), indref() {}
 
-        void reset(const char *file, size_t node_id_)
-        {
-            flags = RUNK|RTOP;
-            level = 0;
-            pos.name = to_csubstr(file);
-            pos.offset = 0;
-            pos.line = 1;
-            pos.col = 1;
-            node_id = node_id_;
-            scalar_col = 0;
-            scalar.clear();
-            indref = 0;
-        }
-    };
+#endif /* _C4_YML_EMIT_HPP_ */
 
-    void _line_progressed(size_t ahead);
-    void _line_ended();
-    void _line_ended_undo();
 
-    void _prepare_pop()
-    {
-        RYML_ASSERT(m_stack.size() > 1);
-        State const& curr = m_stack.top();
-        State      & next = m_stack.top(1);
-        next.pos = curr.pos;
-        next.line_contents = curr.line_contents;
-        next.scalar = curr.scalar;
-    }
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/emit.hpp)
 
-    inline bool _at_line_begin() const
-    {
-        return m_state->line_contents.rem.begin() == m_state->line_contents.full.begin();
-    }
-    inline bool _at_line_end() const
-    {
-        csubstr r = m_state->line_contents.rem;
-        return r.empty() || r.begins_with(' ', r.len);
-    }
-    inline bool _token_is_from_this_line(csubstr token) const
-    {
-        return token.is_sub(m_state->line_contents.full);
-    }
 
-    inline NodeData * node(State const* s) const { return m_tree->get(s->node_id); }
-    inline NodeData * node(State const& s) const { return m_tree->get(s .node_id); }
-    inline NodeData * node(size_t node_id) const { return m_tree->get(   node_id); }
 
-    inline bool has_all(flag_t f) const { return (m_state->flags & f) == f; }
-    inline bool has_any(flag_t f) const { return (m_state->flags & f) != 0; }
-    inline bool has_none(flag_t f) const { return (m_state->flags & f) == 0; }
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/c4/yml/emit.def.hpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/emit.def.hpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
 
-    static inline bool has_all(flag_t f, State const* s) { return (s->flags & f) == f; }
-    static inline bool has_any(flag_t f, State const* s) { return (s->flags & f) != 0; }
-    static inline bool has_none(flag_t f, State const* s) { return (s->flags & f) == 0; }
+#ifndef _C4_YML_EMIT_DEF_HPP_
+#define _C4_YML_EMIT_DEF_HPP_
 
-    inline void set_flags(flag_t f) { set_flags(f, m_state); }
-    inline void add_flags(flag_t on) { add_flags(on, m_state); }
-    inline void addrem_flags(flag_t on, flag_t off) { addrem_flags(on, off, m_state); }
-    inline void rem_flags(flag_t off) { rem_flags(off, m_state); }
+#ifndef _C4_YML_EMIT_HPP_
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/emit.hpp
+//#include "c4/yml/emit.hpp"
+#if !defined(C4_YML_EMIT_HPP_) && !defined(_C4_YML_EMIT_HPP_)
+#error "amalgamate: file c4/yml/emit.hpp must have been included at this point"
+#endif /* C4_YML_EMIT_HPP_ */
 
-    void set_flags(flag_t f, State * s);
-    void add_flags(flag_t on, State * s);
-    void addrem_flags(flag_t on, flag_t off, State * s);
-    void rem_flags(flag_t off, State * s);
+#endif
 
-    void _resize_filter_arena(size_t num_characters);
-    void _grow_filter_arena(size_t num_characters);
-    substr _finish_filter_arena(substr dst, size_t pos);
+/** @file emit.def.hpp Definitions for emit functions. */
+#ifndef _C4_YML_DETAIL_PARSER_DBG_HPP_
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/parser_dbg.hpp
+//#include "c4/yml/detail/parser_dbg.hpp"
+#if !defined(C4_YML_DETAIL_PARSER_DBG_HPP_) && !defined(_C4_YML_DETAIL_PARSER_DBG_HPP_)
+#error "amalgamate: file c4/yml/detail/parser_dbg.hpp must have been included at this point"
+#endif /* C4_YML_DETAIL_PARSER_DBG_HPP_ */
 
-    void _prepare_locations() const;         // only changes mutable members
-    void _resize_locations(size_t sz) const; // only changes mutable members
-    void _mark_locations_dirty();
-    bool _locations_dirty() const;
+#endif
 
-private:
+namespace c4 {
+namespace yml {
 
-    void _free();
-    void _clr();
-    void _cp(Parser const* that);
-    void _mv(Parser *that);
+template<class Writer> // emit_as: emit node `id` of tree `t` as YAML or JSON (selected by `type`) and return what the Writer's _get() yields
+substr Emitter<Writer>::emit_as(EmitType_e type, Tree const& t, id_type id, bool error_on_excess)
+{
+    if(t.empty()) // empty tree: nothing is written and an empty substr is returned
+    {
+        _RYML_CB_ASSERT(t.callbacks(), id == NONE); // an empty tree has no node that could be named
+        return {};
+    }
+    if(id == NONE) // NONE selects the root node
+        id = t.root_id();
+    _RYML_CB_CHECK(t.callbacks(), id < t.capacity()); // the id must lie within the tree's capacity
+    m_tree = &t; // the visitor methods read the tree through m_tree
+    m_flow = false; // always start outside flow style
+    if(type == EMIT_YAML)
+        _emit_yaml(id);
+    else if(type == EMIT_JSON)
+        _do_visit_json(id, 0); // depth starts at 0
+    else
+        _RYML_CB_ERR(m_tree->callbacks(), "unknown emit type");
+    m_tree = nullptr; // do not keep pointing at the caller's tree after emitting
+    return this->Writer::_get(error_on_excess); // error_on_excess is passed through to the Writer
+}
+
+
+//-----------------------------------------------------------------------------
+
+template<class Writer> // _emit_yaml: top-level YAML entry point; picks how to emit node `id` from its kind (stream, container, doc, keyval, val)
+void Emitter<Writer>::_emit_yaml(id_type id)
+{
+    // save branches in the visitor by doing the initial stream/doc
+    // logic here, sparing the need to check stream/val/keyval inside
+    // the visitor functions
+    auto dispatch = [this](id_type node){ // choose the visitor from the node's style: single-line flow, multi-line flow, or block
+        NodeType ty = m_tree->type(node);
+        if(ty.is_flow_sl())
+            _do_visit_flow_sl(node, 0);
+        else if(ty.is_flow_ml())
+            _do_visit_flow_ml(node, 0);
+        else
+        {
+            _do_visit_block(node, 0);
+        }
+    };
+    if(!m_tree->is_root(id)) // emitting a subtree rather than the whole tree
+    {
+        if(m_tree->is_container(id) && !m_tree->type(id).is_flow()) // non-flow container: written here, bypassing dispatch
+        {
+            id_type ilevel = 0;
+            if(m_tree->has_key(id)) // keyed container: write "key:" on its own line and nest the children one level
+            {
+                this->Writer::_do_write(m_tree->key(id));
+                this->Writer::_do_write(":\n");
+                ++ilevel;
+            }
+            _do_visit_block_container(id, 0, ilevel, ilevel); // 4th argument is the bool do_indent: true only when the key line above was written
+            return;
+        }
+    }
+
+    TagDirectiveRange tagds = m_tree->tag_directives();
+    auto write_tag_directives = [&tagds, this](const id_type next_node){ // tagds is captured by reference: tagds.b advances, so a directive is written once
+        TagDirective const* C4_RESTRICT end = tagds.b;
+        while(end < tagds.e) // find the end of the directives whose next_node_id does not exceed next_node
+        {
+            if(end->next_node_id > next_node)
+                break;
+            ++end;
+        }
+        const id_type parent = m_tree->parent(next_node);
+        for( ; tagds.b != end; ++tagds.b)
+        {
+            if(next_node != m_tree->first_child(parent)) // not the first child: write "..." before the directive
+                this->Writer::_do_write("...\n");
+            this->Writer::_do_write("%TAG ");
+            this->Writer::_do_write(tagds.b->handle);
+            this->Writer::_do_write(' ');
+            this->Writer::_do_write(tagds.b->prefix);
+            this->Writer::_do_write('\n');
+        }
+    };
+    if(m_tree->is_stream(id)) // stream: emit each child in order, with its tag directives written before it
+    {
+        const id_type first_child = m_tree->first_child(id);
+        if(first_child != NONE)
+            write_tag_directives(first_child);
+        for(id_type child = first_child; child != NONE; child = m_tree->next_sibling(child))
+        {
+            dispatch(child);
+            if(m_tree->is_doc(child) && m_tree->type(child).is_flow_sl()) // a single-line flow doc gets its newline here
+                this->Writer::_do_write('\n');
+            if(m_tree->next_sibling(child) != NONE) // directives that belong before the next sibling
+                write_tag_directives(m_tree->next_sibling(child));
+        }
+    }
+    else if(m_tree->is_container(id))
+    {
+        dispatch(id);
+    }
+    else if(m_tree->is_doc(id))
+    {
+        _RYML_CB_ASSERT(m_tree->callbacks(), !m_tree->is_container(id)); // checked above
+        _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_val(id)); // so it must be a val
+        _write_doc(id);
+    }
+    else if(m_tree->is_keyval(id)) // single "key: val" pair
+    {
+        _writek(id, 0);
+        this->Writer::_do_write(": ");
+        _writev(id, 0);
+        if(!m_tree->type(id).is_flow()) // newline only when the node is not flow-styled
+            this->Writer::_do_write('\n');
+    }
+    else if(m_tree->is_val(id)) // lone scalar value
+    {
+        //this->Writer::_do_write("- ");
+        _writev(id, 0);
+        if(!m_tree->type(id).is_flow()) // newline only when the node is not flow-styled
+            this->Writer::_do_write('\n');
+    }
+    else if(m_tree->type(id) == NOTYPE) // untyped node: nothing is written
+    {
+        ;
+    }
+    else
+    {
+        _RYML_CB_ERR(m_tree->callbacks(), "unknown type");
+    }
+}
+
+#define _rymlindent_nextline() this->_indent(ilevel + 1); // expands in place: needs a local named ilevel in scope at the call site; the semicolon is part of the expansion
+
+template<class Writer> // _write_doc: write the header of doc node `id` -- "---" when it is not the root, then its tag/anchor, or its scalar when it has a val
+void Emitter<Writer>::_write_doc(id_type id)
+{
+    RYML_ASSERT(m_tree->is_doc(id));
+    RYML_ASSERT(!m_tree->has_key(id));
+    if(!m_tree->is_root(id)) // a non-root doc must be the child of a stream and is introduced by "---"
+    {
+        RYML_ASSERT(m_tree->is_stream(m_tree->parent(id)));
+        this->Writer::_do_write("---");
+    }
+    // doc without a val: only its tag and/or anchor (if any); doc with a val: the scalar as well
+    if(!m_tree->has_val(id)) // this is more frequent
+    {
+        const bool tag = m_tree->has_val_tag(id);
+        const bool anchor = m_tree->has_val_anchor(id);
+        if(!tag && !anchor) // nothing to add after the optional "---"
+        {
+            ;
+        }
+        else if(!tag && anchor) // anchor only: "&anchor"
+        {
+            if(!m_tree->is_root(id)) // separate from the "---" written above
+                this->Writer::_do_write(' ');
+            this->Writer::_do_write('&');
+            this->Writer::_do_write(m_tree->val_anchor(id));
+            #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
+            if(m_tree->has_children(id) && m_tree->is_root(id))
+                this->Writer::_do_write('\n');
+            #endif
+        }
+        else if(tag && !anchor) // tag only
+        {
+            if(!m_tree->is_root(id)) // separate from the "---" written above
+                this->Writer::_do_write(' ');
+            _write_tag(m_tree->val_tag(id));
+            #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
+            if(m_tree->has_children(id) && m_tree->is_root(id))
+                this->Writer::_do_write('\n');
+            #endif
+        }
+        else // tag && anchor
+        {
+            if(!m_tree->is_root(id)) // separate from the "---" written above
+                this->Writer::_do_write(' ');
+            _write_tag(m_tree->val_tag(id)); // tag first, then " &anchor"
+            this->Writer::_do_write(" &");
+            this->Writer::_do_write(m_tree->val_anchor(id));
+            #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
+            if(m_tree->has_children(id) && m_tree->is_root(id))
+                this->Writer::_do_write('\n');
+            #endif
+        }
+    }
+    else // docval
+    {
+        _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->has_val(id));
+        // some plain scalars such as '...' and '---' must not
+        // appear at 0-indentation
+        const csubstr val = m_tree->val(id);
+        const bool preceded_by_3_dashes = !m_tree->is_root(id); // true exactly when "---" was written above
+        const type_bits style_marks = m_tree->type(id) & (KEY_STYLE|VAL_STYLE); // zero when no key/val style bit is set
+        const bool is_plain = m_tree->type(id).is_val_plain();
+        const bool is_ambiguous = (is_plain || !style_marks) // plain (or unstyled) scalar that starts with "..."/"---" or contains a newline
+            && ((val.begins_with("...") || val.begins_with("---"))
+                ||
+                (val.find('\n') != npos));
+        if(preceded_by_3_dashes)
+        {
+            if(val.len == 0 && !m_tree->has_val_anchor(id) && !m_tree->has_val_tag(id)) // empty val with no anchor or tag: finish the "---" line and stop
+            {
+                this->Writer::_do_write('\n');
+                return;
+            }
+            else if(val.len && is_ambiguous) // ambiguous scalar starts on the line after "---"
+            {
+                this->Writer::_do_write('\n');
+            }
+            else // otherwise the scalar follows "---" on the same line
+            {
+                this->Writer::_do_write(' ');
+            }
+        }
+        id_type ilevel = 0u;
+        if(is_ambiguous) // indent by one level so the scalar does not start at column 0
+        {
+            _rymlindent_nextline();
+            ++ilevel;
+        }
+        _writev(id, ilevel);
+        if(val.len && m_tree->is_root(id)) // a root doc with a non-empty val is ended with a newline here
+            this->Writer::_do_write('\n');
+    }
+    if(!m_tree->is_root(id)) // non-root docs always end their line here
+        this->Writer::_do_write('\n');
+}
+
+template<class Writer> // _do_visit_flow_sl: emit `node` and all its descendants in single-line flow style ({...} / [...]), recursing for nested containers
+void Emitter<Writer>::_do_visit_flow_sl(id_type node, id_type depth, id_type ilevel)
+{
+    const bool prev_flow = m_flow; // remembered so the flag can be restored on the way out
+    m_flow = true;
+    _RYML_CB_ASSERT(m_tree->callbacks(), !m_tree->is_stream(node));
+    _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_container(node) || m_tree->is_doc(node));
+    _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_root(node) || (m_tree->parent_is_map(node) || m_tree->parent_is_seq(node)));
+    if(C4_UNLIKELY(depth > m_opts.max_depth())) // recursion guard
+        _RYML_CB_ERR(m_tree->callbacks(), "max depth exceeded");
+
+    if(m_tree->is_doc(node)) // doc: write the doc header first, then the opening bracket
+    {
+        _write_doc(node);
+        #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
+        if(!m_tree->has_children(node))
+            return;
+        else
+        #endif
+        {
+            if(m_tree->is_map(node))
+            {
+                this->Writer::_do_write('{');
+            }
+            else
+            {
+                _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_seq(node));
+                this->Writer::_do_write('[');
+            }
+        }
+    }
+    else if(m_tree->is_container(node)) // non-doc container: optional "key:", tag and "&anchor", then the opening bracket
+    {
+        _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_map(node) || m_tree->is_seq(node));
+
+        bool spc = false; // write a space
+
+        if(m_tree->has_key(node))
+        {
+            _writek(node, ilevel);
+            this->Writer::_do_write(':');
+            spc = true;
+        }
+
+        if(m_tree->has_val_tag(node))
+        {
+            if(spc) // separate from whatever was written before
+                this->Writer::_do_write(' ');
+            _write_tag(m_tree->val_tag(node));
+            spc = true;
+        }
+
+        if(m_tree->has_val_anchor(node))
+        {
+            if(spc) // separate from whatever was written before
+                this->Writer::_do_write(' ');
+            this->Writer::_do_write('&');
+            this->Writer::_do_write(m_tree->val_anchor(node));
+            spc = true;
+        }
+
+        if(spc) // one space between the key/tag/anchor prefix and the opening bracket
+            this->Writer::_do_write(' ');
+
+        if(m_tree->is_map(node))
+        {
+            this->Writer::_do_write('{');
+        }
+        else
+        {
+            _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_seq(node));
+            this->Writer::_do_write('[');
+        }
+    } // container
+
+    for(id_type child = m_tree->first_child(node), count = 0; child != NONE; child = m_tree->next_sibling(child)) // count: number of children already visited
+    {
+        if(count++) // true for every child after the first: write the separating comma
+            this->Writer::_do_write(',');
+        if(m_tree->is_keyval(child))
+        {
+            _writek(child, ilevel);
+            this->Writer::_do_write(": ");
+            _writev(child, ilevel);
+        }
+        else if(m_tree->is_val(child))
+        {
+            _writev(child, ilevel);
+        }
+        else
+        {
+            // with single-line flow, we can never go back to block
+            _do_visit_flow_sl(child, depth + 1, ilevel + 1);
+        }
+    }
+
+    if(m_tree->is_map(node)) // closing bracket matching the one opened above
+    {
+        this->Writer::_do_write('}');
+    }
+    else if(m_tree->is_seq(node))
+    {
+        this->Writer::_do_write(']');
+    }
+    m_flow = prev_flow; // restore the caller's flow flag
+}
+
+C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(4702) // unreachable error, triggered by flow_ml not implemented
+
+template<class Writer> // _do_visit_flow_ml: multi-line flow emission is a stub; every call reports "not implemented" through c4::yml::error
+void Emitter<Writer>::_do_visit_flow_ml(id_type id, id_type depth, id_type ilevel, id_type do_indent)
+{
+    C4_UNUSED(id); // the parameters are used only by the disabled code below
+    C4_UNUSED(depth);
+    C4_UNUSED(ilevel);
+    C4_UNUSED(do_indent);
+    c4::yml::error("not implemented");
+    #ifdef THIS_IS_A_WORK_IN_PROGRESS
+    if(C4_UNLIKELY(depth > m_opts.max_depth()))
+        _RYML_CB_ERR(m_tree->callbacks(), "max depth exceeded");
+    const bool prev_flow = m_flow;
+    m_flow = true;
+    // do it...
+    m_flow = prev_flow;
+    #endif
+}
+
+template<class Writer> // _do_visit_block_container: emit the children of seq/map `node` in block style at indentation `level`; do_indent controls whether the first child is indented
+void Emitter<Writer>::_do_visit_block_container(id_type node, id_type depth, id_type level, bool do_indent)
+{
+    if(m_tree->is_seq(node))
+    {
+        for(id_type child = m_tree->first_child(node); child != NONE; child = m_tree->next_sibling(child))
+        {
+            _RYML_CB_ASSERT(m_tree->callbacks(), !m_tree->has_key(child)); // seq entries carry no key
+            if(m_tree->is_val(child)) // scalar entry: "- value" on its own line
+            {
+                _indent(level, do_indent);
+                this->Writer::_do_write("- ");
+                _writev(child, level);
+                this->Writer::_do_write('\n');
+            }
+            else
+            {
+                _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_container(child));
+                NodeType ty = m_tree->type(child);
+                if(ty.is_flow_sl()) // flow child: "- " followed by the whole child on the same line
+                {
+                    _indent(level, do_indent);
+                    this->Writer::_do_write("- ");
+                    _do_visit_flow_sl(child, depth+1, 0u);
+                    this->Writer::_do_write('\n');
+                }
+                else if(ty.is_flow_ml())
+                {
+                    _indent(level, do_indent);
+                    this->Writer::_do_write("- ");
+                    _do_visit_flow_ml(child, depth+1, 0u, do_indent);
+                    this->Writer::_do_write('\n');
+                }
+                else
+                {
+                    _do_visit_block(child, depth+1, level, do_indent); // same indentation level
+                }
+            }
+            do_indent = true; // every child after the first is indented
+        }
+    }
+    else // map
+    {
+        _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_map(node));
+        for(id_type ich = m_tree->first_child(node); ich != NONE; ich = m_tree->next_sibling(ich))
+        {
+            _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->has_key(ich)); // map entries always carry a key
+            if(m_tree->is_keyval(ich)) // scalar entry: "key: value" on its own line
+            {
+                _indent(level, do_indent);
+                _writek(ich, level);
+                this->Writer::_do_write(": ");
+                _writev(ich, level);
+                this->Writer::_do_write('\n');
+            }
+            else
+            {
+                _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_container(ich));
+                NodeType ty = m_tree->type(ich);
+                if(ty.is_flow_sl()) // flow child: the flow visitor writes the key itself
+                {
+                    _indent(level, do_indent);
+                    _do_visit_flow_sl(ich, depth+1, 0u);
+                    this->Writer::_do_write('\n');
+                }
+                else if(ty.is_flow_ml())
+                {
+                    _indent(level, do_indent);
+                    _do_visit_flow_ml(ich, depth+1, 0u);
+                    this->Writer::_do_write('\n');
+                }
+                else
+                {
+                    _do_visit_block(ich, depth+1, level, do_indent); // same level!
+                }
+            } // keyval vs container
+            do_indent = true; // every child after the first is indented
+        } // for children
+    } // seq vs map
+}
+
+template<class Writer> // _do_visit_block: emit the block-style header of `node` (doc header, or "key:" / "-" plus tag and anchor), then its children via _do_visit_block_container
+void Emitter<Writer>::_do_visit_block(id_type node, id_type depth, id_type ilevel, id_type do_indent)
+{
+    _RYML_CB_ASSERT(m_tree->callbacks(), !m_tree->is_stream(node));
+    _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_container(node) || m_tree->is_doc(node));
+    _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_root(node) || (m_tree->parent_is_map(node) || m_tree->parent_is_seq(node)));
+    if(C4_UNLIKELY(depth > m_opts.max_depth())) // recursion guard
+        _RYML_CB_ERR(m_tree->callbacks(), "max depth exceeded");
+    if(m_tree->is_doc(node))
+    {
+        _write_doc(node);
+        if(!m_tree->has_children(node)) // a childless doc is complete once its header is written
+            return;
+    }
+    else if(m_tree->is_container(node))
+    {
+        _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_map(node) || m_tree->is_seq(node));
+        bool spc = false; // write a space
+        bool nl = false;  // write a newline
+        if(m_tree->has_key(node)) // map entry: "key:"
+        {
+            _indent(ilevel, do_indent);
+            _writek(node, ilevel);
+            this->Writer::_do_write(':');
+            spc = true;
+        }
+        else if(!m_tree->is_root(node)) // keyless non-root container: written as a "-" entry
+        {
+            _indent(ilevel, do_indent);
+            this->Writer::_do_write('-');
+            spc = true;
+        }
+
+        if(m_tree->has_val_tag(node))
+        {
+            if(spc)
+                this->Writer::_do_write(' ');
+            _write_tag(m_tree->val_tag(node));
+            spc = true;
+            nl = true; // after a tag the children start on the next line
+        }
+
+        if(m_tree->has_val_anchor(node))
+        {
+            if(spc)
+                this->Writer::_do_write(' ');
+            this->Writer::_do_write('&');
+            this->Writer::_do_write(m_tree->val_anchor(node));
+            spc = true;
+            nl = true; // after an anchor the children start on the next line
+        }
+
+        if(m_tree->has_children(node))
+        {
+            if(m_tree->has_key(node)) // children of a keyed container start on the line after "key:"
+                nl = true;
+            else
+                if(!m_tree->is_root(node) && !nl) // "-" entry with no tag/anchor: first child continues on the same line
+                    spc = true;
+        }
+        else // empty container: written inline as [] or {} and nothing more
+        {
+            if(m_tree->is_seq(node))
+                this->Writer::_do_write(" []\n");
+            else if(m_tree->is_map(node))
+                this->Writer::_do_write(" {}\n");
+            return;
+        }
+
+        if(spc && !nl) // staying on the same line: one separating space
+            this->Writer::_do_write(' ');
+
+        do_indent = 0; // the first child is indented only when a newline is written below
+        if(nl)
+        {
+            this->Writer::_do_write('\n');
+            do_indent = 1;
+        }
+    } // container
+
+    id_type next_level = ilevel + 1; // children are nested one level deeper...
+    if(m_tree->is_root(node) || m_tree->is_doc(node))
+        next_level = ilevel; // do not indent at top level
+
+    _do_visit_block_container(node, depth, next_level, do_indent); // do_indent (an id_type here) converts to the callee's bool parameter
+}
+
+C4_SUPPRESS_WARNING_MSVC_POP
+
+/** Emit node `id` and, recursively, its children as JSON. `depth` is the current nesting level; it is compared against m_opts.max_depth(). */
+template<class Writer>
+void Emitter<Writer>::_do_visit_json(id_type id, id_type depth)
+{
+    _RYML_CB_CHECK(m_tree->callbacks(), !m_tree->is_stream(id)); // JSON does not have streams
+    if(C4_UNLIKELY(depth > m_opts.max_depth()))
+        _RYML_CB_ERR(m_tree->callbacks(), "max depth exceeded");
+    if(m_tree->is_keyval(id)) // leaf with a key: written as `key: val`
+    {
+        _writek_json(id);
+        this->Writer::_do_write(": ");
+        _writev_json(id);
+    }
+    else if(m_tree->is_val(id)) // leaf without a key: only the value is written
+    {
+        _writev_json(id);
+    }
+    else if(m_tree->is_container(id)) // map or seq: optional `key: `, then the opening bracket
+    {
+        if(m_tree->has_key(id))
+        {
+            _writek_json(id);
+            this->Writer::_do_write(": ");
+        }
+        if(m_tree->is_seq(id))
+            this->Writer::_do_write('[');
+        else if(m_tree->is_map(id))
+            this->Writer::_do_write('{');
+    }  // container
+    // visit the children, one level deeper, separated by commas (no comma before the first child)
+    for(id_type ich = m_tree->first_child(id); ich != NONE; ich = m_tree->next_sibling(ich))
+    {
+        if(ich != m_tree->first_child(id))
+            this->Writer::_do_write(',');
+        _do_visit_json(ich, depth+1);
+    }
+    // write the closing bracket matching the one opened above
+    if(m_tree->is_seq(id))
+        this->Writer::_do_write(']');
+    else if(m_tree->is_map(id))
+        this->Writer::_do_write('}');
+}
+/** Write one YAML scalar (key or value): the optional tag, then an anchor or a reference, then the scalar text in the style selected by `flags` (a style is chosen from the contents when `flags` has none). */
+template<class Writer>
+void Emitter<Writer>::_write(NodeScalar const& C4_RESTRICT sc, NodeType flags, id_type ilevel)
+{
+    if( ! sc.tag.empty()) // the tag comes first and is followed by a space
+    {
+        _write_tag(sc.tag);
+        this->Writer::_do_write(' ');
+    }
+    if(flags.has_anchor()) // `&anchor ` precedes the scalar
+    {
+        RYML_ASSERT(flags.is_ref() != flags.has_anchor());
+        RYML_ASSERT( ! sc.anchor.empty());
+        this->Writer::_do_write('&');
+        this->Writer::_do_write(sc.anchor);
+        this->Writer::_do_write(' ');
+    }
+    else if(flags.is_ref()) // a reference is written instead of the scalar
+    {
+        if(sc.anchor != "<<") // `<<` is written without the leading `*`
+            this->Writer::_do_write('*');
+        this->Writer::_do_write(sc.anchor);
+        if(flags.is_key_ref()) // only key references get a trailing space
+            this->Writer::_do_write(' ');
+        return;
+    }
+
+    // ensure the style flags only have one of KEY or VAL
+    _RYML_CB_ASSERT(m_tree->callbacks(), ((flags & SCALAR_STYLE) == 0) || (((flags&KEY_STYLE) == 0) != ((flags&VAL_STYLE) == 0)));
+    type_bits style_marks = flags & SCALAR_STYLE;
+    if(!style_marks) // no style was requested: pick one from the scalar contents
+        style_marks = scalar_style_choose(sc.scalar);
+    if(style_marks & (KEY_LITERAL|VAL_LITERAL))
+    {
+        _write_scalar_literal(sc.scalar, ilevel, flags.has_key()); // has_key() is passed as explicit_key
+    }
+    else if(style_marks & (KEY_FOLDED|VAL_FOLDED))
+    {
+        _write_scalar_folded(sc.scalar, ilevel, flags.has_key()); // has_key() is passed as explicit_key
+    }
+    else if(style_marks & (KEY_SQUO|VAL_SQUO))
+    {
+        _write_scalar_squo(sc.scalar, ilevel);
+    }
+    else if(style_marks & (KEY_DQUO|VAL_DQUO))
+    {
+        _write_scalar_dquo(sc.scalar, ilevel);
+    }
+    else if(style_marks & (KEY_PLAIN|VAL_PLAIN))
+    {
+        if(C4_LIKELY(!(sc.scalar.begins_with(": ") || sc.scalar.begins_with(":\t"))))
+            _write_scalar_plain(sc.scalar, ilevel);
+        else // a plain scalar starting with ": " or ":<tab>" is single-quoted instead
+            _write_scalar_squo(sc.scalar, ilevel);
+    }
+    else
+    {
+        _RYML_CB_ERR(m_tree->callbacks(), "not implemented");
+    }
+}
+
+template<class Writer>
+void Emitter<Writer>::_write_json(NodeScalar const& C4_RESTRICT sc, NodeType flags)
+{
+    if(flags & (KEYTAG|VALTAG))
+        if(m_opts.json_error_flags() & EmitOptions::JSON_ERR_ON_TAG)
+            _RYML_CB_ERR(m_tree->callbacks(), "JSON does not have tags");
+    if(C4_UNLIKELY(flags.has_anchor()))
+        if(m_opts.json_error_flags() & EmitOptions::JSON_ERR_ON_ANCHOR)
+            _RYML_CB_ERR(m_tree->callbacks(), "JSON does not have anchors");
+    if(sc.scalar.len)
+    {
+        // use double quoted style...
+        // if it is a key (mandatory in JSON)
+        // if the style is marked quoted
+        bool dquoted = ((flags & (KEY|VALQUO))
+                        || (scalar_style_json_choose(sc.scalar) & SCALAR_DQUO)); // choose the style
+        if(dquoted)
+            _write_scalar_json_dquo(sc.scalar);
+        else
+            this->Writer::_do_write(sc.scalar);
+    }
+    else
+    {
+        if(sc.scalar.str || (flags & (KEY|VALQUO|KEYTAG|VALTAG)))
+            this->Writer::_do_write("\"\"");
+        else
+            this->Writer::_do_write("null");
+    }
+}
+/** Write the run of consecutive newlines starting at s[i], plus one extra newline, and return the index of the last newline of that run. */
+template<class Writer>
+size_t Emitter<Writer>::_write_escaped_newlines(csubstr s, size_t i)
+{
+    RYML_ASSERT(s.len > i);
+    RYML_ASSERT(s.str[i] == '\n');
+    // a run of k newlines in the scalar is emitted as k+1 newlines:
+    // first the extra one, then one per newline of the run
+    this->Writer::_do_write('\n');
+    size_t last = i; // index of the last newline of the run seen so far
+    this->Writer::_do_write('\n'); // the newline at i itself
+    while(last + 1 < s.len && s.str[last + 1] == '\n')
+    {
+        ++last;
+        this->Writer::_do_write('\n');
+    }
+    _RYML_CB_ASSERT(m_tree->callbacks(), last + 1 > 0);
+    _RYML_CB_ASSERT(m_tree->callbacks(), s.str[last] == '\n');
+    return last; // the caller's loop increment moves past the run
+}
+/** True when `prev` is 0 and `s` starts with a space or tab, or when the first non-newline character at or after `i` is a space or tab. */
+inline bool _is_indented_block(csubstr s, size_t prev, size_t i) noexcept
+{
+    const bool starts_blank = (prev == 0) && s.begins_with_any(" \t");
+    const size_t next = starts_blank ? npos : s.first_not_of('\n', i); // only searched when needed
+    const bool next_is_blank = (next != npos) && (s.str[next] == ' ' || s.str[next] == '\t');
+    return starts_blank || next_is_blank;
+}
+/** Write, without escaping newlines, the lines of `s` from index `i` on for as long as a following line starts with a space (or tab); returns the index of the first character that was not written. */
+template<class Writer>
+size_t Emitter<Writer>::_write_indented_block(csubstr s, size_t i, id_type ilevel)
+{
+    //_c4dbgpf("indblock@i={} rem=[{}]~~~\n{}~~~", i, s.sub(i).len, s.sub(i));
+    _RYML_CB_ASSERT(m_tree->callbacks(), i > 0);
+    _RYML_CB_ASSERT(m_tree->callbacks(), s.str[i-1] == '\n'); // i must be at the start of a line
+    _RYML_CB_ASSERT(m_tree->callbacks(), i < s.len);
+    _RYML_CB_ASSERT(m_tree->callbacks(), s.str[i] == ' ' || s.str[i] == '\t' || s.str[i] == '\n');
+again:
+    size_t pos = s.find("\n ", i); // next newline that is followed by a space...
+    if(pos == npos)
+        pos = s.find("\n\t", i); // ...a tab is only looked for when no such newline exists
+    if(pos != npos)
+    {
+        ++pos; // include the newline in the line being written
+        //_c4dbgpf("indblock line@i={} rem=[{}]~~~\n{}~~~", i, s.range(i, pos).len, s.range(i, pos));
+        _rymlindent_nextline(); // macro defined earlier in the file; presumably indents using ilevel
+        this->Writer::_do_write(s.range(i, pos));
+        i = pos;
+        goto again; // look for the next indented line
+    }
+    // consume the newlines after the indented block
+    // to prevent them from being escaped
+    pos = s.find('\n', i);
+    if(pos != npos)
+    {
+        const size_t pos2 = s.first_not_of('\n', pos);
+        pos = (pos2 != npos) ? pos2 : pos; // end of the newline run, or the newline itself when only newlines remain
+        //_c4dbgpf("indblock line@i={} rem=[{}]~~~\n{}~~~", i, s.range(i, pos).len, s.range(i, pos));
+        _rymlindent_nextline();
+        this->Writer::_do_write(s.range(i, pos));
+        i = pos;
+    }
+    return i;
+}
+/** Write `s` as a literal block scalar: the `|` header with indentation and chomping indicators, then the lines of `s`, each preceded by _rymlindent_nextline(). With `explicit_key` the scalar is prefixed with "? " and followed by a newline. */
+template<class Writer>
+void Emitter<Writer>::_write_scalar_literal(csubstr s, id_type ilevel, bool explicit_key)
+{
+    _RYML_CB_ASSERT(m_tree->callbacks(), s.find("\r") == csubstr::npos);
+    if(explicit_key)
+        this->Writer::_do_write("? ");
+    csubstr trimmed = s.trimr('\n'); // the contents without the trailing newlines
+    const size_t numnewlines_at_end = s.len - trimmed.len;
+    const bool is_newline_only = (trimmed.len == 0 && (s.len > 0));
+    const bool explicit_indentation = s.triml("\n\r").begins_with_any(" \t"); // first non-empty line starts with a blank
+    // header: `|`, then the indentation indicator when required
+    this->Writer::_do_write('|');
+    if(explicit_indentation)
+        this->Writer::_do_write('2');
+    // chomping indicator: `+` (keep) for several trailing newlines, `-` (strip) for none, nothing for exactly one
+    if(numnewlines_at_end > 1 || is_newline_only)
+        this->Writer::_do_write('+');
+    else if(numnewlines_at_end == 0)
+        this->Writer::_do_write('-');
+    // body: one write per line of the trimmed contents
+    if(trimmed.len)
+    {
+        this->Writer::_do_write('\n');
+        size_t pos = 0; // index of the first character not yet written
+        for(size_t i = 0; i < trimmed.len; ++i)
+        {
+            if(trimmed[i] != '\n')
+                continue;
+            // write everything up to this point
+            csubstr since_pos = trimmed.range(pos, i+1); // include the newline
+            _rymlindent_nextline()
+            this->Writer::_do_write(since_pos);
+            pos = i+1; // already written
+        }
+        if(pos < trimmed.len) // last line, which has no newline inside trimmed
+        {
+            _rymlindent_nextline()
+            this->Writer::_do_write(trimmed.sub(pos));
+        }
+    }
+    for(size_t i = !is_newline_only; i < numnewlines_at_end; ++i) // starts at 1, i.e. writes one newline less, unless s is only newlines
+        this->Writer::_do_write('\n');
+    if(explicit_key)
+        this->Writer::_do_write('\n');
+}
+/** Write `s` as a folded block scalar: the `>` header with indentation and chomping indicators, then the contents, where newline runs are written through _write_escaped_newlines() except inside indented blocks. With `explicit_key` the scalar is prefixed with "? " and followed by a newline. */
+template<class Writer>
+void Emitter<Writer>::_write_scalar_folded(csubstr s, id_type ilevel, bool explicit_key)
+{
+    if(explicit_key)
+        this->Writer::_do_write("? ");
+    _RYML_CB_ASSERT(m_tree->callbacks(), s.find("\r") == csubstr::npos);
+    csubstr trimmed = s.trimr('\n'); // the contents without the trailing newlines
+    const size_t numnewlines_at_end = s.len - trimmed.len;
+    const bool is_newline_only = (trimmed.len == 0 && (s.len > 0));
+    const bool explicit_indentation = s.triml("\n\r").begins_with_any(" \t"); // first non-empty line starts with a blank
+    // header: `>`, then the indentation indicator when required
+    this->Writer::_do_write('>');
+    if(explicit_indentation)
+        this->Writer::_do_write('2');
+    // chomping indicator: `-` (strip) for no trailing newline, `+` (keep) for several, nothing for exactly one
+    if(numnewlines_at_end == 0)
+        this->Writer::_do_write('-');
+    else if(numnewlines_at_end > 1 || is_newline_only)
+        this->Writer::_do_write('+');
+    // body
+    if(trimmed.len)
+    {
+        this->Writer::_do_write('\n');
+        size_t pos = 0; // index of the first character not yet written
+        for(size_t i = 0; i < trimmed.len; ++i)
+        {
+            if(trimmed[i] != '\n')
+                continue;
+            // escape newline sequences
+            if( ! _is_indented_block(s, pos, i))
+            {
+                if(pos < i) // there is pending text before the newline: write it, then the newline run
+                {
+                    _rymlindent_nextline()
+                    this->Writer::_do_write(s.range(pos, i));
+                    i = _write_escaped_newlines(s, i); // returns the index of the last newline of the run
+                    pos = i+1;
+                }
+                else // the newline is the first pending character
+                {
+                    if(i+1 < s.len)
+                    {
+                        if(s.str[i+1] == '\n')
+                        {
+                            ++i; // the run is written starting from its second newline
+                            i = _write_escaped_newlines(s, i);
+                            pos = i+1;
+                        }
+                        else
+                        {
+                            this->Writer::_do_write('\n'); // single newline: written as it is
+                            pos = i+1;
+                        }
+                    }
+                }
+            }
+            else // do not escape newlines in indented blocks
+            {
+                ++i; // include the newline in the range written below
+                _rymlindent_nextline()
+                this->Writer::_do_write(s.range(pos, i));
+                if(pos > 0 || !s.begins_with_any(" \t"))
+                    i = _write_indented_block(s, i, ilevel); // returns the index of the first unwritten character
+                pos = i;
+            }
+        }
+        if(pos < trimmed.len) // remaining text after the last newline
+        {
+            _rymlindent_nextline()
+            this->Writer::_do_write(trimmed.sub(pos));
+        }
+    }
+    for(size_t i = !is_newline_only; i < numnewlines_at_end; ++i) // starts at 1, i.e. writes one newline less, unless s is only newlines
+        this->Writer::_do_write('\n');
+    if(explicit_key)
+        this->Writer::_do_write('\n');
+}
+/** Write `s` as a single-quoted scalar: every single quote inside `s` is doubled, and newline runs are written through _write_escaped_newlines(). */
+template<class Writer>
+void Emitter<Writer>::_write_scalar_squo(csubstr s, id_type ilevel)
+{
+    size_t pos = 0; // index of the first character not yet written
+    this->Writer::_do_write('\'');
+    for(size_t i = 0; i < s.len; ++i)
+    {
+        if(s[i] == '\n')
+        {
+            this->Writer::_do_write(s.range(pos, i));  // write everything up to (excluding) this char
+            //_c4dbgpf("newline at {}. writing ~~~{}~~~", i, s.range(pos, i));
+            i = _write_escaped_newlines(s, i); // returns the index of the last newline of the run
+            //_c4dbgpf("newline --> {}", i);
+            if(i < s.len)
+                _rymlindent_nextline()
+            pos = i+1;
+        }
+        else if(s[i] == '\'')
+        {
+            csubstr sub = s.range(pos, i+1);
+            //_c4dbgpf("squote at {}. writing ~~~{}~~~", i, sub);
+            this->Writer::_do_write(sub); // write everything up to (including) this squote
+            this->Writer::_do_write('\''); // write the squote again
+            pos = i+1;
+        }
+    }
+    // write missing characters at the end of the string
+    if(pos < s.len)
+        this->Writer::_do_write(s.sub(pos));
+    this->Writer::_do_write('\''); // closing quote
+}
+/** Write `s` as a double-quoted scalar, escaping the double quote, the backslash, newline, carriage return and backspace. How newlines are written depends on the prefer_writing_newlines_as_double_newlines macro. */
+template<class Writer>
+void Emitter<Writer>::_write_scalar_dquo(csubstr s, id_type ilevel)
+{
+    size_t pos = 0; // index of the first character not yet written
+    this->Writer::_do_write('"');
+    for(size_t i = 0; i < s.len; ++i)
+    {
+        const char curr = s.str[i];
+        switch(curr) // characters without a case stay pending and are written later in one go
+        {
+        case '"':
+        case '\\':
+        {
+            csubstr sub = s.range(pos, i);
+            this->Writer::_do_write(sub);  // write everything up to (excluding) this char
+            this->Writer::_do_write('\\'); // write the escape
+            this->Writer::_do_write(curr); // write the char
+            pos = i+1;
+            break;
+        }
+#ifndef prefer_writing_newlines_as_double_newlines
+        case '\n': // default: the newline is written as a backslash-n escape
+        {
+            csubstr sub = s.range(pos, i);
+            this->Writer::_do_write(sub);   // write everything up to (excluding) this char
+            this->Writer::_do_write("\\n"); // write the escape
+            pos = i+1;
+            (void)ilevel; // ilevel is not needed in this branch
+            break;
+        }
+#else
+        case '\n': // alternative: real newlines are written, and the text continues on an indented line
+        {
+            // write everything up to (excluding) this newline
+            //_c4dbgpf("nl@i={} rem=[{}]~~~{}~~~", i, s.sub(i).len, s.sub(i));
+            this->Writer::_do_write(s.range(pos, i));
+            i = _write_escaped_newlines(s, i);
+            ++i; // move to the first character after the newline run
+            pos = i;
+            // as for the next line...
+            if(i < s.len)
+            {
+                _rymlindent_nextline() // indent the next line
+                // escape leading whitespace, and flush it
+                size_t first = s.first_not_of(" \t", i);
+                _c4dbgpf("@i={} first={} rem=[{}]~~~{}~~~", i, first, s.sub(i).len, s.sub(i));
+                if(first > i) // also true when first is npos, i.e. only blanks remain
+                {
+                    if(first == npos)
+                        first = s.len;
+                    this->Writer::_do_write('\\');
+                    this->Writer::_do_write(s.range(i, first));
+                    this->Writer::_do_write('\\');
+                    i = first-1; // the loop increment moves i to first
+                    pos = first;
+                }
+            }
+            break;
+        }
+        // escape trailing whitespace before a newline
+        case ' ':
+        case '\t':
+        {
+            const size_t next = s.first_not_of(" \t\r", i);
+            if(next != npos && s.str[next] == '\n')
+            {
+                csubstr sub = s.range(pos, i);
+                this->Writer::_do_write(sub);  // write everything up to (excluding) this char
+                this->Writer::_do_write('\\'); // escape the whitespace
+                pos = i; // the blank itself stays pending and is written after the backslash
+            }
+            break;
+        }
+#endif
+        case '\r':
+        {
+            csubstr sub = s.range(pos, i);
+            this->Writer::_do_write(sub);  // write everything up to (excluding) this char
+            this->Writer::_do_write("\\r"); // write the escaped char
+            pos = i+1;
+            break;
+        }
+        case '\b':
+        {
+            csubstr sub = s.range(pos, i);
+            this->Writer::_do_write(sub);  // write everything up to (excluding) this char
+            this->Writer::_do_write("\\b"); // write the escaped char
+            pos = i+1;
+            break;
+        }
+        }
+    }
+    // write missing characters at the end of the string
+    if(pos < s.len)
+        this->Writer::_do_write(s.sub(pos));
+    this->Writer::_do_write('"'); // closing quote
+}
+/** Write `s` as a plain (unquoted) scalar; newline runs are written through _write_escaped_newlines() and the text after them continues on an indented line. */
+template<class Writer>
+void Emitter<Writer>::_write_scalar_plain(csubstr s, id_type ilevel)
+{
+    if(C4_UNLIKELY(ilevel == 0 && (s.begins_with("...") || s.begins_with("---")))) // at level 0 the scalar starts like a document marker
+    {
+        _rymlindent_nextline()     // indent the next line
+        ++ilevel; // continuation lines of this scalar are indented one level deeper
+    }
+    size_t pos = 0; // index of the first character not yet written
+    for(size_t i = 0; i < s.len; ++i)
+    {
+        const char curr = s.str[i];
+        if(curr == '\n')
+        {
+            csubstr sub = s.range(pos, i);
+            this->Writer::_do_write(sub);  // write everything up to (excluding) this newline
+            i = _write_escaped_newlines(s, i); // returns the index of the last newline of the run
+            pos = i+1;
+            if(pos < s.len)
+                _rymlindent_nextline()     // indent the next line
+        }
+    }
+    // write missing characters at the end of the string
+    if(pos < s.len)
+        this->Writer::_do_write(s.sub(pos));
+}
+
+#undef _rymlindent_nextline
+/** Write `s` as a double-quoted JSON string.
+ *
+ * The quote, the backslash and the control characters that have a short
+ * form (newline, tab, carriage return, backspace, form feed) are written
+ * as two-character escapes. Every other character below 0x20 is written
+ * as a unicode escape (backslash, 'u', four hex digits): JSON does not
+ * allow raw control characters inside a string, and these were previously
+ * copied through unescaped. All other bytes are copied as they are. */
+template<class Writer>
+void Emitter<Writer>::_write_scalar_json_dquo(csubstr s)
+{
+    static const char hexdigits[] = "0123456789abcdef";
+    size_t pos = 0; // index of the first character not yet written
+    this->Writer::_do_write('"');
+    for(size_t i = 0; i < s.len; ++i)
+    {
+        const char curr = s.str[i];
+        char esc = '\0'; // second character of a short escape; stays 0 when a unicode escape is needed
+        // decide whether curr needs escaping; ordinary characters
+        // skip the rest of the loop body and stay pending
+        switch(curr)
+        {
+        case '"':  esc = '"';  break;
+        case '\\': esc = '\\'; break;
+        case '\n': esc = 'n';  break;
+        case '\t': esc = 't';  break;
+        case '\r': esc = 'r';  break;
+        case '\b': esc = 'b';  break;
+        case '\f': esc = 'f';  break;
+        default:
+            if(static_cast<unsigned char>(curr) >= 0x20)
+                continue; // ordinary character
+            break;        // control character without a short form
+        }
+        // flush the pending characters, then write the escape
+        this->Writer::_do_write(s.range(pos, i));
+        this->Writer::_do_write('\\');
+        if(esc)
+        {
+            this->Writer::_do_write(esc);
+        }
+        else
+        {
+            this->Writer::_do_write("u00");
+            this->Writer::_do_write(hexdigits[(static_cast<unsigned char>(curr) >> 4) & 0xf]);
+            this->Writer::_do_write(hexdigits[static_cast<unsigned char>(curr) & 0xf]);
+        }
+        pos = i + 1;
+    }
+    // write the characters remaining after the last escape
+    if(pos < s.len)
+        this->Writer::_do_write(s.sub(pos));
+    this->Writer::_do_write('"');
+}
+
+} // namespace yml
+} // namespace c4
+
+#endif /* _C4_YML_EMIT_DEF_HPP_ */
+
+
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/emit.def.hpp)
+
+
+
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/c4/yml/detail/stack.hpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/stack.hpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
+
+#ifndef _C4_YML_DETAIL_STACK_HPP_
+#define _C4_YML_DETAIL_STACK_HPP_
+
+#ifndef _C4_YML_COMMON_HPP_
+//included above:
+//#include "../common.hpp"
+#endif
+
+#ifdef RYML_DBG
+//included above:
+//#   include <type_traits>
+#endif
+
+//included above:
+//#include <string.h>
+
+namespace c4 {
+namespace yml {
+
+C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast")
+
+namespace detail {
+
+/** A lightweight contiguous stack with Small Storage
+ * Optimization. This is required because std::vector can throw
+ * exceptions, and we don't want to enforce any particular error
+ * mechanism. */
+template<class T, id_type N=16>
+class stack
+{
+    static_assert(std::is_trivially_copyable<T>::value, "T must be trivially copyable");
+    static_assert(std::is_trivially_destructible<T>::value, "T must be trivially destructible");
+
+public:
+
+    enum : id_type { sso_size = N }; ///< number of elements that fit in the inline buffer
+
+public:
+
+    T              m_buf[size_t(N)]; ///< inline storage, used while the capacity is N
+    T *C4_RESTRICT m_stack;          ///< current storage: m_buf, or a buffer obtained from m_callbacks
+    id_type        m_size;           ///< number of elements in use
+    id_type        m_capacity;       ///< number of elements that fit in m_stack
+    Callbacks      m_callbacks;      ///< used to allocate and free the heap buffer, and by the assertions
+
+public:
+
+    constexpr static bool is_contiguous() { return true; }
+
+    stack(Callbacks const& cb)
+        : m_buf()
+        , m_stack(m_buf)
+        , m_size(0)
+        , m_capacity(N)
+        , m_callbacks(cb) {}
+    stack() : stack(get_callbacks()) {}
+    ~stack() { _free(); }
+
+    stack(stack const& that) RYML_NOEXCEPT : stack(that.m_callbacks)
+    {
+        resize(that.m_size);
+        _cp(&that);
+    }
+
+    stack(stack &&that) noexcept : stack(that.m_callbacks) { _mv(&that); }
+
+    // self-assignment is a no-op: _cp() would memcpy a buffer onto itself
+    stack& operator= (stack const& that) RYML_NOEXCEPT
+    {
+        if(this == &that)
+            return *this;
+        _cb(that.m_callbacks);
+        resize(that.m_size);
+        _cp(&that);
+        return *this;
+    }
+
+    // self-move is a no-op: _mv() resets its argument, which would empty this stack
+    stack& operator= (stack &&that) noexcept
+    {
+        if(this == &that)
+            return *this;
+        _cb(that.m_callbacks);
+        _mv(&that);
+        return *this;
+    }
+
+public:
+
+    id_type size() const { return m_size; }
+    bool empty() const { return m_size == 0; } // a predicate: returns bool (was declared as id_type)
+    id_type capacity() const { return m_capacity; }
+
+    void clear() // forgets the elements; the storage is kept
+    {
+        m_size = 0;
+    }
+
+    void resize(id_type sz) // elements added by growing are not initialized here
+    {
+        reserve(sz);
+        m_size = sz;
+    }
+
+    void reserve(id_type sz);
+
+    void push(T const& C4_RESTRICT n)
+    {
+        _RYML_CB_ASSERT(m_callbacks, (const char*)&n + sizeof(T) < (const char*)m_stack || &n > m_stack + m_capacity); // n must not live inside our storage
+        if(m_size == m_capacity) // full: double the capacity
+        {
+            id_type cap = m_capacity == 0 ? N : 2 * m_capacity;
+            reserve(cap);
+        }
+        m_stack[m_size] = n;
+        ++m_size;
+    }
+
+    void push_top() // pushes a copy of the current top element
+    {
+        _RYML_CB_ASSERT(m_callbacks, m_size > 0);
+        if(m_size == m_capacity) // full: double the capacity
+        {
+            id_type cap = m_capacity == 0 ? N : 2 * m_capacity;
+            reserve(cap);
+        }
+        m_stack[m_size] = m_stack[m_size - 1];
+        ++m_size;
+    }
+
+    T const& C4_RESTRICT pop() // the returned reference points into the storage: it is valid only until the slot is reused
+    {
+        _RYML_CB_ASSERT(m_callbacks, m_size > 0);
+        --m_size;
+        return m_stack[m_size];
+    }
+
+    C4_ALWAYS_INLINE T const& C4_RESTRICT top() const { _RYML_CB_ASSERT(m_callbacks, m_size > 0); return m_stack[m_size - 1]; }
+    C4_ALWAYS_INLINE T      & C4_RESTRICT top()       { _RYML_CB_ASSERT(m_callbacks, m_size > 0); return m_stack[m_size - 1]; }
+
+    C4_ALWAYS_INLINE T const& C4_RESTRICT bottom() const { _RYML_CB_ASSERT(m_callbacks, m_size > 0); return m_stack[0]; }
+    C4_ALWAYS_INLINE T      & C4_RESTRICT bottom()       { _RYML_CB_ASSERT(m_callbacks, m_size > 0); return m_stack[0]; }
+
+    C4_ALWAYS_INLINE T const& C4_RESTRICT top(id_type i) const { _RYML_CB_ASSERT(m_callbacks, i < m_size); return m_stack[m_size - 1 - i]; } // i counts down from the top
+    C4_ALWAYS_INLINE T      & C4_RESTRICT top(id_type i)       { _RYML_CB_ASSERT(m_callbacks, i < m_size); return m_stack[m_size - 1 - i]; }
+
+    C4_ALWAYS_INLINE T const& C4_RESTRICT bottom(id_type i) const { _RYML_CB_ASSERT(m_callbacks, i < m_size); return m_stack[i]; } // i counts up from the bottom
+    C4_ALWAYS_INLINE T      & C4_RESTRICT bottom(id_type i)       { _RYML_CB_ASSERT(m_callbacks, i < m_size); return m_stack[i]; }
+
+    C4_ALWAYS_INLINE T const& C4_RESTRICT operator[](id_type i) const { _RYML_CB_ASSERT(m_callbacks, i < m_size); return m_stack[i]; }
+    C4_ALWAYS_INLINE T      & C4_RESTRICT operator[](id_type i)       { _RYML_CB_ASSERT(m_callbacks, i < m_size); return m_stack[i]; }
+
+public:
+
+    using       iterator = T       *;
+    using const_iterator = T const *;
+
+    iterator begin() { return m_stack; }
+    iterator end  () { return m_stack + m_size; }
+
+    const_iterator begin() const { return (const_iterator)m_stack; }
+    const_iterator end  () const { return (const_iterator)m_stack + m_size; }
+
+public:
+
+    void _free();                            // releases the heap buffer, if there is one
+    void _cp(stack const* C4_RESTRICT that); // copies the elements of that; room must already exist
+    void _mv(stack * that);                  // takes over the contents of that and resets it
+    void _cb(Callbacks const& cb);           // switches to other callbacks
+
+};
+
+
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+
+// Make room for at least sz elements, keeping the m_size elements in use. Nothing happens when sz <= m_size.
+template<class T, id_type N>
+void stack<T, N>::reserve(id_type sz)
+{
+    if(sz <= m_size)
+        return;
+    const bool was_on_heap = (m_stack != m_buf);
+    T *buf = m_buf; // sz <= N: the inline buffer is enough
+    if(sz > N)
+    {
+        buf = (T*) m_callbacks.m_allocate((size_t)sz * sizeof(T), m_stack, m_callbacks.m_user_data);
+        _RYML_CB_ASSERT(m_callbacks, ((uintptr_t)buf % alignof(T)) == 0u);
+    }
+    // changing buffers: the elements move along, and an old heap buffer is released.
+    // Going back to m_buf used to skip both steps, losing the elements and the buffer.
+    if(buf != m_stack)
+        memcpy(buf, m_stack, (size_t)m_size * sizeof(T)); // m_size < sz, so the elements fit
+    if(was_on_heap)
+        m_callbacks.m_free(m_stack, (size_t)m_capacity * sizeof(T), m_callbacks.m_user_data);
+    m_stack = buf;
+    m_capacity = (sz > N) ? sz : N;
+}
+
+
+//-----------------------------------------------------------------------------
+/** Release the heap buffer, if there is one, leaving the stack empty on its inline buffer. Nothing changes when the storage is already inline. */
+template<class T, id_type N>
+void stack<T, N>::_free()
+{
+    _RYML_CB_ASSERT(m_callbacks, m_stack != nullptr); // this structure cannot be memset() to zero
+    if(m_stack != m_buf)
+    {
+        m_callbacks.m_free(m_stack, (size_t)m_capacity * sizeof(T), m_callbacks.m_user_data);
+        m_stack = m_buf;
+        m_size = 0; // the elements went away with the buffer (this was N, which claimed N elements in use)
+        m_capacity = N;
+    }
+    else
+    {
+        _RYML_CB_ASSERT(m_callbacks, m_capacity == N); // inline storage always has capacity N
+    }
+}
+
+
+//-----------------------------------------------------------------------------
+/** Copy the elements and the callbacks of `that`. The caller must have made room first, as the copy constructor and copy assignment do with resize(that->m_size). */
+template<class T, id_type N>
+void stack<T, N>::_cp(stack const* C4_RESTRICT that)
+{
+    if(that->m_stack != that->m_buf)
+    {
+        _RYML_CB_ASSERT(m_callbacks, that->m_capacity > N);
+        _RYML_CB_ASSERT(m_callbacks, that->m_size <= that->m_capacity);
+    }
+    else
+    {
+        _RYML_CB_ASSERT(m_callbacks, that->m_capacity <= N);
+        _RYML_CB_ASSERT(m_callbacks, that->m_size <= that->m_capacity);
+    }
+    _RYML_CB_ASSERT(m_callbacks, that->m_size <= m_capacity); // m_capacity describes our own buffer: it is checked, not overwritten
+    memcpy(m_stack, that->m_stack, that->m_size * sizeof(T));
+    m_size = that->m_size;
+    m_callbacks = that->m_callbacks;
+}
+
+
+//-----------------------------------------------------------------------------
+/** Take over the contents and callbacks of `that`: a heap buffer changes owner, inline elements are copied. `that` is left empty on its inline buffer. `that` must not be this stack. */
+template<class T, id_type N>
+void stack<T, N>::_mv(stack * that)
+{
+    _free(); // release a heap buffer we may own: it would leak once m_stack is overwritten below
+    _RYML_CB_ASSERT(m_callbacks, that->m_size <= that->m_capacity);
+    if(that->m_stack != that->m_buf)
+    {
+        _RYML_CB_ASSERT(m_callbacks, that->m_capacity > N);
+        m_stack = that->m_stack;
+    }
+    else
+    {
+        _RYML_CB_ASSERT(m_callbacks, that->m_capacity <= N);
+        memcpy(m_buf, that->m_buf, that->m_size * sizeof(T));
+        m_stack = m_buf;
+    }
+    m_size = that->m_size;
+    m_capacity = that->m_capacity;
+    m_callbacks = that->m_callbacks;
+    // make sure no deallocation happens on destruction
+    _RYML_CB_ASSERT(m_callbacks, that->m_stack != m_buf);
+    that->m_stack = that->m_buf;
+    that->m_capacity = N;
+    that->m_size = 0;
+}
+
+
+//-----------------------------------------------------------------------------
+/** Switch to the callbacks `cb`. When they differ from the current ones, _free() is called first, while the old callbacks are still in place. */
+template<class T, id_type N>
+void stack<T, N>::_cb(Callbacks const& cb)
+{
+    const bool changed = (cb != m_callbacks);
+    if(!changed)
+        return;
+    _free();
+    m_callbacks = cb;
+}
+
+} // namespace detail
+
+C4_SUPPRESS_WARNING_GCC_CLANG_POP
+
+} // namespace yml
+} // namespace c4
+
+#endif /* _C4_YML_DETAIL_STACK_HPP_ */
+
+
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/detail/stack.hpp)
+
+
+
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/c4/yml/filter_processor.hpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/filter_processor.hpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
+
+#ifndef _C4_YML_FILTER_PROCESSOR_HPP_
+#define _C4_YML_FILTER_PROCESSOR_HPP_
+
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/common.hpp
+//#include "c4/yml/common.hpp"
+#if !defined(C4_YML_COMMON_HPP_) && !defined(_C4_YML_COMMON_HPP_)
+#error "amalgamate: file c4/yml/common.hpp must have been included at this point"
+#endif /* C4_YML_COMMON_HPP_ */
+
+
+#ifdef RYML_DBG
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/charconv.hpp
+//#include "c4/charconv.hpp"
+#if !defined(C4_CHARCONV_HPP_) && !defined(_C4_CHARCONV_HPP_)
+#error "amalgamate: file c4/charconv.hpp must have been included at this point"
+#endif /* C4_CHARCONV_HPP_ */
+
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/parser_dbg.hpp
+//#include "c4/yml/detail/parser_dbg.hpp"
+#if !defined(C4_YML_DETAIL_PARSER_DBG_HPP_) && !defined(_C4_YML_DETAIL_PARSER_DBG_HPP_)
+#error "amalgamate: file c4/yml/detail/parser_dbg.hpp must have been included at this point"
+#endif /* C4_YML_DETAIL_PARSER_DBG_HPP_ */
+
+#endif
+
+namespace c4 {
+namespace yml {
+
+/** @defgroup doc_filter_processors Scalar filter processors
+ *
+ * These are internal classes used by @ref ParseEngine to parse the
+ * scalars; normally there is no reason for a user to be manually
+ * using these classes.
+ *
+ * @ingroup doc_parse */
+/** @{ */
+
+//-----------------------------------------------------------------------------
+
+/** Filters an input string into a different output string */
+struct FilterProcessorSrcDst
+{
+    csubstr src;
+    substr dst;
+    size_t rpos; ///< read position
+    size_t wpos; ///< write position
+
+    C4_ALWAYS_INLINE FilterProcessorSrcDst(csubstr src_, substr dst_) noexcept
+        : src(src_)
+        , dst(dst_)
+        , rpos(0)
+        , wpos(0)
+    {
+        RYML_ASSERT(!dst.overlaps(src));
+    }
+
+    C4_ALWAYS_INLINE void setwpos(size_t wpos_) noexcept { wpos = wpos_; }
+    C4_ALWAYS_INLINE void setpos(size_t rpos_, size_t wpos_) noexcept { rpos = rpos_; wpos = wpos_; }
+    C4_ALWAYS_INLINE void set_at_end() noexcept { skip(src.len - rpos); }
+
+    C4_ALWAYS_INLINE bool has_more_chars() const noexcept { return rpos < src.len; }
+    C4_ALWAYS_INLINE bool has_more_chars(size_t maxpos) const noexcept { RYML_ASSERT(maxpos <= src.len); return rpos < maxpos; }
+
+    C4_ALWAYS_INLINE csubstr rem() const noexcept { return src.sub(rpos); }
+    C4_ALWAYS_INLINE csubstr sofar() const noexcept { return csubstr(dst.str, wpos <= dst.len ? wpos : dst.len); }
+    C4_ALWAYS_INLINE FilterResult result() const noexcept
+    {
+        FilterResult ret;
+        ret.str.str = wpos <= dst.len ? dst.str : nullptr;
+        ret.str.len = wpos;
+        return ret;
+    }
+
+    C4_ALWAYS_INLINE char curr() const noexcept { RYML_ASSERT(rpos < src.len); return src[rpos]; }
+    C4_ALWAYS_INLINE char next() const noexcept { return rpos+1 < src.len ? src[rpos+1] : '\0'; }
+    C4_ALWAYS_INLINE bool skipped_chars() const noexcept { return wpos != rpos; }
+
+    C4_ALWAYS_INLINE void skip() noexcept { ++rpos; }
+    C4_ALWAYS_INLINE void skip(size_t num) noexcept { rpos += num; }
+
+    C4_ALWAYS_INLINE void set_at(size_t pos, char c) noexcept
+    {
+        RYML_ASSERT(pos < wpos);
+        dst.str[pos] = c;
+    }
+    C4_ALWAYS_INLINE void set(char c) noexcept
+    {
+        if(wpos < dst.len)
+            dst.str[wpos] = c;
+        ++wpos;
+    }
+    C4_ALWAYS_INLINE void set(char c, size_t num) noexcept
+    {
+        RYML_ASSERT(num > 0);
+        if(wpos + num <= dst.len)
+            memset(dst.str + wpos, c, num);
+        wpos += num;
+    }
+
+    C4_ALWAYS_INLINE void copy() noexcept
+    {
+        RYML_ASSERT(rpos < src.len);
+        if(wpos < dst.len)
+            dst.str[wpos] = src.str[rpos];
+        ++wpos;
+        ++rpos;
+    }
+    C4_ALWAYS_INLINE void copy(size_t num) noexcept
+    {
+        RYML_ASSERT(num);
+        RYML_ASSERT(rpos+num <= src.len);
+        if(wpos + num <= dst.len)
+            memcpy(dst.str + wpos, src.str + rpos, num);
+        wpos += num;
+        rpos += num;
+    }
+
+    C4_ALWAYS_INLINE void translate_esc(char c) noexcept ///< write the single character @p c in place of a two-character sequence in the source
+    {
+        if(wpos < dst.len) // write only while inside the destination
+            dst.str[wpos] = c;
+        ++wpos;
+        rpos += 2; // two source characters are consumed - presumably the escape character and the character following it
+    }
+    C4_ALWAYS_INLINE void translate_esc_bulk(const char *C4_RESTRICT s, size_t nw, size_t nr) noexcept ///< write the @p nw characters at @p s in place of 1 + @p nr characters of the source
+    {
+        RYML_ASSERT(nw > 0);
+        RYML_ASSERT(nr > 0);
+        RYML_ASSERT(rpos+nr <= src.len); // NOTE(review): bounds rpos+nr, while rpos advances by nr+1 below - confirm this is the intended bound
+        if(wpos+nw <= dst.len) // all-or-nothing: written only when the whole replacement fits in the destination
+            memcpy(dst.str + wpos, s, nw);
+        wpos += nw;
+        rpos += 1 + nr; // one more than nr - presumably for the escape character
+    }
+    C4_ALWAYS_INLINE void translate_esc_extending(const char *C4_RESTRICT s, size_t nw, size_t nr) noexcept ///< in this processor, identical to translate_esc_bulk()
+    {
+        translate_esc_bulk(s, nw, nr); // forwarded unchanged
+    }
+};
+
+
+//-----------------------------------------------------------------------------
+// filter in place
+
+// debugging scaffold
+/** @cond dev */
+#if defined(RYML_DBG) && 0
+#define _c4dbgip(...) _c4dbgpf(__VA_ARGS__)
+#else
+#define _c4dbgip(...)
+#endif
+/** @endcond */
+
+/** Filters in place. While the result may be larger than the source,
+ * any extending happens only at the end of the string. Consequently,
+ * it's impossible for characters to be left unfiltered.
+ *
+ * @see FilterProcessorInplaceMidExtending */
+struct FilterProcessorInplaceEndExtending
+{
+    substr src;  ///< the subject string
+    size_t wcap; ///< write capacity - the capacity of the subject string's buffer
+    size_t rpos; ///< read position
+    size_t wpos; ///< write position
+
+    C4_ALWAYS_INLINE FilterProcessorInplaceEndExtending(substr src_, size_t wcap_) noexcept ///< start with both positions at 0
+        : src(src_)
+        , wcap(wcap_)
+        , rpos(0)
+        , wpos(0)
+    {
+        RYML_ASSERT(wcap >= src.len); // the capacity must be at least the length of the subject string
+    }
+
+    C4_ALWAYS_INLINE void setwpos(size_t wpos_) noexcept { wpos = wpos_; } ///< set the write position directly
+    C4_ALWAYS_INLINE void setpos(size_t rpos_, size_t wpos_) noexcept { rpos = rpos_; wpos = wpos_; } ///< set both positions directly
+    C4_ALWAYS_INLINE void set_at_end() noexcept { skip(src.len - rpos); } ///< move the read position to the end of the subject string
+
+    C4_ALWAYS_INLINE bool has_more_chars() const noexcept { return rpos < src.len; } ///< true while the read position is before the end of the subject string
+    C4_ALWAYS_INLINE bool has_more_chars(size_t maxpos) const noexcept { RYML_ASSERT(maxpos <= src.len); return rpos < maxpos; } ///< true while the read position is before @p maxpos
+
+    C4_ALWAYS_INLINE FilterResult result() const noexcept ///< package the outcome: the buffer pointer (or null) plus the write position
+    {
+        _c4dbgip("inplace: wpos={} wcap={} small={}", wpos, wcap, wpos > rpos);
+        FilterResult ret;
+        ret.str.str = (wpos <= wcap) ? src.str : nullptr; // null when the write position went past the capacity
+        ret.str.len = wpos; // the length is reported in both cases
+        return ret;
+    }
+    C4_ALWAYS_INLINE csubstr sofar() const noexcept { return csubstr(src.str, wpos <= wcap ? wpos : wcap); } ///< the buffer up to the write position, clamped to the capacity
+    C4_ALWAYS_INLINE csubstr rem() const noexcept { return src.sub(rpos); } ///< the subject string from the read position onward (src.sub(rpos))
+
+    C4_ALWAYS_INLINE char curr() const noexcept { RYML_ASSERT(rpos < src.len); return src[rpos]; } ///< the character at the read position
+    C4_ALWAYS_INLINE char next() const noexcept { return rpos+1 < src.len ? src[rpos+1] : '\0'; } ///< the character after the read position, or '\0' when there is none
+
+    C4_ALWAYS_INLINE void skip() noexcept { ++rpos; } ///< advance the read position by one without writing
+    C4_ALWAYS_INLINE void skip(size_t num) noexcept { rpos += num; } ///< advance the read position by @p num without writing
+
+    void set_at(size_t pos, char c) noexcept ///< overwrite the already-written position @p pos with @p c
+    {
+        RYML_ASSERT(pos < wpos);
+        const size_t save = wpos;
+        wpos = pos; // temporarily move the write position so that set() applies its capacity check to pos
+        set(c);
+        wpos = save; // restore: the write position is unchanged by this call
+    }
+    void set(char c) noexcept ///< append @p c at the write position
+    {
+        if(wpos < wcap)  // respect write-capacity
+            src.str[wpos] = c;
+        ++wpos; // advance even when nothing was written
+    }
+    void set(char c, size_t num) noexcept ///< append @p num copies of @p c at the write position
+    {
+        RYML_ASSERT(num);
+        if(wpos + num <= wcap)  // respect write-capacity
+            memset(src.str + wpos, c, num);
+        wpos += num; // advance even when nothing was written
+    }
+
+    void copy() noexcept ///< copy the character at the read position to the write position and advance both
+    {
+        RYML_ASSERT(wpos <= rpos); // the write position must not be ahead of the read position
+        RYML_ASSERT(rpos < src.len);
+        if(wpos < wcap)  // respect write-capacity
+            src.str[wpos] = src.str[rpos];
+        ++rpos;
+        ++wpos;
+    }
+    void copy(size_t num) noexcept ///< copy @p num characters from the read position to the write position and advance both
+    {
+        RYML_ASSERT(num);
+        RYML_ASSERT(rpos+num <= src.len);
+        RYML_ASSERT(wpos <= rpos); // the write position must not be ahead of the read position
+        if(wpos + num <= wcap)  // respect write-capacity
+        {
+            if(wpos + num <= rpos) // there is no overlap
+                memcpy(src.str + wpos, src.str + rpos, num);
+            else                   // there is overlap
+                memmove(src.str + wpos, src.str + rpos, num);
+        }
+        rpos += num;
+        wpos += num;
+    }
+
+    void translate_esc(char c) noexcept ///< write the single character @p c in place of a two-character escape sequence
+    {
+        RYML_ASSERT(rpos + 2 <= src.len); // both characters of the sequence must be inside the subject string
+        RYML_ASSERT(wpos <= rpos);
+        if(wpos < wcap) // respect write-capacity
+            src.str[wpos] = c;
+        rpos += 2; // add 1u to account for the escape character
+        ++wpos;
+    }
+
+    void translate_esc_bulk(const char *C4_RESTRICT s, size_t nw, size_t nr) noexcept ///< write the @p nw characters at @p s in place of the escape character plus @p nr characters
+    {
+        RYML_ASSERT(nw > 0);
+        RYML_ASSERT(nr > 0);
+        RYML_ASSERT(nw <= nr + 1u); // the replacement is not longer than what is consumed, so writing cannot overtake reading
+        RYML_ASSERT(rpos+nr <= src.len);
+        RYML_ASSERT(wpos <= rpos);
+        const size_t wpos_next = wpos + nw;
+        const size_t rpos_next = rpos + nr + 1u; // add 1u to account for the escape character
+        RYML_ASSERT(wpos_next <= rpos_next);
+        if(wpos_next <= wcap) // respect write-capacity
+            memcpy(src.str + wpos, s, nw);
+        rpos = rpos_next;
+        wpos = wpos_next;
+    }
+
+    C4_ALWAYS_INLINE void translate_esc_extending(const char *C4_RESTRICT s, size_t nw, size_t nr) noexcept ///< in this processor, identical to translate_esc_bulk()
+    {
+        translate_esc_bulk(s, nw, nr); // forwarded unchanged
+    }
+};
+
+
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+
+/** Filters in place. The result may be larger than the source, and
+ * extending may happen anywhere. As a result some characters may be
+ * left unfiltered when there is no slack in the buffer and the
+ * write-position would overlap the read-position. Consequently, it's
+ * possible for characters to be left unfiltered. In YAML, this
+ * happens only with double-quoted strings, and only with a small
+ * number of escape sequences such as `\L` which is substituted by three
+ * bytes. These escape sequences cause a call to translate_esc_extending()
+ * which is the only entry point to this unfiltered situation.
+ *
+ * @see FilterProcessorInplaceEndExtending */
+struct FilterProcessorInplaceMidExtending
+{
+    substr src;  ///< the subject string
+    size_t wcap; ///< write capacity - the capacity of the subject string's buffer
+    size_t rpos; ///< read position
+    size_t wpos; ///< write position
+    size_t maxcap; ///< the max capacity needed for filtering the string. This may be larger than the final string size.
+    bool unfiltered_chars; ///< whether any characters could not be written for lack of capacity (a flag, not a count)
+
+    C4_ALWAYS_INLINE FilterProcessorInplaceMidExtending(substr src_, size_t wcap_) noexcept ///< start with both positions at 0
+        : src(src_)
+        , wcap(wcap_)
+        , rpos(0)
+        , wpos(0)
+        , maxcap(src.len)
+        , unfiltered_chars(false)
+    {
+        RYML_ASSERT(wcap >= src.len); // the capacity must be at least the length of the subject string
+    }
+
+    C4_ALWAYS_INLINE void setwpos(size_t wpos_) noexcept { wpos = wpos_; } ///< set the write position directly
+    C4_ALWAYS_INLINE void setpos(size_t rpos_, size_t wpos_) noexcept { rpos = rpos_; wpos = wpos_; } ///< set both positions directly
+    C4_ALWAYS_INLINE void set_at_end() noexcept { skip(src.len - rpos); } ///< move the read position to the end of the subject string
+
+    C4_ALWAYS_INLINE bool has_more_chars() const noexcept { return rpos < src.len; } ///< true while the read position is before the end of the subject string
+    C4_ALWAYS_INLINE bool has_more_chars(size_t maxpos) const noexcept { RYML_ASSERT(maxpos <= src.len); return rpos < maxpos; } ///< true while the read position is before @p maxpos
+
+    C4_ALWAYS_INLINE FilterResultExtending result() const noexcept ///< package the outcome: buffer pointer (or null), write position and required length
+    {
+        _c4dbgip("inplace: wpos={} wcap={} unfiltered={} maxcap={}", this->wpos, this->wcap, this->unfiltered_chars, this->maxcap);
+        FilterResultExtending ret;
+        ret.str.str = (wpos <= wcap && !unfiltered_chars) ? src.str : nullptr; // null when capacity was exceeded or anything was left unfiltered
+        ret.str.len = wpos;
+        ret.reqlen = maxcap; // the largest size seen while filtering
+        return ret;
+    }
+    C4_ALWAYS_INLINE csubstr sofar() const noexcept { return csubstr(src.str, wpos <= wcap ? wpos : wcap); } ///< the buffer up to the write position, clamped to the capacity
+    C4_ALWAYS_INLINE csubstr rem() const noexcept { return src.sub(rpos); } ///< the subject string from the read position onward (src.sub(rpos))
+
+    C4_ALWAYS_INLINE char curr() const noexcept { RYML_ASSERT(rpos < src.len); return src[rpos]; } ///< the character at the read position
+    C4_ALWAYS_INLINE char next() const noexcept { return rpos+1 < src.len ? src[rpos+1] : '\0'; } ///< the character after the read position, or '\0' when there is none
+
+    C4_ALWAYS_INLINE void skip() noexcept { ++rpos; } ///< advance the read position by one without writing
+    C4_ALWAYS_INLINE void skip(size_t num) noexcept { rpos += num; } ///< advance the read position by @p num without writing
+
+    void set_at(size_t pos, char c) noexcept ///< overwrite the already-written position @p pos with @p c
+    {
+        RYML_ASSERT(pos < wpos);
+        const size_t save = wpos;
+        wpos = pos; // temporarily move the write position so that set() applies its checks to pos
+        set(c);
+        wpos = save; // restore: the write position is unchanged by this call
+    }
+    void set(char c) noexcept ///< append @p c at the write position
+    {
+        if(wpos < wcap)  // respect write-capacity
+        {
+            if((wpos <= rpos) && !unfiltered_chars) // write only when not ahead of the read position and nothing was left unfiltered before
+                src.str[wpos] = c;
+        }
+        else
+        {
+            _c4dbgip("inplace: add unwritten {}->{}   maxcap={}->{}!", unfiltered_chars, true, maxcap, (wpos+1u > maxcap ? wpos+1u : maxcap));
+            unfiltered_chars = true; // out of capacity: from now on the buffer is no longer written
+        }
+        ++wpos;
+        maxcap = wpos > maxcap ? wpos : maxcap; // maxcap = max(maxcap, wpos)
+    }
+    void set(char c, size_t num) noexcept ///< append @p num copies of @p c at the write position
+    {
+        RYML_ASSERT(num);
+        if(wpos + num <= wcap)  // respect write-capacity
+        {
+            if((wpos <= rpos) && !unfiltered_chars) // same write condition as set(char)
+                memset(src.str + wpos, c, num);
+        }
+        else
+        {
+            _c4dbgip("inplace: add unwritten {}->{}   maxcap={}->{}!", unfiltered_chars, true, maxcap, (wpos+num > maxcap ? wpos+num : maxcap));
+            unfiltered_chars = true;
+        }
+        wpos += num;
+        maxcap = wpos > maxcap ? wpos : maxcap; // maxcap = max(maxcap, wpos)
+    }
+
+    void copy() noexcept ///< copy the character at the read position to the write position and advance both
+    {
+        RYML_ASSERT(rpos < src.len);
+        if(wpos < wcap)  // respect write-capacity
+        {
+            if((wpos < rpos) && !unfiltered_chars)  // write only if wpos is behind rpos
+                src.str[wpos] = src.str[rpos]; // (when wpos == rpos the character is already in place)
+        }
+        else
+        {
+            _c4dbgip("inplace: add unwritten {}->{} (wpos={}!=rpos={})={}  (wpos={}<wcap={})   maxcap={}->{}!", unfiltered_chars, true, wpos, rpos, wpos!=rpos, wpos, wcap, wpos<wcap, maxcap, (wpos+1u > maxcap ? wpos+1u : maxcap));
+            unfiltered_chars = true;
+        }
+        ++rpos;
+        ++wpos;
+        maxcap = wpos > maxcap ? wpos : maxcap; // maxcap = max(maxcap, wpos)
+    }
+    void copy(size_t num) noexcept ///< copy @p num characters from the read position to the write position and advance both
+    {
+        RYML_ASSERT(num);
+        RYML_ASSERT(rpos+num <= src.len);
+        if(wpos + num <= wcap)  // respect write-capacity
+        {
+            if((wpos < rpos) && !unfiltered_chars)  // write only if wpos is behind rpos
+            {
+                if(wpos + num <= rpos) // there is no overlap
+                    memcpy(src.str + wpos, src.str + rpos, num);
+                else                   // there is overlap
+                    memmove(src.str + wpos, src.str + rpos, num);
+            }
+        }
+        else
+        {
+            _c4dbgip("inplace: add unwritten {}->{} (wpos={}!=rpos={})={}  (wpos={}<wcap={})  maxcap={}->{}!", unfiltered_chars, true, wpos, rpos, wpos!=rpos, wpos, wcap, wpos<wcap); // NOTE(review): fewer arguments than {} placeholders; inert while _c4dbgip expands to nothing
+            unfiltered_chars = true;
+        }
+        rpos += num;
+        wpos += num;
+        maxcap = wpos > maxcap ? wpos : maxcap; // maxcap = max(maxcap, wpos)
+    }
+
+    void translate_esc(char c) noexcept ///< write the single character @p c in place of a two-character escape sequence
+    {
+        RYML_ASSERT(rpos + 2 <= src.len); // both characters of the sequence must be inside the subject string
+        if(wpos < wcap) // respect write-capacity
+        {
+            if((wpos <= rpos) && !unfiltered_chars) // same write condition as set(char)
+                src.str[wpos] = c;
+        }
+        else
+        {
+            _c4dbgip("inplace: add unfiltered {}->{}  maxcap={}->{}!", unfiltered_chars, true, maxcap, (wpos+1u > maxcap ? wpos+1u : maxcap));
+            unfiltered_chars = true;
+        }
+        rpos += 2; // two characters are consumed: the escape character and the one after it
+        ++wpos;
+        maxcap = wpos > maxcap ? wpos : maxcap; // maxcap = max(maxcap, wpos)
+    }
+
+    C4_NO_INLINE void translate_esc_bulk(const char *C4_RESTRICT s, size_t nw, size_t nr) noexcept ///< write the @p nw characters at @p s in place of the escape character plus @p nr characters
+    {
+        RYML_ASSERT(nw > 0);
+        RYML_ASSERT(nr > 0);
+        RYML_ASSERT(nr+1u >= nw); // the replacement is not longer than what is consumed
+        const size_t wpos_next = wpos + nw;
+        const size_t rpos_next = rpos + nr + 1u; // add 1u to account for the escape character
+        if(wpos_next <= wcap)  // respect write-capacity
+        {
+            if((wpos <= rpos) && !unfiltered_chars)  // write only if wpos is not ahead of rpos
+                memcpy(src.str + wpos, s, nw);
+        }
+        else
+        {
+            _c4dbgip("inplace: add unwritten {}->{} (wpos={}!=rpos={})={}  (wpos={}<wcap={})  maxcap={}->{}!", unfiltered_chars, true, wpos, rpos, wpos!=rpos, wpos, wcap, wpos<wcap); // NOTE(review): fewer arguments than {} placeholders; inert while _c4dbgip expands to nothing
+            unfiltered_chars = true;
+        }
+        rpos = rpos_next;
+        wpos = wpos_next;
+        maxcap = wpos > maxcap ? wpos : maxcap; // maxcap = max(maxcap, wpos)
+    }
+
+    C4_NO_INLINE void translate_esc_extending(const char *C4_RESTRICT s, size_t nw, size_t nr) noexcept ///< like translate_esc_bulk(), but the replacement may be longer than what is consumed
+    {
+        RYML_ASSERT(nw > 0);
+        RYML_ASSERT(nr > 0);
+        RYML_ASSERT(rpos+nr <= src.len);
+        const size_t wpos_next = wpos + nw;
+        const size_t rpos_next = rpos + nr + 1u; // add 1u to account for the escape character
+        if(wpos_next <= rpos_next) // read and write do not overlap. just do a vanilla copy.
+        {
+            if((wpos_next <= wcap) && !unfiltered_chars)
+                memcpy(src.str + wpos, s, nw);
+            rpos = rpos_next;
+            wpos = wpos_next;
+            maxcap = wpos > maxcap ? wpos : maxcap; // maxcap = max(maxcap, wpos)
+        }
+        else // there is overlap. move the (to-be-read) string to the right.
+        {
+            const size_t excess = wpos_next - rpos_next; // how far the write would run ahead of the read position
+            RYML_ASSERT(wpos_next > rpos_next);
+            if(src.len + excess <= wcap) // ensure we do not go past the end
+            {
+                RYML_ASSERT(rpos+nr+excess <= src.len);
+                if(wpos_next <= wcap)
+                {
+                    if(!unfiltered_chars)
+                    {
+                        memmove(src.str + wpos_next, src.str + rpos_next, src.len - rpos_next); // shift the unread tail right so it starts at wpos_next
+                        memcpy(src.str + wpos, s, nw); // then write the replacement into the gap
+                    }
+                    rpos = wpos_next; // wpos, not rpos
+                }
+                else
+                {
+                    rpos = rpos_next;
+                    //const size_t unw = nw > (nr + 1u) ? nw - (nr + 1u) : 0;
+                    _c4dbgip("inplace: add unfiltered {}->{}   maxcap={}->{}!", unfiltered_chars, true); // NOTE(review): fewer arguments than {} placeholders; inert while _c4dbgip expands to nothing
+                    unfiltered_chars = true;
+                }
+                wpos = wpos_next;
+                // extend the string up to capacity
+                src.len += excess;
+                maxcap = wpos > maxcap ? wpos : maxcap; // maxcap = max(maxcap, wpos)
+            }
+            else // the shifted string would not fit in the capacity: nothing is written, only the required size is recorded
+            {
+                //const size_t unw = nw > (nr + 1u) ? nw - (nr + 1u) : 0;
+                RYML_ASSERT(rpos_next <= src.len);
+                const size_t required_size = wpos_next + (src.len - rpos_next); // the replacement end plus the still-unread tail
+                _c4dbgip("inplace: add unfiltered {}->{}   maxcap={}->{}!", unfiltered_chars, true, maxcap, required_size > maxcap ? required_size : maxcap);
+                RYML_ASSERT(required_size > wcap);
+                unfiltered_chars = true;
+                maxcap = required_size > maxcap ? required_size : maxcap; // maxcap = max(maxcap, required_size)
+                wpos = wpos_next;
+                rpos = rpos_next;
+            }
+        }
+    }
+};
+
+#undef _c4dbgip
+
+
+/** @} */
+
+} // namespace yml
+} // namespace c4
+
+#endif /* _C4_YML_FILTER_PROCESSOR_HPP_ */
+
+
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/filter_processor.hpp)
+
+
+
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/c4/yml/parser_state.hpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/parser_state.hpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
+
+#ifndef _C4_YML_PARSER_STATE_HPP_
+#define _C4_YML_PARSER_STATE_HPP_
+
+#ifndef _C4_YML_COMMON_HPP_
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/common.hpp
+//#include "c4/yml/common.hpp"
+#if !defined(C4_YML_COMMON_HPP_) && !defined(_C4_YML_COMMON_HPP_)
+#error "amalgamate: file c4/yml/common.hpp must have been included at this point"
+#endif /* C4_YML_COMMON_HPP_ */
+
+#endif
+
+namespace c4 {
+namespace yml {
+
+/** data type for @ref ParserState_e */
+using ParserFlag_t = int;
+
+/** Enumeration of the state flags for the parser */
+typedef enum : ParserFlag_t { // every enumerator below is a distinct single bit (0x01 << k), so values can be combined in a ParserFlag_t
+    RTOP = 0x01 <<  0,   ///< reading at top level
+    RUNK = 0x01 <<  1,   ///< reading unknown state (when starting): must determine whether scalar, map or seq
+    RMAP = 0x01 <<  2,   ///< reading a map
+    RSEQ = 0x01 <<  3,   ///< reading a seq
+    FLOW = 0x01 <<  4,   ///< reading is inside explicit flow chars: [] or {}
+    BLCK = 0x01 <<  5,   ///< reading in block mode
+    QMRK = 0x01 <<  6,   ///< reading an explicit key (`? key`)
+    RKEY = 0x01 <<  7,   ///< reading a scalar as key
+    RVAL = 0x01 <<  9,   ///< reading a scalar as val (note: bit 9, listed before bit 8)
+    RKCL = 0x01 <<  8,   ///< reading the key colon (ie the : after the key in the map)
+    RNXT = 0x01 << 10,   ///< read next val or keyval
+    SSCL = 0x01 << 11,   ///< there's a stored scalar
+    QSCL = 0x01 << 12,   ///< stored scalar was quoted
+    RSET = 0x01 << 13,   ///< the (implicit) map being read is a !!set. @see https://yaml.org/type/set.html
+    RDOC = 0x01 << 14,   ///< reading a document
+    NDOC = 0x01 << 15,   ///< no document mode. a document has ended and another has not started yet.
+    USTY = 0x01 << 16,   ///< reading in unknown style mode - must determine FLOW or BLCK
+    //! reading an implicit map nested in an explicit seq.
+    //! eg, {key: [key2: value2, key3: value3]}
+    //! is parsed as {key: [{key2: value2}, {key3: value3}]}
+    RSEQIMAP = 0x01 << 17,
+} ParserState_e;
+
+#ifdef RYML_DBG
+/** @cond dev */
+namespace detail {
+csubstr _parser_flags_to_str(substr buf, ParserFlag_t flags);
+} // namespace detail
+/** @endcond */
+#endif
+
+
+/** Helper to control the line contents while parsing a buffer */
+struct LineContents
+{
+    substr  rem;         ///< the stripped line remainder; initially starts at the first non-space character
+    size_t  indentation; ///< the number of spaces on the beginning of the line
+    substr  full;        ///< the full line, including newlines on the right
+    substr  stripped;    ///< the stripped line, excluding newlines on the right
+
+    LineContents() = default;
+
+    void reset_with_next_line(substr buf, size_t offset) ///< point this object at the line of @p buf that starts at @p offset
+    {
+        RYML_ASSERT(offset <= buf.len);
+        size_t e = offset;
+        // get the current line stripped of newline chars
+        while(e < buf.len && (buf.str[e] != '\n' && buf.str[e] != '\r'))
+            ++e;
+        RYML_ASSERT(e >= offset);
+        const substr stripped_ = buf.range(offset, e); // ends right before the first '\n' or '\r', or at the end of the buffer
+        // advance pos to include the first line ending
+        if(e < buf.len && buf.str[e] == '\r') // an optional '\r' ...
+            ++e;
+        if(e < buf.len && buf.str[e] == '\n') // ... followed by an optional '\n'
+            ++e;
+        const substr full_ = buf.range(offset, e); // same start as stripped_, but including the line ending
+        reset(full_, stripped_);
+    }
+
+    void reset(substr full_, substr stripped_) ///< set all members from the given full and stripped lines
+    {
+        rem = stripped_; // NOTE(review): assigned the whole stripped line here, leading spaces included
+        indentation = stripped_.first_not_of(' ');  // find the first column where the character is not a space
+        full = full_;
+        stripped = stripped_;
+    }
+
+    C4_ALWAYS_INLINE size_t current_col() const RYML_NOEXCEPT ///< offset of rem from the start of the full line
+    {
+        // WARNING: gcc x86 release builds were wrong (eg returning 0
+        // when the result should be 4 ) when this function was like
+        // this:
+        //
+        //return current_col(rem);
+        //
+        // (see below for the full definition of the called overload
+        // of current_col())
+        //
+        // ... so we explicitly inline the code in here:
+        RYML_ASSERT(rem.str >= full.str);
+        size_t col = static_cast<size_t>(rem.str - full.str);
+        return col;
+        //
+        // this was happening only on builds specifically with (gcc
+        // AND x86 AND release); no other builds were having the
+        // problem: not in debug, not in x64, not in other
+        // architectures, not in clang, not in visual studio. WTF!?
+        //
+        // Enabling debug prints with RYML_DBG made the problem go
+        // away, so these could not be used to debug the
+        // problem. Adding prints inside the called current_col() also
+        // made the problem go away! WTF!???
+        //
+        // a prize will be offered to anybody able to explain why this
+        // was happening.
+    }
+
+    C4_ALWAYS_INLINE size_t current_col(csubstr s) const RYML_NOEXCEPT ///< offset of @p s from the start of the full line
+    {
+        RYML_ASSERT(s.str >= full.str);
+        RYML_ASSERT(full.is_super(s)); // s is asserted to lie inside the full line
+        size_t col = static_cast<size_t>(s.str - full.str);
+        return col;
+    }
+};
+static_assert(std::is_standard_layout<LineContents>::value, "LineContents not standard");
+
+
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+
+struct ParserState
+{
+    LineContents line_contents; ///< contents of the line being read (see LineContents)
+    Location     pos;     ///< position; its name, offset, line and col are initialized in start_parse()
+    ParserFlag_t flags;   ///< bitmask of ParserState_e values
+    size_t       indref;  ///< the reference indentation in the current block scope
+    id_type      level;   ///< 0 after start_parse(); incremented by reset_after_push()
+    id_type      node_id; ///< don't hold a pointer to the node as it will be relocated during tree resizes
+    size_t       scalar_col; // the column where the scalar (or its quotes) begin
+    bool         more_indented;
+    bool         has_children; ///< cleared by start_parse() and reset_after_push()
+
+    ParserState() = default;
+
+    void start_parse(const char *file, id_type node_id_) ///< initialize for a new parse (flags and line_contents are not assigned here)
+    {
+        level = 0;
+        pos.name = to_csubstr(file);
+        pos.offset = 0;
+        pos.line = 1; // line and column start at 1, the offset at 0
+        pos.col = 1;
+        node_id = node_id_;
+        more_indented = false;
+        scalar_col = 0;
+        indref = 0;
+        has_children = false;
+    }
+
+    void reset_after_push() ///< adjust a state for one level deeper (pos, flags and line_contents are left as they are)
+    {
+        node_id = NONE;
+        indref = npos; // no reference indentation yet; the indentation_*() helpers assert that it has been set
+        more_indented = false;
+        ++level;
+        has_children = false;
+    }
+
+    C4_ALWAYS_INLINE void reset_before_pop(ParserState const& to_pop) ///< take over the position and line contents of the state about to be popped
+    {
+        pos = to_pop.pos;
+        line_contents = to_pop.line_contents;
+    }
+
+public:
+
+    C4_ALWAYS_INLINE bool at_line_beginning() const noexcept ///< true when the line remainder starts where the full line starts
+    {
+        return line_contents.rem.str == line_contents.full.str;
+    }
+    C4_ALWAYS_INLINE bool indentation_eq() const noexcept ///< line indentation == indref; false when the line indentation is npos
+    {
+        RYML_ASSERT(indref != npos);
+        return line_contents.indentation != npos && line_contents.indentation == indref;
+    }
+    C4_ALWAYS_INLINE bool indentation_ge() const noexcept ///< line indentation >= indref; false when the line indentation is npos
+    {
+        RYML_ASSERT(indref != npos);
+        return line_contents.indentation != npos && line_contents.indentation >= indref;
+    }
+    C4_ALWAYS_INLINE bool indentation_gt() const noexcept ///< line indentation > indref; false when the line indentation is npos
+    {
+        RYML_ASSERT(indref != npos);
+        return line_contents.indentation != npos && line_contents.indentation > indref;
+    }
+    C4_ALWAYS_INLINE bool indentation_lt() const noexcept ///< line indentation < indref; false when the line indentation is npos
+    {
+        RYML_ASSERT(indref != npos);
+        return line_contents.indentation != npos && line_contents.indentation < indref;
+    }
+};
+static_assert(std::is_standard_layout<ParserState>::value, "ParserState not standard");
+
+
+} // namespace yml
+} // namespace c4
+
+#endif /* _C4_YML_PARSER_STATE_HPP_ */
+
+
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/parser_state.hpp)
+
+
+
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/c4/yml/event_handler_stack.hpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/event_handler_stack.hpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
+
+#ifndef _C4_YML_EVENT_HANDLER_STACK_HPP_
+#define _C4_YML_EVENT_HANDLER_STACK_HPP_
+
+#ifndef _C4_YML_DETAIL_STACK_HPP_
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/stack.hpp
+//#include "c4/yml/detail/stack.hpp"
+#if !defined(C4_YML_DETAIL_STACK_HPP_) && !defined(_C4_YML_DETAIL_STACK_HPP_)
+#error "amalgamate: file c4/yml/detail/stack.hpp must have been included at this point"
+#endif /* C4_YML_DETAIL_STACK_HPP_ */
+
+#endif
+
+#ifndef _C4_YML_DETAIL_PARSER_DBG_HPP_
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/parser_dbg.hpp
+//#include "c4/yml/detail/parser_dbg.hpp"
+#if !defined(C4_YML_DETAIL_PARSER_DBG_HPP_) && !defined(_C4_YML_DETAIL_PARSER_DBG_HPP_)
+#error "amalgamate: file c4/yml/detail/parser_dbg.hpp must have been included at this point"
+#endif /* C4_YML_DETAIL_PARSER_DBG_HPP_ */
+
+#endif
+
+#ifndef _C4_YML_PARSER_STATE_HPP_
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/parser_state.hpp
+//#include "c4/yml/parser_state.hpp"
+#if !defined(C4_YML_PARSER_STATE_HPP_) && !defined(_C4_YML_PARSER_STATE_HPP_)
+#error "amalgamate: file c4/yml/parser_state.hpp must have been included at this point"
+#endif /* C4_YML_PARSER_STATE_HPP_ */
+
+#endif
+
+#ifdef RYML_DBG
+#ifndef _C4_YML_DETAIL_PRINT_HPP_
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/print.hpp
+//#include "c4/yml/detail/print.hpp"
+#if !defined(C4_YML_DETAIL_PRINT_HPP_) && !defined(_C4_YML_DETAIL_PRINT_HPP_)
+#error "amalgamate: file c4/yml/detail/print.hpp must have been included at this point"
+#endif /* C4_YML_DETAIL_PRINT_HPP_ */
+
+#endif
+#endif
+
+namespace c4 {
+namespace yml {
+
+/** @addtogroup doc_event_handlers
+ * @{ */
+
+namespace detail {
+using pfn_relocate_arena = void (*)(void*, csubstr prev_arena, substr next_arena);
+} // detail
+
+/** Use this class as a base for event handler implementations, to
+ * simplify the stack logic. */
+template<class HandlerImpl, class HandlerState>
+struct EventHandlerStack
+{
+    static_assert(std::is_base_of<ParserState, HandlerState>::value,
+                  "ParserState must be a base of HandlerState");
+
+    using state = HandlerState;
+    using pfn_relocate_arena = detail::pfn_relocate_arena;
+
+public:
+
+    detail::stack<state> m_stack; ///< the stack of handler states
+    state *C4_RESTRICT   m_curr;    ///< current stack level: top of the stack. cached here for easier access.
+    state *C4_RESTRICT   m_parent;  ///< parent of the current stack level.
+    pfn_relocate_arena   m_relocate_arena; ///< callback when the arena gets relocated
+    void *               m_relocate_arena_data; ///< passed as the first argument to m_relocate_arena
+
+protected:
+
+    EventHandlerStack() : m_stack(), m_curr(), m_parent(), m_relocate_arena(), m_relocate_arena_data() {} ///< all pointers start out null
+    EventHandlerStack(Callbacks const& cb) : m_stack(cb), m_curr(), m_parent(), m_relocate_arena(), m_relocate_arena_data() {} ///< same, with the stack constructed from the given callbacks
+
+protected:
+
+    void _stack_start_parse(const char *filename, pfn_relocate_arena relocate_arena, void *relocate_arena_data) ///< prepare the current state for parsing and store the relocation callback
+    {
+        _RYML_CB_ASSERT(m_stack.m_callbacks, m_curr != nullptr); // m_curr must already be set, eg by one of the _stack_reset_*() functions
+        _RYML_CB_ASSERT(m_stack.m_callbacks, relocate_arena != nullptr);
+        _RYML_CB_ASSERT(m_stack.m_callbacks, relocate_arena_data != nullptr);
+        m_curr->start_parse(filename, m_curr->node_id); // the state's own node_id is passed back in, so it is kept
+        m_relocate_arena = relocate_arena;
+        m_relocate_arena_data = relocate_arena_data;
+    }
+
+    void _stack_finish_parse() ///< currently does nothing
+    {
+    }
+
+protected:
+
+    void _stack_reset_root() ///< reset to a stack holding a single default state, with no parent
+    {
+        m_stack.clear();
+        m_stack.push({});
+        m_parent = nullptr;
+        m_curr = &m_stack.top();
+    }
+
+    void _stack_reset_non_root() ///< reset to a stack holding two default states: a parent and the current one
+    {
+        m_stack.clear();
+        m_stack.push({}); // parent
+        m_stack.push({}); // node
+        m_parent = &m_stack.top(1);
+        m_curr = &m_stack.top();
+    }
+
+    void _stack_push() ///< add a level on top of the stack and make it current
+    {
+        m_stack.push_top(); // NOTE(review): presumably pushes a copy of the current top - confirm in detail::stack
+        m_parent = &m_stack.top(1); // don't use m_curr. watch out for relocations inside the prev push
+        m_curr = &m_stack.top();
+        m_curr->reset_after_push();
+    }
+
+    void _stack_pop() ///< remove the top level and make its parent current
+    {
+        _RYML_CB_ASSERT(m_stack.m_callbacks, m_parent);
+        _RYML_CB_ASSERT(m_stack.m_callbacks, m_stack.size() > 1);
+        m_parent->reset_before_pop(*m_curr); // let the parent adopt state from the level being popped before it goes away
+        m_stack.pop();
+        m_parent = m_stack.size() > 1 ? &m_stack.top(1) : nullptr; // no parent once only one level remains
+        m_curr = &m_stack.top();
+        #ifdef RYML_DBG
+        if(m_parent)
+            _c4dbgpf("popped! top is now node={} (parent={})", m_curr->node_id, m_parent->node_id);
+        else
+            _c4dbgpf("popped! top is now node={} @ ROOT", m_curr->node_id);
+        #endif
+    }
+
+protected:
+
+    // undefined at the end
+    #define _has_any_(bits) (static_cast<HandlerImpl const* C4_RESTRICT>(this)->template _has_any__<bits>())
+
+    bool _stack_should_push_on_begin_doc() const ///< true at the root level when the current node has any of DOC|VAL|MAP|SEQ or has children
+    {
+        const bool is_root = (m_stack.size() == 1u); // root means the stack holds a single level
+        return is_root && (_has_any_(DOC|VAL|MAP|SEQ) || m_curr->has_children);
+    }
+
+    bool _stack_should_pop_on_end_doc() const ///< true below the root level when the current node has DOC
+    {
+        const bool is_root = (m_stack.size() == 1u); // root means the stack holds a single level
+        return !is_root && _has_any_(DOC);
+    }
+
+protected:
+
+    void _stack_relocate_to_new_arena(csubstr prev, substr curr) ///< rebase the line contents of every stacked state from @p prev to @p curr, then invoke the relocation callback
+    {
+        for(state &st : m_stack)
+        {
+            if(st.line_contents.rem.is_sub(prev)) // each member is rebased only when it lies inside the previous arena
+                st.line_contents.rem = _stack_relocate_to_new_arena(st.line_contents.rem, prev, curr);
+            if(st.line_contents.full.is_sub(prev))
+                st.line_contents.full = _stack_relocate_to_new_arena(st.line_contents.full, prev, curr);
+            if(st.line_contents.stripped.is_sub(prev))
+                st.line_contents.stripped = _stack_relocate_to_new_arena(st.line_contents.stripped, prev, curr);
+        }
+        _RYML_CB_ASSERT(m_stack.m_callbacks, m_relocate_arena != nullptr);
+        _RYML_CB_ASSERT(m_stack.m_callbacks, m_relocate_arena_data != nullptr);
+        m_relocate_arena(m_relocate_arena_data, prev, curr);
+    }
+
+    substr _stack_relocate_to_new_arena(csubstr s, csubstr prev, substr curr) ///< return the string with the offset and length of @p s inside @p prev, but inside @p curr
+    {
+        _RYML_CB_ASSERT(m_stack.m_callbacks, prev.is_super(s));
+        auto pos = s.str - prev.str; // offset of s inside the previous arena
+        substr out = {curr.str + pos, s.len};
+        _RYML_CB_ASSERT(m_stack.m_callbacks, curr.is_super(out)); // the result must lie inside the new arena
+        return out;
+    }
+
+public:
+
+    /** Check whether the current parse tokens are trailing on the
+     * previous doc, and raise an error if they are. This function is
+     * called by the parse engine (not the event handler) before a doc
+     * is started. */
+    void check_trailing_doc_token() const
+    {
+        const bool is_root = (m_stack.size() == 1u);
+        const bool isndoc = (m_curr->flags & NDOC) != 0;
+        const bool suspicious = _has_any_(MAP|SEQ|VAL);
+        _c4dbgpf("target={} isroot={} suspicious={} ndoc={}", m_curr->node_id, is_root, suspicious, isndoc);
+        if((is_root || _has_any_(DOC)) && suspicious && !isndoc) // at root or in a doc node, with content already there, and not in no-document mode
+            _RYML_CB_ERR_(m_stack.m_callbacks, "parse error", m_curr->pos);
+    }
+
+protected:
+
+    #undef _has_any_
+
+};
+
+/** @} */
+
+} // namespace yml
+} // namespace c4
+
+#endif /* _C4_YML_EVENT_HANDLER_STACK_HPP_ */
+
+
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/event_handler_stack.hpp)
+
+
+
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/c4/yml/event_handler_tree.hpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/event_handler_tree.hpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
+
+#ifndef _C4_YML_EVENT_HANDLER_TREE_HPP_
+#define _C4_YML_EVENT_HANDLER_TREE_HPP_
+
+#ifndef _C4_YML_TREE_HPP_
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp
+//#include "c4/yml/tree.hpp"
+#if !defined(C4_YML_TREE_HPP_) && !defined(_C4_YML_TREE_HPP_)
+#error "amalgamate: file c4/yml/tree.hpp must have been included at this point"
+#endif /* C4_YML_TREE_HPP_ */
+
+#endif
+
+#ifndef _C4_YML_EVENT_HANDLER_STACK_HPP_
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/event_handler_stack.hpp
+//#include "c4/yml/event_handler_stack.hpp"
+#if !defined(C4_YML_EVENT_HANDLER_STACK_HPP_) && !defined(_C4_YML_EVENT_HANDLER_STACK_HPP_)
+#error "amalgamate: file c4/yml/event_handler_stack.hpp must have been included at this point"
+#endif /* C4_YML_EVENT_HANDLER_STACK_HPP_ */
+
+#endif
+
+C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(4702) // unreachable code
+
+namespace c4 {
+namespace yml {
+
+/** @addtogroup doc_event_handlers
+ * @{ */
+
+
+/** The stack state needed specifically by @ref EventHandlerTree */
+struct EventHandlerTreeState : public ParserState
+{
+    NodeData *tr_data; ///< the tree's data for this state's node: assigned from Tree::_p(node_id) in EventHandlerTree::_set_state_(), and re-read in _refresh_after_relocation()
+};
+
+
+/** The event handler to create a ryml @ref Tree. See the
+ * documentation for @ref doc_event_handlers, which has important
+ * notes about the event model used by rapidyaml. */
+struct EventHandlerTree : public EventHandlerStack<EventHandlerTree, EventHandlerTreeState>
+{
+
+    /** @name types
+     * @{ */
+
+    using state = EventHandlerTreeState;
+
+    /** @} */
+
+public:
+
+    /** @cond dev */
+    Tree *C4_RESTRICT m_tree;
+    id_type m_id;
+    size_t m_num_directives;
+    bool m_yaml_directive;
+
+    #if RYML_DBG
+    #define _enable_(bits) _enable__<bits>(); _c4dbgpf("node[{}]: enable {}", m_curr->node_id, #bits)
+    #define _disable_(bits) _disable__<bits>(); _c4dbgpf("node[{}]: disable {}", m_curr->node_id, #bits)
+    #else
+    #define _enable_(bits) _enable__<bits>()
+    #define _disable_(bits) _disable__<bits>()
+    #endif
+    #define _has_any_(bits) _has_any__<bits>()
+    /** @endcond */
+
+public:
+
+    /** @name construction and resetting
+     * @{ */
+
+    EventHandlerTree() : EventHandlerStack(), m_tree(), m_id(NONE), m_num_directives(), m_yaml_directive() {}
+    EventHandlerTree(Callbacks const& cb) : EventHandlerStack(cb), m_tree(), m_id(NONE), m_num_directives(), m_yaml_directive() {}
+    EventHandlerTree(Tree *tree, id_type id) : EventHandlerStack(tree->callbacks()), m_tree(tree), m_id(id), m_num_directives(), m_yaml_directive()
+    {
+        reset(tree, id);
+    }
+
+    void reset(Tree *tree, id_type id)
+    {
+        // validate the destination before touching any member
+        if(C4_UNLIKELY(tree == nullptr))
+            _RYML_CB_ERR(m_stack.m_callbacks, "null tree");
+        if(C4_UNLIKELY(id >= tree->capacity()))
+            _RYML_CB_ERR(tree->callbacks(), "invalid node");
+        if(C4_UNLIKELY(!tree->is_root(id) && tree->is_map(tree->parent(id)) && !tree->has_key(id)))
+            _RYML_CB_ERR(tree->callbacks(), "destination node belongs to a map and has no key");
+        m_tree = tree;
+        m_id = id;
+        if(!m_tree->is_root(id))
+        {
+            // nested destination: initialize both the parent state and the current state
+            _stack_reset_non_root();
+            _reset_parser_state(m_parent, id, m_tree->parent(id));
+            _reset_parser_state(m_curr, id, id);
+        }
+        else
+        {
+            _stack_reset_root();
+            _reset_parser_state(m_curr, id, m_tree->root_id());
+        }
+        m_num_directives = 0;
+        m_yaml_directive = false;
+    }
+
+    /** @} */
+
+public:
+
+    /** @name parse events
+     * @{ */
+
+    void start_parse(const char* filename, detail::pfn_relocate_arena relocate_arena, void *relocate_arena_data)
+    {
+        _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree != nullptr);
+        this->_stack_start_parse(filename, relocate_arena, relocate_arena_data);
+    }
+
+    void finish_parse()
+    {
+        _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree != nullptr);
+        if(m_num_directives && !m_tree->is_stream(m_tree->root_id()))
+            _RYML_CB_ERR_(m_stack.m_callbacks, "directives cannot be used without a document", {});
+        this->_stack_finish_parse();
+        /* This pointer is temporary. Remember that:
+         *
+         * - this handler object may be held by the user
+         * - it may be used with a temporary tree inside the parse function
+         * - when the parse function returns the temporary tree, its address
+         *   will change
+         *
+         * As a result, the user could try to read the tree from m_tree, and
+         * end up reading the stale temporary object.
+         *
+         * So it is better to clear it here; then the user will get an obvious
+         * segfault if reading from m_tree. */
+        m_tree = nullptr;
+    }
+
+    void cancel_parse()
+    {
+        m_tree = nullptr;
+    }
+
+    /** @} */
+
+public:
+
+    /** @name YAML stream events */
+    /** @{ */
+
+    C4_ALWAYS_INLINE void begin_stream() const noexcept { /*nothing to do*/ }
+
+    C4_ALWAYS_INLINE void end_stream() const noexcept { /*nothing to do*/ }
+
+    /** @} */
+
+public:
+
+    /** @name YAML document events */
+    /** @{ */
+
+    /** begin an implicit doc, ie one that is not marked with --- */
+    void begin_doc()
+    {
+        _c4dbgp("begin_doc");
+        if(!_stack_should_push_on_begin_doc())
+            return; // the stack says no new level is needed
+        _c4dbgp("push!");
+        // the root must be a stream before a doc can be added under it
+        _set_root_as_stream();
+        _push();
+        _enable_(DOC);
+    }
+    /** end an implicit doc, ie one that is not marked with ... */
+    void end_doc()
+    {
+        _c4dbgp("end_doc");
+        if(!_stack_should_pop_on_end_doc())
+            return;
+        // drop the last node if it was added but never received a type
+        _remove_speculative();
+        _c4dbgp("pop!");
+        _pop();
+    }
+
+    /** explicit doc start, with ---. Ensures the root is a stream, makes a doc node current, and marks it as DOC. */
+    void begin_doc_expl()
+    {
+        _c4dbgp("begin_doc_expl");
+        _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->root_id() == m_curr->node_id);
+        if(!m_tree->is_stream(m_tree->root_id())) // first explicit doc: the root is not a stream yet
+        {
+            _c4dbgp("ensure stream");
+            _set_root_as_stream();
+            id_type first = m_tree->first_child(m_tree->root_id()); // the doc created by the conversion (the only child, asserted below)
+            _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_stream(m_tree->root_id()));
+            _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->num_children(m_tree->root_id()) == 1u);
+            if(m_tree->has_children(first) || m_tree->is_val(first)) // that doc already has content: start a new doc
+            {
+                _c4dbgp("push!");
+                _push();
+            }
+            else // that doc is empty: reuse it instead of adding a second one
+            {
+                _c4dbgp("tweak");
+                _push(); // pushes a stack level; this also appends a new child to the root...
+                _remove_speculative(); // ...which is still untyped here, so it is removed again
+                m_curr->node_id = m_tree->last_child(m_tree->root_id()); // point the new level at the root's last child
+                m_curr->tr_data = m_tree->_p(m_curr->node_id);
+            }
+        }
+        else // the root is already a stream: just add another doc
+        {
+            _c4dbgp("push!");
+            _push();
+        }
+        _enable_(DOC);
+    }
+    /** explicit doc end, with ... */
+    void end_doc_expl()
+    {
+        _c4dbgp("end_doc_expl");
+        _remove_speculative();
+        if(_stack_should_pop_on_end_doc())
+        {
+            _c4dbgp("pop!");
+            _pop();
+        }
+        m_yaml_directive = false;
+    }
+
+    /** @} */
+
+public:
+
+    /** @name YAML map events */
+    /** @{ */
+
+    void begin_map_key_flow()
+    {
+        _RYML_CB_ERR_(m_stack.m_callbacks, "ryml trees cannot handle containers as keys", m_curr->pos);
+    }
+    void begin_map_key_block()
+    {
+        _RYML_CB_ERR_(m_stack.m_callbacks, "ryml trees cannot handle containers as keys", m_curr->pos);
+    }
+
+    void begin_map_val_flow()
+    {
+        _c4dbgpf("node[{}]: begin_map_val_flow", m_curr->node_id);
+        _RYML_CB_CHECK(m_stack.m_callbacks, !_has_any_(VAL));
+        _enable_(MAP|FLOW_SL);
+        _save_loc();
+        _push();
+    }
+    void begin_map_val_block()
+    {
+        _c4dbgpf("node[{}]: begin_map_val_block", m_curr->node_id);
+        _RYML_CB_CHECK(m_stack.m_callbacks, !_has_any_(VAL));
+        _enable_(MAP|BLOCK);
+        _save_loc();
+        _push();
+    }
+
+    void end_map()
+    {
+        _pop();
+        _c4dbgpf("node[{}]: end_map_val", m_curr->node_id);
+    }
+
+    /** @} */
+
+public:
+
+    /** @name YAML seq events */
+    /** @{ */
+
+    void begin_seq_key_flow()
+    {
+        _RYML_CB_ERR_(m_stack.m_callbacks, "ryml trees cannot handle containers as keys", m_curr->pos);
+    }
+    void begin_seq_key_block()
+    {
+        _RYML_CB_ERR_(m_stack.m_callbacks, "ryml trees cannot handle containers as keys", m_curr->pos);
+    }
+
+    void begin_seq_val_flow()
+    {
+        _c4dbgpf("node[{}]: begin_seq_val_flow", m_curr->node_id);
+        _RYML_CB_CHECK(m_stack.m_callbacks, !_has_any_(VAL));
+        _enable_(SEQ|FLOW_SL);
+        _save_loc();
+        _push();
+    }
+    void begin_seq_val_block()
+    {
+        _c4dbgpf("node[{}]: begin_seq_val_block", m_curr->node_id);
+        _RYML_CB_CHECK(m_stack.m_callbacks, !_has_any_(VAL));
+        _enable_(SEQ|BLOCK);
+        _save_loc();
+        _push();
+    }
+
+    void end_seq()
+    {
+        _pop();
+        _c4dbgpf("node[{}]: end_seq_val", m_curr->node_id);
+    }
+
+    /** @} */
+
+public:
+
+    /** @name YAML structure events */
+    /** @{ */
+
+    void add_sibling()
+    {
+        _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree);
+        _RYML_CB_ASSERT(m_stack.m_callbacks, m_parent);
+        _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->has_children(m_parent->node_id));
+        NodeData const* const buf_before = m_tree->m_buf; // appending may relocate the tree's node buffer
+        _set_state_(m_curr, m_tree->_append_child__unprotected(m_parent->node_id));
+        if(m_tree->m_buf != buf_before) // relocated: the tr_data pointers held in the stack must be re-read
+            _refresh_after_relocation();
+        _c4dbgpf("node[{}]: added sibling={} prev={}", m_parent->node_id, m_curr->node_id, m_tree->prev_sibling(m_curr->node_id));
+    }
+
+    /** set the previous val as the first key of a new map, with flow style.
+     *
+     * See the documentation for @ref doc_event_handlers, which has
+     * important notes about this event.
+     */
+    void actually_val_is_first_key_of_new_map_flow()
+    {
+        if(C4_UNLIKELY(m_tree->is_container(m_curr->node_id)))
+            _RYML_CB_ERR_(m_stack.m_callbacks, "ryml trees cannot handle containers as keys", m_curr->pos);
+        _RYML_CB_ASSERT(m_stack.m_callbacks, m_parent);
+        _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_seq(m_parent->node_id));
+        _RYML_CB_ASSERT(m_stack.m_callbacks, !m_tree->is_container(m_curr->node_id));
+        _RYML_CB_ASSERT(m_stack.m_callbacks, !m_tree->has_key(m_curr->node_id));
+        const NodeData tmp = _val2key_(*m_curr->tr_data); // copy of the node with its val moved to the key side
+        _disable_(_VALMASK|VAL_STYLE); // the current node stops being a val...
+        m_curr->tr_data->m_val = {}; // ...and its val data is cleared
+        begin_map_val_flow(); // marks the node as a flow map and pushes: from here on m_curr is the map's first child
+        m_curr->tr_data->m_type = tmp.m_type; // the child takes over the key type bits computed above
+        m_curr->tr_data->m_key = tmp.m_key; // and the former val as its key
+    }
+
+    /** like its flow counterpart, but this function can only be
+     * called after the end of a flow-val at root or doc level.
+     *
+     * See the documentation for @ref doc_event_handlers, which has
+     * important notes about this event.
+     */
+    void actually_val_is_first_key_of_new_map_block()
+    {
+        _RYML_CB_ERR_(m_stack.m_callbacks, "ryml trees cannot handle containers as keys", m_curr->pos);
+    }
+
+    /** @} */
+
+public:
+
+    /** @name YAML scalar events */
+    /** @{ */
+
+
+    C4_ALWAYS_INLINE void set_key_scalar_plain(csubstr scalar) noexcept
+    {
+        _c4dbgpf("node[{}]: set key scalar plain: [{}]~~~{}~~~ ({})", m_curr->node_id, scalar.len, scalar, reinterpret_cast<void const*>(scalar.str));
+        m_curr->tr_data->m_key.scalar = scalar;
+        _enable_(KEY|KEY_PLAIN);
+    }
+    C4_ALWAYS_INLINE void set_val_scalar_plain(csubstr scalar) noexcept
+    {
+        _c4dbgpf("node[{}]: set val scalar plain: [{}]~~~{}~~~ ({})", m_curr->node_id, scalar.len, scalar, reinterpret_cast<void const*>(scalar.str));
+        m_curr->tr_data->m_val.scalar = scalar;
+        _enable_(VAL|VAL_PLAIN);
+    }
+
+
+    C4_ALWAYS_INLINE void set_key_scalar_dquoted(csubstr scalar) noexcept
+    {
+        _c4dbgpf("node[{}]: set key scalar dquot: [{}]~~~{}~~~ ({})", m_curr->node_id, scalar.len, scalar, reinterpret_cast<void const*>(scalar.str));
+        m_curr->tr_data->m_key.scalar = scalar;
+        _enable_(KEY|KEY_DQUO);
+    }
+    C4_ALWAYS_INLINE void set_val_scalar_dquoted(csubstr scalar) noexcept
+    {
+        _c4dbgpf("node[{}]: set val scalar dquot: [{}]~~~{}~~~ ({})", m_curr->node_id, scalar.len, scalar, reinterpret_cast<void const*>(scalar.str));
+        m_curr->tr_data->m_val.scalar = scalar;
+        _enable_(VAL|VAL_DQUO);
+    }
+
+
+    C4_ALWAYS_INLINE void set_key_scalar_squoted(csubstr scalar) noexcept
+    {
+        _c4dbgpf("node[{}]: set key scalar squot: [{}]~~~{}~~~ ({})", m_curr->node_id, scalar.len, scalar, reinterpret_cast<void const*>(scalar.str));
+        m_curr->tr_data->m_key.scalar = scalar;
+        _enable_(KEY|KEY_SQUO);
+    }
+    C4_ALWAYS_INLINE void set_val_scalar_squoted(csubstr scalar) noexcept
+    {
+        _c4dbgpf("node[{}]: set val scalar squot: [{}]~~~{}~~~ ({})", m_curr->node_id, scalar.len, scalar, reinterpret_cast<void const*>(scalar.str));
+        m_curr->tr_data->m_val.scalar = scalar;
+        _enable_(VAL|VAL_SQUO);
+    }
+
+
+    C4_ALWAYS_INLINE void set_key_scalar_literal(csubstr scalar) noexcept
+    {
+        _c4dbgpf("node[{}]: set key scalar literal: [{}]~~~{}~~~ ({})", m_curr->node_id, scalar.len, scalar, reinterpret_cast<void const*>(scalar.str));
+        m_curr->tr_data->m_key.scalar = scalar;
+        _enable_(KEY|KEY_LITERAL);
+    }
+    C4_ALWAYS_INLINE void set_val_scalar_literal(csubstr scalar) noexcept
+    {
+        _c4dbgpf("node[{}]: set val scalar literal: [{}]~~~{}~~~ ({})", m_curr->node_id, scalar.len, scalar, reinterpret_cast<void const*>(scalar.str));
+        m_curr->tr_data->m_val.scalar = scalar;
+        _enable_(VAL|VAL_LITERAL);
+    }
+
+
+    C4_ALWAYS_INLINE void set_key_scalar_folded(csubstr scalar) noexcept
+    {
+        _c4dbgpf("node[{}]: set key scalar folded: [{}]~~~{}~~~ ({})", m_curr->node_id, scalar.len, scalar, reinterpret_cast<void const*>(scalar.str));
+        m_curr->tr_data->m_key.scalar = scalar;
+        _enable_(KEY|KEY_FOLDED);
+    }
+    C4_ALWAYS_INLINE void set_val_scalar_folded(csubstr scalar) noexcept
+    {
+        _c4dbgpf("node[{}]: set val scalar folded: [{}]~~~{}~~~ ({})", m_curr->node_id, scalar.len, scalar, reinterpret_cast<void const*>(scalar.str));
+        m_curr->tr_data->m_val.scalar = scalar;
+        _enable_(VAL|VAL_FOLDED);
+    }
+
+
+    C4_ALWAYS_INLINE void mark_key_scalar_unfiltered() noexcept
+    {
+        _enable_(KEY_UNFILT);
+    }
+    C4_ALWAYS_INLINE void mark_val_scalar_unfiltered() noexcept
+    {
+        _enable_(VAL_UNFILT);
+    }
+
+    /** @} */
+
+public:
+
+    /** @name YAML anchor/reference events */
+    /** @{ */
+
+    void set_key_anchor(csubstr anchor)
+    {
+        _c4dbgpf("node[{}]: set key anchor: [{}]~~~{}~~~", m_curr->node_id, anchor.len, anchor);
+        _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree);
+        if(C4_UNLIKELY(_has_any_(KEYREF)))
+            _RYML_CB_ERR_(m_tree->callbacks(), "key cannot have both anchor and ref", m_curr->pos);
+        _RYML_CB_ASSERT(m_tree->callbacks(), !anchor.begins_with('&'));
+        _enable_(KEYANCH);
+        m_curr->tr_data->m_key.anchor = anchor;
+    }
+    void set_val_anchor(csubstr anchor)
+    {
+        _c4dbgpf("node[{}]: set val anchor: [{}]~~~{}~~~", m_curr->node_id, anchor.len, anchor);
+        _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree);
+        if(C4_UNLIKELY(_has_any_(VALREF)))
+            _RYML_CB_ERR_(m_tree->callbacks(), "val cannot have both anchor and ref", m_curr->pos);
+        _RYML_CB_ASSERT(m_tree->callbacks(), !anchor.begins_with('&'));
+        _enable_(VALANCH);
+        m_curr->tr_data->m_val.anchor = anchor;
+    }
+
+    void set_key_ref(csubstr ref)
+    {
+        _c4dbgpf("node[{}]: set key ref: [{}]~~~{}~~~", m_curr->node_id, ref.len, ref);
+        _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree);
+        if(C4_UNLIKELY(_has_any_(KEYANCH)))
+            _RYML_CB_ERR_(m_tree->callbacks(), "key cannot have both anchor and ref", m_curr->pos);
+        _RYML_CB_ASSERT(m_tree->callbacks(), ref.begins_with('*'));
+        _enable_(KEY|KEYREF);
+        m_curr->tr_data->m_key.anchor = ref.sub(1);
+        m_curr->tr_data->m_key.scalar = ref;
+    }
+    void set_val_ref(csubstr ref)
+    {
+        _c4dbgpf("node[{}]: set val ref: [{}]~~~{}~~~", m_curr->node_id, ref.len, ref);
+        _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree);
+        if(C4_UNLIKELY(_has_any_(VALANCH)))
+            _RYML_CB_ERR_(m_tree->callbacks(), "val cannot have both anchor and ref", m_curr->pos);
+        _RYML_CB_ASSERT(m_tree->callbacks(), ref.begins_with('*'));
+        _enable_(VAL|VALREF);
+        m_curr->tr_data->m_val.anchor = ref.sub(1);
+        m_curr->tr_data->m_val.scalar = ref;
+    }
+
+    /** @} */
+
+public:
+
+    /** @name YAML tag events */
+    /** @{ */
+
+    void set_key_tag(csubstr tag) noexcept
+    {
+        _c4dbgpf("node[{}]: set key tag: [{}]~~~{}~~~", m_curr->node_id, tag.len, tag);
+        _enable_(KEYTAG);
+        m_curr->tr_data->m_key.tag = tag;
+    }
+    void set_val_tag(csubstr tag) noexcept
+    {
+        _c4dbgpf("node[{}]: set val tag: [{}]~~~{}~~~", m_curr->node_id, tag.len, tag);
+        _enable_(VALTAG);
+        m_curr->tr_data->m_val.tag = tag;
+    }
+
+    /** @} */
+
+public:
+
+    /** @name YAML directive events */
+    /** @{ */
+
+    C4_NO_INLINE void add_directive(csubstr directive)
+    {
+        _c4dbgpf("% directive! {}", directive);
+        _RYML_CB_ASSERT(m_tree->callbacks(), directive.begins_with('%'));
+        if(directive.begins_with("%TAG"))
+        {
+            if(C4_UNLIKELY(!m_tree->add_tag_directive(directive)))
+                _RYML_CB_ERR_(m_stack.m_callbacks, "failed to add directive", m_curr->pos);
+        }
+        else if(directive.begins_with("%YAML"))
+        {
+            _c4dbgpf("%YAML directive! ignoring...: {}", directive);
+            if(C4_UNLIKELY(m_yaml_directive))
+                _RYML_CB_ERR_(m_stack.m_callbacks, "multiple yaml directives", m_curr->pos);
+            m_yaml_directive = true;
+        }
+        else
+        {
+            _c4dbgpf("unknown directive! ignoring... {}", directive);
+        }
+        ++m_num_directives;
+    }
+
+    /** @} */
+
+public:
+
+    /** @name arena functions */
+    /** @{ */
+
+    substr alloc_arena(size_t len)
+    {
+        _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree);
+        csubstr old_arena = m_tree->arena(); // snapshot, to detect relocation
+        substr allocated = m_tree->alloc_arena(len);
+        substr new_arena = m_tree->arena();
+        if(new_arena.str != old_arena.str) // the arena moved: update what still points into the old one
+            _stack_relocate_to_new_arena(old_arena, new_arena);
+        return allocated;
+    }
+
+    substr alloc_arena(size_t len, substr *relocated)
+    {
+        _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree);
+        csubstr old_arena = m_tree->arena();
+        // *relocated only needs to follow the arena when it points into it
+        const bool in_arena = old_arena.is_super(*relocated);
+        substr allocated = alloc_arena(len);
+        substr new_arena = m_tree->arena();
+        if(in_arena && new_arena.str != old_arena.str)
+            *relocated = _stack_relocate_to_new_arena(*relocated, old_arena, new_arena);
+        return allocated;
+    }
+
+    /** @} */
+
+public:
+
+    /** @cond dev */
+    void _reset_parser_state(state* st, id_type parse_root, id_type node) // bind st to node and OR into st->flags the parser flags matching node's type; parse_root is the id given to reset()
+    {
+        _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree);
+        _set_state_(st, node);
+        const NodeType type = m_tree->type(node);
+        #ifdef RYML_DBG
+        char flagbuf[80];
+        _c4dbgpf("resetting state: initial flags={}", detail::_parser_flags_to_str(flagbuf, st->flags));
+        #endif
+        if(type == NOTYPE) // untyped node: the flags depend on whether the parse starts at the tree root
+        {
+            _c4dbgpf("node[{}] is notype", node);
+            if(m_tree->is_root(parse_root))
+            {
+                _c4dbgpf("node[{}] is root", node);
+                st->flags |= RUNK|RTOP;
+            }
+            else
+            {
+                _c4dbgpf("node[{}] is not root. setting USTY", node);
+                st->flags |= USTY;
+            }
+        }
+        else if(type.is_map())
+        {
+            _c4dbgpf("node[{}] is map", node);
+            st->flags |= RMAP|USTY;
+        }
+        else if(type.is_seq())
+        {
+            _c4dbgpf("node[{}] is seq", node); // was "is map": copy-paste slip from the branch above
+            st->flags |= RSEQ|USTY;
+        }
+        else if(type.has_key())
+        {
+            _c4dbgpf("node[{}] has key. setting USTY", node);
+            st->flags |= USTY;
+        }
+        else // typed, but neither a container nor keyed: rejected as a parse destination
+        {
+            _RYML_CB_ERR(m_tree->callbacks(), "cannot append to node");
+        }
+        if(type.is_doc()) // independent of the chain above: RDOC is added on top of the other flags
+        {
+            _c4dbgpf("node[{}] is doc", node);
+            st->flags |= RDOC;
+        }
+        #ifdef RYML_DBG
+        _c4dbgpf("resetting state: final flags={}", detail::_parser_flags_to_str(flagbuf, st->flags));
+        #endif
+    }
+
+    /** open a new nesting level: push a state onto the stack, append
+     * a child to the new parent, and make that child the current node */
+    void _push()
+    {
+        _stack_push();
+        // appending may relocate the tree's node buffer; keep the old address to detect it
+        NodeData const* const buf_before = m_tree->m_buf;
+        _set_state_(m_curr, m_tree->_append_child__unprotected(m_parent->node_id));
+        if(m_tree->m_buf != buf_before)
+            _refresh_after_relocation();
+        _c4dbgpf("pushed! level={}. top is now node={} (parent={})", m_curr->level, m_curr->node_id, m_parent ? m_parent->node_id : NONE);
+    }
+    /** end the current scope */
+    void _pop()
+    {
+        _remove_speculative_with_parent();
+        _stack_pop();
+    }
+
+public:
+
+    template<type_bits bits> C4_HOT C4_ALWAYS_INLINE void _enable__() noexcept
+    {
+        m_curr->tr_data->m_type.type = static_cast<NodeType_e>(m_curr->tr_data->m_type.type | bits);
+    }
+    template<type_bits bits> C4_HOT C4_ALWAYS_INLINE void _disable__() noexcept
+    {
+        m_curr->tr_data->m_type.type = static_cast<NodeType_e>(m_curr->tr_data->m_type.type & (~bits));
+    }
+    template<type_bits bits> C4_HOT C4_ALWAYS_INLINE bool _has_any__() const noexcept
+    {
+        return (m_curr->tr_data->m_type.type & bits) != 0;
+    }
+
+public:
+
+    C4_ALWAYS_INLINE void _set_state_(state *C4_RESTRICT s, id_type id) noexcept
+    {
+        s->node_id = id;
+        s->tr_data = m_tree->_p(id);
+    }
+    void _refresh_after_relocation()
+    {
+        _c4dbgp("tree: refreshing stack data after tree data relocation");
+        for(auto &entry : m_stack) // every stack level holds a pointer into the tree's node buffer
+            _set_state_(&entry, entry.node_id); // same node id; only the data pointer is re-read
+    }
+
+    void _set_root_as_stream() // turn the tree root into a stream whose first child is a doc, and make the root the current node
+    {
+        _c4dbgp("set root as stream");
+        _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->root_id() == 0u);
+        _RYML_CB_ASSERT(m_tree->callbacks(), m_curr->node_id == 0u);
+        const bool hack = !m_tree->has_children(m_curr->node_id) && !m_tree->is_val(m_curr->node_id); // root has neither children nor a val
+        if(hack) // NOTE(review): presumably Tree::set_root_as_stream() creates the doc child only when the root has content -- confirm
+            m_tree->_p(m_tree->root_id())->m_type.add(VAL); // temporarily mark the root as a val
+        m_tree->set_root_as_stream();
+        _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_stream(m_tree->root_id()));
+        _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->has_children(m_tree->root_id()));
+        _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_doc(m_tree->first_child(m_tree->root_id())));
+        if(hack) // undo the temporary mark, now on the first child
+            m_tree->_p(m_tree->first_child(m_tree->root_id()))->m_type.rem(VAL);
+        _set_state_(m_curr, m_tree->root_id()); // re-read the root's data pointer for the current state
+    }
+
+    static NodeData _val2key_(NodeData const& C4_RESTRICT d) noexcept
+    {
+        static_assert((_VALMASK >> 1u) == _KEYMASK, "required for this function to work");
+        static_assert((VAL_STYLE >> 1u) == KEY_STYLE, "required for this function to work");
+        // each val bit sits one position above its key counterpart (asserted above),
+        // so shifting the val bits right by one yields the matching key bits
+        const auto as_key_bits = ((d.m_type.type & (_VALMASK|VAL_STYLE)) >> 1u);
+        NodeData r = d;
+        r.m_key = d.m_val;
+        r.m_val = {};
+        r.m_type.type = ((as_key_bits & ~(_VALMASK|VAL_STYLE)) | KEY);
+        return r;
+    }
+
+    void _remove_speculative()
+    {
+        _c4dbgp("remove speculative node");
+        _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree);
+        _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->size() > 0);
+        // the candidate is the last node of the tree; it goes away only if it has a parent and no type
+        const id_type candidate = m_tree->size() - 1;
+        if(m_tree->has_parent(candidate) && m_tree->_p(candidate)->m_type == NOTYPE)
+            m_tree->remove(candidate);
+    }
+
+    void _remove_speculative_with_parent()
+    {
+        _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree);
+        _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->size() > 0);
+        const id_type candidate = m_tree->size() - 1; // the last node of the tree
+        _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->has_parent(candidate));
+        if(!(m_tree->_p(candidate)->m_type == NOTYPE))
+            return; // the node has a type, so it stays
+        // still untyped: the node was added but never used, so take it out again
+        _c4dbgpf("remove speculative node with parent. parent={} node={} parent(node)={}", m_parent->node_id, candidate, m_tree->parent(candidate));
+        m_tree->remove(candidate);
+    }
+
+    C4_ALWAYS_INLINE void _save_loc()
+    {
+        _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree);
+        _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->_p(m_curr->node_id)->m_val.scalar.len == 0);
+        m_tree->_p(m_curr->node_id)->m_val.scalar.str = m_curr->line_contents.rem.str;
+    }
+
+#undef _enable_
+#undef _disable_
+#undef _has_any_
+
+    /** @endcond */
+};
+
+/** @} */
+
+} // namespace yml
+} // namespace c4
+
+C4_SUPPRESS_WARNING_MSVC_POP
+
+#endif /* _C4_YML_EVENT_HANDLER_TREE_HPP_ */
+
+
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/event_handler_tree.hpp)
+
+
+
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/c4/yml/parse_engine.hpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/parse_engine.hpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
+
+#ifndef _C4_YML_PARSE_ENGINE_HPP_
+#define _C4_YML_PARSE_ENGINE_HPP_
+
+#ifndef _C4_YML_DETAIL_PARSER_DBG_HPP_
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/parser_dbg.hpp
+//#include "c4/yml/detail/parser_dbg.hpp"
+#if !defined(C4_YML_DETAIL_PARSER_DBG_HPP_) && !defined(_C4_YML_DETAIL_PARSER_DBG_HPP_)
+#error "amalgamate: file c4/yml/detail/parser_dbg.hpp must have been included at this point"
+#endif /* C4_YML_DETAIL_PARSER_DBG_HPP_ */
+
+#endif
+
+#ifndef _C4_YML_PARSER_STATE_HPP_
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/parser_state.hpp
+//#include "c4/yml/parser_state.hpp"
+#if !defined(C4_YML_PARSER_STATE_HPP_) && !defined(_C4_YML_PARSER_STATE_HPP_)
+#error "amalgamate: file c4/yml/parser_state.hpp must have been included at this point"
+#endif /* C4_YML_PARSER_STATE_HPP_ */
+
+#endif
+
+
+#if defined(_MSC_VER)
+#   pragma warning(push)
+#   pragma warning(disable: 4251/*needs to have dll-interface to be used by clients of struct*/)
+#endif
+
+
+namespace c4 {
+namespace yml {
+
+/** @addtogroup doc_parse
+ * @{ */
+
+/** @defgroup doc_event_handlers Event Handlers
+ *
+ * @brief rapidyaml implements its parsing logic with a two-level
+ * model, where a @ref ParseEngine object reads through the YAML
+ * source, and dispatches events to an EventHandler bound to the @ref
+ * ParseEngine. Because @ref ParseEngine is templated on the event
+ * handler, the binding uses static polymorphism, without any virtual
+ * functions. The actual handler object can be changed at run time
+ * (but of course needs to be of the type of the template parameter).
+ * This is thus a very efficient architecture, and further enables the
+ * user to provide his own custom handler if he wishes to bypass the
+ * rapidyaml @ref Tree.
+ *
+ * There are two handlers implemented in this project:
+ *
+ * - @ref EventHandlerTree is the handler responsible for creating the
+ *   ryml @ref Tree
+ *
+ * - @ref EventHandlerYamlStd is the handler responsible for emitting
+ *   standardized [YAML test suite
+ *   events](https://github.com/yaml/yaml-test-suite), used (only) in
+ *   the CI of this project.
+ *
+ *
+ * ### Event model
+ *
+ * The event model used by the parse engine and event handlers follows
+ * very closely the event model in the [YAML test
+ * suite](https://github.com/yaml/yaml-test-suite).
+ *
+ * Consider for example this YAML,
+ * ```yaml
+ * {foo: bar,foo2: bar2}
+ * ```
+ * which would produce these events in the test-suite parlance:
+ * ```
+ * +STR
+ * +DOC
+ * +MAP {}
+ * =VAL :foo
+ * =VAL :bar
+ * =VAL :foo2
+ * =VAL :bar2
+ * -MAP
+ * -DOC
+ * -STR
+ * ```
+ *
+ * For reference, the @ref ParseEngine object will produce this
+ * sequence of calls to its bound EventHandler:
+ * ```cpp
+ * handler.begin_stream();
+ * handler.begin_doc();
+ * handler.begin_map_val_flow();
+ * handler.set_key_scalar_plain("foo");
+ * handler.set_val_scalar_plain("bar");
+ * handler.add_sibling();
+ * handler.set_key_scalar_plain("foo2");
+ * handler.set_val_scalar_plain("bar2");
+ * handler.end_map();
+ * handler.end_doc();
+ * handler.end_stream();
+ * ```
+ *
+ * For many other examples of all areas of YAML and how ryml's parse
+ * model corresponds to the YAML standard model, refer to the [unit
+ * tests for the parse
+ * engine](https://github.com/biojppm/rapidyaml/tree/master/test/test_parse_engine.cpp).
+ *
+ *
+ * ### Special events
+ *
+ * Most of the parsing events adopted by rapidyaml in its event model
+ * are fairly obvious, but there are two less-obvious events requiring
+ * some explanation.
+ *
+ * These events exist to make it easier to parse some special YAML
+ * cases. They are called by the parser when a just-handled
+ * value/container is actually the first key of a new map:
+ *
+ *   - `actually_val_is_first_key_of_new_map_flow()` (@ref EventHandlerTree::actually_val_is_first_key_of_new_map_flow() "see implementation in EventHandlerTree" / @ref EventHandlerYamlStd::actually_val_is_first_key_of_new_map_flow() "see implementation in EventHandlerYamlStd")
+ *   - `actually_val_is_first_key_of_new_map_block()` (@ref EventHandlerTree::actually_val_is_first_key_of_new_map_block() "see implementation in EventHandlerTree" / @ref EventHandlerYamlStd::actually_val_is_first_key_of_new_map_block() "see implementation in EventHandlerYamlStd")
+ *
+ * For example, consider an implicit map inside a seq: `[a: b, c:
+ * d]` which is parsed as `[{a: b}, {c: d}]`. The standard event
+ * sequence for this YAML would be the following:
+ * ```cpp
+ * handler.begin_seq_val_flow();
+ * handler.begin_map_val_flow();
+ * handler.set_key_scalar_plain("a");
+ * handler.set_val_scalar_plain("b");
+ * handler.end_map();
+ * handler.add_sibling();
+ * handler.begin_map_val_flow();
+ * handler.set_key_scalar_plain("c");
+ * handler.set_val_scalar_plain("d");
+ * handler.end_map();
+ * handler.end_seq();
+ * ```
+ * The problem with this event sequence is that it forces the
+ * parser to delay setting the val scalar (in this case "a" and
+ * "c") until it knows whether the scalar is a key or a val. This
+ * would require the parser to store the scalar until this
+ * time. For instance, in the example above, the parser should
+ * delay setting "a" and "c", because they are in fact keys and
+ * not vals. Until then, the parser would have to store "a" and
+ * "c" in its internal state. The downside is that this complexity
+ * cost would apply even if there is no implicit map -- every val
+ * in a seq would have to be delayed until one of the
+ * disambiguating subsequent tokens `,-]:` is found.
+ * By calling this function, the parser can avoid this complexity,
+ * by preemptively setting the scalar as a val. Then a call to
+ * this function will create the map and rearrange the scalar as
+ * key. Now the cost applies only once: when a seqimap starts. So
+ * the following (easier and cheaper) event sequence below has the
+ * same effect as the event sequence above:
+ * ```cpp
+ * handler.begin_seq_val_flow();
+ * handler.set_val_scalar_plain("notmap");
+ * handler.set_val_scalar_plain("a"); // preemptively set "a" as val!
+ * handler.actually_val_is_first_key_of_new_map_flow(); // create a map, move the "a" val as the key of the first child of the new map
+ * handler.set_val_scalar_plain("b"); // now "a" is a key and "b" the val
+ * handler.end_map();
+ * handler.set_val_scalar_plain("c"); // "c" also as val!
+ * handler.actually_val_is_first_key_of_new_map_flow(); // likewise
+ * handler.set_val_scalar_plain("d"); // now "c" is a key and "d" the val
+ * handler.end_map();
+ * handler.end_seq();
+ * ```
+ * This also applies to container keys (although ryml's tree
+ * cannot accommodate these): the parser can preemptively set a
+ * container as a val, and call this event to turn that container
+ * into a key. For example, consider this yaml:
+ * ```yaml
+ *   [aa, bb]: [cc, dd]
+ * # ^       ^ ^
+ * # |       | |
+ * # (2)   (1) (3)     <- event sequence
+ * ```
+ * The standard event sequence for this YAML would be the
+ * following:
+ * ```cpp
+ * handler.begin_map_val_block();       // (1)
+ * handler.begin_seq_key_flow();        // (2)
+ * handler.set_val_scalar_plain("aa");
+ * handler.add_sibling();
+ * handler.set_val_scalar_plain("bb");
+ * handler.end_seq();
+ * handler.begin_seq_val_flow();        // (3)
+ * handler.set_val_scalar_plain("cc");
+ * handler.add_sibling();
+ * handler.set_val_scalar_plain("dd");
+ * handler.end_seq();
+ * handler.end_map();
+ * ```
+ * The problem with the sequence above is that, reading from
+ * left-to-right, the parser can only detect the proper calls at
+ * (1) and (2) once it reaches (1) in the YAML source. So, the
+ * parser would have to buffer the entire event sequence starting
+ * from the beginning until it reaches (1). Using this function,
+ * the parser can do instead:
+ * ```cpp
+ * handler.begin_seq_val_flow();        // (2) -- preemptively as val!
+ * handler.set_val_scalar_plain("aa");
+ * handler.add_sibling();
+ * handler.set_val_scalar_plain("bb");
+ * handler.end_seq();
+ * handler.actually_val_is_first_key_of_new_map_block(); // (1) -- adjust when finding that the prev val was actually a key.
+ * handler.begin_seq_val_flow();        // (3) -- go on as before
+ * handler.set_val_scalar_plain("cc");
+ * handler.add_sibling();
+ * handler.set_val_scalar_plain("dd");
+ * handler.end_seq();
+ * handler.end_map();
+ * ```
+ */
+
+class Tree;
+class NodeRef;
+class ConstNodeRef;
+
+
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+
+/** Options handed to the parser to control its behavior. The options
+ * are kept as bits of a private mask, and are read and written through
+ * the accessors below; every setter returns `*this`. */
+struct RYML_EXPORT ParserOptions
+{
+private:
+
+    /** one bit per option */
+    typedef enum : uint32_t {
+        SCALAR_FILTERING = (1u << 0),
+        LOCATIONS = (1u << 1),
+        DEFAULTS = SCALAR_FILTERING,
+    } Flags_e;
+
+    /** the enabled options; initially only scalar filtering is on */
+    uint32_t flags = DEFAULTS;
+
+public:
+
+    ParserOptions() = default;
+
+public:
+
+    /** @name source location tracking */
+    /** @{ */
+
+    /** turn source location tracking on or off */
+    ParserOptions& locations(bool enabled) noexcept
+    {
+        // only the LOCATIONS bit is touched; the other bits are kept
+        flags = enabled ? (flags | LOCATIONS) : (flags & ~LOCATIONS);
+        return *this;
+    }
+    /** is source location tracking turned on? */
+    C4_ALWAYS_INLINE bool locations() const noexcept { return (flags & LOCATIONS) != 0u; }
+
+    /** @} */
+
+public:
+
+    /** @name scalar filtering status (experimental; disable at your discretion) */
+    /** @{ */
+
+    /** turn scalar filtering while parsing on or off */
+    ParserOptions& scalar_filtering(bool enabled) noexcept
+    {
+        // only the SCALAR_FILTERING bit is touched; the other bits are kept
+        flags = enabled ? (flags | SCALAR_FILTERING) : (flags & ~SCALAR_FILTERING);
+        return *this;
+    }
+    /** is scalar filtering turned on? */
+    C4_ALWAYS_INLINE bool scalar_filtering() const noexcept { return (flags & SCALAR_FILTERING) != 0u; }
+
+    /** @} */
+};
+
+
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+
+/** This is the main driver of parsing logic: it scans the YAML or
+ * JSON source for tokens, and emits the appropriate sequence of
+ * parsing events to its event handler. The parse engine itself has no
+ * special limitations, and *can* accommodate containers as keys; it is the
+ * event handler that may introduce additional constraints.
+ *
+ * There are two implemented handlers (see @ref doc_event_handlers,
+ * which has important notes about the event model):
+ *
+ * - @ref EventHandlerTree is the handler responsible for creating the
+ *   ryml @ref Tree
+ *
+ * - @ref EventHandlerYamlStd is the handler responsible for emitting
+ *   standardized [YAML test suite
+ *   events](https://github.com/yaml/yaml-test-suite), used (only) in
+ *   the CI of this project. This is not part of the library and is
+ *   not installed.
+ */
+template<class EventHandler>
+class ParseEngine
+{
+public:
+
+    using handler_type = EventHandler;
+
+public:
+
+    /** @name construction and assignment */
+    /** @{ */
+
+    ParseEngine(EventHandler *evt_handler, ParserOptions opts={});
+    ~ParseEngine();
+
+    ParseEngine(ParseEngine &&);
+    ParseEngine(ParseEngine const&);
+    ParseEngine& operator=(ParseEngine &&);
+    ParseEngine& operator=(ParseEngine const&);
+
+    /** @} */
+
+public:
+
+    /** @name modifiers */
+    /** @{ */
+
+    /** Reserve a certain capacity for the parsing stack.
+     * This should be larger than the expected depth of the parsed
+     * YAML tree.
+     *
+     * The parsing stack is the only (potential) heap memory used
+     * directly by the parser.
+     *
+     * If the requested capacity is below the default
+     * stack size of 16, the memory is used directly in the parser
+     * object; otherwise it will be allocated from the heap.
+     *
+     * @note this reserves memory only for the parser itself; all the
+     * allocations for the parsed tree will go through the tree's
+     * allocator (when different).
+     *
+     * @note for maximum efficiency, the tree and the arena can (and
+     * should) also be reserved. */
+    void reserve_stack(id_type capacity)
+    {
+        m_evt_handler->m_stack.reserve(capacity);
+    }
+
+    /** Reserve a certain capacity for the array used to track node
+     * locations in the source buffer. */
+    void reserve_locations(size_t num_source_lines)
+    {
+        _resize_locations(num_source_lines);
+    }
+
+    RYML_DEPRECATED("filter arena no longer needed")
+    void reserve_filter_arena(size_t) {} // no-op: the argument is ignored
+
+    /** @} */
+
+public:
+
+    /** @name getters */
+    /** @{ */
+
+    /** Get the options used to build this parser object. */
+    ParserOptions const& options() const { return m_options; }
+
+    /** Get the current callbacks in the parser. */
+    Callbacks const& callbacks() const { RYML_ASSERT(m_evt_handler); return m_evt_handler->m_stack.m_callbacks; }
+
+    /** Get the name of the latest file parsed by this object. */
+    csubstr filename() const { return m_file; }
+
+    /** Get the latest YAML buffer parsed by this object. */
+    csubstr source() const { return m_buf; }
+    /** Get the capacity of the event handler's parsing stack. */
+    id_type stack_capacity() const { RYML_ASSERT(m_evt_handler); return m_evt_handler->m_stack.capacity(); }
+    size_t locations_capacity() const { return m_newline_offsets_capacity; } // NOTE(review): presumably the capacity set up through reserve_locations() -- confirm
+
+    RYML_DEPRECATED("filter arena no longer needed")
+    size_t filter_arena_capacity() const { return 0u; } // always zero
+
+    /** @} */
+
+public:
+
+    /** @name parse methods */
+    /** @{ */
+
+    /** parse YAML in place, emitting events to the current handler */
+    void parse_in_place_ev(csubstr filename, substr src);
+
+    /** parse JSON in place, emitting events to the current handler */
+    void parse_json_in_place_ev(csubstr filename, substr src);
+
+    /** @} */
+
+public:
+
+    /** @name deprecated parse methods
+     * @{ */
+
+    /** @cond dev */
+    template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree, void>::type parse_in_place(csubstr filename, substr yaml, Tree *t, size_t node_id);
+    template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree, void>::type parse_in_place(                  substr yaml, Tree *t, size_t node_id);
+    template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree, void>::type parse_in_place(csubstr filename, substr yaml, Tree *t                );
+    template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree, void>::type parse_in_place(                  substr yaml, Tree *t                );
+    template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree, void>::type parse_in_place(csubstr filename, substr yaml, NodeRef node           );
+    template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree, void>::type parse_in_place(                  substr yaml, NodeRef node           );
+    template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree, Tree>::type parse_in_place(csubstr filename, substr yaml                         );
+    template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree, Tree>::type parse_in_place(                  substr yaml                         );
+    template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree, void>::type parse_in_arena(csubstr filename, csubstr yaml, Tree *t, size_t node_id);
+    template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree, void>::type parse_in_arena(                  csubstr yaml, Tree *t, size_t node_id);
+    template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree, void>::type parse_in_arena(csubstr filename, csubstr yaml, Tree *t                );
+    template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree, void>::type parse_in_arena(                  csubstr yaml, Tree *t                );
+    template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree, void>::type parse_in_arena(csubstr filename, csubstr yaml, NodeRef node           );
+    template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree, void>::type parse_in_arena(                  csubstr yaml, NodeRef node           );
+    template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree, Tree>::type parse_in_arena(csubstr filename, csubstr yaml                         );
+    template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding function in parse.hpp.") typename std::enable_if<U::is_wtree, Tree>::type parse_in_arena(                  csubstr yaml                         );
+    template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr version in parse.hpp.") typename std::enable_if<U::is_wtree, void>::type parse_in_arena(csubstr filename, substr yaml, Tree *t, size_t node_id);
+    template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr version in parse.hpp.") typename std::enable_if<U::is_wtree, void>::type parse_in_arena(                  substr yaml, Tree *t, size_t node_id);
+    template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr version in parse.hpp.") typename std::enable_if<U::is_wtree, void>::type parse_in_arena(csubstr filename, substr yaml, Tree *t                );
+    template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr version in parse.hpp.") typename std::enable_if<U::is_wtree, void>::type parse_in_arena(                  substr yaml, Tree *t                );
+    template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr version in parse.hpp.") typename std::enable_if<U::is_wtree, void>::type parse_in_arena(csubstr filename, substr yaml, NodeRef node           );
+    template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr version in parse.hpp.") typename std::enable_if<U::is_wtree, void>::type parse_in_arena(                  substr yaml, NodeRef node           );
+    template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr version in parse.hpp.") typename std::enable_if<U::is_wtree, Tree>::type parse_in_arena(csubstr filename, substr yaml                         );
+    template<class U=EventHandler> RYML_DEPRECATED("removed, deliberately undefined. use the freestanding csubstr version in parse.hpp.") typename std::enable_if<U::is_wtree, Tree>::type parse_in_arena(                  substr yaml                         );
+    /** @endcond */
+
+    /** @} */
+
+public:
+
+    /** @name locations */
+    /** @{ */
+
+    /** Get the location of a node of the last tree to be parsed by this parser. */
+    Location location(Tree const& tree, id_type node_id) const;
+    /** Get the location of a node of the last tree to be parsed by this parser. */
+    Location location(ConstNodeRef node) const;
+    /** Get the string starting at a particular location, to the end
+     * of the parsed source buffer. */
+    csubstr location_contents(Location const& loc) const;
+    /** Given a pointer to a buffer position, get the location.
+     * @param[in] val must be pointing to somewhere in the source
+     * buffer that was last parsed by this object. */
+    Location val_location(const char *val) const;
+
+    /** @} */
+
+public:
+
+    /** @name scalar filtering */
+    /** @{*/
+
+    /** filter a plain scalar */
+    FilterResult filter_scalar_plain(csubstr scalar, substr dst, size_t indentation);
+    /** filter a plain scalar in place */
+    FilterResult filter_scalar_plain_in_place(substr scalar, size_t cap, size_t indentation);
+
+    /** filter a single-quoted scalar */
+    FilterResult filter_scalar_squoted(csubstr scalar, substr dst);
+    /** filter a single-quoted scalar in place */
+    FilterResult filter_scalar_squoted_in_place(substr scalar, size_t cap);
+
+    /** filter a double-quoted scalar */
+    FilterResult filter_scalar_dquoted(csubstr scalar, substr dst);
+    /** filter a double-quoted scalar in place */
+    FilterResultExtending filter_scalar_dquoted_in_place(substr scalar, size_t cap);
+
+    /** filter a block-literal scalar */
+    FilterResult filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp);
+    /** filter a block-literal scalar in place */
+    FilterResult filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp);
+
+    /** filter a block-folded scalar */
+    FilterResult filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp);
+    /** filter a block-folded scalar in place */
+    FilterResult filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp);
+
+    /** @} */
+
+private:
+
+    struct ScannedScalar
+    {
+        substr scalar;
+        bool needs_filter;
+    };
+
+    struct ScannedBlock
+    {
+        substr scalar;
+        size_t indentation;
+        BlockChomp_e chomp;
+    };
+
+    bool    _is_doc_begin(csubstr s);
+    bool    _is_doc_end(csubstr s);
+
+    bool    _scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc, size_t indentation);
+    bool    _scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc);
+    bool    _scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc);
+    bool    _scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc);
+    bool    _scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc);
+    bool    _scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc);
+    bool    _scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc);
+    bool    _scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc);
+    bool    _is_valid_start_scalar_plain_flow(csubstr s);
+
+    ScannedScalar _scan_scalar_squot();
+    ScannedScalar _scan_scalar_dquot();
+
+    void    _scan_block(ScannedBlock *C4_RESTRICT sb, size_t indref);
+
+    csubstr _scan_anchor();
+    csubstr _scan_ref_seq();
+    csubstr _scan_ref_map();
+    csubstr _scan_tag();
+
+public: // exposed for testing
+
+    /** @cond dev */
+    csubstr _filter_scalar_plain(substr s, size_t indentation);
+    csubstr _filter_scalar_squot(substr s);
+    csubstr _filter_scalar_dquot(substr s);
+    csubstr _filter_scalar_literal(substr s, size_t indentation, BlockChomp_e chomp);
+    csubstr _filter_scalar_folded(substr s, size_t indentation, BlockChomp_e chomp);
+
+    csubstr _maybe_filter_key_scalar_plain(ScannedScalar const& sc, size_t indendation);
+    csubstr _maybe_filter_val_scalar_plain(ScannedScalar const& sc, size_t indendation);
+    csubstr _maybe_filter_key_scalar_squot(ScannedScalar const& sc);
+    csubstr _maybe_filter_val_scalar_squot(ScannedScalar const& sc);
+    csubstr _maybe_filter_key_scalar_dquot(ScannedScalar const& sc);
+    csubstr _maybe_filter_val_scalar_dquot(ScannedScalar const& sc);
+    csubstr _maybe_filter_key_scalar_literal(ScannedBlock const& sb);
+    csubstr _maybe_filter_val_scalar_literal(ScannedBlock const& sb);
+    csubstr _maybe_filter_key_scalar_folded(ScannedBlock const& sb);
+    csubstr _maybe_filter_val_scalar_folded(ScannedBlock const& sb);
+    /** @endcond */
+
+private:
+
+    void  _handle_map_block();
+    void  _handle_seq_block();
+    void  _handle_map_flow();
+    void  _handle_seq_flow();
+    void  _handle_seq_imap();
+    void  _handle_map_json();
+    void  _handle_seq_json();
+
+    void  _handle_unk();
+    void  _handle_unk_json();
+    void  _handle_usty();
+
+    void  _handle_flow_skip_whitespace();
+
+    void  _end_map_blck();
+    void  _end_seq_blck();
+    void  _end2_map();
+    void  _end2_seq();
+
+    void  _begin2_doc();
+    void  _begin2_doc_expl();
+    void  _end2_doc();
+    void  _end2_doc_expl();
+
+    void  _maybe_begin_doc();
+    void  _maybe_end_doc();
+
+    void  _start_doc_suddenly();
+    void  _end_doc_suddenly();
+    void  _end_doc_suddenly__pop();
+    void  _end_stream();
+
+    void  _set_indentation(size_t indentation);
+    void  _save_indentation();
+    void  _handle_indentation_pop_from_block_seq();
+    void  _handle_indentation_pop_from_block_map();
+    void  _handle_indentation_pop(ParserState const* dst);
+
+    void _maybe_skip_comment();
+    void _skip_comment();
+    void _maybe_skip_whitespace_tokens();
+    void _maybe_skipchars(char c);
+    #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
+    void _maybe_skipchars_up_to(char c, size_t max_to_skip);
+    #endif
+    template<size_t N>
+    void _skipchars(const char (&chars)[N]);
+    bool _maybe_scan_following_colon() noexcept;
+    bool _maybe_scan_following_comma() noexcept;
+
+public:
+
+    /** @cond dev */
+    template<class FilterProcessor> auto _filter_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation) -> decltype(proc.result());
+    template<class FilterProcessor> auto _filter_squoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result());
+    template<class FilterProcessor> auto _filter_dquoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result());
+    template<class FilterProcessor> auto _filter_block_literal(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result());
+    template<class FilterProcessor> auto _filter_block_folded(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result());
+    /** @endcond */
+
+public:
+
+    /** @cond dev */
+    template<class FilterProcessor> void   _filter_nl_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation);
+    template<class FilterProcessor> void   _filter_nl_squoted(FilterProcessor &C4_RESTRICT proc);
+    template<class FilterProcessor> void   _filter_nl_dquoted(FilterProcessor &C4_RESTRICT proc);
+
+    template<class FilterProcessor> bool   _filter_ws_handle_to_first_non_space(FilterProcessor &C4_RESTRICT proc);
+    template<class FilterProcessor> void   _filter_ws_copy_trailing(FilterProcessor &C4_RESTRICT proc);
+    template<class FilterProcessor> void   _filter_ws_skip_trailing(FilterProcessor &C4_RESTRICT proc);
+
+    template<class FilterProcessor> void   _filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc);
+
+    template<class FilterProcessor> void   _filter_chomp(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp, size_t indentation);
+    template<class FilterProcessor> size_t _handle_all_whitespace(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp);
+    template<class FilterProcessor> size_t _extend_to_chomp(FilterProcessor &C4_RESTRICT proc, size_t contents_len);
+    template<class FilterProcessor> void   _filter_block_indentation(FilterProcessor &C4_RESTRICT proc, size_t indentation);
+    template<class FilterProcessor> void   _filter_block_folded_newlines(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len);
+    template<class FilterProcessor> size_t _filter_block_folded_newlines_compress(FilterProcessor &C4_RESTRICT proc, size_t num_newl, size_t wpos_at_first_newl);
+    template<class FilterProcessor> void   _filter_block_folded_newlines_leading(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len);
+    template<class FilterProcessor> void   _filter_block_folded_indented_block(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len, size_t curr_indentation) noexcept;
+
+    /** @endcond */
+
+private:
+
+    void _line_progressed(size_t ahead);
+    void _line_ended();
+    void _line_ended_undo();
+
+    bool  _finished_file() const;
+    bool  _finished_line() const;
+
+    void   _scan_line();
+    substr _peek_next_line(size_t pos=npos) const;
+    /** true when the remainder of the current line begins where the full line begins */
+    inline bool _at_line_begin() const
+    {
+        return m_evt_handler->m_curr->line_contents.rem.begin() == m_evt_handler->m_curr->line_contents.full.begin();
+    }
+
+    void _relocate_arena(csubstr prev_arena, substr next_arena);
+    static void _s_relocate_arena(void*, csubstr prev_arena, substr next_arena);
+
+private:
+    // flag helpers: the overloads taking no state argument act on m_evt_handler->m_curr
+    C4_ALWAYS_INLINE bool has_all(ParserFlag_t f) const noexcept { return (m_evt_handler->m_curr->flags & f) == f; }
+    C4_ALWAYS_INLINE bool has_any(ParserFlag_t f) const noexcept { return (m_evt_handler->m_curr->flags & f) != 0; }
+    C4_ALWAYS_INLINE bool has_none(ParserFlag_t f) const noexcept { return (m_evt_handler->m_curr->flags & f) == 0; }
+    static C4_ALWAYS_INLINE bool has_all(ParserFlag_t f, ParserState const* C4_RESTRICT s) noexcept { return (s->flags & f) == f; }
+    static C4_ALWAYS_INLINE bool has_any(ParserFlag_t f, ParserState const* C4_RESTRICT s) noexcept { return (s->flags & f) != 0; }
+    static C4_ALWAYS_INLINE bool has_none(ParserFlag_t f, ParserState const* C4_RESTRICT s) noexcept { return (s->flags & f) == 0; }
+
+    #ifndef RYML_DBG
+    C4_ALWAYS_INLINE static void add_flags(ParserFlag_t on, ParserState *C4_RESTRICT s) noexcept { s->flags |= on; }
+    C4_ALWAYS_INLINE static void addrem_flags(ParserFlag_t on, ParserFlag_t off, ParserState *C4_RESTRICT s) noexcept { s->flags &= ~off; s->flags |= on; }
+    C4_ALWAYS_INLINE static void rem_flags(ParserFlag_t off, ParserState *C4_RESTRICT s) noexcept { s->flags &= ~off; }
+    C4_ALWAYS_INLINE void add_flags(ParserFlag_t on) noexcept { m_evt_handler->m_curr->flags |= on; }
+    C4_ALWAYS_INLINE void addrem_flags(ParserFlag_t on, ParserFlag_t off) noexcept { m_evt_handler->m_curr->flags &= ~off; m_evt_handler->m_curr->flags |= on; }
+    C4_ALWAYS_INLINE void rem_flags(ParserFlag_t off) noexcept { m_evt_handler->m_curr->flags &= ~off; }
+    #else // RYML_DBG is defined: the static versions are only declared here
+    static void add_flags(ParserFlag_t on, ParserState *C4_RESTRICT s);
+    static void addrem_flags(ParserFlag_t on, ParserFlag_t off, ParserState *C4_RESTRICT s);
+    static void rem_flags(ParserFlag_t off, ParserState *C4_RESTRICT s);
+    C4_ALWAYS_INLINE void add_flags(ParserFlag_t on) noexcept { add_flags(on, m_evt_handler->m_curr); }
+    C4_ALWAYS_INLINE void addrem_flags(ParserFlag_t on, ParserFlag_t off) noexcept { addrem_flags(on, off, m_evt_handler->m_curr); }
+    C4_ALWAYS_INLINE void rem_flags(ParserFlag_t off) noexcept { rem_flags(off, m_evt_handler->m_curr); }
+    #endif
+
+private:
+
+    void _prepare_locations();
+    void _resize_locations(size_t sz);
+    bool _locations_dirty() const;
+
+    bool _location_from_cont(Tree const& tree, id_type node, Location *C4_RESTRICT loc) const;
+    bool _location_from_node(Tree const& tree, id_type node, Location *C4_RESTRICT loc, id_type level) const;
+
+private:
+
+    void _reset();
+    void _free();
+    void _clr();
+
+    #ifdef RYML_DBG
+    template<class ...Args> void _dbg(csubstr fmt, Args const& C4_RESTRICT ...args) const;
+    #endif
+    template<class ...Args> void _err(csubstr fmt, Args const& C4_RESTRICT ...args) const;
+    template<class ...Args> void _errloc(csubstr fmt, Location const& loc, Args const& C4_RESTRICT ...args) const;
+
+    template<class DumpFn>  void _fmt_msg(DumpFn &&dumpfn) const;
+
+private:
+
+    /** store pending tag or anchor/ref annotations */
+    struct Annotation
+    {
+        struct Entry
+        {
+            csubstr str;
+            size_t indentation;
+            size_t line;
+        };
+        Entry annotations[2];
+        size_t num_entries;
+    };
+
+    void _add_annotation(Annotation *C4_RESTRICT dst, csubstr str, size_t indentation, size_t line);
+    void _clear_annotations(Annotation *C4_RESTRICT dst);
+    bool _has_pending_annotations() const { return m_pending_tags.num_entries || m_pending_anchors.num_entries; }
+    #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
+    bool _handle_indentation_from_annotations();
+    #endif
+    bool _annotations_require_key_container() const;
+    void _handle_annotations_before_blck_key_scalar();
+    void _handle_annotations_before_blck_val_scalar();
+    void _handle_annotations_before_start_mapblck(size_t current_line);
+    void _handle_annotations_before_start_mapblck_as_key();
+    void _handle_annotations_and_indentation_after_start_mapblck(size_t key_indentation, size_t key_line);
+    size_t _select_indentation_from_annotations(size_t val_indentation, size_t val_line);
+    void _handle_directive(csubstr rem);
+
+    void _check_tag(csubstr tag);
+
+private:
+
+    ParserOptions m_options;
+
+    csubstr m_file;
+    substr  m_buf;
+
+public:
+
+    /** @cond dev */
+    EventHandler *C4_RESTRICT m_evt_handler;
+    /** @endcond */
+
+private:
+
+    Annotation m_pending_anchors;
+    Annotation m_pending_tags;
+
+    bool m_was_inside_qmrk;
+    bool m_doc_empty = true;
+
+private:
+
+    size_t *m_newline_offsets;
+    size_t  m_newline_offsets_size;
+    size_t  m_newline_offsets_capacity;
+    csubstr m_newline_offsets_buf;
+
+};
+
+/** @cond dev */
+RYML_EXPORT C4_NO_INLINE size_t _find_last_newline_and_larger_indentation(csubstr s, size_t indentation) noexcept;
+/** @endcond */
+
+
+/** Quickly inspect the source to estimate the number of nodes the
+ * resulting tree is likely have. If a tree is empty before
+ * parsing, considerable time will be spent growing it, so calling
+ * this to reserve the tree size prior to parsing is likely to
+ * result in a time gain. We encourage using this method before
+ * parsing, but as always measure its impact in performance to
+ * obtain a good trade-off.
+ *
+ * @note since this method is meant for optimizing performance, it
+ * is approximate. The result may be actually smaller than the
+ * resulting number of nodes, notably if the YAML uses implicit
+ * maps as flow seq members as in `[these: are, individual:
+ * maps]`. */
+RYML_EXPORT id_type estimate_tree_capacity(csubstr src);
+
+/** @} */
+
+} // namespace yml
+} // namespace c4
+
+#if defined(_MSC_VER)
+#   pragma warning(pop)
+#endif
+
+#endif /* _C4_YML_PARSE_ENGINE_HPP_ */
+
+
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/parse_engine.hpp)
+
+
+
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/c4/yml/preprocess.hpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/preprocess.hpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
+
+#ifndef _C4_YML_PREPROCESS_HPP_
+#define _C4_YML_PREPROCESS_HPP_
+
+/** @file preprocess.hpp Functions for preprocessing YAML prior to parsing. */
+
+#ifndef _C4_YML_COMMON_HPP_
+//included above:
+//#include "./common.hpp"
+#endif
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/substr.hpp
+//#include <c4/substr.hpp>
+#if !defined(C4_SUBSTR_HPP_) && !defined(_C4_SUBSTR_HPP_)
+#error "amalgamate: file c4/substr.hpp must have been included at this point"
+#endif /* C4_SUBSTR_HPP_ */
+
+
+
+namespace c4 {
+namespace yml {
+
+/** @addtogroup doc_preprocessors
+ * @{
+ */
+
+/** @cond dev */
+namespace detail {
+using Preprocessor = size_t(csubstr, substr); // signature of a function usable as PP below
+template<Preprocessor PP, class CharContainer>
+substr preprocess_into_container(csubstr input, CharContainer *out)
+{
+    // First pass: the preprocessor writes no further than the end of
+    // the container as it currently is, but it still goes through all
+    // the input, so that it can report the size required for the output.
+    size_t required = PP(input, to_substr(*out));
+    const bool fits = (required <= out->size());
+    if( ! fits)
+    {
+        // Second pass: grow the container to the required size, and write again.
+        out->resize(required);
+        required = PP(input, to_substr(*out));
+    }
+    return to_substr(*out).first(required);
+}
+} // namespace detail
+/** @endcond */
+
+
+//-----------------------------------------------------------------------------
+
+/** @defgroup doc_preprocess_rxmap preprocess_rxmap
+ *
+ * @brief Convert flow-type relaxed maps (with implicit bools) into strict YAML
+ * flow map:
+ *
+ * @code{.yaml}
+ * {a, b, c, d: [e, f], g: {a, b}}
+ * # is converted into this:
+ * {a: 1, b: 1, c: 1, d: [e, f], g: {a, b}}
+ * @endcode
+ *
+ * @note this is NOT recursive - conversion happens only in the top-level map
+ * @param rxmap A relaxed map
+ * @param buf output buffer
+ * @param out output container
+ *
+ * @{
+ */
+
+/** Write into a given output buffer. This function is safe to call with
+ * empty or small buffers; it won't write beyond the end of the buffer.
+ *
+ * @return the number of characters required for output
+ */
+RYML_EXPORT size_t preprocess_rxmap(csubstr rxmap, substr buf);
+
+
+/** Write into an existing container, which is resized when it is too small to contain the output.
+ * @return a substr of the container
+ * @overload preprocess_rxmap */
+template<class CharContainer>
+substr preprocess_rxmap(csubstr rxmap, CharContainer *out)
+{
+    return detail::preprocess_into_container<preprocess_rxmap>(rxmap, out); // template argument: the (csubstr, substr) buffer overload
+}
+
+
+/** Create a default-constructed container, fill it with the result, and return it by value.
+ * @overload preprocess_rxmap */
+template<class CharContainer>
+CharContainer preprocess_rxmap(csubstr rxmap) // CharContainer appears only in the return type, so callers must name it explicitly
+{
+    CharContainer out; // default-constructed destination
+    preprocess_rxmap(rxmap, &out); // pointer-taking overload; the returned substr is not needed here
+    return out;
+}
+
+/** @} */ // preprocess_rxmap
+/** @} */ // group
+
+} // namespace yml
+} // namespace c4
+
+#endif /* _C4_YML_PREPROCESS_HPP_ */
+
+
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/preprocess.hpp)
+
+
+
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/c4/yml/reference_resolver.hpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/reference_resolver.hpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
+
+#ifndef _C4_YML_REFERENCE_RESOLVER_HPP_
+#define _C4_YML_REFERENCE_RESOLVER_HPP_
+
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp
+//#include "c4/yml/tree.hpp"
+#if !defined(C4_YML_TREE_HPP_) && !defined(_C4_YML_TREE_HPP_)
+#error "amalgamate: file c4/yml/tree.hpp must have been included at this point"
+#endif /* C4_YML_TREE_HPP_ */
+
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/stack.hpp
+//#include "c4/yml/detail/stack.hpp"
+#if !defined(C4_YML_DETAIL_STACK_HPP_) && !defined(_C4_YML_DETAIL_STACK_HPP_)
+#error "amalgamate: file c4/yml/detail/stack.hpp must have been included at this point"
+#endif /* C4_YML_DETAIL_STACK_HPP_ */
+
+
+
+namespace c4 {
+namespace yml {
+
+/** @addtogroup doc_ref_utils
+ * @{
+ */
+
+/** Reusable object to resolve references/aliases in the tree. */
+struct RYML_EXPORT ReferenceResolver
+{
+    ReferenceResolver() = default; // NOTE(review): m_tree has no initializer here; presumably set via reset_()/resolve() before use - confirm
+
+    /** Resolve references: for each reference, look for a matching
+     * anchor, and copy its contents to the ref node.
+     *
+     * This method first does a full traversal of the tree to gather
+     * all anchors and references in a separate collection, then it
+     * goes through that collection to locate the names, which it does
+     * by obeying the YAML standard rule that "an alias node refers
+     * to the most recent node in the serialization having the
+     * specified anchor".
+     *
+     * So, depending on the number of anchor/alias nodes, this is a
+     * potentially expensive operation, with a best-case linear
+     * complexity (from the initial traversal).
+     *
+     * @todo verify sanity against anchor-ref attacks (https://en.wikipedia.org/wiki/Billion_laughs_attack )
+     */
+    void resolve(Tree *t_);
+
+public:
+
+    /** @cond dev */
+
+    struct RefData // element type stored in m_refs
+    {
+        NodeType type;
+        id_type node;
+        id_type prev_anchor;
+        id_type target;
+        id_type parent_ref;
+        id_type parent_ref_sibling;
+    };
+
+    void reset_(Tree *t_);
+    void gather_anchors_and_refs_();
+    void gather_anchors_and_refs__(id_type n);
+    id_type count_anchors_and_refs_(id_type n);
+
+    id_type lookup_(RefData *C4_RESTRICT ra);
+
+public:
+
+    Tree *C4_RESTRICT m_tree;
+    /** This stack is used purely as an array of RefData. */
+    detail::stack<RefData> m_refs;
+
+    /** @endcond */
+};
+
+/** @} */
+
+} // namespace yml
+} // namespace c4
+
+
+#endif // _C4_YML_REFERENCE_RESOLVER_HPP_
+
+
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/reference_resolver.hpp)
+
+
+
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/c4/yml/parse.hpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/parse.hpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
+
+#ifndef _C4_YML_PARSE_HPP_
+#define _C4_YML_PARSE_HPP_
+
+#ifndef _C4_YML_COMMON_HPP_
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/common.hpp
+//#include "c4/yml/common.hpp"
+#if !defined(C4_YML_COMMON_HPP_) && !defined(_C4_YML_COMMON_HPP_)
+#error "amalgamate: file c4/yml/common.hpp must have been included at this point"
+#endif /* C4_YML_COMMON_HPP_ */
+
+#endif
+
+namespace c4 {
+namespace yml {
+
+class Tree;
+class NodeRef;
+template<class EventHandler> class ParseEngine;
+struct EventHandlerTree;
+RYML_EXPORT id_type estimate_tree_capacity(csubstr src);
+
+
+/** @addtogroup doc_parse
+ * @{ */
+
+/** This is the main ryml parser, where the parser events are handled
+ * to create a ryml tree.
+ *
+ * @warning Because the ryml @ref Tree does not accept containers as
+ * keys, this class cannot successfully parse YAML source with this
+ * feature. See @ref ParseEngine for more details.
+ *
+ * @see ParserOptions
+ * @see ParseEngine
+ * @see EventHandlerTree
+ * */
+using Parser = RYML_EXPORT ParseEngine<EventHandlerTree>;
+
+
+//-----------------------------------------------------------------------------
+
+/** @defgroup doc_parse_in_place__with_existing_parser Parse in place with existing parser
+ *
+ * @brief parse a mutable YAML source buffer. Scalars requiring
+ * filtering are mutated in place (except in the rare cases where the
+ * filtered scalar is longer than the original scalar, or where
+ * filtering was disabled before the call). These overloads accept an
+ * existing parser object, and provide the opportunity to use special
+ * parser options.
+ *
+ * @see ParserOptions
+ *
+ * @{
+ */
+
+// this is vertically aligned to highlight the parameter differences.
+
+RYML_EXPORT void parse_in_place(Parser *parser, csubstr filename, substr yaml, Tree *t, id_type node_id); /**< (1) parse YAML into an existing tree node.
+                                                                                                          *
+                                                                                                          * The filename will be used in any error messages
+                                                                                                          * arising during the parse. The callbacks in the
+                                                                                                          * tree are kept, and used to allocate
+                                                                                                          * the tree members, if any allocation is required. */
+RYML_EXPORT void parse_in_place(Parser *parser,                   substr yaml, Tree *t, id_type node_id); /**< (2) like (1) but no filename will be reported */
+RYML_EXPORT void parse_in_place(Parser *parser, csubstr filename, substr yaml, Tree *t                 ); /**< (3) parse YAML into an existing tree, into its root node.
+                                                                                                          *
+                                                                                                          * The filename will be used in any error messages
+                                                                                                          * arising during the parse. The callbacks in the
+                                                                                                          * tree are kept, and used to allocate
+                                                                                                          * the tree members, if any allocation is required. */
+RYML_EXPORT void parse_in_place(Parser *parser,                   substr yaml, Tree *t                 ); /**< (4) like (3) but no filename will be reported */
+RYML_EXPORT void parse_in_place(Parser *parser, csubstr filename, substr yaml, NodeRef node            ); /**< (5) like (1) but the node is given as a NodeRef */
+RYML_EXPORT void parse_in_place(Parser *parser,                   substr yaml, NodeRef node            ); /**< (6) like (5) but no filename will be reported */
+RYML_EXPORT Tree parse_in_place(Parser *parser, csubstr filename, substr yaml                          ); /**< (7) create a new tree, and parse YAML into its root node.
+                                                                                                          *
+                                                                                                          * The filename will be used in any error messages
+                                                                                                          * arising during the parse. The tree is created with
+                                                                                                          * the callbacks currently in the parser.
+                                                                                                          */
+RYML_EXPORT Tree parse_in_place(Parser *parser,                   substr yaml                          ); /**< (8) like (7) but no filename will be reported */
+
+
+// this is vertically aligned to highlight the parameter differences.
+RYML_EXPORT void parse_json_in_place(Parser *parser, csubstr filename, substr json, Tree *t, id_type node_id); ///< (1) parse JSON into an existing tree node. The filename will be used in any error messages arising during the parse.
+RYML_EXPORT void parse_json_in_place(Parser *parser,                   substr json, Tree *t, id_type node_id); ///< (2) like (1) but no filename will be reported
+RYML_EXPORT void parse_json_in_place(Parser *parser, csubstr filename, substr json, Tree *t                 ); ///< (3) parse JSON into an existing tree, into its root node.
+RYML_EXPORT void parse_json_in_place(Parser *parser,                   substr json, Tree *t                 ); ///< (4) like (3) but no filename will be reported
+RYML_EXPORT void parse_json_in_place(Parser *parser, csubstr filename, substr json, NodeRef node            ); ///< (5) like (1) but the node is given as a NodeRef
+RYML_EXPORT void parse_json_in_place(Parser *parser,                   substr json, NodeRef node            ); ///< (6) like (5) but no filename will be reported
+RYML_EXPORT Tree parse_json_in_place(Parser *parser, csubstr filename, substr json                          ); ///< (7) create a new tree, and parse JSON into its root node.
+RYML_EXPORT Tree parse_json_in_place(Parser *parser,                   substr json                          ); ///< (8) like (7) but no filename will be reported
+
+/** @} */
+
+
+//-----------------------------------------------------------------------------
+
+/** @defgroup doc_parse_in_place___with_temporary_parser Parse in place with temporary parser
+ *
+ * @brief parse a mutable YAML source buffer. Scalars requiring
+ * filtering are mutated in place (except in the rare cases where the
+ * filtered scalar is longer than the original scalar).
+ *
+ * @note These freestanding functions use a temporary parser object,
+ * and are convenience functions to enable the user to easily parse
+ * YAML without the need to explicitly instantiate a parser and event
+ * handler. Note that some properties (notably node locations in the
+ * original source code) are only available through the parser
+ * class. If you need access to any of these properties, use
+ * the appropriate overload from @ref doc_parse_in_place__with_existing_parser
+ *
+ * @{
+ */
+
+// this is vertically aligned to highlight the parameter differences.
+RYML_EXPORT void parse_in_place(csubstr filename, substr yaml, Tree *t, id_type node_id); ///< (1) parse YAML into an existing tree node. The filename will be used in any error messages arising during the parse.
+RYML_EXPORT void parse_in_place(                  substr yaml, Tree *t, id_type node_id); ///< (2) like (1) but no filename will be reported
+RYML_EXPORT void parse_in_place(csubstr filename, substr yaml, Tree *t                 ); ///< (3) parse YAML into an existing tree, into its root node.
+RYML_EXPORT void parse_in_place(                  substr yaml, Tree *t                 ); ///< (4) like (3) but no filename will be reported
+RYML_EXPORT void parse_in_place(csubstr filename, substr yaml, NodeRef node            ); ///< (5) like (1) but the node is given as a NodeRef
+RYML_EXPORT void parse_in_place(                  substr yaml, NodeRef node            ); ///< (6) like (5) but no filename will be reported
+RYML_EXPORT Tree parse_in_place(csubstr filename, substr yaml                          ); ///< (7) create a new tree, and parse YAML into its root node.
+RYML_EXPORT Tree parse_in_place(                  substr yaml                          ); ///< (8) like (7) but no filename will be reported
+
+// this is vertically aligned to highlight the parameter differences.
+RYML_EXPORT void parse_json_in_place(csubstr filename, substr json, Tree *t, id_type node_id); ///< (1) parse JSON into an existing tree node. The filename will be used in any error messages arising during the parse.
+RYML_EXPORT void parse_json_in_place(                  substr json, Tree *t, id_type node_id); ///< (2) like (1) but no filename will be reported
+RYML_EXPORT void parse_json_in_place(csubstr filename, substr json, Tree *t                 ); ///< (3) parse JSON into an existing tree, into its root node.
+RYML_EXPORT void parse_json_in_place(                  substr json, Tree *t                 ); ///< (4) like (3) but no filename will be reported
+RYML_EXPORT void parse_json_in_place(csubstr filename, substr json, NodeRef node            ); ///< (5) like (1) but the node is given as a NodeRef
+RYML_EXPORT void parse_json_in_place(                  substr json, NodeRef node            ); ///< (6) like (5) but no filename will be reported
+RYML_EXPORT Tree parse_json_in_place(csubstr filename, substr json                          ); ///< (7) create a new tree, and parse JSON into its root node.
+RYML_EXPORT Tree parse_json_in_place(                  substr json                          ); ///< (8) like (7) but no filename will be reported
+
+/** @} */
+
+
+//-----------------------------------------------------------------------------
+
+
+/** @defgroup doc_parse_in_arena__with_existing_parser Parse in arena with existing parser
+ *
+ * @brief parse a read-only (immutable) YAML source buffer. This is
+ * achieved by first copying the contents of the buffer to the tree's
+ * arena, and then calling @ref parse_in_place() . All the resulting
+ * scalars will be filtered in the arena. These overloads accept an
+ * existing parser object, and provide the opportunity to use special
+ * parser options.
+ *
+ * @see ParserOptions
+ *
+ *
+ * @note These overloads use the parser object supplied by the
+ * caller instead of creating a temporary one. Some properties
+ * (notably node locations in the original source code) are only
+ * available through the parser class, so use these overloads when
+ * you need access to any of those properties. For one-off parses
+ * where they are not needed, see the convenience overloads in @ref
+ * doc_parse_in_arena__with_temporary_parser
+ *
+ * @warning overloads receiving a substr YAML buffer are intentionally
+ * left undefined, such that calling parse_in_arena() with a substr
+ * will cause a linker error. This is to prevent an accidental copy of
+ * the source buffer to the tree's arena, because substr (which is
+ * mutable) is implicitly convertible to csubstr (which is
+ * immutable). If you really intend to parse a mutable buffer in the
+ * tree's arena, convert it first to immutable by assigning the substr
+ * to a csubstr prior to calling parse_in_arena(). This is not needed
+ * for parse_in_place() because csubstr is not implicitly convertible
+ * to substr. To be clear:
+ * ```c++
+ * substr mutable_buffer = ...;
+ * parse_in_arena(&parser, mutable_buffer); // linker error
+ *
+ * csubstr immutable_buffer = ...;
+ * parse_in_arena(&parser, immutable_buffer); // ok
+ * ```
+ *
+ * @{
+ */
+
+#define RYML_DONT_PARSE_SUBSTR_IN_ARENA ""                      \
+    "Do not pass a (mutable) substr to parse_in_arena(); "      \
+    "if you have a substr, it should be parsed in place. "      \
+    "Consider using parse_in_place() instead, or convert "      \
+    "the buffer to csubstr prior to calling. This function "    \
+    " is deliberately left undefined, so that calling it "      \
+    "will cause a linker error."
+
+// this is vertically aligned to highlight the parameter differences.
+RYML_EXPORT void parse_in_arena(Parser *parser, csubstr filename, csubstr yaml, Tree *t, id_type node_id); ///< (1) parse YAML into an existing tree node. The filename will be used in any error messages arising during the parse.
+RYML_EXPORT void parse_in_arena(Parser *parser,                   csubstr yaml, Tree *t, id_type node_id); ///< (2) like (1) but no filename will be reported
+RYML_EXPORT void parse_in_arena(Parser *parser, csubstr filename, csubstr yaml, Tree *t                 ); ///< (3) parse YAML into an existing tree, into its root node.
+RYML_EXPORT void parse_in_arena(Parser *parser,                   csubstr yaml, Tree *t                 ); ///< (4) like (3) but no filename will be reported
+RYML_EXPORT void parse_in_arena(Parser *parser, csubstr filename, csubstr yaml, NodeRef node            ); ///< (5) like (1) but the node is given as a NodeRef
+RYML_EXPORT void parse_in_arena(Parser *parser,                   csubstr yaml, NodeRef node            ); ///< (6) like (5) but no filename will be reported
+RYML_EXPORT Tree parse_in_arena(Parser *parser, csubstr filename, csubstr yaml                          ); ///< (7) create a new tree, and parse YAML into its root node.
+RYML_EXPORT Tree parse_in_arena(Parser *parser,                   csubstr yaml                          ); ///< (8) like (7) but no filename will be reported
+
+// this is vertically aligned to highlight the parameter differences.
+RYML_EXPORT void parse_json_in_arena(Parser *parser, csubstr filename, csubstr json, Tree *t, id_type node_id); ///< (1) parse JSON into an existing tree node. The filename will be used in any error messages arising during the parse.
+RYML_EXPORT void parse_json_in_arena(Parser *parser,                   csubstr json, Tree *t, id_type node_id); ///< (2) like (1) but no filename will be reported
+RYML_EXPORT void parse_json_in_arena(Parser *parser, csubstr filename, csubstr json, Tree *t                 ); ///< (3) parse JSON into an existing tree, into its root node.
+RYML_EXPORT void parse_json_in_arena(Parser *parser,                   csubstr json, Tree *t                 ); ///< (4) like (3) but no filename will be reported
+RYML_EXPORT void parse_json_in_arena(Parser *parser, csubstr filename, csubstr json, NodeRef node            ); ///< (5) like (1) but the node is given as a NodeRef
+RYML_EXPORT void parse_json_in_arena(Parser *parser,                   csubstr json, NodeRef node            ); ///< (6) like (5) but no filename will be reported
+RYML_EXPORT Tree parse_json_in_arena(Parser *parser, csubstr filename, csubstr json                          ); ///< (7) create a new tree, and parse JSON into its root node.
+RYML_EXPORT Tree parse_json_in_arena(Parser *parser,                   csubstr json                          ); ///< (8) like (7) but no filename will be reported
+
+/* READ THE DEPRECATION NOTE!
+ *
+ * All of the functions below are intentionally left undefined, to
+ * prevent them being used.
+ *
+ */
+/** @cond dev */
+RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(Parser *parser,                   substr yaml, Tree *t, id_type node_id);
+RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(Parser *parser, csubstr filename, substr yaml, Tree *t, id_type node_id);
+RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(Parser *parser,                   substr yaml, Tree *t                 );
+RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(Parser *parser, csubstr filename, substr yaml, Tree *t                 );
+RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(Parser *parser,                   substr yaml, NodeRef node            );
+RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(Parser *parser, csubstr filename, substr yaml, NodeRef node            );
+RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_in_arena(Parser *parser,                   substr yaml                          );
+RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_in_arena(Parser *parser, csubstr filename, substr yaml                          );
+RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena(Parser *parser,                   substr json, Tree *t, id_type node_id);
+RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena(Parser *parser, csubstr filename, substr json, Tree *t, id_type node_id);
+RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena(Parser *parser,                   substr json, Tree *t                 );
+RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena(Parser *parser, csubstr filename, substr json, Tree *t                 );
+RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena(Parser *parser,                   substr json, NodeRef node            );
+RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena(Parser *parser, csubstr filename, substr json, NodeRef node            );
+RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_json_in_arena(Parser *parser,                   substr json                          );
+RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_json_in_arena(Parser *parser, csubstr filename, substr json                          );
+/** @endcond */
+
+/** @} */
+
+
+//-----------------------------------------------------------------------------
+
+
+/** @defgroup doc_parse_in_arena__with_temporary_parser Parse in arena with temporary parser
+ *
+ * @brief parse a read-only (immutable) YAML source buffer. This is
+ * achieved by first copying the contents of the buffer to the tree's
+ * arena, and then calling @ref parse_in_place() .
+ *
+ * @note These freestanding functions use a temporary parser object,
+ * and are convenience functions to easily one-off parse YAML without
+ * the need to instantiate a separate parser. Note that some
+ * properties (notably node locations in the original source code) are
+ * only available through the parser class. If you need access to any
+ * of these properties, use the appropriate overload from @ref
+ * doc_parse_in_arena__with_existing_parser
+ *
+ * @warning overloads receiving a substr YAML buffer are intentionally
+ * left undefined, such that calling parse_in_arena() with a substr
+ * will cause a linker error. This is to prevent an accidental copy of
+ * the source buffer to the tree's arena, because substr (which is
+ * mutable) is implicitly convertible to csubstr (which is
+ * immutable). If you really intend to parse a mutable buffer in the
+ * tree's arena, convert it first to immutable by assigning the substr
+ * to a csubstr prior to calling parse_in_arena(). This is not needed
+ * for parse_in_place() because csubstr is not implicitly convertible
+ * to substr. To be clear:
+ * ```c++
+ * substr mutable_buffer = ...;
+ * parse_in_arena(mutable_buffer); // linker error
+ *
+ * csubstr immutable_buffer = ...;
+ * parse_in_arena(immutable_buffer); // ok
+ * ```
+ *
+ * @{
+ */
+
+// this is vertically aligned to highlight the parameter differences.
+RYML_EXPORT void parse_in_arena(csubstr filename, csubstr yaml, Tree *t, id_type node_id); ///< (1) parse YAML into an existing tree node. The filename will be used in any error messages arising during the parse.
+RYML_EXPORT void parse_in_arena(                  csubstr yaml, Tree *t, id_type node_id); ///< (2) like (1) but no filename will be reported
+RYML_EXPORT void parse_in_arena(csubstr filename, csubstr yaml, Tree *t                 ); ///< (3) parse YAML into an existing tree, into its root node.
+RYML_EXPORT void parse_in_arena(                  csubstr yaml, Tree *t                 ); ///< (4) like (3) but no filename will be reported
+RYML_EXPORT void parse_in_arena(csubstr filename, csubstr yaml, NodeRef node            ); ///< (5) like (1) but the node is given as a NodeRef
+RYML_EXPORT void parse_in_arena(                  csubstr yaml, NodeRef node            ); ///< (6) like (5) but no filename will be reported
+RYML_EXPORT Tree parse_in_arena(csubstr filename, csubstr yaml                          ); ///< (7) create a new tree, and parse YAML into its root node.
+RYML_EXPORT Tree parse_in_arena(                  csubstr yaml                          ); ///< (8) like (7) but no filename will be reported
+
+// this is vertically aligned to highlight the parameter differences.
+RYML_EXPORT void parse_json_in_arena(csubstr filename, csubstr json, Tree *t, id_type node_id); ///< (1) parse JSON into an existing tree node. The filename will be used in any error messages arising during the parse.
+RYML_EXPORT void parse_json_in_arena(                  csubstr json, Tree *t, id_type node_id); ///< (2) like (1) but no filename will be reported
+RYML_EXPORT void parse_json_in_arena(csubstr filename, csubstr json, Tree *t                 ); ///< (3) parse JSON into an existing tree, into its root node.
+RYML_EXPORT void parse_json_in_arena(                  csubstr json, Tree *t                 ); ///< (4) like (3) but no filename will be reported
+RYML_EXPORT void parse_json_in_arena(csubstr filename, csubstr json, NodeRef node            ); ///< (5) like (1) but the node is given as a NodeRef
+RYML_EXPORT void parse_json_in_arena(                  csubstr json, NodeRef node            ); ///< (6) like (5) but no filename will be reported
+RYML_EXPORT Tree parse_json_in_arena(csubstr filename, csubstr json                          ); ///< (7) create a new tree, and parse JSON into its root node.
+RYML_EXPORT Tree parse_json_in_arena(                  csubstr json                          ); ///< (8) like (7) but no filename will be reported
+
+/* READ THE DEPRECATION NOTE!
+ *
+ * All of the functions below are intentionally left undefined, to
+ * prevent them being used.
+ */
+/** @cond dev */
+RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(                  substr yaml, Tree *t, id_type node_id);
+RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr yaml, Tree *t, id_type node_id);
+RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(                  substr yaml, Tree *t                 );
+RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr yaml, Tree *t                 );
+RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(                  substr yaml, NodeRef node            );
+RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr yaml, NodeRef node            );
+RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_in_arena(                  substr yaml                          );
+RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_in_arena(csubstr filename, substr yaml                          );
+RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena(                  substr json, Tree *t, id_type node_id);
+RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena(csubstr filename, substr json, Tree *t, id_type node_id);
+RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena(                  substr json, Tree *t                 );
+RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena(csubstr filename, substr json, Tree *t                 );
+RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena(                  substr json, NodeRef node            );
+RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_json_in_arena(csubstr filename, substr json, NodeRef node            );
+RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_json_in_arena(                  substr json                          );
+RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_json_in_arena(csubstr filename, substr json                          );
+/** @endcond */
+
+/** @} */
+/** @} */
+
+} // namespace yml
+} // namespace c4
+
+#endif /* _C4_YML_PARSE_HPP_ */
+
+
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/parse.hpp)
+
+
+
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/c4/yml/std/map.hpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/std/map.hpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
+
+#ifndef _C4_YML_STD_MAP_HPP_
+#define _C4_YML_STD_MAP_HPP_
+
+/** @file map.hpp write/read std::map to/from a YAML tree. */
+
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp
+//#include "c4/yml/node.hpp"
+#if !defined(C4_YML_NODE_HPP_) && !defined(_C4_YML_NODE_HPP_)
+#error "amalgamate: file c4/yml/node.hpp must have been included at this point"
+#endif /* C4_YML_NODE_HPP_ */
+
+#include <map>
+
+namespace c4 {
+namespace yml {
+
+// std::map requires child nodes in the data
+// tree hierarchy (a MAP node in ryml parlance).
+// So it should be serialized via write()/read().
+
+template<class K, class V, class Less, class Alloc>
+void write(c4::yml::NodeRef *n, std::map<K, V, Less, Alloc> const& m)
+{
+    *n |= c4::yml::MAP; // add the MAP flag to the destination node
+    for(auto const& C4_RESTRICT p : m) // p is one (key, mapped value) entry of m
+    {
+        auto ch = n->append_child(); // one appended child per map entry
+        ch << c4::yml::key(p.first); // serialize the entry's key as the child's key
+        ch << p.second; // serialize the entry's mapped value into the child
+    }
+}
+
+template<class K, class V, class Less, class Alloc>
+bool read(c4::yml::ConstNodeRef const& n, std::map<K, V, Less, Alloc> * m)
+{
+    K k{}; // scratch key: overwritten, then moved from, on every iteration
+    V v{}; // scratch value: same reuse pattern as k
+    for(auto const& C4_RESTRICT ch : n) // presumably visits the children of n - confirm against ConstNodeRef iteration
+    {
+        ch >> c4::yml::key(k); // deserialize the key of ch into k
+        ch >> v; // deserialize the value of ch into v
+        m->emplace(std::make_pair(std::move(k), std::move(v))); // std::map::emplace inserts nothing when the key is already present
+    }
+    return true; // always true: results of the >> reads above are not inspected
+}
+
+} // namespace yml
+} // namespace c4
+
+#endif // _C4_YML_STD_MAP_HPP_
+
+
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/std/map.hpp)
+
+
+
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/c4/yml/std/string.hpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/std/string.hpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
+
+#ifndef C4_YML_STD_STRING_HPP_
+#define C4_YML_STD_STRING_HPP_
+
+/** @file string.hpp substring conversions for/from std::string */
+
+// everything we need is implemented here:
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/std/string.hpp
+//#include <c4/std/string.hpp>
+#if !defined(C4_STD_STRING_HPP_) && !defined(_C4_STD_STRING_HPP_)
+#error "amalgamate: file c4/std/string.hpp must have been included at this point"
+#endif /* C4_STD_STRING_HPP_ */
+
+
+#endif // C4_YML_STD_STRING_HPP_
+
+
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/std/string.hpp)
+
+
+
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/c4/yml/std/vector.hpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/std/vector.hpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
+
+#ifndef _C4_YML_STD_VECTOR_HPP_
+#define _C4_YML_STD_VECTOR_HPP_
+
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp
+//#include "c4/yml/node.hpp"
+#if !defined(C4_YML_NODE_HPP_) && !defined(_C4_YML_NODE_HPP_)
+#error "amalgamate: file c4/yml/node.hpp must have been included at this point"
+#endif /* C4_YML_NODE_HPP_ */
+
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/std/vector.hpp
+//#include <c4/std/vector.hpp>
+#if !defined(C4_STD_VECTOR_HPP_) && !defined(_C4_STD_VECTOR_HPP_)
+#error "amalgamate: file c4/std/vector.hpp must have been included at this point"
+#endif /* C4_STD_VECTOR_HPP_ */
+
+//included above:
+//#include <vector>
+
+namespace c4 {
+namespace yml {
+
+// vector is a sequence-like type, and it requires child nodes
+// in the data tree hierarchy (a SEQ node in ryml parlance).
+// So it should be serialized via write()/read().
+
+
+/** Serialize a std::vector into the node @p n.
+ *
+ * The node is flagged as a SEQ, and one child is appended for every
+ * element of @p vec, in order. */
+template<class V, class Alloc>
+void write(c4::yml::NodeRef *n, std::vector<V, Alloc> const& vec)
+{
+    *n |= c4::yml::SEQ;
+    for(auto it = vec.begin(), last = vec.end(); it != last; ++it)
+    {
+        V const& elm = *it;
+        n->append_child() << elm;
+    }
+}
+
+/** Deserialize the children of @p n into the vector @p vec.
+ *
+ * The vector is resized to the number of children of @p n, and then
+ * each child is read into the element at the same position.
+ *
+ * @return always true */
+template<class V, class Alloc>
+bool read(c4::yml::ConstNodeRef const& n, std::vector<V, Alloc> *vec)
+{
+    C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wuseless-cast")
+    const size_t num_children = static_cast<size_t>(n.num_children());
+    C4_SUPPRESS_WARNING_GCC_POP
+    vec->resize(num_children);
+    auto dst = vec->begin();
+    for(ConstNodeRef const child : n)
+    {
+        child >> *dst;
+        ++dst;
+    }
+    return true;
+}
+
+/** Overload for std::vector<bool>, whose operator[] returns the proxy
+ * type std::vector<bool>::reference instead of a bool&. Each child is
+ * therefore read into a plain bool, which is then assigned to the
+ * element at the same position.
+ *
+ * @return always true */
+template<class Alloc>
+bool read(c4::yml::ConstNodeRef const& n, std::vector<bool, Alloc> *vec)
+{
+    C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wuseless-cast")
+    const size_t num_children = static_cast<size_t>(n.num_children());
+    C4_SUPPRESS_WARNING_GCC_POP
+    vec->resize(num_children);
+    // the staging bool is shared by all iterations
+    bool value = {};
+    size_t idx = 0;
+    for(ConstNodeRef const child : n)
+    {
+        child >> value;
+        (*vec)[idx] = value;
+        ++idx;
+    }
+    return true;
+}
+
+} // namespace yml
+} // namespace c4
+
+#endif // _C4_YML_STD_VECTOR_HPP_
+
+
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/std/vector.hpp)
+
+
+
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/c4/yml/std/std.hpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/std/std.hpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
+
+#ifndef _C4_YML_STD_STD_HPP_
+#define _C4_YML_STD_STD_HPP_
+
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/std/string.hpp
+//#include "c4/yml/std/string.hpp"
+#if !defined(C4_YML_STD_STRING_HPP_) && !defined(_C4_YML_STD_STRING_HPP_)
+#error "amalgamate: file c4/yml/std/string.hpp must have been included at this point"
+#endif /* C4_YML_STD_STRING_HPP_ */
+
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/std/vector.hpp
+//#include "c4/yml/std/vector.hpp"
+#if !defined(C4_YML_STD_VECTOR_HPP_) && !defined(_C4_YML_STD_VECTOR_HPP_)
+#error "amalgamate: file c4/yml/std/vector.hpp must have been included at this point"
+#endif /* C4_YML_STD_VECTOR_HPP_ */
+
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/std/map.hpp
+//#include "c4/yml/std/map.hpp"
+#if !defined(C4_YML_STD_MAP_HPP_) && !defined(_C4_YML_STD_MAP_HPP_)
+#error "amalgamate: file c4/yml/std/map.hpp must have been included at this point"
+#endif /* C4_YML_STD_MAP_HPP_ */
+
+
+#endif // _C4_YML_STD_STD_HPP_
+
+
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/std/std.hpp)
+
+
+
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/c4/yml/version.cpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/version.cpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
+
+#ifdef RYML_SINGLE_HDR_DEFINE_NOW
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/version.hpp
+//#include "c4/yml/version.hpp"
+#if !defined(C4_YML_VERSION_HPP_) && !defined(_C4_YML_VERSION_HPP_)
+#error "amalgamate: file c4/yml/version.hpp must have been included at this point"
+#endif /* C4_YML_VERSION_HPP_ */
+
+
+namespace c4 {
+namespace yml {
+
+/** @return the library version string, as given by RYML_VERSION */
+csubstr version()
+{
+    return csubstr(RYML_VERSION);
+}
+
+/** @return the major version number, as given by RYML_VERSION_MAJOR */
+int version_major()
+{
+    const int major = RYML_VERSION_MAJOR;
+    return major;
+}
+
+/** @return the minor version number, as given by RYML_VERSION_MINOR */
+int version_minor()
+{
+    const int minor = RYML_VERSION_MINOR;
+    return minor;
+}
+
+/** @return the patch version number, as given by RYML_VERSION_PATCH */
+int version_patch()
+{
+    const int patch = RYML_VERSION_PATCH;
+    return patch;
+}
+
+} // namespace yml
+} // namespace c4
+
+#endif /* RYML_SINGLE_HDR_DEFINE_NOW */
+
+
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/version.cpp)
+
+
+
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/c4/yml/common.cpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/common.cpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
+
+#ifdef RYML_SINGLE_HDR_DEFINE_NOW
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/common.hpp
+//#include "c4/yml/common.hpp"
+#if !defined(C4_YML_COMMON_HPP_) && !defined(_C4_YML_COMMON_HPP_)
+#error "amalgamate: file c4/yml/common.hpp must have been included at this point"
+#endif /* C4_YML_COMMON_HPP_ */
+
+
+#ifndef RYML_NO_DEFAULT_CALLBACKS
+//included above:
+//#   include <stdlib.h>
+//included above:
+//#   include <stdio.h>
+#   ifdef RYML_DEFAULT_CALLBACK_USES_EXCEPTIONS
+#       include <stdexcept>
+#   endif
+#endif // RYML_NO_DEFAULT_CALLBACKS
+
+
+namespace c4 {
+namespace yml {
+
+C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast")
+C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(4702/*unreachable code*/) // on the call to the unreachable macro
+
+namespace {
+Callbacks s_default_callbacks;
+} // anon namespace
+
+#ifndef RYML_NO_DEFAULT_CALLBACKS
+void report_error_impl(const char* msg, size_t length, Location loc, FILE *f)
+{
+    if(!f)
+        f = stderr;
+    if(loc)
+    {
+        if(!loc.name.empty())
+        {
+            // this is more portable than using fprintf("%.*s:") which
+            // is not available in some embedded platforms
+            fwrite(loc.name.str, 1, loc.name.len, f);
+            fputc(':', f);
+        }
+        fprintf(f, "%zu:", loc.line);
+        if(loc.col)
+            fprintf(f, "%zu:", loc.col);
+        if(loc.offset)
+            fprintf(f, " (%zuB):", loc.offset);
+        fputc(' ', f);
+    }
+    RYML_ASSERT(!csubstr(msg, length).ends_with('\0'));
+    fwrite(msg, 1, length, f);
+    fputc('\n', f);
+    fflush(f);
+}
+
+[[noreturn]] void error_impl(const char* msg, size_t length, Location loc, void * /*user_data*/)
+{
+    RYML_ASSERT(!csubstr(msg, length).ends_with('\0'));
+    report_error_impl(msg, length, loc, nullptr);
+#ifdef RYML_DEFAULT_CALLBACK_USES_EXCEPTIONS
+    throw std::runtime_error(std::string(msg, length));
+#else
+    ::abort();
+#endif
+}
+
+/** The default allocation callback: forwards to ::malloc(). When
+ * ::malloc() returns null, error_impl() is called, which does not
+ * return. The hint and the user data are ignored. */
+void* allocate_impl(size_t length, void * /*hint*/, void * /*user_data*/)
+{
+    void *mem = ::malloc(length);
+    if(mem != nullptr)
+        return mem;
+    const char msg[] = "could not allocate memory";
+    error_impl(msg, sizeof(msg)-1, {}, nullptr);
+}
+
+/** The default deallocation callback: forwards to ::free(). The
+ * length and the user data are ignored. */
+void free_impl(void *mem, size_t /*length*/, void * /*user_data*/)
+{
+    if(mem != nullptr) // ::free(nullptr) is a no-op anyway
+        ::free(mem);
+}
+#endif // RYML_NO_DEFAULT_CALLBACKS
+
+
+
+/** Construct the default callbacks, with null user data. When
+ * RYML_NO_DEFAULT_CALLBACKS is defined, all three function pointers
+ * are null; otherwise they point at allocate_impl(), free_impl() and
+ * error_impl(). */
+Callbacks::Callbacks()
+    :
+    m_user_data(nullptr),
+    #ifdef RYML_NO_DEFAULT_CALLBACKS
+    m_allocate(nullptr),
+    m_free(nullptr),
+    m_error(nullptr)
+    #else
+    m_allocate(allocate_impl),
+    m_free(free_impl),
+    m_error(error_impl)
+    #endif
+{
+}
+
+/** Construct callbacks from user-provided functions.
+ *
+ * Unless RYML_NO_DEFAULT_CALLBACKS is defined, any null function
+ * pointer is replaced by the corresponding default implementation.
+ * After that, all three function pointers are checked to be non-null.
+ *
+ * @param user_data opaque pointer stored in the callbacks
+ * @param alloc_ the allocation function
+ * @param free_ the deallocation function
+ * @param error_ the error function */
+Callbacks::Callbacks(void *user_data, pfn_allocate alloc_, pfn_free free_, pfn_error error_)
+    :
+    m_user_data(user_data),
+    #ifdef RYML_NO_DEFAULT_CALLBACKS
+    m_allocate(alloc_),
+    m_free(free_),
+    m_error(error_)
+    #else
+    m_allocate(alloc_ ? alloc_ : allocate_impl),
+    m_free(free_ ? free_ : free_impl),
+    m_error((error_ ? error_ : error_impl))
+    #endif
+{
+    RYML_CHECK(m_allocate);
+    RYML_CHECK(m_free);
+    RYML_CHECK(m_error);
+}
+
+
+/** Replace the global default callbacks with a copy of @p c. */
+void set_callbacks(Callbacks const& c)
+{
+    s_default_callbacks = c;
+}
+
+/** @return a reference to the global default callbacks */
+Callbacks const& get_callbacks()
+{
+    return s_default_callbacks;
+}
+
+/** Restore the global default callbacks to a default-constructed
+ * Callbacks object. */
+void reset_callbacks()
+{
+    const Callbacks defaults;
+    set_callbacks(defaults);
+}
+
+// the [[noreturn]] attribute needs to be here as well (UB otherwise)
+// https://en.cppreference.com/w/cpp/language/attributes/noreturn
+[[noreturn]] void error(Callbacks const& cb, const char *msg, size_t msg_len, Location loc)
+{
+    cb.m_error(msg, msg_len, loc, cb.m_user_data);
+    abort(); // call abort in case the error callback didn't interrupt execution
+    C4_UNREACHABLE();
+}
+
+// the [[noreturn]] attribute needs to be here as well (UB otherwise)
+// see https://en.cppreference.com/w/cpp/language/attributes/noreturn
+[[noreturn]] void error(const char *msg, size_t msg_len, Location loc)
+{
+    error(s_default_callbacks, msg, msg_len, loc);
+    C4_UNREACHABLE();
+}
+
+C4_SUPPRESS_WARNING_MSVC_POP
+C4_SUPPRESS_WARNING_GCC_CLANG_POP
+
+} // namespace yml
+} // namespace c4
+
+#endif /* RYML_SINGLE_HDR_DEFINE_NOW */
+
+
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/common.cpp)
+
+
+
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/c4/yml/node_type.cpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/node_type.cpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
+
+#ifdef RYML_SINGLE_HDR_DEFINE_NOW
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/node_type.hpp
+//#include "c4/yml/node_type.hpp"
+#if !defined(C4_YML_NODE_TYPE_HPP_) && !defined(_C4_YML_NODE_TYPE_HPP_)
+#error "amalgamate: file c4/yml/node_type.hpp must have been included at this point"
+#endif /* C4_YML_NODE_TYPE_HPP_ */
+
+
+namespace c4 {
+namespace yml {
+
+/** Get a static string naming the type of a node.
+ *
+ * The bits of @p ty selected by _TYMASK are first compared with the
+ * known type combinations. When there is no exact match, the first
+ * combination (and then the first single flag) found in @p ty is
+ * named, with a `***` suffix; when nothing is found, `(unk)` is
+ * returned. */
+const char* NodeType::type_str(NodeType_e ty) noexcept
+{
+    switch(ty & _TYMASK)
+    {
+    case KEYVAL: return "KEYVAL";
+    case KEY:    return "KEY";
+    case VAL:    return "VAL";
+    case MAP:    return "MAP";
+    case SEQ:    return "SEQ";
+    case KEYMAP: return "KEYMAP";
+    case KEYSEQ: return "KEYSEQ";
+    case DOCSEQ: return "DOCSEQ";
+    case DOCMAP: return "DOCMAP";
+    case DOCVAL: return "DOCVAL";
+    case DOC:    return "DOC";
+    case STREAM: return "STREAM";
+    case NOTYPE: return "NOTYPE";
+    default:
+        break;
+    }
+    // no exact match: look first for the combined types...
+    if((ty & KEYVAL) == KEYVAL) return "KEYVAL***";
+    if((ty & KEYMAP) == KEYMAP) return "KEYMAP***";
+    if((ty & KEYSEQ) == KEYSEQ) return "KEYSEQ***";
+    if((ty & DOCSEQ) == DOCSEQ) return "DOCSEQ***";
+    if((ty & DOCMAP) == DOCMAP) return "DOCMAP***";
+    if((ty & DOCVAL) == DOCVAL) return "DOCVAL***";
+    // ... and then for the single flags
+    if(ty & KEY) return "KEY***";
+    if(ty & VAL) return "VAL***";
+    if(ty & MAP) return "MAP***";
+    if(ty & SEQ) return "SEQ***";
+    if(ty & DOC) return "DOC***";
+    return "(unk)";
+}
+
+/** Write the names of the flags set in @p flags into @p buf,
+ * separated by `|`.
+ *
+ * @param buf the output buffer
+ * @param flags the flags to print
+ * @return when the text fits (its length must be strictly smaller
+ * than the buffer size, to leave room for the terminating null
+ * character, which is also written), the written part of @p buf
+ * without the terminator. Otherwise, a string with null `str` and
+ * with `len` set to the required buffer size, terminator included. */
+csubstr NodeType::type_str(substr buf, NodeType_e flags) noexcept
+{
+    size_t pos = 0;
+    bool gotone = false;
+
+    // print the flag if it is set, and then remove it from flags.
+    // pos is advanced even when the buffer is too small, so that it
+    // always ends up with the required length.
+    #define _prflag(fl, txt)                                    \
+    do {                                                        \
+        if((flags & fl) == (fl))                                \
+        {                                                       \
+            if(gotone)                                          \
+            {                                                   \
+                if(pos + 1 < buf.len)                           \
+                    buf[pos] = '|';                             \
+                ++pos;                                          \
+            }                                                   \
+            csubstr fltxt = txt;                                \
+            if(pos + fltxt.len <= buf.len)                      \
+                memcpy(buf.str + pos, fltxt.str, fltxt.len);    \
+            pos += fltxt.len;                                   \
+            gotone = true;                                      \
+            flags = (flags & ~fl); /*remove the flag*/          \
+        }                                                       \
+    } while(0)
+
+    _prflag(STREAM, "STREAM");
+    _prflag(DOC, "DOC");
+    // key properties
+    _prflag(KEY, "KEY");
+    _prflag(KEYTAG, "KTAG");
+    _prflag(KEYANCH, "KANCH");
+    _prflag(KEYREF, "KREF");
+    _prflag(KEY_LITERAL, "KLITERAL");
+    _prflag(KEY_FOLDED, "KFOLDED");
+    _prflag(KEY_SQUO, "KSQUO");
+    _prflag(KEY_DQUO, "KDQUO");
+    _prflag(KEY_PLAIN, "KPLAIN");
+    _prflag(KEY_UNFILT, "KUNFILT");
+    // val properties
+    _prflag(VAL, "VAL");
+    _prflag(VALTAG, "VTAG");
+    _prflag(VALANCH, "VANCH");
+    _prflag(VALREF, "VREF");
+    // VAL_UNFILT is printed here and only here: a second attempt
+    // further down could never match, because the flag is removed
+    // once it is printed.
+    _prflag(VAL_UNFILT, "VUNFILT");
+    _prflag(VAL_LITERAL, "VLITERAL");
+    _prflag(VAL_FOLDED, "VFOLDED");
+    _prflag(VAL_SQUO, "VSQUO");
+    _prflag(VAL_DQUO, "VDQUO");
+    _prflag(VAL_PLAIN, "VPLAIN");
+    // container properties
+    _prflag(MAP, "MAP");
+    _prflag(SEQ, "SEQ");
+    _prflag(FLOW_SL, "FLOWSL");
+    _prflag(FLOW_ML, "FLOWML");
+    _prflag(BLOCK, "BLCK");
+    if(pos == 0)
+        _prflag(NOTYPE, "NOTYPE");
+
+    #undef _prflag
+
+    if(pos < buf.len)
+    {
+        buf[pos] = '\0';
+        return buf.first(pos);
+    }
+    else
+    {
+        // does not fit: report the required size
+        csubstr failed;
+        failed.len = pos + 1;
+        failed.str = nullptr;
+        return failed;
+    }
+}
+
+
+//-----------------------------------------------------------------------------
+
+/** Query whether a scalar may be emitted in single-quoted style. The
+ * answer is negative when first_of_any() finds either `"\n "` or
+ * `"\n\t"` in the scalar.
+ * @see https://www.yaml.info/learn/quote.html#noplain */
+bool scalar_style_query_squo(csubstr s) noexcept
+{
+    const auto found = s.first_of_any("\n ", "\n\t");
+    return found ? false : true;
+}
+
+/** Query whether a scalar may be emitted in plain (unquoted) style.
+ *
+ * Numbers and the negative infinity spellings `-.inf`/`-.INF` are
+ * always accepted. Any other scalar is rejected when it is a lone
+ * `:`, when it begins or ends with one of the characters listed
+ * below, or when it contains any of `\n # : [ ] { } ,`.
+ * @see https://www.yaml.info/learn/quote.html#noplain */
+bool scalar_style_query_plain(csubstr s) noexcept
+{
+    if(s.begins_with("-."))
+    {
+        if(s == "-.inf" || s == "-.INF" || s.sub(2).is_number())
+            return true;
+    }
+    else if(s.begins_with_any("0123456789.-+") && s.is_number())
+    {
+        return true;
+    }
+    if(s == ':')
+        return false;
+    // @ and ` are reserved characters
+    if(s.begins_with_any("-:?*&,'\"{}[]|>%#@`\r"))
+        return false;
+    if(s.ends_with_any(":#"))
+        return false;
+    // make this check in the last place, as it has linear complexity,
+    // while the previous ones are constant-time
+    return s.first_of("\n#:[]{},") == npos;
+}
+
+/** Choose the style for emitting a scalar as YAML.
+ *
+ * @return for a zero-length scalar, SCALAR_SQUO when its pointer is
+ * not null and SCALAR_PLAIN when it is null. Otherwise: SCALAR_DQUO
+ * when the scalar begins or ends with a space, newline or tab;
+ * SCALAR_PLAIN when scalar_style_query_plain() accepts it; and
+ * otherwise SCALAR_SQUO or SCALAR_DQUO, as decided by
+ * scalar_style_query_squo(). */
+NodeType_e scalar_style_choose(csubstr s) noexcept
+{
+    if(s.len == 0)
+        return s.str ? SCALAR_SQUO : SCALAR_PLAIN;
+    if(s.begins_with_any(" \n\t") || s.ends_with_any(" \n\t"))
+        return SCALAR_DQUO;
+    if(scalar_style_query_plain(s))
+        return SCALAR_PLAIN; // nothing remarkable - use plain
+    return scalar_style_query_squo(s) ? SCALAR_SQUO : SCALAR_DQUO;
+}
+
+/** Choose the style for emitting a scalar as JSON.
+ *
+ * @return SCALAR_PLAIN for `true`, `false`, `null` and for numbers,
+ * except for numbers longer than one character which have a leading
+ * `0` and no `.`; SCALAR_DQUO for everything else. */
+NodeType_e scalar_style_json_choose(csubstr s) noexcept
+{
+    // do not quote special cases
+    if(s == "true" || s == "false" || s == "null")
+        return SCALAR_PLAIN;
+    // quote anything else which is not a number
+    if(!s.is_number())
+        return SCALAR_DQUO;
+    const bool has_leading_zero = (s.len > 1 && s.begins_with('0'));
+    if(!has_leading_zero)
+        return SCALAR_PLAIN;
+    // quote integral numbers if they have a leading 0
+    // https://github.com/biojppm/rapidyaml/issues/291
+    // but do not quote reals with leading 0
+    // https://github.com/biojppm/rapidyaml/issues/313
+    return (s.find('.') != csubstr::npos) ? SCALAR_PLAIN : SCALAR_DQUO;
+}
+
+} // namespace yml
+} // namespace c4
+
+#endif /* RYML_SINGLE_HDR_DEFINE_NOW */
+
+
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/node_type.cpp)
+
+
+
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/c4/yml/tag.cpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/tag.cpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
+
+#ifdef RYML_SINGLE_HDR_DEFINE_NOW
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/tag.hpp
+//#include "c4/yml/tag.hpp"
+#if !defined(C4_YML_TAG_HPP_) && !defined(_C4_YML_TAG_HPP_)
+#error "amalgamate: file c4/yml/tag.hpp must have been included at this point"
+#endif /* C4_YML_TAG_HPP_ */
+
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp
+//#include "c4/yml/tree.hpp"
+#if !defined(C4_YML_TREE_HPP_) && !defined(_C4_YML_TREE_HPP_)
+#error "amalgamate: file c4/yml/tree.hpp must have been included at this point"
+#endif /* C4_YML_TREE_HPP_ */
+
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/parser_dbg.hpp
+//#include "c4/yml/detail/parser_dbg.hpp"
+#if !defined(C4_YML_DETAIL_PARSER_DBG_HPP_) && !defined(_C4_YML_DETAIL_PARSER_DBG_HPP_)
+#error "amalgamate: file c4/yml/detail/parser_dbg.hpp must have been included at this point"
+#endif /* C4_YML_DETAIL_PARSER_DBG_HPP_ */
+
+
+
+namespace c4 {
+namespace yml {
+
+/** Query whether a tag has the form `!handle!suffix`: it must be
+ * longer than two characters, start with `!`, have a second `!` at
+ * any position after the second character, and must not start with
+ * `!<`. */
+bool is_custom_tag(csubstr tag)
+{
+    if(tag.len <= 2 || tag.str[0] != '!')
+        return false;
+    const size_t second = tag.find('!', 1);
+    if(second == npos || second <= 1)
+        return false;
+    return tag.str[1] != '<';
+}
+
+/** Normalize a tag to its short form.
+ *
+ * @return when to_tag() recognizes the tag, the short spelling given
+ * by from_tag(). Otherwise the tag itself, without the leading `!`
+ * when it starts with `!<`. */
+csubstr normalize_tag(csubstr tag)
+{
+    const YamlTag_e t = to_tag(tag);
+    if(t != TAG_NONE)
+        return from_tag(t);
+    // a verbatim tag: remove the ! and keep the <...> part
+    if(tag.begins_with("!<"))
+        tag = tag.sub(1);
+    return tag;
+}
+
+/** Normalize a tag to its long form.
+ *
+ * @return when to_tag() recognizes the tag, the long spelling given
+ * by from_tag_long(). Otherwise the tag itself, without the leading
+ * `!` when it starts with `!<`. */
+csubstr normalize_tag_long(csubstr tag)
+{
+    const YamlTag_e t = to_tag(tag);
+    if(t != TAG_NONE)
+        return from_tag_long(t);
+    // a verbatim tag: remove the ! and keep the <...> part
+    if(tag.begins_with("!<"))
+        tag = tag.sub(1);
+    return tag;
+}
+
+/** Normalize a tag to its long form, using @p output to expand
+ * `!!suffix` tags which are not recognized by to_tag() into
+ * `<tag:yaml.org,2002:suffix>`.
+ *
+ * @return the result of normalize_tag_long(tag) when it does not
+ * start with `!!`. Otherwise, the expanded tag written into
+ * @p output; or, when @p output is too small, a string with null
+ * `str` and with `len` set to the required size. */
+csubstr normalize_tag_long(csubstr tag, substr output)
+{
+    csubstr result = normalize_tag_long(tag);
+    if(!result.begins_with("!!"))
+        return result;
+    const csubstr suffix = tag.sub(2);
+    const csubstr pfx = "<tag:yaml.org,2002:";
+    const size_t len = pfx.len + suffix.len + 1;
+    if(len > output.len)
+    {
+        result.str = nullptr;
+        result.len = len;
+        return result;
+    }
+    char *wr = output.str;
+    memcpy(wr, pfx.str, pfx.len);
+    wr += pfx.len;
+    memcpy(wr, suffix.str, suffix.len);
+    output[pfx.len + suffix.len] = '>';
+    result = output.first(len);
+    return result;
+}
+
+/** Get the enumeration value for a core YAML tag.
+ *
+ * The tag name may be given in any of these spellings: bare (`map`),
+ * short (`!!map`), long (`tag:yaml.org,2002:map`) or verbatim long
+ * (`<tag:yaml.org,2002:map>`, optionally preceded by `!`).
+ *
+ * @return the enumeration value, or TAG_NONE when the tag starts
+ * with a single `!`, when a verbatim tag is missing the closing `>`,
+ * or when the name is not one of the known names. */
+YamlTag_e to_tag(csubstr tag)
+{
+    const csubstr uri = "tag:yaml.org,2002:";
+    const csubstr uri_verbatim = "<tag:yaml.org,2002:";
+    struct KnownTag { csubstr name; YamlTag_e tag; };
+    const KnownTag known_tags[] = {
+        {"map", TAG_MAP},
+        {"omap", TAG_OMAP},
+        {"pairs", TAG_PAIRS},
+        {"set", TAG_SET},
+        {"seq", TAG_SEQ},
+        {"binary", TAG_BINARY},
+        {"bool", TAG_BOOL},
+        {"float", TAG_FLOAT},
+        {"int", TAG_INT},
+        {"merge", TAG_MERGE},
+        {"null", TAG_NULL},
+        {"str", TAG_STR},
+        {"timestamp", TAG_TIMESTAMP},
+        {"value", TAG_VALUE},
+        {"yaml", TAG_YAML},
+    };
+
+    // reduce the tag to its bare name
+    if(tag.begins_with("!<"))
+        tag = tag.sub(1);
+    if(tag.begins_with("!!"))
+    {
+        tag = tag.sub(2);
+    }
+    else if(tag.begins_with('!'))
+    {
+        return TAG_NONE;
+    }
+    else if(tag.begins_with(uri))
+    {
+        tag = tag.sub(uri.len);
+    }
+    else if(tag.begins_with(uri_verbatim))
+    {
+        tag = tag.sub(uri_verbatim.len);
+        // the verbatim form must be closed with >. Do not blindly
+        // drop the last character, or eg <tag:yaml.org,2002:mapX
+        // would be taken as a map.
+        if(!tag.len || !tag.ends_with('>'))
+            return TAG_NONE;
+        tag = tag.offs(0, 1);
+    }
+
+    for(KnownTag const& known : known_tags)
+    {
+        if(tag == known.name)
+            return known.tag;
+    }
+    return TAG_NONE;
+}
+
+/** Get the long spelling (`<tag:yaml.org,2002:name>`) of a core YAML
+ * tag. An empty string is returned for TAG_NONE and for any value
+ * not listed here. */
+csubstr from_tag_long(YamlTag_e tag)
+{
+    switch(tag)
+    {
+    case TAG_MAP:       return {"<tag:yaml.org,2002:map>"};
+    case TAG_OMAP:      return {"<tag:yaml.org,2002:omap>"};
+    case TAG_PAIRS:     return {"<tag:yaml.org,2002:pairs>"};
+    case TAG_SET:       return {"<tag:yaml.org,2002:set>"};
+    case TAG_SEQ:       return {"<tag:yaml.org,2002:seq>"};
+    case TAG_BINARY:    return {"<tag:yaml.org,2002:binary>"};
+    case TAG_BOOL:      return {"<tag:yaml.org,2002:bool>"};
+    case TAG_FLOAT:     return {"<tag:yaml.org,2002:float>"};
+    case TAG_INT:       return {"<tag:yaml.org,2002:int>"};
+    case TAG_MERGE:     return {"<tag:yaml.org,2002:merge>"};
+    case TAG_NULL:      return {"<tag:yaml.org,2002:null>"};
+    case TAG_STR:       return {"<tag:yaml.org,2002:str>"};
+    case TAG_TIMESTAMP: return {"<tag:yaml.org,2002:timestamp>"};
+    case TAG_VALUE:     return {"<tag:yaml.org,2002:value>"};
+    case TAG_YAML:      return {"<tag:yaml.org,2002:yaml>"};
+    case TAG_NONE:
+    default:
+        break;
+    }
+    return {""};
+}
+
+/** Get the short spelling (`!!name`) of a core YAML tag. An empty
+ * string is returned for TAG_NONE and for any value not listed
+ * here. */
+csubstr from_tag(YamlTag_e tag)
+{
+    switch(tag)
+    {
+    case TAG_MAP:       return {"!!map"};
+    case TAG_OMAP:      return {"!!omap"};
+    case TAG_PAIRS:     return {"!!pairs"};
+    case TAG_SET:       return {"!!set"};
+    case TAG_SEQ:       return {"!!seq"};
+    case TAG_BINARY:    return {"!!binary"};
+    case TAG_BOOL:      return {"!!bool"};
+    case TAG_FLOAT:     return {"!!float"};
+    case TAG_INT:       return {"!!int"};
+    case TAG_MERGE:     return {"!!merge"};
+    case TAG_NULL:      return {"!!null"};
+    case TAG_STR:       return {"!!str"};
+    case TAG_TIMESTAMP: return {"!!timestamp"};
+    case TAG_VALUE:     return {"!!value"};
+    case TAG_YAML:      return {"!!yaml"};
+    case TAG_NONE:
+    default:
+        break;
+    }
+    return {""};
+}
+
+
+/** Set this directive from its text, which must have the form
+ * `%TAG <handle> <prefix>`.
+ *
+ * On success, the handle and prefix are set to the first and second
+ * space-delimited words after `%TAG` (anything after the prefix is
+ * ignored), and next_node_id is set to NONE.
+ *
+ * @return false when the text does not start with `%TAG`, when
+ * `%TAG` is not followed by a space, or when there is no space after
+ * the handle; true otherwise. Nothing is modified on failure. */
+bool TagDirective::create_from_str(csubstr directive_)
+{
+    // do not skip over the directive name without looking at it: the
+    // text may be shorter, or it may be some other directive.
+    if(!directive_.begins_with("%TAG"))
+        return false;
+    csubstr directive = directive_.sub(4);
+    if(!directive.begins_with(' '))
+        return false;
+    directive = directive.triml(' ');
+    size_t pos = directive.find(' ');
+    if(pos == npos)
+        return false;
+    handle = directive.first(pos);
+    directive = directive.sub(handle.len).triml(' ');
+    // the prefix ends at the next space, if there is one
+    pos = directive.find(' ');
+    if(pos != npos)
+        directive = directive.first(pos);
+    prefix = directive;
+    next_node_id = NONE;
+    _c4dbgpf("%TAG: handle={} prefix={}", handle, prefix);
+    return true;
+}
+
+/** Set this directive from its text, and set the id of the node
+ * where it starts to apply.
+ *
+ * The text is checked to start with `%TAG `, and an error is
+ * triggered through the callbacks of @p tree when the text cannot be
+ * parsed. next_node_id is set to the current size of the tree, plus
+ * one when the last node of the tree is a root node which has a type
+ * and is not a stream.
+ *
+ * @return always true */
+bool TagDirective::create_from_str(csubstr directive_, Tree *tree)
+{
+    _RYML_CB_CHECK(tree->callbacks(), directive_.begins_with("%TAG "));
+    const bool parsed = create_from_str(directive_);
+    if(!parsed)
+    {
+        _RYML_CB_ERR(tree->callbacks(), "invalid tag directive");
+    }
+    const id_type num_nodes = tree->size();
+    next_node_id = num_nodes;
+    if(num_nodes > 0)
+    {
+        const id_type last = num_nodes - 1;
+        if(tree->is_root(last))
+        {
+            if(tree->type(last) != NOTYPE && !tree->is_stream(last))
+                ++next_node_id;
+        }
+    }
+    _c4dbgpf("%TAG: handle={} prefix={} next_node={}", handle, prefix, next_node_id);
+    return true;
+}
+
+/** Resolve a tag using this directive: the handle at the start of the
+ * tag is replaced by the prefix, any URI escape (`%` followed by two
+ * hexadecimal digits) in the remainder is decoded, and the result is
+ * enclosed in `<>`.
+ *
+ * @param tag the tag to resolve; must not be shorter than the handle
+ * @param output the buffer where the resolved tag is written
+ * @param callbacks used to report errors: an unclosed `<`, a `%`
+ * which is not followed by two hexadecimal digits, or an escape
+ * decoding to a value above 127
+ * @return 0 when the remainder of the tag is enclosed in `<>` and
+ * already starts with the prefix. Otherwise the length of the
+ * resolved tag; the resolved tag is written to @p output only when
+ * this length is not larger than the size of @p output. */
+size_t TagDirective::transform(csubstr tag, substr output, Callbacks const& callbacks) const
+{
+    _c4dbgpf("%TAG: handle={} prefix={} next_node={}. tag={}", handle, prefix, next_node_id, tag);
+    _RYML_CB_ASSERT(callbacks, tag.len >= handle.len);
+    csubstr rest = tag.sub(handle.len);
+    _c4dbgpf("%TAG: rest={}", rest);
+    if(rest.begins_with('<'))
+    {
+        _c4dbgpf("%TAG: begins with <. rest={}", rest);
+        if(C4_UNLIKELY(!rest.ends_with('>')))
+            _RYML_CB_ERR(callbacks, "malformed tag");
+        rest = rest.offs(1, 1);
+        if(rest.begins_with(prefix))
+        {
+            _c4dbgpf("%TAG: already transformed! actual={}", rest.sub(prefix.len));
+            return 0; // return 0 to signal that the tag is local and cannot be resolved
+        }
+    }
+    size_t len = 1u + prefix.len + rest.len + 1u;
+    const size_t first_escape = rest.find('%');
+    if(first_escape == npos)
+    {
+        // nothing to decode: a straight copy is enough
+        if(len <= output.len)
+        {
+            output.str[0] = '<';
+            memcpy(1u + output.str, prefix.str, prefix.len);
+            memcpy(1u + output.str + prefix.len, rest.str, rest.len);
+            output.str[1u + prefix.len + rest.len] = '>';
+        }
+        return len;
+    }
+    // need to decode URI % sequences. An escape is the % character
+    // followed by EXACTLY two hex digits: any hex digit after those
+    // is a regular character of the tag, and must not be taken as
+    // part of the escape.
+    const csubstr hexdigits = "0123456789abcdefABCDEF";
+    const size_t escape_len = 3u; // the % and the two digits
+    // first pass: validate the escapes and get the decoded length
+    for(size_t pos = first_escape; pos != npos; pos = rest.find('%', pos + escape_len))
+    {
+        _RYML_CB_CHECK(callbacks, pos + escape_len <= rest.len);
+        _RYML_CB_CHECK(callbacks, rest.range(pos + 1u, pos + escape_len).first_not_of(hexdigits) == npos);
+        len -= 2u; // three characters are decoded into a single one
+    }
+    // second pass: write the decoded tag, if it fits
+    if(len <= output.len)
+    {
+        size_t prev = 0, wpos = 0;
+        auto appendstr = [&](csubstr s) { memcpy(output.str + wpos, s.str, s.len); wpos += s.len; };
+        auto appendchar = [&](char c) { output.str[wpos++] = c; };
+        appendchar('<');
+        appendstr(prefix);
+        size_t pos = first_escape;
+        do {
+            uint8_t val = 0;
+            if(C4_UNLIKELY(!read_hex(rest.range(pos + 1u, pos + escape_len), &val) || val > 127))
+                _RYML_CB_ERR(callbacks, "invalid URI character");
+            appendstr(rest.range(prev, pos)); // the characters before the escape
+            appendchar(static_cast<char>(val));
+            prev = pos + escape_len;
+            pos = rest.find('%', prev);
+        } while(pos != npos);
+        _RYML_CB_ASSERT(callbacks, prev > 0);
+        _RYML_CB_ASSERT(callbacks, rest.len >= prev);
+        appendstr(rest.sub(prev)); // the characters after the last escape
+        appendchar('>');
+        _RYML_CB_ASSERT(callbacks, wpos == len);
+    }
+    return len;
+}
+
+} // namespace yml
+} // namespace c4
+
+#endif /* RYML_SINGLE_HDR_DEFINE_NOW */
+
+
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/tag.cpp)
+
+
+
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/c4/yml/tree.cpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.cpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
+
+#ifdef RYML_SINGLE_HDR_DEFINE_NOW
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp
+//#include "c4/yml/tree.hpp"
+#if !defined(C4_YML_TREE_HPP_) && !defined(_C4_YML_TREE_HPP_)
+#error "amalgamate: file c4/yml/tree.hpp must have been included at this point"
+#endif /* C4_YML_TREE_HPP_ */
+
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/parser_dbg.hpp
+//#include "c4/yml/detail/parser_dbg.hpp"
+#if !defined(C4_YML_DETAIL_PARSER_DBG_HPP_) && !defined(_C4_YML_DETAIL_PARSER_DBG_HPP_)
+#error "amalgamate: file c4/yml/detail/parser_dbg.hpp must have been included at this point"
+#endif /* C4_YML_DETAIL_PARSER_DBG_HPP_ */
+
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp
+//#include "c4/yml/node.hpp"
+#if !defined(C4_YML_NODE_HPP_) && !defined(_C4_YML_NODE_HPP_)
+#error "amalgamate: file c4/yml/node.hpp must have been included at this point"
+#endif /* C4_YML_NODE_HPP_ */
+
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/reference_resolver.hpp
+//#include "c4/yml/reference_resolver.hpp"
+#if !defined(C4_YML_REFERENCE_RESOLVER_HPP_) && !defined(_C4_YML_REFERENCE_RESOLVER_HPP_)
+#error "amalgamate: file c4/yml/reference_resolver.hpp must have been included at this point"
+#endif /* C4_YML_REFERENCE_RESOLVER_HPP_ */
+
+
+
+C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(4296/*expression is always 'boolean_value'*/)
+C4_SUPPRESS_WARNING_MSVC(4702/*unreachable code*/)
+C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast")
+C4_SUPPRESS_WARNING_GCC("-Wtype-limits")
+C4_SUPPRESS_WARNING_GCC("-Wuseless-cast")
+
+namespace c4 {
+namespace yml {
+
+
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+
+/** @return a reference to the root node of this tree */
+NodeRef Tree::rootref()
+{
+    const id_type root = root_id();
+    return NodeRef(this, root);
+}
+
+/** @return a read-only reference to the root node of this tree */
+ConstNodeRef Tree::rootref() const
+{
+    const id_type root = root_id();
+    return ConstNodeRef(this, root);
+}
+
+/** @return a read-only reference to the root node of this tree */
+ConstNodeRef Tree::crootref() const
+{
+    const id_type root = root_id();
+    return ConstNodeRef(this, root);
+}
+
+NodeRef Tree::ref(id_type id)
+{
+    _RYML_CB_ASSERT(m_callbacks, id != NONE && id >= 0 && id < m_cap);
+    return NodeRef(this, id);
+}
+ConstNodeRef Tree::ref(id_type id) const // const overload: read-only reference object for node `id`
+{
+    _RYML_CB_ASSERT(m_callbacks, id != NONE && id >= 0 && id < m_cap); // id must lie within the node buffer
+    return ConstNodeRef(this, id);
+}
+ConstNodeRef Tree::cref(id_type id) const // builds the same ConstNodeRef as the const ref(id)
+{
+    _RYML_CB_ASSERT(m_callbacks, id != NONE && id >= 0 && id < m_cap); // id must lie within the node buffer
+    return ConstNodeRef(this, id);
+}
+
+NodeRef Tree::operator[] (csubstr key) // forwards the key lookup to the root reference
+{
+    return rootref()[key];
+}
+ConstNodeRef Tree::operator[] (csubstr key) const // const overload: forwards the key lookup to the const root reference
+{
+    return rootref()[key];
+}
+
+NodeRef Tree::operator[] (id_type i) // forwards the indexed lookup to the root reference
+{
+    return rootref()[i];
+}
+ConstNodeRef Tree::operator[] (id_type i) const // const overload: forwards the indexed lookup to the const root reference
+{
+    return rootref()[i];
+}
+
+NodeRef Tree::docref(id_type i) // mutable reference object for the node whose id is doc(i)
+{
+    return ref(doc(i));
+}
+ConstNodeRef Tree::docref(id_type i) const // const overload: read-only reference object for the node whose id is doc(i)
+{
+    return cref(doc(i));
+}
+ConstNodeRef Tree::cdocref(id_type i) const // builds the same ConstNodeRef as the const docref(i)
+{
+    return cref(doc(i));
+}
+
+
+//-----------------------------------------------------------------------------
+Tree::Tree(Callbacks const& cb) // creates an empty tree: nothing is allocated here
+    : m_buf(nullptr)
+    , m_cap(0)
+    , m_size(0)
+    , m_free_head(NONE) // NONE at both ends: the free list is empty
+    , m_free_tail(NONE)
+    , m_arena()
+    , m_arena_pos(0)
+    , m_callbacks(cb) // the callbacks are stored by copy
+    , m_tag_directives()
+{
+}
+
+Tree::Tree(id_type node_capacity, size_t arena_capacity, Callbacks const& cb) // empty tree with pre-reserved storage
+    : Tree(cb)
+{
+    reserve(node_capacity); // node buffer
+    reserve_arena(arena_capacity); // string arena
+}
+
+Tree::~Tree() // releases the node buffer and the arena through _free()
+{
+    _free();
+}
+
+
+Tree::Tree(Tree const& that) : Tree(that.m_callbacks) // starts empty with that's callbacks, then copies via _copy()
+{
+    _copy(that);
+}
+
+Tree& Tree::operator= (Tree const& that) // copy assignment: replaces this tree's contents with a copy of that
+{
+    if(&that == this) return *this; // self-assignment: _free() below would destroy the very data _copy() reads
+    _free(); // runs while m_callbacks is still the set stored when the current buffers were allocated
+    m_callbacks = that.m_callbacks;
+    _copy(that); return *this; // _copy() asserts that this tree holds no buffers, hence the _free() above
+}
+
+Tree::Tree(Tree && that) noexcept : Tree(that.m_callbacks) // starts empty with that's callbacks, then takes over that's buffers via _move()
+{
+    _move(that);
+}
+
+Tree& Tree::operator= (Tree && that) RYML_NOEXCEPT // move assignment: drops the current contents and takes over that's buffers
+{
+    _free(); // _move() asserts that this tree holds no buffers
+    m_callbacks = that.m_callbacks;
+    _move(that); // leaves that reset by _clear()
+    return *this;
+}
+
+void Tree::_free() // hands the node buffer and the arena back through the callbacks, then resets all members
+{
+    if(m_buf)
+    {
+        _RYML_CB_ASSERT(m_callbacks, m_cap > 0);
+        _RYML_CB_FREE(m_callbacks, m_buf, NodeData, (size_t)m_cap); // m_cap NodeData slots
+    }
+    if(m_arena.str)
+    {
+        _RYML_CB_ASSERT(m_callbacks, m_arena.len > 0);
+        _RYML_CB_FREE(m_callbacks, m_arena.str, char, m_arena.len); // the whole arena, not just the used part
+    }
+    _clear(); // no member keeps pointing at the released memory
+}
+
+
+C4_SUPPRESS_WARNING_GCC_PUSH // the only push of this section; popped by the C4_SUPPRESS_WARNING_GCC_POP that follows _clear_range()
+#if defined(__GNUC__) && __GNUC__>= 8
+    C4_SUPPRESS_WARNING_GCC("-Wclass-memaccess") // no second push here: only one pop follows, so an extra push would leave the diagnostic stack unbalanced. error: ‘void* memset(void*, int, size_t)’ clearing an object of type ‘class c4::yml::Tree’ with no trivial copy-assignment; use assignment or value-initialization instead
+#endif
+
+void Tree::_clear() // resets every member to the empty state; frees nothing (see _free())
+{
+    m_buf = nullptr;
+    m_cap = 0;
+    m_size = 0;
+    m_free_head = NONE; // empty free list, as in the constructor: with 0 here reserve() would splice new slots onto a nonexistent list
+    m_free_tail = NONE;
+    m_arena = {};
+    m_arena_pos = 0;
+    for(id_type i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i)
+        m_tag_directives[i] = {};
+}
+
+void Tree::_copy(Tree const& that) // copies that's nodes, arena and tag directives into this tree, which must hold no buffers
+{
+    _RYML_CB_ASSERT(m_callbacks, m_buf == nullptr);
+    _RYML_CB_ASSERT(m_callbacks, m_arena.str == nullptr);
+    _RYML_CB_ASSERT(m_callbacks, m_arena.len == 0);
+    if(that.m_cap)
+    {
+        m_buf = _RYML_CB_ALLOC_HINT(m_callbacks, NodeData, (size_t)that.m_cap, that.m_buf);
+        memcpy(m_buf, that.m_buf, (size_t)that.m_cap * sizeof(NodeData)); // the whole capacity, free slots included
+    }
+    m_cap = that.m_cap;
+    m_size = that.m_size;
+    m_free_head = that.m_free_head;
+    m_free_tail = that.m_free_tail;
+    m_arena_pos = that.m_arena_pos;
+    m_arena = that.m_arena; // temporarily that's arena, so _relocate() can recognize strings living in it
+    for(id_type i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) // must precede _relocate(), which also rebases arena-backed tag directives
+        m_tag_directives[i] = that.m_tag_directives[i];
+    if(that.m_arena.str)
+    {
+        _RYML_CB_ASSERT(m_callbacks, that.m_arena.len > 0);
+        substr arena;
+        arena.str = _RYML_CB_ALLOC_HINT(m_callbacks, char, that.m_arena.len, that.m_arena.str);
+        arena.len = that.m_arena.len;
+        _relocate(arena); // does a memcpy of the arena and updates nodes using the old arena
+        m_arena = arena; // from here on this tree owns its own arena
+    }
+}
+
+void Tree::_move(Tree & that) noexcept // takes over that's buffers; this tree must hold none (asserted)
+{
+    _RYML_CB_ASSERT(m_callbacks, m_buf == nullptr);
+    _RYML_CB_ASSERT(m_callbacks, m_arena.str == nullptr);
+    _RYML_CB_ASSERT(m_callbacks, m_arena.len == 0);
+    m_buf = that.m_buf; // pointers are taken as they are: nothing is allocated or copied
+    m_cap = that.m_cap;
+    m_size = that.m_size;
+    m_free_head = that.m_free_head;
+    m_free_tail = that.m_free_tail;
+    m_arena = that.m_arena;
+    m_arena_pos = that.m_arena_pos;
+    for(id_type i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i)
+        m_tag_directives[i] = that.m_tag_directives[i];
+    that._clear(); // that must not keep pointers to the buffers now held by this tree
+}
+
+void Tree::_relocate(substr next_arena) // copies the used arena bytes into next_arena and rebases every string found in the current arena; m_arena itself is not reassigned here
+{
+    _RYML_CB_ASSERT(m_callbacks, next_arena.not_empty());
+    _RYML_CB_ASSERT(m_callbacks, next_arena.len >= m_arena.len);
+    if(m_arena_pos) // only the first m_arena_pos bytes are copied
+        memcpy(next_arena.str, m_arena.str, m_arena_pos);
+    for(NodeData *C4_RESTRICT n = m_buf, *e = m_buf + m_cap; n != e; ++n) // every slot of the node buffer, up to m_cap
+    {
+        if(in_arena(n->m_key.scalar))
+            n->m_key.scalar = _relocated(n->m_key.scalar, next_arena);
+        if(in_arena(n->m_key.tag))
+            n->m_key.tag = _relocated(n->m_key.tag, next_arena);
+        if(in_arena(n->m_key.anchor))
+            n->m_key.anchor = _relocated(n->m_key.anchor, next_arena);
+        if(in_arena(n->m_val.scalar))
+            n->m_val.scalar = _relocated(n->m_val.scalar, next_arena);
+        if(in_arena(n->m_val.tag))
+            n->m_val.tag = _relocated(n->m_val.tag, next_arena);
+        if(in_arena(n->m_val.anchor))
+            n->m_val.anchor = _relocated(n->m_val.anchor, next_arena);
+    }
+    for(TagDirective &C4_RESTRICT td : m_tag_directives) // tag directives get the same treatment as node strings
+    {
+        if(in_arena(td.prefix))
+            td.prefix = _relocated(td.prefix, next_arena);
+        if(in_arena(td.handle))
+            td.handle = _relocated(td.handle, next_arena);
+    }
+}
+
+
+//-----------------------------------------------------------------------------
+void Tree::reserve(id_type cap) // grows the node buffer to cap slots; does nothing unless cap > m_cap
+{
+    if(cap > m_cap)
+    {
+        NodeData *buf = _RYML_CB_ALLOC_HINT(m_callbacks, NodeData, (size_t)cap, m_buf);
+        if(m_buf)
+        {
+            memcpy(buf, m_buf, (size_t)m_cap * sizeof(NodeData)); // carry the existing slots over
+            _RYML_CB_FREE(m_callbacks, m_buf, NodeData, (size_t)m_cap);
+        }
+        id_type first = m_cap, del = cap - m_cap; // first new slot and number of new slots
+        m_cap = cap;
+        m_buf = buf;
+        _clear_range(first, del); // resets the new slots and chains them through their sibling links
+        if(m_free_head != NONE) // a free list exists: append the new chain after its tail
+        {
+            _RYML_CB_ASSERT(m_callbacks, m_buf != nullptr);
+            _RYML_CB_ASSERT(m_callbacks, m_free_tail != NONE);
+            m_buf[m_free_tail].m_next_sibling = first;
+            m_buf[first].m_prev_sibling = m_free_tail;
+            m_free_tail = cap-1;
+        }
+        else // no free slots were left: the new chain becomes the free list
+        {
+            _RYML_CB_ASSERT(m_callbacks, m_free_tail == NONE);
+            m_free_head = first;
+            m_free_tail = cap-1;
+        }
+        _RYML_CB_ASSERT(m_callbacks, m_free_head == NONE || (m_free_head >= 0 && m_free_head < cap));
+        _RYML_CB_ASSERT(m_callbacks, m_free_tail == NONE || (m_free_tail >= 0 && m_free_tail < cap));
+
+        if( ! m_size) // nothing claimed yet: claim the root now
+            _claim_root();
+    }
+}
+
+
+//-----------------------------------------------------------------------------
+void Tree::clear() // resets all node slots and the tag directives; the node buffer stays allocated and the arena members are not touched
+{
+    _clear_range(0, m_cap);
+    m_size = 0;
+    if(m_buf)
+    {
+        _RYML_CB_ASSERT(m_callbacks, m_cap >= 0);
+        m_free_head = 0; // the whole buffer is one free chain again
+        m_free_tail = m_cap-1;
+        _claim_root(); // and the root is claimed right away
+    }
+    else
+    {
+        m_free_head = NONE; // no buffer: empty free list
+        m_free_tail = NONE;
+    }
+    for(id_type i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i)
+        m_tag_directives[i] = {};
+}
+
+void Tree::_claim_root() // claims a node for the root and detaches it from any parent or sibling
+{
+    id_type r = _claim();
+    _RYML_CB_ASSERT(m_callbacks, r == 0); // the root is expected to be the first node claimed, so it gets id 0
+    _set_hierarchy(r, NONE, NONE); // no parent, no previous sibling
+}
+
+
+//-----------------------------------------------------------------------------
+void Tree::_clear_range(id_type first, id_type num) // resets slots [first, first+num) and chains them through their sibling links
+{
+    if(num == 0)
+        return; // prevent overflow when subtracting
+    _RYML_CB_ASSERT(m_callbacks, first >= 0 && first + num <= m_cap);
+    memset(m_buf + first, 0, (size_t)num * sizeof(NodeData)); // TODO we should not need this
+    for(id_type i = first, e = first + num; i < e; ++i)
+    {
+        _clear(i);
+        NodeData *n = m_buf + i;
+        n->m_prev_sibling = i - 1; // each slot links to its array neighbours
+        n->m_next_sibling = i + 1;
+    }
+    m_buf[first + num - 1].m_next_sibling = NONE; // the last slot of the range ends the chain
+}
+
+C4_SUPPRESS_WARNING_GCC_POP
+
+
+//-----------------------------------------------------------------------------
+void Tree::_release(id_type i) // returns node i to the free list; its children are not visited here
+{
+    _RYML_CB_ASSERT(m_callbacks, i >= 0 && i < m_cap);
+
+    _rem_hierarchy(i); // unlink from parent and siblings
+    _free_list_add(i); // becomes the new head of the free list
+    _clear(i);
+
+    --m_size;
+}
+
+//-----------------------------------------------------------------------------
+// add to the front of the free list
+void Tree::_free_list_add(id_type i) // pushes node i at the head of the free list
+{
+    _RYML_CB_ASSERT(m_callbacks, i >= 0 && i < m_cap);
+    NodeData &C4_RESTRICT w = m_buf[i];
+
+    w.m_parent = NONE;
+    w.m_next_sibling = m_free_head; // the old head follows the new one
+    w.m_prev_sibling = NONE;
+    if(m_free_head != NONE)
+        m_buf[m_free_head].m_prev_sibling = i;
+    m_free_head = i;
+    if(m_free_tail == NONE) // the list was empty: the node is also the tail
+        m_free_tail = m_free_head;
+}
+
+void Tree::_free_list_rem(id_type i) // unlinks node i from the free list, keeping both ends of the list valid
+{
+    if(m_free_head == i) m_free_head = _p(i)->m_next_sibling; // i was the head: its successor takes over
+    if(m_free_tail == i) m_free_tail = _p(i)->m_prev_sibling; // i was the tail: otherwise reserve() would later append after a node that is in use
+    _rem_hierarchy(i); // patches the sibling links of the neighbours
+}
+
+//-----------------------------------------------------------------------------
+id_type Tree::_claim() // takes the node at the head of the free list, growing the buffer when none is available; returns its id
+{
+    if(m_free_head == NONE || m_buf == nullptr)
+    {
+        id_type sz = 2 * m_cap; // grow by doubling...
+        sz = sz ? sz : 16; // ...starting at 16 slots for an empty tree
+        reserve(sz);
+        _RYML_CB_ASSERT(m_callbacks, m_free_head != NONE);
+    }
+
+    _RYML_CB_ASSERT(m_callbacks, m_size < m_cap);
+    _RYML_CB_ASSERT(m_callbacks, m_free_head >= 0 && m_free_head < m_cap);
+
+    id_type ichild = m_free_head;
+    NodeData *child = m_buf + ichild;
+
+    ++m_size;
+    m_free_head = child->m_next_sibling; // pop the head of the free list
+    if(m_free_head == NONE) // that was the last free slot
+    {
+        m_free_tail = NONE;
+        _RYML_CB_ASSERT(m_callbacks, m_size == m_cap);
+    }
+
+    _clear(ichild);
+
+    return ichild;
+}
+
+//-----------------------------------------------------------------------------
+
+C4_SUPPRESS_WARNING_GCC_PUSH
+C4_SUPPRESS_WARNING_CLANG_PUSH
+C4_SUPPRESS_WARNING_CLANG("-Wnull-dereference")
+#if defined(__GNUC__)
+#if (__GNUC__ >= 6)
+C4_SUPPRESS_WARNING_GCC("-Wnull-dereference")
+#endif
+#if (__GNUC__ > 9)
+C4_SUPPRESS_WARNING_GCC("-Wanalyzer-fd-leak")
+#endif
+#endif
+
+void Tree::_set_hierarchy(id_type ichild, id_type iparent, id_type iprev_sibling) // links ichild under iparent, right after iprev_sibling (NONE: as first child)
+{
+    _RYML_CB_ASSERT(m_callbacks, ichild >= 0 && ichild < m_cap);
+    _RYML_CB_ASSERT(m_callbacks, iparent == NONE || (iparent >= 0 && iparent < m_cap));
+    _RYML_CB_ASSERT(m_callbacks, iprev_sibling == NONE || (iprev_sibling >= 0 && iprev_sibling < m_cap));
+
+    NodeData *C4_RESTRICT child = _p(ichild);
+
+    child->m_parent = iparent;
+    child->m_prev_sibling = NONE; // sibling links are filled in below when there are neighbours
+    child->m_next_sibling = NONE;
+
+    if(iparent == NONE) // only node 0 may be set up without a parent
+    {
+        _RYML_CB_ASSERT(m_callbacks, ichild == 0);
+        _RYML_CB_ASSERT(m_callbacks, iprev_sibling == NONE);
+    }
+
+    if(iparent == NONE)
+        return;
+
+    id_type inext_sibling = iprev_sibling != NONE ? next_sibling(iprev_sibling) : first_child(iparent); // the node that will follow the child
+    NodeData *C4_RESTRICT parent = get(iparent);
+    NodeData *C4_RESTRICT psib   = get(iprev_sibling); // tested for null below, so get() presumably yields nullptr for NONE
+    NodeData *C4_RESTRICT nsib   = get(inext_sibling);
+
+    if(psib) // splice in after the previous sibling
+    {
+        _RYML_CB_ASSERT(m_callbacks, next_sibling(iprev_sibling) == id(nsib));
+        child->m_prev_sibling = id(psib);
+        psib->m_next_sibling = id(child);
+        _RYML_CB_ASSERT(m_callbacks, psib->m_prev_sibling != psib->m_next_sibling || psib->m_prev_sibling == NONE);
+    }
+
+    if(nsib) // splice in before the next sibling
+    {
+        _RYML_CB_ASSERT(m_callbacks, prev_sibling(inext_sibling) == id(psib));
+        child->m_next_sibling = id(nsib);
+        nsib->m_prev_sibling = id(child);
+        _RYML_CB_ASSERT(m_callbacks, nsib->m_prev_sibling != nsib->m_next_sibling || nsib->m_prev_sibling == NONE);
+    }
+
+    if(parent->m_first_child == NONE) // first child of this parent
+    {
+        _RYML_CB_ASSERT(m_callbacks, parent->m_last_child == NONE);
+        parent->m_first_child = id(child);
+        parent->m_last_child = id(child);
+    }
+    else
+    {
+        if(child->m_next_sibling == parent->m_first_child) // inserted at the front
+            parent->m_first_child = id(child);
+
+        if(child->m_prev_sibling == parent->m_last_child) // inserted at the back
+            parent->m_last_child = id(child);
+    }
+}
+
+C4_SUPPRESS_WARNING_GCC_POP
+C4_SUPPRESS_WARNING_CLANG_POP
+
+
+//-----------------------------------------------------------------------------
+void Tree::_rem_hierarchy(id_type i) // unlinks node i from its parent and siblings; the node's own link fields are left unchanged
+{
+    _RYML_CB_ASSERT(m_callbacks, i >= 0 && i < m_cap);
+
+    NodeData &C4_RESTRICT w = m_buf[i];
+
+    // remove from the parent
+    if(w.m_parent != NONE)
+    {
+        NodeData &C4_RESTRICT p = m_buf[w.m_parent];
+        if(p.m_first_child == i)
+        {
+            p.m_first_child = w.m_next_sibling; // the next sibling becomes the first child
+        }
+        if(p.m_last_child == i)
+        {
+            p.m_last_child = w.m_prev_sibling; // the previous sibling becomes the last child
+        }
+    }
+
+    // remove from the used list
+    if(w.m_prev_sibling != NONE)
+    {
+        NodeData *C4_RESTRICT prev = get(w.m_prev_sibling);
+        prev->m_next_sibling = w.m_next_sibling;
+    }
+    if(w.m_next_sibling != NONE)
+    {
+        NodeData *C4_RESTRICT next = get(w.m_next_sibling);
+        next->m_prev_sibling = w.m_prev_sibling;
+    }
+}
+
+//-----------------------------------------------------------------------------
+id_type Tree::_do_reorder(id_type *node, id_type count) // puts *node at index count, then its descendants at the following indices; returns the next unused index
+{
+    // swap this node if it's not in place
+    if(*node != count)
+    {
+        _swap(*node, count);
+        *node = count; // tell the caller where the node lives now
+    }
+    ++count; // bump the count from this node
+
+    // now descend in the hierarchy
+    for(id_type i = first_child(*node); i != NONE; i = next_sibling(i))
+    {
+        // this child may have been relocated to a different index,
+        // so get an updated version
+        count = _do_reorder(&i, count);
+    }
+    return count;
+}
+
+void Tree::reorder() // renumbers the nodes starting from the root, which goes to index 0
+{
+    id_type r = root_id(); // passed by address because _do_reorder() may rewrite it
+    _do_reorder(&r, 0);
+}
+
+
+//-----------------------------------------------------------------------------
+void Tree::_swap(id_type n_, id_type m_) // exchanges the nodes stored at indices n_ and m_; either one (not both) may be an unused NOTYPE slot
+{
+    _RYML_CB_ASSERT(m_callbacks, (parent(n_) != NONE) || type(n_) == NOTYPE);
+    _RYML_CB_ASSERT(m_callbacks, (parent(m_) != NONE) || type(m_) == NOTYPE);
+    NodeType tn = type(n_);
+    NodeType tm = type(m_);
+    if(tn != NOTYPE && tm != NOTYPE) // both in use: a true swap
+    {
+        _swap_props(n_, m_);
+        _swap_hierarchy(n_, m_);
+    }
+    else if(tn == NOTYPE && tm != NOTYPE) // n_ is unused: move m_ into it and free m_
+    {
+        _copy_props(n_, m_);
+        _free_list_rem(n_);
+        _copy_hierarchy(n_, m_);
+        _clear(m_);
+        _free_list_add(m_);
+    }
+    else if(tn != NOTYPE && tm == NOTYPE) // m_ is unused: move n_ into it and free n_
+    {
+        _copy_props(m_, n_);
+        _free_list_rem(m_);
+        _copy_hierarchy(m_, n_);
+        _clear(n_);
+        _free_list_add(n_);
+    }
+    else
+    {
+        C4_NEVER_REACH();
+    }
+}
+
+//-----------------------------------------------------------------------------
+void Tree::_swap_hierarchy(id_type ia, id_type ib) // exchanges the tree positions (parent, children, siblings) of nodes ia and ib
+{
+    if(ia == ib) return;
+
+    for(id_type i = first_child(ia); i != NONE; i = next_sibling(i)) // children of a will belong to b
+    {
+        if(i == ib || i == ia)
+            continue;
+        _p(i)->m_parent = ib;
+    }
+
+    for(id_type i = first_child(ib); i != NONE; i = next_sibling(i)) // children of b will belong to a
+    {
+        if(i == ib || i == ia)
+            continue;
+        _p(i)->m_parent = ia;
+    }
+
+    auto & C4_RESTRICT a  = *_p(ia);
+    auto & C4_RESTRICT b  = *_p(ib);
+    auto & C4_RESTRICT pa = *_p(a.m_parent);
+    auto & C4_RESTRICT pb = *_p(b.m_parent);
+
+    if(&pa == &pb) // same parent: its first/last child entries may name both nodes
+    {
+        if((pa.m_first_child == ib && pa.m_last_child == ia)
+            ||
+           (pa.m_first_child == ia && pa.m_last_child == ib))
+        {
+            std::swap(pa.m_first_child, pa.m_last_child);
+        }
+        else
+        {
+            bool changed = false; // set once an entry naming ia was rewritten to ib, so it is not rewritten back below
+            if(pa.m_first_child == ia)
+            {
+                pa.m_first_child = ib;
+                changed = true;
+            }
+            if(pa.m_last_child  == ia)
+            {
+                pa.m_last_child = ib;
+                changed = true;
+            }
+            if(pb.m_first_child == ib && !changed)
+            {
+                pb.m_first_child = ia;
+            }
+            if(pb.m_last_child  == ib && !changed)
+            {
+                pb.m_last_child  = ia;
+            }
+        }
+    }
+    else // different parents: each parent entry naming one node now names the other
+    {
+        if(pa.m_first_child == ia)
+            pa.m_first_child = ib;
+        if(pa.m_last_child  == ia)
+            pa.m_last_child  = ib;
+        if(pb.m_first_child == ib)
+            pb.m_first_child = ia;
+        if(pb.m_last_child  == ib)
+            pb.m_last_child  = ia;
+    }
+    std::swap(a.m_first_child , b.m_first_child);
+    std::swap(a.m_last_child  , b.m_last_child);
+
+    if(a.m_prev_sibling != ib && b.m_prev_sibling != ia &&
+       a.m_next_sibling != ib && b.m_next_sibling != ia) // not adjacent: repoint the four neighbours, then swap the links
+    {
+        if(a.m_prev_sibling != NONE && a.m_prev_sibling != ib)
+            _p(a.m_prev_sibling)->m_next_sibling = ib;
+        if(a.m_next_sibling != NONE && a.m_next_sibling != ib)
+            _p(a.m_next_sibling)->m_prev_sibling = ib;
+        if(b.m_prev_sibling != NONE && b.m_prev_sibling != ia)
+            _p(b.m_prev_sibling)->m_next_sibling = ia;
+        if(b.m_next_sibling != NONE && b.m_next_sibling != ia)
+            _p(b.m_next_sibling)->m_prev_sibling = ia;
+        std::swap(a.m_prev_sibling, b.m_prev_sibling);
+        std::swap(a.m_next_sibling, b.m_next_sibling);
+    }
+    else // adjacent siblings: the two nodes also link to each other
+    {
+        if(a.m_next_sibling == ib) // n will go after m
+        {
+            _RYML_CB_ASSERT(m_callbacks, b.m_prev_sibling == ia);
+            if(a.m_prev_sibling != NONE)
+            {
+                _RYML_CB_ASSERT(m_callbacks, a.m_prev_sibling != ib);
+                _p(a.m_prev_sibling)->m_next_sibling = ib;
+            }
+            if(b.m_next_sibling != NONE)
+            {
+                _RYML_CB_ASSERT(m_callbacks, b.m_next_sibling != ia);
+                _p(b.m_next_sibling)->m_prev_sibling = ia;
+            }
+            id_type ns = b.m_next_sibling; // saved before it is overwritten: becomes a's next sibling
+            b.m_prev_sibling = a.m_prev_sibling;
+            b.m_next_sibling = ia;
+            a.m_prev_sibling = ib;
+            a.m_next_sibling = ns;
+        }
+        else if(a.m_prev_sibling == ib) // m will go after n
+        {
+            _RYML_CB_ASSERT(m_callbacks, b.m_next_sibling == ia);
+            if(b.m_prev_sibling != NONE)
+            {
+                _RYML_CB_ASSERT(m_callbacks, b.m_prev_sibling != ia);
+                _p(b.m_prev_sibling)->m_next_sibling = ia;
+            }
+            if(a.m_next_sibling != NONE)
+            {
+                _RYML_CB_ASSERT(m_callbacks, a.m_next_sibling != ib);
+                _p(a.m_next_sibling)->m_prev_sibling = ib;
+            }
+            id_type ns = a.m_next_sibling; // a's old successor becomes b's next sibling (mirror of the branch above)
+            a.m_prev_sibling = b.m_prev_sibling;
+            a.m_next_sibling = ib;
+            b.m_prev_sibling = ia;
+            b.m_next_sibling = ns;
+        }
+        else
+        {
+            C4_NEVER_REACH();
+        }
+    }
+    _RYML_CB_ASSERT(m_callbacks, a.m_next_sibling != ia);
+    _RYML_CB_ASSERT(m_callbacks, a.m_prev_sibling != ia);
+    _RYML_CB_ASSERT(m_callbacks, b.m_next_sibling != ib);
+    _RYML_CB_ASSERT(m_callbacks, b.m_prev_sibling != ib);
+
+    if(a.m_parent != ib && b.m_parent != ia) // neither node is the other's parent
+    {
+        std::swap(a.m_parent, b.m_parent);
+    }
+    else
+    {
+        if(a.m_parent == ib && b.m_parent != ia) // b was a's parent: a moves up, b goes under a
+        {
+            a.m_parent = b.m_parent;
+            b.m_parent = ia;
+        }
+        else if(a.m_parent != ib && b.m_parent == ia) // a was b's parent: b moves up, a goes under b
+        {
+            b.m_parent = a.m_parent;
+            a.m_parent = ib;
+        }
+        else
+        {
+            C4_NEVER_REACH();
+        }
+    }
+}
+
+//-----------------------------------------------------------------------------
+void Tree::_copy_hierarchy(id_type dst_, id_type src_) // makes dst_ take src_'s place: children, siblings and parent are repointed to dst_
+{
+    auto const& C4_RESTRICT src = *_p(src_);
+    auto      & C4_RESTRICT dst = *_p(dst_);
+    auto      & C4_RESTRICT prt = *_p(src.m_parent);
+    for(id_type i = src.m_first_child; i != NONE; i = next_sibling(i)) // the children get dst_ as parent
+    {
+        _p(i)->m_parent = dst_;
+    }
+    if(src.m_prev_sibling != NONE)
+    {
+        _p(src.m_prev_sibling)->m_next_sibling = dst_;
+    }
+    if(src.m_next_sibling != NONE)
+    {
+        _p(src.m_next_sibling)->m_prev_sibling = dst_;
+    }
+    if(prt.m_first_child == src_)
+    {
+        prt.m_first_child = dst_;
+    }
+    if(prt.m_last_child  == src_)
+    {
+        prt.m_last_child  = dst_;
+    }
+    dst.m_parent       = src.m_parent; // finally dst takes over src's own link fields
+    dst.m_first_child  = src.m_first_child;
+    dst.m_last_child   = src.m_last_child;
+    dst.m_prev_sibling = src.m_prev_sibling;
+    dst.m_next_sibling = src.m_next_sibling;
+}
+
+//-----------------------------------------------------------------------------
+void Tree::_swap_props(id_type n_, id_type m_) // exchanges type, key and val of the two nodes; hierarchy links are not touched
+{
+    NodeData &C4_RESTRICT n = *_p(n_);
+    NodeData &C4_RESTRICT m = *_p(m_);
+    std::swap(n.m_type, m.m_type);
+    std::swap(n.m_key, m.m_key);
+    std::swap(n.m_val, m.m_val);
+}
+
+//-----------------------------------------------------------------------------
+void Tree::move(id_type node, id_type after) // repositions node among its siblings, right after `after` (NONE: to the front)
+{
+    _RYML_CB_ASSERT(m_callbacks, node != NONE);
+    _RYML_CB_ASSERT(m_callbacks, node != after);
+    _RYML_CB_ASSERT(m_callbacks,  ! is_root(node));
+    _RYML_CB_ASSERT(m_callbacks, (after == NONE) || (has_sibling(node, after) && has_sibling(after, node)));
+
+    _rem_hierarchy(node); // leaves node's own m_parent in place...
+    _set_hierarchy(node, parent(node), after); // ...so parent(node) still names the original parent here
+}
+
+//-----------------------------------------------------------------------------
+
+void Tree::move(id_type node, id_type new_parent, id_type after) // relinks node under new_parent, right after `after` (NONE: as first child)
+{
+    _RYML_CB_ASSERT(m_callbacks, node != NONE);
+    _RYML_CB_ASSERT(m_callbacks, node != after);
+    _RYML_CB_ASSERT(m_callbacks, new_parent != NONE);
+    _RYML_CB_ASSERT(m_callbacks, new_parent != node);
+    _RYML_CB_ASSERT(m_callbacks, new_parent != after);
+    _RYML_CB_ASSERT(m_callbacks,  ! is_root(node));
+
+    _rem_hierarchy(node); // unlink from the current position
+    _set_hierarchy(node, new_parent, after); // link at the new one; the node keeps its id
+}
+
+id_type Tree::move(Tree *src, id_type node, id_type new_parent, id_type after) // moves node from tree src into this tree; returns the id of the new node in this tree
+{
+    _RYML_CB_ASSERT(m_callbacks, src != nullptr);
+    _RYML_CB_ASSERT(m_callbacks, node != NONE);
+    _RYML_CB_ASSERT(m_callbacks, new_parent != NONE);
+    _RYML_CB_ASSERT(m_callbacks, new_parent != after);
+
+    id_type dup = duplicate(src, node, new_parent, after); // copy into this tree first...
+    src->remove(node); // ...then drop the original from src
+    return dup;
+}
+
+void Tree::set_root_as_stream() // turns the root into a STREAM node, pushing its current contents down into a new child document
+{
+    id_type root = root_id();
+    if(is_stream(root)) // already a stream: nothing to do
+        return;
+    // don't use _add_flags() because it's checked and will fail
+    if(!has_children(root))
+    {
+        if(is_val(root)) // a lone value moves into a new DOC child
+        {
+            _p(root)->m_type.add(SEQ);
+            id_type next_doc = append_child(root);
+            _copy_props_wo_key(next_doc, root);
+            _p(next_doc)->m_type.add(DOC);
+            _p(next_doc)->m_type.rem(SEQ); // drop the SEQ flag copied over from the root
+        }
+        _p(root)->m_type = STREAM;
+        return;
+    }
+    _RYML_CB_ASSERT(m_callbacks, !has_key(root));
+    id_type next_doc = append_child(root); // appended last, so it also marks where the loop below stops
+    _copy_props_wo_key(next_doc, root);
+    _add_flags(next_doc, DOC);
+    for(id_type prev = NONE, ch = first_child(root), next = next_sibling(ch); ch != NONE; ) // `next` is read ahead because move() relinks ch
+    {
+        if(ch == next_doc)
+            break;
+        move(ch, next_doc, prev); // keeps the original order: each child goes after the previously moved one
+        prev = ch;
+        ch = next;
+        next = next_sibling(next);
+    }
+    _p(root)->m_type = STREAM;
+}
+
+
+//-----------------------------------------------------------------------------
+void Tree::remove_children(id_type node) // releases all descendants of node; node itself is kept
+{
+    _RYML_CB_ASSERT(m_callbacks, get(node) != nullptr);
+    id_type ich = get(node)->m_first_child;
+    while(ich != NONE)
+    {
+        remove_children(ich); // depth first: grandchildren go before the child
+        _RYML_CB_ASSERT(m_callbacks, get(ich) != nullptr);
+        id_type next = get(ich)->m_next_sibling; // read before _release() relinks ich into the free list
+        _release(ich);
+        if(ich == get(node)->m_last_child)
+            break;
+        ich = next;
+    }
+}
+
+bool Tree::change_type(id_type node, NodeType type) // switches node between val, map and seq; returns false when it already has the requested kind
+{
+    _RYML_CB_ASSERT(m_callbacks, type.is_val() || type.is_map() || type.is_seq());
+    _RYML_CB_ASSERT(m_callbacks, type.is_val() + type.is_map() + type.is_seq() == 1); // exactly one of the three kinds
+    _RYML_CB_ASSERT(m_callbacks, type.has_key() == has_key(node) || (has_key(node) && !type.has_key()));
+    NodeData *d = _p(node);
+    if(type.is_map() && is_map(node))
+        return false;
+    else if(type.is_seq() && is_seq(node))
+        return false;
+    else if(type.is_val() && is_val(node))
+        return false;
+    d->m_type = (d->m_type & (~(MAP|SEQ|VAL))) | type; // replace only the MAP/SEQ/VAL bits, keeping the other flags
+    remove_children(node); // the old children are released
+    return true;
+}
+
+
+//-----------------------------------------------------------------------------
+id_type Tree::duplicate(id_type node, id_type parent, id_type after) // same-tree convenience overload: the source tree is this tree
+{
+    return duplicate(this, node, parent, after);
+}
+
+id_type Tree::duplicate(Tree const* src, id_type node, id_type parent, id_type after) // copies src's node and its whole subtree under parent, after `after`; returns the id of the copy
+{
+    _RYML_CB_ASSERT(m_callbacks, src != nullptr);
+    _RYML_CB_ASSERT(m_callbacks, node != NONE);
+    _RYML_CB_ASSERT(m_callbacks, parent != NONE);
+    _RYML_CB_ASSERT(m_callbacks,  ! src->is_root(node));
+
+    id_type copy = _claim();
+
+    _copy_props(copy, src, node);
+    _set_hierarchy(copy, parent, after);
+    duplicate_children(src, node, copy, NONE); // recurse into the subtree
+
+    return copy;
+}
+
+//-----------------------------------------------------------------------------
+id_type Tree::duplicate_children(id_type node, id_type parent, id_type after) // same-tree convenience overload: the source tree is this tree
+{
+    return duplicate_children(this, node, parent, after);
+}
+
+id_type Tree::duplicate_children(Tree const* src, id_type node, id_type parent, id_type after) // copies the children of src's node under parent, in order, starting after `after`
+{
+    _RYML_CB_ASSERT(m_callbacks, src != nullptr);
+    _RYML_CB_ASSERT(m_callbacks, node != NONE);
+    _RYML_CB_ASSERT(m_callbacks, parent != NONE);
+    _RYML_CB_ASSERT(m_callbacks, after == NONE || has_child(parent, after));
+
+    id_type prev = after;
+    for(id_type i = src->first_child(node); i != NONE; i = src->next_sibling(i))
+    {
+        prev = duplicate(src, i, parent, prev); // each copy goes right after the previous one
+    }
+
+    return prev; // the last copy made, or `after` when node has no children
+}
+
+//-----------------------------------------------------------------------------
+void Tree::duplicate_contents(id_type node, id_type where) // same-tree convenience overload: the source tree is this tree
+{
+    duplicate_contents(this, node, where);
+}
+
+void Tree::duplicate_contents(Tree const *src, id_type node, id_type where) // copies src node's properties (except the key) and children into the existing node `where`
+{
+    _RYML_CB_ASSERT(m_callbacks, src != nullptr);
+    _RYML_CB_ASSERT(m_callbacks, node != NONE);
+    _RYML_CB_ASSERT(m_callbacks, where != NONE);
+    _copy_props_wo_key(where, src, node);
+    duplicate_children(src, node, where, last_child(where)); // appended after the children `where` already has
+}
+
+//-----------------------------------------------------------------------------
+id_type Tree::duplicate_children_no_rep(id_type node, id_type parent, id_type after) // same-tree convenience overload: the source tree is this tree
+{
+    return duplicate_children_no_rep(this, node, parent, after);
+}
+
+id_type Tree::duplicate_children_no_rep(Tree const *src, id_type node, id_type parent, id_type after) // like duplicate_children(), but a map parent never ends up with two children having the same key
+{
+    _RYML_CB_ASSERT(m_callbacks, node != NONE);
+    _RYML_CB_ASSERT(m_callbacks, parent != NONE);
+    _RYML_CB_ASSERT(m_callbacks, after == NONE || has_child(parent, after));
+
+    // don't loop using pointers as there may be a relocation
+
+    // find the position where "after" is
+    id_type after_pos = NONE;
+    if(after != NONE)
+    {
+        for(id_type i = first_child(parent), icount = 0; i != NONE; ++icount, i = next_sibling(i))
+        {
+            if(i == after)
+            {
+                after_pos = icount;
+                break;
+            }
+        }
+        _RYML_CB_ASSERT(m_callbacks, after_pos != NONE);
+    }
+
+    // for each child to be duplicated...
+    id_type prev = after;
+    for(id_type i = src->first_child(node); i != NONE; i = src->next_sibling(i)) // i is an id in src, not in this tree
+    {
+        if(is_seq(parent))
+        {
+            prev = duplicate(src, i, parent, prev); // read the node from src: i means nothing in this tree when src != this
+        }
+        else
+        {
+            _RYML_CB_ASSERT(m_callbacks, is_map(parent));
+            // does the parent already have a node with key equal to that of the current duplicate?
+            id_type rep = NONE, rep_pos = NONE;
+            for(id_type j = first_child(parent), jcount = 0; j != NONE; ++jcount, j = next_sibling(j))
+            {
+                if(key(j) == src->key(i)) // j lives in this tree, i lives in src
+                {
+                    rep = j;
+                    rep_pos = jcount;
+                    break;
+                }
+            }
+            if(rep == NONE) // there is no repetition; just duplicate
+            {
+                prev = duplicate(src, i, parent, prev);
+            }
+            else  // yes, there is a repetition
+            {
+                if(after_pos != NONE && rep_pos < after_pos)
+                {
+                    // rep is located before the node which will be inserted,
+                    // and will be overridden by the duplicate. So replace it.
+                    remove(rep);
+                    prev = duplicate(src, i, parent, prev);
+                }
+                else if(prev == NONE)
+                {
+                    // first iteration with prev = after = NONE and repetition
+                    prev = rep;
+                }
+                else if(rep != prev)
+                {
+                    // rep is located after the node which will be inserted
+                    // and overrides it. So move the rep into this node's place.
+                    move(rep, prev);
+                    prev = rep;
+                }
+            } // there's a repetition
+        }
+    }
+
+    return prev;
+}
+
+
+//-----------------------------------------------------------------------------
+
+void Tree::merge_with(Tree const *src, id_type src_node, id_type dst_node) // recursively merges src's subtree at src_node into this tree's dst_node
+{
+    _RYML_CB_ASSERT(m_callbacks, src != nullptr);
+    if(src_node == NONE) // NONE selects the respective root
+        src_node = src->root_id();
+    if(dst_node == NONE)
+        dst_node = root_id();
+    _RYML_CB_ASSERT(m_callbacks, src->has_val(src_node) || src->is_seq(src_node) || src->is_map(src_node));
+    if(src->has_val(src_node)) // value: the destination is overwritten
+    {
+        type_bits mask_src = ~STYLE; // keep the existing style if it is already a val
+        if( ! has_val(dst_node))
+        {
+            if(has_children(dst_node))
+                remove_children(dst_node);
+            mask_src |= VAL_STYLE; // copy the src style
+        }
+        if(src->is_keyval(src_node))
+        {
+            _copy_props(dst_node, src, src_node, mask_src);
+        }
+        else
+        {
+            _RYML_CB_ASSERT(m_callbacks, src->is_val(src_node));
+            _copy_props_wo_key(dst_node, src, src_node, mask_src);
+        }
+    }
+    else if(src->is_seq(src_node)) // sequence: every source child is appended to the destination
+    {
+        if( ! is_seq(dst_node)) // destination has another kind: rebuild it as a seq
+        {
+            if(has_children(dst_node))
+                remove_children(dst_node);
+            _clear_type(dst_node);
+            if(src->has_key(src_node))
+                to_seq(dst_node, src->key(src_node));
+            else
+                to_seq(dst_node);
+            _p(dst_node)->m_type = src->_p(src_node)->m_type;
+        }
+        for(id_type sch = src->first_child(src_node); sch != NONE; sch = src->next_sibling(sch))
+        {
+            id_type dch = append_child(dst_node);
+            _copy_props_wo_key(dch, src, sch);
+            merge_with(src, sch, dch);
+        }
+    }
+    else // map: children are matched by key
+    {
+        _RYML_CB_ASSERT(m_callbacks, src->is_map(src_node));
+        if( ! is_map(dst_node)) // destination has another kind: rebuild it as a map
+        {
+            if(has_children(dst_node))
+                remove_children(dst_node);
+            _clear_type(dst_node);
+            if(src->has_key(src_node))
+                to_map(dst_node, src->key(src_node));
+            else
+                to_map(dst_node);
+            _p(dst_node)->m_type = src->_p(src_node)->m_type;
+        }
+        for(id_type sch = src->first_child(src_node); sch != NONE; sch = src->next_sibling(sch))
+        {
+            id_type dch = find_child(dst_node, src->key(sch));
+            if(dch == NONE) // key not present yet: add a new child for it
+            {
+                dch = append_child(dst_node);
+                _copy_props(dch, src, sch);
+            }
+            merge_with(src, sch, dch); // existing or new, the child is then merged recursively
+        }
+    }
+}
+
+
+//-----------------------------------------------------------------------------
+
+void Tree::resolve()
+{
+    // empty tree: nothing to do; otherwise delegate with a call-local resolver
+    if(m_size == 0) return;
+    ReferenceResolver scratch;
+    resolve(&scratch);
+}
+
+void Tree::resolve(ReferenceResolver *C4_RESTRICT rr)
+{
+    // the caller's resolver is only invoked when the tree has nodes
+    if(m_size != 0)
+        rr->resolve(this);
+}
+
+
+//-----------------------------------------------------------------------------
+
+id_type Tree::num_children(id_type node) const
+{
+    id_type total = 0; // children are chained as siblings, so counting is a linear walk
+    for(id_type ch = first_child(node); ch != NONE; ch = next_sibling(ch))
+        ++total;
+    return total;
+}
+
+id_type Tree::child(id_type node, id_type pos) const
+{
+    // id of the child found at 0-based position `pos` in the sibling
+    // chain of `node`, or NONE when the chain is shorter than that
+    _RYML_CB_ASSERT(m_callbacks, node != NONE);
+    id_type ch = first_child(node);
+    for(id_type index = 0; ch != NONE; ++index, ch = next_sibling(ch))
+        if(index == pos)
+            return ch;
+    return NONE;
+}
+
+id_type Tree::child_pos(id_type node, id_type ch) const
+{
+    // 0-based position of `ch` among the children of `node`; NONE if absent
+    _RYML_CB_ASSERT(m_callbacks, node != NONE);
+    id_type index = 0;
+    for(id_type sib = first_child(node); sib != NONE; sib = next_sibling(sib), ++index)
+    {
+        if(sib == ch)
+            return index;
+    }
+    return NONE;
+}
+
+#if defined(__clang__)
+#   pragma clang diagnostic push
+#   pragma GCC diagnostic ignored "-Wnull-dereference"
+#elif defined(__GNUC__)
+#   pragma GCC diagnostic push
+#   if __GNUC__ >= 6
+#       pragma GCC diagnostic ignored "-Wnull-dereference"
+#   endif
+#   if __GNUC__ > 9
+#       pragma GCC diagnostic ignored "-Wanalyzer-null-dereference"
+#   endif
+#endif
+
+id_type Tree::find_child(id_type node, csubstr const& name) const
+{
+    // Linear search among the children of the map `node` for the
+    // first one whose key scalar equals `name`. Returns the id of
+    // that child, or NONE when no child matches.
+    _RYML_CB_ASSERT(m_callbacks, node != NONE);
+    _RYML_CB_ASSERT(m_callbacks, is_map(node));
+    if(get(node)->m_first_child != NONE)
+    {
+        _RYML_CB_ASSERT(m_callbacks, _p(node)->m_last_child != NONE);
+    }
+    else
+    {
+        _RYML_CB_ASSERT(m_callbacks, _p(node)->m_last_child == NONE);
+        return NONE;
+    }
+    // stop at the first key match, or at NONE past the last child
+    id_type match = first_child(node);
+    while(match != NONE && !(_p(match)->m_key.scalar == name))
+        match = next_sibling(match);
+    return match;
+}
+
+#if defined(__clang__)
+#   pragma clang diagnostic pop
+#elif defined(__GNUC__)
+#   pragma GCC diagnostic pop
+#endif
+
+namespace {
+id_type depth_desc_(Tree const& C4_RESTRICT t, id_type id, id_type currdepth=0, id_type maxdepth=0)
+{
+    if(currdepth > maxdepth) maxdepth = currdepth; // `id` itself sits at depth `currdepth`
+    for(id_type ch = t.first_child(id); ch != NONE; ch = t.next_sibling(ch))
+    {
+        const id_type sub = depth_desc_(t, ch, currdepth+1, maxdepth);
+        if(sub > maxdepth) maxdepth = sub;
+    }
+    return maxdepth;
+}
+} // namespace
+
+id_type Tree::depth_desc(id_type node) const // deepest level found below `node`, with `node` itself counted as level 0
+{
+    _RYML_CB_ASSERT(m_callbacks, node != NONE);
+    return depth_desc_(*this, node, /*currdepth*/0, /*maxdepth*/0);
+}
+
+id_type Tree::depth_asc(id_type node) const
+{
+    // counts the parent hops needed to climb from `node` up to the
+    // root of the tree; the count is 0 when `node` is itself the
+    // root
+    _RYML_CB_ASSERT(m_callbacks, node != NONE);
+    id_type hops = 0;
+    for(id_type cur = node; !is_root(cur); cur = parent(cur))
+        ++hops;
+    return hops;
+}
+
+
+//-----------------------------------------------------------------------------
+
+void Tree::to_val(id_type node, csubstr val, type_bits more_flags) // make the childless `node` a keyless scalar value
+{
+    _RYML_CB_ASSERT(m_callbacks,  ! has_children(node));
+    _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || ! parent_is_map(node)); // a keyless node must not sit in a map
+    _set_flags(node, VAL|more_flags);
+    _p(node)->m_key.clear(); // whatever key the node had before is dropped
+    _p(node)->m_val = val;
+}
+
+void Tree::to_keyval(id_type node, csubstr key, csubstr val, type_bits more_flags) // make the childless `node` a key-value pair
+{
+    _RYML_CB_ASSERT(m_callbacks,  ! has_children(node));
+    _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || parent_is_map(node)); // a keyed node needs a map parent, or no parent
+    _set_flags(node, KEYVAL|more_flags);
+    _p(node)->m_key = key;
+    _p(node)->m_val = val;
+}
+
+void Tree::to_map(id_type node, type_bits more_flags) // make the childless `node` a keyless map
+{
+    _RYML_CB_ASSERT(m_callbacks,  ! has_children(node));
+    _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || ! parent_is_map(node)); // parent must not have children with keys
+    _set_flags(node, MAP|more_flags);
+    _p(node)->m_key.clear(); // both the key and the value are emptied
+    _p(node)->m_val.clear();
+}
+
+void Tree::to_map(id_type node, csubstr key, type_bits more_flags) // make the childless `node` a map stored under `key`
+{
+    _RYML_CB_ASSERT(m_callbacks,  ! has_children(node));
+    _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || parent_is_map(node)); // a keyed node needs a map parent, or no parent
+    _set_flags(node, KEY|MAP|more_flags);
+    _p(node)->m_key = key;
+    _p(node)->m_val.clear(); // the value is emptied
+}
+
+void Tree::to_seq(id_type node, type_bits more_flags) // make the childless `node` a keyless sequence
+{
+    _RYML_CB_ASSERT(m_callbacks,  ! has_children(node));
+    _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || parent_is_seq(node)); // NOTE(review): stricter than the keyless to_map(), which only requires a non-map parent
+    _set_flags(node, SEQ|more_flags);
+    _p(node)->m_key.clear(); // both the key and the value are emptied
+    _p(node)->m_val.clear();
+}
+
+void Tree::to_seq(id_type node, csubstr key, type_bits more_flags) // make the childless `node` a sequence stored under `key`
+{
+    _RYML_CB_ASSERT(m_callbacks,  ! has_children(node));
+    _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || parent_is_map(node)); // a keyed node needs a map parent, or no parent
+    _set_flags(node, KEY|SEQ|more_flags);
+    _p(node)->m_key = key;
+    _p(node)->m_val.clear(); // the value is emptied
+}
+
+void Tree::to_doc(id_type node, type_bits more_flags) // flag the childless `node` as DOC; unlike the other to_*() setters, the parent is not checked
+{
+    _RYML_CB_ASSERT(m_callbacks,  ! has_children(node));
+    _set_flags(node, DOC|more_flags);
+    _p(node)->m_key.clear(); // both the key and the value are emptied
+    _p(node)->m_val.clear();
+}
+
+void Tree::to_stream(id_type node, type_bits more_flags) // flag the childless `node` as STREAM; the parent is not checked
+{
+    _RYML_CB_ASSERT(m_callbacks,  ! has_children(node));
+    _set_flags(node, STREAM|more_flags);
+    _p(node)->m_key.clear(); // both the key and the value are emptied
+    _p(node)->m_val.clear();
+}
+
+
+//-----------------------------------------------------------------------------
+id_type Tree::num_tag_directives() const
+{
+    // the count is the index of the first slot with an empty handle
+    id_type used = 0;
+    while(used < RYML_MAX_TAG_DIRECTIVES && !m_tag_directives[used].handle.empty())
+        ++used;
+    return used;
+}
+
+void Tree::clear_tag_directives()
+{
+    for(TagDirective &slot : m_tag_directives)
+        slot = TagDirective{}; // every slot goes back to its value-initialized state
+}
+
+id_type Tree::add_tag_directive(TagDirective const& td) // validates td, stores it in the first free slot and returns that slot's index
+{
+    _RYML_CB_CHECK(m_callbacks, !td.handle.empty());
+    _RYML_CB_CHECK(m_callbacks, !td.prefix.empty());
+    _RYML_CB_CHECK(m_callbacks, td.handle.begins_with('!'));
+    _RYML_CB_CHECK(m_callbacks, td.handle.ends_with('!'));
+    // the handle must be "!", "!!" or word chars between two '!': https://yaml.org/spec/1.2.2/#rule-ns-word-char
+    _RYML_CB_CHECK(m_callbacks, td.handle == '!' || td.handle == "!!" || td.handle.trim('!').first_not_of("01234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-") == npos);
+    id_type pos = num_tag_directives(); // index of the first slot with an empty handle
+    _RYML_CB_CHECK(m_callbacks, pos < RYML_MAX_TAG_DIRECTIVES); // fails when every slot is already in use
+    m_tag_directives[pos] = td;
+    return pos;
+}
+
+bool Tree::add_tag_directive(csubstr directive_)
+{
+    // parse the directive text first; it is only stored when the
+    // parse succeeds, and the parse outcome is what gets returned
+    TagDirective parsed;
+    const bool ok = parsed.create_from_str(directive_, this);
+    if(ok)
+        add_tag_directive(parsed);
+    return ok;
+}
+
+size_t Tree::resolve_tag(substr output, csubstr tag, id_type node_id) const
+{
+    // Scan the directive slots from last to first. The directive that
+    // applies is the first one met whose handle is a prefix of the tag
+    // and whose next_node_id is not greater than node_id.
+    for(id_type slot = RYML_MAX_TAG_DIRECTIVES; slot-- > 0; )
+    {
+        auto const& cand = m_tag_directives[slot];
+        const bool applies = !cand.handle.empty()
+                          && tag.begins_with(cand.handle)
+                          && cand.next_node_id <= node_id;
+        if(applies)
+            return cand.transform(tag, output, m_callbacks);
+    }
+    // no directive applies: that is an error when the tag starts
+    // with '!' and is_custom_tag() holds for it
+    if(tag.begins_with('!') && is_custom_tag(tag))
+    {
+        _RYML_CB_ERR(m_callbacks, "tag directive not found");
+    }
+    return 0; // return 0 to signal that the tag is local and cannot be resolved
+}
+
+namespace {
+csubstr _transform_tag(Tree *t, csubstr tag, id_type node) // returns the resolved form of `tag` for `node`: either `tag` itself or a buffer allocated in the tree's arena
+{
+    _c4dbgpf("[{}] resolving tag ~~~{}~~~", node, tag);
+    size_t required_size = t->resolve_tag(substr{}, tag, node); // first call gets an empty buffer; its result is the size allocated below
+    if(!required_size)
+    {
+        if(tag.begins_with("!<")) // zero: the tag is kept, minus its first character when it starts with "!<"
+            tag = tag.sub(1);
+        _c4dbgpf("[{}] resolved tag: ~~~{}~~~", node, tag);
+        return tag;
+    }
+    const char *prev_arena = t->arena().str;(void)prev_arena; // the (void) cast keeps the variable used when the assertion below compiles out
+    substr buf = t->alloc_arena(required_size);
+    _RYML_CB_ASSERT(t->m_callbacks, t->arena().str == prev_arena); // the allocation must not have moved the arena
+    size_t actual_size = t->resolve_tag(buf, tag, node); // second call writes into buf
+    _RYML_CB_ASSERT(t->m_callbacks, actual_size <= required_size);
+    _c4dbgpf("[{}] resolved tag: ~~~{}~~~", node, buf.first(actual_size));
+    return buf.first(actual_size);
+}
+void _resolve_tags(Tree *t, id_type node) // pre-order walk: resolves the key/val tags of `node`, then recurses into each child
+{
+    NodeData *C4_RESTRICT d = t->_p(node);
+    if(d->m_type & KEYTAG)
+        d->m_key.tag = _transform_tag(t, d->m_key.tag, node);
+    if(d->m_type & VALTAG)
+        d->m_val.tag = _transform_tag(t, d->m_val.tag, node);
+    for(id_type child = t->first_child(node); child != NONE; child = t->next_sibling(child))
+        _resolve_tags(t, child);
+}
+size_t _count_resolved_tags_size(Tree const* t, id_type node) // sums the resolve_tag() results (called with an empty buffer) over `node` and all its descendants
+{
+    size_t sz = 0;
+    NodeData const* C4_RESTRICT d = t->_p(node);
+    if(d->m_type & KEYTAG)
+        sz += t->resolve_tag(substr{}, d->m_key.tag, node);
+    if(d->m_type & VALTAG)
+        sz += t->resolve_tag(substr{}, d->m_val.tag, node);
+    for(id_type child = t->first_child(node); child != NONE; child = t->next_sibling(child))
+        sz += _count_resolved_tags_size(t, child);
+    return sz;
+}
+void _normalize_tags(Tree *t, id_type node) // pre-order walk: replaces each key/val tag with normalize_tag() of itself
+{
+    NodeData *C4_RESTRICT d = t->_p(node);
+    if(d->m_type & KEYTAG)
+        d->m_key.tag = normalize_tag(d->m_key.tag);
+    if(d->m_type & VALTAG)
+        d->m_val.tag = normalize_tag(d->m_val.tag);
+    for(id_type child = t->first_child(node); child != NONE; child = t->next_sibling(child))
+        _normalize_tags(t, child);
+}
+void _normalize_tags_long(Tree *t, id_type node) // same walk as _normalize_tags(), but applying normalize_tag_long()
+{
+    NodeData *C4_RESTRICT d = t->_p(node);
+    if(d->m_type & KEYTAG)
+        d->m_key.tag = normalize_tag_long(d->m_key.tag);
+    if(d->m_type & VALTAG)
+        d->m_val.tag = normalize_tag_long(d->m_val.tag);
+    for(id_type child = t->first_child(node); child != NONE; child = t->next_sibling(child))
+        _normalize_tags_long(t, child);
+}
+} // namespace
+
+void Tree::resolve_tags()
+{
+    if(empty()) return;
+    // a first pass sizes the resolved tags so the arena is reserved before they are written
+    const size_t extra = _count_resolved_tags_size(this, root_id());
+    if(extra != 0)
+        reserve_arena(arena_size() + extra);
+    _resolve_tags(this, root_id());
+}
+
+void Tree::normalize_tags()
+{
+    // runs the _normalize_tags() walk from the root, unless the tree is empty
+    if( ! empty())
+        _normalize_tags(this, root_id());
+}
+
+void Tree::normalize_tags_long()
+{
+    // runs the _normalize_tags_long() walk from the root, unless the tree is empty
+    if( ! empty())
+        _normalize_tags_long(this, root_id());
+}
+
+
+//-----------------------------------------------------------------------------
+
+csubstr Tree::lookup_result::resolved() const
+{
+    // the consumed prefix of the path, without a trailing '.' separator
+    const csubstr head = path.first(path_pos);
+    if( ! head.ends_with('.'))
+        return head;
+    return head.first(head.len-1);
+}
+
+csubstr Tree::lookup_result::unresolved() const // the part of the path that has not been consumed yet
+{
+    return path.sub(path_pos); // everything from path_pos onwards
+}
+
+void Tree::_advance(lookup_result *r, size_t more) const
+{
+    // consume `more` characters, plus one '.' separator when it comes next
+    r->path_pos += more;
+    r->path_pos += r->unresolved().begins_with('.') ? 1u : 0u;
+}
+
+Tree::lookup_result Tree::lookup_path(csubstr path, id_type start) const
+{
+    const id_type origin = (start != NONE) ? start : root_id(); // NONE means: begin at the root
+    lookup_result result(path, origin);
+    if(path.empty())
+        return result;
+    _lookup_path(&result);
+    const bool nothing_matched = (result.target == NONE && result.closest == origin);
+    if(nothing_matched)
+        result.closest = NONE; // do not report the starting node as the closest match
+    return result;
+}
+
+id_type Tree::lookup_path_or_modify(csubstr default_value, csubstr path, id_type start)
+{
+    const id_type found = _lookup_path_or_create(path, start); // found or freshly created
+    if( ! parent_is_map(found))
+        to_val(found, default_value);
+    else
+        to_keyval(found, key(found), default_value); // children of maps keep their key
+    return found;
+}
+
+id_type Tree::lookup_path_or_modify(Tree const *src, id_type src_node, csubstr path, id_type start)
+{
+    const id_type dst_node = _lookup_path_or_create(path, start); // found or freshly created
+    merge_with(src, src_node, dst_node);
+    return dst_node;
+}
+
+id_type Tree::_lookup_path_or_create(csubstr path, id_type start)
+{
+    // NONE as the start means: begin at the root
+    lookup_result r(path, (start != NONE) ? start : root_id());
+    _lookup_path(&r);
+    if(r.target == NONE)
+    {
+        // the read-only lookup fell short: create what is missing
+        _lookup_path_modify(&r);
+        return r.target;
+    }
+    C4_ASSERT(r.unresolved().empty());
+    return r.target;
+}
+
+void Tree::_lookup_path(lookup_result *r) const
+{
+    // Consume the path token by token. Each node found becomes the new
+    // `closest`; once the path is used up the last result is the `target`.
+    C4_ASSERT( ! r->unresolved().empty());
+    _lookup_path_token parent{"", type(r->closest)};
+    for(;;)
+    {
+        const id_type node = _next_node(r, &parent);
+        if(node != NONE)
+            r->closest = node;
+        if(r->unresolved().empty())
+            r->target = node;
+        if(node == NONE || r->unresolved().empty())
+            return;
+    }
+}
+
+void Tree::_lookup_path_modify(lookup_result *r)
+{
+    // Same token-by-token walk as _lookup_path(), but stepping with
+    // _next_node_modify(), which creates the nodes that are missing.
+    C4_ASSERT( ! r->unresolved().empty());
+    _lookup_path_token parent{"", type(r->closest)};
+    for(;;)
+    {
+        const id_type node = _next_node_modify(r, &parent);
+        if(node != NONE)
+            r->closest = node;
+        if(r->unresolved().empty())
+            r->target = node;
+        if(node == NONE || r->unresolved().empty())
+            return;
+    }
+}
+
+id_type Tree::_next_node(lookup_result * r, _lookup_path_token *parent) const // read-only step: takes the next path token and returns the matching child of r->closest, or NONE
+{
+    _lookup_path_token token = _next_token(r, *parent);
+    if( ! token)
+        return NONE;
+
+    id_type node = NONE;
+    csubstr prev = token.value; // copy taken before any trimming; its length is used below to rewind path_pos
+    if(token.type == MAP || token.type == SEQ)
+    {
+        _RYML_CB_ASSERT(m_callbacks, !token.value.begins_with('['));
+        //_RYML_CB_ASSERT(m_callbacks, is_container(r->closest) || r->closest == NONE);
+        _RYML_CB_ASSERT(m_callbacks, is_map(r->closest));
+        node = find_child(r->closest, token.value); // a named container is looked up by key
+    }
+    else if(token.type == KEYVAL)
+    {
+        _RYML_CB_ASSERT(m_callbacks, r->unresolved().empty()); // a KEYVAL token must be the last token of the path
+        if(is_map(r->closest))
+            node = find_child(r->closest, token.value);
+    }
+    else if(token.type == KEY)
+    {
+        _RYML_CB_ASSERT(m_callbacks, token.value.begins_with('[') && token.value.ends_with(']'));
+        token.value = token.value.offs(1, 1).trim(' '); // presumably "[ n ]" -> "n" (offs() is not visible here)
+        id_type idx = 0;
+        _RYML_CB_CHECK(m_callbacks, from_chars(token.value, &idx));
+        node = child(r->closest, idx); // positional lookup among the children
+    }
+    else
+    {
+        C4_NEVER_REACH();
+    }
+
+    if(node != NONE)
+    {
+        *parent = token; // the token just matched becomes the context for the next one
+    }
+    else
+    {
+        csubstr p = r->path.sub(r->path_pos > 0 ? r->path_pos - 1 : r->path_pos); // starts at the character just before path_pos
+        r->path_pos -= prev.len; // no match: give the token's characters back
+        if(p.begins_with('.'))
+            r->path_pos -= 1u; // and also the '.' separator that _advance() skipped
+    }
+
+    return node;
+}
+
+id_type Tree::_next_node_modify(lookup_result * r, _lookup_path_token *parent) // mutating step: takes the next path token and returns the matching child of r->closest, creating it when missing
+{
+    _lookup_path_token token = _next_token(r, *parent);
+    if( ! token)
+        return NONE;
+
+    id_type node = NONE;
+    if(token.type == MAP || token.type == SEQ)
+    {
+        _RYML_CB_ASSERT(m_callbacks, !token.value.begins_with('['));
+        //_RYML_CB_ASSERT(m_callbacks, is_container(r->closest) || r->closest == NONE);
+        if( ! is_container(r->closest)) // a non-container closest node is first turned into a map, keeping its key if it has one
+        {
+            if(has_key(r->closest))
+                to_map(r->closest, key(r->closest));
+            else
+                to_map(r->closest);
+        }
+        else
+        {
+            if(is_map(r->closest))
+                node = find_child(r->closest, token.value);
+            else
+            {
+                id_type pos = NONE;
+                _RYML_CB_CHECK(m_callbacks, c4::atox(token.value, &pos)); // closest is a non-map container: the token must parse as a position
+                _RYML_CB_ASSERT(m_callbacks, pos != NONE);
+                node = child(r->closest, pos);
+            }
+        }
+        if(node == NONE) // nothing found: append a child that carries the token as its key
+        {
+            _RYML_CB_ASSERT(m_callbacks, is_map(r->closest));
+            node = append_child(r->closest);
+            NodeData *n = _p(node);
+            n->m_key.scalar = token.value;
+            n->m_type.add(KEY);
+        }
+    }
+    else if(token.type == KEYVAL)
+    {
+        _RYML_CB_ASSERT(m_callbacks, r->unresolved().empty()); // a KEYVAL token must be the last token of the path
+        if(is_map(r->closest))
+        {
+            node = find_child(r->closest, token.value);
+            if(node == NONE)
+                node = append_child(r->closest);
+        }
+        else
+        {
+            _RYML_CB_ASSERT(m_callbacks, !is_seq(r->closest));
+            _add_flags(r->closest, MAP); // the closest node gains the MAP flag before receiving a child
+            node = append_child(r->closest);
+        }
+        NodeData *n = _p(node);
+        n->m_key.scalar = token.value;
+        n->m_val.scalar = ""; // the value is set to empty, also when the child already existed
+        n->m_type.add(KEYVAL);
+    }
+    else if(token.type == KEY)
+    {
+        _RYML_CB_ASSERT(m_callbacks, token.value.begins_with('[') && token.value.ends_with(']'));
+        token.value = token.value.offs(1, 1).trim(' '); // presumably "[ n ]" -> "n" (offs() is not visible here)
+        id_type idx;
+        if( ! from_chars(token.value, &idx))
+             return NONE; // the text between the brackets is not a valid index
+        if( ! is_container(r->closest)) // a non-container closest node is first turned into a seq, keeping its key if it has one
+        {
+            if(has_key(r->closest))
+            {
+                csubstr k = key(r->closest); // copied out before the type is cleared
+                _clear_type(r->closest);
+                to_seq(r->closest, k);
+            }
+            else
+            {
+                _clear_type(r->closest);
+                to_seq(r->closest);
+            }
+        }
+        _RYML_CB_ASSERT(m_callbacks, is_container(r->closest));
+        node = child(r->closest, idx);
+        if(node == NONE)
+        {
+            _RYML_CB_ASSERT(m_callbacks, num_children(r->closest) <= idx);
+            for(id_type i = num_children(r->closest); i <= idx; ++i) // append children up to and including position idx
+            {
+                node = append_child(r->closest);
+                if(i < idx) // children before idx are fillers with empty scalars; the one at idx is left untyped here
+                {
+                    if(is_map(r->closest))
+                        to_keyval(node, /*"~"*/{}, /*"~"*/{});
+                    else if(is_seq(r->closest))
+                        to_val(node, /*"~"*/{});
+                }
+            }
+        }
+    }
+    else
+    {
+        C4_NEVER_REACH();
+    }
+
+    _RYML_CB_ASSERT(m_callbacks, node != NONE);
+    *parent = token; // the token just handled becomes the context for the next one
+    return node;
+}
+
+/* types of tokens:
+ * - seeing "map."  ---> "map"/MAP
+ * - finishing "scalar" ---> "scalar"/KEYVAL
+ * - seeing "seq[n]" ---> "seq"/SEQ (--> "[n]"/KEY)
+ * - seeing "[n]" ---> "[n]"/KEY
+ */
+Tree::_lookup_path_token Tree::_next_token(lookup_result *r, _lookup_path_token const& parent) const // splits the next token off the unresolved path and advances r past it; an empty token means no (valid) token
+{
+    csubstr unres = r->unresolved();
+    if(unres.empty())
+        return {};
+
+    // is it an indexation like [0], [1], etc?
+    if(unres.begins_with('['))
+    {
+        size_t pos = unres.find(']');
+        if(pos == csubstr::npos)
+            return {}; // unterminated index: no token, and the path position is left untouched
+        csubstr idx = unres.first(pos + 1); // the token keeps its brackets, eg "[n]"
+        _advance(r, pos + 1);
+        return {idx, KEY};
+    }
 
-#ifdef RYML_DBG
-    template<class ...Args> void _dbg(csubstr fmt, Args const& C4_RESTRICT ...args) const;
-#endif
-    template<class ...Args> void _err(csubstr fmt, Args const& C4_RESTRICT ...args) const;
-    template<class DumpFn>  void _fmt_msg(DumpFn &&dumpfn) const;
-    static csubstr _prfl(substr buf, flag_t v);
+    // no. so it must be a name
+    size_t pos = unres.first_of(".[");
+    if(pos == csubstr::npos)
+    {
+        _advance(r, unres.len);
+        // last name of the path: VAL without a parent token or under a seq, KEYVAL otherwise
+        if(( ! parent) || parent.type.is_seq())
+            return {unres, VAL};
+        return {unres, KEYVAL};
+    }
 
-private:
+    // it's either a map or a seq
+    _RYML_CB_ASSERT(m_callbacks, unres[pos] == '.' || unres[pos] == '[');
+    if(unres[pos] == '.')
+    {
+        _RYML_CB_ASSERT(m_callbacks, pos != 0);
+        _advance(r, pos + 1); // skips the name and the '.' after it
+        return {unres.first(pos), MAP};
+    }
 
-    csubstr m_file;
-     substr m_buf;
+    _RYML_CB_ASSERT(m_callbacks, unres[pos] == '[');
+    _advance(r, pos); // stops at the '[' so that the next token is the "[n]" index
+    return {unres.first(pos), SEQ};
+}
 
-    size_t  m_root_id;
-    Tree *  m_tree;
 
-    detail::stack<State> m_stack;
-    State * m_state;
+} // namespace yml
+} // namespace c4
 
-    size_t  m_key_tag_indentation;
-    size_t  m_key_tag2_indentation;
-    csubstr m_key_tag;
-    csubstr m_key_tag2;
-    size_t  m_val_tag_indentation;
-    csubstr m_val_tag;
 
-    bool    m_key_anchor_was_before;
-    size_t  m_key_anchor_indentation;
-    csubstr m_key_anchor;
-    size_t  m_val_anchor_indentation;
-    csubstr m_val_anchor;
+C4_SUPPRESS_WARNING_GCC_CLANG_POP
+C4_SUPPRESS_WARNING_MSVC_POP
 
-    substr m_filter_arena;
+#endif /* RYML_SINGLE_HDR_DEFINE_NOW */
 
-    mutable size_t *m_newline_offsets;
-    mutable size_t  m_newline_offsets_size;
-    mutable size_t  m_newline_offsets_capacity;
-    mutable csubstr m_newline_offsets_buf;
-};
 
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/tree.cpp)
 
-//-----------------------------------------------------------------------------
-//-----------------------------------------------------------------------------
-//-----------------------------------------------------------------------------
 
-/** @name parse_in_place
- *
- * @desc parse a mutable YAML source buffer.
- *
- * @note These freestanding functions use a temporary parser object,
- * and are convenience functions to easily parse YAML without the need
- * to instantiate a separate parser. Note that some properties
- * (notably node locations in the original source code) are only
- * available through the parser object after it has parsed the
- * code. If you need access to any of these properties, use
- * Parser::parse_in_place() */
-/** @{ */
 
-inline Tree parse_in_place(                  substr yaml                         ) { Parser np; return np.parse_in_place({}      , yaml); } //!< parse in-situ a modifiable YAML source buffer.
-inline Tree parse_in_place(csubstr filename, substr yaml                         ) { Parser np; return np.parse_in_place(filename, yaml); } //!< parse in-situ a modifiable YAML source buffer, providing a filename for error messages.
-inline void parse_in_place(                  substr yaml, Tree *t                ) { Parser np; np.parse_in_place({}      , yaml, t); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer
-inline void parse_in_place(csubstr filename, substr yaml, Tree *t                ) { Parser np; np.parse_in_place(filename, yaml, t); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer, providing a filename for error messages.
-inline void parse_in_place(                  substr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_place({}      , yaml, t, node_id); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer
-inline void parse_in_place(csubstr filename, substr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_place(filename, yaml, t, node_id); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer, providing a filename for error messages.
-inline void parse_in_place(                  substr yaml, NodeRef node           ) { Parser np; np.parse_in_place({}      , yaml, node); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer
-inline void parse_in_place(csubstr filename, substr yaml, NodeRef node           ) { Parser np; np.parse_in_place(filename, yaml, node); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer, providing a filename for error messages.
-
-RYML_DEPRECATED("use parse_in_place() instead") inline Tree parse(                  substr yaml                         ) { Parser np; return np.parse_in_place({}      , yaml); }
-RYML_DEPRECATED("use parse_in_place() instead") inline Tree parse(csubstr filename, substr yaml                         ) { Parser np; return np.parse_in_place(filename, yaml); }
-RYML_DEPRECATED("use parse_in_place() instead") inline void parse(                  substr yaml, Tree *t                ) { Parser np; np.parse_in_place({}      , yaml, t); }
-RYML_DEPRECATED("use parse_in_place() instead") inline void parse(csubstr filename, substr yaml, Tree *t                ) { Parser np; np.parse_in_place(filename, yaml, t); }
-RYML_DEPRECATED("use parse_in_place() instead") inline void parse(                  substr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_place({}      , yaml, t, node_id); }
-RYML_DEPRECATED("use parse_in_place() instead") inline void parse(csubstr filename, substr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_place(filename, yaml, t, node_id); }
-RYML_DEPRECATED("use parse_in_place() instead") inline void parse(                  substr yaml, NodeRef node           ) { Parser np; np.parse_in_place({}      , yaml, node); }
-RYML_DEPRECATED("use parse_in_place() instead") inline void parse(csubstr filename, substr yaml, NodeRef node           ) { Parser np; np.parse_in_place(filename, yaml, node); }
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/c4/yml/parse_engine.def.hpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/parse_engine.def.hpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
 
-/** @} */
+#ifndef _C4_YML_PARSE_ENGINE_DEF_HPP_
+#define _C4_YML_PARSE_ENGINE_DEF_HPP_
 
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/parse_engine.hpp
+//#include "c4/yml/parse_engine.hpp"
+#if !defined(C4_YML_PARSE_ENGINE_HPP_) && !defined(_C4_YML_PARSE_ENGINE_HPP_)
+#error "amalgamate: file c4/yml/parse_engine.hpp must have been included at this point"
+#endif /* C4_YML_PARSE_ENGINE_HPP_ */
 
-//-----------------------------------------------------------------------------
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/error.hpp
+//#include "c4/error.hpp"
+#if !defined(C4_ERROR_HPP_) && !defined(_C4_ERROR_HPP_)
+#error "amalgamate: file c4/error.hpp must have been included at this point"
+#endif /* C4_ERROR_HPP_ */
 
-/** @name parse_in_arena
- * @desc parse a read-only YAML source buffer, copying it first to the tree's arena.
- *
- * @note These freestanding functions use a temporary parser object,
- * and are convenience functions to easily parse YAML without the need
- * to instantiate a separate parser. Note that some properties
- * (notably node locations in the original source code) are only
- * available through the parser object after it has parsed the
- * code. If you need access to any of these properties, use
- * Parser::parse_in_arena().
- *
- * @note overloads receiving a substr YAML buffer are intentionally
- * left undefined, such that calling parse_in_arena() with a substr
- * will cause a linker error. This is to prevent an accidental
- * copy of the source buffer to the tree's arena, because substr
- * is implicitly convertible to csubstr. If you really intend to parse
- * a mutable buffer in the tree's arena, convert it first to immutable
- * by assigning the substr to a csubstr prior to calling parse_in_arena().
- * This is not needed for parse_in_place() because csubstr is not
- * implicitly convertible to substr. */
-/** @{ */
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/charconv.hpp
+//#include "c4/charconv.hpp"
+#if !defined(C4_CHARCONV_HPP_) && !defined(_C4_CHARCONV_HPP_)
+#error "amalgamate: file c4/charconv.hpp must have been included at this point"
+#endif /* C4_CHARCONV_HPP_ */
 
-/* READ THE NOTE ABOVE! */
-RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_in_arena(                  substr yaml                         );
-RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_in_arena(csubstr filename, substr yaml                         );
-RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(                  substr yaml, Tree *t                );
-RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr yaml, Tree *t                );
-RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(                  substr yaml, Tree *t, size_t node_id);
-RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr yaml, Tree *t, size_t node_id);
-RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(                  substr yaml, NodeRef node           );
-RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr yaml, NodeRef node           );
-
-inline Tree parse_in_arena(                  csubstr yaml                         ) { Parser np; return np.parse_in_arena({}      , yaml); } //!< parse a read-only YAML source buffer, copying it first to the tree's source arena.
-inline Tree parse_in_arena(csubstr filename, csubstr yaml                         ) { Parser np; return np.parse_in_arena(filename, yaml); } //!< parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages.
-inline void parse_in_arena(                  csubstr yaml, Tree *t                ) { Parser np; np.parse_in_arena({}      , yaml, t); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena.
-inline void parse_in_arena(csubstr filename, csubstr yaml, Tree *t                ) { Parser np; np.parse_in_arena(filename, yaml, t); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages.
-inline void parse_in_arena(                  csubstr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_arena({}      , yaml, t, node_id); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena.
-inline void parse_in_arena(csubstr filename, csubstr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_arena(filename, yaml, t, node_id); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages.
-inline void parse_in_arena(                  csubstr yaml, NodeRef node           ) { Parser np; np.parse_in_arena({}      , yaml, node); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena.
-inline void parse_in_arena(csubstr filename, csubstr yaml, NodeRef node           ) { Parser np; np.parse_in_arena(filename, yaml, node); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages.
-
-RYML_DEPRECATED("use parse_in_arena() instead") inline Tree parse(                  csubstr yaml                         ) { Parser np; return np.parse_in_arena({}      , yaml); } //!< parse a read-only YAML source buffer, copying it first to the tree's source arena.
-RYML_DEPRECATED("use parse_in_arena() instead") inline Tree parse(csubstr filename, csubstr yaml                         ) { Parser np; return np.parse_in_arena(filename, yaml); } //!< parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages.
-RYML_DEPRECATED("use parse_in_arena() instead") inline void parse(                  csubstr yaml, Tree *t                ) { Parser np; np.parse_in_arena({}      , yaml, t); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena.
-RYML_DEPRECATED("use parse_in_arena() instead") inline void parse(csubstr filename, csubstr yaml, Tree *t                ) { Parser np; np.parse_in_arena(filename, yaml, t); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages.
-RYML_DEPRECATED("use parse_in_arena() instead") inline void parse(                  csubstr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_arena({}      , yaml, t, node_id); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena.
-RYML_DEPRECATED("use parse_in_arena() instead") inline void parse(csubstr filename, csubstr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_arena(filename, yaml, t, node_id); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages.
-RYML_DEPRECATED("use parse_in_arena() instead") inline void parse(                  csubstr yaml, NodeRef node           ) { Parser np; np.parse_in_arena({}      , yaml, node); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena.
-RYML_DEPRECATED("use parse_in_arena() instead") inline void parse(csubstr filename, csubstr yaml, NodeRef node           ) { Parser np; np.parse_in_arena(filename, yaml, node); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages.
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/utf.hpp
+//#include "c4/utf.hpp"
+#if !defined(C4_UTF_HPP_) && !defined(_C4_UTF_HPP_)
+#error "amalgamate: file c4/utf.hpp must have been included at this point"
+#endif /* C4_UTF_HPP_ */
 
-/** @} */
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/dump.hpp
+//#include <c4/dump.hpp>
+#if !defined(C4_DUMP_HPP_) && !defined(_C4_DUMP_HPP_)
+#error "amalgamate: file c4/dump.hpp must have been included at this point"
+#endif /* C4_DUMP_HPP_ */
 
-} // namespace yml
-} // namespace c4
 
-#if defined(_MSC_VER)
-#   pragma warning(pop)
-#endif
+//included above:
+//#include <ctype.h>
 
-#endif /* _C4_YML_PARSE_HPP_ */
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/parser_dbg.hpp
+//#include "c4/yml/detail/parser_dbg.hpp"
+#if !defined(C4_YML_DETAIL_PARSER_DBG_HPP_) && !defined(_C4_YML_DETAIL_PARSER_DBG_HPP_)
+#error "amalgamate: file c4/yml/detail/parser_dbg.hpp must have been included at this point"
+#endif /* C4_YML_DETAIL_PARSER_DBG_HPP_ */
 
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/filter_processor.hpp
+//#include "c4/yml/filter_processor.hpp"
+#if !defined(C4_YML_FILTER_PROCESSOR_HPP_) && !defined(_C4_YML_FILTER_PROCESSOR_HPP_)
+#error "amalgamate: file c4/yml/filter_processor.hpp must have been included at this point"
+#endif /* C4_YML_FILTER_PROCESSOR_HPP_ */
 
-// (end https://github.com/biojppm/rapidyaml/src/c4/yml/parse.hpp)
+#ifdef RYML_DBG
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/print.hpp
+//#include "c4/yml/detail/print.hpp"
+#if !defined(C4_YML_DETAIL_PRINT_HPP_) && !defined(_C4_YML_DETAIL_PRINT_HPP_)
+#error "amalgamate: file c4/yml/detail/print.hpp must have been included at this point"
+#endif /* C4_YML_DETAIL_PRINT_HPP_ */
 
+#endif
 
 
-//********************************************************************************
-//--------------------------------------------------------------------------------
-// src/c4/yml/std/map.hpp
-// https://github.com/biojppm/rapidyaml/src/c4/yml/std/map.hpp
-//--------------------------------------------------------------------------------
-//********************************************************************************
+#if defined(RYML_WITH_TAB_TOKENS) // build-time switch: decides which of the helper macros below keep their arguments
+#define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__ // arguments are emitted
+#define _RYML_WITHOUT_TAB_TOKENS(...) // arguments are dropped
+#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with // selects the first alternative
+#else
+#define _RYML_WITH_TAB_TOKENS(...) // arguments are dropped
+#define _RYML_WITHOUT_TAB_TOKENS(...) __VA_ARGS__ // arguments are emitted
+#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without // selects the second alternative
+#endif
 
-#ifndef _C4_YML_STD_MAP_HPP_
-#define _C4_YML_STD_MAP_HPP_
 
-/** @file map.hpp write/read std::map to/from a YAML tree. */
+// scaffold: debug trace reporting the line number and byte offset of the handler's current position
+#define _c4dbgnextline()                           \
+    do {                                           \
+       _c4dbgq("\n-----------");                   \
+       _c4dbgt("handling line={}, offset={}B",     \
+               m_evt_handler->m_curr->pos.line,    \
+               m_evt_handler->m_curr->pos.offset); \
+    } while(0)
 
-// amalgamate: removed include of
-// https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp
-//#include "c4/yml/node.hpp"
-#if !defined(C4_YML_NODE_HPP_) && !defined(_C4_YML_NODE_HPP_)
-#error "amalgamate: file c4/yml/node.hpp must have been included at this point"
-#endif /* C4_YML_NODE_HPP_ */
 
-#include <map>
+#if defined(_MSC_VER)
+#   pragma warning(push)
+#   pragma warning(disable: 4296/*expression is always 'boolean_value'*/)
+#   pragma warning(disable: 4702/*unreachable code*/)
+#elif defined(__clang__)
+#   pragma clang diagnostic push
+#   pragma clang diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0.
+#   pragma clang diagnostic ignored "-Wformat-nonliteral"
+#   pragma clang diagnostic ignored "-Wold-style-cast"
+#elif defined(__GNUC__)
+#   pragma GCC diagnostic push
+#   pragma GCC diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0.
+#   pragma GCC diagnostic ignored "-Wformat-nonliteral"
+#   pragma GCC diagnostic ignored "-Wold-style-cast"
+#   if __GNUC__ >= 7
+#       pragma GCC diagnostic ignored "-Wduplicated-branches"
+#   endif
+#endif
 
 namespace c4 {
 namespace yml {
 
-// std::map requires child nodes in the data
-// tree hierarchy (a MAP node in ryml parlance).
-// So it should be serialized via write()/read().
+namespace {
 
-template<class K, class V, class Less, class Alloc>
-void write(c4::yml::NodeRef *n, std::map<K, V, Less, Alloc> const& m)
+C4_HOT C4_ALWAYS_INLINE bool _is_blck_token(csubstr s) noexcept // true when the leading '-', ':' or '?' stands alone or is followed by a space (or a tab, when tab tokens are enabled)
 {
-    *n |= c4::yml::MAP;
-    for(auto const& C4_RESTRICT p : m)
+    RYML_ASSERT(s.len > 0);
+    RYML_ASSERT(s.str[0] == '-' || s.str[0] == ':' || s.str[0] == '?'); // the caller must already have matched the token character
+    return ((s.len == 1) || ((s.str[1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[1] == '\t'))));
+}
+
+inline bool _is_doc_begin_token(csubstr s) // true when s is "---" alone, or "---" followed by a space (or a tab, when tab tokens are enabled)
+{
+    RYML_ASSERT(s.begins_with('-'));
+    RYML_ASSERT(!s.ends_with("\n"));
+    RYML_ASSERT(!s.ends_with("\r"));
+    const bool three_dashes = (s.len >= 3) && (s.str[1] == '-') && (s.str[2] == '-');
+    return three_dashes && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
+}
+
+inline bool _is_doc_end_token(csubstr s) // true when s is "..." alone, or "..." followed by a space (or a tab, when tab tokens are enabled)
+{
+    RYML_ASSERT(s.begins_with('.'));
+    RYML_ASSERT(!s.ends_with("\n"));
+    RYML_ASSERT(!s.ends_with("\r"));
+    const bool three_dots = (s.len >= 3) && (s.str[1] == '.') && (s.str[2] == '.');
+    return three_dots && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
+}
+
+inline bool _is_doc_token(csubstr s) noexcept
+{
+    //
+    // Returns true when s starts with a document marker: "---" or
+    // "..." that either ends s or is followed by a space (or by a
+    // tab, when tab tokens are enabled).
+    //
+    // NOTE: the checks are inlined on purpose: calling the helpers was
+    // failing in some scenarios with gcc -O2 (but not -O3, -O1 or -O0),
+    // likely due to optimizer assumptions on the input string, possibly
+    // from UB around assignment to it (call site was in _scan_block()):
+    // https://github.com/biojppm/rapidyaml/issues/440
+    // The current version does not suffer this, but it may appear again.
+    //
+    if(s.len >= 3)
     {
-        auto ch = n->append_child();
-        ch << c4::yml::key(p.first);
-        ch << p.second;
+        switch(s.str[0])
+        {
+        case '-':
+            //return _is_doc_begin_token(s); // this was failing with gcc -O2
+            return (s.str[1] == '-' && s.str[2] == '-')
+                && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
+        case '.':
+            //return _is_doc_end_token(s); // this was failing with gcc -O2
+            return (s.str[1] == '.' && s.str[2] == '.')
+                && (s.len == 3 || (s.str[3] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[3] == '\t')));
+        }
     }
+    return false;
 }
 
-template<class K, class V, class Less, class Alloc>
-bool read(c4::yml::NodeRef const& n, std::map<K, V, Less, Alloc> * m)
+inline size_t _is_special_json_scalar(csubstr s) // returns the length of a leading "false", "true" or "null" in s, or 0 when s starts with none of them
 {
-    K k{};
-    V v{};
-    for(auto const& C4_RESTRICT ch : n)
+    RYML_ASSERT(s.len);
+    switch(s.str[0]) // dispatch on the first character so that at most one keyword is compared
     {
-        ch >> c4::yml::key(k);
-        ch >> v;
-        m->emplace(std::make_pair(std::move(k), std::move(v)));
+    case 'f':
+        if(s.len >= 5 && s.begins_with("false"))
+            return 5u;
+        break;
+    case 't':
+        if(s.len >= 4 && s.begins_with("true"))
+            return 4u;
+        break;
+    case 'n':
+        if(s.len >= 4 && s.begins_with("null"))
+            return 4u;
+        break;
     }
-    return true;
+    return 0u; // no keyword prefix
 }
 
-} // namespace yml
-} // namespace c4
-
-#endif // _C4_YML_STD_MAP_HPP_
-
-
-// (end https://github.com/biojppm/rapidyaml/src/c4/yml/std/map.hpp)
-
-
-
-//********************************************************************************
-//--------------------------------------------------------------------------------
-// src/c4/yml/std/string.hpp
-// https://github.com/biojppm/rapidyaml/src/c4/yml/std/string.hpp
-//--------------------------------------------------------------------------------
-//********************************************************************************
-
-#ifndef C4_YML_STD_STRING_HPP_
-#define C4_YML_STD_STRING_HPP_
-
-/** @file string.hpp substring conversions for/from std::string */
-
-// everything we need is implemented here:
-// amalgamate: removed include of
-// https://github.com/biojppm/rapidyaml/src/c4/std/string.hpp
-//#include <c4/std/string.hpp>
-#if !defined(C4_STD_STRING_HPP_) && !defined(_C4_STD_STRING_HPP_)
-#error "amalgamate: file c4/std/string.hpp must have been included at this point"
-#endif /* C4_STD_STRING_HPP_ */
-
 
-#endif // C4_YML_STD_STRING_HPP_
+//-----------------------------------------------------------------------------
 
+C4_ALWAYS_INLINE size_t _extend_from_combined_newline(char nl, char following)
+{
+    return (nl == '\n') ? (following == '\r') : (nl == '\r' && following == '\n'); // 1 when nl+following form "\n\r" or "\r\n", else 0
+}
 
-// (end https://github.com/biojppm/rapidyaml/src/c4/yml/std/string.hpp)
+//! return what follows the first line break found in @p rem (a "\r\n" or
+//! "\n\r" pair counts as a single break); empty when there is no break or nothing after it
+inline substr from_next_line(substr rem)
+{
+    const size_t brk = rem.first_of("\r\n");
+    if(brk == csubstr::npos)
+        return {};
+    const char first = rem[brk];
+    substr rest = rem.right_of(brk);
+    if(rest.empty())
+        return {};
+    const size_t skip = _extend_from_combined_newline(first, rest.front());
+    return skip ? rest.sub(1) : rest;
+}
 
 
+//-----------------------------------------------------------------------------
 
-//********************************************************************************
-//--------------------------------------------------------------------------------
-// src/c4/yml/std/vector.hpp
-// https://github.com/biojppm/rapidyaml/src/c4/yml/std/vector.hpp
-//--------------------------------------------------------------------------------
-//********************************************************************************
+inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i)
+{
+    // from just past the '\n' at *i, advance *i over blanks and newlines; returns the number of '\n' crossed
+    RYML_ASSERT(r[*i] == '\n');
+    size_t count = 0;
+    size_t pos = *i + 1;
+    for( ; pos < r.len; ++pos)
+    {
+        const char c = r.str[pos];
+        if(c == '\n')
+            ++count;
+        else if(c != ' ' && c != '\t' && c != '\r')
+            break;
+    }
+    *i = pos;
+    return count;
+}
 
-#ifndef _C4_YML_STD_VECTOR_HPP_
-#define _C4_YML_STD_VECTOR_HPP_
+/** Counts the '\n' found after the one at @p *i while skipping blanks; @p *i ends on the first
+ * character that is not ' ', '\t', '\r' or '\n' (or on r.len). @return the number of newlines counted */
+inline size_t _count_following_newlines(csubstr r, size_t *C4_RESTRICT i, size_t indentation)
+{
+    RYML_ASSERT(r[*i] == '\n');
+    size_t numnl_following = 0;
+    ++(*i);
+    if(indentation == 0)
+    {
+        for( ; *i < r.len; ++(*i))
+        {
+            if(r.str[*i] == '\n')
+                ++numnl_following;
+            // skip leading whitespace
+            else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')
+                ;
+            else
+                break;
+        }
+    }
+    else
+    {
+        for( ; *i < r.len; ++(*i))
+        {
+            if(r.str[*i] == '\n')
+            {
+                ++numnl_following;
+                // skip the indentation after the newline
+                size_t stop = *i + indentation;
+                for( ; *i < r.len; ++(*i)) // NOTE(review): *i still indexes the '\n' matched above, so the first test below breaks out at once and nothing is skipped here — confirm intent
+                {
+                    if(r.str[*i] != ' ' && r.str[*i] != '\r')
+                        break;
+                    RYML_ASSERT(*i < stop);
+                }
+                C4_UNUSED(stop); // stop is only read inside the assertion above
+            }
+            // skip leading whitespace
+            else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')
+                ;
+            else
+                break;
+        }
+    }
+    return numnl_following;
+}
 
-// amalgamate: removed include of
-// https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp
-//#include "c4/yml/node.hpp"
-#if !defined(C4_YML_NODE_HPP_) && !defined(_C4_YML_NODE_HPP_)
-#error "amalgamate: file c4/yml/node.hpp must have been included at this point"
-#endif /* C4_YML_NODE_HPP_ */
+} // anon namespace
 
-// amalgamate: removed include of
-// https://github.com/biojppm/rapidyaml/src/c4/std/vector.hpp
-//#include <c4/std/vector.hpp>
-#if !defined(C4_STD_VECTOR_HPP_) && !defined(_C4_STD_VECTOR_HPP_)
-#error "amalgamate: file c4/std/vector.hpp must have been included at this point"
-#endif /* C4_STD_VECTOR_HPP_ */
 
-//included above:
-//#include <vector>
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
 
-namespace c4 {
-namespace yml {
+template<class EventHandler>
+ParseEngine<EventHandler>::~ParseEngine() // releases the newline-offsets array, then blanks every member
+{
+    _free();
+    _clr();
+}
 
-// vector is a sequence-like type, and it requires child nodes
-// in the data tree hierarchy (a SEQ node in ryml parlance).
-// So it should be serialized via write()/read().
+template<class EventHandler>
+ParseEngine<EventHandler>::ParseEngine(EventHandler *evt_handler, ParserOptions opts) // stores the options and the handler pointer; every other member starts empty
+    : m_options(opts)
+    , m_file()
+    , m_buf()
+    , m_evt_handler(evt_handler)
+    , m_pending_anchors()
+    , m_pending_tags()
+    , m_newline_offsets()
+    , m_newline_offsets_size(0)
+    , m_newline_offsets_capacity(0)
+    , m_newline_offsets_buf()
+{
+    RYML_CHECK(evt_handler); // a null handler is reported through RYML_CHECK
+}
 
-template<class V, class Alloc>
-void write(c4::yml::NodeRef *n, std::vector<V, Alloc> const& vec)
+template<class EventHandler>
+ParseEngine<EventHandler>::ParseEngine(ParseEngine &&that) // move constructor: takes over every member of that, including its newline-offsets pointer
+    : m_options(that.m_options)
+    , m_file(that.m_file)
+    , m_buf(that.m_buf)
+    , m_evt_handler(that.m_evt_handler)
+    , m_pending_anchors(that.m_pending_anchors)
+    , m_pending_tags(that.m_pending_tags)
+    , m_newline_offsets(that.m_newline_offsets)
+    , m_newline_offsets_size(that.m_newline_offsets_size)
+    , m_newline_offsets_capacity(that.m_newline_offsets_capacity)
+    , m_newline_offsets_buf(that.m_newline_offsets_buf)
 {
-    *n |= c4::yml::SEQ;
-    for(auto const& v : vec)
-    {
-        n->append_child() << v;
-    }
+    that._clr(); // that no longer refers to the array now held by *this
 }
 
-template<class V, class Alloc>
-bool read(c4::yml::NodeRef const& n, std::vector<V, Alloc> *vec)
+template<class EventHandler>
+ParseEngine<EventHandler>::ParseEngine(ParseEngine const& that) // copy constructor: copies the plain members and deep-copies the newline offsets
+    : m_options(that.m_options)
+    , m_file(that.m_file)
+    , m_buf(that.m_buf)
+    , m_evt_handler(that.m_evt_handler)
+    , m_pending_anchors(that.m_pending_anchors)
+    , m_pending_tags(that.m_pending_tags)
+    , m_newline_offsets()
+    , m_newline_offsets_size()
+    , m_newline_offsets_capacity()
+    , m_newline_offsets_buf() // NOTE(review): left empty here, whereas operator=(ParseEngine const&) copies that.m_newline_offsets_buf — confirm this is intended
 {
-    vec->resize(n.num_children());
-    size_t pos = 0;
-    for(auto const ch : n)
+    if(that.m_newline_offsets_capacity) // only allocate when that holds an offsets array
     {
-        ch >> (*vec)[pos++];
+        _resize_locations(that.m_newline_offsets_capacity);
+        _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity);
+        memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
+        m_newline_offsets_size = that.m_newline_offsets_size;
     }
-    return true;
 }
 
-} // namespace yml
-} // namespace c4
+template<class EventHandler>
+ParseEngine<EventHandler>& ParseEngine<EventHandler>::operator=(ParseEngine &&that)
+{
+    // Move-assign: release our offsets array, take over every member of `that`, leave `that` cleared.
+    if(this == &that) return *this; // self-move: _free() and _clr() below would otherwise wipe this object
+    _free();
+    m_options = that.m_options; m_file = that.m_file; m_buf = that.m_buf;
+    m_evt_handler = that.m_evt_handler;
+    m_pending_anchors = that.m_pending_anchors;
+    m_pending_tags = that.m_pending_tags;
+    m_newline_offsets = that.m_newline_offsets;
+    m_newline_offsets_size = that.m_newline_offsets_size;
+    m_newline_offsets_capacity = that.m_newline_offsets_capacity;
+    m_newline_offsets_buf = that.m_newline_offsets_buf;
+    that._clr();
+    return *this;
+}
 
-#endif // _C4_YML_STD_VECTOR_HPP_
+template<class EventHandler>
+ParseEngine<EventHandler>& ParseEngine<EventHandler>::operator=(ParseEngine const& that)
+{
+    // Copy-assign: drop our offsets array, copy the plain members, then deep-copy the offsets of `that`.
+    if(this == &that) return *this; // self-assignment: _free() below would discard the very data to be copied
+    _free();
+    m_options = that.m_options; m_file = that.m_file; m_buf = that.m_buf;
+    m_evt_handler = that.m_evt_handler;
+    m_pending_anchors = that.m_pending_anchors; m_pending_tags = that.m_pending_tags;
+    if(that.m_newline_offsets_capacity > m_newline_offsets_capacity)
+        _resize_locations(that.m_newline_offsets_capacity);
+    _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity);
+    _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size);
+    if(that.m_newline_offsets_size) // nothing to copy otherwise, and the pointers may then be null (invalid for memcpy even with size 0)
+        memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
+    m_newline_offsets_size = that.m_newline_offsets_size;
+    m_newline_offsets_buf = that.m_newline_offsets_buf;
+    return *this;
+}
 
+template<class EventHandler>
+void ParseEngine<EventHandler>::_clr() // value-resets every member; frees nothing, so an owned offsets array must be released or handed over beforehand
+{
+    m_options = {};
+    m_file = {};
+    m_buf = {};
+    m_evt_handler = {};
+    m_pending_anchors = {};
+    m_pending_tags = {};
+    m_newline_offsets = {};
+    m_newline_offsets_size = {};
+    m_newline_offsets_capacity = {};
+    m_newline_offsets_buf = {};
+}
 
-// (end https://github.com/biojppm/rapidyaml/src/c4/yml/std/vector.hpp)
+template<class EventHandler>
+void ParseEngine<EventHandler>::_free()
+{
+    // release the newline-offsets array via _RYML_CB_FREE with the handler's callbacks, and zero its bookkeeping
+    if(!m_newline_offsets)
+        return; // no array is held
+    _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity);
+    m_newline_offsets = nullptr;
+    m_newline_offsets_size = 0u;
+    m_newline_offsets_capacity = 0u;
+    m_newline_offsets_buf = 0u;
+}
 
 
+//-----------------------------------------------------------------------------
 
-//********************************************************************************
-//--------------------------------------------------------------------------------
-// src/c4/yml/std/std.hpp
-// https://github.com/biojppm/rapidyaml/src/c4/yml/std/std.hpp
-//--------------------------------------------------------------------------------
-//********************************************************************************
+template<class EventHandler>
+void ParseEngine<EventHandler>::_reset()
+{
+    // forget pending anchors and tags; when the options request locations,
+    // call _prepare_locations(); finally clear the question-mark flag
+    m_pending_anchors = {};
+    m_pending_tags = {};
+    if(m_options.locations())
+        _prepare_locations();
+    m_was_inside_qmrk = false;
+}
 
-#ifndef _C4_YML_STD_STD_HPP_
-#define _C4_YML_STD_STD_HPP_
 
-// amalgamate: removed include of
-// https://github.com/biojppm/rapidyaml/src/c4/yml/std/string.hpp
-//#include "c4/yml/std/string.hpp"
-#if !defined(C4_YML_STD_STRING_HPP_) && !defined(_C4_YML_STD_STRING_HPP_)
-#error "amalgamate: file c4/yml/std/string.hpp must have been included at this point"
-#endif /* C4_YML_STD_STRING_HPP_ */
+//-----------------------------------------------------------------------------
 
-// amalgamate: removed include of
-// https://github.com/biojppm/rapidyaml/src/c4/yml/std/vector.hpp
-//#include "c4/yml/std/vector.hpp"
-#if !defined(C4_YML_STD_VECTOR_HPP_) && !defined(_C4_YML_STD_VECTOR_HPP_)
-#error "amalgamate: file c4/yml/std/vector.hpp must have been included at this point"
-#endif /* C4_YML_STD_VECTOR_HPP_ */
+template<class EventHandler>
+void ParseEngine<EventHandler>::_relocate_arena(csubstr prev_arena, substr next_arena) // re-points the cached substrings that lie in prev_arena to the same offsets in next_arena
+{
+    #define _ryml_relocate(s)                                   \
+    if(s.is_sub(prev_arena))                                    \
+    {                                                           \
+        s.str = next_arena.str + (s.str - prev_arena.str);      \
+    }
+    _ryml_relocate(m_buf);
+    _ryml_relocate(m_newline_offsets_buf);
+    for(size_t i = 0; i < m_pending_tags.num_entries; ++i)
+        _ryml_relocate(m_pending_tags.annotations[i].str);
+    for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
+        _ryml_relocate(m_pending_anchors.annotations[i].str);
+    #undef _ryml_relocate // the helper macro is local to this function
+}
 
-// amalgamate: removed include of
-// https://github.com/biojppm/rapidyaml/src/c4/yml/std/map.hpp
-//#include "c4/yml/std/map.hpp"
-#if !defined(C4_YML_STD_MAP_HPP_) && !defined(_C4_YML_STD_MAP_HPP_)
-#error "amalgamate: file c4/yml/std/map.hpp must have been included at this point"
-#endif /* C4_YML_STD_MAP_HPP_ */
+template<class EventHandler>
+void ParseEngine<EventHandler>::_s_relocate_arena(void* data, csubstr prev_arena, substr next_arena)
+{
+    static_cast<ParseEngine*>(data)->_relocate_arena(prev_arena, next_arena); // data is treated as the engine to forward to; presumably set where this callback is registered — TODO confirm
+}
 
 
-#endif // _C4_YML_STD_STD_HPP_
+//-----------------------------------------------------------------------------
+
+template<class EventHandler>
+template<class DumpFn>
+void ParseEngine<EventHandler>::_fmt_msg(DumpFn &&dumpfn) const // writes "[file:]line:col: <source line>" and a ^~~~ marker under the unparsed remainder, all through dumpfn
+{
+    auto const *const C4_RESTRICT st = m_evt_handler->m_curr;
+    auto const& lc = st->line_contents;
+    csubstr contents = lc.stripped;
+    if(contents.len)
+    {
+        // print the yaml src line
+        size_t offs = 3u + to_chars(substr{}, st->pos.line) + to_chars(substr{}, st->pos.col); // width of the "line:col: " prefix; presumably to_chars() on an empty buffer returns the length needed — TODO confirm
+        if(m_file.len)
+        {
+            detail::_dump(dumpfn, "{}:", m_file);
+            offs += m_file.len + 1;
+        }
+        detail::_dump(dumpfn, "{}:{}: ", st->pos.line, st->pos.col);
+        csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u)); // at most 80 characters of the line are shown
+        csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr("..."));
+        detail::_dump(dumpfn, "{}{}  (size={})\n", maybe_full_content, maybe_ellipsis, contents.len);
+        // highlight the remaining portion of the previous line
+        size_t firstcol = (size_t)(lc.rem.begin() - lc.full.begin());
+        size_t lastcol = firstcol + lc.rem.len;
+        for(size_t i = 0; i < offs + firstcol; ++i) // pad up to the column where rem starts
+            dumpfn(" ");
+        dumpfn("^");
+        for(size_t i = 1, e = (lc.rem.len < 80u ? lc.rem.len : 80u); i < e; ++i)
+            dumpfn("~");
+        detail::_dump(dumpfn, "{}  (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1);
+    }
+    else
+    {
+        dumpfn("\n");
+    }
 
+#ifdef RYML_DBG
+    // next line: print the state flags
+    {
+        char flagbuf_[128];
+        detail::_dump(dumpfn, "top state: {}\n", detail::_parser_flags_to_str(flagbuf_, m_evt_handler->m_curr->flags));
+    }
+#endif
+}
 
-// (end https://github.com/biojppm/rapidyaml/src/c4/yml/std/std.hpp)
 
+//-----------------------------------------------------------------------------
 
+template<class EventHandler>
+template<class ...Args>
+void ParseEngine<EventHandler>::_err(csubstr fmt, Args const& C4_RESTRICT ...args) const // formats the message plus source context into a stack buffer, cancels the parse and invokes the error callback
+{
+    char errmsg[RYML_ERRMSG_SIZE];
+    detail::_SubstrWriter writer(errmsg);
+    auto dumpfn = [&writer](csubstr s){ writer.append(s); };
+    detail::_dump(dumpfn, fmt, args...);
+    writer.append('\n');
+    _fmt_msg(dumpfn); // appends the offending source line and position marker
+    size_t len = writer.pos < RYML_ERRMSG_SIZE ? writer.pos : RYML_ERRMSG_SIZE; // clamp to the buffer size; presumably writer.pos can run past it — TODO confirm in _SubstrWriter
+    m_evt_handler->cancel_parse();
+    m_evt_handler->m_stack.m_callbacks.m_error(errmsg, len, m_evt_handler->m_curr->pos, m_evt_handler->m_stack.m_callbacks.m_user_data);
+}
 
-//********************************************************************************
-//--------------------------------------------------------------------------------
-// src/c4/yml/common.cpp
-// https://github.com/biojppm/rapidyaml/src/c4/yml/common.cpp
-//--------------------------------------------------------------------------------
-//********************************************************************************
 
-#ifdef RYML_SINGLE_HDR_DEFINE_NOW
-// amalgamate: removed include of
-// https://github.com/biojppm/rapidyaml/src/c4/yml/common.hpp
-//#include "c4/yml/common.hpp"
-#if !defined(C4_YML_COMMON_HPP_) && !defined(_C4_YML_COMMON_HPP_)
-#error "amalgamate: file c4/yml/common.hpp must have been included at this point"
-#endif /* C4_YML_COMMON_HPP_ */
+//-----------------------------------------------------------------------------
+#ifdef RYML_DBG
+template<class EventHandler>
+template<class ...Args>
+void ParseEngine<EventHandler>::_dbg(csubstr fmt, Args const& C4_RESTRICT ...args) const // debug builds only: prints the formatted message and the source context to stdout
+{
+    if(_dbg_enabled())
+    {
+        auto dumpfn = [](csubstr s){ if(s.str) fwrite(s.str, 1, s.len, stdout); }; // strings with a null pointer are not written
+        detail::_dump(dumpfn, fmt, args...);
+        dumpfn("\n");
+        _fmt_msg(dumpfn);
+    }
+}
+#endif
 
 
-#ifndef RYML_NO_DEFAULT_CALLBACKS
-//included above:
-//#   include <stdlib.h>
-//included above:
-//#   include <stdio.h>
-#endif // RYML_NO_DEFAULT_CALLBACKS
+//-----------------------------------------------------------------------------
+template<class EventHandler>
+bool ParseEngine<EventHandler>::_finished_file() const
+{
+    const bool at_end = (m_evt_handler->m_curr->pos.offset >= m_buf.len); // true once the current offset has reached the end of the buffer
+    if(at_end)
+    {
+        _c4dbgp("finished file!!!");
+    }
+    return at_end;
+}
 
-namespace c4 {
-namespace yml {
+template<class EventHandler>
+C4_HOT C4_ALWAYS_INLINE bool ParseEngine<EventHandler>::_finished_line() const // true when nothing remains of the current line
+{
+    return m_evt_handler->m_curr->line_contents.rem.empty();
+}
 
-namespace {
-Callbacks s_default_callbacks;
-} // anon namespace
 
-#ifndef RYML_NO_DEFAULT_CALLBACKS
-void report_error_impl(const char* msg, size_t length, Location loc, FILE *f)
+//-----------------------------------------------------------------------------
+
+template<class EventHandler>
+void ParseEngine<EventHandler>::_maybe_skip_whitespace_tokens() // if the remainder starts with a space (or a tab, when tab tokens are enabled), advances past the whole leading run
 {
-    if(!f)
-        f = stderr;
-    if(loc)
+    csubstr rem = m_evt_handler->m_curr->line_contents.rem;
+    if(rem.len && (rem.str[0] == ' ' _RYML_WITH_TAB_TOKENS(|| rem.str[0] == '\t')))
     {
-        if(!loc.name.empty())
-        {
-            fwrite(loc.name.str, 1, loc.name.len, f);
-            fputc(':', f);
-        }
-        fprintf(f, "%zu:", loc.line);
-        if(loc.col)
-            fprintf(f, "%zu:", loc.col);
-        if(loc.offset)
-            fprintf(f, " (%zuB):", loc.offset);
+        size_t pos = rem.first_not_of(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); // the skipped set matches the build setting
+        if(pos == npos)
+            pos = rem.len; // maybe the line is just all whitespace
+        _c4dbgpf("skip {} whitespace characters", pos);
+        _line_progressed(pos);
     }
-    fprintf(f, "%.*s\n", (int)length, msg);
-    fflush(f);
 }
 
-void error_impl(const char* msg, size_t length, Location loc, void * /*user_data*/)
+template<class EventHandler>
+void ParseEngine<EventHandler>::_maybe_skipchars(char c) // if the remainder starts with c, advances past the whole leading run of c; otherwise does nothing
 {
-    report_error_impl(msg, length, loc, nullptr);
-    ::abort();
+    csubstr rem = m_evt_handler->m_curr->line_contents.rem;
+    if(rem.len && rem.str[0] == c)
+    {
+        size_t pos = rem.first_not_of(c);
+        if(pos == npos)
+            pos = rem.len; // maybe the line is just all c
+        _c4dbgpf("skip {}x'{}'", pos, c);
+        _line_progressed(pos);
+    }
 }
 
-void* allocate_impl(size_t length, void * /*hint*/, void * /*user_data*/)
+#ifdef RYML_NO_COVERAGE__TO_BE_DELETED
+template<class EventHandler>
+void ParseEngine<EventHandler>::_maybe_skipchars_up_to(char c, size_t max_to_skip) // like _maybe_skipchars(), but advances by at most max_to_skip characters
 {
-    void *mem = ::malloc(length);
-    if(mem == nullptr)
+    csubstr rem = m_evt_handler->m_curr->line_contents.rem;
+    if(rem.len && rem.str[0] == c)
     {
-        const char msg[] = "could not allocate memory";
-        error_impl(msg, sizeof(msg)-1, {}, nullptr);
+        size_t pos = rem.first_not_of(c);
+        if(pos == npos)
+            pos = rem.len; // maybe the line is just all c
+        if(pos > max_to_skip)
+            pos = max_to_skip; // cap the amount skipped
+        _c4dbgpf("skip {}x'{}'", pos, c);
+        _line_progressed(pos);
     }
-    return mem;
 }
+#endif
 
-void free_impl(void *mem, size_t /*length*/, void * /*user_data*/)
+template<class EventHandler>
+template<size_t N>
+void ParseEngine<EventHandler>::_skipchars(const char (&chars)[N]) // advances past the leading run of characters from chars; the remainder is asserted to start with one of them
 {
-    ::free(mem);
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with_any(chars));
+    size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(chars);
+    if(pos == npos)
+        pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line is just whitespace
+    _c4dbgpf("skip {} characters", pos);
+    _line_progressed(pos);
 }
-#endif // RYML_NO_DEFAULT_CALLBACKS
 
+template<class EventHandler>
+void ParseEngine<EventHandler>::_skip_comment() // consumes the rest of the line as a comment; the remainder is asserted to start with '#'
+{
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begins_with('#'));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.is_sub(m_evt_handler->m_curr->line_contents.full));
+    csubstr rem = m_evt_handler->m_curr->line_contents.rem;
+    csubstr full = m_evt_handler->m_curr->line_contents.full;
+    // raise an error if the comment is not preceded by whitespace
+    if(!full.begins_with('#'))
+    {
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.str > full.str);
+        const char c = full[(size_t)(rem.str - full.str - 1)]; // the character just before the '#'
+        if(C4_UNLIKELY(c != ' ' && c != '\t'))
+            _RYML_CB_ERR(m_evt_handler->m_stack.m_callbacks, "comment not preceded by whitespace");
+    }
+    else
+    {
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.str == full.str); // the line itself starts with '#': the comment is expected to be the whole line
+    }
+    _c4dbgpf("comment was '{}'", rem);
+    _line_progressed(rem.len);
+}
 
+template<class EventHandler>
+void ParseEngine<EventHandler>::_maybe_skip_comment()
+{
+    csubstr rem = m_evt_handler->m_curr->line_contents.rem;
+    csubstr trimmed = rem.triml(' ');
+    if(!trimmed.begins_with('#'))
+        return; // no comment after the leading spaces: leave the line untouched
+    _line_progressed((size_t)(trimmed.str - rem.str)); // consume the leading spaces first
+    _skip_comment();
+}
 
-Callbacks::Callbacks()
-    :
-    m_user_data(nullptr),
-    #ifndef RYML_NO_DEFAULT_CALLBACKS
-    m_allocate(allocate_impl),
-    m_free(free_impl),
-    m_error(error_impl)
-    #else
-    m_allocate(nullptr),
-    m_free(nullptr),
-    m_error(nullptr)
-    #endif
+template<class EventHandler>
+bool ParseEngine<EventHandler>::_maybe_scan_following_colon() noexcept // skips leading spaces/tabs, then consumes a ':' if it comes next; returns whether a ':' was consumed
 {
+    if(m_evt_handler->m_curr->line_contents.rem.len)
+    {
+        if(m_evt_handler->m_curr->line_contents.rem.str[0] == ' ' || m_evt_handler->m_curr->line_contents.rem.str[0] == '\t')
+        {
+            size_t pos = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
+            if(pos == npos)
+                pos = m_evt_handler->m_curr->line_contents.rem.len; // maybe the line has only spaces
+            _c4dbgpf("skip {}x'{}'", pos, ' ');
+            _line_progressed(pos); // the blanks stay consumed even when no ':' follows
+        }
+        if(m_evt_handler->m_curr->line_contents.rem.len && (m_evt_handler->m_curr->line_contents.rem.str[0] == ':'))
+        {
+            _c4dbgp("found ':' colon next");
+            _line_progressed(1);
+            return true;
+        }
+    }
+    return false;
 }
 
-Callbacks::Callbacks(void *user_data, pfn_allocate alloc_, pfn_free free_, pfn_error error_)
-    :
-    m_user_data(user_data),
-    #ifndef RYML_NO_DEFAULT_CALLBACKS
-    m_allocate(alloc_ ? alloc_ : allocate_impl),
-    m_free(free_ ? free_ : free_impl),
-    m_error(error_ ? error_ : error_impl)
-    #else
-    m_allocate(alloc_),
-    m_free(free_),
-    m_error(error_)
-    #endif
+template<class EventHandler>
+bool ParseEngine<EventHandler>::_maybe_scan_following_comma() noexcept
 {
-    C4_CHECK(m_allocate);
-    C4_CHECK(m_free);
-    C4_CHECK(m_error);
+    if(!m_evt_handler->m_curr->line_contents.rem.len)
+        return false; // nothing left on this line
+    const char first = m_evt_handler->m_curr->line_contents.rem.str[0];
+    if(first == ' ' || first == '\t')
+    {
+        // skip the run of blanks; a line holding only blanks is consumed whole
+        size_t nblanks = m_evt_handler->m_curr->line_contents.rem.first_not_of(" \t");
+        if(nblanks == npos)
+            nblanks = m_evt_handler->m_curr->line_contents.rem.len;
+        _c4dbgpf("skip {}x'{}'", nblanks, ' ');
+        _line_progressed(nblanks);
+    }
+    // rem may have been advanced above, so it is read again here
+    if(!m_evt_handler->m_curr->line_contents.rem.len || m_evt_handler->m_curr->line_contents.rem.str[0] != ',')
+        return false;
+    _c4dbgp("found ',' comma next");
+    _line_progressed(1);
+    return true;
 }
 
 
-void set_callbacks(Callbacks const& c)
+//-----------------------------------------------------------------------------
+
+template<class EventHandler>
+csubstr ParseEngine<EventHandler>::_scan_anchor() // reads an anchor ('&name') at the current position and returns the name
 {
-    s_default_callbacks = c;
+    csubstr s = m_evt_handler->m_curr->line_contents.rem;
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('&'));
+    csubstr anchor = s.range(1, s.first_of(' ')); // the name: what follows the '&', cut at the first space
+    _line_progressed(1u + anchor.len); // consume the '&' plus the name
+    _maybe_skipchars(' ');
+    return anchor;
 }
 
-Callbacks const& get_callbacks()
+template<class EventHandler>
+csubstr ParseEngine<EventHandler>::_scan_ref_seq() // reads a reference ('*name') using the sequence terminator set
 {
-    return s_default_callbacks;
+    csubstr s = m_evt_handler->m_curr->line_contents.rem;
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('*'));
+    csubstr ref = s.first(s.first_of(",] :")); // cut at the first ',', ']', ' ' or ':'
+    _line_progressed(ref.len); // consume exactly what is returned
+    return ref;
 }
 
-void reset_callbacks()
+template<class EventHandler>
+csubstr ParseEngine<EventHandler>::_scan_ref_map() // reads a reference ('*name') using the map terminator set
 {
-    set_callbacks(Callbacks());
+    csubstr s = m_evt_handler->m_curr->line_contents.rem;
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('*'));
+    csubstr ref = s.first(s.first_of(",} ")); // cut at the first ',', '}' or ' '
+    _line_progressed(ref.len); // consume exactly what is returned
+    return ref;
+}
+
+template<class EventHandler>
+csubstr ParseEngine<EventHandler>::_scan_tag() // reads a tag (text starting with '!') from the current line and returns it
+{
+    csubstr rem = m_evt_handler->m_curr->line_contents.rem.triml(' ');
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.begins_with('!'));
+    csubstr t;
+    if(rem.begins_with("!!"))
+    {
+        _c4dbgp("begins with '!!'");
+        if(has_any(FLOW))
+            t = rem.left_of(rem.first_of(" ,")); // with FLOW set a ',' also ends the tag
+        else
+            t = rem.left_of(rem.first_of(' '));
+    }
+    else if(rem.begins_with("!<"))
+    {
+        _c4dbgp("begins with '!<'");
+        t = rem.left_of(rem.first_of('>'), true); // presumably 'true' keeps the closing '>' in the tag - confirm against left_of()
+    }
+    #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
+    else if(rem.begins_with("!h!"))
+    {
+        _c4dbgp("begins with '!h!'");
+        t = rem.left_of(rem.first_of(' '));
+    }
+    #endif
+    else
+    {
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.begins_with('!'));
+        _c4dbgp("begins with '!'");
+        if(has_any(FLOW))
+            t = rem.left_of(rem.first_of(" ,")); // same terminators as the '!!' branch
+        else
+            t = rem.left_of(rem.first_of(' '));
+    }
+    _line_progressed(t.len); // NOTE(review): counts the tag only, not spaces dropped by triml() above - presumably none are left at this point; confirm
+    _maybe_skip_whitespace_tokens();
+    return t;
 }
 
-void error(const char *msg, size_t msg_len, Location loc)
+
+//-----------------------------------------------------------------------------
+
+template<class EventHandler>
+bool ParseEngine<EventHandler>::_is_valid_start_scalar_plain_flow(csubstr s)
 {
-    s_default_callbacks.m_error(msg, msg_len, loc, s_default_callbacks.m_user_data);
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.empty());
+    // Returns true when the non-empty s may begin a plain scalar in flow context, false when
+    // it begins some other token; token pairs that are never legal are reported through _c4err.
+    switch(s.str[0])
+    {
+    // these are all legal tokens which mean no scalar is starting:
+    case '[':
+    case ']':
+    case '{':
+    case '}':
+    case '!':
+    case '&':
+    case '*':
+    case '|':
+    case '>':
+    case '#':
+        _c4dbgpf("not a scalar: found non-scalar token '{}'", _c4prc(s.str[0]));
+        return false;
+    // '-' and ':' are illegal at the beginning if not followed by a scalar character
+    case '-':
+    case ':':
+        if(s.len > 1)
+        {
+            switch(s.str[1])
+            {
+            case '\n':
+            case '\r':
+            case '{':
+            case '[':
+            //_RYML_WITHOUT_TAB_TOKENS(case '\t'):
+                _c4err("invalid token \"{}{}\"", _c4prc(s.str[0]), _c4prc(s.str[1])); // report the real first char: it may be '-' as well as ':'
+                break;
+            case ' ':
+            case '}':
+            case ']':
+                if(s.str[0] == ':') // only ':' makes these a non-scalar token; '-' falls through and is accepted
+                {
+                    _c4dbgpf("not a scalar: found non-scalar token '{}{}'", s.str[0], s.str[1]);
+                    return false;
+                }
+                break;
+            default:
+                break;
+            }
+        }
+        else
+        {
+            return false; // a lone '-' or ':' is not a scalar
+        }
+        break;
+    case '?':
+        if(s.len > 1)
+        {
+            switch(s.str[1])
+            {
+            case ' ':
+            case '\n':
+            case '\r':
+            _RYML_WITHOUT_TAB_TOKENS(case '\t':)
+                _c4dbgpf("not a scalar: found non-scalar token '?{}'", _c4prc(s.str[1]));
+                return false;
+            case '{':
+            case '}':
+            case '[':
+            case ']':
+                _c4err("invalid token \"?{}\"", _c4prc(s.str[1]));
+                break;
+            default:
+                break;
+            }
+        }
+        else
+        {
+            return false; // a lone '?' is not a scalar
+        }
+        break;
+    // everything else is a legal starting character
+    default:
+        break;
+    }
+
+    return true;
 }
 
-} // namespace yml
-} // namespace c4
+template<class EventHandler>
+bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_flow(ScannedScalar *C4_RESTRICT sc)
+{ // scans a plain scalar inside a flow sequence; on success fills *sc and returns true
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RMAP));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ|RSEQIMAP));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL));
 
-#endif /* RYML_SINGLE_HDR_DEFINE_NOW */
+    substr s = m_evt_handler->m_curr->line_contents.rem;
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with('\n'));
 
+    if(!s.len)
+        return false;
 
-// (end https://github.com/biojppm/rapidyaml/src/c4/yml/common.cpp)
+    if(!_is_valid_start_scalar_plain_flow(s))
+        return false;
+
+    _c4dbgp("scanning seqflow scalar...");
 
+    const size_t start_offset = m_evt_handler->m_curr->pos.offset; // where the scalar begins in m_buf
+    bool needs_filter = false; // set once the scan moves on to another line
+    while(true)
+    {
+        _c4dbgpf("scanning scalar: curr line=[{}]~~~{}~~~", s.len, s);
+        for(size_t i = 0; i < s.len; ++i)
+        {
+            const char c = s.str[i];
+            switch(c)
+            {
+            case ',':
+                _c4dbgpf("found terminating character at {}: '{}'", i, c);
+                _line_progressed(i);
+                if(m_evt_handler->m_curr->pos.offset + i > start_offset) // NOTE(review): presumably pos.offset already includes i after the call above - confirm
+                {
+                    goto ended_scalar;
+                }
+                else
+                {
+                    _c4dbgp("at the beginning. no scalar here.");
+                    return false;
+                }
+                break;
+            case ']':
+                _c4dbgpf("found terminating character at {}: '{}'", i, c);
+                _line_progressed(i);
+                goto ended_scalar;
+                break;
+            case '#':
+                _c4dbgp("found suspicious '#'");
+                if(!i || (s.str[i-1] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[i-1] == '\t')))
+                {
+                    _c4dbgpf("found terminating character at {}: '{}'", i, c);
+                    _line_progressed(i);
+                    goto ended_scalar;
+                }
+                break;
+            case ':':
+                _c4dbgp("found suspicious ':'");
+                if(s.len > i+1)
+                {
+                    const char next = s.str[i+1];
+                    _c4dbgpf("next char is '{}'", _c4prc(next));
+                    if(next == ' ' || next == ',' _RYML_WITH_TAB_TOKENS(|| next == '\t'))
+                    {
+                        _c4dbgp("map starting!");
+                        if(m_evt_handler->m_curr->pos.offset + i > start_offset) // something was scanned before this ':'
+                        {
+                            _c4dbgp("scalar finished!");
+                            _line_progressed(i);
+                            goto ended_scalar;
+                        }
+                        else
+                        {
+                            _c4dbgp("at the beginning. no scalar here.");
+                            return false;
+                        }
+                    }
+                    else
+                    {
+                        _c4dbgp("it's a scalar indeed.");
+                        ++i; // skip the next char
+                    }
+                }
+                else if(s.len == i+1)
+                {
+                    _c4dbgp("':' at line end. map starting!");
+                    return false;
+                }
+                break;
+            case '[':
+            case '{':
+            case '}':
+                _line_progressed(i);
+                _c4err("invalid character: '{}'", c); // noreturn
+            default:
+                ;
+            }
+        }
+        _line_progressed(s.len); // the whole line belongs to the scalar; try to continue on the next one
+        if(!_finished_file())
+        {
+            _c4dbgp("next line!");
+            _line_ended();
+            _scan_line();
+        }
+        else
+        {
+            _c4dbgp("file finished!");
+            goto ended_scalar;
+        }
+        s = m_evt_handler->m_curr->line_contents.rem;
+        needs_filter = true;
+    }
 
+ended_scalar:
 
-//********************************************************************************
-//--------------------------------------------------------------------------------
-// src/c4/yml/tree.cpp
-// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.cpp
-//--------------------------------------------------------------------------------
-//********************************************************************************
+    sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
+    sc->needs_filter = needs_filter;
 
-#ifdef RYML_SINGLE_HDR_DEFINE_NOW
-// amalgamate: removed include of
-// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp
-//#include "c4/yml/tree.hpp"
-#if !defined(C4_YML_TREE_HPP_) && !defined(_C4_YML_TREE_HPP_)
-#error "amalgamate: file c4/yml/tree.hpp must have been included at this point"
-#endif /* C4_YML_TREE_HPP_ */
+    _c4prscalar("scanned plain scalar", sc->scalar, /*keep_newlines*/true);
 
-// amalgamate: removed include of
-// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/parser_dbg.hpp
-//#include "c4/yml/detail/parser_dbg.hpp"
-#if !defined(C4_YML_DETAIL_PARSER_DBG_HPP_) && !defined(_C4_YML_DETAIL_PARSER_DBG_HPP_)
-#error "amalgamate: file c4/yml/detail/parser_dbg.hpp must have been included at this point"
-#endif /* C4_YML_DETAIL_PARSER_DBG_HPP_ */
+    return true;
+}
 
-// amalgamate: removed include of
-// https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp
-//#include "c4/yml/node.hpp"
-#if !defined(C4_YML_NODE_HPP_) && !defined(_C4_YML_NODE_HPP_)
-#error "amalgamate: file c4/yml/node.hpp must have been included at this point"
-#endif /* C4_YML_NODE_HPP_ */
+template<class EventHandler>
+bool ParseEngine<EventHandler>::_scan_scalar_plain_map_flow(ScannedScalar *C4_RESTRICT sc)
+{ // scans a plain scalar inside a flow map; on success fills *sc and returns true
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ) || has_any(RSEQIMAP));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP|RSEQIMAP));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK));
 
-// amalgamate: removed include of
-// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/stack.hpp
-//#include "c4/yml/detail/stack.hpp"
-#if !defined(C4_YML_DETAIL_STACK_HPP_) && !defined(_C4_YML_DETAIL_STACK_HPP_)
-#error "amalgamate: file c4/yml/detail/stack.hpp must have been included at this point"
-#endif /* C4_YML_DETAIL_STACK_HPP_ */
+    substr s = m_evt_handler->m_curr->line_contents.rem;
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
 
+    if(!s.len)
+        return false;
 
+    if(!_is_valid_start_scalar_plain_flow(s))
+        return false;
 
-C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wtype-limits")
-C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(4296/*expression is always 'boolean_value'*/)
+    _c4dbgp("scanning scalar...");
 
-namespace c4 {
-namespace yml {
+    const size_t start_offset = m_evt_handler->m_curr->pos.offset; // where the scalar begins in m_buf
+    bool needs_filter = false; // set once the scan moves on to another line
+    while(true)
+    {
+        for(size_t i = 0; i < s.len; ++i)
+        {
+            const char c = s.str[i];
+            switch(c)
+            {
+            case ',':
+            case '}':
+                _line_progressed(i);
+                _c4dbgpf("found terminating character: '{}'", c);
+                goto ended_scalar;
+            case ':':
+                if(s.len == i+1 || s.str[i+1] == ' ' || s.str[i+1] == ',' || s.str[i+1] == '}' _RYML_WITH_TAB_TOKENS(|| s.str[i+1] == '\t'))
+                {
+                    _line_progressed(i);
+                    _c4dbgpf("found terminating character: '{}'", c);
+                    goto ended_scalar;
+                }
+                break;
+            case '{':
+            case '[':
+                _line_progressed(i);
+                _c4err("invalid character: '{}'", c); // noreturn
+                break;
+            case ']':
+                _line_progressed(i);
+                if(has_any(RSEQIMAP)) // ']' ends the scalar only when RSEQIMAP is set; otherwise it is an error
+                    goto ended_scalar;
+                else
+                    _c4err("invalid character: '{}'", c); // noreturn
+                break;
+            case '#':
+                if(!i || s.str[i-1] == ' ' _RYML_WITH_TAB_TOKENS(|| s.str[i-1] == '\t'))
+                {
+                    _line_progressed(i);
+                    _c4dbgpf("found terminating character: '{}'", c);
+                    goto ended_scalar;
+                }
+                break;
+            default:
+                ;
+            }
+        }
+        _c4dbgp("next line!"); // note: printed before the end-of-file check below, and again inside it
+        _line_progressed(s.len);
+        if(!_finished_file())
+        {
+            _c4dbgp("next line!");
+            _line_ended();
+            _scan_line();
+        }
+        else
+        {
+            _c4dbgp("file finished!");
+            goto ended_scalar;
+        }
+        s = m_evt_handler->m_curr->line_contents.rem;
+        needs_filter = true;
+    }
 
+ended_scalar:
 
-csubstr normalize_tag(csubstr tag)
-{
-    YamlTag_e t = to_tag(tag);
-    if(t != TAG_NONE)
-        return from_tag(t);
-    if(tag.begins_with("!<"))
-        tag = tag.sub(1);
-    if(tag.begins_with("<!"))
-        return tag;
-    return tag;
-}
+    sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \n\t\r", " \n\r"));
+    sc->needs_filter = needs_filter;
 
-csubstr normalize_tag_long(csubstr tag)
-{
-    YamlTag_e t = to_tag(tag);
-    if(t != TAG_NONE)
-        return from_tag_long(t);
-    if(tag.begins_with("!<"))
-        tag = tag.sub(1);
-    if(tag.begins_with("<!"))
-        return tag;
-    return tag;
+    _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
+
+    return true;
 }
 
-YamlTag_e to_tag(csubstr tag)
+template<class EventHandler>
+bool ParseEngine<EventHandler>::_scan_scalar_seq_json(ScannedScalar *C4_RESTRICT sc)
 {
-    if(tag.begins_with("!<"))
-        tag = tag.sub(1);
-    if(tag.begins_with("!!"))
-        tag = tag.sub(2);
-    else if(tag.begins_with('!'))
-        return TAG_NONE;
-    else if(tag.begins_with("tag:yaml.org,2002:"))
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RMAP));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW));
+    // scans an unquoted scalar on the current line of a flow sequence; never continues onto another line
+    substr s = m_evt_handler->m_curr->line_contents.rem;
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
+
+    if(!s.len)
+        return false;
+
+    _c4dbgp("scanning scalar...");
+
+    switch(s.str[0])
     {
-        RYML_ASSERT(csubstr("tag:yaml.org,2002:").len == 18);
-        tag = tag.sub(18);
+    case ']':
+    case '{':
+    case ',':
+        _c4dbgp("not a scalar.");
+        return false;
     }
-    else if(tag.begins_with("<tag:yaml.org,2002:"))
+
     {
-        RYML_ASSERT(csubstr("<tag:yaml.org,2002:").len == 19);
-        tag = tag.sub(19);
-        if(!tag.len)
-            return TAG_NONE;
-        tag = tag.offs(0, 1);
+        const size_t len = _is_special_json_scalar(s); // nonzero: s starts with a special scalar of this length
+        if(len)
+        {
+            sc->scalar = s.first(len);
+            sc->needs_filter = false;
+            _c4dbgpf("special json scalar: '{}'", sc->scalar);
+            _line_progressed(len);
+            return true;
+        }
     }
 
-    if(tag == "map")
-        return TAG_MAP;
-    else if(tag == "omap")
-        return TAG_OMAP;
-    else if(tag == "pairs")
-        return TAG_PAIRS;
-    else if(tag == "set")
-        return TAG_SET;
-    else if(tag == "seq")
-        return TAG_SEQ;
-    else if(tag == "binary")
-        return TAG_BINARY;
-    else if(tag == "bool")
-        return TAG_BOOL;
-    else if(tag == "float")
-        return TAG_FLOAT;
-    else if(tag == "int")
-        return TAG_INT;
-    else if(tag == "merge")
-        return TAG_MERGE;
-    else if(tag == "null")
-        return TAG_NULL;
-    else if(tag == "str")
-        return TAG_STR;
-    else if(tag == "timestamp")
-        return TAG_TIMESTAMP;
-    else if(tag == "value")
-        return TAG_VALUE;
-
-    return TAG_NONE;
-}
-
-csubstr from_tag_long(YamlTag_e tag)
-{
-    switch(tag)
+    // must be a number
+    size_t i = 0; // length of the scalar found so far
+    for( ; i < s.len; ++i)
     {
-    case TAG_MAP:
-        return {"<tag:yaml.org,2002:map>"};
-    case TAG_OMAP:
-        return {"<tag:yaml.org,2002:omap>"};
-    case TAG_PAIRS:
-        return {"<tag:yaml.org,2002:pairs>"};
-    case TAG_SET:
-        return {"<tag:yaml.org,2002:set>"};
-    case TAG_SEQ:
-        return {"<tag:yaml.org,2002:seq>"};
-    case TAG_BINARY:
-        return {"<tag:yaml.org,2002:binary>"};
-    case TAG_BOOL:
-        return {"<tag:yaml.org,2002:bool>"};
-    case TAG_FLOAT:
-        return {"<tag:yaml.org,2002:float>"};
-    case TAG_INT:
-        return {"<tag:yaml.org,2002:int>"};
-    case TAG_MERGE:
-        return {"<tag:yaml.org,2002:merge>"};
-    case TAG_NULL:
-        return {"<tag:yaml.org,2002:null>"};
-    case TAG_STR:
-        return {"<tag:yaml.org,2002:str>"};
-    case TAG_TIMESTAMP:
-        return {"<tag:yaml.org,2002:timestamp>"};
-    case TAG_VALUE:
-        return {"<tag:yaml.org,2002:value>"};
-    case TAG_YAML:
-        return {"<tag:yaml.org,2002:yaml>"};
-    case TAG_NONE:
-        return {""};
+        const char c = s.str[i];
+        switch(c)
+        {
+        case ',':
+        case ']':
+        case ' ':
+        case '\t':
+            _c4dbgpf("found terminating character: '{}'", c);
+            goto ended_scalar;
+        case '#':
+            if(!i || s.str[i-1] == ' ') // the ' ' case above already stops the scan, so only !i can hold here
+            {
+                _c4dbgpf("found terminating character: '{}'", c);
+                goto ended_scalar;
+            }
+            break;
+        default:
+            ;
+        }
     }
-    return {""};
-}
 
-csubstr from_tag(YamlTag_e tag)
-{
-    switch(tag)
+ended_scalar:
+
+    if(C4_LIKELY(i > 0)) // an empty match is reported as no scalar and consumes nothing
     {
-    case TAG_MAP:
-        return {"!!map"};
-    case TAG_OMAP:
-        return {"!!omap"};
-    case TAG_PAIRS:
-        return {"!!pairs"};
-    case TAG_SET:
-        return {"!!set"};
-    case TAG_SEQ:
-        return {"!!seq"};
-    case TAG_BINARY:
-        return {"!!binary"};
-    case TAG_BOOL:
-        return {"!!bool"};
-    case TAG_FLOAT:
-        return {"!!float"};
-    case TAG_INT:
-        return {"!!int"};
-    case TAG_MERGE:
-        return {"!!merge"};
-    case TAG_NULL:
-        return {"!!null"};
-    case TAG_STR:
-        return {"!!str"};
-    case TAG_TIMESTAMP:
-        return {"!!timestamp"};
-    case TAG_VALUE:
-        return {"!!value"};
-    case TAG_YAML:
-        return {"!!yaml"};
-    case TAG_NONE:
-        return {""};
+        _line_progressed(i);
+        sc->scalar = s.first(i);
+        sc->needs_filter = false;
+        _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
+        return true;
     }
-    return {""};
+
+    return false;
 }
 
+template<class EventHandler>
+bool ParseEngine<EventHandler>::_scan_scalar_map_json(ScannedScalar *C4_RESTRICT sc)
+{ // scans an unquoted scalar on the current line of a flow map; never continues onto another line
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(FLOW));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL));
 
-//-----------------------------------------------------------------------------
-//-----------------------------------------------------------------------------
-//-----------------------------------------------------------------------------
+    substr s = m_evt_handler->m_curr->line_contents.rem;
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
 
-const char* NodeType::type_str(NodeType_e ty)
-{
-    switch(ty & _TYMASK)
-    {
-    case KEYVAL:
-        return "KEYVAL";
-    case KEY:
-        return "KEY";
-    case VAL:
-        return "VAL";
-    case MAP:
-        return "MAP";
-    case SEQ:
-        return "SEQ";
-    case KEYMAP:
-        return "KEYMAP";
-    case KEYSEQ:
-        return "KEYSEQ";
-    case DOCSEQ:
-        return "DOCSEQ";
-    case DOCMAP:
-        return "DOCMAP";
-    case DOCVAL:
-        return "DOCVAL";
-    case DOC:
-        return "DOC";
-    case STREAM:
-        return "STREAM";
-    case NOTYPE:
-        return "NOTYPE";
-    default:
-        if((ty & KEYVAL) == KEYVAL)
-            return "KEYVAL***";
-        if((ty & KEYMAP) == KEYMAP)
-            return "KEYMAP***";
-        if((ty & KEYSEQ) == KEYSEQ)
-            return "KEYSEQ***";
-        if((ty & DOCSEQ) == DOCSEQ)
-            return "DOCSEQ***";
-        if((ty & DOCMAP) == DOCMAP)
-            return "DOCMAP***";
-        if((ty & DOCVAL) == DOCVAL)
-            return "DOCVAL***";
-        if(ty & KEY)
-            return "KEY***";
-        if(ty & VAL)
-            return "VAL***";
-        if(ty & MAP)
-            return "MAP***";
-        if(ty & SEQ)
-            return "SEQ***";
-        if(ty & DOC)
-            return "DOC***";
-        return "(unk)";
+    if(!s.len)
+        return false;
+
+    _c4dbgp("scanning scalar...");
+
+    {
+        const size_t len = _is_special_json_scalar(s); // nonzero: s starts with a special scalar of this length
+        if(len)
+        {
+            sc->scalar = s.first(len);
+            sc->needs_filter = false;
+            _c4dbgpf("special json scalar: '{}'", sc->scalar);
+            _line_progressed(len);
+            return true;
+        }
     }
-}
 
+    // must be a number
+    size_t i = 0; // length of the scalar found so far
+    for( ; i < s.len; ++i)
+    {
+        const char c = s.str[i];
+        switch(c)
+        {
+        case ',':
+        case '}':
+        case ' ':
+        case '\t':
+            _c4dbgpf("found terminating character: '{}'", c);
+            goto ended_scalar;
+        case '#':
+            if(!i || s.str[i-1] == ' ') // the ' ' case above already stops the scan, so only !i can hold here
+            {
+                _c4dbgpf("found terminating character: '{}'", c);
+                goto ended_scalar;
+            }
+            break;
+        default:
+            ;
+        }
+    }
 
-//-----------------------------------------------------------------------------
-//-----------------------------------------------------------------------------
-//-----------------------------------------------------------------------------
+ended_scalar:
 
-NodeRef Tree::rootref()
-{
-    return NodeRef(this, root_id());
-}
-NodeRef const Tree::rootref() const
-{
-    return NodeRef(const_cast<Tree*>(this), root_id());
-}
+    if(C4_LIKELY(i > 0)) // an empty match is reported as no scalar and consumes nothing
+    {
+        _line_progressed(i);
+        sc->scalar = s.first(i);
+        sc->needs_filter = false;
+        _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
+        return true;
+    }
 
-NodeRef Tree::ref(size_t id)
-{
-    _RYML_CB_ASSERT(m_callbacks, id != NONE && id >= 0 && id < m_size);
-    return NodeRef(this, id);
-}
-NodeRef const Tree::ref(size_t id) const
-{
-    _RYML_CB_ASSERT(m_callbacks, id != NONE && id >= 0 && id < m_size);
-    return NodeRef(const_cast<Tree*>(this), id);
+    return false;
 }
 
-NodeRef Tree::operator[] (csubstr key)
+template<class EventHandler>
+bool ParseEngine<EventHandler>::_is_doc_begin(csubstr s) // s must start with '-'
 {
-    return rootref()[key];
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s[0] == '-');
+    return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_begin_token(s)); // zero indentation, at line begin, and s passes the token check
 }
-NodeRef const Tree::operator[] (csubstr key) const
+
+template<class EventHandler>
+bool ParseEngine<EventHandler>::_is_doc_end(csubstr s) // s must start with '.'
 {
-    return rootref()[key];
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s[0] == '.');
+    return (m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin() && _is_doc_end_token(s)); // zero indentation, at line begin, and s passes the token check
 }
 
-NodeRef Tree::operator[] (size_t i)
+template<class EventHandler>
+bool ParseEngine<EventHandler>::_scan_scalar_plain_blck(ScannedScalar *C4_RESTRICT sc, size_t indentation)
 {
-    return rootref()[i];
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(BLCK|RUNK|USTY));
+    // scans a plain scalar in block mode; it may span lines while the next line is indented at least `indentation`
+    substr s = m_evt_handler->m_curr->line_contents.rem;
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !s.begins_with(' '));
+
+    if(!s.len)
+        return false;
+
+    switch(s.str[0])
+    {
+    case '-':
+        if(_is_blck_token(s))
+        {
+            return false;
+        }
+        else if(_is_doc_begin(s))
+        {
+            _c4dbgp("token is doc start");
+            return false;
+        }
+        break;
+    case ':':
+    case '?':
+        if(_is_blck_token(s))
+            return false;
+        break;
+    case '[':
+    case '{':
+    case '&':
+    case '*':
+    case '!':
+    _RYML_WITH_TAB_TOKENS(case '\t':)
+        return false;
+    case '.':
+        if(_is_doc_end(s))
+        {
+            _c4dbgp("token is doc end");
+            return false;
+        }
+        break;
+    }
+
+    _c4dbgpf("plain scalar! indentation={}", indentation);
+
+    const size_t start_offset = m_evt_handler->m_curr->pos.offset; // where the scalar begins in m_buf
+    const size_t start_line = m_evt_handler->m_curr->pos.line; // used below to tell the first line from later ones
+
+    bool needs_filter = false; // set once the scan moves on to another line
+    while(true)
+    {
+        _c4dbgpf("plain scalar line: [{}]~~~{}~~~", s.len, s);
+        for(size_t i = 0; i < s.len; ++i)
+        {
+            const char curr = s.str[i];
+            //_c4dbgpf("[{}]='{}'", i, _c4prc(curr));
+            switch(curr)
+            {
+            case ':':
+                _c4dbgpf("[{}]: got suspicious ':'", i);
+                // are there more characters?
+                if((i + 1 == s.len) || ((s.str[i+1] == ' ') _RYML_WITH_TAB_TOKENS( || (s.str[i+1] == '\t'))))
+                {
+                    _c4dbgpf("followed by '{}'", i+1 == s.len ? csubstr("\\n") : _c4prc(s.str[i+1]));
+                    _line_progressed(i);
+                    // ': ' is accepted only on the first line
+                    if(C4_LIKELY(m_evt_handler->m_curr->pos.line == start_line))
+                    {
+                        _c4dbgp("start line. scalar ends here");
+                        goto ended_scalar;
+                    }
+                    else
+                    {
+                        _c4err("parse error");
+                    }
+                }
+                else
+                {
+                    size_t j = i;
+                    while(j + 1 < s.len && s.str[j+1] == ':')
+                    {
+                        _c4dbgp("skip colon");
+                        ++j;
+                    }
+                    i = j > i ? j-1 : i; // for a run of colons, step to just before the last one so the loop's ++i re-checks it
+                    _c4dbgp("nothing to see here");
+                }
+                break;
+            case '#':
+                _c4dbgp("got suspicious '#'");
+                if(!i || (s.str[i-1] == ' ' || s.str[i-1] == '\t'))
+                {
+                    _c4dbgp("comment! scalar ends here");
+                    _line_progressed(i);
+                    goto ended_scalar;
+                }
+                else
+                {
+                    _c4dbgp("nothing to see here");
+                }
+                break;
+            }
+        }
+        _line_progressed(s.len);
+        csubstr next_peeked = _peek_next_line(m_evt_handler->m_curr->pos.offset);
+        next_peeked = next_peeked.trimr("\n\r");
+        const size_t next_indentation = next_peeked.first_not_of(' '); // presumably npos when the line has no non-space character - confirm
+        _c4dbgpf("indentation curr={} next={}", indentation, next_indentation);
+        if(next_indentation < indentation)
+        {
+            _c4dbgp("smaller indentation! scalar ended");
+            goto ended_scalar;
+        }
+        else if(next_indentation == 0 && next_peeked.len > 0)
+        {
+            const char first = next_peeked.str[0];
+            switch(first)
+            {
+            case '-':
+                next_peeked = next_peeked.trimr("\n\r"); // same trim as the one applied right after the peek above
+                _c4dbgpf("doc begin? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ? "..." : "");
+                if(_is_doc_begin_token(next_peeked))
+                {
+                    _c4dbgp("doc begin! scalar ended");
+                    goto ended_scalar;
+                }
+                break;
+            case '.':
+                next_peeked = next_peeked.trimr("\n\r"); // same trim as the one applied right after the peek above
+                _c4dbgpf("doc end? peeked=[{}]~~~{}{}~~~", next_peeked.len, next_peeked.len >= 3 ? next_peeked.first(3) : next_peeked, next_peeked.len > 3 ? "..." : "");
+                if(_is_doc_end_token(next_peeked))
+                {
+                    _c4dbgp("doc end! scalar ended");
+                    goto ended_scalar;
+                }
+                break;
+            }
+        }
+        // load with next line
+        _c4dbgp("next line!");
+        if(!_finished_file())
+        {
+            _c4dbgp("next line!");
+            _line_ended();
+            _scan_line();
+        }
+        else
+        {
+            _c4dbgp("file finished!");
+            goto ended_scalar;
+        }
+        s = m_evt_handler->m_curr->line_contents.rem;
+        needs_filter = true;
+    }
+
+ended_scalar:
+
+    sc->scalar = m_buf.range(start_offset, m_evt_handler->m_curr->pos.offset).trimr(" \n\r\t");
+    sc->needs_filter = needs_filter;
+
+    _c4dbgpf("scalar was [{}]~~~{}~~~", sc->scalar.len, sc->scalar);
+
+    return true;
 }
-NodeRef const Tree::operator[] (size_t i) const
+
+template<class EventHandler> // plain-scalar scan for a block-sequence value: checks the parser state flags, then delegates to _scan_scalar_plain_blck()
+bool ParseEngine<EventHandler>::_scan_scalar_plain_seq_blck(ScannedScalar *C4_RESTRICT sc)
 {
-    return rootref()[i];
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RMAP));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQIMAP));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(BLCK));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL));
+    return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u); // passes the saved indentation plus one (presumably the minimum indentation of continuation lines -- confirm against the callee)
 }
 
-NodeRef Tree::docref(size_t i)
+template<class EventHandler> // plain-scalar scan inside a block map (key, value or '?' state): checks the parser state flags, then delegates to _scan_scalar_plain_blck()
+bool ParseEngine<EventHandler>::_scan_scalar_plain_map_blck(ScannedScalar *C4_RESTRICT sc)
 {
-    return ref(doc(i));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RSEQ));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(BLCK));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RVAL|QMRK));
+    return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref + 1u); // same indentation argument as the block-sequence variant: saved indentation plus one
 }
-NodeRef const Tree::docref(size_t i) const
+
+template<class EventHandler> // plain-scalar scan while the container type is still unknown (RUNK or USTY set); delegates to _scan_scalar_plain_blck()
+bool ParseEngine<EventHandler>::_scan_scalar_plain_unk(ScannedScalar *C4_RESTRICT sc)
 {
-    return ref(doc(i));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks,  has_any(RUNK|USTY));
+    return _scan_scalar_plain_blck(sc, m_evt_handler->m_curr->indref); // unlike the seq/map variants, the saved indentation is passed without the +1
 }
 
 
 //-----------------------------------------------------------------------------
-Tree::Tree(Callbacks const& cb)
-    : m_buf(nullptr)
-    , m_cap(0)
-    , m_size(0)
-    , m_free_head(NONE)
-    , m_free_tail(NONE)
-    , m_arena()
-    , m_arena_pos(0)
-    , m_callbacks(cb)
-{
-}
 
-Tree::Tree(size_t node_capacity, size_t arena_capacity, Callbacks const& cb)
-    : Tree(cb)
+template<class EventHandler> // returns the line following the one containing `pos`, including its newline chars, without changing parser state (const); returns an empty substr when there is none
+substr ParseEngine<EventHandler>::_peek_next_line(size_t pos) const
 {
-    reserve(node_capacity);
-    reserve_arena(arena_capacity);
-}
+    substr rem{}; // declare here because of the goto
+    size_t nlpos{}; // declare here because of the goto
+    pos = pos == npos ? m_evt_handler->m_curr->pos.offset : pos; // npos means "peek from the current parse offset"
+    if(pos >= m_buf.len)
+        goto next_is_empty;
 
-Tree::~Tree()
-{
-    _free();
-}
+    // look for the next newline chars, and jump to the right of those
+    rem = from_next_line(m_buf.sub(pos));
+    if(rem.empty())
+        goto next_is_empty;
 
+    // now get everything up to and including the following newline chars
+    nlpos = rem.first_of("\r\n");
+    if((nlpos != csubstr::npos) && (nlpos + 1 < rem.len))
+        nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]); // presumably adds 1 when the two chars form a single combined newline (e.g. "\r\n") -- confirm in the helper
+    rem = rem.left_of(nlpos, /*include_pos*/true);
 
-Tree::Tree(Tree const& that) noexcept : Tree(that.m_callbacks)
-{
-    _copy(that);
-}
+    _c4dbgpf("peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr("\r\n"));
+    return rem;
 
-Tree& Tree::operator= (Tree const& that) noexcept
-{
-    _free();
-    m_callbacks = that.m_callbacks;
-    _copy(that);
-    return *this;
+next_is_empty:
+    _c4dbgpf("peek next line @ {}: (len=0)''", pos);
+    return {};
 }
 
-Tree::Tree(Tree && that) noexcept : Tree(that.m_callbacks)
-{
-    _move(that);
-}
+//-----------------------------------------------------------------------------
 
-Tree& Tree::operator= (Tree && that) noexcept
+template<class EventHandler> // reloads the current state's line_contents from the buffer at the current parse offset
+void ParseEngine<EventHandler>::_scan_line()
 {
-    _free();
-    m_callbacks = that.m_callbacks;
-    _move(that);
-    return *this;
+    if(C4_LIKELY(m_evt_handler->m_curr->pos.offset < m_buf.len))
+        m_evt_handler->m_curr->line_contents.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset);
+    else
+        m_evt_handler->m_curr->line_contents.reset(m_buf.last(0), m_buf.last(0)); // offset is at or past the end of the buffer: use a zero-length view taken from the buffer's end
 }
 
-void Tree::_free()
+template<class EventHandler> // advances the cursor by `ahead` chars within the current line: offset, column and the line remainder move together
+void ParseEngine<EventHandler>::_line_progressed(size_t ahead)
 {
-    if(m_buf)
-    {
-        _RYML_CB_ASSERT(m_callbacks, m_cap > 0);
-        _RYML_CB_FREE(m_callbacks, m_buf, NodeData, m_cap);
-    }
-    if(m_arena.str)
-    {
-        _RYML_CB_ASSERT(m_callbacks, m_arena.len > 0);
-        _RYML_CB_FREE(m_callbacks, m_arena.str, char, m_arena.len);
-    }
-    _clear();
+    _c4dbgpf("line[{}] ({} cols) progressed by {}:  col {}-->{}   offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->line_contents.full.len, ahead, m_evt_handler->m_curr->pos.col, m_evt_handler->m_curr->pos.col+ahead, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset+ahead);
+    m_evt_handler->m_curr->pos.offset += ahead;
+    m_evt_handler->m_curr->pos.col += ahead;
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col <= m_evt_handler->m_curr->line_contents.stripped.len+1); // col restarts at 1 on every line (see _line_ended), so stripped.len+1 is one past the last stripped char
+    m_evt_handler->m_curr->line_contents.rem = m_evt_handler->m_curr->line_contents.rem.sub(ahead); // drop the consumed prefix from the remainder
 }
 
-
-C4_SUPPRESS_WARNING_GCC_PUSH
-#if defined(__GNUC__) && __GNUC__>= 8
-    C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wclass-memaccess") // error: ‘void* memset(void*, int, size_t)’ clearing an object of type ‘class c4::yml::Tree’ with no trivial copy-assignment; use assignment or value-initialization instead
-#endif
-
-void Tree::_clear()
+template<class EventHandler> // finishes the current line: moves the offset past what `full` has beyond `stripped`, bumps the line counter and resets the column to 1
+void ParseEngine<EventHandler>::_line_ended()
 {
-    m_buf = nullptr;
-    m_cap = 0;
-    m_size = 0;
-    m_free_head = 0;
-    m_free_tail = 0;
-    m_arena = {};
-    m_arena_pos = 0;
-    for(size_t i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i)
-        m_tag_directives[i] = {};
+    _c4dbgpf("line[{}] ({} cols) ended! offset {}-->{} / col {}-->{}",
+             m_evt_handler->m_curr->pos.line,
+             m_evt_handler->m_curr->line_contents.full.len,
+             m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset + m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len,
+             m_evt_handler->m_curr->pos.col, 1);
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == m_evt_handler->m_curr->line_contents.stripped.len + 1); // the whole stripped line must already have been consumed
+    m_evt_handler->m_curr->pos.offset += m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len; // full.len - stripped.len is presumably the length of the trailing newline chars -- confirm
+    ++m_evt_handler->m_curr->pos.line;
+    m_evt_handler->m_curr->pos.col = 1;
 }
 
-void Tree::_copy(Tree const& that)
+template<class EventHandler> // reverses _line_ended(): restores offset, line number and column to the end of the stripped line, and empties the line remainder
+void ParseEngine<EventHandler>::_line_ended_undo()
 {
-    _RYML_CB_ASSERT(m_callbacks, m_buf == nullptr);
-    _RYML_CB_ASSERT(m_callbacks, m_arena.str == nullptr);
-    _RYML_CB_ASSERT(m_callbacks, m_arena.len == 0);
-    m_buf = _RYML_CB_ALLOC_HINT(m_callbacks, NodeData, that.m_cap, that.m_buf);
-    memcpy(m_buf, that.m_buf, that.m_cap * sizeof(NodeData));
-    m_cap = that.m_cap;
-    m_size = that.m_size;
-    m_free_head = that.m_free_head;
-    m_free_tail = that.m_free_tail;
-    m_arena_pos = that.m_arena_pos;
-    m_arena = that.m_arena;
-    if(that.m_arena.str)
-    {
-        _RYML_CB_ASSERT(m_callbacks, that.m_arena.len > 0);
-        substr arena;
-        arena.str = _RYML_CB_ALLOC_HINT(m_callbacks, char, that.m_arena.len, that.m_arena.str);
-        arena.len = that.m_arena.len;
-        _relocate(arena); // does a memcpy of the arena and updates nodes using the old arena
-        m_arena = arena;
-    }
-    for(size_t i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i)
-        m_tag_directives[i] = that.m_tag_directives[i];
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.col == 1u); // only valid right after _line_ended(), which leaves col at 1
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line > 0u);
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len); // guards the unsigned subtraction below
+    const size_t delta = m_evt_handler->m_curr->line_contents.full.len - m_evt_handler->m_curr->line_contents.stripped.len; // the same amount _line_ended() added to the offset
+    _c4dbgpf("line[{}] undo ended! line {}-->{}, offset {}-->{}", m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line, m_evt_handler->m_curr->pos.line - 1, m_evt_handler->m_curr->pos.offset, m_evt_handler->m_curr->pos.offset - delta);
+    m_evt_handler->m_curr->pos.offset -= delta;
+    --m_evt_handler->m_curr->pos.line;
+    m_evt_handler->m_curr->pos.col = m_evt_handler->m_curr->line_contents.stripped.len + 1u;
+    // don't forget to undo also the changes to the remainder of the line
+    //_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.offset >= m_buf.len || m_buf[m_evt_handler->m_curr->pos.offset] == '\n' || m_buf[m_evt_handler->m_curr->pos.offset] == '\r');
+    m_evt_handler->m_curr->line_contents.rem = m_buf.sub(m_evt_handler->m_curr->pos.offset, 0); // zero-length remainder positioned at the restored offset
 }
 
-void Tree::_move(Tree & that)
+
+//-----------------------------------------------------------------------------
+template<class EventHandler> // stores an explicit indentation value as the current state's reference indentation (indref)
+void ParseEngine<EventHandler>::_set_indentation(size_t indentation)
 {
-    _RYML_CB_ASSERT(m_callbacks, m_buf == nullptr);
-    _RYML_CB_ASSERT(m_callbacks, m_arena.str == nullptr);
-    _RYML_CB_ASSERT(m_callbacks, m_arena.len == 0);
-    m_buf = that.m_buf;
-    m_cap = that.m_cap;
-    m_size = that.m_size;
-    m_free_head = that.m_free_head;
-    m_free_tail = that.m_free_tail;
-    m_arena = that.m_arena;
-    m_arena_pos = that.m_arena_pos;
-    for(size_t i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i)
-        m_tag_directives[i] = that.m_tag_directives[i];
-    that._clear();
+    m_evt_handler->m_curr->indref = indentation;
+    _c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
 }
 
-void Tree::_relocate(substr next_arena)
-{
-    _RYML_CB_ASSERT(m_callbacks, next_arena.not_empty());
-    _RYML_CB_ASSERT(m_callbacks, next_arena.len >= m_arena.len);
-    memcpy(next_arena.str, m_arena.str, m_arena_pos);
-    for(NodeData *C4_RESTRICT n = m_buf, *e = m_buf + m_cap; n != e; ++n)
-    {
-        if(in_arena(n->m_key.scalar))
-            n->m_key.scalar = _relocated(n->m_key.scalar, next_arena);
-        if(in_arena(n->m_key.tag))
-            n->m_key.tag = _relocated(n->m_key.tag, next_arena);
-        if(in_arena(n->m_key.anchor))
-            n->m_key.anchor = _relocated(n->m_key.anchor, next_arena);
-        if(in_arena(n->m_val.scalar))
-            n->m_val.scalar = _relocated(n->m_val.scalar, next_arena);
-        if(in_arena(n->m_val.tag))
-            n->m_val.tag = _relocated(n->m_val.tag, next_arena);
-        if(in_arena(n->m_val.anchor))
-            n->m_val.anchor = _relocated(n->m_val.anchor, next_arena);
-    }
-    for(TagDirective &C4_RESTRICT td : m_tag_directives)
-    {
-        if(in_arena(td.prefix))
-            td.prefix = _relocated(td.prefix, next_arena);
-        if(in_arena(td.handle))
-            td.handle = _relocated(td.handle, next_arena);
-    }
+template<class EventHandler> // stores the current column of the line being parsed as the current state's reference indentation (indref)
+void ParseEngine<EventHandler>::_save_indentation()
+{
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.rem.begin() >= m_evt_handler->m_curr->line_contents.full.begin()); // the remainder must start inside (or at the start of) the full line
+    m_evt_handler->m_curr->indref = m_evt_handler->m_curr->line_contents.current_col();
+    _c4dbgpf("state[{}]: saving indentation: {}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
 }
 
 
 //-----------------------------------------------------------------------------
-void Tree::reserve(size_t cap)
+
+template<class EventHandler> // closes a block map; if the map ends while a value (or a '?' key and its value) is still pending, empty plain scalars are emitted first
+void ParseEngine<EventHandler>::_end_map_blck()
 {
-    if(cap > m_cap)
+    _c4dbgp("mapblck: end");
+    if(has_any(RKCL|RVAL)) // a value is still pending (presumably RKCL = after key, awaiting ':' -- confirm flag meaning)
     {
-        NodeData *buf = _RYML_CB_ALLOC_HINT(m_callbacks, NodeData, cap, m_buf);
-        if(m_buf)
-        {
-            memcpy(buf, m_buf, m_cap * sizeof(NodeData));
-            _RYML_CB_FREE(m_callbacks, m_buf, NodeData, m_cap);
-        }
-        size_t first = m_cap, del = cap - m_cap;
-        m_cap = cap;
-        m_buf = buf;
-        _clear_range(first, del);
-        if(m_free_head != NONE)
-        {
-            _RYML_CB_ASSERT(m_callbacks, m_buf != nullptr);
-            _RYML_CB_ASSERT(m_callbacks, m_free_tail != NONE);
-            m_buf[m_free_tail].m_next_sibling = first;
-            m_buf[first].m_prev_sibling = m_free_tail;
-            m_free_tail = cap-1;
-        }
-        else
-        {
-            _RYML_CB_ASSERT(m_callbacks, m_free_tail == NONE);
-            m_free_head = first;
-            m_free_tail = cap-1;
-        }
-        _RYML_CB_ASSERT(m_callbacks, m_free_head == NONE || (m_free_head >= 0 && m_free_head < cap));
-        _RYML_CB_ASSERT(m_callbacks, m_free_tail == NONE || (m_free_tail >= 0 && m_free_tail < cap));
+        _c4dbgp("mapblck: set missing val");
+        _handle_annotations_before_blck_val_scalar();
+        m_evt_handler->set_val_scalar_plain({}); // empty plain scalar stands in for the missing value
+    }
+    else if(has_any(QMRK)) // QMRK state: neither key nor value was provided, so both are emitted empty
+    {
+        _c4dbgp("mapblck: set missing keyval");
+        _handle_annotations_before_blck_key_scalar();
+        m_evt_handler->set_key_scalar_plain({});
+        _handle_annotations_before_blck_val_scalar();
+        m_evt_handler->set_val_scalar_plain({});
+    }
+    m_evt_handler->end_map();
+}
 
-        if( ! m_size)
-            _claim_root();
+template<class EventHandler> // closes a block sequence; if an entry's value is still pending (RVAL), an empty plain scalar is emitted first
+void ParseEngine<EventHandler>::_end_seq_blck()
+{
+    if(has_any(RVAL))
+    {
+        _c4dbgp("seqblck: set missing val");
+        _handle_annotations_before_blck_val_scalar();
+        m_evt_handler->set_val_scalar_plain({}); // empty plain scalar stands in for the missing value
     }
+    m_evt_handler->end_seq();
 }
 
+template<class EventHandler> // ends the map of the current state: block maps go through _end_map_blck(); otherwise the state is just popped
+void ParseEngine<EventHandler>::_end2_map()
+{
+    _c4dbgp("map: end");
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RMAP));
+    if(has_any(BLCK))
+    {
+        _end_map_blck();
+    }
+    else
+    {
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW)); // flow maps are not expected on this path
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(USTY)); // the only non-block case accepted here has USTY set
+        m_evt_handler->_pop();
+    }
+}
 
-//-----------------------------------------------------------------------------
-void Tree::clear()
+template<class EventHandler> // ends the sequence of the current state: block sequences go through _end_seq_blck(); otherwise the state is just popped
+void ParseEngine<EventHandler>::_end2_seq()
 {
-    _clear_range(0, m_cap);
-    m_size = 0;
-    if(m_buf)
+    _c4dbgp("seq: end");
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RSEQ));
+    if(has_any(BLCK))
     {
-        _RYML_CB_ASSERT(m_callbacks, m_cap >= 0);
-        m_free_head = 0;
-        m_free_tail = m_cap-1;
-        _claim_root();
+        _end_seq_blck();
     }
     else
     {
-        m_free_head = NONE;
-        m_free_tail = NONE;
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(FLOW)); // flow sequences are not expected on this path
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(USTY)); // the only non-block case accepted here has USTY set
+        m_evt_handler->_pop();
     }
-    for(size_t i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i)
-        m_tag_directives[i] = {};
 }
 
-void Tree::_claim_root()
+template<class EventHandler> // starts a document: marks it empty, sets RDOC, notifies the handler via begin_doc() and resets the reference indentation
+void ParseEngine<EventHandler>::_begin2_doc()
 {
-    size_t r = _claim();
-    _RYML_CB_ASSERT(m_callbacks, r == 0);
-    _set_hierarchy(r, NONE, NONE);
+    m_doc_empty = true;
+    add_flags(RDOC);
+    m_evt_handler->begin_doc();
+    m_evt_handler->m_curr->indref = 0; // ? (original author's open question; set after begin_doc(), so it applies to whatever state is current at that point)
 }
 
+template<class EventHandler> // same as _begin2_doc(), but notifies the handler via begin_doc_expl() (presumably for a document opened by an explicit marker -- confirm)
+void ParseEngine<EventHandler>::_begin2_doc_expl()
+{
+    m_doc_empty = true;
+    add_flags(RDOC);
+    m_evt_handler->begin_doc_expl();
+    m_evt_handler->m_curr->indref = 0; // ? (original author's open question; set after begin_doc_expl(), so it applies to whatever state is current at that point)
+}
 
-//-----------------------------------------------------------------------------
-void Tree::_clear_range(size_t first, size_t num)
+template<class EventHandler> // ends the current document via end_doc(); a document still marked empty gets an empty plain scalar first
+void ParseEngine<EventHandler>::_end2_doc()
 {
-    if(num == 0)
-        return; // prevent overflow when subtracting
-    _RYML_CB_ASSERT(m_callbacks, first >= 0 && first + num <= m_cap);
-    memset(m_buf + first, 0, num * sizeof(NodeData)); // TODO we should not need this
-    for(size_t i = first, e = first + num; i < e; ++i)
+    _c4dbgp("doc: end");
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RDOC)); // must be inside a document
+    if(m_doc_empty)
     {
-        _clear(i);
-        NodeData *n = m_buf + i;
-        n->m_prev_sibling = i - 1;
-        n->m_next_sibling = i + 1;
+        _c4dbgp("doc was empty; add empty val");
+        m_evt_handler->set_val_scalar_plain({}); // the empty document is reported as holding one empty plain scalar
     }
-    m_buf[first + num - 1].m_next_sibling = NONE;
+    m_evt_handler->end_doc();
 }
 
-C4_SUPPRESS_WARNING_GCC_POP
-
-
-//-----------------------------------------------------------------------------
-void Tree::_release(size_t i)
+template<class EventHandler> // like _end2_doc() but notifies via end_doc_expl(); note that, unlike _end2_doc(), there is no RDOC assertion here
+void ParseEngine<EventHandler>::_end2_doc_expl()
 {
-    _RYML_CB_ASSERT(m_callbacks, i >= 0 && i < m_cap);
-
-    _rem_hierarchy(i);
-    _free_list_add(i);
-    _clear(i);
-
-    --m_size;
+    _c4dbgp("doc: end");
+    if(m_doc_empty)
+    {
+        _c4dbgp("doc: no children; add empty val");
+        m_evt_handler->set_val_scalar_plain({}); // the empty document is reported as holding one empty plain scalar
+    }
+    m_evt_handler->end_doc_expl();
 }
 
-//-----------------------------------------------------------------------------
-// add to the front of the free list
-void Tree::_free_list_add(size_t i)
+template<class EventHandler> // starts a document via _begin2_doc() only when none is open (RDOC not set); otherwise does nothing
+void ParseEngine<EventHandler>::_maybe_begin_doc()
 {
-    _RYML_CB_ASSERT(m_callbacks, i >= 0 && i < m_cap);
-    NodeData &C4_RESTRICT w = m_buf[i];
-
-    w.m_parent = NONE;
-    w.m_next_sibling = m_free_head;
-    w.m_prev_sibling = NONE;
-    if(m_free_head != NONE)
-        m_buf[m_free_head].m_prev_sibling = i;
-    m_free_head = i;
-    if(m_free_tail == NONE)
-        m_free_tail = m_free_head;
+    if(has_none(RDOC))
+    {
+        _c4dbgp("doc must be started");
+        _begin2_doc();
+    }
 }
-
-void Tree::_free_list_rem(size_t i)
+template<class EventHandler> // ends the document via _end2_doc() only when one is open (RDOC set); otherwise does nothing
+void ParseEngine<EventHandler>::_maybe_end_doc()
 {
-    if(m_free_head == i)
-        m_free_head = _p(i)->m_next_sibling;
-    _rem_hierarchy(i);
+    if(has_any(RDOC))
+    {
+        _c4dbgp("doc must be finished");
+        _end2_doc();
+    }
 }
 
-//-----------------------------------------------------------------------------
-size_t Tree::_claim()
+template<class EventHandler> // pops open containers until the document-level state is current; that state is stack[0] or stack[1], whichever carries RDOC
+void ParseEngine<EventHandler>::_end_doc_suddenly__pop()
 {
-    if(m_free_head == NONE || m_buf == nullptr)
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
+    if(m_evt_handler->m_stack[0].flags & RDOC)
     {
-        size_t sz = 2 * m_cap;
-        sz = sz ? sz : 16;
-        reserve(sz);
-        _RYML_CB_ASSERT(m_callbacks, m_free_head != NONE);
+        _c4dbgp("root is RDOC");
+        if(m_evt_handler->m_curr->level != 0) // nothing to pop when already at the document state
+            _handle_indentation_pop(&m_evt_handler->m_stack[0]);
+    }
+    else if((m_evt_handler->m_stack.size() > 1) && (m_evt_handler->m_stack[1].flags & RDOC))
+    {
+        _c4dbgp("root is STREAM");
+        if(m_evt_handler->m_curr->level != 1) // nothing to pop when already at the document state
+            _handle_indentation_pop(&m_evt_handler->m_stack[1]);
+    }
+    else
+    {
+        _c4err("internal error");
     }
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RDOC)); // postcondition: the current state is the document state
+}
 
-    _RYML_CB_ASSERT(m_callbacks, m_size < m_cap);
-    _RYML_CB_ASSERT(m_callbacks, m_free_head >= 0 && m_free_head < m_cap);
+template<class EventHandler> // ends the current document from any nesting depth: pops to the document state, ends it via _end2_doc_expl(), then resets the flags
+void ParseEngine<EventHandler>::_end_doc_suddenly()
+{
+    _c4dbgp("end doc suddenly");
+    _end_doc_suddenly__pop();
+    _end2_doc_expl();
+    addrem_flags(RUNK|RTOP|NDOC, RMAP|RSEQ|RDOC); // presumably (add, remove): set RUNK|RTOP|NDOC and clear RMAP|RSEQ|RDOC -- confirm argument order
+}
 
-    size_t ichild = m_free_head;
-    NodeData *child = m_buf + ichild;
+template<class EventHandler> // starts a new document from any nesting depth: pops to the document state, ends the current document, then begins a new one via _begin2_doc_expl()
+void ParseEngine<EventHandler>::_start_doc_suddenly()
+{
+    _c4dbgp("start doc suddenly");
+    _end_doc_suddenly__pop();
+    _end2_doc(); // the previous document is ended with end_doc(), not end_doc_expl()
+    _begin2_doc_expl();
+}
 
-    ++m_size;
-    m_free_head = child->m_next_sibling;
-    if(m_free_head == NONE)
+template<class EventHandler> // finishes parsing: reports unterminated flow containers, pops all open states, closes the open document (if any) and emits end_stream()
+void ParseEngine<EventHandler>::_end_stream()
+{
+    _c4dbgpf("end_stream, level={} node_id={}", m_evt_handler->m_curr->level, m_evt_handler->m_curr->node_id);
+    if(has_all(RSEQ|FLOW))
+        _c4err("missing terminating ]");
+    else if(has_all(RMAP|FLOW))
+        _c4err("missing terminating }");
+    if(m_evt_handler->m_stack.size() > 1)
+        _handle_indentation_pop(m_evt_handler->m_stack.begin()); // unwind towards the bottom of the stack
+    if(has_all(RDOC))
     {
-        m_free_tail = NONE;
-        _RYML_CB_ASSERT(m_callbacks, m_size == m_cap);
+        _end2_doc();
+    }
+    else if(has_all(RTOP|RUNK))
+    {
+        if(m_pending_anchors.num_entries || m_pending_tags.num_entries) // anchors/tags were read but nothing followed them
+        {
+            if(m_doc_empty)
+            {
+                m_evt_handler->begin_doc(); // emit a document holding one empty scalar so the pending annotations have a node to attach to
+                _handle_annotations_before_blck_val_scalar();
+                m_evt_handler->set_val_scalar_plain({});
+                m_evt_handler->end_doc();
+            }
+        }
     }
+    m_evt_handler->end_stream();
+}
 
-    _clear(ichild);
 
-    return ichild;
+template<class EventHandler> // ends open sequences/maps one by one until `popto` becomes the current state, or the current state is neither a seq nor a map
+void ParseEngine<EventHandler>::_handle_indentation_pop(ParserState const* popto)
+{
+    _c4dbgpf("popping {} level{}: from level {}(@ind={}) to level {}(@ind={})", m_evt_handler->m_curr->level - popto->level, (((m_evt_handler->m_curr->level - popto->level) > 1) ? "s" : ""), m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, popto->level, popto->indref);
+    while(m_evt_handler->m_curr != popto) // relies on _end2_seq()/_end2_map() changing m_curr
+    {
+        if(has_any(RSEQ))
+        {
+            _c4dbgpf("popping seq at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
+            _end2_seq();
+        }
+        else if(has_any(RMAP))
+        {
+            _c4dbgpf("popping map at level {} (indentation={},addr={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref, m_evt_handler->m_curr);
+            _end2_map();
+        }
+        else
+        {
+            break; // not a container: stop even though popto has not been reached
+        }
+    }
+    _c4dbgpf("current level is {} (indentation={})", m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
 }
 
-//-----------------------------------------------------------------------------
-
-C4_SUPPRESS_WARNING_GCC_PUSH
-C4_SUPPRESS_WARNING_CLANG_PUSH
-C4_SUPPRESS_WARNING_CLANG("-Wnull-dereference")
-#if defined(__GNUC__) && (__GNUC__ >= 6)
-C4_SUPPRESS_WARNING_GCC("-Wnull-dereference")
-#endif
+template<class EventHandler> // block-seq dedent: finds the nearest enclosing state whose saved indentation equals the current line's indentation and pops to it
+void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_seq()
+{
+    // search the stack frame to jump to based on its indentation
+    using state_type = typename EventHandler::state;
+    state_type const* popto = nullptr;
+    auto &stack = m_evt_handler->m_stack;
+    _RYML_CB_ASSERT(stack.m_callbacks, stack.is_contiguous()); // this search relies on the stack being contiguous
+    _RYML_CB_ASSERT(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end());
+    const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
+    #ifdef RYML_DBG
+    if(_dbg_enabled())
+    {
+        char flagbuf_[128];
+        for(state_type const& s : stack)
+            _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
+    }
+    #endif
+    for(state_type const* s = m_evt_handler->m_curr-1; s >= stack.begin(); --s) // walks from the frame just below the current one down to (and including) the stack bottom
+    {
+        _c4dbgpf("searching for state with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id);
+        if(s->indref == ind) // first exact match wins
+        {
+            _c4dbgpf("gotit!!! level={} node={}", s->level, s->node_id);
+            popto = s;
+            break;
+        }
+    }
+    if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level) // no match, or a match that is not strictly below the current state
+    {
+        _c4err("parse error: incorrect indentation?");
+    }
+    _handle_indentation_pop(popto);
+}
 
-void Tree::_set_hierarchy(size_t ichild, size_t iparent, size_t iprev_sibling)
+template<class EventHandler> // block-map dedent: searches enclosing states (excluding the stack bottom) for one whose saved indentation equals the current line's indentation and pops to it
+void ParseEngine<EventHandler>::_handle_indentation_pop_from_block_map()
 {
-    _RYML_CB_ASSERT(m_callbacks, iparent == NONE || (iparent >= 0 && iparent < m_cap));
-    _RYML_CB_ASSERT(m_callbacks, iprev_sibling == NONE || (iprev_sibling >= 0 && iprev_sibling < m_cap));
+    // search the stack frame to jump to based on its indentation
+    using state_type = typename EventHandler::state;
+    auto &stack = m_evt_handler->m_stack;
+    _RYML_CB_ASSERT(stack.m_callbacks, stack.is_contiguous()); // this search relies on the stack being contiguous
+    _RYML_CB_ASSERT(stack.m_callbacks, m_evt_handler->m_curr >= stack.begin() && m_evt_handler->m_curr < stack.end());
+    const size_t ind = m_evt_handler->m_curr->line_contents.indentation;
+    state_type const* popto = nullptr;
+    #ifdef RYML_DBG
+    char flagbuf_[128];
+    if(_dbg_enabled())
+    {
+        for(state_type const& s : stack)
+            _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
+    }
+    #endif
+    for(state_type const* s = m_evt_handler->m_curr-1; s > stack.begin(); --s) // never go to the stack bottom. that's the root
+    {
+        _c4dbgpf("searching for state with indentation {}. current: ind={},level={},node={},flags={}", ind, s->indref, s->level, s->node_id, detail::_parser_flags_to_str(flagbuf_, s->flags));
+        if(s->indref < ind) // frames further down are less indented than the line: stop searching
+        {
+            break;
+        }
+        else if(s->indref == ind) // unlike the block-seq variant, the search continues past a match to look for a deeper frame with the same indentation
+        {
+            _c4dbgpf("same indentation!!! level={} node={}", s->level, s->node_id);
+            if(popto && has_any(RTOP, s) && has_none(RMAP|RSEQ, s)) // keep the earlier match rather than a top-level frame that is not a container
+            {
+                break;
+            }
+            popto = s;
+            if(has_all(RSEQ|BLCK, s))
+            {
+                csubstr rem = m_evt_handler->m_curr->line_contents.rem;
+                const size_t first = rem.first_not_of(' ');
+                _RYML_CB_ASSERT(stack.m_callbacks, first == ind || first == npos);
+                rem = rem.right_of(first, true);
+                _c4dbgpf("indentless? rem='{}' first={}", rem, first);
+                if(rem.begins_with('-') && _is_blck_token(rem)) // the line is a '-' entry at the same indentation as this block seq, so the seq is the target
+                {
+                    _c4dbgp("parent was indentless seq");
+                    break;
+                }
+            }
+        }
+    }
+    if(!popto || popto >= m_evt_handler->m_curr || popto->level >= m_evt_handler->m_curr->level) // no match, or a match that is not strictly below the current state
+    {
+        _c4err("parse error: incorrect indentation?");
+    }
+    _handle_indentation_pop(popto);
+}
 
-    NodeData *C4_RESTRICT child = get(ichild);
 
-    child->m_parent = iparent;
-    child->m_prev_sibling = NONE;
-    child->m_next_sibling = NONE;
+//-----------------------------------------------------------------------------
+template<class EventHandler> // scans a single-quoted scalar starting at the current offset; returns the text between the quotes (unfiltered) plus a flag telling whether it needs filtering
+typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_squot()
+{
+    // quoted scalars can spread over multiple lines!
+    // nice explanation here: http://yaml-multiline.info/
 
-    if(iparent == NONE)
+    // a span to the end of the file
+    size_t b = m_evt_handler->m_curr->pos.offset;
+    substr s = m_buf.sub(b);
+    if(s.begins_with(' '))
     {
-        _RYML_CB_ASSERT(m_callbacks, ichild == 0);
-        _RYML_CB_ASSERT(m_callbacks, iprev_sibling == NONE);
+        s = s.triml(' ');
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
+        _line_progressed((size_t)(s.begin() - m_buf.sub(b).begin())); // consume the leading spaces so the cursor sits on the opening quote
     }
+    b = m_evt_handler->m_curr->pos.offset; // take this into account
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('\''));
 
-    if(iparent == NONE)
-        return;
+    // skip the opening quote
+    _line_progressed(1);
+    s = s.sub(1);
 
-    size_t inext_sibling = iprev_sibling != NONE ? next_sibling(iprev_sibling) : first_child(iparent);
-    NodeData *C4_RESTRICT parent = get(iparent);
-    NodeData *C4_RESTRICT psib   = get(iprev_sibling);
-    NodeData *C4_RESTRICT nsib   = get(inext_sibling);
+    bool needs_filter = false;
 
-    if(psib)
+    size_t numlines = 1; // we already have one line
+    size_t pos = npos; // find the pos of the matching quote
+    while( ! _finished_file())
     {
-        _RYML_CB_ASSERT(m_callbacks, next_sibling(iprev_sibling) == id(nsib));
-        child->m_prev_sibling = id(psib);
-        psib->m_next_sibling = id(child);
-        _RYML_CB_ASSERT(m_callbacks, psib->m_prev_sibling != psib->m_next_sibling || psib->m_prev_sibling == NONE);
-    }
+        const csubstr line = m_evt_handler->m_curr->line_contents.rem;
+        bool line_is_blank = true;
+        _c4dbgpf("scanning single quoted scalar @ line[{}]: ~~~{}~~~", m_evt_handler->m_curr->pos.line, line);
+        for(size_t i = 0; i < line.len; ++i)
+        {
+            const char curr = line.str[i];
+            if(curr == '\'') // single quotes are escaped with two single quotes
+            {
+                const char next = i+1 < line.len ? line.str[i+1] : '~'; // '~' is just a non-quote placeholder for "no next char on this line"
+                if(next != '\'') // so just look for the first quote
+                {                // without another after it
+                    pos = i;
+                    break;
+                }
+                else
+                {
+                    needs_filter = true; // needs filter to remove escaped quotes
+                    ++i; // skip the escaped quote
+                }
+            }
+            else if(curr != ' ')
+            {
+                line_is_blank = false;
+            }
+        }
+
+        // leading whitespace also needs filtering
+        needs_filter = needs_filter
+            || (numlines > 1)
+            || line_is_blank
+            || (_at_line_begin() && line.begins_with(' '));
+
+        if(pos == npos) // no closing quote on this line: consume all of it and continue on the next one
+        {
+            _line_progressed(line.len);
+            ++numlines;
+        }
+        else
+        {
+            _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len); // NOTE(review): pos is unsigned here, so `pos >= 0` is always true
+            _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf[m_evt_handler->m_curr->pos.offset + pos] == '\'');
+            _line_progressed(pos + 1); // progress beyond the quote
+            pos = m_evt_handler->m_curr->pos.offset - b - 1; // but we stop before it
+            break;
+        }
 
-    if(nsib)
-    {
-        _RYML_CB_ASSERT(m_callbacks, prev_sibling(inext_sibling) == id(psib));
-        child->m_next_sibling = id(nsib);
-        nsib->m_prev_sibling = id(child);
-        _RYML_CB_ASSERT(m_callbacks, nsib->m_prev_sibling != nsib->m_next_sibling || nsib->m_prev_sibling == NONE);
+        _line_ended();
+        _scan_line();
     }
 
-    if(parent->m_first_child == NONE)
+    if(pos == npos)
     {
-        _RYML_CB_ASSERT(m_callbacks, parent->m_last_child == NONE);
-        parent->m_first_child = id(child);
-        parent->m_last_child = id(child);
+        _c4err("reached end of file while looking for closing quote");
     }
     else
     {
-        if(child->m_next_sibling == parent->m_first_child)
-            parent->m_first_child = id(child);
-
-        if(child->m_prev_sibling == parent->m_last_child)
-            parent->m_last_child = id(child);
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos > 0);
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '\'');
+        s = s.sub(0, pos-1); // pos is the closing quote's index measured from b (the opening quote), while s starts at b+1: hence length pos-1
     }
-}
 
-C4_SUPPRESS_WARNING_GCC_POP
-C4_SUPPRESS_WARNING_CLANG_POP
+    _c4prscalar("scanned squoted scalar", s, /*keep_newlines*/true);
+
+    return ScannedScalar { s, needs_filter };
+}
 
 
 //-----------------------------------------------------------------------------
-void Tree::_rem_hierarchy(size_t i)
+template<class EventHandler>
+typename ParseEngine<EventHandler>::ScannedScalar ParseEngine<EventHandler>::_scan_scalar_dquot()
 {
-    _RYML_CB_ASSERT(m_callbacks, i >= 0 && i < m_cap);
+    // Scan a double-quoted scalar at the current position; returns the span between the quotes plus a needs-filter flag.
+    // Quoted scalars can spread over multiple lines; nice explanation here: http://yaml-multiline.info/
 
-    NodeData &C4_RESTRICT w = m_buf[i];
+    // a span to the end of the file
+    size_t b = m_evt_handler->m_curr->pos.offset;
+    substr s = m_buf.sub(b);
+    if(s.begins_with(' '))
+    {
+        s = s.triml(' ');
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.sub(b).is_super(s));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
+        _line_progressed((size_t)(s.begin() - m_buf.sub(b).begin()));
+    }
+    b = m_evt_handler->m_curr->pos.offset; // re-read the offset to account for any leading spaces skipped above
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('"'));
 
-    // remove from the parent
-    if(w.m_parent != NONE)
+    // skip the opening quote
+    _line_progressed(1);
+    s = s.sub(1);
+
+    bool needs_filter = false;
+
+    size_t numlines = 1; // we already have one line
+    size_t pos = npos; // find the pos of the matching quote
+    while( ! _finished_file())
     {
-        NodeData &C4_RESTRICT p = m_buf[w.m_parent];
-        if(p.m_first_child == i)
+        const csubstr line = m_evt_handler->m_curr->line_contents.rem;
+        bool line_is_blank = true;
+        _c4dbgpf("scanning double quoted scalar @ line[{}]:  line='{}'", m_evt_handler->m_curr->pos.line, line);
+        for(size_t i = 0; i < line.len; ++i)
         {
-            p.m_first_child = w.m_next_sibling;
+            const char curr = line.str[i];
+            if(curr != ' ')
+                line_is_blank = false;
+            // every \ is an escape
+            if(curr == '\\')
+            {
+                const char next = i+1 < line.len ? line.str[i+1] : '~'; // '~' is a placeholder when the backslash ends the line
+                needs_filter = true;
+                if(next == '"' || next == '\\')
+                    ++i; // step over the escaped char so an escaped quote does not terminate the scan
+            }
+            else if(curr == '"')
+            {
+                pos = i; // unescaped quote: index within the remainder of this line
+                break;
+            }
         }
-        if(p.m_last_child == i)
+
+        // leading whitespace also needs filtering
+        needs_filter = needs_filter
+            || (numlines > 1)
+            || line_is_blank
+            || (_at_line_begin() && line.begins_with(' '));
+
+        if(pos == npos)
         {
-            p.m_last_child = w.m_prev_sibling;
+            _line_progressed(line.len); // no closing quote on this line: consume it and keep looking
+            ++numlines;
+        }
+        else
+        {
+            _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
+            _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf[m_evt_handler->m_curr->pos.offset + pos] == '"');
+            _line_progressed(pos + 1); // progress beyond the quote
+            pos = m_evt_handler->m_curr->pos.offset - b - 1; // but we stop before it; pos is now measured from b
+            break;
         }
+
+        _line_ended();
+        _scan_line();
     }
 
-    // remove from the used list
-    if(w.m_prev_sibling != NONE)
+    if(pos == npos)
     {
-        NodeData *C4_RESTRICT prev = get(w.m_prev_sibling);
-        prev->m_next_sibling = w.m_next_sibling;
+        _c4err("reached end of file looking for closing quote");
     }
-    if(w.m_next_sibling != NONE)
+    else
     {
-        NodeData *C4_RESTRICT next = get(w.m_next_sibling);
-        next->m_prev_sibling = w.m_prev_sibling;
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, pos > 0);
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '"');
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
+        s = s.sub(0, pos-1); // s begins right after the opening quote, so pos-1 chars reach up to the closing quote
     }
-}
-
-//-----------------------------------------------------------------------------
-void Tree::reorder()
-{
-    size_t r = root_id();
-    _do_reorder(&r, 0);
-}
 
-//-----------------------------------------------------------------------------
-size_t Tree::_do_reorder(size_t *node, size_t count)
-{
-    // swap this node if it's not in place
-    if(*node != count)
-    {
-        _swap(*node, count);
-        *node = count;
-    }
-    ++count; // bump the count from this node
+    _c4prscalar("scanned dquoted scalar", s, /*keep_newlines*/true);
 
-    // now descend in the hierarchy
-    for(size_t i = first_child(*node); i != NONE; i = next_sibling(i))
-    {
-        // this child may have been relocated to a different index,
-        // so get an updated version
-        count = _do_reorder(&i, count);
-    }
-    return count;
+    return ScannedScalar { s, needs_filter };
 }
 
-//-----------------------------------------------------------------------------
-void Tree::_swap(size_t n_, size_t m_)
-{
-    _RYML_CB_ASSERT(m_callbacks, (parent(n_) != NONE) || type(n_) == NOTYPE);
-    _RYML_CB_ASSERT(m_callbacks, (parent(m_) != NONE) || type(m_) == NOTYPE);
-    NodeType tn = type(n_);
-    NodeType tm = type(m_);
-    if(tn != NOTYPE && tm != NOTYPE)
-    {
-        _swap_props(n_, m_);
-        _swap_hierarchy(n_, m_);
-    }
-    else if(tn == NOTYPE && tm != NOTYPE)
-    {
-        _copy_props(n_, m_);
-        _free_list_rem(n_);
-        _copy_hierarchy(n_, m_);
-        _clear(m_);
-        _free_list_add(m_);
-    }
-    else if(tn != NOTYPE && tm == NOTYPE)
-    {
-        _copy_props(m_, n_);
-        _free_list_rem(m_);
-        _copy_hierarchy(m_, n_);
-        _clear(n_);
-        _free_list_add(n_);
-    }
-    else
-    {
-        C4_NEVER_REACH();
-    }
-}
 
 //-----------------------------------------------------------------------------
-void Tree::_swap_hierarchy(size_t ia, size_t ib)
+template<class EventHandler>
+void ParseEngine<EventHandler>::_scan_block(ScannedBlock *C4_RESTRICT sb, size_t indref)
 {
-    if(ia == ib) return;
-
-    for(size_t i = first_child(ia); i != NONE; i = next_sibling(i))
-    {
-        if(i == ib || i == ia)
-            continue;
-        _p(i)->m_parent = ib;
-    }
+    _c4dbgpf("blck: indref={}", indref);
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, indref != npos);
 
-    for(size_t i = first_child(ib); i != NONE; i = next_sibling(i))
+    // Scans a block scalar (| literal or > folded): parses the header spec, then gathers the raw lines into *sb. Nice explanation here: http://yaml-multiline.info/
+    csubstr s = m_evt_handler->m_curr->line_contents.rem;
+    csubstr trimmed = s.triml(' ');
+    if(trimmed.str > s.str)
     {
-        if(i == ib || i == ia)
-            continue;
-        _p(i)->m_parent = ia;
+        _c4dbgp("skipping whitespace");
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, trimmed.str >= s.str);
+        _line_progressed(static_cast<size_t>(trimmed.str - s.str));
+        s = trimmed;
     }
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with('|') || s.begins_with('>'));
 
-    auto & C4_RESTRICT a  = *_p(ia);
-    auto & C4_RESTRICT b  = *_p(ib);
-    auto & C4_RESTRICT pa = *_p(a.m_parent);
-    auto & C4_RESTRICT pb = *_p(b.m_parent);
+    _c4dbgpf("blck: specs=[{}]~~~{}~~~", s.len, s);
 
-    if(&pa == &pb)
+    // parse the spec
+    BlockChomp_e chomp = CHOMP_CLIP; // default to clip unless + or - are used
+    size_t indentation = npos; // have to find out if no spec is given
+    csubstr digits;
+    if(s.len > 1) // something follows the | or >: chomp indicator and/or indentation digit
     {
-        if((pa.m_first_child == ib && pa.m_last_child == ia)
-            ||
-           (pa.m_first_child == ia && pa.m_last_child == ib))
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, s.begins_with_any("|>"));
+        csubstr t = s.sub(1);
+        _c4dbgpf("blck: spec is multichar: '{}'", t);
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, t.len >= 1);
+        size_t pos = t.first_of("-+");
+        _c4dbgpf("blck: spec chomp char at {}", pos);
+        if(pos != npos)
         {
-            std::swap(pa.m_first_child, pa.m_last_child);
+            if(t[pos] == '-')
+                chomp = CHOMP_STRIP;
+            else if(t[pos] == '+')
+                chomp = CHOMP_KEEP;
+            if(pos == 0)
+                t = t.sub(1); // chomp char comes first: any digits follow it
+            else
+                t = t.first(pos); // chomp char comes later: any digits precede it
         }
-        else
+        // from here to the end, only digits are considered
+        digits = t.left_of(t.first_not_of("0123456789"));
+        if( ! digits.empty())
         {
-            bool changed = false;
-            if(pa.m_first_child == ia)
-            {
-                pa.m_first_child = ib;
-                changed = true;
-            }
-            if(pa.m_last_child  == ia)
-            {
-                pa.m_last_child = ib;
-                changed = true;
-            }
-            if(pb.m_first_child == ib && !changed)
-            {
-                pb.m_first_child = ia;
-            }
-            if(pb.m_last_child  == ib && !changed)
-            {
-                pb.m_last_child  = ia;
-            }
+            if(C4_UNLIKELY(digits.len > 1)) // only a single digit is accepted as indentation indicator
+                _c4err("parse error: invalid indentation");
+            _c4dbgpf("blck: parse indentation digits: [{}]~~~{}~~~", digits.len, digits);
+            if(C4_UNLIKELY( ! c4::atou(digits, &indentation)))
+                _c4err("parse error: could not read indentation as decimal");
+            if(C4_UNLIKELY( ! indentation))
+                _c4err("parse error: null indentation");
+            _c4dbgpf("blck: indentation specified: {}. add {} from curr state -> {}", indentation, m_evt_handler->m_curr->indref, indentation+m_evt_handler->m_curr->indref);
+            indentation += m_evt_handler->m_curr->indref; // the explicit indicator is relative to the current state's indref
         }
     }
-    else
-    {
-        if(pa.m_first_child == ia)
-            pa.m_first_child = ib;
-        if(pa.m_last_child  == ia)
-            pa.m_last_child  = ib;
-        if(pb.m_first_child == ib)
-            pb.m_first_child = ia;
-        if(pb.m_last_child  == ib)
-            pb.m_last_child  = ia;
-    }
-    std::swap(a.m_first_child , b.m_first_child);
-    std::swap(a.m_last_child  , b.m_last_child);
 
-    if(a.m_prev_sibling != ib && b.m_prev_sibling != ia &&
-       a.m_next_sibling != ib && b.m_next_sibling != ia)
-    {
-        if(a.m_prev_sibling != NONE && a.m_prev_sibling != ib)
-            _p(a.m_prev_sibling)->m_next_sibling = ib;
-        if(a.m_next_sibling != NONE && a.m_next_sibling != ib)
-            _p(a.m_next_sibling)->m_prev_sibling = ib;
-        if(b.m_prev_sibling != NONE && b.m_prev_sibling != ia)
-            _p(b.m_prev_sibling)->m_next_sibling = ia;
-        if(b.m_next_sibling != NONE && b.m_next_sibling != ia)
-            _p(b.m_next_sibling)->m_prev_sibling = ia;
-        std::swap(a.m_prev_sibling, b.m_prev_sibling);
-        std::swap(a.m_next_sibling, b.m_next_sibling);
-    }
-    else
+    _c4dbgpf("blck: style={}  chomp={}  indentation={}", s.begins_with('>') ? "fold" : "literal", chomp==CHOMP_CLIP ? "clip" : (chomp==CHOMP_STRIP ? "strip" : "keep"), indentation);
+
+    // finish the current line
+    _line_progressed(s.len);
+    _line_ended();
+    _scan_line();
+
+    // start with a zero-length block, already pointing at the right place
+    substr raw_block(m_buf.data() + m_evt_handler->m_curr->pos.offset, size_t(0));// m_evt_handler->m_curr->line_contents.full.sub(0, 0);
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, raw_block.begin() == m_evt_handler->m_curr->line_contents.full.begin());
+
+    // read every full line into a raw block,
+    // from which newlines are to be stripped as needed.
+    //
+    // If no explicit indentation was given, pick it from the first
+    // non-empty line. See
+    // https://yaml.org/spec/1.2.2/#8111-block-indentation-indicator
+    size_t num_lines = 0;
+    size_t first = m_evt_handler->m_curr->pos.line;
+    size_t provisional_indentation = npos;
+    LineContents lc;
+    while(( ! _finished_file()))
     {
-        if(a.m_next_sibling == ib) // n will go after m
+        // peek next line, but do not advance immediately
+        lc.reset_with_next_line(m_buf, m_evt_handler->m_curr->pos.offset);
+        _c4dbgpf("blck: peeking at [{}]~~~{}~~~", lc.stripped.len, lc.stripped);
+        // evaluate termination conditions
+        if(indentation != npos)
         {
-            _RYML_CB_ASSERT(m_callbacks, b.m_prev_sibling == ia);
-            if(a.m_prev_sibling != NONE)
+            _c4dbgpf("blck: indentation={}", indentation);
+            // stop when the line is deindented and not empty
+            if(lc.indentation < indentation && ( ! lc.rem.trim(" \t").empty()))
             {
-                _RYML_CB_ASSERT(m_callbacks, a.m_prev_sibling != ib);
-                _p(a.m_prev_sibling)->m_next_sibling = ib;
+                if(raw_block.len)
+                {
+                    _c4dbgpf("blck: indentation decreased ref={} thisline={}", indentation, lc.indentation);
+                }
+                else
+                {
+                    _c4err("indentation decreased without any scalar");
+                }
+                break;
             }
-            if(b.m_next_sibling != NONE)
+            else if(indentation == 0)
             {
-                _RYML_CB_ASSERT(m_callbacks, b.m_next_sibling != ia);
-                _p(b.m_next_sibling)->m_prev_sibling = ia;
+                _c4dbgpf("blck: noindent. lc.rem=[{}]~~~{}~~~", lc.rem.len, lc.rem);
+                if(_is_doc_token(lc.rem))
+                {
+                    _c4dbgp("blck: stop. indentation=0 and doc ended");
+                    break;
+                }
             }
-            size_t ns = b.m_next_sibling;
-            b.m_prev_sibling = a.m_prev_sibling;
-            b.m_next_sibling = ia;
-            a.m_prev_sibling = ib;
-            a.m_next_sibling = ns;
         }
-        else if(a.m_prev_sibling == ib) // m will go after n
+        else
         {
-            _RYML_CB_ASSERT(m_callbacks, b.m_next_sibling == ia);
-            if(b.m_prev_sibling != NONE)
+            const size_t fns = lc.stripped.first_not_of(' ');
+            _c4dbgpf("blck: indentation ref not set. firstnonws={}", fns);
+            if(fns != npos) // non-empty line
             {
-                _RYML_CB_ASSERT(m_callbacks, b.m_prev_sibling != ia);
-                _p(b.m_prev_sibling)->m_next_sibling = ia;
+                _RYML_WITH_TAB_TOKENS(
+                    if(C4_UNLIKELY(lc.stripped.begins_with('\t')))
+                        _c4err("parse error");
+                )
+                _c4dbgpf("blck: line not empty. indref={} indprov={} indentation={}", indref, provisional_indentation, lc.indentation);
+                if(provisional_indentation == npos)
+                {
+                    if(lc.indentation < indref)
+                    {
+                        _c4dbgpf("blck: block terminated indentation={} < indref={}", lc.indentation, indref);
+                        if(raw_block.len == 0)
+                        {
+                            _c4dbgp("blck: was empty, undo next line");
+                            _line_ended_undo();
+                        }
+                        break;
+                    }
+                    else if(lc.indentation == m_evt_handler->m_curr->indref)
+                    {
+                        if(has_any(RSEQ|RMAP))
+                        {
+                            _c4dbgpf("blck: block terminated. reading container and indentation={}==indref={}", lc.indentation, m_evt_handler->m_curr->indref);
+                            break;
+                        }
+                    }
+                    _c4dbgpf("blck: set indentation ref from this line: ref={}", lc.indentation);
+                    indentation = lc.indentation;
+                }
+                else
+                {
+                    if(lc.indentation >= provisional_indentation)
+                    {
+                        _c4dbgpf("blck: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation);
+                        //indentation = provisional_indentation ? provisional_indentation : lc.indentation;
+                        indentation = lc.indentation;
+                    }
+                    else
+                    {
+                        break;
+                        //_c4err("parse error: first non-empty block line should have at least the original indentation");
+                    }
+                }
             }
-            if(a.m_next_sibling != NONE)
+            else // empty line
             {
-                _RYML_CB_ASSERT(m_callbacks, a.m_next_sibling != ib);
-                _p(a.m_next_sibling)->m_prev_sibling = ib;
+                _c4dbgpf("blck: line empty or {} spaces. line_indentation={} prov_indentation={}", lc.stripped.len, lc.indentation, provisional_indentation);
+                if(provisional_indentation != npos)
+                {
+                    if(lc.stripped.len >= provisional_indentation)
+                    {
+                        _c4dbgpf("blck: increase provisional_ref {} -> {}", provisional_indentation, lc.stripped.len);
+                        provisional_indentation = lc.stripped.len;
+                    }
+                    #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
+                    else if(lc.indentation >= provisional_indentation && lc.indentation != npos)
+                    {
+                        _c4dbgpf("blck: increase provisional_ref {} -> {}", provisional_indentation, lc.indentation);
+                        provisional_indentation = lc.indentation;
+                    }
+                    #endif
+                }
+                else
+                {
+                    provisional_indentation = lc.indentation ? lc.indentation : has_any(RSEQ|RVAL); // NOTE(review): fallback is the has_any() result converted to size_t - confirm intended
+                    _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
+                    if(provisional_indentation == npos)
+                    {
+                        provisional_indentation = lc.stripped.len ? lc.stripped.len : has_any(RSEQ|RVAL);
+                        _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
+                    }
+                    if(provisional_indentation < indref)
+                    {
+                        provisional_indentation = indref;
+                        _c4dbgpf("blck: initialize provisional_ref={}", provisional_indentation);
+                    }
+                }
             }
-            size_t ns = b.m_prev_sibling;
-            a.m_prev_sibling = b.m_prev_sibling;
-            a.m_next_sibling = ib;
-            b.m_prev_sibling = ia;
-            b.m_next_sibling = ns;
-        }
-        else
-        {
-            C4_NEVER_REACH();
-        }
-    }
-    _RYML_CB_ASSERT(m_callbacks, a.m_next_sibling != ia);
-    _RYML_CB_ASSERT(m_callbacks, a.m_prev_sibling != ia);
-    _RYML_CB_ASSERT(m_callbacks, b.m_next_sibling != ib);
-    _RYML_CB_ASSERT(m_callbacks, b.m_prev_sibling != ib);
-
-    if(a.m_parent != ib && b.m_parent != ia)
-    {
-        std::swap(a.m_parent, b.m_parent);
-    }
-    else
-    {
-        if(a.m_parent == ib && b.m_parent != ia)
-        {
-            a.m_parent = b.m_parent;
-            b.m_parent = ia;
-        }
-        else if(a.m_parent != ib && b.m_parent == ia)
-        {
-            b.m_parent = a.m_parent;
-            a.m_parent = ib;
-        }
-        else
-        {
-            C4_NEVER_REACH();
         }
+        // advance now that we know the folded scalar continues
+        m_evt_handler->m_curr->line_contents = lc;
+        _c4dbgpf("blck: append '{}'", m_evt_handler->m_curr->line_contents.rem);
+        raw_block.len += m_evt_handler->m_curr->line_contents.full.len; // grow the block by the whole peeked line
+        _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
+        _line_ended();
+        ++num_lines;
     }
-}
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->pos.line == (first + num_lines) || (raw_block.len == 0));
+    C4_UNUSED(num_lines);
+    C4_UNUSED(first);
 
-//-----------------------------------------------------------------------------
-void Tree::_copy_hierarchy(size_t dst_, size_t src_)
-{
-    auto const& C4_RESTRICT src = *_p(src_);
-    auto      & C4_RESTRICT dst = *_p(dst_);
-    auto      & C4_RESTRICT prt = *_p(src.m_parent);
-    for(size_t i = src.m_first_child; i != NONE; i = next_sibling(i))
-    {
-        _p(i)->m_parent = dst_;
-    }
-    if(src.m_prev_sibling != NONE)
-    {
-        _p(src.m_prev_sibling)->m_next_sibling = dst_;
-    }
-    if(src.m_next_sibling != NONE)
-    {
-        _p(src.m_next_sibling)->m_prev_sibling = dst_;
-    }
-    if(prt.m_first_child == src_)
-    {
-        prt.m_first_child = dst_;
-    }
-    if(prt.m_last_child  == src_)
+    if(indentation == npos)
     {
-        prt.m_last_child  = dst_;
+        _c4dbgpf("blck: set indentation from provisional: {}", provisional_indentation);
+        indentation = provisional_indentation; // no non-empty line fixed the indentation: fall back to the provisional value
     }
-    dst.m_parent       = src.m_parent;
-    dst.m_first_child  = src.m_first_child;
-    dst.m_last_child   = src.m_last_child;
-    dst.m_prev_sibling = src.m_prev_sibling;
-    dst.m_next_sibling = src.m_next_sibling;
-}
 
-//-----------------------------------------------------------------------------
-void Tree::_swap_props(size_t n_, size_t m_)
-{
-    NodeData &C4_RESTRICT n = *_p(n_);
-    NodeData &C4_RESTRICT m = *_p(m_);
-    std::swap(n.m_type, m.m_type);
-    std::swap(n.m_key, m.m_key);
-    std::swap(n.m_val, m.m_val);
-}
+    if(num_lines)
+        _line_ended_undo();
 
-//-----------------------------------------------------------------------------
-void Tree::move(size_t node, size_t after)
-{
-    _RYML_CB_ASSERT(m_callbacks, node != NONE);
-    _RYML_CB_ASSERT(m_callbacks,  ! is_root(node));
-    _RYML_CB_ASSERT(m_callbacks, has_sibling(node, after) && has_sibling(after, node));
+    _c4prscalar("scanned block", raw_block, /*keep_newlines*/true);
 
-    _rem_hierarchy(node);
-    _set_hierarchy(node, parent(node), after);
+    sb->scalar = raw_block; // outputs: the unfiltered block text, its indentation and the chomp mode
+    sb->indentation = indentation;
+    sb->chomp = chomp;
 }
 
-//-----------------------------------------------------------------------------
 
-void Tree::move(size_t node, size_t new_parent, size_t after)
-{
-    _RYML_CB_ASSERT(m_callbacks, node != NONE);
-    _RYML_CB_ASSERT(m_callbacks, new_parent != NONE);
-    _RYML_CB_ASSERT(m_callbacks,  ! is_root(node));
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
 
-    _rem_hierarchy(node);
-    _set_hierarchy(node, new_parent, after);
-}
+// a debugging scaffold: change the #if below to 1 to forward _c4dbgfws() to _c4dbgpf(); otherwise it expands to nothing
+#if 0
+#define _c4dbgfws(fmt, ...) _c4dbgpf("filt_ws[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
+#else
+#define _c4dbgfws(...)
+#endif
 
-size_t Tree::move(Tree *src, size_t node, size_t new_parent, size_t after)
+template<class EventHandler> // Handles whitespace at proc's read position; returns false when only whitespace remains in proc.src
+template<class FilterProcessor>
+bool ParseEngine<EventHandler>::_filter_ws_handle_to_first_non_space(FilterProcessor &proc)
 {
-    _RYML_CB_ASSERT(m_callbacks, node != NONE);
-    _RYML_CB_ASSERT(m_callbacks, new_parent != NONE);
-
-    size_t dup = duplicate(src, node, new_parent, after);
-    src->remove(node);
-    return dup;
-}
+    _c4dbgfws("found whitespace '{}'", _c4prc(proc.curr()));
+    _RYML_CB_ASSERT(this->callbacks(), proc.curr() == ' ' || proc.curr() == '\t');
 
-void Tree::set_root_as_stream()
-{
-    size_t root = root_id();
-    if(is_stream(root))
-        return;
-    // don't use _add_flags() because it's checked and will fail
-    if(!has_children(root))
+    const size_t first_pos = proc.rpos > 0 ? proc.src.first_not_of(" \t", proc.rpos) : proc.src.first_not_of(' ', proc.rpos); // tabs are stepped over too, except when reading from position 0
+    if(first_pos != npos)
     {
-        if(is_val(root))
+        const char first_char = proc.src[first_pos];
+        _c4dbgfws("firstnonws='{}'@{}", _c4prc(first_char), first_pos);
+        if(first_char == '\n' || first_char == '\r') // skip trailing whitespace
         {
-            _p(root)->m_type.add(SEQ);
-            size_t next_doc = append_child(root);
-            _copy_props_wo_key(next_doc, root);
-            _p(next_doc)->m_type.add(DOC);
-            _p(next_doc)->m_type.rem(SEQ);
+            _c4dbgfws("whitespace is trailing on line", "");
+            proc.skip(first_pos - proc.rpos); // drop the whole run up to the line break
         }
-        _p(root)->m_type = STREAM;
-        return;
-    }
-    _RYML_CB_ASSERT(m_callbacks, !has_key(root));
-    size_t next_doc = append_child(root);
-    _copy_props_wo_key(next_doc, root);
-    _add_flags(next_doc, DOC);
-    for(size_t prev = NONE, ch = first_child(root), next = next_sibling(ch); ch != NONE; )
-    {
-        if(ch == next_doc)
-            break;
-        move(ch, next_doc, prev);
-        prev = ch;
-        ch = next;
-        next = next_sibling(next);
+        else // a legit whitespace
+        {
+            proc.copy();
+            _c4dbgfws("legit whitespace. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
+        }
+        return true;
     }
-    _p(root)->m_type = STREAM;
+    _c4dbgfws("whitespace is trailing on line", "");
+    return false; // nothing but whitespace until the end of src: the caller decides whether to copy or skip it
 }
 
-
-//-----------------------------------------------------------------------------
-void Tree::remove_children(size_t node)
+template<class EventHandler> // Whitespace handler that copies the remaining chars when only whitespace is left in proc.src
+template<class FilterProcessor>
+void ParseEngine<EventHandler>::_filter_ws_copy_trailing(FilterProcessor &proc)
 {
-    _RYML_CB_ASSERT(m_callbacks, get(node) != nullptr);
-    size_t ich = get(node)->m_first_child;
-    while(ich != NONE)
+    if(!_filter_ws_handle_to_first_non_space(proc)) // false means no non-whitespace char was found after the read position
     {
-        remove_children(ich);
-        _RYML_CB_ASSERT(m_callbacks, get(ich) != nullptr);
-        size_t next = get(ich)->m_next_sibling;
-        _release(ich);
-        if(ich == get(node)->m_last_child)
-            break;
-        ich = next;
-    }
-}
-
-bool Tree::change_type(size_t node, NodeType type)
-{
-    _RYML_CB_ASSERT(m_callbacks, type.is_val() || type.is_map() || type.is_seq());
-    _RYML_CB_ASSERT(m_callbacks, type.is_val() + type.is_map() + type.is_seq() == 1);
-    _RYML_CB_ASSERT(m_callbacks, type.has_key() == has_key(node) || (has_key(node) && !type.has_key()));
-    NodeData *d = _p(node);
-    if(type.is_map() && is_map(node))
-        return false;
-    else if(type.is_seq() && is_seq(node))
-        return false;
-    else if(type.is_val() && is_val(node))
-        return false;
-    d->m_type = (d->m_type & (~(MAP|SEQ|VAL))) | type;
-    remove_children(node);
-    return true;
+        _c4dbgfws("... everything else is trailing whitespace - copy {} chars", proc.src.len - proc.rpos);
+        proc.copy(proc.src.len - proc.rpos); // keep everything from the read position to the end of src
+    }
 }
 
-
-//-----------------------------------------------------------------------------
-size_t Tree::duplicate(size_t node, size_t parent, size_t after)
+template<class EventHandler> // Whitespace handler that skips the remaining chars when only whitespace is left in proc.src
+template<class FilterProcessor>
+void ParseEngine<EventHandler>::_filter_ws_skip_trailing(FilterProcessor &proc)
 {
-    return duplicate(this, node, parent, after);
+    if(!_filter_ws_handle_to_first_non_space(proc)) // false means no non-whitespace char was found after the read position
+    {
+        _c4dbgfws("... everything else is trailing whitespace - skip {} chars", proc.src.len - proc.rpos);
+        proc.skip(proc.src.len - proc.rpos); // drop everything from the read position to the end of src
+    }
 }
 
-size_t Tree::duplicate(Tree const* src, size_t node, size_t parent, size_t after)
-{
-    _RYML_CB_ASSERT(m_callbacks, src != nullptr);
-    _RYML_CB_ASSERT(m_callbacks, node != NONE);
-    _RYML_CB_ASSERT(m_callbacks, parent != NONE);
-    _RYML_CB_ASSERT(m_callbacks,  ! src->is_root(node));
+#undef _c4dbgfws
 
-    size_t copy = _claim();
 
-    _copy_props(copy, src, node);
-    _set_hierarchy(copy, parent, after);
-    duplicate_children(src, node, copy, NONE);
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+/* plain scalars */
 
-    return copy;
-}
+// a debugging scaffold: change the #if below to 1 to forward _c4dbgfps() to _c4dbgpf(); otherwise it expands to nothing
+#if 0
+#define _c4dbgfps(fmt, ...) _c4dbgpf("filt_plain[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
+#else
+#define _c4dbgfps(fmt, ...)
+#endif
 
-//-----------------------------------------------------------------------------
-size_t Tree::duplicate_children(size_t node, size_t parent, size_t after)
+template<class EventHandler> // Handles a newline inside a plain scalar: emits newlines, a single space, or nothing, then moves rpos
+template<class FilterProcessor>
+void ParseEngine<EventHandler>::_filter_nl_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation)
 {
-    return duplicate_children(this, node, parent, after);
+    _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n');
+
+    _c4dbgfps("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
+    size_t ii = proc.rpos;
+    const size_t numnl_following = _count_following_newlines(proc.src, &ii, indentation); // ii is passed by pointer and becomes the new read position below
+    if(numnl_following)
+    {
+        proc.set('\n', numnl_following); // further newlines follow: write that many '\n'
+        _c4dbgfps("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
+    }
+    else
+    {
+        const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1);
+        if(ret != npos)
+        {
+            proc.set(' '); // a lone newline followed by more content becomes a single space
+             _c4dbgfps("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
+        }
+        else
+        {
+            _c4dbgfps("last newline, everything else is whitespace. ii={}/{}", ii, proc.src.len);
+            ii = proc.src.len; // only spaces/tabs remain: jump to the end without writing anything
+        }
+    }
+    proc.rpos = ii;
 }
 
-size_t Tree::duplicate_children(Tree const* src, size_t node, size_t parent, size_t after)
+template<class EventHandler> // Filters a plain scalar by dispatching on each char of proc; returns proc.result()
+template<class FilterProcessor>
+auto ParseEngine<EventHandler>::_filter_plain(FilterProcessor &C4_RESTRICT proc, size_t indentation) -> decltype(proc.result())
 {
-    _RYML_CB_ASSERT(m_callbacks, src != nullptr);
-    _RYML_CB_ASSERT(m_callbacks, node != NONE);
-    _RYML_CB_ASSERT(m_callbacks, parent != NONE);
-    _RYML_CB_ASSERT(m_callbacks, after == NONE || has_child(parent, after));
+    _RYML_CB_ASSERT(this->callbacks(), indentation != npos);
+    _c4dbgfps("before=[{}]~~~{}~~~", proc.src.len, proc.src);
 
-    size_t prev = after;
-    for(size_t i = src->first_child(node); i != NONE; i = src->next_sibling(i))
+    while(proc.has_more_chars())
     {
-        prev = duplicate(src, i, parent, prev);
+        const char curr = proc.curr();
+        _c4dbgfps("'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
+        switch(curr)
+        {
+        case ' ':
+        _RYML_WITH_TAB_TOKENS(case '\t':)
+            _c4dbgfps("whitespace", curr);
+            _filter_ws_skip_trailing(proc); // whitespace: delegate to the skip-trailing whitespace handler
+            break;
+        case '\n':
+            _c4dbgfps("newline", curr);
+            _filter_nl_plain(proc, /*indentation*/indentation);
+            break;
+        case '\r':  // skip \r --- https://stackoverflow.com/questions/1885900
+            _c4dbgfps("carriage return, ignore", curr);
+            proc.skip();
+            break;
+        default:
+            proc.copy(); // any other char goes through proc.copy()
+            break;
+        }
     }
 
-    return prev;
+    _c4dbgfps("after[{}]=~~~{}~~~", proc.wpos, proc.sofar());
+
+    return proc.result();
 }
 
-//-----------------------------------------------------------------------------
-void Tree::duplicate_contents(size_t node, size_t where)
+#undef _c4dbgfps
+
+
+template<class EventHandler>
+FilterResult ParseEngine<EventHandler>::filter_scalar_plain(csubstr scalar, substr dst, size_t indentation)
 {
-    duplicate_contents(this, node, where);
+    FilterProcessorSrcDst proc(scalar, dst); // processor built over the source scalar and a separate destination buffer
+    return _filter_plain(proc, indentation); // run the shared plain-scalar filter and hand back its result
 }
 
-void Tree::duplicate_contents(Tree const *src, size_t node, size_t where)
+template<class EventHandler>
+FilterResult ParseEngine<EventHandler>::filter_scalar_plain_in_place(substr dst, size_t cap, size_t indentation)
 {
-    _RYML_CB_ASSERT(m_callbacks, src != nullptr);
-    _RYML_CB_ASSERT(m_callbacks, node != NONE);
-    _RYML_CB_ASSERT(m_callbacks, where != NONE);
-    _copy_props_wo_key(where, src, node);
-    duplicate_children(src, node, where, last_child(where));
+    FilterProcessorInplaceEndExtending proc(dst, cap); // in-place processor over dst; cap is presumably the buffer capacity -- TODO confirm
+    return _filter_plain(proc, indentation); // same filter as filter_scalar_plain(), only the processor differs
 }
 
+
 //-----------------------------------------------------------------------------
-size_t Tree::duplicate_children_no_rep(size_t node, size_t parent, size_t after)
-{
-    return duplicate_children_no_rep(this, node, parent, after);
-}
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+/* single quoted */
 
-size_t Tree::duplicate_children_no_rep(Tree const *src, size_t node, size_t parent, size_t after)
-{
-    _RYML_CB_ASSERT(m_callbacks, node != NONE);
-    _RYML_CB_ASSERT(m_callbacks, parent != NONE);
-    _RYML_CB_ASSERT(m_callbacks, after == NONE || has_child(parent, after));
+// a debugging scaffold: with "#if 0" the macro expands to nothing; switch it to 1 to route the trace through _c4dbgpf (callers must pass at least one argument after fmt)
+#if 0
+#define _c4dbgfsq(fmt, ...) _c4dbgpf("filt_squo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
+#else
+#define _c4dbgfsq(fmt, ...)
+#endif
 
-    // don't loop using pointers as there may be a relocation
+template<class EventHandler>
+template<class FilterProcessor> // handles a newline found while filtering a single-quoted scalar
+void ParseEngine<EventHandler>::_filter_nl_squoted(FilterProcessor &C4_RESTRICT proc)
+{
+    _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n'); // must be entered with the read position on a newline
 
-    // find the position where "after" is
-    size_t after_pos = NONE;
-    if(after != NONE)
+    _c4dbgfsq("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
+    size_t ii = proc.rpos; // starts at the newline; its address is handed to the counting helper below
+    const size_t numnl_following = _count_following_newlines(proc.src, &ii);
+    if(numnl_following)
     {
-        for(size_t i = first_child(parent), icount = 0; i != NONE; ++icount, i = next_sibling(i))
-        {
-            if(i == after)
-            {
-                after_pos = icount;
-                break;
-            }
-        }
-        _RYML_CB_ASSERT(m_callbacks, after_pos != NONE);
+        proc.set('\n', numnl_following); // NOTE(review): presumably emits numnl_following newline characters -- confirm against proc.set()
+        _c4dbgfsq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
     }
-
-    // for each child to be duplicated...
-    size_t prev = after;
-    for(size_t i = src->first_child(node), icount = 0; i != NONE; ++icount, i = src->next_sibling(i))
+    else
     {
-        if(is_seq(parent))
+        const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1); // first character after the newline that is neither space nor tab
+        if(ret != npos)
         {
-            prev = duplicate(i, parent, prev);
+            proc.set(' ');
+            _c4dbgfsq("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
         }
         else
         {
-            _RYML_CB_ASSERT(m_callbacks, is_map(parent));
-            // does the parent already have a node with key equal to that of the current duplicate?
-            size_t rep = NONE, rep_pos = NONE;
-            for(size_t j = first_child(parent), jcount = 0; j != NONE; ++jcount, j = next_sibling(j))
+            proc.set(' '); // NOTE(review): same action as the branch above; ret only selects the debug message
+            _c4dbgfsq("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
+        }
+    }
+    proc.rpos = ii; // resume reading at the position left in ii
+}
+
+template<class EventHandler>
+template<class FilterProcessor> // filters a single-quoted scalar through the given processor
+auto ParseEngine<EventHandler>::_filter_squoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
+{
+    _c4dbgfsq("before=[{}]~~~{}~~~", proc.src.len, proc.src);
+
+    // from the YAML spec for single-quoted scalars:
+    // https://yaml.org/spec/1.2-old/spec.html#style/flow/single-quoted
+    while(proc.has_more_chars())
+    {
+        const char curr = proc.curr();
+        _c4dbgfsq("'{}', sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
+        switch(curr)
+        {
+        case ' ':
+        case '\t':
+            _c4dbgfsq("whitespace", curr);
+            _filter_ws_copy_trailing(proc); // whitespace is delegated to the helper
+            break;
+        case '\n':
+            _c4dbgfsq("newline", curr);
+            _filter_nl_squoted(proc); // line breaks are delegated to _filter_nl_squoted()
+            break;
+        case '\r':  // skip \r --- https://stackoverflow.com/questions/1885900
+            _c4dbgfsq("skip cr", curr);
+            proc.skip();
+            break;
+        case '\'':
+            _c4dbgfsq("squote", curr);
+            if(proc.next() == '\'')
             {
-                if(key(j) == key(i))
-                {
-                    rep = j;
-                    rep_pos = jcount;
-                    break;
-                }
+                _c4dbgfsq("two consecutive squotes", curr);
+                proc.skip(); // a doubled quote: skip the first one...
+                proc.copy(); // ...and copy the second
             }
-            if(rep == NONE) // there is no repetition; just duplicate
+            else
             {
-                prev = duplicate(src, i, parent, prev);
+                _c4err("filter error"); // a quote not followed by another quote is reported as an error
             }
-            else  // yes, there is a repetition
-            {
-                if(after_pos != NONE && rep_pos < after_pos)
-                {
-                    // rep is located before the node which will be inserted,
-                    // and will be overridden by the duplicate. So replace it.
-                    remove(rep);
-                    prev = duplicate(src, i, parent, prev);
-                }
-                else if(after_pos == NONE || rep_pos >= after_pos)
-                {
-                    // rep is located after the node which will be inserted
-                    // and overrides it. So move the rep into this node's place.
-                    if(rep != prev)
-                    {
-                        move(rep, prev);
-                        prev = rep;
-                    }
-                }
-            } // there's a repetition
+            break;
+        default: // every other character goes through proc.copy()
+            proc.copy();
+            break;
         }
     }
 
-    return prev;
+    _c4dbgfsq(": #filteredchars={} after=~~~[{}]{}~~~", proc.src.len-proc.sofar().len, proc.sofar().len, proc.sofar());
+
+    return proc.result(); // the return type is whatever the processor's result() yields
+}
+
+#undef _c4dbgfsq
+
+template<class EventHandler>
+FilterResult ParseEngine<EventHandler>::filter_scalar_squoted(csubstr scalar, substr dst)
+{
+    FilterProcessorSrcDst proc(scalar, dst); // processor built over the source scalar and a separate destination buffer
+    return _filter_squoted(proc); // run the shared single-quoted filter and hand back its result
+}
+
+template<class EventHandler>
+FilterResult ParseEngine<EventHandler>::filter_scalar_squoted_in_place(substr dst, size_t cap)
+{
+    FilterProcessorInplaceEndExtending proc(dst, cap); // in-place processor over dst; cap is presumably the buffer capacity -- TODO confirm
+    return _filter_squoted(proc); // same filter as filter_scalar_squoted(), only the processor differs
 }
 
 
 //-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+/* double quoted */
+
+// a debugging scaffold: with "#if 0" the macro expands to nothing; switch it to 1 to route the trace through _c4dbgpf (callers must pass at least one argument after fmt)
+#if 0
+#define _c4dbgfdq(fmt, ...) _c4dbgpf("filt_dquo[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
+#else
+#define _c4dbgfdq(...)
+#endif
 
-void Tree::merge_with(Tree const *src, size_t src_node, size_t dst_node)
+template<class EventHandler>
+template<class FilterProcessor> // handles a newline found while filtering a double-quoted scalar
+void ParseEngine<EventHandler>::_filter_nl_dquoted(FilterProcessor &C4_RESTRICT proc)
 {
-    _RYML_CB_ASSERT(m_callbacks, src != nullptr);
-    if(src_node == NONE)
-        src_node = src->root_id();
-    if(dst_node == NONE)
-        dst_node = root_id();
-    _RYML_CB_ASSERT(m_callbacks, src->has_val(src_node) || src->is_seq(src_node) || src->is_map(src_node));
+    _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n'); // must be entered with the read position on a newline
 
-    if(src->has_val(src_node))
+    _c4dbgfdq("found newline. sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
+    size_t ii = proc.rpos; // starts at the newline; its address is handed to the counting helper below
+    const size_t numnl_following = _count_following_newlines(proc.src, &ii);
+    if(numnl_following)
     {
-        if( ! has_val(dst_node))
+        proc.set('\n', numnl_following); // NOTE(review): presumably emits numnl_following newline characters -- confirm against proc.set()
+        _c4dbgfdq("{} consecutive (empty) lines {}. totalws={}", 1+numnl_following, ii < proc.src.len ? "in the middle" : "at the end", proc.rpos-ii);
+    }
+    else
+    {
+        const size_t ret = proc.src.first_not_of(" \t", proc.rpos+1); // first character after the newline that is neither space nor tab
+        if(ret != npos)
         {
-            if(has_children(dst_node))
-                remove_children(dst_node);
+            proc.set(' ');
+            _c4dbgfdq("single newline. convert to space. ret={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
         }
-        if(src->is_keyval(src_node))
-            _copy_props(dst_node, src, src_node);
-        else if(src->is_val(src_node))
-            _copy_props_wo_key(dst_node, src, src_node);
         else
-            C4_NEVER_REACH();
+        {
+            proc.set(' '); // NOTE(review): same action as the branch above; ret only selects the debug message
+            _c4dbgfdq("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, proc.src.len, proc.wpos, proc.sofar());
+        }
+        if(ii < proc.src.len && proc.src.str[ii] == '\\') // a backslash sits where reading would resume
+        {
+            _c4dbgfdq("backslash at [{}]", ii);
+            const char next = ii+1 < proc.src.len ? proc.src.str[ii+1] : '\0'; // '\0' stands in when the backslash is the last character
+            if(next == ' ' || next == '\t')
+            {
+                _c4dbgfdq("extend skip to backslash", "");
+                ++ii; // also step over the backslash when a space or tab follows it
+            }
+        }
     }
-    else if(src->is_seq(src_node))
+    proc.rpos = ii; // resume reading at the position left in ii
+}
+
+template<class EventHandler>
+template<class FilterProcessor> // translates one backslash escape of a double-quoted scalar
+void ParseEngine<EventHandler>::_filter_dquoted_backslash(FilterProcessor &C4_RESTRICT proc)
+{
+    char next = proc.next(); // the character after the backslash selects the escape
+    _c4dbgfdq("backslash, next='{}'", _c4prc(next));
+    if(next == '\r')
     {
-        if( ! is_seq(dst_node))
+        if(proc.rpos+2 < proc.src.len && proc.src.str[proc.rpos+2] == '\n')
         {
-            if(has_children(dst_node))
-                remove_children(dst_node);
-            _clear_type(dst_node);
-            if(src->has_key(src_node))
-                to_seq(dst_node, src->key(src_node));
-            else
-                to_seq(dst_node);
+            proc.skip(); // escaped CR+LF: skip once here; the next=='\n' branch below skips the remainder
+            next = '\n';
+            _c4dbgfdq("[{}]: was \\r\\n, now next='\\n'", proc.rpos);
         }
-        for(size_t sch = src->first_child(src_node); sch != NONE; sch = src->next_sibling(sch))
+    }
+
+    if(next == '\n')
+    {
+        size_t ii = proc.rpos + 2; // first position after the two characters at rpos and rpos+1
+        for( ; ii < proc.src.len; ++ii)
         {
-            size_t dch = append_child(dst_node);
-            _copy_props_wo_key(dch, src, sch);
-            merge_with(src, sch, dch);
+            // skip leading whitespace
+            if(proc.src.str[ii] == ' ' || proc.src.str[ii] == '\t')
+                ;
+            else
+                break;
         }
+        proc.skip(ii - proc.rpos); // one skip covers everything from rpos up to (not including) ii
     }
-    else if(src->is_map(src_node))
+    else if(next == '"' || next == '/' || next == ' ' || next == '\t')
     {
-        if( ! is_map(dst_node))
+        // escapes for json compatibility
+        proc.translate_esc(next);
+        _c4dbgfdq("here, used '{}'", _c4prc(next));
+    }
+    else if(next == '\r')
+    {
+        proc.skip();
+    }
+    else if(next == 'n')
+    {
+        proc.translate_esc('\n');
+    }
+    else if(next == 'r')
+    {
+        proc.translate_esc('\r');
+    }
+    else if(next == 't')
+    {
+        proc.translate_esc('\t');
+    }
+    else if(next == '\\')
+    {
+        proc.translate_esc('\\');
+    }
+    else if(next == 'x') // 'x' escape: one byte given as 2 hex digits
+    {
+        if(C4_UNLIKELY(proc.rpos + 1u + 2u >= proc.src.len)) // the digits sit at rpos+2 and rpos+3
+            _c4err("\\x requires 2 hex digits. scalar pos={}", proc.rpos);
+        csubstr codepoint = proc.src.sub(proc.rpos + 2u, 2u);
+        _c4dbgfdq("utf8 ~~~{}~~~ rpos={} rem=~~~{}~~~", codepoint, proc.rpos, proc.src.sub(proc.rpos));
+        uint8_t byteval = {};
+        if(C4_UNLIKELY(!read_hex(codepoint, &byteval)))
+            _c4err("failed to read \\x codepoint. scalar pos={}", proc.rpos);
+        proc.translate_esc_bulk((const char*)&byteval, 1u, /*nread*/3u); // NOTE(review): nread presumably counts 'x' plus the 2 digits -- confirm
+        _c4dbgfdq("utf8 after rpos={} rem=~~~{}~~~", proc.rpos, proc.src.sub(proc.rpos));
+    }
+    else if(next == 'u') // 'u' escape: a code point given as 4 hex digits
+    {
+        if(C4_UNLIKELY(proc.rpos + 1u + 4u >= proc.src.len)) // the digits sit at rpos+2 .. rpos+5
+            _c4err("\\u requires 4 hex digits. scalar pos={}", proc.rpos);
+        char readbuf[8];
+        csubstr codepoint = proc.src.sub(proc.rpos + 2u, 4u);
+        uint32_t codepoint_val = {};
+        if(C4_UNLIKELY(!read_hex(codepoint, &codepoint_val)))
+            _c4err("failed to parse \\u codepoint. scalar pos={}", proc.rpos);
+        const size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val);
+        if(C4_UNLIKELY(numbytes == 0))
+            _c4err("failed to decode code point={}", proc.rpos); // NOTE(review): the value printed is the read position, not the code point
+        _RYML_CB_ASSERT(callbacks(), numbytes <= 4);
+        proc.translate_esc_bulk(readbuf, numbytes, /*nread*/5u);
+    }
+    else if(next == 'U') // 'U' escape: a code point given as 8 hex digits
+    {
+        if(C4_UNLIKELY(proc.rpos + 1u + 8u >= proc.src.len)) // the digits sit at rpos+2 .. rpos+9
+            _c4err("\\U requires 8 hex digits. scalar pos={}", proc.rpos);
+        char readbuf[8];
+        csubstr codepoint = proc.src.sub(proc.rpos + 2u, 8u);
+        uint32_t codepoint_val = {};
+        if(C4_UNLIKELY(!read_hex(codepoint, &codepoint_val)))
+            _c4err("failed to parse \\U codepoint. scalar pos={}", proc.rpos);
+        const size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val);
+        if(C4_UNLIKELY(numbytes == 0))
+            _c4err("failed to decode code point={}", proc.rpos); // NOTE(review): the value printed is the read position, not the code point
+        _RYML_CB_ASSERT(callbacks(), numbytes <= 4);
+        proc.translate_esc_bulk(readbuf, numbytes, /*nread*/9u);
+    }
+    // https://yaml.org/spec/1.2.2/#rule-c-ns-esc-char
+    else if(next == '0')
+    {
+        proc.translate_esc('\0');
+    }
+    else if(next == 'b') // backspace
+    {
+        proc.translate_esc('\b');
+    }
+    else if(next == 'f') // form feed
+    {
+        proc.translate_esc('\f');
+    }
+    else if(next == 'a') // bell character
+    {
+        proc.translate_esc('\a');
+    }
+    else if(next == 'v') // vertical tab
+    {
+        proc.translate_esc('\v');
+    }
+    else if(next == 'e') // escape character
+    {
+        proc.translate_esc('\x1b');
+    }
+    else if(next == '_') // unicode non breaking space \u00a0
+    {
+        // https://www.compart.com/en/unicode/U+00a0
+        const char payload[] = {
+            _RYML_CHCONST(-0x3e, 0xc2),
+            _RYML_CHCONST(-0x60, 0xa0),
+        };
+        proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1);
+    }
+    else if(next == 'N') // unicode next line \u0085
+    {
+        // https://www.compart.com/en/unicode/U+0085
+        const char payload[] = {
+            _RYML_CHCONST(-0x3e, 0xc2),
+            _RYML_CHCONST(-0x7b, 0x85),
+        };
+        proc.translate_esc_bulk(payload, /*nwrite*/2, /*nread*/1);
+    }
+    else if(next == 'L') // unicode line separator \u2028
+    {
+        // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex
+        const char payload[] = {
+            _RYML_CHCONST(-0x1e, 0xe2),
+            _RYML_CHCONST(-0x80, 0x80),
+            _RYML_CHCONST(-0x58, 0xa8),
+        };
+        proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1); // 3 bytes written; NOTE(review): presumably why the _extending variant is used -- confirm
+    }
+    else if(next == 'P') // unicode paragraph separator \u2029
+    {
+        // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex
+        const char payload[] = {
+            _RYML_CHCONST(-0x1e, 0xe2),
+            _RYML_CHCONST(-0x80, 0x80),
+            _RYML_CHCONST(-0x57, 0xa9),
+        };
+        proc.translate_esc_extending(payload, /*nwrite*/3, /*nread*/1); // 3 bytes written; NOTE(review): presumably why the _extending variant is used -- confirm
+    }
+    else if(next == '\0') // NOTE(review): presumably what next() yields when the backslash is the last character -- confirm
+    {
+        proc.skip();
+    }
+    else
+    {
+        _c4err("unknown character '{}' after '\\' pos={}", _c4prc(next), proc.rpos);
+    }
+    _c4dbgfdq("backslash...sofar=[{}]~~~{}~~~", proc.wpos, proc.sofar());
+}
+
+
+template<class EventHandler>
+template<class FilterProcessor> // filters a double-quoted scalar through the given processor
+auto ParseEngine<EventHandler>::_filter_dquoted(FilterProcessor &C4_RESTRICT proc) -> decltype(proc.result())
+{
+    _c4dbgfdq("before=[{}]~~~{}~~~", proc.src.len, proc.src);
+    // from the YAML spec for double-quoted scalars:
+    // https://yaml.org/spec/1.2-old/spec.html#style/flow/double-quoted
+    while(proc.has_more_chars())
+    {
+        const char curr = proc.curr();
+        _c4dbgfdq("'{}' sofar=[{}]~~~{}~~~", _c4prc(curr), proc.wpos, proc.sofar());
+        switch(curr)
         {
-            if(has_children(dst_node))
-                remove_children(dst_node);
-            _clear_type(dst_node);
-            if(src->has_key(src_node))
-                to_map(dst_node, src->key(src_node));
-            else
-                to_map(dst_node);
+        case ' ':
+        case '\t':
+        {
+            _c4dbgfdq("whitespace", curr);
+            _filter_ws_copy_trailing(proc); // whitespace is delegated to the helper
+            break;
+        }
+        case '\n':
+        {
+            _c4dbgfdq("newline", curr);
+            _filter_nl_dquoted(proc); // line breaks are delegated to _filter_nl_dquoted()
+            break;
+        }
+        case '\r':  // skip \r --- https://stackoverflow.com/questions/1885900
+        {
+            _c4dbgfdq("carriage return, ignore", curr);
+            proc.skip();
+            break;
+        }
+        case '\\':
+        {
+            _filter_dquoted_backslash(proc); // escape sequences are delegated to _filter_dquoted_backslash()
+            break;
         }
-        for(size_t sch = src->first_child(src_node); sch != NONE; sch = src->next_sibling(sch))
+        default: // every other character goes through proc.copy()
         {
-            size_t dch = find_child(dst_node, src->key(sch));
-            if(dch == NONE)
-            {
-                dch = append_child(dst_node);
-                _copy_props(dch, src, sch);
-            }
-            merge_with(src, sch, dch);
+            proc.copy();
+            break;
+        }
         }
     }
-    else
-    {
-        C4_NEVER_REACH();
-    }
+    _c4dbgfdq("after[{}]=~~~{}~~~", proc.wpos, proc.sofar());
+    return proc.result(); // the return type is whatever the processor's result() yields
 }
 
+#undef _c4dbgfdq
 
-//-----------------------------------------------------------------------------
 
-namespace detail {
-/** @todo make this part of the public API, refactoring as appropriate
- * to be able to use the same resolver to handle multiple trees (one
- * at a time) */
-struct ReferenceResolver
+template<class EventHandler>
+FilterResult ParseEngine<EventHandler>::filter_scalar_dquoted(csubstr scalar, substr dst)
 {
-    struct refdata
-    {
-        NodeType type;
-        size_t node;
-        size_t prev_anchor;
-        size_t target;
-        size_t parent_ref;
-        size_t parent_ref_sibling;
-    };
+    FilterProcessorSrcDst proc(scalar, dst); // processor built over the source scalar and a separate destination buffer
+    return _filter_dquoted(proc); // run the shared double-quoted filter and hand back its result
+}
 
-    Tree *t;
-    /** from the specs: "an alias node refers to the most recent
-     * node in the serialization having the specified anchor". So
-     * we need to start looking upward from ref nodes.
-     *
-     * @see http://yaml.org/spec/1.2/spec.html#id2765878 */
-    stack<refdata> refs;
+template<class EventHandler>
+FilterResultExtending ParseEngine<EventHandler>::filter_scalar_dquoted_in_place(substr dst, size_t cap)
+{
+    FilterProcessorInplaceMidExtending proc(dst, cap); // note: MidExtending here, whereas the plain and single-quoted in-place variants use EndExtending
+    return _filter_dquoted(proc); // result type is FilterResultExtending, unlike the other filter_scalar_* entry points
+}
 
-    ReferenceResolver(Tree *t_) : t(t_), refs(t_->callbacks())
-    {
-        resolve();
-    }
 
-    void store_anchors_and_refs()
-    {
-        // minimize (re-)allocations by counting first
-        size_t num_anchors_and_refs = count_anchors_and_refs(t->root_id());
-        if(!num_anchors_and_refs)
-            return;
-        refs.reserve(num_anchors_and_refs);
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+// block filtering helpers
+
+template<class EventHandler>
+template<class FilterProcessor> // processes the trailing lines of a block scalar according to the chomp mode
+void ParseEngine<EventHandler>::_filter_chomp(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp, size_t indentation)
+{
+    _RYML_CB_ASSERT(this->callbacks(), chomp == CHOMP_CLIP || chomp == CHOMP_KEEP || chomp == CHOMP_STRIP);
+    _RYML_CB_ASSERT(this->callbacks(), proc.rem().first_not_of(" \n\r") == npos); // precondition: only spaces, LF and CR remain to be read
 
-        // now descend through the hierarchy
-        _store_anchors_and_refs(t->root_id());
+    // a debugging scaffold:
+    #if 0
+    #define _c4dbgchomp(fmt, ...) _c4dbgpf("chomp[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
+    #else
+    #define _c4dbgchomp(...)
+    #endif
 
-        // finally connect the reference list
-        size_t prev_anchor = npos;
-        size_t count = 0;
-        for(auto &rd : refs)
+    // advance to the last line having spaces beyond the indentation
+    {
+        size_t last = _find_last_newline_and_larger_indentation(proc.rem(), indentation);
+        if(last != npos)
         {
-            rd.prev_anchor = prev_anchor;
-            if(rd.type.is_anchor())
-                prev_anchor = count;
-            ++count;
+            _c4dbgchomp("found newline and larger indentation. last={}", last);
+            last = proc.rpos + last + size_t(1) + indentation;  // last was relative to rem(); make it absolute, past the newline and the indentation
+            _RYML_CB_ASSERT(this->callbacks(), last <= proc.src.len);
+            // remove indentation spaces, copy the rest
+            while((proc.rpos < last) && proc.has_more_chars())
+            {
+                const char curr = proc.curr();
+                _c4dbgchomp("curr='{}'", _c4prc(curr));
+                switch(curr)
+                {
+                case '\n':
+                    {
+                        _c4dbgchomp("newline! remlen={}", proc.rem().len);
+                        proc.copy();
+                        // are there spaces after the newline?
+                        csubstr at_next_line = proc.rem();
+                        if(at_next_line.begins_with(' '))
+                        {
+                            _c4dbgchomp("next line begins with spaces. indentation={}", indentation);
+                            // there are spaces.
+                            size_t first_non_space = at_next_line.first_not_of(' ');
+                            _c4dbgchomp("first_non_space={}", first_non_space);
+                            if(first_non_space == npos)
+                            {
+                                _c4dbgchomp("{} spaces, to the end", at_next_line.len);
+                                first_non_space = at_next_line.len; // only spaces remain: count all of them
+                            }
+                            if(first_non_space <= indentation)
+                            {
+                                _c4dbgchomp("skip spaces={}<=indentation={}", first_non_space, indentation);
+                                proc.skip(first_non_space); // the line has no spaces beyond the indentation
+                            }
+                            else
+                            {
+                                _c4dbgchomp("skip indentation={}<spaces={}", indentation, first_non_space);
+                                proc.skip(indentation);
+                                // copy the spaces after the indentation
+                                _c4dbgchomp("copy {}={}-{} spaces", first_non_space - indentation, first_non_space, indentation);
+                                proc.copy(first_non_space - indentation);
+                            }
+                        }
+                        break;
+                    }
+                case '\r':
+                    proc.skip();
+                    break;
+                default: // any other character at this point is reported as an error
+                    _c4err("parse error");
+                    break;
+                }
+            }
         }
     }
 
-    size_t count_anchors_and_refs(size_t n)
+    // from now on, we only have line ends (or indentation spaces)
+    switch(chomp)
     {
-        size_t c = 0;
-        c += t->has_key_anchor(n);
-        c += t->has_val_anchor(n);
-        c += t->is_key_ref(n);
-        c += t->is_val_ref(n);
-        for(size_t ch = t->first_child(n); ch != NONE; ch = t->next_sibling(ch))
-            c += count_anchors_and_refs(ch);
-        return c;
-    }
-
-    void _store_anchors_and_refs(size_t n)
+    case CHOMP_CLIP:
     {
-        if(t->is_key_ref(n) || t->is_val_ref(n) || (t->has_key(n) && t->key(n) == "<<"))
+        bool had_one = false; // set once a newline has been copied
+        while(proc.has_more_chars())
         {
-            if(t->is_seq(n))
+            const char curr = proc.curr();
+            _c4dbgchomp("CLIP: '{}'", _c4prc(curr));
+            switch(curr)
             {
-                // for merging multiple inheritance targets
-                //   <<: [ *CENTER, *BIG ]
-                for(size_t ich = t->first_child(n); ich != NONE; ich = t->next_sibling(ich))
-                {
-                    RYML_ASSERT(t->num_children(ich) == 0);
-                    refs.push({VALREF, ich, npos, npos, n, t->next_sibling(n)});
-                }
-                return;
-            }
-            if(t->is_key_ref(n) && t->key(n) != "<<") // insert key refs BEFORE inserting val refs
+            case '\n':
             {
-                RYML_CHECK((!t->has_key(n)) || t->key(n).ends_with(t->key_ref(n)));
-                refs.push({KEYREF, n, npos, npos, NONE, NONE});
+                _c4dbgchomp("copy newline!", curr);
+                proc.copy();
+                proc.set_at_end(); // NOTE(review): presumably moves the read position to the end so only this newline is kept -- confirm
+                had_one = true;
+                break;
             }
-            if(t->is_val_ref(n))
-            {
-                RYML_CHECK((!t->has_val(n)) || t->val(n).ends_with(t->val_ref(n)));
-                refs.push({VALREF, n, npos, npos, NONE, NONE});
+            case ' ':
+            case '\r':
+                _c4dbgchomp("skip!", curr);
+                proc.skip();
+                break; // no default case: the assert at entry limits the input to these characters
             }
         }
-        if(t->has_key_anchor(n))
-        {
-            RYML_CHECK(t->has_key(n));
-            refs.push({KEYANCH, n, npos, npos, NONE, NONE});
-        }
-        if(t->has_val_anchor(n))
+        if(!had_one) // there were no newline characters. add one.
         {
-            RYML_CHECK(t->has_val(n) || t->is_container(n));
-            refs.push({VALANCH, n, npos, npos, NONE, NONE});
-        }
-        for(size_t ch = t->first_child(n); ch != NONE; ch = t->next_sibling(ch))
-        {
-            _store_anchors_and_refs(ch);
+            _c4dbgchomp("chomp=CLIP: add missing newline @{}", proc.wpos);
+            proc.set('\n');
         }
+        break;
     }
-
-    size_t lookup_(refdata *C4_RESTRICT ra)
+    case CHOMP_KEEP:
     {
-        RYML_ASSERT(ra->type.is_key_ref() || ra->type.is_val_ref());
-        RYML_ASSERT(ra->type.is_key_ref() != ra->type.is_val_ref());
-        csubstr refname;
-        if(ra->type.is_val_ref())
-        {
-            refname = t->val_ref(ra->node);
-        }
-        else
+        _c4dbgchomp("chomp=KEEP: copy all remaining new lines of {} characters", proc.rem().len);
+        while(proc.has_more_chars())
         {
-            RYML_ASSERT(ra->type.is_key_ref());
-            refname = t->key_ref(ra->node);
-        }
-        while(ra->prev_anchor != npos)
-        {
-            ra = &refs[ra->prev_anchor];
-            if(t->has_anchor(ra->node, refname))
-                return ra->node;
+            const char curr = proc.curr();
+            _c4dbgchomp("KEEP: '{}'", _c4prc(curr));
+            switch(curr)
+            {
+            case '\n':
+                _c4dbgchomp("copy newline!", curr);
+                proc.copy(); // every remaining newline is copied
+                break;
+            case ' ':
+            case '\r':
+                _c4dbgchomp("skip!", curr);
+                proc.skip(); // spaces and carriage returns are skipped
+                break;
+            }
         }
-
-        #ifndef RYML_ERRMSG_SIZE
-          #define RYML_ERRMSG_SIZE 1024
-        #endif
-
-        char errmsg[RYML_ERRMSG_SIZE];
-        snprintf(errmsg, RYML_ERRMSG_SIZE, "anchor does not exist: '%.*s'",
-                 static_cast<int>(refname.size()), refname.data());
-        c4::yml::error(errmsg);
-        return NONE;
+        break;
     }
-
-    void resolve()
+    case CHOMP_STRIP:
     {
-        store_anchors_and_refs();
-        if(refs.empty())
-            return;
-
-        /* from the specs: "an alias node refers to the most recent
-         * node in the serialization having the specified anchor". So
-         * we need to start looking upward from ref nodes.
-         *
-         * @see http://yaml.org/spec/1.2/spec.html#id2765878 */
-        for(size_t i = 0, e = refs.size(); i < e; ++i)
-        {
-            auto &C4_RESTRICT rd = refs.top(i);
-            if( ! rd.type.is_ref())
-                continue;
-            rd.target = lookup_(&rd);
-        }
+        _c4dbgchomp("chomp=STRIP: strip {} characters", proc.rem().len);
+        // nothing to do: the remaining characters are neither copied nor skipped here
+        break;
+    }
     }
 
-}; // ReferenceResolver
-} // namespace detail
+    #undef _c4dbgchomp
+}
 
-void Tree::resolve()
-{
-    if(m_size == 0)
-        return;
 
-    detail::ReferenceResolver rr(this);
+// a debugging scaffold: with "#if 0" the macro expands to nothing; switch it to 1 to route the trace through _c4dbgpf (callers must pass at least one argument after fmt)
+#if 0
+#define _c4dbgfb(fmt, ...) _c4dbgpf("filt_block[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
+#else
+#define _c4dbgfb(...)
+#endif
 
-    // insert the resolved references
-    size_t prev_parent_ref = NONE;
-    size_t prev_parent_ref_after = NONE;
-    for(auto const& C4_RESTRICT rd : rr.refs)
+template<class EventHandler>
+template<class FilterProcessor>
+void ParseEngine<EventHandler>::_filter_block_indentation(FilterProcessor &C4_RESTRICT proc, size_t indentation)
+{
+    csubstr rem = proc.rem(); // remaining
+    if(rem.len)
     {
-        if( ! rd.type.is_ref())
-            continue;
-        if(rd.parent_ref != NONE)
+        size_t first = rem.first_not_of(' ');
+        if(first != npos)
         {
-            _RYML_CB_ASSERT(m_callbacks, is_seq(rd.parent_ref));
-            size_t after, p = parent(rd.parent_ref);
-            if(prev_parent_ref != rd.parent_ref)
+            _c4dbgfb("{} spaces follow before next nonws character", first);
+            if(first < indentation)
             {
-                after = rd.parent_ref;//prev_sibling(rd.parent_ref_sibling);
-                prev_parent_ref_after = after;
+                _c4dbgfb("skip {}<{} spaces from indentation", first, indentation);
+                proc.skip(first);
             }
             else
             {
-                after = prev_parent_ref_after;
+                _c4dbgfb("skip {} spaces from indentation", indentation);
+                proc.skip(indentation);
             }
-            prev_parent_ref = rd.parent_ref;
-            prev_parent_ref_after = duplicate_children_no_rep(rd.target, p, after);
-            remove(rd.node);
         }
+        #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
         else
         {
-            if(has_key(rd.node) && is_key_ref(rd.node) && key(rd.node) == "<<")
-            {
-                _RYML_CB_ASSERT(m_callbacks, is_keyval(rd.node));
-                size_t p = parent(rd.node);
-                size_t after = prev_sibling(rd.node);
-                duplicate_children_no_rep(rd.target, p, after);
-                remove(rd.node);
-            }
-            else if(rd.type.is_key_ref())
-            {
-                _RYML_CB_ASSERT(m_callbacks, is_key_ref(rd.node));
-                _RYML_CB_ASSERT(m_callbacks, has_key_anchor(rd.target) || has_val_anchor(rd.target));
-                if(has_val_anchor(rd.target) && val_anchor(rd.target) == key_ref(rd.node))
-                {
-                    _RYML_CB_CHECK(m_callbacks, !is_container(rd.target));
-                    _RYML_CB_CHECK(m_callbacks, has_val(rd.target));
-                    _p(rd.node)->m_key.scalar = val(rd.target);
-                    _add_flags(rd.node, KEY);
-                }
-                else
-                {
-                    _RYML_CB_CHECK(m_callbacks, key_anchor(rd.target) == key_ref(rd.node));
-                    _p(rd.node)->m_key.scalar = key(rd.target);
-                    _add_flags(rd.node, VAL);
-                }
-            }
-            else
+            _c4dbgfb("all spaces to the end: {} spaces", first);
+            first = rem.len;
+            if(first)
             {
-                _RYML_CB_ASSERT(m_callbacks, rd.type.is_val_ref());
-                if(has_key_anchor(rd.target) && key_anchor(rd.target) == val_ref(rd.node))
+                if(first < indentation)
                 {
-                    _RYML_CB_CHECK(m_callbacks, !is_container(rd.target));
-                    _RYML_CB_CHECK(m_callbacks, has_val(rd.target));
-                    _p(rd.node)->m_val.scalar = key(rd.target);
-                    _add_flags(rd.node, VAL);
+                    _c4dbgfb("skip everything", first);
+                    proc.skip(proc.src.len - proc.rpos);
                 }
                 else
                 {
-                    duplicate_contents(rd.target, rd.node);
+                    _c4dbgfb("skip {} spaces from indentation", indentation);
+                    proc.skip(indentation);
                 }
             }
         }
+        #endif
     }
-
-    // clear anchors and refs
-    for(auto const& C4_RESTRICT ar : rr.refs)
-    {
-        rem_anchor_ref(ar.node);
-        if(ar.parent_ref != NONE)
-            if(type(ar.parent_ref) != NOTYPE)
-                remove(ar.parent_ref);
-    }
-
-}
-
-//-----------------------------------------------------------------------------
-
-size_t Tree::num_children(size_t node) const
-{
-    size_t count = 0;
-    for(size_t i = first_child(node); i != NONE; i = next_sibling(i))
-    {
-        ++count;
-    }
-    return count;
 }
 
-size_t Tree::child(size_t node, size_t pos) const
+template<class EventHandler>
+template<class FilterProcessor>
+size_t ParseEngine<EventHandler>::_handle_all_whitespace(FilterProcessor &C4_RESTRICT proc, BlockChomp_e chomp)
 {
-    _RYML_CB_ASSERT(m_callbacks, node != NONE);
-    size_t count = 0;
-    for(size_t i = first_child(node); i != NONE; i = next_sibling(i))
+    csubstr contents = proc.src.trimr(" \n\r");
+    _c4dbgfb("ws: contents_len={} wslen={}", contents.len, proc.src.len-contents.len);
+    if(!contents.len)
     {
-        if(count++ == pos)
-            return i;
+        _c4dbgfb("ws: all whitespace: len={}", proc.src.len);
+        if(chomp == CHOMP_KEEP && proc.src.len)
+        {
+            _c4dbgfb("ws: chomp=KEEP all {} newlines", proc.src.count('\n'));
+            while(proc.has_more_chars())
+            {
+                const char curr = proc.curr();
+                if(curr == '\n')
+                    proc.copy();
+                else
+                    proc.skip();
+            }
+            if(!proc.wpos)
+            {
+                proc.set('\n');
+            }
+        }
     }
-    return NONE;
+    return contents.len;
 }
 
-size_t Tree::child_pos(size_t node, size_t ch) const
+template<class EventHandler>
+template<class FilterProcessor>
+size_t ParseEngine<EventHandler>::_extend_to_chomp(FilterProcessor &C4_RESTRICT proc, size_t contents_len)
 {
-    size_t count = 0;
-    for(size_t i = first_child(node); i != NONE; i = next_sibling(i))
-    {
-        if(i == ch)
-            return count;
-        ++count;
-    }
-    return npos;
-}
+    _c4dbgfb("contents_len={}", contents_len);
 
-#if defined(__clang__)
-#   pragma clang diagnostic push
-#   pragma GCC diagnostic ignored "-Wnull-dereference"
-#elif defined(__GNUC__)
-#   pragma GCC diagnostic push
-#   if __GNUC__ >= 6
-#       pragma GCC diagnostic ignored "-Wnull-dereference"
-#   endif
-#endif
+    _RYML_CB_ASSERT(this->callbacks(), contents_len > 0u);
 
-size_t Tree::find_child(size_t node, csubstr const& name) const
-{
-    _RYML_CB_ASSERT(m_callbacks, node != NONE);
-    _RYML_CB_ASSERT(m_callbacks, is_map(node));
-    if(get(node)->m_first_child == NONE)
+    // extend contents to just before the first newline at the end,
+    // in case it is preceded by spaces
+    size_t firstnewl = proc.src.first_of('\n', contents_len);
+    if(firstnewl != npos)
     {
-        _RYML_CB_ASSERT(m_callbacks, _p(node)->m_last_child == NONE);
-        return NONE;
+        contents_len = firstnewl;
+        _c4dbgfb("contents_len={}  <--- firstnewl={}", contents_len, firstnewl);
     }
     else
     {
-        _RYML_CB_ASSERT(m_callbacks, _p(node)->m_last_child != NONE);
-    }
-    for(size_t i = first_child(node); i != NONE; i = next_sibling(i))
-    {
-        if(_p(i)->m_key.scalar == name)
-        {
-            return i;
-        }
+        contents_len = proc.src.len;
+        _c4dbgfb("contents_len={}  <--- src.len={}", contents_len, proc.src.len);
     }
-    return NONE;
+
+    return contents_len;
 }
 
-#if defined(__clang__)
-#   pragma clang diagnostic pop
-#elif defined(__GNUC__)
-#   pragma GCC diagnostic pop
-#endif
+#undef _c4dbgfb
 
 
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 
-void Tree::to_val(size_t node, csubstr val, type_bits more_flags)
-{
-    _RYML_CB_ASSERT(m_callbacks,  ! has_children(node));
-    _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || ! parent_is_map(node));
-    _set_flags(node, VAL|more_flags);
-    _p(node)->m_key.clear();
-    _p(node)->m_val = val;
-}
+// a debugging scaffold:
+#if 0
+#define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block_lit[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
+#else
+#define _c4dbgfbl(...)
+#endif
 
-void Tree::to_keyval(size_t node, csubstr key, csubstr val, type_bits more_flags)
+template<class EventHandler>
+template<class FilterProcessor>
+auto ParseEngine<EventHandler>::_filter_block_literal(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
 {
-    _RYML_CB_ASSERT(m_callbacks,  ! has_children(node));
-    _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || parent_is_map(node));
-    _set_flags(node, KEYVAL|more_flags);
-    _p(node)->m_key = key;
-    _p(node)->m_val = val;
-}
+    _c4dbgfbl("indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src);
 
-void Tree::to_map(size_t node, type_bits more_flags)
-{
-    _RYML_CB_ASSERT(m_callbacks,  ! has_children(node));
-    _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || ! parent_is_map(node)); // parent must not have children with keys
-    _set_flags(node, MAP|more_flags);
-    _p(node)->m_key.clear();
-    _p(node)->m_val.clear();
-}
+    size_t contents_len = _handle_all_whitespace(proc, chomp);
+    if(!contents_len)
+        return proc.result();
 
-void Tree::to_map(size_t node, csubstr key, type_bits more_flags)
-{
-    _RYML_CB_ASSERT(m_callbacks,  ! has_children(node));
-    _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || parent_is_map(node));
-    _set_flags(node, KEY|MAP|more_flags);
-    _p(node)->m_key = key;
-    _p(node)->m_val.clear();
-}
+    contents_len = _extend_to_chomp(proc, contents_len);
 
-void Tree::to_seq(size_t node, type_bits more_flags)
-{
-    _RYML_CB_ASSERT(m_callbacks,  ! has_children(node));
-    _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || parent_is_seq(node));
-    _set_flags(node, SEQ|more_flags);
-    _p(node)->m_key.clear();
-    _p(node)->m_val.clear();
-}
+    _c4dbgfbl("to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len));
 
-void Tree::to_seq(size_t node, csubstr key, type_bits more_flags)
-{
-    _RYML_CB_ASSERT(m_callbacks,  ! has_children(node));
-    _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || parent_is_map(node));
-    _set_flags(node, KEY|SEQ|more_flags);
-    _p(node)->m_key = key;
-    _p(node)->m_val.clear();
-}
+    _filter_block_indentation(proc, indentation);
 
-void Tree::to_doc(size_t node, type_bits more_flags)
-{
-    _RYML_CB_ASSERT(m_callbacks,  ! has_children(node));
-    _set_flags(node, DOC|more_flags);
-    _p(node)->m_key.clear();
-    _p(node)->m_val.clear();
+    // now filter the bulk
+    while(proc.has_more_chars(/*maxpos*/contents_len))
+    {
+        const char curr = proc.curr();
+        _c4dbgfbl("'{}' sofar=[{}]~~~{}~~~",  _c4prc(curr), proc.wpos, proc.sofar());
+        switch(curr)
+        {
+        case '\n':
+        {
+            _c4dbgfbl("found newline. skip indentation on the next line", curr);
+            proc.copy();  // copy the newline
+            _filter_block_indentation(proc, indentation);
+            break;
+        }
+        case '\r':
+            proc.skip();
+            break;
+        default:
+            proc.copy();
+            break;
+        }
+    }
+
+    _c4dbgfbl("before chomp: #tochomp={}   sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar());
+
+    _filter_chomp(proc, chomp, indentation);
+
+    _c4dbgfbl("final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar());
+
+    return proc.result();
+}
+
+#undef _c4dbgfbl
+
+template<class EventHandler>
+FilterResult ParseEngine<EventHandler>::filter_scalar_block_literal(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
+{
+    FilterProcessorSrcDst proc(scalar, dst);
+    return _filter_block_literal(proc, indentation, chomp);
 }
 
-void Tree::to_stream(size_t node, type_bits more_flags)
+template<class EventHandler>
+FilterResult ParseEngine<EventHandler>::filter_scalar_block_literal_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
 {
-    _RYML_CB_ASSERT(m_callbacks,  ! has_children(node));
-    _set_flags(node, STREAM|more_flags);
-    _p(node)->m_key.clear();
-    _p(node)->m_val.clear();
+    FilterProcessorInplaceEndExtending proc(scalar, cap);
+    return _filter_block_literal(proc, indentation, chomp);
 }
 
 
 //-----------------------------------------------------------------------------
-size_t Tree::num_tag_directives() const
-{
-    // this assumes we have a very small number of tag directives
-    for(size_t i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i)
-        if(m_tag_directives[i].handle.empty())
-            return i;
-    return RYML_MAX_TAG_DIRECTIVES;
-}
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
 
-void Tree::clear_tag_directives()
+// a debugging scaffold:
+#if 0
+#define _c4dbgfbf(fmt, ...) _c4dbgpf("filt_block_folded[{}->{}]: " fmt, proc.rpos, proc.wpos, __VA_ARGS__)
+#else
+#define _c4dbgfbf(...)
+#endif
+
+
+template<class EventHandler>
+template<class FilterProcessor>
+void ParseEngine<EventHandler>::_filter_block_folded_newlines_leading(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len)
 {
-    for(TagDirective &td : m_tag_directives)
-        td = {};
+    _filter_block_indentation(proc, indentation);
+    while(proc.has_more_chars(len))
+    {
+        const char curr = proc.curr();
+        _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~",  _c4prc(curr), proc.wpos, proc.sofar());
+        switch(curr)
+        {
+        case '\n':
+            _c4dbgfbf("newline.", curr);
+            proc.copy();
+            _filter_block_indentation(proc, indentation);
+            break;
+        case '\r':
+            proc.skip();
+            break;
+        case ' ':
+        case '\t':
+        {
+            size_t first = proc.rem().first_not_of(" \t");
+            _c4dbgfbf("space. first={}", first);
+            if(first == npos)
+                first = proc.rem().len;
+            _c4dbgfbf("... indentation increased to {}",  first);
+            _filter_block_folded_indented_block(proc, indentation, len, first);
+            break;
+        }
+        default:
+            _c4dbgfbf("newl leading: not space, not newline. stop.", 0);
+            return;
+        }
+    }
 }
 
-size_t Tree::add_tag_directive(TagDirective const& td)
+template<class EventHandler>
+template<class FilterProcessor>
+size_t ParseEngine<EventHandler>::_filter_block_folded_newlines_compress(FilterProcessor &C4_RESTRICT proc, size_t num_newl, size_t wpos_at_first_newl)
 {
-    _RYML_CB_CHECK(m_callbacks, !td.handle.empty());
-    _RYML_CB_CHECK(m_callbacks, !td.prefix.empty());
-    _RYML_CB_ASSERT(m_callbacks, td.handle.begins_with('!'));
-    _RYML_CB_ASSERT(m_callbacks, td.handle.ends_with('!'));
-    // https://yaml.org/spec/1.2.2/#rule-ns-word-char
-    _RYML_CB_ASSERT(m_callbacks, td.handle == '!' || td.handle == "!!" || td.handle.trim('!').first_not_of("01234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-") == npos);
-    size_t pos = num_tag_directives();
-    _RYML_CB_CHECK(m_callbacks, pos < RYML_MAX_TAG_DIRECTIVES);
-    m_tag_directives[pos] = td;
-    return pos;
+    switch(num_newl)
+    {
+    case 1u:
+        _c4dbgfbf("... this is the first newline. turn into space. wpos={}", proc.wpos);
+        wpos_at_first_newl = proc.wpos;
+        proc.skip();
+        proc.set(' ');
+        break;
+    case 2u:
+        _c4dbgfbf("... this is the second newline. prev space (at wpos={}) must be newline", wpos_at_first_newl);
+        _RYML_CB_ASSERT(this->callbacks(), wpos_at_first_newl != npos);
+        _RYML_CB_ASSERT(this->callbacks(), proc.sofar()[wpos_at_first_newl] == ' ');
+        _RYML_CB_ASSERT(this->callbacks(), wpos_at_first_newl + 1u == proc.wpos);
+        proc.skip();
+        proc.set_at(wpos_at_first_newl, '\n');
+        _RYML_CB_ASSERT(this->callbacks(), proc.sofar()[wpos_at_first_newl] == '\n');
+        break;
+    default:
+        _c4dbgfbf("... subsequent newline (num_newl={}). copy", num_newl);
+        proc.copy();
+        break;
+    }
+    return wpos_at_first_newl;
+}
+
+template<class EventHandler>
+template<class FilterProcessor>
+void ParseEngine<EventHandler>::_filter_block_folded_newlines(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len)
+{
+    _RYML_CB_ASSERT(this->callbacks(), proc.curr() == '\n');
+    size_t num_newl = 0;
+    size_t wpos_at_first_newl = npos;
+    while(proc.has_more_chars(len))
+    {
+        const char curr = proc.curr();
+        _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~",  _c4prc(curr), proc.wpos, proc.sofar());
+        switch(curr)
+        {
+        case '\n':
+        {
+            _c4dbgfbf("newline. sofar={}", num_newl);
+            // NOTE: vs2022-32bit-release builds were giving wrong
+            // results in this block, if it was written either
+            // as a switch(num_newl) or as its equivalent if-form.
+            //
+            // For this reason, we're using a dedicated function
+            // (**_compress), which seems to work around the issue.
+            //
+            // The manifested problem was that somewhere between the
+            // assignment to curr and this point, proc.wpos (the
+            // write-position of the processor) jumped to npos, which
+            // made the write wrap-around! To make things worse,
+            // enabling prints via _c4dbgpf() and _c4dbgfbf() made the
+            // problem go away!
+            //
+            // The only way to make the problem appear with prints
+            // enabled was by disabling all prints in this function
+            // (including in the block which was moved to the compress
+            // function) and then selectively enabling only some of
+            // those prints.
+            //
+            // This may be due to some bug in the cl-x86 optimizer; or
+            // it may be triggered by some UB which may be
+            // inadvertently present in this function or in the filter
+            // processor. This is despite our best efforts to weed out
+            // any such UB problem: neither clang-tidy nor any of the
+            // sanitizers, or gcc's -fanalyzer pointed to any problems
+            // in this code.
+            //
+            // In the end, moving this block to a separate function
+            // was the only way to bury the problem. But it may
+            // resurface again, as The Undead, rising from the
+            // grave to haunt us with his terrible presence.
+            //
+            // We may have to revisit this. With a stake, and lots of
+            // garlic.
+            wpos_at_first_newl = _filter_block_folded_newlines_compress(proc, ++num_newl, wpos_at_first_newl);
+            _filter_block_indentation(proc, indentation);
+            break;
+        }
+        case ' ':
+        case '\t':
+            {
+                size_t first = proc.rem().first_not_of(" \t");
+                _c4dbgfbf("space. first={}", first);
+                if(first == npos)
+                    first = proc.rem().len;
+                _c4dbgfbf("... indentation increased to {}",  first);
+                if(num_newl)
+                {
+                    _c4dbgfbf("... prev space (at wpos={}) must be newline", wpos_at_first_newl);
+                    proc.set_at(wpos_at_first_newl, '\n');
+                }
+                if(num_newl > 1u)
+                {
+                    _c4dbgfbf("... add missing newline", wpos_at_first_newl);
+                    proc.set('\n');
+                }
+                _filter_block_folded_indented_block(proc, indentation, len, first);
+                num_newl = 0;
+                wpos_at_first_newl = npos;
+                break;
+            }
+        case '\r':
+            proc.skip();
+            break;
+        default:
+            _c4dbgfbf("not space, not newline. stop.", 0);
+            return;
+        }
+    }
 }
 
-size_t Tree::resolve_tag(substr output, csubstr tag, size_t node_id) const
+
+template<class EventHandler>
+template<class FilterProcessor>
+void ParseEngine<EventHandler>::_filter_block_folded_indented_block(FilterProcessor &C4_RESTRICT proc, size_t indentation, size_t len, size_t curr_indentation) noexcept
 {
-    // lookup from the end. We want to find the first directive that
-    // matches the tag and has a target node id leq than the given
-    // node_id.
-    for(size_t i = RYML_MAX_TAG_DIRECTIVES-1; i != (size_t)-1; --i)
+    _RYML_CB_ASSERT(this->callbacks(), (proc.rem().first_not_of(" \t") == curr_indentation) || (proc.rem().first_not_of(" \t") == npos));
+    if(curr_indentation)
+        proc.copy(curr_indentation);
+    while(proc.has_more_chars(len))
     {
-        auto const& td = m_tag_directives[i];
-        if(td.handle.empty())
-            continue;
-        if(tag.begins_with(td.handle) && td.next_node_id <= node_id)
+        const char curr = proc.curr();
+        _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~",  _c4prc(curr), proc.wpos, proc.sofar());
+        switch(curr)
         {
-            _RYML_CB_ASSERT(m_callbacks, tag.len >= td.handle.len);
-            csubstr rest = tag.sub(td.handle.len);
-            size_t len = 1u + td.prefix.len + rest.len + 1u;
-            size_t numpc = rest.count('%');
-            if(numpc == 0)
+        case '\n':
             {
-                if(len <= output.len)
+                proc.copy();
+                _filter_block_indentation(proc, indentation);
+                csubstr rem = proc.rem();
+                const size_t first = rem.first_not_of(' ');
+                _c4dbgfbf("newline. firstns={}",  first);
+                if(first == 0)
                 {
-                    output.str[0] = '<';
-                    memcpy(1u + output.str, td.prefix.str, td.prefix.len);
-                    memcpy(1u + output.str + td.prefix.len, rest.str, rest.len);
-                    output.str[1u + td.prefix.len + rest.len] = '>';
+                    const char c = rem[first];
+                    _c4dbgfbf("firstns={}='{}'", first, _c4prc(c));
+                    if(c == '\n' || c == '\r')
+                    {
+                        ;
+                    }
+                    else
+                    {
+                        _c4dbgfbf("done with indented block",  first);
+                        goto endloop;
+                    }
                 }
-            }
-            else
-            {
-                // need to decode URI % sequences
-                size_t pos = rest.find('%');
-                _RYML_CB_ASSERT(m_callbacks, pos != npos);
-                do {
-                    size_t next = rest.first_not_of("0123456789abcdefABCDEF", pos+1);
-                    if(next == npos)
-                        next = rest.len;
-                    _RYML_CB_CHECK(m_callbacks, pos+1 < next);
-                    _RYML_CB_CHECK(m_callbacks, pos+1 + 2 <= next);
-                    size_t delta = next - (pos+1);
-                    len -= delta;
-                    pos = rest.find('%', pos+1);
-                } while(pos != npos);
-                if(len <= output.len)
+                else if(first != npos)
                 {
-                    size_t prev = 0, wpos = 0;
-                    auto appendstr = [&](csubstr s) { memcpy(output.str + wpos, s.str, s.len); wpos += s.len; };
-                    auto appendchar = [&](char c) { output.str[wpos++] = c; };
-                    appendchar('<');
-                    appendstr(td.prefix);
-                    pos = rest.find('%');
-                    _RYML_CB_ASSERT(m_callbacks, pos != npos);
-                    do {
-                        size_t next = rest.first_not_of("0123456789abcdefABCDEF", pos+1);
-                        if(next == npos)
-                            next = rest.len;
-                        _RYML_CB_CHECK(m_callbacks, pos+1 < next);
-                        _RYML_CB_CHECK(m_callbacks, pos+1 + 2 <= next);
-                        uint8_t val;
-                        if(C4_UNLIKELY(!read_hex(rest.range(pos+1, next), &val) || val > 127))
-                            _RYML_CB_ERR(m_callbacks, "invalid URI character");
-                        appendstr(rest.range(prev, pos));
-                        appendchar((char)val);
-                        prev = next;
-                        pos = rest.find('%', pos+1);
-                    } while(pos != npos);
-                    _RYML_CB_ASSERT(m_callbacks, pos == npos);
-                    _RYML_CB_ASSERT(m_callbacks, prev > 0);
-                    _RYML_CB_ASSERT(m_callbacks, rest.len >= prev);
-                    appendstr(rest.sub(prev));
-                    appendchar('>');
-                    _RYML_CB_ASSERT(m_callbacks, wpos == len);
+                    proc.copy(first);
+                    _c4dbgfbf("copy all {} spaces",  first);
                 }
+                break;
             }
-            return len;
+            break;
+        case '\r':
+            proc.skip();
+            break;
+        default:
+            proc.copy();
+            break;
         }
     }
-    return 0; // return 0 to signal that the tag is local and cannot be resolved
+ endloop:
+    return;
 }
 
-namespace {
-csubstr _transform_tag(Tree *t, csubstr tag, size_t node)
-{
-    size_t required_size = t->resolve_tag(substr{}, tag, node);
-    if(!required_size)
-        return tag;
-    const char *prev_arena = t->arena().str;
-    substr buf = t->alloc_arena(required_size);
-    _RYML_CB_ASSERT(t->m_callbacks, t->arena().str == prev_arena);
-    size_t actual_size = t->resolve_tag(buf, tag, node);
-    _RYML_CB_ASSERT(t->m_callbacks, actual_size <= required_size);
-    return buf.first(actual_size);
-}
-void _resolve_tags(Tree *t, size_t node)
+
+template<class EventHandler>
+template<class FilterProcessor>
+auto ParseEngine<EventHandler>::_filter_block_folded(FilterProcessor &C4_RESTRICT proc, size_t indentation, BlockChomp_e chomp) -> decltype(proc.result())
 {
-    for(size_t child = t->first_child(node); child != NONE; child = t->next_sibling(child))
+    _c4dbgfbf("indentation={} before=[{}]~~~{}~~~", indentation, proc.src.len, proc.src);
+
+    size_t contents_len = _handle_all_whitespace(proc, chomp);
+    if(!contents_len)
+        return proc.result();
+
+    contents_len = _extend_to_chomp(proc, contents_len);
+
+    _c4dbgfbf("to filter=[{}]~~~{}~~~", contents_len, proc.src.first(contents_len));
+
+    _filter_block_folded_newlines_leading(proc, indentation, contents_len);
+
+    // now filter the bulk
+    while(proc.has_more_chars(/*maxpos*/contents_len))
     {
-        if(t->has_key(child) && t->has_key_tag(child))
-            t->set_key_tag(child, _transform_tag(t, t->key_tag(child), child));
-        if(t->has_val(child) && t->has_val_tag(child))
-            t->set_val_tag(child, _transform_tag(t, t->val_tag(child), child));
-        _resolve_tags(t, child);
+        const char curr = proc.curr();
+        _c4dbgfbf("'{}' sofar=[{}]~~~{}~~~",  _c4prc(curr), proc.wpos, proc.sofar());
+        switch(curr)
+        {
+        case '\n':
+        {
+            _c4dbgfbf("found newline", curr);
+            _filter_block_folded_newlines(proc, indentation, contents_len);
+            break;
+        }
+        case '\r':
+            proc.skip();
+            break;
+        default:
+            proc.copy();
+            break;
+        }
     }
+
+    _c4dbgfbf("before chomp: #tochomp={}   sofar=[{}]~~~{}~~~", proc.rem().len, proc.sofar().len, proc.sofar());
+
+    _filter_chomp(proc, chomp, indentation);
+
+    _c4dbgfbf("final=[{}]~~~{}~~~", proc.sofar().len, proc.sofar());
+
+    return proc.result();
 }
-size_t _count_resolved_tags_size(Tree const* t, size_t node)
+
+#undef _c4dbgfbf
+
+template<class EventHandler>
+FilterResult ParseEngine<EventHandler>::filter_scalar_block_folded(csubstr scalar, substr dst, size_t indentation, BlockChomp_e chomp)
 {
-    size_t sz = 0;
-    for(size_t child = t->first_child(node); child != NONE; child = t->next_sibling(child))
-    {
-        if(t->has_key(child) && t->has_key_tag(child))
-            sz += t->resolve_tag(substr{}, t->key_tag(child), child);
-        if(t->has_val(child) && t->has_val_tag(child))
-            sz += t->resolve_tag(substr{}, t->val_tag(child), child);
-        sz += _count_resolved_tags_size(t, child);
-    }
-    return sz;
+    FilterProcessorSrcDst proc(scalar, dst);
+    return _filter_block_folded(proc, indentation, chomp);
 }
-} // namespace
 
-void Tree::resolve_tags()
+template<class EventHandler>
+FilterResult ParseEngine<EventHandler>::filter_scalar_block_folded_in_place(substr scalar, size_t cap, size_t indentation, BlockChomp_e chomp)
 {
-    if(empty())
-        return;
-    if(num_tag_directives() == 0)
-        return;
-    size_t needed_size = _count_resolved_tags_size(this, root_id());
-    if(needed_size)
-        reserve_arena(arena_pos() + needed_size);
-    _resolve_tags(this, root_id());
+    FilterProcessorInplaceEndExtending proc(scalar, cap);
+    return _filter_block_folded(proc, indentation, chomp);
 }
 
 
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 
-csubstr Tree::lookup_result::resolved() const
+template<class EventHandler>
+csubstr ParseEngine<EventHandler>::_filter_scalar_plain(substr s, size_t indentation)
 {
-    csubstr p = path.first(path_pos);
-    if(p.ends_with('.'))
-        p = p.first(p.len-1);
-    return p;
+    _c4dbgpf("filtering plain scalar: s=[{}]~~~{}~~~", s.len, s);
+    FilterResult r = this->filter_scalar_plain_in_place(s, s.len, indentation);
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, r.valid());
+    _c4dbgpf("filtering plain scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
+    return r.get();
 }
 
-csubstr Tree::lookup_result::unresolved() const
-{
-    return path.sub(path_pos);
-}
+//-----------------------------------------------------------------------------
 
-void Tree::_advance(lookup_result *r, size_t more) const
+template<class EventHandler>
+csubstr ParseEngine<EventHandler>::_filter_scalar_squot(substr s)
 {
-    r->path_pos += more;
-    if(r->path.sub(r->path_pos).begins_with('.'))
-        ++r->path_pos;
+    _c4dbgpf("filtering squo scalar: s=[{}]~~~{}~~~", s.len, s);
+    FilterResult r = this->filter_scalar_squoted_in_place(s, s.len);
+    _RYML_CB_ASSERT(this->callbacks(), r.valid());
+    _c4dbgpf("filtering squo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
+    return r.get();
 }
 
-Tree::lookup_result Tree::lookup_path(csubstr path, size_t start) const
-{
-    if(start == NONE)
-        start = root_id();
-    lookup_result r(path, start);
-    if(path.empty())
-        return r;
-    _lookup_path(&r);
-    if(r.target == NONE && r.closest == start)
-        r.closest = NONE;
-    return r;
-}
 
-size_t Tree::lookup_path_or_modify(csubstr default_value, csubstr path, size_t start)
+//-----------------------------------------------------------------------------
+
+template<class EventHandler>
+csubstr ParseEngine<EventHandler>::_filter_scalar_dquot(substr s)
 {
-    size_t target = _lookup_path_or_create(path, start);
-    if(parent_is_map(target))
-        to_keyval(target, key(target), default_value);
+    _c4dbgpf("filtering dquo scalar: s=[{}]~~~{}~~~", s.len, s);
+    FilterResultExtending r = this->filter_scalar_dquoted_in_place(s, s.len);
+    if(C4_LIKELY(r.valid()))
+    {
+        _c4dbgpf("filtering dquo scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
+        return r.get();
+    }
     else
-        to_val(target, default_value);
-    return target;
+    {
+        const size_t len = r.required_len();
+        _c4dbgpf("filtering dquo scalar: not enough space: needs {}, have {}", len, s.len);
+        substr dst = m_evt_handler->alloc_arena(len, &s);
+        _c4dbgpf("filtering dquo scalar: dst.len={}", dst.len);
+        _RYML_CB_ASSERT(this->callbacks(), dst.len == len);
+        FilterResult rsd = this->filter_scalar_dquoted(s, dst);
+        _c4dbgpf("filtering dquo scalar: ... result now needs {} was {}", rsd.required_len(), len);
+        _RYML_CB_ASSERT(this->callbacks(), rsd.required_len() <= len); // may be smaller!
+        _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
+        _c4dbgpf("filtering dquo scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
+        return rsd.get();
+    }
 }
 
-size_t Tree::lookup_path_or_modify(Tree const *src, size_t src_node, csubstr path, size_t start)
-{
-    size_t target = _lookup_path_or_create(path, start);
-    merge_with(src, src_node, target);
-    return target;
-}
 
-size_t Tree::_lookup_path_or_create(csubstr path, size_t start)
+//-----------------------------------------------------------------------------
+template<class EventHandler>
+csubstr ParseEngine<EventHandler>::_filter_scalar_literal(substr s, size_t indentation, BlockChomp_e chomp)
 {
-    if(start == NONE)
-        start = root_id();
-    lookup_result r(path, start);
-    _lookup_path(&r);
-    if(r.target != NONE)
+    _c4dbgpf("filtering block literal scalar: s=[{}]~~~{}~~~", s.len, s);
+    FilterResult r = this->filter_scalar_block_literal_in_place(s, s.len, indentation, chomp);
+    if(C4_LIKELY(r.valid()))
     {
-        C4_ASSERT(r.unresolved().empty());
-        return r.target;
+        _c4dbgpf("filtering block literal scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
+        return r.get();
+    }
+    else
+    {
+        _c4dbgpf("filtering block literal scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
+        substr dst = m_evt_handler->alloc_arena(r.required_len(), &s);
+        FilterResult rsd = this->filter_scalar_block_literal(s, dst, indentation, chomp);
+        _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
+        _c4dbgpf("filtering block literal scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
+        return rsd.get();
     }
-    _lookup_path_modify(&r);
-    return r.target;
 }
 
-void Tree::_lookup_path(lookup_result *r) const
+
+//-----------------------------------------------------------------------------
+template<class EventHandler>
+csubstr ParseEngine<EventHandler>::_filter_scalar_folded(substr s, size_t indentation, BlockChomp_e chomp)
 {
-    C4_ASSERT( ! r->unresolved().empty());
-    _lookup_path_token parent{"", type(r->closest)};
-    size_t node;
-    do
+    _c4dbgpf("filtering block folded scalar: s=[{}]~~~{}~~~", s.len, s);
+    FilterResult r = this->filter_scalar_block_folded_in_place(s, s.len, indentation, chomp);
+    if(C4_LIKELY(r.valid()))
     {
-        node = _next_node(r, &parent);
-        if(node != NONE)
-            r->closest = node;
-        if(r->unresolved().empty())
-        {
-            r->target = node;
-            return;
-        }
-    } while(node != NONE);
+        _c4dbgpf("filtering block folded scalar: success! s=[{}]~~~{}~~~", r.get().len, r.get());
+        return r.get();
+    }
+    else
+    {
+        _c4dbgpf("filtering block folded scalar: not enough space: needs {}, have {}", r.required_len(), s.len);
+        substr dst = m_evt_handler->alloc_arena(r.required_len(), &s);
+        FilterResult rsd = this->filter_scalar_block_folded(s, dst, indentation, chomp);
+        _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, rsd.valid());
+        _c4dbgpf("filtering block folded scalar: success! s=[{}]~~~{}~~~", rsd.get().len, rsd.get());
+        return rsd.get();
+    }
 }
 
-void Tree::_lookup_path_modify(lookup_result *r)
+
+//-----------------------------------------------------------------------------
+
+template<class EventHandler>
+csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation)
 {
-    C4_ASSERT( ! r->unresolved().empty());
-    _lookup_path_token parent{"", type(r->closest)};
-    size_t node;
-    do
+    if(sc.needs_filter)
     {
-        node = _next_node_modify(r, &parent);
-        if(node != NONE)
-            r->closest = node;
-        if(r->unresolved().empty())
+        if(m_options.scalar_filtering())
         {
-            r->target = node;
-            return;
+            return _filter_scalar_plain(sc.scalar, indentation);
+        }
+        else
+        {
+            _c4dbgp("plain scalar left unfiltered");
+            m_evt_handler->mark_key_scalar_unfiltered();
         }
-    } while(node != NONE);
-}
-
-size_t Tree::_next_node(lookup_result * r, _lookup_path_token *parent) const
-{
-    _lookup_path_token token = _next_token(r, *parent);
-    if( ! token)
-        return NONE;
-
-    size_t node = NONE;
-    csubstr prev = token.value;
-    if(token.type == MAP || token.type == SEQ)
-    {
-        _RYML_CB_ASSERT(m_callbacks, !token.value.begins_with('['));
-        //_RYML_CB_ASSERT(m_callbacks, is_container(r->closest) || r->closest == NONE);
-        _RYML_CB_ASSERT(m_callbacks, is_map(r->closest));
-        node = find_child(r->closest, token.value);
     }
-    else if(token.type == KEYVAL)
+    else
     {
-        _RYML_CB_ASSERT(m_callbacks, r->unresolved().empty());
-        if(is_map(r->closest))
-            node = find_child(r->closest, token.value);
+        _c4dbgp("plain scalar doesn't need filtering");
     }
-    else if(token.type == KEY)
+    return sc.scalar;
+}
+
+template<class EventHandler>
+csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_plain(ScannedScalar const& C4_RESTRICT sc, size_t indentation)
+{
+    if(sc.needs_filter)
     {
-        _RYML_CB_ASSERT(m_callbacks, token.value.begins_with('[') && token.value.ends_with(']'));
-        token.value = token.value.offs(1, 1).trim(' ');
-        size_t idx = 0;
-        _RYML_CB_CHECK(m_callbacks, from_chars(token.value, &idx));
-        node = child(r->closest, idx);
+        if(m_options.scalar_filtering())
+        {
+            return _filter_scalar_plain(sc.scalar, indentation);
+        }
+        else
+        {
+            _c4dbgp("plain scalar left unfiltered");
+            m_evt_handler->mark_val_scalar_unfiltered();
+        }
     }
     else
     {
-        C4_NEVER_REACH();
+        _c4dbgp("plain scalar doesn't need filtering");
     }
+    return sc.scalar;
+}
+
+
+//-----------------------------------------------------------------------------
 
-    if(node != NONE)
+template<class EventHandler>
+csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_squot(ScannedScalar const& C4_RESTRICT sc)
+{
+    if(sc.needs_filter)
     {
-        *parent = token;
+        if(m_options.scalar_filtering())
+        {
+            return _filter_scalar_squot(sc.scalar);
+        }
+        else
+        {
+            _c4dbgp("squo key scalar left unfiltered");
+            m_evt_handler->mark_key_scalar_unfiltered();
+        }
     }
     else
     {
-        csubstr p = r->path.sub(r->path_pos > 0 ? r->path_pos - 1 : r->path_pos);
-        r->path_pos -= prev.len;
-        if(p.begins_with('.'))
-            r->path_pos -= 1u;
+        _c4dbgp("squo key scalar doesn't need filtering");
     }
-
-    return node;
+    return sc.scalar;
 }
 
-size_t Tree::_next_node_modify(lookup_result * r, _lookup_path_token *parent)
+template<class EventHandler>
+csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_squot(ScannedScalar const& C4_RESTRICT sc)
 {
-    _lookup_path_token token = _next_token(r, *parent);
-    if( ! token)
-        return NONE;
-
-    size_t node = NONE;
-    if(token.type == MAP || token.type == SEQ)
+    if(sc.needs_filter)
     {
-        _RYML_CB_ASSERT(m_callbacks, !token.value.begins_with('['));
-        //_RYML_CB_ASSERT(m_callbacks, is_container(r->closest) || r->closest == NONE);
-        if( ! is_container(r->closest))
+        if(m_options.scalar_filtering())
         {
-            if(has_key(r->closest))
-                to_map(r->closest, key(r->closest));
-            else
-                to_map(r->closest);
+            return _filter_scalar_squot(sc.scalar);
         }
         else
         {
-            if(is_map(r->closest))
-                node = find_child(r->closest, token.value);
-            else
-            {
-                size_t pos = NONE;
-                _RYML_CB_CHECK(m_callbacks, c4::atox(token.value, &pos));
-                _RYML_CB_ASSERT(m_callbacks, pos != NONE);
-                node = child(r->closest, pos);
-            }
-        }
-        if(node == NONE)
-        {
-            _RYML_CB_ASSERT(m_callbacks, is_map(r->closest));
-            node = append_child(r->closest);
-            NodeData *n = _p(node);
-            n->m_key.scalar = token.value;
-            n->m_type.add(KEY);
+            _c4dbgp("squo val scalar left unfiltered");
+            m_evt_handler->mark_val_scalar_unfiltered();
         }
     }
-    else if(token.type == KEYVAL)
+    else
     {
-        _RYML_CB_ASSERT(m_callbacks, r->unresolved().empty());
-        if(is_map(r->closest))
+        _c4dbgp("squo val scalar doesn't need filtering");
+    }
+    return sc.scalar;
+}
+
+
+//-----------------------------------------------------------------------------
+
+template<class EventHandler>
+csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_dquot(ScannedScalar const& C4_RESTRICT sc)
+{
+    if(sc.needs_filter)
+    {
+        if(m_options.scalar_filtering())
         {
-            node = find_child(r->closest, token.value);
-            if(node == NONE)
-                node = append_child(r->closest);
+            return _filter_scalar_dquot(sc.scalar);
         }
         else
         {
-            _RYML_CB_ASSERT(m_callbacks, !is_seq(r->closest));
-            _add_flags(r->closest, MAP);
-            node = append_child(r->closest);
+            _c4dbgp("dquo scalar left unfiltered");
+            m_evt_handler->mark_key_scalar_unfiltered();
         }
-        NodeData *n = _p(node);
-        n->m_key.scalar = token.value;
-        n->m_val.scalar = "";
-        n->m_type.add(KEYVAL);
     }
-    else if(token.type == KEY)
+    else
     {
-        _RYML_CB_ASSERT(m_callbacks, token.value.begins_with('[') && token.value.ends_with(']'));
-        token.value = token.value.offs(1, 1).trim(' ');
-        size_t idx;
-        if( ! from_chars(token.value, &idx))
-             return NONE;
-        if( ! is_container(r->closest))
+        _c4dbgp("dquo scalar doesn't need filtering");
+    }
+    return sc.scalar;
+}
+
+template<class EventHandler>
+csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_dquot(ScannedScalar const& C4_RESTRICT sc)
+{
+    if(sc.needs_filter)
+    {
+        if(m_options.scalar_filtering())
         {
-            if(has_key(r->closest))
-            {
-                csubstr k = key(r->closest);
-                _clear_type(r->closest);
-                to_seq(r->closest, k);
-            }
-            else
-            {
-                _clear_type(r->closest);
-                to_seq(r->closest);
-            }
+            return _filter_scalar_dquot(sc.scalar);
         }
-        _RYML_CB_ASSERT(m_callbacks, is_container(r->closest));
-        node = child(r->closest, idx);
-        if(node == NONE)
+        else
         {
-            _RYML_CB_ASSERT(m_callbacks, num_children(r->closest) <= idx);
-            for(size_t i = num_children(r->closest); i <= idx; ++i)
-            {
-                node = append_child(r->closest);
-                if(i < idx)
-                {
-                    if(is_map(r->closest))
-                        to_keyval(node, /*"~"*/{}, /*"~"*/{});
-                    else if(is_seq(r->closest))
-                        to_val(node, /*"~"*/{});
-                }
-            }
+            _c4dbgp("dquo scalar left unfiltered");
+            m_evt_handler->mark_val_scalar_unfiltered();
         }
     }
     else
     {
-        C4_NEVER_REACH();
+        _c4dbgp("dquo scalar doesn't need filtering");
     }
-
-    _RYML_CB_ASSERT(m_callbacks, node != NONE);
-    *parent = token;
-    return node;
+    return sc.scalar;
 }
 
-/** types of tokens:
- * - seeing "map."  ---> "map"/MAP
- * - finishing "scalar" ---> "scalar"/KEYVAL
- * - seeing "seq[n]" ---> "seq"/SEQ (--> "[n]"/KEY)
- * - seeing "[n]" ---> "[n]"/KEY
- */
-Tree::_lookup_path_token Tree::_next_token(lookup_result *r, _lookup_path_token const& parent) const
-{
-    csubstr unres = r->unresolved();
-    if(unres.empty())
-        return {};
 
-    // is it an indexation like [0], [1], etc?
-    if(unres.begins_with('['))
-    {
-        size_t pos = unres.find(']');
-        if(pos == csubstr::npos)
-            return {};
-        csubstr idx = unres.first(pos + 1);
-        _advance(r, pos + 1);
-        return {idx, KEY};
-    }
+//-----------------------------------------------------------------------------
 
-    // no. so it must be a name
-    size_t pos = unres.first_of(".[");
-    if(pos == csubstr::npos)
+template<class EventHandler>
+csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_literal(ScannedBlock const& C4_RESTRICT sb)
+{
+    if(m_options.scalar_filtering())
     {
-        _advance(r, unres.len);
-        NodeType t;
-        if(( ! parent) || parent.type.is_seq())
-            return {unres, VAL};
-        return {unres, KEYVAL};
+        return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
     }
-
-    // it's either a map or a seq
-    _RYML_CB_ASSERT(m_callbacks, unres[pos] == '.' || unres[pos] == '[');
-    if(unres[pos] == '.')
+    else
     {
-        _RYML_CB_ASSERT(m_callbacks, pos != 0);
-        _advance(r, pos + 1);
-        return {unres.first(pos), MAP};
+        _c4dbgp("literal scalar left unfiltered");
+        m_evt_handler->mark_key_scalar_unfiltered();
     }
-
-    _RYML_CB_ASSERT(m_callbacks, unres[pos] == '[');
-    _advance(r, pos);
-    return {unres.first(pos), SEQ};
+    return sb.scalar;
 }
 
+template<class EventHandler>
+csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_literal(ScannedBlock const& C4_RESTRICT sb)
+{
+    if(m_options.scalar_filtering())
+    {
+        return _filter_scalar_literal(sb.scalar, sb.indentation, sb.chomp);
+    }
+    else
+    {
+        _c4dbgp("literal scalar left unfiltered");
+        m_evt_handler->mark_val_scalar_unfiltered();
+    }
+    return sb.scalar;
+}
 
-} // namespace ryml
-} // namespace c4
-
-
-C4_SUPPRESS_WARNING_GCC_POP
-C4_SUPPRESS_WARNING_MSVC_POP
-
-#endif /* RYML_SINGLE_HDR_DEFINE_NOW */
-
-
-// (end https://github.com/biojppm/rapidyaml/src/c4/yml/tree.cpp)
-
-
-
-//********************************************************************************
-//--------------------------------------------------------------------------------
-// src/c4/yml/parse.cpp
-// https://github.com/biojppm/rapidyaml/src/c4/yml/parse.cpp
-//--------------------------------------------------------------------------------
-//********************************************************************************
-
-#ifdef RYML_SINGLE_HDR_DEFINE_NOW
-// amalgamate: removed include of
-// https://github.com/biojppm/rapidyaml/src/c4/yml/parse.hpp
-//#include "c4/yml/parse.hpp"
-#if !defined(C4_YML_PARSE_HPP_) && !defined(_C4_YML_PARSE_HPP_)
-#error "amalgamate: file c4/yml/parse.hpp must have been included at this point"
-#endif /* C4_YML_PARSE_HPP_ */
-
-// amalgamate: removed include of
-// https://github.com/biojppm/rapidyaml/src/c4/error.hpp
-//#include "c4/error.hpp"
-#if !defined(C4_ERROR_HPP_) && !defined(_C4_ERROR_HPP_)
-#error "amalgamate: file c4/error.hpp must have been included at this point"
-#endif /* C4_ERROR_HPP_ */
-
-// amalgamate: removed include of
-// https://github.com/biojppm/rapidyaml/src/c4/utf.hpp
-//#include "c4/utf.hpp"
-#if !defined(C4_UTF_HPP_) && !defined(_C4_UTF_HPP_)
-#error "amalgamate: file c4/utf.hpp must have been included at this point"
-#endif /* C4_UTF_HPP_ */
-
-// amalgamate: removed include of
-// https://github.com/biojppm/rapidyaml/src/c4/dump.hpp
-//#include <c4/dump.hpp>
-#if !defined(C4_DUMP_HPP_) && !defined(_C4_DUMP_HPP_)
-#error "amalgamate: file c4/dump.hpp must have been included at this point"
-#endif /* C4_DUMP_HPP_ */
-
-
-//included above:
-//#include <ctype.h>
-//included above:
-//#include <stdarg.h>
-//included above:
-//#include <stdio.h>
-
-// amalgamate: removed include of
-// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/parser_dbg.hpp
-//#include "c4/yml/detail/parser_dbg.hpp"
-#if !defined(C4_YML_DETAIL_PARSER_DBG_HPP_) && !defined(_C4_YML_DETAIL_PARSER_DBG_HPP_)
-#error "amalgamate: file c4/yml/detail/parser_dbg.hpp must have been included at this point"
-#endif /* C4_YML_DETAIL_PARSER_DBG_HPP_ */
-
-#ifdef RYML_DBG
-// amalgamate: removed include of
-// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/print.hpp
-//#include "c4/yml/detail/print.hpp"
-#if !defined(C4_YML_DETAIL_PRINT_HPP_) && !defined(_C4_YML_DETAIL_PRINT_HPP_)
-#error "amalgamate: file c4/yml/detail/print.hpp must have been included at this point"
-#endif /* C4_YML_DETAIL_PRINT_HPP_ */
-
-#endif
-
-#ifndef RYML_ERRMSG_SIZE
-    #define RYML_ERRMSG_SIZE 1024
-#endif
-
-//#define RYML_WITH_TAB_TOKENS
-#ifdef RYML_WITH_TAB_TOKENS
-#define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__
-#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with
-#else
-#define _RYML_WITH_TAB_TOKENS(...)
-#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without
-#endif
-
-
-#if defined(_MSC_VER)
-#   pragma warning(push)
-#   pragma warning(disable: 4296/*expression is always 'boolean_value'*/)
-#elif defined(__clang__)
-#   pragma clang diagnostic push
-#   pragma clang diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0.
-#   pragma clang diagnostic ignored "-Wformat-nonliteral"
-#elif defined(__GNUC__)
-#   pragma GCC diagnostic push
-#   pragma GCC diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0.
-#   pragma GCC diagnostic ignored "-Wformat-nonliteral"
-#   if __GNUC__ >= 7
-#       pragma GCC diagnostic ignored "-Wduplicated-branches"
-#   endif
-#endif
-
-namespace c4 {
-namespace yml {
 
-namespace {
+//-----------------------------------------------------------------------------
 
-template<class DumpFn, class ...Args>
-void _parse_dump(DumpFn dumpfn, c4::csubstr fmt, Args&& ...args)
+template<class EventHandler>
+csubstr ParseEngine<EventHandler>::_maybe_filter_key_scalar_folded(ScannedBlock const& C4_RESTRICT sb)
 {
-    char writebuf[256];
-    auto results = c4::format_dump_resume(dumpfn, writebuf, fmt, std::forward<Args>(args)...);
-    // resume writing if the results failed to fit the buffer
-    if(C4_UNLIKELY(results.bufsize > sizeof(writebuf))) // bufsize will be that of the largest element serialized. Eg int(1), will require 1 byte.
+    if(m_options.scalar_filtering())
     {
-        results = format_dump_resume(dumpfn, results, writebuf, fmt, std::forward<Args>(args)...);
-        if(C4_UNLIKELY(results.bufsize > sizeof(writebuf)))
-        {
-            results = format_dump_resume(dumpfn, results, writebuf, fmt, std::forward<Args>(args)...);
-        }
+        return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
+    }
+    else
+    {
+        _c4dbgp("folded scalar left unfiltered");
+        m_evt_handler->mark_key_scalar_unfiltered();
     }
+    return sb.scalar;
 }
 
-bool _is_scalar_next__runk(csubstr s)
+template<class EventHandler>
+csubstr ParseEngine<EventHandler>::_maybe_filter_val_scalar_folded(ScannedBlock const& C4_RESTRICT sb)
 {
-    return !(s.begins_with(": ") || s.begins_with_any("#,{}[]%&") || s.begins_with("? ") || s == "-" || s.begins_with("- ") || s.begins_with(":\"") || s.begins_with(":'"));
+    if(m_options.scalar_filtering())
+    {
+        return _filter_scalar_folded(sb.scalar, sb.indentation, sb.chomp);
+    }
+    else
+    {
+        _c4dbgp("folded scalar left unfiltered");
+        m_evt_handler->mark_val_scalar_unfiltered();
+    }
+    return sb.scalar;
 }
 
-bool _is_scalar_next__rseq_rval(csubstr s)
-{
-    return !(s.begins_with_any("[{!&") || s.begins_with("? ") || s.begins_with("- ") || s == "-");
-}
 
-bool _is_scalar_next__rmap(csubstr s)
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+
+#ifdef RYML_DBG  //   !!! <----------------------------------
+
+template<class EventHandler>
+void ParseEngine<EventHandler>::add_flags(ParserFlag_t on, ParserState * s)
 {
-    return !(s.begins_with(": ") || s.begins_with_any("#,!&") || s.begins_with("? ") _RYML_WITH_TAB_TOKENS(|| s.begins_with(":\t")));
+    char buf1_[64], buf2_[64], buf3_[64];
+    csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
+    csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
+    csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags|on);
+    _c4dbgpf("state[{}]: add {}: before={} after={}", s->level, buf1, buf2, buf3);
+    s->flags |= on;
 }
 
-bool _is_scalar_next__rmap_val(csubstr s)
+template<class EventHandler>
+void ParseEngine<EventHandler>::addrem_flags(ParserFlag_t on, ParserFlag_t off, ParserState * s)
 {
-    return !(s.begins_with("- ") || s.begins_with_any("{[") || s == "-");
+    char buf1_[64], buf2_[64], buf3_[64], buf4_[64];
+    csubstr buf1 = detail::_parser_flags_to_str(buf1_, on);
+    csubstr buf2 = detail::_parser_flags_to_str(buf2_, off);
+    csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags);
+    csubstr buf4 = detail::_parser_flags_to_str(buf4_, ((s->flags|on)&(~off)));
+    _c4dbgpf("state[{}]: add {} / rem {}: before={} after={}", s->level, buf1, buf2, buf3, buf4);
+    s->flags |= on;
+    s->flags &= ~off;
 }
 
-bool _is_doc_sep(csubstr s)
+template<class EventHandler>
+void ParseEngine<EventHandler>::rem_flags(ParserFlag_t off, ParserState * s)
 {
-    constexpr const csubstr dashes = "---";
-    constexpr const csubstr ellipsis = "...";
-    constexpr const csubstr whitesp = " \t";
-    if(s.begins_with(dashes))
-        return s == dashes || s.sub(3).begins_with_any(whitesp);
-    else if(s.begins_with(ellipsis))
-        return s == ellipsis || s.sub(3).begins_with_any(whitesp);
-    return false;
+    char buf1_[64], buf2_[64], buf3_[64];
+    csubstr buf1 = detail::_parser_flags_to_str(buf1_, off);
+    csubstr buf2 = detail::_parser_flags_to_str(buf2_, s->flags);
+    csubstr buf3 = detail::_parser_flags_to_str(buf3_, s->flags&(~off));
+    _c4dbgpf("state[{}]: rem {}: before={} after={}", s->level, buf1, buf2, buf3);
+    s->flags &= ~off;
 }
 
-/** @p i is set to the first non whitespace character after the line
- * @return the number of empty lines after the initial position */
-size_t count_following_newlines(csubstr r, size_t *C4_RESTRICT i, size_t indentation)
+inline C4_NO_INLINE csubstr detail::_parser_flags_to_str(substr buf, ParserFlag_t flags)
 {
-    RYML_ASSERT(r[*i] == '\n');
-    size_t numnl_following = 0;
-    ++(*i);
-    for( ; *i < r.len; ++(*i))
-    {
-        if(r.str[*i] == '\n')
-        {
-            ++numnl_following;
-            if(indentation) // skip the indentation after the newline
-            {
-                size_t stop = *i + indentation;
-                for( ; *i < r.len; ++(*i))
-                {
-                    if(r.str[*i] != ' ' && r.str[*i] != '\r')
-                        break;
-                    RYML_ASSERT(*i < stop);
-                }
-                C4_UNUSED(stop);
-            }
-        }
-        else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r')  // skip leading whitespace
-            ;
-        else
-            break;
+    size_t pos = 0;
+    bool gotone = false;
+
+    #define _prflag(fl)                                     \
+    if((flags & fl) == (fl))                                \
+    {                                                       \
+        if(gotone)                                          \
+        {                                                   \
+            if(pos + 1 < buf.len)                           \
+                buf[pos] = '|';                             \
+            ++pos;                                          \
+        }                                                   \
+        csubstr fltxt = #fl;                                \
+        if(pos + fltxt.len <= buf.len)                      \
+            memcpy(buf.str + pos, fltxt.str, fltxt.len);    \
+        pos += fltxt.len;                                   \
+        gotone = true;                                      \
     }
-    return numnl_following;
+
+    _prflag(RTOP);
+    _prflag(RUNK);
+    _prflag(RMAP);
+    _prflag(RSEQ);
+    _prflag(FLOW);
+    _prflag(BLCK);
+    _prflag(QMRK);
+    _prflag(RKEY);
+    _prflag(RVAL);
+    _prflag(RKCL);
+    _prflag(RNXT);
+    _prflag(SSCL);
+    _prflag(QSCL);
+    _prflag(RSET);
+    _prflag(RDOC);
+    _prflag(NDOC);
+    _prflag(USTY);
+    _prflag(RSEQIMAP);
+
+    #undef _prflag
+
+    if(pos == 0)
+        if(buf.len > 0)
+            buf[pos++] = '0';
+
+    RYML_CHECK(pos <= buf.len);
+
+    return buf.first(pos);
 }
 
-} // anon namespace
+#endif // RYML_DBG   !!! <----------------------------------
 
 
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 
-Parser::~Parser()
+template<class EventHandler>
+csubstr ParseEngine<EventHandler>::location_contents(Location const& loc) const
 {
-    _free();
-    _clr();
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, loc.offset < m_buf.len);
+    return m_buf.sub(loc.offset);
 }
 
-Parser::Parser(Callbacks const& cb)
-    : m_file()
-    , m_buf()
-    , m_root_id(NONE)
-    , m_tree()
-    , m_stack(cb)
-    , m_state()
-    , m_key_tag_indentation(0)
-    , m_key_tag2_indentation(0)
-    , m_key_tag()
-    , m_key_tag2()
-    , m_val_tag_indentation(0)
-    , m_val_tag()
-    , m_key_anchor_was_before(false)
-    , m_key_anchor_indentation(0)
-    , m_key_anchor()
-    , m_val_anchor_indentation(0)
-    , m_val_anchor()
-    , m_filter_arena()
-    , m_newline_offsets()
-    , m_newline_offsets_size(0)
-    , m_newline_offsets_capacity(0)
-    , m_newline_offsets_buf()
+template<class EventHandler>
+Location ParseEngine<EventHandler>::location(ConstNodeRef node) const
 {
-    m_stack.push(State{});
-    m_state = &m_stack.top();
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, node.readable());
+    return location(*node.tree(), node.id());
 }
 
-Parser::Parser(Parser &&that)
-    : m_file(that.m_file)
-    , m_buf(that.m_buf)
-    , m_root_id(that.m_root_id)
-    , m_tree(that.m_tree)
-    , m_stack(std::move(that.m_stack))
-    , m_state(&m_stack.top())
-    , m_key_tag_indentation(that.m_key_tag_indentation)
-    , m_key_tag2_indentation(that.m_key_tag2_indentation)
-    , m_key_tag(that.m_key_tag)
-    , m_key_tag2(that.m_key_tag2)
-    , m_val_tag_indentation(that.m_val_tag_indentation)
-    , m_val_tag(that.m_val_tag)
-    , m_key_anchor_was_before(that.m_key_anchor_was_before)
-    , m_key_anchor_indentation(that.m_key_anchor_indentation)
-    , m_key_anchor(that.m_key_anchor)
-    , m_val_anchor_indentation(that.m_val_anchor_indentation)
-    , m_val_anchor(that.m_val_anchor)
-    , m_filter_arena(that.m_filter_arena)
-    , m_newline_offsets(that.m_newline_offsets)
-    , m_newline_offsets_size(that.m_newline_offsets_size)
-    , m_newline_offsets_capacity(that.m_newline_offsets_capacity)
-    , m_newline_offsets_buf(that.m_newline_offsets_buf)
+template<class EventHandler>
+Location ParseEngine<EventHandler>::location(Tree const& tree, id_type node) const
 {
-    that._clr();
+    // try hard to avoid getting the location from a null string.
+    Location loc;
+    if(_location_from_node(tree, node, &loc, 0))
+        return loc;
+    return val_location(m_buf.str);
 }
 
-Parser::Parser(Parser const& that)
-    : m_file(that.m_file)
-    , m_buf(that.m_buf)
-    , m_root_id(that.m_root_id)
-    , m_tree(that.m_tree)
-    , m_stack(that.m_stack)
-    , m_state(&m_stack.top())
-    , m_key_tag_indentation(that.m_key_tag_indentation)
-    , m_key_tag2_indentation(that.m_key_tag2_indentation)
-    , m_key_tag(that.m_key_tag)
-    , m_key_tag2(that.m_key_tag2)
-    , m_val_tag_indentation(that.m_val_tag_indentation)
-    , m_val_tag(that.m_val_tag)
-    , m_key_anchor_was_before(that.m_key_anchor_was_before)
-    , m_key_anchor_indentation(that.m_key_anchor_indentation)
-    , m_key_anchor(that.m_key_anchor)
-    , m_val_anchor_indentation(that.m_val_anchor_indentation)
-    , m_val_anchor(that.m_val_anchor)
-    , m_filter_arena()
-    , m_newline_offsets()
-    , m_newline_offsets_size()
-    , m_newline_offsets_capacity()
-    , m_newline_offsets_buf()
+template<class EventHandler>
+bool ParseEngine<EventHandler>::_location_from_node(Tree const& tree, id_type node, Location *C4_RESTRICT loc, id_type level) const
 {
-    if(that.m_newline_offsets_capacity)
+    if(tree.has_key(node))
     {
-        _resize_locations(that.m_newline_offsets_capacity);
-        _RYML_CB_CHECK(m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity);
-        memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
-        m_newline_offsets_size = that.m_newline_offsets_size;
+        csubstr k = tree.key(node);
+        if(C4_LIKELY(k.str != nullptr))
+        {
+            _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, k.is_sub(m_buf));
+            _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.is_super(k));
+            *loc = val_location(k.str);
+            return true;
+        }
     }
-    if(that.m_filter_arena.len)
+
+    if(tree.has_val(node))
     {
-        _resize_filter_arena(that.m_filter_arena.len);
+        csubstr v = tree.val(node);
+        if(C4_LIKELY(v.str != nullptr))
+        {
+            _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, v.is_sub(m_buf));
+            _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.is_super(v));
+            *loc = val_location(v.str);
+            return true;
+        }
     }
-}
 
-Parser& Parser::operator=(Parser &&that)
-{
-    _free();
-    m_file = (that.m_file);
-    m_buf = (that.m_buf);
-    m_root_id = (that.m_root_id);
-    m_tree = (that.m_tree);
-    m_stack = std::move(that.m_stack);
-    m_state = (&m_stack.top());
-    m_key_tag_indentation = (that.m_key_tag_indentation);
-    m_key_tag2_indentation = (that.m_key_tag2_indentation);
-    m_key_tag = (that.m_key_tag);
-    m_key_tag2 = (that.m_key_tag2);
-    m_val_tag_indentation = (that.m_val_tag_indentation);
-    m_val_tag = (that.m_val_tag);
-    m_key_anchor_was_before = (that.m_key_anchor_was_before);
-    m_key_anchor_indentation = (that.m_key_anchor_indentation);
-    m_key_anchor = (that.m_key_anchor);
-    m_val_anchor_indentation = (that.m_val_anchor_indentation);
-    m_val_anchor = (that.m_val_anchor);
-    m_filter_arena = that.m_filter_arena;
-    m_newline_offsets = (that.m_newline_offsets);
-    m_newline_offsets_size = (that.m_newline_offsets_size);
-    m_newline_offsets_capacity = (that.m_newline_offsets_capacity);
-    m_newline_offsets_buf = (that.m_newline_offsets_buf);
-    that._clr();
-    return *this;
-}
+    if(tree.is_container(node))
+    {
+        if(_location_from_cont(tree, node, loc))
+            return true;
+    }
 
-Parser& Parser::operator=(Parser const& that)
-{
-    _free();
-    m_file = (that.m_file);
-    m_buf = (that.m_buf);
-    m_root_id = (that.m_root_id);
-    m_tree = (that.m_tree);
-    m_stack = that.m_stack;
-    m_state = &m_stack.top();
-    m_key_tag_indentation = (that.m_key_tag_indentation);
-    m_key_tag2_indentation = (that.m_key_tag2_indentation);
-    m_key_tag = (that.m_key_tag);
-    m_key_tag2 = (that.m_key_tag2);
-    m_val_tag_indentation = (that.m_val_tag_indentation);
-    m_val_tag = (that.m_val_tag);
-    m_key_anchor_was_before = (that.m_key_anchor_was_before);
-    m_key_anchor_indentation = (that.m_key_anchor_indentation);
-    m_key_anchor = (that.m_key_anchor);
-    m_val_anchor_indentation = (that.m_val_anchor_indentation);
-    m_val_anchor = (that.m_val_anchor);
-    if(that.m_filter_arena.len > 0)
-        _resize_filter_arena(that.m_filter_arena.len);
-    if(that.m_newline_offsets_capacity > m_newline_offsets_capacity)
-        _resize_locations(that.m_newline_offsets_capacity);
-    _RYML_CB_CHECK(m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity);
-    _RYML_CB_CHECK(m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size);
-    memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t));
-    m_newline_offsets_size = that.m_newline_offsets_size;
-    m_newline_offsets_buf = that.m_newline_offsets_buf;
-    return *this;
-}
+    if(tree.type(node) != NOTYPE && level == 0)
+    {
+        // try the prev sibling
+        {
+            const id_type prev = tree.prev_sibling(node);
+            if(prev != NONE)
+            {
+                if(_location_from_node(tree, prev, loc, level+1))
+                    return true;
+            }
+        }
+        // try the next sibling
+        {
+            const id_type next = tree.next_sibling(node);
+            if(next != NONE)
+            {
+                if(_location_from_node(tree, next, loc, level+1))
+                    return true;
+            }
+        }
+        // try the parent
+        {
+            const id_type parent = tree.parent(node);
+            if(parent != NONE)
+            {
+                if(_location_from_node(tree, parent, loc, level+1))
+                    return true;
+            }
+        }
+    }
 
-void Parser::_clr()
-{
-    m_file = {};
-    m_buf = {};
-    m_root_id = {};
-    m_tree = {};
-    m_stack.clear();
-    m_state = {};
-    m_key_tag_indentation = {};
-    m_key_tag2_indentation = {};
-    m_key_tag = {};
-    m_key_tag2 = {};
-    m_val_tag_indentation = {};
-    m_val_tag = {};
-    m_key_anchor_was_before = {};
-    m_key_anchor_indentation = {};
-    m_key_anchor = {};
-    m_val_anchor_indentation = {};
-    m_val_anchor = {};
-    m_filter_arena = {};
-    m_newline_offsets = {};
-    m_newline_offsets_size = {};
-    m_newline_offsets_capacity = {};
-    m_newline_offsets_buf = {};
+    return false;
 }
 
-void Parser::_free()
+template<class EventHandler>
+bool ParseEngine<EventHandler>::_location_from_cont(Tree const& tree, id_type node, Location *C4_RESTRICT loc) const
 {
-    if(m_newline_offsets)
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, tree.is_container(node));
+    if(!tree.is_stream(node))
     {
-        _RYML_CB_FREE(m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity);
-        m_newline_offsets = nullptr;
-        m_newline_offsets_size = 0u;
-        m_newline_offsets_capacity = 0u;
-        m_newline_offsets_buf = 0u;
+        const char *node_start = tree._p(node)->m_val.scalar.str;  // this was stored in the container
+        if(tree.has_children(node))
+        {
+            id_type child = tree.first_child(node);
+            if(tree.has_key(child))
+            {
+                // when a map starts, the container was set after the key
+                csubstr k = tree.key(child);
+                if(k.str && node_start > k.str)
+                    node_start = k.str;
+            }
+        }
+        *loc = val_location(node_start);
+        return true;
     }
-    if(m_filter_arena.len)
+    else // it's a stream
     {
-        _RYML_CB_FREE(m_stack.m_callbacks, m_filter_arena.str, char, m_filter_arena.len);
-        m_filter_arena = {};
+        *loc = val_location(m_buf.str); // just return the front of the buffer
     }
-    m_stack._free();
+    return true;
 }
 
 
-//-----------------------------------------------------------------------------
-void Parser::_reset()
-{
-    _RYML_CB_ASSERT(m_stack.m_callbacks, m_stack.size() == 1);
-    m_stack.clear();
-    m_stack.push({});
-    m_state = &m_stack.top();
-    m_state->reset(m_file.str, m_root_id);
-
-    m_key_tag_indentation = 0;
-    m_key_tag2_indentation = 0;
-    m_key_tag.clear();
-    m_key_tag2.clear();
-    m_val_tag_indentation = 0;
-    m_val_tag.clear();
-    m_key_anchor_was_before = false;
-    m_key_anchor_indentation = 0;
-    m_key_anchor.clear();
-    m_val_anchor_indentation = 0;
-    m_val_anchor.clear();
-
-    _mark_locations_dirty();
-}
-
-//-----------------------------------------------------------------------------
-template<class DumpFn>
-void Parser::_fmt_msg(DumpFn &&dumpfn) const
+template<class EventHandler>
+Location ParseEngine<EventHandler>::val_location(const char *val) const // maps a pointer into the parsed buffer to a Location (name/offset/line/col)
 {
-    auto const& lc = m_state->line_contents;
-    csubstr contents = lc.stripped;
-    if(contents.len)
+    if(C4_UNLIKELY(val == nullptr)) // no pointer: report the start of the file
+        return {m_file, 0, 0, 0};
+    _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_options.locations());
+    // NOTE: if any of these checks fails, the parser needs to be
+    // instantiated with locations enabled.
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.str == m_newline_offsets_buf.str);
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_buf.len == m_newline_offsets_buf.len);
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_options.locations());
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, !_locations_dirty());
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets != nullptr);
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size > 0);
+    // NOTE: the pointer needs to belong to the buffer that was used to parse.
+    csubstr src = m_buf;
+    _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, val != nullptr || src.str == nullptr);
+    _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str == nullptr && val == nullptr));
+    // ok. search the first stored newline at or after the given ptr (a '\n' belongs to the line it terminates)
+    using lineptr_type = size_t const* C4_RESTRICT;
+    lineptr_type lineptr = nullptr;
+    size_t offset = (size_t)(val - src.begin());
+    if(m_newline_offsets_size < RYML_LOCATIONS_SMALL_THRESHOLD)
     {
-        // print the yaml src line
-        size_t offs = 3u + to_chars(substr{}, m_state->pos.line) + to_chars(substr{}, m_state->pos.col);
-        if(m_file.len)
+        // just do a linear search if the size is small.
+        for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr)
         {
-            _parse_dump(dumpfn, "{}:", m_file);
-            offs += m_file.len + 1;
+            if(*curr >= offset) // lower bound, same as the bisection below; the last entry (m_buf.len) always matches
+            {
+                lineptr = curr;
+                break;
+            }
         }
-        _parse_dump(dumpfn, "{}:{}: ", m_state->pos.line, m_state->pos.col);
-        csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u));
-        csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr("..."));
-        _parse_dump(dumpfn, "{}{}  (size={})\n", maybe_full_content, maybe_ellipsis, contents.len);
-        // highlight the remaining portion of the previous line
-        size_t firstcol = (size_t)(lc.rem.begin() - lc.full.begin());
-        size_t lastcol = firstcol + lc.rem.len;
-        for(size_t i = 0; i < offs + firstcol; ++i)
-            dumpfn(" ");
-        dumpfn("^");
-        for(size_t i = 1, e = (lc.rem.len < 80u ? lc.rem.len : 80u); i < e; ++i)
-            dumpfn("~");
-        _parse_dump(dumpfn, "{}  (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1);
     }
     else
     {
-        dumpfn("\n");
+        // do a bisection search if the size is not small.
+        //
+        // We could use std::lower_bound but this is simple enough and
+        // spares the costly include of <algorithm>.
+        size_t count = m_newline_offsets_size;
+        size_t step;
+        lineptr_type it;
+        lineptr = m_newline_offsets;
+        while(count)
+        {
+            step = count >> 1;
+            it = lineptr + step;
+            if(*it < offset)
+            {
+                lineptr = ++it;
+                count -= step + 1;
+            }
+            else
+            {
+                count = step;
+            }
+        }
+    }
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, lineptr >= m_newline_offsets);
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, lineptr < m_newline_offsets + m_newline_offsets_size); // strict: lineptr is dereferenced next
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, *lineptr >= offset); // both searches yield the lower bound
+    Location loc;
+    loc.name = m_file;
+    loc.offset = offset;
+    loc.line = (size_t)(lineptr - m_newline_offsets); // index of the terminating newline == zero-based line number
+    if(lineptr > m_newline_offsets)
+        loc.col = (offset - *(lineptr-1) - 1u); // distance from the char following the previous newline
+    else
+        loc.col = offset; // first line: the column is the offset itself
+    return loc;
+}
+
+template<class EventHandler>
+void ParseEngine<EventHandler>::_prepare_locations() // rebuilds the newline-offset table for the current m_buf
+{
+    m_newline_offsets_buf = m_buf; // remember which buffer the table describes; val_location() asserts it still matches m_buf
+    size_t numnewlines = 1u + m_buf.count('\n'); // one slot per '\n' plus one for the end-of-buffer entry
+    _resize_locations(numnewlines);
+    m_newline_offsets_size = 0;
+    for(size_t i = 0; i < m_buf.len; i++)
+        if(m_buf[i] == '\n')
+            m_newline_offsets[m_newline_offsets_size++] = i; // offsets are stored in increasing order
+    m_newline_offsets[m_newline_offsets_size++] = m_buf.len; // final entry: the buffer length closes the last line
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_newline_offsets_size == numnewlines);
+}
+
+template<class EventHandler>
+void ParseEngine<EventHandler>::_resize_locations(size_t numnewlines) // grows (never shrinks) the newline-offset storage
+{
+    if(numnewlines > m_newline_offsets_capacity)
+    {
+        if(m_newline_offsets)
+            _RYML_CB_FREE(m_evt_handler->m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity); // old entries are not copied over
+        m_newline_offsets = _RYML_CB_ALLOC_HINT(m_evt_handler->m_stack.m_callbacks, size_t, numnewlines, m_newline_offsets); // previous pointer is passed as the last (hint) argument
+        m_newline_offsets_capacity = numnewlines;
     }
+}
 
-#ifdef RYML_DBG
-    // next line: print the state flags
+template<class EventHandler>
+bool ParseEngine<EventHandler>::_locations_dirty() const
+{
+    return !m_newline_offsets_size; // true while the offset table is empty, i.e. it has not been (re)built
+}
+
+
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+
+template<class EventHandler>
+void ParseEngine<EventHandler>::_handle_flow_skip_whitespace()
+{
+    if(m_evt_handler->m_curr->line_contents.rem.len > 0)
     {
-        char flagbuf_[64];
-        _parse_dump(dumpfn, "top state: {}\n", _prfl(flagbuf_, m_state->flags));
+        csubstr rem = m_evt_handler->m_curr->line_contents.rem;
+        if(rem.str[0] == ' ' || rem.str[0] == '\t')
+        {
+            _c4dbgpf("starts with whitespace: '{}'", _c4prc(rem.str[0]));
+            _skipchars(" \t");
+            rem = m_evt_handler->m_curr->line_contents.rem;
+        }
+        // comments
+        if(rem.begins_with('#'))
+        {
+            _c4dbgpf("it's a comment: {}", m_evt_handler->m_curr->line_contents.rem);
+            _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
+        }
     }
-#endif
 }
 
 
 //-----------------------------------------------------------------------------
-template<class ...Args>
-void Parser::_err(csubstr fmt, Args const& C4_RESTRICT ...args) const
+
+
+template<class EventHandler>
+void ParseEngine<EventHandler>::_add_annotation(Annotation *C4_RESTRICT dst, csubstr str, size_t indentation, size_t line)
 {
-    char errmsg[RYML_ERRMSG_SIZE];
-    detail::_SubstrWriter writer(errmsg);
-    auto dumpfn = [&writer](csubstr s){ writer.append(s); };
-    _parse_dump(dumpfn, fmt, args...);
-    writer.append('\n');
-    _fmt_msg(dumpfn);
-    size_t len = writer.pos < RYML_ERRMSG_SIZE ? writer.pos : RYML_ERRMSG_SIZE;
-    m_tree->m_callbacks.m_error(errmsg, len, m_state->pos, m_tree->m_callbacks.m_user_data);
+    _c4dbgpf("store annotation[{}]: '{}' indentation={} line={}", dst->num_entries, str, indentation, line);
+    if(C4_UNLIKELY(dst->num_entries >= C4_COUNTOF(dst->annotations)))
+        _c4err("too many annotations");
+    dst->annotations[dst->num_entries].str = str;
+    dst->annotations[dst->num_entries].indentation = indentation;
+    dst->annotations[dst->num_entries].line = line;
+    ++dst->num_entries;
 }
 
-//-----------------------------------------------------------------------------
-#ifdef RYML_DBG
-template<class ...Args>
-void Parser::_dbg(csubstr fmt, Args const& C4_RESTRICT ...args) const
+template<class EventHandler>
+void ParseEngine<EventHandler>::_clear_annotations(Annotation *C4_RESTRICT dst)
 {
-    auto dumpfn = [](csubstr s){ fwrite(s.str, 1, s.len, stdout); };
-    _parse_dump(dumpfn, fmt, args...);
-    dumpfn("\n");
-    _fmt_msg(dumpfn);
+    dst->num_entries = 0;
 }
-#endif
 
-//-----------------------------------------------------------------------------
-bool Parser::_finished_file() const
+#ifdef RYML_NO_COVERAGE__TO_BE_DELETED
+template<class EventHandler>
+bool ParseEngine<EventHandler>::_handle_indentation_from_annotations() // returns true when pending annotations caused indentation to be skipped
 {
-    bool ret = m_state->pos.offset >= m_buf.len;
-    if(ret)
+    if(m_pending_anchors.num_entries == 1u || m_pending_tags.num_entries == 1u)
     {
-        _c4dbgp("finished file!!!");
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries < 2u && m_pending_tags.num_entries < 2u);
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries == 0u || m_pending_anchors.annotations[0].line < m_evt_handler->m_curr->pos.line); // only inspect a populated slot
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries == 0u || m_pending_tags.annotations[0].line < m_evt_handler->m_curr->pos.line); // slot 0: at most one tag is pending here
+        size_t to_skip = m_evt_handler->m_curr->indref;
+        if(m_pending_anchors.num_entries)
+            to_skip = m_pending_anchors.annotations[0].indentation > to_skip ? m_pending_anchors.annotations[0].indentation : to_skip;
+        if(m_pending_tags.num_entries)
+            to_skip = m_pending_tags.annotations[0].indentation > to_skip ? m_pending_tags.annotations[0].indentation : to_skip;
+        _c4dbgpf("annotations pending, skip indentation up to {}!", to_skip);
+        _maybe_skipchars_up_to(' ', to_skip); // to_skip is the largest of indref and the pending annotations' indentation
+        return true;
     }
-    return ret;
+    return false;
 }
+#endif
 
-//-----------------------------------------------------------------------------
-bool Parser::_finished_line() const
+template<class EventHandler>
+bool ParseEngine<EventHandler>::_annotations_require_key_container() const
 {
-    return m_state->line_contents.rem.empty();
+    return m_pending_tags.num_entries > 1 || m_pending_anchors.num_entries > 1;
 }
 
-//-----------------------------------------------------------------------------
-void Parser::parse_in_place(csubstr file, substr buf, Tree *t, size_t node_id)
+template<class EventHandler>
+void ParseEngine<EventHandler>::_check_tag(csubstr tag)
 {
-    m_file = file;
-    m_buf = buf;
-    m_root_id = node_id;
-    m_tree = t;
-    _reset();
-    while( ! _finished_file())
+    if(!tag.begins_with("!<"))
     {
-        _scan_line();
-        while( ! _finished_line())
-            _handle_line();
-        if(_finished_file())
-            break; // it may have finished because of multiline blocks
-        _line_ended();
+        if(C4_UNLIKELY(tag.first_of("[]{},") != npos))
+            _RYML_CB_ERR_(m_evt_handler->m_stack.m_callbacks, "tags must not contain any of '[]{},'", m_evt_handler->m_curr->pos);
+    }
+    else
+    {
+        if(C4_UNLIKELY(!tag.ends_with('>')))
+            _RYML_CB_ERR_(m_evt_handler->m_stack.m_callbacks, "malformed tag", m_evt_handler->m_curr->pos);
     }
-    _handle_finished_file();
-}
-
-//-----------------------------------------------------------------------------
-void Parser::_handle_finished_file()
-{
-    _end_stream();
 }
 
-//-----------------------------------------------------------------------------
-void Parser::_handle_line()
+template<class EventHandler>
+void ParseEngine<EventHandler>::_handle_annotations_before_blck_key_scalar()
 {
-    _c4dbgq("\n-----------");
-    _c4dbgt("handling line={}, offset={}B", m_state->pos.line, m_state->pos.offset);
-    _RYML_CB_ASSERT(m_stack.m_callbacks,  ! m_state->line_contents.rem.empty());
-    if(has_any(RSEQ))
+    _c4dbgpf("annotations_before_blck_key_scalar, node={}", m_evt_handler->m_curr->node_id);
+    if(m_pending_tags.num_entries)
     {
-        if(has_any(FLOW))
+        _c4dbgpf("annotations_before_blck_key_scalar, #tags={}", m_pending_tags.num_entries);
+        if(C4_LIKELY(m_pending_tags.num_entries == 1))
         {
-            if(_handle_seq_flow())
-                return;
+            _check_tag(m_pending_tags.annotations[0].str);
+            m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
+            _clear_annotations(&m_pending_tags);
         }
         else
         {
-            if(_handle_seq_blck())
-                return;
+            _c4err("too many tags");
         }
     }
-    else if(has_any(RMAP))
+    if(m_pending_anchors.num_entries)
     {
-        if(has_any(FLOW))
+        _c4dbgpf("annotations_before_blck_key_scalar, #anchors={}", m_pending_anchors.num_entries);
+        if(C4_LIKELY(m_pending_anchors.num_entries == 1))
         {
-            if(_handle_map_flow())
-                return;
+            m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
+            _clear_annotations(&m_pending_anchors);
         }
         else
         {
-            if(_handle_map_blck())
-                return;
+            _c4err("too many anchors");
         }
     }
-    else if(has_any(RUNK))
-    {
-        if(_handle_unk())
-            return;
-    }
-
-    if(_handle_top())
-        return;
 }
 
-
-//-----------------------------------------------------------------------------
-bool Parser::_handle_unk()
+template<class EventHandler>
+void ParseEngine<EventHandler>::_handle_annotations_before_blck_val_scalar()
 {
-    _c4dbgp("handle_unk");
-
-    csubstr rem = m_state->line_contents.rem;
-    const bool start_as_child = (node(m_state) == nullptr);
-
-    if(C4_UNLIKELY(has_any(NDOC)))
+    _c4dbgpf("annotations_before_blck_val_scalar, node={}", m_evt_handler->m_curr->node_id);
+    if(m_pending_tags.num_entries)
     {
-        if(rem == "---" || rem.begins_with("--- "))
+        _c4dbgpf("annotations_before_blck_val_scalar, #tags={}", m_pending_tags.num_entries);
+        if(C4_LIKELY(m_pending_tags.num_entries == 1))
         {
-            _start_new_doc(rem);
-            return true;
+            _check_tag(m_pending_tags.annotations[0].str);
+            m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
+            _clear_annotations(&m_pending_tags);
         }
-        auto trimmed = rem.triml(' ');
-        if(trimmed == "---" || trimmed.begins_with("--- "))
+        else
         {
-            _RYML_CB_ASSERT(m_stack.m_callbacks, rem.len >= trimmed.len);
-            _line_progressed(rem.len - trimmed.len);
-            _start_new_doc(trimmed);
-            _save_indentation();
-            return true;
+            _c4err("too many tags");
         }
-        else if(trimmed.begins_with("..."))
+    }
+    if(m_pending_anchors.num_entries)
+    {
+        _c4dbgpf("annotations_before_blck_val_scalar, #anchors={}", m_pending_anchors.num_entries);
+        if(C4_LIKELY(m_pending_anchors.num_entries == 1))
         {
-            _end_stream();
+            m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
+            _clear_annotations(&m_pending_anchors);
         }
-        else if(trimmed.first_of("#%") == csubstr::npos) // neither a doc nor a tag
+        else
         {
-            _c4dbgpf("starting implicit doc to accomodate unexpected tokens: '{}'", rem);
-            size_t indref = m_state->indref;
-            _push_level();
-            _start_doc();
-            _set_indentation(indref);
+            _c4err("too many anchors");
         }
-        _RYML_CB_ASSERT(m_stack.m_callbacks, !trimmed.empty());
     }
+}
 
-    _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP));
-    if(m_state->indref > 0)
+template<class EventHandler>
+void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck(size_t current_line) // applies pending tags/anchors to the map about to start
+{
+    _c4dbgpf("annotations_before_start_mapblck, current_line={}", current_line);
+    if(m_pending_tags.num_entries == 2)
+    {
+        _c4dbgp("2 tags, setting entry 0");
+        _check_tag(m_pending_tags.annotations[0].str);
+        m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str); // entry 1 stays pending (not cleared here)
+    }
+    else if(m_pending_tags.num_entries == 1)
     {
-        csubstr ws = rem.left_of(rem.first_not_of(' '));
-        if(m_state->indref <= ws.len)
+        _c4dbgpf("1 tag. line={}, curr={}", m_pending_tags.annotations[0].line, current_line);
+        if(m_pending_tags.annotations[0].line < current_line) // only a tag from an earlier line is applied to the map
         {
-            _c4dbgpf("skipping base indentation of {}", m_state->indref);
-            _line_progressed(m_state->indref);
-            rem = rem.sub(m_state->indref);
+            _c4dbgp("...tag is for the map. setting it.");
+            _check_tag(m_pending_tags.annotations[0].str);
+            m_evt_handler->set_val_tag(m_pending_tags.annotations[0].str);
+            _clear_annotations(&m_pending_tags);
         }
     }
-
-    if(rem.begins_with("- ") _RYML_WITH_TAB_TOKENS( || rem.begins_with("-\t")))
+    // the same policy is applied to the pending anchors
+    if(m_pending_anchors.num_entries == 2)
     {
-        _c4dbgpf("it's a seq (as_child={})", start_as_child);
-        _move_key_anchor_to_val_anchor();
-        _move_key_tag_to_val_tag();
-        _push_level();
-        _start_seq(start_as_child);
-        _save_indentation();
-        _line_progressed(2);
-        return true;
+        _c4dbgp("2 anchors, setting entry 0");
+        m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str); // entry 1 stays pending (not cleared here)
     }
-    else if(rem == '-')
+    else if(m_pending_anchors.num_entries == 1)
     {
-        _c4dbgpf("it's a seq (as_child={})", start_as_child);
-        _move_key_anchor_to_val_anchor();
-        _move_key_tag_to_val_tag();
-        _push_level();
-        _start_seq(start_as_child);
-        _save_indentation();
-        _line_progressed(1);
-        return true;
+        _c4dbgpf("1 anchor. line={}, curr={}", m_pending_anchors.annotations[0].line, current_line);
+        if(m_pending_anchors.annotations[0].line < current_line) // only an anchor from an earlier line is applied to the map
+        {
+            _c4dbgp("...anchor is for the map. setting it.");
+            m_evt_handler->set_val_anchor(m_pending_anchors.annotations[0].str);
+            _clear_annotations(&m_pending_anchors);
+        }
     }
-    else if(rem.begins_with('['))
+}
+
+template<class EventHandler>
+void ParseEngine<EventHandler>::_handle_annotations_before_start_mapblck_as_key()
+{
+    _c4dbgp("annotations_before_start_mapblck_as_key");
+    if(m_pending_tags.num_entries == 2)
     {
-        _c4dbgpf("it's a seq, flow (as_child={})", start_as_child);
-        _move_key_anchor_to_val_anchor();
-        _move_key_tag_to_val_tag();
-        _push_level(/*explicit flow*/true);
-        _start_seq(start_as_child);
-        add_flags(FLOW);
-        _line_progressed(1);
-        return true;
+        _check_tag(m_pending_tags.annotations[0].str);
+        m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
     }
-    else if(rem.begins_with('{'))
+    if(m_pending_anchors.num_entries == 2)
     {
-        _c4dbgpf("it's a map, flow (as_child={})", start_as_child);
-        _move_key_anchor_to_val_anchor();
-        _move_key_tag_to_val_tag();
-        _push_level(/*explicit flow*/true);
-        _start_map(start_as_child);
-        addrem_flags(FLOW|RKEY, RVAL);
-        _line_progressed(1);
-        return true;
+        m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
     }
-    else if(rem.begins_with("? "))
+}
+
+template<class EventHandler>
+void ParseEngine<EventHandler>::_handle_annotations_and_indentation_after_start_mapblck(size_t key_indentation, size_t key_line)
+{
+    _c4dbgp("annotations_after_start_mapblck");
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries <= 2);
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_anchors.num_entries <= 2);
+    if(m_pending_anchors.num_entries || m_pending_tags.num_entries)
     {
-        _c4dbgpf("it's a map (as_child={}) + this key is complex", start_as_child);
-        _move_key_anchor_to_val_anchor();
-        _move_key_tag_to_val_tag();
-        _push_level();
-        _start_map(start_as_child);
-        addrem_flags(RKEY|QMRK, RVAL);
-        _save_indentation();
-        _line_progressed(2);
-        return true;
+        key_indentation = _select_indentation_from_annotations(key_indentation, key_line);
+        switch(m_pending_tags.num_entries)
+        {
+        case 1u:
+            _check_tag(m_pending_tags.annotations[0].str);
+            m_evt_handler->set_key_tag(m_pending_tags.annotations[0].str);
+            _clear_annotations(&m_pending_tags);
+            break;
+        case 2u:
+            _check_tag(m_pending_tags.annotations[1].str);
+            m_evt_handler->set_key_tag(m_pending_tags.annotations[1].str);
+            _clear_annotations(&m_pending_tags);
+            break;
+        }
+        switch(m_pending_anchors.num_entries)
+        {
+        case 1u:
+            m_evt_handler->set_key_anchor(m_pending_anchors.annotations[0].str);
+            _clear_annotations(&m_pending_anchors);
+            break;
+        case 2u:
+            m_evt_handler->set_key_anchor(m_pending_anchors.annotations[1].str);
+            _clear_annotations(&m_pending_anchors);
+            break;
+        }
     }
-    else if(rem.begins_with(": ") && !has_all(SSCL))
+    _set_indentation(key_indentation);
+}
+
+template<class EventHandler>
+size_t ParseEngine<EventHandler>::_select_indentation_from_annotations(size_t val_indentation, size_t val_line)
+{
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_pending_tags.num_entries || m_pending_anchors.num_entries);
+    // select the left-most annotation on the max line
+    auto const *C4_RESTRICT curr = m_pending_anchors.num_entries ? &m_pending_anchors.annotations[0] : &m_pending_tags.annotations[0];
+    for(size_t i = 0; i < m_pending_anchors.num_entries; ++i)
     {
-        _c4dbgp("it's a map with an empty key");
-        _move_key_anchor_to_val_anchor();
-        _move_key_tag_to_val_tag();
-        _push_level();
-        _start_map(start_as_child);
-        _store_scalar_null(rem.str);
-        addrem_flags(RVAL, RKEY);
-        _save_indentation();
-        _line_progressed(2);
-        return true;
+        auto const& C4_RESTRICT ann = m_pending_anchors.annotations[i];
+        if(ann.line > curr->line)
+            curr = &ann;
+        else if(ann.indentation < curr->indentation)
+            curr = &ann;
     }
-    else if(rem == ':' && !has_all(SSCL))
+    for(size_t j = 0; j < m_pending_tags.num_entries; ++j)
     {
-        _c4dbgp("it's a map with an empty key");
-        _move_key_anchor_to_val_anchor();
-        _move_key_tag_to_val_tag();
-        _push_level();
-        _start_map(start_as_child);
-        _store_scalar_null(rem.str);
-        addrem_flags(RVAL, RKEY);
-        _save_indentation();
-        _line_progressed(1);
-        return true;
+        auto const& C4_RESTRICT ann = m_pending_tags.annotations[j];
+        if(ann.line > curr->line)
+            curr = &ann;
+        else if(ann.indentation < curr->indentation)
+            curr = &ann;
     }
-    else if(_handle_types())
+    return curr->line < val_line ? val_indentation : curr->indentation;
+}
+
+template<class EventHandler>
+void ParseEngine<EventHandler>::_handle_directive(csubstr rem)
+{
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.is_sub(m_evt_handler->m_curr->line_contents.rem));
+    const size_t pos = rem.find('#');
+    _c4dbgpf("handle_directive: pos={} rem={}", pos, rem);
+    if(pos == npos) // no comments
     {
-        return true;
+        m_evt_handler->add_directive(rem);
+        _line_progressed(rem.len);
     }
-    else if(!rem.begins_with('*') && _handle_key_anchors_and_refs())
+    else
     {
-        return true;
+        csubstr to_comment = rem.first(pos);
+        csubstr trimmed = to_comment.trimr(" \t");
+        m_evt_handler->add_directive(trimmed);
+        _line_progressed(pos);
+        _skip_comment();
     }
-    else if(has_all(SSCL))
-    {
-        _c4dbgpf("there's a stored scalar: '{}'", m_state->scalar);
+}
 
-        csubstr saved_scalar;
-        bool is_quoted;
-        if(_scan_scalar(&saved_scalar, &is_quoted))
-        {
-            rem = m_state->line_contents.rem;
-            _c4dbgpf("... and there's also a scalar next! '{}'", saved_scalar);
-            if(rem.begins_with_any(" \t"))
-            {
-                size_t n = rem.first_not_of(" \t");
-                _c4dbgpf("skipping {} spaces/tabs", n);
-                rem = rem.sub(n);
-                _line_progressed(n);
-            }
-        }
 
-        _c4dbgpf("rem='{}'", rem);
+//-----------------------------------------------------------------------------
+
+template<class EventHandler>
+void ParseEngine<EventHandler>::_handle_seq_json()
+{
+seqjson_start:
+    _c4dbgpf("handle2_seq_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
+
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT));
 
-        if(rem.begins_with(", "))
+    _handle_flow_skip_whitespace();
+    csubstr rem = m_evt_handler->m_curr->line_contents.rem;
+    if(!rem.len)
+        goto seqjson_again;
+
+    if(has_any(RVAL))
+    {
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
+        const char first = rem.str[0];
+        _c4dbgpf("mapjson[RVAL]: '{}'", first);
+        switch(first)
+        {
+        case '"':
         {
-            _c4dbgpf("got a ',' -- it's a seq (as_child={})", start_as_child);
-            _start_seq(start_as_child);
-            add_flags(FLOW);
-            _append_val(_consume_scalar());
-            _line_progressed(2);
+            _c4dbgp("seqjson[RVAL]: scanning double-quoted scalar");
+            ScannedScalar sc = _scan_scalar_dquot();
+            csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
+            m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
+            addrem_flags(RNXT, RVAL);
+            break;
         }
-        else if(rem.begins_with(','))
+        case '[':
         {
-            _c4dbgpf("got a ',' -- it's a seq (as_child={})", start_as_child);
-            _start_seq(start_as_child);
-            add_flags(FLOW);
-            _append_val(_consume_scalar());
+            _c4dbgp("seqjson[RVAL]: start child seqjson");
+            addrem_flags(RNXT, RVAL);
+            m_evt_handler->begin_seq_val_flow();
+            addrem_flags(RVAL, RNXT);
             _line_progressed(1);
+            break;
         }
-        else if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t")))
+        case '{':
         {
-            _c4dbgpf("got a ': ' -- it's a map (as_child={})", start_as_child);
-            _start_map_unk(start_as_child); // wait for the val scalar to append the key-val pair
-            _line_progressed(2);
+            _c4dbgp("seqjson[RVAL]: start child mapjson");
+            addrem_flags(RNXT, RVAL);
+            m_evt_handler->begin_map_val_flow();
+            addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT);
+            _line_progressed(1);
+            goto seqjson_finish;
         }
-        else if(rem == ":" || rem.begins_with(":\"") || rem.begins_with(":'"))
+        case ']': // this happens on a trailing comma like ", ]"
         {
-            if(rem == ":") { _c4dbgpf("got a ':' -- it's a map (as_child={})", start_as_child); }
-            else { _c4dbgpf("got a '{}' -- it's a map (as_child={})", rem.first(2), start_as_child); }
-            _start_map_unk(start_as_child); // wait for the val scalar to append the key-val pair
-            _line_progressed(1); // advance only 1
+            _c4dbgp("seqjson[RVAL]: end!");
+            rem_flags(RSEQ);
+            m_evt_handler->end_seq();
+            _line_progressed(1);
+            if(!has_all(RSEQ|FLOW))
+                goto seqjson_finish;
+            break;
         }
-        else if(rem.begins_with('}'))
+        default:
         {
-            if(!has_all(RMAP|FLOW))
+            ScannedScalar sc;
+            if(_scan_scalar_seq_json(&sc))
             {
-                _c4err("invalid token: not reading a map");
+                _c4dbgp("seqjson[RVAL]: it's a plain scalar.");
+                csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
+                m_evt_handler->set_val_scalar_plain(maybe_filtered);
+                addrem_flags(RNXT, RVAL);
             }
-            if(!has_all(SSCL))
+            else
             {
-                _c4err("no scalar stored");
+                _c4err("parse error");
             }
-            _append_key_val(saved_scalar);
-            _stop_map();
-            _line_progressed(1);
-        }
-        else if(rem.begins_with("..."))
-        {
-            _c4dbgp("got stream end '...'");
-            _end_stream();
-            _line_progressed(3);
-        }
-        else if(rem.begins_with('#'))
-        {
-            _c4dbgpf("it's a comment: '{}'", rem);
-            _scan_comment();
-            return true;
         }
-        else if(_handle_key_anchors_and_refs())
-        {
-            return true;
-        }
-        else if(rem.begins_with(" ") || rem.begins_with("\t"))
-        {
-            size_t n = rem.first_not_of(" \t");
-            if(n == npos)
-                n = rem.len;
-            _c4dbgpf("has {} spaces/tabs, skip...", n);
-            _line_progressed(n);
-            return true;
         }
-        else if(rem.empty())
+    }
+    else // RNXT
+    {
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
+        const char first = rem.str[0];
+        _c4dbgpf("mapjson[RNXT]: '{}'", first);
+        switch(first)
         {
-            // nothing to do
-        }
-        else if(rem == "---" || rem.begins_with("--- "))
+        case ',':
         {
-            _c4dbgp("caught ---: starting doc");
-            _start_new_doc(rem);
-            return true;
+            _c4dbgp("seqjson[RNXT]: expect next val");
+            addrem_flags(RVAL, RNXT);
+            m_evt_handler->add_sibling();
+            _line_progressed(1);
+            break;
         }
-        else if(rem.begins_with('%'))
+        case ']':
         {
-            _c4dbgp("caught a directive: ignoring...");
-            _line_progressed(rem.len);
-            return true;
+            _c4dbgp("seqjson[RNXT]: end!");
+            m_evt_handler->end_seq();
+            _line_progressed(1);
+            goto seqjson_finish;
         }
-        else
-        {
+        default:
             _c4err("parse error");
         }
-
-        if( ! saved_scalar.empty())
-        {
-            _store_scalar(saved_scalar, is_quoted);
-        }
-
-        return true;
     }
-    else
+
+ seqjson_again:
+    _c4dbgt("seqjson: go again", 0);
+    if(_finished_line())
     {
-        _RYML_CB_ASSERT(m_stack.m_callbacks,  ! has_any(SSCL));
-        csubstr scalar;
-        size_t indentation = m_state->line_contents.indentation; // save
-        bool is_quoted;
-        if(_scan_scalar(&scalar, &is_quoted))
+        if(C4_LIKELY(!_finished_file()))
         {
-            _c4dbgpf("got a {} scalar", is_quoted ? "quoted" : "");
-            rem = m_state->line_contents.rem;
-            {
-                size_t first = rem.first_not_of(" \t");
-                if(first && first != npos)
-                {
-                    _c4dbgpf("skip {} whitespace characters", first);
-                   _line_progressed(first);
-                   rem = rem.sub(first);
-                }
-            }
-            _store_scalar(scalar, is_quoted);
-            if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t")))
-            {
-                _c4dbgpf("got a ': ' next -- it's a map (as_child={})", start_as_child);
-                _push_level();
-                _start_map(start_as_child); // wait for the val scalar to append the key-val pair
-                _set_indentation(indentation);
-                _line_progressed(2); // call this AFTER saving the indentation
-            }
-            else if(rem == ":")
-            {
-                _c4dbgpf("got a ':' next -- it's a map (as_child={})", start_as_child);
-                _push_level();
-                _start_map(start_as_child); // wait for the val scalar to append the key-val pair
-                _set_indentation(indentation);
-                _line_progressed(1); // call this AFTER saving the indentation
-            }
-            else
-            {
-                // we still don't know whether it's a seq or a map
-                // so just store the scalar
-            }
-            return true;
+            _line_ended();
+            _scan_line();
+            _c4dbgnextline();
         }
-        else if(rem.begins_with_any(" \t"))
+        else
         {
-            csubstr ws = rem.left_of(rem.first_not_of(" \t"));
-            rem = rem.right_of(ws);
-            if(has_all(RTOP) && rem.begins_with("---"))
-            {
-                _c4dbgp("there's a doc starting, and it's indented");
-                _set_indentation(ws.len);
-            }
-            _c4dbgpf("skipping {} spaces/tabs", ws.len);
-            _line_progressed(ws.len);
-            return true;
+            _c4err("missing terminating ]");
         }
     }
+    goto seqjson_start;
 
-    return false;
+ seqjson_finish:
+    _c4dbgp("seqjson: finish");
 }
 
 
 //-----------------------------------------------------------------------------
-C4_ALWAYS_INLINE void Parser::_skipchars(char c)
-{
-    _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.rem.begins_with(c));
-    size_t pos = m_state->line_contents.rem.first_not_of(c);
-    if(pos == npos)
-        pos = m_state->line_contents.rem.len; // maybe the line is just whitespace
-    _c4dbgpf("skip {} '{}'", pos, c);
-    _line_progressed(pos);
-}
-
-template<size_t N>
-C4_ALWAYS_INLINE void Parser::_skipchars(const char (&chars)[N])
-{
-    _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.rem.begins_with_any(chars));
-    size_t pos = m_state->line_contents.rem.first_not_of(chars);
-    if(pos == npos)
-        pos = m_state->line_contents.rem.len; // maybe the line is just whitespace
-    _c4dbgpf("skip {} characters", pos);
-    _line_progressed(pos);
-}
-
 
-//-----------------------------------------------------------------------------
-bool Parser::_handle_seq_flow()
+template<class EventHandler>
+void ParseEngine<EventHandler>::_handle_map_json()
 {
-    _c4dbgpf("handle_seq_flow: node_id={} level={}", m_state->node_id, m_state->level);
-    csubstr rem = m_state->line_contents.rem;
+mapjson_start:
+    _c4dbgpf("handle2_map_json: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
 
-    _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY));
-    _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RSEQ|FLOW));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT)));
 
-    if(rem.begins_with(' '))
-    {
-        // with explicit flow, indentation does not matter
-        _c4dbgp("starts with spaces");
-        _skipchars(' ');
-        return true;
-    }
-    _RYML_WITH_TAB_TOKENS(else if(rem.begins_with('\t'))
-    {
-        _c4dbgp("starts with tabs");
-        _skipchars('\t');
-        return true;
-    })
-    else if(rem.begins_with('#'))
-    {
-        _c4dbgp("it's a comment");
-        rem = _scan_comment(); // also progresses the line
-        return true;
-    }
-    else if(rem.begins_with(']'))
-    {
-        _c4dbgp("end the sequence");
-        _pop_level();
-        _line_progressed(1);
-        if(has_all(RSEQIMAP))
-        {
-            _stop_seqimap();
-            _pop_level();
-        }
-        return true;
-    }
+    _handle_flow_skip_whitespace();
+    csubstr rem = m_evt_handler->m_curr->line_contents.rem;
+    if(!rem.len)
+        goto mapjson_again;
 
-    if(has_any(RVAL))
+    if(has_any(RKEY))
     {
-        _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT));
-        bool is_quoted;
-        if(_scan_scalar(&rem, &is_quoted))
-        {
-            _c4dbgp("it's a scalar");
-            addrem_flags(RNXT, RVAL);
-            _append_val(rem, is_quoted);
-            return true;
-        }
-        else if(rem.begins_with('['))
-        {
-            _c4dbgp("val is a child seq");
-            addrem_flags(RNXT, RVAL); // before _push_level!
-            _push_level(/*explicit flow*/true);
-            _start_seq();
-            add_flags(FLOW);
-            _line_progressed(1);
-            return true;
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
+        const char first = rem.str[0];
+        _c4dbgpf("mapjson[RKEY]: '{}'", first);
+        switch(first)
+        {
+        case '"':
+        {
+            _c4dbgp("mapjson[RKEY]: scanning double-quoted scalar");
+            ScannedScalar sc = _scan_scalar_dquot();
+            csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
+            m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
+            addrem_flags(RKCL, RKEY);
+            break;
         }
-        else if(rem.begins_with('{'))
+        case '}': // this happens on a trailing comma like ", }"
         {
-            _c4dbgp("val is a child map");
-            addrem_flags(RNXT, RVAL); // before _push_level!
-            _push_level(/*explicit flow*/true);
-            _start_map();
-            addrem_flags(FLOW|RKEY, RVAL);
+            _c4dbgp("mapjson[RKEY]: end!");
+            m_evt_handler->end_map();
             _line_progressed(1);
-            return true;
+            goto mapjson_finish;
         }
-        else if(rem == ':')
-        {
-            _c4dbgpf("found ':' -- there's an implicit map in the seq node[{}]", m_state->node_id);
-            _start_seqimap();
-            _line_progressed(1);
-            return true;
+        default:
+            _c4err("parse error");
         }
-        else if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t")))
+    }
+    else if(has_any(RVAL))
+    {
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
+        const char first = rem.str[0];
+        _c4dbgpf("mapjson[RVAL]: '{}'", first);
+        switch(first)
         {
-            _c4dbgpf("found ': ' -- there's an implicit map in the seq node[{}]", m_state->node_id);
-            _start_seqimap();
-            _line_progressed(2);
-            return true;
-        }
-        else if(rem.begins_with("? "))
+        case '"':
         {
-            _c4dbgpf("found '? ' -- there's an implicit map in the seq node[{}]", m_state->node_id);
-            _start_seqimap();
-            _line_progressed(2);
-            _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(SSCL) && m_state->scalar == "");
-            addrem_flags(QMRK|RKEY, RVAL|SSCL);
-            return true;
+            _c4dbgp("mapjson[RVAL]: scanning double-quoted scalar");
+            ScannedScalar sc = _scan_scalar_dquot();
+            csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
+            m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
+            addrem_flags(RNXT, RVAL);
+            break;
         }
-        else if(_handle_types())
+        case '[':
         {
-            return true;
+            _c4dbgp("mapjson[RVAL]: start val seqjson");
+            addrem_flags(RNXT, RVAL);
+            m_evt_handler->begin_seq_val_flow();
+            _set_indentation(m_evt_handler->m_parent->indref);
+            addrem_flags(RSEQ|RVAL, RMAP|RNXT);
+            _line_progressed(1);
+            goto mapjson_finish;
         }
-        else if(_handle_val_anchors_and_refs())
+        case '{':
         {
-            return true;
+            _c4dbgp("mapjson[RVAL]: start val mapjson");
+            addrem_flags(RNXT, RVAL);
+            m_evt_handler->begin_map_val_flow();
+            _set_indentation(m_evt_handler->m_parent->indref);
+            addrem_flags(RKEY, RNXT);
+            _line_progressed(1);
+            // keep going in this function
+            break;
         }
-        else if(rem.begins_with(", "))
+        default:
         {
-            _c4dbgp("found ',' -- the value was null");
-            _append_val_null(rem.str - 1);
-            _line_progressed(2);
-            return true;
+            ScannedScalar sc;
+            if(_scan_scalar_map_json(&sc))
+            {
+                _c4dbgp("mapjson[RVAL]: plain scalar.");
+                csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
+                m_evt_handler->set_val_scalar_plain(maybe_filtered);
+                addrem_flags(RNXT, RVAL);
+            }
+            else
+            {
+                _c4err("parse error");
+            }
+            break;
         }
-        else if(rem.begins_with(','))
-        {
-            _c4dbgp("found ',' -- the value was null");
-            _append_val_null(rem.str - 1);
-            _line_progressed(1);
-            return true;
         }
-        else if(rem.begins_with('\t'))
+    }
+    else if(has_any(RKCL)) // read the key colon
+    {
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
+        const char first = rem.str[0];
+        _c4dbgpf("mapjson[RKCL]: '{}'", first);
+        if(first == ':')
         {
-            _skipchars('\t');
-            return true;
+            _c4dbgp("mapjson[RKCL]: found the colon");
+            addrem_flags(RVAL, RKCL);
+            _line_progressed(1);
         }
         else
         {
@@ -26860,4429 +37783,4098 @@ bool Parser::_handle_seq_flow()
     }
     else if(has_any(RNXT))
     {
-        _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL));
-        if(rem.begins_with(", "))
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
+        _c4dbgpf("mapjson[RNXT]: '{}'", rem.str[0]);
+        if(rem.begins_with(','))
         {
-            _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(FLOW));
-            _c4dbgp("seq: expect next val");
-            addrem_flags(RVAL, RNXT);
-            _line_progressed(2);
-            return true;
+            _c4dbgp("mapjson[RNXT]: expect next keyval");
+            m_evt_handler->add_sibling();
+            addrem_flags(RKEY, RNXT);
+            _line_progressed(1);
         }
-        else if(rem.begins_with(','))
+        else if(rem.begins_with('}'))
         {
-            _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(FLOW));
-            _c4dbgp("seq: expect next val");
-            addrem_flags(RVAL, RNXT);
+            _c4dbgp("mapjson[RNXT]: end!");
+            m_evt_handler->end_map();
             _line_progressed(1);
-            return true;
+            goto mapjson_finish;
         }
-        else if(rem == ':')
+        else
         {
-            _c4dbgpf("found ':' -- there's an implicit map in the seq node[{}]", m_state->node_id);
-            _start_seqimap();
-            _line_progressed(1);
-            return true;
+            _c4err("parse error");
         }
-        else if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t")))
+    }
+
+ mapjson_again:
+    _c4dbgt("mapjson: go again", 0);
+    if(_finished_line())
+    {
+        if(C4_LIKELY(!_finished_file()))
         {
-            _c4dbgpf("found ': ' -- there's an implicit map in the seq node[{}]", m_state->node_id);
-            _start_seqimap();
-            _line_progressed(2);
-            return true;
+            _line_ended();
+            _scan_line();
+            _c4dbgnextline();
         }
         else
         {
-            _c4err("was expecting a comma");
+            _c4err("missing terminating }");
         }
     }
-    else
-    {
-        _c4err("internal error");
-    }
+    goto mapjson_start;
 
-    return true;
+ mapjson_finish:
+    _c4dbgp("mapjson: finish");
 }
 
+
 //-----------------------------------------------------------------------------
-bool Parser::_handle_seq_blck()
+
+template<class EventHandler>
+void ParseEngine<EventHandler>::_handle_seq_imap()
 {
-    _c4dbgpf("handle_seq_impl: node_id={} level={}", m_state->node_id, m_state->level);
-    csubstr rem = m_state->line_contents.rem;
+seqimap_start:
+    _c4dbgpf("handle2_seq_imap: node_id={} level={} indref={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
 
-    _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RSEQ));
-    _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY));
-    _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(FLOW));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQIMAP));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT|QMRK|RKCL));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == has_all(RVAL) + has_all(RNXT) + has_all(QMRK) + has_all(RKCL));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 3);
 
-    if(rem.begins_with('#'))
-    {
-        _c4dbgp("it's a comment");
-        rem = _scan_comment();
-        return true;
-    }
+    _handle_flow_skip_whitespace();
+    csubstr rem = m_evt_handler->m_curr->line_contents.rem;
+    if(!rem.len)
+        goto seqimap_again;
 
-    if(has_any(RNXT))
+    if(has_any(RVAL))
     {
-        _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL));
-
-        if(_handle_indentation())
-            return true;
-
-        if(rem.begins_with("- ") _RYML_WITH_TAB_TOKENS( || rem.begins_with("-\t")))
-        {
-            _c4dbgp("expect another val");
-            addrem_flags(RVAL, RNXT);
-            _line_progressed(2);
-            return true;
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
+        const char first = rem.str[0];
+        _c4dbgpf("seqimap[RVAL]: '{}'", _c4prc(first));
+        ScannedScalar sc;
+        if(first == '\'')
+        {
+            _c4dbgp("seqimap[RVAL]: scanning single-quoted scalar");
+            sc = _scan_scalar_squot();
+            csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
+            m_evt_handler->set_val_scalar_squoted(maybe_filtered);
+            m_evt_handler->end_map();
+            goto seqimap_finish;
+        }
+        else if(first == '"')
+        {
+            _c4dbgp("seqimap[RVAL]: scanning double-quoted scalar");
+            sc = _scan_scalar_dquot();
+            csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
+            m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
+            m_evt_handler->end_map();
+            goto seqimap_finish;
+        }
+        // block scalars (ie | and >) cannot appear in flow containers
+        else if(_scan_scalar_plain_map_flow(&sc))
+        {
+            _c4dbgp("seqimap[RVAL]: it's a scalar.");
+            csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
+            m_evt_handler->set_val_scalar_plain(maybe_filtered);
+            m_evt_handler->end_map();
+            goto seqimap_finish;
+        }
+        else if(first == '[')
+        {
+            _c4dbgp("seqimap[RVAL]: start child seqflow");
+            addrem_flags(RNXT, RVAL);
+            m_evt_handler->begin_seq_val_flow();
+            addrem_flags(RVAL, RNXT|RSEQIMAP);
+            _set_indentation(m_evt_handler->m_parent->indref);
+            _line_progressed(1);
+            goto seqimap_finish;
         }
-        else if(rem == '-')
+        else if(first == '{')
         {
-            _c4dbgp("expect another val");
-            addrem_flags(RVAL, RNXT);
+            _c4dbgp("seqimap[RVAL]: start child mapflow");
+            addrem_flags(RNXT, RVAL);
+            m_evt_handler->begin_map_val_flow();
+            addrem_flags(RMAP|RKEY, RSEQ|RVAL|RSEQIMAP|RNXT);
+            _set_indentation(m_evt_handler->m_parent->indref);
             _line_progressed(1);
-            return true;
+            goto seqimap_finish;
         }
-        else if(rem.begins_with_any(" \t"))
+        else if(first == ',' || first == ']')
         {
-            _RYML_CB_ASSERT(m_stack.m_callbacks,  ! _at_line_begin());
-            _skipchars(" \t");
-            return true;
+            _c4dbgp("seqimap[RVAL]: finish without val.");
+            m_evt_handler->set_val_scalar_plain({});
+            m_evt_handler->end_map();
+            goto seqimap_finish;
         }
-        else if(rem.begins_with("..."))
+        else if(first == '&')
         {
-            _c4dbgp("got stream end '...'");
-            _end_stream();
-            _line_progressed(3);
-            return true;
+            csubstr anchor = _scan_anchor();
+            _c4dbgp("seqimap[RVAL]: anchor!");
+            m_evt_handler->set_val_anchor(anchor);
         }
-        else if(rem.begins_with("---"))
+        else if(first == '*')
         {
-            _c4dbgp("got document start '---'");
-            _start_new_doc(rem);
-            return true;
+            csubstr ref = _scan_ref_seq();
+            _c4dbgp("seqimap[RVAL]: ref!");
+            m_evt_handler->set_val_ref(ref);
+            addrem_flags(RNXT, RVAL);
         }
         else
         {
             _c4err("parse error");
         }
     }
-    else if(has_any(RVAL))
+    else if(has_any(RNXT))
     {
-        // there can be empty values
-        if(_handle_indentation())
-            return true;
-
-        csubstr s;
-        bool is_quoted;
-        if(_scan_scalar(&s, &is_quoted)) // this also progresses the line
-        {
-            _c4dbgpf("it's a{} scalar", is_quoted ? " quoted" : "");
-
-            rem = m_state->line_contents.rem;
-            if(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(rem.begins_with_any(" \t"), rem.begins_with(' ')))
-            {
-                _c4dbgp("skipping whitespace...");
-                size_t skip = rem.first_not_of(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
-                if(skip == csubstr::npos)
-                    skip = rem.len; // maybe the line is just whitespace
-                _line_progressed(skip);
-                rem = rem.sub(skip);
-            }
-
-            _c4dbgpf("rem=[{}]~~~{}~~~", rem.len, rem);
-            if(!rem.begins_with('#') && (rem.ends_with(':') || rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))))
-            {
-                _c4dbgp("actually, the scalar is the first key of a map, and it opens a new scope");
-                if(m_key_anchor.empty())
-                    _move_val_anchor_to_key_anchor();
-                if(m_key_tag.empty())
-                    _move_val_tag_to_key_tag();
-                addrem_flags(RNXT, RVAL); // before _push_level! This prepares the current level for popping by setting it to RNXT
-                _push_level();
-                _start_map();
-                _store_scalar(s, is_quoted);
-                if( ! _maybe_set_indentation_from_anchor_or_tag())
-                {
-                    _c4dbgpf("set indentation from scalar: {}", m_state->scalar_col);
-                    _set_indentation(m_state->scalar_col); // this is the column where the scalar starts
-                }
-                _move_key_tag2_to_key_tag();
-                addrem_flags(RVAL, RKEY);
-                _line_progressed(1);
-            }
-            else
-            {
-                _c4dbgp("appending val to current seq");
-                _append_val(s, is_quoted);
-                addrem_flags(RNXT, RVAL);
-            }
-            return true;
-        }
-        else if(rem.begins_with("- ") _RYML_WITH_TAB_TOKENS( || rem.begins_with("-\t")))
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
+        const char first = rem.str[0];
+        _c4dbgpf("seqimap[RNXT]: '{}'", _c4prc(first));
+        if(first == ',' || first == ']')
         {
-            if(_rval_dash_start_or_continue_seq())
-                _line_progressed(2);
-            return true;
+            // we may get here because a map or a seq started and we
+            // return later
+            _c4dbgp("seqimap: done");
+            m_evt_handler->end_map();
+            goto seqimap_finish;
         }
-        else if(rem == '-')
+        else
         {
-            if(_rval_dash_start_or_continue_seq())
-                _line_progressed(1);
-            return true;
+            _c4err("parse error");
         }
-        else if(rem.begins_with('['))
-        {
-            _c4dbgp("val is a child seq, flow");
-            addrem_flags(RNXT, RVAL); // before _push_level!
-            _push_level(/*explicit flow*/true);
-            _start_seq();
-            add_flags(FLOW);
+    }
+    else if(has_any(QMRK))
+    {
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(QMRK));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
+        const char first = rem.str[0];
+        _c4dbgpf("seqimap[QMRK]: '{}'", _c4prc(first));
+        ScannedScalar sc;
+        if(first == '\'')
+        {
+            _c4dbgp("seqimap[QMRK]: scanning single-quoted scalar");
+            sc = _scan_scalar_squot();
+            csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
+            m_evt_handler->set_key_scalar_squoted(maybe_filtered);
+            addrem_flags(RKCL, QMRK);
+            goto seqimap_again;
+        }
+        else if(first == '"')
+        {
+            _c4dbgp("seqimap[QMRK]: scanning double-quoted scalar");
+            sc = _scan_scalar_dquot();
+            csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
+            m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
+            addrem_flags(RKCL, QMRK);
+            goto seqimap_again;
+        }
+        // block scalars (ie | and >) cannot appear in flow containers
+        else if(_scan_scalar_plain_map_flow(&sc))
+        {
+            _c4dbgp("seqimap[QMRK]: it's a scalar.");
+            csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
+            m_evt_handler->set_key_scalar_plain(maybe_filtered);
+            addrem_flags(RKCL, QMRK);
+            goto seqimap_again;
+        }
+        else if(first == '[')
+        {
+            _c4dbgp("seqimap[QMRK]: start child seqflow");
+            addrem_flags(RKCL, QMRK);
+            m_evt_handler->begin_seq_key_flow();
+            addrem_flags(RSEQ|RVAL, RKCL|RSEQIMAP);
+            _set_indentation(m_evt_handler->m_parent->indref);
             _line_progressed(1);
-            return true;
+            goto seqimap_finish;
         }
-        else if(rem.begins_with('{'))
+        else if(first == '{')
         {
-            _c4dbgp("val is a child map, flow");
-            addrem_flags(RNXT, RVAL); // before _push_level!
-            _push_level(/*explicit flow*/true);
-            _start_map();
-            addrem_flags(FLOW|RKEY, RVAL);
+            _c4dbgp("seqimap[QMRK]: start child mapflow");
+            addrem_flags(RKCL, QMRK);
+            m_evt_handler->begin_map_key_flow();
+            addrem_flags(RMAP|RKEY, RSEQ|RKCL|RSEQIMAP);
+            _set_indentation(m_evt_handler->m_parent->indref);
             _line_progressed(1);
-            return true;
-        }
-        else if(rem.begins_with("? "))
-        {
-            _c4dbgp("val is a child map + this key is complex");
-            addrem_flags(RNXT, RVAL); // before _push_level!
-            _push_level();
-            _start_map();
-            addrem_flags(QMRK|RKEY, RVAL);
-            _save_indentation();
-            _line_progressed(2);
-            return true;
+            goto seqimap_finish;
         }
-        else if(rem.begins_with(' '))
+        else if(first == ',' || first == ']')
         {
-            csubstr spc = rem.left_of(rem.first_not_of(' '));
-            if(_at_line_begin())
-            {
-                _c4dbgpf("skipping value indentation: {} spaces", spc.len);
-                _line_progressed(spc.len);
-                return true;
-            }
-            else
-            {
-                _c4dbgpf("skipping {} spaces", spc.len);
-                _line_progressed(spc.len);
-                return true;
-            }
+            _c4dbgp("seqimap[QMRK]: finish without key.");
+            m_evt_handler->set_key_scalar_plain({});
+            m_evt_handler->set_val_scalar_plain({});
+            m_evt_handler->end_map();
+            goto seqimap_finish;
         }
-        else if(_handle_types())
+        else if(first == '&')
         {
-            return true;
+            csubstr anchor = _scan_anchor();
+            _c4dbgp("seqimap[QMRK]: anchor!");
+            m_evt_handler->set_key_anchor(anchor);
         }
-        else if(_handle_val_anchors_and_refs())
+        else if(first == '*')
         {
-            return true;
-        }
-        /* pathological case:
-         * - &key : val
-         * - &key :
-         * - : val
-         */
-        else if((!has_all(SSCL)) &&
-                (rem.begins_with(": ") || rem.left_of(rem.find("#")).trimr("\t") == ":"))
-        {
-            if(!m_val_anchor.empty() || !m_val_tag.empty())
-            {
-                _c4dbgp("val is a child map + this key is empty, with anchors or tags");
-                addrem_flags(RNXT, RVAL); // before _push_level!
-                _move_val_tag_to_key_tag();
-                _move_val_anchor_to_key_anchor();
-                _push_level();
-                _start_map();
-                _store_scalar_null(rem.str);
-                addrem_flags(RVAL, RKEY);
-                RYML_CHECK(_maybe_set_indentation_from_anchor_or_tag()); // one of them must exist
-                _line_progressed(rem.begins_with(": ") ? 2u : 1u);
-                return true;
-            }
-            else
-            {
-                _c4dbgp("val is a child map + this key is empty, no anchors or tags");
-                addrem_flags(RNXT, RVAL); // before _push_level!
-                size_t ind = m_state->indref;
-                _push_level();
-                _start_map();
-                _store_scalar_null(rem.str);
-                addrem_flags(RVAL, RKEY);
-                _c4dbgpf("set indentation from map anchor: {}", ind + 2);
-                _set_indentation(ind + 2); // this is the column where the map starts
-                _line_progressed(rem.begins_with(": ") ? 2u : 1u);
-                return true;
-            }
+            csubstr ref = _scan_ref_seq();
+            _c4dbgp("seqimap[QMRK]: ref!");
+            m_evt_handler->set_key_ref(ref);
+            addrem_flags(RKCL, QMRK);
         }
         else
         {
             _c4err("parse error");
         }
     }
-
-    return false;
-}
-
-//-----------------------------------------------------------------------------
-
-bool Parser::_rval_dash_start_or_continue_seq()
-{
-    size_t ind = m_state->line_contents.current_col();
-    _RYML_CB_ASSERT(m_stack.m_callbacks, ind >= m_state->indref);
-    size_t delta_ind = ind - m_state->indref;
-    if( ! delta_ind)
-    {
-        _c4dbgp("prev val was empty");
-        addrem_flags(RNXT, RVAL);
-        _append_val_null(&m_state->line_contents.full[ind]);
-        return false;
-    }
-    _c4dbgp("val is a nested seq, indented");
-    addrem_flags(RNXT, RVAL); // before _push_level!
-    _push_level();
-    _start_seq();
-    _save_indentation();
-    return true;
-}
-
-//-----------------------------------------------------------------------------
-bool Parser::_handle_map_flow()
-{
-    // explicit flow, ie, inside {}, separated by commas
-    _c4dbgpf("handle_map_flow: node_id={}  level={}", m_state->node_id, m_state->level);
-    csubstr rem = m_state->line_contents.rem;
-
-    _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RMAP|FLOW));
-
-    if(rem.begins_with(' '))
-    {
-        // with explicit flow, indentation does not matter
-        _c4dbgp("starts with spaces");
-        _skipchars(' ');
-        return true;
-    }
-    _RYML_WITH_TAB_TOKENS(else if(rem.begins_with('\t'))
-    {
-        // with explicit flow, indentation does not matter
-        _c4dbgp("starts with tabs");
-        _skipchars('\t');
-        return true;
-    })
-    else if(rem.begins_with('#'))
-    {
-        _c4dbgp("it's a comment");
-        rem = _scan_comment(); // also progresses the line
-        return true;
-    }
-    else if(rem.begins_with('}'))
+    else if(has_any(RKCL))
     {
-        _c4dbgp("end the map");
-        if(has_all(SSCL))
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKCL));
+        const char first = rem.str[0];
+        _c4dbgpf("seqimap[RKCL]: '{}'", _c4prc(first));
+        if(first == ':')
+        {
+            _c4dbgp("seqimap[RKCL]: found ':'");
+            addrem_flags(RVAL, RKCL);
+            _line_progressed(1);
+            goto seqimap_again;
+        }
+        else if(first == ',' || first == ']')
         {
-            _c4dbgp("the last val was null");
-            _append_key_val_null(rem.str - 1);
-            rem_flags(RVAL);
+            _c4dbgp("seqimap[RKCL]: found ','. finish without val");
+            m_evt_handler->set_val_scalar_plain({});
+            m_evt_handler->end_map();
+            goto seqimap_finish;
         }
-        _pop_level();
-        _line_progressed(1);
-        if(has_all(RSEQIMAP))
+        else
         {
-            _c4dbgp("stopping implicitly nested 1x map");
-            _stop_seqimap();
-            _pop_level();
+            _c4err("parse error");
         }
-        return true;
     }
 
-    if(has_any(RNXT))
+ seqimap_again:
+    _c4dbgt("seqimap: go again", 0);
+    if(_finished_line())
     {
-        _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY));
-        _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL));
-        _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RSEQIMAP));
-
-        if(rem.begins_with(", "))
+        if(C4_LIKELY(!_finished_file()))
         {
-            _c4dbgp("seq: expect next keyval");
-            addrem_flags(RKEY, RNXT);
-            _line_progressed(2);
-            return true;
-        }
-        else if(rem.begins_with(','))
-        {
-            _c4dbgp("seq: expect next keyval");
-            addrem_flags(RKEY, RNXT);
-            _line_progressed(1);
-            return true;
+            _line_ended();
+            _scan_line();
+            _c4dbgnextline();
         }
         else
         {
             _c4err("parse error");
         }
     }
-    else if(has_any(RKEY))
-    {
-        _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT));
-        _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL));
+    goto seqimap_start;
 
-        bool is_quoted;
-        if(has_none(SSCL) && _scan_scalar(&rem, &is_quoted))
-        {
-            _c4dbgp("it's a scalar");
-            _store_scalar(rem, is_quoted);
-            rem = m_state->line_contents.rem;
-            csubstr trimmed = rem.triml(" \t");
-            if(trimmed.len && (trimmed.begins_with(": ") || trimmed.begins_with_any(":,}") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))))
-            {
-                _RYML_CB_ASSERT(m_stack.m_callbacks, trimmed.str >= rem.str);
-                size_t num = static_cast<size_t>(trimmed.str - rem.str);
-                _c4dbgpf("trimming {} whitespace after the scalar: '{}' --> '{}'", num, rem, rem.sub(num));
-                rem = rem.sub(num);
-                _line_progressed(num);
-            }
-        }
+ seqimap_finish:
+    _c4dbgp("seqimap: finish");
+}
+
+
+//-----------------------------------------------------------------------------
 
-        if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t")))
+template<class EventHandler>
+void ParseEngine<EventHandler>::_handle_seq_flow()
+{
+seqflow_start:
+    _c4dbgpf("handle2_seq_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
+
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RVAL) != has_all(RNXT));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indref != npos);
+
+    _handle_flow_skip_whitespace();
+    csubstr rem = m_evt_handler->m_curr->line_contents.rem;
+    if(!rem.len)
+        goto seqflow_again;
+
+    if(has_any(RVAL))
+    {
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
+        const char first = rem.str[0];
+        ScannedScalar sc;
+        if(first == '\'')
         {
-            _c4dbgp("wait for val");
-            addrem_flags(RVAL, RKEY|QMRK);
-            _line_progressed(2);
-            if(!has_all(SSCL))
-            {
-                _c4dbgp("no key was found, defaulting to empty key ''");
-                _store_scalar_null(rem.str);
-            }
-            return true;
+            _c4dbgp("seqflow[RVAL]: scanning single-quoted scalar");
+            sc = _scan_scalar_squot();
+            csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
+            m_evt_handler->set_val_scalar_squoted(maybe_filtered);
+            addrem_flags(RNXT, RVAL);
         }
-        else if(rem == ':')
+        else if(first == '"')
         {
-            _c4dbgp("wait for val");
-            addrem_flags(RVAL, RKEY|QMRK);
-            _line_progressed(1);
-            if(!has_all(SSCL))
-            {
-                _c4dbgp("no key was found, defaulting to empty key ''");
-                _store_scalar_null(rem.str);
-            }
-            return true;
+            _c4dbgp("seqflow[RVAL]: scanning double-quoted scalar");
+            sc = _scan_scalar_dquot();
+            csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
+            m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
+            addrem_flags(RNXT, RVAL);
         }
-        else if(rem.begins_with('?'))
+        // block scalars (ie | and >) cannot appear in flow containers
+        else if(_scan_scalar_plain_seq_flow(&sc))
         {
-            _c4dbgp("complex key");
-            add_flags(QMRK);
-            _line_progressed(1);
-            return true;
+            _c4dbgp("seqflow[RVAL]: it's a scalar.");
+            csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
+            m_evt_handler->set_val_scalar_plain(maybe_filtered);
+            addrem_flags(RNXT, RVAL);
         }
-        else if(rem.begins_with(','))
+        else if(first == '[')
         {
-            _c4dbgp("prev scalar was a key with null value");
-            _append_key_val_null(rem.str - 1);
+            _c4dbgp("seqflow[RVAL]: start child seqflow");
+            addrem_flags(RNXT, RVAL);
+            m_evt_handler->begin_seq_val_flow();
+            _set_indentation(m_evt_handler->m_parent->indref);
+            addrem_flags(RVAL, RNXT);
             _line_progressed(1);
-            return true;
         }
-        else if(rem.begins_with('}'))
+        else if(first == '{')
         {
-            _c4dbgp("map terminates after a key...");
-            _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(SSCL));
-            _c4dbgp("the last val was null");
-            _append_key_val_null(rem.str - 1);
-            rem_flags(RVAL);
-            if(has_all(RSEQIMAP))
-            {
-                _c4dbgp("stopping implicitly nested 1x map");
-                _stop_seqimap();
-                _pop_level();
-            }
-            _pop_level();
+            _c4dbgp("seqflow[RVAL]: start child mapflow");
+            addrem_flags(RNXT, RVAL);
+            m_evt_handler->begin_map_val_flow();
+            _set_indentation(m_evt_handler->m_parent->indref);
+            addrem_flags(RMAP|RKEY, RSEQ|RVAL|RNXT);
             _line_progressed(1);
-            return true;
+            goto seqflow_finish;
         }
-        else if(_handle_types())
+        else if(first == ']') // this happens on a trailing comma like ", ]"
         {
-            return true;
-        }
-        else if(_handle_key_anchors_and_refs())
-        {
-            return true;
+            _c4dbgp("seqflow[RVAL]: end!");
+            _line_progressed(1);
+            m_evt_handler->end_seq();
+            goto seqflow_finish;
         }
-        else if(rem == "")
+        else if(first == '*')
         {
-            return true;
+            csubstr ref = _scan_ref_seq();
+            _c4dbgpf("seqflow[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
+            m_evt_handler->set_val_ref(ref);
+            addrem_flags(RNXT, RVAL);
         }
-        else
+        else if(first == '&')
         {
-            size_t pos = rem.first_not_of(" \t");
-            if(pos == csubstr::npos)
-               pos = 0;
-            rem = rem.sub(pos);
-            if(rem.begins_with(':'))
-            {
-                _c4dbgp("wait for val");
-                addrem_flags(RVAL, RKEY|QMRK);
-                _line_progressed(pos + 1);
-                if(!has_all(SSCL))
-                {
-                    _c4dbgp("no key was found, defaulting to empty key ''");
-                    _store_scalar_null(rem.str);
-                }
-                return true;
-            }
-            else if(rem.begins_with('#'))
-            {
-                _c4dbgp("it's a comment");
-                _line_progressed(pos);
-                rem = _scan_comment(); // also progresses the line
-                return true;
-            }
-            else
+            csubstr anchor = _scan_anchor();
+            _c4dbgpf("seqflow[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
+            m_evt_handler->set_val_anchor(anchor);
+            if(_maybe_scan_following_comma())
             {
-                _c4err("parse error");
+                _c4dbgp("seqflow[RVAL]: empty scalar!");
+                m_evt_handler->set_val_scalar_plain({});
+                m_evt_handler->add_sibling();
             }
         }
-    }
-    else if(has_any(RVAL))
-    {
-        _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT));
-        _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY));
-        _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(SSCL));
-        bool is_quoted;
-        if(_scan_scalar(&rem, &is_quoted))
+        else if(first == '!')
         {
-            _c4dbgp("it's a scalar");
-            addrem_flags(RNXT, RVAL|RKEY);
-            _append_key_val(rem, is_quoted);
-            if(has_all(RSEQIMAP))
+            csubstr tag = _scan_tag();
+            _c4dbgpf("seqflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
+            _check_tag(tag);
+            m_evt_handler->set_val_tag(tag);
+            if(_maybe_scan_following_comma())
             {
-                _c4dbgp("stopping implicitly nested 1x map");
-                _stop_seqimap();
-                _pop_level();
+                _c4dbgp("seqflow[RVAL]: empty scalar!");
+                m_evt_handler->set_val_scalar_plain({});
+                m_evt_handler->add_sibling();
             }
-            return true;
         }
-        else if(rem.begins_with('['))
+        else if(first == ':')
         {
-            _c4dbgp("val is a child seq");
-            addrem_flags(RNXT, RVAL|RKEY); // before _push_level!
-            _push_level(/*explicit flow*/true);
-            _move_scalar_from_top();
-            _start_seq();
-            add_flags(FLOW);
+            _c4dbgpf("seqflow[RVAL]: actually seqimap at node[{}], with empty key", m_evt_handler->m_curr->node_id);
+            addrem_flags(RNXT, RVAL);
+            m_evt_handler->begin_map_val_flow();
+            _set_indentation(m_evt_handler->m_parent->indref);
+            m_evt_handler->set_key_scalar_plain({});
+            addrem_flags(RSEQIMAP|RVAL, RSEQ|RNXT);
             _line_progressed(1);
-            return true;
+            goto seqflow_finish;
         }
-        else if(rem.begins_with('{'))
+        else if(first == '?')
         {
-            _c4dbgp("val is a child map");
-            addrem_flags(RNXT, RVAL|RKEY); // before _push_level!
-            _push_level(/*explicit flow*/true);
-            _move_scalar_from_top();
-            _start_map();
-            addrem_flags(FLOW|RKEY, RNXT|RVAL);
+            _c4dbgp("seqflow[RVAL]: start child mapflow, explicit key");
+            addrem_flags(RNXT, RVAL);
+            m_was_inside_qmrk = true;
+            m_evt_handler->begin_map_val_flow();
+            _set_indentation(m_evt_handler->m_parent->indref);
+            addrem_flags(RSEQIMAP|QMRK, RSEQ|RNXT);
             _line_progressed(1);
-            return true;
+            _maybe_skip_whitespace_tokens();
+            goto seqflow_finish;
         }
-        else if(_handle_types())
+        else
         {
-            return true;
+            _c4err("parse error");
         }
-        else if(_handle_val_anchors_and_refs())
+    }
+    else // RNXT
+    {
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
+        const char first = rem.str[0];
+        if(first == ',')
         {
-            return true;
+            _c4dbgp("seqflow[RNXT]: expect next val");
+            addrem_flags(RVAL, RNXT);
+            m_evt_handler->add_sibling();
+            _line_progressed(1);
         }
-        else if(rem.begins_with(','))
+        else if(first == ']')
         {
-            _c4dbgp("appending empty val");
-            _append_key_val_null(rem.str - 1);
-            addrem_flags(RKEY, RVAL);
+            _c4dbgp("seqflow[RNXT]: end!");
+            m_evt_handler->end_seq();
             _line_progressed(1);
-            if(has_any(RSEQIMAP))
-            {
-                _c4dbgp("stopping implicitly nested 1x map");
-                _stop_seqimap();
-                _pop_level();
-            }
-            return true;
+            goto seqflow_finish;
         }
-        else if(has_any(RSEQIMAP) && rem.begins_with(']'))
+        else if(first == ':')
         {
-            _c4dbgp("stopping implicitly nested 1x map");
-            if(has_any(SSCL))
-            {
-                _append_key_val_null(rem.str - 1);
-            }
-            _stop_seqimap();
-            _pop_level();
-            return true;
+            _c4dbgpf("seqflow[RNXT]: actually seqimap at node[{}]", m_evt_handler->m_curr->node_id);
+            m_evt_handler->actually_val_is_first_key_of_new_map_flow();
+            _set_indentation(m_evt_handler->m_parent->indref);
+            _line_progressed(1);
+            addrem_flags(RSEQIMAP|RVAL, RNXT);
+            goto seqflow_finish;
         }
         else
         {
             _c4err("parse error");
         }
     }
-    else
+
+ seqflow_again:
+    _c4dbgt("seqflow: go again", 0);
+    if(_finished_line())
     {
-        _c4err("internal error");
+        if(C4_LIKELY(!_finished_file()))
+        {
+            _line_ended();
+            _scan_line();
+            _c4dbgnextline();
+        }
+        else
+        {
+            _c4err("missing terminating ]");
+        }
     }
+    goto seqflow_start;
 
-    return false;
+ seqflow_finish:
+    _c4dbgp("seqflow: finish");
 }
 
-//-----------------------------------------------------------------------------
-bool Parser::_handle_map_blck()
-{
-    _c4dbgpf("handle_map_impl: node_id={}  level={}", m_state->node_id, m_state->level);
-    csubstr rem = m_state->line_contents.rem;
 
-    _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RMAP));
-    _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(FLOW));
+//-----------------------------------------------------------------------------
 
-    if(rem.begins_with('#'))
-    {
-        _c4dbgp("it's a comment");
-        rem = _scan_comment();
-        return true;
-    }
+template<class EventHandler>
+void ParseEngine<EventHandler>::_handle_map_flow()
+{
+mapflow_start:
+    _c4dbgpf("handle2_map_flow: node_id={} level={} indentation={}", m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
 
-    if(has_any(RNXT))
-    {
-        _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY));
-        _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL));
-        // actually, we don't need RNXT in indent-based maps.
-        addrem_flags(RKEY, RNXT);
-    }
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(FLOW));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + has_any(QMRK)));
 
-    if(_handle_indentation())
-        return true;
+    _handle_flow_skip_whitespace();
+    csubstr rem = m_evt_handler->m_curr->line_contents.rem;
+    if(!rem.len)
+        goto mapflow_again;
 
     if(has_any(RKEY))
     {
-        _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT));
-        _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL));
-
-        _c4dbgp("read scalar?");
-        bool is_quoted;
-        if(_scan_scalar(&rem, &is_quoted)) // this also progresses the line
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
+        const char first = rem.str[0];
+        _c4dbgpf("mapflow[RKEY]: '{}'", first);
+        ScannedScalar sc;
+        if(first == '\'')
         {
-            _c4dbgpf("it's a{} scalar", is_quoted ? " quoted" : "");
-            if(has_all(QMRK|SSCL))
-            {
-                _c4dbgpf("current key is QMRK; SSCL is set. so take store scalar='{}' as key and add an empty val", m_state->scalar);
-                _append_key_val_null(rem.str - 1);
-            }
-            _store_scalar(rem, is_quoted);
-            if(has_all(QMRK|RSET))
-            {
-                _c4dbgp("it's a complex key, so use null value '~'");
-                _append_key_val_null(rem.str);
-            }
-            rem = m_state->line_contents.rem;
-
-            if(rem.begins_with(':'))
-            {
-                _c4dbgp("wait for val");
-                addrem_flags(RVAL, RKEY|QMRK);
-                _line_progressed(1);
-                rem = m_state->line_contents.rem;
-                if(rem.begins_with_any(" \t"))
-                {
-                    _RYML_CB_ASSERT(m_stack.m_callbacks,  ! _at_line_begin());
-                    rem = rem.left_of(rem.first_not_of(" \t"));
-                    _c4dbgpf("skip {} spaces/tabs", rem.len);
-                    _line_progressed(rem.len);
-                }
-            }
-            return true;
+            _c4dbgp("mapflow[RKEY]: scanning single-quoted scalar");
+            sc = _scan_scalar_squot();
+            csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
+            m_evt_handler->set_key_scalar_squoted(maybe_filtered);
+            addrem_flags(RKCL, RKEY|QMRK);
         }
-        else if(rem.begins_with_any(" \t"))
+        else if(first == '"')
         {
-            size_t pos = rem.first_not_of(" \t");
-            if(pos == npos)
-                pos = rem.len;
-            _c4dbgpf("skip {} spaces/tabs", pos);
-            _line_progressed(pos);
-            return true;
+            _c4dbgp("mapflow[RKEY]: scanning double-quoted scalar");
+            sc = _scan_scalar_dquot();
+            csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
+            m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
+            addrem_flags(RKCL, RKEY|QMRK);
         }
-        else if(rem == '?' || rem.begins_with("? "))
+        // block scalars (ie | and >) cannot appear in flow containers
+        else if(_scan_scalar_plain_map_flow(&sc))
         {
-            _c4dbgp("it's a complex key");
-            _line_progressed(rem.begins_with("? ") ? 2u : 1u);
-            if(has_any(SSCL))
-                _append_key_val_null(rem.str - 1);
-            add_flags(QMRK);
-            return true;
+            _c4dbgp("mapflow[RKEY]: plain scalar");
+            csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
+            m_evt_handler->set_key_scalar_plain(maybe_filtered);
+            addrem_flags(RKCL, RKEY|QMRK);
+        }
+        else if(first == '?')
+        {
+            _c4dbgp("mapflow[RKEY]: explicit key");
+            _line_progressed(1);
+            addrem_flags(QMRK, RKEY);
+            _maybe_skip_whitespace_tokens();
         }
-        else if(has_all(QMRK) && rem.begins_with(':'))
+        else if(first == ':')
         {
-            _c4dbgp("complex key finished");
-            if(!has_any(SSCL))
-                _store_scalar_null(rem.str);
+            _c4dbgp("mapflow[RKEY]: setting empty key");
+            m_evt_handler->set_key_scalar_plain({});
             addrem_flags(RVAL, RKEY|QMRK);
             _line_progressed(1);
-            rem = m_state->line_contents.rem;
-            if(rem.begins_with(' '))
-            {
-                _RYML_CB_ASSERT(m_stack.m_callbacks,  ! _at_line_begin());
-                _skipchars(' ');
-            }
-            return true;
+            _maybe_skip_whitespace_tokens();
         }
-        else if(rem == ':' || rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t")))
+        else if(first == '}') // this happens on a trailing comma like ", }"
         {
-            _c4dbgp("key finished");
-            if(!has_all(SSCL))
-            {
-                _c4dbgp("key was empty...");
-                _store_scalar_null(rem.str);
-                rem_flags(QMRK);
-            }
-            addrem_flags(RVAL, RKEY);
-            _line_progressed(rem == ':' ? 1 : 2);
-            return true;
+            _c4dbgp("mapflow[RKEY]: end!");
+            m_evt_handler->end_map();
+            _line_progressed(1);
+            goto mapflow_finish;
         }
-        else if(rem.begins_with("..."))
+        else if(first == '&')
         {
-            _c4dbgp("end current document");
-            _end_stream();
-            _line_progressed(3);
-            return true;
+            csubstr anchor = _scan_anchor();
+            _c4dbgpf("mapflow[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
+            m_evt_handler->set_key_anchor(anchor);
         }
-        else if(rem.begins_with("---"))
+        else if(first == '*')
         {
-            _c4dbgp("start new document '---'");
-            _start_new_doc(rem);
-            return true;
+            csubstr ref = _scan_ref_map();
+            _c4dbgpf("mapflow[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref);
+            m_evt_handler->set_key_ref(ref);
+            addrem_flags(RKCL, RKEY);
         }
-        else if(_handle_types())
+        else if(first == '[')
         {
-            return true;
+            // RYML's tree cannot store container keys, but that's
+            // handled inside the tree sink. Other sink types may be
+            // able to handle it.
+            _c4dbgp("mapflow[RKEY]: start child seqflow (!)");
+            addrem_flags(RKCL, RKEY);
+            m_evt_handler->begin_seq_key_flow();
+            addrem_flags(RSEQ|RVAL, RMAP|RKCL);
+            _set_indentation(m_evt_handler->m_parent->indref);
+            _line_progressed(1);
+            goto mapflow_finish;
+        }
+        else if(first == '{')
+        {
+            // RYML's tree cannot store container keys, but that's
+            // handled inside the tree sink. Other sink types may be
+            // able to handle it.
+            _c4dbgp("mapflow[RKEY]: start child mapflow (!)");
+            addrem_flags(RKCL, RKEY);
+            m_evt_handler->begin_map_key_flow();
+            addrem_flags(RKEY, RVAL|RKCL);
+            _set_indentation(m_evt_handler->m_parent->indref);
+            _line_progressed(1);
+            // keep going in this function
         }
-        else if(_handle_key_anchors_and_refs())
+        else if(first == '!')
         {
-            return true;
+            csubstr tag = _scan_tag();
+            _c4dbgpf("mapflow[RKEY]: tag! [{}]~~~{}~~~", tag.len, tag);
+            _check_tag(tag);
+            m_evt_handler->set_key_tag(tag);
         }
         else
         {
             _c4err("parse error");
         }
     }
-    else if(has_any(RVAL))
+    else if(has_any(RKCL)) // read the key colon
     {
-        _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT));
-        _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY));
-
-        csubstr s;
-        bool is_quoted;
-        if(_scan_scalar(&s, &is_quoted)) // this also progresses the line
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
+        const char first = rem.str[0];
+        _c4dbgpf("mapflow[RKCL]: '{}'", first);
+        if(first == ':')
         {
-            _c4dbgpf("it's a{} scalar", is_quoted ? " quoted" : "");
-
-            rem = m_state->line_contents.rem;
-
-            if(rem.begins_with(": "))
-            {
-                _c4dbgp("actually, the scalar is the first key of a map");
-                addrem_flags(RKEY, RVAL); // before _push_level! This prepares the current level for popping by setting it to RNXT
-                _push_level();
-                _move_scalar_from_top();
-                _move_val_anchor_to_key_anchor();
-                _start_map();
-                _save_indentation(m_state->scalar_col);
-                addrem_flags(RVAL, RKEY);
-                _line_progressed(2);
-            }
-            else if(rem.begins_with(':'))
-            {
-                _c4dbgp("actually, the scalar is the first key of a map, and it opens a new scope");
-                addrem_flags(RKEY, RVAL); // before _push_level! This prepares the current level for popping by setting it to RNXT
-                _push_level();
-                _move_scalar_from_top();
-                _move_val_anchor_to_key_anchor();
-                _start_map();
-                _save_indentation(/*behind*/s.len);
-                addrem_flags(RVAL, RKEY);
-                _line_progressed(1);
-            }
-            else
-            {
-                _c4dbgp("appending keyval to current map");
-                _append_key_val(s, is_quoted);
-                addrem_flags(RKEY, RVAL);
-            }
-            return true;
+            _c4dbgp("mapflow[RKCL]: found the colon");
+            addrem_flags(RVAL, RKCL);
+            _line_progressed(1);
         }
-        else if(rem.begins_with("- ") _RYML_WITH_TAB_TOKENS( || rem.begins_with("-\t")))
+        else if(first == '}')
         {
-            _c4dbgp("val is a nested seq, indented");
-            addrem_flags(RKEY, RVAL); // before _push_level!
-            _push_level();
-            _move_scalar_from_top();
-            _start_seq();
-            _save_indentation();
-            _line_progressed(2);
-            return true;
+            _c4dbgp("mapflow[RKCL]: end with missing val!");
+            addrem_flags(RVAL, RKCL);
+            m_evt_handler->set_val_scalar_plain({});
+            m_evt_handler->end_map();
+            _line_progressed(1);
+            goto mapflow_finish;
         }
-        else if(rem == '-')
+        else if(first == ',')
         {
-            _c4dbgp("maybe a seq. start unknown, indented");
-            _start_unk();
-            _save_indentation();
+            _c4dbgp("mapflow[RKCL]: got comma. val is missing");
+            m_evt_handler->set_val_scalar_plain({});
+            m_evt_handler->add_sibling();
+            addrem_flags(RKEY, RKCL);
             _line_progressed(1);
-            return true;
         }
-        else if(rem.begins_with('['))
+        else
+        {
+            _c4err("parse error");
+        }
+    }
+    else if(has_any(RVAL))
+    {
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
+        const char first = rem.str[0];
+        _c4dbgpf("mapflow[RVAL]: '{}'", first);
+        ScannedScalar sc;
+        if(first == '\'')
+        {
+            _c4dbgp("mapflow[RVAL]: scanning single-quoted scalar");
+            sc = _scan_scalar_squot();
+            csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
+            m_evt_handler->set_val_scalar_squoted(maybe_filtered);
+            addrem_flags(RNXT, RVAL);
+        }
+        else if(first == '"')
+        {
+            _c4dbgp("mapflow[RVAL]: scanning double-quoted scalar");
+            sc = _scan_scalar_dquot();
+            csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
+            m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
+            addrem_flags(RNXT, RVAL);
+        }
+        // block scalars (ie | and >) cannot appear in flow containers
+        else if(_scan_scalar_plain_map_flow(&sc))
+        {
+            _c4dbgp("mapflow[RVAL]: plain scalar.");
+            csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
+            m_evt_handler->set_val_scalar_plain(maybe_filtered);
+            addrem_flags(RNXT, RVAL);
+        }
+        else if(first == '[')
         {
-            _c4dbgp("val is a child seq, flow");
-            addrem_flags(RKEY, RVAL); // before _push_level!
-            _push_level(/*explicit flow*/true);
-            _move_scalar_from_top();
-            _start_seq();
-            add_flags(FLOW);
+            _c4dbgp("mapflow[RVAL]: start val seqflow");
+            addrem_flags(RNXT, RVAL);
+            m_evt_handler->begin_seq_val_flow();
+            _set_indentation(m_evt_handler->m_parent->indref);
+            addrem_flags(RSEQ|RVAL, RMAP|RNXT);
             _line_progressed(1);
-            return true;
+            goto mapflow_finish;
         }
-        else if(rem.begins_with('{'))
+        else if(first == '{')
         {
-            _c4dbgp("val is a child map, flow");
-            addrem_flags(RKEY, RVAL); // before _push_level!
-            _push_level(/*explicit flow*/true);
-            _move_scalar_from_top();
-            _start_map();
-            addrem_flags(FLOW|RKEY, RVAL);
+            _c4dbgp("mapflow[RVAL]: start val mapflow");
+            addrem_flags(RNXT, RVAL);
+            m_evt_handler->begin_map_val_flow();
+            _set_indentation(m_evt_handler->m_parent->indref);
+            addrem_flags(RKEY, RNXT);
             _line_progressed(1);
-            return true;
+            // keep going in this function
         }
-        else if(rem.begins_with(' '))
+        else if(first == '}')
         {
-            csubstr spc = rem.left_of(rem.first_not_of(' '));
-            if(_at_line_begin())
-            {
-                _c4dbgpf("skipping value indentation: {} spaces", spc.len);
-                _line_progressed(spc.len);
-                return true;
-            }
-            else
-            {
-                _c4dbgpf("skipping {} spaces", spc.len);
-                _line_progressed(spc.len);
-                return true;
-            }
+            _c4dbgp("mapflow[RVAL]: end!");
+            m_evt_handler->set_val_scalar_plain({});
+            m_evt_handler->end_map();
+            _line_progressed(1);
+            goto mapflow_finish;
         }
-        else if(_handle_types())
+        else if(first == '*')
         {
-            return true;
+            csubstr ref = _scan_ref_map();
+            _c4dbgpf("mapflow[RVAL]: key ref! [{}]~~~{}~~~", ref.len, ref);
+            m_evt_handler->set_val_ref(ref);
+            addrem_flags(RNXT, RVAL);
         }
-        else if(_handle_val_anchors_and_refs())
+        else if(first == '&')
         {
-            return true;
+            csubstr anchor = _scan_anchor();
+            _c4dbgpf("mapflow[RVAL]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
+            m_evt_handler->set_val_anchor(anchor);
         }
-        else if(rem.begins_with("--- ") || rem == "---" || rem.begins_with("---\t"))
+        else if(first == '!')
         {
-            _start_new_doc(rem);
-            return true;
+            csubstr tag = _scan_tag();
+            _c4dbgpf("mapflow[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
+            _check_tag(tag);
+            m_evt_handler->set_val_tag(tag);
         }
         else
         {
             _c4err("parse error");
         }
     }
-    else
-    {
-        _c4err("internal error");
-    }
-
-    return false;
-}
-
-
-//-----------------------------------------------------------------------------
-bool Parser::_handle_top()
-{
-    _c4dbgp("handle_top");
-    csubstr rem = m_state->line_contents.rem;
-
-    if(rem.begins_with('#'))
-    {
-        _c4dbgp("a comment line");
-        _scan_comment();
-        return true;
-    }
-
-    csubstr trimmed = rem.triml(' ');
-
-    if(trimmed.begins_with('%'))
-    {
-        _handle_directive(trimmed);
-        _line_progressed(rem.len);
-        return true;
-    }
-    else if(trimmed.begins_with("--- ") || trimmed == "---" || trimmed.begins_with("---\t"))
+    else if(has_any(RNXT))
     {
-        _start_new_doc(rem);
-        if(trimmed.len < rem.len)
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
+        _c4dbgpf("mapflow[RNXT]: '{}'", rem.str[0]);
+        if(rem.begins_with(','))
         {
-            _line_progressed(rem.len - trimmed.len);
-            _save_indentation();
+            _c4dbgp("mapflow[RNXT]: expect next keyval");
+            m_evt_handler->add_sibling();
+            addrem_flags(RKEY, RNXT);
+            _line_progressed(1);
         }
-        return true;
-    }
-    else if(trimmed.begins_with("..."))
-    {
-        _c4dbgp("end current document");
-        _end_stream();
-        if(trimmed.len < rem.len)
+        else if(rem.begins_with('}'))
         {
-            _line_progressed(rem.len - trimmed.len);
+            _c4dbgp("mapflow[RNXT]: end!");
+            m_evt_handler->end_map();
+            _line_progressed(1);
+            goto mapflow_finish;
         }
-        _line_progressed(3);
-        return true;
-    }
-    else
-    {
-        _c4err("parse error");
-    }
-
-    return false;
-}
-
-
-//-----------------------------------------------------------------------------
-
-bool Parser::_handle_key_anchors_and_refs()
-{
-    _RYML_CB_ASSERT(m_stack.m_callbacks, !has_any(RVAL));
-    const csubstr rem = m_state->line_contents.rem;
-    if(rem.begins_with('&'))
-    {
-        _c4dbgp("found a key anchor!!!");
-        if(has_all(QMRK|SSCL))
+        else
         {
-            _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RKEY));
-            _c4dbgp("there is a stored key, so this anchor is for the next element");
-            _append_key_val_null(rem.str - 1);
-            rem_flags(QMRK);
-            return true;
+            _c4err("parse error");
         }
-        csubstr anchor = rem.left_of(rem.first_of(' '));
-        _line_progressed(anchor.len);
-        anchor = anchor.sub(1); // skip the first character
-        _move_key_anchor_to_val_anchor();
-        _c4dbgpf("key anchor value: '{}'", anchor);
-        m_key_anchor = anchor;
-        m_key_anchor_indentation = m_state->line_contents.current_col(rem);
-        return true;
-    }
-    else if(C4_UNLIKELY(rem.begins_with('*')))
-    {
-        _c4err("not implemented - this should have been catched elsewhere");
-        C4_NEVER_REACH();
-        return false;
     }
-    return false;
-}
-
-bool Parser::_handle_val_anchors_and_refs()
-{
-    _RYML_CB_ASSERT(m_stack.m_callbacks, !has_any(RKEY));
-    const csubstr rem = m_state->line_contents.rem;
-    if(rem.begins_with('&'))
+    else if(has_any(QMRK))
     {
-        csubstr anchor = rem.left_of(rem.first_of(' '));
-        _line_progressed(anchor.len);
-        anchor = anchor.sub(1); // skip the first character
-        _c4dbgpf("val: found an anchor: '{}', indentation={}!!!", anchor, m_state->line_contents.current_col(rem));
-        if(m_val_anchor.empty())
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
+        const char first = rem.str[0];
+        _c4dbgpf("mapflow[QMRK]: '{}'", first);
+        ScannedScalar sc;
+        if(first == '\'')
         {
-            _c4dbgpf("save val anchor: '{}'", anchor);
-            m_val_anchor = anchor;
-            m_val_anchor_indentation = m_state->line_contents.current_col(rem);
+            _c4dbgp("mapflow[QMRK]: scanning single-quoted scalar");
+            sc = _scan_scalar_squot();
+            csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
+            m_evt_handler->set_key_scalar_squoted(maybe_filtered);
+            addrem_flags(RKCL, QMRK);
         }
-        else
+        else if(first == '"')
         {
-            _c4dbgpf("there is a pending val anchor '{}'", m_val_anchor);
-            if(m_tree->is_seq(m_state->node_id))
-            {
-                if(m_tree->has_children(m_state->node_id))
-                {
-                    _c4dbgpf("current node={} is a seq, has {} children", m_state->node_id, m_tree->num_children(m_state->node_id));
-                    _c4dbgpf("... so take the new one as a key anchor '{}'", anchor);
-                    m_key_anchor = anchor;
-                    m_key_anchor_indentation = m_state->line_contents.current_col(rem);
-                }
-                else
-                {
-                    _c4dbgpf("current node={} is a seq, has no children", m_state->node_id);
-                    if(m_tree->has_val_anchor(m_state->node_id))
-                    {
-                        _c4dbgpf("... node={} already has val anchor: '{}'", m_state->node_id, m_tree->val_anchor(m_state->node_id));
-                        _c4dbgpf("... so take the new one as a key anchor '{}'", anchor);
-                        m_key_anchor = anchor;
-                        m_key_anchor_indentation = m_state->line_contents.current_col(rem);
-                    }
-                    else
-                    {
-                        _c4dbgpf("... so set pending val anchor: '{}' on current node {}", m_val_anchor, m_state->node_id);
-                        m_tree->set_val_anchor(m_state->node_id, m_val_anchor);
-                        m_val_anchor = anchor;
-                        m_val_anchor_indentation = m_state->line_contents.current_col(rem);
-                    }
-                }
-            }
+            _c4dbgp("mapflow[QMRK]: scanning double-quoted scalar");
+            sc = _scan_scalar_dquot();
+            csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
+            m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
+            addrem_flags(RKCL, QMRK);
         }
-        return true;
-    }
-    else if(C4_UNLIKELY(rem.begins_with('*')))
-    {
-        _c4err("not implemented - this should have been catched elsewhere");
-        C4_NEVER_REACH();
-        return false;
-    }
-    return false;
-}
-
-void Parser::_move_key_anchor_to_val_anchor()
-{
-    if(m_key_anchor.empty())
-        return;
-    _c4dbgpf("move current key anchor to val slot: key='{}' -> val='{}'", m_key_anchor, m_val_anchor);
-    if(!m_val_anchor.empty())
-        _c4err("triple-pending anchor");
-    m_val_anchor = m_key_anchor;
-    m_val_anchor_indentation = m_key_anchor_indentation;
-    m_key_anchor = {};
-    m_key_anchor_indentation = {};
-}
-
-void Parser::_move_val_anchor_to_key_anchor()
-{
-    if(m_val_anchor.empty())
-        return;
-    if(!_token_is_from_this_line(m_val_anchor))
-        return;
-    _c4dbgpf("move current val anchor to key slot: key='{}' <- val='{}'", m_key_anchor, m_val_anchor);
-    if(!m_key_anchor.empty())
-        _c4err("triple-pending anchor");
-    m_key_anchor = m_val_anchor;
-    m_key_anchor_indentation = m_val_anchor_indentation;
-    m_val_anchor = {};
-    m_val_anchor_indentation = {};
-}
-
-void Parser::_move_key_tag_to_val_tag()
-{
-    if(m_key_tag.empty())
-        return;
-    _c4dbgpf("move key tag to val tag: key='{}' -> val='{}'", m_key_tag, m_val_tag);
-    m_val_tag = m_key_tag;
-    m_val_tag_indentation = m_key_tag_indentation;
-    m_key_tag.clear();
-    m_key_tag_indentation = 0;
-}
-
-void Parser::_move_val_tag_to_key_tag()
-{
-    if(m_val_tag.empty())
-        return;
-    if(!_token_is_from_this_line(m_val_tag))
-        return;
-    _c4dbgpf("move val tag to key tag: key='{}' <- val='{}'", m_key_tag, m_val_tag);
-    m_key_tag = m_val_tag;
-    m_key_tag_indentation = m_val_tag_indentation;
-    m_val_tag.clear();
-    m_val_tag_indentation = 0;
-}
-
-void Parser::_move_key_tag2_to_key_tag()
-{
-    if(m_key_tag2.empty())
-        return;
-    _c4dbgpf("move key tag2 to key tag: key='{}' <- key2='{}'", m_key_tag, m_key_tag2);
-    m_key_tag = m_key_tag2;
-    m_key_tag_indentation = m_key_tag2_indentation;
-    m_key_tag2.clear();
-    m_key_tag2_indentation = 0;
-}
-
-
-//-----------------------------------------------------------------------------
-
-bool Parser::_handle_types()
-{
-    csubstr rem = m_state->line_contents.rem.triml(' ');
-    csubstr t;
-
-    if(rem.begins_with("!!"))
-    {
-        _c4dbgp("begins with '!!'");
-        t = rem.left_of(rem.first_of(" ,"));
-        _RYML_CB_ASSERT(m_stack.m_callbacks, t.len >= 2);
-        //t = t.sub(2);
-        if(t == "!!set")
-            add_flags(RSET);
-    }
-    else if(rem.begins_with("!<"))
-    {
-        _c4dbgp("begins with '!<'");
-        t = rem.left_of(rem.first_of('>'), true);
-        _RYML_CB_ASSERT(m_stack.m_callbacks, t.len >= 2);
-        //t = t.sub(2, t.len-1);
-    }
-    else if(rem.begins_with("!h!"))
-    {
-        _c4dbgp("begins with '!h!'");
-        t = rem.left_of(rem.first_of(' '));
-        _RYML_CB_ASSERT(m_stack.m_callbacks, t.len >= 3);
-        //t = t.sub(3);
-    }
-    else if(rem.begins_with('!'))
-    {
-        _c4dbgp("begins with '!'");
-        t = rem.left_of(rem.first_of(' '));
-        _RYML_CB_ASSERT(m_stack.m_callbacks, t.len >= 1);
-        //t = t.sub(1);
-    }
-
-    if(t.empty())
-        return false;
-
-    if(has_all(QMRK|SSCL))
-    {
-        _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RKEY));
-        _c4dbgp("there is a stored key, so this tag is for the next element");
-        _append_key_val_null(rem.str - 1);
-        rem_flags(QMRK);
-    }
-
-    #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
-    const char *tag_beginning = rem.str;
-    #endif
-    size_t tag_indentation = m_state->line_contents.current_col(t);
-    _c4dbgpf("there was a tag: '{}', indentation={}", t, tag_indentation);
-    _RYML_CB_ASSERT(m_stack.m_callbacks, t.end() > m_state->line_contents.rem.begin());
-    _line_progressed(static_cast<size_t>(t.end() - m_state->line_contents.rem.begin()));
-    {
-        size_t pos = m_state->line_contents.rem.first_not_of(" \t");
-        if(pos != csubstr::npos)
-            _line_progressed(pos);
-    }
-
-    if(has_all(RMAP|RKEY))
-    {
-        _c4dbgpf("saving map key tag '{}'", t);
-        _RYML_CB_ASSERT(m_stack.m_callbacks, m_key_tag.empty());
-        m_key_tag = t;
-        m_key_tag_indentation = tag_indentation;
-    }
-    else if(has_all(RMAP|RVAL))
-    {
-        /* foo: !!str
-         * !!str : bar  */
-        rem = m_state->line_contents.rem;
-        rem = rem.left_of(rem.find("#"));
-        rem = rem.trimr(" \t");
-        _c4dbgpf("rem='{}'", rem);
-        #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
-        if(rem == ':' || rem.begins_with(": "))
+        // block scalars (ie | and >) cannot appear in flow containers
+        else if(_scan_scalar_plain_map_flow(&sc))
         {
-            _c4dbgp("the last val was null, and this is a tag from a null key");
-            _append_key_val_null(tag_beginning - 1);
-            _store_scalar_null(rem.str - 1);
-            // do not change the flag to key, it is ~
-            _RYML_CB_ASSERT(m_stack.m_callbacks, rem.begin() > m_state->line_contents.rem.begin());
-            size_t token_len = rem == ':' ? 1 : 2;
-            _line_progressed(static_cast<size_t>(token_len + rem.begin() - m_state->line_contents.rem.begin()));
+            _c4dbgp("mapflow[QMRK]: plain scalar");
+            csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);
+            m_evt_handler->set_key_scalar_plain(maybe_filtered);
+            addrem_flags(RKCL, QMRK);
         }
-        #endif
-        _c4dbgpf("saving map val tag '{}'", t);
-        _RYML_CB_ASSERT(m_stack.m_callbacks, m_val_tag.empty());
-        m_val_tag = t;
-        m_val_tag_indentation = tag_indentation;
-    }
-    else if(has_all(RSEQ|RVAL) || has_all(RTOP|RUNK|NDOC))
-    {
-        if(m_val_tag.empty())
+        else if(first == ':')
         {
-            _c4dbgpf("saving seq/doc val tag '{}'", t);
-            m_val_tag = t;
-            m_val_tag_indentation = tag_indentation;
+            _c4dbgp("mapflow[QMRK]: setting empty key");
+            m_evt_handler->set_key_scalar_plain({});
+            addrem_flags(RVAL, QMRK);
+            _line_progressed(1);
+            _maybe_skip_whitespace_tokens();
         }
-        else
+        else if(first == '}') // this happens on a trailing comma like ", }"
         {
-            _c4dbgpf("saving seq/doc key tag '{}'", t);
-            m_key_tag = t;
-            m_key_tag_indentation = tag_indentation;
+            _c4dbgp("mapflow[QMRK]: end!");
+            m_evt_handler->set_key_scalar_plain({});
+            m_evt_handler->set_val_scalar_plain({});
+            m_evt_handler->end_map();
+            _line_progressed(1);
+            goto mapflow_finish;
         }
-    }
-    else if(has_all(RTOP|RUNK) || has_any(RUNK))
-    {
-        rem = m_state->line_contents.rem;
-        rem = rem.left_of(rem.find("#"));
-        rem = rem.trimr(" \t");
-        if(rem.empty())
+        else if(first == '&')
         {
-            _c4dbgpf("saving val tag '{}'", t);
-            _RYML_CB_ASSERT(m_stack.m_callbacks, m_val_tag.empty());
-            m_val_tag = t;
-            m_val_tag_indentation = tag_indentation;
+            csubstr anchor = _scan_anchor();
+            _c4dbgpf("mapflow[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
+            m_evt_handler->set_key_anchor(anchor);
         }
-        else
+        else if(first == '*')
         {
-            _c4dbgpf("saving key tag '{}'", t);
-            if(m_key_tag.empty())
-            {
-                m_key_tag = t;
-                m_key_tag_indentation = tag_indentation;
-            }
-            else
-            {
-                /* handle this case:
-                 * !!str foo: !!map
-                 *   !!int 1: !!float 20.0
-                 *   !!int 3: !!float 40.0
-                 *
-                 * (m_key_tag would be !!str and m_key_tag2 would be !!int)
-                 */
-                m_key_tag2 = t;
-                m_key_tag2_indentation = tag_indentation;
-            }
+            csubstr ref = _scan_ref_map();
+            _c4dbgpf("mapflow[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref);
+            m_evt_handler->set_key_ref(ref);
+            addrem_flags(RKCL, QMRK);
+        }
+        else if(first == '[')
+        {
+            // RYML's tree cannot store container keys, but that's
+            // handled inside the tree sink. Other sink types may be
+            // able to handle it.
+            _c4dbgp("mapflow[QMRK]: start child seqflow (!)");
+            addrem_flags(RKCL, QMRK);
+            m_evt_handler->begin_seq_key_flow();
+            addrem_flags(RSEQ|RVAL, RMAP|RKCL);
+            _set_indentation(m_evt_handler->m_parent->indref);
+            _line_progressed(1);
+            goto mapflow_finish;
+        }
+        else if(first == '{')
+        {
+            // RYML's tree cannot store container keys, but that's
+            // handled inside the tree sink. Other sink types may be
+            // able to handle it.
+            _c4dbgp("mapflow[QMRK]: start child mapflow (!)");
+            addrem_flags(RKCL, QMRK);
+            m_evt_handler->begin_map_key_flow();
+            _set_indentation(m_evt_handler->m_parent->indref);
+            addrem_flags(RKEY, RKCL);
+            _line_progressed(1);
+            // keep going in this function
+        }
+        else if(first == '!')
+        {
+            csubstr tag = _scan_tag();
+            _c4dbgpf("mapflow[QMRK]: tag! [{}]~~~{}~~~", tag.len, tag);
+            _check_tag(tag);
+            m_evt_handler->set_key_tag(tag);
         }
-    }
-    else
-    {
-        _c4err("internal error");
-    }
-
-    if(m_val_tag.not_empty())
-    {
-        YamlTag_e tag = to_tag(t);
-        if(tag == TAG_STR)
+        else
         {
-            _c4dbgpf("tag '{}' is a str-type tag", t);
-            if(has_all(RTOP|RUNK|NDOC))
-            {
-                _c4dbgpf("docval. slurping the string. pos={}", m_state->pos.offset);
-                csubstr scalar = _slurp_doc_scalar();
-                _c4dbgpf("docval. after slurp: {}, at node {}: '{}'", m_state->pos.offset, m_state->node_id, scalar);
-                m_tree->to_val(m_state->node_id, scalar, DOC);
-                _c4dbgpf("docval. val tag {} -> {}", m_val_tag, normalize_tag(m_val_tag));
-                m_tree->set_val_tag(m_state->node_id, normalize_tag(m_val_tag));
-                m_val_tag.clear();
-                if(!m_val_anchor.empty())
-                {
-                    _c4dbgpf("setting val anchor[{}]='{}'", m_state->node_id, m_val_anchor);
-                    m_tree->set_val_anchor(m_state->node_id, m_val_anchor);
-                    m_val_anchor.clear();
-                }
-                _end_stream();
-            }
+            _c4err("parse error");
         }
     }
-    return true;
-}
-
-//-----------------------------------------------------------------------------
-csubstr Parser::_slurp_doc_scalar()
-{
-    csubstr s = m_state->line_contents.rem;
-    size_t pos = m_state->pos.offset;
-    _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.full.find("---") != csubstr::npos);
-    _c4dbgpf("slurp 0 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset));
-    if(s.len == 0)
-    {
-        _line_ended();
-        _scan_line();
-        s = m_state->line_contents.rem;
-        pos = m_state->pos.offset;
-    }
 
-    size_t skipws = s.first_not_of(" \t");
-    _c4dbgpf("slurp 1 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset));
-    if(skipws != npos)
+ mapflow_again:
+    _c4dbgt("mapflow: go again", 0);
+    if(_finished_line())
     {
-        _line_progressed(skipws);
-        s = m_state->line_contents.rem;
-        pos = m_state->pos.offset;
-        _c4dbgpf("slurp 2 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset));
-    }
-
-    _RYML_CB_ASSERT(m_stack.m_callbacks, m_val_anchor.empty());
-    _handle_val_anchors_and_refs();
-    if(!m_val_anchor.empty())
-    {
-        s = m_state->line_contents.rem;
-        skipws = s.first_not_of(" \t");
-        if(skipws != npos)
+        if(C4_LIKELY(!_finished_file()))
         {
-            _line_progressed(skipws);
+            _line_ended();
+            _scan_line();
+            _c4dbgnextline();
+        }
+        else
+        {
+            _c4err("missing terminating }");
         }
-        s = m_state->line_contents.rem;
-        pos = m_state->pos.offset;
-        _c4dbgpf("slurp 3 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset));
-    }
-
-    if(s.begins_with('\''))
-    {
-        m_state->scalar_col = m_state->line_contents.current_col(s);
-        return _scan_squot_scalar();
-    }
-    else if(s.begins_with('"'))
-    {
-        m_state->scalar_col = m_state->line_contents.current_col(s);
-        return _scan_dquot_scalar();
-    }
-    else if(s.begins_with('|') || s.begins_with('>'))
-    {
-        return _scan_block();
     }
+    goto mapflow_start;
 
-    _c4dbgpf("slurp 4 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset));
-
-    m_state->scalar_col = m_state->line_contents.current_col(s);
-    _RYML_CB_ASSERT(m_stack.m_callbacks, s.end() >= m_buf.begin() + pos);
-    _line_progressed(static_cast<size_t>(s.end() - (m_buf.begin() + pos)));
+ mapflow_finish:
+    _c4dbgp("mapflow: finish");
+}
 
-    _c4dbgpf("slurp 5 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset));
 
-    if(_at_line_end())
-    {
-        _c4dbgpf("at line end. curr='{}'", s);
-        s = _extend_scanned_scalar(s);
-    }
+//-----------------------------------------------------------------------------
 
-    _c4dbgpf("scalar was '{}'", s);
+template<class EventHandler>
+void ParseEngine<EventHandler>::_handle_seq_block()
+{
+seqblck_start:
+    _c4dbgpf("handle2_seq_block: seq_id={} node_id={} level={} indent={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
 
-    return s;
-}
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RSEQ));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(BLCK));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RVAL|RNXT));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RVAL) + has_any(RNXT)));
 
-//-----------------------------------------------------------------------------
-bool Parser::_scan_scalar(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted)
-{
-    csubstr s = m_state->line_contents.rem;
-    if(s.len == 0)
-        return false;
-    s = s.trim(" \t");
-    if(s.len == 0)
-        return false;
+    _maybe_skip_comment();
+    csubstr rem = m_evt_handler->m_curr->line_contents.rem;
+    if(!rem.len)
+        goto seqblck_again;
 
-    if(s.begins_with('\''))
-    {
-        _c4dbgp("got a ': scanning single-quoted scalar");
-        m_state->scalar_col = m_state->line_contents.current_col(s);
-        *scalar = _scan_squot_scalar();
-        *quoted = true;
-        return true;
-    }
-    else if(s.begins_with('"'))
-    {
-        _c4dbgp("got a \": scanning double-quoted scalar");
-        m_state->scalar_col = m_state->line_contents.current_col(s);
-        *scalar = _scan_dquot_scalar();
-        *quoted = true;
-        return true;
-    }
-    else if(s.begins_with('|') || s.begins_with('>'))
-    {
-        *scalar = _scan_block();
-        *quoted = true;
-        return true;
-    }
-    else if(has_any(RTOP) && _is_doc_sep(s))
-    {
-        return false;
-    }
-    else if(has_any(RSEQ))
+    if(has_any(RVAL))
     {
-        _RYML_CB_ASSERT(m_stack.m_callbacks,  ! has_all(RKEY));
-        if(has_all(RVAL))
+        _c4dbgpf("seqblck[RVAL]: col={}", m_evt_handler->m_curr->pos.col);
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
+        if(m_evt_handler->m_curr->at_line_beginning())
         {
-            _c4dbgp("RSEQ|RVAL");
-            if( ! _is_scalar_next__rseq_rval(s))
-                return false;
-            _RYML_WITH_TAB_TOKENS(else if(s.begins_with("-\t"))
-                return false;
-            )
-            if(s.ends_with(':'))
+            _c4dbgpf("seqblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
+            if(m_evt_handler->m_curr->indentation_ge())
             {
-                --s.len;
+                _c4dbgpf("seqblck[RVAL]: skip {} from indentation", m_evt_handler->m_curr->line_contents.indentation);
+                _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
+                rem = m_evt_handler->m_curr->line_contents.rem;
+                if(!rem.len)
+                    goto seqblck_again;
             }
-            else
+            else if(m_evt_handler->m_curr->indentation_lt())
             {
-                auto first = s.first_of_any(": " _RYML_WITH_TAB_TOKENS( , ":\t"), " #");
-                if(first)
-                    s.len = first.pos;
+                _c4dbgp("seqblck[RVAL]: smaller indentation!");
+                _handle_indentation_pop_from_block_seq();
+                goto seqblck_finish;
             }
-            if(has_all(FLOW))
+            else if(m_evt_handler->m_curr->line_contents.indentation == npos)
             {
-                _c4dbgp("RSEQ|RVAL|EXPL");
-                s = s.left_of(s.first_of(",]"));
+                _c4dbgp("seqblck[RVAL]: empty line!");
+                _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
+                goto seqblck_again;
             }
-            s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
         }
+        #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
         else
         {
-            _c4err("internal error");
-        }
-    }
-    else if(has_any(RMAP))
-    {
-        if( ! _is_scalar_next__rmap(s))
-            return false;
-        size_t colon_space = s.find(": ");
-        if(colon_space == npos)
-        {
-            _RYML_WITH_OR_WITHOUT_TAB_TOKENS(
-                // with tab tokens
-                colon_space = s.find(":\t");
-                if(colon_space == npos)
+            // accommodate annotation on the previous line. eg:
+            // - &elm
+            //   foo            # <-- on this line
+            // - &elm
+            //   &foo foo: bar  # <-- on this line
+            if(rem.str[0] == ' ')
+            {
+                if(_handle_indentation_from_annotations())
                 {
-                    _RYML_CB_ASSERT(m_stack.m_callbacks, s.len > 0);
-                    colon_space = s.find(':');
-                    if(colon_space != s.len-1)
-                        colon_space = npos;
+                    _c4dbgp("seqblck[RVAL]: annotations!");
+                    rem = m_evt_handler->m_curr->line_contents.rem;
+                    if(!rem.len)
+                        goto seqblck_again;
                 }
-                ,
-                // without tab tokens
-                colon_space = s.find(':');
-                _RYML_CB_ASSERT(m_stack.m_callbacks, s.len > 0);
-                if(colon_space != s.len-1)
-                    colon_space = npos;
-            )
+            }
         }
-
-        if(has_all(RKEY))
+        #endif
+        _RYML_CB_ASSERT(callbacks(), rem.len);
+        _c4dbgpf("seqblck[RVAL]: '{}' node_id={}", rem.str[0], m_evt_handler->m_curr->node_id);
+        const char first = rem.str[0];
+        const size_t startline = m_evt_handler->m_curr->pos.line;
+        // warning: the gcc optimizer on x86 builds is brittle with
+        // this function:
+        const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
+        ScannedScalar sc;
+        if(first == '\'')
+        {
+            _c4dbgp("seqblck[RVAL]: single-quoted scalar");
+            sc = _scan_scalar_squot();
+            if(!_maybe_scan_following_colon())
+            {
+                _c4dbgp("seqblck[RVAL]: set as val");
+                _handle_annotations_before_blck_val_scalar();
+                csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL!
+                m_evt_handler->set_val_scalar_squoted(maybe_filtered);
+                addrem_flags(RNXT, RVAL);
+            }
+            else
+            {
+                _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
+                addrem_flags(RNXT, RVAL);
+                _handle_annotations_before_start_mapblck(startline);
+                m_evt_handler->begin_map_val_block();
+                _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
+                csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
+                m_evt_handler->set_key_scalar_squoted(maybe_filtered);
+                addrem_flags(RMAP|RVAL, RSEQ|RNXT);
+                _maybe_skip_whitespace_tokens();
+                goto seqblck_finish;
+            }
+        }
+        else if(first == '"')
         {
-            _RYML_CB_ASSERT(m_stack.m_callbacks, !s.begins_with(' '));
-            if(has_any(QMRK))
+            _c4dbgp("seqblck[RVAL]: double-quoted scalar");
+            sc = _scan_scalar_dquot();
+            if(!_maybe_scan_following_colon())
             {
-                _c4dbgp("RMAP|RKEY|CPLX");
-                _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RMAP));
-                if(s.begins_with("? ") || s == '?')
-                    return false;
-                s = s.left_of(colon_space);
-                s = s.left_of(s.first_of("#"));
-                if(has_any(FLOW))
-                    s = s.left_of(s.first_of(':'));
-                s = s.trimr(" \t");
-                if(s.begins_with("---"))
-                    return false;
-                else if(s.begins_with("..."))
-                    return false;
+                _c4dbgp("seqblck[RVAL]: set as val");
+                _handle_annotations_before_blck_val_scalar();
+                csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL!
+                m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
+                addrem_flags(RNXT, RVAL);
             }
             else
             {
-                _c4dbgp("RMAP|RKEY");
-                _RYML_CB_CHECK(m_stack.m_callbacks, !s.begins_with('{'));
-                if(s.begins_with("? ") || s == '?')
-                    return false;
-                s = s.left_of(colon_space);
-                s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
-                if(has_any(FLOW))
+                _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
+                addrem_flags(RNXT, RVAL);
+                _handle_annotations_before_start_mapblck(startline);
+                m_evt_handler->begin_map_val_block();
+                _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
+                csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
+                m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
+                addrem_flags(RMAP|RVAL, RSEQ|RNXT);
+                _maybe_skip_whitespace_tokens();
+                goto seqblck_finish;
+            }
+        }
+        // block scalars can only appear as keys when in QMRK scope
+        // (ie, after ? tokens), so no need to scan following colon in
+        // here.
+        else if(first == '|')
+        {
+            _c4dbgp("seqblck[RVAL]: block-literal scalar");
+            ScannedBlock sb;
+            _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
+            _handle_annotations_before_blck_val_scalar();
+            csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
+            m_evt_handler->set_val_scalar_literal(maybe_filtered);
+            addrem_flags(RNXT, RVAL);
+        }
+        else if(first == '>')
+        {
+            _c4dbgp("seqblck[RVAL]: block-folded scalar");
+            ScannedBlock sb;
+            _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
+            _handle_annotations_before_blck_val_scalar();
+            csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
+            m_evt_handler->set_val_scalar_folded(maybe_filtered);
+            addrem_flags(RNXT, RVAL);
+        }
+        else if(_scan_scalar_plain_seq_blck(&sc))
+        {
+            _c4dbgp("seqblck[RVAL]: plain scalar.");
+            if(!_maybe_scan_following_colon())
+            {
+                _c4dbgp("seqblck[RVAL]: set as val");
+                _handle_annotations_before_blck_val_scalar();
+                csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);  // VAL!
+                m_evt_handler->set_val_scalar_plain(maybe_filtered);
+                addrem_flags(RNXT, RVAL);
+            }
+            else
+            {
+                if(startindent > m_evt_handler->m_curr->indref)
                 {
-                    _c4dbgpf("RMAP|RKEY|EXPL: '{}'", s);
-                    s = s.left_of(s.first_of(",}"));
-                    if(s.ends_with(':'))
-                        s = s.offs(0, 1);
+                    _c4dbgp("seqblck[RVAL]: start mapblck, set scalar as key");
+                    addrem_flags(RNXT, RVAL);
+                    _handle_annotations_before_start_mapblck(startline);
+                    m_evt_handler->begin_map_val_block();
+                    _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
+                    csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);  // KEY!
+                    m_evt_handler->set_key_scalar_plain(maybe_filtered);
+                    addrem_flags(RMAP|RVAL, RSEQ|RNXT);
+                    _maybe_skip_whitespace_tokens();
+                    goto seqblck_finish;
                 }
-                else if(s.begins_with("---"))
+                else if(m_evt_handler->m_parent && m_evt_handler->m_parent->indref == startindent && has_any(RMAP|BLCK, m_evt_handler->m_parent))
                 {
-                    return false;
+                    _c4dbgp("seqblck[RVAL]: empty val + end indentless seq + set key");
+                    m_evt_handler->set_val_scalar_plain({});
+                    m_evt_handler->end_seq();
+                    m_evt_handler->add_sibling();
+                    csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref);  // KEY!
+                    m_evt_handler->set_key_scalar_plain(maybe_filtered);
+                    addrem_flags(RVAL, RNXT|RKEY);
+                    _maybe_skip_whitespace_tokens();
+                    goto seqblck_finish;
                 }
-                else if(s.begins_with("..."))
+                else
                 {
-                    return false;
+                    _c4err("parse error");
                 }
             }
         }
-        else if(has_all(RVAL))
+        else if(first == '[')
         {
-            _c4dbgp("RMAP|RVAL");
-            _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(QMRK));
-            if( ! _is_scalar_next__rmap_val(s))
-                return false;
-            _RYML_WITH_TAB_TOKENS(else if(s.begins_with("-\t"))
-                return false;
-            )
-            s = s.left_of(s.find(" #")); // is there a comment?
-            s = s.left_of(s.find("\t#")); // is there a comment?
-            if(has_any(FLOW))
+            _c4dbgp("seqblck[RVAL]: start child seqflow");
+            addrem_flags(RNXT, RVAL);
+            m_evt_handler->begin_seq_val_flow();
+            addrem_flags(FLOW|RVAL, BLCK|RNXT);
+            _line_progressed(1);
+            _set_indentation(m_evt_handler->m_parent->indref + 1u);
+            goto seqblck_finish;
+        }
+        else if(first == '{')
+        {
+            _c4dbgp("seqblck[RVAL]: start child mapflow");
+            addrem_flags(RNXT, RVAL);
+            _handle_annotations_before_blck_val_scalar();
+            m_evt_handler->begin_map_val_flow();
+            addrem_flags(RMAP|RKEY|FLOW, BLCK|RSEQ|RVAL|RNXT);
+            _line_progressed(1);
+            _set_indentation(m_evt_handler->m_parent->indref + 1u);
+            goto seqblck_finish;
+        }
+        else if(first == '-')
+        {
+            if(startindent == m_evt_handler->m_curr->indref)
             {
-                _c4dbgp("RMAP|RVAL|EXPL");
-                if(has_none(RSEQIMAP))
-                    s = s.left_of(s.first_of(",}"));
-                else
-                    s = s.left_of(s.first_of(",]"));
+                _c4dbgp("seqblck[RVAL]: prev val was empty");
+                _handle_annotations_before_blck_val_scalar();
+                m_evt_handler->set_val_scalar_plain({});
+                // keep in RVAL, but for the next sibling
+                m_evt_handler->add_sibling();
+            }
+            else
+            {
+                _c4dbgp("seqblck[RVAL]: start child seqblck");
+                _RYML_CB_ASSERT(this->callbacks(), startindent > m_evt_handler->m_curr->indref);
+                addrem_flags(RNXT, RVAL);
+                _handle_annotations_before_blck_val_scalar();
+                m_evt_handler->begin_seq_val_block();
+                addrem_flags(RVAL, RNXT);
+                _save_indentation();
+                // keep going on inside this function
+            }
+            _line_progressed(1);
+            _maybe_skip_whitespace_tokens();
+        }
+        else if(first == ':')
+        {
+            _c4dbgp("seqblck[RVAL]: start child mapblck with empty key");
+            addrem_flags(RNXT, RVAL);
+            _handle_annotations_before_start_mapblck(startline);
+            m_evt_handler->begin_map_val_block();
+            _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
+            m_evt_handler->set_key_scalar_plain({});
+            addrem_flags(RMAP|RVAL, RSEQ|RNXT);
+            _line_progressed(1);
+            _maybe_skip_whitespace_tokens();
+            goto seqblck_finish;
+        }
+        else if(first == '&')
+        {
+            const csubstr anchor = _scan_anchor();
+            _c4dbgpf("seqblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
+            // we need to buffer the anchors, as there may be two
+            // consecutive anchors in here
+            _add_annotation(&m_pending_anchors, anchor, startindent, startline);
+        }
+        else if(first == '*')
+        {
+            csubstr ref = _scan_ref_seq();
+            _c4dbgpf("seqblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
+            if(!_maybe_scan_following_colon())
+            {
+                _c4dbgp("seqblck[RVAL]: set ref as val!");
+                _handle_annotations_before_blck_val_scalar();
+                m_evt_handler->set_val_ref(ref);
+                addrem_flags(RNXT, RVAL);
+            }
+            else
+            {
+                _c4dbgp("seqblck[RVAL]: ref is key of map");
+                addrem_flags(RNXT, RVAL);
+                _handle_annotations_before_start_mapblck(startline);
+                m_evt_handler->begin_map_val_block();
+                _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
+                m_evt_handler->set_key_ref(ref);
+                addrem_flags(RMAP|RVAL, RSEQ|RNXT);
+                _set_indentation(startindent);
+                _maybe_skip_whitespace_tokens();
+                goto seqblck_finish;
             }
-            s = s.trim(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' '));
-            if(s.begins_with("---"))
-                return false;
-            else if(s.begins_with("..."))
-                return false;
+        }
+        else if(first == '!')
+        {
+            csubstr tag = _scan_tag();
+            _c4dbgpf("seqblck[RVAL]: val tag! [{}]~~~{}~~~", tag.len, tag);
+            // we need to buffer the tags, as there may be two
+            // consecutive tags in here
+            _add_annotation(&m_pending_tags, tag, startindent, startline);
+        }
+        else if(first == '?')
+        {
+            _c4dbgp("seqblck[RVAL]: start child mapblck, explicit key");
+            addrem_flags(RNXT, RVAL);
+            m_was_inside_qmrk = true;
+            m_evt_handler->begin_map_val_block();
+            addrem_flags(RMAP|QMRK, RSEQ|RNXT);
+            _save_indentation();
+            _line_progressed(1);
+            _maybe_skip_whitespace_tokens();
+            goto seqblck_finish;
         }
         else
         {
             _c4err("parse error");
         }
     }
-    else if(has_all(RUNK))
+    else // RNXT
     {
-        _c4dbgpf("RUNK '[{}]~~~{}~~~", s.len, s);
-        if( ! _is_scalar_next__runk(s))
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
+        //
+        // handle indentation
+        //
+        _c4dbgpf("seqblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
+        if(C4_UNLIKELY(!_at_line_begin()))
+            _c4err("parse error");
+        if(m_evt_handler->m_curr->indentation_ge())
         {
-            _c4dbgp("RUNK: no scalar next");
-            return false;
+            _c4dbgpf("seqblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
+            _line_progressed(m_evt_handler->m_curr->indref);
+            _maybe_skip_whitespace_tokens();
+            rem = m_evt_handler->m_curr->line_contents.rem;
+            if(!rem.len)
+                goto seqblck_again;
         }
-        size_t pos = s.find(" #");
-        if(pos != npos)
-            s = s.left_of(pos);
-        pos = s.find(": ");
-        if(pos != npos)
-            s = s.left_of(pos);
-        else if(s.ends_with(':'))
-            s = s.left_of(s.len-1);
-        _RYML_WITH_TAB_TOKENS(
-        else if((pos = s.find(":\t")) != npos) // TABS
-            s = s.left_of(pos);
-        )
-        else
-            s = s.left_of(s.first_of(','));
-        s = s.trim(" \t");
-        _c4dbgpf("RUNK: scalar='{}'", s);
-    }
-    else
-    {
-        _c4err("not implemented");
-    }
-
-    if(s.empty())
-        return false;
-
-    m_state->scalar_col = m_state->line_contents.current_col(s);
-    _RYML_CB_ASSERT(m_stack.m_callbacks, s.str >= m_state->line_contents.rem.str);
-    _line_progressed(static_cast<size_t>(s.str - m_state->line_contents.rem.str) + s.len);
-
-    if(_at_line_end() && s != '~')
-    {
-        _c4dbgpf("at line end. curr='{}'", s);
-        s = _extend_scanned_scalar(s);
-    }
-
-    _c4dbgpf("scalar was '{}'", s);
-
-    *scalar = s;
-    *quoted = false;
-    return true;
-}
-
-//-----------------------------------------------------------------------------
-
-csubstr Parser::_extend_scanned_scalar(csubstr s)
-{
-    if(has_all(RMAP|RKEY|QMRK))
-    {
-        size_t scalar_indentation = has_any(FLOW) ? 0 : m_state->scalar_col;
-        _c4dbgpf("extend_scalar: explicit key! indref={} scalar_indentation={} scalar_col={}", m_state->indref, scalar_indentation, m_state->scalar_col);
-        csubstr n = _scan_to_next_nonempty_line(scalar_indentation);
-        if(!n.empty())
+        else if(m_evt_handler->m_curr->indentation_lt())
         {
-            substr full = _scan_complex_key(s, n).trimr(" \t\r\n");
-            if(full != s)
-                s = _filter_plain_scalar(full, scalar_indentation);
+            _c4dbgp("seqblck[RNXT]: smaller indentation!");
+            _handle_indentation_pop_from_block_seq();
+            if(has_all(RSEQ|BLCK))
+            {
+                _c4dbgp("seqblck[RNXT]: still seqblck!");
+                _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
+                _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
+                rem = m_evt_handler->m_curr->line_contents.rem;
+                if(!rem.len)
+                    goto seqblck_again;
+            }
+            else
+            {
+                _c4dbgp("seqblck[RNXT]: no longer seqblck!");
+                goto seqblck_finish;
+            }
         }
-    }
-    // deal with plain (unquoted) scalars that continue to the next line
-    else if(!s.begins_with_any("*")) // cannot be a plain scalar if it starts with * (that's an anchor reference)
-    {
-        _c4dbgpf("extend_scalar: line ended, scalar='{}'", s);
-        if(has_none(FLOW))
+        else if(m_evt_handler->m_curr->line_contents.indentation == npos)
+        {
+            _c4dbgpf("seqblck[RNXT]: blank line, len={}", m_evt_handler->m_curr->line_contents.rem);
+            _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
+            rem = m_evt_handler->m_curr->line_contents.rem;
+            if(!rem.len)
+                goto seqblck_again;
+        }
+        //
+        // now handle the tokens
+        //
+        const char first = rem.str[0];
+        _c4dbgpf("seqblck[RNXT]: '{}' node_id={}", first, m_evt_handler->m_curr->node_id);
+        if(first == '-')
+        {
+            if(m_evt_handler->m_curr->indref > 0 || m_evt_handler->m_curr->line_contents.indentation > 0 || !_is_doc_begin_token(rem))
+            {
+                _c4dbgp("seqblck[RNXT]: expect next val");
+                addrem_flags(RVAL, RNXT);
+                m_evt_handler->add_sibling();
+                _line_progressed(1);
+                _maybe_skip_whitespace_tokens();
+            }
+            else
+            {
+                _c4dbgp("seqblck[RNXT]: start doc");
+                _start_doc_suddenly();
+                _line_progressed(3);
+                _maybe_skip_whitespace_tokens();
+                goto seqblck_finish;
+            }
+        }
+        else if(first == ':')
+        {
+            // This happens for example in `- [a: b]: c` (after
+            // terminating the seq, ie, after `]`). All other cases
+            // (ie colon after scalars) are caught elsewhere (ie, in
+            // RVAL state).
+            auto const *C4_RESTRICT prev_state = m_evt_handler->m_parent;
+            if(C4_LIKELY(prev_state && (prev_state->flags & RMAP)))
+            {
+                _c4dbgp("seqblck[RNXT]: actually this seq was '?' key of parent map");
+                m_evt_handler->end_seq();
+                goto seqblck_finish;
+            }
+            else
+            {
+                _c4err("parse error");
+            }
+        }
+        else if(first == '.')
         {
-            size_t scalar_indentation = m_state->indref + 1;
-            if(has_all(RUNK) && scalar_indentation == 1)
-                scalar_indentation = 0;
-            csubstr n = _scan_to_next_nonempty_line(scalar_indentation);
-            if(!n.empty())
+            _c4dbgp("seqblck[RNXT]: maybe doc?");
+            csubstr rs = rem.sub(1);
+            if(rs == ".." || rs.begins_with(".. "))
+            {
+                _c4dbgp("seqblck[RNXT]: end+start doc");
+                _end_doc_suddenly();
+                _line_progressed(3);
+                _maybe_skip_whitespace_tokens();
+                goto seqblck_finish;
+            }
+            else
             {
-                _c4dbgpf("rscalar[IMPL]: state_indref={} state_indentation={} scalar_indentation={}", m_state->indref, m_state->line_contents.indentation, scalar_indentation);
-                _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.full.is_super(n));
-                substr full = _scan_plain_scalar_blck(s, n, scalar_indentation);
-                if(full.len >= s.len)
-                    s = _filter_plain_scalar(full, scalar_indentation);
+                _c4err("parse error");
             }
         }
         else
         {
-            _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(FLOW));
-            csubstr n = _scan_to_next_nonempty_line(/*indentation*/0);
-            if(!n.empty())
+            // may be an indentless sequence nested in a map...
+            //if(m_evt_handler->m_stack.size() >= 2)
+            #ifdef RYML_DBG
+            char flagbuf_[128];
+            for(auto const& s : m_evt_handler->m_stack)
+            {
+                _dbg_printf("state[{}]: ind={} node={} flags={}\n", s.level, s.indref, s.node_id, detail::_parser_flags_to_str(flagbuf_, s.flags));
+            }
+            #endif
+            if(m_evt_handler->m_parent && has_all(RMAP|BLCK, m_evt_handler->m_parent) && m_evt_handler->m_curr->indref == m_evt_handler->m_parent->indref)
             {
-                _c4dbgp("rscalar[FLOW]");
-                substr full = _scan_plain_scalar_flow(s, n);
-                s = _filter_plain_scalar(full, /*indentation*/0);
+                _c4dbgpf("seqblck[RNXT]: end indentless seq, go to parent={}. node={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id);
+                _RYML_CB_ASSERT(this->callbacks(), m_evt_handler->m_curr != m_evt_handler->m_parent);
+                _handle_indentation_pop(m_evt_handler->m_parent);
+                _RYML_CB_ASSERT(this->callbacks(), has_all(RMAP|BLCK));
+                m_evt_handler->add_sibling();
+                addrem_flags(RKEY, RNXT);
+                goto seqblck_finish;
+            }
+            else //if(first != '*')
+            {
+                _c4err("parse error");
             }
         }
     }
 
-    return s;
+ seqblck_again:
+    _c4dbgt("seqblck: go again", 0);
+    if(_finished_line())
+    {
+        _line_ended();
+        _scan_line();
+        if(_finished_file())
+        {
+            _c4dbgp("seqblck: finish!");
+            _end_seq_blck();
+            goto seqblck_finish;
+        }
+        _c4dbgnextline();
+    }
+    goto seqblck_start;
+
+ seqblck_finish:
+    _c4dbgp("seqblck: finish");
 }
 
 
 //-----------------------------------------------------------------------------
 
-substr Parser::_scan_plain_scalar_flow(csubstr currscalar, csubstr peeked_line)
+template<class EventHandler>
+void ParseEngine<EventHandler>::_handle_map_block()
 {
-    static constexpr const csubstr chars = "[]{}?#,";
-    size_t pos = peeked_line.first_of(chars);
-    bool first = true;
-    while(pos != 0)
+mapblck_start:
+    _c4dbgpf("handle2_map_block: map_id={} node_id={} level={} indref={}", m_evt_handler->m_parent->node_id, m_evt_handler->m_curr->node_id, m_evt_handler->m_curr->level, m_evt_handler->m_curr->indref);
+
+    // states: RKEY|QMRK -> RKCL -> RVAL -> RNXT
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(BLCK));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY|RKCL|RVAL|RNXT|QMRK));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, 1 == (has_any(RKEY) + has_any(RKCL) + has_any(RVAL) + has_any(RNXT) + has_any(QMRK)));
+
+    _maybe_skip_comment();
+    csubstr rem = m_evt_handler->m_curr->line_contents.rem;
+    if(!rem.len)
+        goto mapblck_again;
+
+    if(has_any(RKEY))
     {
-        if(has_all(RMAP|RKEY) || has_any(RUNK))
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
+        //
+        // handle indentation
+        //
+        if(m_evt_handler->m_curr->at_line_beginning())
         {
-            csubstr tpkl = peeked_line.triml(' ').trimr("\r\n");
-            if(tpkl.begins_with(": ") || tpkl == ':')
+            if(m_evt_handler->m_curr->indentation_eq())
             {
-                _c4dbgpf("rscalar[EXPL]: map value starts on the peeked line: '{}'", peeked_line);
-                peeked_line = peeked_line.first(0);
-                break;
+                _c4dbgpf("mapblck[RKEY]: skip {} from indref", m_evt_handler->m_curr->indref);
+                _line_progressed(m_evt_handler->m_curr->indref);
+                rem = m_evt_handler->m_curr->line_contents.rem;
+                if(!rem.len)
+                    goto mapblck_again;
             }
-            else
+            else if(m_evt_handler->m_curr->indentation_lt())
             {
-                auto colon_pos = peeked_line.first_of_any(": ", ":");
-                if(colon_pos && colon_pos.pos < pos)
+                _c4dbgp("mapblck[RKEY]: smaller indentation!");
+                _handle_indentation_pop_from_block_map();
+                _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
+                if(has_all(RMAP|BLCK))
                 {
-                    peeked_line = peeked_line.first(colon_pos.pos);
-                    _c4dbgpf("rscalar[EXPL]: found colon at {}. peeked='{}'", colon_pos.pos, peeked_line);
-                    _RYML_CB_ASSERT(m_stack.m_callbacks, peeked_line.end() >= m_state->line_contents.rem.begin());
-                    _line_progressed(static_cast<size_t>(peeked_line.end() - m_state->line_contents.rem.begin()));
-                    break;
+                    _c4dbgp("mapblck[RKEY]: still mapblck!");
+                    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RKEY));
+                    rem = m_evt_handler->m_curr->line_contents.rem;
+                    if(!rem.len)
+                        goto mapblck_again;
+                }
+                else
+                {
+                    _c4dbgp("mapblck[RKEY]: no longer mapblck!");
+                    goto mapblck_finish;
                 }
             }
+            else
+            {
+                _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_gt());
+                _c4err("invalid indentation");
+            }
         }
-        if(pos != npos)
-        {
-            _c4dbgpf("rscalar[EXPL]: found special character '{}' at {}, stopping: '{}'", peeked_line[pos], pos, peeked_line.left_of(pos).trimr("\r\n"));
-            peeked_line = peeked_line.left_of(pos);
-            _RYML_CB_ASSERT(m_stack.m_callbacks, peeked_line.end() >= m_state->line_contents.rem.begin());
-            _line_progressed(static_cast<size_t>(peeked_line.end() - m_state->line_contents.rem.begin()));
-            break;
+        //
+        // now handle the tokens
+        //
+        const char first = rem.str[0];
+        const size_t startline = m_evt_handler->m_curr->pos.line;
+        const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
+        _c4dbgpf("mapblck[RKEY]: '{}'", first);
+        ScannedScalar sc;
+        if(first == '\'')
+        {
+            _c4dbgp("mapblck[RKEY]: scanning single-quoted scalar");
+            sc = _scan_scalar_squot();
+            csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
+            _handle_annotations_before_blck_key_scalar();
+            m_evt_handler->set_key_scalar_squoted(maybe_filtered);
+            addrem_flags(RVAL, RKEY);
+            if(!_maybe_scan_following_colon())
+                _c4err("could not find ':' colon after key");
+            _maybe_skip_whitespace_tokens();
         }
-        _c4dbgpf("rscalar[EXPL]: append another line, full: '{}'", peeked_line.trimr("\r\n"));
-        if(!first)
+        else if(first == '"')
         {
-            RYML_CHECK(_advance_to_peeked());
+            _c4dbgp("mapblck[RKEY]: scanning double-quoted scalar");
+            sc = _scan_scalar_dquot();
+            csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
+            _handle_annotations_before_blck_key_scalar();
+            m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
+            addrem_flags(RVAL, RKEY);
+            if(!_maybe_scan_following_colon())
+                _c4err("could not find ':' colon after key");
+            _maybe_skip_whitespace_tokens();
         }
-        peeked_line = _scan_to_next_nonempty_line(/*indentation*/0);
-        if(peeked_line.empty())
+        // block scalars (| and >) can not be used as keys unless they
+        // appear in an explicit QMRK scope (ie, after the ? token),
+        else if(C4_UNLIKELY(first == '|'))
         {
-            _c4err("expected token or continuation");
-        }
-        pos = peeked_line.first_of(chars);
-        first = false;
-    }
-    substr full(m_buf.str + (currscalar.str - m_buf.str), m_buf.begin() + m_state->pos.offset);
-    full = full.trimr("\n\r ");
-    return full;
-}
-
-
-//-----------------------------------------------------------------------------
-
-substr Parser::_scan_plain_scalar_blck(csubstr currscalar, csubstr peeked_line, size_t indentation)
-{
-    _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(currscalar));
-    // NOTE. there's a problem with _scan_to_next_nonempty_line(), as it counts newlines twice
-    // size_t offs = m_state->pos.offset;   // so we workaround by directly counting from the end of the given scalar
-    _RYML_CB_ASSERT(m_stack.m_callbacks, currscalar.end() >= m_buf.begin());
-    size_t offs = static_cast<size_t>(currscalar.end() - m_buf.begin());
-    _RYML_CB_ASSERT(m_stack.m_callbacks, peeked_line.begins_with(' ', indentation));
-    while(true)
-    {
-        _c4dbgpf("rscalar[IMPL]: continuing... ref_indentation={}", indentation);
-        if(peeked_line.begins_with("...") || peeked_line.begins_with("---"))
+            _c4err("block literal keys must be enclosed in '?'");
+        }
+        else if(C4_UNLIKELY(first == '>'))
         {
-            _c4dbgpf("rscalar[IMPL]: document termination next -- bail now '{}'", peeked_line.trimr("\r\n"));
-            break;
+            _c4err("block literal keys must be enclosed in '?'");
         }
-        else if(( ! peeked_line.begins_with(' ', indentation))) // is the line deindented?
+        else if(_scan_scalar_plain_map_blck(&sc))
         {
-            if(!peeked_line.trim(" \r\n\t").empty()) // is the line not blank?
-            {
-                _c4dbgpf("rscalar[IMPL]: deindented line, not blank -- bail now '{}'", peeked_line.trimr("\r\n"));
-                break;
-            }
-            _c4dbgpf("rscalar[IMPL]: line is blank and has less indentation: ref={} line={}: '{}'", indentation, peeked_line.first_not_of(' ') == csubstr::npos ? 0 : peeked_line.first_not_of(' '), peeked_line.trimr("\r\n"));
-            _c4dbgpf("rscalar[IMPL]: ... searching for a line starting at indentation {}", indentation);
-            csubstr next_peeked = _scan_to_next_nonempty_line(indentation);
-            if(next_peeked.empty())
-            {
-                _c4dbgp("rscalar[IMPL]: ... finished.");
-                break;
-            }
-            _c4dbgp("rscalar[IMPL]: ... continuing.");
-            peeked_line = next_peeked;
+            _c4dbgp("mapblck[RKEY]: plain scalar");
+            csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref);
+            _handle_annotations_before_blck_key_scalar();
+            m_evt_handler->set_key_scalar_plain(maybe_filtered);
+            addrem_flags(RVAL, RKEY);
+            if(!_maybe_scan_following_colon())
+                _c4err("could not find ':' colon after key");
+            _maybe_skip_whitespace_tokens();
         }
-
-        _c4dbgpf("rscalar[IMPL]: line contents: '{}'", peeked_line.right_of(indentation, true).trimr("\r\n"));
-        size_t token_pos;
-        if(peeked_line.find(": ") != npos)
+        else if(first == '?')
         {
-            _line_progressed(peeked_line.find(": "));
-            _c4err("': ' is not a valid token in plain flow (unquoted) scalars");
+            _c4dbgp("mapblck[RKEY]: key token!");
+            addrem_flags(QMRK, RKEY);
+            _line_progressed(1);
+            _maybe_skip_whitespace_tokens();
+            m_was_inside_qmrk = true;
+            goto mapblck_again;
         }
-        else if(peeked_line.ends_with(':'))
+        else if(first == ':')
         {
-            _line_progressed(peeked_line.find(':'));
-            _c4err("lines cannot end with ':' in plain flow (unquoted) scalars");
+            _c4dbgp("mapblck[RKEY]: setting empty key");
+            _handle_annotations_before_blck_key_scalar();
+            m_evt_handler->set_key_scalar_plain({});
+            addrem_flags(RVAL, RKEY);
+            _line_progressed(1);
+            _maybe_skip_whitespace_tokens();
         }
-        else if((token_pos = peeked_line.find(" #")) != npos)
+        else if(first == '*')
         {
-            _line_progressed(token_pos);
-            break;
-            //_c4err("' #' is not a valid token in plain flow (unquoted) scalars");
+            csubstr ref = _scan_ref_map();
+            _c4dbgpf("mapblck[RKEY]: key ref! [{}]~~~{}~~~", ref.len, ref);
+            _handle_annotations_before_blck_key_scalar();
+            m_evt_handler->set_key_ref(ref);
+            addrem_flags(RVAL, RKEY);
+            if(!_maybe_scan_following_colon())
+                _c4err("could not find ':' colon after key");
+            _maybe_skip_whitespace_tokens();
         }
-
-        _c4dbgpf("rscalar[IMPL]: append another line: (len={})'{}'", peeked_line.len, peeked_line.trimr("\r\n"));
-        if(!_advance_to_peeked())
+        else if(first == '&')
         {
-            _c4dbgp("rscalar[IMPL]: file finishes after the scalar");
-            break;
+            csubstr anchor = _scan_anchor();
+            _c4dbgpf("mapblck[RKEY]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
+            _add_annotation(&m_pending_anchors, anchor, startindent, startline);
         }
-        peeked_line = m_state->line_contents.rem;
-    }
-    _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.offset >= offs);
-    substr full(m_buf.str + (currscalar.str - m_buf.str),
-                currscalar.len + (m_state->pos.offset - offs));
-    full = full.trimr("\r\n ");
-    return full;
-}
-
-substr Parser::_scan_complex_key(csubstr currscalar, csubstr peeked_line)
-{
-    _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(currscalar));
-    // NOTE. there's a problem with _scan_to_next_nonempty_line(), as it counts newlines twice
-    // size_t offs = m_state->pos.offset;   // so we workaround by directly counting from the end of the given scalar
-    _RYML_CB_ASSERT(m_stack.m_callbacks, currscalar.end() >= m_buf.begin());
-    size_t offs = static_cast<size_t>(currscalar.end() - m_buf.begin());
-    while(true)
-    {
-        _c4dbgp("rcplxkey: continuing...");
-        if(peeked_line.begins_with("...") || peeked_line.begins_with("---"))
+        else if(first == '!')
         {
-            _c4dbgpf("rcplxkey: document termination next -- bail now '{}'", peeked_line.trimr("\r\n"));
-            break;
+            csubstr tag = _scan_tag();
+            _c4dbgpf("mapblck[RKEY]: key tag! [{}]~~~{}~~~", tag.len, tag);
+            _add_annotation(&m_pending_tags, tag, startindent, startline);
         }
-        else
+        else if(first == '[')
+        {
+            // RYML's tree cannot store container keys, but that's
+            // handled inside the tree handler. Other handlers may be
+            // able to handle it.
+            _c4dbgp("mapblck[RKEY]: start child seqflow (!)");
+            addrem_flags(RKCL, RKEY);
+            _handle_annotations_before_blck_key_scalar();
+            m_evt_handler->begin_seq_key_flow();
+            addrem_flags(RSEQ|FLOW|RVAL, RMAP|BLCK|RKCL);
+            _line_progressed(1);
+            _set_indentation(startindent);
+            goto mapblck_finish;
+        }
+        else if(first == '{')
+        {
+            // RYML's tree cannot store container keys, but that's
+            // handled inside the tree handler. Other handlers may be
+            // able to handle it.
+            _c4dbgp("mapblck[RKEY]: start child mapflow (!)");
+            addrem_flags(RKCL, RKEY);
+            _handle_annotations_before_blck_key_scalar();
+            m_evt_handler->begin_map_key_flow();
+            addrem_flags(FLOW|RKEY, BLCK|RKCL);
+            _line_progressed(1);
+            _set_indentation(startindent);
+            goto mapblck_finish;
+        }
+        else if(first == '-')
         {
-            size_t pos = peeked_line.first_of("?:[]{}");
-            if(pos == csubstr::npos)
+            _c4dbgp("mapblck[RKEY]: maybe doc?");
+            if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_begin_token(rem))
             {
-                pos = peeked_line.find("- ");
+                _c4dbgp("mapblck[RKEY]: end+start doc");
+                _start_doc_suddenly();
+                _line_progressed(3);
+                _maybe_skip_whitespace_tokens();
+                goto mapblck_finish;
             }
-            if(pos != csubstr::npos)
+            else
             {
-                _c4dbgpf("rcplxkey: found special characters at pos={}: '{}'", pos, peeked_line.trimr("\r\n"));
-                _line_progressed(pos);
-                break;
+                _c4err("parse error");
             }
         }
-
-        _c4dbgpf("rcplxkey: no special chars found '{}'", peeked_line.trimr("\r\n"));
-        csubstr next_peeked = _scan_to_next_nonempty_line(0);
-        if(next_peeked.empty())
-        {
-            _c4dbgp("rcplxkey: empty ... finished.");
-            break;
-        }
-        _c4dbgp("rcplxkey: ... continuing.");
-        peeked_line = next_peeked;
-
-        _c4dbgpf("rcplxkey: line contents: '{}'", peeked_line.trimr("\r\n"));
-        size_t colpos;
-        if((colpos = peeked_line.find(": ")) != npos)
+        else if(first == '.')
         {
-            _c4dbgp("rcplxkey: found ': ', stopping.");
-            _line_progressed(colpos);
-            break;
+            _c4dbgp("mapblck[RKEY]: maybe end doc?");
+            if(m_evt_handler->m_curr->line_contents.indentation == 0 && _is_doc_end_token(rem))
+            {
+                _c4dbgp("mapblck[RKEY]: end doc");
+                _end_doc_suddenly();
+                _line_progressed(3);
+                _maybe_skip_whitespace_tokens();
+                goto mapblck_finish;
+            }
+            else
+            {
+                _c4err("parse error");
+            }
         }
-        #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
-        else if((colpos = peeked_line.ends_with(':')))
+       _RYML_WITH_TAB_TOKENS(
+        else if(first == '\t')
         {
-            _c4dbgp("rcplxkey: ends with ':', stopping.");
-            _line_progressed(colpos);
-            break;
-        }
-        #endif
-        _c4dbgpf("rcplxkey: append another line: (len={})'{}'", peeked_line.len, peeked_line.trimr("\r\n"));
-        if(!_advance_to_peeked())
+            _c4dbgp("mapblck[RKEY]: skip tabs");
+            _maybe_skipchars('\t');
+        })
+        else
         {
-            _c4dbgp("rcplxkey: file finishes after the scalar");
-            break;
+            _c4err("parse error");
         }
-        peeked_line = m_state->line_contents.rem;
     }
-    _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.offset >= offs);
-    substr full(m_buf.str + (currscalar.str - m_buf.str),
-                currscalar.len + (m_state->pos.offset - offs));
-    return full;
-}
-
-//! scans to the next non-blank line starting with the given indentation
-csubstr Parser::_scan_to_next_nonempty_line(size_t indentation)
-{
-    csubstr next_peeked;
-    while(true)
+    else if(has_any(RKCL)) // read the key colon
     {
-        _c4dbgpf("rscalar: ... curr offset: {} indentation={}", m_state->pos.offset, indentation);
-        next_peeked = _peek_next_line(m_state->pos.offset);
-        csubstr next_peeked_triml = next_peeked.triml(' ');
-        _c4dbgpf("rscalar: ... next peeked line='{}'", next_peeked.trimr("\r\n"));
-        if(next_peeked_triml.begins_with('#'))
-        {
-            _c4dbgp("rscalar: ... first non-space character is #");
-            return {};
-        }
-        else if(next_peeked.begins_with(' ', indentation))
-        {
-            _c4dbgpf("rscalar: ... begins at same indentation {}, assuming continuation", indentation);
-            _advance_to_peeked();
-            return next_peeked;
-        }
-        else   // check for de-indentation
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
+        //
+        // handle indentation
+        //
+        if(m_evt_handler->m_curr->at_line_beginning())
         {
-            csubstr trimmed = next_peeked_triml.trimr("\t\r\n");
-            _c4dbgpf("rscalar: ... deindented! trimmed='{}'", trimmed);
-            if(!trimmed.empty())
+            if(m_evt_handler->m_curr->indentation_eq())
+            {
+                _c4dbgpf("mapblck[RKCL]: skip {} from indref", m_evt_handler->m_curr->indref);
+                _line_progressed(m_evt_handler->m_curr->indref);
+                rem = m_evt_handler->m_curr->line_contents.rem;
+                if(!rem.len)
+                    goto mapblck_again;
+            }
+            else if(C4_UNLIKELY(m_evt_handler->m_curr->indentation_lt()))
             {
-                _c4dbgp("rscalar: ... and not empty. bailing out.");
-                return {};
+                _c4err("invalid indentation");
             }
         }
-        if(!_advance_to_peeked())
+        const char first = rem.str[0];
+        _c4dbgpf("mapblck[RKCL]: '{}'", first);
+        if(first == ':')
         {
-            _c4dbgp("rscalar: file finished");
-            return {};
+            _c4dbgp("mapblck[RKCL]: found the colon");
+            addrem_flags(RVAL, RKCL);
+            _line_progressed(1);
+            _maybe_skip_whitespace_tokens();
         }
-    }
-    return {};
-}
-
-// returns false when the file finished
-bool Parser::_advance_to_peeked()
-{
-    _line_progressed(m_state->line_contents.rem.len);
-    _line_ended(); // advances to the peeked-at line, consuming all remaining (probably newline) characters on the current line
-    _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.rem.first_of("\r\n") == csubstr::npos);
-    _c4dbgpf("advance to peeked: scan more... pos={} len={}", m_state->pos.offset, m_buf.len);
-    _scan_line();  // puts the peeked-at line in the buffer
-    if(_finished_file())
-    {
-        _c4dbgp("rscalar: finished file!");
-        return false;
-    }
-    return true;
-}
-
-//-----------------------------------------------------------------------------
-
-C4_ALWAYS_INLINE size_t _extend_from_combined_newline(char nl, char following)
-{
-    return (nl == '\n' && following == '\r') || (nl == '\r' && following == '\n');
-}
-
-//! look for the next newline chars, and jump to the right of those
-csubstr from_next_line(csubstr rem)
-{
-    size_t nlpos = rem.first_of("\r\n");
-    if(nlpos == csubstr::npos)
-        return {};
-    const char nl = rem[nlpos];
-    rem = rem.right_of(nlpos);
-    if(rem.empty())
-        return {};
-    if(_extend_from_combined_newline(nl, rem.front()))
-        rem = rem.sub(1);
-    return rem;
-}
-
-csubstr Parser::_peek_next_line(size_t pos) const
-{
-    csubstr rem{}; // declare here because of the goto
-    size_t nlpos{}; // declare here because of the goto
-    pos = pos == npos ? m_state->pos.offset : pos;
-    if(pos >= m_buf.len)
-        goto next_is_empty;
-
-    // look for the next newline chars, and jump to the right of those
-    rem = from_next_line(m_buf.sub(pos));
-    if(rem.empty())
-        goto next_is_empty;
-
-    // now get everything up to and including the following newline chars
-    nlpos = rem.first_of("\r\n");
-    if((nlpos != csubstr::npos) && (nlpos + 1 < rem.len))
-        nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]);
-    rem = rem.left_of(nlpos, /*include_pos*/true);
-
-    _c4dbgpf("peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr("\r\n"));
-    return rem;
-
-next_is_empty:
-    _c4dbgpf("peek next line @ {}: (len=0)''", pos);
-    return {};
-}
-
-
-//-----------------------------------------------------------------------------
-void Parser::LineContents::reset_with_next_line(csubstr buf, size_t offset)
-{
-    RYML_ASSERT(offset <= buf.len);
-    char const* C4_RESTRICT b = &buf[offset];
-    char const* C4_RESTRICT e = b;
-    // get the current line stripped of newline chars
-    while(e < buf.end() && (*e != '\n' && *e != '\r'))
-        ++e;
-    RYML_ASSERT(e >= b);
-    const csubstr stripped_ = buf.sub(offset, static_cast<size_t>(e - b));
-    // advance pos to include the first line ending
-    if(e != buf.end() && *e == '\r')
-        ++e;
-    if(e != buf.end() && *e == '\n')
-        ++e;
-    RYML_ASSERT(e >= b);
-    const csubstr full_ = buf.sub(offset, static_cast<size_t>(e - b));
-    reset(full_, stripped_);
-}
-
-void Parser::_scan_line()
-{
-    if(m_state->pos.offset >= m_buf.len)
-    {
-        m_state->line_contents.reset(m_buf.last(0), m_buf.last(0));
-        return;
-    }
-    m_state->line_contents.reset_with_next_line(m_buf, m_state->pos.offset);
-}
-
-
-//-----------------------------------------------------------------------------
-void Parser::_line_progressed(size_t ahead)
-{
-    _c4dbgpf("line[{}] ({} cols) progressed by {}:  col {}-->{}   offset {}-->{}", m_state->pos.line, m_state->line_contents.full.len, ahead, m_state->pos.col, m_state->pos.col+ahead, m_state->pos.offset, m_state->pos.offset+ahead);
-    m_state->pos.offset += ahead;
-    m_state->pos.col += ahead;
-    _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.col <= m_state->line_contents.stripped.len+1);
-    m_state->line_contents.rem = m_state->line_contents.rem.sub(ahead);
-}
-
-void Parser::_line_ended()
-{
-    _c4dbgpf("line[{}] ({} cols) ended! offset {}-->{}", m_state->pos.line, m_state->line_contents.full.len, m_state->pos.offset, m_state->pos.offset+m_state->line_contents.full.len - m_state->line_contents.stripped.len);
-    _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.col == m_state->line_contents.stripped.len+1);
-    m_state->pos.offset += m_state->line_contents.full.len - m_state->line_contents.stripped.len;
-    ++m_state->pos.line;
-    m_state->pos.col = 1;
-}
-
-void Parser::_line_ended_undo()
-{
-    _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.col == 1u);
-    _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.line > 0u);
-    _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.offset >= m_state->line_contents.full.len - m_state->line_contents.stripped.len);
-    _c4dbgpf("line[{}] undo ended! line {}-->{}, offset {}-->{}", m_state->pos.line, m_state->pos.line, m_state->pos.line - 1, m_state->pos.offset, m_state->pos.offset - (m_state->line_contents.full.len - m_state->line_contents.stripped.len));
-    m_state->pos.offset -= m_state->line_contents.full.len - m_state->line_contents.stripped.len;
-    --m_state->pos.line;
-    m_state->pos.col = m_state->line_contents.stripped.len + 1u;
-}
-
-//-----------------------------------------------------------------------------
-void Parser::_set_indentation(size_t indentation)
-{
-    m_state->indref = indentation;
-    _c4dbgpf("state[{}]: saving indentation: {}", m_state-m_stack.begin(), m_state->indref);
-}
-
-void Parser::_save_indentation(size_t behind)
-{
-    _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.rem.begin() >= m_state->line_contents.full.begin());
-    m_state->indref = static_cast<size_t>(m_state->line_contents.rem.begin() - m_state->line_contents.full.begin());
-    _RYML_CB_ASSERT(m_stack.m_callbacks, behind <= m_state->indref);
-    m_state->indref -= behind;
-    _c4dbgpf("state[{}]: saving indentation: {}", m_state-m_stack.begin(), m_state->indref);
-}
-
-bool Parser::_maybe_set_indentation_from_anchor_or_tag()
-{
-    if(m_key_anchor.not_empty())
-    {
-        _c4dbgpf("set indentation from key anchor: {}", m_key_anchor_indentation);
-        _set_indentation(m_key_anchor_indentation); // this is the column where the anchor starts
-        return true;
-    }
-    else if(m_key_tag.not_empty())
-    {
-        _c4dbgpf("set indentation from key tag: {}", m_key_tag_indentation);
-        _set_indentation(m_key_tag_indentation); // this is the column where the tag starts
-        return true;
-    }
-    return false;
-}
-
-
-//-----------------------------------------------------------------------------
-void Parser::_write_key_anchor(size_t node_id)
-{
-    _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->has_key(node_id));
-    if( ! m_key_anchor.empty())
-    {
-        _c4dbgpf("node={}: set key anchor to '{}'", node_id, m_key_anchor);
-        m_tree->set_key_anchor(node_id, m_key_anchor);
-        m_key_anchor.clear();
-        m_key_anchor_was_before = false;
-        m_key_anchor_indentation = 0;
-    }
-    else if( ! m_tree->is_key_quoted(node_id))
-    {
-        csubstr r = m_tree->key(node_id);
-        if(r.begins_with('*'))
+        else if(first == '?')
         {
-            _c4dbgpf("node={}: set key reference: '{}'", node_id, r);
-            m_tree->set_key_ref(node_id, r.sub(1));
+            _c4dbgp("mapblck[RKCL]: got '?'. val was empty");
+            _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_was_inside_qmrk);
+            m_evt_handler->set_val_scalar_plain({});
+            m_evt_handler->add_sibling();
+            addrem_flags(QMRK, RKCL);
+            _line_progressed(1);
+            _maybe_skip_whitespace_tokens();
         }
-        else if(r == "<<")
+        else if(first == '-')
         {
-            m_tree->set_key_ref(node_id, r);
-            _c4dbgpf("node={}: it's an inheriting reference", node_id);
-            if(m_tree->is_seq(node_id))
+            if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem))
             {
-                _c4dbgpf("node={}: inheriting from seq of {}", node_id, m_tree->num_children(node_id));
-                for(size_t i = m_tree->first_child(node_id); i != NONE; i = m_tree->next_sibling(i))
-                {
-                    if( ! (m_tree->val(i).begins_with('*')))
-                        _c4err("malformed reference: '{}'", m_tree->val(i));
-                }
+                _c4dbgp("mapblck[RKCL]: end+start doc");
+                _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem));
+                _start_doc_suddenly();
+                _line_progressed(3);
+                _maybe_skip_whitespace_tokens();
+                goto mapblck_finish;
             }
-            else if( ! m_tree->val(node_id).begins_with('*'))
+            else
             {
-                 _c4err("malformed reference: '{}'", m_tree->val(node_id));
+                _c4err("parse error");
             }
-            //m_tree->set_key_ref(node_id, r);
         }
-    }
-}
-
-//-----------------------------------------------------------------------------
-void Parser::_write_val_anchor(size_t node_id)
-{
-    if( ! m_val_anchor.empty())
-    {
-        _c4dbgpf("node={}: set val anchor to '{}'", node_id, m_val_anchor);
-        m_tree->set_val_anchor(node_id, m_val_anchor);
-        m_val_anchor.clear();
-    }
-    csubstr r = m_tree->has_val(node_id) ? m_tree->val(node_id) : "";
-    if(!m_tree->is_val_quoted(node_id) && r.begins_with('*'))
-    {
-        _c4dbgpf("node={}: set val reference: '{}'", node_id, r);
-        RYML_CHECK(!m_tree->has_val_anchor(node_id));
-        m_tree->set_val_ref(node_id, r.sub(1));
-    }
-}
-
-//-----------------------------------------------------------------------------
-void Parser::_push_level(bool explicit_flow_chars)
-{
-    _c4dbgpf("pushing level! currnode={}  currlevel={} stacksize={} stackcap={}", m_state->node_id, m_state->level, m_stack.size(), m_stack.capacity());
-    _RYML_CB_ASSERT(m_stack.m_callbacks, m_state == &m_stack.top());
-    if(node(m_state) == nullptr)
-    {
-        _c4dbgp("pushing level! actually no, current node is null");
-        //_RYML_CB_ASSERT(m_stack.m_callbacks,  ! explicit_flow_chars);
-        return;
-    }
-    flag_t st = RUNK;
-    if(explicit_flow_chars || has_all(FLOW))
-    {
-        st |= FLOW;
-    }
-    m_stack.push_top();
-    m_state = &m_stack.top();
-    set_flags(st);
-    m_state->node_id = (size_t)NONE;
-    m_state->indref = (size_t)NONE;
-    ++m_state->level;
-    _c4dbgpf("pushing level: now, currlevel={}", m_state->level);
-}
-
-void Parser::_pop_level()
-{
-    _c4dbgpf("popping level! currnode={} currlevel={}", m_state->node_id, m_state->level);
-    if(has_any(RMAP) || m_tree->is_map(m_state->node_id))
-    {
-        _stop_map();
-    }
-    if(has_any(RSEQ) || m_tree->is_seq(m_state->node_id))
-    {
-        _stop_seq();
-    }
-    if(m_tree->is_doc(m_state->node_id))
-    {
-        _stop_doc();
-    }
-    _RYML_CB_ASSERT(m_stack.m_callbacks, m_stack.size() > 1);
-    _prepare_pop();
-    m_stack.pop();
-    m_state = &m_stack.top();
-    /*if(has_any(RMAP))
-    {
-        _toggle_key_val();
-    }*/
-    if(m_state->line_contents.indentation == 0)
-    {
-        //_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RTOP));
-        add_flags(RTOP);
-    }
-    _c4dbgpf("popping level: now, currnode={} currlevel={}", m_state->node_id, m_state->level);
-}
-
-//-----------------------------------------------------------------------------
-void Parser::_start_unk(bool /*as_child*/)
-{
-    _c4dbgp("start_unk");
-    _push_level();
-    _move_scalar_from_top();
-}
-
-//-----------------------------------------------------------------------------
-void Parser::_start_doc(bool as_child)
-{
-    _c4dbgpf("start_doc (as child={})", as_child);
-    _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_stack.bottom()) == node(m_root_id));
-    size_t parent_id = m_stack.size() < 2 ? m_root_id : m_stack.top(1).node_id;
-    _RYML_CB_ASSERT(m_stack.m_callbacks, parent_id != NONE);
-    _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_root(parent_id));
-    _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_state) == nullptr || node(m_state) == node(m_root_id));
-    if(as_child)
-    {
-        _c4dbgpf("start_doc: parent={}", parent_id);
-        if( ! m_tree->is_stream(parent_id))
+        else if(first == '.')
         {
-            _c4dbgp("start_doc: rearranging with root as STREAM");
-            m_tree->set_root_as_stream();
+            _c4dbgp("mapblck[RKCL]: maybe end doc?");
+            csubstr rs = rem.sub(1);
+            if(rs == ".." || rs.begins_with(".. "))
+            {
+                _c4dbgp("mapblck[RKCL]: end+start doc");
+                _end_doc_suddenly();
+                _line_progressed(3);
+                goto mapblck_finish;
+            }
+            else
+            {
+                _c4err("parse error");
+            }
         }
-        m_state->node_id = m_tree->append_child(parent_id);
-        m_tree->to_doc(m_state->node_id);
-    }
-    #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
-    else
-    {
-        _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_seq(parent_id) || m_tree->empty(parent_id));
-        m_state->node_id = parent_id;
-        if( ! m_tree->is_doc(parent_id))
+        else if(m_was_inside_qmrk)
         {
-            m_tree->to_doc(parent_id, DOC);
+            _RYML_CB_CHECK(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->indentation_eq());
+            _c4dbgp("mapblck[RKCL]: missing :");
+            m_evt_handler->set_val_scalar_plain({});
+            m_evt_handler->add_sibling();
+            m_was_inside_qmrk = false;
+            addrem_flags(RKEY, RKCL);
         }
-    }
-    #endif
-    _c4dbgpf("start_doc: id={}", m_state->node_id);
-    add_flags(RUNK|RTOP|NDOC);
-    _handle_types();
-    rem_flags(NDOC);
-}
-
-void Parser::_stop_doc()
-{
-    size_t doc_node = m_state->node_id;
-    _c4dbgpf("stop_doc[{}]", doc_node);
-    _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_doc(doc_node));
-    if(!m_tree->is_seq(doc_node) && !m_tree->is_map(doc_node) && !m_tree->is_val(doc_node))
-    {
-        _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(SSCL));
-        _c4dbgpf("stop_doc[{}]: there was nothing; adding null val", doc_node);
-        m_tree->to_val(doc_node, {}, DOC);
-    }
-}
-
-void Parser::_end_stream()
-{
-    _c4dbgpf("end_stream, level={} node_id={}", m_state->level, m_state->node_id);
-    _RYML_CB_ASSERT(m_stack.m_callbacks,  ! m_stack.empty());
-    NodeData *added = nullptr;
-    if(has_any(SSCL))
-    {
-        if(m_tree->is_seq(m_state->node_id))
+        else
         {
-            _c4dbgp("append val...");
-            added = _append_val(_consume_scalar());
+            _c4err("parse error");
         }
-        else if(m_tree->is_map(m_state->node_id))
+    }
+    else if(has_any(RVAL))
+    {
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
+        //
+        // handle indentation
+        //
+        if(m_evt_handler->m_curr->at_line_beginning())
         {
-            _c4dbgp("append null key val...");
-            added = _append_key_val_null(m_state->line_contents.rem.str);
-            #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
-            if(has_any(RSEQIMAP))
+            _c4dbgpf("mapblck[RVAL]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
+            m_evt_handler->m_curr->more_indented = false;
+            if(m_evt_handler->m_curr->indref == npos)
+            {
+                _c4dbgpf("mapblck[RVAL]: setting indentation={}", m_evt_handler->m_parent->indref);
+                _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
+                _line_progressed(m_evt_handler->m_curr->indref);
+                rem = m_evt_handler->m_curr->line_contents.rem;
+                if(!rem.len)
+                    goto mapblck_again;
+            }
+            else if(m_evt_handler->m_curr->indentation_eq())
+            {
+                _c4dbgp("mapblck[RVAL]: skip indentation!");
+                _line_progressed(m_evt_handler->m_curr->indref);
+                rem = m_evt_handler->m_curr->line_contents.rem;
+                if(!rem.len)
+                    goto mapblck_again;
+                // TODO: this is valid:
+                //
+                // ```yaml
+                // a:
+                // b:
+                // ---
+                // a:
+                //  b
+                // ---
+                // a:
+                //  b: c
+                // ```
+                //
+                // ... but this is not:
+                //
+                // ```yaml
+                // a:
+                // v
+                // ---
+                // a: b: c
+                // ```
+                //
+                // here, we probably need to set a boolean on the state
+                // to disambiguate between these cases.
+            }
+            else if(m_evt_handler->m_curr->indentation_gt())
+            {
+                _c4dbgp("mapblck[RVAL]: more indented!");
+                m_evt_handler->m_curr->more_indented = true;
+                _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
+                rem = m_evt_handler->m_curr->line_contents.rem;
+                if(!rem.len)
+                    goto mapblck_again;
+            }
+            else if(m_evt_handler->m_curr->indentation_lt())
+            {
+                _c4dbgp("mapblck[RVAL]: smaller indentation!");
+                _handle_indentation_pop_from_block_map();
+                if(has_all(RMAP|BLCK))
+                {
+                    _c4dbgp("mapblck[RVAL]: still mapblck!");
+                    _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
+                    if(has_any(RNXT))
+                    {
+                        _c4dbgp("mapblck[RVAL]: speculatively expect next keyval");
+                        m_evt_handler->add_sibling();
+                        addrem_flags(RKEY, RNXT);
+                    }
+                    goto mapblck_again;
+                }
+                else
+                {
+                    _c4dbgp("mapblck[RVAL]: no longer mapblck!");
+                    goto mapblck_finish;
+                }
+            }
+            else if(m_evt_handler->m_curr->line_contents.indentation == npos)
             {
-                _stop_seqimap();
-                _pop_level();
+                _c4dbgp("mapblck[RVAL]: empty line!");
+                _line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
+                goto mapblck_again;
             }
-            #endif
-        }
-        else if(m_tree->is_doc(m_state->node_id) || m_tree->type(m_state->node_id) == NOTYPE)
-        {
-            NodeType_e quoted = has_any(QSCL) ? VALQUO : NOTYPE; // do this before consuming the scalar
-            csubstr scalar = _consume_scalar();
-            _c4dbgpf("node[{}]: to docval '{}'{}", m_state->node_id, scalar, quoted == VALQUO ? ", quoted" : "");
-            m_tree->to_val(m_state->node_id, scalar, DOC|quoted);
-            added = m_tree->get(m_state->node_id);
-        }
-        else
-        {
-            _c4err("internal error");
         }
-    }
-    else if(has_all(RSEQ|RVAL) && has_none(FLOW))
-    {
-        _c4dbgp("add last...");
-        added = _append_val_null(m_state->line_contents.rem.str);
-    }
-    else if(!m_val_tag.empty() && (m_tree->is_doc(m_state->node_id) || m_tree->type(m_state->node_id) == NOTYPE))
-    {
-        csubstr scalar = m_state->line_contents.rem.first(0);
-        _c4dbgpf("node[{}]: add null scalar as docval", m_state->node_id);
-        m_tree->to_val(m_state->node_id, scalar, DOC);
-        added = m_tree->get(m_state->node_id);
-    }
-
-    if(added)
-    {
-        size_t added_id = m_tree->id(added);
-        if(m_tree->is_seq(m_state->node_id) || m_tree->is_doc(m_state->node_id))
-        {
-            if(!m_key_anchor.empty())
+        //
+        // now handle the tokens
+        //
+        const char first = rem.str[0];
+        const size_t startline = m_evt_handler->m_curr->pos.line;
+        const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
+        _c4dbgpf("mapblck[RVAL]: '{}'", first);
+        ScannedScalar sc;
+        if(first == '\'')
+        {
+            _c4dbgp("mapblck[RVAL]: scanning single-quoted scalar");
+            sc = _scan_scalar_squot();
+            if(!_maybe_scan_following_colon())
             {
-                _c4dbgpf("node[{}]: move key to val anchor: '{}'", added_id, m_key_anchor);
-                m_val_anchor = m_key_anchor;
-                m_key_anchor = {};
+                _c4dbgp("mapblck[RVAL]: set as val");
+                _handle_annotations_before_blck_val_scalar();
+                csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc); // VAL!
+                m_evt_handler->set_val_scalar_squoted(maybe_filtered);
+                addrem_flags(RNXT, RVAL);
             }
-            if(!m_key_tag.empty())
+            else
             {
-                _c4dbgpf("node[{}]: move key to val tag: '{}'", added_id, m_key_tag);
-                m_val_tag = m_key_tag;
-                m_key_tag = {};
+                if(startindent != m_evt_handler->m_curr->indref)
+                {
+                    _c4dbgp("mapblck[RVAL]: start new block map, set scalar as key");
+                    _handle_annotations_before_start_mapblck(startline);
+                    addrem_flags(RNXT, RVAL);
+                    m_evt_handler->begin_map_val_block();
+                    _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
+                    csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
+                    m_evt_handler->set_key_scalar_squoted(maybe_filtered);
+                    _maybe_skip_whitespace_tokens();
+                    _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
+                    // keep the child state on RVAL
+                    addrem_flags(RVAL, RNXT);
+                }
+                else
+                {
+                    _c4dbgp("mapblck[RVAL]: prev val empty+this is a key");
+                    m_evt_handler->set_val_scalar_plain({});
+                    m_evt_handler->add_sibling();
+                    csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
+                    m_evt_handler->set_key_scalar_squoted(maybe_filtered);
+                    // keep going on RVAL
+                    _maybe_skip_whitespace_tokens();
+                }
             }
         }
-        #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
-        if(!m_key_anchor.empty())
-        {
-            _c4dbgpf("node[{}]: set key anchor='{}'", added_id, m_key_anchor);
-            m_tree->set_key_anchor(added_id, m_key_anchor);
-            m_key_anchor = {};
-        }
-        #endif
-        if(!m_val_anchor.empty())
+        else if(first == '"')
         {
-            _c4dbgpf("node[{}]: set val anchor='{}'", added_id, m_val_anchor);
-            m_tree->set_val_anchor(added_id, m_val_anchor);
-            m_val_anchor = {};
+            _c4dbgp("mapblck[RVAL]: scanning double-quoted scalar");
+            sc = _scan_scalar_dquot();
+            if(!_maybe_scan_following_colon())
+            {
+                _c4dbgp("mapblck[RVAL]: set as val");
+                _handle_annotations_before_blck_val_scalar();
+                csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc); // VAL!
+                m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
+                addrem_flags(RNXT, RVAL);
+            }
+            else
+            {
+                if(startindent != m_evt_handler->m_curr->indref)
+                {
+                    _c4dbgp("mapblck[RVAL]: start new block map, set scalar as key");
+                    _handle_annotations_before_start_mapblck(startline);
+                    addrem_flags(RNXT, RVAL);
+                    m_evt_handler->begin_map_val_block();
+                    _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
+                    csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
+                    m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
+                    _maybe_skip_whitespace_tokens();
+                    _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
+                    // keep the child state on RVAL
+                    addrem_flags(RVAL, RNXT);
+                }
+                else
+                {
+                    _c4dbgp("mapblck[RVAL]: prev val empty+this is a key");
+                    m_evt_handler->set_val_scalar_plain({});
+                    m_evt_handler->add_sibling();
+                    csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
+                    m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
+                    // keep going on RVAL
+                    _maybe_skip_whitespace_tokens();
+                }
+            }
         }
-        #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
-        if(!m_key_tag.empty())
+        // block scalars can only appear as keys when in QMRK scope
+        // (ie, after ? tokens), so no need to scan following colon
+        else if(first == '|')
         {
-            _c4dbgpf("node[{}]: set key tag='{}' -> '{}'", added_id, m_key_tag, normalize_tag(m_key_tag));
-            m_tree->set_key_tag(added_id, normalize_tag(m_key_tag));
-            m_key_tag = {};
+            _c4dbgp("mapblck[RVAL]: scanning block-literal scalar");
+            ScannedBlock sb;
+            _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
+            _handle_annotations_before_blck_val_scalar();
+            csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
+            m_evt_handler->set_val_scalar_literal(maybe_filtered);
+            addrem_flags(RNXT, RVAL);
         }
-        #endif
-        if(!m_val_tag.empty())
+        else if(first == '>')
         {
-            _c4dbgpf("node[{}]: set val tag='{}' -> '{}'", added_id, m_val_tag, normalize_tag(m_val_tag));
-            m_tree->set_val_tag(added_id, normalize_tag(m_val_tag));
-            m_val_tag = {};
+            _c4dbgp("mapblck[RVAL]: scanning block-folded scalar");
+            ScannedBlock sb;
+            _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
+            _handle_annotations_before_blck_val_scalar();
+            csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
+            m_evt_handler->set_val_scalar_folded(maybe_filtered);
+            addrem_flags(RNXT, RVAL);
         }
-    }
-
-    while(m_stack.size() > 1)
-    {
-        _c4dbgpf("popping level: {} (stack sz={})", m_state->level, m_stack.size());
-        _RYML_CB_ASSERT(m_stack.m_callbacks,  ! has_any(SSCL, &m_stack.top()));
-        if(has_all(RSEQ|FLOW))
-            _err("closing ] not found");
-        _pop_level();
-    }
-    add_flags(NDOC);
-}
-
-void Parser::_start_new_doc(csubstr rem)
-{
-    _c4dbgp("_start_new_doc");
-    _RYML_CB_ASSERT(m_stack.m_callbacks, rem.begins_with("---"));
-    C4_UNUSED(rem);
-
-    _end_stream();
-
-    size_t indref = m_state->indref;
-    _c4dbgpf("start a document, indentation={}", indref);
-    _line_progressed(3);
-    _push_level();
-    _start_doc();
-    _set_indentation(indref);
-}
-
-
-//-----------------------------------------------------------------------------
-void Parser::_start_map(bool as_child)
-{
-    _c4dbgpf("start_map (as child={})", as_child);
-    addrem_flags(RMAP|RVAL, RKEY|RUNK);
-    _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_stack.bottom()) == node(m_root_id));
-    size_t parent_id = m_stack.size() < 2 ? m_root_id : m_stack.top(1).node_id;
-    _RYML_CB_ASSERT(m_stack.m_callbacks, parent_id != NONE);
-    _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_state) == nullptr || node(m_state) == node(m_root_id));
-    if(as_child)
-    {
-        m_state->node_id = m_tree->append_child(parent_id);
-        if(has_all(SSCL))
+        else if(_scan_scalar_plain_map_blck(&sc))
         {
-            type_bits key_quoted = NOTYPE;
-            if(m_state->flags & QSCL) // before consuming the scalar
-                key_quoted |= KEYQUO;
-            csubstr key = _consume_scalar();
-            m_tree->to_map(m_state->node_id, key, key_quoted);
-            _c4dbgpf("start_map: id={} key='{}'", m_state->node_id, m_tree->key(m_state->node_id));
-            _write_key_anchor(m_state->node_id);
-            if( ! m_key_tag.empty())
+            _c4dbgp("mapblck[RVAL]: plain scalar.");
+            if(!_maybe_scan_following_colon())
             {
-                _c4dbgpf("node[{}]: set key tag='{}' -> '{}'", m_state->node_id, m_key_tag, normalize_tag(m_key_tag));
-                m_tree->set_key_tag(m_state->node_id, normalize_tag(m_key_tag));
-                m_key_tag.clear();
+                _c4dbgp("mapblck[RVAL]: set as val");
+                _handle_annotations_before_blck_val_scalar();
+                csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, m_evt_handler->m_curr->indref); // VAL!
+                m_evt_handler->set_val_scalar_plain(maybe_filtered);
+                addrem_flags(RNXT, RVAL);
+            }
+            else
+            {
+                if(startindent != m_evt_handler->m_curr->indref)
+                {
+                    _c4dbgpf("mapblck[RVAL]: start new block map, set scalar as key {}", m_evt_handler->m_curr->indref);
+                    addrem_flags(RNXT, RVAL);
+                    _handle_annotations_before_start_mapblck(startline);
+                    m_evt_handler->begin_map_val_block();
+                    _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
+                    csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
+                    m_evt_handler->set_key_scalar_plain(maybe_filtered);
+                    _maybe_skip_whitespace_tokens();
+                    _set_indentation(m_evt_handler->m_curr->line_contents.indentation);
+                    // keep the child state on RVAL
+                    addrem_flags(RVAL, RNXT);
+                }
+                else
+                {
+                    _c4dbgp("mapblck[RVAL]: prev val empty+this is a key");
+                    _handle_annotations_before_blck_val_scalar();
+                    m_evt_handler->set_val_scalar_plain({});
+                    m_evt_handler->add_sibling();
+                    csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
+                    m_evt_handler->set_key_scalar_plain(maybe_filtered);
+                    // keep going on RVAL
+                    _maybe_skip_whitespace_tokens();
+                }
             }
         }
-        else
-        {
-            m_tree->to_map(m_state->node_id);
-            _c4dbgpf("start_map: id={}", m_state->node_id);
-        }
-        m_tree->_p(m_state->node_id)->m_val.scalar.str = m_state->line_contents.rem.str;
-        _write_val_anchor(m_state->node_id);
-    }
-    else
-    {
-        _RYML_CB_ASSERT(m_stack.m_callbacks, parent_id != NONE);
-        m_state->node_id = parent_id;
-        _c4dbgpf("start_map: id={}", m_state->node_id);
-        type_bits as_doc = 0;
-        if(m_tree->is_doc(m_state->node_id))
-            as_doc |= DOC;
-        if(!m_tree->is_map(parent_id))
+        else if(first == '-')
         {
-            RYML_CHECK(!m_tree->has_children(parent_id));
-            m_tree->to_map(parent_id, as_doc);
+            if(rem.len == 1 || rem.str[1] == ' ' _RYML_WITH_TAB_TOKENS(|| rem.str[1] == '\t'))
+            {
+                _c4dbgp("mapblck[RVAL]: start val seqblck");
+                addrem_flags(RNXT, RVAL);
+                _handle_annotations_before_blck_val_scalar();
+                m_evt_handler->begin_seq_val_block();
+                addrem_flags(RSEQ|RVAL, RMAP|RNXT);
+                _set_indentation(startindent);
+                _line_progressed(1);
+                _maybe_skip_whitespace_tokens();
+                goto mapblck_finish;
+            }
+            else if(m_evt_handler->m_curr->indref == 0 || m_evt_handler->m_curr->line_contents.indentation == 0 || _is_doc_begin_token(rem))
+            {
+                _c4dbgp("mapblck[RVAL]: end+start doc");
+                _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, _is_doc_begin_token(rem));
+                _start_doc_suddenly();
+                _line_progressed(3);
+                _maybe_skip_whitespace_tokens();
+                goto mapblck_finish;
+            }
+            else
+            {
+                _c4err("parse error");
+            }
         }
-        else
+        else if(first == '[')
         {
-            m_tree->_add_flags(parent_id, as_doc);
+            _c4dbgp("mapblck[RVAL]: start val seqflow");
+            addrem_flags(RNXT, RVAL);
+            _handle_annotations_before_blck_val_scalar();
+            m_evt_handler->begin_seq_val_flow();
+            addrem_flags(RSEQ|FLOW|RVAL, RMAP|BLCK|RNXT|BLCK);
+            _set_indentation(m_evt_handler->m_curr->indref + 1u);
+            _line_progressed(1);
+            goto mapblck_finish;
         }
-        _move_scalar_from_top();
-        if(m_key_anchor.not_empty())
-            m_key_anchor_was_before = true;
-        _write_val_anchor(parent_id);
-        if(m_stack.size() >= 2)
+        else if(first == '{')
         {
-            State const& parent_state = m_stack.top(1);
-            if(parent_state.flags & RSET)
-                add_flags(RSET);
+            _c4dbgp("mapblck[RVAL]: start val mapflow");
+            addrem_flags(RNXT, RVAL);
+            _handle_annotations_before_blck_val_scalar();
+            m_evt_handler->begin_map_val_flow();
+            addrem_flags(RKEY|FLOW, BLCK|RVAL|RNXT);
+            m_evt_handler->m_curr->scalar_col = m_evt_handler->m_curr->line_contents.indentation;
+            _set_indentation(m_evt_handler->m_curr->indref + 1u);
+            _line_progressed(1);
+            goto mapblck_finish;
         }
-        m_tree->_p(parent_id)->m_val.scalar.str = m_state->line_contents.rem.str;
-    }
-    if( ! m_val_tag.empty())
-    {
-        _c4dbgpf("node[{}]: set val tag='{}' -> '{}'", m_state->node_id, m_val_tag, normalize_tag(m_val_tag));
-        m_tree->set_val_tag(m_state->node_id, normalize_tag(m_val_tag));
-        m_val_tag.clear();
-    }
-}
-
-void Parser::_start_map_unk(bool as_child)
-{
-    if(!m_key_anchor_was_before)
-    {
-        _c4dbgpf("stash key anchor before starting map... '{}'", m_key_anchor);
-        csubstr ka = m_key_anchor;
-        m_key_anchor = {};
-        _start_map(as_child);
-        m_key_anchor = ka;
-    }
-    else
-    {
-        _start_map(as_child);
-        m_key_anchor_was_before = false;
-    }
-    if(m_key_tag2.not_empty())
-    {
-        m_key_tag = m_key_tag2;
-        m_key_tag_indentation = m_key_tag2_indentation;
-        m_key_tag2.clear();
-        m_key_tag2_indentation = 0;
-    }
-}
-
-void Parser::_stop_map()
-{
-    _c4dbgpf("stop_map[{}]", m_state->node_id);
-    _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_map(m_state->node_id));
-    if(has_all(QMRK|RKEY) && !has_all(SSCL))
-    {
-        _c4dbgpf("stop_map[{}]: RKEY", m_state->node_id);
-        _store_scalar_null(m_state->line_contents.rem.str);
-        _append_key_val_null(m_state->line_contents.rem.str);
-    }
-}
-
-
-//-----------------------------------------------------------------------------
-void Parser::_start_seq(bool as_child)
-{
-    _c4dbgpf("start_seq (as child={})", as_child);
-    if(has_all(RTOP|RUNK))
-    {
-        _c4dbgpf("start_seq: moving key tag to val tag: '{}'", m_key_tag);
-        m_val_tag = m_key_tag;
-        m_key_tag.clear();
-    }
-    addrem_flags(RSEQ|RVAL, RUNK);
-    _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_stack.bottom()) == node(m_root_id));
-    size_t parent_id = m_stack.size() < 2 ? m_root_id : m_stack.top(1).node_id;
-    _RYML_CB_ASSERT(m_stack.m_callbacks, parent_id != NONE);
-    _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_state) == nullptr || node(m_state) == node(m_root_id));
-    if(as_child)
-    {
-        m_state->node_id = m_tree->append_child(parent_id);
-        if(has_all(SSCL))
+        else if(first == '*')
         {
-            _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_map(parent_id));
-            type_bits key_quoted = 0;
-            if(m_state->flags & QSCL) // before consuming the scalar
-                key_quoted |= KEYQUO;
-            csubstr key = _consume_scalar();
-            m_tree->to_seq(m_state->node_id, key, key_quoted);
-            _c4dbgpf("start_seq: id={} name='{}'", m_state->node_id, m_tree->key(m_state->node_id));
-            _write_key_anchor(m_state->node_id);
-            if( ! m_key_tag.empty())
+            csubstr ref = _scan_ref_map();
+            _c4dbgpf("mapblck[RVAL]: ref! [{}]~~~{}~~~", ref.len, ref);
+            if(startindent == m_evt_handler->m_curr->indref)
             {
-                _c4dbgpf("start_seq[{}]: set key tag='{}' -> '{}'", m_state->node_id, m_key_tag, normalize_tag(m_key_tag));
-                m_tree->set_key_tag(m_state->node_id, normalize_tag(m_key_tag));
-                m_key_tag.clear();
+                _c4dbgpf("mapblck[RVAL]: same indentation {}", startindent);
+                m_evt_handler->set_val_ref(ref);
+                addrem_flags(RNXT, RVAL);
             }
+            else
+            {
+                _c4dbgpf("mapblck[RVAL]: larger indentation {}>{}", startindent, m_evt_handler->m_curr->indref);
+                _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, startindent > m_evt_handler->m_curr->indref);
+                if(_maybe_scan_following_colon())
+                {
+                    _c4dbgp("mapblck[RVAL]: start child map, block");
+                    addrem_flags(RNXT, RVAL);
+                    _handle_annotations_before_blck_val_scalar();
+                    m_evt_handler->begin_map_val_block();
+                    m_evt_handler->set_key_ref(ref);
+                    _set_indentation(startindent);
+                    // keep going in RVAL
+                    addrem_flags(RVAL, RNXT);
+                }
+                else
+                {
+                    _c4dbgp("mapblck[RVAL]: was val ref");
+                    _handle_annotations_before_blck_val_scalar();
+                    m_evt_handler->set_val_ref(ref);
+                    addrem_flags(RNXT, RVAL);
+                }
+            }
+            _maybe_skip_whitespace_tokens();
         }
-        else
-        {
-            type_bits as_doc = 0;
-            _RYML_CB_ASSERT(m_stack.m_callbacks, !m_tree->is_doc(m_state->node_id));
-            m_tree->to_seq(m_state->node_id, as_doc);
-            _c4dbgpf("start_seq: id={}{}", m_state->node_id, as_doc ? " as doc" : "");
-        }
-        _write_val_anchor(m_state->node_id);
-        m_tree->_p(m_state->node_id)->m_val.scalar.str = m_state->line_contents.rem.str;
-    }
-    else
-    {
-        m_state->node_id = parent_id;
-        type_bits as_doc = 0;
-        if(m_tree->is_doc(m_state->node_id))
-            as_doc |= DOC;
-        if(!m_tree->is_seq(parent_id))
+        else if(first == '&')
         {
-            RYML_CHECK(!m_tree->has_children(parent_id));
-            m_tree->to_seq(parent_id, as_doc);
+            csubstr anchor = _scan_anchor();
+            _c4dbgpf("mapblck[RVAL]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
+            if(startindent == m_evt_handler->m_curr->indref)
+            {
+                _c4dbgp("mapblck[RVAL]: anchor for next key. val is missing!");
+                m_evt_handler->set_val_scalar_plain({});
+                m_evt_handler->add_sibling();
+                addrem_flags(RKEY, RVAL);
+            }
+            // we need to buffer the anchors, as there may be two
+            // consecutive anchors in here
+            _add_annotation(&m_pending_anchors, anchor, startindent, startline);
         }
-        else
+        else if(first == '!')
         {
-            m_tree->_add_flags(parent_id, as_doc);
+            csubstr tag = _scan_tag();
+            _c4dbgpf("mapblck[RVAL]: tag! [{}]~~~{}~~~", tag.len, tag);
+            if(startindent == m_evt_handler->m_curr->indref)
+            {
+                _c4dbgp("mapblck[RVAL]: tag for next key. val is missing!");
+                _handle_annotations_before_blck_val_scalar();
+                m_evt_handler->set_val_scalar_plain({});
+                m_evt_handler->add_sibling();
+                addrem_flags(RKEY, RVAL);
+            }
+            // we need to buffer the tags, as there may be two
+            // consecutive tags in here
+            _add_annotation(&m_pending_tags, tag, startindent, startline);
         }
-        _move_scalar_from_top();
-        _c4dbgpf("start_seq: id={}{}", m_state->node_id, as_doc ? " as_doc" : "");
-        _write_val_anchor(parent_id);
-        m_tree->_p(parent_id)->m_val.scalar.str = m_state->line_contents.rem.str;
-    }
-    if( ! m_val_tag.empty())
-    {
-        _c4dbgpf("start_seq[{}]: set val tag='{}' -> '{}'", m_state->node_id, m_val_tag, normalize_tag(m_val_tag));
-        m_tree->set_val_tag(m_state->node_id, normalize_tag(m_val_tag));
-        m_val_tag.clear();
-    }
-}
-
-void Parser::_stop_seq()
-{
-    _c4dbgp("stop_seq");
-    _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_seq(m_state->node_id));
-}
-
-
-//-----------------------------------------------------------------------------
-void Parser::_start_seqimap()
-{
-    _c4dbgpf("start_seqimap at node={}. has_children={}", m_state->node_id, m_tree->has_children(m_state->node_id));
-    _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RSEQ|FLOW));
-    // create a map, and turn the last scalar of this sequence
-    // into the key of the map's first child. This scalar was
-    // understood to be a value in the sequence, but it is
-    // actually a key of a map, implicitly opened here.
-    // Eg [val, key: val]
-    //
-    // Yep, YAML is crazy.
-    if(m_tree->has_children(m_state->node_id) && m_tree->has_val(m_tree->last_child(m_state->node_id)))
-    {
-        size_t prev = m_tree->last_child(m_state->node_id);
-        NodeType ty = m_tree->_p(prev)->m_type; // don't use type() because it masks out the quotes
-        NodeScalar tmp = m_tree->valsc(prev);
-        _c4dbgpf("has children and last child={} has val. saving the scalars, val='{}' quoted={}", prev, tmp.scalar, ty.is_val_quoted());
-        m_tree->remove(prev);
-        _push_level();
-        _start_map();
-        _store_scalar(tmp.scalar, ty.is_val_quoted());
-        m_key_anchor = tmp.anchor;
-        m_key_tag = tmp.tag;
-    }
-    else
-    {
-        _c4dbgpf("node {} has no children yet, using empty key", m_state->node_id);
-        _push_level();
-        _start_map();
-        _store_scalar_null(m_state->line_contents.rem.str);
-    }
-    add_flags(RSEQIMAP|FLOW);
-}
-
-void Parser::_stop_seqimap()
-{
-    _c4dbgp("stop_seqimap");
-    _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RSEQIMAP));
-}
-
-
-//-----------------------------------------------------------------------------
-NodeData* Parser::_append_val(csubstr val, flag_t quoted)
-{
-    _RYML_CB_ASSERT(m_stack.m_callbacks,  ! has_all(SSCL));
-    _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_state) != nullptr);
-    _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_seq(m_state->node_id));
-    type_bits additional_flags = quoted ? VALQUO : NOTYPE;
-    _c4dbgpf("append val: '{}' to parent id={} (level={}){}", val, m_state->node_id, m_state->level, quoted ? " VALQUO!" : "");
-    size_t nid = m_tree->append_child(m_state->node_id);
-    m_tree->to_val(nid, val, additional_flags);
-
-    _c4dbgpf("append val: id={} val='{}'", nid, m_tree->get(nid)->m_val.scalar);
-    if( ! m_val_tag.empty())
-    {
-        _c4dbgpf("append val[{}]: set val tag='{}' -> '{}'", nid, m_val_tag, normalize_tag(m_val_tag));
-        m_tree->set_val_tag(nid, normalize_tag(m_val_tag));
-        m_val_tag.clear();
-    }
-    _write_val_anchor(nid);
-    return m_tree->get(nid);
-}
-
-NodeData* Parser::_append_key_val(csubstr val, flag_t val_quoted)
-{
-    _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_map(m_state->node_id));
-    type_bits additional_flags = 0;
-    if(m_state->flags & QSCL)
-        additional_flags |= KEYQUO;
-    if(val_quoted)
-        additional_flags |= VALQUO;
-
-    csubstr key = _consume_scalar();
-    _c4dbgpf("append keyval: '{}' '{}' to parent id={} (level={}){}{}", key, val, m_state->node_id, m_state->level, (additional_flags & KEYQUO) ? " KEYQUO!" : "", (additional_flags & VALQUO) ? " VALQUO!" : "");
-    size_t nid = m_tree->append_child(m_state->node_id);
-    m_tree->to_keyval(nid, key, val, additional_flags);
-    _c4dbgpf("append keyval: id={} key='{}' val='{}'", nid, m_tree->key(nid), m_tree->val(nid));
-    if( ! m_key_tag.empty())
-    {
-        _c4dbgpf("append keyval[{}]: set key tag='{}' -> '{}'", nid, m_key_tag, normalize_tag(m_key_tag));
-        m_tree->set_key_tag(nid, normalize_tag(m_key_tag));
-        m_key_tag.clear();
-    }
-    if( ! m_val_tag.empty())
-    {
-        _c4dbgpf("append keyval[{}]: set val tag='{}' -> '{}'", nid, m_val_tag, normalize_tag(m_val_tag));
-        m_tree->set_val_tag(nid, normalize_tag(m_val_tag));
-        m_val_tag.clear();
-    }
-    _write_key_anchor(nid);
-    _write_val_anchor(nid);
-    rem_flags(QMRK);
-    return m_tree->get(nid);
-}
-
-
-//-----------------------------------------------------------------------------
-void Parser::_store_scalar(csubstr s, flag_t is_quoted)
-{
-    _c4dbgpf("state[{}]: storing scalar '{}' (flag: {}) (old scalar='{}')",
-             m_state-m_stack.begin(), s, m_state->flags & SSCL, m_state->scalar);
-    RYML_CHECK(has_none(SSCL));
-    add_flags(SSCL | (is_quoted * QSCL));
-    m_state->scalar = s;
-}
-
-csubstr Parser::_consume_scalar()
-{
-    _c4dbgpf("state[{}]: consuming scalar '{}' (flag: {}))", m_state-m_stack.begin(), m_state->scalar, m_state->flags & SSCL);
-    RYML_CHECK(m_state->flags & SSCL);
-    csubstr s = m_state->scalar;
-    rem_flags(SSCL | QSCL);
-    m_state->scalar.clear();
-    return s;
-}
-
-void Parser::_move_scalar_from_top()
-{
-    if(m_stack.size() < 2) return;
-    State &prev = m_stack.top(1);
-    _RYML_CB_ASSERT(m_stack.m_callbacks, m_state == &m_stack.top());
-    _RYML_CB_ASSERT(m_stack.m_callbacks, m_state != &prev);
-    if(prev.flags & SSCL)
-    {
-        _c4dbgpf("moving scalar '{}' from state[{}] to state[{}] (overwriting '{}')", prev.scalar, &prev-m_stack.begin(), m_state-m_stack.begin(), m_state->scalar);
-        add_flags(prev.flags & (SSCL | QSCL));
-        m_state->scalar = prev.scalar;
-        rem_flags(SSCL | QSCL, &prev);
-        prev.scalar.clear();
-    }
-}
-
-//-----------------------------------------------------------------------------
-/** @todo this function is a monster and needs love. */
-bool Parser::_handle_indentation()
-{
-    _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(FLOW));
-    if( ! _at_line_begin())
-        return false;
-
-    size_t ind = m_state->line_contents.indentation;
-    csubstr rem = m_state->line_contents.rem;
-    /** @todo instead of trimming, we should use the indentation index from above */
-    csubstr remt = rem.triml(' ');
-
-    if(remt.empty() || remt.begins_with('#')) // this is a blank or comment line
-    {
-        _line_progressed(rem.size());
-        return true;
-    }
-
-    _c4dbgpf("indentation? ind={} indref={}", ind, m_state->indref);
-    if(ind == m_state->indref)
-    {
-        if(has_all(SSCL|RVAL) && ! rem.sub(ind).begins_with('-'))
+        else if(first == '?')
         {
-            if(has_all(RMAP))
+            if(startindent == m_evt_handler->m_curr->indref)
             {
-                _append_key_val_null(rem.str + ind - 1);
-                addrem_flags(RKEY, RVAL);
+                _c4dbgp("mapblck[RVAL]: got '?'. val was empty");
+                _handle_annotations_before_blck_val_scalar();
+                m_evt_handler->set_val_scalar_plain({});
+                m_evt_handler->add_sibling();
+                addrem_flags(QMRK, RVAL);
             }
-            #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
-            else if(has_all(RSEQ))
+            else if(startindent > m_evt_handler->m_curr->indref)
             {
-                _append_val(_consume_scalar());
+                _c4dbgp("mapblck[RVAL]: start val mapblck");
                 addrem_flags(RNXT, RVAL);
+                _handle_annotations_before_blck_val_scalar();
+                m_evt_handler->begin_map_val_block();
+                addrem_flags(QMRK|BLCK, RNXT);
+                _set_indentation(startindent);
             }
             else
             {
-                _c4err("internal error");
+                _c4err("parse error");
             }
-            #endif
+            m_was_inside_qmrk = true;
+            _line_progressed(1);
+            _maybe_skip_whitespace_tokens();
+            goto mapblck_again;
         }
-        else if(has_all(RSEQ|RNXT) && ! rem.sub(ind).begins_with('-'))
+        else if(first == ':')
         {
-            if(m_stack.size() > 2) // do not pop to root level
+            if(startindent == m_evt_handler->m_curr->indref)
             {
-                _c4dbgp("end the indentless seq");
-                _pop_level();
-                return true;
+                _c4dbgp("mapblck[RVAL]: got ':'. val was empty, next key as well");
+                m_evt_handler->set_val_scalar_plain({});
+                m_evt_handler->add_sibling();
+                m_evt_handler->set_key_scalar_plain({});
+                _line_progressed(1);
+                _maybe_skip_whitespace_tokens();
+                goto mapblck_again;
+            }
+            else
+            {
+                _c4err("parse error");
+            }
+        }
+        else if(first == '.')
+        {
+            _c4dbgp("mapblck[RVAL]: maybe doc?");
+            csubstr rs = rem.sub(1);
+            if(rs == ".." || rs.begins_with(".. "))
+            {
+                _c4dbgp("mapblck[RVAL]: end doc expl");
+                _end_doc_suddenly();
+                _line_progressed(3);
+                _maybe_skip_whitespace_tokens();
+                goto mapblck_finish;
+            }
+            else
+            {
+                _c4err("parse error");
             }
         }
+       _RYML_WITH_TAB_TOKENS(
+        else if(first == '\t')
+        {
+            _c4dbgp("mapblck[RVAL]: skip tabs");
+            _maybe_skipchars('\t');
+        })
         else
         {
-            _c4dbgpf("same indentation ({}) -- nothing to see here", ind);
+            _c4err("parse error");
         }
-        _line_progressed(ind);
-        return ind > 0;
     }
-    else if(ind < m_state->indref)
+    else if(has_any(RNXT))
     {
-        _c4dbgpf("smaller indentation ({} < {})!!!", ind, m_state->indref);
-        if(has_all(RVAL))
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(QMRK));
+        //
+        // handle indentation
+        //
+        if(m_evt_handler->m_curr->at_line_beginning())
         {
-            _c4dbgp("there was an empty val -- appending");
-            if(has_all(RMAP))
-            {
-                _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(SSCL));
-                _append_key_val_null(rem.sub(ind).str - 1);
-            }
-            else if(has_all(RSEQ))
+            _c4dbgpf("mapblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
+            if(m_evt_handler->m_curr->indentation_eq())
             {
-                _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(SSCL));
-                _append_val_null(rem.sub(ind).str - 1);
+                _c4dbgpf("mapblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
+                _line_progressed(m_evt_handler->m_curr->indref);
+                _c4dbgp("mapblck[RNXT]: speculatively expect next keyval");
+                m_evt_handler->add_sibling();
+                addrem_flags(RKEY, RNXT);
+                goto mapblck_again;
             }
-        }
-        // search the stack frame to jump to based on its indentation
-        State const* popto = nullptr;
-        _RYML_CB_ASSERT(m_stack.m_callbacks, m_stack.is_contiguous()); // this search relies on the stack being contiguous
-        for(State const* s = m_state-1; s >= m_stack.begin(); --s)
-        {
-            _c4dbgpf("searching for state with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id);
-            if(s->indref == ind)
+            else if(m_evt_handler->m_curr->indentation_lt())
             {
-                _c4dbgpf("gotit!!! level={} node={}", s->level, s->node_id);
-                popto = s;
-                // while it may be tempting to think we're done at this
-                // point, we must still determine whether we're jumping to a
-                // parent with the same indentation. Consider this case with
-                // an indentless sequence:
-                //
-                // product:
-                // - sku: BL394D
-                //   quantity: 4
-                //   description: Basketball
-                //   price: 450.00
-                // - sku: BL4438H
-                //   quantity: 1
-                //   description: Super Hoop
-                //   price: 2392.00  # jumping one level here would be wrong.
-                // tax: 1234.5       # we must jump two levels
-                if(popto > m_stack.begin())
+                _c4dbgp("mapblck[RNXT]: smaller indentation!");
+                _handle_indentation_pop_from_block_map();
+                if(has_all(RMAP|BLCK))
                 {
-                    auto parent = popto - 1;
-                    if(parent->indref == popto->indref)
+                    _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
+                    if(!has_any(RKCL))
                     {
-                        _c4dbgpf("the parent (level={},node={}) has the same indentation ({}). is this in an indentless sequence?", parent->level, parent->node_id, popto->indref);
-                        _c4dbgpf("isseq(popto)={} ismap(parent)={}", m_tree->is_seq(popto->node_id), m_tree->is_map(parent->node_id));
-                        if(m_tree->is_seq(popto->node_id) && m_tree->is_map(parent->node_id))
-                        {
-                            if( ! remt.begins_with('-'))
-                            {
-                                _c4dbgp("this is an indentless sequence");
-                                popto = parent;
-                            }
-                            else
-                            {
-                                _c4dbgp("not an indentless sequence");
-                            }
-                        }
+                        _c4dbgp("mapblck[RNXT]: speculatively expect next keyval");
+                        m_evt_handler->add_sibling();
+                        addrem_flags(RKEY, RNXT);
                     }
+                    goto mapblck_again;
+                }
+                else
+                {
+                    goto mapblck_finish;
                 }
-                break;
             }
         }
-        if(!popto || popto >= m_state || popto->level >= m_state->level)
-        {
-            _c4err("parse error: incorrect indentation?");
-        }
-        _c4dbgpf("popping {} levels: from level {} to level {}", m_state->level-popto->level, m_state->level, popto->level);
-        while(m_state != popto)
-        {
-            _c4dbgpf("popping level {} (indentation={})", m_state->level, m_state->indref);
-            _pop_level();
-        }
-        _RYML_CB_ASSERT(m_stack.m_callbacks, ind == m_state->indref);
-        _line_progressed(ind);
-        return true;
-    }
-    else
-    {
-        _c4dbgpf("larger indentation ({} > {})!!!", ind, m_state->indref);
-        _RYML_CB_ASSERT(m_stack.m_callbacks, ind > m_state->indref);
-        if(has_all(RMAP|RVAL))
+        //
+        // handle tokens
+        //
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.len > 0);
+        const char first = rem.str[0];
+        _c4dbgpf("mapblck[RNXT]: '{}'", _c4prc(first));
+        if(first == ':')
         {
-            if(_is_scalar_next__rmap_val(remt) && remt.first_of(":?") == npos)
+            if(m_evt_handler->m_curr->more_indented)
             {
-                _c4dbgpf("actually it seems a value: '{}'", remt);
+                _c4dbgp("mapblck[RNXT]: start child block map");
+                C4_NOT_IMPLEMENTED();
+                //m_evt_handler->actually_as_block_map();
+                _line_progressed(1);
+                _set_indentation(m_evt_handler->m_curr->scalar_col);
+                m_evt_handler->m_curr->more_indented = false;
+                goto mapblck_again;
             }
             else
-            {
-                addrem_flags(RKEY, RVAL);
-                _start_unk();
-                //_move_scalar_from_top();
-                _line_progressed(ind);
-                _save_indentation();
-                return true;
+            {
+                _c4err("parse error");
             }
         }
-        else if(has_all(RSEQ|RVAL))
+        else if(first == ' ')
         {
-            // nothing to do here
+            _c4dbgp("mapblck[RNXT]: skip spaces");
+            _maybe_skip_whitespace_tokens();
         }
         else
         {
-            _c4err("parse error - indentation should not increase at this point");
+            _c4err("parse error");
         }
     }
-
-    return false;
-}
-
-//-----------------------------------------------------------------------------
-csubstr Parser::_scan_comment()
-{
-    csubstr s = m_state->line_contents.rem;
-    _RYML_CB_ASSERT(m_stack.m_callbacks, s.begins_with('#'));
-    _line_progressed(s.len);
-    // skip the # character
-    s = s.sub(1);
-    // skip leading whitespace
-    s = s.right_of(s.first_not_of(' '), /*include_pos*/true);
-    _c4dbgpf("comment was '{}'", s);
-    return s;
-}
-
-//-----------------------------------------------------------------------------
-csubstr Parser::_scan_squot_scalar()
-{
-    // quoted scalars can spread over multiple lines!
-    // nice explanation here: http://yaml-multiline.info/
-
-    // a span to the end of the file
-    size_t b = m_state->pos.offset;
-    substr s = m_buf.sub(b);
-    if(s.begins_with(' '))
-    {
-        s = s.triml(' ');
-        _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.sub(b).is_super(s));
-        _RYML_CB_ASSERT(m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
-        _line_progressed((size_t)(s.begin() - m_buf.sub(b).begin()));
-    }
-    b = m_state->pos.offset; // take this into account
-    _RYML_CB_ASSERT(m_stack.m_callbacks, s.begins_with('\''));
-
-    // skip the opening quote
-    _line_progressed(1);
-    s = s.sub(1);
-
-    bool needs_filter = false;
-
-    size_t numlines = 1; // we already have one line
-    size_t pos = npos; // find the pos of the matching quote
-    while( ! _finished_file())
+    else if(has_any(QMRK))
     {
-        const csubstr line = m_state->line_contents.rem;
-        bool line_is_blank = true;
-        _c4dbgpf("scanning single quoted scalar @ line[{}]: ~~~{}~~~", m_state->pos.line, line);
-        for(size_t i = 0; i < line.len; ++i)
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKEY));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RKCL));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RVAL));
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT));
+        //
+        // handle indentation
+        //
+        if(m_evt_handler->m_curr->at_line_beginning())
         {
-            const char curr = line.str[i];
-            if(curr == '\'') // single quotes are escaped with two single quotes
+            _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_curr->line_contents.indentation != npos);
+            if(m_evt_handler->m_curr->indentation_eq())
             {
-                const char next = i+1 < line.len ? line.str[i+1] : '~';
-                if(next != '\'') // so just look for the first quote
-                {                // without another after it
-                    pos = i;
-                    break;
+                _c4dbgpf("mapblck[QMRK]: skip {} from indref", m_evt_handler->m_curr->indref);
+                _line_progressed(m_evt_handler->m_curr->indref);
+                rem = m_evt_handler->m_curr->line_contents.rem;
+                if(!rem.len)
+                    goto mapblck_again;
+            }
+            else if(m_evt_handler->m_curr->indentation_lt())
+            {
+                _c4dbgp("mapblck[QMRK]: smaller indentation!");
+                _handle_indentation_pop_from_block_map();
+                _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
+                if(has_all(RMAP|BLCK))
+                {
+                    _c4dbgp("mapblck[QMRK]: still mapblck!");
+                    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(QMRK));
+                    rem = m_evt_handler->m_curr->line_contents.rem;
+                    if(!rem.len)
+                        goto mapblck_again;
                 }
                 else
                 {
-                    needs_filter = true; // needs filter to remove escaped quotes
-                    ++i; // skip the escaped quote
+                    _c4dbgp("mapblck[QMRK]: no longer mapblck!");
+                    goto mapblck_finish;
                 }
             }
-            else if(curr != ' ')
+            // indentation can be larger in QMRK state
+            else
             {
-                line_is_blank = false;
+                _c4dbgp("mapblck[QMRK]: larger indentation !");
+                _line_progressed(m_evt_handler->m_curr->line_contents.indentation);
+                rem = m_evt_handler->m_curr->line_contents.rem;
+                if(!rem.len)
+                    goto mapblck_again;
             }
         }
-
-        // leading whitespace also needs filtering
-        needs_filter = needs_filter
-            || numlines > 1
-            || line_is_blank
-            || (_at_line_begin() && line.begins_with(' '))
-            || (m_state->line_contents.full.last_of('\r') != csubstr::npos);
-
-        if(pos == npos)
+        //
+        // now handle the tokens
+        //
+        const char first = rem.str[0];
+        const size_t startline = m_evt_handler->m_curr->pos.line;
+        const size_t startindent = m_evt_handler->m_curr->line_contents.current_col();
+        _c4dbgpf("mapblck[QMRK]: '{}'", first);
+        ScannedScalar sc;
+        if(first == '\'')
+        {
+            _c4dbgp("mapblck[QMRK]: scanning single-quoted scalar");
+            sc = _scan_scalar_squot();
+            csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc); // KEY!
+            if(!_maybe_scan_following_colon())
+            {
+                _c4dbgp("mapblck[QMRK]: set as key");
+                _handle_annotations_before_blck_key_scalar();
+                m_evt_handler->set_key_scalar_squoted(maybe_filtered);
+                addrem_flags(RKCL, QMRK);
+            }
+            else
+            {
+                _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
+                addrem_flags(RKCL, QMRK);
+                _handle_annotations_before_start_mapblck_as_key();
+                m_evt_handler->begin_map_key_block();
+                _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
+                m_evt_handler->set_key_scalar_squoted(maybe_filtered);
+                _maybe_skip_whitespace_tokens();
+                _set_indentation(startindent);
+                // keep the child state on RVAL
+                addrem_flags(RVAL, RKCL|QMRK);
+            }
+        }
+        else if(first == '"')
         {
-            _line_progressed(line.len);
-            ++numlines;
+            _c4dbgp("mapblck[QMRK]: scanning double-quoted scalar");
+            sc = _scan_scalar_dquot();
+            csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc); // KEY!
+            if(!_maybe_scan_following_colon())
+            {
+                _c4dbgp("mapblck[QMRK]: set as key");
+                _handle_annotations_before_blck_key_scalar();
+                m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
+                addrem_flags(RKCL, QMRK);
+            }
+            else
+            {
+                _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
+                addrem_flags(RKCL, QMRK);
+                _handle_annotations_before_start_mapblck_as_key();
+                m_evt_handler->begin_map_key_block();
+                _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
+                m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
+                _maybe_skip_whitespace_tokens();
+                _set_indentation(startindent);
+                // keep the child state on RVAL
+                addrem_flags(RVAL, RKCL|QMRK);
+            }
         }
-        else
+        else if(first == '|')
         {
-            _RYML_CB_ASSERT(m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
-            _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf[m_state->pos.offset + pos] == '\'');
-            _line_progressed(pos + 1); // progress beyond the quote
-            pos = m_state->pos.offset - b - 1; // but we stop before it
-            break;
+            _c4dbgp("mapblck[QMRK]: scanning block-literal scalar");
+            ScannedBlock sb;
+            _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
+            csubstr maybe_filtered = _maybe_filter_key_scalar_literal(sb); // KEY!
+            _handle_annotations_before_blck_key_scalar();
+            m_evt_handler->set_key_scalar_literal(maybe_filtered);
+            addrem_flags(RKCL, QMRK);
         }
-
-        _line_ended();
-        _scan_line();
-    }
-
-    if(pos == npos)
-    {
-        _c4err("reached end of file while looking for closing quote");
-    }
-    else
-    {
-        _RYML_CB_ASSERT(m_stack.m_callbacks, pos > 0);
-        _RYML_CB_ASSERT(m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
-        _RYML_CB_ASSERT(m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '\'');
-        s = s.sub(0, pos-1);
-    }
-
-    if(needs_filter)
-    {
-        csubstr ret = _filter_squot_scalar(s);
-        _RYML_CB_ASSERT(m_stack.m_callbacks, ret.len <= s.len || s.empty() || s.trim(' ').empty());
-        _c4dbgpf("final scalar: \"{}\"", ret);
-        return ret;
-    }
-
-    _c4dbgpf("final scalar: \"{}\"", s);
-
-    return s;
-}
-
-//-----------------------------------------------------------------------------
-csubstr Parser::_scan_dquot_scalar()
-{
-    // quoted scalars can spread over multiple lines!
-    // nice explanation here: http://yaml-multiline.info/
-
-    // a span to the end of the file
-    size_t b = m_state->pos.offset;
-    substr s = m_buf.sub(b);
-    if(s.begins_with(' '))
-    {
-        s = s.triml(' ');
-        _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.sub(b).is_super(s));
-        _RYML_CB_ASSERT(m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
-        _line_progressed((size_t)(s.begin() - m_buf.sub(b).begin()));
-    }
-    b = m_state->pos.offset; // take this into account
-    _RYML_CB_ASSERT(m_stack.m_callbacks, s.begins_with('"'));
-
-    // skip the opening quote
-    _line_progressed(1);
-    s = s.sub(1);
-
-    bool needs_filter = false;
-
-    size_t numlines = 1; // we already have one line
-    size_t pos = npos; // find the pos of the matching quote
-    while( ! _finished_file())
-    {
-        const csubstr line = m_state->line_contents.rem;
-        bool line_is_blank = true;
-        _c4dbgpf("scanning double quoted scalar @ line[{}]:  line='{}'", m_state->pos.line, line);
-        for(size_t i = 0; i < line.len; ++i)
+        else if(first == '>')
         {
-            const char curr = line.str[i];
-            if(curr != ' ')
-                line_is_blank = false;
-            // every \ is an escape
-            if(curr == '\\')
+            _c4dbgp("mapblck[QMRK]: scanning block-literal scalar");
+            ScannedBlock sb;
+            _scan_block(&sb, m_evt_handler->m_curr->indref + 1);
+            csubstr maybe_filtered = _maybe_filter_key_scalar_folded(sb); // KEY!
+            _handle_annotations_before_blck_key_scalar();
+            m_evt_handler->set_key_scalar_folded(maybe_filtered);
+            addrem_flags(RKCL, QMRK);
+        }
+        else if(_scan_scalar_plain_map_blck(&sc))
+        {
+            _c4dbgp("mapblck[QMRK]: plain scalar");
+            csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, m_evt_handler->m_curr->indref); // KEY!
+            if(!_maybe_scan_following_colon())
             {
-                const char next = i+1 < line.len ? line.str[i+1] : '~';
-                needs_filter = true;
-                if(next == '"' || next == '\\')
-                    ++i;
+                _c4dbgp("mapblck[QMRK]: set as key");
+                _handle_annotations_before_blck_key_scalar();
+                m_evt_handler->set_key_scalar_plain(maybe_filtered);
+                addrem_flags(RKCL, QMRK);
             }
-            else if(curr == '"')
+            else
             {
-                pos = i;
-                break;
+                _c4dbgp("mapblck[QMRK]: start new block map as key (!), set scalar as key");
+                addrem_flags(RKCL, QMRK);
+                _handle_annotations_before_start_mapblck_as_key();
+                m_evt_handler->begin_map_key_block();
+                _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
+                m_evt_handler->set_key_scalar_plain(maybe_filtered);
+                _maybe_skip_whitespace_tokens();
+                _set_indentation(startindent);
+                // keep the child state on RVAL
+                addrem_flags(RVAL, RKCL|QMRK);
             }
         }
-
-        // leading whitespace also needs filtering
-        needs_filter = needs_filter
-            || numlines > 1
-            || line_is_blank
-            || (_at_line_begin() && line.begins_with(' '))
-            || (m_state->line_contents.full.last_of('\r') != csubstr::npos);
-
-        if(pos == npos)
+        else if(first == ':')
         {
-            _line_progressed(line.len);
-            ++numlines;
+            if(startindent == m_evt_handler->m_curr->indref)
+            {
+                _c4dbgp("mapblck[QMRK]: empty key");
+                addrem_flags(RVAL, QMRK);
+                _handle_annotations_before_blck_key_scalar();
+                m_evt_handler->set_key_scalar_plain({});
+                _line_progressed(1);
+                _maybe_skip_whitespace_tokens();
+            }
+            else
+            {
+                _c4dbgp("mapblck[QMRK]: start new block map as key (!), empty key");
+                addrem_flags(RKCL, QMRK);
+                _handle_annotations_before_start_mapblck_as_key();
+                m_evt_handler->begin_map_key_block();
+                _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
+                m_evt_handler->set_key_scalar_plain({});
+                _line_progressed(1);
+                _maybe_skip_whitespace_tokens();
+                _set_indentation(startindent);
+                // keep the child state on RVAL
+                addrem_flags(RVAL, RKCL|QMRK);
+            }
         }
-        else
+        else if(first == '*')
         {
-            _RYML_CB_ASSERT(m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
-            _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf[m_state->pos.offset + pos] == '"');
-            _line_progressed(pos + 1); // progress beyond the quote
-            pos = m_state->pos.offset - b - 1; // but we stop before it
-            break;
+            csubstr ref = _scan_ref_map();
+            _c4dbgpf("mapblck[QMRK]: key ref! [{}]~~~{}~~~", ref.len, ref);
+            if(!_maybe_scan_following_colon())
+            {
+                _c4dbgp("mapblck[QMRK]: set ref as key");
+                _handle_annotations_before_blck_key_scalar();
+                m_evt_handler->set_key_ref(ref);
+                addrem_flags(RKCL, QMRK);
+            }
+            else
+            {
+                _c4dbgp("mapblck[QMRK]: start new block map as key (!), set ref as key");
+                addrem_flags(RKCL, QMRK);
+                _handle_annotations_before_blck_key_scalar();
+                m_evt_handler->begin_map_key_block();
+                m_evt_handler->set_key_ref(ref);
+                _set_indentation(startindent);
+                // keep the child state on RVAL
+                addrem_flags(RVAL, RKCL|QMRK);
+            }
+            _maybe_skip_whitespace_tokens();
         }
-
-        _line_ended();
-        _scan_line();
-    }
-
-    if(pos == npos)
-    {
-        _c4err("reached end of file looking for closing quote");
-    }
-    else
-    {
-        _RYML_CB_ASSERT(m_stack.m_callbacks, pos > 0);
-        _RYML_CB_ASSERT(m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '"');
-        _RYML_CB_ASSERT(m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
-        s = s.sub(0, pos-1);
-    }
-
-    if(needs_filter)
-    {
-        csubstr ret = _filter_dquot_scalar(s);
-        _c4dbgpf("final scalar: [{}]\"{}\"", ret.len, ret);
-        _RYML_CB_ASSERT(m_stack.m_callbacks, ret.len <= s.len || s.empty() || s.trim(' ').empty());
-        return ret;
-    }
-
-    _c4dbgpf("final scalar: \"{}\"", s);
-
-    return s;
-}
-
-//-----------------------------------------------------------------------------
-csubstr Parser::_scan_block()
-{
-    // nice explanation here: http://yaml-multiline.info/
-    csubstr s = m_state->line_contents.rem;
-    csubstr trimmed = s.triml(' ');
-    if(trimmed.str > s.str)
-    {
-        _c4dbgp("skipping whitespace");
-        _RYML_CB_ASSERT(m_stack.m_callbacks, trimmed.str >= s.str);
-        _line_progressed(static_cast<size_t>(trimmed.str - s.str));
-        s = trimmed;
-    }
-    _RYML_CB_ASSERT(m_stack.m_callbacks, s.begins_with('|') || s.begins_with('>'));
-
-    _c4dbgpf("scanning block: specs=\"{}\"", s);
-
-    // parse the spec
-    BlockStyle_e newline = s.begins_with('>') ? BLOCK_FOLD : BLOCK_LITERAL;
-    BlockChomp_e chomp = CHOMP_CLIP; // default to clip unless + or - are used
-    size_t indentation = npos; // have to find out if no spec is given
-    csubstr digits;
-    if(s.len > 1)
-    {
-        _RYML_CB_ASSERT(m_stack.m_callbacks, s.begins_with_any("|>"));
-        csubstr t = s.sub(1);
-        _c4dbgpf("scanning block: spec is multichar: '{}'", t);
-        _RYML_CB_ASSERT(m_stack.m_callbacks, t.len >= 1);
-        size_t pos = t.first_of("-+");
-        _c4dbgpf("scanning block: spec chomp char at {}", pos);
-        if(pos != npos)
+        else if(first == '&')
         {
-            if(t[pos] == '-')
-                chomp = CHOMP_STRIP;
-            else if(t[pos] == '+')
-                chomp = CHOMP_KEEP;
-            if(pos == 0)
-                t = t.sub(1);
-            else
-                t = t.first(pos);
+            csubstr anchor = _scan_anchor();
+            _c4dbgpf("mapblck[QMRK]: key anchor! [{}]~~~{}~~~", anchor.len, anchor);
+            _add_annotation(&m_pending_anchors, anchor, startindent, startline);
         }
-        // from here to the end, only digits are considered
-        digits = t.left_of(t.first_not_of("0123456789"));
-        if( ! digits.empty())
+        else if(first == '!')
         {
-            if( ! c4::atou(digits, &indentation))
-                _c4err("parse error: could not read decimal");
-            _c4dbgpf("scanning block: indentation specified: {}. add {} from curr state -> {}", indentation, m_state->indref, indentation+m_state->indref);
-            indentation += m_state->indref;
+            csubstr tag = _scan_tag();
+            _c4dbgpf("mapblck[QMRK]: key tag! [{}]~~~{}~~~", tag.len, tag);
+            _add_annotation(&m_pending_tags, tag, startindent, startline);
         }
-    }
-
-    // finish the current line
-    _line_progressed(s.len);
-    _line_ended();
-    _scan_line();
-
-    _c4dbgpf("scanning block: style={}  chomp={}  indentation={}", newline==BLOCK_FOLD ? "fold" : "literal",
-        chomp==CHOMP_CLIP ? "clip" : (chomp==CHOMP_STRIP ? "strip" : "keep"), indentation);
-
-    // start with a zero-length block, already pointing at the right place
-    substr raw_block(m_buf.data() + m_state->pos.offset, size_t(0));// m_state->line_contents.full.sub(0, 0);
-    _RYML_CB_ASSERT(m_stack.m_callbacks, raw_block.begin() == m_state->line_contents.full.begin());
-
-    // read every full line into a raw block,
-    // from which newlines are to be stripped as needed.
-    //
-    // If no explicit indentation was given, pick it from the first
-    // non-empty line. See
-    // https://yaml.org/spec/1.2.2/#8111-block-indentation-indicator
-    size_t num_lines = 0, first = m_state->pos.line, provisional_indentation = npos;
-    LineContents lc;
-    while(( ! _finished_file()))
-    {
-        // peek next line, but do not advance immediately
-        lc.reset_with_next_line(m_buf, m_state->pos.offset);
-        _c4dbgpf("scanning block: peeking at '{}'", lc.stripped);
-        // evaluate termination conditions
-        if(indentation != npos)
+        else if(first == '-')
         {
-            // stop when the line is deindented and not empty
-            if(lc.indentation < indentation && ( ! lc.rem.trim(" \t\r\n").empty()))
+            _c4dbgp("mapblck[QMRK]: maybe doc?");
+            csubstr rs = rem.sub(1);
+            if(rs == "--" || rs.begins_with("-- "))
             {
-                _c4dbgpf("scanning block: indentation decreased ref={} thisline={}", indentation, lc.indentation);
-                break;
+                _c4dbgp("mapblck[QMRK]: end+start doc");
+                _start_doc_suddenly();
+                _line_progressed(3);
             }
-            else if(indentation == 0)
+            else
             {
-                if((lc.rem == "..." || lc.rem.begins_with("... "))
-                    ||
-                   (lc.rem == "---" || lc.rem.begins_with("--- ")))
-                {
-                    _c4dbgp("scanning block: stop. indentation=0 and stream ended");
-                    break;
-                }
+                _c4dbgp("mapblck[QMRK]: start child seqblck (!)");
+                addrem_flags(RKCL, RKEY|QMRK);
+                m_evt_handler->begin_seq_key_block();
+                addrem_flags(RVAL|RSEQ, RMAP|RKCL|QMRK);
+                _set_indentation(startindent);
+                _line_progressed(1);
             }
+            _maybe_skip_whitespace_tokens();
+            goto mapblck_finish;
+        }
+        else if(first == '[')
+        {
+            _c4dbgp("mapblck[QMRK]: start child seqflow (!)");
+            addrem_flags(RKCL, RKEY|QMRK);
+            m_evt_handler->begin_seq_key_flow();
+            addrem_flags(RVAL|RSEQ|FLOW, RMAP|RKCL|QMRK|BLCK);
+            _set_indentation(m_evt_handler->m_parent->indref);
+            _line_progressed(1);
+            goto mapblck_finish;
+        }
+        else if(first == '{')
+        {
+            _c4dbgp("mapblck[QMRK]: start child mapblck (!)");
+            addrem_flags(RKCL, RKEY|QMRK);
+            m_evt_handler->begin_map_key_flow();
+            addrem_flags(RKEY|FLOW, RVAL|RKCL|QMRK|BLCK);
+            _set_indentation(m_evt_handler->m_parent->indref);
+            _line_progressed(1);
+            goto mapblck_finish;
+        }
+        else if(first == '?')
+        {
+            _c4dbgp("mapblck[QMRK]: another QMRK '?'");
+            m_evt_handler->set_key_scalar_plain({});
+            m_evt_handler->set_val_scalar_plain({});
+            m_evt_handler->add_sibling();
+            _line_progressed(1);
         }
-        else
+        else if(first == '.')
         {
-            _c4dbgpf("scanning block: indentation ref not set. firstnonws={}", lc.stripped.first_not_of(' '));
-            if(lc.stripped.first_not_of(' ') != npos) // non-empty line
+            _c4dbgp("mapblck[QMRK]: maybe end doc?");
+            csubstr rs = rem.sub(1);
+            if(rs == ".." || rs.begins_with(".. "))
             {
-                _c4dbgpf("scanning block: line not empty. indref={} indprov={} indentation={}", m_state->indref, provisional_indentation, lc.indentation);
-                if(provisional_indentation == npos)
-                {
-                    #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
-                    if(lc.indentation < m_state->indref)
-                    {
-                        _c4dbgpf("scanning block: block terminated indentation={} < indref={}", lc.indentation, m_state->indref);
-                        break;
-                    }
-                    else
-                    #endif
-                    if(lc.indentation == m_state->indref)
-                    {
-                        if(has_any(RSEQ|RMAP))
-                        {
-                            _c4dbgpf("scanning block: block terminated. reading container and indentation={}==indref={}", lc.indentation, m_state->indref);
-                            break;
-                        }
-                    }
-                    _c4dbgpf("scanning block: set indentation ref from this line: ref={}", lc.indentation);
-                    indentation = lc.indentation;
-                }
-                else
-                {
-                    if(lc.indentation >= provisional_indentation)
-                    {
-                        _c4dbgpf("scanning block: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation);
-                        //indentation = provisional_indentation ? provisional_indentation : lc.indentation;
-                        indentation = lc.indentation;
-                    }
-                    else
-                    {
-                        break;
-                        //_c4err("parse error: first non-empty block line should have at least the original indentation");
-                    }
-                }
+                _c4dbgp("mapblck[QMRK]: end+start doc");
+                _end_doc_suddenly();
+                _line_progressed(3);
+                goto mapblck_finish;
             }
-            else // empty line
+            else
             {
-                _c4dbgpf("scanning block: line empty or {} spaces. line_indentation={} prov_indentation={}", lc.stripped.len, lc.indentation, provisional_indentation);
-                if(provisional_indentation != npos)
-                {
-                    if(lc.stripped.len >= provisional_indentation)
-                    {
-                        _c4dbgpf("scanning block: increase provisional_ref {} -> {}", provisional_indentation, lc.stripped.len);
-                        provisional_indentation = lc.stripped.len;
-                    }
-                    #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
-                    else if(lc.indentation >= provisional_indentation && lc.indentation != npos)
-                    {
-                        _c4dbgpf("scanning block: increase provisional_ref {} -> {}", provisional_indentation, lc.indentation);
-                        provisional_indentation = lc.indentation;
-                    }
-                    #endif
-                }
-                else
-                {
-                    provisional_indentation = lc.indentation ? lc.indentation : has_any(RSEQ|RVAL);
-                    _c4dbgpf("scanning block: initialize provisional_ref={}", provisional_indentation);
-                    if(provisional_indentation == npos)
-                    {
-                        provisional_indentation = lc.stripped.len ? lc.stripped.len : has_any(RSEQ|RVAL);
-                        _c4dbgpf("scanning block: initialize provisional_ref={}", provisional_indentation);
-                    }
-                }
+                _c4err("parse error");
             }
         }
-        // advance now that we know the folded scalar continues
-        m_state->line_contents = lc;
-        _c4dbgpf("scanning block: append '{}'", m_state->line_contents.rem);
-        raw_block.len += m_state->line_contents.full.len;
-        _line_progressed(m_state->line_contents.rem.len);
-        _line_ended();
-        ++num_lines;
+        else
+        {
+            _c4err("parse error");
+        }
     }
-    _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.line == (first + num_lines));
-    C4_UNUSED(num_lines);
-    C4_UNUSED(first);
 
-    if(indentation == npos)
+ mapblck_again:
+    _c4dbgt("mapblck: again", 0);
+    if(_finished_line())
     {
-        _c4dbgpf("scanning block: set indentation from provisional: {}", provisional_indentation);
-        indentation = provisional_indentation;
+        _line_ended();
+        _scan_line();
+        if(_finished_file())
+        {
+            _c4dbgp("mapblck: file finished!");
+            _end_map_blck();
+            goto mapblck_finish;
+        }
+        _c4dbgnextline();
     }
+    goto mapblck_start;
 
-    if(num_lines)
-        _line_ended_undo();
-
-    _c4dbgpf("scanning block: raw=~~~{}~~~", raw_block);
-
-    // ok! now we strip the newlines and spaces according to the specs
-    s = _filter_block_scalar(raw_block, newline, chomp, indentation);
-
-    _c4dbgpf("scanning block: final=~~~{}~~~", s);
-
-    return s;
+ mapblck_finish:
+    _c4dbgp("mapblck: finish");
 }
 
 
 //-----------------------------------------------------------------------------
 
-template<bool backslash_is_escape, bool keep_trailing_whitespace>
-bool Parser::_filter_nl(substr r, size_t *C4_RESTRICT i, size_t *C4_RESTRICT pos, size_t indentation)
+template<class EventHandler>
+void ParseEngine<EventHandler>::_handle_unk_json() // top-level (RTOP) handler for the JSON variant, per its name: dispatches on the first non-blank char of the line to start a flow seq, a flow map, or a scalar value
 {
-    // a debugging scaffold:
-    #if 0
-    #define _c4dbgfnl(fmt, ...) _c4dbgpf("filter_nl[{}]: " fmt, *i, __VA_ARGS__)
-    #else
-    #define _c4dbgfnl(...)
-    #endif
+    _c4dbgpf("handle_unk_json indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
 
-    const char curr = r[*i];
-    bool replaced = false;
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RTOP));
 
-    _RYML_CB_ASSERT(m_stack.m_callbacks, indentation != npos);
-    _RYML_CB_ASSERT(m_stack.m_callbacks, curr == '\n');
+    _maybe_skip_comment();
+    csubstr rem = m_evt_handler->m_curr->line_contents.rem;
+    if(!rem.len)
+        return; // nothing left to handle on this line
 
-    _c4dbgfnl("found newline. sofar=[{}]~~~{}~~~", *pos, m_filter_arena.first(*pos));
-    size_t ii = *i;
-    size_t numnl_following = count_following_newlines(r, &ii, indentation);
-    if(numnl_following)
+    size_t pos = rem.first_not_of(" \t"); // offset of the first char that is neither a space nor a tab
+    if(pos)
+    {
+        pos = pos != npos ? pos : rem.len; // npos: the remainder is all blanks, so consume all of it
+        _c4dbgpf("skipping indentation of {}", pos);
+        _line_progressed(pos);
+        rem = m_evt_handler->m_curr->line_contents.rem; // re-read: the line position has moved
+        if(!rem.len)
+            return;
+        _c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem);
+    }
+
+    if(rem.begins_with('['))
     {
-        _c4dbgfnl("{} consecutive (empty) lines {} in the middle. totalws={}", 1+numnl_following, ii < r.len ? "in the middle" : "at the end", ii - *i);
-        for(size_t j = 0; j < numnl_following; ++j)
-            m_filter_arena.str[(*pos)++] = '\n';
+        _c4dbgp("it's a seq");
+        m_evt_handler->check_trailing_doc_token();
+        _maybe_begin_doc();
+        m_evt_handler->begin_seq_val_flow();
+        addrem_flags(RSEQ|FLOW|RVAL, RUNK|RTOP|RDOC);
+        _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem)); // presumably the column of the '[' -- confirm against current_col()
+        m_doc_empty = false;
+        _line_progressed(1); // step over the '['
+    }
+    else if(rem.begins_with('{'))
+    {
+        _c4dbgp("it's a map");
+        m_evt_handler->check_trailing_doc_token();
+        _maybe_begin_doc();
+        m_evt_handler->begin_map_val_flow();
+        addrem_flags(RMAP|FLOW|RKEY, RVAL|RTOP|RUNK|RDOC);
+        m_doc_empty = false;
+        _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
+        _line_progressed(1); // step over the '{'
     }
     else
     {
-        if(r.first_not_of(" \t", *i+1) != npos)
-        {
-            m_filter_arena.str[(*pos)++] = ' ';
-            _c4dbgfnl("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, r.len, *pos, m_filter_arena.first(*pos));
-            replaced = true;
-        }
-        else
-        {
-            if C4_IF_CONSTEXPR (keep_trailing_whitespace)
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks,  ! has_any(SSCL));
+        _maybe_skip_whitespace_tokens();
+        csubstr s = m_evt_handler->m_curr->line_contents.rem;
+        if(!s.len)
+            return;
+        const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
+        const char first = s.str[0];
+        ScannedScalar sc;
+        if(first == '"') // only double quotes are recognised here; anything else falls through to the plain-scalar scan
+        {
+            _c4dbgp("runk_json: scanning double-quoted scalar");
+            m_evt_handler->check_trailing_doc_token();
+            _maybe_begin_doc();
+            add_flags(RDOC);
+            m_doc_empty = false;
+            sc = _scan_scalar_dquot();
+            csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
+            if(!_maybe_scan_following_colon()) // presumably true when a ':' follows the scalar -- confirm against the helper
             {
-                m_filter_arena.str[(*pos)++] = ' ';
-                _c4dbgfnl("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, r.len, *pos, m_filter_arena.first(*pos));
-                replaced = true;
+                _c4dbgp("runk_json: set as val");
+                _handle_annotations_before_blck_val_scalar();
+                m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
             }
             else
             {
-                _c4dbgfnl("last newline, everything else is whitespace. ii={}/{}", ii, r.len);
-                *i = r.len;
+                _c4err("parse error"); // unlike _handle_unk, a scalar followed by ':' does not open a block map here
             }
         }
-        if C4_IF_CONSTEXPR (backslash_is_escape)
+        else if(_scan_scalar_plain_unk(&sc))
         {
-            if(ii < r.len && r.str[ii] == '\\')
+            _c4dbgp("runk_json: got a plain scalar");
+            m_evt_handler->check_trailing_doc_token();
+            _maybe_begin_doc();
+            add_flags(RDOC);
+            m_doc_empty = false;
+            if(!_maybe_scan_following_colon())
             {
-                const char next = ii+1 < r.len ? r.str[ii+1] : '\0';
-                if(next == ' ' || next == '\t')
-                {
-                    _c4dbgfnl("extend skip to backslash{}", "");
-                    ++ii;
-                }
+                _c4dbgp("runk_json: set as val");
+                _handle_annotations_before_blck_val_scalar();
+                csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
+                m_evt_handler->set_val_scalar_plain(maybe_filtered);
+            }
+            else
+            {
+                _c4err("parse error"); // plain scalar followed by ':' is rejected as well
             }
         }
+        else
+        {
+            _c4err("parse error"); // neither a container opener nor a scannable scalar
+        }
     }
-    *i = ii - 1; // correct for the loop increment
-
-    #undef _c4dbgfnl
-
-    return replaced;
 }
 
 
 //-----------------------------------------------------------------------------
 
-template<bool keep_trailing_whitespace>
-void Parser::_filter_ws(substr r, size_t *C4_RESTRICT i, size_t *C4_RESTRICT pos)
+template<class EventHandler>
+void ParseEngine<EventHandler>::_handle_unk() // top-level (RTOP) handler run while the node kind is still unknown: inspects the first non-blank char of the line and starts a doc, a container, an annotation, or a scalar
 {
-    // a debugging scaffold:
-    #if 0
-    #define _c4dbgfws(fmt, ...) _c4dbgpf("filt_nl[{}]: " fmt, *i, __VA_ARGS__)
-    #else
-    #define _c4dbgfws(...)
-    #endif
+    _c4dbgpf("handle_unk indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
+
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP));
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RTOP));
 
-    const char curr = r[*i];
-    _c4dbgfws("found whitespace '{}'", _c4prc(curr));
-    _RYML_CB_ASSERT(m_stack.m_callbacks, curr == ' ' || curr == '\t');
+    _maybe_skip_comment();
+    csubstr rem = m_evt_handler->m_curr->line_contents.rem;
+    if(!rem.len)
+        return; // nothing left to handle on this line
+
+    size_t pos = rem.first_not_of(" \t"); // offset of the first char that is neither a space nor a tab
+    if(pos)
+    {
+        pos = pos != npos ? pos : rem.len; // npos: the remainder is all blanks, so consume all of it
+        _c4dbgpf("skipping {} whitespace characters", pos);
+        _line_progressed(pos);
+        rem = m_evt_handler->m_curr->line_contents.rem; // re-read: the line position has moved
+        if(!rem.len)
+            return;
+        _c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem);
+    }
 
-    size_t first = *i > 0 ? r.first_not_of(" \t", *i) : r.first_not_of(' ', *i);
-    if(first != npos)
+    if(m_evt_handler->m_curr->line_contents.indentation == 0u && _at_line_begin()) // document tokens and directives are only looked for on unindented lines, at the line start
     {
-        if(r[first] == '\n' || r[first] == '\r') // skip trailing whitespace
+        const char first = rem.str[0];
+        _c4dbgp("rtop: zero indent + at line begin");
+        if(first == '-')
         {
-            _c4dbgfws("whitespace is trailing on line. firstnonws='{}'@{}", _c4prc(r[first]), first);
-            *i = first - 1; // correct for the loop increment
+            _c4dbgp("rtop: suspecting doc");
+            if(_is_doc_begin_token(rem)) // when this fails, control falls out to the generic dispatch below, where '-' may still start a block seq
+            {
+                _c4dbgp("rtop: begin doc");
+                _maybe_end_doc();
+                _begin2_doc_expl();
+                _set_indentation(0);
+                addrem_flags(RDOC|RUNK, NDOC);
+                _line_progressed(3u); // step over the 3-char document-begin token
+                _maybe_skip_whitespace_tokens();
+                return;
+            }
         }
-        else // a legit whitespace
+        else if(first == '.')
+        {
+            _c4dbgp("rtop: suspecting doc end");
+            if(_is_doc_end_token(rem))
+            {
+                _c4dbgp("rtop: end doc");
+                if(has_any(RDOC)) // only close a document that is actually open
+                {
+                    _end2_doc_expl();
+                }
+                else
+                {
+                    _c4dbgp("rtop: ignore end doc");
+                }
+                addrem_flags(NDOC|RUNK, RDOC);
+                _line_progressed(3u); // step over the 3-char document-end token
+                _maybe_skip_whitespace_tokens();
+                return;
+            }
+        }
+        else if(first == '%')
         {
-            m_filter_arena.str[(*pos)++] = curr;
-            _c4dbgfws("legit whitespace. sofar=[{}]~~~{}~~~", *pos, m_filter_arena.first(*pos));
+            _c4dbgpf("directive: {}", rem);
+            if(C4_UNLIKELY(!m_doc_empty && has_none(NDOC))) // a directive after document content is an error unless NDOC is set
+                _RYML_CB_ERR(m_evt_handler->m_stack.m_callbacks, "need document footer before directives");
+            _handle_directive(rem);
+            return;
         }
     }
-    else
-    {
-        _c4dbgfws("... everything else is trailing whitespace{}", "");
-        if C4_IF_CONSTEXPR (keep_trailing_whitespace)
-            for(size_t j = *i; j < r.len; ++j)
-                m_filter_arena.str[(*pos)++] = r[j];
-        *i = r.len;
-    }
-
-    #undef _c4dbgfws
-}
-
-
-//-----------------------------------------------------------------------------
-csubstr Parser::_filter_plain_scalar(substr s, size_t indentation)
-{
-    // a debugging scaffold:
-    #if 0
-    #define _c4dbgfps(...) _c4dbgpf("filt_plain_scalar" __VA_ARGS__)
-    #else
-    #define _c4dbgfps(...)
-    #endif
 
-    _c4dbgfps("before=~~~{}~~~", s);
+    /* no else-if! */
+    char first = rem.str[0]; // non-const: reassigned in the scalar branch after more whitespace is skipped
 
-    substr r = s.triml(" \t");
-    _grow_filter_arena(r.len);
-    size_t pos = 0; // the filtered size
-    bool filtered_chars = false;
-    for(size_t i = 0; i < r.len; ++i)
+    if(first == '[')
     {
-        const char curr = r.str[i];
-        _c4dbgfps("[{}]: '{}'", i, _c4prc(curr));
-        if(curr == ' ' || curr == '\t')
+        m_evt_handler->check_trailing_doc_token();
+        _maybe_begin_doc();
+        m_doc_empty = false;
+        const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(rem);
+        if(C4_LIKELY( ! _annotations_require_key_container())) // common case: the flow seq is a value
         {
-            _filter_ws</*keep_trailing_ws*/false>(r, &i, &pos);
+            _c4dbgp("it's a seq, flow");
+            _handle_annotations_before_blck_val_scalar();
+            m_evt_handler->begin_seq_val_flow();
+            addrem_flags(RSEQ|FLOW|RVAL, RUNK|RTOP|RDOC);
+            _set_indentation(startindent);
         }
-        else if(curr == '\n')
+        else
         {
-            filtered_chars = _filter_nl</*backslash_is_escape*/false, /*keep_trailing_ws*/false>(r, &i, &pos, indentation);
+            _c4dbgp("start new block map, set flow seq as key (!)");
+            _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
+            m_evt_handler->begin_map_val_block(); // open the enclosing block map first ...
+            addrem_flags(RMAP|BLCK|RKCL, RUNK|RTOP|RDOC);
+            _handle_annotations_and_indentation_after_start_mapblck(startindent, m_evt_handler->m_curr->pos.line);
+            m_evt_handler->begin_seq_key_flow(); // ... then open the flow seq as its key
+            addrem_flags(RSEQ|FLOW|RVAL, RMAP|BLCK|RKCL);
+            _set_indentation(startindent);
         }
-        else if(curr == '\r')  // skip \r --- https://stackoverflow.com/questions/1885900
+        _line_progressed(1); // step over the '['
+    }
+    else if(first == '{')
+    {
+        m_evt_handler->check_trailing_doc_token();
+        _maybe_begin_doc();
+        m_doc_empty = false;
+        const size_t startindent = m_evt_handler->m_curr->line_contents.current_col(rem);
+        if(C4_LIKELY( ! _annotations_require_key_container())) // common case: the flow map is a value
         {
-            ;
+            _c4dbgp("it's a map, flow");
+            _handle_annotations_before_blck_val_scalar();
+            m_evt_handler->begin_map_val_flow();
+            addrem_flags(RMAP|FLOW|RKEY, RVAL|RTOP|RUNK|RDOC);
+            _set_indentation(startindent);
         }
         else
         {
-            m_filter_arena.str[pos++] = r[i];
+            _c4dbgp("start new block map, set flow map as key (!)");
+            _handle_annotations_before_start_mapblck(m_evt_handler->m_curr->pos.line);
+            m_evt_handler->begin_map_val_block(); // open the enclosing block map first ...
+            addrem_flags(RMAP|BLCK|RKCL, RUNK|RTOP|RDOC);
+            _handle_annotations_and_indentation_after_start_mapblck(startindent, m_evt_handler->m_curr->pos.line);
+            m_evt_handler->begin_map_key_flow(); // ... then open the flow map as its key
+            addrem_flags(RMAP|FLOW|RKEY, BLCK|RKCL);
+            _set_indentation(startindent);
         }
+        _line_progressed(1); // step over the '{'
     }
-
-    _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len);
-    if(pos < r.len || filtered_chars)
+    else if(first == '-' && _is_blck_token(rem))
     {
-        r = _finish_filter_arena(r, pos);
+        _c4dbgp("it's a seq, block");
+        m_evt_handler->check_trailing_doc_token();
+        _maybe_begin_doc();
+        _handle_annotations_before_blck_val_scalar();
+        m_evt_handler->begin_seq_val_block();
+        addrem_flags(RSEQ|BLCK|RVAL, RNXT|RTOP|RUNK|RDOC);
+        m_doc_empty = false;
+        _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
+        _line_progressed(1); // step over the '-'
+        _maybe_skip_whitespace_tokens();
+    }
+    else if(first == '?' && _is_blck_token(rem))
+    {
+        _c4dbgp("it's a map + this key is complex");
+        m_evt_handler->check_trailing_doc_token();
+        _maybe_begin_doc();
+        _handle_annotations_before_blck_val_scalar();
+        m_evt_handler->begin_map_val_block();
+        addrem_flags(RMAP|BLCK|QMRK, RKEY|RVAL|RTOP|RUNK);
+        m_doc_empty = false;
+        m_was_inside_qmrk = true;
+        _save_indentation();
+        _line_progressed(1); // step over the '?'
+        _maybe_skip_whitespace_tokens();
+    }
+    else if(first == ':' && _is_blck_token(rem))
+    {
+        if(m_doc_empty) // no content recorded for this doc yet: open a map whose first key is empty
+        {
+            _c4dbgp("it's a map with an empty key");
+            m_evt_handler->check_trailing_doc_token();
+            _maybe_begin_doc();
+            _handle_annotations_before_blck_val_scalar();
+            m_evt_handler->begin_map_val_block();
+            m_evt_handler->set_key_scalar_plain({});
+            m_doc_empty = false;
+            _save_indentation();
+        }
+        else
+        {
+            _c4dbgp("actually prev val is a key!");
+            size_t prev_indentation = m_evt_handler->m_curr->indref; // read before the handler call below, then re-applied after it
+            m_evt_handler->actually_val_is_first_key_of_new_map_block();
+            _set_indentation(prev_indentation);
+        }
+        addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
+        _line_progressed(1); // step over the ':'
+        _maybe_skip_whitespace_tokens();
+    }
+    else if(first == '&')
+    {
+        csubstr anchor = _scan_anchor();
+        _c4dbgpf("anchor! [{}]~~~{}~~~", anchor.len, anchor);
+        m_evt_handler->check_trailing_doc_token();
+        _maybe_begin_doc();
+        const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
+        const size_t line = m_evt_handler->m_curr->pos.line;
+        _add_annotation(&m_pending_anchors, anchor, indentation, line); // buffered as pending; no node is emitted in this branch
+        _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
+        m_doc_empty = false;
+    }
+    else if(first == '*')
+    {
+        csubstr ref = _scan_ref_map();
+        _c4dbgpf("ref! [{}]~~~{}~~~", ref.len, ref);
+        m_evt_handler->check_trailing_doc_token();
+        _maybe_begin_doc();
+        m_doc_empty = false;
+        if(!_maybe_scan_following_colon()) // presumably true when a ':' follows -- confirm against the helper
+        {
+            _c4dbgp("runk: set val ref");
+            _handle_annotations_before_blck_val_scalar();
+            m_evt_handler->set_val_ref(ref);
+        }
+        else
+        {
+            _c4dbgp("runk: start new block map, set ref as key");
+            const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
+            const size_t startline = m_evt_handler->m_curr->pos.line; // save
+            _handle_annotations_before_start_mapblck(startline);
+            m_evt_handler->begin_map_val_block();
+            _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
+            m_evt_handler->set_key_ref(ref);
+            _maybe_skip_whitespace_tokens();
+            _set_indentation(startindent);
+            addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
+        }
+    }
+    else if(first == '!')
+    {
+        csubstr tag = _scan_tag();
+        _c4dbgpf("unk: val tag! [{}]~~~{}~~~", tag.len, tag);
+        // we need to buffer the tags, as there may be two
+        // consecutive tags in here
+        const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
+        const size_t line = m_evt_handler->m_curr->pos.line;
+        _add_annotation(&m_pending_tags, tag, indentation, line);
+    }
+    else
+    {
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks,  ! has_any(SSCL));
+        _maybe_skip_whitespace_tokens();
+        csubstr s = m_evt_handler->m_curr->line_contents.rem;
+        if(!s.len)
+            return;
+        const size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
+        const size_t startline = m_evt_handler->m_curr->pos.line; // save
+        first = s.str[0]; // refresh: more whitespace may have been skipped just above
+        ScannedScalar sc;
+        if(first == '\'')
+        {
+            _c4dbgp("runk: scanning single-quoted scalar");
+            m_evt_handler->check_trailing_doc_token();
+            _maybe_begin_doc();
+            add_flags(RDOC);
+            m_doc_empty = false;
+            sc = _scan_scalar_squot();
+            if(!_maybe_scan_following_colon())
+            {
+                _c4dbgp("runk: set as val");
+                _handle_annotations_before_blck_val_scalar();
+                csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
+                m_evt_handler->set_val_scalar_squoted(maybe_filtered);
+            }
+            else
+            {
+                _c4dbgp("runk: start new block map, set scalar as key");
+                _handle_annotations_before_start_mapblck(startline);
+                m_evt_handler->begin_map_val_block();
+                _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
+                csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
+                m_evt_handler->set_key_scalar_squoted(maybe_filtered);
+                _maybe_skip_whitespace_tokens();
+                _set_indentation(startindent);
+                addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
+            }
+        }
+        else if(first == '"')
+        {
+            _c4dbgp("runk: scanning double-quoted scalar");
+            m_evt_handler->check_trailing_doc_token();
+            _maybe_begin_doc();
+            add_flags(RDOC);
+            m_doc_empty = false;
+            sc = _scan_scalar_dquot();
+            if(!_maybe_scan_following_colon())
+            {
+                _c4dbgp("runk: set as val");
+                _handle_annotations_before_blck_val_scalar();
+                csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
+                m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
+            }
+            else
+            {
+                _c4dbgp("runk: start new block map, set double-quoted scalar as key");
+                _handle_annotations_before_start_mapblck(startline);
+                m_evt_handler->begin_map_val_block();
+                _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
+                csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
+                m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
+                _maybe_skip_whitespace_tokens();
+                _set_indentation(startindent);
+                addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
+            }
+        }
+        else if(first == '|')
+        {
+            _c4dbgp("runk: scanning block-literal scalar");
+            m_evt_handler->check_trailing_doc_token();
+            _maybe_begin_doc();
+            add_flags(RDOC);
+            m_doc_empty = false;
+            ScannedBlock sb;
+            _scan_block(&sb, startindent);
+            if(C4_LIKELY(!_maybe_scan_following_colon()))
+            {
+                _c4dbgp("runk: set as val");
+                _handle_annotations_before_blck_val_scalar();
+                csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
+                m_evt_handler->set_val_scalar_literal(maybe_filtered);
+            }
+            else
+            {
+                _c4err("block literal keys must be enclosed in '?'");
+            }
+        }
+        else if(first == '>')
+        {
+            _c4dbgp("runk: scanning block-folded scalar");
+            m_evt_handler->check_trailing_doc_token();
+            _maybe_begin_doc();
+            add_flags(RDOC);
+            m_doc_empty = false;
+            ScannedBlock sb;
+            _scan_block(&sb, startindent);
+            if(C4_LIKELY(!_maybe_scan_following_colon()))
+            {
+                _c4dbgp("runk: set as val");
+                _handle_annotations_before_blck_val_scalar();
+                csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
+                m_evt_handler->set_val_scalar_folded(maybe_filtered);
+            }
+            else
+            {
+                _c4err("block folded keys must be enclosed in '?'");
+            }
+        }
+        else if(_scan_scalar_plain_unk(&sc)) // when this scan fails too, the function returns without emitting anything
+        {
+            _c4dbgp("runk: got a plain scalar");
+            m_evt_handler->check_trailing_doc_token();
+            _maybe_begin_doc();
+            add_flags(RDOC);
+            m_doc_empty = false;
+            if(!_maybe_scan_following_colon())
+            {
+                _c4dbgp("runk: set as val");
+                _handle_annotations_before_blck_val_scalar();
+                csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
+                m_evt_handler->set_val_scalar_plain(maybe_filtered);
+            }
+            else
+            {
+                _c4dbgp("runk: start new block map, set scalar as key");
+                _handle_annotations_before_start_mapblck(startline);
+                m_evt_handler->begin_map_val_block();
+                _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
+                csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
+                m_evt_handler->set_key_scalar_plain(maybe_filtered);
+                _maybe_skip_whitespace_tokens();
+                _set_indentation(startindent);
+                addrem_flags(RMAP|BLCK|RVAL, RTOP|RUNK|RDOC);
+            }
+        }
     }
-
-    _RYML_CB_ASSERT(m_stack.m_callbacks, s.len >= r.len);
-    _c4dbgfps("#filteredchars={} after=~~~{}~~~", s.len - r.len, r);
-
-    #undef _c4dbgfps
-    return r;
 }
 
 
 //-----------------------------------------------------------------------------
-csubstr Parser::_filter_squot_scalar(substr s)
+
+template<class EventHandler>
+C4_COLD void ParseEngine<EventHandler>::_handle_usty()
 {
-    // a debugging scaffold:
-    #if 0
-    #define _c4dbgfsq(...) _c4dbgpf("filt_squo_scalar")
-    #else
-    #define _c4dbgfsq(...)
+    _c4dbgpf("handle_usty indref={} target={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->node_id);
+
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_none(BLCK|FLOW));
+
+    #ifdef RYML_NO_COVERAGE__TO_BE_DELETED
+    if(has_any(RNXT))
+    {
+        _c4dbgp("usty[RNXT]: finishing!");
+        _end_stream();
+    }
     #endif
 
-    // from the YAML spec for double-quoted scalars:
-    // https://yaml.org/spec/1.2-old/spec.html#style/flow/single-quoted
+    _maybe_skip_comment();
+    csubstr rem = m_evt_handler->m_curr->line_contents.rem;
+    if(!rem.len)
+        return;
 
-    _c4dbgfsq(": before=~~~{}~~~", s);
+    size_t pos = rem.first_not_of(" \t");
+    if(pos)
+    {
+        pos = pos != npos ? pos : rem.len;
+        _c4dbgpf("skipping indentation of {}", pos);
+        _line_progressed(pos);
+        rem = m_evt_handler->m_curr->line_contents.rem;
+        if(!rem.len)
+            return;
+        _c4dbgpf("rem is now [{}]~~~{}~~~", rem.len, rem);
+    }
 
-    _grow_filter_arena(s.len);
-    substr r = s;
-    size_t pos = 0; // the filtered size
-    bool filtered_chars = false;
-    for(size_t i = 0; i < r.len; ++i)
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, rem.len > 0);
+    size_t startindent = m_evt_handler->m_curr->line_contents.indentation; // save
+    char first = rem.str[0];
+    if(has_any(RSEQ)) // destination is a sequence
     {
-        const char curr = r[i];
-        _c4dbgfsq("[{}]: '{}'", i, _c4prc(curr));
-        if(curr == ' ' || curr == '\t')
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks,  ! has_any(RMAP));
+        _c4dbgpf("usty[RSEQ]: first='{}'", _c4prc(first));
+        if(first == '[')
+        {
+            _c4dbgp("usty[RSEQ]: it's a flow seq. merging it");
+            add_flags(RNXT);
+            m_evt_handler->_push();
+            addrem_flags(FLOW|RVAL, RNXT|USTY);
+            _set_indentation(startindent);
+            _line_progressed(1);
+            _maybe_skip_whitespace_tokens();
+        }
+        else if(first == '-' && _is_blck_token(rem))
         {
-            _filter_ws</*keep_trailing_ws*/true>(r, &i, &pos);
+            _c4dbgp("usty[RSEQ]: it's a block seq. merging it");
+            add_flags(RNXT);
+            m_evt_handler->_push();
+            addrem_flags(BLCK|RVAL, RNXT|USTY);
+            _set_indentation(startindent);
+            _line_progressed(1);
+            _maybe_skip_whitespace_tokens();
         }
-        else if(curr == '\n')
+        else
         {
-            filtered_chars = _filter_nl</*backslash_is_escape*/false, /*keep_trailing_ws*/true>(r, &i, &pos, /*indentation*/0);
+            _c4err("can only parse a seq into an existing seq");
         }
-        else if(curr == '\r')  // skip \r --- https://stackoverflow.com/questions/1885900
+    }
+    else if(has_any(RMAP)) // destination is a map
+    {
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks,  ! has_any(RSEQ));
+        _c4dbgpf("usty[RMAP]: first='{}'", _c4prc(first));
+        if(first == '{')
         {
-            ;
+            _c4dbgp("usty[RMAP]: it's a flow map. merging it");
+            add_flags(RNXT);
+            _handle_annotations_before_blck_val_scalar();
+            m_evt_handler->_push();
+            addrem_flags(RMAP|FLOW|RKEY, RNXT|USTY);
+            _set_indentation(startindent);
+            _line_progressed(1);
+            _maybe_skip_whitespace_tokens();
         }
-        else if(curr == '\'')
+        else if(first == '?' && _is_blck_token(rem))
         {
-            char next = i+1 < r.len ? r[i+1] : '\0';
-            if(next == '\'')
-            {
-                _c4dbgfsq("[{}]: two consecutive quotes", i);
-                filtered_chars = true;
-                m_filter_arena.str[pos++] = '\'';
-                ++i;
-            }
+            _c4dbgp("usty[RMAP]: it's a block map + this key is complex");
+            add_flags(RNXT);
+            _handle_annotations_before_blck_val_scalar();
+            m_evt_handler->_push();
+            addrem_flags(RMAP|BLCK|QMRK, RNXT|USTY);
+            m_was_inside_qmrk = true;
+            _save_indentation();
+            _line_progressed(1);
+            _maybe_skip_whitespace_tokens();
         }
-        else
+        else if(first == ':' && _is_blck_token(rem))
+        {
+            _c4dbgp("usty[RMAP]: it's a map with an empty key");
+            add_flags(RNXT);
+            _handle_annotations_before_blck_val_scalar();
+            m_evt_handler->_push();
+            m_evt_handler->set_key_scalar_plain({});
+            addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
+            _save_indentation();
+            _line_progressed(1);
+            _maybe_skip_whitespace_tokens();
+        }
+        else if(rem.begins_with('&'))
         {
-            m_filter_arena.str[pos++] = curr;
+            csubstr anchor = _scan_anchor();
+            _c4dbgpf("usty[RMAP]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
+            const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
+            const size_t line = m_evt_handler->m_curr->pos.line;
+            _add_annotation(&m_pending_anchors, anchor, indentation, line);
+            _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
         }
-    }
-
-    _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len);
-    if(pos < r.len || filtered_chars)
-    {
-        r = _finish_filter_arena(r, pos);
-    }
-
-    _RYML_CB_ASSERT(m_stack.m_callbacks, s.len >= r.len);
-    _c4dbgpf(": #filteredchars={} after=~~~{}~~~", s.len - r.len, r);
-
-    #undef _c4dbgfsq
-    return r;
-}
-
-
-//-----------------------------------------------------------------------------
-csubstr Parser::_filter_dquot_scalar(substr s)
-{
-    // a debugging scaffold:
-    #if 0
-    #define _c4dbgfdq(...) _c4dbgpf("filt_dquo_scalar" __VA_ARGS__)
-    #else
-    #define _c4dbgfdq(...)
-    #endif
-
-    _c4dbgfdq(": before=~~~{}~~~", s);
-
-    // from the YAML spec for double-quoted scalars:
-    // https://yaml.org/spec/1.2-old/spec.html#style/flow/double-quoted
-    //
-    // All leading and trailing white space characters are excluded
-    // from the content. Each continuation line must therefore contain
-    // at least one non-space character. Empty lines, if any, are
-    // consumed as part of the line folding.
-
-    _grow_filter_arena(s.len + 2u * s.count('\\'));
-    substr r = s;
-    size_t pos = 0; // the filtered size
-    bool filtered_chars = false;
-    for(size_t i = 0; i < r.len; ++i)
-    {
-        const char curr = r[i];
-        _c4dbgfdq("[{}]: '{}'", i, _c4prc(curr));
-        if(curr == ' ' || curr == '\t')
+        else if(first == '*')
         {
-            _filter_ws</*keep_trailing_ws*/true>(r, &i, &pos);
+            csubstr ref = _scan_ref_map();
+            _c4dbgpf("usty[RMAP]: ref! [{}]~~~{}~~~", ref.len, ref);
+            if(!_maybe_scan_following_colon())
+            {
+                _c4err("cannot read a VAL to a map");
+            }
+            else
+            {
+                _c4dbgp("usty[RMAP]: start new block map, set ref as key");
+                const size_t startline = m_evt_handler->m_curr->pos.line; // save
+                add_flags(RNXT);
+                _handle_annotations_before_start_mapblck(startline);
+                m_evt_handler->_push();
+                _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
+                m_evt_handler->set_key_ref(ref);
+                _maybe_skip_whitespace_tokens();
+                _set_indentation(startindent);
+                addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
+            }
         }
-        else if(curr == '\n')
+        else if(first == '!')
         {
-            filtered_chars = _filter_nl</*backslash_is_escape*/true, /*keep_trailing_ws*/true>(r, &i, &pos, /*indentation*/0);
+            csubstr tag = _scan_tag();
+            _c4dbgpf("usty[RMAP]: val tag! [{}]~~~{}~~~", tag.len, tag);
+            // we need to buffer the tags, as there may be two
+            // consecutive tags in here
+            const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
+            const size_t line = m_evt_handler->m_curr->pos.line;
+            _add_annotation(&m_pending_tags, tag, indentation, line);
         }
-        else if(curr == '\r')  // skip \r --- https://stackoverflow.com/questions/1885900
+        else if(first == '[' || (first == '-' && _is_blck_token(rem)))
         {
-            ;
+            _c4err("cannot parse a seq into an existing map");
         }
-        else if(curr == '\\')
+        else
         {
-            char next = i+1 < r.len ? r[i+1] : '\0';
-            _c4dbgfdq("[{}]: backslash, next='{}'", i, _c4prc(next));
-            filtered_chars = true;
-            if(next == '\r')
+            _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks,  ! has_any(SSCL));
+            startindent = m_evt_handler->m_curr->line_contents.indentation; // save
+            const size_t startline = m_evt_handler->m_curr->pos.line; // save
+            ScannedScalar sc;
+            _c4dbgpf("usty[RMAP]: maybe scalar. first='{}'", _c4prc(first));
+            if(first == '\'')
             {
-                if(i+2 < r.len && r[i+2] == '\n')
+                _c4dbgp("usty[RMAP]: scanning single-quoted scalar");
+                sc = _scan_scalar_squot();
+                if(!_maybe_scan_following_colon())
                 {
-                    ++i; // newline escaped with \ -- skip both (add only one as i is loop-incremented)
-                    next = '\n';
-                    _c4dbgfdq("[{}]: was \\r\\n, now next='\\n'", i);
+                    _c4err("cannot read a VAL to a map");
                 }
-            }
-            // remember the loop will also increment i
-            if(next == '\n')
-            {
-                size_t ii = i + 2;
-                for( ; ii < r.len; ++ii)
+                else
                 {
-                    if(r.str[ii] == ' ' || r.str[ii] == '\t')  // skip leading whitespace
-                        ;
-                    else
-                        break;
+                    _c4dbgp("usty[RMAP]: start new block map, set scalar as key");
+                    add_flags(RNXT);
+                    _handle_annotations_before_start_mapblck(startline);
+                    m_evt_handler->_push();
+                    _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
+                    csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
+                    m_evt_handler->set_key_scalar_squoted(maybe_filtered);
+                    _set_indentation(startindent);
+                    addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
+                    _maybe_skip_whitespace_tokens();
                 }
-                i += ii - i - 1;
-            }
-            else if(next == '"' || next == '/'  || next == ' ' || next == '\t') // escapes for json compatibility
-            {
-                m_filter_arena.str[pos++] = next;
-                ++i;
-            }
-            else if(next == '\r')
-            {
-                //++i;
             }
-            else if(next == 'n')
+            else if(first == '"')
             {
-                m_filter_arena.str[pos++] = '\n';
-                ++i;
+                _c4dbgp("usty[RMAP]: scanning double-quoted scalar");
+                sc = _scan_scalar_dquot();
+                if(!_maybe_scan_following_colon())
+                {
+                    _c4err("cannot read a VAL to a map");
+                }
+                else
+                {
+                    _c4dbgp("usty[RMAP]: start new block map, set double-quoted scalar as key");
+                    add_flags(RNXT);
+                    _handle_annotations_before_start_mapblck(startline);
+                    m_evt_handler->_push();
+                    _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
+                    csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
+                    m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
+                    _set_indentation(startindent);
+                    addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
+                    _maybe_skip_whitespace_tokens();
+                }
             }
-            else if(next == 'r')
+            else if(first == '|')
             {
-                m_filter_arena.str[pos++] = '\r';
-                ++i; // skip
+                _c4err("block literal keys must be enclosed in '?'");
             }
-            else if(next == 't')
+            else if(first == '>')
             {
-                m_filter_arena.str[pos++] = '\t';
-                ++i;
+                _c4err("block literal keys must be enclosed in '?'");
             }
-            else if(next == '\\')
+            else if(_scan_scalar_plain_unk(&sc))
             {
-                m_filter_arena.str[pos++] = '\\';
-                ++i;
-            }
-            else if(next == 'x') // UTF8
-            {
-                if(i + 1u + 2u >= r.len)
-                    _c4err("\\x requires 2 hex digits");
-                uint8_t byteval = {};
-                if(!read_hex(r.sub(i + 2u, 2u), &byteval))
-                    _c4err("failed to read \\x codepoint");
-                m_filter_arena.str[pos++] = *(char*)&byteval;
-                i += 1u + 2u;
-            }
-            else if(next == 'u') // UTF16
-            {
-                if(i + 1u + 4u >= r.len)
-                    _c4err("\\u requires 4 hex digits");
-                char readbuf[8];
-                csubstr codepoint = r.sub(i + 2u, 4u);
-                uint32_t codepoint_val = {};
-                if(!read_hex(codepoint, &codepoint_val))
-                    _c4err("failed to parse \\u codepoint");
-                size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val);
-                C4_ASSERT(numbytes <= 4);
-                memcpy(m_filter_arena.str + pos, readbuf, numbytes);
-                pos += numbytes;
-                i += 1u + 4u;
-            }
-            else if(next == 'U') // UTF32
-            {
-                if(i + 1u + 8u >= r.len)
-                    _c4err("\\U requires 8 hex digits");
-                char readbuf[8];
-                csubstr codepoint = r.sub(i + 2u, 8u);
-                uint32_t codepoint_val = {};
-                if(!read_hex(codepoint, &codepoint_val))
-                    _c4err("failed to parse \\U codepoint");
-                size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val);
-                C4_ASSERT(numbytes <= 4);
-                memcpy(m_filter_arena.str + pos, readbuf, numbytes);
-                pos += numbytes;
-                i += 1u + 8u;
-            }
-            // https://yaml.org/spec/1.2.2/#rule-c-ns-esc-char
-            else if(next == '0')
-            {
-                m_filter_arena.str[pos++] = '\0';
-                ++i;
+                _c4dbgp("usty[RMAP]: got a plain scalar");
+                if(!_maybe_scan_following_colon())
+                {
+                    _c4err("cannot read a VAL to a map");
+                }
+                else
+                {
+                    _c4dbgp("usty[RMAP]: start new block map, set scalar as key");
+                    add_flags(RNXT);
+                    _handle_annotations_before_start_mapblck(startline);
+                    m_evt_handler->_push();
+                    _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
+                    csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
+                    m_evt_handler->set_key_scalar_plain(maybe_filtered);
+                    _set_indentation(startindent);
+                    addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
+                    _maybe_skip_whitespace_tokens();
+                }
             }
-            else if(next == 'b') // backspace
+            else
             {
-                m_filter_arena.str[pos++] = '\b';
-                ++i;
+                _c4err("parse error");
             }
-            else if(next == 'f') // form feed
+        }
+    }
+    else // destination is unknown
+    {
+        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks,  ! has_any(RSEQ));
+        _c4dbgpf("usty[UNK]: first='{}'", _c4prc(first));
+        if(first == '[')
+        {
+            _c4dbgp("usty[UNK]: it's a flow seq");
+            add_flags(RNXT);
+            _handle_annotations_before_blck_val_scalar();
+            m_evt_handler->begin_seq_val_flow();
+            addrem_flags(RSEQ|FLOW|RVAL, RNXT|USTY);
+            _set_indentation(startindent);
+            _line_progressed(1);
+            _maybe_skip_whitespace_tokens();
+        }
+        else if(first == '-' && _is_blck_token(rem))
+        {
+            _c4dbgp("usty[UNK]: it's a block seq");
+            add_flags(RNXT);
+            _handle_annotations_before_blck_val_scalar();
+            m_evt_handler->begin_seq_val_block();
+            addrem_flags(RSEQ|BLCK|RVAL, RNXT|USTY);
+            _set_indentation(startindent);
+            _line_progressed(1);
+            _maybe_skip_whitespace_tokens();
+        }
+        else if(first == '{')
+        {
+            _c4dbgp("usty[UNK]: it's a flow map");
+            add_flags(RNXT);
+            _handle_annotations_before_blck_val_scalar();
+            m_evt_handler->begin_map_val_flow();
+            addrem_flags(RMAP|FLOW|RKEY, RNXT|USTY);
+            _set_indentation(startindent);
+            _line_progressed(1);
+            _maybe_skip_whitespace_tokens();
+        }
+        else if(first == '?' && _is_blck_token(rem))
+        {
+            _c4dbgp("usty[UNK]: it's a map + this key is complex");
+            add_flags(RNXT);
+            _handle_annotations_before_blck_val_scalar();
+            m_evt_handler->begin_map_val_block();
+            addrem_flags(RMAP|BLCK|QMRK, RNXT|USTY);
+            m_was_inside_qmrk = true;
+            _save_indentation();
+            _line_progressed(1);
+            _maybe_skip_whitespace_tokens();
+        }
+        else if(first == ':' && _is_blck_token(rem))
+        {
+            _c4dbgp("usty[UNK]: it's a map with an empty key");
+            add_flags(RNXT);
+            _handle_annotations_before_blck_val_scalar();
+            m_evt_handler->begin_map_val_block();
+            m_evt_handler->set_key_scalar_plain({});
+            addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
+            _save_indentation();
+            _line_progressed(1);
+            _maybe_skip_whitespace_tokens();
+        }
+        else if(first == '&')
+        {
+            csubstr anchor = _scan_anchor();
+            _c4dbgpf("usty[UNK]: anchor! [{}]~~~{}~~~", anchor.len, anchor);
+            const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
+            const size_t line = m_evt_handler->m_curr->pos.line;
+            _add_annotation(&m_pending_anchors, anchor, indentation, line);
+            _set_indentation(m_evt_handler->m_curr->line_contents.current_col(rem));
+        }
+        else if(first == '*')
+        {
+            csubstr ref = _scan_ref_map();
+            _c4dbgpf("usty[UNK]: ref! [{}]~~~{}~~~", ref.len, ref);
+            if(!_maybe_scan_following_colon())
             {
-                m_filter_arena.str[pos++] = '\f';
-                ++i;
+                _c4dbgp("usty[UNK]: set val ref");
+                _handle_annotations_before_blck_val_scalar();
+                m_evt_handler->set_val_ref(ref);
             }
-            else if(next == 'a') // bell character
+            else
             {
-                m_filter_arena.str[pos++] = '\a';
-                ++i;
+                _c4dbgp("usty[UNK]: start new block map, set ref as key");
+                const size_t startline = m_evt_handler->m_curr->pos.line; // save
+                add_flags(RNXT);
+                _handle_annotations_before_start_mapblck(startline);
+                m_evt_handler->begin_map_val_block();
+                _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
+                m_evt_handler->set_key_ref(ref);
+                _maybe_skip_whitespace_tokens();
+                _set_indentation(startindent);
+                addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
             }
-            else if(next == 'v') // vertical tab
+        }
+        else if(first == '!')
+        {
+            csubstr tag = _scan_tag();
+            _c4dbgpf("usty[UNK]: val tag! [{}]~~~{}~~~", tag.len, tag);
+            // we need to buffer the tags, as there may be two
+            // consecutive tags in here
+            const size_t indentation = m_evt_handler->m_curr->line_contents.current_col(rem);
+            const size_t line = m_evt_handler->m_curr->pos.line;
+            _add_annotation(&m_pending_tags, tag, indentation, line);
+        }
+        else
+        {
+            _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks,  ! has_any(SSCL));
+            startindent = m_evt_handler->m_curr->line_contents.indentation; // save
+            const size_t startline = m_evt_handler->m_curr->pos.line; // save
+            first = rem.str[0];
+            ScannedScalar sc;
+            _c4dbgpf("usty[UNK]: maybe scalar. first='{}'", _c4prc(first));
+            if(first == '\'')
             {
-                m_filter_arena.str[pos++] = '\v';
-                ++i;
+                _c4dbgp("usty[UNK]: scanning single-quoted scalar");
+                sc = _scan_scalar_squot();
+                if(!_maybe_scan_following_colon())
+                {
+                    _c4dbgp("usty[UNK]: set as val");
+                    _handle_annotations_before_blck_val_scalar();
+                    csubstr maybe_filtered = _maybe_filter_val_scalar_squot(sc);
+                    m_evt_handler->set_val_scalar_squoted(maybe_filtered);
+                    _end_stream();
+                }
+                else
+                {
+                    _c4dbgp("usty[UNK]: start new block map, set scalar as key");
+                    add_flags(RNXT);
+                    _handle_annotations_before_start_mapblck(startline);
+                    m_evt_handler->begin_map_val_block();
+                    _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
+                    csubstr maybe_filtered = _maybe_filter_key_scalar_squot(sc);
+                    m_evt_handler->set_key_scalar_squoted(maybe_filtered);
+                    _set_indentation(startindent);
+                    addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
+                    _maybe_skip_whitespace_tokens();
+                }
             }
-            else if(next == 'e') // escape character
+            else if(first == '"')
             {
-                m_filter_arena.str[pos++] = '\x1b';
-                ++i;
+                _c4dbgp("usty[UNK]: scanning double-quoted scalar");
+                sc = _scan_scalar_dquot();
+                if(!_maybe_scan_following_colon())
+                {
+                    _c4dbgp("usty[UNK]: set as val");
+                    _handle_annotations_before_blck_val_scalar();
+                    csubstr maybe_filtered = _maybe_filter_val_scalar_dquot(sc);
+                    m_evt_handler->set_val_scalar_dquoted(maybe_filtered);
+                    _end_stream();
+                }
+                else
+                {
+                    _c4dbgp("usty[UNK]: start new block map, set double-quoted scalar as key");
+                    add_flags(RNXT);
+                    _handle_annotations_before_start_mapblck(startline);
+                    m_evt_handler->begin_map_val_block();
+                    _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
+                    csubstr maybe_filtered = _maybe_filter_key_scalar_dquot(sc);
+                    m_evt_handler->set_key_scalar_dquoted(maybe_filtered);
+                    _set_indentation(startindent);
+                    addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
+                    _maybe_skip_whitespace_tokens();
+                }
             }
-            else if(next == '_') // unicode non breaking space \u00a0
+            else if(first == '|')
             {
-                // https://www.compart.com/en/unicode/U+00a0
-                m_filter_arena.str[pos++] = _RYML_CHCONST(-0x3e, 0xc2);
-                m_filter_arena.str[pos++] = _RYML_CHCONST(-0x60, 0xa0);
-                ++i;
+                _c4dbgp("usty[UNK]: scanning block-literal scalar");
+                ScannedBlock sb;
+                _scan_block(&sb, startindent);
+                _c4dbgp("usty[UNK]: set as val");
+                _handle_annotations_before_blck_val_scalar();
+                csubstr maybe_filtered = _maybe_filter_val_scalar_literal(sb);
+                m_evt_handler->set_val_scalar_literal(maybe_filtered);
+                _end_stream();
             }
-            else if(next == 'N') // unicode next line \u0085
+            else if(first == '>')
             {
-                // https://www.compart.com/en/unicode/U+0085
-                m_filter_arena.str[pos++] = _RYML_CHCONST(-0x3e, 0xc2);
-                m_filter_arena.str[pos++] = _RYML_CHCONST(-0x7b, 0x85);
-                ++i;
+                _c4dbgp("usty[UNK]: scanning block-folded scalar");
+                ScannedBlock sb;
+                _scan_block(&sb, startindent);
+                _c4dbgp("usty[UNK]: set as val");
+                _handle_annotations_before_blck_val_scalar();
+                csubstr maybe_filtered = _maybe_filter_val_scalar_folded(sb);
+                m_evt_handler->set_val_scalar_folded(maybe_filtered);
+                _end_stream();
             }
-            else if(next == 'L') // unicode line separator \u2028
+            else if(_scan_scalar_plain_unk(&sc))
             {
-                // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex
-                m_filter_arena.str[pos++] = _RYML_CHCONST(-0x1e, 0xe2);
-                m_filter_arena.str[pos++] = _RYML_CHCONST(-0x80, 0x80);
-                m_filter_arena.str[pos++] = _RYML_CHCONST(-0x58, 0xa8);
-                ++i;
+                _c4dbgp("usty[UNK]: got a plain scalar");
+                if(!_maybe_scan_following_colon())
+                {
+                    _c4dbgp("usty[UNK]: set as val");
+                    _handle_annotations_before_blck_val_scalar();
+                    csubstr maybe_filtered = _maybe_filter_val_scalar_plain(sc, startindent);
+                    m_evt_handler->set_val_scalar_plain(maybe_filtered);
+                    _end_stream();
+                }
+                else
+                {
+                    _c4dbgp("usty[UNK]: start new block map, set scalar as key");
+                    add_flags(RNXT);
+                    _handle_annotations_before_start_mapblck(startline);
+                    m_evt_handler->begin_map_val_block();
+                    _handle_annotations_and_indentation_after_start_mapblck(startindent, startline);
+                    csubstr maybe_filtered = _maybe_filter_key_scalar_plain(sc, startindent);
+                    m_evt_handler->set_key_scalar_plain(maybe_filtered);
+                    _set_indentation(startindent);
+                    addrem_flags(RMAP|BLCK|RVAL, RNXT|USTY);
+                    _maybe_skip_whitespace_tokens();
+                }
             }
-            else if(next == 'P') // unicode paragraph separator \u2029
+            else
             {
-                // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex
-                m_filter_arena.str[pos++] = _RYML_CHCONST(-0x1e, 0xe2);
-                m_filter_arena.str[pos++] = _RYML_CHCONST(-0x80, 0x80);
-                m_filter_arena.str[pos++] = _RYML_CHCONST(-0x57, 0xa9);
-                ++i;
+                _c4err("parse error");
             }
-            _c4dbgfdq("[{}]: backslash...sofar=[{}]~~~{}~~~", i, pos, m_filter_arena.first(pos));
-        }
-        else
-        {
-            m_filter_arena.str[pos++] = curr;
         }
     }
-
-    _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len);
-    if(pos < r.len || filtered_chars)
-    {
-        r = _finish_filter_arena(r, pos);
-    }
-
-    _RYML_CB_ASSERT(m_stack.m_callbacks, s.len >= r.len);
-    _c4dbgpf(": #filteredchars={} after=~~~{}~~~", s.len - r.len, r);
-
-    #undef _c4dbgfdq
-
-    return r;
 }
 
 
 //-----------------------------------------------------------------------------
-bool Parser::_apply_chomp(substr buf, size_t *C4_RESTRICT pos, BlockChomp_e chomp)
+
+template<class EventHandler>
+void ParseEngine<EventHandler>::parse_json_in_place_ev(csubstr filename, substr src)
 {
-    substr trimmed = buf.first(*pos).trimr('\n');
-    bool added_newline = false;
-    switch(chomp)
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
+    m_file = filename;
+    m_buf = src;
+    _reset();
+    m_evt_handler->start_parse(filename.str, &_s_relocate_arena, this);
+    m_evt_handler->begin_stream();
+    while( ! _finished_file())
     {
-    case CHOMP_KEEP:
-        if(trimmed.len == *pos)
-        {
-            _c4dbgpf("chomp=KEEP: add missing newline @{}", *pos);
-            //m_filter_arena.str[(*pos)++] = '\n';
-            added_newline = true;
-        }
-        break;
-    case CHOMP_CLIP:
-        if(trimmed.len == *pos)
-        {
-            _c4dbgpf("chomp=CLIP: add missing newline @{}", *pos);
-            m_filter_arena.str[(*pos)++] = '\n';
-            added_newline = true;
-        }
-        else
+        _scan_line();
+        while( ! _finished_line())
         {
-            _c4dbgpf("chomp=CLIP: include single trailing newline @{}", trimmed.len+1);
-            *pos = trimmed.len + 1;
+            _c4dbgnextline();
+            _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks,  ! m_evt_handler->m_curr->line_contents.rem.empty());
+            if(has_any(RSEQ))
+            {
+                _handle_seq_json();
+            }
+            else if(has_any(RMAP))
+            {
+                _handle_map_json();
+            }
+            else if(has_any(RUNK))
+            {
+                _handle_unk_json();
+            }
+            else
+            {
+                _c4err("internal error");
+            }
         }
-        break;
-    case CHOMP_STRIP:
-        _c4dbgpf("chomp=STRIP: strip {}-{}-{} newlines", *pos, trimmed.len, *pos-trimmed.len);
-        *pos = trimmed.len;
-        break;
-    default:
-        _c4err("unknown chomp style");
+        if(_finished_file())
+            break; // it may have finished because of multiline blocks
+        _line_ended();
     }
-    return added_newline;
+    _end_stream();
+    m_evt_handler->finish_parse();
 }
 
 
 //-----------------------------------------------------------------------------
-csubstr Parser::_filter_block_scalar(substr s, BlockStyle_e style, BlockChomp_e chomp, size_t indentation)
-{
-    // a debugging scaffold:
-    #if 0
-    #define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block" fmt, __VA_ARGS__)
-    #else
-    #define _c4dbgfbl(...)
-    #endif
-
-    _c4dbgfbl(": indentation={} before=[{}]~~~{}~~~", indentation, s.len, s);
-
-    if(chomp != CHOMP_KEEP && s.trim(" \n\r\t").len == 0u)
-    {
-        _c4dbgp("filt_block: empty scalar");
-        return s.first(0);
-    }
 
-    substr r = s;
-
-    switch(style)
+template<class EventHandler>
+void ParseEngine<EventHandler>::parse_in_place_ev(csubstr filename, substr src)
+{
+    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, m_evt_handler->m_stack.size() >= 1);
+    m_file = filename;
+    m_buf = src;
+    _reset();
+    m_evt_handler->start_parse(filename.str, &_s_relocate_arena, this);
+    m_evt_handler->begin_stream();
+    while( ! _finished_file())
     {
-    case BLOCK_LITERAL:
+        _scan_line();
+        while( ! _finished_line())
         {
-            _c4dbgp("filt_block: style=literal");
-            // trim leading whitespace up to indentation
+            _c4dbgnextline();
+            _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks,  ! m_evt_handler->m_curr->line_contents.rem.empty());
+            if(has_any(FLOW))
             {
-                size_t numws = r.first_not_of(' ');
-                if(numws != npos)
-                {
-                    if(numws > indentation)
-                        r = r.sub(indentation);
-                    else
-                        r = r.sub(numws);
-                    _c4dbgfbl(": after triml=[{}]~~~{}~~~", r.len, r);
-                }
-                else
+                if(has_none(RSEQIMAP))
                 {
-                    if(chomp != CHOMP_KEEP || r.len == 0)
+                    if(has_any(RSEQ))
                     {
-                        _c4dbgfbl(": all spaces {}, return empty", r.len);
-                        return r.first(0);
+                        _handle_seq_flow();
                     }
                     else
                     {
-                        r[0] = '\n';
-                        return r.first(1);
+                        _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
+                        _handle_map_flow();
                     }
                 }
-            }
-            _grow_filter_arena(s.len + 2u);  // use s.len! because we may need to add a newline at the end, so the leading indentation will allow space for that newline
-            size_t pos = 0; // the filtered size
-            for(size_t i = 0; i < r.len; ++i)
-            {
-                const char curr = r.str[i];
-                _c4dbgfbl("[{}]='{}'  pos={}", i, _c4prc(curr), pos);
-                if(curr == '\r')
-                    continue;
-                m_filter_arena.str[pos++] = curr;
-                if(curr == '\n')
+                else
                 {
-                    _c4dbgfbl("[{}]: found newline", i);
-                    // skip indentation on the next line
-                    csubstr rem = r.sub(i+1);
-                    size_t first = rem.first_not_of(' ');
-                    if(first != npos)
-                    {
-                        _RYML_CB_ASSERT(m_stack.m_callbacks, first < rem.len);
-                        _RYML_CB_ASSERT(m_stack.m_callbacks, i+1+first < r.len);
-                        _c4dbgfbl("[{}]: {} spaces follow before next nonws character @ [{}]='{}'", i, first, i+1+first, rem.str[first]);
-                        if(first < indentation)
-                        {
-                            _c4dbgfbl("[{}]: skip {}<{} spaces from indentation", i, first, indentation);
-                            i += first;
-                        }
-                        else
-                        {
-                            _c4dbgfbl("[{}]: skip {} spaces from indentation", i, indentation);
-                            i += indentation;
-                        }
-                    }
-                    else
-                    {
-                        _RYML_CB_ASSERT(m_stack.m_callbacks, i+1 <= r.len);
-                        first = rem.len;
-                        _c4dbgfbl("[{}]: {} spaces to the end", i, first);
-                        if(first)
-                        {
-                            if(first < indentation)
-                            {
-                                _c4dbgfbl("[{}]: skip everything", i);
-                                --pos;
-                                break;
-                            }
-                            else
-                            {
-                                _c4dbgfbl("[{}]: skip {} spaces from indentation", i, indentation);
-                                i += indentation;
-                            }
-                        }
-                        else if(i+1 == r.len)
-                        {
-                            if(chomp == CHOMP_STRIP)
-                                --pos;
-                            break;
-                        }
-                    }
+                    _handle_seq_imap();
                 }
             }
-            _RYML_CB_ASSERT(m_stack.m_callbacks, s.len >= pos);
-            _c4dbgfbl(": #filteredchars={} after=~~~{}~~~", s.len - r.len, r);
-            bool changed = _apply_chomp(m_filter_arena, &pos, chomp);
-            _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len);
-            _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= s.len);
-            if(pos < r.len || changed)
-            {
-                r = _finish_filter_arena(s, pos); // write into s
-            }
-            break;
-        }
-    case BLOCK_FOLD:
-        {
-            _c4dbgp("filt_block: style=fold");
-            _grow_filter_arena(r.len + 2);
-            size_t pos = 0; // the filtered size
-            bool filtered_chars = false;
-            bool started = false;
-            bool is_indented = false;
-            size_t i = r.first_not_of(' ');
-            _c4dbgfbl(": first non space at {}", i);
-            if(i > indentation)
-            {
-                is_indented = true;
-                i = indentation;
-            }
-            _c4dbgfbl(": start folding at {}, is_indented={}", i, (int)is_indented);
-            auto on_change_indentation = [&](size_t numnl_following, size_t last_newl, size_t first_non_whitespace){
-                _c4dbgfbl("[{}]: add 1+{} newlines", i, numnl_following);
-                for(size_t j = 0; j < 1 + numnl_following; ++j)
-                    m_filter_arena.str[pos++] = '\n';
-                for(i = last_newl + 1 + indentation; i < first_non_whitespace; ++i)
-                {
-                    if(r.str[i] == '\r')
-                        continue;
-                    _c4dbgfbl("[{}]: add '{}'", i, _c4prc(r.str[i]));
-                    m_filter_arena.str[pos++] = r.str[i];
-                }
-                --i;
-            };
-            for( ; i < r.len; ++i)
+            else if(has_any(BLCK))
             {
-                const char curr = r.str[i];
-                _c4dbgfbl("[{}]='{}'", i, _c4prc(curr));
-                if(curr == '\n')
+                if(has_any(RSEQ))
                 {
-                    filtered_chars = true;
-                    // skip indentation on the next line, and advance over the next non-indented blank lines as well
-                    size_t first_non_whitespace;
-                    size_t numnl_following = (size_t)-1;
-                    while(r[i] == '\n')
-                    {
-                        ++numnl_following;
-                        csubstr rem = r.sub(i+1);
-                        size_t first = rem.first_not_of(' ');
-                        _c4dbgfbl("[{}]: found newline. first={} rem.len={}", i, first, rem.len);
-                        if(first != npos)
-                        {
-                            first_non_whitespace = first + i+1;
-                            while(first_non_whitespace < r.len && r[first_non_whitespace] == '\r')
-                                ++first_non_whitespace;
-                            _RYML_CB_ASSERT(m_stack.m_callbacks, first < rem.len);
-                            _RYML_CB_ASSERT(m_stack.m_callbacks, i+1+first < r.len);
-                            _c4dbgfbl("[{}]: {} spaces follow before next nonws character @ [{}]='{}'", i, first, i+1+first, _c4prc(rem.str[first]));
-                            if(first < indentation)
-                            {
-                                _c4dbgfbl("[{}]: skip {}<{} spaces from indentation", i, first, indentation);
-                                i += first;
-                            }
-                            else
-                            {
-                                _c4dbgfbl("[{}]: skip {} spaces from indentation", i, indentation);
-                                i += indentation;
-                                if(first > indentation)
-                                {
-                                    _c4dbgfbl("[{}]: {} further indented than {}, stop newlining", i, first, indentation);
-                                    goto finished_counting_newlines;
-                                }
-                            }
-                            // prepare the next while loop iteration
-                            // by setting i at the next newline after
-                            // an empty line
-                            if(r[first_non_whitespace] == '\n')
-                                i = first_non_whitespace;
-                            else
-                                goto finished_counting_newlines;
-                        }
-                        else
-                        {
-                            _RYML_CB_ASSERT(m_stack.m_callbacks, i+1 <= r.len);
-                            first = rem.len;
-                            first_non_whitespace = first + i+1;
-                            if(first)
-                            {
-                                _c4dbgfbl("[{}]: {} spaces to the end", i, first);
-                                if(first < indentation)
-                                {
-                                    _c4dbgfbl("[{}]: skip everything", i);
-                                    i += first;
-                                }
-                                else
-                                {
-                                    _c4dbgfbl("[{}]: skip {} spaces from indentation", i, indentation);
-                                    i += indentation;
-                                    if(first > indentation)
-                                    {
-                                        _c4dbgfbl("[{}]: {} spaces missing. not done yet", i, indentation - first);
-                                        goto finished_counting_newlines;
-                                    }
-                                }
-                            }
-                            else // if(i+1 == r.len)
-                            {
-                                _c4dbgfbl("[{}]: it's the final newline", i);
-                                _RYML_CB_ASSERT(m_stack.m_callbacks, i+1 == r.len);
-                                _RYML_CB_ASSERT(m_stack.m_callbacks, rem.len == 0);
-                            }
-                            goto end_of_scalar;
-                        }
-                    }
-                end_of_scalar:
-                    // Write all the trailing newlines. Since we're
-                    // at the end no folding is needed, so write every
-                    // newline (add 1).
-                    _c4dbgfbl("[{}]: add {} trailing newlines", i, 1+numnl_following);
-                    for(size_t j = 0; j < 1 + numnl_following; ++j)
-                        m_filter_arena.str[pos++] = '\n';
-                    break;
-                finished_counting_newlines:
-                    _c4dbgfbl("[{}]: #newlines={} firstnonws={}", i, numnl_following, first_non_whitespace);
-                    while(first_non_whitespace < r.len && r[first_non_whitespace] == '\t')
-                        ++first_non_whitespace;
-                    _c4dbgfbl("[{}]: #newlines={} firstnonws={}", i, numnl_following, first_non_whitespace);
-                    _RYML_CB_ASSERT(m_stack.m_callbacks, first_non_whitespace <= r.len);
-                    size_t last_newl = r.last_of('\n', first_non_whitespace);
-                    size_t this_indentation = first_non_whitespace - last_newl - 1;
-                    _c4dbgfbl("[{}]: #newlines={} firstnonws={} lastnewl={} this_indentation={} vs indentation={}", i, numnl_following, first_non_whitespace, last_newl, this_indentation, indentation);
-                    _RYML_CB_ASSERT(m_stack.m_callbacks, first_non_whitespace >= last_newl + 1);
-                    _RYML_CB_ASSERT(m_stack.m_callbacks, this_indentation >= indentation);
-                    if(!started)
-                    {
-                        _c4dbgfbl("[{}]: #newlines={}. write all leading newlines", i, numnl_following);
-                        for(size_t j = 0; j < 1 + numnl_following; ++j)
-                            m_filter_arena.str[pos++] = '\n';
-                        if(this_indentation > indentation)
-                        {
-                            is_indented = true;
-                            _c4dbgfbl("[{}]: advance ->{}", i, last_newl + indentation);
-                            i = last_newl + indentation;
-                        }
-                        else
-                        {
-                            i = first_non_whitespace - 1;
-                            _c4dbgfbl("[{}]: advance ->{}", i, first_non_whitespace);
-                        }
-                    }
-                    else if(this_indentation == indentation)
-                    {
-                        _c4dbgfbl("[{}]: same indentation", i);
-                        if(!is_indented)
-                        {
-                            if(numnl_following == 0)
-                            {
-                                _c4dbgfbl("[{}]: fold!", i);
-                                m_filter_arena.str[pos++] = ' ';
-                            }
-                            else
-                            {
-                                _c4dbgfbl("[{}]: add {} newlines", i, 1 + numnl_following);
-                                for(size_t j = 0; j < numnl_following; ++j)
-                                    m_filter_arena.str[pos++] = '\n';
-                            }
-                            i = first_non_whitespace - 1;
-                            _c4dbgfbl("[{}]: advance {}->{}", i, i, first_non_whitespace);
-                        }
-                        else
-                        {
-                            _c4dbgfbl("[{}]: back to ref indentation", i);
-                            is_indented = false;
-                            on_change_indentation(numnl_following, last_newl, first_non_whitespace);
-                            _c4dbgfbl("[{}]: advance {}->{}", i, i, first_non_whitespace);
-                        }
-                    }
-                    else
-                    {
-                        _c4dbgfbl("[{}]: increased indentation.", i);
-                        is_indented = true;
-                        _RYML_CB_ASSERT(m_stack.m_callbacks, this_indentation > indentation);
-                        on_change_indentation(numnl_following, last_newl, first_non_whitespace);
-                        _c4dbgfbl("[{}]: advance {}->{}", i, i, first_non_whitespace);
-                    }
+                    _handle_seq_block();
                 }
-                else if(curr != '\r')
+                else
                 {
-                    if(curr != '\t')
-                        started = true;
-                    m_filter_arena.str[pos++] = curr;
+                    _RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_all(RMAP));
+                    _handle_map_block();
                 }
             }
-            _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len);
-            _c4dbgfbl(": #filteredchars={} after=[{}]~~~{}~~~", (int)s.len - (int)pos, pos, m_filter_arena.first(pos));
-            bool changed = _apply_chomp(m_filter_arena, &pos, chomp);
-            if(pos < r.len || filtered_chars || changed)
+            else if(has_any(RUNK))
+            {
+                _handle_unk();
+            }
+            else if(has_any(USTY))
             {
-                r = _finish_filter_arena(s, pos); // write into s
+                _handle_usty();
+            }
+            else
+            {
+                _c4err("internal error");
             }
         }
-        break;
-    default:
-        _c4err("unknown block style");
+        if(_finished_file())
+            break; // it may have finished because of multiline blocks
+        _line_ended();
     }
-
-    _c4dbgfbl(": final=[{}]~~~{}~~~", r.len, r);
-
-    #undef _c4dbgfbl
-
-    return r;
+    _end_stream();
+    m_evt_handler->finish_parse();
 }
 
-//-----------------------------------------------------------------------------
-size_t Parser::_count_nlines(csubstr src)
-{
-    return 1 + src.count('\n');
-}
+} // namespace yml
+} // namespace c4
 
-//-----------------------------------------------------------------------------
-void Parser::_handle_directive(csubstr directive_)
-{
-    csubstr directive = directive_;
-    if(directive.begins_with("%TAG"))
-    {
-        TagDirective td;
-        _c4dbgpf("%TAG directive: {}", directive_);
-        directive = directive.sub(4);
-        if(!directive.begins_with(' '))
-            _c4err("malformed tag directive: {}", directive_);
-        directive = directive.triml(' ');
-        size_t pos = directive.find(' ');
-        if(pos == npos)
-            _c4err("malformed tag directive: {}", directive_);
-        td.handle = directive.first(pos);
-        directive = directive.sub(td.handle.len).triml(' ');
-        pos = directive.find(' ');
-        if(pos != npos)
-            directive = directive.first(pos);
-        td.prefix = directive;
-        td.next_node_id = m_tree->size();
-        if(m_tree->size() > 0)
-        {
-            size_t prev = m_tree->size() - 1;
-            if(m_tree->is_root(prev) && m_tree->type(prev) != NOTYPE && !m_tree->is_stream(prev))
-                ++td.next_node_id;
-        }
-        _c4dbgpf("%TAG: handle={} prefix={} next_node={}", td.handle, td.prefix, td.next_node_id);
-        m_tree->add_tag_directive(td);
-    }
-    else if(directive.begins_with("%YAML"))
-    {
-        _c4dbgpf("%YAML directive! ignoring...: {}", directive);
-    }
-}
+#undef _c4dbgnextline
 
-//-----------------------------------------------------------------------------
-void Parser::set_flags(flag_t f, State * s)
-{
-#ifdef RYML_DBG
-    char buf1_[64], buf2_[64];
-    csubstr buf1 = _prfl(buf1_, f);
-    csubstr buf2 = _prfl(buf2_, s->flags);
-    _c4dbgpf("state[{}]: setting flags to {}: before={}", s-m_stack.begin(), buf1, buf2);
+#if defined(_MSC_VER)
+#   pragma warning(pop)
+#elif defined(__clang__)
+#   pragma clang diagnostic pop
+#elif defined(__GNUC__)
+#   pragma GCC diagnostic pop
 #endif
-    s->flags = f;
-}
 
-void Parser::add_flags(flag_t on, State * s)
-{
-#ifdef RYML_DBG
-    char buf1_[64], buf2_[64], buf3_[64];
-    csubstr buf1 = _prfl(buf1_, on);
-    csubstr buf2 = _prfl(buf2_, s->flags);
-    csubstr buf3 = _prfl(buf3_, s->flags|on);
-    _c4dbgpf("state[{}]: adding flags {}: before={} after={}", s-m_stack.begin(), buf1, buf2, buf3);
-#endif
-    s->flags |= on;
-}
+#endif // _C4_YML_PARSE_ENGINE_DEF_HPP_
 
-void Parser::addrem_flags(flag_t on, flag_t off, State * s)
-{
-#ifdef RYML_DBG
-    char buf1_[64], buf2_[64], buf3_[64], buf4_[64];
-    csubstr buf1 = _prfl(buf1_, on);
-    csubstr buf2 = _prfl(buf2_, off);
-    csubstr buf3 = _prfl(buf3_, s->flags);
-    csubstr buf4 = _prfl(buf4_, ((s->flags|on)&(~off)));
-    _c4dbgpf("state[{}]: adding flags {} / removing flags {}: before={} after={}", s-m_stack.begin(), buf1, buf2, buf3, buf4);
-#endif
-    s->flags |= on;
-    s->flags &= ~off;
-}
 
-void Parser::rem_flags(flag_t off, State * s)
-{
-#ifdef RYML_DBG
-    char buf1_[64], buf2_[64], buf3_[64];
-    csubstr buf1 = _prfl(buf1_, off);
-    csubstr buf2 = _prfl(buf2_, s->flags);
-    csubstr buf3 = _prfl(buf3_, s->flags&(~off));
-    _c4dbgpf("state[{}]: removing flags {}: before={} after={}", s-m_stack.begin(), buf1, buf2, buf3);
-#endif
-    s->flags &= ~off;
-}
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/parse_engine.def.hpp)
 
-//-----------------------------------------------------------------------------
 
-csubstr Parser::_prfl(substr buf, flag_t flags)
-{
-    size_t pos = 0;
-    bool gotone = false;
 
-    #define _prflag(fl)                                     \
-    if((flags & fl) == (fl))                                \
-    {                                                       \
-        if(gotone)                                          \
-        {                                                   \
-            if(pos + 1 < buf.len)                           \
-                buf[pos] = '|';                             \
-            ++pos;                                          \
-        }                                                   \
-        csubstr fltxt = #fl;                                \
-        if(pos + fltxt.len <= buf.len)                      \
-            memcpy(buf.str + pos, fltxt.str, fltxt.len);    \
-        pos += fltxt.len;                                   \
-        gotone = true;                                      \
-    }
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/c4/yml/reference_resolver.cpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/reference_resolver.cpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
 
-    _prflag(RTOP);
-    _prflag(RUNK);
-    _prflag(RMAP);
-    _prflag(RSEQ);
-    _prflag(FLOW);
-    _prflag(QMRK);
-    _prflag(RKEY);
-    _prflag(RVAL);
-    _prflag(RNXT);
-    _prflag(SSCL);
-    _prflag(QSCL);
-    _prflag(RSET);
-    _prflag(NDOC);
-    _prflag(RSEQIMAP);
+#ifdef RYML_SINGLE_HDR_DEFINE_NOW
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/reference_resolver.hpp
+//#include "c4/yml/reference_resolver.hpp"
+#if !defined(C4_YML_REFERENCE_RESOLVER_HPP_) && !defined(_C4_YML_REFERENCE_RESOLVER_HPP_)
+#error "amalgamate: file c4/yml/reference_resolver.hpp must have been included at this point"
+#endif /* C4_YML_REFERENCE_RESOLVER_HPP_ */
 
-    #undef _prflag
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/dump.hpp
+//#include "c4/dump.hpp" // this is needed to resolve a function in the next header
+#if !defined(C4_DUMP_HPP_) && !defined(_C4_DUMP_HPP_)
+#error "amalgamate: file c4/dump.hpp must have been included at this point"
+#endif /* C4_DUMP_HPP_ */
 
-    RYML_ASSERT(pos <= buf.len);
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/common.hpp
+//#include "c4/yml/common.hpp"
+#if !defined(C4_YML_COMMON_HPP_) && !defined(_C4_YML_COMMON_HPP_)
+#error "amalgamate: file c4/yml/common.hpp must have been included at this point"
+#endif /* C4_YML_COMMON_HPP_ */
 
-    return buf.first(pos);
-}
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/parser_dbg.hpp
+//#include "c4/yml/detail/parser_dbg.hpp"
+#if !defined(C4_YML_DETAIL_PARSER_DBG_HPP_) && !defined(_C4_YML_DETAIL_PARSER_DBG_HPP_)
+#error "amalgamate: file c4/yml/detail/parser_dbg.hpp must have been included at this point"
+#endif /* C4_YML_DETAIL_PARSER_DBG_HPP_ */
+
+#ifdef RYML_DBG
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/print.hpp
+//#include "c4/yml/detail/print.hpp"
+#if !defined(C4_YML_DETAIL_PRINT_HPP_) && !defined(_C4_YML_DETAIL_PRINT_HPP_)
+#error "amalgamate: file c4/yml/detail/print.hpp must have been included at this point"
+#endif /* C4_YML_DETAIL_PRINT_HPP_ */
 
+#else
+#define _c4dbg_tree(...)
+#define _c4dbg_node(...)
+#endif
 
-//-----------------------------------------------------------------------------
-//-----------------------------------------------------------------------------
-//-----------------------------------------------------------------------------
+namespace c4 {
+namespace yml {
 
-void Parser::_grow_filter_arena(size_t num_characters_needed)
+id_type ReferenceResolver::count_anchors_and_refs_(id_type n)
 {
-    _c4dbgpf("grow: arena={} numchars={}", m_filter_arena.len, num_characters_needed);
-    if(num_characters_needed <= m_filter_arena.len)
-        return;
-    size_t sz = m_filter_arena.len << 1;
-    _c4dbgpf("grow: sz={}", sz);
-    sz = num_characters_needed > sz ? num_characters_needed : sz;
-    _c4dbgpf("grow: sz={}", sz);
-    sz = sz < 128u ? 128u : sz;
-    _c4dbgpf("grow: sz={}", sz);
-    _RYML_CB_ASSERT(m_stack.m_callbacks, sz >= num_characters_needed);
-    _resize_filter_arena(sz);
+    id_type c = 0;
+    c += m_tree->has_key_anchor(n);
+    c += m_tree->has_val_anchor(n);
+    c += m_tree->is_key_ref(n);
+    c += m_tree->is_val_ref(n);
+    c += m_tree->has_key(n) && m_tree->key(n) == "<<";
+    for(id_type ch = m_tree->first_child(n); ch != NONE; ch = m_tree->next_sibling(ch))
+        c += count_anchors_and_refs_(ch);
+    return c;
 }
 
-void Parser::_resize_filter_arena(size_t num_characters)
+void ReferenceResolver::gather_anchors_and_refs__(id_type n)
 {
-    if(num_characters > m_filter_arena.len)
+    // insert key refs BEFORE inserting val refs
+    if(m_tree->has_key(n))
     {
-        _c4dbgpf("resize: sz={}", num_characters);
-        char *prev = m_filter_arena.str;
-        if(m_filter_arena.str)
+        if(m_tree->key(n) == "<<")
+        {
+            _c4dbgpf("node[{}]: key is <<", n);
+            if(m_tree->has_val(n))
+            {
+                if(m_tree->is_val_ref(n))
+                {
+                    _c4dbgpf("node[{}]: val ref, inheriting!", n);
+                    m_refs.push({VALREF, n, NONE, NONE, NONE, NONE});
+                    //m_refs.push({KEYREF, n, NONE, NONE, NONE, NONE});
+                }
+                else
+                {
+                    _c4dbgpf("node[{}]: not ref!", n);
+                }
+            }
+            else if(m_tree->is_seq(n))
+            {
+                // for merging multiple inheritance targets
+                //   <<: [ *CENTER, *BIG ]
+                _c4dbgpf("node[{}]: is seq!", n);
+                for(id_type ich = m_tree->first_child(n); ich != NONE; ich = m_tree->next_sibling(ich))
+                {
+                    _c4dbgpf("node[{}]: val ref, inheriting multiple: {}", n, ich);
+                    if(m_tree->is_container(ich))
+                    {
+                        detail::_report_err(m_tree->m_callbacks, "ERROR: node {} child {}: refs for << cannot be containers.'", n, ich);
+                        C4_UNREACHABLE_AFTER_ERR();
+                    }
+                    m_refs.push({VALREF, ich, NONE, NONE, n, m_tree->next_sibling(n)});
+                }
+                return; // don't descend into the seq
+            }
+            else
+            {
+                detail::_report_err(m_tree->m_callbacks, "ERROR: node {}: refs for << must be either val or seq", n);
+                C4_UNREACHABLE_AFTER_ERR();
+            }
+        }
+        else if(m_tree->is_key_ref(n))
         {
-            _RYML_CB_ASSERT(m_stack.m_callbacks, m_filter_arena.len > 0);
-            _RYML_CB_FREE(m_stack.m_callbacks, m_filter_arena.str, char, m_filter_arena.len);
+            _c4dbgpf("node[{}]: key ref: '{}'", n, m_tree->key_ref(n));
+            _RYML_CB_ASSERT(m_tree->m_callbacks, m_tree->key(n) != "<<");
+            _RYML_CB_CHECK(m_tree->m_callbacks, (!m_tree->has_key(n)) || m_tree->key(n).ends_with(m_tree->key_ref(n)));
+            m_refs.push({KEYREF, n, NONE, NONE, NONE, NONE});
         }
-        m_filter_arena.str = _RYML_CB_ALLOC_HINT(m_stack.m_callbacks, char, num_characters, prev);
-        m_filter_arena.len = num_characters;
     }
+    // val ref
+    if(m_tree->is_val_ref(n) && (!m_tree->has_key(n) || m_tree->key(n) != "<<"))
+    {
+        _c4dbgpf("node[{}]: val ref: '{}'", n, m_tree->val_ref(n));
+        RYML_CHECK((!m_tree->has_val(n)) || m_tree->val(n).ends_with(m_tree->val_ref(n)));
+        m_refs.push({VALREF, n, NONE, NONE, NONE, NONE});
+    }
+    // anchors
+    if(m_tree->has_key_anchor(n))
+    {
+        _c4dbgpf("node[{}]: key anchor: '{}'", n, m_tree->key_anchor(n));
+        RYML_CHECK(m_tree->has_key(n));
+        m_refs.push({KEYANCH, n, NONE, NONE, NONE, NONE});
+    }
+    if(m_tree->has_val_anchor(n))
+    {
+        _c4dbgpf("node[{}]: val anchor: '{}'", n, m_tree->val_anchor(n));
+        RYML_CHECK(m_tree->has_val(n) || m_tree->is_container(n));
+        m_refs.push({VALANCH, n, NONE, NONE, NONE, NONE});
+    }
+    // recurse
+    for(id_type ch = m_tree->first_child(n); ch != NONE; ch = m_tree->next_sibling(ch))
+        gather_anchors_and_refs__(ch);
 }
 
-substr Parser::_finish_filter_arena(substr dst, size_t pos)
+void ReferenceResolver::gather_anchors_and_refs_()
 {
-    _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len);
-    _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= dst.len);
-    memcpy(dst.str, m_filter_arena.str, pos);
-    return dst.first(pos);
-}
+    _c4dbgp("gathering anchors and refs...");
 
+    // minimize (re-)allocations by counting first
+    id_type num_anchors_and_refs = count_anchors_and_refs_(m_tree->root_id());
+    if(!num_anchors_and_refs)
+        return;
+    m_refs.reserve(num_anchors_and_refs);
+    m_refs.clear();
 
-//-----------------------------------------------------------------------------
-//-----------------------------------------------------------------------------
-//-----------------------------------------------------------------------------
+    // now descend through the hierarchy
+    gather_anchors_and_refs__(m_tree->root_id());
 
-csubstr Parser::location_contents(Location const& loc) const
-{
-    _RYML_CB_ASSERT(m_stack.m_callbacks, loc.offset < m_buf.len);
-    return m_buf.sub(loc.offset);
-}
+    _c4dbgpf("found {} anchors/refs", m_refs.size());
 
-Location Parser::location(NodeRef node) const
-{
-    _RYML_CB_ASSERT(m_stack.m_callbacks, node.valid());
-    return location(*node.tree(), node.id());
+    // finally connect the reference list
+    id_type prev_anchor = NONE;
+    id_type count = 0;
+    for(auto &rd : m_refs)
+    {
+        rd.prev_anchor = prev_anchor;
+        if(rd.type.has_anchor())
+            prev_anchor = count;
+        ++count;
+    }
+    _c4dbgp("gathering anchors and refs: finished");
 }
 
-Location Parser::location(Tree const& tree, size_t node) const
+id_type ReferenceResolver::lookup_(RefData *C4_RESTRICT ra)
 {
-    _RYML_CB_CHECK(m_stack.m_callbacks, m_buf.str == m_newline_offsets_buf.str);
-    _RYML_CB_CHECK(m_stack.m_callbacks, m_buf.len == m_newline_offsets_buf.len);
-    if(tree.has_key(node))
+    RYML_ASSERT(ra->type.is_key_ref() || ra->type.is_val_ref());
+    RYML_ASSERT(ra->type.is_key_ref() != ra->type.is_val_ref());
+    csubstr refname;
+    if(ra->type.is_val_ref())
     {
-        _RYML_CB_ASSERT(m_stack.m_callbacks, tree.key(node).is_sub(m_buf));
-        _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(tree.key(node)));
-        return val_location(tree.key(node).str);
+        refname = m_tree->val_ref(ra->node);
     }
-    else if(tree.has_val(node))
+    else
     {
-        _RYML_CB_ASSERT(m_stack.m_callbacks, tree.val(node).is_sub(m_buf));
-        _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(tree.val(node)));
-        return val_location(tree.val(node).str);
+        RYML_ASSERT(ra->type.is_key_ref());
+        refname = m_tree->key_ref(ra->node);
     }
-    else if(tree.is_container(node))
+    while(ra->prev_anchor != NONE)
     {
-        _RYML_CB_ASSERT(m_stack.m_callbacks, !tree.has_key(node));
-        if(!tree.is_stream(node))
-        {
-            const char *node_start = tree._p(node)->m_val.scalar.str;  // this was stored in the container
-            if(tree.has_children(node))
-            {
-                size_t child = tree.first_child(node);
-                if(tree.has_key(child))
-                {
-                    // when a map starts, the container was set after the key
-                    csubstr k = tree.key(child);
-                    if(node_start > k.str)
-                        node_start = k.str;
-                }
-            }
-            return val_location(node_start);
-        }
-        else // it's a stream
-        {
-            return val_location(m_buf.str); // just return the front of the buffer
-        }
+        ra = &m_refs[ra->prev_anchor];
+        if(m_tree->has_anchor(ra->node, refname))
+            return ra->node;
     }
-    _RYML_CB_ASSERT(m_stack.m_callbacks, tree.type(node) == NOTYPE);
-    return val_location(m_buf.str);
+    detail::_report_err(m_tree->m_callbacks, "ERROR: anchor not found: '{}'", refname);
+    C4_UNREACHABLE_AFTER_ERR();
 }
 
-Location Parser::val_location(const char *val) const
+void ReferenceResolver::reset_(Tree *t_)
 {
-    if(_locations_dirty())
-        _prepare_locations();
-    csubstr src = m_buf;
-    _RYML_CB_CHECK(m_stack.m_callbacks, src.str == m_newline_offsets_buf.str);
-    _RYML_CB_CHECK(m_stack.m_callbacks, src.len == m_newline_offsets_buf.len);
-    _RYML_CB_CHECK(m_stack.m_callbacks, val >= src.begin() && val <= src.end());
-    _RYML_CB_ASSERT(m_stack.m_callbacks, m_newline_offsets != nullptr);
-    _RYML_CB_ASSERT(m_stack.m_callbacks, m_newline_offsets_size > 0);
-    using linetype = size_t const* C4_RESTRICT;
-    linetype line = nullptr;
-    size_t offset = (size_t)(val - src.begin());
-    if(m_newline_offsets_size < 30)
+    if(t_->callbacks() != m_refs.m_callbacks)
     {
-        // do a linear search if the size is small.
-        for(linetype curr = m_newline_offsets; curr < m_newline_offsets + m_newline_offsets_size; ++curr)
-        {
-            if(*curr > offset)
-            {
-                line = curr;
-                break;
-            }
-        }
+        m_refs.m_callbacks = t_->callbacks();
     }
-    else
+    m_refs.clear();
+    m_tree = t_;
+}
+
+void ReferenceResolver::resolve(Tree *t_)
+{
+    _c4dbgp("resolving references...");
+
+    reset_(t_);
+
+    _c4dbg_tree("unresolved tree", *m_tree);
+
+    gather_anchors_and_refs_();
+    if(m_refs.empty())
+        return;
+
+    /* from the specs: "an alias node refers to the most recent
+     * node in the serialization having the specified anchor". So
+     * we need to start looking upward from ref nodes.
+     *
+     * @see http://yaml.org/spec/1.2/spec.html#id2765878 */
+    _c4dbgp("matching anchors/refs...");
+    for(id_type i = 0, e = m_refs.size(); i < e; ++i)
     {
-        // Do a bisection search if the size is not small.
-        //
-        // We could use std::lower_bound but this is simple enough and
-        // spares the include of <algorithm>.
-        size_t count = m_newline_offsets_size;
-        size_t step;
-        linetype it;
-        line = m_newline_offsets;
-        while(count)
+        RefData &C4_RESTRICT refdata = m_refs.top(i);
+        if( ! refdata.type.is_ref())
+            continue;
+        refdata.target = lookup_(&refdata);
+    }
+    _c4dbgp("matching anchors/refs: finished");
+
+    // insert the resolved references
+    _c4dbgp("modifying tree...");
+    id_type prev_parent_ref = NONE;
+    id_type prev_parent_ref_after = NONE;
+    for(id_type i = 0, e = m_refs.size(); i < e; ++i)
+    {
+        RefData const& C4_RESTRICT refdata = m_refs[i];
+        _c4dbgpf("instance {}/{}...", i, e);
+        if( ! refdata.type.is_ref())
+            continue;
+        _c4dbgpf("instance {} is reference!", i);
+        if(refdata.parent_ref != NONE)
+        {
+            _c4dbgpf("ref {} has parent: {}", i, refdata.parent_ref);
+            _RYML_CB_ASSERT(m_tree->m_callbacks, m_tree->is_seq(refdata.parent_ref));
+            const id_type p = m_tree->parent(refdata.parent_ref);
+            const id_type after = (prev_parent_ref != refdata.parent_ref) ?
+                refdata.parent_ref//prev_sibling(rd.parent_ref_sibling)
+                :
+                prev_parent_ref_after;
+            prev_parent_ref = refdata.parent_ref;
+            prev_parent_ref_after = m_tree->duplicate_children_no_rep(refdata.target, p, after);
+            m_tree->remove(refdata.node);
+        }
+        else
         {
-            step = count >> 1;
-            it = line + step;
-            if(*it < offset)
+            _c4dbgpf("ref {} has no parent", i, refdata.parent_ref);
+            if(m_tree->has_key(refdata.node) && m_tree->key(refdata.node) == "<<")
             {
-                line = ++it;
-                count -= step + 1;
+                _c4dbgpf("ref {} is inheriting", i);
+                _RYML_CB_ASSERT(m_tree->m_callbacks, m_tree->is_keyval(refdata.node));
+                const id_type p = m_tree->parent(refdata.node);
+                const id_type after = m_tree->prev_sibling(refdata.node);
+                m_tree->duplicate_children_no_rep(refdata.target, p, after);
+                m_tree->remove(refdata.node);
             }
-            else
+            else if(refdata.type.is_key_ref())
             {
-                count = step;
+                _c4dbgpf("ref {} is key ref", i);
+                _RYML_CB_ASSERT(m_tree->m_callbacks, m_tree->is_key_ref(refdata.node));
+                _RYML_CB_ASSERT(m_tree->m_callbacks, m_tree->has_key_anchor(refdata.target) || m_tree->has_val_anchor(refdata.target));
+                if(m_tree->has_val_anchor(refdata.target) && m_tree->val_anchor(refdata.target) == m_tree->key_ref(refdata.node))
+                {
+                    _RYML_CB_CHECK(m_tree->m_callbacks, !m_tree->is_container(refdata.target));
+                    _RYML_CB_CHECK(m_tree->m_callbacks, m_tree->has_val(refdata.target));
+                    const type_bits existing_style_flags = VAL_STYLE & m_tree->_p(refdata.target)->m_type.type;
+                    static_assert((VAL_STYLE >> 1u) == (KEY_STYLE), "bad flags");
+                    m_tree->_p(refdata.node)->m_key.scalar = m_tree->val(refdata.target);
+                    m_tree->_add_flags(refdata.node, KEY | (existing_style_flags >> 1u));
+                }
+                else
+                {
+                    _RYML_CB_CHECK(m_tree->m_callbacks, m_tree->key_anchor(refdata.target) == m_tree->key_ref(refdata.node));
+                    m_tree->_p(refdata.node)->m_key.scalar = m_tree->key(refdata.target);
+                    // keys cannot be containers, so don't inherit container flags
+                    const type_bits existing_style_flags = KEY_STYLE & m_tree->_p(refdata.target)->m_type.type;
+                    m_tree->_add_flags(refdata.node, KEY | existing_style_flags);
+                }
+            }
+            else // val ref
+            {
+                _c4dbgpf("ref {} is val ref", i);
+                _RYML_CB_ASSERT(m_tree->m_callbacks, refdata.type.is_val_ref());
+                if(m_tree->has_key_anchor(refdata.target) && m_tree->key_anchor(refdata.target) == m_tree->val_ref(refdata.node))
+                {
+                    _RYML_CB_CHECK(m_tree->m_callbacks, !m_tree->is_container(refdata.target));
+                    _RYML_CB_CHECK(m_tree->m_callbacks, m_tree->has_val(refdata.target));
+                    // keys cannot be containers, so don't inherit container flags
+                    const type_bits existing_style_flags = (KEY_STYLE) & m_tree->_p(refdata.target)->m_type.type;
+                    static_assert((KEY_STYLE << 1u) == (VAL_STYLE), "bad flags");
+                    m_tree->_p(refdata.node)->m_val.scalar = m_tree->key(refdata.target);
+                    m_tree->_add_flags(refdata.node, VAL | (existing_style_flags << 1u));
+                }
+                else
+                {
+                    m_tree->duplicate_contents(refdata.target, refdata.node);
+                }
             }
         }
     }
-    if(line)
-    {
-        _RYML_CB_ASSERT(m_stack.m_callbacks, *line > offset);
-    }
-    else
-    {
-        _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.empty());
-        _RYML_CB_ASSERT(m_stack.m_callbacks, m_newline_offsets_size == 1);
-        line = m_newline_offsets;
-    }
-    _RYML_CB_ASSERT(m_stack.m_callbacks, line >= m_newline_offsets && line < m_newline_offsets + m_newline_offsets_size);;
-    Location loc = {};
-    loc.name = m_file;
-    loc.offset = offset;
-    loc.line = (size_t)(line - m_newline_offsets);
-    if(line > m_newline_offsets)
-        loc.col = (offset - *(line-1) - 1u);
-    else
-        loc.col = offset;
-    return loc;
-}
-
-void Parser::_prepare_locations() const
-{
-    _RYML_CB_ASSERT(m_stack.m_callbacks, !m_file.empty());
-    size_t numnewlines = 1u + m_buf.count('\n');
-    _resize_locations(numnewlines);
-    m_newline_offsets_size = 0;
-    for(size_t i = 0; i < m_buf.len; i++)
-        if(m_buf[i] == '\n')
-            m_newline_offsets[m_newline_offsets_size++] = i;
-    m_newline_offsets[m_newline_offsets_size++] = m_buf.len;
-    _RYML_CB_ASSERT(m_stack.m_callbacks, m_newline_offsets_size == numnewlines);
-}
-
-void Parser::_resize_locations(size_t numnewlines) const
-{
-    if(numnewlines > m_newline_offsets_capacity)
+    _c4dbgp("modifying tree: finished");
+
+    // clear anchors and refs
+    _c4dbgp("clearing anchors/refs");
+    for(auto const& C4_RESTRICT ar : m_refs)
     {
-        if(m_newline_offsets)
-            _RYML_CB_FREE(m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity);
-        m_newline_offsets = _RYML_CB_ALLOC_HINT(m_stack.m_callbacks, size_t, numnewlines, m_newline_offsets);
-        m_newline_offsets_capacity = numnewlines;
+        m_tree->rem_anchor_ref(ar.node);
+        if(ar.parent_ref != NONE)
+            if(m_tree->type(ar.parent_ref) != NOTYPE)
+                m_tree->remove(ar.parent_ref);
     }
-}
+    _c4dbgp("clearing anchors/refs: finished");
 
-void Parser::_mark_locations_dirty()
-{
-    m_newline_offsets_size = 0u;
-    m_newline_offsets_buf = m_buf;
-}
+    _c4dbg_tree("resolved tree", *m_tree);
 
-bool Parser::_locations_dirty() const
-{
-    return !m_newline_offsets_size;
+    m_tree = nullptr;
+    _c4dbgp("resolving references: finished");
 }
 
-} // namespace yml
-} // namespace c4
-
 
-#if defined(_MSC_VER)
-#   pragma warning(pop)
-#elif defined(__clang__)
-#   pragma clang diagnostic pop
-#elif defined(__GNUC__)
-#   pragma GCC diagnostic pop
-#endif
+} // namespace ryml
+} // namespace c4
 
 #endif /* RYML_SINGLE_HDR_DEFINE_NOW */
 
 
-// (end https://github.com/biojppm/rapidyaml/src/c4/yml/parse.cpp)
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/reference_resolver.cpp)
 
 
 
 //********************************************************************************
 //--------------------------------------------------------------------------------
-// src/c4/yml/node.cpp
-// https://github.com/biojppm/rapidyaml/src/c4/yml/node.cpp
+// src/c4/yml/parse.cpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/parse.cpp
 //--------------------------------------------------------------------------------
 //********************************************************************************
 
 #ifdef RYML_SINGLE_HDR_DEFINE_NOW
 // amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/parse.hpp
+//#include "c4/yml/parse.hpp"
+#if !defined(C4_YML_PARSE_HPP_) && !defined(_C4_YML_PARSE_HPP_)
+#error "amalgamate: file c4/yml/parse.hpp must have been included at this point"
+#endif /* C4_YML_PARSE_HPP_ */
+
+
+#ifndef _C4_YML_NODE_HPP_
+// amalgamate: removed include of
 // https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp
 //#include "c4/yml/node.hpp"
 #if !defined(C4_YML_NODE_HPP_) && !defined(_C4_YML_NODE_HPP_)
 #error "amalgamate: file c4/yml/node.hpp must have been included at this point"
 #endif /* C4_YML_NODE_HPP_ */
 
+#endif
+#ifndef _C4_YML_PARSE_ENGINE_HPP_
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/parse_engine.hpp
+//#include "c4/yml/parse_engine.hpp"
+#if !defined(C4_YML_PARSE_ENGINE_HPP_) && !defined(_C4_YML_PARSE_ENGINE_HPP_)
+#error "amalgamate: file c4/yml/parse_engine.hpp must have been included at this point"
+#endif /* C4_YML_PARSE_ENGINE_HPP_ */
+
+#endif
+#ifndef _C4_YML_PARSE_ENGINE_DEF_HPP_
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/parse_engine.def.hpp
+//#include "c4/yml/parse_engine.def.hpp"
+#if !defined(C4_YML_PARSE_ENGINE_DEF_HPP_) && !defined(_C4_YML_PARSE_ENGINE_DEF_HPP_)
+#error "amalgamate: file c4/yml/parse_engine.def.hpp must have been included at this point"
+#endif /* C4_YML_PARSE_ENGINE_DEF_HPP_ */
+
+#endif
+#ifndef _C4_YML_EVENT_HANDLER_TREE_HPP_
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/event_handler_tree.hpp
+//#include "c4/yml/event_handler_tree.hpp"
+#if !defined(C4_YML_EVENT_HANDLER_TREE_HPP_) && !defined(_C4_YML_EVENT_HANDLER_TREE_HPP_)
+#error "amalgamate: file c4/yml/event_handler_tree.hpp must have been included at this point"
+#endif /* C4_YML_EVENT_HANDLER_TREE_HPP_ */
+
+#endif
+
+
+//-----------------------------------------------------------------------------
 
 namespace c4 {
 namespace yml {
 
-size_t NodeRef::set_key_serialized(c4::fmt::const_base64_wrapper w)
-{
-    _apply_seed();
-    csubstr encoded = this->to_arena(w);
-    this->set_key(encoded);
-    return encoded.len;
-}
+// instantiate the parser class
+template class ParseEngine<EventHandlerTree>;
 
-size_t NodeRef::set_val_serialized(c4::fmt::const_base64_wrapper w)
+namespace {
+inline void _reset_tree_handler(Parser *parser, Tree *t, id_type node_id) // resets the parser's event handler so it targets node node_id of tree t
 {
-    _apply_seed();
-    csubstr encoded = this->to_arena(w);
-    this->set_val(encoded);
-    return encoded.len;
+    RYML_CHECK(parser); // parser is dereferenced unconditionally below, so use the check the sibling overloads use rather than an assertion
+    RYML_CHECK(t); // t->m_callbacks is read on the error path below, and t is handed to the handler
+    if(!parser->m_evt_handler)
+        _RYML_CB_ERR(t->m_callbacks, "event handler is not set");
+    parser->m_evt_handler->reset(t, node_id);
+    RYML_ASSERT(parser->m_evt_handler->m_tree == t);
 }
+} // namespace
 
-size_t NodeRef::deserialize_key(c4::fmt::base64_wrapper w) const
-{
-    RYML_ASSERT( ! is_seed());
-    RYML_ASSERT(valid());
-    RYML_ASSERT(get() != nullptr);
-    return from_chars(key(), &w);
+void parse_in_place(Parser *parser, csubstr filename, substr yaml, Tree *t, id_type node_id) // base overload: all other parse_in_place()/parse_in_arena() overloads below end up here
+{
+    _reset_tree_handler(parser, t, node_id); // reset the parser's event handler to tree t / node node_id before parsing
+    parser->parse_in_place_ev(filename, yaml); // parse the mutable buffer yaml
+}
+
+void parse_json_in_place(Parser *parser, csubstr filename, substr json, Tree *t, id_type node_id) // base overload: all other parse_json_in_place()/parse_json_in_arena() overloads below end up here
+{
+    _reset_tree_handler(parser, t, node_id); // reset the parser's event handler to tree t / node node_id before parsing
+    parser->parse_json_in_place_ev(filename, json); // parse the mutable buffer json
+}
+
+
+// parse_in_place() overloads taking a caller-provided parser. This is vertically aligned to highlight the parameter differences.
+void parse_in_place(Parser *parser,                   substr yaml, Tree *t, id_type node_id) { parse_in_place(parser, {}, yaml, t, node_id); }
+void parse_in_place(Parser *parser, csubstr filename, substr yaml, Tree *t                ) { RYML_CHECK(t); parse_in_place(parser, filename, yaml, t, t->root_id()); }
+void parse_in_place(Parser *parser,                   substr yaml, Tree *t                ) { RYML_CHECK(t); parse_in_place(parser, {}      , yaml, t, t->root_id()); }
+void parse_in_place(Parser *parser, csubstr filename, substr yaml, NodeRef node           ) { RYML_CHECK(!node.invalid()); parse_in_place(parser, filename, yaml, node.tree(), node.id()); }
+void parse_in_place(Parser *parser,                   substr yaml, NodeRef node           ) { RYML_CHECK(!node.invalid()); parse_in_place(parser, {}      , yaml, node.tree(), node.id()); }
+Tree parse_in_place(Parser *parser, csubstr filename, substr yaml                         ) { RYML_CHECK(parser); RYML_CHECK(parser->m_evt_handler); Tree tree(parser->callbacks()); parse_in_place(parser, filename, yaml, &tree, tree.root_id()); return tree; }
+Tree parse_in_place(Parser *parser,                   substr yaml                         ) { RYML_CHECK(parser); RYML_CHECK(parser->m_evt_handler); Tree tree(parser->callbacks()); parse_in_place(parser, {}      , yaml, &tree, tree.root_id()); return tree; }
+
+// parse_in_place() overloads creating a temporary event handler and parser. This is vertically aligned to highlight the parameter differences.
+void parse_in_place(csubstr filename, substr yaml, Tree *t, id_type node_id) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); parse_in_place(&parser, filename, yaml, t, node_id); }
+void parse_in_place(                  substr yaml, Tree *t, id_type node_id) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); parse_in_place(&parser, {}      , yaml, t, node_id); }
+void parse_in_place(csubstr filename, substr yaml, Tree *t                ) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); parse_in_place(&parser, filename, yaml, t, t->root_id()); }
+void parse_in_place(                  substr yaml, Tree *t                ) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); parse_in_place(&parser, {}      , yaml, t, t->root_id()); }
+void parse_in_place(csubstr filename, substr yaml, NodeRef node           ) { RYML_CHECK(!node.invalid()); Parser::handler_type event_handler(node.tree()->callbacks()); Parser parser(&event_handler); parse_in_place(&parser, filename, yaml, node.tree(), node.id()); }
+void parse_in_place(                  substr yaml, NodeRef node           ) { RYML_CHECK(!node.invalid()); Parser::handler_type event_handler(node.tree()->callbacks()); Parser parser(&event_handler); parse_in_place(&parser, {}      , yaml, node.tree(), node.id()); }
+Tree parse_in_place(csubstr filename, substr yaml                         ) { Parser::handler_type event_handler; Parser parser(&event_handler); Tree tree(parser.callbacks()); parse_in_place(&parser, filename, yaml, &tree, tree.root_id()); return tree; }
+Tree parse_in_place(                  substr yaml                         ) { Parser::handler_type event_handler; Parser parser(&event_handler); Tree tree(parser.callbacks()); parse_in_place(&parser, {}      , yaml, &tree, tree.root_id()); return tree; }
+
+
+// parse_json_in_place() overloads taking a caller-provided parser. This is vertically aligned to highlight the parameter differences.
+void parse_json_in_place(Parser *parser,                   substr json, Tree *t, id_type node_id) { parse_json_in_place(parser, {}, json, t, node_id); }
+void parse_json_in_place(Parser *parser, csubstr filename, substr json, Tree *t                ) { RYML_CHECK(t); parse_json_in_place(parser, filename, json, t, t->root_id()); }
+void parse_json_in_place(Parser *parser,                   substr json, Tree *t                ) { RYML_CHECK(t); parse_json_in_place(parser, {}      , json, t, t->root_id()); }
+void parse_json_in_place(Parser *parser, csubstr filename, substr json, NodeRef node           ) { RYML_CHECK(!node.invalid()); parse_json_in_place(parser, filename, json, node.tree(), node.id()); }
+void parse_json_in_place(Parser *parser,                   substr json, NodeRef node           ) { RYML_CHECK(!node.invalid()); parse_json_in_place(parser, {}      , json, node.tree(), node.id()); }
+Tree parse_json_in_place(Parser *parser, csubstr filename, substr json                         ) { RYML_CHECK(parser); RYML_CHECK(parser->m_evt_handler); Tree tree(parser->callbacks()); parse_json_in_place(parser, filename, json, &tree, tree.root_id()); return tree; }
+Tree parse_json_in_place(Parser *parser,                   substr json                         ) { RYML_CHECK(parser); RYML_CHECK(parser->m_evt_handler); Tree tree(parser->callbacks()); parse_json_in_place(parser, {}      , json, &tree, tree.root_id()); return tree; }
+
+// parse_json_in_place() overloads creating a temporary event handler and parser. This is vertically aligned to highlight the parameter differences.
+void parse_json_in_place(csubstr filename, substr json, Tree *t, id_type node_id) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); parse_json_in_place(&parser, filename, json, t, node_id); }
+void parse_json_in_place(                  substr json, Tree *t, id_type node_id) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); parse_json_in_place(&parser, {}      , json, t, node_id); }
+void parse_json_in_place(csubstr filename, substr json, Tree *t                ) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); parse_json_in_place(&parser, filename, json, t, t->root_id()); }
+void parse_json_in_place(                  substr json, Tree *t                ) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); parse_json_in_place(&parser, {}      , json, t, t->root_id()); }
+void parse_json_in_place(csubstr filename, substr json, NodeRef node           ) { RYML_CHECK(!node.invalid()); Parser::handler_type event_handler(node.tree()->callbacks()); Parser parser(&event_handler); parse_json_in_place(&parser, filename, json, node.tree(), node.id()); }
+void parse_json_in_place(                  substr json, NodeRef node           ) { RYML_CHECK(!node.invalid()); Parser::handler_type event_handler(node.tree()->callbacks()); Parser parser(&event_handler); parse_json_in_place(&parser, {}      , json, node.tree(), node.id()); }
+Tree parse_json_in_place(csubstr filename, substr json                         ) { Parser::handler_type event_handler; Parser parser(&event_handler); Tree tree(parser.callbacks()); parse_json_in_place(&parser, filename, json, &tree, tree.root_id()); return tree; }
+Tree parse_json_in_place(                  substr json                         ) { Parser::handler_type event_handler; Parser parser(&event_handler); Tree tree(parser.callbacks()); parse_json_in_place(&parser, {}      , json, &tree, tree.root_id()); return tree; }
+
+
+// parse_in_arena() overloads taking a caller-provided parser: the source is first copied with copy_to_arena(), and the copy is parsed in place. This is vertically aligned to highlight the parameter differences.
+void parse_in_arena(Parser *parser, csubstr filename, csubstr yaml, Tree *t, id_type node_id) { RYML_CHECK(t); substr src = t->copy_to_arena(yaml); parse_in_place(parser, filename, src, t, node_id); }
+void parse_in_arena(Parser *parser,                   csubstr yaml, Tree *t, id_type node_id) { RYML_CHECK(t); substr src = t->copy_to_arena(yaml); parse_in_place(parser, {}      , src, t, node_id); }
+void parse_in_arena(Parser *parser, csubstr filename, csubstr yaml, Tree *t                ) { RYML_CHECK(t); substr src = t->copy_to_arena(yaml); parse_in_place(parser, filename, src, t, t->root_id()); }
+void parse_in_arena(Parser *parser,                   csubstr yaml, Tree *t                ) { RYML_CHECK(t); substr src = t->copy_to_arena(yaml); parse_in_place(parser, {}      , src, t, t->root_id()); }
+void parse_in_arena(Parser *parser, csubstr filename, csubstr yaml, NodeRef node           ) { RYML_CHECK(!node.invalid()); substr src = node.tree()->copy_to_arena(yaml); parse_in_place(parser, filename, src, node.tree(), node.id()); }
+void parse_in_arena(Parser *parser,                   csubstr yaml, NodeRef node           ) { RYML_CHECK(!node.invalid()); substr src = node.tree()->copy_to_arena(yaml); parse_in_place(parser, {}      , src, node.tree(), node.id()); }
+Tree parse_in_arena(Parser *parser, csubstr filename, csubstr yaml                         ) { RYML_CHECK(parser); RYML_CHECK(parser->m_evt_handler); Tree tree(parser->callbacks()); substr src = tree.copy_to_arena(yaml); parse_in_place(parser, filename, src, &tree, tree.root_id()); return tree; }
+Tree parse_in_arena(Parser *parser,                   csubstr yaml                         ) { RYML_CHECK(parser); RYML_CHECK(parser->m_evt_handler); Tree tree(parser->callbacks()); substr src = tree.copy_to_arena(yaml); parse_in_place(parser, {}      , src, &tree, tree.root_id()); return tree; }
+
+// parse_in_arena() overloads creating a temporary event handler and parser; the source is copied with copy_to_arena() before parsing. This is vertically aligned to highlight the parameter differences.
+void parse_in_arena(csubstr filename, csubstr yaml, Tree *t, id_type node_id) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); substr src = t->copy_to_arena(yaml); parse_in_place(&parser, filename, src, t, node_id); }
+void parse_in_arena(                  csubstr yaml, Tree *t, id_type node_id) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); substr src = t->copy_to_arena(yaml); parse_in_place(&parser, {}      , src, t, node_id); }
+void parse_in_arena(csubstr filename, csubstr yaml, Tree *t                ) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); substr src = t->copy_to_arena(yaml); parse_in_place(&parser, filename, src, t, t->root_id()); }
+void parse_in_arena(                  csubstr yaml, Tree *t                ) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); substr src = t->copy_to_arena(yaml); parse_in_place(&parser, {}      , src, t, t->root_id()); }
+void parse_in_arena(csubstr filename, csubstr yaml, NodeRef node           ) { RYML_CHECK(!node.invalid()); Parser::handler_type event_handler(node.tree()->callbacks()); Parser parser(&event_handler); substr src = node.tree()->copy_to_arena(yaml); parse_in_place(&parser, filename, src, node.tree(), node.id()); }
+void parse_in_arena(                  csubstr yaml, NodeRef node           ) { RYML_CHECK(!node.invalid()); Parser::handler_type event_handler(node.tree()->callbacks()); Parser parser(&event_handler); substr src = node.tree()->copy_to_arena(yaml); parse_in_place(&parser, {}      , src, node.tree(), node.id()); }
+Tree parse_in_arena(csubstr filename, csubstr yaml                         ) { Parser::handler_type event_handler; Parser parser(&event_handler); Tree tree(parser.callbacks()); substr src = tree.copy_to_arena(yaml); parse_in_place(&parser, filename, src, &tree, tree.root_id()); return tree; }
+Tree parse_in_arena(                  csubstr yaml                         ) { Parser::handler_type event_handler; Parser parser(&event_handler); Tree tree(parser.callbacks()); substr src = tree.copy_to_arena(yaml); parse_in_place(&parser, {}      , src, &tree, tree.root_id()); return tree; }
+
+
+// parse_json_in_arena() overloads taking a caller-provided parser: the source is first copied with copy_to_arena(), and the copy is parsed in place. This is vertically aligned to highlight the parameter differences.
+void parse_json_in_arena(Parser *parser, csubstr filename, csubstr json, Tree *t, id_type node_id) { RYML_CHECK(t); substr src = t->copy_to_arena(json); parse_json_in_place(parser, filename, src, t, node_id); }
+void parse_json_in_arena(Parser *parser,                   csubstr json, Tree *t, id_type node_id) { RYML_CHECK(t); substr src = t->copy_to_arena(json); parse_json_in_place(parser, {}      , src, t, node_id); }
+void parse_json_in_arena(Parser *parser, csubstr filename, csubstr json, Tree *t                ) { RYML_CHECK(t); substr src = t->copy_to_arena(json); parse_json_in_place(parser, filename, src, t, t->root_id()); }
+void parse_json_in_arena(Parser *parser,                   csubstr json, Tree *t                ) { RYML_CHECK(t); substr src = t->copy_to_arena(json); parse_json_in_place(parser, {}      , src, t, t->root_id()); }
+void parse_json_in_arena(Parser *parser, csubstr filename, csubstr json, NodeRef node           ) { RYML_CHECK(!node.invalid()); substr src = node.tree()->copy_to_arena(json); parse_json_in_place(parser, filename, src, node.tree(), node.id()); }
+void parse_json_in_arena(Parser *parser,                   csubstr json, NodeRef node           ) { RYML_CHECK(!node.invalid()); substr src = node.tree()->copy_to_arena(json); parse_json_in_place(parser, {}      , src, node.tree(), node.id()); }
+Tree parse_json_in_arena(Parser *parser, csubstr filename, csubstr json                         ) { RYML_CHECK(parser); RYML_CHECK(parser->m_evt_handler); Tree tree(parser->callbacks()); substr src = tree.copy_to_arena(json); parse_json_in_place(parser, filename, src, &tree, tree.root_id()); return tree; }
+Tree parse_json_in_arena(Parser *parser,                   csubstr json                         ) { RYML_CHECK(parser); RYML_CHECK(parser->m_evt_handler); Tree tree(parser->callbacks()); substr src = tree.copy_to_arena(json); parse_json_in_place(parser, {}      , src, &tree, tree.root_id()); return tree; }
+
+// parse_json_in_arena() overloads creating a temporary event handler and parser; the source is copied with copy_to_arena() before parsing. This is vertically aligned to highlight the parameter differences.
+void parse_json_in_arena(csubstr filename, csubstr json, Tree *t, id_type node_id) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); substr src = t->copy_to_arena(json); parse_json_in_place(&parser, filename, src, t, node_id); }
+void parse_json_in_arena(                  csubstr json, Tree *t, id_type node_id) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); substr src = t->copy_to_arena(json); parse_json_in_place(&parser, {}      , src, t, node_id); }
+void parse_json_in_arena(csubstr filename, csubstr json, Tree *t                ) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); substr src = t->copy_to_arena(json); parse_json_in_place(&parser, filename, src, t, t->root_id()); }
+void parse_json_in_arena(                  csubstr json, Tree *t                ) { RYML_CHECK(t); Parser::handler_type event_handler(t->callbacks()); Parser parser(&event_handler); substr src = t->copy_to_arena(json); parse_json_in_place(&parser, {}      , src, t, t->root_id()); }
+void parse_json_in_arena(csubstr filename, csubstr json, NodeRef node           ) { RYML_CHECK(!node.invalid()); Parser::handler_type event_handler(node.tree()->callbacks()); Parser parser(&event_handler); substr src = node.tree()->copy_to_arena(json); parse_json_in_place(&parser, filename, src, node.tree(), node.id()); }
+void parse_json_in_arena(                  csubstr json, NodeRef node           ) { RYML_CHECK(!node.invalid()); Parser::handler_type event_handler(node.tree()->callbacks()); Parser parser(&event_handler); substr src = node.tree()->copy_to_arena(json); parse_json_in_place(&parser, {}      , src, node.tree(), node.id()); }
+Tree parse_json_in_arena(csubstr filename, csubstr json                         ) { Parser::handler_type event_handler; Parser parser(&event_handler); Tree tree(parser.callbacks()); substr src = tree.copy_to_arena(json); parse_json_in_place(&parser, filename, src, &tree, tree.root_id()); return tree; }
+Tree parse_json_in_arena(                  csubstr json                         ) { Parser::handler_type event_handler; Parser parser(&event_handler); Tree tree(parser.callbacks()); substr src = tree.copy_to_arena(json); parse_json_in_place(&parser, {}      , src, &tree, tree.root_id()); return tree; }
+
+
+RYML_EXPORT C4_NO_INLINE size_t _find_last_newline_and_larger_indentation(csubstr s, size_t indentation) noexcept
+{ // scanning backwards, returns the position of the first '\n' whose following text starts with more than `indentation` spaces, or npos
+    if(s.len < indentation + 1) // guards the subtraction below against wrapping around
+        return npos;
+    size_t pos = s.len - indentation; // one past the first candidate position
+    while(pos-- != 0) // visits s.len-indentation-1 down to 0
+    {
+        if(s.str[pos] != '\n')
+            continue;
+        const csubstr tail = s.sub(pos + 1);
+        const size_t nonspace = tail.first_not_of(' ');
+        const size_t indent = (nonspace == npos) ? tail.len : nonspace; // an all-space tail counts in full
+        if(indent > indentation)
+            return pos;
+    }
+    return npos;
 }
 
-size_t NodeRef::deserialize_val(c4::fmt::base64_wrapper w) const
+//-----------------------------------------------------------------------------
+
+RYML_EXPORT id_type estimate_tree_capacity(csubstr src) // heuristic node count: 1 for the root plus 1 per '\n', ',', '[' or '{' found in src
 {
-    RYML_ASSERT( ! is_seed());
-    RYML_ASSERT(valid());
-    RYML_ASSERT(get() != nullptr);
-    return from_chars(val(), &w);
+    id_type estimate = 1; // the root
+    for(size_t pos = 0; pos < src.len; ++pos)
+    {
+        const char ch = src.str[pos];
+        estimate += static_cast<id_type>(ch == '\n' || ch == ',' || ch == '[' || ch == '{');
+    }
+    return estimate;
 }
 
 } // namespace yml
@@ -31291,126 +41883,59 @@ size_t NodeRef::deserialize_val(c4::fmt::base64_wrapper w) const
 #endif /* RYML_SINGLE_HDR_DEFINE_NOW */
 
 
-// (end https://github.com/biojppm/rapidyaml/src/c4/yml/node.cpp)
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/parse.cpp)
 
 
 
 //********************************************************************************
 //--------------------------------------------------------------------------------
-// src/c4/yml/preprocess.hpp
-// https://github.com/biojppm/rapidyaml/src/c4/yml/preprocess.hpp
+// src/c4/yml/node.cpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/node.cpp
 //--------------------------------------------------------------------------------
 //********************************************************************************
 
-#ifndef _C4_YML_PREPROCESS_HPP_
-#define _C4_YML_PREPROCESS_HPP_
-
-/** @file preprocess.hpp Functions for preprocessing YAML prior to parsing. */
-
-/** @defgroup Preprocessors Preprocessor functions
- *
- * These are the existing preprocessors:
- *
- * @code{.cpp}
- * size_t preprocess_json(csubstr json, substr buf)
- * size_t preprocess_rxmap(csubstr json, substr buf)
- * @endcode
- */
-
-#ifndef _C4_YML_COMMON_HPP_
-//included above:
-//#include "./common.hpp"
-#endif
+#ifdef RYML_SINGLE_HDR_DEFINE_NOW
 // amalgamate: removed include of
-// https://github.com/biojppm/rapidyaml/src/c4/substr.hpp
-//#include <c4/substr.hpp>
-#if !defined(C4_SUBSTR_HPP_) && !defined(_C4_SUBSTR_HPP_)
-#error "amalgamate: file c4/substr.hpp must have been included at this point"
-#endif /* C4_SUBSTR_HPP_ */
-
+// https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp
+//#include "c4/yml/node.hpp"
+#if !defined(C4_YML_NODE_HPP_) && !defined(_C4_YML_NODE_HPP_)
+#error "amalgamate: file c4/yml/node.hpp must have been included at this point"
+#endif /* C4_YML_NODE_HPP_ */
 
 
 namespace c4 {
 namespace yml {
 
-namespace detail {
-using Preprocessor = size_t(csubstr, substr);
-template<Preprocessor PP, class CharContainer>
-substr preprocess_into_container(csubstr input, CharContainer *out)
-{
-    // try to write once. the preprocessor will stop writing at the end of
-    // the container, but will process all the input to determine the
-    // required container size.
-    size_t sz = PP(input, to_substr(*out));
-    // if the container size is not enough, resize, and run again in the
-    // resized container
-    if(sz > out->size())
-    {
-        out->resize(sz);
-        sz = PP(input, to_substr(*out));
-    }
-    return to_substr(*out).first(sz);
-}
-} // namespace detail
-
-
-//-----------------------------------------------------------------------------
-
-/** @name preprocess_rxmap
- * Convert flow-type relaxed maps (with implicit bools) into strict YAML
- * flow map.
- *
- * @code{.yaml}
- * {a, b, c, d: [e, f], g: {a, b}}
- * # is converted into this:
- * {a: 1, b: 1, c: 1, d: [e, f], g: {a, b}}
- * @endcode
-
- * @note this is NOT recursive - conversion happens only in the top-level map
- * @param rxmap A relaxed map
- * @param buf output buffer
- * @param out output container
- */
 
-//@{
 
-/** Write into a given output buffer. This function is safe to call with
- * empty or small buffers; it won't write beyond the end of the buffer.
- *
- * @return the number of characters required for output
- */
-RYML_EXPORT size_t preprocess_rxmap(csubstr rxmap, substr buf);
 
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
 
-/** Write into an existing container. It is resized to contained the output.
- * @return a substr of the container
- * @overload preprocess_rxmap */
-template<class CharContainer>
-substr preprocess_rxmap(csubstr rxmap, CharContainer *out)
+size_t NodeRef::set_key_serialized(c4::fmt::const_base64_wrapper w) // sets this node's key to the arena serialization of w; returns the serialized length
 {
-    return detail::preprocess_into_container<preprocess_rxmap>(rxmap, out);
+    _apply_seed(); // NOTE(review): presumably materializes a seed (not yet created) node before it is written - confirm in node.hpp
+    csubstr encoded = this->to_arena(w); // serialize w through to_arena()
+    this->set_key(encoded); // the key is the string returned by to_arena()
+    return encoded.len;
 }
 
-
-/** Create a container with the result.
- * @overload preprocess_rxmap */
-template<class CharContainer>
-CharContainer preprocess_rxmap(csubstr rxmap)
+size_t NodeRef::set_val_serialized(c4::fmt::const_base64_wrapper w) // sets this node's val to the arena serialization of w; returns the serialized length
 {
-    CharContainer out;
-    preprocess_rxmap(rxmap, &out);
-    return out;
+    _apply_seed(); // NOTE(review): presumably materializes a seed (not yet created) node before it is written - confirm in node.hpp
+    csubstr encoded = this->to_arena(w); // serialize w through to_arena()
+    this->set_val(encoded); // the val is the string returned by to_arena()
+    return encoded.len;
 }
 
-//@}
-
 } // namespace yml
 } // namespace c4
 
-#endif /* _C4_YML_PREPROCESS_HPP_ */
+#endif /* RYML_SINGLE_HDR_DEFINE_NOW */
 
 
-// (end https://github.com/biojppm/rapidyaml/src/c4/yml/preprocess.hpp)
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/node.cpp)
 
 
 
@@ -31442,6 +41967,7 @@ CharContainer preprocess_rxmap(csubstr rxmap)
 namespace c4 {
 namespace yml {
 
+C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast")
 
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
@@ -31541,6 +42067,7 @@ size_t preprocess_rxmap(csubstr s, substr buf)
     return writer.pos;
 }
 
+C4_SUPPRESS_WARNING_GCC_CLANG_POP
 
 } // namespace yml
 } // namespace c4
@@ -31584,7 +42111,7 @@ namespace c4 {
 namespace yml {
 
 
-void check_invariants(Tree const& t, size_t node=NONE);
+void check_invariants(Tree const& t, id_type node=NONE);
 void check_free_list(Tree const& t);
 void check_arena(Tree const& t);
 
@@ -31593,7 +42120,7 @@ void check_arena(Tree const& t);
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 
-inline void check_invariants(Tree const& t, size_t node)
+inline void check_invariants(Tree const& t, id_type node)
 {
     if(node == NONE)
     {
@@ -31601,8 +42128,8 @@ inline void check_invariants(Tree const& t, size_t node)
         node = t.root_id();
     }
 
-    auto const& n = *t._p(node);
-#ifdef RYML_DBG
+    NodeData const& n = *t._p(node);
+#if defined(RYML_DBG) && 0
     if(n.m_first_child != NONE || n.m_last_child != NONE)
     {
         printf("check(%zu): fc=%zu lc=%zu\n", node, n.m_first_child, n.m_last_child);
@@ -31667,10 +42194,10 @@ inline void check_invariants(Tree const& t, size_t node)
         C4_CHECK(t._p(n.m_next_sibling)->m_next_sibling != node);
     }
 
-    size_t count = 0;
-    for(size_t i = n.m_first_child; i != NONE; i = t.next_sibling(i))
+    id_type count = 0;
+    for(id_type i = n.m_first_child; i != NONE; i = t.next_sibling(i))
     {
-#ifdef RYML_DBG
+#if defined(RYML_DBG) && 0
         printf("check(%zu):               descend to child[%zu]=%zu\n", node, count, i);
 #endif
         auto const& ch = *t._p(i);
@@ -31698,7 +42225,7 @@ inline void check_invariants(Tree const& t, size_t node)
         check_arena(t);
     }
 
-    for(size_t i = t.first_child(node); i != NONE; i = t.next_sibling(i))
+    for(id_type i = t.first_child(node); i != NONE; i = t.next_sibling(i))
     {
         check_invariants(t, i);
     }
@@ -31726,8 +42253,8 @@ inline void check_free_list(Tree const& t)
     //C4_CHECK(head.m_prev_sibling == NONE);
     //C4_CHECK(tail.m_next_sibling == NONE);
 
-    size_t count = 0;
-    for(size_t i = t.m_free_head, prev = NONE; i != NONE; i = t._p(i)->m_next_sibling)
+    id_type count = 0;
+    for(id_type i = t.m_free_head, prev = NONE; i != NONE; i = t._p(i)->m_next_sibling)
     {
         auto const& elm = *t._p(i);
         if(&elm != &head)
@@ -31796,19 +42323,76 @@ inline void check_arena(Tree const& t)
 #endif /* C4_YML_NODE_HPP_ */
 
 
+#ifdef RYML_DBG
+#define _c4dbg_tree(...) print_tree(__VA_ARGS__)
+#define _c4dbg_node(...) print_tree(__VA_ARGS__)
+#else
+#define _c4dbg_tree(...)
+#define _c4dbg_node(...)
+#endif
 
 namespace c4 {
 namespace yml {
 
+C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wold-style-cast")
+C4_SUPPRESS_WARNING_GCC("-Wuseless-cast")
 
-inline size_t print_node(Tree const& p, size_t node, int level, size_t count, bool print_children)
+inline const char* _container_style_code(Tree const& p, id_type node)
+{
+    if(p.is_container(node))
+    {
+        if(p._p(node)->m_type & (FLOW_SL|FLOW_ML))
+        {
+            return "[FLOW]";
+        }
+        if(p._p(node)->m_type & (BLOCK))
+        {
+            return "[BLCK]";
+        }
+    }
+    return "";
+}
+inline char _scalar_code(NodeType masked)
+{
+    if(masked & (KEY_LITERAL|VAL_LITERAL))
+        return '|';
+    if(masked & (KEY_FOLDED|VAL_FOLDED))
+        return '>';
+    if(masked & (KEY_SQUO|VAL_SQUO))
+        return '\'';
+    if(masked & (KEY_DQUO|VAL_DQUO))
+        return '"';
+    if(masked & (KEY_PLAIN|VAL_PLAIN))
+        return '~';
+    return '@';
+}
+inline char _scalar_code_key(NodeType t)
+{
+    return _scalar_code(t & KEY_STYLE);
+}
+inline char _scalar_code_val(NodeType t)
+{
+    return _scalar_code(t & VAL_STYLE);
+}
+inline char _scalar_code_key(Tree const& p, id_type node)
+{
+    return _scalar_code_key(p._p(node)->m_type);
+}
+inline char _scalar_code_val(Tree const& p, id_type node) // style code of the node's *value* scalar
+{
+    return _scalar_code_val(p._p(node)->m_type); // mask with VAL_STYLE (was wrongly using the key overload)
+}
+inline id_type print_node(Tree const& p, id_type node, int level, id_type count, bool print_children)
 {
-    printf("[%zd]%*s[%zd] %p", count, (2*level), "", node, (void*)p.get(node));
+    printf("[%zu]%*s[%zu] %p", (size_t)count, (2*level), "", (size_t)node, (void const*)p.get(node));
     if(p.is_root(node))
     {
         printf(" [ROOT]");
     }
-    printf(" %s:", p.type_str(node));
+    char typebuf[128];
+    csubstr typestr = p.type(node).type_str(typebuf);
+    RYML_CHECK(typestr.str);
+    printf(" %.*s", (int)typestr.len, typestr.str);
     if(p.has_key(node))
     {
         if(p.has_key_anchor(node))
@@ -31819,65 +42403,47 @@ inline size_t print_node(Tree const& p, size_t node, int level, size_t count, bo
         if(p.has_key_tag(node))
         {
             csubstr kt = p.key_tag(node);
-            csubstr k  = p.key(node);
-            printf(" %.*s '%.*s'", (int)kt.len, kt.str, (int)k.len, k.str);
-        }
-        else
-        {
-            csubstr k  = p.key(node);
-            printf(" '%.*s'", (int)k.len, k.str);
+            printf(" <%.*s>", (int)kt.len, kt.str);
         }
+        const char code = _scalar_code_key(p, node);
+        csubstr k  = p.key(node);
+        printf(" %c%.*s%c :", code, (int)k.len, k.str, code);
     }
-    else
-    {
-        RYML_ASSERT( ! p.has_key_tag(node));
-    }
-    if(p.has_val(node))
+    if(p.has_val_anchor(node))
     {
-        if(p.has_val_tag(node))
-        {
-            csubstr vt = p.val_tag(node);
-            csubstr v  = p.val(node);
-            printf(" %.*s '%.*s'", (int)vt.len, vt.str, (int)v.len, v.str);
-        }
-        else
-        {
-            csubstr v  = p.val(node);
-            printf(" '%.*s'", (int)v.len, v.str);
-        }
+        csubstr a = p.val_anchor(node); // anchor name, printed with a leading '&'
+        printf(" &%.*s", (int)a.len, a.str);
     }
-    else
+    if(p.has_val_tag(node))
     {
-        if(p.has_val_tag(node))
-        {
-            csubstr vt = p.val_tag(node);
-            printf(" %.*s", (int)vt.len, vt.str);
-        }
+        csubstr vt = p.val_tag(node);
+        printf(" <%.*s>", (int)vt.len, vt.str);
     }
-    if(p.has_val_anchor(node))
+    if(p.has_val(node))
     {
-        auto &a = p.val_anchor(node);
-        printf(" valanchor='&%.*s'", (int)a.len, a.str);
+        const char code = _scalar_code_val(p, node);
+        csubstr v  = p.val(node);
+        printf(" %c%.*s%c", code, (int)v.len, v.str, code);
     }
-    printf(" (%zd sibs)", p.num_siblings(node));
+    printf("  (%zu sibs)", (size_t)p.num_siblings(node));
 
     ++count;
 
-    if(p.is_container(node))
+    if(!p.is_container(node))
+    {
+        printf("\n");
+    }
+    else
     {
-        printf(" %zd children:\n", p.num_children(node));
+        printf(" (%zu children)\n", (size_t)p.num_children(node));
         if(print_children)
         {
-            for(size_t i = p.first_child(node); i != NONE; i = p.next_sibling(i))
+            for(id_type i = p.first_child(node); i != NONE; i = p.next_sibling(i))
             {
                 count = print_node(p, i, level+1, count, print_children);
             }
         }
     }
-    else
-    {
-        printf("\n");
-    }
 
     return count;
 }
@@ -31887,7 +42453,7 @@ inline size_t print_node(Tree const& p, size_t node, int level, size_t count, bo
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 
-inline void print_node(NodeRef const& p, int level=0)
+inline void print_node(ConstNodeRef const& p, int level=0)
 {
     print_node(*p.tree(), p.id(), level, 0, true);
 }
@@ -31897,21 +42463,38 @@ inline void print_node(NodeRef const& p, int level=0)
 //-----------------------------------------------------------------------------
 //-----------------------------------------------------------------------------
 
-inline size_t print_tree(Tree const& p, size_t node=NONE)
+inline id_type print_tree(const char *message, Tree const& p, id_type node=NONE)
 {
     printf("--------------------------------------\n");
-    size_t ret = 0;
+    if(message != nullptr)
+        printf("%s:\n", message);
+    id_type ret = 0;
     if(!p.empty())
     {
         if(node == NONE)
             node = p.root_id();
         ret = print_node(p, node, 0, 0, true);
     }
-    printf("#nodes=%zd vs #printed=%zd\n", p.size(), ret);
+    printf("#nodes=%zu vs #printed=%zu\n", (size_t)p.size(), (size_t)ret);
     printf("--------------------------------------\n");
     return ret;
 }
 
+inline id_type print_tree(Tree const& p, id_type node=NONE)
+{
+    return print_tree(nullptr, p, node);
+}
+
+// Print the subtree rooted at p, starting at indentation `level`.
+// print_node(ConstNodeRef) forwards print_children=true and therefore
+// already recurses into every descendant; looping over the children here
+// as well would print each subtree again at every nesting depth.
+inline void print_tree(ConstNodeRef const& p, int level)
+{
+    print_node(p, level);
+}
+
+C4_SUPPRESS_WARNING_GCC_CLANG_POP
 
 } /* namespace yml */
 } /* namespace c4 */
@@ -31934,6 +42517,13 @@ inline size_t print_tree(Tree const& p, size_t node=NONE)
 #ifndef _C4_YML_YML_HPP_
 #define _C4_YML_YML_HPP_
 
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/version.hpp
+//#include "c4/yml/version.hpp"
+#if !defined(C4_YML_VERSION_HPP_) && !defined(_C4_YML_VERSION_HPP_)
+#error "amalgamate: file c4/yml/version.hpp must have been included at this point"
+#endif /* C4_YML_VERSION_HPP_ */
+
 // amalgamate: removed include of
 // https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp
 //#include "c4/yml/tree.hpp"
@@ -31955,6 +42545,27 @@ inline size_t print_tree(Tree const& p, size_t node=NONE)
 #error "amalgamate: file c4/yml/emit.hpp must have been included at this point"
 #endif /* C4_YML_EMIT_HPP_ */
 
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/event_handler_tree.hpp
+//#include "c4/yml/event_handler_tree.hpp"
+#if !defined(C4_YML_EVENT_HANDLER_TREE_HPP_) && !defined(_C4_YML_EVENT_HANDLER_TREE_HPP_)
+#error "amalgamate: file c4/yml/event_handler_tree.hpp must have been included at this point"
+#endif /* C4_YML_EVENT_HANDLER_TREE_HPP_ */
+
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/parse_engine.hpp
+//#include "c4/yml/parse_engine.hpp"
+#if !defined(C4_YML_PARSE_ENGINE_HPP_) && !defined(_C4_YML_PARSE_ENGINE_HPP_)
+#error "amalgamate: file c4/yml/parse_engine.hpp must have been included at this point"
+#endif /* C4_YML_PARSE_ENGINE_HPP_ */
+
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/filter_processor.hpp
+//#include "c4/yml/filter_processor.hpp"
+#if !defined(C4_YML_FILTER_PROCESSOR_HPP_) && !defined(_C4_YML_FILTER_PROCESSOR_HPP_)
+#error "amalgamate: file c4/yml/filter_processor.hpp must have been included at this point"
+#endif /* C4_YML_FILTER_PROCESSOR_HPP_ */
+
 // amalgamate: removed include of
 // https://github.com/biojppm/rapidyaml/src/c4/yml/parse.hpp
 //#include "c4/yml/parse.hpp"
@@ -31969,6 +42580,20 @@ inline size_t print_tree(Tree const& p, size_t node=NONE)
 #error "amalgamate: file c4/yml/preprocess.hpp must have been included at this point"
 #endif /* C4_YML_PREPROCESS_HPP_ */
 
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/reference_resolver.hpp
+//#include "c4/yml/reference_resolver.hpp"
+#if !defined(C4_YML_REFERENCE_RESOLVER_HPP_) && !defined(_C4_YML_REFERENCE_RESOLVER_HPP_)
+#error "amalgamate: file c4/yml/reference_resolver.hpp must have been included at this point"
+#endif /* C4_YML_REFERENCE_RESOLVER_HPP_ */
+
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/tag.hpp
+//#include "c4/yml/tag.hpp"
+#if !defined(C4_YML_TAG_HPP_) && !defined(_C4_YML_TAG_HPP_)
+#error "amalgamate: file c4/yml/tag.hpp must have been included at this point"
+#endif /* C4_YML_TAG_HPP_ */
+
 
 #endif // _C4_YML_YML_HPP_
 
@@ -32007,4 +42632,3 @@ using namespace c4;
 
 #endif /* _RYML_SINGLE_HEADER_AMALGAMATED_HPP_ */
 
-
diff --git a/src/flang_salt_instrument_plugin.cpp b/src/flang_salt_instrument_plugin.cpp
index 01541da..ab9493d 100644
--- a/src/flang_salt_instrument_plugin.cpp
+++ b/src/flang_salt_instrument_plugin.cpp
@@ -521,45 +521,45 @@ namespace salt::fortran {
             std::stringstream ss;
 
             // Access the "Fortran" node
-            ryml::NodeRef fortranNode = tree[SALT_FORTRAN_KEY];
+            ryml::ConstNodeRef fortranNode = tree[SALT_FORTRAN_KEY];
 
             // Validate that the "Fortran" node exists
-            if (!fortranNode.valid()) {
+            if (fortranNode.invalid()) {
                 llvm::errs() << "ERROR: '" << SALT_FORTRAN_KEY << "' key not found in the configuration file.\n";
                 std::exit(-3);
             }
 
             // Access and process the "program_begin_insert" node
-            ryml::NodeRef programBeginNode = fortranNode[SALT_FORTRAN_PROGRAM_BEGIN_KEY];
-            if (!programBeginNode.valid()) {
+            ryml::ConstNodeRef programBeginNode = fortranNode[SALT_FORTRAN_PROGRAM_BEGIN_KEY];
+            if (programBeginNode.invalid()) {
                 llvm::errs() << "ERROR: '" << SALT_FORTRAN_PROGRAM_BEGIN_KEY << "' key not found under 'Fortran'.\n";
                 std::exit(-3);
             }
-            for (const ryml::NodeRef child: programBeginNode.children()) {
+            for (const ryml::ConstNodeRef child: programBeginNode.children()) {
                 ss << child.val() << "\n";
             }
             map.emplace(InstrumentationPointType::PROGRAM_BEGIN, ss.str());
             ss.str(""s);
 
             // Access and process the "procedure_begin_insert" node
-            ryml::NodeRef procedureBeginNode = fortranNode[SALT_FORTRAN_PROCEDURE_BEGIN_KEY];
-            if (!procedureBeginNode.valid()) {
+            ryml::ConstNodeRef procedureBeginNode = fortranNode[SALT_FORTRAN_PROCEDURE_BEGIN_KEY];
+            if (procedureBeginNode.invalid()) {
                 llvm::errs() << "ERROR: '" << SALT_FORTRAN_PROCEDURE_BEGIN_KEY << "' key not found under 'Fortran'.\n";
                 std::exit(-3);
             }
-            for (const ryml::NodeRef child: procedureBeginNode.children()) {
+            for (const ryml::ConstNodeRef child: procedureBeginNode.children()) {
                 ss << child.val() << "\n";
             }
             map.emplace(InstrumentationPointType::PROCEDURE_BEGIN, ss.str());
             ss.str(""s);
 
             // Access and process the "procedure_end_insert" node
-            ryml::NodeRef procedureEndNode = fortranNode[SALT_FORTRAN_PROCEDURE_END_KEY];
-            if (!procedureEndNode.valid()) {
+            ryml::ConstNodeRef procedureEndNode = fortranNode[SALT_FORTRAN_PROCEDURE_END_KEY];
+            if (procedureEndNode.invalid()) {
                 llvm::errs() << "ERROR: '" << SALT_FORTRAN_PROCEDURE_END_KEY << "' key not found under 'Fortran'.\n";
                 std::exit(-3);
             }
-            for (const ryml::NodeRef child: procedureEndNode.children()) {
+            for (const ryml::ConstNodeRef child: procedureEndNode.children()) {
                 ss << child.val() << "\n";
             }
             map.emplace(InstrumentationPointType::PROCEDURE_END, ss.str());

From 834194f64697a371287b84418d8e2a0f080479be Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Mon, 27 Jan 2025 17:21:30 -0500
Subject: [PATCH 125/135] Make TAU optional to build SALT

---
 CMakeLists.txt | 421 +++++++++++++++++++++++--------------------------
 1 file changed, 200 insertions(+), 221 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 71cb91e..91f9bf8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -344,13 +344,13 @@ if(NOT DEFINED ENV{TAU_ROOT})
     PATH_SUFFIXES ${TAU_PATH_SUFFIXES}
   )
   if(NOT TAU_EXEC)
-    message(FATAL_ERROR "TAU not found. Please set TAU_ROOT to the TAU installation directory.")
+    message(STATUS "TAU not found. Please set TAU_ROOT to the TAU installation directory.")
   else()
     get_filename_component(TAU_ROOT ${TAU_EXEC} DIRECTORY) # This will be a bin directory
     get_filename_component(TAU_ROOT ${TAU_ROOT} DIRECTORY) # This might be an arch directory
     string(REGEX REPLACE "(/x86_64$)|(/apple$)|(/craycnl$)" "" TAU_ROOT ${TAU_ROOT})
   endif()
-  else()
+else()
     set(TAU_ROOT $ENV{TAU_ROOT} CACHE PATH "TAU Root Directory")
 endif()
 
@@ -360,62 +360,38 @@ message(STATUS "TAU_ROOT: ${TAU_ROOT}")
 find_program(TAU_EXEC tau_exec
   PATHS ${TAU_ROOT}/x86_64 ${TAU_ROOT}/apple ${TAU_ROOT}/craycnl
   PATH_SUFFIXES bin
-  REQUIRED
-)
-get_filename_component(TAU_ARCH_DIR ${TAU_EXEC} DIRECTORY)
-get_filename_component(TAU_ARCH_DIR ${TAU_ARCH_DIR} DIRECTORY)
-message(STATUS "TAU_ARCH_DIR: ${TAU_ARCH_DIR}")
-
-find_file(TAU_CLANG_MAKEFILE
-  NAMES Makefile.tau-clang-pthread
-  PATHS ${TAU_ARCH_DIR}  PATH_SUFFIXES lib
-  REQUIRED
-)
-find_file(TAU_GCC_MAKEFILE
-  NAMES Makefile.tau-pthread Makefile.tau-pthread-pdt
-  PATHS ${TAU_ARCH_DIR}  PATH_SUFFIXES lib
-  REQUIRED
-)
-find_program(TAUCC tau_cc.sh
-  PATHS ${TAU_ARCH_DIR}  PATH_SUFFIXES bin
-  REQUIRED
 )
-find_program(TAUCXX tau_cxx.sh
-  PATHS ${TAU_ARCH_DIR}  PATH_SUFFIXES bin
-  REQUIRED
-)
-find_program(TAUF90 tau_f90.sh
-  PATHS ${TAU_ARCH_DIR}  PATH_SUFFIXES bin
-  REQUIRED
-)
-file(GLOB TAU_GCC_LIBUNWIND_DIR ${TAU_ARCH_DIR}/libunwind-*-gcc)
-if(NOT TAU_GCC_LIBUNWIND_DIR)
-  message(FATAL_ERROR "libunwind not found for TAU's gcc build")
-endif()
-find_path(TAU_GCC_LIBUNWIND_INCLUDE_DIR
-  NAMES include
-  PATHS ${TAU_GCC_LIBUNWIND_DIR}
-  REQUIRED
-)
-file(GLOB TAU_CLANG_LIBUNWIND_DIR ${TAU_ARCH_DIR}/libunwind-*-clang)
-if(NOT TAU_CLANG_LIBUNWIND_DIR)
-  message(FATAL_ERROR "libunwind not found for TAU's clang build")
+if(TAU_EXEC)
+  set(HAVE_TAU TRUE)
+  get_filename_component(TAU_ARCH_DIR ${TAU_EXEC} DIRECTORY)
+  get_filename_component(TAU_ARCH_DIR ${TAU_ARCH_DIR} DIRECTORY)
+  message(STATUS "TAU_ARCH_DIR: ${TAU_ARCH_DIR}")
+
+  find_file(TAU_CLANG_MAKEFILE
+    NAMES Makefile.tau-clang-pthread
+    PATHS ${TAU_ARCH_DIR}  PATH_SUFFIXES lib
+    REQUIRED
+  )
+  find_file(TAU_GCC_MAKEFILE
+    NAMES Makefile.tau-pthread Makefile.tau-pthread-pdt
+    PATHS ${TAU_ARCH_DIR}  PATH_SUFFIXES lib
+    REQUIRED
+  )
+  find_program(TAUCC tau_cc.sh
+    PATHS ${TAU_ARCH_DIR}  PATH_SUFFIXES bin
+    REQUIRED
+  )
+  find_program(TAUCXX tau_cxx.sh
+    PATHS ${TAU_ARCH_DIR}  PATH_SUFFIXES bin
+    REQUIRED
+  )
+  find_program(TAUF90 tau_f90.sh
+    PATHS ${TAU_ARCH_DIR}  PATH_SUFFIXES bin
+    REQUIRED
+  )
+else()
+  set(HAVE_TAU FALSE)
 endif()
-find_path(TAU_CLANG_LIBUNWIND_INCLUDE_DIR
-  NAMES include
-  PATHS ${TAU_CLANG_LIBUNWIND_DIR}
-  REQUIRED
-)
-find_path(TAU_GCC_LIBDWARF_INCLUDE_DIR
-  NAMES include
-  PATHS ${TAU_ARCH_DIR}/libdwarf-gcc
-  REQUIRED
-)
-find_path(TAU_CLANG_LIBDWARF_INCLUDE_DIR
-  NAMES include
-  PATHS ${TAU_ARCH_DIR}/libdwarf-clang
-  REQUIRED
-)
 
 #---------------
 # Tests
@@ -428,7 +404,6 @@ find_path(TAU_CLANG_LIBDWARF_INCLUDE_DIR
 # 5. TAU is installed with the following configuratins:
 #    - Makefile.tau-clang-pthread
 #    - Makefile.tau-pthread
-# 6. SALT parser configuration files are in ${CMAKE_SOURCE_DIR}/config_files
 
 # Care has been taken to break test inter-dependencies where possible and
 # express them explicitly with the DEPENDS test property.
@@ -505,120 +480,122 @@ foreach(comp IN LISTS compilers_to_test)
     COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_BINARY_DIR}/${comp})
 endforeach()
 
-function(compile_instrumented test_src)
-  # This is the 2nd of 2 functions for adding tests.
-  # It is opinionated and perhaps somewhat fragile.
-  # Using the optional argument in this function and the previous one
-  # is untested and may cause issues.
-  # A more robust design might be to incorporate both functions into a
-  # single function.
-  get_filename_component(TEST_BASE_NAME ${test_src} NAME_WE)
-  set(TEST_NAME "${TEST_BASE_NAME}")
-  # This next line depends on the previous function implementation
-  set(depends_on instrument_${TEST_BASE_NAME}) # Right now this is just to prevent tests from running at the same time from clobbering the instrumented source file
-  get_filename_component(TEST_LANG ${test_src} LAST_EXT)
-  string(REPLACE "." "" TEST_LANG ${TEST_LANG})
-  set(extra_args ${ARGN})
-  list(LENGTH extra_args n_extra_args)
-  if(n_extra_args GREATER 1)
-    message(AUTHOR_WARNING
-      "Incorrect number of arguments (1 + ${n_extra_args}) passed to add_instrumentor_test!")
-    return()
-  endif()
-  if(n_extra_args EQUAL 1)
-    set(TEST_NAME ${ARGV1})
+if(HAVE_TAU)
+  function(compile_instrumented test_src)
+    # This is the 2nd of 2 functions for adding tests.
+    # It is opinionated and perhaps somewhat fragile.
+    # Using the optional argument in this function and the previous one
+    # is untested and may cause issues.
+    # A more robust design might be to incorporate both functions into a
+    # single function.
+    get_filename_component(TEST_BASE_NAME ${test_src} NAME_WE)
+    set(TEST_NAME "${TEST_BASE_NAME}")
     # This next line depends on the previous function implementation
-    set(depends_on instrument_${ARGV1})
-  endif()
+    set(depends_on instrument_${TEST_BASE_NAME}) # Right now this is just to prevent tests from running at the same time from clobbering the instrumented source file
+    get_filename_component(TEST_LANG ${test_src} LAST_EXT)
+    string(REPLACE "." "" TEST_LANG ${TEST_LANG})
+    set(extra_args ${ARGN})
+    list(LENGTH extra_args n_extra_args)
+    if(n_extra_args GREATER 1)
+      message(AUTHOR_WARNING
+        "Incorrect number of arguments (1 + ${n_extra_args}) passed to compile_instrumented!")
+      return()
+    endif()
+    if(n_extra_args EQUAL 1)
+      set(TEST_NAME ${ARGV1})
+      # This next line depends on the previous function implementation
+      set(depends_on instrument_${ARGV1})
+    endif()
 
-  if(${TEST_LANG} STREQUAL "c")
-    set(TAUC ${TAUCC})
-  elseif(${TEST_LANG} STREQUAL "cpp")
-    set(TAUC ${TAUCXX})
-  else()
-    message( FATAL_ERROR "Unknown test source file extension: ${TEST_LANG}")
-  endif()
+    if(${TEST_LANG} STREQUAL "c")
+      set(TAUC ${TAUCC})
+    elseif(${TEST_LANG} STREQUAL "cpp")
+      set(TAUC ${TAUCXX})
+    else()
+      message( FATAL_ERROR "Unknown test source file extension: ${TEST_LANG}")
+    endif()
 
-  set(test_path ${CMAKE_SOURCE_DIR}/tests/${TEST_BASE_NAME}.${TEST_LANG})
-  set(TAUC_OPTS -optVerbose -optSaltInst -optSaltParser=${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}/saltfm -optSaltConfigFile=${CMAKE_SOURCE_DIR}/config_files/tau_config.yaml)
-  set(compile_opts ${TAU_COMPILE_OPTIONS})
-  foreach(comp IN LISTS compilers_to_test)
-    set(lower_comp ${comp})
-    string(TOUPPER ${comp} comp)
-    # Fixture to cleanup old instrumented source, object files, and executables
-    add_test(NAME rm_${lower_comp}_${TEST_NAME}_objects
-      COMMAND
-      ${CMAKE_COMMAND} -E rm -rf ${TEST_BASE_NAME}.o ${TEST_BASE_NAME}.inst.o ${TEST_BASE_NAME}.inst.${TEST_LANG}
-      WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${comp}
-    )
-    set_tests_properties(rm_${lower_comp}_${TEST_NAME}_objects
-      PROPERTIES
-      FIXTURES_SETUP clean_${lower_comp}_${TEST_NAME}_objects
-      DEPENDS setup_${comp}_dir
-    )
-    # Test to actually instrument and build the test source using TAU compiler wrappers & slat parser
-    add_test(NAME compile_${lower_comp}_${TEST_NAME}
-      COMMAND
-      ${TAUC} ${TAUC_OPTS} ${compiler_opts} -o ${TEST_BASE_NAME}.${lower_comp} ${test_path}
-      WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${comp}
-    )
-    set_tests_properties(compile_${lower_comp}_${TEST_NAME}
-      PROPERTIES
-      ENVIRONMENT TAU_MAKEFILE=${TAU_${comp}_MAKEFILE}
-      FIXTURES_REQUIRED clean_${lower_comp}_${TEST_NAME}_objects
-      DEPENDS ${depends_on}
-      FAIL_REGULAR_EXPRESSION "[Dd]isabling instrumentation of source code;[Ss]witching to compiler-based instrumentation;[Cc]ompiling with [Nn]on-[Ii]nstrumented [Rr]egular [Cc]ode;[Ee]rror:"
-    )
-    # Fixture to cleanup old profile directories
-    add_test(NAME rm_old_${lower_comp}_${TEST_NAME}_profiles
-      COMMAND
-      ${CMAKE_COMMAND} -E rm -rf ${TEST_BASE_NAME}.d
-      WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${comp}
-    )
-    set_tests_properties(rm_old_${lower_comp}_${TEST_NAME}_profiles
-      PROPERTIES
-      FIXTURES_SETUP clean_${lower_comp}_${TEST_NAME}_profiles
-      DEPENDS setup_${comp}_dir
-    )
-    add_test(NAME mkdir_${TEST_BASE_NAME}.${lower_comp}.d
-      COMMAND
-      ${CMAKE_COMMAND} -E make_directory ${TEST_BASE_NAME}.d
-      WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${comp}
-    )
-    set_tests_properties(mkdir_${TEST_BASE_NAME}.${lower_comp}.d
-      PROPERTIES
-      FIXTURES_SETUP clean_${lower_comp}_${TEST_NAME}_profiles
-      DEPENDS rm_old_${lower_comp}_${TEST_NAME}_profiles
-    )
-    # Profile w/ TAU and Verify profiles are created
-    add_test(NAME run_${lower_comp}_${TEST_NAME}
-      COMMAND
-      ${TAU_EXEC} -T serial,pthread ./${TEST_BASE_NAME}.${lower_comp}
-      WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${comp}
-    )
-    set_tests_properties(run_${lower_comp}_${TEST_NAME}
-      PROPERTIES
-      ENVIRONMENT "TAU_MAKEFILE=${TAU_${comp}_MAKEFILE};PROFILEDIR=${TEST_BASE_NAME}.d"
-      DEPENDS compile_${lower_comp}_${TEST_NAME}
-      FIXTURES_REQUIRED clean_${lower_comp}_${TEST_NAME}_profiles
-      FAIL_REGULAR_EXPRESSION "[Cc]ommand not found;[Ss]egmentation;[Ff]ault;[Ee]rror"
-    )
-    add_test(NAME check_${lower_comp}_${TEST_NAME}_profile
-      COMMAND ${CMAKE_COMMAND} -E cat ./${TEST_BASE_NAME}.d/profile.0.0.0
-      WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${comp}
-    )
-    set_tests_properties(check_${lower_comp}_${TEST_NAME}_profile
-      PROPERTIES
-      PASS_REGULAR_EXPRESSION "GROUP=\"TAU_DEFAULT\""
-      FAIL_REGULAR_EXPRESSION "addr=\<0x"
-      DEPENDS run_${lower_comp}_${TEST_NAME}
-    )
-  endforeach()
-endfunction()
+    set(test_path ${CMAKE_SOURCE_DIR}/tests/${TEST_BASE_NAME}.${TEST_LANG})
+    set(TAUC_OPTS -optVerbose -optSaltInst -optSaltParser=${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}/saltfm -optSaltConfigFile=${CMAKE_SOURCE_DIR}/config_files/tau_config.yaml)
+    set(compile_opts ${TAU_COMPILE_OPTIONS})
+    foreach(comp IN LISTS compilers_to_test)
+      set(lower_comp ${comp})
+      string(TOUPPER ${comp} comp)
+      # Fixture to cleanup old instrumented source, object files, and executables
+      add_test(NAME rm_${lower_comp}_${TEST_NAME}_objects
+        COMMAND
+        ${CMAKE_COMMAND} -E rm -rf ${TEST_BASE_NAME}.o ${TEST_BASE_NAME}.inst.o ${TEST_BASE_NAME}.inst.${TEST_LANG}
+        WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${comp}
+      )
+      set_tests_properties(rm_${lower_comp}_${TEST_NAME}_objects
+        PROPERTIES
+        FIXTURES_SETUP clean_${lower_comp}_${TEST_NAME}_objects
+        DEPENDS setup_${comp}_dir
+      )
+      # Test to actually instrument and build the test source using TAU compiler wrappers & SALT parser
+      add_test(NAME compile_${lower_comp}_${TEST_NAME}
+        COMMAND
+        ${TAUC} ${TAUC_OPTS} ${compile_opts} -o ${TEST_BASE_NAME}.${lower_comp} ${test_path}
+        WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${comp}
+      )
+      set_tests_properties(compile_${lower_comp}_${TEST_NAME}
+        PROPERTIES
+        ENVIRONMENT TAU_MAKEFILE=${TAU_${comp}_MAKEFILE}
+        FIXTURES_REQUIRED clean_${lower_comp}_${TEST_NAME}_objects
+        DEPENDS ${depends_on}
+        FAIL_REGULAR_EXPRESSION "[Dd]isabling instrumentation of source code;[Ss]witching to compiler-based instrumentation;[Cc]ompiling with [Nn]on-[Ii]nstrumented [Rr]egular [Cc]ode;[Ee]rror:"
+      )
+      # Fixture to cleanup old profile directories
+      add_test(NAME rm_old_${lower_comp}_${TEST_NAME}_profiles
+        COMMAND
+        ${CMAKE_COMMAND} -E rm -rf ${TEST_BASE_NAME}.d
+        WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${comp}
+      )
+      set_tests_properties(rm_old_${lower_comp}_${TEST_NAME}_profiles
+        PROPERTIES
+        FIXTURES_SETUP clean_${lower_comp}_${TEST_NAME}_profiles
+        DEPENDS setup_${comp}_dir
+      )
+      add_test(NAME mkdir_${TEST_BASE_NAME}.${lower_comp}.d
+        COMMAND
+        ${CMAKE_COMMAND} -E make_directory ${TEST_BASE_NAME}.d
+        WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${comp}
+      )
+      set_tests_properties(mkdir_${TEST_BASE_NAME}.${lower_comp}.d
+        PROPERTIES
+        FIXTURES_SETUP clean_${lower_comp}_${TEST_NAME}_profiles
+        DEPENDS rm_old_${lower_comp}_${TEST_NAME}_profiles
+      )
+      # Profile w/ TAU and Verify profiles are created
+      add_test(NAME run_${lower_comp}_${TEST_NAME}
+        COMMAND
+        ${TAU_EXEC} -T serial,pthread ./${TEST_BASE_NAME}.${lower_comp}
+        WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${comp}
+      )
+      set_tests_properties(run_${lower_comp}_${TEST_NAME}
+        PROPERTIES
+        ENVIRONMENT "TAU_MAKEFILE=${TAU_${comp}_MAKEFILE};PROFILEDIR=${TEST_BASE_NAME}.d"
+        DEPENDS compile_${lower_comp}_${TEST_NAME}
+        FIXTURES_REQUIRED clean_${lower_comp}_${TEST_NAME}_profiles
+        FAIL_REGULAR_EXPRESSION "[Cc]ommand not found;[Ss]egmentation;[Ff]ault;[Ee]rror"
+      )
+      add_test(NAME check_${lower_comp}_${TEST_NAME}_profile
+        COMMAND ${CMAKE_COMMAND} -E cat ./${TEST_BASE_NAME}.d/profile.0.0.0
+        WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${comp}
+      )
+      set_tests_properties(check_${lower_comp}_${TEST_NAME}_profile
+        PROPERTIES
+        PASS_REGULAR_EXPRESSION "GROUP=\"TAU_DEFAULT\""
+        FAIL_REGULAR_EXPRESSION "addr=\<0x"
+        DEPENDS run_${lower_comp}_${TEST_NAME}
+      )
+    endforeach()
+  endfunction()
 
-foreach(test_source IN LISTS TESTS_LIST)
-  compile_instrumented(${test_source})
-endforeach()
+  foreach(test_source IN LISTS TESTS_LIST)
+    compile_instrumented(${test_source})
+  endforeach()
+endif()
 
 # Add some Fortran tests for SALT-FM
 set(FORTRAN_TESTS_SOURCES_LIST
@@ -666,59 +643,61 @@ foreach(compiler IN LISTS fortran_compilers_to_test)
   )
 endforeach()
 
-foreach(test_source IN LISTS FORTRAN_TESTS_SOURCES_LIST)
-  # Get the name of the instrumented source file
-  get_filename_component(TEST_BASE_NAME ${test_source} NAME_WLE)
-  get_filename_component(TEST_LANG ${test_source} LAST_EXT)
-  # fparse-llvm is adding preprocessor directives and should emit uppercase file extensions (e.g., .F90)
-  string(TOUPPER ${TEST_LANG} TEST_LANG)
-  set(TEST_INST_SOURCE ${TEST_BASE_NAME}.inst${TEST_LANG})
-
-  foreach(compiler IN LISTS fortran_compilers_to_test)
-    STRING(TOUPPER ${compiler} upper_comp)
-    if(${compiler} STREQUAL "gfortran")
-      set(mapped_comp GCC)
-      set(EXTRA_FLAGS -Wpedantic -Wextra -Wno-missing-include-dirs -Werror)
-    elseif(${compiler} STREQUAL "flang-new")
-      set(mapped_comp CLANG)
-      set(EXTRA_FLAGS -Werror)
-    elseif(${compiler} STREQUAL "flang")
-      set(mapped_comp CLANG)
-      set(EXTRA_FLAGS -Werror)
-    else()
-      message(FATAL_ERROR "Unknown compiler: ${compiler}")
-    endif()
-    add_test(NAME compile_${upper_comp}_${test_source}
-      COMMAND ${TAUF90} ${TAU_F90_OPTS} -o ${TEST_BASE_NAME} -Wall ${EXTRA_FLAGS} ${CMAKE_BINARY_DIR}/${TEST_INST_SOURCE}
-      WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${upper_comp}
+if(HAVE_TAU)
+  foreach(test_source IN LISTS FORTRAN_TESTS_SOURCES_LIST)
+    # Get the name of the instrumented source file
+    get_filename_component(TEST_BASE_NAME ${test_source} NAME_WLE)
+    get_filename_component(TEST_LANG ${test_source} LAST_EXT)
+    # fparse-llvm is adding preprocessor directives and should emit uppercase file extensions (e.g., .F90)
+    string(TOUPPER ${TEST_LANG} TEST_LANG)
+    set(TEST_INST_SOURCE ${TEST_BASE_NAME}.inst${TEST_LANG})
+
+    foreach(compiler IN LISTS fortran_compilers_to_test)
+      STRING(TOUPPER ${compiler} upper_comp)
+      if(${compiler} STREQUAL "gfortran")
+        set(mapped_comp GCC)
+        set(EXTRA_FLAGS -Wpedantic -Wextra -Wno-missing-include-dirs -Werror)
+      elseif(${compiler} STREQUAL "flang-new")
+        set(mapped_comp CLANG)
+        set(EXTRA_FLAGS -Werror)
+      elseif(${compiler} STREQUAL "flang")
+        set(mapped_comp CLANG)
+        set(EXTRA_FLAGS -Werror)
+      else()
+        message(FATAL_ERROR "Unknown compiler: ${compiler}")
+      endif()
+      add_test(NAME compile_${upper_comp}_${test_source}
+        COMMAND ${TAUF90} ${TAU_F90_OPTS} -o ${TEST_BASE_NAME} -Wall ${EXTRA_FLAGS} ${CMAKE_BINARY_DIR}/${TEST_INST_SOURCE}
+        WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${upper_comp}
+        )
+      set_tests_properties(compile_${upper_comp}_${test_source}
+        PROPERTIES
+        ENVIRONMENT "TAU_MAKEFILE=${TAU_${mapped_comp}_MAKEFILE}"
+        DEPENDS "instrument_${test_source}"
+        FIXTURES_REQUIRED ${upper_comp}_dir
+        FAIL_REGULAR_EXPRESSION "[^W][Ee]rror"
       )
-    set_tests_properties(compile_${upper_comp}_${test_source}
-      PROPERTIES
-      ENVIRONMENT "TAU_MAKEFILE=${TAU_${mapped_comp}_MAKEFILE}"
-      DEPENDS "instrument_${test_source}"
-      FIXTURES_REQUIRED ${upper_comp}_dir
-      FAIL_REGULAR_EXPRESSION "[^W][Ee]rror"
-    )
-    # Profile with TAU and Verify profiles are created
-    add_test(NAME run_${upper_comp}_${test_source}
-      COMMAND ${TAU_EXEC} -T serial,pthread ./${TEST_BASE_NAME}
-      WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${upper_comp}
-    )
-    set_tests_properties(run_${upper_comp}_${test_source}
-      PROPERTIES
-      ENVIRONMENT "TAU_MAKEFILE=${TAU_${mapped_comp}_MAKEFILE};PROFILEDIR=${TEST_BASE_NAME}.d"
-      DEPENDS compile_${upper_comp}_${test_source}
-      FAIL_REGULAR_EXPRESSION "[Cc]ommand not found;[Ss]egmentation;[Ff]ault;[Ee]rror"
-    )
-    add_test(NAME check_${upper_comp}_${test_source}_profile
-      COMMAND ${CMAKE_COMMAND} -E cat ./${TEST_BASE_NAME}.d/profile.0.0.0
-      WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${upper_comp}
-    )
-    set_tests_properties(check_${upper_comp}_${test_source}_profile
-      PROPERTIES
-      PASS_REGULAR_EXPRESSION "GROUP=\"TAU_DEFAULT\""
-      FAIL_REGULAR_EXPRESSION "addr=\<0x"
-      DEPENDS run_${upper_comp}_${test_source}
-    )
+      # Profile with TAU and Verify profiles are created
+      add_test(NAME run_${upper_comp}_${test_source}
+        COMMAND ${TAU_EXEC} -T serial,pthread ./${TEST_BASE_NAME}
+        WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${upper_comp}
+      )
+      set_tests_properties(run_${upper_comp}_${test_source}
+        PROPERTIES
+        ENVIRONMENT "TAU_MAKEFILE=${TAU_${mapped_comp}_MAKEFILE};PROFILEDIR=${TEST_BASE_NAME}.d"
+        DEPENDS compile_${upper_comp}_${test_source}
+        FAIL_REGULAR_EXPRESSION "[Cc]ommand not found;[Ss]egmentation;[Ff]ault;[Ee]rror"
+      )
+      add_test(NAME check_${upper_comp}_${test_source}_profile
+        COMMAND ${CMAKE_COMMAND} -E cat ./${TEST_BASE_NAME}.d/profile.0.0.0
+        WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${upper_comp}
+      )
+      set_tests_properties(check_${upper_comp}_${test_source}_profile
+        PROPERTIES
+        PASS_REGULAR_EXPRESSION "GROUP=\"TAU_DEFAULT\""
+        FAIL_REGULAR_EXPRESSION "addr=\<0x"
+        DEPENDS run_${upper_comp}_${test_source}
+      )
+    endforeach()
   endforeach()
-endforeach()
+endif()
\ No newline at end of file

From 206a2b56315672baa01c685b157a887109579a0b Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Mon, 27 Jan 2025 17:36:45 -0500
Subject: [PATCH 126/135] Remove explicit location of config files in ctest
 tests

The build directory now mimics the install directory, and the
instrumentors should be able to locate the default config file on
their own.
---
 CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 91f9bf8..02ee4e7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -516,7 +516,7 @@ if(HAVE_TAU)
     endif()
 
     set(test_path ${CMAKE_SOURCE_DIR}/tests/${TEST_BASE_NAME}.${TEST_LANG})
-    set(TAUC_OPTS -optVerbose -optSaltInst -optSaltParser=${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}/saltfm -optSaltConfigFile=${CMAKE_SOURCE_DIR}/config_files/tau_config.yaml)
+    set(TAUC_OPTS -optVerbose -optSaltInst -optSaltParser=${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}/saltfm)
     set(compile_opts ${TAU_COMPILE_OPTIONS})
     foreach(comp IN LISTS compilers_to_test)
       set(lower_comp ${comp})
@@ -626,7 +626,7 @@ foreach(test_source IN LISTS FORTRAN_TESTS_SOURCES_LIST)
   set_tests_properties(instrument_${test_source}
     PROPERTIES
     REQUIRED_FILES "${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}/fparse-llvm;${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}/saltfm"
-    ENVIRONMENT "SALT_FORTRAN_CONFIG_FILE=${CMAKE_SOURCE_DIR}/config_files/tau_config.yaml;SALT_FORTRAN_VERBOSE=1"
+    ENVIRONMENT "SALT_FORTRAN_VERBOSE=1"
     PASS_REGULAR_EXPRESSION "SALT Instrumentor Plugin finished"
   )
 endforeach()

From 0842708df787fd78a131027bc2d054e5a62f47f5 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Mon, 27 Jan 2025 20:24:26 -0500
Subject: [PATCH 127/135] Turn off debug output in saltfm.in

---
 src/saltfm.in | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/saltfm.in b/src/saltfm.in
index 6f62b57..53d38f2 100755
--- a/src/saltfm.in
+++ b/src/saltfm.in
@@ -20,7 +20,7 @@ set -o errexit
 set -o nounset
 set -o pipefail
 #set -o verbose
-set -o xtrace
+#set -o xtrace
 
 readonly _VERSION=@SALT_VERSION_MAJOR@.@SALT_VERSION_MINOR@
 # get the absolute path of this script

From ec085bbf5670e77b926bbd6773d154c32db11211 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Mon, 27 Jan 2025 20:56:02 -0500
Subject: [PATCH 128/135] Make fparse-llvm better match the behavior of
 cparse-llvm

---
 src/fparse-llvm.in | 34 ++++++++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

diff --git a/src/fparse-llvm.in b/src/fparse-llvm.in
index 231eebc..8dcab69 100755
--- a/src/fparse-llvm.in
+++ b/src/fparse-llvm.in
@@ -106,8 +106,8 @@ for flag in "${_WHITELISTED_FLAGS[@]:1}"; do
     _WHITELISTED_REGEX="${_WHITELISTED_REGEX}|(${flag})"
 done
 declare -r -a _BLACKLISTED_FLAGS=(
-    "-Wl,.*"
-    "--"
+    "^-Wl,.*"
+    '^--$'
 )
 _BLACKLISTED_REGEX="(${_BLACKLISTED_FLAGS[0]})"
 for flag in "${_BLACKLISTED_FLAGS[@]:1}"; do
@@ -156,6 +156,9 @@ done
 
 args=()
 expecting_arg_to_forward=false
+expecting_output_file=false
+expecting_config_file=false
+expecting_select_file=false
 show=false
 for arg in "$@"; do
     #echo "working on arg: $arg"
@@ -171,10 +174,33 @@ for arg in "$@"; do
         expecting_arg_to_forward=false
         shift
         #echo "args remaining: $*"
+    elif $expecting_output_file; then
+        output_file="$arg"
+        expecting_output_file=false
+        shift
+        #echo "args remaining: $*"
+    elif $expecting_config_file; then
+        FORTRAN_CONFIG_FILE="$arg"
+        expecting_config_file=false
+        shift
+        #echo "args remaining: $*"
+    elif $expecting_select_file; then
+        select_file="$arg"
+        expecting_select_file=false
+        shift
+        #echo "args remaining: $*"
+    elif [[ $arg == --tau_output ]]; then
+        expecting_output_file=true
+        shift || true
+        #echo "args remaining: $*"
     elif [[ $arg == --tau_output=* ]]; then
         output_file="${arg#--tau_output=}"
         shift || true
         #echo "args remaining: $*"
+    elif [[ $arg == --tau_select_file ]]; then
+        expecting_select_file=true
+        shift || true
+        #echo "args remaining: $*"
     elif [[ $arg == --tau_select_file=* ]]; then
         select_file="${arg#--tau_select_file=}"
         shift || true
@@ -187,6 +213,10 @@ for arg in "$@"; do
         show=true
         shift || true
         #echo "args remaining: $*"
+    elif [[ $arg == --config_file ]]; then
+        expecting_config_file=true
+        shift || true
+        #echo "args remaining: $*"
     elif [[ $arg == --config_file=* ]]; then
         FORTRAN_CONFIG_FILE="${arg#--config_file=}"
         shift || true

From fd058ab47cb648c44b50faeb91c380a83aa784a7 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Mon, 27 Jan 2025 21:02:33 -0500
Subject: [PATCH 129/135] Tidy testing & allow configure w/o TAU and/or Fortran

---
 CMakeLists.txt | 223 ++++++++++++++++++++++++++-----------------------
 1 file changed, 118 insertions(+), 105 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 02ee4e7..c4b20d0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -238,7 +238,7 @@ target_link_options(cparse-llvm PUBLIC -Wl,--as-needed -Wl,--no-allow-shlib-unde
 target_compile_definitions(cparse-llvm PUBLIC $<$<CONFIG:Debug>:DEBUG_NO_WAY>)
 # Install the target
 install(TARGETS cparse-llvm DESTINATION ${CMAKE_INSTALL_BINDIR})
-set_target_properties(cparse-llvm PROPERTIES 
+set_target_properties(cparse-llvm PROPERTIES
   RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}")
 
 #------------------------------------------------------
@@ -267,7 +267,8 @@ endif()
 
 if(MLIR_FOUND AND Flang_FOUND)
     message(STATUS "Found Flang -- will build Flang frontend plugin")
-                
+    set(TEST_FORTRAN TRUE)
+
 # Variables set in FlangConfig.cmake
     message(STATUS "FLANG_CMAKE_DIR: ${FLANG_CMAKE_DIR}")
     message(STATUS "FLANG_EXPORTED_TARGETS: ${FLANG_EXPORTED_TARGETS}")
@@ -400,10 +401,9 @@ endif()
 # 1. Test source files to be instrumented are located in ${CMAKE_SOURCE_DIR}/tests/
 # 2. Tests are currently not using MPI
 # 3. Only pthread threading is assumed
-# 4. TAU is installed into the default location of /usr/local/x86_64
-# 5. TAU is installed with the following configuratins:
+# 4. TAU is installed with the following configurations:
 #    - Makefile.tau-clang-pthread
-#    - Makefile.tau-pthread
+#    - Makefile.tau-pthread-pdt
 
 # Care has been taken to break test inter-dependencies where possible and
 # express them explicitly with the DEPENDS test property.
@@ -507,16 +507,20 @@ if(HAVE_TAU)
       set(depends_on instrument_${ARGV1})
     endif()
 
-    if(${TEST_LANG} STREQUAL "c")
+    if(${TEST_LANG} MATCHES "^[cC]$")
       set(TAUC ${TAUCC})
-    elseif(${TEST_LANG} STREQUAL "cpp")
+      set(test_path ${CMAKE_SOURCE_DIR}/tests/${TEST_BASE_NAME}.${TEST_LANG})
+    elseif(${TEST_LANG} MATCHES "^(cpp|CPP)$")
       set(TAUC ${TAUCXX})
+      set(test_path ${CMAKE_SOURCE_DIR}/tests/${TEST_BASE_NAME}.${TEST_LANG})
+    elseif(${TEST_LANG} MATCHES "^[fF](90)?$")
+      set(TAUC ${TAUF90})
+      set(test_path ${CMAKE_SOURCE_DIR}/tests/fortran/${TEST_BASE_NAME}.${TEST_LANG})
     else()
       message( FATAL_ERROR "Unknown test source file extension: ${TEST_LANG}")
     endif()
 
-    set(test_path ${CMAKE_SOURCE_DIR}/tests/${TEST_BASE_NAME}.${TEST_LANG})
-    set(TAUC_OPTS -optVerbose -optSaltInst -optSaltParser=${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}/saltfm)
+    set(TAUC_OPTS -optVerbose -optNoRevert -optSaltInst -optSaltParser=${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}/saltfm)
     set(compile_opts ${TAU_COMPILE_OPTIONS})
     foreach(comp IN LISTS compilers_to_test)
       set(lower_comp ${comp})
@@ -592,112 +596,121 @@ if(HAVE_TAU)
     endforeach()
   endfunction()
 
+  # This works, but a more thorough comparison of the fortran test setup with the C/C++ setup
+  # is needed and the directory naming is awkward when using Fortran sources in the above function.
+  # list(APPEND TESTS_LIST
+  #   funcsub.f90
+  #   myhi.f
+  # )
+
   foreach(test_source IN LISTS TESTS_LIST)
     compile_instrumented(${test_source})
   endforeach()
 endif()
 
-# Add some Fortran tests for SALT-FM
-set(FORTRAN_TESTS_SOURCES_LIST
-  myhi.f
-  cubes.f
-  emptyprog.f90
-  funcsub.f90
-  hello.f90
-  loop_test.f90
-  trivial.f90
-  return-only.f90
-  if-stmt.f90
-)
-
-# Add a smoke test of the fparse-llvm script
-add_test(NAME fparse_llvm_smoke_test
-  COMMAND ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}/fparse-llvm --help)
-set_tests_properties(fparse_llvm_smoke_test
-  PROPERTIES
-  LABELS smoke
-  PASS_REGULAR_EXPRESSION "USAGE"
-)
-
-foreach(test_source IN LISTS FORTRAN_TESTS_SOURCES_LIST)
-  add_test(NAME instrument_${test_source}
-    COMMAND ./${CMAKE_INSTALL_BINDIR}/saltfm ${CMAKE_SOURCE_DIR}/tests/fortran/${test_source}
-    )
-  set_tests_properties(instrument_${test_source}
-    PROPERTIES
-    REQUIRED_FILES "${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}/fparse-llvm;${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}/saltfm"
-    ENVIRONMENT "SALT_FORTRAN_VERBOSE=1"
-    PASS_REGULAR_EXPRESSION "SALT Instrumentor Plugin finished"
+if (TEST_FORTRAN)
+  # Add some Fortran tests for SALT-FM
+  set(FORTRAN_TESTS_SOURCES_LIST
+    myhi.f
+    cubes.f
+    emptyprog.f90
+    funcsub.f90
+    hello.f90
+    loop_test.f90
+    trivial.f90
+    return-only.f90
+    if-stmt.f90
   )
-endforeach()
 
-set(fortran_compilers_to_test gfortran flang-new)
-set(TAU_F90_OPTS -optVerbose -optLinkOnly)
-foreach(compiler IN LISTS fortran_compilers_to_test)
-  STRING(TOUPPER ${compiler} upper_comp)
-  add_test(NAME setup_${compiler}_dir
-    COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_BINARY_DIR}/${upper_comp})
-  set_tests_properties(setup_${compiler}_dir
+  # Add a smoke test of the fparse-llvm script
+  add_test(NAME fparse_llvm_smoke_test
+    COMMAND ${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}/fparse-llvm --help)
+  set_tests_properties(fparse_llvm_smoke_test
     PROPERTIES
-    FIXTURES_SETUP ${upper_comp}_dir
+    LABELS smoke
+    PASS_REGULAR_EXPRESSION "USAGE"
   )
-endforeach()
 
-if(HAVE_TAU)
   foreach(test_source IN LISTS FORTRAN_TESTS_SOURCES_LIST)
-    # Get the name of the instrumented source file
-    get_filename_component(TEST_BASE_NAME ${test_source} NAME_WLE)
-    get_filename_component(TEST_LANG ${test_source} LAST_EXT)
-    # fparse-llvm is adding preprocessor directives and should emit uppercase file extensions (e.g., .F90)
-    string(TOUPPER ${TEST_LANG} TEST_LANG)
-    set(TEST_INST_SOURCE ${TEST_BASE_NAME}.inst${TEST_LANG})
-
-    foreach(compiler IN LISTS fortran_compilers_to_test)
-      STRING(TOUPPER ${compiler} upper_comp)
-      if(${compiler} STREQUAL "gfortran")
-        set(mapped_comp GCC)
-        set(EXTRA_FLAGS -Wpedantic -Wextra -Wno-missing-include-dirs -Werror)
-      elseif(${compiler} STREQUAL "flang-new")
-        set(mapped_comp CLANG)
-        set(EXTRA_FLAGS -Werror)
-      elseif(${compiler} STREQUAL "flang")
-        set(mapped_comp CLANG)
-        set(EXTRA_FLAGS -Werror)
-      else()
-        message(FATAL_ERROR "Unknown compiler: ${compiler}")
-      endif()
-      add_test(NAME compile_${upper_comp}_${test_source}
-        COMMAND ${TAUF90} ${TAU_F90_OPTS} -o ${TEST_BASE_NAME} -Wall ${EXTRA_FLAGS} ${CMAKE_BINARY_DIR}/${TEST_INST_SOURCE}
-        WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${upper_comp}
-        )
-      set_tests_properties(compile_${upper_comp}_${test_source}
-        PROPERTIES
-        ENVIRONMENT "TAU_MAKEFILE=${TAU_${mapped_comp}_MAKEFILE}"
-        DEPENDS "instrument_${test_source}"
-        FIXTURES_REQUIRED ${upper_comp}_dir
-        FAIL_REGULAR_EXPRESSION "[^W][Ee]rror"
+    add_test(NAME instrument_${test_source}
+      COMMAND ./${CMAKE_INSTALL_BINDIR}/saltfm ${CMAKE_SOURCE_DIR}/tests/fortran/${test_source}
       )
-      # Profile with TAU and Verify profiles are created
-      add_test(NAME run_${upper_comp}_${test_source}
-        COMMAND ${TAU_EXEC} -T serial,pthread ./${TEST_BASE_NAME}
-        WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${upper_comp}
-      )
-      set_tests_properties(run_${upper_comp}_${test_source}
-        PROPERTIES
-        ENVIRONMENT "TAU_MAKEFILE=${TAU_${mapped_comp}_MAKEFILE};PROFILEDIR=${TEST_BASE_NAME}.d"
-        DEPENDS compile_${upper_comp}_${test_source}
-        FAIL_REGULAR_EXPRESSION "[Cc]ommand not found;[Ss]egmentation;[Ff]ault;[Ee]rror"
-      )
-      add_test(NAME check_${upper_comp}_${test_source}_profile
-        COMMAND ${CMAKE_COMMAND} -E cat ./${TEST_BASE_NAME}.d/profile.0.0.0
-        WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${upper_comp}
-      )
-      set_tests_properties(check_${upper_comp}_${test_source}_profile
-        PROPERTIES
-        PASS_REGULAR_EXPRESSION "GROUP=\"TAU_DEFAULT\""
-        FAIL_REGULAR_EXPRESSION "addr=\<0x"
-        DEPENDS run_${upper_comp}_${test_source}
-      )
-    endforeach()
+    set_tests_properties(instrument_${test_source}
+      PROPERTIES
+      REQUIRED_FILES "${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}/fparse-llvm;${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}/saltfm"
+      ENVIRONMENT "SALT_FORTRAN_VERBOSE=1"
+      PASS_REGULAR_EXPRESSION "SALT Instrumentor Plugin finished"
+    )
+  endforeach()
+
+  set(fortran_compilers_to_test gfortran flang-new)
+  foreach(compiler IN LISTS fortran_compilers_to_test)
+    STRING(TOUPPER ${compiler} upper_comp)
+    add_test(NAME setup_${compiler}_dir
+      COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_BINARY_DIR}/${upper_comp})
+    set_tests_properties(setup_${compiler}_dir
+      PROPERTIES
+      FIXTURES_SETUP ${upper_comp}_dir
+    )
   endforeach()
-endif()
\ No newline at end of file
+
+  # TODO use the generic function above to add and process the Fortran tests.
+  if(HAVE_TAU)
+    set(TAU_F90_OPTS -optVerbose -optNoRevert -optSaltInst -optSaltParser=${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}/saltfm)
+    foreach(test_source IN LISTS FORTRAN_TESTS_SOURCES_LIST)
+      # Get the name of the instrumented source file
+      get_filename_component(TEST_BASE_NAME ${test_source} NAME_WLE)
+      get_filename_component(TEST_LANG ${test_source} LAST_EXT)
+      # Compile the original Fortran source directly; instrumentation is requested via -optSaltInst in TAU_F90_OPTS
+      set(TEST_INST_SOURCE ${CMAKE_SOURCE_DIR}/tests/fortran/${TEST_BASE_NAME}${TEST_LANG})
+
+      foreach(compiler IN LISTS fortran_compilers_to_test)
+        STRING(TOUPPER ${compiler} upper_comp)
+        if(${compiler} STREQUAL "gfortran")
+          set(mapped_comp GCC)
+          set(EXTRA_FLAGS -cpp -Wpedantic -Wextra -Wno-missing-include-dirs -Werror)
+        elseif(${compiler} STREQUAL "flang-new")
+          set(mapped_comp CLANG)
+          set(EXTRA_FLAGS -Werror)
+        elseif(${compiler} STREQUAL "flang")
+          set(mapped_comp CLANG)
+          set(EXTRA_FLAGS -Werror)
+        else()
+          message(FATAL_ERROR "Unknown compiler: ${compiler}")
+        endif()
+        add_test(NAME compile_${upper_comp}_${test_source}
+          COMMAND ${TAUF90} ${TAU_F90_OPTS} -o ${TEST_BASE_NAME} -Wall ${EXTRA_FLAGS} ${TEST_INST_SOURCE}
+          WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${upper_comp}
+          )
+        set_tests_properties(compile_${upper_comp}_${test_source}
+          PROPERTIES
+          ENVIRONMENT "TAU_MAKEFILE=${TAU_${mapped_comp}_MAKEFILE}"
+          DEPENDS "instrument_${test_source}"
+          FIXTURES_REQUIRED ${upper_comp}_dir
+          FAIL_REGULAR_EXPRESSION "[^W][Ee]rror"
+        )
+        # Profile with TAU and Verify profiles are created
+        add_test(NAME run_${upper_comp}_${test_source}
+          COMMAND ${TAU_EXEC} -T serial,pthread ./${TEST_BASE_NAME}
+          WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${upper_comp}
+        )
+        set_tests_properties(run_${upper_comp}_${test_source}
+          PROPERTIES
+          ENVIRONMENT "TAU_MAKEFILE=${TAU_${mapped_comp}_MAKEFILE};PROFILEDIR=${TEST_BASE_NAME}.d"
+          DEPENDS compile_${upper_comp}_${test_source}
+          FAIL_REGULAR_EXPRESSION "[Cc]ommand not found;[Ss]egmentation;[Ff]ault;[Ee]rror"
+        )
+        add_test(NAME check_${upper_comp}_${test_source}_profile
+          COMMAND ${CMAKE_COMMAND} -E cat ./${TEST_BASE_NAME}.d/profile.0.0.0
+          WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/${upper_comp}
+        )
+        set_tests_properties(check_${upper_comp}_${test_source}_profile
+          PROPERTIES
+          PASS_REGULAR_EXPRESSION "GROUP=\"TAU_DEFAULT\""
+          FAIL_REGULAR_EXPRESSION "addr=\<0x"
+          DEPENDS run_${upper_comp}_${test_source}
+        )
+      endforeach()
+    endforeach()
+  endif()
+endif()

From 8bc46ca556740e04203ff62b374d3aa6ee902e62 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Tue, 28 Jan 2025 14:36:26 -0500
Subject: [PATCH 130/135] Attempt to fix issue with HEADER file sets basedir

---
 CMakeLists.txt | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c4b20d0..ddb698d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -228,7 +228,12 @@ list(TRANSFORM CPARSE_LLVM_SRCS PREPEND "${CMAKE_SOURCE_DIR}/src/")
 # If we refactor into a library that executables link against it will simplify this and reduce repitition
 add_executable(cparse-llvm)
 target_sources(cparse-llvm PUBLIC ${CPARSE_LLVM_SRCS})
-target_sources(cparse-llvm PUBLIC FILE_SET headers TYPE HEADERS FILES ${SALT_HEADER_FILES})
+target_sources(cparse-llvm
+  PUBLIC
+  FILE_SET headers
+  TYPE HEADERS
+  FILES ${SALT_HEADER_FILES}
+  BASE_DIRS ${CMAKE_SOURCE_DIR}/include;${CMAKE_BINARY_DIR}/include)
 target_include_directories(cparse-llvm PUBLIC "${CMAKE_SOURCE_DIR}/include" "${CMAKE_BINARY_DIR}/include")
 target_compile_features(cparse-llvm PUBLIC cxx_std_17)
 target_link_libraries(cparse-llvm PUBLIC SALT_LLVM_TOOLING) # Inherit definitions, compile features, etc.
@@ -310,7 +315,11 @@ if(MLIR_FOUND AND Flang_FOUND)
 
     add_library(salt-flang-plugin SHARED)
     target_sources(salt-flang-plugin PUBLIC ${SALT_FLANG_PLUGIN_SRCS})
-    target_sources(salt-flang-plugin PUBLIC FILE_SET headers TYPE HEADERS FILES ${SALT_FLANG_PLUGIN_HEADER_FILES})
+    target_sources(salt-flang-plugin
+      PUBLIC
+      FILE_SET headers
+      TYPE HEADERS
+      FILES ${SALT_FLANG_PLUGIN_HEADER_FILES})
     target_include_directories(salt-flang-plugin PUBLIC "${CMAKE_SOURCE_DIR}/include" "${CMAKE_BINARY_DIR}/include" )
     target_compile_features(salt-flang-plugin PUBLIC cxx_std_17)
     target_link_libraries(salt-flang-plugin PUBLIC SALT_FLANG_FRONTEND)

From c5f1c60fadf1f6f04758d83d5b868d28fc9ecdb1 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Tue, 28 Jan 2025 15:42:30 -0500
Subject: [PATCH 131/135] Swap clang version guards to default to modern
 llvm/clang

This should allow SALT to be built with GCC
---
 src/instrumentor.cpp | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/src/instrumentor.cpp b/src/instrumentor.cpp
index 9093d1c..69e2012 100644
--- a/src/instrumentor.cpp
+++ b/src/instrumentor.cpp
@@ -529,16 +529,16 @@ class FindReturnVisitor : public RecursiveASTVisitor<FindReturnVisitor>
     {
         for (SourceRange lambda : lambda_locs)
         {
-#if __clang_major__ > 9
-            if (lambda.fullyContains(ret->getSourceRange()))
-            {
-#else
+#if __clang_major__ < 10
             SourceLocation lambda_begin = lambda.getBegin();
             SourceLocation lambda_end = lambda.getEnd();
             SourceLocation ret_begin = ret->getSourceRange().getBegin();
             SourceLocation ret_end = ret->getSourceRange().getEnd();
             if (lambda_begin <= ret_begin && ret_end <= lambda_end)
             {
+#else
+            if (lambda.fullyContains(ret->getSourceRange()))
+            {
 #endif
                 // ignore lambdas
                 return true;
@@ -596,10 +596,8 @@ class FindReturnVisitor : public RecursiveASTVisitor<FindReturnVisitor>
         if (encl_function->getReturnType()->isClassType())
         {
             CXXRecordDecl *decl = encl_function->getReturnType()->getAsCXXRecordDecl();
-#if __clang_major__ > 10
-            if (!(decl->hasSimpleCopyAssignment() || decl->hasTrivialCopyAssignment()))
-            {
-#else // borrow logic of llvm 10 DeclCXX.cpp for setting DefaultedCopyAssignmentIsDeleted
+#if __clang_major__ < 11
+            // borrow logic of llvm 10 DeclCXX.cpp for setting DefaultedCopyAssignmentIsDeleted
             bool DefaultedCopyAssignmentIsDeleted = false;
             if (const auto *Field = dyn_cast<FieldDecl>(decl))
             {
@@ -633,6 +631,9 @@ class FindReturnVisitor : public RecursiveASTVisitor<FindReturnVisitor>
             if (!((!decl->hasUserDeclaredCopyAssignment() && !DefaultedCopyAssignmentIsDeleted) ||
                   decl->hasTrivialCopyAssignment()))
             {
+#else
+            if (!(decl->hasSimpleCopyAssignment() || decl->hasTrivialCopyAssignment()))
+            {
 #endif
                 needs_move = true;
             }
@@ -734,10 +735,8 @@ class FindFunctionVisitor : public RecursiveASTVisitor<FindFunctionVisitor>
         if (func->getReturnType()->isClassType())
         {
             CXXRecordDecl *decl = func->getReturnType()->getAsCXXRecordDecl();
-#if __clang_major__ > 10
-            if (!(decl->hasSimpleCopyAssignment() || decl->hasTrivialCopyAssignment()))
-            {
-#else // borrow logic of llvm 10 DeclCXX.cpp for setting DefaultedCopyAssignmentIsDeleted
+#if __clang_major__ < 11
+            // borrow logic of llvm 10 DeclCXX.cpp for setting DefaultedCopyAssignmentIsDeleted
             bool DefaultedCopyAssignmentIsDeleted = false;
             if (const auto *Field = dyn_cast<FieldDecl>(decl))
             {
@@ -771,6 +770,9 @@ class FindFunctionVisitor : public RecursiveASTVisitor<FindFunctionVisitor>
             if (!((!decl->hasUserDeclaredCopyAssignment() && !DefaultedCopyAssignmentIsDeleted) ||
                   decl->hasTrivialCopyAssignment()))
             {
+#else
+            if (!(decl->hasSimpleCopyAssignment() || decl->hasTrivialCopyAssignment()))
+            {
 #endif
                 needs_move = true;
             }

From 0c0273e2d1a1e2ebf9acb841a3ade516703f01c5 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Tue, 28 Jan 2025 16:13:11 -0500
Subject: [PATCH 132/135] Relax build errors for unused variables

---
 CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index ddb698d..d721b6e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -184,7 +184,7 @@ add_library(SALT_LLVM_TOOLING INTERFACE)
 target_compile_features(SALT_LLVM_TOOLING INTERFACE cxx_std_17)
 target_include_directories(SALT_LLVM_TOOLING INTERFACE ${LLVM_INCLUDE_DIRS})
 target_compile_definitions(SALT_LLVM_TOOLING INTERFACE ${LLVM_DEFINITIONS_LIST})
-target_compile_options(SALT_LLVM_TOOLING INTERFACE ${USE_RTTI} -Wall -Wpedantic $<$<CONFIG:Debug>:-Wno-gnu-zero-variadic-macro-arguments>)
+target_compile_options(SALT_LLVM_TOOLING INTERFACE ${USE_RTTI} -Wall -Wpedantic -Wno-unused-variable $<$<CONFIG:Debug>:-Wno-gnu-zero-variadic-macro-arguments>)
 target_link_libraries(SALT_LLVM_TOOLING INTERFACE ${CLANG_LIBS} ${LLVM_LIBS})
 
 #---------------------------------
@@ -284,7 +284,7 @@ if(MLIR_FOUND AND Flang_FOUND)
     target_compile_features(SALT_FLANG_FRONTEND INTERFACE cxx_std_17)
     target_include_directories(SALT_FLANG_FRONTEND INTERFACE ${LLVM_INCLUDE_DIRS} ${FLANG_INCLUDE_DIRS})
     target_compile_definitions(SALT_FLANG_FRONTEND INTERFACE ${LLVM_DEFINITIONS_LIST})
-    target_compile_options(SALT_FLANG_FRONTEND INTERFACE ${USE_RTTI} -Wall -Werror -Wpedantic)
+    target_compile_options(SALT_FLANG_FRONTEND INTERFACE ${USE_RTTI} -Wall -Wno-unused-variable -Werror -Wpedantic)
 
 # Endianness definitions are required, and Flang does not export a definitions list
     include(TestBigEndian)

From 21a785508beddac2e955afdbad87aa33997745ee Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Tue, 28 Jan 2025 16:13:46 -0500
Subject: [PATCH 133/135] Remove special cases for older llvm/clang

We require LLVM 19 or newer, so this code is obsolete and error-prone
when building SALT with GCC or other compilers.
---
 src/instrumentor.cpp | 91 --------------------------------------------
 1 file changed, 91 deletions(-)

diff --git a/src/instrumentor.cpp b/src/instrumentor.cpp
index 69e2012..b12a00b 100644
--- a/src/instrumentor.cpp
+++ b/src/instrumentor.cpp
@@ -505,14 +505,6 @@ void makeFuncAndTimerNames(FunctionDecl *func, ASTContext *context, SourceManage
                std::to_string(start_col) + "}-{" + std::to_string(end_line) + "," + std::to_string(end_col) + "}]";
 }
 
-// borrowed from SourceLocation.h for use in fullyContains() replacement
-#if __clang_major__ < 10
-inline bool operator<=(const SourceLocation &LHS, const SourceLocation &RHS)
-{
-    return LHS.getRawEncoding() <= RHS.getRawEncoding();
-}
-#endif
-
 class FindReturnVisitor : public RecursiveASTVisitor<FindReturnVisitor>
 {
     ASTContext *context;
@@ -529,17 +521,8 @@ class FindReturnVisitor : public RecursiveASTVisitor<FindReturnVisitor>
     {
         for (SourceRange lambda : lambda_locs)
         {
-#if __clang_major__ < 10
-            SourceLocation lambda_begin = lambda.getBegin();
-            SourceLocation lambda_end = lambda.getEnd();
-            SourceLocation ret_begin = ret->getSourceRange().getBegin();
-            SourceLocation ret_end = ret->getSourceRange().getEnd();
-            if (lambda_begin <= ret_begin && ret_end <= lambda_end)
-            {
-#else
             if (lambda.fullyContains(ret->getSourceRange()))
             {
-#endif
                 // ignore lambdas
                 return true;
             }
@@ -596,45 +579,8 @@ class FindReturnVisitor : public RecursiveASTVisitor<FindReturnVisitor>
         if (encl_function->getReturnType()->isClassType())
         {
             CXXRecordDecl *decl = encl_function->getReturnType()->getAsCXXRecordDecl();
-#if __clang_major__ < 11
-            // borrow logic of llvm 10 DeclCXX.cpp for setting DefaultedCopyAssignmentIsDeleted
-            bool DefaultedCopyAssignmentIsDeleted = false;
-            if (const auto *Field = dyn_cast<FieldDecl>(decl))
-            {
-                QualType T = context->getBaseElementType(Field->getType());
-                if (T->isReferenceType())
-                {
-                    DefaultedCopyAssignmentIsDeleted = true;
-                }
-                if (const auto *RecordTy = T->getAs<RecordType>())
-                {
-                    auto *FieldRec = cast<CXXRecordDecl>(RecordTy->getDecl());
-                    if (FieldRec->getDefinition())
-                    {
-                        if (decl->isUnion())
-                        {
-                            if (FieldRec->hasNonTrivialCopyAssignment())
-                            {
-                                DefaultedCopyAssignmentIsDeleted = true;
-                            }
-                        }
-                    }
-                }
-                else
-                {
-                    if (T.isConstQualified())
-                    {
-                        DefaultedCopyAssignmentIsDeleted = true;
-                    }
-                }
-            }
-            if (!((!decl->hasUserDeclaredCopyAssignment() && !DefaultedCopyAssignmentIsDeleted) ||
-                  decl->hasTrivialCopyAssignment()))
-            {
-#else
             if (!(decl->hasSimpleCopyAssignment() || decl->hasTrivialCopyAssignment()))
             {
-#endif
                 needs_move = true;
             }
         }
@@ -735,45 +681,8 @@ class FindFunctionVisitor : public RecursiveASTVisitor<FindFunctionVisitor>
         if (func->getReturnType()->isClassType())
         {
             CXXRecordDecl *decl = func->getReturnType()->getAsCXXRecordDecl();
-#if __clang_major__ < 11
-            // borrow logic of llvm 10 DeclCXX.cpp for setting DefaultedCopyAssignmentIsDeleted
-            bool DefaultedCopyAssignmentIsDeleted = false;
-            if (const auto *Field = dyn_cast<FieldDecl>(decl))
-            {
-                QualType T = context->getBaseElementType(Field->getType());
-                if (T->isReferenceType())
-                {
-                    DefaultedCopyAssignmentIsDeleted = true;
-                }
-                if (const auto *RecordTy = T->getAs<RecordType>())
-                {
-                    auto *FieldRec = cast<CXXRecordDecl>(RecordTy->getDecl());
-                    if (FieldRec->getDefinition())
-                    {
-                        if (decl->isUnion())
-                        {
-                            if (FieldRec->hasNonTrivialCopyAssignment())
-                            {
-                                DefaultedCopyAssignmentIsDeleted = true;
-                            }
-                        }
-                    }
-                }
-                else
-                {
-                    if (T.isConstQualified())
-                    {
-                        DefaultedCopyAssignmentIsDeleted = true;
-                    }
-                }
-            }
-            if (!((!decl->hasUserDeclaredCopyAssignment() && !DefaultedCopyAssignmentIsDeleted) ||
-                  decl->hasTrivialCopyAssignment()))
-            {
-#else
             if (!(decl->hasSimpleCopyAssignment() || decl->hasTrivialCopyAssignment()))
             {
-#endif
                 needs_move = true;
             }
         }

From f5075a2148725c9523ca5d954216657c49bde686 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Tue, 28 Jan 2025 16:15:24 -0500
Subject: [PATCH 134/135] Fix warnings comparing signed & unsigned integers

---
 src/selectfile.cpp | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/selectfile.cpp b/src/selectfile.cpp
index aa790dc..c0db254 100644
--- a/src/selectfile.cpp
+++ b/src/selectfile.cpp
@@ -736,8 +736,7 @@ bool processInstrumentationRequests(const char *fname)
              the string. "#foo" becomes #foo and is passed on to the
              exclude list. */
           char *exclude = strdup(&inbuf[1]);
-          int i;
-          for (i = 0; i < strlen(exclude); i++) {
+          for (size_t i = 0; i < strlen(exclude); i++) {
             if (exclude[i] == '"') {
               exclude[i]='\0';
               break; /* out of the loop */
@@ -772,8 +771,7 @@ bool processInstrumentationRequests(const char *fname)
              the string. "#foo" becomes #foo and is passed on to the
              exclude list. */
           char *exclude = strdup(&inbuf[1]);
-          int i;
-          for (i = 0; i < strlen(exclude); i++) {
+          for (size_t i = 0; i < strlen(exclude); i++) {
             if (exclude[i] == '"') {
               exclude[i]='\0';
               break; /* out of the loop */
@@ -806,7 +804,7 @@ bool processInstrumentationRequests(const char *fname)
         // strip quotes
         if (inbuf[0] == '"') {
           char *include = strdup(&inbuf[1]);
-          for (int i = 0; i < strlen(include); i++) {
+          for (size_t i = 0; i < strlen(include); i++) {
             if (include[i] == '"') {
               include[i] = '\0';
               break;
@@ -839,7 +837,7 @@ bool processInstrumentationRequests(const char *fname)
         // strip quotes
         if (inbuf[0] == '"') {
           char *exclude = strdup(&inbuf[1]);
-          for (int i = 0; i < strlen(exclude); i++) {
+          for (size_t i = 0; i < strlen(exclude); i++) {
             if (exclude[i] == '"') {
               exclude[i] = '\0';
               break;

From 28e6d85919af1e413ad7edada454aac9c03cf381 Mon Sep 17 00:00:00 2001
From: Izaak Beekman <zbeekman@gmail.com>
Date: Tue, 28 Jan 2025 16:58:31 -0500
Subject: [PATCH 135/135] Fix some whitespace issues

---
 config_files/tau_config.yaml | 2 +-
 include/frontend.hpp.in      | 2 +-
 spack.yaml                   | 1 -
 3 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/config_files/tau_config.yaml b/config_files/tau_config.yaml
index 03bb209..5315cc6 100644
--- a/config_files/tau_config.yaml
+++ b/config_files/tau_config.yaml
@@ -44,4 +44,4 @@ Fortran:
     - "      call TAU_PROFILE_START(tauProfileTimer)"
 
   procedure_end_insert:
-    - "      call TAU_PROFILE_STOP(tauProfileTimer)"
\ No newline at end of file
+    - "      call TAU_PROFILE_STOP(tauProfileTimer)"
diff --git a/include/frontend.hpp.in b/include/frontend.hpp.in
index 18fe9d0..5f5fa4f 100644
--- a/include/frontend.hpp.in
+++ b/include/frontend.hpp.in
@@ -1,2 +1,2 @@
 // Define constants needed for the frontend
-#define SALT_DEFAULT_CONFIG_FILE "../@CMAKE_INSTALL_DATADIR@/@CMAKE_PROJECT_NAME@/config_files/config.yaml"
\ No newline at end of file
+#define SALT_DEFAULT_CONFIG_FILE "../@CMAKE_INSTALL_DATADIR@/@CMAKE_PROJECT_NAME@/config_files/config.yaml"
diff --git a/spack.yaml b/spack.yaml
index b2f00ef..9414bb0 100644
--- a/spack.yaml
+++ b/spack.yaml
@@ -59,4 +59,3 @@ spack:
   specs:
   - llvm@git.086d8e6bb5daf8de43880ba90258c49e0fabf2c9=19.1.4 +flang +mlir openmp=project
   - mpich ~wrapperrpath
-  
\ No newline at end of file