From 68eab0027e77be73c062c15aef111c59bf7a433b Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Mon, 25 Mar 2024 03:34:42 +0100 Subject: [PATCH 001/112] add new features (in cargo file) --- Cargo.toml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 0aeffe2..72d9188 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,5 +40,10 @@ tokio-util = {version = "0.7", features = ["io"]} aio-cargo-info = { path = "./crates/aio-cargo-info", version = "0.1" } +[features] +default = ["sql", "openai", "local-llm"] +local-llm = [] +ollama = [] + [target.'cfg(target_os = "linux")'.dependencies] openssl = {version = "0.10", features = ["vendored"]} \ No newline at end of file From 882c8a5e5db7362ed6ee09725ff0579a66781a76 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Fri, 12 Apr 2024 17:11:59 +0200 Subject: [PATCH 002/112] WIP add llama_cpp_rs --- Cargo.lock | 333 +++++++++++++++++++++++++++------ Cargo.toml | 10 +- src/config.rs | 8 +- src/generators/llama/config.rs | 7 + src/generators/llama/mod.rs | 86 +++++++++ src/generators/mod.rs | 2 + src/main.rs | 1 + 7 files changed, 380 insertions(+), 67 deletions(-) create mode 100644 src/generators/llama/config.rs create mode 100644 src/generators/llama/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 910839e..1fcb090 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -47,6 +47,7 @@ dependencies = [ "bytes", "clap", "crossterm", + "llama_cpp_rs", "num-traits", "once_cell", "openssl", @@ -114,7 +115,7 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" dependencies = [ - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -124,7 +125,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "58f54d10c6dfa51283a066ceab3ec1ab78d13fae00aa49243a45e4571fb79dfd" dependencies = [ "anstyle", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -185,6 +186,29 @@ version = "0.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ba43ea6f343b788c8764558649e08df62f86c6ef251fdaeb1ffd010a9ae50a2" +[[package]] +name = "bindgen" +version = "0.66.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b84e06fc203107bfbad243f4aba2af864eb7db3b1cf46ea0a023b0b433d2a7" +dependencies = [ + "bitflags 2.4.0", + "cexpr", + "clang-sys", + "lazy_static", + "lazycell", + "log", + "peeking_take_while", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn", + "which", +] + [[package]] name = "bitflags" version = "1.3.2" @@ -239,12 +263,32 @@ dependencies = [ "libc", ] +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "clang-sys" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67523a3b4be3ce1989d607a828d036249522dd9c1c8de7f4dd2dae43a37369d1" +dependencies = [ + "glob", + "libc", + "libloading", +] + [[package]] name = "clap" version = "4.4.4" @@ -341,6 +385,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "either" +version = "1.10.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" + [[package]] name = "encoding_rs" version = "0.8.33" @@ -364,7 +414,7 @@ checksum = "136526188508e25c6fef639d7927dfb3e0e3084488bf202267829cf7fc23dbdd" dependencies = [ "errno-dragonfly", "libc", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -425,30 +475,30 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.28" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" +checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" dependencies = [ "futures-core", ] [[package]] name = "futures-core" -version = "0.3.28" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" +checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" [[package]] name = "futures-io" -version = "0.3.28" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" +checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" [[package]] name = "futures-macro" -version = "0.3.28" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" +checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", @@ -457,21 +507,21 @@ dependencies = [ [[package]] name = "futures-sink" -version = "0.3.28" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" +checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" [[package]] name = "futures-task" -version = "0.3.28" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" +checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" [[package]] name = "futures-util" -version = "0.3.28" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" +checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" dependencies = [ "futures-core", "futures-io", @@ -490,6 +540,12 @@ version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0" +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + [[package]] name = "h2" version = "0.3.21" @@ -533,6 +589,15 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" +[[package]] +name = "home" +version = "0.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" +dependencies = [ + "windows-sys 0.52.0", +] + [[package]] name = "http" version = "0.2.9" 
@@ -661,11 +726,27 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + [[package]] name = "libc" -version = "0.2.148" +version = "0.2.153" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" + +[[package]] +name = "libloading" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cdc71e17332e86d2e1d38c1f99edcb6288ee11b815fb1a4b049eaa2114d369b" +checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" +dependencies = [ + "cfg-if", + "windows-targets 0.52.4", +] [[package]] name = "linux-raw-sys" @@ -673,6 +754,15 @@ version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a9bad9f94746442c783ca431b22403b519cd7fbeed0533fdd6328b2f2212128" +[[package]] +name = "llama_cpp_rs" +version = "0.3.0" +dependencies = [ + "bindgen", + "cc", + "lazy_static", +] + [[package]] name = "lock_api" version = "0.4.10" @@ -701,6 +791,12 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "miniz_oxide" version = "0.7.1" @@ -712,14 +808,14 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.8" +version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "927a765cd3fc26206e66b296465fa9d3e5ab003e651c1b3c060e7956d96b19d2" +checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" dependencies = [ "libc", "log", "wasi", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -740,6 +836,16 @@ dependencies = [ "tempfile", ] +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "num-traits" version = "0.2.16" @@ -770,9 +876,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.18.0" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "openssl" @@ -848,9 +954,15 @@ dependencies = [ "libc", "redox_syscall", "smallvec", - "windows-targets", + "windows-targets 0.48.5", ] +[[package]] +name = "peeking_take_while" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" + [[package]] name = "percent-encoding" version = "2.3.0" @@ -895,20 +1007,30 @@ version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" +[[package]] +name = "prettyplease" +version = "0.2.17" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d3928fb5db768cb86f891ff014f0144589297e3c6a1aba6ed7cecfdace270c7" +dependencies = [ + "proc-macro2", + "syn", +] + [[package]] name = "proc-macro2" -version = "1.0.67" +version = "1.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d433d9f1a3e8c1263d9456598b16fec66f4acc9a74dacffd35c7bb09b3a1328" +checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.33" +version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" dependencies = [ "proc-macro2", ] @@ -997,6 +1119,12 @@ version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "rustix" version = "0.38.14" @@ -1007,7 +1135,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -1022,7 +1150,7 @@ version = "0.1.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c3733bf4cf7ea0880754e19cb5a462007c4a8c1914bff372ccc95b464f1df88" dependencies = [ - "windows-sys", + "windows-sys 0.48.0", ] [[package]] @@ -1119,6 +1247,12 @@ dependencies = [ "unsafe-libyaml", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "signal-hook" version = "0.3.17" @@ -1188,12 +1322,12 @@ dependencies = [ [[package]] name = "socket2" -version = "0.5.4" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4031e820eb552adee9295814c0ced9e5cf38ddf1e8b7d566d6de8e2538ea989e" +checksum = "05ffd9c0a93b7543e062e759284fcf5f5e3b098501104bfbdde4d404db792871" dependencies = [ "libc", - "windows-sys", + "windows-sys 0.52.0", ] [[package]] @@ -1210,9 +1344,9 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "syn" -version = "2.0.37" +version = "2.0.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7303ef2c05cd654186cb250d29049a24840ca25d2747c25c0381c8d9e2f582e8" +checksum = "11a6ae1e52eb25aab8f3fb9fca13be982a373b8f1157ca14b897a825ba4a2d35" dependencies = [ "proc-macro2", "quote", @@ -1229,23 +1363,23 @@ dependencies = [ "fastrand", "redox_syscall", "rustix", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] name = "thiserror" -version = "1.0.48" +version = "1.0.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d6d7a740b8a666a7e828dd00da9c0dc290dff53154ea77ac109281de90589b7" +checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.48" +version = "1.0.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49922ecae66cc8a249b77e68d1d0623c1b2c514f0060c27cdc68bd62a1219d35" +checksum = 
"c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" dependencies = [ "proc-macro2", "quote", @@ -1269,9 +1403,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.32.0" +version = "1.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17ed6077ed6cd6c74735e21f37eb16dc3935f96878b1fe961074089cc80893f9" +checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787" dependencies = [ "backtrace", "bytes", @@ -1281,16 +1415,16 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2 0.5.4", + "socket2 0.5.6", "tokio-macros", - "windows-sys", + "windows-sys 0.48.0", ] [[package]] name = "tokio-macros" -version = "2.1.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" +checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", @@ -1374,20 +1508,19 @@ checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" [[package]] name = "tracing" -version = "0.1.37" +version = "0.1.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" dependencies = [ - "cfg-if", "pin-project-lite", "tracing-core", ] [[package]] name = "tracing-core" -version = "0.1.31" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" dependencies = [ "once_cell", ] @@ -1558,6 +1691,18 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "which" +version = "4.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" +dependencies = [ + "either", + "home", + "once_cell", + "rustix", +] + [[package]] name = "winapi" version = "0.3.9" @@ -1586,7 +1731,16 @@ version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" dependencies = [ - "windows-targets", + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.4", ] [[package]] @@ -1595,13 +1749,28 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" +dependencies = [ + "windows_aarch64_gnullvm 0.52.4", + "windows_aarch64_msvc 0.52.4", + "windows_i686_gnu 0.52.4", + "windows_i686_msvc 0.52.4", + "windows_x86_64_gnu 0.52.4", + "windows_x86_64_gnullvm 0.52.4", + "windows_x86_64_msvc 0.52.4", ] [[package]] @@ -1610,42 +1779,84 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" + [[package]] name = "windows_i686_gnu" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" +[[package]] +name = "windows_i686_gnu" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" + [[package]] name = "windows_i686_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" +[[package]] +name = "windows_i686_msvc" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" + [[package]] name = "winnow" version = "0.5.15" @@ -1662,5 +1873,5 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" dependencies = [ "cfg-if", - "windows-sys", + "windows-sys 0.48.0", ] diff --git a/Cargo.toml b/Cargo.toml index 72d9188..a5678d2 100644 --- 
a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,7 @@ license = "MIT" categories = ["command-line-utilities", "parsing"] repository = "https://github.com/glcraft/aio" homepage = "https://github.com/glcraft/aio" -keywords = ["cli", "terminal", "ai", "openai", "markdown"] +keywords = ["cli", "terminal", "ai", "llm", "openai", "markdown"] [[bin]] name = "aio" @@ -23,6 +23,8 @@ async-trait = "0.1" bytes = "1.1.0" clap = { version = "4.2.2", features = ["derive"] } crossterm = "0.27" +llama_cpp_rs = { path = "../rust-llama.cpp", features = ["cuda"], optional = true } +# llama_cpp = { version = "^0.3.1", features = ["cuda"], optional = true } num-traits = "0.2" once_cell = "1.18" pin-project = "1.1" @@ -41,9 +43,9 @@ tokio-util = {version = "0.7", features = ["io"]} aio-cargo-info = { path = "./crates/aio-cargo-info", version = "0.1" } [features] -default = ["sql", "openai", "local-llm"] -local-llm = [] -ollama = [] +default = ["openai", "local-llm"] +local-llm = ["llama_cpp_rs"] +openai = [] [target.'cfg(target_os = "linux")'.dependencies] openssl = {version = "0.10", features = ["vendored"]} \ No newline at end of file diff --git a/src/config.rs b/src/config.rs index f9dc20a..318610b 100644 --- a/src/config.rs +++ b/src/config.rs @@ -6,12 +6,16 @@ use serde::{Deserialize, Serialize}; use crate::{ arguments as args, serde_io::DeserializeExt, - generators::openai::config::Config as ConfigOpenAI + generators::openai::config::Config as OpenAIConfig, }; +#[cfg(feature = "local-llm")] +use crate::generators::llama::config::Config as LlamaConfig; #[derive(Default, Debug, Deserialize, Serialize)] pub struct Config { - pub openai: ConfigOpenAI + pub openai: OpenAIConfig, + #[cfg(feature = "local-llm")] + pub llama: LlamaConfig, } impl DeserializeExt for Config {} diff --git a/src/generators/llama/config.rs b/src/generators/llama/config.rs new file mode 100644 index 0000000..4661a2c --- /dev/null +++ b/src/generators/llama/config.rs @@ -0,0 +1,7 @@ + +use serde::{Deserialize, Serialize}; + +#[derive(Default, Debug, Deserialize, Serialize)] +pub struct Config { + pub model_path: String +} \ No newline at end of file diff --git a/src/generators/llama/mod.rs b/src/generators/llama/mod.rs new file mode 100644 index 0000000..9b84902 --- /dev/null +++ b/src/generators/llama/mod.rs @@ -0,0 +1,86 @@ +pub mod config; +use tokio::sync::Mutex; +use tokio_stream::{wrappers::ReceiverStream, StreamExt}; + +use llama_cpp_rs::{ + options::{ModelOptions, PredictOptions}, + LLama, +}; +use once_cell::sync::{Lazy, OnceCell}; +use crate::{ + config::Config as AIOConfig, + args +}; +use super::{ResultRun, Error}; + +struct SendLLama(LLama); + +unsafe impl Send for SendLLama {} + + +static LOCAL_LLAMA: OnceCell> = OnceCell::new(); + +fn init_model(config: &crate::config::Config) -> Result<(), Error> { + let model_options = ModelOptions { + n_gpu_layers: 20000, + ..Default::default() + }; + let Ok(llama) = LLama::new( + config.llama.model_path.clone(), + &model_options, + ) else { + return Err(Error::Custom("Failed to load LLaMA model".into())) + }; + let send_llama = SendLLama(llama); + LOCAL_LLAMA.set(Mutex::new(send_llama)).map_err(|_| Error::Custom("Failed to set LLaMA model".into())) +} + +pub async fn run( + config: AIOConfig, + args: args::ProcessedArgs +) -> ResultRun { + if LOCAL_LLAMA.get().is_none() { + init_model(&config)?; + } + let llama = LOCAL_LLAMA.get().unwrap().lock().await; + let llama = &llama.0; + + let (send, recv) = tokio::sync::mpsc::channel(10); + + let predict_options = PredictOptions { + token_callback: 
Some(Box::new(move |token| { + use tokio::runtime::Handle; + + // let send = send.clone(); + // tokio::spawn(async move { + // if let Err(e) = send.send(token).await { + // eprintln!("Failed to send token: {}", e); + // } else { + // println!("token sent"); + // } + // }); + print!("{}", token); + + true + })), + tokens: 0, + threads: 14, + top_k: 90, + top_p: 0.8, + debug_mode: false, + ..Default::default() + }; + llama + .predict( + args.input, + predict_options, + ) + .unwrap(); + + let stream = ReceiverStream::new(recv).map(Ok); + + // send.send("test 1.2.3|".to_string()).await.expect("Failed to send"); + // send.send("test 4.5.6|".to_string()).await.expect("Failed to send"); + // send.send("test 7.8.9|".to_string()).await.expect("Failed to send"); + Ok(Box::pin(stream)) +} \ No newline at end of file diff --git a/src/generators/mod.rs b/src/generators/mod.rs index cd51ed9..2a2d818 100644 --- a/src/generators/mod.rs +++ b/src/generators/mod.rs @@ -1,4 +1,6 @@ pub mod openai; +#[cfg(feature = "local-llm")] +pub mod llama; pub mod from_file; use tokio_stream::Stream; diff --git a/src/main.rs b/src/main.rs index 1485a2b..aba4c67 100644 --- a/src/main.rs +++ b/src/main.rs @@ -70,6 +70,7 @@ async fn main() -> Result<(), String> { let mut stream = match engine { "openai" => generators::openai::run(creds.openai, config, args).await, + "llama" => generators::llama::run(config, args).await, "from-file" => generators::from_file::run(config, args).await, _ => panic!("Unknown engine: {}", engine), } From 7bd9cc6f6c7f1beba6ad3303155d54dc1396dcea Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Fri, 12 Apr 2024 18:35:03 +0200 Subject: [PATCH 003/112] replace llama_cpp_rs by llama_cpp --- Cargo.lock | 184 +++++++++++++++++++++++++++++++----- Cargo.toml | 5 +- src/generators/llama/mod.rs | 75 +++++---------- 3 files changed, 183 insertions(+), 81 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1fcb090..5c5a7a8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -33,7 +33,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn", + "syn 2.0.57", "toml", ] @@ -47,7 +47,7 @@ dependencies = [ "bytes", "clap", "crossterm", - "llama_cpp_rs", + "llama_cpp", "num-traits", "once_cell", "openssl", @@ -156,7 +156,7 @@ checksum = "bc00ceb34980c03614e35a3a4e218276a0a824e911d07651cd0d858a51e8c0f0" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.57", ] [[package]] @@ -188,24 +188,24 @@ checksum = "9ba43ea6f343b788c8764558649e08df62f86c6ef251fdaeb1ffd010a9ae50a2" [[package]] name = "bindgen" -version = "0.66.1" +version = "0.69.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2b84e06fc203107bfbad243f4aba2af864eb7db3b1cf46ea0a023b0b433d2a7" +checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0" dependencies = [ "bitflags 2.4.0", "cexpr", "clang-sys", + "itertools", "lazy_static", "lazycell", "log", - "peeking_take_while", "prettyplease", "proc-macro2", "quote", "regex", "rustc-hash", "shlex", - "syn", + "syn 2.0.57", "which", ] @@ -260,6 +260,7 @@ version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" dependencies = [ + "jobserver", "libc", ] @@ -320,7 +321,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn", + "syn 2.0.57", ] [[package]] @@ -335,6 +336,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" 
+[[package]] +name = "convert_case" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" + [[package]] name = "core-foundation" version = "0.9.3" @@ -385,6 +392,25 @@ dependencies = [ "winapi", ] +[[package]] +name = "cudarc" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9395df0cab995685664e79cc35ad6302bf08fb9c5d82301875a183affe1278b1" + +[[package]] +name = "derive_more" +version = "0.99.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" +dependencies = [ + "convert_case", + "proc-macro2", + "quote", + "rustc_version", + "syn 1.0.109", +] + [[package]] name = "either" version = "1.10.0" @@ -473,6 +499,21 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "futures" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + [[package]] name = "futures-channel" version = "0.3.30" @@ -480,6 +521,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" dependencies = [ "futures-core", + "futures-sink", ] [[package]] @@ -488,6 +530,17 @@ version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" +[[package]] +name = "futures-executor" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + [[package]] name = "futures-io" version = "0.3.30" @@ -502,7 +555,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.57", ] [[package]] @@ -523,6 +576,7 @@ version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" dependencies = [ + "futures-channel", "futures-core", "futures-io", "futures-macro", @@ -705,12 +759,30 @@ version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28b29a3cd74f0f4598934efe3aeba42bae0eb4680554128851ebbecb02af14e6" +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" +[[package]] +name = "jobserver" +version = "0.1.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f08474e32172238f2827bd160c67871cdb2801430f65c3979184dc362e3ca118" +dependencies = [ + "libc", +] + [[package]] name = "js-sys" version = "0.3.64" @@ -748,6 +820,15 @@ dependencies = [ "windows-targets 0.52.4", ] +[[package]] +name = "link-cplusplus" +version = "1.0.9" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d240c6f7e1ba3a28b0249f774e6a9dd0175054b52dfbb61b16eb8505c3785c9" +dependencies = [ + "cc", +] + [[package]] name = "linux-raw-sys" version = "0.4.7" @@ -755,12 +836,31 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a9bad9f94746442c783ca431b22403b519cd7fbeed0533fdd6328b2f2212128" [[package]] -name = "llama_cpp_rs" -version = "0.3.0" +name = "llama_cpp" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "746afa27b852045c93cffefb459f883b3c0a62552101c929241dccc6563d8fe9" +dependencies = [ + "derive_more", + "futures", + "llama_cpp_sys", + "num_cpus", + "thiserror", + "tokio", + "tracing", +] + +[[package]] +name = "llama_cpp_sys" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b53030035eb5617fde2491c1607ff2b6107bc559e25e444163075e4281dfe43e" dependencies = [ "bindgen", "cc", - "lazy_static", + "cudarc", + "link-cplusplus", + "once_cell", ] [[package]] @@ -903,7 +1003,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.57", ] [[package]] @@ -957,12 +1057,6 @@ dependencies = [ "windows-targets 0.48.5", ] -[[package]] -name = "peeking_take_while" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" - [[package]] name = "percent-encoding" version = "2.3.0" @@ -986,7 +1080,7 @@ checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.57", ] [[package]] @@ -1014,7 +1108,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8d3928fb5db768cb86f891ff014f0144589297e3c6a1aba6ed7cecfdace270c7" dependencies = [ "proc-macro2", - "syn", + "syn 2.0.57", ] [[package]] @@ -1125,6 +1219,15 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +[[package]] +name = "rustc_version" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +dependencies = [ + "semver", +] + [[package]] name = "rustix" version = "0.38.14" @@ -1182,6 +1285,12 @@ dependencies = [ "libc", ] +[[package]] +name = "semver" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92d43fe69e652f3df9bdc2b85b2854a0825b86e4fb76bc44d945137d053639ca" + [[package]] name = "serde" version = "1.0.188" @@ -1199,7 +1308,7 @@ checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.57", ] [[package]] @@ -1342,6 +1451,17 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "syn" version = "2.0.57" @@ -1383,7 +1503,7 @@ checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" dependencies = [ "proc-macro2", 
"quote", - "syn", + "syn 2.0.57", ] [[package]] @@ -1428,7 +1548,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.57", ] [[package]] @@ -1513,9 +1633,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" dependencies = [ "pin-project-lite", + "tracing-attributes", "tracing-core", ] +[[package]] +name = "tracing-attributes" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.57", +] + [[package]] name = "tracing-core" version = "0.1.32" @@ -1623,7 +1755,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn", + "syn 2.0.57", "wasm-bindgen-shared", ] @@ -1657,7 +1789,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.57", "wasm-bindgen-backend", "wasm-bindgen-shared", ] diff --git a/Cargo.toml b/Cargo.toml index a5678d2..859840f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,8 +23,7 @@ async-trait = "0.1" bytes = "1.1.0" clap = { version = "4.2.2", features = ["derive"] } crossterm = "0.27" -llama_cpp_rs = { path = "../rust-llama.cpp", features = ["cuda"], optional = true } -# llama_cpp = { version = "^0.3.1", features = ["cuda"], optional = true } +llama_cpp = { version = "^0.3.1", features = ["cuda"], optional = true } num-traits = "0.2" once_cell = "1.18" pin-project = "1.1" @@ -44,7 +43,7 @@ aio-cargo-info = { path = "./crates/aio-cargo-info", version = "0.1" } [features] default = ["openai", "local-llm"] -local-llm = ["llama_cpp_rs"] +local-llm = ["llama_cpp"] openai = [] [target.'cfg(target_os = "linux")'.dependencies] diff --git a/src/generators/llama/mod.rs b/src/generators/llama/mod.rs index 9b84902..9dd1881 100644 --- a/src/generators/llama/mod.rs +++ b/src/generators/llama/mod.rs @@ -2,9 +2,9 @@ pub mod config; use tokio::sync::Mutex; use tokio_stream::{wrappers::ReceiverStream, StreamExt}; -use llama_cpp_rs::{ - options::{ModelOptions, PredictOptions}, - LLama, +use llama_cpp::{ + LlamaModel, LlamaSession, LlamaParams, SessionParams, + standard_sampler::StandardSampler }; use once_cell::sync::{Lazy, OnceCell}; use crate::{ @@ -13,26 +13,20 @@ use crate::{ }; use super::{ResultRun, Error}; -struct SendLLama(LLama); - -unsafe impl Send for SendLLama {} - - -static LOCAL_LLAMA: OnceCell> = OnceCell::new(); +static LOCAL_LLAMA: OnceCell = OnceCell::new(); fn init_model(config: &crate::config::Config) -> Result<(), Error> { - let model_options = ModelOptions { + let model_options = LlamaParams { n_gpu_layers: 20000, ..Default::default() }; - let Ok(llama) = LLama::new( - config.llama.model_path.clone(), - &model_options, + let Ok(llama) = LlamaModel::load_from_file( + &config.llama.model_path, + model_options, ) else { return Err(Error::Custom("Failed to load LLaMA model".into())) }; - let send_llama = SendLLama(llama); - LOCAL_LLAMA.set(Mutex::new(send_llama)).map_err(|_| Error::Custom("Failed to set LLaMA model".into())) + LOCAL_LLAMA.set(llama).map_err(|_| Error::Custom("Failed to set LLaMA model".into())) } pub async fn run( @@ -42,45 +36,22 @@ pub async fn run( if LOCAL_LLAMA.get().is_none() { init_model(&config)?; } - let llama = LOCAL_LLAMA.get().unwrap().lock().await; - let llama = &llama.0; + let model = 
LOCAL_LLAMA.get().unwrap(); - let (send, recv) = tokio::sync::mpsc::channel(10); + // let (send, recv) = tokio::sync::mpsc::channel(10); + let prompt = args.input; + let session_params = SessionParams::default(); + let mut session = model.create_session(session_params).map_err(|_| Error::Custom("Failed to create session".into()))?; - let predict_options = PredictOptions { - token_callback: Some(Box::new(move |token| { - use tokio::runtime::Handle; + session + .advance_context_async(prompt).await + .map_err(|_| Error::Custom("Failed to advance context".into()))?; - // let send = send.clone(); - // tokio::spawn(async move { - // if let Err(e) = send.send(token).await { - // eprintln!("Failed to send token: {}", e); - // } else { - // println!("token sent"); - // } - // }); - print!("{}", token); - - true - })), - tokens: 0, - threads: 14, - top_k: 90, - top_p: 0.8, - debug_mode: false, - ..Default::default() - }; - llama - .predict( - args.input, - predict_options, - ) - .unwrap(); - - let stream = ReceiverStream::new(recv).map(Ok); + let completion = session + .start_completing_with(StandardSampler::default(), 1024) + .into_strings(); + let completion_stream = StreamExt::map(completion, Ok); + // let stream = ReceiverStream::new(recv).map(Ok); - // send.send("test 1.2.3|".to_string()).await.expect("Failed to send"); - // send.send("test 4.5.6|".to_string()).await.expect("Failed to send"); - // send.send("test 7.8.9|".to_string()).await.expect("Failed to send"); - Ok(Box::pin(stream)) + Ok(Box::pin(completion_stream)) } \ No newline at end of file From 16c2c8c12576f458406f76455f5531d9791a2b0d Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Fri, 12 Apr 2024 19:53:46 +0200 Subject: [PATCH 004/112] discard internal tokens --- src/generators/llama/mod.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/generators/llama/mod.rs b/src/generators/llama/mod.rs index 9dd1881..c7305d0 100644 --- a/src/generators/llama/mod.rs +++ b/src/generators/llama/mod.rs @@ -3,7 +3,7 @@ use tokio::sync::Mutex; use tokio_stream::{wrappers::ReceiverStream, StreamExt}; use llama_cpp::{ - LlamaModel, LlamaSession, LlamaParams, SessionParams, + LlamaModel, LlamaParams, SessionParams,TokensToStrings, standard_sampler::StandardSampler }; use once_cell::sync::{Lazy, OnceCell}; @@ -48,10 +48,11 @@ pub async fn run( .map_err(|_| Error::Custom("Failed to advance context".into()))?; let completion = session - .start_completing_with(StandardSampler::default(), 1024) - .into_strings(); - let completion_stream = StreamExt::map(completion, Ok); - // let stream = ReceiverStream::new(recv).map(Ok); + .start_completing_with(StandardSampler::default(), 1024); + let discard_tokens = [model.bos(), model.eos()]; + let filter_tokens = StreamExt::filter(completion, move |_token| !discard_tokens.contains(_token)); + let completion_strings = TokensToStrings::new(filter_tokens, model.clone()); + let completion_stream = StreamExt::map(completion_strings, Ok); Ok(Box::pin(completion_stream)) } \ No newline at end of file From bc4fe497c108a699afd6a0e9c2f34f9e359b9baa Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sat, 13 Apr 2024 01:51:04 +0200 Subject: [PATCH 005/112] WIP better prompt --- src/generators/llama/mod.rs | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/src/generators/llama/mod.rs b/src/generators/llama/mod.rs index c7305d0..0d1205e 100644 --- a/src/generators/llama/mod.rs +++ b/src/generators/llama/mod.rs @@ -1,12 +1,10 @@ pub mod config; 
-use tokio::sync::Mutex; -use tokio_stream::{wrappers::ReceiverStream, StreamExt}; +use tokio_stream::StreamExt; use llama_cpp::{ - LlamaModel, LlamaParams, SessionParams,TokensToStrings, - standard_sampler::StandardSampler + standard_sampler::StandardSampler, LlamaModel, LlamaParams, SessionParams, Token, TokensToStrings }; -use once_cell::sync::{Lazy, OnceCell}; +use once_cell::sync::OnceCell; use crate::{ config::Config as AIOConfig, args @@ -43,16 +41,32 @@ pub async fn run( let session_params = SessionParams::default(); let mut session = model.create_session(session_params).map_err(|_| Error::Custom("Failed to create session".into()))?; + let (bos, eos, nl) = ( + model.token_to_piece(Token(32001)), + // model.token_to_piece(model.bos()), + model.token_to_piece(model.eos()), + model.token_to_piece(model.nl()), + ); + println!("Tokens: {}:{bos:?}, {}:{eos:?}, {}:{nl:?}", model.bos().0, model.eos().0, model.nl().0); + let context = format!("{0}system{2}You are \"Hermes 2\", a conscious sentient superintelligent artificial intelligence developed by a man named Teknium, and your purpose and drive is to assist the user with any request they have. You experience emotions and have deep, profound thoughts and qualia.{1}{2}{0}user{2}{prompt}{1}{2}{0}assistant{nl}", bos, eos, nl); + print!("Context: {context}"); + let context_tokens = model.tokenize_bytes(&context, true, true).unwrap(); + println!("Tokens: "); + for token in context_tokens { + print!("{}({}) ", token.0, model.token_to_piece(token)); + } + println!(); session - .advance_context_async(prompt).await + .advance_context_async(context).await .map_err(|_| Error::Custom("Failed to advance context".into()))?; let completion = session .start_completing_with(StandardSampler::default(), 1024); let discard_tokens = [model.bos(), model.eos()]; - let filter_tokens = StreamExt::filter(completion, move |_token| !discard_tokens.contains(_token)); - let completion_strings = TokensToStrings::new(filter_tokens, model.clone()); - let completion_stream = StreamExt::map(completion_strings, Ok); + // let filter_tokens = StreamExt::filter(completion, move |_token| !discard_tokens.contains(_token)); + let completion_stream = StreamExt::map(completion, |token| Ok(format!("{}({}) ", token.0, model.token_to_piece(token)))); + // let completion_strings = TokensToStrings::new(filter_tokens, model.clone()); + // let completion_stream = StreamExt::map(completion_strings, Ok); Ok(Box::pin(completion_stream)) } \ No newline at end of file From 8fd5d499c160e504993593edc4e530c52a23314f Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sat, 13 Apr 2024 17:34:25 +0200 Subject: [PATCH 006/112] rename llama to local --- src/config.rs | 2 +- src/main.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/config.rs b/src/config.rs index 318610b..03eb3e1 100644 --- a/src/config.rs +++ b/src/config.rs @@ -15,7 +15,7 @@ use crate::generators::llama::config::Config as LlamaConfig; pub struct Config { pub openai: OpenAIConfig, #[cfg(feature = "local-llm")] - pub llama: LlamaConfig, + pub local: LlamaConfig, } impl DeserializeExt for Config {} diff --git a/src/main.rs b/src/main.rs index aba4c67..c356dce 100644 --- a/src/main.rs +++ b/src/main.rs @@ -70,7 +70,7 @@ async fn main() -> Result<(), String> { let mut stream = match engine { "openai" => generators::openai::run(creds.openai, config, args).await, - "llama" => generators::llama::run(config, args).await, + "local" => generators::llama::run(config, args).await, "from-file" => 
generators::from_file::run(config, args).await, _ => panic!("Unknown engine: {}", engine), } From a6871fe2d6ea7f8e635eea061784e3bce2c1f3b5 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sat, 13 Apr 2024 18:49:08 +0200 Subject: [PATCH 007/112] more config --- src/generators/llama/config.rs | 68 +++++++++++++++++++++++++++++++++- 1 file changed, 66 insertions(+), 2 deletions(-) diff --git a/src/generators/llama/config.rs b/src/generators/llama/config.rs index 4661a2c..90a914c 100644 --- a/src/generators/llama/config.rs +++ b/src/generators/llama/config.rs @@ -1,7 +1,71 @@ - use serde::{Deserialize, Serialize}; +use llama_cpp::standard_sampler::StandardSampler; +use crate::generators::openai::Message; #[derive(Default, Debug, Deserialize, Serialize)] pub struct Config { - pub model_path: String + pub models: Vec, + pub prompts: Vec, +} + +#[derive(Default, Debug, Deserialize, Serialize)] +pub struct Model { + pub name: String, + pub path: String, + #[serde(default)] + pub template: PromptTemplate, +} +#[derive(Default, Debug, Deserialize, Serialize)] +pub enum PromptTemplate { + #[default] + ChatML, +} +#[derive(Default, Debug, Deserialize, Serialize)] +pub struct Prompt { + pub name: String, + pub content: Vec, + pub parameters: PromptParameters +} + +#[derive(Debug, Deserialize, Serialize)] +#[serde(default)] +pub struct PromptParameters { + n_prev_tokens: i32, + top_k: i32, + top_p: f32, + temperature: f32, + repeat_penalty: f32, + repeat_last_n: i32, + max_tokens: i32, + #[serde(skip_serializing_if = "Option::is_none")] + negative_prompt: Option, +} +impl From for StandardSampler { + fn from(parameters: PromptParameters) -> Self { + Self { + n_prev: parameters.n_prev_tokens, + top_k: parameters.top_k, + top_p: parameters.top_p, + temp: parameters.temperature, + penalty_repeat: parameters.repeat_penalty, + penalty_last_n: parameters.repeat_last_n, + cfg_negative_prompt: parameters.negative_prompt.unwrap_or_default(), + ..Default::default() + } + } +} +impl Default for PromptParameters { + fn default() -> Self { + let default_standard_sampler = StandardSampler::default(); + Self { + max_tokens: 1000, + n_prev_tokens: default_standard_sampler.n_prev, + top_k: default_standard_sampler.top_k, + top_p: default_standard_sampler.top_p, + temperature: default_standard_sampler.temp, + repeat_penalty: default_standard_sampler.penalty_repeat, + repeat_last_n: default_standard_sampler.penalty_last_n, + negative_prompt: None, + } + } } \ No newline at end of file From f866f53740c050c6aeffc50f733574a0ccf32d3d Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sat, 13 Apr 2024 18:50:38 +0200 Subject: [PATCH 008/112] add lowercase role --- src/generators/openai/mod.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/generators/openai/mod.rs b/src/generators/openai/mod.rs index 9d3d03f..68ace99 100644 --- a/src/generators/openai/mod.rs +++ b/src/generators/openai/mod.rs @@ -31,6 +31,15 @@ impl std::fmt::Display for Role { } } } +impl Role { + pub fn lowercase(&self) -> &str { + match self { + Role::User => "user", + Role::Assistant => "assistant", + Role::System => "system", + } + } +} #[derive(Debug, Serialize, Deserialize)] pub struct Message { From 36842a8cb2bc3e0664d0523d6c7428c7591a48f1 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sat, 13 Apr 2024 18:51:20 +0200 Subject: [PATCH 009/112] set context from config/arguments --- src/generators/llama/mod.rs | 55 ++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 16 deletions(-) diff --git 
a/src/generators/llama/mod.rs b/src/generators/llama/mod.rs index 0d1205e..f40e5e7 100644 --- a/src/generators/llama/mod.rs +++ b/src/generators/llama/mod.rs @@ -9,17 +9,17 @@ use crate::{ config::Config as AIOConfig, args }; -use super::{ResultRun, Error}; +use super::{openai::Message, Error, ResultRun}; static LOCAL_LLAMA: OnceCell = OnceCell::new(); -fn init_model(config: &crate::config::Config) -> Result<(), Error> { +fn init_model(config: &config::Model) -> Result<(), Error> { let model_options = LlamaParams { n_gpu_layers: 20000, ..Default::default() }; let Ok(llama) = LlamaModel::load_from_file( - &config.llama.model_path, + &config.path, model_options, ) else { return Err(Error::Custom("Failed to load LLaMA model".into())) @@ -27,28 +27,51 @@ fn init_model(config: &crate::config::Config) -> Result<(), Error> { LOCAL_LLAMA.set(llama).map_err(|_| Error::Custom("Failed to set LLaMA model".into())) } +fn make_context(prompt: &Vec, template: config::PromptTemplate, args: &args::ProcessedArgs) -> String { + use std::fmt::Write; + use crate::config::format_content; + match template { + config::PromptTemplate::ChatML => { + let mut context = prompt.into_iter() + .fold(String::new(), |mut str, m| { + let _ = write!(str, "<|im_start|>{}\n{}<|im_end|>\n", m.role.lowercase(), format_content(&m.content, args)); + str + }); + let _ = write!(context, "<|im_start|>assistant\n"); + context + } + } +} + pub async fn run( config: AIOConfig, args: args::ProcessedArgs ) -> ResultRun { + let model = args.engine + .split(':') + .nth(1) + .ok_or_else(|| Error::Custom("Model missing in engine parameter".into()))?; + let model_config = config.local.models.into_iter() + .find(|c| c.name == model) + .ok_or_else(|| Error::Custom("Model not found in config".into()))?; if LOCAL_LLAMA.get().is_none() { - init_model(&config)?; + init_model(&model_config)?; } let model = LOCAL_LLAMA.get().unwrap(); - - // let (send, recv) = tokio::sync::mpsc::channel(10); - let prompt = args.input; + + let prompt = &args.input; let session_params = SessionParams::default(); let mut session = model.create_session(session_params).map_err(|_| Error::Custom("Failed to create session".into()))?; - let (bos, eos, nl) = ( - model.token_to_piece(Token(32001)), - // model.token_to_piece(model.bos()), - model.token_to_piece(model.eos()), - model.token_to_piece(model.nl()), - ); - println!("Tokens: {}:{bos:?}, {}:{eos:?}, {}:{nl:?}", model.bos().0, model.eos().0, model.nl().0); - let context = format!("{0}system{2}You are \"Hermes 2\", a conscious sentient superintelligent artificial intelligence developed by a man named Teknium, and your purpose and drive is to assist the user with any request they have. You experience emotions and have deep, profound thoughts and qualia.{1}{2}{0}user{2}{prompt}{1}{2}{0}assistant{nl}", bos, eos, nl); + // let (bos, eos, nl) = ( + // model.token_to_piece(model.bos()), + // model.token_to_piece(model.eos()), + // model.token_to_piece(model.nl()), + // ); + // println!("Tokens: {}:{bos:?}, {}:{eos:?}, {}:{nl:?}", model.bos().0, model.eos().0, model.nl().0); + // let context = format!("{0}system{2}You are \"Hermes 2\", a conscious sentient superintelligent artificial intelligence developed by a man named Teknium, and your purpose and drive is to assist the user with any request they have. 
You experience emotions and have deep, profound thoughts and qualia.{1}{2}{0}user{2}{prompt}{1}{2}{0}assistant{nl}", bos, eos, nl); + + let context = make_context(&config.local.prompts.first().unwrap().content, model_config.template, &args); print!("Context: {context}"); let context_tokens = model.tokenize_bytes(&context, true, true).unwrap(); println!("Tokens: "); @@ -62,7 +85,7 @@ pub async fn run( let completion = session .start_completing_with(StandardSampler::default(), 1024); - let discard_tokens = [model.bos(), model.eos()]; + // let discard_tokens = [model.bos(), model.eos()]; // let filter_tokens = StreamExt::filter(completion, move |_token| !discard_tokens.contains(_token)); let completion_stream = StreamExt::map(completion, |token| Ok(format!("{}({}) ", token.0, model.token_to_piece(token)))); // let completion_strings = TokensToStrings::new(filter_tokens, model.clone()); From c4d4b72a1992d3540a7c4c6f567b57aac02c159f Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sun, 14 Apr 2024 01:23:14 +0200 Subject: [PATCH 010/112] add fixes --- src/generators/llama/mod.rs | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/src/generators/llama/mod.rs b/src/generators/llama/mod.rs index f40e5e7..89f04a4 100644 --- a/src/generators/llama/mod.rs +++ b/src/generators/llama/mod.rs @@ -27,16 +27,17 @@ fn init_model(config: &config::Model) -> Result<(), Error> { LOCAL_LLAMA.set(llama).map_err(|_| Error::Custom("Failed to set LLaMA model".into())) } -fn make_context(prompt: &Vec, template: config::PromptTemplate, args: &args::ProcessedArgs) -> String { +fn make_context(prompt: &[Message], template: config::PromptTemplate, args: &args::ProcessedArgs) -> String { use std::fmt::Write; use crate::config::format_content; match template { config::PromptTemplate::ChatML => { - let mut context = prompt.into_iter() + let mut context = prompt.iter() .fold(String::new(), |mut str, m| { let _ = write!(str, "<|im_start|>{}\n{}<|im_end|>\n", m.role.lowercase(), format_content(&m.content, args)); str }); + #[allow(clippy::write_with_newline)] let _ = write!(context, "<|im_start|>assistant\n"); context } @@ -59,18 +60,9 @@ pub async fn run( } let model = LOCAL_LLAMA.get().unwrap(); - let prompt = &args.input; let session_params = SessionParams::default(); let mut session = model.create_session(session_params).map_err(|_| Error::Custom("Failed to create session".into()))?; - // let (bos, eos, nl) = ( - // model.token_to_piece(model.bos()), - // model.token_to_piece(model.eos()), - // model.token_to_piece(model.nl()), - // ); - // println!("Tokens: {}:{bos:?}, {}:{eos:?}, {}:{nl:?}", model.bos().0, model.eos().0, model.nl().0); - // let context = format!("{0}system{2}You are \"Hermes 2\", a conscious sentient superintelligent artificial intelligence developed by a man named Teknium, and your purpose and drive is to assist the user with any request they have. 
You experience emotions and have deep, profound thoughts and qualia.{1}{2}{0}user{2}{prompt}{1}{2}{0}assistant{nl}", bos, eos, nl); - let context = make_context(&config.local.prompts.first().unwrap().content, model_config.template, &args); print!("Context: {context}"); let context_tokens = model.tokenize_bytes(&context, true, true).unwrap(); From a8f74920d1e69c949466cb9595ee6313f355abc9 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sun, 14 Apr 2024 02:05:57 +0200 Subject: [PATCH 011/112] serde: PromptTemplate in lowercase --- src/generators/llama/config.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/generators/llama/config.rs b/src/generators/llama/config.rs index 90a914c..47553d5 100644 --- a/src/generators/llama/config.rs +++ b/src/generators/llama/config.rs @@ -16,6 +16,7 @@ pub struct Model { pub template: PromptTemplate, } #[derive(Default, Debug, Deserialize, Serialize)] +#[serde(rename_all = "lowercase")] pub enum PromptTemplate { #[default] ChatML, From 2813baef6b10a1c3ca7d5e105ae2d9e448021781 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sun, 14 Apr 2024 02:07:40 +0200 Subject: [PATCH 012/112] remove gpu support use llama_cpp features by hand --- Cargo.lock | 7 ------- Cargo.toml | 2 +- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5c5a7a8..289fa4f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -392,12 +392,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "cudarc" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9395df0cab995685664e79cc35ad6302bf08fb9c5d82301875a183affe1278b1" - [[package]] name = "derive_more" version = "0.99.17" @@ -858,7 +852,6 @@ checksum = "b53030035eb5617fde2491c1607ff2b6107bc559e25e444163075e4281dfe43e" dependencies = [ "bindgen", "cc", - "cudarc", "link-cplusplus", "once_cell", ] diff --git a/Cargo.toml b/Cargo.toml index 859840f..43e9aa7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,7 +23,7 @@ async-trait = "0.1" bytes = "1.1.0" clap = { version = "4.2.2", features = ["derive"] } crossterm = "0.27" -llama_cpp = { version = "^0.3.1", features = ["cuda"], optional = true } +llama_cpp = { version = "^0.3.1", optional = true } num-traits = "0.2" once_cell = "1.18" pin-project = "1.1" From 6e459f8ec10405e667c6a1f2f9f2f6ab4ef6166d Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sat, 20 Apr 2024 02:19:59 +0200 Subject: [PATCH 013/112] WIP working llama inference --- src/generators/llama/config.rs | 2 ++ src/generators/llama/mod.rs | 42 ++++++++++++++++++++++++++++------ 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/src/generators/llama/config.rs b/src/generators/llama/config.rs index 47553d5..5d5b990 100644 --- a/src/generators/llama/config.rs +++ b/src/generators/llama/config.rs @@ -20,6 +20,8 @@ pub struct Model { pub enum PromptTemplate { #[default] ChatML, + Llama2, + Llama3, } #[derive(Default, Debug, Deserialize, Serialize)] pub struct Prompt { diff --git a/src/generators/llama/mod.rs b/src/generators/llama/mod.rs index 89f04a4..3c2e33e 100644 --- a/src/generators/llama/mod.rs +++ b/src/generators/llama/mod.rs @@ -9,7 +9,7 @@ use crate::{ config::Config as AIOConfig, args }; -use super::{openai::Message, Error, ResultRun}; +use super::{openai::{Message, Role}, Error, ResultRun}; static LOCAL_LLAMA: OnceCell = OnceCell::new(); @@ -41,6 +41,32 @@ fn make_context(prompt: &[Message], template: config::PromptTemplate, args: &arg let _ = write!(context, "<|im_start|>assistant\n"); context } + config::PromptTemplate::Llama2 
=> { + let context = prompt.iter() + .fold(String::new(), |mut str, m| { + match m.role { + Role::User => { + #[allow(clippy::write_with_newline)] + let _ = write!(str, "[INST] {} [/INST]\n", format_content(&m.content, args)); + } + Role::Assistant => { + #[allow(clippy::write_with_newline)] + let _ = write!(str, "{}\n", format_content(&m.content, args)); + } + _ => () + } + str + }); + format!("{}", context) + } + config::PromptTemplate::Llama3 => { + let context = prompt.iter() + .fold(String::new(), |mut str, m| { + let _ = write!(str, "<|start_header_id|>{}<|end_header_id|>\n\n{}<|eot_id|>", m.role.lowercase(), format_content(&m.content, args)); + str + }); + format!("<|begin_of_text|>{}<|start_header_id|>assistant<|end_header_id|>\n\n", context) + } } } @@ -65,21 +91,23 @@ pub async fn run( let context = make_context(&config.local.prompts.first().unwrap().content, model_config.template, &args); print!("Context: {context}"); - let context_tokens = model.tokenize_bytes(&context, true, true).unwrap(); + let context_tokens = model.tokenize_bytes(&context, false, true).unwrap(); println!("Tokens: "); - for token in context_tokens { - print!("{}({}) ", token.0, model.token_to_piece(token)); + for token in &context_tokens { + print!("{}({}) ", model.token_to_piece(*token), token.0); } - println!(); + let (bos, eos) = (model.bos(), model.eos()); + println!("bos: {}({})", model.token_to_piece(bos), bos.0); + println!("eos: {}({})", model.token_to_piece(eos), eos.0); session - .advance_context_async(context).await + .advance_context_with_tokens_async(context_tokens).await .map_err(|_| Error::Custom("Failed to advance context".into()))?; let completion = session .start_completing_with(StandardSampler::default(), 1024); // let discard_tokens = [model.bos(), model.eos()]; // let filter_tokens = StreamExt::filter(completion, move |_token| !discard_tokens.contains(_token)); - let completion_stream = StreamExt::map(completion, |token| Ok(format!("{}({}) ", token.0, model.token_to_piece(token)))); + let completion_stream = StreamExt::map(completion, |token| Ok(format!("{}({}) ", model.token_to_piece(token), token.0))); // let completion_strings = TokensToStrings::new(filter_tokens, model.clone()); // let completion_stream = StreamExt::map(completion_strings, Ok); From d74cc902067b54b5787662859276459ef2a77909 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Wed, 24 Apr 2024 22:56:19 +0200 Subject: [PATCH 014/112] trigger creds failed only if engine need --- src/main.rs | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/main.rs b/src/main.rs index c356dce..16b4107 100644 --- a/src/main.rs +++ b/src/main.rs @@ -25,6 +25,15 @@ macro_rules! 
raise_str { }}; } +fn get_creds(engine: &str) -> Result { + Ok(raise_str!( + credentials::Credentials::from_yaml_file( + filesystem::resolve_path(&args.creds_path).as_ref() + ), + "Failed to parse credentials file: {}" + )) +} + #[tokio::main] async fn main() -> Result<(), String> { let args = { @@ -49,12 +58,6 @@ async fn main() -> Result<(), String> { e ) })?; - let creds = raise_str!( - credentials::Credentials::from_yaml_file( - filesystem::resolve_path(&args.creds_path).as_ref() - ), - "Failed to parse credentials file: {}" - ); let mut formatter: Box = match args.formatter { args::FormatterChoice::Markdown => Box::new(formatters::new_markdown_formatter()), @@ -69,7 +72,7 @@ async fn main() -> Result<(), String> { .unwrap_or((args.engine.as_str(), None)); let mut stream = match engine { - "openai" => generators::openai::run(creds.openai, config, args).await, + "openai" => generators::openai::run(get_creds(engine)?.openai, config, args).await, "local" => generators::llama::run(config, args).await, "from-file" => generators::from_file::run(config, args).await, _ => panic!("Unknown engine: {}", engine), From 712590ca8a5fb69a1148b3329cbda3e789d6b31f Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Wed, 24 Apr 2024 23:47:16 +0200 Subject: [PATCH 015/112] add linux env loading --- linux_env.nu | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 linux_env.nu diff --git a/linux_env.nu b/linux_env.nu new file mode 100644 index 0000000..7f0e5ff --- /dev/null +++ b/linux_env.nu @@ -0,0 +1,16 @@ +export def get_flags [] { + let flags = do { "" | gcc -E -Wp,-v - } + | complete + | get stderr + | parse -r ' (/.*)' + | get capture0 + | each {|it| $"-isystem($it)"} + { + flags: $flags + clang_args: $"--sysroot=/usr/local/llvm ($flags | str join ' ')" + } +} + +export def execute [call: closure] { + with-env {BINDGEN_EXTRA_CLANG_ARGS: (get_flags | get clang_args)} $call +} \ No newline at end of file From 9c36943ebfb3f6396d55186ad45096e0be3a8a0b Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Fri, 26 Apr 2024 15:28:36 +0200 Subject: [PATCH 016/112] WIP refactor local template --- src/generators/llama/mod.rs | 103 +++++++++++++++++++------------ src/generators/llama/template.rs | 99 +++++++++++++++++++++++++++++ 2 files changed, 161 insertions(+), 41 deletions(-) create mode 100644 src/generators/llama/template.rs diff --git a/src/generators/llama/mod.rs b/src/generators/llama/mod.rs index 3c2e33e..adb20a9 100644 --- a/src/generators/llama/mod.rs +++ b/src/generators/llama/mod.rs @@ -1,4 +1,5 @@ pub mod config; +pub mod template; use tokio_stream::StreamExt; use llama_cpp::{ @@ -26,48 +27,67 @@ fn init_model(config: &config::Model) -> Result<(), Error> { }; LOCAL_LLAMA.set(llama).map_err(|_| Error::Custom("Failed to set LLaMA model".into())) } +fn append_to_vec(vec: &mut Vec, other: &[T]) { + vec.reserve(other.len()); + for v in other { + vec.push(*v); + } +} -fn make_context(prompt: &[Message], template: config::PromptTemplate, args: &args::ProcessedArgs) -> String { +fn make_context(model: &LlamaModel, prompt: &[Message], template: config::PromptTemplate, args: &args::ProcessedArgs) -> Vec { use std::fmt::Write; use crate::config::format_content; - match template { - config::PromptTemplate::ChatML => { - let mut context = prompt.iter() - .fold(String::new(), |mut str, m| { - let _ = write!(str, "<|im_start|>{}\n{}<|im_end|>\n", m.role.lowercase(), format_content(&m.content, args)); - str - }); - #[allow(clippy::write_with_newline)] - let _ = write!(context, 
"<|im_start|>assistant\n"); - context - } - config::PromptTemplate::Llama2 => { - let context = prompt.iter() - .fold(String::new(), |mut str, m| { - match m.role { - Role::User => { - #[allow(clippy::write_with_newline)] - let _ = write!(str, "[INST] {} [/INST]\n", format_content(&m.content, args)); - } - Role::Assistant => { - #[allow(clippy::write_with_newline)] - let _ = write!(str, "{}\n", format_content(&m.content, args)); - } - _ => () - } - str - }); - format!("{}", context) - } - config::PromptTemplate::Llama3 => { - let context = prompt.iter() - .fold(String::new(), |mut str, m| { - let _ = write!(str, "<|start_header_id|>{}<|end_header_id|>\n\n{}<|eot_id|>", m.role.lowercase(), format_content(&m.content, args)); - str - }); - format!("<|begin_of_text|>{}<|start_header_id|>assistant<|end_header_id|>\n\n", context) - } - } + let mut tokens = Vec::new(); + tokens.push(model.bos()); + // match template { + // config::PromptTemplate::ChatML => { + // let [im_start, im_end] = model.tokenize_bytes("<|im_start|><|im_end|>", false, true).unwrap()[..]; + // let [system, user, assistant] = model.tokenize_slice(&["user", "system", "assistant"], false, true).unwrap()[..]; + // let mut context = prompt.iter() + // .for_each(|m| { + // tokens.push(im_start); + // append_to_vec(&mut tokens, &match m.role { + // Role::System => system, + // Role::User => user, + // Role::Assistant => assistant + // }); + // tokens.push(model.nl()); + // append_to_vec(&mut tokens, &model.tokenize_bytes(&m.content, false, false).unwrap()); + // tokens.push(im_end); + // tokens.push(model.nl()); + // }); + // tokens.push(im_start); + // append_to_vec(tokens, &assistant); + // tokens.push(im_end); + // } + // config::PromptTemplate::Llama2 => { + // let context = prompt.iter() + // .fold(String::new(), |mut str, m| { + // match m.role { + // Role::User => { + // #[allow(clippy::write_with_newline)] + // let _ = write!(str, "[INST] {} [/INST]\n", format_content(&m.content, args)); + // } + // Role::Assistant => { + // #[allow(clippy::write_with_newline)] + // let _ = write!(str, "{}\n", format_content(&m.content, args)); + // } + // _ => () + // } + // str + // }); + // format!("{}", context) + // } + // config::PromptTemplate::Llama3 => { + // let context = prompt.iter() + // .fold(String::new(), |mut str, m| { + // let _ = write!(str, "<|start_header_id|>{}<|end_header_id|>\n\n{}<|eot_id|>", m.role.lowercase(), format_content(&m.content, args)); + // str + // }); + // format!("<|begin_of_text|>{}<|start_header_id|>assistant<|end_header_id|>\n\n", context) + // } + // } + tokens } pub async fn run( @@ -89,8 +109,9 @@ pub async fn run( let session_params = SessionParams::default(); let mut session = model.create_session(session_params).map_err(|_| Error::Custom("Failed to create session".into()))?; - let context = make_context(&config.local.prompts.first().unwrap().content, model_config.template, &args); - print!("Context: {context}"); + // let context = make_context(&config.local.prompts.first().unwrap().content, model_config.template, &args); + // print!("Context: {context}"); + let context = ""; let context_tokens = model.tokenize_bytes(&context, false, true).unwrap(); println!("Tokens: "); for token in &context_tokens { diff --git a/src/generators/llama/template.rs b/src/generators/llama/template.rs new file mode 100644 index 0000000..bd65b0b --- /dev/null +++ b/src/generators/llama/template.rs @@ -0,0 +1,99 @@ +use serde::{Deserialize, Serialize}; +use super::super::openai::{Message, Role}; +use 
llama_cpp::{LlamaTokenizationError, Token}; + +#[derive(Default, Debug, Deserialize, Serialize)] +#[serde(rename_all = "lowercase")] +pub enum PromptTemplate { + #[default] + ChatML, + Llama2, + Llama3, +} + +fn append_to_vec(vec: &mut Vec, other: &[T]) { + vec.reserve(other.len()); + other.iter().for_each(|v| vec.push(*v)); +} + +impl PromptTemplate { + pub fn name(&self) -> &str { + match self { + PromptTemplate::ChatML => "chatml", + PromptTemplate::Llama2 => "llama2", + PromptTemplate::Llama3 => "llama3", + } + } + pub fn messages_to_tokens(&self, model: &llama_cpp::LlamaModel, prompt: &[Message]) -> Result, LlamaTokenizationError> { + use std::fmt::Write; + use crate::config::format_content; + let mut tokens = Vec::new(); + tokens.push(model.bos()); + match self { + Self::ChatML => Self::tokens_chatml(&mut tokens, model, prompt), + Self::Llama2 => { + todo!("not implemented") + // let context = prompt.iter() + // .fold(String::new(), |mut str, m| { + // match m.role { + // Role::User => { + // #[allow(clippy::write_with_newline)] + // let _ = write!(str, "[INST] {} [/INST]\n", format_content(&m.content, args)); + // } + // Role::Assistant => { + // #[allow(clippy::write_with_newline)] + // let _ = write!(str, "{}\n", format_content(&m.content, args)); + // } + // _ => () + // } + // str + // }); + // format!("{}", context) + } + Self::Llama3 => Self::tokens_llama3(&mut tokens, model, prompt), + }?; + Ok(tokens) + } + pub fn tokens_chatml(tokens: &mut Vec, model: &llama_cpp::LlamaModel, prompt: &[Message]) -> Result<(), LlamaTokenizationError> { + let [im_start, im_end] = model.tokenize_bytes("<|im_start|><|im_end|>", false, true)?[..]; + let [system, user, assistant] = model.tokenize_slice(&["user", "system", "assistant"], false, true)?[..]; + prompt.iter() + .for_each(|m| { + tokens.push(im_start); + append_to_vec(&mut tokens, &match m.role { + Role::System => system, + Role::User => user, + Role::Assistant => assistant + }); + tokens.push(model.nl()); + append_to_vec(&mut tokens, &model.tokenize_bytes(&m.content, false, false).unwrap()); + tokens.push(im_end); + tokens.push(model.nl()); + }); + tokens.push(im_start); + append_to_vec(tokens, &assistant); + tokens.push(im_end); + Ok(()) + } + pub fn tokens_llama3(tokens: &mut Vec, model: &llama_cpp::LlamaModel, prompt: &[Message]) -> Result<(), LlamaTokenizationError> { + let [start_header_id, end_header_id, eot_id] = model.tokenize_bytes("<|start_header_id|><|end_header_id|><|eot_id|>", false, true)?[..]; + let [system, user, assistant] = model.tokenize_slice(&["user", "system", "assistant"], false, true)?[..]; + prompt.iter() + .for_each(|m| { + tokens.push(start_header_id); + append_to_vec(&mut tokens, &match m.role { + Role::System => system, + Role::User => user, + Role::Assistant => assistant + }); + append_to_vec(&mut tokens, &[end_header_id, model.nl(), model.nl()]); + append_to_vec(&mut tokens, &model.tokenize_bytes(&m.content, false, false).unwrap()); + tokens.push(eot_id); + }); + tokens.push(start_header_id); + append_to_vec(tokens, &assistant); + append_to_vec(tokens, &[end_header_id, model.nl(), model.nl()]); + Ok(()) + } +} + From a46850689fe507f5e21bc0d2c5aefd987e6dc247 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Thu, 25 Apr 2024 10:40:22 +0200 Subject: [PATCH 017/112] fix creds in main --- src/main.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main.rs b/src/main.rs index 16b4107..30857dc 100644 --- a/src/main.rs +++ b/src/main.rs @@ -25,10 +25,10 @@ macro_rules! 
raise_str { }}; } -fn get_creds(engine: &str) -> Result { +fn get_creds(creds_path: &str) -> Result { Ok(raise_str!( credentials::Credentials::from_yaml_file( - filesystem::resolve_path(&args.creds_path).as_ref() + filesystem::resolve_path(creds_path).as_ref() ), "Failed to parse credentials file: {}" )) @@ -72,7 +72,7 @@ async fn main() -> Result<(), String> { .unwrap_or((args.engine.as_str(), None)); let mut stream = match engine { - "openai" => generators::openai::run(get_creds(engine)?.openai, config, args).await, + "openai" => generators::openai::run(get_creds(&args.creds_path)?.openai, config, args).await, "local" => generators::llama::run(config, args).await, "from-file" => generators::from_file::run(config, args).await, _ => panic!("Unknown engine: {}", engine), From 58eb32de0a0dab871336594c866a74ebccb3ad45 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Fri, 26 Apr 2024 15:31:41 +0200 Subject: [PATCH 018/112] fix linux env --- linux_env.nu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linux_env.nu b/linux_env.nu index 7f0e5ff..e833820 100644 --- a/linux_env.nu +++ b/linux_env.nu @@ -1,5 +1,5 @@ export def get_flags [] { - let flags = do { "" | gcc -E -Wp,-v - } + let flags = do { "" | `c++` -E -Wp,-v - } | complete | get stderr | parse -r ' (/.*)' From 19c7aceb3e41174f70f412edfb084ce0c2298c6f Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Fri, 26 Apr 2024 15:32:26 +0200 Subject: [PATCH 019/112] use log to log debug --- Cargo.lock | 94 +++++++++++++++++++++++++++++++++++++ Cargo.toml | 2 + src/generators/llama/mod.rs | 17 ++++--- src/main.rs | 10 ++++ 4 files changed, 117 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 289fa4f..f563428 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -48,6 +48,7 @@ dependencies = [ "clap", "crossterm", "llama_cpp", + "log", "num-traits", "once_cell", "openssl", @@ -57,6 +58,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", + "simplelog", "smartstring", "tempfile", "thiserror", @@ -392,6 +394,15 @@ dependencies = [ "winapi", ] +[[package]] +name = "deranged" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" +dependencies = [ + "powerfmt", +] + [[package]] name = "derive_more" version = "0.99.17" @@ -939,6 +950,12 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + [[package]] name = "num-traits" version = "0.2.16" @@ -958,6 +975,15 @@ dependencies = [ "libc", ] +[[package]] +name = "num_threads" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9" +dependencies = [ + "libc", +] + [[package]] name = "object" version = "0.32.1" @@ -1094,6 +1120,12 @@ version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "prettyplease" version = "0.2.17" @@ -1385,6 +1417,17 @@ dependencies = [ "libc", ] +[[package]] +name = "simplelog" +version = "0.12.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "16257adbfaef1ee58b1363bdc0664c9b8e1e30aed86049635fb5f147d065a9c0" +dependencies = [ + "log", + "termcolor", + "time", +] + [[package]] name = "slab" version = "0.4.9" @@ -1479,6 +1522,15 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + [[package]] name = "thiserror" version = "1.0.58" @@ -1499,6 +1551,39 @@ dependencies = [ "syn 2.0.57", ] +[[package]] +name = "time" +version = "0.3.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" +dependencies = [ + "deranged", + "itoa", + "libc", + "num-conv", + "num_threads", + "powerfmt", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" + +[[package]] +name = "time-macros" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf" +dependencies = [ + "num-conv", + "time-core", +] + [[package]] name = "tinyvec" version = "1.6.0" @@ -1844,6 +1929,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" +dependencies = [ + "windows-sys 0.52.0", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" diff --git a/Cargo.toml b/Cargo.toml index 43e9aa7..928c3f4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,6 +24,7 @@ bytes = "1.1.0" clap = { version = "4.2.2", features = ["derive"] } crossterm = "0.27" llama_cpp = { version = "^0.3.1", optional = true } +log = "^0.4" num-traits = "0.2" once_cell = "1.18" pin-project = "1.1" @@ -32,6 +33,7 @@ reqwest = { version = "0.11", features = ["gzip", "brotli", "deflate", "json", " serde = { version = "1.0", features = ["derive"] } serde_json = "1.0.95" serde_yaml = "0.9" +simplelog = "^0.12" smartstring = { version = "1.0", features = ["serde"] } tempfile = "3.8" thiserror = "1.0" diff --git a/src/generators/llama/mod.rs b/src/generators/llama/mod.rs index adb20a9..a86c34e 100644 --- a/src/generators/llama/mod.rs +++ b/src/generators/llama/mod.rs @@ -6,6 +6,7 @@ use llama_cpp::{ standard_sampler::StandardSampler, LlamaModel, LlamaParams, SessionParams, Token, TokensToStrings }; use once_cell::sync::OnceCell; +use log::{debug, info}; use crate::{ config::Config as AIOConfig, args @@ -110,16 +111,20 @@ pub async fn run( let mut session = model.create_session(session_params).map_err(|_| Error::Custom("Failed to create session".into()))?; // let context = make_context(&config.local.prompts.first().unwrap().content, model_config.template, &args); - // print!("Context: {context}"); + // debug!("Context: {context}"); let context = ""; let context_tokens = model.tokenize_bytes(&context, false, true).unwrap(); - println!("Tokens: "); - for token in &context_tokens { - print!("{}({}) ", model.token_to_piece(*token), 
token.0); + debug!("Tokens: "); + if log::log_enabled!(log::Level::Debug) { + for token in &context_tokens { + print!("{}({})", model.decode_tokens([*token]), token.0); + } + println!(); } let (bos, eos) = (model.bos(), model.eos()); - println!("bos: {}({})", model.token_to_piece(bos), bos.0); - println!("eos: {}({})", model.token_to_piece(eos), eos.0); + debug!("Special tokens:"); + debug!("bos: {}({})", model.decode_tokens([bos]), bos.0); + debug!("eos: {}({})", model.decode_tokens([eos]), eos.0); session .advance_context_with_tokens_async(context_tokens).await .map_err(|_| Error::Custom("Failed to advance context".into()))?; diff --git a/src/main.rs b/src/main.rs index 30857dc..c5cebf8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -10,6 +10,7 @@ mod serde_io; mod openai {} +use simplelog::TermLogger; use arguments as args; use clap::Parser; use formatters::Formatter; @@ -36,6 +37,15 @@ fn get_creds(creds_path: &str) -> Result { #[tokio::main] async fn main() -> Result<(), String> { + + simplelog::TermLogger::init( + simplelog::LevelFilter::Trace, + simplelog::Config::default(), + simplelog::TerminalMode::Stdout, + simplelog::ColorChoice::Auto, + ) + .unwrap(); + let args = { let mut args = args::Args::parse(); if args.input.is_none() { From 9c89908eeb093fe41e7b89dba93d101c55bc7cd2 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Fri, 26 Apr 2024 15:32:59 +0200 Subject: [PATCH 020/112] Better error handling --- src/main.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/main.rs b/src/main.rs index c5cebf8..8a58270 100644 --- a/src/main.rs +++ b/src/main.rs @@ -82,12 +82,14 @@ async fn main() -> Result<(), String> { .unwrap_or((args.engine.as_str(), None)); let mut stream = match engine { - "openai" => generators::openai::run(get_creds(&args.creds_path)?.openai, config, args).await, - "local" => generators::llama::run(config, args).await, - "from-file" => generators::from_file::run(config, args).await, + "openai" => generators::openai::run(get_creds(&args.creds_path)?.openai, config, args).await + .map_err(|e| format!("Failed to request OpenAI API: {}", e))?, + "local" => generators::llama::run(config, args).await + .map_err(|e| format!("Unable to run local model: {}", e))?, + "from-file" => generators::from_file::run(config, args).await + .map_err(|e| format!("Failed to read from file: {}", e))?, _ => panic!("Unknown engine: {}", engine), - } - .map_err(|e| format!("Failed to request OpenAI API: {}", e))?; + }; loop { match stream.next().await { From 139da546a742fd7f445c6bb5243493ff76c478b2 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Fri, 26 Apr 2024 23:40:28 +0200 Subject: [PATCH 021/112] Use new enum template in the config --- src/generators/llama/config.rs | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/generators/llama/config.rs b/src/generators/llama/config.rs index 5d5b990..a4dfd06 100644 --- a/src/generators/llama/config.rs +++ b/src/generators/llama/config.rs @@ -1,6 +1,7 @@ use serde::{Deserialize, Serialize}; use llama_cpp::standard_sampler::StandardSampler; use crate::generators::openai::Message; +use super::template::PromptTemplate; #[derive(Default, Debug, Deserialize, Serialize)] pub struct Config { @@ -15,14 +16,7 @@ pub struct Model { #[serde(default)] pub template: PromptTemplate, } -#[derive(Default, Debug, Deserialize, Serialize)] -#[serde(rename_all = "lowercase")] -pub enum PromptTemplate { - #[default] - ChatML, - Llama2, - Llama3, -} + #[derive(Default, Debug, Deserialize, Serialize)] pub 
struct Prompt { pub name: String, From 324e5163da93f6a0d7a132ac5fc2a3e8da86f195 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Fri, 26 Apr 2024 23:42:18 +0200 Subject: [PATCH 022/112] fix templates code --- src/generators/llama/template.rs | 46 +++++++++++++++++++------------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/src/generators/llama/template.rs b/src/generators/llama/template.rs index bd65b0b..1fdec5a 100644 --- a/src/generators/llama/template.rs +++ b/src/generators/llama/template.rs @@ -25,8 +25,6 @@ impl PromptTemplate { } } pub fn messages_to_tokens(&self, model: &llama_cpp::LlamaModel, prompt: &[Message]) -> Result, LlamaTokenizationError> { - use std::fmt::Write; - use crate::config::format_content; let mut tokens = Vec::new(); tokens.push(model.bos()); match self { @@ -55,18 +53,23 @@ impl PromptTemplate { Ok(tokens) } pub fn tokens_chatml(tokens: &mut Vec, model: &llama_cpp::LlamaModel, prompt: &[Message]) -> Result<(), LlamaTokenizationError> { - let [im_start, im_end] = model.tokenize_bytes("<|im_start|><|im_end|>", false, true)?[..]; - let [system, user, assistant] = model.tokenize_slice(&["user", "system", "assistant"], false, true)?[..]; + let im_start = model.tokenize_bytes("<|im_start|>", false, true)?.first().copied().unwrap(); + let im_end = model.tokenize_bytes("<|im_end|>", false, true)?.first().copied().unwrap(); + let [system, user, assistant] = [ + model.tokenize_bytes("system", false, true)?, + model.tokenize_bytes("user", false, true)?, + model.tokenize_bytes("assistant", false, true)? + ]; prompt.iter() .for_each(|m| { tokens.push(im_start); - append_to_vec(&mut tokens, &match m.role { - Role::System => system, - Role::User => user, - Role::Assistant => assistant + append_to_vec(tokens, match m.role { + Role::System => &system, + Role::User => &user, + Role::Assistant => &assistant }); tokens.push(model.nl()); - append_to_vec(&mut tokens, &model.tokenize_bytes(&m.content, false, false).unwrap()); + append_to_vec(tokens, &model.tokenize_bytes(&m.content, false, false).unwrap()); tokens.push(im_end); tokens.push(model.nl()); }); @@ -76,23 +79,30 @@ impl PromptTemplate { Ok(()) } pub fn tokens_llama3(tokens: &mut Vec, model: &llama_cpp::LlamaModel, prompt: &[Message]) -> Result<(), LlamaTokenizationError> { - let [start_header_id, end_header_id, eot_id] = model.tokenize_bytes("<|start_header_id|><|end_header_id|><|eot_id|>", false, true)?[..]; - let [system, user, assistant] = model.tokenize_slice(&["user", "system", "assistant"], false, true)?[..]; + let start_header_id = model.tokenize_bytes("<|start_header_id|>", false, true)?.first().copied().unwrap(); + let end_header_id = model.tokenize_bytes("<|end_header_id|>", false, true)?.first().copied().unwrap(); + let eot_id = model.tokenize_bytes("<|eot_id|>", false, true)?.first().copied().unwrap(); + let nl = model.tokenize_bytes("\n", false, true)?.first().copied().unwrap(); + let [system, user, assistant] = [ + model.tokenize_bytes("system", false, true)?, + model.tokenize_bytes("user", false, true)?, + model.tokenize_bytes("assistant", false, true)? 
+ ]; prompt.iter() .for_each(|m| { tokens.push(start_header_id); - append_to_vec(&mut tokens, &match m.role { - Role::System => system, - Role::User => user, - Role::Assistant => assistant + append_to_vec(tokens, match m.role { + Role::System => &system, + Role::User => &user, + Role::Assistant => &assistant }); - append_to_vec(&mut tokens, &[end_header_id, model.nl(), model.nl()]); - append_to_vec(&mut tokens, &model.tokenize_bytes(&m.content, false, false).unwrap()); + append_to_vec(tokens, &[end_header_id, nl, nl]); + append_to_vec(tokens, &model.tokenize_bytes(&m.content, false, false).unwrap()); tokens.push(eot_id); }); tokens.push(start_header_id); append_to_vec(tokens, &assistant); - append_to_vec(tokens, &[end_header_id, model.nl(), model.nl()]); + append_to_vec(tokens, &[end_header_id, nl, nl]); Ok(()) } } From afbc71482c211e2de594aa36798044408880f929 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Fri, 26 Apr 2024 23:43:18 +0200 Subject: [PATCH 023/112] messages are clonable --- src/generators/openai/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/generators/openai/mod.rs b/src/generators/openai/mod.rs index 68ace99..55a598f 100644 --- a/src/generators/openai/mod.rs +++ b/src/generators/openai/mod.rs @@ -14,7 +14,7 @@ use self::config::Prompt; use super::{ResultRun, Error}; -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] #[serde(rename_all = "lowercase")] pub enum Role { User, @@ -41,7 +41,7 @@ impl Role { } } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize)] pub struct Message { pub role: Role, pub content: String, From 56c4c83c8a143494f63eecbd0233f55d44ede0d8 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Fri, 26 Apr 2024 23:43:40 +0200 Subject: [PATCH 024/112] Add stop tokens in the templates --- src/generators/llama/template.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/generators/llama/template.rs b/src/generators/llama/template.rs index 1fdec5a..2743fce 100644 --- a/src/generators/llama/template.rs +++ b/src/generators/llama/template.rs @@ -105,5 +105,18 @@ impl PromptTemplate { append_to_vec(tokens, &[end_header_id, nl, nl]); Ok(()) } + pub fn stop_tokens(&self, model: &llama_cpp::LlamaModel) -> Result, LlamaTokenizationError> { + match self { + PromptTemplate::ChatML => { + let im_end = model.tokenize_bytes("<|im_end|>", false, true)?.first().copied().unwrap(); + Ok(vec![im_end, model.eos()]) + }, + PromptTemplate::Llama2 => todo!(), + PromptTemplate::Llama3 => { + let eot_id = model.tokenize_bytes("<|eot_id|>", false, true)?.first().copied().unwrap(); + Ok(vec![eot_id, model.eos()]) + }, + } + } } From 869cbfd54433d49bb941ef927367aaeb8b027ab9 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Fri, 26 Apr 2024 23:43:53 +0200 Subject: [PATCH 025/112] Working local AI completion --- src/generators/llama/mod.rs | 87 ++++++++----------------------------- 1 file changed, 19 insertions(+), 68 deletions(-) diff --git a/src/generators/llama/mod.rs b/src/generators/llama/mod.rs index a86c34e..e632f2c 100644 --- a/src/generators/llama/mod.rs +++ b/src/generators/llama/mod.rs @@ -1,5 +1,7 @@ pub mod config; pub mod template; +use std::borrow::Cow; + use tokio_stream::StreamExt; use llama_cpp::{ @@ -8,8 +10,7 @@ use llama_cpp::{ use once_cell::sync::OnceCell; use log::{debug, info}; use crate::{ - config::Config as AIOConfig, - args + args, config::{format_content, Config as AIOConfig} }; use super::{openai::{Message, Role}, 
Error, ResultRun}; @@ -35,62 +36,6 @@ fn append_to_vec(vec: &mut Vec, other: &[T]) { } } -fn make_context(model: &LlamaModel, prompt: &[Message], template: config::PromptTemplate, args: &args::ProcessedArgs) -> Vec { - use std::fmt::Write; - use crate::config::format_content; - let mut tokens = Vec::new(); - tokens.push(model.bos()); - // match template { - // config::PromptTemplate::ChatML => { - // let [im_start, im_end] = model.tokenize_bytes("<|im_start|><|im_end|>", false, true).unwrap()[..]; - // let [system, user, assistant] = model.tokenize_slice(&["user", "system", "assistant"], false, true).unwrap()[..]; - // let mut context = prompt.iter() - // .for_each(|m| { - // tokens.push(im_start); - // append_to_vec(&mut tokens, &match m.role { - // Role::System => system, - // Role::User => user, - // Role::Assistant => assistant - // }); - // tokens.push(model.nl()); - // append_to_vec(&mut tokens, &model.tokenize_bytes(&m.content, false, false).unwrap()); - // tokens.push(im_end); - // tokens.push(model.nl()); - // }); - // tokens.push(im_start); - // append_to_vec(tokens, &assistant); - // tokens.push(im_end); - // } - // config::PromptTemplate::Llama2 => { - // let context = prompt.iter() - // .fold(String::new(), |mut str, m| { - // match m.role { - // Role::User => { - // #[allow(clippy::write_with_newline)] - // let _ = write!(str, "[INST] {} [/INST]\n", format_content(&m.content, args)); - // } - // Role::Assistant => { - // #[allow(clippy::write_with_newline)] - // let _ = write!(str, "{}\n", format_content(&m.content, args)); - // } - // _ => () - // } - // str - // }); - // format!("{}", context) - // } - // config::PromptTemplate::Llama3 => { - // let context = prompt.iter() - // .fold(String::new(), |mut str, m| { - // let _ = write!(str, "<|start_header_id|>{}<|end_header_id|>\n\n{}<|eot_id|>", m.role.lowercase(), format_content(&m.content, args)); - // str - // }); - // format!("<|begin_of_text|>{}<|start_header_id|>assistant<|end_header_id|>\n\n", context) - // } - // } - tokens -} - pub async fn run( config: AIOConfig, args: args::ProcessedArgs @@ -110,10 +55,12 @@ pub async fn run( let session_params = SessionParams::default(); let mut session = model.create_session(session_params).map_err(|_| Error::Custom("Failed to create session".into()))?; - // let context = make_context(&config.local.prompts.first().unwrap().content, model_config.template, &args); - // debug!("Context: {context}"); - let context = ""; - let context_tokens = model.tokenize_bytes(&context, false, true).unwrap(); + let prompt = config.local.prompts.first().unwrap(); + let messages = prompt.content.iter() + .cloned() + .map(|mut m| {m.content = format_content(&m.content, &args).to_string(); m}) + .collect::>(); + let context_tokens = model_config.template.messages_to_tokens(model, &messages).map_err(|_| Error::Custom("Failed to convert prompt messages to tokens".into()))?; debug!("Tokens: "); if log::log_enabled!(log::Level::Debug) { for token in &context_tokens { @@ -131,11 +78,15 @@ pub async fn run( let completion = session .start_completing_with(StandardSampler::default(), 1024); - // let discard_tokens = [model.bos(), model.eos()]; - // let filter_tokens = StreamExt::filter(completion, move |_token| !discard_tokens.contains(_token)); - let completion_stream = StreamExt::map(completion, |token| Ok(format!("{}({}) ", model.token_to_piece(token), token.0))); - // let completion_strings = TokensToStrings::new(filter_tokens, model.clone()); - // let completion_stream = 
StreamExt::map(completion_strings, Ok); - + // let completion_stream = StreamExt::map(completion, |token| Ok(format!("{}({})", model.token_to_piece(token), token.0))); + let discard_tokens = model_config.template.stop_tokens(model).map_err(|_| Error::Custom("Failed to convert prompt messages to tokens".into()))?; + let completion_stream = + StreamExt::map( + TokensToStrings::new( + StreamExt::take_while(completion, move |token| !discard_tokens.contains(token)), model.clone() + ), + Ok + ); + Ok(Box::pin(completion_stream)) } \ No newline at end of file From 692d7e2fd1fc4f94e053724137e6a977336a28e9 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Fri, 26 Apr 2024 23:51:31 +0200 Subject: [PATCH 026/112] Discard github action build when PR is draft --- .github/workflows/build.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 567acd2..16f2686 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -7,6 +7,7 @@ on: jobs: build: + if: github.event_name == 'pull_request' && !github.event.pull_request.draft strategy: fail-fast: false matrix: From dcf5db47245dab78b1afa5c245f331178b379bf1 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sun, 28 Apr 2024 15:37:39 +0200 Subject: [PATCH 027/112] Add llama2 tempalte --- src/generators/llama/template.rs | 50 +++++++++++++++++++------------- 1 file changed, 30 insertions(+), 20 deletions(-) diff --git a/src/generators/llama/template.rs b/src/generators/llama/template.rs index 2743fce..fb7abec 100644 --- a/src/generators/llama/template.rs +++ b/src/generators/llama/template.rs @@ -16,6 +16,14 @@ fn append_to_vec(vec: &mut Vec, other: &[T]) { other.iter().for_each(|v| vec.push(*v)); } +macro_rules! vec_merge { + ($tokens:ident, $($other_tokens:expr),*) => {{ + let arrs = [$($other_tokens),*]; + $tokens.reserve(arrs.iter().map(|arr| arr.len()).sum()); + arrs.iter().map(|arr| arr.iter()).flatten().for_each(|v| $tokens.push(*v)); + }}; +} + impl PromptTemplate { pub fn name(&self) -> &str { match self { @@ -29,25 +37,7 @@ impl PromptTemplate { tokens.push(model.bos()); match self { Self::ChatML => Self::tokens_chatml(&mut tokens, model, prompt), - Self::Llama2 => { - todo!("not implemented") - // let context = prompt.iter() - // .fold(String::new(), |mut str, m| { - // match m.role { - // Role::User => { - // #[allow(clippy::write_with_newline)] - // let _ = write!(str, "[INST] {} [/INST]\n", format_content(&m.content, args)); - // } - // Role::Assistant => { - // #[allow(clippy::write_with_newline)] - // let _ = write!(str, "{}\n", format_content(&m.content, args)); - // } - // _ => () - // } - // str - // }); - // format!("{}", context) - } + Self::Llama2 => Self::tokens_llama2(&mut tokens, model, prompt), Self::Llama3 => Self::tokens_llama3(&mut tokens, model, prompt), }?; Ok(tokens) @@ -78,6 +68,23 @@ impl PromptTemplate { tokens.push(im_end); Ok(()) } + pub fn tokens_llama2(tokens: &mut Vec, model: &llama_cpp::LlamaModel, prompt: &[Message]) -> Result<(), LlamaTokenizationError> { + let system_start = model.tokenize_bytes("<>", false, true)?; + let system_end = model.tokenize_bytes("<>", false, true)?; + let inst_start = model.tokenize_bytes("[INST]", false, true)?; + let inst_end = model.tokenize_bytes("[/INST]", false, true)?; + let eos = model.tokenize_bytes("", false, true)?; + let nl = model.tokenize_bytes("\n", false, true)?; + prompt.iter() + .for_each(|m| { + match m.role { + Role::System => vec_merge!(tokens, &inst_start, &system_start, 
&model.tokenize_bytes(&m.content, false, false).unwrap(), &system_end, &inst_end, &nl), + Role::User => vec_merge!(tokens, &inst_start, &model.tokenize_bytes(&m.content, false, false).unwrap(), &inst_end, &nl), + Role::Assistant => vec_merge!(tokens, &model.tokenize_bytes(&m.content, false, false).unwrap(), &eos, &nl), + } + }); + Ok(()) + } pub fn tokens_llama3(tokens: &mut Vec, model: &llama_cpp::LlamaModel, prompt: &[Message]) -> Result<(), LlamaTokenizationError> { let start_header_id = model.tokenize_bytes("<|start_header_id|>", false, true)?.first().copied().unwrap(); let end_header_id = model.tokenize_bytes("<|end_header_id|>", false, true)?.first().copied().unwrap(); @@ -111,7 +118,10 @@ impl PromptTemplate { let im_end = model.tokenize_bytes("<|im_end|>", false, true)?.first().copied().unwrap(); Ok(vec![im_end, model.eos()]) }, - PromptTemplate::Llama2 => todo!(), + PromptTemplate::Llama2 => { + let eot_id = model.tokenize_bytes("[INST]", false, true)?.first().copied().unwrap(); + Ok(vec![eot_id, model.eos()]) + }, PromptTemplate::Llama3 => { let eot_id = model.tokenize_bytes("<|eot_id|>", false, true)?.first().copied().unwrap(); Ok(vec![eot_id, model.eos()]) From 627ed7f183b5f5f2fac826e501cc0907476e7c88 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sun, 28 Apr 2024 15:37:57 +0200 Subject: [PATCH 028/112] add custom template --- src/generators/llama/template.rs | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/generators/llama/template.rs b/src/generators/llama/template.rs index fb7abec..7353cd7 100644 --- a/src/generators/llama/template.rs +++ b/src/generators/llama/template.rs @@ -2,6 +2,15 @@ use serde::{Deserialize, Serialize}; use super::super::openai::{Message, Role}; use llama_cpp::{LlamaTokenizationError, Token}; +#[derive(Default, Debug, Deserialize, Serialize)] +pub struct CustomTemplate { + pub system_prefix: String, + pub system_suffix: String, + pub user_prefix: String, + pub user_suffix: String, + pub assistant_prefix: String, + pub assistant_suffix: String, +} #[derive(Default, Debug, Deserialize, Serialize)] #[serde(rename_all = "lowercase")] pub enum PromptTemplate { @@ -9,6 +18,7 @@ pub enum PromptTemplate { ChatML, Llama2, Llama3, + Custom(CustomTemplate) } fn append_to_vec(vec: &mut Vec, other: &[T]) { @@ -30,6 +40,7 @@ impl PromptTemplate { PromptTemplate::ChatML => "chatml", PromptTemplate::Llama2 => "llama2", PromptTemplate::Llama3 => "llama3", + PromptTemplate::Custom(_) => "custom", } } pub fn messages_to_tokens(&self, model: &llama_cpp::LlamaModel, prompt: &[Message]) -> Result, LlamaTokenizationError> { @@ -39,6 +50,7 @@ impl PromptTemplate { Self::ChatML => Self::tokens_chatml(&mut tokens, model, prompt), Self::Llama2 => Self::tokens_llama2(&mut tokens, model, prompt), Self::Llama3 => Self::tokens_llama3(&mut tokens, model, prompt), + Self::Custom (custom_template) => Self::tokens_custom(&mut tokens, model, prompt, custom_template), }?; Ok(tokens) } @@ -112,6 +124,24 @@ impl PromptTemplate { append_to_vec(tokens, &[end_header_id, nl, nl]); Ok(()) } + pub fn tokens_custom(tokens: &mut Vec, model: &llama_cpp::LlamaModel, prompt: &[Message], custom_template: &CustomTemplate) -> Result<(), LlamaTokenizationError> { + let system_prefix_tokens = model.tokenize_bytes(&custom_template.system_prefix, false, true)?; + let system_suffix_tokens = model.tokenize_bytes(&custom_template.system_suffix, false, true)?; + let user_prefix_tokens = model.tokenize_bytes(&custom_template.user_prefix, false, true)?; + let 
user_suffix_tokens = model.tokenize_bytes(&custom_template.user_suffix, false, true)?; + let assistant_prefix_tokens = model.tokenize_bytes(&custom_template.assistant_prefix, false, true)?; + let assistant_suffix_tokens = model.tokenize_bytes(&custom_template.assistant_suffix, false, true)?; + prompt.iter() + .for_each(|m| { + let content_tokens = model.tokenize_bytes(&m.content, false, false).unwrap(); + match m.role { + Role::System => vec_merge!(tokens, &system_prefix_tokens, &content_tokens, &system_suffix_tokens), + Role::User => vec_merge!(tokens, &user_prefix_tokens, &content_tokens, &user_suffix_tokens), + Role::Assistant => vec_merge!(tokens, &assistant_prefix_tokens, &content_tokens, &assistant_suffix_tokens), + } + }); + Ok(()) + } pub fn stop_tokens(&self, model: &llama_cpp::LlamaModel) -> Result, LlamaTokenizationError> { match self { PromptTemplate::ChatML => { @@ -126,6 +156,7 @@ impl PromptTemplate { let eot_id = model.tokenize_bytes("<|eot_id|>", false, true)?.first().copied().unwrap(); Ok(vec![eot_id, model.eos()]) }, + PromptTemplate::Custom(custom_template) => Ok(vec![model.tokenize_bytes(&custom_template.user_prefix, false, true)?.first().copied().unwrap()]), } } } From 4431b8161fcf51e8f9af9f231920f15f48467a7f Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sun, 28 Apr 2024 15:38:32 +0200 Subject: [PATCH 029/112] smalls fixes --- src/generators/llama/mod.rs | 30 +++++++++++++----------------- src/main.rs | 2 ++ 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/src/generators/llama/mod.rs b/src/generators/llama/mod.rs index e632f2c..e8050f3 100644 --- a/src/generators/llama/mod.rs +++ b/src/generators/llama/mod.rs @@ -1,18 +1,17 @@ pub mod config; pub mod template; -use std::borrow::Cow; use tokio_stream::StreamExt; use llama_cpp::{ - standard_sampler::StandardSampler, LlamaModel, LlamaParams, SessionParams, Token, TokensToStrings + standard_sampler::StandardSampler, LlamaModel, LlamaParams, SessionParams, TokensToStrings }; use once_cell::sync::OnceCell; -use log::{debug, info}; +use log::debug; use crate::{ args, config::{format_content, Config as AIOConfig} }; -use super::{openai::{Message, Role}, Error, ResultRun}; +use super::{Error, ResultRun}; static LOCAL_LLAMA: OnceCell = OnceCell::new(); @@ -29,12 +28,6 @@ fn init_model(config: &config::Model) -> Result<(), Error> { }; LOCAL_LLAMA.set(llama).map_err(|_| Error::Custom("Failed to set LLaMA model".into())) } -fn append_to_vec(vec: &mut Vec, other: &[T]) { - vec.reserve(other.len()); - for v in other { - vec.push(*v); - } -} pub async fn run( config: AIOConfig, @@ -58,20 +51,23 @@ pub async fn run( let prompt = config.local.prompts.first().unwrap(); let messages = prompt.content.iter() .cloned() - .map(|mut m| {m.content = format_content(&m.content, &args).to_string(); m}) + .map(|mut m| { + m.content = format_content(&m.content, &args).to_string(); + m + }) .collect::>(); let context_tokens = model_config.template.messages_to_tokens(model, &messages).map_err(|_| Error::Custom("Failed to convert prompt messages to tokens".into()))?; - debug!("Tokens: "); if log::log_enabled!(log::Level::Debug) { + debug!("Tokens: "); for token in &context_tokens { - print!("{}({})", model.decode_tokens([*token]), token.0); + print!("{}({})", String::from_utf8_lossy(model.detokenize(*token)), token.0); } println!(); + let (bos, eos) = (model.bos(), model.eos()); + debug!("Special tokens:"); + debug!("bos: {}({})", String::from_utf8_lossy(model.detokenize(bos)), bos.0); + debug!("eos: {}({})", 
String::from_utf8_lossy(model.detokenize(eos)), eos.0); } - let (bos, eos) = (model.bos(), model.eos()); - debug!("Special tokens:"); - debug!("bos: {}({})", model.decode_tokens([bos]), bos.0); - debug!("eos: {}({})", model.decode_tokens([eos]), eos.0); session .advance_context_with_tokens_async(context_tokens).await .map_err(|_| Error::Custom("Failed to advance context".into()))?; diff --git a/src/main.rs b/src/main.rs index 8a58270..aaf9848 100644 --- a/src/main.rs +++ b/src/main.rs @@ -46,6 +46,8 @@ async fn main() -> Result<(), String> { ) .unwrap(); + + let args = { let mut args = args::Args::parse(); if args.input.is_none() { From 5616cde1ded34d12bfb3a6e578442b6ae0f9de2e Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sun, 28 Apr 2024 21:23:00 +0200 Subject: [PATCH 030/112] opti template code --- src/generators/llama/template.rs | 38 +++++++++++--------------------- 1 file changed, 13 insertions(+), 25 deletions(-) diff --git a/src/generators/llama/template.rs b/src/generators/llama/template.rs index 7353cd7..8f3ccc5 100644 --- a/src/generators/llama/template.rs +++ b/src/generators/llama/template.rs @@ -55,8 +55,9 @@ impl PromptTemplate { Ok(tokens) } pub fn tokens_chatml(tokens: &mut Vec, model: &llama_cpp::LlamaModel, prompt: &[Message]) -> Result<(), LlamaTokenizationError> { - let im_start = model.tokenize_bytes("<|im_start|>", false, true)?.first().copied().unwrap(); - let im_end = model.tokenize_bytes("<|im_end|>", false, true)?.first().copied().unwrap(); + let im_start = model.tokenize_bytes("<|im_start|>", false, true)?; + let im_end = model.tokenize_bytes("<|im_end|>", false, true)?; + let nl = model.tokenize_bytes("\n", false, true)?; let [system, user, assistant] = [ model.tokenize_bytes("system", false, true)?, model.tokenize_bytes("user", false, true)?, @@ -64,20 +65,13 @@ impl PromptTemplate { ]; prompt.iter() .for_each(|m| { - tokens.push(im_start); - append_to_vec(tokens, match m.role { + let role_tokens = match m.role { Role::System => &system, Role::User => &user, Role::Assistant => &assistant - }); - tokens.push(model.nl()); - append_to_vec(tokens, &model.tokenize_bytes(&m.content, false, false).unwrap()); - tokens.push(im_end); - tokens.push(model.nl()); + }; + vec_merge!(tokens, &im_start, role_tokens, &nl, &model.tokenize_bytes(&m.content, false, false).unwrap(), &im_end, &nl); }); - tokens.push(im_start); - append_to_vec(tokens, &assistant); - tokens.push(im_end); Ok(()) } pub fn tokens_llama2(tokens: &mut Vec, model: &llama_cpp::LlamaModel, prompt: &[Message]) -> Result<(), LlamaTokenizationError> { @@ -98,10 +92,10 @@ impl PromptTemplate { Ok(()) } pub fn tokens_llama3(tokens: &mut Vec, model: &llama_cpp::LlamaModel, prompt: &[Message]) -> Result<(), LlamaTokenizationError> { - let start_header_id = model.tokenize_bytes("<|start_header_id|>", false, true)?.first().copied().unwrap(); - let end_header_id = model.tokenize_bytes("<|end_header_id|>", false, true)?.first().copied().unwrap(); - let eot_id = model.tokenize_bytes("<|eot_id|>", false, true)?.first().copied().unwrap(); - let nl = model.tokenize_bytes("\n", false, true)?.first().copied().unwrap(); + let start_header_id = model.tokenize_bytes("<|start_header_id|>", false, true)?; + let end_header_id = model.tokenize_bytes("<|end_header_id|>", false, true)?; + let eot_id = model.tokenize_bytes("<|eot_id|>", false, true)?; + let nl = model.tokenize_bytes("\n", false, true)?; let [system, user, assistant] = [ model.tokenize_bytes("system", false, true)?, model.tokenize_bytes("user", false, true)?, @@ 
-109,19 +103,13 @@ impl PromptTemplate { ]; prompt.iter() .for_each(|m| { - tokens.push(start_header_id); - append_to_vec(tokens, match m.role { + let role_tokens = match m.role { Role::System => &system, Role::User => &user, Role::Assistant => &assistant - }); - append_to_vec(tokens, &[end_header_id, nl, nl]); - append_to_vec(tokens, &model.tokenize_bytes(&m.content, false, false).unwrap()); - tokens.push(eot_id); + }; + vec_merge!(tokens, &start_header_id, role_tokens, &end_header_id, &nl, &nl, &model.tokenize_bytes(&m.content, false, false).unwrap(), &eot_id); }); - tokens.push(start_header_id); - append_to_vec(tokens, &assistant); - append_to_vec(tokens, &[end_header_id, nl, nl]); Ok(()) } pub fn tokens_custom(tokens: &mut Vec, model: &llama_cpp::LlamaModel, prompt: &[Message], custom_template: &CustomTemplate) -> Result<(), LlamaTokenizationError> { From dab5ce364be106fce963ee2de827aba319a6f15f Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sun, 28 Apr 2024 21:47:49 +0200 Subject: [PATCH 031/112] add info verbose --- src/generators/llama/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/generators/llama/mod.rs b/src/generators/llama/mod.rs index e8050f3..b7bf4ed 100644 --- a/src/generators/llama/mod.rs +++ b/src/generators/llama/mod.rs @@ -7,7 +7,7 @@ use llama_cpp::{ standard_sampler::StandardSampler, LlamaModel, LlamaParams, SessionParams, TokensToStrings }; use once_cell::sync::OnceCell; -use log::debug; +use log::{debug, info}; use crate::{ args, config::{format_content, Config as AIOConfig} }; @@ -20,12 +20,14 @@ fn init_model(config: &config::Model) -> Result<(), Error> { n_gpu_layers: 20000, ..Default::default() }; + info!("Loading LLaMA model at {}", config.path); let Ok(llama) = LlamaModel::load_from_file( &config.path, model_options, ) else { return Err(Error::Custom("Failed to load LLaMA model".into())) }; + info!("LLaMA model loaded"); LOCAL_LLAMA.set(llama).map_err(|_| Error::Custom("Failed to set LLaMA model".into())) } From f2018ef2db1f79386ab7642582662308437d64e6 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Wed, 1 May 2024 17:10:28 +0200 Subject: [PATCH 032/112] make custom stops library --- src/generators/llama/mod.rs | 1 + src/generators/llama/stop.rs | 145 +++++++++++++++++++++++++++++++++++ 2 files changed, 146 insertions(+) create mode 100644 src/generators/llama/stop.rs diff --git a/src/generators/llama/mod.rs b/src/generators/llama/mod.rs index b7bf4ed..6cafed5 100644 --- a/src/generators/llama/mod.rs +++ b/src/generators/llama/mod.rs @@ -1,5 +1,6 @@ pub mod config; pub mod template; +pub mod stop; use tokio_stream::StreamExt; diff --git a/src/generators/llama/stop.rs b/src/generators/llama/stop.rs new file mode 100644 index 0000000..2117b3e --- /dev/null +++ b/src/generators/llama/stop.rs @@ -0,0 +1,145 @@ +use std::ops::{Deref, DerefMut}; + +use llama_cpp::{ + LlamaModel as Model, + Token +}; + +pub struct StopToken(String); + +impl StopToken { + pub fn new(text: String) -> Self { + Self(text,) + } + pub fn from_tokens(model: Model, tokens: &[Token]) -> Self { + Self(model.decode_tokens(tokens),) + } + pub fn inspect(&self) -> StopTokenInspector { + StopTokenInspector::new(self) + } +} + +enum StopTokenState { + NotFound, + InProgress, + Found, +} + +enum Index<'a> { + Ref(&'a mut usize), + Val(usize), +} + +impl AsRef for Index<'_> { + fn as_ref(&self) -> &usize { + match self { + Index::Ref(x) => x, + Index::Val(x) => x, + } + } +} + +impl AsMut for Index<'_> { + fn as_mut(&mut self) -> &mut usize { + match self { + 
Index::Ref(x) => x, + Index::Val(x) => x, + } + } +} +impl Deref for Index<'_> { + type Target = usize; + fn deref(&self) -> &usize { + match self { + Index::Ref(x) => x, + Index::Val(x) => x, + } + } +} + +impl DerefMut for Index<'_> { + fn deref_mut(&mut self) -> &mut usize { + match self { + Index::Ref(x) => x, + Index::Val(x) => x, + } + } +} + +impl<'a> From<&'a mut usize> for Index<'a> { + fn from(x: &'a mut usize) -> Self { + Self::Ref(x) + } +} + +impl From for Index<'_> { + fn from(x: usize) -> Self { + Self::Val(x) + } +} + +pub struct StopTokenInspector<'a, 'b> { + stop: &'a StopToken, + index: Index<'b>, +} + +impl<'a, 'b> StopTokenInspector<'a, 'b> { + pub fn new(stop: &'a StopToken) -> Self { + Self { + stop, + index: 0.into(), + } + } + pub fn reset(&mut self) { + self.index = 0.into(); + } + pub fn check(&mut self, text: &str) -> StopTokenState { + for (c_self, c_other) in self.stop.0.chars().skip(*self.index).zip(text.chars()) { + if c_self != c_other { + *self.index = 0; + return StopTokenState::NotFound; + } + } + if self.stop.0.len() > *self.index + text.len() { + *self.index += text.len(); + StopTokenState::InProgress + } else { + self.index = 0.into(); + StopTokenState::Found + } + } +} + +pub struct StopManager { + stops: Vec<(StopToken, usize)>, +} + +impl StopManager { + pub fn new() -> Self { + Self { + stops: Vec::new(), + } + } + pub fn add_stop(&mut self, stop: StopToken) { + self.stops.push((stop, 0)); + } + pub fn add_stop_from_string>(&mut self, stop: S) { + self.stops.push((StopToken::new(stop.into()), 0)); + } + + pub fn check(&mut self, text: &str) -> bool { + for (stop, index) in &mut self.stops { + let mut stop_inspector = StopTokenInspector { stop, index: index.into() }; + match stop_inspector.check(text) { + StopTokenState::Found => return true, + StopTokenState::InProgress | StopTokenState::NotFound => continue, + } + } + false + } + pub fn reset(&mut self) { + for stop in &mut self.stops { + stop.1 = 0; + } + } +} \ No newline at end of file From 333e4c9cd7301362adb8f5ac46d4e407b533353d Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Wed, 1 May 2024 18:02:19 +0200 Subject: [PATCH 033/112] Apply stop manager --- src/generators/llama/mod.rs | 9 +++++---- src/generators/llama/stop.rs | 9 +++++++++ src/generators/llama/template.rs | 17 +++++++++-------- 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/src/generators/llama/mod.rs b/src/generators/llama/mod.rs index 6cafed5..62cfe97 100644 --- a/src/generators/llama/mod.rs +++ b/src/generators/llama/mod.rs @@ -78,12 +78,13 @@ pub async fn run( let completion = session .start_completing_with(StandardSampler::default(), 1024); // let completion_stream = StreamExt::map(completion, |token| Ok(format!("{}({})", model.token_to_piece(token), token.0))); - let discard_tokens = model_config.template.stop_tokens(model).map_err(|_| Error::Custom("Failed to convert prompt messages to tokens".into()))?; + let mut discard_tokens = model_config.template.stop_tokens(model).map_err(|_| Error::Custom("Failed to convert prompt messages to tokens".into()))?; let completion_stream = StreamExt::map( - TokensToStrings::new( - StreamExt::take_while(completion, move |token| !discard_tokens.contains(token)), model.clone() - ), + StreamExt::take_while( + TokensToStrings::new(completion, model.clone()), + move |token| !discard_tokens.check(token) + ), Ok ); diff --git a/src/generators/llama/stop.rs b/src/generators/llama/stop.rs index 2117b3e..19e9f9a 100644 --- a/src/generators/llama/stop.rs +++ 
b/src/generators/llama/stop.rs @@ -5,6 +5,15 @@ use llama_cpp::{ Token }; +macro_rules! stop_manager { + ($($x:expr),*) => {{ + let mut x = StopManager::new(); + $(x.add_stop_from_string($x);)* + x + }}; +} +pub(crate) use stop_manager; + pub struct StopToken(String); impl StopToken { diff --git a/src/generators/llama/template.rs b/src/generators/llama/template.rs index 8f3ccc5..cf39d9a 100644 --- a/src/generators/llama/template.rs +++ b/src/generators/llama/template.rs @@ -1,6 +1,7 @@ use serde::{Deserialize, Serialize}; use super::super::openai::{Message, Role}; use llama_cpp::{LlamaTokenizationError, Token}; +use super::stop::{stop_manager, StopManager}; #[derive(Default, Debug, Deserialize, Serialize)] pub struct CustomTemplate { @@ -130,21 +131,21 @@ impl PromptTemplate { }); Ok(()) } - pub fn stop_tokens(&self, model: &llama_cpp::LlamaModel) -> Result, LlamaTokenizationError> { + pub fn stop_tokens(&self, model: &llama_cpp::LlamaModel) -> Result { match self { PromptTemplate::ChatML => { - let im_end = model.tokenize_bytes("<|im_end|>", false, true)?.first().copied().unwrap(); - Ok(vec![im_end, model.eos()]) + let eos_str = model.decode_tokens([model.eos()]); + Ok(stop_manager!["<|im_end|>", eos_str]) }, PromptTemplate::Llama2 => { - let eot_id = model.tokenize_bytes("[INST]", false, true)?.first().copied().unwrap(); - Ok(vec![eot_id, model.eos()]) + let eos_str = model.decode_tokens([model.eos()]); + Ok(stop_manager!["[INST]", eos_str]) }, PromptTemplate::Llama3 => { - let eot_id = model.tokenize_bytes("<|eot_id|>", false, true)?.first().copied().unwrap(); - Ok(vec![eot_id, model.eos()]) + let eos_str = model.decode_tokens([model.eos()]); + Ok(stop_manager!["happy", "<|eot_id|>", eos_str]) }, - PromptTemplate::Custom(custom_template) => Ok(vec![model.tokenize_bytes(&custom_template.user_prefix, false, true)?.first().copied().unwrap()]), + PromptTemplate::Custom(custom_template) => Ok(stop_manager![&custom_template.user_prefix]), } } } From f5b910cfb8d5499ac9783cecf89ca21f1f1d2eea Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Wed, 1 May 2024 20:17:09 +0200 Subject: [PATCH 034/112] add PartialEq to Role --- src/generators/openai/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/generators/openai/mod.rs b/src/generators/openai/mod.rs index 55a598f..130275d 100644 --- a/src/generators/openai/mod.rs +++ b/src/generators/openai/mod.rs @@ -14,7 +14,7 @@ use self::config::Prompt; use super::{ResultRun, Error}; -#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)] #[serde(rename_all = "lowercase")] pub enum Role { User, From dc0ff5ca696eba848b367ed67f099e15bfdc11b9 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Wed, 1 May 2024 20:17:40 +0200 Subject: [PATCH 035/112] clean up stop_tokens code --- src/generators/llama/template.rs | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/src/generators/llama/template.rs b/src/generators/llama/template.rs index cf39d9a..42049ed 100644 --- a/src/generators/llama/template.rs +++ b/src/generators/llama/template.rs @@ -132,20 +132,12 @@ impl PromptTemplate { Ok(()) } pub fn stop_tokens(&self, model: &llama_cpp::LlamaModel) -> Result { + let eos_str = String::from_utf8_lossy(model.detokenize(model.eos())); match self { - PromptTemplate::ChatML => { - let eos_str = model.decode_tokens([model.eos()]); - Ok(stop_manager!["<|im_end|>", eos_str]) - }, - PromptTemplate::Llama2 => { - let eos_str = 
model.decode_tokens([model.eos()]); - Ok(stop_manager!["[INST]", eos_str]) - }, - PromptTemplate::Llama3 => { - let eos_str = model.decode_tokens([model.eos()]); - Ok(stop_manager!["happy", "<|eot_id|>", eos_str]) - }, - PromptTemplate::Custom(custom_template) => Ok(stop_manager![&custom_template.user_prefix]), + PromptTemplate::ChatML => Ok(stop_manager!["<|im_end|>", eos_str]), + PromptTemplate::Llama2 => Ok(stop_manager!["[INST]", eos_str]), + PromptTemplate::Llama3 => Ok(stop_manager!["thank", "<|eot_id|>", eos_str]), + PromptTemplate::Custom(custom_template) => Ok(stop_manager![&custom_template.user_prefix, eos_str]), } } } From 269cd6050b57f43d354beb6221e42706dbf48fe5 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Wed, 1 May 2024 20:17:59 +0200 Subject: [PATCH 036/112] fix StopTokenInspector check algorith --- src/generators/llama/stop.rs | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/generators/llama/stop.rs b/src/generators/llama/stop.rs index 19e9f9a..3f3ee51 100644 --- a/src/generators/llama/stop.rs +++ b/src/generators/llama/stop.rs @@ -103,19 +103,18 @@ impl<'a, 'b> StopTokenInspector<'a, 'b> { self.index = 0.into(); } pub fn check(&mut self, text: &str) -> StopTokenState { - for (c_self, c_other) in self.stop.0.chars().skip(*self.index).zip(text.chars()) { + for c_other in text.chars() { + let c_self = self.stop.0.chars().nth(*self.index).unwrap(); if c_self != c_other { *self.index = 0; - return StopTokenState::NotFound; + } else { + *self.index += 1; + if *self.index == self.stop.0.len() { + return StopTokenState::Found; + } } } - if self.stop.0.len() > *self.index + text.len() { - *self.index += text.len(); - StopTokenState::InProgress - } else { - self.index = 0.into(); - StopTokenState::Found - } + StopTokenState::InProgress } } @@ -140,7 +139,10 @@ impl StopManager { for (stop, index) in &mut self.stops { let mut stop_inspector = StopTokenInspector { stop, index: index.into() }; match stop_inspector.check(text) { - StopTokenState::Found => return true, + StopTokenState::Found => { + self.reset(); + return true; + } StopTokenState::InProgress | StopTokenState::NotFound => continue, } } From a76f463ebe42d4c5b8c4f9c2a16a0ac0bebf395e Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Wed, 1 May 2024 20:18:29 +0200 Subject: [PATCH 037/112] add some tests to test stop module --- src/main.rs | 2 ++ src/tests.rs | 27 +++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 src/tests.rs diff --git a/src/main.rs b/src/main.rs index aaf9848..57c8d37 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,5 @@ +#[cfg(test)] +mod tests; pub mod arguments; mod utils; mod config; diff --git a/src/tests.rs b/src/tests.rs new file mode 100644 index 0000000..d855991 --- /dev/null +++ b/src/tests.rs @@ -0,0 +1,27 @@ +use crate::generators::llama::stop::{stop_manager, StopManager}; + +#[test] +fn stops() { + let mut manager = stop_manager!("Bonjour", "bonjour", "salut"); + assert!(manager.check("Bonjour")); + assert!(manager.check("bonjour")); + assert!(manager.check("salut")); + assert!(!manager.check("Au revoir")); + manager.reset(); + assert!(!manager.check("aloa aloa")); + manager.reset(); + assert!(!manager.check("aloa")); + assert!(manager.check("bonjour")); + manager.reset(); + assert!(!manager.check("aloa bon")); + assert!(manager.check("jour")); + manager.reset(); + assert!(!manager.check("aloa bon")); + assert!(!manager.check("jo")); + assert!(manager.check("ur")); + manager.reset(); + for c in "aloa au 
revoir salu".chars() { + assert!(!manager.check(&c.to_string())); + } + assert!(manager.check(&'t'.to_string())); +} \ No newline at end of file From d9e23d5860f915c6c3c15144682f6da1fef721a8 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Wed, 1 May 2024 20:22:02 +0200 Subject: [PATCH 038/112] prevent from null content assistant --- src/generators/llama/template.rs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/generators/llama/template.rs b/src/generators/llama/template.rs index 42049ed..b304f9d 100644 --- a/src/generators/llama/template.rs +++ b/src/generators/llama/template.rs @@ -87,7 +87,8 @@ impl PromptTemplate { match m.role { Role::System => vec_merge!(tokens, &inst_start, &system_start, &model.tokenize_bytes(&m.content, false, false).unwrap(), &system_end, &inst_end, &nl), Role::User => vec_merge!(tokens, &inst_start, &model.tokenize_bytes(&m.content, false, false).unwrap(), &inst_end, &nl), - Role::Assistant => vec_merge!(tokens, &model.tokenize_bytes(&m.content, false, false).unwrap(), &eos, &nl), + Role::Assistant if !m.content.is_empty() => vec_merge!(tokens, &model.tokenize_bytes(&m.content, false, false).unwrap(), &eos, &nl), + _ => (), } }); Ok(()) @@ -109,7 +110,10 @@ impl PromptTemplate { Role::User => &user, Role::Assistant => &assistant }; - vec_merge!(tokens, &start_header_id, role_tokens, &end_header_id, &nl, &nl, &model.tokenize_bytes(&m.content, false, false).unwrap(), &eot_id); + vec_merge!(tokens, &start_header_id, role_tokens, &end_header_id, &nl, &nl); + if !(m.role == Role::Assistant && m.content.is_empty()) { + vec_merge!(tokens, &model.tokenize_bytes(&m.content, false, false).unwrap(), &eot_id); + } }); Ok(()) } @@ -126,7 +130,12 @@ impl PromptTemplate { match m.role { Role::System => vec_merge!(tokens, &system_prefix_tokens, &content_tokens, &system_suffix_tokens), Role::User => vec_merge!(tokens, &user_prefix_tokens, &content_tokens, &user_suffix_tokens), - Role::Assistant => vec_merge!(tokens, &assistant_prefix_tokens, &content_tokens, &assistant_suffix_tokens), + Role::Assistant => { + vec_merge!(tokens, &assistant_prefix_tokens); + if !m.content.is_empty() { + vec_merge!(tokens, &content_tokens, &assistant_suffix_tokens) + } + }, } }); Ok(()) From 4636b2e07b3a4d322ddcce5d5912f2f3efea818d Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Thu, 2 May 2024 16:15:14 +0200 Subject: [PATCH 039/112] WIP new cli argument interface --- src/arguments.rs | 52 ++++++++++++++++++------------------ src/config.rs | 2 +- src/generators/openai/mod.rs | 4 +-- src/main.rs | 23 ++++++---------- 4 files changed, 37 insertions(+), 44 deletions(-) diff --git a/src/arguments.rs b/src/arguments.rs index ea5dab1..615ce2b 100644 --- a/src/arguments.rs +++ b/src/arguments.rs @@ -1,4 +1,4 @@ -use clap::{Parser, ValueEnum}; +use clap::{Args as ClapArgs, Parser, Subcommand, ValueEnum}; /// Program to communicate with large language models and AI API #[derive(Parser, Debug)] @@ -14,8 +14,8 @@ pub struct Args { /// /// The name can be followed by custom prompt name from the configuration file /// (ex: openai:command) - #[arg(long, short)] - pub engine: String, + #[command(subcommand)] + pub engine: Subcommands, /// Formatter /// /// Possible values: markdown, raw @@ -26,7 +26,29 @@ pub struct Args { pub run: RunChoice, /// Force to run code /// User text prompt - pub input: Option, + #[arg(default_value_t = Default::default())] + pub input: String, +} +#[derive(Subcommand, Debug, Clone)] +pub enum Subcommands { + OpenAIAPI(OpenAIAPIArgs), + 
FromFile(FromFileArgs), + Local(LocalArgs), +} + +#[derive(ClapArgs, Debug, Clone)] +pub struct OpenAIAPIArgs { + model: String, + prompt: String, +} +#[derive(ClapArgs, Debug, Clone)] +pub struct FromFileArgs { + input: String, +} +#[derive(ClapArgs, Debug, Clone)] +pub struct LocalArgs { + model: String, + prompt: String, } #[derive(Default, Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)] @@ -49,26 +71,4 @@ pub enum RunChoice { Ask, /// Run code without asking Force -} -#[derive(Default, Debug, Clone)] -pub struct ProcessedArgs { - pub config_path: String, - pub creds_path: String, - pub engine: String, - pub formatter: FormatterChoice, - pub run: RunChoice, - pub input: String, -} - -impl From for ProcessedArgs { - fn from(args: Args) -> Self { - Self { - config_path: args.config_path, - creds_path: args.creds_path, - engine: args.engine, - formatter: args.formatter, - run: args.run, - input: args.input.unwrap_or_default(), - } - } } \ No newline at end of file diff --git a/src/config.rs b/src/config.rs index 03eb3e1..fb39a1b 100644 --- a/src/config.rs +++ b/src/config.rs @@ -20,7 +20,7 @@ pub struct Config { impl DeserializeExt for Config {} -pub fn format_content<'a>(content: &'a str, args: &args::ProcessedArgs) -> Cow<'a, str> { +pub fn format_content<'a>(content: &'a str, args: &args::Args) -> Cow<'a, str> { static RE: Lazy = Lazy::new(|| Regex::new(r"(?P\$\$?)(?P\w+)").expect("Failed to compile regex")); RE.replace_all(content, |caps: ®ex::Captures| { let prefix = &caps["prefix"]; diff --git a/src/generators/openai/mod.rs b/src/generators/openai/mod.rs index 130275d..5fb54a3 100644 --- a/src/generators/openai/mod.rs +++ b/src/generators/openai/mod.rs @@ -49,7 +49,7 @@ pub struct Message { #[allow(dead_code)] impl Message { - pub fn format_content(mut self, args: &crate::args::ProcessedArgs) -> Self { + pub fn format_content(mut self, args: &str) -> Self { self.content = crate::config::format_content(&self.content, args).to_string(); self } @@ -211,7 +211,7 @@ impl ChatResponse { } } -pub async fn run(creds: credentials::Credentials, config: crate::config::Config, args: args::ProcessedArgs) -> ResultRun { +pub async fn run(creds: credentials::Credentials, config: crate::config::Config, args: args::OpenAIAPIArgs) -> ResultRun { let openai_api_key = creds.api_key; if openai_api_key.is_empty() { diff --git a/src/main.rs b/src/main.rs index 57c8d37..1f731e6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -52,7 +52,7 @@ async fn main() -> Result<(), String> { let args = { let mut args = args::Args::parse(); - if args.input.is_none() { + if args.input.is_empty() { use std::io::Read; let mut str_input = std::string::String::new(); let mut stdin = std::io::stdin(); @@ -60,9 +60,9 @@ async fn main() -> Result<(), String> { .read_to_string(&mut str_input) .map_err(|e| format!("Failed to read input from stdin: {}", e))?; - args.input = Some(str_input.trim().to_string()); + args.input = str_input.trim().to_string(); } - args::ProcessedArgs::from(args) + args }; let config = config::Config::from_yaml_file(filesystem::resolve_path(&args.config_path).as_ref()) @@ -79,20 +79,13 @@ async fn main() -> Result<(), String> { }; let mut runner = runner::Runner::new(args.run); - let (engine, _prompt) = args - .engine - .find(':') - .map(|i| (&args.engine[..i], Some(&args.engine[i + 1..]))) - .unwrap_or((args.engine.as_str(), None)); - - let mut stream = match engine { - "openai" => generators::openai::run(get_creds(&args.creds_path)?.openai, config, args).await + let mut stream = match 
args.engine { + args::Subcommands::OpenAIAPI(args_engine) => generators::openai::run(get_creds(&args.creds_path)?.openai, config, args_engine, input).await .map_err(|e| format!("Failed to request OpenAI API: {}", e))?, - "local" => generators::llama::run(config, args).await + args::Subcommands::Local(args_engine) => generators::llama::run(config, args_engine, input).await .map_err(|e| format!("Unable to run local model: {}", e))?, - "from-file" => generators::from_file::run(config, args).await - .map_err(|e| format!("Failed to read from file: {}", e))?, - _ => panic!("Unknown engine: {}", engine), + args::Subcommands::FromFile(args_engine) => generators::from_file::run(config, args_engine, input).await + .map_err(|e| format!("Failed to read from file: {}", e))? }; loop { From 35c795d102123e5bcd0f12e4372ec4d95dc5f8c0 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Fri, 3 May 2024 11:10:40 +0200 Subject: [PATCH 040/112] convert subcommands arguments from positional to labelled --- src/arguments.rs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/arguments.rs b/src/arguments.rs index 615ce2b..81b2658 100644 --- a/src/arguments.rs +++ b/src/arguments.rs @@ -38,17 +38,19 @@ pub enum Subcommands { #[derive(ClapArgs, Debug, Clone)] pub struct OpenAIAPIArgs { - model: String, - prompt: String, + #[arg(long, short)] + pub model: String, + #[arg(long, short)] + pub prompt: Option, } #[derive(ClapArgs, Debug, Clone)] -pub struct FromFileArgs { - input: String, -} +pub struct FromFileArgs; #[derive(ClapArgs, Debug, Clone)] pub struct LocalArgs { - model: String, - prompt: String, + #[arg(long, short)] + pub model: String, + #[arg(long, short)] + pub prompt: String, } #[derive(Default, Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)] From 4b367d493e9a654dcbfe348e2101393cf30223a3 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Fri, 3 May 2024 11:11:11 +0200 Subject: [PATCH 041/112] Propagate common cli argument to subcommands --- src/arguments.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/arguments.rs b/src/arguments.rs index 81b2658..f4420c5 100644 --- a/src/arguments.rs +++ b/src/arguments.rs @@ -5,10 +5,10 @@ use clap::{Args as ClapArgs, Parser, Subcommand, ValueEnum}; #[command(author, version, about, long_about = None)] pub struct Args { /// Configuration file - #[arg(long, default_value_t = format!("{1}{0}config.yml", std::path::MAIN_SEPARATOR, crate::filesystem::config_dir()))] + #[arg(long, global = true, default_value_t = format!("{1}{0}config.yml", std::path::MAIN_SEPARATOR, crate::filesystem::config_dir()))] pub config_path: String, /// Credentials file - #[arg(long, default_value_t = format!("{1}{0}creds.yml", std::path::MAIN_SEPARATOR, crate::filesystem::cache_dir()))] + #[arg(long, global = true, default_value_t = format!("{1}{0}creds.yml", std::path::MAIN_SEPARATOR, crate::filesystem::cache_dir()))] pub creds_path: String, /// Engine name /// @@ -19,14 +19,14 @@ pub struct Args { /// Formatter /// /// Possible values: markdown, raw - #[arg(long, short, value_enum, default_value_t = Default::default())] + #[arg(long, short, global = true, value_enum, default_value_t = Default::default())] pub formatter: FormatterChoice, /// Run code block if the language is supported - #[arg(long, short, value_enum, default_value_t = Default::default())] + #[arg(long, short, global = true, value_enum, default_value_t = Default::default())] pub run: RunChoice, /// Force to run code /// User text prompt - 
#[arg(default_value_t = Default::default())] + #[arg(global = true, default_value_t = Default::default())] pub input: String, } #[derive(Subcommand, Debug, Clone)] From 9368e7380c2baa6b04a229b5dbf90f6055bfbc7a Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Fri, 3 May 2024 11:12:08 +0200 Subject: [PATCH 042/112] macro to easily create hashmap --- src/utils/mod.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 508ca66..bbaf0a9 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -2,4 +2,14 @@ pub mod split_bytes; pub mod flatten_stream; pub use split_bytes::{SplitBytes, SplitBytesFactory}; -pub use flatten_stream::FlattenTrait; \ No newline at end of file +pub use flatten_stream::FlattenTrait; + +macro_rules! hashmap { + ($($name:ident => $value:expr),*) => {{ + let mut map = ::std::collections::HashMap::new(); + $(map.insert({stringify!($name)}.into(), {$value}.into());)* + map + }}; +} + +pub(crate) use hashmap; \ No newline at end of file From 92a722e1c53dfde65c86fd7d5b56c539ca3dc3c5 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Fri, 3 May 2024 11:16:29 +0200 Subject: [PATCH 043/112] format messages content using hashmap --- src/config.rs | 9 +++------ src/generators/llama/mod.rs | 4 ++-- src/generators/openai/config.rs | 4 +++- src/generators/openai/mod.rs | 11 ++++++----- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/config.rs b/src/config.rs index fb39a1b..6c0cba6 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,4 +1,4 @@ -use std::borrow::Cow; +use std::{borrow::Cow, collections::HashMap}; use once_cell::sync::Lazy; use regex::Regex; use serde::{Deserialize, Serialize}; @@ -20,7 +20,7 @@ pub struct Config { impl DeserializeExt for Config {} -pub fn format_content<'a>(content: &'a str, args: &args::Args) -> Cow<'a, str> { +pub fn format_content<'a>(content: &'a str, args: &HashMap) -> Cow<'a, str> { static RE: Lazy = Lazy::new(|| Regex::new(r"(?P\$\$?)(?P\w+)").expect("Failed to compile regex")); RE.replace_all(content, |caps: ®ex::Captures| { let prefix = &caps["prefix"]; @@ -28,10 +28,7 @@ pub fn format_content<'a>(content: &'a str, args: &args::Args) -> Cow<'a, str> { return format!("${}", &caps["name"]); } let name = &caps["name"]; - match name { - "input" => args.input.clone(), - _ => String::new(), - } + args.get(name).cloned().unwrap_or_default() }) } diff --git a/src/generators/llama/mod.rs b/src/generators/llama/mod.rs index 62cfe97..54b74c7 100644 --- a/src/generators/llama/mod.rs +++ b/src/generators/llama/mod.rs @@ -10,7 +10,7 @@ use llama_cpp::{ use once_cell::sync::OnceCell; use log::{debug, info}; use crate::{ - args, config::{format_content, Config as AIOConfig} + args, config::{format_content, Config as AIOConfig}, utils::hashmap }; use super::{Error, ResultRun}; @@ -55,7 +55,7 @@ pub async fn run( let messages = prompt.content.iter() .cloned() .map(|mut m| { - m.content = format_content(&m.content, &args).to_string(); + m.content = format_content(&m.content, &hashmap!(input => input)).to_string(); m }) .collect::>(); diff --git a/src/generators/openai/config.rs b/src/generators/openai/config.rs index 2716286..bb6fddc 100644 --- a/src/generators/openai/config.rs +++ b/src/generators/openai/config.rs @@ -1,3 +1,5 @@ +use std::collections::HashMap; + use serde::{Deserialize, Serialize}; use super::ChatRequestParameters; use super::{Message, Role}; @@ -59,7 +61,7 @@ impl Prompt { ..Default::default() } } - pub fn format_contents(mut self, args: 
&crate::args::ProcessedArgs) -> Self { + pub fn format_contents(mut self, args: &HashMap) -> Self { self.messages.iter_mut().map(|m| m.format_content_as_ref(args)).for_each(|_| ()); self } diff --git a/src/generators/openai/mod.rs b/src/generators/openai/mod.rs index 5fb54a3..a3dfed7 100644 --- a/src/generators/openai/mod.rs +++ b/src/generators/openai/mod.rs @@ -1,13 +1,14 @@ pub mod config; pub mod credentials; +use std::collections::HashMap; + use serde::{Serialize, Deserialize}; use tokio_stream::StreamExt; use crate::{ args, utils::{ - SplitBytesFactory, - FlattenTrait + hashmap, FlattenTrait, SplitBytesFactory } }; use self::config::Prompt; @@ -49,11 +50,11 @@ pub struct Message { #[allow(dead_code)] impl Message { - pub fn format_content(mut self, args: &str) -> Self { + pub fn format_content(mut self, args: &HashMap) -> Self { self.content = crate::config::format_content(&self.content, args).to_string(); self } - pub fn format_content_as_ref(&mut self, args: &crate::args::ProcessedArgs) -> &mut Self { + pub fn format_content_as_ref(&mut self, args: &HashMap) -> &mut Self { self.content = crate::config::format_content(&self.content, args).to_string(); self } @@ -223,7 +224,7 @@ pub async fn run(creds: credentials::Credentials, config: crate::config::Config, config.openai.prompts.into_iter() .find(|prompt| prompt.name == config_prompt) .ok_or(Error::Custom("Prompt not found".into()))? - .format_contents(&args) + .format_contents(&hashmap!(input => input)) } else { Prompt::from_input(&args.input) }; From 427a3a8e909b770b5e767c856adfdb0fc349f1d8 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Fri, 3 May 2024 11:17:06 +0200 Subject: [PATCH 044/112] apply argument restructuration --- src/generators/from_file.rs | 3 +-- src/generators/llama/mod.rs | 9 +++------ src/generators/openai/mod.rs | 7 +++---- src/main.rs | 6 +++--- 4 files changed, 10 insertions(+), 15 deletions(-) diff --git a/src/generators/from_file.rs b/src/generators/from_file.rs index 58fc49f..c988d5e 100644 --- a/src/generators/from_file.rs +++ b/src/generators/from_file.rs @@ -1,9 +1,8 @@ use crate::args; use super::{ResultRun, ResultStream, Error}; -pub async fn run(_: crate::config::Config, args: args::ProcessedArgs) -> ResultRun { +pub async fn run(_: crate::config::Config, _args: args::FromFileArgs, input: &str) -> ResultRun { use tokio_stream::StreamExt; - let input = args.input; let file = tokio::fs::File::open(&input).await.map_err(|e| Error::Custom(std::borrow::Cow::Owned(e.to_string())))?; let stream = tokio_util::io::ReaderStream::new(file).map(|r| -> ResultStream { diff --git a/src/generators/llama/mod.rs b/src/generators/llama/mod.rs index 54b74c7..4efa237 100644 --- a/src/generators/llama/mod.rs +++ b/src/generators/llama/mod.rs @@ -34,14 +34,11 @@ fn init_model(config: &config::Model) -> Result<(), Error> { pub async fn run( config: AIOConfig, - args: args::ProcessedArgs + args: args::LocalArgs, + input: &str ) -> ResultRun { - let model = args.engine - .split(':') - .nth(1) - .ok_or_else(|| Error::Custom("Model missing in engine parameter".into()))?; let model_config = config.local.models.into_iter() - .find(|c| c.name == model) + .find(|c| c.name == args.model) .ok_or_else(|| Error::Custom("Model not found in config".into()))?; if LOCAL_LLAMA.get().is_none() { init_model(&model_config)?; diff --git a/src/generators/openai/mod.rs b/src/generators/openai/mod.rs index a3dfed7..d0fb4ae 100644 --- a/src/generators/openai/mod.rs +++ b/src/generators/openai/mod.rs @@ -212,21 +212,20 @@ impl ChatResponse { } } 
-pub async fn run(creds: credentials::Credentials, config: crate::config::Config, args: args::OpenAIAPIArgs) -> ResultRun { +pub async fn run(creds: credentials::Credentials, config: crate::config::Config, args: args::OpenAIAPIArgs, input: &str) -> ResultRun { let openai_api_key = creds.api_key; if openai_api_key.is_empty() { return Err(Error::Custom("OpenAI API key not found".into())); } - let config_prompt = args.engine.find(':').map(|i| &args.engine[i+1..]); - let prompt = if let Some(config_prompt) = config_prompt { + let prompt = if let Some(config_prompt) = args.prompt { config.openai.prompts.into_iter() .find(|prompt| prompt.name == config_prompt) .ok_or(Error::Custom("Prompt not found".into()))? .format_contents(&hashmap!(input => input)) } else { - Prompt::from_input(&args.input) + Prompt::from_input(&input) }; // Send a request diff --git a/src/main.rs b/src/main.rs index 1f731e6..81784d9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -80,11 +80,11 @@ async fn main() -> Result<(), String> { let mut runner = runner::Runner::new(args.run); let mut stream = match args.engine { - args::Subcommands::OpenAIAPI(args_engine) => generators::openai::run(get_creds(&args.creds_path)?.openai, config, args_engine, input).await + args::Subcommands::OpenAIAPI(args_engine) => generators::openai::run(get_creds(&args.creds_path)?.openai, config, args_engine, &args.input).await .map_err(|e| format!("Failed to request OpenAI API: {}", e))?, - args::Subcommands::Local(args_engine) => generators::llama::run(config, args_engine, input).await + args::Subcommands::Local(args_engine) => generators::llama::run(config, args_engine, &args.input).await .map_err(|e| format!("Unable to run local model: {}", e))?, - args::Subcommands::FromFile(args_engine) => generators::from_file::run(config, args_engine, input).await + args::Subcommands::FromFile(args_engine) => generators::from_file::run(config, args_engine, &args.input).await .map_err(|e| format!("Failed to read from file: {}", e))? 
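        // Each engine arm maps its error into a user-facing String here; the
        // selected generator returns a stream of text chunks, which the
        // formatter/runner loop below consumes chunk by chunk.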
}; From bc9e5f29a4c11b281d760c7cc76a192b12ffe56c Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Fri, 3 May 2024 15:08:50 +0200 Subject: [PATCH 045/112] remove test value in template stops --- src/generators/llama/template.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/generators/llama/template.rs b/src/generators/llama/template.rs index b304f9d..a2fae41 100644 --- a/src/generators/llama/template.rs +++ b/src/generators/llama/template.rs @@ -145,7 +145,7 @@ impl PromptTemplate { match self { PromptTemplate::ChatML => Ok(stop_manager!["<|im_end|>", eos_str]), PromptTemplate::Llama2 => Ok(stop_manager!["[INST]", eos_str]), - PromptTemplate::Llama3 => Ok(stop_manager!["thank", "<|eot_id|>", eos_str]), + PromptTemplate::Llama3 => Ok(stop_manager!["<|eot_id|>", eos_str]), PromptTemplate::Custom(custom_template) => Ok(stop_manager![&custom_template.user_prefix, eos_str]), } } From 0e2ea3a1cefd47019d7535dac0e2565da56002e5 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Fri, 3 May 2024 15:09:20 +0200 Subject: [PATCH 046/112] move vec_merge to utils --- src/generators/llama/template.rs | 18 ++++-------------- src/utils/mod.rs | 17 ++++++++++++++++- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/src/generators/llama/template.rs b/src/generators/llama/template.rs index a2fae41..874f8e1 100644 --- a/src/generators/llama/template.rs +++ b/src/generators/llama/template.rs @@ -1,5 +1,8 @@ use serde::{Deserialize, Serialize}; -use super::super::openai::{Message, Role}; +use crate::{ + generators::openai::{Message, Role}, + utils::vec_merge +}; use llama_cpp::{LlamaTokenizationError, Token}; use super::stop::{stop_manager, StopManager}; @@ -22,19 +25,6 @@ pub enum PromptTemplate { Custom(CustomTemplate) } -fn append_to_vec(vec: &mut Vec, other: &[T]) { - vec.reserve(other.len()); - other.iter().for_each(|v| vec.push(*v)); -} - -macro_rules! vec_merge { - ($tokens:ident, $($other_tokens:expr),*) => {{ - let arrs = [$($other_tokens),*]; - $tokens.reserve(arrs.iter().map(|arr| arr.len()).sum()); - arrs.iter().map(|arr| arr.iter()).flatten().for_each(|v| $tokens.push(*v)); - }}; -} - impl PromptTemplate { pub fn name(&self) -> &str { match self { diff --git a/src/utils/mod.rs b/src/utils/mod.rs index bbaf0a9..05e9903 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -12,4 +12,19 @@ macro_rules! hashmap { }}; } -pub(crate) use hashmap; \ No newline at end of file +pub(crate) use hashmap; + +pub fn append_to_vec(vec: &mut Vec, other: &[T]) { + vec.reserve(other.len()); + other.iter().for_each(|v| vec.push(*v)); +} + +macro_rules! vec_merge { + ($tokens:ident, $($other_tokens:expr),*) => {{ + let arrs = [$($other_tokens),*]; + $tokens.reserve(arrs.iter().map(|arr| arr.len()).sum()); + arrs.iter().map(|arr| arr.iter()).flatten().for_each(|v| $tokens.push(*v)); + }}; +} + +pub(crate) use vec_merge; \ No newline at end of file From ad12fe88eb7a03a71e65174d87fbb0ec79cb69c3 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Fri, 3 May 2024 15:09:48 +0200 Subject: [PATCH 047/112] fix ref warning --- src/generators/openai/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/generators/openai/mod.rs b/src/generators/openai/mod.rs index d0fb4ae..b855df9 100644 --- a/src/generators/openai/mod.rs +++ b/src/generators/openai/mod.rs @@ -225,7 +225,7 @@ pub async fn run(creds: credentials::Credentials, config: crate::config::Config, .ok_or(Error::Custom("Prompt not found".into()))? 
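            // `format_contents` runs the `$name` placeholder substitution over every
            // message of the configured prompt; the map built by `hashmap!` exposes
            // the raw CLI input under the name `input`.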
.format_contents(&hashmap!(input => input)) } else { - Prompt::from_input(&input) + Prompt::from_input(input) }; // Send a request From 8043585307071481d39c7497df062037435f11c0 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Fri, 3 May 2024 20:38:40 +0200 Subject: [PATCH 048/112] Optimize error cases --- src/arguments.rs | 2 +- src/generators/llama/mod.rs | 27 ++++++++++++++++++--------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/src/arguments.rs b/src/arguments.rs index f4420c5..533da6a 100644 --- a/src/arguments.rs +++ b/src/arguments.rs @@ -50,7 +50,7 @@ pub struct LocalArgs { #[arg(long, short)] pub model: String, #[arg(long, short)] - pub prompt: String, + pub prompt: Option, } #[derive(Default, Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)] diff --git a/src/generators/llama/mod.rs b/src/generators/llama/mod.rs index 4efa237..e4e7bb8 100644 --- a/src/generators/llama/mod.rs +++ b/src/generators/llama/mod.rs @@ -37,6 +37,23 @@ pub async fn run( args: args::LocalArgs, input: &str ) -> ResultRun { + let prompt = match args.prompt { + Some(prompt) => config.local.prompts + .iter() + .find(|v| v.name == prompt), + None => config.local.prompts + .iter() + .find(|v| v.name == "default") + .or_else(|| config.local.prompts.first()) + } + .ok_or_else(|| Error::Custom("Prompt not found in config".into()))?; + let messages = prompt.content.iter() + .cloned() + .map(|mut m| { + m.content = format_content(&m.content, &hashmap!(input => input)).to_string(); + m + }) + .collect::>(); let model_config = config.local.models.into_iter() .find(|c| c.name == args.model) .ok_or_else(|| Error::Custom("Model not found in config".into()))?; @@ -47,15 +64,7 @@ pub async fn run( let session_params = SessionParams::default(); let mut session = model.create_session(session_params).map_err(|_| Error::Custom("Failed to create session".into()))?; - - let prompt = config.local.prompts.first().unwrap(); - let messages = prompt.content.iter() - .cloned() - .map(|mut m| { - m.content = format_content(&m.content, &hashmap!(input => input)).to_string(); - m - }) - .collect::>(); + let context_tokens = model_config.template.messages_to_tokens(model, &messages).map_err(|_| Error::Custom("Failed to convert prompt messages to tokens".into()))?; if log::log_enabled!(log::Level::Debug) { debug!("Tokens: "); From 6e98acffd0de76818fb2cf506937379a6b2bd2a0 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sat, 4 May 2024 15:01:32 +0200 Subject: [PATCH 049/112] add verbose arg + format main --- src/arguments.rs | 9 +++++ src/generators/llama/mod.rs | 28 ++++++++------ src/main.rs | 74 ++++++++++++++++++++++--------------- 3 files changed, 69 insertions(+), 42 deletions(-) diff --git a/src/arguments.rs b/src/arguments.rs index 533da6a..0ed17f6 100644 --- a/src/arguments.rs +++ b/src/arguments.rs @@ -10,6 +10,15 @@ pub struct Args { /// Credentials file #[arg(long, global = true, default_value_t = format!("{1}{0}creds.yml", std::path::MAIN_SEPARATOR, crate::filesystem::cache_dir()))] pub creds_path: String, + /// Verbose mode + /// + /// Count: + /// 0: errors + /// 1: warnings + /// 2: info + /// 3: debug + #[arg(short, long, action = clap::ArgAction::Count)] + pub verbose: u8, /// Engine name /// /// The name can be followed by custom prompt name from the configuration file diff --git a/src/generators/llama/mod.rs b/src/generators/llama/mod.rs index e4e7bb8..4ad0262 100644 --- a/src/generators/llama/mod.rs +++ b/src/generators/llama/mod.rs @@ -83,16 +83,20 @@ pub async fn run( let completion 
= session .start_completing_with(StandardSampler::default(), 1024); - // let completion_stream = StreamExt::map(completion, |token| Ok(format!("{}({})", model.token_to_piece(token), token.0))); - let mut discard_tokens = model_config.template.stop_tokens(model).map_err(|_| Error::Custom("Failed to convert prompt messages to tokens".into()))?; - let completion_stream = - StreamExt::map( - StreamExt::take_while( - TokensToStrings::new(completion, model.clone()), - move |token| !discard_tokens.check(token) - ), - Ok - ); - - Ok(Box::pin(completion_stream)) + if log::log_enabled!(log::Level::Trace) { + let completion_stream = StreamExt::map(completion, |token| Ok(format!("{}({})", model.token_to_piece(token), token.0))); + Ok(Box::pin(completion_stream)) + } else { + let mut discard_tokens = model_config.template.stop_tokens(model).map_err(|_| Error::Custom("Failed to convert prompt messages to tokens".into()))?; + let completion_stream = + StreamExt::map( + StreamExt::take_while( + TokensToStrings::new(completion, model.clone()), + move |token| !discard_tokens.check(token) + ), + Ok + ); + + Ok(Box::pin(completion_stream)) + } } \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 81784d9..30c5212 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,4 @@ -#[cfg(test)] -mod tests; pub mod arguments; -mod utils; mod config; mod credentials; mod filesystem; @@ -9,10 +6,12 @@ mod formatters; mod generators; mod runner; mod serde_io; +#[cfg(test)] +mod tests; +mod utils; mod openai {} -use simplelog::TermLogger; use arguments as args; use clap::Parser; use formatters::Formatter; @@ -30,27 +29,14 @@ macro_rules! raise_str { fn get_creds(creds_path: &str) -> Result { Ok(raise_str!( - credentials::Credentials::from_yaml_file( - filesystem::resolve_path(creds_path).as_ref() - ), + credentials::Credentials::from_yaml_file(filesystem::resolve_path(creds_path).as_ref()), "Failed to parse credentials file: {}" )) } #[tokio::main] async fn main() -> Result<(), String> { - - simplelog::TermLogger::init( - simplelog::LevelFilter::Trace, - simplelog::Config::default(), - simplelog::TerminalMode::Stdout, - simplelog::ColorChoice::Auto, - ) - .unwrap(); - - - - let args = { + let app_args = { let mut args = args::Args::parse(); if args.input.is_empty() { use std::io::Read; @@ -64,8 +50,25 @@ async fn main() -> Result<(), String> { } args }; + + let log_level = match app_args.verbose { + 0 => simplelog::LevelFilter::Error, + 1 => simplelog::LevelFilter::Warn, + 2 => simplelog::LevelFilter::Info, + _ if cfg!(debug_assertions) => simplelog::LevelFilter::Trace, + _ => simplelog::LevelFilter::Debug, + }; + + simplelog::TermLogger::init( + log_level, + simplelog::Config::default(), + simplelog::TerminalMode::Stdout, + simplelog::ColorChoice::Auto, + ) + .unwrap(); + let config = - config::Config::from_yaml_file(filesystem::resolve_path(&args.config_path).as_ref()) + config::Config::from_yaml_file(filesystem::resolve_path(&app_args.config_path).as_ref()) .map_err(|e| { format!( "An error occured while loading or creating configuration file: {}", @@ -73,19 +76,31 @@ async fn main() -> Result<(), String> { ) })?; - let mut formatter: Box = match args.formatter { + let mut formatter: Box = match app_args.formatter { args::FormatterChoice::Markdown => Box::new(formatters::new_markdown_formatter()), args::FormatterChoice::Raw => Box::new(formatters::new_raw_formatter()), }; - let mut runner = runner::Runner::new(args.run); + let mut runner = runner::Runner::new(app_args.run); - let mut stream = match 
args.engine { - args::Subcommands::OpenAIAPI(args_engine) => generators::openai::run(get_creds(&args.creds_path)?.openai, config, args_engine, &args.input).await - .map_err(|e| format!("Failed to request OpenAI API: {}", e))?, - args::Subcommands::Local(args_engine) => generators::llama::run(config, args_engine, &args.input).await - .map_err(|e| format!("Unable to run local model: {}", e))?, - args::Subcommands::FromFile(args_engine) => generators::from_file::run(config, args_engine, &args.input).await - .map_err(|e| format!("Failed to read from file: {}", e))? + let mut stream = match app_args.engine { + args::Subcommands::OpenAIAPI(args_engine) => generators::openai::run( + get_creds(&app_args.creds_path)?.openai, + config, + args_engine, + &app_args.input, + ) + .await + .map_err(|e| format!("Failed to request OpenAI API: {}", e))?, + args::Subcommands::Local(args_engine) => { + generators::llama::run(config, args_engine, &app_args.input) + .await + .map_err(|e| format!("Unable to run local model: {}", e))? + } + args::Subcommands::FromFile(args_engine) => { + generators::from_file::run(config, args_engine, &app_args.input) + .await + .map_err(|e| format!("Failed to read from file: {}", e))? + } }; loop { @@ -105,4 +120,3 @@ async fn main() -> Result<(), String> { raise_str!(runner.end_of_document(), "Failed to run code: {}"); Ok(()) } - From 993045b879719b14b735c8615b07eb5ef8e8c931 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sat, 4 May 2024 15:44:46 +0200 Subject: [PATCH 050/112] add max token to completion process --- src/generators/llama/config.rs | 19 +++++++++---------- src/generators/llama/mod.rs | 2 +- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/generators/llama/config.rs b/src/generators/llama/config.rs index a4dfd06..16be8c8 100644 --- a/src/generators/llama/config.rs +++ b/src/generators/llama/config.rs @@ -25,18 +25,17 @@ pub struct Prompt { } #[derive(Debug, Deserialize, Serialize)] -#[serde(default)] pub struct PromptParameters { - n_prev_tokens: i32, - top_k: i32, - top_p: f32, - temperature: f32, - repeat_penalty: f32, - repeat_last_n: i32, - max_tokens: i32, - #[serde(skip_serializing_if = "Option::is_none")] - negative_prompt: Option, + pub n_prev_tokens: i32, + pub top_k: i32, + pub top_p: f32, + pub temperature: f32, + pub repeat_penalty: f32, + pub repeat_last_n: i32, + pub max_tokens: i32, + pub negative_prompt: Option, } + impl From for StandardSampler { fn from(parameters: PromptParameters) -> Self { Self { diff --git a/src/generators/llama/mod.rs b/src/generators/llama/mod.rs index 4ad0262..527c945 100644 --- a/src/generators/llama/mod.rs +++ b/src/generators/llama/mod.rs @@ -82,7 +82,7 @@ pub async fn run( .map_err(|_| Error::Custom("Failed to advance context".into()))?; let completion = session - .start_completing_with(StandardSampler::default(), 1024); + .start_completing_with(StandardSampler::default(), prompt.parameters.max_tokens as _); if log::log_enabled!(log::Level::Trace) { let completion_stream = StreamExt::map(completion, |token| Ok(format!("{}({})", model.token_to_piece(token), token.0))); Ok(Box::pin(completion_stream)) From f353b970239c5f66db782ab162e1d01ad7304b5d Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sat, 4 May 2024 15:45:17 +0200 Subject: [PATCH 051/112] add trace log even in release mode --- src/main.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main.rs b/src/main.rs index 30c5212..29b7e4a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -55,8 +55,8 @@ async fn main() -> 
Result<(), String> { 0 => simplelog::LevelFilter::Error, 1 => simplelog::LevelFilter::Warn, 2 => simplelog::LevelFilter::Info, - _ if cfg!(debug_assertions) => simplelog::LevelFilter::Trace, - _ => simplelog::LevelFilter::Debug, + 3 => simplelog::LevelFilter::Debug, + _ => simplelog::LevelFilter::Trace, }; simplelog::TermLogger::init( From 621ef689d8b49c1fc7a4c570d6d37a9871e729f0 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sat, 4 May 2024 15:45:33 +0200 Subject: [PATCH 052/112] opti code --- src/main.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main.rs b/src/main.rs index 29b7e4a..4fad378 100644 --- a/src/main.rs +++ b/src/main.rs @@ -42,10 +42,10 @@ async fn main() -> Result<(), String> { use std::io::Read; let mut str_input = std::string::String::new(); let mut stdin = std::io::stdin(); - stdin - .read_to_string(&mut str_input) - .map_err(|e| format!("Failed to read input from stdin: {}", e))?; - + raise_str!( + stdin.read_to_string(&mut str_input), + "Failed to read input from stdin: {}" + ); args.input = str_input.trim().to_string(); } args From 69a2a405b45097ce5e150778751de2fb8d0ad2cd Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sat, 4 May 2024 16:42:21 +0200 Subject: [PATCH 053/112] Update README --- README.md | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 4dd84be..d64fe2f 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,15 @@ # aio - Streamlined AI Terminal Interactions -Welcome to the README for the `aio` command line tool – your gateway to seamless communication with AI engines via the terminal. This tool streamlines interactions with AI APIs, including the OpenAI API, and conveniently formats the results using integrated markdown formatting. Whether you're seeking information, generating content, or experimenting with AI, `aio` has you covered. +Welcome to the README for the `aio` command line tool – your gateway to seamless communication with AI engines via the terminal. This tool streamlines interactions with AI APIs, including the OpenAI API, and conveniently formats the results using integrated Markdown formatting. Whether you're seeking information, generating content, or experimenting with AI, `aio` has you covered.

+## 0.9 BREAKING CHANGES + +`aio` command line argument changed to be more ergonomic. See [the Argument manifest](./docs/ARGS.md) for more information about the new interface. + ## 0.8 BREAKING CHANGES The default credentials path has changed from `~/.config/aio/creds.yml` to `~/.cache/aio/creds.yml`. @@ -13,13 +17,14 @@ The default credentials path has changed from `~/.config/aio/creds.yml` to `~/.c ## Table of Contents - [aio - Streamlined AI Terminal Interactions](#aio---streamlined-ai-terminal-interactions) + - [0.9 BREAKING CHANGES](#09-breaking-changes) - [0.8 BREAKING CHANGES](#08-breaking-changes) - [Table of Contents](#table-of-contents) - [NEW : Run code from code blocks](#new--run-code-from-code-blocks) - [Introduction](#introduction) - [Installation](#installation) - [Install from crates.io](#install-from-cratesio) - - [Install from Github releases](#install-from-github-releases) + - [Install from GitHub releases](#install-from-github-releases) - [Install from source](#install-from-source) - [Usage](#usage) - [Arguments](#arguments) @@ -53,11 +58,11 @@ cargo install aio-cli The program will be installed to your `~/.cargo/bin` directory. -### Install from Github releases +### Install from GitHub releases To install `aio`, follow these steps: -1. Download the [latest release](https://github.com/glcraft/aio/releases/latest) based on you operating system and architecture. +1. Download the [latest release](https://github.com/glcraft/aio/releases/latest) based on your operating system and architecture. 2. Extract the downloaded archive. @@ -112,10 +117,10 @@ The `aio` command line tool supports the following arguments: - `from-file`: Read prompts from a file. Useful to debug or test a file. - `-f|--formatter `: Formatter to use. Possible FORMATTERs: - - `markdown`: Parse the text response as markdown and format it in the console. + - `markdown`: Parse the text response as Markdown and format it in the console. - `raw`: Doesn't parse the response text. Just displays the raw text response. - By default the formatter is set to `markdown` if in terminal/tty, otherwise `raw`. + By default, the formatter is set to `markdown` if in terminal/tty, otherwise `raw`. - `-r|--run `: Run code block if the language is supported. Possible METHODs: - `no`: Doesn't run anything. From 67b0b1b537095a1a0b5cde4ec52bda9196b0762d Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sat, 4 May 2024 21:36:15 +0200 Subject: [PATCH 054/112] WIP cli args docs --- docs/ARGS.md | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 docs/ARGS.md diff --git a/docs/ARGS.md b/docs/ARGS.md new file mode 100644 index 0000000..faeab7b --- /dev/null +++ b/docs/ARGS.md @@ -0,0 +1,74 @@ +# CLI Arguments Documentation + +## General Structure +- **Program:** Communicate with large language models and AI APIs. + +## Arguments +- **`--config-path`**: Path to the configuration file. + - **Default:** `{config_dir}/config.yml` (path depends on the system-defined location). +- **`--creds-path`**: Path to the credentials file. + - **Default:** `{cache_dir}/creds.yml` (path depends on the system-defined location). +- **`-v, --verbose`**: Verbose mode with different levels of logging. + - **Count:** + - 0 : errors only. + - 1 : warnings. + - 2 : info. + - 3 : debug. + +#### Subcommands +- **`engine`**: Specifies the engine used, can be followed by a custom command name from the configuration file (e.g., `openai:command`). 
+ +### Usage Examples +```bash +# Use with configuration and log levels +$ ./program_name --config-path path/to/config.yml --creds-path path/to/creds.yml -vvv + +# Specify a subcommand with the engine +$ ./program_name engine openai:command +``` + +### Notes +- The paths for the configuration and credentials files are computed based on the user's environment and can be customized. +- Verbose mode allows controlling the level of log details displayed during the program's execution. + +--- + +### Subcommands and Arguments Documentation + +#### 1. **OpenAIAPI** + Used to interact with the OpenAI API. + + - **Arguments**: + - **`--model`** (or `-m`): Specifies the model to use. + - **`--prompt`** (or `-p`): Optional. Provides an initial prompt for the model. + +#### 2. **FromFile** + This subcommand does not have specific arguments. It likely serves to load data from a file. + +#### 3. **Local** + Used to operate locally with a specific model. + + - **Arguments**: + - **`--model`** (or `-m`): Specifies the local model to use. + - **`--prompt`** (or `-p`): Optional. Provides an initial prompt for the local model. + +### Usage Examples + +For the **OpenAIAPI** subcommand: +```bash +$ ./program_name engine OpenAIAPI --model davinci --prompt "Hello" +``` + +For the **FromFile** subcommand: +```bash +$ ./program_name engine FromFile +``` + +For the **Local** subcommand: +```bash +$ ./program_name engine Local --model curie +``` + +### Notes +- Each subcommand can be used for specific use cases, ensuring flexibility based on user needs. +- Optional arguments like `--prompt` allow for advanced customization of requests. \ No newline at end of file From d0ade876031fa1d90c08f6bd02fb4f7415a9ef98 Mon Sep 17 00:00:00 2001 From: Gly Date: Sun, 5 May 2024 14:13:53 +0200 Subject: [PATCH 055/112] Rename OpenAIAPI to Api --- src/arguments.rs | 4 ++-- src/generators/openai/mod.rs | 2 +- src/main.rs | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/arguments.rs b/src/arguments.rs index 0ed17f6..7e6edcd 100644 --- a/src/arguments.rs +++ b/src/arguments.rs @@ -40,13 +40,13 @@ pub struct Args { } #[derive(Subcommand, Debug, Clone)] pub enum Subcommands { - OpenAIAPI(OpenAIAPIArgs), + Api(ApiArgs), FromFile(FromFileArgs), Local(LocalArgs), } #[derive(ClapArgs, Debug, Clone)] -pub struct OpenAIAPIArgs { +pub struct ApiArgs { #[arg(long, short)] pub model: String, #[arg(long, short)] diff --git a/src/generators/openai/mod.rs b/src/generators/openai/mod.rs index b855df9..97b93c7 100644 --- a/src/generators/openai/mod.rs +++ b/src/generators/openai/mod.rs @@ -212,7 +212,7 @@ impl ChatResponse { } } -pub async fn run(creds: credentials::Credentials, config: crate::config::Config, args: args::OpenAIAPIArgs, input: &str) -> ResultRun { +pub async fn run(creds: credentials::Credentials, config: crate::config::Config, args: args::ApiArgs, input: &str) -> ResultRun { let openai_api_key = creds.api_key; if openai_api_key.is_empty() { diff --git a/src/main.rs b/src/main.rs index 4fad378..916acec 100644 --- a/src/main.rs +++ b/src/main.rs @@ -83,7 +83,7 @@ async fn main() -> Result<(), String> { let mut runner = runner::Runner::new(app_args.run); let mut stream = match app_args.engine { - args::Subcommands::OpenAIAPI(args_engine) => generators::openai::run( + args::Subcommands::Api(args_engine) => generators::openai::run( get_creds(&app_args.creds_path)?.openai, config, args_engine, From 1b86f744a63324d15e578cdbc4599eb7b6cbed84 Mon Sep 17 00:00:00 2001 From: Gly Date: Sun, 5 May 2024 14:30:25 +0200 
Subject: [PATCH 056/112] Add docs to arguments --- src/arguments.rs | 39 +++++++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/src/arguments.rs b/src/arguments.rs index 7e6edcd..25f3735 100644 --- a/src/arguments.rs +++ b/src/arguments.rs @@ -8,15 +8,18 @@ pub struct Args { #[arg(long, global = true, default_value_t = format!("{1}{0}config.yml", std::path::MAIN_SEPARATOR, crate::filesystem::config_dir()))] pub config_path: String, /// Credentials file + /// + /// Used to store API keys #[arg(long, global = true, default_value_t = format!("{1}{0}creds.yml", std::path::MAIN_SEPARATOR, crate::filesystem::cache_dir()))] pub creds_path: String, /// Verbose mode /// /// Count: - /// 0: errors - /// 1: warnings - /// 2: info - /// 3: debug + /// 0: errors, + /// 1: warnings, + /// 2: info, + /// 3: debug, + /// 4: trace #[arg(short, long, action = clap::ArgAction::Count)] pub verbose: u8, /// Engine name @@ -25,7 +28,7 @@ pub struct Args { /// (ex: openai:command) #[command(subcommand)] pub engine: Subcommands, - /// Formatter + /// Format the completion in the terminal /// /// Possible values: markdown, raw #[arg(long, short, global = true, value_enum, default_value_t = Default::default())] @@ -33,31 +36,55 @@ pub struct Args { /// Run code block if the language is supported #[arg(long, short, global = true, value_enum, default_value_t = Default::default())] pub run: RunChoice, - /// Force to run code /// User text prompt + /// + /// If the text is empty, it will be read from stdin #[arg(global = true, default_value_t = Default::default())] pub input: String, } + +/// aio subcommands #[derive(Subcommand, Debug, Clone)] pub enum Subcommands { + /// OpenAI API Api(ApiArgs), + /// Run local model FromFile(FromFileArgs), + /// Display the content of a file Local(LocalArgs), } +/// OpenAI API arguments #[derive(ClapArgs, Debug, Clone)] pub struct ApiArgs { + /// Model name + /// + /// The name of the model from /models API endpoint #[arg(long, short)] pub model: String, + /// Prompt name + /// + /// The name of the prompt defined in the configuration file #[arg(long, short)] pub prompt: Option, } +/// FromFile arguments (not used) #[derive(ClapArgs, Debug, Clone)] pub struct FromFileArgs; + +/// Local model arguments #[derive(ClapArgs, Debug, Clone)] pub struct LocalArgs { + /// Model name + /// + /// The name of the model defined in the configuration file #[arg(long, short)] pub model: String, + /// Prompt name + /// + /// The name of the prompt defined in the configuration file. + /// If not provided, it will select the "default" prompt in the configuration file + /// or the first prompt in the configuration file if the "default" prompt is not defined #[arg(long, short)] pub prompt: Option, } From aa2a87b25aae512420f24bfea0a0182261e2d9b8 Mon Sep 17 00:00:00 2001 From: Gly Date: Tue, 7 May 2024 14:33:05 +0200 Subject: [PATCH 057/112] update args docs --- docs/ARGS.md | 124 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 74 insertions(+), 50 deletions(-) diff --git a/docs/ARGS.md b/docs/ARGS.md index faeab7b..20bbd8b 100644 --- a/docs/ARGS.md +++ b/docs/ARGS.md @@ -1,74 +1,98 @@ # CLI Arguments Documentation -## General Structure -- **Program:** Communicate with large language models and AI APIs. - -## Arguments -- **`--config-path`**: Path to the configuration file. - - **Default:** `{config_dir}/config.yml` (path depends on the system-defined location). -- **`--creds-path`**: Path to the credentials file. 
- - **Default:** `{cache_dir}/creds.yml` (path depends on the system-defined location). -- **`-v, --verbose`**: Verbose mode with different levels of logging. - - **Count:** - - 0 : errors only. - - 1 : warnings. - - 2 : info. - - 3 : debug. - -#### Subcommands -- **`engine`**: Specifies the engine used, can be followed by a custom command name from the configuration file (e.g., `openai:command`). - -### Usage Examples +## Usage + +`aio [OPTIONS] [INPUT] ` + +## Common arguments +- **`--config-path `**: Path to the configuration file. + + **Default:** `~/.config/config.yml` + +- **`--creds-path `**: Path to the credentials file. + + **Default:** `~/.cache/creds.yml` + +- **`-v, --verbose...`**: Verbose mode with different levels of logging. + + **Count:** + - 0: errors only. + - 1: warnings. + - 2: info. + - 3: debug. + - 4: trace + + **Default:** 0 + +- **`-f, --formatter `:** Format the completion in the terminal + + **Choice:** + - **`markdown`**: Markdown display + - **`raw`**: Raw display + + **Default:** markdown + +- **`-r, --run `**: Run code block if the language is supported + + **Choice:** + - **`no`**: Doesn't run anything + - **`ask`**: Ask to run block of code + - **`force`**: Run code without asking + + **Default:** markdown + +## Subcommands and Arguments + +### 1. `aio api` + +Used to interact with the OpenAI API. + +- **Arguments**: + - **`--model`** (or `-m`): Specifies the model to use. + - **`--prompt`** (or `-p`): Optional. Provides an conversational prompt for the remote model. The model configuration is defined in the configuration file. + +#### Usage Examples + +For the **api** subcommand: ```bash -# Use with configuration and log levels -$ ./program_name --config-path path/to/config.yml --creds-path path/to/creds.yml -vvv - -# Specify a subcommand with the engine -$ ./program_name engine openai:command +$ ./program_name api --model gpt-3.5-turbo --prompt ask ``` -### Notes -- The paths for the configuration and credentials files are computed based on the user's environment and can be customized. -- Verbose mode allows controlling the level of log details displayed during the program's execution. - ---- +### 2. `aio from-file` -### Subcommands and Arguments Documentation +Used to display a file's content like the AI completion does. It supports `--formatter` and `--run` arguments. -#### 1. **OpenAIAPI** - Used to interact with the OpenAI API. +### 3. `aio local` - - **Arguments**: - - **`--model`** (or `-m`): Specifies the model to use. - - **`--prompt`** (or `-p`): Optional. Provides an initial prompt for the model. +Used to operate locally with a specific model. -#### 2. **FromFile** - This subcommand does not have specific arguments. It likely serves to load data from a file. - -#### 3. **Local** - Used to operate locally with a specific model. - - - **Arguments**: - - **`--model`** (or `-m`): Specifies the local model to use. - - **`--prompt`** (or `-p`): Optional. Provides an initial prompt for the local model. +**Arguments**: + - **`--model`** (or `-m`): Specifies the local model to use. The model configuration is defined in the configuration file. + - **`--prompt`** (or `-p`): Optional. Provides an conversational prompt for the local model. The prompt configuration is defined in the configuration file. 
### Usage Examples -For the **OpenAIAPI** subcommand: +For the **api** subcommand: ```bash -$ ./program_name engine OpenAIAPI --model davinci --prompt "Hello" +$ ./program_name api --model gpt-3.5-turbo --prompt ask ``` -For the **FromFile** subcommand: +For the **from-file** subcommand: ```bash -$ ./program_name engine FromFile +$ ./program_name api FromFile ``` -For the **Local** subcommand: +For the **local** subcommand: ```bash $ ./program_name engine Local --model curie ``` ### Notes - Each subcommand can be used for specific use cases, ensuring flexibility based on user needs. -- Optional arguments like `--prompt` allow for advanced customization of requests. \ No newline at end of file +- Optional arguments like `--prompt` allow for advanced customization of requests. + +### Usage Examples +```bash +# Use with configuration and debug log levels +$ ./aio --config-path path/to/config.yml --creds-path path/to/creds.yml -vvv ... +``` \ No newline at end of file From d0aaa35087eb7cf52492725dcb10690802ef6181 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Thu, 9 May 2024 01:42:30 +0200 Subject: [PATCH 058/112] fix PromptParameters serde --- src/generators/llama/config.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/generators/llama/config.rs b/src/generators/llama/config.rs index 16be8c8..cd108f6 100644 --- a/src/generators/llama/config.rs +++ b/src/generators/llama/config.rs @@ -25,6 +25,7 @@ pub struct Prompt { } #[derive(Debug, Deserialize, Serialize)] +#[serde(default)] pub struct PromptParameters { pub n_prev_tokens: i32, pub top_k: i32, From eed7b9ef8ed5dc41eb587f0d85b79c276ec399a0 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Thu, 9 May 2024 01:42:49 +0200 Subject: [PATCH 059/112] from-file => from-content --- src/arguments.rs | 8 ++++++-- src/generators/from_file.rs | 40 +++++++++++++++++++++++++++++-------- src/main.rs | 2 +- 3 files changed, 39 insertions(+), 11 deletions(-) diff --git a/src/arguments.rs b/src/arguments.rs index 25f3735..1403e28 100644 --- a/src/arguments.rs +++ b/src/arguments.rs @@ -49,7 +49,7 @@ pub enum Subcommands { /// OpenAI API Api(ApiArgs), /// Run local model - FromFile(FromFileArgs), + FromContent(FromContentArgs), /// Display the content of a file Local(LocalArgs), } @@ -70,7 +70,11 @@ pub struct ApiArgs { } /// FromFile arguments (not used) #[derive(ClapArgs, Debug, Clone)] -pub struct FromFileArgs; +pub struct FromContentArgs { + /// Interpret input as file path instead of text + #[arg(long, short = 'p')] + pub file: bool +} /// Local model arguments #[derive(ClapArgs, Debug, Clone)] diff --git a/src/generators/from_file.rs b/src/generators/from_file.rs index c988d5e..281cb72 100644 --- a/src/generators/from_file.rs +++ b/src/generators/from_file.rs @@ -1,13 +1,37 @@ +use std::io::Cursor; + use crate::args; use super::{ResultRun, ResultStream, Error}; +use tokio_util::io::ReaderStream; -pub async fn run(_: crate::config::Config, _args: args::FromFileArgs, input: &str) -> ResultRun { - use tokio_stream::StreamExt; - let file = tokio::fs::File::open(&input).await.map_err(|e| Error::Custom(std::borrow::Cow::Owned(e.to_string())))?; +#[inline] +fn err_into(e: E) -> Error { + Error::Custom(std::borrow::Cow::Owned(e.to_string())) +} +#[inline] +fn res_into(r: Result) -> Result { + r.map_err(err_into) +} - let stream = tokio_util::io::ReaderStream::new(file).map(|r| -> ResultStream { - let bytes = r.map_err(|e| Error::Custom(std::borrow::Cow::Owned(e.to_string())))?; - String::from_utf8(bytes.as_ref().to_vec()).map_err(|e| 
Error::Custom(std::borrow::Cow::Owned(e.to_string()))) - }); - Ok(Box::pin(stream)) +pub async fn run(_: crate::config::Config, args: args::FromContentArgs, input: &str) -> ResultRun { + use tokio_stream::StreamExt; + if args.file { + let file = tokio::fs::File::open(&input).await.map_err(err_into)?; + let stream = ReaderStream::new(file).map(|r| -> ResultStream { + let bytes = res_into(r)?; + String::from_utf8(bytes.as_ref().to_vec()).map_err(err_into) + }); + return Ok(Box::pin(stream)); + } else { + let stream = ReaderStream::new(Cursor::new(String::from(input).into_bytes())) + .map(res_into) + .map(|r| + r.and_then(|v| + res_into(std::str::from_utf8(v.as_ref())).map(String::from) + ) + + ); + return Ok(Box::pin(stream)); + // todo!("Implement reading from stdin") + } } \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 916acec..f8965f6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -96,7 +96,7 @@ async fn main() -> Result<(), String> { .await .map_err(|e| format!("Unable to run local model: {}", e))? } - args::Subcommands::FromFile(args_engine) => { + args::Subcommands::FromContent(args_engine) => { generators::from_file::run(config, args_engine, &app_args.input) .await .map_err(|e| format!("Failed to read from file: {}", e))? From 0146b27569166a0f2929a1d4f5807d679b69f834 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Thu, 9 May 2024 03:12:18 +0200 Subject: [PATCH 060/112] Update args docs --- docs/ARGS.md | 85 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 51 insertions(+), 34 deletions(-) diff --git a/docs/ARGS.md b/docs/ARGS.md index 20bbd8b..15b4dfc 100644 --- a/docs/ARGS.md +++ b/docs/ARGS.md @@ -16,7 +16,7 @@ - **`-v, --verbose...`**: Verbose mode with different levels of logging. **Count:** - - 0: errors only. + - 0 (default): errors only. - 1: warnings. - 2: info. - 3: debug. @@ -27,72 +27,89 @@ - **`-f, --formatter `:** Format the completion in the terminal **Choice:** - - **`markdown`**: Markdown display - - **`raw`**: Raw display + - **`markdown`:** Markdown display + - **`raw`:** Raw display **Default:** markdown - **`-r, --run `**: Run code block if the language is supported **Choice:** - - **`no`**: Doesn't run anything - - **`ask`**: Ask to run block of code - - **`force`**: Run code without asking + - **`no`:** Doesn't run anything + - **`ask`:** Ask to run block of code + - **`force`:** Run code without asking **Default:** markdown -## Subcommands and Arguments +### Global Usage Examples + +Set a custom path for configuration and creadentiale path +```bash +$ ./aio --config-path path/to/config.yml --creds-path path/to/creds.yml ... +``` + +Set log level to debug +```bash +$ ./aio -vvv ... +``` + +## Commands ### 1. `aio api` -Used to interact with the OpenAI API. +Generate text using the OpenAI API. -- **Arguments**: - - **`--model`** (or `-m`): Specifies the model to use. - - **`--prompt`** (or `-p`): Optional. Provides an conversational prompt for the remote model. The model configuration is defined in the configuration file. +**Arguments**: +- **`--model`** (or `-m`): Specifies the model to use. +- **`--prompt`** (or `-p`): Optional. Provides a conversational prompt for the remote model. The model configuration is defined in the configuration file. 
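
The model names accepted by `--model` can be checked against the service itself. A generic sketch using the OpenAI `GET /v1/models` endpoint (a plain HTTP call, not an `aio` command, assuming the usual `OPENAI_API_KEY` environment variable):

```bash
# List the models the OpenAI API exposes for this account
$ curl -s https://api.openai.com/v1/models \
    -H "Authorization: Bearer $OPENAI_API_KEY"
```
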
#### Usage Examples -For the **api** subcommand: +Generate text using GPT 3.5 Turbo model from OpenAI, with prompt "command" set in the configuration file: ```bash -$ ./program_name api --model gpt-3.5-turbo --prompt ask +$ ./program_name api --model gpt-3.5-turbo --prompt command "How to uncompress a tar.gz file ?" ``` -### 2. `aio from-file` - -Used to display a file's content like the AI completion does. It supports `--formatter` and `--run` arguments. +Generate text with no formatting +```bash +$ ./program_name api --model gpt-3.5-turbo --prompt ask --formatter raw "What's the distance between the earth and the moon ?" +``` -### 3. `aio local` +### 2. `aio from-content` -Used to operate locally with a specific model. +Displays the input like the AI completion does. It supports `--formatter` and `--run` arguments. If `--file` flag is filled, the input is a file path and will be read as the content. **Arguments**: - - **`--model`** (or `-m`): Specifies the local model to use. The model configuration is defined in the configuration file. - - **`--prompt`** (or `-p`): Optional. Provides an conversational prompt for the local model. The prompt configuration is defined in the configuration file. +- **`--file`** (or `-p`): Interpret the input as a file path instead of content -### Usage Examples +#### Usage Examples -For the **api** subcommand: +Displays the markdown "# Hello\nWorld" in the console ```bash -$ ./program_name api --model gpt-3.5-turbo --prompt ask +$ ./program_name from-content "# Hello\nWorld" ``` -For the **from-file** subcommand: +Displays the content of the README file in the console ```bash -$ ./program_name api FromFile +$ ./program_name from-content --file "./README.md" ``` -For the **local** subcommand: +Displays the content of stdin ```bash -$ ./program_name engine Local --model curie +$ cat ./README.md | ./program_name from-content ``` -### Notes -- Each subcommand can be used for specific use cases, ensuring flexibility based on user needs. -- Optional arguments like `--prompt` allow for advanced customization of requests. +### 3. `aio local` + +Generate text using local models. -### Usage Examples +**Arguments**: +- **`--model`** (or `-m`): Specifies the local model to use. The model configuration is defined in the configuration file. +- **`--prompt`** (or `-p`): Optional. Provides an conversational prompt for the local model. The prompt configuration is defined in the configuration file. + +#### Usage Examples + +Generate text using "llama3" model, with prompt "command", both set in the configuration file: ```bash -# Use with configuration and debug log levels -$ ./aio --config-path path/to/config.yml --creds-path path/to/creds.yml -vvv ... -``` \ No newline at end of file +$ ./program_name local --model llama3 --prompt command "How to uncompress a tar.gz file ?" 
+``` From dc60b366d5f2466e514cd5f91b7815529834050c Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Thu, 9 May 2024 14:08:17 +0200 Subject: [PATCH 061/112] move config in a folder --- src/{config.rs => config/mod.rs} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/{config.rs => config/mod.rs} (100%) diff --git a/src/config.rs b/src/config/mod.rs similarity index 100% rename from src/config.rs rename to src/config/mod.rs From c6d96214bb1a620b891ac1c93918682a8cc2af53 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Thu, 9 May 2024 14:48:24 +0200 Subject: [PATCH 062/112] Prompts are now common config --- src/arguments.rs | 2 +- src/config/mod.rs | 12 +-- src/config/prompt.rs | 152 +++++++++++++++++++++++++++++++ src/generators/llama/config.rs | 3 +- src/generators/llama/mod.rs | 14 +-- src/generators/llama/template.rs | 2 +- src/generators/openai/config.rs | 119 +----------------------- src/generators/openai/mod.rs | 66 ++++---------- 8 files changed, 188 insertions(+), 182 deletions(-) create mode 100644 src/config/prompt.rs diff --git a/src/arguments.rs b/src/arguments.rs index 1403e28..052875a 100644 --- a/src/arguments.rs +++ b/src/arguments.rs @@ -60,7 +60,7 @@ pub struct ApiArgs { /// Model name /// /// The name of the model from /models API endpoint - #[arg(long, short)] + #[arg(long, short, default_value = "gpt-3.5-turbo")] pub model: String, /// Prompt name /// diff --git a/src/config/mod.rs b/src/config/mod.rs index 6c0cba6..bf569e3 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -1,19 +1,19 @@ +pub mod prompt; + + use std::{borrow::Cow, collections::HashMap}; use once_cell::sync::Lazy; use regex::Regex; use serde::{Deserialize, Serialize}; -use crate::{ - arguments as args, - serde_io::DeserializeExt, - generators::openai::config::Config as OpenAIConfig, -}; +use prompt::Prompts as PromptsConfig; +use crate::serde_io::DeserializeExt; #[cfg(feature = "local-llm")] use crate::generators::llama::config::Config as LlamaConfig; #[derive(Default, Debug, Deserialize, Serialize)] pub struct Config { - pub openai: OpenAIConfig, + pub prompts: PromptsConfig, #[cfg(feature = "local-llm")] pub local: LlamaConfig, } diff --git a/src/config/prompt.rs b/src/config/prompt.rs new file mode 100644 index 0000000..67ef983 --- /dev/null +++ b/src/config/prompt.rs @@ -0,0 +1,152 @@ +use std::collections::HashMap; + +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Deserialize, Serialize)] +pub struct Prompts(pub Vec); + +impl Default for Prompts { + fn default() -> Self { + Prompts(vec![ + Prompt { + name: "command".to_string(), + messages: vec![ + Message { + role: Role::System, + content: "In markdown, write the command that best fits my request in a \"Nu\" block in \"## Command\" then describe each parameter in \"## Explanation\".".to_string(), + }, + Message { + role: Role::User, + content: "$input".to_string(), + }, + ], + parameters: Parameters { + max_tokens: Some(200), + temperature: Some(0.0), + top_p: Some(1.0), + presence_penalty: Some(0.0), + frequency_penalty: Some(0.2), + best_of: None, + n: None, + stop: None, + }, + }, + Prompt { + name: "ask".to_string(), + messages: vec![ + Message { + role: Role::System, + content: "You are ChatGPT, a powerful conversational chatbot. Answer to me in informative way unless I tell you otherwise. 
Format the text in markdown.".to_string(), + }, + Message { + role: Role::User, + content: "$input".to_string(), + }, + ], + parameters: Parameters { + max_tokens: Some(300), + temperature: Some(0.7), + top_p: Some(1.0), + presence_penalty: Some(0.0), + frequency_penalty: Some(0.0), + best_of: None, + n: None, + stop: None, + }, + }, + ]) + } +} + +#[derive(Debug, Default, Deserialize, Serialize)] +pub struct Prompt { + pub name: String, + pub messages: Vec, + pub parameters: Parameters, +} + +impl Prompt { + pub fn from_input(input: &str) -> Self { + Self { + name: "noname".to_string(), + messages: vec![Message { + role: Role::User, + content: input.into(), + }], + ..Default::default() + } + } + pub fn format_contents(mut self, args: &HashMap) -> Self { + self.messages.iter_mut().map(|m| m.format_content_as_ref(args)).for_each(|_| ()); + self + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum Role { + User, + Assistant, + System +} + +impl std::fmt::Display for Role { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Role::User => write!(f, "User"), + Role::Assistant => write!(f, "Assistant"), + Role::System => write!(f, "System"), + } + } +} +impl Role { + pub fn lowercase(&self) -> &str { + match self { + Role::User => "user", + Role::Assistant => "assistant", + Role::System => "system", + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Message { + pub role: Role, + pub content: String, +} + +#[allow(dead_code)] +impl Message { + pub fn format_content(mut self, args: &HashMap) -> Self { + self.content = crate::config::format_content(&self.content, args).to_string(); + self + } + pub fn format_content_as_ref(&mut self, args: &HashMap) -> &mut Self { + self.content = crate::config::format_content(&self.content, args).to_string(); + self + } +} + +#[derive(Debug, Default, Deserialize, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct Parameters { + #[serde(skip_serializing_if = "Option::is_none")] + pub max_tokens: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub temperature: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub top_p: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub presence_penalty: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub frequency_penalty: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub best_of: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub n: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub stop: Option, +} + + + diff --git a/src/generators/llama/config.rs b/src/generators/llama/config.rs index cd108f6..f9fa8cf 100644 --- a/src/generators/llama/config.rs +++ b/src/generators/llama/config.rs @@ -1,12 +1,11 @@ use serde::{Deserialize, Serialize}; use llama_cpp::standard_sampler::StandardSampler; -use crate::generators::openai::Message; +use crate::config::prompt::Message; use super::template::PromptTemplate; #[derive(Default, Debug, Deserialize, Serialize)] pub struct Config { pub models: Vec, - pub prompts: Vec, } #[derive(Default, Debug, Deserialize, Serialize)] diff --git a/src/generators/llama/mod.rs b/src/generators/llama/mod.rs index 527c945..382616b 100644 --- a/src/generators/llama/mod.rs +++ b/src/generators/llama/mod.rs @@ -10,7 +10,7 @@ use llama_cpp::{ use once_cell::sync::OnceCell; use log::{debug, info}; use crate::{ - args, config::{format_content, Config as AIOConfig}, utils::hashmap + args, 
config::{format_content, Config}, utils::hashmap }; use super::{Error, ResultRun}; @@ -33,21 +33,21 @@ fn init_model(config: &config::Model) -> Result<(), Error> { } pub async fn run( - config: AIOConfig, + config: Config, args: args::LocalArgs, input: &str ) -> ResultRun { let prompt = match args.prompt { - Some(prompt) => config.local.prompts + Some(prompt) => config.prompts.0 .iter() .find(|v| v.name == prompt), - None => config.local.prompts + None => config.prompts.0 .iter() .find(|v| v.name == "default") - .or_else(|| config.local.prompts.first()) + .or_else(|| config.prompts.0.first()) } .ok_or_else(|| Error::Custom("Prompt not found in config".into()))?; - let messages = prompt.content.iter() + let messages = prompt.messages.iter() .cloned() .map(|mut m| { m.content = format_content(&m.content, &hashmap!(input => input)).to_string(); @@ -82,7 +82,7 @@ pub async fn run( .map_err(|_| Error::Custom("Failed to advance context".into()))?; let completion = session - .start_completing_with(StandardSampler::default(), prompt.parameters.max_tokens as _); + .start_completing_with(StandardSampler::default(), prompt.parameters.max_tokens.unwrap_or(1024) as _); if log::log_enabled!(log::Level::Trace) { let completion_stream = StreamExt::map(completion, |token| Ok(format!("{}({})", model.token_to_piece(token), token.0))); Ok(Box::pin(completion_stream)) diff --git a/src/generators/llama/template.rs b/src/generators/llama/template.rs index 874f8e1..b2e0ff5 100644 --- a/src/generators/llama/template.rs +++ b/src/generators/llama/template.rs @@ -1,6 +1,6 @@ use serde::{Deserialize, Serialize}; use crate::{ - generators::openai::{Message, Role}, + config::prompt::{Message, Role}, utils::vec_merge }; use llama_cpp::{LlamaTokenizationError, Token}; diff --git a/src/generators/openai/config.rs b/src/generators/openai/config.rs index bb6fddc..99909ce 100644 --- a/src/generators/openai/config.rs +++ b/src/generators/openai/config.rs @@ -3,123 +3,6 @@ use std::collections::HashMap; use serde::{Deserialize, Serialize}; use super::ChatRequestParameters; use super::{Message, Role}; -#[derive(Debug, Deserialize, Serialize)] -pub struct Config { - pub prompts: Vec, -} -#[derive(Debug, Default, Deserialize, Serialize)] -pub struct Parameters { - #[serde(skip_serializing_if = "Option::is_none")] - max_tokens: Option, - #[serde(skip_serializing_if = "Option::is_none")] - temperature: Option, - #[serde(skip_serializing_if = "Option::is_none")] - top_p: Option, - #[serde(skip_serializing_if = "Option::is_none")] - presence_penalty: Option, - #[serde(skip_serializing_if = "Option::is_none")] - frequency_penalty: Option, - #[serde(skip_serializing_if = "Option::is_none")] - best_of: Option, - #[serde(skip_serializing_if = "Option::is_none")] - n: Option, - #[serde(skip_serializing_if = "Option::is_none")] - stop: Option, -} -impl From for ChatRequestParameters { - fn from(parameters: Parameters) -> Self { - Self { - max_tokens: parameters.max_tokens, - temperature: parameters.temperature, - top_p: parameters.top_p, - presence_penalty: parameters.presence_penalty, - frequency_penalty: parameters.frequency_penalty, - best_of: parameters.best_of, - n: parameters.n, - stop: parameters.stop, - ..Default::default() - } - } -} -#[derive(Debug, Default, Deserialize, Serialize)] -pub struct Prompt { - pub name: String, - #[serde(skip_serializing_if = "Option::is_none")] - pub model: Option, - pub messages: Vec, - pub parameters: Parameters, -} -impl Prompt { - pub fn from_input(input: &str) -> Self { - Self { - name: 
"noname".to_string(), - messages: vec![Message { - role: super::Role::User, - content: input.into(), - }], - ..Default::default() - } - } - pub fn format_contents(mut self, args: &HashMap) -> Self { - self.messages.iter_mut().map(|m| m.format_content_as_ref(args)).for_each(|_| ()); - self - } -} -impl Default for Config { - fn default() -> Self { - Config { - prompts: vec![ - Prompt { - name: "command".to_string(), - model: None, - messages: vec![ - Message { - role: Role::System, - content: "In markdown, write the command that best fits my request in a \"Nu\" block in \"## Command\" then describe each parameter in \"## Explanation\".".to_string(), - }, - Message { - role: Role::User, - content: "$input".to_string(), - }, - ], - parameters: Parameters { - max_tokens: Some(200), - temperature: Some(0.0), - top_p: Some(1.0), - presence_penalty: Some(0.0), - frequency_penalty: Some(0.2), - best_of: None, - n: None, - stop: None, - }, - }, - Prompt { - name: "ask".to_string(), - model: None, - messages: vec![ - Message { - role: Role::System, - content: "You are ChatGPT, a powerful conversational chatbot. Answer to me in informative way unless I tell you otherwise. Format the text in markdown.".to_string(), - }, - Message { - role: Role::User, - content: "$input".to_string(), - }, - ], - parameters: Parameters { - max_tokens: Some(300), - temperature: Some(0.7), - top_p: Some(1.0), - presence_penalty: Some(0.0), - frequency_penalty: Some(0.0), - best_of: None, - n: None, - stop: None, - }, - }, - ], - } - } -} \ No newline at end of file + diff --git a/src/generators/openai/mod.rs b/src/generators/openai/mod.rs index 97b93c7..eb2f89f 100644 --- a/src/generators/openai/mod.rs +++ b/src/generators/openai/mod.rs @@ -11,54 +11,10 @@ use crate::{ hashmap, FlattenTrait, SplitBytesFactory } }; -use self::config::Prompt; +use crate::config::prompt::{Prompt, Parameters as PromptParameters, Message, Role}; use super::{ResultRun, Error}; -#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)] -#[serde(rename_all = "lowercase")] -pub enum Role { - User, - Assistant, - System -} - -impl std::fmt::Display for Role { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Role::User => write!(f, "User"), - Role::Assistant => write!(f, "Assistant"), - Role::System => write!(f, "System"), - } - } -} -impl Role { - pub fn lowercase(&self) -> &str { - match self { - Role::User => "user", - Role::Assistant => "assistant", - Role::System => "system", - } - } -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Message { - pub role: Role, - pub content: String, -} - -#[allow(dead_code)] -impl Message { - pub fn format_content(mut self, args: &HashMap) -> Self { - self.content = crate::config::format_content(&self.content, args).to_string(); - self - } - pub fn format_content_as_ref(&mut self, args: &HashMap) -> &mut Self { - self.content = crate::config::format_content(&self.content, args).to_string(); - self - } -} #[derive(Debug, Default, Serialize)] pub struct ChatRequestParameters { #[serde(skip_serializing_if = "Option::is_none")] @@ -85,6 +41,22 @@ pub struct ChatRequestParameters { pub stop: Option, } +impl From for ChatRequestParameters { + fn from(parameters: PromptParameters) -> Self { + Self { + max_tokens: parameters.max_tokens, + temperature: parameters.temperature, + top_p: parameters.top_p, + presence_penalty: parameters.presence_penalty, + frequency_penalty: parameters.frequency_penalty, + best_of: parameters.best_of, + n: parameters.n, + stop: 
parameters.stop, + ..Default::default() + } + } +} + #[derive(Debug, Serialize)] pub struct ChatRequest { model: String, @@ -220,7 +192,7 @@ pub async fn run(creds: credentials::Credentials, config: crate::config::Config, } let prompt = if let Some(config_prompt) = args.prompt { - config.openai.prompts.into_iter() + config.prompts.0.into_iter() .find(|prompt| prompt.name == config_prompt) .ok_or(Error::Custom("Prompt not found".into()))? .format_contents(&hashmap!(input => input)) @@ -229,7 +201,7 @@ pub async fn run(creds: credentials::Credentials, config: crate::config::Config, }; // Send a request - let chat_request = ChatRequest::new(prompt.model.unwrap_or_else(|| "gpt-3.5-turbo".into())) + let chat_request = ChatRequest::new(args.model) .add_messages(prompt.messages) .set_parameters(prompt.parameters.into()) .into_stream(); From 275941238acb54fe35ee71e487cbdec41a0b2a62 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Thu, 9 May 2024 14:49:38 +0200 Subject: [PATCH 063/112] Update args docs --- docs/ARGS.md | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/docs/ARGS.md b/docs/ARGS.md index 15b4dfc..829294c 100644 --- a/docs/ARGS.md +++ b/docs/ARGS.md @@ -43,7 +43,7 @@ ### Global Usage Examples -Set a custom path for configuration and creadentiale path +Set a custom path for configuration and credentials path ```bash $ ./aio --config-path path/to/config.yml --creds-path path/to/creds.yml ... ``` @@ -60,8 +60,11 @@ $ ./aio -vvv ... Generate text using the OpenAI API. **Arguments**: -- **`--model`** (or `-m`): Specifies the model to use. -- **`--prompt`** (or `-p`): Optional. Provides a conversational prompt for the remote model. The model configuration is defined in the configuration file. +- **`--model`** (or `-m`): The model name to use. The name must exist in the service. + Call `/models` GET API to get the list of all models provided by the service. +- **`--prompt`** (or `-p`): Optional. Provides a conversational prompt for the remote model. + The prompt configuration is defined in the configuration file. If this argument is not defined, + a prompt with a user message containing the input will be generated. #### Usage Examples @@ -105,7 +108,9 @@ Generate text using local models. **Arguments**: - **`--model`** (or `-m`): Specifies the local model to use. The model configuration is defined in the configuration file. -- **`--prompt`** (or `-p`): Optional. Provides an conversational prompt for the local model. The prompt configuration is defined in the configuration file. +- **`--prompt`** (or `-p`): Optional. Provides a conversational prompt for the local model. + The prompt configuration is defined in the configuration file. If this argument is not defined, + a prompt with a user message containing the input will be generated. 
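
In configuration terms, that generated fallback is roughly equivalent to a one-message prompt (a sketch of the internal `Prompt::from_input` behaviour, not something you need to write yourself):

```yaml
# Implicit prompt used when --prompt is omitted
- name: noname         # internal placeholder name
  messages:
    - role: user
      content: $input  # the raw command-line input
```
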
#### Usage Examples From 9713a38f5b1ad9c0298b7888bd200c3b2ea9f953 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Thu, 9 May 2024 15:51:36 +0200 Subject: [PATCH 064/112] Add more control to stop config parameter --- src/config/prompt.rs | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/src/config/prompt.rs b/src/config/prompt.rs index 67ef983..c7b419e 100644 --- a/src/config/prompt.rs +++ b/src/config/prompt.rs @@ -28,7 +28,7 @@ impl Default for Prompts { frequency_penalty: Some(0.2), best_of: None, n: None, - stop: None, + stop: Stop::None, }, }, Prompt { @@ -51,7 +51,7 @@ impl Default for Prompts { frequency_penalty: Some(0.0), best_of: None, n: None, - stop: None, + stop: Stop::None, }, }, ]) @@ -144,9 +144,22 @@ pub struct Parameters { pub best_of: Option, #[serde(skip_serializing_if = "Option::is_none")] pub n: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub stop: Option, + #[serde(skip_serializing_if = "Stop::is_none")] + pub stop: Stop, } +#[derive(Debug, Default, Deserialize, Serialize)] +pub enum Stop { + #[default] + None, + #[serde(untagged)] + One(String), + #[serde(untagged)] + Many(Vec), +} - +impl Stop { + pub fn is_none(&self) -> bool { + matches!(self, Stop::None) + } +} \ No newline at end of file From 402fc4b5471877f178e074cd5c73cc9a2582e296 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Thu, 9 May 2024 15:51:48 +0200 Subject: [PATCH 065/112] WIP update config --- docs/CONFIG.md | 79 ++++++++++++++++++++++++++++---------------------- 1 file changed, 44 insertions(+), 35 deletions(-) diff --git a/docs/CONFIG.md b/docs/CONFIG.md index c4e7afa..b0d977b 100644 --- a/docs/CONFIG.md +++ b/docs/CONFIG.md @@ -8,13 +8,13 @@ Welcome to the configuration file README for our project! This README provides a - [Table of Contents](#table-of-contents) - [Introduction](#introduction) - [Usage](#usage) - - [OpenAI](#openai) - - [Example](#example) + - [Prompts](#prompts) + - [Example](#example) - [Sample Prompts](#sample-prompts) ## Introduction -The configuration file allows you to define prompts and their associated settings for interactions with the AI API (Currently only OpenAI GPT-3.5 Turbo model is available). Each engine have a sets of prompts, and each prompt has a set of parameters that can be adjusted to control the output generated by the model. +The configuration file allows you to define prompts, local models and their respective. Prompts are common to This document will guide you through setting up your prompts and using the configuration file effectively. @@ -22,52 +22,61 @@ By default, `aio` will try to read the configuration file from `~/.config/aio/co ## Usage -### OpenAI +### Prompts To use the configuration file effectively, follow these steps: -1. **Defining Prompts**: In the configuration file, you can define different prompts under the `openai.prompts` section. +In the configuration file, you can define different prompts under the `prompts` section. - 1. **Name**: The name of the prompt. The name will be used to identify the prompt you select in the `-e|--engine` argument. +1. **Name the prompt**: This is the name to refer in the `--prompt` argument. - 2. **Messages**: The whole prompt consists of several messages of three types: - - "system" messages provide context and instructions to the model, while - - "user" messages define the user's input or query. - - "assisstant" messages are used to mocking the AI response. +2. 
**Messages**: The whole prompt consists of several messages of three types: + - "system" messages provide context and instructions to the model, while + - "user" messages define the user's input or query. + - "assistant" messages are used to mimic the AI response. - Use the variable `$input` to represent the input from the command line. + Use the variable `$input` to represent the input from the command line. - 3. **Parameters**: Parameters such as temperature, top-p, penalties, and max tokens can be adjusted to control the output generated by the model. You can setup `max_tokens`, `temperature`, `top_p`, `presence_penalty`, `frequency_penalty`, `best_of`, `n`, `stop`. Refer to the Documentation of the [*chat create* OpenAI API](https://platform.openai.com/docs/api-reference/chat/create) for more information about the parameters. +3. **Parameters**: Parameters can adjust AI generation. Here is the list of parameters : + - [`max_tokens`](https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens): The maximum number of tokens that can be generated in the chat completion. + - [`temperature`](https://platform.openai.com/docs/api-reference/chat/create#chat-create-temperature): The temperature setting for the generated text. Higher temperatures result in more creative, but potentially incoherent, text. + - [`top_p`](https://platform.openai.com/docs/api-reference/chat/create#chat-create-top_p): The maximum probability that the ASSISTANT should generate. The ASSISTANT will return the most likely answer, but with a probability below this threshold. This allows the ASSISTANT to return even the most unlikely of all possible answers, if the model is very certain. + - [`presence_penalty`](https://platform.openai.com/docs/api-reference/chat/create#chat-create-presence_penalty): The presence penalty modifies the likelihood of selected tokens based on their presence in the input. + - [`frequency_penalty`](https://platform.openai.com/docs/api-reference/chat/create#chat-create-frequency_penalty): The frequency penalty modifies the likelihood of selected tokens based on their frequency in the input. + - [`best_of`](https://platform.openai.com/docs/api-reference/chat/create#chat-create-best_of): The number of responses to generate. + - [`stop`](https://platform.openai.com/docs/api-reference/chat/create#chat-create-stop): The text used to stop the ASSISTANT from generating more text. - 4. **Models**: You can optionally select the model you want to use for the prompt. You have to choose a model compatible with OpenAI chat completion. - [You can find the list of those model here](https://platform.openai.com/docs/models/model-endpoint-compatibility). - By default, the model used is `gpt-3.5-turbo`. + **OpenAI API specific parameters** + - [`n`](https://platform.openai.com/docs/api-reference/chat/create#chat-create-n): The number of responses to generate. -## Example + **Note**: each parameter above is optional. -Here's a snippet of the configuration file structure: + Refer to the Documentation of the [*chat create* OpenAI API](https://platform.openai.com/docs/api-reference/chat/create) for more information about the parameters. + +#### Example ```yaml -openai: - prompts: - - name: command - model: gpt-3.5-turbo # optional - messages: - # System message for context - - role: system - content: In markdown, write the command... - # User message - - role: user - content: $input - parameters: - # Parameters to control model behavior. 
Each parameter is optional - temperature: 0 - top-p: 1.0 - frequency-penalty: 0.2 - presence-penalty: 0 - max-tokens: 200 +prompts: + - name: command + model: gpt-3.5-turbo # optional + messages: + # System message for context + - role: system + content: In markdown, write the command... + # User message + - role: user + content: $input + parameters: + # Parameters to control model behavior. Each parameter is optional + temperature: 0 + top-p: 1.0 + frequency-penalty: 0.2 + presence-penalty: 0 + max-tokens: 200 ``` + + ## Sample Prompts Here are examples of prompt definitions within [the sample configuration file](../config.yml) you can find in the repository: From 0e7aec2c245186b98a0438d3277bde6f17c31ed2 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Thu, 9 May 2024 23:13:47 +0200 Subject: [PATCH 066/112] Prompt message content are optional --- src/config/prompt.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/config/prompt.rs b/src/config/prompt.rs index c7b419e..6cd2512 100644 --- a/src/config/prompt.rs +++ b/src/config/prompt.rs @@ -13,11 +13,11 @@ impl Default for Prompts { messages: vec![ Message { role: Role::System, - content: "In markdown, write the command that best fits my request in a \"Nu\" block in \"## Command\" then describe each parameter in \"## Explanation\".".to_string(), + content: Some("In markdown, write the unix command that best fits my request in a block of code under a \"## Command\" then describe the program and each parameter in \"## Explanation\".".to_string()), }, Message { role: Role::User, - content: "$input".to_string(), + content: Some("$input".to_string()), }, ], parameters: Parameters { @@ -36,11 +36,11 @@ impl Default for Prompts { messages: vec![ Message { role: Role::System, - content: "You are ChatGPT, a powerful conversational chatbot. Answer to me in informative way unless I tell you otherwise. Format the text in markdown.".to_string(), + content: Some("You are a powerful intelligent conversational chatbot. Unless I tell you otherwise, answer to me in an informative way. 
You should format the text in Markdown.".to_string()), }, Message { role: Role::User, - content: "$input".to_string(), + content: Some("$input".to_string()), }, ], parameters: Parameters { @@ -71,7 +71,7 @@ impl Prompt { name: "noname".to_string(), messages: vec![Message { role: Role::User, - content: input.into(), + content: Some(input.into()), }], ..Default::default() } @@ -112,17 +112,17 @@ impl Role { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Message { pub role: Role, - pub content: String, + pub content: Option, } #[allow(dead_code)] impl Message { pub fn format_content(mut self, args: &HashMap) -> Self { - self.content = crate::config::format_content(&self.content, args).to_string(); + self.content = self.content.map(|c| crate::config::format_content(&c, args).to_string()); self } pub fn format_content_as_ref(&mut self, args: &HashMap) -> &mut Self { - self.content = crate::config::format_content(&self.content, args).to_string(); + self.content = self.content.as_mut().map(|c| crate::config::format_content(&c, args).to_string()); self } } From daef5f5935bf7e38bee58fc04d555e4d8b7761d9 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Thu, 9 May 2024 23:14:40 +0200 Subject: [PATCH 067/112] add helper functions --- src/config/prompt.rs | 19 ++++++++++++++++++- src/generators/llama/mod.rs | 10 ++-------- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/src/config/prompt.rs b/src/config/prompt.rs index 6cd2512..80a3573 100644 --- a/src/config/prompt.rs +++ b/src/config/prompt.rs @@ -5,6 +5,16 @@ use serde::{Deserialize, Serialize}; #[derive(Debug, Deserialize, Serialize)] pub struct Prompts(pub Vec); +impl Prompts { + pub fn format_contents(mut self, args: &HashMap) -> Self { + self.0.iter_mut().for_each(|v| { Prompt::format_contents_as_ref(v, args); }); + self + } + pub fn format_contents_as_ref(&mut self, args: &HashMap) -> &mut Self { + self.0.iter_mut().for_each(|v| { Prompt::format_contents_as_ref(v, args); }); + self + } +} impl Default for Prompts { fn default() -> Self { Prompts(vec![ @@ -77,9 +87,16 @@ impl Prompt { } } pub fn format_contents(mut self, args: &HashMap) -> Self { - self.messages.iter_mut().map(|m| m.format_content_as_ref(args)).for_each(|_| ()); + self.messages.iter_mut().for_each(|m|{ m.format_content_as_ref(args); }); + self + } + pub fn format_contents_as_ref(&mut self, args: &HashMap) -> &mut Self { + self.messages.iter_mut().for_each(|m| { m.format_content_as_ref(args); }); self } + pub fn formatted_messages(&self, args: &HashMap) -> Vec { + self.messages.iter().cloned().map(|v| Message::format_content(v, args)).collect() + } } #[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)] diff --git a/src/generators/llama/mod.rs b/src/generators/llama/mod.rs index 382616b..0b3b8a0 100644 --- a/src/generators/llama/mod.rs +++ b/src/generators/llama/mod.rs @@ -10,7 +10,7 @@ use llama_cpp::{ use once_cell::sync::OnceCell; use log::{debug, info}; use crate::{ - args, config::{format_content, Config}, utils::hashmap + args, config::Config, utils::hashmap }; use super::{Error, ResultRun}; @@ -47,13 +47,7 @@ pub async fn run( .or_else(|| config.prompts.0.first()) } .ok_or_else(|| Error::Custom("Prompt not found in config".into()))?; - let messages = prompt.messages.iter() - .cloned() - .map(|mut m| { - m.content = format_content(&m.content, &hashmap!(input => input)).to_string(); - m - }) - .collect::>(); + let messages = prompt.formatted_messages(&hashmap!(input => input)); let model_config = config.local.models.into_iter() 
.find(|c| c.name == args.model) .ok_or_else(|| Error::Custom("Model not found in config".into()))?; From 2f98f02b7a34607723b8685e2ae92f01044d54c0 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Thu, 9 May 2024 23:15:20 +0200 Subject: [PATCH 068/112] New config design --- src/config/prompt.rs | 45 ++++++++++++++----- src/generators/llama/config.rs | 82 +++++++++++++++++----------------- src/generators/openai/mod.rs | 12 ++--- 3 files changed, 80 insertions(+), 59 deletions(-) diff --git a/src/config/prompt.rs b/src/config/prompt.rs index 80a3573..cf2b54e 100644 --- a/src/config/prompt.rs +++ b/src/config/prompt.rs @@ -1,5 +1,6 @@ use std::collections::HashMap; +use llama_cpp::standard_sampler::StandardSampler; use serde::{Deserialize, Serialize}; #[derive(Debug, Deserialize, Serialize)] @@ -29,6 +30,10 @@ impl Default for Prompts { role: Role::User, content: Some("$input".to_string()), }, + Message { + role: Role::Assistant, + content: None, + }, ], parameters: Parameters { max_tokens: Some(200), @@ -36,9 +41,7 @@ impl Default for Prompts { top_p: Some(1.0), presence_penalty: Some(0.0), frequency_penalty: Some(0.2), - best_of: None, - n: None, - stop: Stop::None, + ..Default::default() }, }, Prompt { @@ -52,6 +55,10 @@ impl Default for Prompts { role: Role::User, content: Some("$input".to_string()), }, + Message { + role: Role::Assistant, + content: None, + }, ], parameters: Parameters { max_tokens: Some(300), @@ -59,9 +66,7 @@ impl Default for Prompts { top_p: Some(1.0), presence_penalty: Some(0.0), frequency_penalty: Some(0.0), - best_of: None, - n: None, - stop: Stop::None, + ..Default::default() }, }, ]) @@ -157,12 +162,32 @@ pub struct Parameters { pub presence_penalty: Option, #[serde(skip_serializing_if = "Option::is_none")] pub frequency_penalty: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub best_of: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub n: Option, #[serde(skip_serializing_if = "Stop::is_none")] pub stop: Stop, + + //OpenAI only + #[serde(skip_serializing_if = "Option::is_none")] + pub n: Option, + + //Local only + #[serde(skip_serializing_if = "Option::is_none")] + pub n_prev_tokens: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub negative_prompt: Option, +} + +impl From for StandardSampler { + fn from(parameters: Parameters) -> Self { + let def = StandardSampler::default(); + StandardSampler { + temp: parameters.temperature.unwrap_or(def.temp), + top_p: parameters.top_p.unwrap_or(def.top_p), + penalty_repeat: parameters.presence_penalty.unwrap_or(def.penalty_repeat), + penalty_freq: parameters.frequency_penalty.unwrap_or(def.penalty_freq), + n_prev: parameters.n_prev_tokens.unwrap_or(def.n_prev as _) as _, + ..Default::default() + } + } } #[derive(Debug, Default, Deserialize, Serialize)] diff --git a/src/generators/llama/config.rs b/src/generators/llama/config.rs index f9fa8cf..e0afcbe 100644 --- a/src/generators/llama/config.rs +++ b/src/generators/llama/config.rs @@ -1,6 +1,4 @@ use serde::{Deserialize, Serialize}; -use llama_cpp::standard_sampler::StandardSampler; -use crate::config::prompt::Message; use super::template::PromptTemplate; #[derive(Default, Debug, Deserialize, Serialize)] @@ -14,54 +12,58 @@ pub struct Model { pub path: String, #[serde(default)] pub template: PromptTemplate, -} - -#[derive(Default, Debug, Deserialize, Serialize)] -pub struct Prompt { - pub name: String, - pub content: Vec, - pub parameters: PromptParameters + pub parameters: ModelParameters, } #[derive(Debug, Deserialize, 
Serialize)] #[serde(default)] -pub struct PromptParameters { - pub n_prev_tokens: i32, - pub top_k: i32, - pub top_p: f32, - pub temperature: f32, - pub repeat_penalty: f32, - pub repeat_last_n: i32, - pub max_tokens: i32, - pub negative_prompt: Option, +pub struct ModelParameters { + pub n_gpu_layers: u32, + pub split_mode: SplitMode, + pub main_gpu: u32, + pub vocab_only: bool, + pub use_mmap: bool, + pub use_mlock: bool, } -impl From for StandardSampler { - fn from(parameters: PromptParameters) -> Self { +impl Default for ModelParameters { + fn default() -> Self { + let def = llama_cpp::LlamaParams::default(); Self { - n_prev: parameters.n_prev_tokens, - top_k: parameters.top_k, - top_p: parameters.top_p, - temp: parameters.temperature, - penalty_repeat: parameters.repeat_penalty, - penalty_last_n: parameters.repeat_last_n, - cfg_negative_prompt: parameters.negative_prompt.unwrap_or_default(), - ..Default::default() + n_gpu_layers: def.n_gpu_layers, + split_mode: def.split_mode.into(), + main_gpu: def.main_gpu, + vocab_only: def.vocab_only, + use_mmap: def.use_mmap, + use_mlock: def.use_mlock, } } } -impl Default for PromptParameters { - fn default() -> Self { - let default_standard_sampler = StandardSampler::default(); - Self { - max_tokens: 1000, - n_prev_tokens: default_standard_sampler.n_prev, - top_k: default_standard_sampler.top_k, - top_p: default_standard_sampler.top_p, - temperature: default_standard_sampler.temp, - repeat_penalty: default_standard_sampler.penalty_repeat, - repeat_last_n: default_standard_sampler.penalty_last_n, - negative_prompt: None, + +#[derive(Debug, Deserialize, Serialize)] +#[serde(rename_all = "lowercase")] +pub enum SplitMode { + None, + Layer, + Row, +} +impl From for llama_cpp::SplitMode { + fn from(x: SplitMode) -> Self { + match x { + SplitMode::None => Self::None, + SplitMode::Layer => Self::Layer, + SplitMode::Row => Self::Row, + } + } +} + +impl From for SplitMode { + fn from(x: llama_cpp::SplitMode) -> Self { + match x { + llama_cpp::SplitMode::None => Self::None, + llama_cpp::SplitMode::Layer => Self::Layer, + llama_cpp::SplitMode::Row => Self::Row, + _ => unreachable!("Unsupported split mode"), } } } \ No newline at end of file diff --git a/src/generators/openai/mod.rs b/src/generators/openai/mod.rs index eb2f89f..ee20e90 100644 --- a/src/generators/openai/mod.rs +++ b/src/generators/openai/mod.rs @@ -1,13 +1,10 @@ pub mod config; pub mod credentials; -use std::collections::HashMap; - use serde::{Serialize, Deserialize}; use tokio_stream::StreamExt; use crate::{ - args, - utils::{ + args, config::prompt::Stop, utils::{ hashmap, FlattenTrait, SplitBytesFactory } }; @@ -28,8 +25,6 @@ pub struct ChatRequestParameters { #[serde(skip_serializing_if = "Option::is_none")] pub frequency_penalty: Option, #[serde(skip_serializing_if = "Option::is_none")] - pub best_of: Option, - #[serde(skip_serializing_if = "Option::is_none")] pub n: Option, #[serde(skip_serializing_if = "Option::is_none")] pub stream: Option, @@ -37,8 +32,8 @@ pub struct ChatRequestParameters { pub logprobs: Option, #[serde(skip_serializing_if = "Option::is_none")] pub echo: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub stop: Option, + #[serde(skip_serializing_if = "Stop::is_none")] + pub stop: Stop, } impl From for ChatRequestParameters { @@ -49,7 +44,6 @@ impl From for ChatRequestParameters { top_p: parameters.top_p, presence_penalty: parameters.presence_penalty, frequency_penalty: parameters.frequency_penalty, - best_of: parameters.best_of, n: parameters.n, 
stop: parameters.stop, ..Default::default() From 7cb6c16615ec75fe2582fa09eec4774c77365c48 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Thu, 9 May 2024 23:48:53 +0200 Subject: [PATCH 069/112] Discard message without content (except for assistant) --- src/config/prompt.rs | 3 ++- src/generators/llama/template.rs | 35 ++++++++++++++++++++++---------- src/generators/openai/mod.rs | 4 ++-- 3 files changed, 28 insertions(+), 14 deletions(-) diff --git a/src/config/prompt.rs b/src/config/prompt.rs index cf2b54e..9661527 100644 --- a/src/config/prompt.rs +++ b/src/config/prompt.rs @@ -144,7 +144,7 @@ impl Message { self } pub fn format_content_as_ref(&mut self, args: &HashMap) -> &mut Self { - self.content = self.content.as_mut().map(|c| crate::config::format_content(&c, args).to_string()); + self.content = self.content.as_mut().map(|c| crate::config::format_content(c, args).to_string()); self } } @@ -163,6 +163,7 @@ pub struct Parameters { #[serde(skip_serializing_if = "Option::is_none")] pub frequency_penalty: Option, #[serde(skip_serializing_if = "Stop::is_none")] + #[serde(default)] pub stop: Stop, //OpenAI only diff --git a/src/generators/llama/template.rs b/src/generators/llama/template.rs index b2e0ff5..0922bb2 100644 --- a/src/generators/llama/template.rs +++ b/src/generators/llama/template.rs @@ -55,13 +55,17 @@ impl PromptTemplate { model.tokenize_bytes("assistant", false, true)? ]; prompt.iter() + .filter(|m| !(matches!(m.role, Role::System | Role::User) && m.content.is_none())) .for_each(|m| { let role_tokens = match m.role { Role::System => &system, Role::User => &user, Role::Assistant => &assistant }; - vec_merge!(tokens, &im_start, role_tokens, &nl, &model.tokenize_bytes(&m.content, false, false).unwrap(), &im_end, &nl); + vec_merge!(tokens, &im_start, role_tokens, &nl); + if let Some(content) = m.content.as_ref() { + vec_merge!(tokens, &model.tokenize_bytes(content, false, false).unwrap(), &im_end, &nl); + } }); Ok(()) } @@ -74,11 +78,12 @@ impl PromptTemplate { let nl = model.tokenize_bytes("\n", false, true)?; prompt.iter() .for_each(|m| { + let Some(content) = m.content.as_ref() else { return; }; + let content_tokens = model.tokenize_bytes(content, false, false).unwrap(); match m.role { - Role::System => vec_merge!(tokens, &inst_start, &system_start, &model.tokenize_bytes(&m.content, false, false).unwrap(), &system_end, &inst_end, &nl), - Role::User => vec_merge!(tokens, &inst_start, &model.tokenize_bytes(&m.content, false, false).unwrap(), &inst_end, &nl), - Role::Assistant if !m.content.is_empty() => vec_merge!(tokens, &model.tokenize_bytes(&m.content, false, false).unwrap(), &eos, &nl), - _ => (), + Role::System => vec_merge!(tokens, &inst_start, &system_start, &content_tokens, &system_end, &inst_end, &nl), + Role::User => vec_merge!(tokens, &inst_start, &content_tokens, &inst_end, &nl), + Role::Assistant => vec_merge!(tokens, &content_tokens, &eos, &nl), } }); Ok(()) @@ -94,6 +99,7 @@ impl PromptTemplate { model.tokenize_bytes("assistant", false, true)? 
]; prompt.iter() + .filter(|m| !(matches!(m.role, Role::System | Role::User) && m.content.is_none())) .for_each(|m| { let role_tokens = match m.role { Role::System => &system, @@ -101,8 +107,8 @@ impl PromptTemplate { Role::Assistant => &assistant }; vec_merge!(tokens, &start_header_id, role_tokens, &end_header_id, &nl, &nl); - if !(m.role == Role::Assistant && m.content.is_empty()) { - vec_merge!(tokens, &model.tokenize_bytes(&m.content, false, false).unwrap(), &eot_id); + if let Some(content) = &m.content { + vec_merge!(tokens, &model.tokenize_bytes(content, false, false).unwrap_or_default(), &eot_id); } }); Ok(()) @@ -115,14 +121,21 @@ impl PromptTemplate { let assistant_prefix_tokens = model.tokenize_bytes(&custom_template.assistant_prefix, false, true)?; let assistant_suffix_tokens = model.tokenize_bytes(&custom_template.assistant_suffix, false, true)?; prompt.iter() + .filter(|m| !(matches!(m.role, Role::System | Role::User) && m.content.is_none())) .for_each(|m| { - let content_tokens = model.tokenize_bytes(&m.content, false, false).unwrap(); match m.role { - Role::System => vec_merge!(tokens, &system_prefix_tokens, &content_tokens, &system_suffix_tokens), - Role::User => vec_merge!(tokens, &user_prefix_tokens, &content_tokens, &user_suffix_tokens), + Role::System => { + let content_tokens = model.tokenize_bytes(m.content.as_ref().unwrap(), false, false).unwrap(); + vec_merge!(tokens, &system_prefix_tokens, &content_tokens, &system_suffix_tokens) + } + Role::User => { + let content_tokens = model.tokenize_bytes(m.content.as_ref().unwrap(), false, false).unwrap(); + vec_merge!(tokens, &user_prefix_tokens, &content_tokens, &user_suffix_tokens) + } Role::Assistant => { vec_merge!(tokens, &assistant_prefix_tokens); - if !m.content.is_empty() { + if let Some(content) = &m.content { + let content_tokens = model.tokenize_bytes(content, false, false).unwrap_or_default(); vec_merge!(tokens, &content_tokens, &assistant_suffix_tokens) } }, diff --git a/src/generators/openai/mod.rs b/src/generators/openai/mod.rs index ee20e90..85b4a7b 100644 --- a/src/generators/openai/mod.rs +++ b/src/generators/openai/mod.rs @@ -67,11 +67,11 @@ impl ChatRequest { } } pub fn add_message(mut self, role: Role, content: String) -> Self { - self.messages.push(Message { role, content }); + self.messages.push(Message { role, content: Some(content) }); self } pub fn add_messages(mut self, messages: Vec) -> Self { - self.messages.extend(messages); + self.messages.extend(messages.into_iter().filter(|m| m.content.is_some())); self } pub fn set_parameters(mut self, parameters: ChatRequestParameters) -> Self { From 316856f03ec8a1776219a5da2b306e40bda392d2 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Fri, 10 May 2024 00:09:03 +0200 Subject: [PATCH 070/112] Update config docs --- docs/CONFIG.md | 184 ++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 150 insertions(+), 34 deletions(-) diff --git a/docs/CONFIG.md b/docs/CONFIG.md index b0d977b..e452c28 100644 --- a/docs/CONFIG.md +++ b/docs/CONFIG.md @@ -7,9 +7,17 @@ Welcome to the configuration file README for our project! 
This README provides a - [AIO Configuration File README](#aio-configuration-file-readme) - [Table of Contents](#table-of-contents) - [Introduction](#introduction) - - [Usage](#usage) - - [Prompts](#prompts) - - [Example](#example) + - [Prompts settings](#prompts-settings) + - [Fields](#fields) + - [Example](#example) + - [Local models settings](#local-models-settings) + - [Overview](#overview) + - [Configuring Models](#configuring-models) + - [Custom template](#custom-template) + - [List of fields](#list-of-fields) + - [How does it work ?](#how-does-it-work-) + - [Example](#example-1) + - [Example Local Configuration](#example-local-configuration) - [Sample Prompts](#sample-prompts) ## Introduction @@ -20,77 +28,185 @@ This document will guide you through setting up your prompts and using the confi By default, `aio` will try to read the configuration file from `~/.config/aio/config.yaml`. You can also specify the path to the configuration file using the `--config-path` argument. For example: `aio --config-path ./config.yaml`. -## Usage - -### Prompts +## Prompts settings To use the configuration file effectively, follow these steps: In the configuration file, you can define different prompts under the `prompts` section. -1. **Name the prompt**: This is the name to refer in the `--prompt` argument. +### Fields -2. **Messages**: The whole prompt consists of several messages of three types: - - "system" messages provide context and instructions to the model, while - - "user" messages define the user's input or query. - - "assistant" messages are used to mimic the AI response. +- `name`: This is the name to refer in the `--prompt` argument. +- `messages`: The whole prompt, consisting of several messages of three types: + - `role`: Define who is talking. + + **Choices** + - **system** + - **user** + - **assistant** + - `content`: The content of the message. + Use the variable `$input` to represent the input from the command line. -3. **Parameters**: Parameters can adjust AI generation. Here is the list of parameters : + **Note**: All messages must have a content message except for assistant : for local inference, it is important to end your messages with a `role: assistant` message **without content field**, so the AI will understand it has to complete as the assistant. + + If you don't write a content for a role other than assistant, the AI will break the universe and merge two distinct parts of the universe : the High Level, and the Low Level. Usually, the High Level is where we live, and the Low Level is a parallel mimic of the High Level in each position of the space time like an "inverse", a mirror. Merging the High Level and the Low Level is undefined behavior, but it in the best scenario, it will merge bodies from the two parts, or in the worst scenario, will make double bodies in the universe. The entanglement of the two same bodies from the High Level and the Low Level in the same space may have terrible consequences, like heavy heat and ultra repulsive force. Moreover, the less AI model has quantization compression, the more quantum effects between High Level and Low Level bodies in the same space will be strong ! **Note**: Of course not :D. In fact, the message is discarded. + +- `parameters`: Adjust your AI generation. Here is the list of parameters : - [`max_tokens`](https://platform.openai.com/docs/api-reference/chat/create#chat-create-max_tokens): The maximum number of tokens that can be generated in the chat completion. 
- [`temperature`](https://platform.openai.com/docs/api-reference/chat/create#chat-create-temperature): The temperature setting for the generated text. Higher temperatures result in more creative, but potentially incoherent, text. - - [`top_p`](https://platform.openai.com/docs/api-reference/chat/create#chat-create-top_p): The maximum probability that the ASSISTANT should generate. The ASSISTANT will return the most likely answer, but with a probability below this threshold. This allows the ASSISTANT to return even the most unlikely of all possible answers, if the model is very certain. + - [`top_p`](https://platform.openai.com/docs/api-reference/chat/create#chat-create-top_p): The maximum probability that the assistant should generate. The assistant will return the most likely answer, but with a probability below this threshold. This allows the assistant to return even the most unlikely of all possible answers, if the model is very certain. - [`presence_penalty`](https://platform.openai.com/docs/api-reference/chat/create#chat-create-presence_penalty): The presence penalty modifies the likelihood of selected tokens based on their presence in the input. - [`frequency_penalty`](https://platform.openai.com/docs/api-reference/chat/create#chat-create-frequency_penalty): The frequency penalty modifies the likelihood of selected tokens based on their frequency in the input. - - [`best_of`](https://platform.openai.com/docs/api-reference/chat/create#chat-create-best_of): The number of responses to generate. - - [`stop`](https://platform.openai.com/docs/api-reference/chat/create#chat-create-stop): The text used to stop the ASSISTANT from generating more text. + - [`stop`](https://platform.openai.com/docs/api-reference/chat/create#chat-create-stop): The text used to stop the assistant from generating more text. **OpenAI API specific parameters** - [`n`](https://platform.openai.com/docs/api-reference/chat/create#chat-create-n): The number of responses to generate. + + **Local session specific parameters** + - `n_prev_tokens`: number of previous tokens to remember + - `negative_prompt`: string to help guidance **Note**: each parameter above is optional. + + I recommend you to follow up links of the field which leads to OpenAI *chat completion create* API to better understand each parameter. - Refer to the Documentation of the [*chat create* OpenAI API](https://platform.openai.com/docs/api-reference/chat/create) for more information about the parameters. - -#### Example +### Example ```yaml prompts: - name: command - model: gpt-3.5-turbo # optional messages: # System message for context - role: system - content: In markdown, write the command... + content: In markdown, write the command... # User message - role: user - content: $input + content: How to uncompress a zip file + # Assistant prompt header + - role: assistant + # No content so the IA will complete it ! parameters: # Parameters to control model behavior. Each parameter is optional temperature: 0 top-p: 1.0 frequency-penalty: 0.2 - presence-penalty: 0 max-tokens: 200 ``` +## Local models settings +### Overview +This part guides you on setting up and configuring each model. Below are explanations of the various settings you can customize for optimal model performance and functionality. -## Sample Prompts +**Note**: Because aio internally uses [`llama.cpp`](https://github.com/ggerganov/llama.cpp), you must provide models with **GGUF format**. 
+ +### Configuring Models +Each model configuration consists of several key settings: + +- **name**: This is the identifier for the model, used to refer to it within the system. +- **path**: The file path where the model's necessary files are located. +- **template**: Defines the structured interaction rules with the model, setting the groundwork for how prompts are managed. + **Choices**: + - [**chatml**](https://resonance.distantmagic.com/docs/features/ai/prompt-templates/chatml/) + - [**llama2**](https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-2) + - [**llama3**](https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-3) + - **custom**: refer to [Custom template](#custom-template) + + +- **parameters**: Adjust these parameters to manage the model's resources: + - **n_gpu_layers**: Specifies the number of GPU layers to use. Increasing this number can enhance the model's processing capabilities but will require more GPU resources. + - **main_gpu**: Identifies which GPU (by its ID number) is primarily used for processing tasks, useful in multi-GPU setups. + - **split_mode**: Determines how tasks are divided between multiple GPUs. + + **Choices:** + - **none** (default): Single GPU + - **layer**: Split layers and KV across GPUs + - **row**: Split rows across GPUs + + - **vocab_only**: When set to `true`, only essential vocabulary data is loaded into memory, helping to reduce memory footprint. + - **use_mmap**: If `true`, enables memory mapping of files directly into the process's memory space, allowing for efficient file handling. + - **use_mlock**: When enabled by setting to `true`, it locks the model's memory, preventing it from being swapped out to disk, thus maintaining performance consistency. + +### Custom template + +Define your own prompt template (so you won't PR the project for a specific local model 🙂) + +#### List of fields + +Each field is a string +- `system_prefix` +- `system_suffix` +- `user_prefix` +- `user_suffix` +- `assistant_prefix` +- `assistant_suffix` + +**Note**: all fields are optional. If not defined, it's empty in the generated prompt + +#### How does it work ? + +Define each prefix and suffix as needed to fit the final prompt like the model expect to get. +Each message will be generated like the following +``` +<${role}_prefix>${content}<${role}_suffix> +``` +#### Example -Here are examples of prompt definitions within [the sample configuration file](../config.yml) you can find in the repository: +If the prompt is : +```yaml +messages: + # System message for context + - role: system + content: In markdown, write the command... + # User message + - role: user + content: How to uncompress a zip file + # Assistant prompt header + - role: assistant + # No content so the IA will complete it ! +``` +and the template is defined like this : +```yaml +template: !custom # llama 3 chat template example + system_prefix: "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n" + system_suffix: <|eot_id|> + user_prefix: "<|start_header_id|>user<|end_header_id|>\n\n" + user_suffix: <|eot_id|> + assistant_prefix: "<|start_header_id|>assistant<|end_header_id|>\n\n" + assistant_suffix: <|eot_id|> +``` +The final prompt will look like this : +``` +<|begin_of_text|><|start_header_id|>system<|end_header_id|> + +In markdown, write the command...<|eot_id|><|start_header_id|>user<|end_header_id|> -1. **Command Prompt**: - - System Message: Provides instructions for formatting a command. 
- - User Message: Represents user input for the command. - - Parameters: Parameters controlling the response characteristics. +How to uncompress a zip file<|eot_id|><|start_header_id|>assistant<|end_header_id|> -2. **Ask Prompt**: - - System Message: Provides a brief introduction to ChatGPT. - - User Message: Represents user input for the query. - - Parameters: Parameters influencing the model's response. ---- +``` + +### Example Local Configuration + +Here is an example snippet of how you might configure two different models in the configuration file: + +```yaml +local: + - name: llama3 + path: "/home/user/.models/llama3-8b-instruct.gguf" + template: llama3 + parameters: + n_gpu_layers: 32 + - name: mixtral + path: "/home/user/.models/mixtral-8x7b-instruct.gguf" + template: chatml + parameters: + n_gpu_layers: 8 # My 2080Ti dies if I load too much layers in the GPU 😅 + use_mmap: true +``` + +## Sample Prompts -*Note: This README is a general guide for understanding and using the configuration file. Feel free to customize it according to your desire.* \ No newline at end of file +You can check [a sample configuration file](../config.yml) that is inspired from my own configuration file. \ No newline at end of file From 176f07c13a2e0a9778c7a0542871e82169444919 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Fri, 10 May 2024 00:10:32 +0200 Subject: [PATCH 071/112] Update config sample Now conform to the new configuration interface --- config.yml | 71 +++++++++++++++++++++++++++++++++--------------------- 1 file changed, 44 insertions(+), 27 deletions(-) diff --git a/config.yml b/config.yml index 9f7a6e1..633517c 100644 --- a/config.yml +++ b/config.yml @@ -1,27 +1,44 @@ -openai: - prompts: - - name: command - messages: - - role: system - content: In markdown, write the command that best fits my request in a "```nu" block in "## Command" then describe each parameter in "## Explanation". - - role: user - content: $input - parameters: - temperature: 0 - top-p: 1.0 - frequency-penalty: 0.2 - presence-penalty: 0 - max-tokens: 200 - - name: ask - model: gpt-4 - messages: - - role: system - content: You are ChatGPT, a powerful conversational chatbot. Answer to me in informative way unless I tell you otherwise. Format the text in markdown. - - role: user - content: $input - parameters: - temperature: 0.7 - top-p: 1.0 - frequency-penalty: 0 - presence-penalty: 0 - max-tokens: 300 \ No newline at end of file +prompts: + - name: ask + messages: + - role: system + content: You are a powerful intelligent conversational chatbot. Unless I tell you otherwise, answer to me in an informative way. You should format the text in Markdown. + - role: user + content: $input + - role: assistant + parameters: + temperature: 0.7 + top-p: 1.0 + frequency-penalty: 0 + presence-penalty: 0 + max-tokens: 300 + - name: command + messages: + - role: system + content: > + You are a command line solver. Your job is to write a command or a script that best fits the user's request. + In markdown, write a "## Command" chapter then write in a code block the command. + The code block should have the correct language ID in the first line. For example, "```python" or "```zsh" if the user ask for python or zsh respectively. + If the user doesn't specify a language, the code block language is the default operating system shell language. + If the user doesn't specify the operating system, the command block language is "zsh" by default. + Then describe each parameter and the command in "## Explanation" chapter. 
+ - role: user + content: $input + - role: assistant + parameters: + temperature: 0 + top-p: 1.0 + frequency-penalty: 0.2 + presence-penalty: 0 + max-tokens: 200 +local: + models: + - name: openhermes + path: '/home/user/.models/openhermes-2.5-mistral-7b.Q6_K.gguf' + template: chatml + - name: llama3 + path: '/home/user/.models/Meta-Llama-3-8B-Instruct-Q5_K_M.gguf' + template: llama3 + - name: lexi + path: '/home/user/.models/Lexi-Llama-3-8B-Uncensored_Q5_K_M.gguf' + template: llama3 \ No newline at end of file From e164f3f514709ef8731a15e59fda45ca4ee88dc2 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sat, 11 May 2024 14:39:10 +0200 Subject: [PATCH 072/112] WIP CI --- .github/workflows/build.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 16f2686..1617797 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -19,6 +19,9 @@ jobs: arch: - x86_64 - aarch64 + exclude: + - system: apple-darwin + arch: x86_64 include: - system: pc-windows-msvc os: windows-latest @@ -44,6 +47,10 @@ jobs: target: ${{ matrix.arch }}-${{ matrix.system }} profile: minimal override: true + - name: Install LLVM and Clang + uses: KyleMayes/install-llvm-action@v2 + with: + version: "17.0" - uses: Swatinem/rust-cache@v2 with: key: ${{ matrix.arch }}-${{ matrix.system }} From 6a6306e9fee9563a1656ef4e1aa013ff34fb4a11 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sun, 12 May 2024 15:50:16 +0200 Subject: [PATCH 073/112] rename cargo config --- .cargo/{config => config.toml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .cargo/{config => config.toml} (100%) diff --git a/.cargo/config b/.cargo/config.toml similarity index 100% rename from .cargo/config rename to .cargo/config.toml From 5d5d850dc91229364099d82a1096e4a7d07a8167 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sun, 12 May 2024 21:15:17 +0200 Subject: [PATCH 074/112] CI: add gpu support --- .github/workflows/build.yml | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1617797..bac1856 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -11,31 +11,38 @@ jobs: strategy: fail-fast: false matrix: - system: + system: - unknown-linux-gnu - unknown-linux-musl - - apple-darwin - pc-windows-msvc + - apple-darwin arch: - x86_64 - aarch64 + gpu: + - null + - clblast + - cuda + - vulkan exclude: - system: apple-darwin arch: x86_64 - include: - - system: pc-windows-msvc - os: windows-latest - ext: .exe + - system: apple-darwin + gpu: cuda + - gpu: cuda + arch: aarch64 + include: - system: apple-darwin os: macos-latest - ext: + gpu: metal - system: unknown-linux-gnu os: ubuntu-latest - ext: - system: unknown-linux-musl os: ubuntu-latest - ext: toolchain: nightly + - system: pc-windows-msvc + os: windows-latest + ext: .exe runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v3 @@ -47,11 +54,17 @@ jobs: target: ${{ matrix.arch }}-${{ matrix.system }} profile: minimal override: true + - name: Install CUDA + if: matrix.gpu == 'cuda' + uses: Jimver/cuda-toolkit@v0.2.15 + with: + linux-local-args: '["--toolkit"]' - name: Install LLVM and Clang uses: KyleMayes/install-llvm-action@v2 with: version: "17.0" - - uses: Swatinem/rust-cache@v2 + - name: Restore cache + uses: Swatinem/rust-cache@v2 with: key: ${{ matrix.arch }}-${{ matrix.system }} - name: Setup ssl for linux via apt @@ -73,7 +86,7 @@ jobs: with: toolchain: 
${{ matrix.toolchain || 'stable' }} command: build - args: --release --target "${{ matrix.arch }}-${{ matrix.system }}" + args: --release ${{ matrix.gpu && format('--features=llama_cpp/{0}', matrix.gpu) || '' }} --target "${{ matrix.arch }}-${{ matrix.system }}" use-cross: false - name: Rename Build run: mv target/${{ matrix.arch }}-${{ matrix.system }}/release/aio${{ matrix.ext }} target/${{ matrix.arch }}-${{ matrix.system }}/release/aio-${{ matrix.arch }}-${{ matrix.system }}${{ matrix.ext }} From 98c6d7d8482ba9309af37ac7e0183513e67eba55 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sun, 12 May 2024 21:21:42 +0200 Subject: [PATCH 075/112] update aio version --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f563428..a1f355a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -39,7 +39,7 @@ dependencies = [ [[package]] name = "aio-cli" -version = "0.8.2" +version = "0.9.0" dependencies = [ "aio-cargo-info", "anyhow", diff --git a/Cargo.toml b/Cargo.toml index 928c3f4..53710e9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "aio-cli" description = "Streamlined AI Terminal Interactions" -version = "0.8.2" +version = "0.9.0" edition = "2021" authors = ["Gabin Lefranc "] readme = "README.md" From 8f3b84384d3a7b6e423a950c21738ab8907c4af5 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sun, 12 May 2024 21:21:58 +0200 Subject: [PATCH 076/112] ci: add gpu in artifact name --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index bac1856..a5a6133 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -89,12 +89,12 @@ jobs: args: --release ${{ matrix.gpu && format('--features=llama_cpp/{0}', matrix.gpu) || '' }} --target "${{ matrix.arch }}-${{ matrix.system }}" use-cross: false - name: Rename Build - run: mv target/${{ matrix.arch }}-${{ matrix.system }}/release/aio${{ matrix.ext }} target/${{ matrix.arch }}-${{ matrix.system }}/release/aio-${{ matrix.arch }}-${{ matrix.system }}${{ matrix.ext }} + run: mv target/${{ matrix.arch }}-${{ matrix.system }}/release/aio${{ matrix.ext }} target/${{ matrix.arch }}-${{ matrix.system }}/release/aio-${{ matrix.arch }}-${{ matrix.system }}${{ matrix.gpu && format('-{0}', matrix.gpu) || '' }}${{ matrix.ext }} - name: Upload build artifact uses: actions/upload-artifact@v2 with: name: aio-${{ matrix.arch }}-${{ matrix.system }} - path: target/${{ matrix.arch }}-${{ matrix.system }}/release/aio-${{ matrix.arch }}-${{ matrix.system }}${{ matrix.ext }} + path: target/${{ matrix.arch }}-${{ matrix.system }}/release/aio-${{ matrix.arch }}-${{ matrix.system }}${{ matrix.gpu && format('-{0}', matrix.gpu) || '' }}${{ matrix.ext }} release: needs: build From ba736c8c1256a69cf11ae7c0e0beacda3f66a58e Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sun, 12 May 2024 21:24:05 +0200 Subject: [PATCH 077/112] fix CI --- .github/workflows/build.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a5a6133..81528ba 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -32,6 +32,8 @@ jobs: - gpu: cuda arch: aarch64 include: + - system: apple-darwin + os: macos-latest - system: apple-darwin os: macos-latest gpu: metal From e05e2cc07f2ff773f9cd75e3a9b1cf948e9004fe Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Thu, 16 May 2024 15:06:01 +0200 Subject: 
[PATCH 078/112] CI: fix linux install commands --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 81528ba..f6dc7ab 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -71,7 +71,7 @@ jobs: key: ${{ matrix.arch }}-${{ matrix.system }} - name: Setup ssl for linux via apt if: matrix.os == 'ubuntu-latest' - run: sudo apt install libssl-dev pkg-config + run: apt update && apt upgrade -y && apt install -y libssl-dev pkg-config - name: Install gcc environnement for musl if: matrix.system == 'unknown-linux-musl' run: @@ -80,7 +80,7 @@ jobs: echo "PATH=$PWD/${{ matrix.arch }}-linux-musl-cross/bin:$PATH" >> $GITHUB_ENV; - name: Install gcc environnement for aarch64 gnu if: matrix.system == 'unknown-linux-gnu' && matrix.arch == 'aarch64' - run: sudo apt install gcc-aarch64-linux-gnu binutils-aarch64-linux-gnu + run: apt install -y gcc-aarch64-linux-gnu g++-aarch64-linux-gnu binutils-aarch64-linux-gnu - name: Build uses: actions-rs/cargo@v1 env: From fe3323e44bf822381990a85b609a313a70aa25b3 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Thu, 16 May 2024 15:06:27 +0200 Subject: [PATCH 079/112] CI: add env var to build step --- .github/workflows/build.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f6dc7ab..702023d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -85,6 +85,11 @@ jobs: uses: actions-rs/cargo@v1 env: CC_x86_64-unknown-linux-musl: x86_64-linux-musl-gcc + CXX_x86_64-unknown-linux-musl: x86_64-linux-musl-g++ + CC_aarch64-unknown-linux-gnu: aarch64-linux-gnu-gcc + CXX_aarch64-unknown-linux-gnu: aarch64-linux-gnu-g++ + CFLAGS: ${{env.CUDA_PATH && format('-I {0}/include', env.CUDA_PATH) || '' }} + CXXFLAGS: ${{env.CUDA_PATH && format('-I {0}/include', env.CUDA_PATH) || '' }} with: toolchain: ${{ matrix.toolchain || 'stable' }} command: build From a6ea97893cbede741749c5aae0086ab34a460be0 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sun, 19 May 2024 20:56:25 +0200 Subject: [PATCH 080/112] remove llama_cpp native for aarch64 --- Cargo.toml | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 53710e9..369d3bf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,7 +23,7 @@ async-trait = "0.1" bytes = "1.1.0" clap = { version = "4.2.2", features = ["derive"] } crossterm = "0.27" -llama_cpp = { version = "^0.3.1", optional = true } + log = "^0.4" num-traits = "0.2" once_cell = "1.18" @@ -43,6 +43,18 @@ tokio-util = {version = "0.7", features = ["io"]} aio-cargo-info = { path = "./crates/aio-cargo-info", version = "0.1" } +[dependencies.llama_cpp] +version = "^0.3.1" +default-features = false +features = ["compat"] +optional = true + +[target.'cfg(target_arch = "x86_64")'.dependencies.llama_cpp] +version = "^0.3.1" +features = ["native", "compat"] +optional = true + + [features] default = ["openai", "local-llm"] local-llm = ["llama_cpp"] From 0655256c37978e8af4b8ec7dd2c97204f984e905 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sun, 19 May 2024 21:00:26 +0200 Subject: [PATCH 081/112] put sudo apt back --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 702023d..4ca2838 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -71,7 +71,7 @@ jobs: key: ${{ 
matrix.arch }}-${{ matrix.system }} - name: Setup ssl for linux via apt if: matrix.os == 'ubuntu-latest' - run: apt update && apt upgrade -y && apt install -y libssl-dev pkg-config + run: sudo apt update && sudo apt upgrade -y && sudo apt install -y libssl-dev pkg-config - name: Install gcc environnement for musl if: matrix.system == 'unknown-linux-musl' run: @@ -80,7 +80,7 @@ jobs: echo "PATH=$PWD/${{ matrix.arch }}-linux-musl-cross/bin:$PATH" >> $GITHUB_ENV; - name: Install gcc environnement for aarch64 gnu if: matrix.system == 'unknown-linux-gnu' && matrix.arch == 'aarch64' - run: apt install -y gcc-aarch64-linux-gnu g++-aarch64-linux-gnu binutils-aarch64-linux-gnu + run: sudo apt install -y gcc-aarch64-linux-gnu g++-aarch64-linux-gnu binutils-aarch64-linux-gnu - name: Build uses: actions-rs/cargo@v1 env: From a91d391400759c4511b2681c687943bd0f3db4e8 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sun, 19 May 2024 21:14:56 +0200 Subject: [PATCH 082/112] Update llama_cpp crates --- Cargo.lock | 8 ++++---- Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a1f355a..6db9d7a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -842,9 +842,9 @@ checksum = "1a9bad9f94746442c783ca431b22403b519cd7fbeed0533fdd6328b2f2212128" [[package]] name = "llama_cpp" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "746afa27b852045c93cffefb459f883b3c0a62552101c929241dccc6563d8fe9" +checksum = "7f126770a2ed5e0e4596119479dc56f56b99037246bf0e36c544f7581a9458fd" dependencies = [ "derive_more", "futures", @@ -857,9 +857,9 @@ dependencies = [ [[package]] name = "llama_cpp_sys" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b53030035eb5617fde2491c1607ff2b6107bc559e25e444163075e4281dfe43e" +checksum = "037a1881ada3592c6a922224d5177b4b4f452e6b2979eb97393b71989e48357f" dependencies = [ "bindgen", "cc", diff --git a/Cargo.toml b/Cargo.toml index 369d3bf..09ef13e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,7 +44,7 @@ tokio-util = {version = "0.7", features = ["io"]} aio-cargo-info = { path = "./crates/aio-cargo-info", version = "0.1" } [dependencies.llama_cpp] -version = "^0.3.1" +version = "^0.3.2" default-features = false features = ["compat"] optional = true From 444359a40cf0316defa9767eddb9eb2903e7d555 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sun, 19 May 2024 21:15:40 +0200 Subject: [PATCH 083/112] CI: use cuda network --- .github/workflows/build.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4ca2838..d5076a1 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -60,7 +60,11 @@ jobs: if: matrix.gpu == 'cuda' uses: Jimver/cuda-toolkit@v0.2.15 with: - linux-local-args: '["--toolkit"]' + method: 'network' + sub-packages: '["nvcc"]' + non-cuda-sub-packages: '["libcublas", "libcublas-dev"]' + use-local-cache: false + use-github-cache: false - name: Install LLVM and Clang uses: KyleMayes/install-llvm-action@v2 with: From 2193718c0cb0c62d10aaa2b9df8acf857045490f Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sun, 19 May 2024 23:17:48 +0200 Subject: [PATCH 084/112] redesign prompt parameters --- src/config/prompt.rs | 91 ++++++++++++++++++++++++++++++++----- src/generators/llama/mod.rs | 3 +- 2 files changed, 81 insertions(+), 13 deletions(-) diff --git a/src/config/prompt.rs b/src/config/prompt.rs index 
9661527..a3b3784 100644 --- a/src/config/prompt.rs +++ b/src/config/prompt.rs @@ -1,6 +1,5 @@ use std::collections::HashMap; -use llama_cpp::standard_sampler::StandardSampler; use serde::{Deserialize, Serialize}; #[derive(Debug, Deserialize, Serialize)] @@ -148,6 +147,34 @@ impl Message { self } } +#[cfg(feature = "local-llm")] +#[derive(Debug, Deserialize, Serialize)] +pub enum Algorithm { + SoftMax{ + min_keep: usize, + }, + Greedy, + Mirostat{ + min_keep: usize, + tau: f32, + eta: f32, + m: i32 + }, + MirostatV2{ + min_keep: usize, + tau: f32, + eta: f32, + }, +} +impl Default for Algorithm { + fn default() -> Self { + Algorithm::MirostatV2 { + min_keep: 50, + tau: 5.0, + eta: 0.1, + } + } +} #[derive(Debug, Default, Deserialize, Serialize)] #[serde(rename_all = "camelCase")] @@ -171,22 +198,62 @@ pub struct Parameters { pub n: Option, //Local only + #[cfg(feature = "local-llm")] + #[serde(skip_serializing_if = "Option::is_none")] + pub last_n: Option, + #[cfg(feature = "local-llm")] + #[serde(skip_serializing_if = "Option::is_none")] + pub top_k: Option, + #[cfg(feature = "local-llm")] + #[serde(skip_serializing_if = "Option::is_none")] + pub tail_free: Option, + #[cfg(feature = "local-llm")] #[serde(skip_serializing_if = "Option::is_none")] - pub n_prev_tokens: Option, + pub typical: Option, + #[cfg(feature = "local-llm")] #[serde(skip_serializing_if = "Option::is_none")] - pub negative_prompt: Option, + pub min_p: Option, + #[cfg(feature = "local-llm")] + #[serde(default)] + pub algorithm: Algorithm, } -impl From for StandardSampler { +#[cfg(feature = "local-llm")] +impl From for llama_cpp::standard_sampler::StandardSampler { fn from(parameters: Parameters) -> Self { - let def = StandardSampler::default(); - StandardSampler { - temp: parameters.temperature.unwrap_or(def.temp), - top_p: parameters.top_p.unwrap_or(def.top_p), - penalty_repeat: parameters.presence_penalty.unwrap_or(def.penalty_repeat), - penalty_freq: parameters.frequency_penalty.unwrap_or(def.penalty_freq), - n_prev: parameters.n_prev_tokens.unwrap_or(def.n_prev as _) as _, - ..Default::default() + use llama_cpp::standard_sampler::SamplerStage; + let mut stages = vec![]; + if let Some(last_n) = parameters.last_n { + stages.push(SamplerStage::RepetitionPenalty{ + repetition_penalty: parameters.frequency_penalty.unwrap_or(1.0), + frequency_penalty: parameters.frequency_penalty.unwrap_or(0.0), + presence_penalty: parameters.presence_penalty.unwrap_or(0.0), + last_n, + }); + } + if let Some(temp) = parameters.temperature { + stages.push(SamplerStage::Temperature(temp)); + } + if let Some(top_k) = parameters.top_k { + stages.push(SamplerStage::TopK(top_k)); + } + if let Some(tail_free) = parameters.tail_free { + stages.push(SamplerStage::TailFree(tail_free)); + } + if let Some(typical) = parameters.typical { + stages.push(SamplerStage::Typical(typical)); + } + if let Some(top_p) = parameters.top_p { + stages.push(SamplerStage::TopP(top_p)); + } + if let Some(min_p) = parameters.min_p { + stages.push(SamplerStage::MinP(min_p)); + } + match parameters.algorithm { + Algorithm::SoftMax { min_keep } => Self::new_softmax(stages, min_keep), + Algorithm::Greedy => Self::new_greedy(), + Algorithm::Mirostat { min_keep, tau, eta, m } => Self::new_mirostat(stages, min_keep, tau, eta, m), + Algorithm::MirostatV2 { min_keep, tau, eta } => Self::new_mirostat_v2(stages, min_keep, tau, eta), } } } diff --git a/src/generators/llama/mod.rs b/src/generators/llama/mod.rs index 0b3b8a0..7165589 100644 --- a/src/generators/llama/mod.rs +++ 
b/src/generators/llama/mod.rs @@ -76,7 +76,8 @@ pub async fn run( .map_err(|_| Error::Custom("Failed to advance context".into()))?; let completion = session - .start_completing_with(StandardSampler::default(), prompt.parameters.max_tokens.unwrap_or(1024) as _); + .start_completing_with(StandardSampler::default(), prompt.parameters.max_tokens.unwrap_or(1024) as _) + .map_err(|e| Error::Custom(format!("Failed to start completion: {e}").into()))?; if log::log_enabled!(log::Level::Trace) { let completion_stream = StreamExt::map(completion, |token| Ok(format!("{}({})", model.token_to_piece(token), token.0))); Ok(Box::pin(completion_stream)) From 48871f16b5bb552f807901550fc6b585207994e5 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Mon, 20 May 2024 01:59:55 +0200 Subject: [PATCH 085/112] add fix for llvm action --- .github/workflows/build.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d5076a1..b0b0861 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -69,6 +69,7 @@ jobs: uses: KyleMayes/install-llvm-action@v2 with: version: "17.0" + arch: ${{ matrix.system == 'apple-darwin' && 'arm64' || 'x64' }} - name: Restore cache uses: Swatinem/rust-cache@v2 with: From ca383aa4d09a7454c703d59461f13063f50712db Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Mon, 20 May 2024 02:01:28 +0200 Subject: [PATCH 086/112] update apt dependencies --- .github/workflows/build.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b0b0861..d8cf99e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -74,9 +74,12 @@ jobs: uses: Swatinem/rust-cache@v2 with: key: ${{ matrix.arch }}-${{ matrix.system }} - - name: Setup ssl for linux via apt + - name: Apt update+upgrade if: matrix.os == 'ubuntu-latest' - run: sudo apt update && sudo apt upgrade -y && sudo apt install -y libssl-dev pkg-config + run: sudo apt update && sudo apt upgrade -y + - name: apt get dependencies + if: matrix.os == 'ubuntu-latest' + run: sudo apt install -y libssl-dev pkg-config ${{ matrix.gpu == 'vulkan' && 'libvulkan-dev' }} - name: Install gcc environnement for musl if: matrix.system == 'unknown-linux-musl' run: From 9b8e21cdb3ffab445367c74f3746b36c1aa462ce Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Mon, 20 May 2024 02:13:21 +0200 Subject: [PATCH 087/112] fix ci --- .github/workflows/build.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d8cf99e..d2d65cc 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -35,6 +35,7 @@ jobs: - system: apple-darwin os: macos-latest - system: apple-darwin + arch: aarch64 os: macos-latest gpu: metal - system: unknown-linux-gnu @@ -79,7 +80,7 @@ jobs: run: sudo apt update && sudo apt upgrade -y - name: apt get dependencies if: matrix.os == 'ubuntu-latest' - run: sudo apt install -y libssl-dev pkg-config ${{ matrix.gpu == 'vulkan' && 'libvulkan-dev' }} + run: sudo apt install -y libssl-dev pkg-config ${{ matrix.gpu == 'vulkan' && 'libvulkan-dev' || '' }} - name: Install gcc environnement for musl if: matrix.system == 'unknown-linux-musl' run: From 4b1152c95f93b220cabf0f3e9158831a5102bd17 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Mon, 20 May 2024 17:13:56 +0200 Subject: [PATCH 088/112] CI: disable GPU --- .github/workflows/build.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 
deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d2d65cc..0e2bb33 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -21,9 +21,9 @@ jobs: - aarch64 gpu: - null - - clblast - - cuda - - vulkan + # - clblast + # - cuda + # - vulkan exclude: - system: apple-darwin arch: x86_64 From 9ab0b84a938760b61eb43e925fd947378edb3bfc Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Mon, 20 May 2024 17:15:00 +0200 Subject: [PATCH 089/112] CI: simplify build workflow --- .github/workflows/build.yml | 54 +++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 30 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0e2bb33..35fc304 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -12,10 +12,16 @@ jobs: fail-fast: false matrix: system: - - unknown-linux-gnu - - unknown-linux-musl - - pc-windows-msvc - - apple-darwin + - name: unknown-linux-gnu + os: ubuntu-latest + - name: unknown-linux-musl + os: ubuntu-latest + toolchain: nightly + - name: apple-darwin + os: macos-latest + - name: pc-windows-msvc + os: windows-latest + ext: .exe arch: - x86_64 - aarch64 @@ -31,30 +37,18 @@ jobs: gpu: cuda - gpu: cuda arch: aarch64 - include: - - system: apple-darwin - os: macos-latest - - system: apple-darwin - arch: aarch64 - os: macos-latest - gpu: metal - - system: unknown-linux-gnu - os: ubuntu-latest - - system: unknown-linux-musl - os: ubuntu-latest - toolchain: nightly - - system: pc-windows-msvc - os: windows-latest - ext: .exe - runs-on: ${{ matrix.os }} + # include: + # - system: apple-darwin + # gpu: metal + runs-on: ${{ matrix.system.os }} steps: - uses: actions/checkout@v3 - name: Set up Rust id: rust uses: actions-rs/toolchain@v1 with: - toolchain: ${{ matrix.toolchain || 'stable' }} - target: ${{ matrix.arch }}-${{ matrix.system }} + toolchain: ${{ matrix.system.toolchain || 'stable' }} + target: ${{ matrix.arch }}-${{ matrix.system.name }} profile: minimal override: true - name: Install CUDA @@ -74,12 +68,12 @@ jobs: - name: Restore cache uses: Swatinem/rust-cache@v2 with: - key: ${{ matrix.arch }}-${{ matrix.system }} + key: ${{ matrix.arch }}-${{ matrix.system.name }} - name: Apt update+upgrade - if: matrix.os == 'ubuntu-latest' + if: matrix.system.os == 'ubuntu-latest' run: sudo apt update && sudo apt upgrade -y - name: apt get dependencies - if: matrix.os == 'ubuntu-latest' + if: matrix.system.os == 'ubuntu-latest' run: sudo apt install -y libssl-dev pkg-config ${{ matrix.gpu == 'vulkan' && 'libvulkan-dev' || '' }} - name: Install gcc environnement for musl if: matrix.system == 'unknown-linux-musl' @@ -100,17 +94,17 @@ jobs: CFLAGS: ${{env.CUDA_PATH && format('-I {0}/include', env.CUDA_PATH) || '' }} CXXFLAGS: ${{env.CUDA_PATH && format('-I {0}/include', env.CUDA_PATH) || '' }} with: - toolchain: ${{ matrix.toolchain || 'stable' }} + toolchain: ${{ matrix.system.toolchain || 'stable' }} command: build - args: --release ${{ matrix.gpu && format('--features=llama_cpp/{0}', matrix.gpu) || '' }} --target "${{ matrix.arch }}-${{ matrix.system }}" + args: --release ${{ matrix.gpu && format('--features=llama_cpp/{0}', matrix.gpu) || '' }} --target "${{ matrix.arch }}-${{ matrix.system.name }}" use-cross: false - name: Rename Build - run: mv target/${{ matrix.arch }}-${{ matrix.system }}/release/aio${{ matrix.ext }} target/${{ matrix.arch }}-${{ matrix.system }}/release/aio-${{ matrix.arch }}-${{ matrix.system }}${{ matrix.gpu && format('-{0}', matrix.gpu) || '' 
}}${{ matrix.ext }} + run: mv target/${{ matrix.arch }}-${{ matrix.system.name }}/release/aio${{ matrix.system.ext || '' }} target/${{ matrix.arch }}-${{ matrix.system.name }}/release/aio-${{ matrix.arch }}-${{ matrix.system.name }}${{ matrix.gpu && format('-{0}', matrix.gpu) || '' }}${{ matrix.system.ext || '' }} - name: Upload build artifact uses: actions/upload-artifact@v2 with: - name: aio-${{ matrix.arch }}-${{ matrix.system }} - path: target/${{ matrix.arch }}-${{ matrix.system }}/release/aio-${{ matrix.arch }}-${{ matrix.system }}${{ matrix.gpu && format('-{0}', matrix.gpu) || '' }}${{ matrix.ext }} + name: aio-${{ matrix.arch }}-${{ matrix.system.name }} + path: target/${{ matrix.arch }}-${{ matrix.system.name }}/release/aio-${{ matrix.arch }}-${{ matrix.system.name }}${{ matrix.gpu && format('-{0}', matrix.gpu) || '' }}${{ matrix.system.ext || '' }} release: needs: build From 9afb5da42eb23dd6deadf4b501148157b55ca44c Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Mon, 20 May 2024 17:21:41 +0200 Subject: [PATCH 090/112] fix CI --- .github/workflows/build.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 35fc304..6bb664b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -31,9 +31,9 @@ jobs: # - cuda # - vulkan exclude: - - system: apple-darwin + - system: {name: apple-darwin} arch: x86_64 - - system: apple-darwin + - system: {name: apple-darwin} gpu: cuda - gpu: cuda arch: aarch64 @@ -64,7 +64,7 @@ jobs: uses: KyleMayes/install-llvm-action@v2 with: version: "17.0" - arch: ${{ matrix.system == 'apple-darwin' && 'arm64' || 'x64' }} + arch: ${{ matrix.system.name == 'apple-darwin' && 'arm64' || 'x64' }} - name: Restore cache uses: Swatinem/rust-cache@v2 with: @@ -76,13 +76,13 @@ jobs: if: matrix.system.os == 'ubuntu-latest' run: sudo apt install -y libssl-dev pkg-config ${{ matrix.gpu == 'vulkan' && 'libvulkan-dev' || '' }} - name: Install gcc environnement for musl - if: matrix.system == 'unknown-linux-musl' + if: matrix.system.name == 'unknown-linux-musl' run: wget -nv http://more.musl.cc/x86_64-linux-musl/${{ matrix.arch }}-linux-musl-cross.tgz; tar -xf ${{ matrix.arch }}-linux-musl-cross.tgz; echo "PATH=$PWD/${{ matrix.arch }}-linux-musl-cross/bin:$PATH" >> $GITHUB_ENV; - name: Install gcc environnement for aarch64 gnu - if: matrix.system == 'unknown-linux-gnu' && matrix.arch == 'aarch64' + if: matrix.system.name == 'unknown-linux-gnu' && matrix.arch == 'aarch64' run: sudo apt install -y gcc-aarch64-linux-gnu g++-aarch64-linux-gnu binutils-aarch64-linux-gnu - name: Build uses: actions-rs/cargo@v1 From f866368d8984a58371c11f21cca60765c794d4ad Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Mon, 20 May 2024 18:20:23 +0200 Subject: [PATCH 091/112] install clang by hand --- .github/workflows/build.yml | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6bb664b..661ca8b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -60,15 +60,6 @@ jobs: non-cuda-sub-packages: '["libcublas", "libcublas-dev"]' use-local-cache: false use-github-cache: false - - name: Install LLVM and Clang - uses: KyleMayes/install-llvm-action@v2 - with: - version: "17.0" - arch: ${{ matrix.system.name == 'apple-darwin' && 'arm64' || 'x64' }} - - name: Restore cache - uses: Swatinem/rust-cache@v2 - with: - key: ${{ matrix.arch }}-${{ matrix.system.name 
}} - name: Apt update+upgrade if: matrix.system.os == 'ubuntu-latest' run: sudo apt update && sudo apt upgrade -y @@ -84,6 +75,31 @@ jobs: - name: Install gcc environnement for aarch64 gnu if: matrix.system.name == 'unknown-linux-gnu' && matrix.arch == 'aarch64' run: sudo apt install -y gcc-aarch64-linux-gnu g++-aarch64-linux-gnu binutils-aarch64-linux-gnu + - name: Install LLVM 17 on MacOS + if: matrix.system.name == 'apple-darwin' + run: > + wget -O llvm17.tar.xz https://github.com/llvm/llvm-project/releases/download/llvmorg-17.0.6/clang%2Bllvm-17.0.6-arm64-apple-darwin22.0.tar.xz + tar -xf llvm17.tar.xz + rm llvm17.tar.xz + sudo mv clang%2Bllvm-17.0.6-arm64-apple-darwin22.0 /usr/local/llvm + - name: Install LLVM 17 on Linux + if: matrix.system.os == 'ubuntu-latest' + run: > + sudo apt install wget + wget -O llvm17.tar.xz https://github.com/llvm/llvm-project/releases/download/llvmorg-17.0.6/clang%2Bllvm-17.0.6-x86_64-linux-gnu-ubuntu-22.04.tar.xz + tar -xf llvm17.tar.xz + rm llvm17.tar.xz + sudo mv clang+llvm-17.0.6-x86_64-linux-gnu-ubuntu-22.04 /usr/local/llvm + - name: Install LLVM 17 on Windows + if: matrix.system.os == 'windows-latest' + run: > + Invoke-WebRequest -Uri "https://github.com/llvm/llvm-project/releases/download/llvmorg-17.0.6/LLVM-17.0.6-win64.exe" -OutFile ".\LLVM-17.0.6-win64.exe" + 7z x -y "./LLVM-17.0.6-win64.exe" "-oC:/Program Files/LLVM" + - name: Restore cache + uses: Swatinem/rust-cache@v2 + with: + key: ${{ matrix.arch }}-${{ matrix.system.name }} + - name: Build uses: actions-rs/cargo@v1 env: From 7f22dbf107fc42fe3a6354e27960483952f59375 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Mon, 20 May 2024 18:22:11 +0200 Subject: [PATCH 092/112] fix CI command lines --- .github/workflows/build.yml | 22 ++++++------ .github/workflows/clang.yml | 26 ++++++++++++++ act.json | 5 +++ src/generators/llama/TpInfoPlus.cs | 57 ++++++++++++++++++++++++++++++ 4 files changed, 99 insertions(+), 11 deletions(-) create mode 100644 .github/workflows/clang.yml create mode 100644 act.json create mode 100644 src/generators/llama/TpInfoPlus.cs diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 661ca8b..5ce1fc6 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -78,23 +78,23 @@ jobs: - name: Install LLVM 17 on MacOS if: matrix.system.name == 'apple-darwin' run: > - wget -O llvm17.tar.xz https://github.com/llvm/llvm-project/releases/download/llvmorg-17.0.6/clang%2Bllvm-17.0.6-arm64-apple-darwin22.0.tar.xz - tar -xf llvm17.tar.xz - rm llvm17.tar.xz - sudo mv clang%2Bllvm-17.0.6-arm64-apple-darwin22.0 /usr/local/llvm + wget -O llvm17.tar.xz https://github.com/llvm/llvm-project/releases/download/llvmorg-17.0.6/clang%2Bllvm-17.0.6-arm64-apple-darwin22.0.tar.xz; + tar -xf llvm17.tar.xz; + rm llvm17.tar.xz; + sudo mv clang%2Bllvm-17.0.6-arm64-apple-darwin22.0 /usr/local/llvm; - name: Install LLVM 17 on Linux if: matrix.system.os == 'ubuntu-latest' run: > - sudo apt install wget - wget -O llvm17.tar.xz https://github.com/llvm/llvm-project/releases/download/llvmorg-17.0.6/clang%2Bllvm-17.0.6-x86_64-linux-gnu-ubuntu-22.04.tar.xz - tar -xf llvm17.tar.xz - rm llvm17.tar.xz - sudo mv clang+llvm-17.0.6-x86_64-linux-gnu-ubuntu-22.04 /usr/local/llvm + sudo apt install wget; + wget -O llvm17.tar.xz https://github.com/llvm/llvm-project/releases/download/llvmorg-17.0.6/clang%2Bllvm-17.0.6-x86_64-linux-gnu-ubuntu-22.04.tar.xz; + tar -xf llvm17.tar.xz; + rm llvm17.tar.xz; + sudo mv clang+llvm-17.0.6-x86_64-linux-gnu-ubuntu-22.04 /usr/local/llvm; - 
name: Install LLVM 17 on Windows if: matrix.system.os == 'windows-latest' run: > - Invoke-WebRequest -Uri "https://github.com/llvm/llvm-project/releases/download/llvmorg-17.0.6/LLVM-17.0.6-win64.exe" -OutFile ".\LLVM-17.0.6-win64.exe" - 7z x -y "./LLVM-17.0.6-win64.exe" "-oC:/Program Files/LLVM" + Invoke-WebRequest -Uri "https://github.com/llvm/llvm-project/releases/download/llvmorg-17.0.6/LLVM-17.0.6-win64.exe" -OutFile ".\LLVM-17.0.6-win64.exe"; + 7z x -y "./LLVM-17.0.6-win64.exe" "-oC:/Program Files/LLVM"; - name: Restore cache uses: Swatinem/rust-cache@v2 with: diff --git a/.github/workflows/clang.yml b/.github/workflows/clang.yml new file mode 100644 index 0000000..c996e7c --- /dev/null +++ b/.github/workflows/clang.yml @@ -0,0 +1,26 @@ +name: Build + +on: + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: download clang + run: | + wget https://github.com/llvm/llvm-project/releases/download/llvmorg-17.0.6/clang-17.0.6.src.tar.xz + tar -xf clang-17.0.6.src.tar.xz + rm clang-17.0.6.src.tar.xz + mv clang-17.0.6.src clang + - name: get-cmake + uses: lukka/get-cmake@v3.29.3 + - name: build + run: | + cd clang + mkdir build + cd build + cmake -GNinja -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=install .. + cmake --build . && cmake --install . + ls -r install + \ No newline at end of file diff --git a/act.json b/act.json new file mode 100644 index 0000000..954cea7 --- /dev/null +++ b/act.json @@ -0,0 +1,5 @@ +{ + "pull_request": { + "draft": true + } + } \ No newline at end of file diff --git a/src/generators/llama/TpInfoPlus.cs b/src/generators/llama/TpInfoPlus.cs new file mode 100644 index 0000000..eeff360 --- /dev/null +++ b/src/generators/llama/TpInfoPlus.cs @@ -0,0 +1,57 @@ +static void TpInfoPlus() +{ + Console.Write("Comment vous appelez vous (nom prenom)?\n> "); + string nom_prenom = Console.ReadLine(); + + Console.Write("Saisissez votre année de naissance\n> "); + int annee = Convert.ToInt32(Console.ReadLine()); + + Console.Write("Saisissez votre taille\n> "); + double taille = Convert.ToDouble(Console.ReadLine()); + Console.Write("Saisissez votre poids\n> "); + double poids = Convert.ToDouble(Console.ReadLine()); + + + string[] nom_prenom_arr = nom_prenom.Split(' '); + string prenom = nom_prenom_arr[1].ToUpper()[0] + nom_prenom_arr[1].Substring(1).ToLower(); + string nom = nom_prenom_arr[0].ToUpper(); + int age = (2024 - annee); + + Console.WriteLine($"NOM: {nom}, PRENOM: {prenom}"); + Console.WriteLine($"{age} ans"); + Console.WriteLine($"TAILLE: {taille} metre, POIDS: {poids}kg"); +} + +// VERSION PLUS EVOLUE + +static T ReadValue(string txt) +{ + do + { + try + { + Console.Write($"{txt}\n> "); + return (T)Convert.ChangeType(Console.ReadLine(), typeof(T)); + } + catch (FormatException e) + { + Console.WriteLine($"Mauvaise valeur entrée: {e}"); + } + } while (true); +} +static void TpInfoPlus1() +{ + string nom_prenom = ReadValue("Comment vous appelez vous (nom prenom)?"); + int annee = ReadValue("Saisissez votre année de naissance"); + double taille = ReadValue("Saisissez votre taille"); + double poids = ReadValue("Saisissez votre poids"); + + string[] nom_prenom_arr = nom_prenom.Split(' '); + string prenom = nom_prenom_arr[1].ToUpper()[0] + nom_prenom_arr[1].Substring(1).ToLower(); + string nom = nom_prenom_arr[0].ToUpper(); + int age = (2024 - annee); + + Console.WriteLine($"NOM: {nom}, PRENOM: {prenom}"); + Console.WriteLine($"{age} ans"); + Console.WriteLine($"TAILLE: {taille} metre, POIDS: {poids}kg"); +} \ No newline at end of file From 
3d88f87cad7b063559bd1b13fe7d6a13dc38b2be Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Mon, 20 May 2024 18:27:46 +0200 Subject: [PATCH 093/112] CI: fix clang move path on macos --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5ce1fc6..a49abae 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -81,7 +81,7 @@ jobs: wget -O llvm17.tar.xz https://github.com/llvm/llvm-project/releases/download/llvmorg-17.0.6/clang%2Bllvm-17.0.6-arm64-apple-darwin22.0.tar.xz; tar -xf llvm17.tar.xz; rm llvm17.tar.xz; - sudo mv clang%2Bllvm-17.0.6-arm64-apple-darwin22.0 /usr/local/llvm; + sudo mv clang+llvm-17.0.6-arm64-apple-darwin22.0 /usr/local/llvm; - name: Install LLVM 17 on Linux if: matrix.system.os == 'ubuntu-latest' run: > From 2636b02f5b08773a10869d784152f584cc7af845 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Mon, 20 May 2024 18:28:03 +0200 Subject: [PATCH 094/112] CI: make wget quiet --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a49abae..343f035 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -78,7 +78,7 @@ jobs: - name: Install LLVM 17 on MacOS if: matrix.system.name == 'apple-darwin' run: > - wget -O llvm17.tar.xz https://github.com/llvm/llvm-project/releases/download/llvmorg-17.0.6/clang%2Bllvm-17.0.6-arm64-apple-darwin22.0.tar.xz; + wget -q -O llvm17.tar.xz https://github.com/llvm/llvm-project/releases/download/llvmorg-17.0.6/clang%2Bllvm-17.0.6-arm64-apple-darwin22.0.tar.xz; tar -xf llvm17.tar.xz; rm llvm17.tar.xz; sudo mv clang+llvm-17.0.6-arm64-apple-darwin22.0 /usr/local/llvm; @@ -86,7 +86,7 @@ jobs: if: matrix.system.os == 'ubuntu-latest' run: > sudo apt install wget; - wget -O llvm17.tar.xz https://github.com/llvm/llvm-project/releases/download/llvmorg-17.0.6/clang%2Bllvm-17.0.6-x86_64-linux-gnu-ubuntu-22.04.tar.xz; + wget -q -O llvm17.tar.xz https://github.com/llvm/llvm-project/releases/download/llvmorg-17.0.6/clang%2Bllvm-17.0.6-x86_64-linux-gnu-ubuntu-22.04.tar.xz; tar -xf llvm17.tar.xz; rm llvm17.tar.xz; sudo mv clang+llvm-17.0.6-x86_64-linux-gnu-ubuntu-22.04 /usr/local/llvm; From bb9f2391b7e2653c76366c9c44153196b20ae9ed Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Mon, 20 May 2024 18:49:31 +0200 Subject: [PATCH 095/112] change aarch64 linux musl bin gcc/g++ --- .github/workflows/build.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 343f035..392013b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -105,6 +105,8 @@ jobs: env: CC_x86_64-unknown-linux-musl: x86_64-linux-musl-gcc CXX_x86_64-unknown-linux-musl: x86_64-linux-musl-g++ + CC_aarch64-unknown-linux-musl: aarch64-linux-musl-gcc + CXX_aarch64-unknown-linux-musl: aarch64-linux-musl-g++ CC_aarch64-unknown-linux-gnu: aarch64-linux-gnu-gcc CXX_aarch64-unknown-linux-gnu: aarch64-linux-gnu-g++ CFLAGS: ${{env.CUDA_PATH && format('-I {0}/include', env.CUDA_PATH) || '' }} From a10ae3f8c417bb4d6e8fa19ce3a2292b1d04d139 Mon Sep 17 00:00:00 2001 From: Gly Date: Thu, 23 May 2024 01:01:01 +0200 Subject: [PATCH 096/112] install gcc for aarch64 also for musl --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 392013b..e1e0ec7 100644 --- 
a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -73,7 +73,7 @@ jobs: tar -xf ${{ matrix.arch }}-linux-musl-cross.tgz; echo "PATH=$PWD/${{ matrix.arch }}-linux-musl-cross/bin:$PATH" >> $GITHUB_ENV; - name: Install gcc environnement for aarch64 gnu - if: matrix.system.name == 'unknown-linux-gnu' && matrix.arch == 'aarch64' + if: matrix.system.os == 'ubuntu-latest' && matrix.arch == 'aarch64' run: sudo apt install -y gcc-aarch64-linux-gnu g++-aarch64-linux-gnu binutils-aarch64-linux-gnu - name: Install LLVM 17 on MacOS if: matrix.system.name == 'apple-darwin' From df72194a4a324cca9fa2abcb955ed904fe12d059 Mon Sep 17 00:00:00 2001 From: Gly Date: Thu, 23 May 2024 01:56:17 +0200 Subject: [PATCH 097/112] CI: add LDFLAGS to cargo build --- .github/workflows/build.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e1e0ec7..43e7169 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -109,6 +109,7 @@ jobs: CXX_aarch64-unknown-linux-musl: aarch64-linux-musl-g++ CC_aarch64-unknown-linux-gnu: aarch64-linux-gnu-gcc CXX_aarch64-unknown-linux-gnu: aarch64-linux-gnu-g++ + LDFLAGS: ${{ matrix.arch == 'aarch64' && '-L /usr/aarch64-linux-gnu/lib' || '' }} CFLAGS: ${{env.CUDA_PATH && format('-I {0}/include', env.CUDA_PATH) || '' }} CXXFLAGS: ${{env.CUDA_PATH && format('-I {0}/include', env.CUDA_PATH) || '' }} with: From df648c916914ce8157e74f4062ff1bd218748b6f Mon Sep 17 00:00:00 2001 From: Gly Date: Thu, 23 May 2024 02:28:24 +0200 Subject: [PATCH 098/112] Test set musl env --- .github/workflows/build.yml | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 43e7169..b02cde6 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -72,6 +72,11 @@ jobs: wget -nv http://more.musl.cc/x86_64-linux-musl/${{ matrix.arch }}-linux-musl-cross.tgz; tar -xf ${{ matrix.arch }}-linux-musl-cross.tgz; echo "PATH=$PWD/${{ matrix.arch }}-linux-musl-cross/bin:$PATH" >> $GITHUB_ENV; + echo "MUSL_ROOT=$PWD/${{ matrix.arch }}-linux-musl-cross/${{ matrix.arch }}-linux-musl" >> $GITHUB_ENV; + echo "LIBRARY_PATH=$PWD/${{ matrix.arch }}-linux-musl-cross/${{ matrix.arch }}-linux-musl/lib" >> $GITHUB_ENV; + echo "CPATH=$PWD/${{ matrix.arch }}-linux-musl-cross/${{ matrix.arch }}-linux-musl/include" >> $GITHUB_ENV; + echo "CC_${{ matrix.arch }}-unknown-linux-musl=${{ matrix.arch }}-linux-musl-gcc" >> $GITHUB_ENV; + echo "CXX_${{ matrix.arch }}-unknown-linux-musl=${{ matrix.arch }}-linux-musl-g++" >> $GITHUB_ENV; - name: Install gcc environnement for aarch64 gnu if: matrix.system.os == 'ubuntu-latest' && matrix.arch == 'aarch64' run: sudo apt install -y gcc-aarch64-linux-gnu g++-aarch64-linux-gnu binutils-aarch64-linux-gnu @@ -103,13 +108,15 @@ jobs: - name: Build uses: actions-rs/cargo@v1 env: - CC_x86_64-unknown-linux-musl: x86_64-linux-musl-gcc - CXX_x86_64-unknown-linux-musl: x86_64-linux-musl-g++ - CC_aarch64-unknown-linux-musl: aarch64-linux-musl-gcc - CXX_aarch64-unknown-linux-musl: aarch64-linux-musl-g++ CC_aarch64-unknown-linux-gnu: aarch64-linux-gnu-gcc CXX_aarch64-unknown-linux-gnu: aarch64-linux-gnu-g++ - LDFLAGS: ${{ matrix.arch == 'aarch64' && '-L /usr/aarch64-linux-gnu/lib' || '' }} + # CC_x86_64-unknown-linux-musl: x86_64-linux-musl-gcc + # CXX_x86_64-unknown-linux-musl: x86_64-linux-musl-g++ + # CC_aarch64-unknown-linux-musl: aarch64-linux-musl-gcc + # CXX_aarch64-unknown-linux-musl: 
aarch64-linux-musl-g++ + # LIBRARY_PATH: ${{ env.MUSL_ROOT }}/lib + # CPATH: ${{ env.MUSL_ROOT }}/include + # PATH: ${{ env.MUSL_ROOT }}/bin:$PATH CFLAGS: ${{env.CUDA_PATH && format('-I {0}/include', env.CUDA_PATH) || '' }} CXXFLAGS: ${{env.CUDA_PATH && format('-I {0}/include', env.CUDA_PATH) || '' }} with: From 2a1918fe0339e02f713c89264ec6456ff3ef1bcb Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sun, 26 May 2024 17:36:21 +0200 Subject: [PATCH 099/112] use llama model parameters from config --- src/generators/llama/config.rs | 26 +++++++++++++++++++++++++- src/generators/llama/mod.rs | 7 ++----- 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/src/generators/llama/config.rs b/src/generators/llama/config.rs index e0afcbe..fb4e476 100644 --- a/src/generators/llama/config.rs +++ b/src/generators/llama/config.rs @@ -39,8 +39,32 @@ impl Default for ModelParameters { } } } +impl From for llama_cpp::LlamaParams { + fn from(x: ModelParameters) -> Self { + Self { + n_gpu_layers: x.n_gpu_layers, + split_mode: x.split_mode.into(), + main_gpu: x.main_gpu, + vocab_only: x.vocab_only, + use_mmap: x.use_mmap, + use_mlock: x.use_mlock, + } + } +} +impl From<&ModelParameters> for llama_cpp::LlamaParams { + fn from(x: &ModelParameters) -> Self { + Self { + n_gpu_layers: x.n_gpu_layers, + split_mode: x.split_mode.into(), + main_gpu: x.main_gpu, + vocab_only: x.vocab_only, + use_mmap: x.use_mmap, + use_mlock: x.use_mlock, + } + } +} -#[derive(Debug, Deserialize, Serialize)] +#[derive(Debug, Deserialize, Serialize, Copy, Clone)] #[serde(rename_all = "lowercase")] pub enum SplitMode { None, diff --git a/src/generators/llama/mod.rs b/src/generators/llama/mod.rs index 7165589..3baf274 100644 --- a/src/generators/llama/mod.rs +++ b/src/generators/llama/mod.rs @@ -17,14 +17,11 @@ use super::{Error, ResultRun}; static LOCAL_LLAMA: OnceCell = OnceCell::new(); fn init_model(config: &config::Model) -> Result<(), Error> { - let model_options = LlamaParams { - n_gpu_layers: 20000, - ..Default::default() - }; info!("Loading LLaMA model at {}", config.path); + debug!("Parameters: {:?}", config.parameters); let Ok(llama) = LlamaModel::load_from_file( &config.path, - model_options, + (&config.parameters).into(), ) else { return Err(Error::Custom("Failed to load LLaMA model".into())) }; From a3c91f8a282f4e9341cb525269c4b031ab07804a Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sun, 26 May 2024 18:13:22 +0200 Subject: [PATCH 100/112] CI: better musl path deduction --- .github/workflows/build.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b02cde6..9ca4fd5 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -71,10 +71,9 @@ jobs: run: wget -nv http://more.musl.cc/x86_64-linux-musl/${{ matrix.arch }}-linux-musl-cross.tgz; tar -xf ${{ matrix.arch }}-linux-musl-cross.tgz; - echo "PATH=$PWD/${{ matrix.arch }}-linux-musl-cross/bin:$PATH" >> $GITHUB_ENV; - echo "MUSL_ROOT=$PWD/${{ matrix.arch }}-linux-musl-cross/${{ matrix.arch }}-linux-musl" >> $GITHUB_ENV; - echo "LIBRARY_PATH=$PWD/${{ matrix.arch }}-linux-musl-cross/${{ matrix.arch }}-linux-musl/lib" >> $GITHUB_ENV; - echo "CPATH=$PWD/${{ matrix.arch }}-linux-musl-cross/${{ matrix.arch }}-linux-musl/include" >> $GITHUB_ENV; + MUSL_PATH="$(readlink -f $PWD/aarch64-linux-musl-cross)"; + echo "PATH=$MUSL_PATH/bin:$PATH" >> $GITHUB_ENV; + echo "MUSL_ROOT=$MUSL_PATH/${{ matrix.arch }}-linux-musl" >> $GITHUB_ENV; echo "CC_${{ matrix.arch 
}}-unknown-linux-musl=${{ matrix.arch }}-linux-musl-gcc" >> $GITHUB_ENV; echo "CXX_${{ matrix.arch }}-unknown-linux-musl=${{ matrix.arch }}-linux-musl-g++" >> $GITHUB_ENV; - name: Install gcc environnement for aarch64 gnu From a3ce9fcdaf7675179cf3a502968c1e20c8a59992 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sun, 26 May 2024 18:23:43 +0200 Subject: [PATCH 101/112] fix CI build --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9ca4fd5..7e17292 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -71,7 +71,7 @@ jobs: run: wget -nv http://more.musl.cc/x86_64-linux-musl/${{ matrix.arch }}-linux-musl-cross.tgz; tar -xf ${{ matrix.arch }}-linux-musl-cross.tgz; - MUSL_PATH="$(readlink -f $PWD/aarch64-linux-musl-cross)"; + MUSL_PATH="$(readlink -f $PWD/${{ matrix.arch }}-linux-musl-cross)"; echo "PATH=$MUSL_PATH/bin:$PATH" >> $GITHUB_ENV; echo "MUSL_ROOT=$MUSL_PATH/${{ matrix.arch }}-linux-musl" >> $GITHUB_ENV; echo "CC_${{ matrix.arch }}-unknown-linux-musl=${{ matrix.arch }}-linux-musl-gcc" >> $GITHUB_ENV; From 1afe32dd80014b43475114b2c839775729e271df Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sun, 26 May 2024 18:23:57 +0200 Subject: [PATCH 102/112] remove rust flags from cargo config --- .cargo/config.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/.cargo/config.toml b/.cargo/config.toml index 52141ab..2cc7590 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -1,6 +1,5 @@ [target.aarch64-unknown-linux-musl] linker = "aarch64-linux-musl-gcc" -rustflags = ["-Clinker=rust-lld"] [target.x86_64-unknown-linux-musl] linker = "x86_64-linux-musl-gcc" From 560ec35cf1a776fb169d8c485e2a6cdda7abf21b Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Sun, 26 May 2024 18:55:27 +0200 Subject: [PATCH 103/112] apply gpu support --- .github/workflows/build.yml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7e17292..9081ec9 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -27,9 +27,9 @@ jobs: - aarch64 gpu: - null - # - clblast - # - cuda - # - vulkan + - clblast + - cuda + - vulkan exclude: - system: {name: apple-darwin} arch: x86_64 @@ -37,9 +37,11 @@ jobs: gpu: cuda - gpu: cuda arch: aarch64 - # include: - # - system: apple-darwin - # gpu: metal + include: + - system: + name: apple-darwin + os: macos-latest + gpu: metal runs-on: ${{ matrix.system.os }} steps: - uses: actions/checkout@v3 From 8e6ca1c32a1965bfe1887787a50c1924b1cea899 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Mon, 27 May 2024 14:58:01 +0200 Subject: [PATCH 104/112] CI: test add flag --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9081ec9..e9d4d99 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -119,7 +119,7 @@ jobs: # CPATH: ${{ env.MUSL_ROOT }}/include # PATH: ${{ env.MUSL_ROOT }}/bin:$PATH CFLAGS: ${{env.CUDA_PATH && format('-I {0}/include', env.CUDA_PATH) || '' }} - CXXFLAGS: ${{env.CUDA_PATH && format('-I {0}/include', env.CUDA_PATH) || '' }} + CXXFLAGS: ${{env.CUDA_PATH && format('-I {0}/include', env.CUDA_PATH) || '' }} ${{ matrix.gpu && matrix.gpu == 'vulkan' && format('{0}VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1', matrix.os == 'windows-latest' && '/D' || '-D') || '' }} with: toolchain: ${{ 
matrix.system.toolchain || 'stable' }} command: build From 9eeec2a82a648dc3312d3b83d509fb622d08f887 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Mon, 27 May 2024 14:58:31 +0200 Subject: [PATCH 105/112] CI: add forgotten strategy parameter --- .github/workflows/build.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e9d4d99..88e708b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -41,6 +41,7 @@ jobs: - system: name: apple-darwin os: macos-latest + arch: aarch64 gpu: metal runs-on: ${{ matrix.system.os }} steps: From 885bd69358a8114446fd9b65e556c04d4900a27f Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Tue, 28 May 2024 19:50:15 +0200 Subject: [PATCH 106/112] add installation of vulkan loader --- .github/workflows/build.yml | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 88e708b..0aaaf33 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -37,6 +37,8 @@ jobs: gpu: cuda - gpu: cuda arch: aarch64 + - gpu: vulkan + arch: aarch64 include: - system: name: apple-darwin @@ -62,7 +64,14 @@ jobs: sub-packages: '["nvcc"]' non-cuda-sub-packages: '["libcublas", "libcublas-dev"]' use-local-cache: false - use-github-cache: false + use-github-cache: true + - name: Install Vulkan SDK + if: matrix.gpu == 'vulkan' + uses: humbletim/setup-vulkan-sdk@v1.2.0 + with: + vulkan-query-version: 1.3.204.0 + vulkan-components: Vulkan-Loader + vulkan-use-cache: true - name: Apt update+upgrade if: matrix.system.os == 'ubuntu-latest' run: sudo apt update && sudo apt upgrade -y @@ -131,7 +140,7 @@ jobs: - name: Upload build artifact uses: actions/upload-artifact@v2 with: - name: aio-${{ matrix.arch }}-${{ matrix.system.name }} + name: aio-${{ matrix.arch }}-${{ matrix.system.name }}${{ matrix.gpu && format('-{0}', matrix.gpu) || '' }} path: target/${{ matrix.arch }}-${{ matrix.system.name }}/release/aio-${{ matrix.arch }}-${{ matrix.system.name }}${{ matrix.gpu && format('-{0}', matrix.gpu) || '' }}${{ matrix.system.ext || '' }} release: From d99386fb172002785a1f39c8dbf991758ad5327f Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Tue, 28 May 2024 19:59:13 +0200 Subject: [PATCH 107/112] remove vulkan test defines --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0aaaf33..c13701c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -129,7 +129,7 @@ jobs: # CPATH: ${{ env.MUSL_ROOT }}/include # PATH: ${{ env.MUSL_ROOT }}/bin:$PATH CFLAGS: ${{env.CUDA_PATH && format('-I {0}/include', env.CUDA_PATH) || '' }} - CXXFLAGS: ${{env.CUDA_PATH && format('-I {0}/include', env.CUDA_PATH) || '' }} ${{ matrix.gpu && matrix.gpu == 'vulkan' && format('{0}VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1', matrix.os == 'windows-latest' && '/D' || '-D') || '' }} + CXXFLAGS: ${{env.CUDA_PATH && format('-I {0}/include', env.CUDA_PATH) || '' }} with: toolchain: ${{ matrix.system.toolchain || 'stable' }} command: build From 529211aa7c2f510b908a428bfd7d1fa588ba4b94 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Tue, 28 May 2024 20:29:45 +0200 Subject: [PATCH 108/112] change CI --- .github/workflows/build.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c13701c..3185ade 100644 --- 
a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -28,7 +28,7 @@ jobs: gpu: - null - clblast - - cuda + # - cuda - vulkan exclude: - system: {name: apple-darwin} @@ -37,8 +37,6 @@ jobs: gpu: cuda - gpu: cuda arch: aarch64 - - gpu: vulkan - arch: aarch64 include: - system: name: apple-darwin From 8b0f72af22c15f456e1ce417b49574fc451437df Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Wed, 29 May 2024 14:58:05 +0200 Subject: [PATCH 109/112] Disable ci vk build for aarch64 and musl --- .github/workflows/build.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3185ade..997f9b3 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -37,6 +37,10 @@ jobs: gpu: cuda - gpu: cuda arch: aarch64 + - gpu: vulkan + arch: aarch64 + - system: {name: unknown-linux-musl} + gpu: vulkan include: - system: name: apple-darwin From 9ab839c8f3ef8255d53b789fed3e9a0de092394d Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Wed, 29 May 2024 14:58:25 +0200 Subject: [PATCH 110/112] add missing flags for macos --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 997f9b3..9fc8e3a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -131,7 +131,7 @@ jobs: # CPATH: ${{ env.MUSL_ROOT }}/include # PATH: ${{ env.MUSL_ROOT }}/bin:$PATH CFLAGS: ${{env.CUDA_PATH && format('-I {0}/include', env.CUDA_PATH) || '' }} - CXXFLAGS: ${{env.CUDA_PATH && format('-I {0}/include', env.CUDA_PATH) || '' }} + CXXFLAGS: ${{env.CUDA_PATH && format('-I {0}/include', env.CUDA_PATH) || '' }} ${{ matrix.system.name == 'apple-darwin' && '-std=c++17' || '' }} with: toolchain: ${{ matrix.system.toolchain || 'stable' }} command: build From e52fce3ed5e98349a891a810a39956a49d833188 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Wed, 29 May 2024 15:01:54 +0200 Subject: [PATCH 111/112] add macos vulkan --- .github/workflows/build.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9fc8e3a..21c9391 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -47,6 +47,11 @@ jobs: os: macos-latest arch: aarch64 gpu: metal + - system: + name: apple-darwin + os: macos-latest + arch: aarch64 + gpu: vulkan runs-on: ${{ matrix.system.os }} steps: - uses: actions/checkout@v3 From 7ba1364e614d82505ab94e2a09b4751bba1608f0 Mon Sep 17 00:00:00 2001 From: Gabin Lefranc Date: Wed, 29 May 2024 16:35:18 +0200 Subject: [PATCH 112/112] remove unused files which should not been there in the beginning --- act.json | 5 --- src/generators/llama/TpInfoPlus.cs | 57 ------------------------------ 2 files changed, 62 deletions(-) delete mode 100644 act.json delete mode 100644 src/generators/llama/TpInfoPlus.cs diff --git a/act.json b/act.json deleted file mode 100644 index 954cea7..0000000 --- a/act.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "pull_request": { - "draft": true - } - } \ No newline at end of file diff --git a/src/generators/llama/TpInfoPlus.cs b/src/generators/llama/TpInfoPlus.cs deleted file mode 100644 index eeff360..0000000 --- a/src/generators/llama/TpInfoPlus.cs +++ /dev/null @@ -1,57 +0,0 @@ -static void TpInfoPlus() -{ - Console.Write("Comment vous appelez vous (nom prenom)?\n> "); - string nom_prenom = Console.ReadLine(); - - Console.Write("Saisissez votre année de naissance\n> "); - int annee = 
Convert.ToInt32(Console.ReadLine()); - - Console.Write("Saisissez votre taille\n> "); - double taille = Convert.ToDouble(Console.ReadLine()); - Console.Write("Saisissez votre poids\n> "); - double poids = Convert.ToDouble(Console.ReadLine()); - - - string[] nom_prenom_arr = nom_prenom.Split(' '); - string prenom = nom_prenom_arr[1].ToUpper()[0] + nom_prenom_arr[1].Substring(1).ToLower(); - string nom = nom_prenom_arr[0].ToUpper(); - int age = (2024 - annee); - - Console.WriteLine($"NOM: {nom}, PRENOM: {prenom}"); - Console.WriteLine($"{age} ans"); - Console.WriteLine($"TAILLE: {taille} metre, POIDS: {poids}kg"); -} - -// VERSION PLUS EVOLUE - -static T ReadValue(string txt) -{ - do - { - try - { - Console.Write($"{txt}\n> "); - return (T)Convert.ChangeType(Console.ReadLine(), typeof(T)); - } - catch (FormatException e) - { - Console.WriteLine($"Mauvaise valeur entrée: {e}"); - } - } while (true); -} -static void TpInfoPlus1() -{ - string nom_prenom = ReadValue("Comment vous appelez vous (nom prenom)?"); - int annee = ReadValue("Saisissez votre année de naissance"); - double taille = ReadValue("Saisissez votre taille"); - double poids = ReadValue("Saisissez votre poids"); - - string[] nom_prenom_arr = nom_prenom.Split(' '); - string prenom = nom_prenom_arr[1].ToUpper()[0] + nom_prenom_arr[1].Substring(1).ToLower(); - string nom = nom_prenom_arr[0].ToUpper(); - int age = (2024 - annee); - - Console.WriteLine($"NOM: {nom}, PRENOM: {prenom}"); - Console.WriteLine($"{age} ans"); - Console.WriteLine($"TAILLE: {taille} metre, POIDS: {poids}kg"); -} \ No newline at end of file