This repository has been archived by the owner on Nov 18, 2023. It is now read-only.

Commit

first attempt to fix #1, still failing; needs more digging
mtb0x1 committed Aug 28, 2023
1 parent 11ff90a commit 92bdbf3
Showing 14 changed files with 168 additions and 69 deletions.
5 changes: 4 additions & 1 deletion .cargo/config.toml → port1/.cargo/config.toml
@@ -2,4 +2,7 @@
target = "wasm32-unknown-unknown"

[target.wasm32-unknown-unknown]
rustflags = ["-C","opt-level=3","-C", "target-feature=+relaxed-simd,+simd128"]
rustflags = ["-C","opt-level=3","-C", "target-feature=+relaxed-simd,+simd128,+atomics,+bulk-memory,+mutable-globals"]

[unstable]
build-std = ["panic_abort", "std"]
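The added +atomics,+bulk-memory,+mutable-globals target features and the [unstable] build-std entry are what make a shared-memory (multithreaded) wasm build possible; std itself has to be rebuilt with those features on a nightly toolchain. As an illustration only (not part of this commit), a compile-time guard can catch builds where these flags silently fall away:

// Sketch, not in this commit: fail the build if the thread-related target
// features that .cargo/config.toml is supposed to enable are missing.
#[cfg(all(target_arch = "wasm32", not(target_feature = "atomics")))]
compile_error!("rebuild with -C target-feature=+atomics,+bulk-memory,+mutable-globals and nightly -Z build-std");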
45 changes: 32 additions & 13 deletions port1/Cargo.lock

Some generated files are not rendered by default.

3 changes: 2 additions & 1 deletion port1/Cargo.toml
@@ -7,7 +7,8 @@ edition = "2021"

[dependencies]
rand = { version = "0.8.5", features = ["small_rng"] }
rayon-wasm = "1.6.2" #no point if we don't higher throughput
rayon = "1.7.0"
wasm-bindgen-rayon = "1.0"

wasm-bindgen = "0.2"
web-sys = {version="0.3.64",features=["Window","Performance","Navigator"]}
3 changes: 3 additions & 0 deletions port1/rust-toolchain.toml
@@ -0,0 +1,3 @@
[toolchain]
channel = "nightly"
profile = "minimal"
28 changes: 11 additions & 17 deletions port1/src/lib.rs
@@ -1,17 +1,14 @@
use std::io::{Read, Seek, SeekFrom};
use std::mem;
use wasm_bindgen::prelude::*;

use rand::rngs::SmallRng;
use rand::{Rng, SeedableRng};
use wasm_bindgen::prelude::wasm_bindgen;
use std::io::{self, Write};

use rayon_wasm::current_num_threads;
//use rayon_wasm::ThreadPoolBuilder;
use rayon_wasm::iter::IndexedParallelIterator;
use rayon_wasm::iter::IntoParallelIterator;
use rayon_wasm::iter::IntoParallelRefMutIterator;
use rayon_wasm::iter::ParallelIterator;
use rayon::current_num_threads;
use rayon::prelude::*;


const CONF_VALS: usize = 7;
const CONF_SIZE: usize = std::mem::size_of::<[i32; CONF_VALS]>();
@@ -531,6 +528,10 @@ impl RunState {
}
}

// need to be called before main_wasm
// takes num_threads:usize, returns promise
pub use wasm_bindgen_rayon::init_thread_pool;

#[wasm_bindgen]
pub fn main_wasm(
model_buffer: Vec<u8>, //model_path
@@ -547,16 +548,9 @@ pub fn main_wasm(
.navigator()
.hardware_concurrency() as usize;
log::info!("--> [available 'CPUs' = {}]\n\n", cpus);

//@todo ...
//value: ThreadPoolBuildError
//{ kind: IOError(Error { kind: Unsupported, message: "operation not supported on this platform" }) }

/*let cpus_in_use = (cpus as f64*0.75) as usize;
ThreadPoolBuilder::new()
.num_threads(cpus_in_use)
.build_global()
.unwrap();*/

// we should be able to use 75% if hardware_concurrency is available,
// check init_thread_pool above

let active_cpus = current_num_threads();
log::info!("--> [Running Inference on {} 'CPUs']\n\n", active_cpus);
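The core of the lib.rs change: the rayon-wasm fork is replaced by stock rayon, and wasm_bindgen_rayon::init_thread_pool is re-exported so the JS side can await initThreadPool(n) before calling main_wasm. A minimal sketch of that wiring, assuming the crate versions declared in Cargo.toml (this is not the commit's actual inference code):

// Sketch: minimal wasm-bindgen-rayon setup under rayon 1.7 / wasm-bindgen-rayon 1.0.
use wasm_bindgen::prelude::*;
use rayon::prelude::*;

// Re-exported so JS can call `await initThreadPool(numThreads)` first;
// it spawns web workers sharing the module's memory and returns a Promise.
pub use wasm_bindgen_rayon::init_thread_pool;

#[wasm_bindgen]
pub fn sum_of_squares(input: Vec<i32>) -> i32 {
    // Runs on the worker-backed rayon pool created by init_thread_pool.
    input.par_iter().map(|&x| x * x).sum()
}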
21 changes: 15 additions & 6 deletions port1/www/index.html
@@ -18,11 +18,11 @@
</style>
<head>
<!-- preload loads 438m+167m+60m+size_of(port1_bg.wasm) ... after few refresh browser memory goes up fast -->
<link rel="preload" href="/pkg/port1_bg.wasm" as="fetch" type="application/wasm" crossorigin="" />
<link rel="preload" href="/stories42M.bin" as="fetch" type="application/binary" crossorigin="" />
<link rel="preload" href="/stories15M.bin" as="fetch" type="application/binary" crossorigin="" />
<link rel="preload" href="/stories110M.bin" as="fetch" type="application/binary" crossorigin="" />
<link rel="module" href="/pkg/port1_bg.js" as="fetch" type="application/javascript" crossorigin="" />
<link rel="preload" href="/pkg/port1_bg.wasm" as="fetch" type="application/wasm"/>
<link rel="preload" href="/stories42M.bin" as="fetch" type="application/binary"/>
<link rel="preload" href="/stories15M.bin" as="fetch" type="application/binary"/>
<link rel="preload" href="/stories110M.bin" as="fetch" type="application/binary"/>
<link rel="module" href="/pkg/port1_bg.js" as="fetch" type="application/javascript"/>
<script>
var backup_info = window.console.info;
function new_info(msg,...args){
@@ -59,10 +59,19 @@
// will "boot" the module and make it ready to use. Currently browsers
// don't support natively imported WebAssembly as an ES module, but
// eventually the manual initialization won't be required!
import init, { main_wasm } from './pkg/port1.js';
import init, { main_wasm, initThreadPool } from './pkg/port1.js';

async function run() {
await init();
if (navigator.hardwareConcurrency >0){
const cpus_in_use = Math.ceil(navigator.hardwareConcurrency*0.75);
//requires https://stackoverflow.com/questions/72881660/web-worker-blocked-by-self-crossoriginisolated-on-cypress
//Cross-Origin-Opener-Policy="same-origin"
//Cross-Origin-Resource-Policy="same-site"
//Cross-Origin-Embedder-Policy="require-corp"
//Fixme(https://github.com/mtb0x1/llama2.rs.wasm/issues/1)
//await initThreadPool(cpus_in_use);
}
document.getElementById("output").value ="";
let model = document.getElementById("model").value+".bin";
const model_buffer =await fetch(model)
17 changes: 17 additions & 0 deletions port1/www/www_server.py
@@ -0,0 +1,17 @@
from http.server import HTTPServer, SimpleHTTPRequestHandler

class CORSRequestHandler(SimpleHTTPRequestHandler):
def end_headers(self):
self.send_header("Cross-Origin-Opener-Policy", "same-origin")
#self.send_header("Cross-Origin-Resource-Policy", "same-origin")
self.send_header("Cross-Origin-Embedder-Policy", "require-corp")
super().end_headers()

def run(server_class=HTTPServer, handler_class=CORSRequestHandler, port=8080):
server_address = ("localhost", port)
httpd = server_class(server_address, handler_class)
print(f"Starting server on port {port}...")
httpd.serve_forever()

if __name__ == "__main__":
run()
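This small server exists because the SharedArrayBuffer backing the wasm thread pool is only available on cross-origin-isolated pages, which is what the Cross-Origin-Opener-Policy and Cross-Origin-Embedder-Policy headers provide. As a hedged illustration (not part of this commit), the wasm side could verify that before attempting initThreadPool by reading the global crossOriginIsolated flag through js-sys:

// Sketch: check the page-global `crossOriginIsolated` flag, which the
// COOP/COEP headers sent by www_server.py are responsible for enabling.
use wasm_bindgen::JsValue;

fn is_cross_origin_isolated() -> bool {
    js_sys::Reflect::get(&js_sys::global(), &JsValue::from_str("crossOriginIsolated"))
        .ok()
        .and_then(|v| v.as_bool())
        .unwrap_or(false)
}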
8 changes: 8 additions & 0 deletions port2/.cargo/config.toml
@@ -0,0 +1,8 @@
[build]
target = "wasm32-unknown-unknown"

[target.wasm32-unknown-unknown]
rustflags = ["-C","opt-level=3","-C", "target-feature=+relaxed-simd,+simd128,+atomics,+bulk-memory,+mutable-globals"]

[unstable]
build-std = ["panic_abort", "std"]
45 changes: 32 additions & 13 deletions port2/Cargo.lock

Some generated files are not rendered by default.

3 changes: 2 additions & 1 deletion port2/Cargo.toml
@@ -6,7 +6,8 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
rayon-wasm = "1.6.2" #no point if we don't higher throughput
rayon= "1.7.0"
wasm-bindgen-rayon = "1.0"

wasm-bindgen = "0.2"
web-sys = {version="0.3.64",features=["Window","Performance","Navigator"]}
3 changes: 3 additions & 0 deletions port2/rust-toolchain.toml
@@ -0,0 +1,3 @@
[toolchain]
channel = "nightly"
profile = "minimal"
27 changes: 11 additions & 16 deletions port2/src/lib.rs
@@ -1,10 +1,8 @@
use rayon_wasm::current_num_threads;
use rayon_wasm::iter::IndexedParallelIterator;
use rayon_wasm::iter::IntoParallelRefIterator;
use rayon_wasm::iter::IntoParallelRefMutIterator;
use rayon_wasm::iter::ParallelIterator;
use rayon::current_num_threads;
use rayon::prelude::*;

use std::io::{stdout, BufReader, Read, Write};
use wasm_bindgen::prelude::*;
use wasm_bindgen::prelude::wasm_bindgen;

// ---------------------------------------------------------------------------
// RNG (Permuted Congruential Generator)
@@ -475,6 +473,10 @@ fn read_vec<T: FromBytes>(rdr: &mut BufReader<std::io::Cursor<Vec<u8>>>, size: i
(0..size).map(|_| read::<T>(rdr)).collect()
}

// need to be called before main_wasm
// takes num_threads:usize, returns promise
pub use wasm_bindgen_rayon::init_thread_pool;

#[wasm_bindgen]
pub fn main_wasm(
model_buffer: Vec<u8>, //model_path
@@ -493,16 +495,9 @@ pub fn main_wasm(
.navigator()
.hardware_concurrency() as usize;
log::info!("--> [available 'CPUs' = {}]\n\n", cpus);

//@todo ...
//value: ThreadPoolBuildError
//{ kind: IOError(Error { kind: Unsupported, message: "operation not supported on this platform" }) }

/*let cpus_in_use = (cpus as f64*0.75) as usize;
ThreadPoolBuilder::new()
.num_threads(cpus_in_use)
.build_global()
.unwrap();*/

// we should be able to use 75% if hardware_concurrency is available,
// check init_thread_pool above

let active_cpus = current_num_threads();
log::info!("--> [Running Inference on {} 'CPUs']\n\n", active_cpus);
12 changes: 11 additions & 1 deletion port2/www/index.html
@@ -59,10 +59,19 @@
// will "boot" the module and make it ready to use. Currently browsers
// don't support natively imported WebAssembly as an ES module, but
// eventually the manual initialization won't be required!
import init, { main_wasm } from './pkg/port2.js';
import init, { main_wasm, initThreadPool } from './pkg/port2.js';

async function run() {
await init();
if (navigator.hardwareConcurrency >0){
const cpus_in_use = Math.ceil(navigator.hardwareConcurrency*0.75);
//requires https://stackoverflow.com/questions/72881660/web-worker-blocked-by-self-crossoriginisolated-on-cypress
//Cross-Origin-Opener-Policy="same-origin"
//Cross-Origin-Resource-Policy="same-site"
//Cross-Origin-Embedder-Policy="require-corp"
//Fixme(https://github.com/mtb0x1/llama2.rs.wasm/issues/1)
//await initThreadPool(cpus_in_use);
}
document.getElementById("output").value ="";
let model = document.getElementById("model").value+".bin";
const model_buffer =await fetch(model)
@@ -80,6 +89,7 @@
return new Uint8Array(buffer);
});
console.info("JS: tokenizer_buffer fetched");
console.log(tokenizer_buffer);

let temperature = document.getElementById("temperature").value;
let steps = document.getElementById("steps").value;
Expand Down
