refactor: Improve string handling and add tests for jvozba and jvokaha

La-Lojban · Feb 24, 2025 · a1fb6b2
1 parent 62ba49e
commit a1fb6b2
Show file tree

Hide file tree

Showing 5 changed files with 147 additions and 39 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "vlazba"
-version = "0.7.10"
+version = "0.7.11"
 edition = "2021"
 authors = ["lagleki <[email protected]>"]
 description = "Lojban words generator and analyzer"

diff --git a/src/jvozba/jvokaha.rs b/src/jvozba/jvokaha.rs
@@ -50,51 +50,70 @@ fn jvokaha2(lujvo: &str) -> Result<Vec<String>, Box<dyn Error>> {
         // Remove hyphen
         if !res.is_empty()
             && res.last().unwrap().len() != 1
-            && (lujvo.starts_with('y')
-                || lujvo.starts_with("nr")
-                || (lujvo.starts_with('r') && get_cv_info(&lujvo[1..2]) == "C"))
         {
-            res.push(lujvo[0..1].to_string());
-            lujvo = lujvo[1..].to_string();
-            continue;
+            let first_char = lujvo.chars().next().ok_or_else(|| LujvoError {
+                message: "Unexpected end of input".to_string(),
+            })?;
+
+            let second_char = lujvo.chars().nth(1);
+
+            if first_char == 'y'
+                || (first_char == 'n' && second_char == Some('r'))
+                || (first_char == 'r' && second_char.is_some_and(|c| get_cv_info(&c.to_string()) == "C"))
+            {
+                res.push(first_char.to_string());
+                lujvo = lujvo.chars().skip(1).collect();
+                continue;
+            }
         }
 
         // Drop rafsi from front
-        if lujvo.len() >= 3
-            && (get_cv_info(&lujvo[0..3]) == "CVV"
-                && ["ai", "ei", "oi", "au"].contains(&&lujvo[1..3]))
-        {
-            res.push(lujvo[0..3].to_string());
-            lujvo = lujvo[3..].to_string();
-            continue;
+        if lujvo.chars().count() >= 3 {
+            let first_three: String = lujvo.chars().take(3).collect();
+            if get_cv_info(&first_three) == "CVV" {
+                let middle_two: String = lujvo.chars().skip(1).take(2).collect();
+                if ["ai", "ei", "oi", "au"].contains(&middle_two.as_str()) {
+                    res.push(first_three);
+                    lujvo = lujvo.chars().skip(3).collect();
+                    continue;
+                }
+            }
         }
 
-        if lujvo.len() >= 4 && get_cv_info(&lujvo[0..4]) == "CV'V" {
-            res.push(lujvo[0..4].to_string());
-            lujvo = lujvo[4..].to_string();
-            continue;
+        if lujvo.chars().count() >= 4 {
+            let first_four: String = lujvo.chars().take(4).collect();
+            if get_cv_info(&first_four) == "CV'V" {
+                res.push(first_four);
+                lujvo = lujvo.chars().skip(4).collect();
+                continue;
+            }
         }
 
-        if lujvo.len() >= 5
-            && (get_cv_info(&lujvo[0..5]) == "CVCCY" || get_cv_info(&lujvo[0..5]) == "CCVCY")
-        {
-            res.push(lujvo[0..4].to_string());
-            res.push("y".to_string());
-            lujvo = lujvo[5..].to_string();
-            continue;
+        if lujvo.chars().count() >= 5 {
+            let first_five: String = lujvo.chars().take(5).collect();
+            if get_cv_info(&first_five) == "CVCCY" || get_cv_info(&first_five) == "CCVCY" {
+                let first_four: String = lujvo.chars().take(4).collect();
+                res.push(first_four);
+                res.push("y".to_string());
+                lujvo = lujvo.chars().skip(5).collect();
+                continue;
+            }
         }
 
-        if get_cv_info(&lujvo) == "CVCCV" || get_cv_info(&lujvo) == "CCVCV" {
+        let cv_info = get_cv_info(&lujvo);
+        if cv_info == "CVCCV" || cv_info == "CCVCV" {
             res.push(lujvo);
             return Ok(res);
         }
 
-        if lujvo.len() >= 3
-            && (get_cv_info(&lujvo[0..3]) == "CVC" || get_cv_info(&lujvo[0..3]) == "CCV")
-        {
-            res.push(lujvo[0..3].to_string());
-            lujvo = lujvo[3..].to_string();
-            continue;
+        if lujvo.chars().count() >= 3 {
+            let first_three: String = lujvo.chars().take(3).collect();
+            let cv_info = get_cv_info(&first_three);
+            if cv_info == "CVC" || cv_info == "CCV" {
+                res.push(first_three);
+                lujvo = lujvo.chars().skip(3).collect();
+                continue;
+            }
         }
 
         return Err(Box::new(LujvoError {
@@ -124,6 +143,16 @@ mod tests {
         assert_eq!(jvokaha("ca'irgau").unwrap(), vec!["ca'i", "r", "gau"]);
     }
 
+    #[test]
+    fn test_valid_lujvo_with_y_hyphen() {
+        assert_eq!(jvokaha("klamyseltru").unwrap(), vec!["klam", "y", "sel", "tru"]);
+    }
+
+    #[test]
+    fn test_valid_lujvo_with_nr_hyphen() {
+        assert!(jvokaha("toinrbroda").is_err());
+    }
+
     #[test]
     fn test_invalid_klasr() {
         assert!(jvokaha("klasr").is_err());
@@ -133,4 +162,40 @@ mod tests {
     fn test_invalid_empty() {
         assert!(jvokaha("").is_err());
     }
+
+    #[test]
+    fn test_invalid_cyrillic() {
+        assert!(jvokaha("щя").is_err());
+    }
+
+    #[test]
+    fn test_invalid_multibyte() {
+        // Test with a multibyte character sequence
+        assert!(jvokaha("café").is_err());
+        // Test with a Japanese character
+        assert!(jvokaha("日本語").is_err());
+        // Test with emoji
+        assert!(jvokaha("😀").is_err());
+    }
+
+    #[test]
+    fn test_invalid_short_lujvo() {
+        assert!(jvokaha("la").is_err());
+    }
+
+    #[test]
+    fn test_invalid_rafsi_sequence() {
+        assert!(jvokaha("klamrseltru").is_err());
+    }
+
+    #[test]
+    fn test_jvokaha2_valid() {
+        let result = jvokaha2("bramlatu").unwrap();
+        assert_eq!(result, vec!["bra", "mlatu"]);
+    }
+
+    #[test]
+    fn test_jvokaha2_invalid() {
+        assert!(jvokaha2("invalid").is_err());
+    }
 }
diff --git a/src/jvozba/jvozbanarge.rs b/src/jvozba/jvozbanarge.rs
@@ -85,7 +85,7 @@ fn is_forbidden(d: &LujvoAndScore, forbid_la_lai_doi: bool) -> bool {
 
 #[inline]
 fn is_cmevla(valsi: &str) -> bool {
-    valsi.chars().last().is_some_and(|c| !"aeiouy'".contains(c))
+    valsi.chars().last().is_some_and(is_c)
 }
 
 pub fn normalize(rafsi_list: &[String]) -> Result<Vec<String>, Box<dyn std::error::Error>> {
@@ -212,18 +212,61 @@ mod tests {
         let input = vec!["klama".to_string(), "gasnu".to_string()];
         let result = jvozba(&input, false, false);
 
-        assert!(
-            !result.is_empty(),
-            "jvozba should return at least one result"
-        );
+        assert!(!result.is_empty(), "jvozba should return at least one result");
         assert_eq!(result[0].lujvo, "klagau", "First result should be 'klagau'");
     }
 
+    #[test]
+    fn test_jvozba_single_word() {
+        let input = vec!["klama".to_string()];
+        let result = jvozba(&input, false, false);
+        assert!(result.is_empty(), "Single word should return empty result");
+    }
+
+    #[test]
+    fn test_jvozba_empty_input() {
+        let input: Vec<String> = vec![];
+        let result = jvozba(&input, false, false);
+        assert!(result.is_empty(), "Empty input should return empty result");
+    }
+
+    #[test]
+    fn test_jvozba_experimental_rafsi() {
+        let input = vec!["klama".to_string(), "gasnu".to_string()];
+        let result = jvozba(&input, false, true);
+        assert!(!result.is_empty(), "Should include experimental rafsi");
+    }
+
     #[test]
     fn test_is_tosmabru() {
         // Test a valid tosmabru case
         let rafsi = "tos";
         let rest = vec!["mabru".to_string()];
         assert!(is_tosmabru(rafsi, &rest), "'tosmabru' should be a valid tosmabru");
+
+        // Test invalid case
+        let rafsi = "bad";
+        let rest = vec!["example".to_string()];
+        assert!(!is_tosmabru(rafsi, &rest), "Invalid tosmabru case should return false");
+    }
+
+    #[test]
+    fn test_normalize() {
+        let input = vec!["slak".to_string(), "gau".to_string()];
+        let result = normalize(&input).unwrap();
+        assert_eq!(result, vec!["slak", "y", "gau"], "Normalization should insert y-hyphen");
+    }
+
+    #[test]
+    fn test_normalize_error() {
+        let input = vec!["klama".to_string()];
+        let result = normalize(&input);
+        assert!(result.is_err(), "Normalizing single word should error");
+    }
+
+    #[test]
+    fn test_is_cmevla() {
+        assert!(is_cmevla("klaman"), "Should recognize cmevla");
+        assert!(!is_cmevla("klama"), "Should recognize non-cmevla");
     }
 }
diff --git a/src/jvozba/tools.rs b/src/jvozba/tools.rs
@@ -71,7 +71,7 @@ pub fn get_candid(selrafsi: &str, is_last: bool, exp_rafsi: bool) -> Vec<String>
             candid.push(gismu.to_string());
         }
 
-        let chopped = gismu[..gismu.len() - 1].to_string();
+        let chopped = gismu.chars().take(gismu.chars().count() - 1).collect::<String>();
         if chopped != "brod" {
             candid.push(chopped);
         }