Skip to content

Commit

Permalink
refactor: Improve string handling and add tests for jvozba and jvokaha
Browse files Browse the repository at this point in the history
  • Loading branch information
lagleki committed Feb 24, 2025
1 parent 62ba49e commit a1fb6b2
Show file tree
Hide file tree
Showing 5 changed files with 147 additions and 39 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "vlazba"
version = "0.7.10"
version = "0.7.11"
edition = "2021"
authors = ["lagleki <[email protected]>"]
description = "Lojban words generator and analyzer"
Expand Down
127 changes: 96 additions & 31 deletions src/jvozba/jvokaha.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,51 +50,70 @@ fn jvokaha2(lujvo: &str) -> Result<Vec<String>, Box<dyn Error>> {
// Remove hyphen
if !res.is_empty()
&& res.last().unwrap().len() != 1
&& (lujvo.starts_with('y')
|| lujvo.starts_with("nr")
|| (lujvo.starts_with('r') && get_cv_info(&lujvo[1..2]) == "C"))
{
res.push(lujvo[0..1].to_string());
lujvo = lujvo[1..].to_string();
continue;
let first_char = lujvo.chars().next().ok_or_else(|| LujvoError {
message: "Unexpected end of input".to_string(),
})?;

let second_char = lujvo.chars().nth(1);

if first_char == 'y'
|| (first_char == 'n' && second_char == Some('r'))
|| (first_char == 'r' && second_char.is_some_and(|c| get_cv_info(&c.to_string()) == "C"))
{
res.push(first_char.to_string());
lujvo = lujvo.chars().skip(1).collect();
continue;
}
}

// Drop rafsi from front
if lujvo.len() >= 3
&& (get_cv_info(&lujvo[0..3]) == "CVV"
&& ["ai", "ei", "oi", "au"].contains(&&lujvo[1..3]))
{
res.push(lujvo[0..3].to_string());
lujvo = lujvo[3..].to_string();
continue;
if lujvo.chars().count() >= 3 {
let first_three: String = lujvo.chars().take(3).collect();
if get_cv_info(&first_three) == "CVV" {
let middle_two: String = lujvo.chars().skip(1).take(2).collect();
if ["ai", "ei", "oi", "au"].contains(&middle_two.as_str()) {
res.push(first_three);
lujvo = lujvo.chars().skip(3).collect();
continue;
}
}
}

if lujvo.len() >= 4 && get_cv_info(&lujvo[0..4]) == "CV'V" {
res.push(lujvo[0..4].to_string());
lujvo = lujvo[4..].to_string();
continue;
if lujvo.chars().count() >= 4 {
let first_four: String = lujvo.chars().take(4).collect();
if get_cv_info(&first_four) == "CV'V" {
res.push(first_four);
lujvo = lujvo.chars().skip(4).collect();
continue;
}
}

if lujvo.len() >= 5
&& (get_cv_info(&lujvo[0..5]) == "CVCCY" || get_cv_info(&lujvo[0..5]) == "CCVCY")
{
res.push(lujvo[0..4].to_string());
res.push("y".to_string());
lujvo = lujvo[5..].to_string();
continue;
if lujvo.chars().count() >= 5 {
let first_five: String = lujvo.chars().take(5).collect();
if get_cv_info(&first_five) == "CVCCY" || get_cv_info(&first_five) == "CCVCY" {
let first_four: String = lujvo.chars().take(4).collect();
res.push(first_four);
res.push("y".to_string());
lujvo = lujvo.chars().skip(5).collect();
continue;
}
}

if get_cv_info(&lujvo) == "CVCCV" || get_cv_info(&lujvo) == "CCVCV" {
let cv_info = get_cv_info(&lujvo);
if cv_info == "CVCCV" || cv_info == "CCVCV" {
res.push(lujvo);
return Ok(res);
}

if lujvo.len() >= 3
&& (get_cv_info(&lujvo[0..3]) == "CVC" || get_cv_info(&lujvo[0..3]) == "CCV")
{
res.push(lujvo[0..3].to_string());
lujvo = lujvo[3..].to_string();
continue;
if lujvo.chars().count() >= 3 {
let first_three: String = lujvo.chars().take(3).collect();
let cv_info = get_cv_info(&first_three);
if cv_info == "CVC" || cv_info == "CCV" {
res.push(first_three);
lujvo = lujvo.chars().skip(3).collect();
continue;
}
}

return Err(Box::new(LujvoError {
Expand Down Expand Up @@ -124,6 +143,16 @@ mod tests {
assert_eq!(jvokaha("ca'irgau").unwrap(), vec!["ca'i", "r", "gau"]);
}

// Decomposing "klamyseltru" must yield the y-hyphen as its own element
// between the first rafsi and the remaining ones.
#[test]
fn test_valid_lujvo_with_y_hyphen() {
    let parts = jvokaha("klamyseltru").unwrap();
    assert_eq!(parts, vec!["klam", "y", "sel", "tru"]);
}

// Decomposing "toinrbroda" is expected to fail.
// NOTE(review): the function name says "valid", but the assertion checks for
// an error — confirm which is intended and rename or adjust accordingly.
#[test]
fn test_valid_lujvo_with_nr_hyphen() {
    let outcome = jvokaha("toinrbroda");
    assert!(outcome.is_err());
}

#[test]
fn test_invalid_klasr() {
assert!(jvokaha("klasr").is_err());
Expand All @@ -133,4 +162,40 @@ mod tests {
fn test_invalid_empty() {
assert!(jvokaha("").is_err());
}

// Cyrillic (multi-byte) input must be reported as an error.
#[test]
fn test_invalid_cyrillic() {
    let outcome = jvokaha("щя");
    assert!(outcome.is_err());
}

// Inputs containing multi-byte characters must each be reported as an error.
#[test]
fn test_invalid_multibyte() {
    let samples = [
        "café",   // Latin text with a multibyte character sequence
        "日本語", // Japanese characters
        "😀",     // emoji
    ];
    for word in samples {
        assert!(jvokaha(word).is_err());
    }
}

// A two-letter input cannot be decomposed and must produce an error.
#[test]
fn test_invalid_short_lujvo() {
    let outcome = jvokaha("la");
    assert!(outcome.is_err());
}

// "klamrseltru" must be rejected with an error.
#[test]
fn test_invalid_rafsi_sequence() {
    let outcome = jvokaha("klamrseltru");
    assert!(outcome.is_err());
}

// The lower-level splitter must break "bramlatu" into "bra" and "mlatu".
#[test]
fn test_jvokaha2_valid() {
    assert_eq!(jvokaha2("bramlatu").unwrap(), vec!["bra", "mlatu"]);
}

// The lower-level splitter must return an error for "invalid".
#[test]
fn test_jvokaha2_invalid() {
    let outcome = jvokaha2("invalid");
    assert!(outcome.is_err());
}
}
53 changes: 48 additions & 5 deletions src/jvozba/jvozbanarge.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ fn is_forbidden(d: &LujvoAndScore, forbid_la_lai_doi: bool) -> bool {

#[inline]
fn is_cmevla(valsi: &str) -> bool {
valsi.chars().last().is_some_and(|c| !"aeiouy'".contains(c))
valsi.chars().last().is_some_and(is_c)
}

pub fn normalize(rafsi_list: &[String]) -> Result<Vec<String>, Box<dyn std::error::Error>> {
Expand Down Expand Up @@ -212,18 +212,61 @@ mod tests {
let input = vec!["klama".to_string(), "gasnu".to_string()];
let result = jvozba(&input, false, false);

assert!(
!result.is_empty(),
"jvozba should return at least one result"
);
assert!(!result.is_empty(), "jvozba should return at least one result");
assert_eq!(result[0].lujvo, "klagau", "First result should be 'klagau'");
}

// A single input word must produce no candidates.
#[test]
fn test_jvozba_single_word() {
    let words = vec![String::from("klama")];
    let candidates = jvozba(&words, false, false);
    assert!(candidates.is_empty(), "Single word should return empty result");
}

// An empty input list must produce no candidates.
#[test]
fn test_jvozba_empty_input() {
    let words: Vec<String> = Vec::new();
    let candidates = jvozba(&words, false, false);
    assert!(candidates.is_empty(), "Empty input should return empty result");
}

// With the third argument set to `true`, "klama" + "gasnu" must still yield
// at least one candidate.
#[test]
fn test_jvozba_experimental_rafsi() {
    let words = vec![String::from("klama"), String::from("gasnu")];
    let candidates = jvozba(&words, false, true);
    assert!(!candidates.is_empty(), "Should include experimental rafsi");
}

// Checks one positive and one negative input for `is_tosmabru`.
#[test]
fn test_is_tosmabru() {
    // "tos" followed by "mabru" is expected to be flagged.
    let tail = vec!["mabru".to_string()];
    assert!(is_tosmabru("tos", &tail), "'tosmabru' should be a valid tosmabru");

    // An unrelated pair is expected not to be flagged.
    let tail = vec!["example".to_string()];
    assert!(!is_tosmabru("bad", &tail), "Invalid tosmabru case should return false");
}

// Normalizing ["slak", "gau"] must insert a "y" element between the two rafsi.
#[test]
fn test_normalize() {
    let rafsi = vec![String::from("slak"), String::from("gau")];
    let normalized = normalize(&rafsi).unwrap();
    assert_eq!(normalized, vec!["slak", "y", "gau"], "Normalization should insert y-hyphen");
}

// Normalizing a list with a single element must return an error.
#[test]
fn test_normalize_error() {
    let rafsi = vec![String::from("klama")];
    assert!(normalize(&rafsi).is_err(), "Normalizing single word should error");
}

// "klaman" must be classified as a cmevla and "klama" must not.
#[test]
fn test_is_cmevla() {
    let consonant_final = is_cmevla("klaman");
    assert!(consonant_final, "Should recognize cmevla");

    let vowel_final = is_cmevla("klama");
    assert!(!vowel_final, "Should recognize non-cmevla");
}
}
2 changes: 1 addition & 1 deletion src/jvozba/tools.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ pub fn get_candid(selrafsi: &str, is_last: bool, exp_rafsi: bool) -> Vec<String>
candid.push(gismu.to_string());
}

let chopped = gismu[..gismu.len() - 1].to_string();
let chopped = gismu.chars().take(gismu.chars().count() - 1).collect::<String>();
if chopped != "brod" {
candid.push(chopped);
}
Expand Down

0 comments on commit a1fb6b2

Please sign in to comment.