Skip to content

Commit

Permalink
fix: Rework internals to make localparts non-optional
Browse files Browse the repository at this point in the history
Since localparts from ldap syncs would end up empty, this resulted in
lots of spurious logging about user updates that weren't actually
happening.

This gets rid of that by mapping types better to what's actually
happening.
  • Loading branch information
tlater-famedly committed Feb 13, 2025
1 parent 8379073 commit 7ddbd2a
Show file tree
Hide file tree
Showing 7 changed files with 418 additions and 162 deletions.
138 changes: 102 additions & 36 deletions src/bin/migrate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,23 +111,54 @@ fn detect_database_encoding(users: Vec<SyncUser>) -> ExternalIdEncoding {

#[cfg(test)]
mod tests {
use base64::prelude::*;
use famedly_sync::user;

use super::*;
use crate::{ExternalIdEncoding, SyncUser};

fn create_test_user(external_user_id: &str) -> SyncUser {
enum UserId {
Hex(String),
Base64(String),
Plain(String),
}

impl UserId {
fn to_owned(&self) -> String {
match self {
UserId::Hex(id) => id.to_owned(),
UserId::Base64(id) => id.to_owned(),
UserId::Plain(id) => id.to_owned(),
}
}

fn get_localpart(&self) -> String {
match self {
Self::Hex(id) => user::compute_famedly_uuid(
&hex::decode(id).expect("Must be valid hex-encoded string"),
),
UserId::Base64(id) => user::compute_famedly_uuid(
&BASE64_STANDARD.decode(id).expect("Must be valid base64-encoded string"),
),
UserId::Plain(id) => user::compute_famedly_uuid(id.as_bytes()),
}
}
}

fn create_test_user(external_user_id: UserId) -> SyncUser {
SyncUser::new(
"first name".to_owned(),
"last name".to_owned(),
"[email protected]".to_owned(),
None,
true,
None,
"Example User".to_owned(),
external_user_id.to_owned(),
None,
external_user_id.get_localpart(),
)
}

fn run_detection_test(user_ids: Vec<&str>, expected_encoding: ExternalIdEncoding) {
fn run_detection_test(user_ids: Vec<UserId>, expected_encoding: ExternalIdEncoding) {
let users: Vec<SyncUser> = user_ids
.into_iter()
.map(create_test_user) // Assuming SyncUser::new(&str) exists
Expand All @@ -142,7 +173,7 @@ mod tests {
}

fn run_conversion_test(
original_id: &str,
original_id: UserId,
expected_encoding: ExternalIdEncoding,
expected_result: &str,
) {
Expand All @@ -160,7 +191,11 @@ mod tests {
#[tokio::test]
async fn test_all_hex() {
// All users look like hex: "deadbeef", "cafebabe", "0123456789abcdef"
let user_ids = vec!["deadbeef", "cafebabe", "0123456789abcdef"];
let user_ids = vec![
UserId::Hex("deadbeef".to_owned()),
UserId::Hex("cafebabe".to_owned()),
UserId::Hex("0123456789abcdef".to_owned()),
];
run_detection_test(user_ids, ExternalIdEncoding::Hex);
}

Expand All @@ -171,14 +206,25 @@ mod tests {
// "Zm9v" decodes to "foo"
// "YmFy" decodes to "bar"
// All are valid base64 and length % 4 == 0
let user_ids = vec!["Y2FmZQ==", "Zm9v", "YmFy"];
let user_ids = vec![
UserId::Base64("Y2FmZQ==".to_owned()),
UserId::Base64("Zm9v".to_owned()),
UserId::Base64("YmFy".to_owned()),
];
run_detection_test(user_ids, ExternalIdEncoding::Base64);
}

#[tokio::test]
async fn test_mixed_ambiguous() {
// Some look hex, all look base64
let user_ids = vec!["cafebabe", "deadbeef", "beefcafe", "Y2FmZQ==", "Zm9v", "YmFy"];
let user_ids = vec![
UserId::Hex("cafebabe".to_owned()),
UserId::Hex("deadbeef".to_owned()),
UserId::Hex("beefcafe".to_owned()),
UserId::Base64("Y2FmZQ==".to_owned()),
UserId::Base64("Zm9v".to_owned()),
UserId::Base64("YmFy".to_owned()),
];
run_detection_test(user_ids, ExternalIdEncoding::Base64);
}

Expand All @@ -188,7 +234,11 @@ mod tests {
// "cafeb" length is 5, not divisible by 2 or 4, so neither hex nor base64
// "abc" length is 3, not divisible by 4, and 'c' is hex valid but odd length ->
// not hex.
let user_ids = vec!["cafe", "cafeb", "abc"];
let user_ids = vec![
UserId::Hex("cafe".to_owned()),
UserId::Plain("cafeb".to_owned()),
UserId::Plain("abc".to_owned()),
];
// "cafe" might count for both hex and base64, but "cafeb" and "abc" won't count
// for either. Out of 3, maybe 1 counts as hex/base64 and 2 are plain. Ratios:
// hex = 1/3 ≈ 0.33, base64 = 1/3 ≈ 0.33, both < 0.8.
Expand All @@ -199,7 +249,7 @@ mod tests {
async fn test_invalid_characters() {
// "zzz" is not hex. It's also not base64-safe (though 'z' is alphanumeric,
// length=3 %4!=0) "+++" is not hex and length=3 not multiple of 4 for base64.
let user_ids = vec!["zzz", "+++"];
let user_ids = vec![UserId::Plain("zzz".to_owned()), UserId::Plain("+++".to_owned())];
run_detection_test(user_ids, ExternalIdEncoding::Ambiguous);
}

Expand All @@ -211,14 +261,18 @@ mod tests {
// - 3 plain (30%)
let user_ids = vec![
// Hex format users (30%)
"deadbeef", "cafebabe", "12345678",
UserId::Hex("deadbeef".to_owned()),
UserId::Hex("cafebabe".to_owned()),
UserId::Hex("12345678".to_owned()),
// Base64 format users (40%)
"Y2FmZQ==", // "cafe"
"Zm9vYmFy", // "foobar"
"aGVsbG8=", // "hello"
"d29ybGQ=", // "world"
UserId::Base64("Y2FmZQ==".to_owned()), // "cafe"
UserId::Base64("Zm9vYmFy".to_owned()), // "foobar"
UserId::Base64("aGVsbG8=".to_owned()), // "hello"
UserId::Base64("d29ybGQ=".to_owned()), // "world"
// Plain format users (30%)
"plain_1", "plain_2", "plain_3",
UserId::Plain("plain_1".to_owned()),
UserId::Plain("plain_2".to_owned()),
UserId::Plain("plain_3".to_owned()),
];

// Both hex (30%) and base64 (40%) > 20% threshold
Expand All @@ -232,8 +286,16 @@ mod tests {
// Testing near 90% threshold for hex
// 9 hex users and 1 plain = 90% exactly
let user_ids = vec![
"deadbeef", "cafebabe", "beefcafe", "12345678", "87654321", "abcdef12", "34567890",
"98765432", "fedcba98", "plain_id",
UserId::Hex("deadbeef".to_owned()),
UserId::Hex("cafebabe".to_owned()),
UserId::Hex("beefcafe".to_owned()),
UserId::Hex("12345678".to_owned()),
UserId::Hex("87654321".to_owned()),
UserId::Hex("abcdef12".to_owned()),
UserId::Hex("34567890".to_owned()),
UserId::Hex("98765432".to_owned()),
UserId::Hex("fedcba98".to_owned()),
UserId::Plain("plain_id".to_owned()),
];
// hex_ratio = 9/10 = 0.9
// Code requires > 0.9, not >=, so this should be Ambiguous
Expand All @@ -245,16 +307,16 @@ mod tests {
// Testing near 90% threshold for base64
// 9 base64 users and 1 plain = 90% exactly
let user_ids = vec![
"Y2FmZQ==", // cafe
"Zm9vYmFy", // foobar
"aGVsbG8=", // hello
"d29ybGQ=", // world
"dGVzdA==", // test
"YWJjZA==", // abcd
"eHl6Nzg=", // xyz78
"cXdlcnQ=", // qwert
"MTIzNDU=", // 12345
"plain_id",
UserId::Base64("Y2FmZQ==".to_owned()), // cafe
UserId::Base64("Zm9vYmFy".to_owned()), // foobar
UserId::Base64("aGVsbG8=".to_owned()), // hello
UserId::Base64("d29ybGQ=".to_owned()), // world
UserId::Base64("dGVzdA==".to_owned()), // test
UserId::Base64("YWJjZA==".to_owned()), // abcd
UserId::Base64("eHl6Nzg=".to_owned()), // xyz78
UserId::Base64("cXdlcnQ=".to_owned()), // qwert
UserId::Base64("MTIzNDU=".to_owned()), // 12345
UserId::Plain("plain_id".to_owned()),
];
// base64_ratio = 9/10 = 0.9
// Code requires > 0.9, not >=, so this should be Ambiguous
Expand All @@ -265,7 +327,11 @@ mod tests {
async fn test_empty_ids() {
// Empty IDs should be skipped. Only one non-empty user which is hex.
// hex_count=1, total=1 => ratio=1.0 > 0.8 => Hex
let user_ids = vec!["", "", "cafebabe"];
let user_ids = vec![
UserId::Plain("".to_owned()),
UserId::Plain("".to_owned()),
UserId::Hex("cafebabe".to_owned()),
];
run_detection_test(user_ids, ExternalIdEncoding::Hex);
}

Expand All @@ -275,14 +341,14 @@ mod tests {

#[tokio::test]
async fn test_conversion_hex_to_hex() {
let original_id = "deadbeef";
let original_id = UserId::Hex("deadbeef".to_owned());
// Expected hex, no changes should be made.
run_conversion_test(original_id, ExternalIdEncoding::Hex, "deadbeef");
}

#[tokio::test]
async fn test_conversion_base64_to_hex() {
let original_id = "Y2FmZQ=="; // "cafe"
let original_id = UserId::Base64("Y2FmZQ==".to_owned()); // "cafe"

// Expected base64, we decode base64 => "cafe" and then hex encode the bytes of
// "cafe". "cafe" as ASCII: 0x63 0x61 0x66 0x65 in hex is "63616665"
Expand All @@ -291,7 +357,7 @@ mod tests {

#[tokio::test]
async fn test_conversion_plain_to_hex() {
let original_id = "plain_id";
let original_id = UserId::Plain("plain_id".to_owned());
// Expected plain without encoding, so just hex-encode the ASCII.
// 'p' = 0x70, 'l' = 0x6c, 'a' = 0x61, 'i' = 0x69, 'n' = 0x6e, '_'=0x5f,
// 'i'=0x69, 'd'=0x64 => "706c61696e5f6964"
Expand All @@ -307,9 +373,9 @@ mod tests {
"[email protected]".to_owned(),
None,
true,
None,
"Y2FmZQ==".to_owned(), // base64 encoded external ID
Some("test.localpart".to_owned()), // localpart should be preserved
"Example User".to_owned(),
"Y2FmZQ==".to_owned(), // base64 encoded external ID
"test.localpart".to_owned(), // localpart should be preserved
);

let migrated_user = original_user
Expand All @@ -319,6 +385,6 @@ mod tests {
// External ID should be converted from base64 to hex
assert_eq!(migrated_user.get_external_id(), hex::encode("cafe"));
// Localpart should remain unchanged
assert_eq!(migrated_user.get_localpart(), Some("test.localpart"));
assert_eq!(migrated_user.get_localpart(), "test.localpart");
}
}
40 changes: 26 additions & 14 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
use anyhow::{Context, Result};
use futures::{Stream, StreamExt};
use user::User;
use zitadel::Zitadel;
use zitadel::{Zitadel, ZitadelUserBuilder};

mod config;
mod sources;
Expand All @@ -22,29 +22,41 @@ use sources::{csv::CsvSource, ldap::LdapSource, ukt::UktSource, Source};
// TODO: If async closures become a reality, this should be factored
// into the `zitadel::search_result_to_user` function
pub async fn get_next_zitadel_user(
stream: &mut (impl Stream<Item = Result<(User, String)>> + Send + Unpin),
stream: &mut (impl Stream<Item = Result<(ZitadelUserBuilder, String)>> + Send + Unpin),
zitadel: &mut Zitadel,
) -> Result<Option<(User, String)>> {
match stream.next().await.transpose()? {
Some(mut zitadel_user) => {
let preferred_username = zitadel
Some((zitadel_user, id)) => {
let Some(localpart) = zitadel
.zitadel_client
.get_user_metadata(&zitadel_user.1, "preferred_username")
.get_user_metadata(&id, "localpart")
.await
.ok()
.and_then(|metadata| metadata.metadata().value());
.and_then(|metadata| metadata.metadata().value())
else {
tracing::warn!("Zitadel user without a valid localpart: {}", id);
return Box::pin(get_next_zitadel_user(stream, zitadel)).await;
};

let localpart = zitadel
let Some(preferred_username) = zitadel
.zitadel_client
.get_user_metadata(&zitadel_user.1, "localpart")
.get_user_metadata(&id, "preferred_username")
.await
.ok()
.and_then(|metadata| metadata.metadata().value());

zitadel_user.0.preferred_username = preferred_username;
zitadel_user.0.localpart = localpart;

Ok(Some(zitadel_user))
.and_then(|metadata| metadata.metadata().value())
else {
tracing::warn!("Zitadel user without a preferred username: {}", id);
return Box::pin(get_next_zitadel_user(stream, zitadel)).await;

Check warning on line 49 in src/lib.rs

View check run for this annotation

Codecov / codecov/patch

src/lib.rs#L48-L49

Added lines #L48 - L49 were not covered by tests
};

// We've supplied the required data, so this should never
// error in practice; if it does, that's a bug
let user = zitadel_user
.with_localpart(localpart)
.with_preferred_username(preferred_username)
.build()?;

Ok(Some((user, id)))
}
None => Ok(None),
}
Expand Down
Loading

0 comments on commit 7ddbd2a

Please sign in to comment.