Skip to content

Commit

Permalink
Censor URLs and bad words.
Browse files Browse the repository at this point in the history
Signed-off-by: Gerd Zellweger <[email protected]>
  • Loading branch information
gz committed Jan 7, 2025
1 parent df46f98 commit 39c9810
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 4 deletions.
60 changes: 59 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion server/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,6 @@ log = "0.4.22"
env_logger = "0.11.5"
chrono = "0.4.38"
dashmap = "6.1.0"
tower-http = { version = "0.6.2", features = ["cors"] }
tower-http = { version = "0.6.2", features = ["cors"] }
rustrict = "0.7.33"
regex = "1.10.2"
23 changes: 21 additions & 2 deletions server/src/spreadsheet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ use axum::http::HeaderMap;
use chrono::Utc;
use futures::{sink::SinkExt, stream::StreamExt};
use log::{debug, error, trace, warn};
use regex::Regex;
use reqwest::Client;
use rustrict::Censor;
use serde::{Deserialize, Serialize};
use tokio::sync::{broadcast::Receiver, mpsc, watch, RwLock};

Expand Down Expand Up @@ -314,6 +316,21 @@ struct UpdatePayload {
ts: String,
}

fn replace_domain_in_urls(input: &str, new_domain: &str) -> String {
// Regex breakdown:
// (https?://) captures the protocol (http or https)
// ([^/\s]+) captures the domain portion (everything until a slash or whitespace)
// ([^\s]*) captures the remainder of the URL (path/query/etc. until whitespace)
let url_regex = Regex::new(r"(https?://)([^/\s]+)([^\s]*)").unwrap();

url_regex
.replace_all(input, |caps: &regex::Captures| {
// caps[1] is the scheme+://, caps[2] is the original domain, caps[3] is the path/query
format!("{}{}{}", &caps[1], new_domain, &caps[3])
})
.to_string()
}

pub(crate) async fn post_handler(
headers: HeaderMap,
ConnectInfo(addr): ConnectInfo<SocketAddr>,
Expand All @@ -338,10 +355,12 @@ pub(crate) async fn post_handler(
Json(serde_json::json!({"error": "Invalid cell ID"})),
);
}
let raw_value = update_request.raw_value.chars().take(64).collect::<String>();
let user_value = update_request.raw_value.chars().take(64).collect::<String>();
let censored_urls = replace_domain_in_urls(&user_value, "*REDACTED*");
let censored_input = Censor::new(censored_urls.chars()).censor();
let payload = UpdatePayload {
id: update_request.id,
raw_value,
raw_value: censored_input,
background: update_request.background,
ip: client_ip,
ts: Utc::now().format("%Y-%m-%d %H:%M:%S%.3f").to_string(),
Expand Down

0 comments on commit 39c9810

Please sign in to comment.