diff --git a/Cargo.lock b/Cargo.lock index 27d7cac..98a457f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3907,6 +3907,7 @@ dependencies = [ "chrono", "clap", "croner", + "csv", "ctor", "deno_core", "dotenvy", diff --git a/Cargo.toml b/Cargo.toml index 7ef1cf1..ae539c7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,6 +26,7 @@ calamine = "0.26.1" chrono = { version = "0.4.38", default-features = false } clap = "4.5.20" croner = "2.0.6" +csv = "1.3.0" deno_core = "0.315.0" dotenvy = "0.15.7" figment = "0.10.19" diff --git a/dev/fixtures/csv_fixture.csv b/dev/fixtures/csv_fixture.csv new file mode 100644 index 0000000..c908a0a --- /dev/null +++ b/dev/fixtures/csv_fixture.csv @@ -0,0 +1,4 @@ +Header N1,Header N2, +Some string,100500, +500100,Some string 2,100 +,,Another string \ No newline at end of file diff --git a/src/trackers/api_ext.rs b/src/trackers/api_ext.rs index f8cf336..abbc50f 100644 --- a/src/trackers/api_ext.rs +++ b/src/trackers/api_ext.rs @@ -19,7 +19,7 @@ use crate::{ tasks::{EmailContent, EmailTaskType, EmailTemplate, HttpTaskType, TaskType}, trackers::{ database_ext::TrackersDatabaseExt, - parsers::XlsParser, + parsers::{CsvParser, XlsParser}, tracker_data_revisions_diff::tracker_data_revisions_diff, web_scraper::{WebScraperContentRequest, WebScraperErrorResponse}, }, @@ -906,6 +906,9 @@ where Some(ref media_type) if XlsParser::supports(media_type) => { XlsParser::parse(&response_bytes)? } + Some(ref media_type) if CsvParser::supports(media_type) => { + CsvParser::parse(&response_bytes)? 
+ } _ => response_bytes, }; @@ -3457,6 +3460,86 @@ mod tests { Ok(()) } + #[sqlx::test] + async fn properly_saves_api_target_revision_with_parser_csv( + pool: PgPool, + ) -> anyhow::Result<()> { + let server = MockServer::start(); + let config = mock_config()?; + + let api = mock_api_with_config(pool, config).await?; + + let trackers = api.trackers(); + let tracker = trackers + .create_tracker( + TrackerCreateParams::new("name_one") + .with_schedule("0 0 * * * *") + .with_target(TrackerTarget::Api(ApiTarget { + url: server.url("/api/get-call").parse()?, + method: None, + headers: None, + body: None, + media_type: Some("text/csv".parse()?), + configurator: None, + extractor: None, + })), + ) + .await?; + + let tracker_data = trackers + .get_tracker_data(tracker.id, Default::default()) + .await?; + assert!(tracker_data.is_empty()); + + let content_mock = server.mock(|when, then| { + when.method(httpmock::Method::GET).path("/api/get-call"); + then.status(200) + .header("Content-Type", "text/csv;charset=UTF-8") + .body(load_fixture("csv_fixture.csv").unwrap()); + }); + + trackers.create_tracker_data_revision(tracker.id).await?; + content_mock.assert(); + + let revs = trackers + .get_tracker_data(tracker.id, Default::default()) + .await?; + assert_debug_snapshot!( + revs.into_iter().map(|rev| rev.data).collect::<Vec<_>>(), + @r###" + [ + TrackerDataValue { + original: Array [ + Array [ + String("Header N1"), + String("Header N2"), + String(""), + ], + Array [ + String("Some string"), + String("100500"), + String(""), + ], + Array [ + String("500100"), + String("Some string 2"), + String("100"), + ], + Array [ + String(""), + String(""), + String("Another string"), + ], + ], + mods: None, + }, + ] + "### + ); + + Ok(()) + } + #[sqlx::test] async fn properly_saves_api_target_revision_with_remote_scripts( pool: PgPool, diff --git a/src/trackers/parsers.rs b/src/trackers/parsers.rs index 7209d9e..07e9e47 100644 --- a/src/trackers/parsers.rs +++ b/src/trackers/parsers.rs @@ -1,3 
+1,4 @@ +mod csv_parser; mod xls_parser; -pub use xls_parser::XlsParser; +pub use self::{csv_parser::CsvParser, xls_parser::XlsParser}; diff --git a/src/trackers/parsers/csv_parser.rs b/src/trackers/parsers/csv_parser.rs new file mode 100644 index 0000000..b84f366 --- /dev/null +++ b/src/trackers/parsers/csv_parser.rs @@ -0,0 +1,105 @@ +use bytes::{Buf, Bytes}; +use mediatype::{ + names::{CSV, TEXT}, + MediaType, }; +use tracing::{debug, warn}; + +/// Parser of the CSV files. Returns JSON representation of the parsed data as a binary +/// data. The JSON structure is a list of rows, each row is a list of cells. +pub struct CsvParser; +impl CsvParser { + /// Check if the given media type is supported by the parser. + pub fn supports(media_type: &MediaType) -> bool { + media_type.ty == TEXT && media_type.subty == CSV + } + + /// Parse the CSV file content and return JSON representation of the parsed data. + pub fn parse(content: &[u8]) -> anyhow::Result<Bytes> { + let mut reader = csv::ReaderBuilder::new() + .flexible(true) + .has_headers(false) + .from_reader(content.reader()); + + let mut rows = vec![]; + for (index, record) in reader.records().enumerate() { + let record = match record { + Ok(record) => record, + Err(err) => { + warn!("Failed to parse CSV record with index {index}: {err:?}"); + continue; + } + }; + + rows.push( + record + .into_iter() + .map(|cell| cell.to_string()) + .collect::<Vec<_>>(), + ); + } + + debug!("Parsed CSV file with {} rows.", rows.len()); + + Ok(Bytes::from(serde_json::to_vec(&rows)?)) + } +} + +#[cfg(test)] +mod tests { + use super::CsvParser; + use crate::tests::load_fixture; + use insta::assert_json_snapshot; + use mediatype::MediaTypeBuf; + + #[test] + fn supports() -> anyhow::Result<()> { + assert!(CsvParser::supports( + &MediaTypeBuf::from_string("text/csv".to_string())?.to_ref() + )); + assert!(CsvParser::supports( + &MediaTypeBuf::from_string("text/csv; charset=utf-8".to_string())?.to_ref() + )); + assert!(!CsvParser::supports( 
&MediaTypeBuf::from_string("application/json".to_string())?.to_ref() + )); + + Ok(()) + } + + #[test] + fn parse() -> anyhow::Result<()> { + let fixture = load_fixture("csv_fixture.csv")?; + let parsed_data = CsvParser::parse(&fixture)?; + + assert_json_snapshot!( + serde_json::from_slice::<serde_json::Value>(&parsed_data)?, + @r###" + [ + [ + "Header N1", + "Header N2", + "" + ], + [ + "Some string", + "100500", + "" + ], + [ + "500100", + "Some string 2", + "100" + ], + [ + "", + "", + "Another string" + ] + ] + "### + ); + + Ok(()) + } +}