From 757b2e888007dcac15aef964e7193ecd2eee1b7e Mon Sep 17 00:00:00 2001 From: Dermot Haughey Date: Wed, 28 Apr 2021 09:30:22 -0500 Subject: [PATCH] add alternate delimiter (#13) * add alternate delimiter * fix test --- src/csv/csv_data.rs | 6 ++++-- src/main.rs | 8 +++++++- src/qsv.rs | 10 +++++++--- testdata/slash_as_separator.csv | 3 +++ tests/integration.rs | 9 +++++++++ 5 files changed, 30 insertions(+), 6 deletions(-) create mode 100644 testdata/slash_as_separator.csv diff --git a/src/csv/csv_data.rs b/src/csv/csv_data.rs index c721e8c..ea31386 100644 --- a/src/csv/csv_data.rs +++ b/src/csv/csv_data.rs @@ -28,11 +28,12 @@ pub struct CsvData { } impl CsvData { ///Load CSVData from a filename - pub fn from_filename(filename: &str) -> Result> { + pub fn from_filename(filename: &str, delimiter: char) -> Result> { debug!("Trying to load CSV from filename {}", filename); let mut records = Vec::with_capacity(10000); let mut rdr = csv::ReaderBuilder::new() .buffer_capacity(16 * (1 << 10)) + .delimiter(delimiter as u8) .from_path(filename)?; for result in rdr.records() { @@ -52,9 +53,10 @@ impl CsvData { #[cfg(test)] mod tests { use super::*; + const delimiter: char = ','; #[test] fn it_can_load_file() { - let csv = CsvData::from_filename("testdata/test.csv").unwrap(); + let csv = CsvData::from_filename("testdata/test.csv", delimiter).unwrap(); assert_eq!(csv.records, vec!(StringRecord::from(vec!("bar", "13")))) } } diff --git a/src/main.rs b/src/main.rs index 74783b1..e959e77 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,18 +7,24 @@ use crate::qsv::{execute_query, write_to_stdout}; use std::error::Error; use simple_logger::SimpleLogger; use clap::{AppSettings, Clap}; +use crate::qsv::Options; + #[derive(Clap)] #[clap(version = "0.1", author = "Dermot H. ")] #[clap(setting = AppSettings::ColoredHelp)] struct Opts { query: String, + #[clap(short, long, default_value=",")] + delimiter: char } fn main() -> Result<(), Box> { SimpleLogger::from_env().init()?; let opts: Opts = Opts::parse(); let query = opts.query; - let results = execute_query(query.as_str())?; + let delimiter = opts.delimiter; + let options = Options{delimiter}; + let results = execute_query(query.as_str(), &options)?; write_to_stdout(results)?; Ok(()) } diff --git a/src/qsv.rs b/src/qsv.rs index 1fe8206..408b7da 100644 --- a/src/qsv.rs +++ b/src/qsv.rs @@ -14,9 +14,12 @@ use uuid::Uuid; use log::debug; type Rows = Vec>; +pub struct Options { + pub delimiter: char +} ///Executes a query, possibly returning Rows -pub fn execute_query(query: &str) -> Result> { +pub fn execute_query(query: &str, options: &Options) -> Result> { let mut collector = Collector::new(); let ast = Parser::parse_sql(query)?; @@ -26,7 +29,7 @@ pub fn execute_query(query: &str) -> Result> { collector.collect(statement); //TODO: should we handle multiple SQL statements later? let mut files_to_tables = HashMap::new(); for filename in collector.table_identifiers.iter() { - if let Ok(()) = maybe_load_file(&mut files_to_tables, filename, &mut db) { + if let Ok(()) = maybe_load_file(&mut files_to_tables, filename, &mut db, options) { debug!("Potential filename from SQL was able to be loaded: {}", filename); } else { debug!("Identifier in SQL could not be loaded as file: {}", filename); @@ -42,8 +45,9 @@ fn maybe_load_file( files_to_tables: &mut HashMap, filename: &str, db: &mut Db, + options: &Options ) -> Result<(), Box> { - let csv = CsvData::from_filename(filename)?; + let csv = CsvData::from_filename(filename, options.delimiter)?; let path = Path::new(filename); debug!("Attempting to load identifier from SQL as file: {}", filename); let table_name = path.file_stem(); //TODO: should we canonicalize path? diff --git a/testdata/slash_as_separator.csv b/testdata/slash_as_separator.csv new file mode 100644 index 0000000..d088838 --- /dev/null +++ b/testdata/slash_as_separator.csv @@ -0,0 +1,3 @@ +occupation/minimum_age +Bartender/32 +Construction Worker/25 diff --git a/tests/integration.rs b/tests/integration.rs index bcf72ce..6aee10c 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -84,3 +84,12 @@ fn it_will_run_a_simple_query_with_stddev() -> Result<(), Box Result<(), Box> { + let mut cmd = Command::cargo_bin("qsv")?; + cmd.arg("select min(minimum_age) from testdata/slash_as_separator.csv"); + cmd.arg("--delimiter=/"); + cmd.assert().success().stdout(predicates::str::contains("25")); + Ok(()) +}