Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
hderms committed Apr 23, 2021
0 parents commit a3e57f1
Show file tree
Hide file tree
Showing 9 changed files with 360 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/target
189 changes: 189 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[package]
name = "qsv"
version = "0.1.0"
authors = ["Dermot Haughey <[email protected]>"]
edition = "2018"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
rusqlite = "0.25.1"
csv="1.1"
itertools = "0.10.0"
53 changes: 53 additions & 0 deletions src/csv/csv.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
use csv::StringRecord;
use std::error::Error;
use std::fs::File;
use std::io::BufReader;
/// A CSV value tagged with its kind: a signed integer or a piece of text.
#[derive(Debug, PartialEq, Eq)]
pub enum CsvWrapper {
    /// An integer value.
    Numeric(i64),
    /// An owned text value.
    String(String),
}
impl CsvWrapper {
    /// Returns the [`CsvType`] tag matching this value's variant,
    /// ignoring the payload it carries.
    pub fn get_type(&self) -> CsvType {
        match self {
            Self::Numeric(..) => CsvType::Numeric,
            Self::String(..) => CsvType::String,
        }
    }
}

/// The kind of a CSV value, without the value itself.
///
/// Cheap to copy and usable as a hash key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CsvType {
    /// Integer data.
    Numeric,
    /// Textual data.
    String,
}
/// A CSV document held entirely in memory: its header row plus its records.
pub struct Csv {
/// The records of the document, in the order they were read.
pub records: Vec<StringRecord>,
/// The header row, kept separately from `records`.
pub headers: StringRecord,
}
impl Csv {
    /// Loads the CSV file at `filename` into memory.
    ///
    /// All records are read first; the header row is then copied out of the
    /// reader.
    ///
    /// # Errors
    ///
    /// Returns the boxed error if the file cannot be opened, or if the reader
    /// fails on any record or on the headers.
    fn from_filename(filename: &str) -> Result<Csv, Box<dyn Error>> {
        let source = BufReader::new(File::open(filename)?);
        let mut reader = csv::Reader::from_reader(source);
        // Collecting into `Result` stops at the first record that fails to read.
        let records = reader.records().collect::<Result<Vec<_>, _>>()?;
        let headers = reader.headers()?.clone();
        Ok(Csv { records, headers })
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The fixture file yields exactly one record: `bar,13`.
    #[test]
    fn it_can_load_file() {
        let loaded = Csv::from_filename("testdata/test.csv").unwrap();
        let expected_rows = vec![StringRecord::from(vec!["bar", "13"])];
        assert_eq!(loaded.records, expected_rows);
    }
}
71 changes: 71 additions & 0 deletions src/csv/inference.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
use crate::csv::csv::{Csv, CsvType, CsvWrapper};
use csv::StringRecord;
use itertools::Itertools;
use std::collections::HashMap;
use std::num::ParseIntError;

/// The result of type inference over a CSV document's columns.
struct ColumnInference {
/// Maps each header name to the type inferred for that column.
columns_to_types: HashMap<String, CsvType>,
}

impl ColumnInference {
    /// Infers one [`CsvType`] for every header column of `csv`.
    ///
    /// A column is `Numeric` only when every one of its cells parses as an
    /// integer. A column with mixed cells, or a document with no records at
    /// all, falls back to `String`.
    ///
    /// A record that is shorter than the header row is treated as having an
    /// empty cell in each missing column (which makes that column `String`)
    /// instead of panicking.
    ///
    /// If two headers share a name, the later column's type wins.
    fn from_csv(csv: Csv) -> ColumnInference {
        let columns_to_types: HashMap<String, CsvType> = csv
            .headers
            .iter()
            .enumerate()
            .map(|(i, header)| {
                // Distinct cell types seen in column `i`, computed lazily.
                let mut kinds = csv
                    .records
                    .iter()
                    .map(|record| parse(record.get(i).unwrap_or("")).get_type())
                    .unique();
                // Exactly one distinct type means the column is homogeneous;
                // zero or several means we cannot do better than `String`.
                let inferred = match (kinds.next(), kinds.next()) {
                    (Some(only), None) => only,
                    _ => CsvType::String,
                };
                (String::from(header), inferred)
            })
            .collect();
        ColumnInference { columns_to_types }
    }

    /// Looks up the inferred type of the column named `s`.
    ///
    /// Returns `None` when no column with that header exists.
    pub fn get_type(&self, s: String) -> Option<&CsvType> {
        self.columns_to_types.get(s.as_str())
    }
}
/// Classifies a raw cell: text that parses as an `i64` becomes
/// `CsvWrapper::Numeric`, anything else is kept verbatim as
/// `CsvWrapper::String`.
fn parse(s: &str) -> CsvWrapper {
    let attempt: Result<i64, ParseIntError> = s.parse::<i64>();
    match attempt {
        Ok(number) => CsvWrapper::Numeric(number),
        Err(_) => CsvWrapper::String(String::from(s)),
    }
}
#[cfg(test)]
mod test {
    use super::*;

    /// Positive and negative integers are recognised as numeric.
    #[test]
    fn it_should_parse_integers() {
        for (text, number) in [("1", 1), ("-1", -1)].iter().copied() {
            assert_eq!(parse(text), CsvWrapper::Numeric(number));
        }
    }

    /// Non-numeric text is preserved as a string value.
    #[test]
    fn it_should_parse_strings() {
        for text in ["foo", "bar"].iter().copied() {
            assert_eq!(parse(text), CsvWrapper::String(String::from(text)));
        }
    }

    /// A column of words is inferred as `String`, a column of integers as `Numeric`.
    #[test]
    fn it_should_recognize_integer_column() {
        let table = Csv {
            headers: StringRecord::from(vec!["foo", "bar"]),
            records: vec![
                StringRecord::from(vec!["entry1", "1"]),
                StringRecord::from(vec!["entry2", "2"]),
            ],
        };
        let inference = ColumnInference::from_csv(table);

        let foo_type = inference.get_type(String::from("foo"));
        assert_eq!(foo_type, Some(&CsvType::String));

        let bar_type = inference.get_type(String::from("bar"));
        assert_eq!(bar_type, Some(&CsvType::Numeric));
    }
}
2 changes: 2 additions & 0 deletions src/csv/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
mod csv;
mod inference;
24 changes: 24 additions & 0 deletions src/db.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
use rusqlite::{Connection, Result};

/// Thin wrapper owning a rusqlite [`Connection`].
struct Db {
/// The underlying SQLite connection; queries are issued directly on it.
pub connection: Connection,
}
impl Db {
    /// Opens a new in-memory SQLite database and wraps its connection.
    ///
    /// # Errors
    ///
    /// Propagates the rusqlite error if the connection cannot be opened.
    fn open_in_memory() -> Result<Db> {
        Connection::open_in_memory().map(|connection| Db { connection })
    }
}
#[cfg(test)]
mod test {
    use super::*;

    /// A trivial comparison query evaluates to 1 on a fresh in-memory database.
    #[test]
    fn can_execute_a_query() {
        let Db { connection } = Db::open_in_memory().unwrap();
        let outcome = connection.query_row("SELECT 1 = 1", [], |row| row.get::<_, usize>(0));
        assert_eq!(outcome.unwrap(), 1);
    }
}
6 changes: 6 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
mod csv;
mod db;

/// Program entry point; currently only prints a greeting to standard output.
fn main() {
    let greeting = "Hello, world!";
    println!("{}", greeting);
}
2 changes: 2 additions & 0 deletions testdata/test.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
foo,age
bar,13

0 comments on commit a3e57f1

Please sign in to comment.