From 8ac105feb7f4d26d36363b783c28ca526d629ea1 Mon Sep 17 00:00:00 2001 From: Maria Dubyaga Date: Wed, 18 Mar 2020 16:33:06 -0400 Subject: [PATCH 1/2] world_population --- Cargo.toml | 3 +- src/covid19_data.rs | 141 ++++++++++++++++++++++++ src/lib.rs | 6 + src/main.rs | 129 +--------------------- src/parquet_writer.rs | 11 ++ src/world_population.rs | 88 +++++++++++++++ world_pop2020.csv | 236 ++++++++++++++++++++++++++++++++++++++++ 7 files changed, 490 insertions(+), 124 deletions(-) create mode 100644 src/covid19_data.rs create mode 100644 src/world_population.rs create mode 100644 world_pop2020.csv diff --git a/Cargo.toml b/Cargo.toml index 7e23e57..d5255b1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,4 +23,5 @@ slack-hook = "0.7" sentry = "0.12.0" log = "0.4" pretty_env_logger = "0.3" -byte-unit = "2.1.0" \ No newline at end of file +byte-unit = "2.1.0" +futures = "0.1" \ No newline at end of file diff --git a/src/covid19_data.rs b/src/covid19_data.rs new file mode 100644 index 0000000..f861cfb --- /dev/null +++ b/src/covid19_data.rs @@ -0,0 +1,141 @@ +//use dracula_covid19::*; +use crate::error::DracErr; +//use sentry::integrations::panic::register_panic_handler; + +use crate::CovidRecord; +use crate::aws::*; +use crate::cleaner::*; +use crate::parquet_writer::write_records_to_file; + +const CONFIRMED_URL: &str = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv"; +const DEATHS_URL: &str = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv"; +const RECOVERED_URL: &str = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Recovered.csv"; + +#[tokio::main] +pub async fn covid19_data() -> Result<(), DracErr> { + let mut records: Vec = Vec::new(); + + extract_records(CONFIRMED_URL, "confirmed", &mut records) + .await + .unwrap(); + extract_records(DEATHS_URL, "deaths", &mut records) + .await + .unwrap(); + extract_records(RECOVERED_URL, "recovered", &mut records) + .await + .unwrap(); + + write_records_to_file("combined.parquet", records); + + let bucket = "scientist-datawarehouse".to_string(); + let key = + "csse_covid_19_time_series/combined/time_series_19-covid-Combined.parquet".to_string(); + let crawler_name = "covid19-combined".to_string(); + + let key_parts = key.split('/').collect::>(); + let key_dir = key_parts[0..key_parts.len() - 1].join("/"); + let s3_path = format!("s3://{}/{}", bucket, key_dir); + + upload_file("combined.parquet", bucket.clone(), key.clone()) + .await + .unwrap(); + create_crawler(crawler_name.clone(), s3_path).await.unwrap(); + start_crawler(crawler_name.clone(), true).await.unwrap(); + + Ok(()) +} + +async fn extract_records( + input_url: &str, + status: &str, + records: &mut Vec, +) -> Result<(), DracErr> { + let req = reqwest::get(input_url).await?; + let bytes = req.bytes().await?; + let bytes_reader = std::io::Cursor::new(&bytes[..]); + + let mut reader = csv::ReaderBuilder::new().from_reader(bytes_reader); + + let dates: Vec = { + let headers = reader.headers()?; + let mut header_iter = headers.iter(); + header_iter.next(); // Province + header_iter.next(); // Country + header_iter.next(); // Lat + header_iter.next(); // Long + header_iter + .map(|date_str| { + // panic!("{}", date_str); + let res = chrono::NaiveDateTime::parse_from_str( + &format!("{} 00:00", date_str), + "%-m/%-d/%y %H:%M", + ); + if let Ok(res) = res { + res + } else { + panic!("could not parse `{}`", date_str) + } + }) + .collect() + }; + + for row in reader.records() { + let row = row?; + let mut row_iter = row.iter(); + + let province_state = row_iter.next().unwrap().to_string(); + let province_state = if province_state == "" { + None + } else { + Some(province_state) + }; + + let country_region = row_iter.next().unwrap().to_string(); + + let (city, county, state) = if country_region == "US" { + extract_us_data(&province_state.as_ref().unwrap()[..]) + } else { + (None, None, None) + }; + + let lat = row_iter.next().unwrap().to_string(); + let long = row_iter.next().unwrap().to_string(); + + for date in dates.iter().cloned() { + let date_count_str = row_iter.next().unwrap(); + + let count: i64 = date_count_str.parse().unwrap_or_default(); + + let lat = if lat == "" { + None + } else { + Some(lat.parse().unwrap()) + }; + + let lon = if long == "" { + None + } else { + Some(long.parse().unwrap()) + }; + + let mut record = CovidRecord { + status: status.to_string(), + province_state: province_state.clone(), + state: state.clone(), + county: county.clone(), + city: city.clone(), + country_region: country_region.clone(), + lat, + lon, + date, + count, + }; + + remap_territories(&mut record); + + records.push(record) + } + } + + Ok(()) +} diff --git a/src/lib.rs b/src/lib.rs index faa20e3..83e4f6c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -19,3 +19,9 @@ pub use parquet_writer::*; mod cleaner; pub use cleaner::*; + +pub mod covid19_data; +pub use covid19_data::*; + +pub mod world_population; +pub use world_population::*; diff --git a/src/main.rs b/src/main.rs index 10be3cf..d7b14e9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,12 +1,14 @@ use dracula_covid19::*; use sentry::integrations::panic::register_panic_handler; +//use dracula_covid19::covid19_data::covid19_data; const CONFIRMED_URL: &str = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv"; const DEATHS_URL: &str = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv"; const RECOVERED_URL: &str = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Recovered.csv"; -#[tokio::main] -async fn main() -> Result<(), DracErr> { +//#[tokio::main] +//async +fn main() -> Result<(), ()> { let mut records: Vec = Vec::new(); let _guard = sentry::init(( @@ -19,127 +21,8 @@ async fn main() -> Result<(), DracErr> { register_panic_handler(); - extract_records(CONFIRMED_URL, "confirmed", &mut records) - .await - .unwrap(); - extract_records(DEATHS_URL, "deaths", &mut records) - .await - .unwrap(); - extract_records(RECOVERED_URL, "recovered", &mut records) - .await - .unwrap(); - - write_records_to_file("combined.parquet", records); - - let bucket = "scientist-datawarehouse".to_string(); - let key = - "csse_covid_19_time_series/combined/time_series_19-covid-Combined.parquet".to_string(); - let crawler_name = "covid19-combined".to_string(); - - let key_parts = key.split('/').collect::>(); - let key_dir = key_parts[0..key_parts.len() - 1].join("/"); - let s3_path = format!("s3://{}/{}", bucket, key_dir); - - upload_file("combined.parquet", bucket.clone(), key.clone()) - .await - .unwrap(); - create_crawler(crawler_name.clone(), s3_path).await.unwrap(); - start_crawler(crawler_name.clone(), true).await.unwrap(); - - Ok(()) -} - -async fn extract_records( - input_url: &str, - status: &str, - records: &mut Vec, -) -> Result<(), DracErr> { - let req = reqwest::get(input_url).await?; - let bytes = req.bytes().await?; - let bytes_reader = std::io::Cursor::new(&bytes[..]); - - let mut reader = csv::ReaderBuilder::new().from_reader(bytes_reader); - - let dates: Vec = { - let headers = reader.headers()?; - let mut header_iter = headers.iter(); - header_iter.next(); // Province - header_iter.next(); // Country - header_iter.next(); // Lat - header_iter.next(); // Long - header_iter - .map(|date_str| { - // panic!("{}", date_str); - let res = chrono::NaiveDateTime::parse_from_str( - &format!("{} 00:00", date_str), - "%-m/%-d/%y %H:%M", - ); - if let Ok(res) = res { - res - } else { - panic!("could not parse `{}`", date_str) - } - }) - .collect() - }; - - for row in reader.records() { - let row = row?; - let mut row_iter = row.iter(); - - let province_state = row_iter.next().unwrap().to_string(); - let province_state = if province_state == "" { - None - } else { - Some(province_state) - }; - - let country_region = row_iter.next().unwrap().to_string(); - - let (city, county, state) = if country_region == "US" { - extract_us_data(&province_state.as_ref().unwrap()[..]) - } else { - (None, None, None) - }; - - let lat = row_iter.next().unwrap().to_string(); - let long = row_iter.next().unwrap().to_string(); - - for date in dates.iter().cloned() { - let date_count_str = row_iter.next().unwrap(); - - let count: i64 = date_count_str.parse().unwrap_or_default(); - - let lat = if lat == "" { - None - } else { - Some(lat.parse().unwrap()) - }; - - let lon = if long == "" { - None - } else { - Some(long.parse().unwrap()) - }; - - let mut record = CovidRecord { - status: status.to_string(), - province_state: province_state.clone(), - state: state.clone(), - county: county.clone(), - city: city.clone(), - country_region: country_region.clone(), - lat, - lon, - date, - count, - }; - - remap_territories(&mut record); - - records.push(record) - } - } + covid19_data(); + world_pop(); Ok(()) } diff --git a/src/parquet_writer.rs b/src/parquet_writer.rs index 169c27e..edea461 100644 --- a/src/parquet_writer.rs +++ b/src/parquet_writer.rs @@ -9,6 +9,7 @@ use std::fs::File; use std::rc::Rc; use crate::CovidRecord; +use crate::Population; pub fn write_records_to_file(path: &str, records: Vec) { let mut parquet_writer = parquet_writer::(path).unwrap(); @@ -30,3 +31,13 @@ pub fn parquet_writer( SerializedFileWriter::new(file, Rc::new(schema), props) } + +pub fn write_records_to_file_population(path: &str, records: Vec) { + let mut parquet_writer = parquet_writer::(path).unwrap(); + + let mut row_group = parquet_writer.next_row_group().unwrap(); + (&records[..]).write_to_row_group(&mut row_group).unwrap(); + parquet_writer.close_row_group(row_group).unwrap(); + + parquet_writer.close().unwrap(); +} diff --git a/src/world_population.rs b/src/world_population.rs new file mode 100644 index 0000000..eb7bfd9 --- /dev/null +++ b/src/world_population.rs @@ -0,0 +1,88 @@ +//use dracula_covid19::*; +use crate::aws::*; +use crate::cleaner::*; +use crate::error::DracErr; +use crate::parquet_writer::write_records_to_file_population; +use parquet::record::{RecordSchema, RecordWriter}; +use std::env; + +use serde::Deserialize; +use std::error::Error; +use std::ffi::OsStr; +use std::ffi::OsString; +use std::fs::File; +use std::io; +use std::process; + +#[derive(Debug, Deserialize, ParquetRecordWriter, ParquetRecordSchema)] +pub struct Population { + id: i32, + #[serde(deserialize_with = "csv::invalid_option")] + country: Option, + population: i32, + yearly_change: f64, + net_change: i32, + density_p_sq_km: f32, + land_area_sq_km: i32, + + migrants_net: Option, + #[serde(deserialize_with = "csv::invalid_option")] + fert_rate: Option, + #[serde(deserialize_with = "csv::invalid_option")] + med_age: Option, + #[serde(deserialize_with = "csv::invalid_option")] + urban_pop: Option, + #[serde(deserialize_with = "csv::invalid_option")] + world_share: Option, +} +const POPULATION_URL: &str = "world_pop2020.csv"; + +#[tokio::main] +pub async fn example() -> Result<(), Box> { + //async fn example() -> Result<(), Box> { + // let file_path = get_first_arg()?; + let mut recs: Vec = Vec::new(); + let file_path = OsString::from("world_pop2020.csv"); + let file = File::open(file_path)?; + let mut rdr = csv::Reader::from_reader(file); + // let mut rdr = csv::Reader::from_reader(io::stdin()); + for result in rdr.deserialize() { + // Notice that we need to provide a type hint for automatic + // deserialization. + let record: Population = result?; + println!("{:?}", &record); + recs.push(record); + } + write_records_to_file_population("population.parquet", recs); + + let bucket = "scientist-datawarehouse".to_string(); + let key = "Population/population.parquet".to_string(); + // let crawler_name = "population".to_string(); + let crawler_name = "population".to_string(); + + let key_parts = key.split('/').collect::>(); + let key_dir = key_parts[0..key_parts.len() - 1].join("/"); + let s3_path = format!("s3://{}/{}", bucket, key_dir); + +// use futures::future::ok; + upload_file("population.parquet", bucket.clone(), key.clone()) + .await + .unwrap(); + + create_crawler(crawler_name.clone(), s3_path) + .await + .unwrap(); + start_crawler(crawler_name.clone(), true) + .await + .unwrap(); + + Ok(()) +} + +pub fn world_pop() { + + if let Err(err) = example() { + println!("{}", err); + process::exit(1); + } +} diff --git a/world_pop2020.csv b/world_pop2020.csv new file mode 100644 index 0000000..c133088 --- /dev/null +++ b/world_pop2020.csv @@ -0,0 +1,236 @@ +id,country,population,yearly_change,net_change,density_p_sq_km,land_area_sq_km,migrants_net,fert_rate,med_age,urban_pop,world_share +1,China,1439323776,0.00,5540090,153,9388211,-348399,1.7,38,0.61,0.18 +2,India,1380004385,0.01,13586631,464,2973190,-532687,2.2,28,0.35,0.18 +3,United States,331002651,0.01,1937734,36,9147420,954806,1.8,38,0.83,0.04 +4,Indonesia,273523615,0.01,2898047,151,1811570,-98955,2.3,30,0.56,0.04 +5,Pakistan,220892340,0.02,4327022,287,770880,-233379,3.6,23,0.35,0.03 +6,Brazil,212559417,0.01,1509890,25,8358140,21200,1.7,33,0.88,0.03 +7,Nigeria,206139589,0.03,5175990,226,910770,-60000,5.4,18,0.52,0.03 +8,Bangladesh,164689383,0.01,1643222,1265,130170,-369501,2.1,28,0.39,0.02 +9,Russia,145934462,0.00,62206,9,16376870,182456,1.8,40,0.74,0.02 +10,Mexico,128932753,0.01,1357224,66,1943950,-60000,2.1,29,0.84,0.02 +11,Japan,126476461,0.00,-383840,347,364555,71560,1.4,48,0.92,0.02 +12,Ethiopia,114963588,0.03,2884858,115,1000000,30000,4.3,19,0.21,0.01 +13,Philippines,109581078,0.01,1464463,368,298170,-67152,2.6,26,0.47,0.01 +14,Egypt,102334404,0.02,1946331,103,995450,-38033,3.3,25,0.43,0.01 +15,Vietnam,97338579,0.01,876473,314,310070,-80000,2.1,32,0.38,0.01 +16,DR Congo,89561403,0.03,2770836,40,2267050,23861,6.0,17,0.46,0.01 +17,Turkey,84339067,0.01,909452,110,769630,283922,2.1,32,0.76,0.01 +18,Iran,83992949,0.01,1079043,52,1628550,-55000,2.2,32,0.76,0.01 +19,Germany,83783942,0.00,266897,240,348560,543822,1.6,46,0.76,0.01 +20,Thailand,69799978,0.00,174396,137,510890,19444,1.5,40,0.51,0.01 +21,United Kingdom,67886011,0.01,355839,281,241930,260650,1.8,40,0.83,0.01 +22,France,65273511,0.00,143783,119,547557,36527,1.9,42,0.82,0.01 +23,Italy,60461826,0.00,-88249,206,294140,148943,1.3,47,0.69,0.01 +24,Tanzania,59734218,0.03,1728755,67,885800,-40076,4.9,18,0.37,0.01 +25,South Africa,59308690,0.01,750420,49,1213090,145405,2.4,28,0.67,0.01 +26,Myanmar,54409800,0.01,364380,83,653290,-163313,2.2,29,0.31,0.01 +27,Kenya,53771296,0.02,1197323,94,569140,-10000,3.5,20,0.28,0.01 +28,South Korea,51269185,0.00,43877,527,97230,11731,1.1,44,0.82,0.01 +29,Colombia,50882891,0.01,543448,46,1109500,204796,1.8,31,0.80,0.01 +30,Spain,46754778,0.00,18002,94,498800,40000,1.3,45,0.80,0.01 +31,Uganda,45741007,0.03,1471413,229,199810,168694,5.0,17,0.26,0.01 +32,Argentina,45195774,0.01,415097,17,2736690,4800,2.3,32,0.93,0.01 +33,Algeria,43851044,0.02,797990,18,2381740,-10000,3.1,29,0.73,0.01 +34,Sudan,43849260,0.02,1036022,25,1765048,-50000,4.4,20,0.35,0.01 +35,Ukraine,43733762,-0.01,-259876,75,579320,10000,1.4,41,0.69,0.01 +36,Iraq,40222493,0.02,912710,93,434320,7834,3.7,21,0.73,0.01 +37,Afghanistan,38928346,0.02,886592,60,652860,-62920,4.6,18,0.25,0.01 +38,Poland,37846611,0.00,-41157,124,306230,-29395,1.4,42,0.60,0.00 +39,Canada,37742154,0.01,331107,4,9093510,242032,1.5,41,0.81,0.00 +40,Morocco,36910560,0.01,438791,83,446300,-51419,2.4,30,0.64,0.00 +41,Saudi Arabia,34813871,0.02,545343,16,2149690,134979,2.3,32,0.84,0.00 +42,Uzbekistan,33469203,0.01,487487,79,425400,-8863,2.4,28,0.50,0.00 +43,Peru,32971854,0.01,461401,26,1280000,99069,2.3,31,0.79,0.00 +44,Angola,32866272,0.03,1040977,26,1246700,6413,5.6,17,0.67,0.00 +45,Malaysia,32365999,0.01,416222,99,328550,50000,2.0,30,0.78,0.00 +46,Mozambique,31255435,0.03,889399,40,786380,-5000,4.9,18,0.38,0.00 +47,Ghana,31072940,0.02,655084,137,227540,-10000,3.9,22,0.57,0.00 +48,Yemen,29825964,0.02,664042,56,527970,-30000,3.8,20,0.38,0.00 +49,Nepal,29136808,0.02,528098,203,143350,41710,1.9,25,0.21,0.00 +50,Venezuela,28435940,0.00,-79889,32,882050,-653249,2.3,30,N.A.,0.00 +51,Madagascar,27691018,0.03,721711,48,581795,-1500,4.1,20,0.39,0.00 +52,Cameroon,26545863,0.03,669483,56,472710,-4800,4.6,19,0.56,0.00 +53,Côte d'Ivoire,26378274,0.03,661730,83,318000,-8000,4.7,19,0.51,0.00 +54,North Korea,25778816,0.00,112655,214,120410,-5403,1.9,35,0.63,0.00 +55,Australia,25499884,0.01,296686,3,7682300,158246,1.8,38,0.86,0.00 +56,Niger,24206644,0.04,895929,19,1266700,4000,7.0,15,0.17,0.00 +57,Taiwan,23816775,0.00,42899,673,35410,30001,1.2,42,0.79,0.00 +58,Sri Lanka,21413249,0.00,89516,341,62710,-97986,2.2,34,0.18,0.00 +59,Burkina Faso,20903273,0.03,581895,76,273600,-25000,5.2,18,0.31,0.00 +60,Mali,20250833,0.03,592802,17,1220190,-40000,5.9,16,0.44,0.00 +61,Romania,19237691,-0.01,-126866,84,230170,-73999,1.6,43,0.55,0.00 +62,Malawi,19129952,0.03,501205,203,94280,-16053,4.3,18,0.18,0.00 +63,Chile,19116201,0.01,164163,26,743532,111708,1.7,35,0.85,0.00 +64,Kazakhstan,18776707,0.01,225280,7,2699700,-18000,2.8,31,0.58,0.00 +65,Zambia,18383955,0.03,522925,25,743390,-8000,4.7,18,0.45,0.00 +66,Guatemala,17915568,0.02,334096,167,107160,-9215,2.9,23,0.52,0.00 +67,Ecuador,17643054,0.02,269392,71,248360,36400,2.4,28,0.63,0.00 +68,Syria,17500658,0.03,430523,95,183630,-427391,2.8,26,0.60,0.00 +69,Netherlands,17134872,0.00,37742,508,33720,16000,1.7,43,0.92,0.00 +70,Senegal,16743927,0.03,447563,87,192530,-20000,4.7,19,0.49,0.00 +71,Cambodia,16718965,0.01,232423,95,176520,-30000,2.5,26,0.24,0.00 +72,Chad,16425864,0.03,478988,13,1259200,2000,5.8,17,0.23,0.00 +73,Somalia,15893222,0.03,450317,25,627340,-40000,6.1,17,0.47,0.00 +74,Zimbabwe,14862924,0.01,217456,38,386850,-116858,3.6,19,0.38,0.00 +75,Guinea,13132795,0.03,361549,53,245720,-4000,4.7,18,0.39,0.00 +76,Rwanda,12952218,0.03,325268,525,24670,-9000,4.1,20,0.18,0.00 +77,Benin,12123200,0.03,322049,108,112760,-2000,4.9,19,0.48,0.00 +78,Burundi,11890784,0.03,360204,463,25680,2001,5.5,17,0.14,0.00 +79,Tunisia,11818619,0.01,123900,76,155360,-4000,2.2,33,0.70,0.00 +80,Bolivia,11673021,0.01,159921,11,1083300,-9504,2.8,26,0.69,0.00 +81,Belgium,11589623,0.00,50295,383,30280,48000,1.7,42,0.98,0.00 +82,Haiti,11402528,0.01,139451,414,27560,-35000,3.0,24,0.57,0.00 +83,Cuba,11326616,0.00,-6867,106,106440,-14400,1.6,42,0.78,0.00 +84,South Sudan,11193725,0.01,131612,18,610952,-174200,4.7,19,0.25,0.00 +85,Dominican Republic,10847910,0.01,108952,225,48320,-30000,2.4,28,0.85,0.00 +86,Czech Republic (Czechia),10708981,0.00,19772,139,77240,22011,1.6,43,0.74,0.00 +87,Greece,10423054,0.00,-50401,81,128900,-16000,1.3,46,0.85,0.00 +88,Jordan,10203134,0.01,101440,115,88780,10220,2.8,24,0.91,0.00 +89,Portugal,10196709,0.00,-29478,111,91590,-6000,1.3,46,0.66,0.00 +90,Azerbaijan,10139177,0.01,91459,123,82658,1200,2.1,32,0.56,0.00 +91,Sweden,10099265,0.01,62886,25,410340,40000,1.9,41,0.88,0.00 +92,Honduras,9904607,0.02,158490,89,111890,-6800,2.5,24,0.57,0.00 +93,United Arab Emirates,9890402,0.01,119873,118,83600,40000,1.4,33,0.86,0.00 +94,Hungary,9660351,0.00,-24328,107,90530,6000,1.5,43,0.72,0.00 +95,Tajikistan,9537645,0.02,216627,68,139960,-20000,3.6,22,0.27,0.00 +96,Belarus,9449323,0.00,-3088,47,202910,8730,1.7,40,0.79,0.00 +97,Austria,9006398,0.01,51296,109,82409,65000,1.5,43,0.57,0.00 +98,Papua New Guinea,8947024,0.02,170915,20,452860,-800,3.6,22,0.13,0.00 +99,Serbia,8737371,0.00,-34864,100,87460,4000,1.5,42,0.56,0.00 +100,Israel,8655535,0.02,136158,400,21640,10000,3.0,30,0.93,0.00 +101,Switzerland,8654622,0.01,63257,219,39516,52000,1.5,43,0.74,0.00 +102,Togo,8278724,0.02,196358,152,54390,-2000,4.4,19,0.43,0.00 +103,Sierra Leone,7976983,0.02,163768,111,72180,-4200,4.3,19,0.43,0.00 +104,Hong Kong,7496981,0.01,60827,7140,1050,29308,1.3,45,N.A.,0.00 +105,Laos,7275560,0.01,106105,32,230800,-14704,2.7,24,0.36,0.00 +106,Paraguay,7132538,0.01,87902,18,397300,-16556,2.4,26,0.62,0.00 +107,Bulgaria,6948445,-0.01,-51674,64,108560,-4800,1.6,45,0.76,0.00 +108,Libya,6871292,0.01,93840,4,1759540,-1999,2.3,29,0.78,0.00 +109,Lebanon,6825445,0.00,-30268,667,10230,-30012,2.1,30,0.78,0.00 +110,Nicaragua,6624554,0.01,79052,55,120340,-21272,2.4,26,0.57,0.00 +111,Kyrgyzstan,6524195,0.02,108345,34,191800,-4000,3.0,26,0.36,0.00 +112,El Salvador,6486205,0.01,32652,313,20720,-40539,2.1,28,0.73,0.00 +113,Turkmenistan,6031200,0.02,89111,13,469930,-5000,2.8,27,0.53,0.00 +114,Singapore,5850342,0.01,46005,8358,700,27028,1.2,42,N.A.,0.00 +115,Denmark,5792202,0.00,20326,137,42430,15200,1.8,42,0.88,0.00 +116,Finland,5540720,0.00,8564,18,303890,14000,1.5,43,0.86,0.00 +117,Congo,5518087,0.03,137579,16,341500,-4000,4.5,19,0.70,0.00 +118,Slovakia,5459642,0.00,2629,114,48088,1485,1.5,41,0.54,0.00 +119,Norway,5421241,0.01,42384,15,365268,28000,1.7,40,0.83,0.00 +120,Oman,5106626,0.03,131640,16,309500,87400,2.9,31,0.87,0.00 +121,State of Palestine,5101414,0.02,119994,847,6020,-10563,3.7,21,0.80,0.00 +122,Costa Rica,5094118,0.01,46557,100,51060,4200,1.8,33,0.80,0.00 +123,Liberia,5057681,0.02,120307,53,96320,-5000,4.4,19,0.53,0.00 +124,Ireland,4937786,0.01,55291,72,68890,23604,1.8,38,0.63,0.00 +125,Central African Republic,4829767,0.02,84582,8,622980,-40000,4.8,18,0.43,0.00 +126,New Zealand,4822233,0.01,39170,18,263310,14881,1.9,38,0.87,0.00 +127,Mauritania,4649658,0.03,123962,5,1030700,5000,4.6,20,0.57,0.00 +128,Panama,4314767,0.02,68328,58,74340,11200,2.5,30,0.68,0.00 +129,Kuwait,4270571,0.02,63488,240,17820,39520,2.1,37,N.A.,0.00 +130,Croatia,4105267,-0.01,-25037,73,55960,-8001,1.4,44,0.58,0.00 +131,Moldova,4033963,0.00,-9300,123,32850,-1387,1.3,38,0.43,0.00 +132,Georgia,3989167,0.00,-7598,57,69490,-10000,2.1,38,0.58,0.00 +133,Eritrea,3546421,0.01,49304,35,101000,-39858,4.1,19,0.63,0.00 +134,Uruguay,3473730,0.00,11996,20,175020,-3000,2.0,36,0.96,0.00 +135,Bosnia and Herzegovina,3280819,-0.01,-20181,64,51000,-21585,1.3,43,0.52,0.00 +136,Mongolia,3278290,0.02,53123,2,1553560,-852,2.9,28,0.67,0.00 +137,Armenia,2963243,0.00,5512,104,28470,-4998,1.8,35,0.63,0.00 +138,Jamaica,2961167,0.00,12888,273,10830,-11332,2.0,31,0.55,0.00 +139,Qatar,2881053,0.02,48986,248,11610,40000,1.9,32,0.96,0.00 +140,Albania,2877797,0.00,-3120,105,27400,-14000,1.6,36,0.63,0.00 +141,Puerto Rico,2860853,-0.02,-72555,323,8870,-97986,1.2,44,N.A.,0.00 +142,Lithuania,2722289,-0.01,-37338,43,62674,-32780,1.7,45,0.71,0.00 +143,Namibia,2540905,0.02,46375,3,823290,-4806,3.4,22,0.55,0.00 +144,Gambia,2416668,0.03,68962,239,10120,-3087,5.3,18,0.59,0.00 +145,Botswana,2351627,0.02,47930,4,566730,3000,2.9,24,0.73,0.00 +146,Gabon,2225734,0.02,53155,9,257670,3260,4.0,23,0.87,0.00 +147,Lesotho,2142249,0.01,16981,71,30360,-10047,3.2,24,0.31,0.00 +148,North Macedonia,2083374,0.00,-85,83,25220,-1000,1.5,39,0.59,0.00 +149,Slovenia,2078938,0.00,284,103,20140,2000,1.6,45,0.55,0.00 +150,Guinea-Bissau,1968001,0.02,47079,70,28120,-1399,4.5,19,0.45,0.00 +151,Latvia,1886198,-0.01,-20545,30,62200,-14837,1.7,44,0.69,0.00 +152,Bahrain,1701575,0.04,60403,2239,760,47800,2.0,32,0.89,0.00 +153,Equatorial Guinea,1402985,0.03,46999,50,28050,16000,4.6,22,0.73,0.00 +154,Trinidad and Tobago,1399488,0.00,4515,273,5130,-800,1.7,36,0.52,0.00 +155,Estonia,1326535,0.00,887,31,42390,3911,1.6,42,0.68,0.00 +156,Timor-Leste,1318445,0.02,25326,89,14870,-5385,4.1,21,0.33,0.00 +157,Mauritius,1271768,0.00,2100,626,2030,0,1.4,37,0.41,0.00 +158,Cyprus,1207359,0.01,8784,131,9240,5000,1.3,37,0.67,0.00 +159,Eswatini,1160164,0.01,12034,67,17200,-8353,3.0,21,0.30,0.00 +160,Djibouti,988000,0.01,14440,43,23180,900,2.8,27,0.79,0.00 +161,Fiji,896445,0.01,6492,49,18270,-6202,2.8,28,0.59,0.00 +162,Réunion,895312,0.01,6385,358,2500,-1256,2.3,36,1.00,0.00 +163,Comoros,869601,0.02,18715,467,1861,-2000,4.2,20,0.29,0.00 +164,Guyana,786552,0.00,3786,4,196850,-6000,2.5,27,0.27,0.00 +165,Bhutan,771608,0.01,8516,20,38117,320,2.0,28,0.46,0.00 +166,Solomon Islands,686884,0.03,17061,25,27990,-1600,4.4,20,0.23,0.00 +167,Macao,649335,0.01,8890,21645,30,5000,1.2,39,N.A.,0.00 +168,Montenegro,628066,0.00,79,47,13450,-480,1.8,39,0.68,0.00 +169,Luxembourg,625978,0.02,10249,242,2590,9741,1.5,40,0.88,0.00 +170,Western Sahara,597339,0.03,14876,2,266000,5582,2.4,28,0.87,0.00 +171,Suriname,586632,0.01,5260,4,156000,-1000,2.4,29,0.65,0.00 +172,Cabo Verde,555987,0.01,6052,138,4030,-1342,2.3,28,0.68,0.00 +173,Maldives,540544,0.02,9591,1802,300,11370,1.9,30,0.35,0.00 +174,Malta,441543,0.00,1171,1380,320,900,1.5,43,0.93,0.00 +175,Brunei ,437479,0.01,4194,83,5270,0,1.8,32,0.80,0.00 +176,Guadeloupe,400124,0.00,68,237,1690,-1440,2.2,44,N.A.,0.00 +177,Belize,397628,0.02,7275,17,22810,1200,2.3,25,0.46,0.00 +178,Bahamas,393244,0.01,3762,39,10010,1000,1.8,32,0.86,0.00 +179,Martinique,375265,0.00,-289,354,1060,-960,1.9,47,0.92,0.00 +180,Iceland,341243,0.01,2212,3,100250,380,1.8,37,0.94,0.00 +181,Vanuatu,307145,0.02,7263,25,12190,120,3.8,21,0.24,0.00 +182,French Guiana,298682,0.03,7850,4,82200,1200,3.4,25,0.87,0.00 +183,Barbados,287375,0.00,350,668,430,-79,1.6,40,0.31,0.00 +184,New Caledonia,285498,0.01,2748,16,18280,502,2.0,34,0.72,0.00 +185,French Polynesia,280908,0.01,1621,77,3660,-1000,2.0,34,0.64,0.00 +186,Mayotte,272815,0.03,6665,728,375,0,3.7,20,0.46,0.00 +187,Sao Tome & Principe,219159,0.02,4103,228,960,-1680,4.4,19,0.74,0.00 +188,Samoa,198414,0.01,1317,70,2830,-2803,3.9,22,0.18,0.00 +189,Saint Lucia,183627,0.00,837,301,610,0,1.4,34,0.19,0.00 +190,Channel Islands,173863,0.01,1604,915,190,1351,1.5,43,0.30,0.00 +191,Guam,168775,0.01,1481,313,540,-506,2.3,31,0.95,0.00 +192,Curaçao,164093,0.00,669,370,444,515,1.8,42,0.89,0.00 +193,Kiribati,119449,0.02,1843,147,810,-800,3.6,23,0.57,0.00 +194,Micronesia,115023,0.01,1208,164,700,-600,3.1,24,0.21,0.00 +195,Grenada,112523,0.00,520,331,340,-200,2.1,32,0.35,0.00 +196,St. Vincent & Grenadines,110940,0.00,351,284,390,-200,1.9,33,0.53,0.00 +197,Aruba,106766,0.00,452,593,180,201,1.9,41,0.44,0.00 +198,Tonga,105695,0.01,1201,147,720,-800,3.6,22,0.24,0.00 +199,U.S. Virgin Islands,104425,0.00,-153,298,350,-451,2.0,43,0.96,0.00 +200,Seychelles,98347,0.01,608,214,460,-200,2.5,34,0.56,0.00 +201,Antigua and Barbuda,97929,0.01,811,223,440,0,2.0,34,0.26,0.00 +202,Isle of Man,85033,0.01,449,149,570,,N.A.,N.A.,0.53,0.00 +203,Andorra,77265,0.00,123,164,470,,N.A.,N.A.,0.88,0.00 +204,Dominica,71986,0.00,178,96,750,,N.A.,N.A.,0.74,0.00 +205,Cayman Islands,65722,0.01,774,274,240,,N.A.,N.A.,0.97,0.00 +206,Bermuda,62278,0.00,-228,1246,50,,N.A.,N.A.,0.97,0.00 +207,Marshall Islands,59190,0.01,399,329,180,,N.A.,N.A.,0.70,0.00 +208,Northern Mariana Islands,57559,0.01,343,125,460,,N.A.,N.A.,0.88,0.00 +209,Greenland,56770,0.00,98,0,410450,,N.A.,N.A.,0.87,0.00 +210,American Samoa,55191,0.00,-121,276,200,,N.A.,N.A.,0.88,0.00 +211,Saint Kitts & Nevis,53199,0.01,376,205,260,,N.A.,N.A.,0.33,0.00 +212,Faeroe Islands,48863,0.00,185,35,1396,,N.A.,N.A.,0.43,0.00 +213,Sint Maarten,42876,0.01,488,1261,34,,N.A.,N.A.,0.96,0.00 +214,Monaco,39242,0.01,278,26337,1,,N.A.,N.A.,N.A.,0.00 +215,Turks and Caicos,38717,0.01,526,41,950,,N.A.,N.A.,0.89,0.00 +216,Saint Martin,38666,0.02,664,730,53,,N.A.,N.A.,0.00,0.00 +217,Liechtenstein,38128,0.00,109,238,160,,N.A.,N.A.,0.15,0.00 +218,San Marino,33931,0.00,71,566,60,,N.A.,N.A.,0.97,0.00 +219,Gibraltar,33691,0.00,-10,3369,10,,N.A.,N.A.,N.A.,0.00 +220,British Virgin Islands,30231,0.01,201,202,150,,N.A.,N.A.,0.52,0.00 +221,Caribbean Netherlands,26223,0.01,244,80,328,,N.A.,N.A.,0.75,0.00 +222,Palau,18094,0.00,86,39,460,,N.A.,N.A.,N.A.,0.00 +223,Cook Islands,17564,0.00,16,73,240,,N.A.,N.A.,0.75,0.00 +224,Anguilla,15003,0.01,134,167,90,,N.A.,N.A.,N.A.,0.00 +225,Tuvalu,11792,0.01,146,393,30,,N.A.,N.A.,0.62,0.00 +226,Wallis & Futuna,11239,-0.02,-193,80,140,,N.A.,N.A.,0.00,0.00 +227,Nauru,10824,0.01,68,541,20,,N.A.,N.A.,N.A.,0.00 +228,Saint Barthelemy,9877,0.00,30,470,21,,N.A.,N.A.,0.00,0.00 +229,Saint Helena,6077,0.00,18,16,390,,N.A.,N.A.,0.27,0.00 +230,Saint Pierre & Miquelon,5794,0.00,-28,25,230,,N.A.,N.A.,1.00,0.00 +231,Montserrat,4992,0.00,3,50,100,,N.A.,N.A.,0.10,0.00 +232,Falkland Islands,3480,0.03,103,0,12170,,N.A.,N.A.,0.66,0.00 +233,Niue,1626,0.01,11,6,260,,N.A.,N.A.,0.46,0.00 +234,Tokelau,1357,0.01,17,136,10,,N.A.,N.A.,0.00,0.00 +235,Holy See,801,0.00,2,2003,0,,N.A.,N.A.,N.A.,0.00 \ No newline at end of file From de4d8c4ce95e4ee4eed3f83ec11c7a8f339ce5ba Mon Sep 17 00:00:00 2001 From: Maria Dubyaga Date: Thu, 26 Mar 2020 13:35:22 -0400 Subject: [PATCH 2/2] covid data links changed, date format changed --- src/covid19_data.rs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/covid19_data.rs b/src/covid19_data.rs index f861cfb..4c10be3 100644 --- a/src/covid19_data.rs +++ b/src/covid19_data.rs @@ -7,9 +7,12 @@ use crate::aws::*; use crate::cleaner::*; use crate::parquet_writer::write_records_to_file; -const CONFIRMED_URL: &str = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv"; -const DEATHS_URL: &str = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv"; -const RECOVERED_URL: &str = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Recovered.csv"; +//const CONFIRMED_URL: &str = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv"; +const CONFIRMED_URL: &str = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"; +//const DEATHS_URL: &str = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv"; +const DEATHS_URL: &str = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv"; +//const RECOVERED_URL: &str = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Recovered.csv"; +const RECOVERED_URL: &str = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv"; #[tokio::main] pub async fn covid19_data() -> Result<(), DracErr> { @@ -68,11 +71,11 @@ async fn extract_records( // panic!("{}", date_str); let res = chrono::NaiveDateTime::parse_from_str( &format!("{} 00:00", date_str), - "%-m/%-d/%y %H:%M", + "%-m/%-d/%Y %H:%M", ); if let Ok(res) = res { res - } else { + } else { panic!("could not parse `{}`", date_str) } }) @@ -93,7 +96,7 @@ async fn extract_records( let country_region = row_iter.next().unwrap().to_string(); let (city, county, state) = if country_region == "US" { - extract_us_data(&province_state.as_ref().unwrap()[..]) + extract_us_data(&province_state.as_ref().unwrap_or(&"".to_string())[..]) } else { (None, None, None) };