Vários arquivos tabulares (CSV), um por aula, com a presença dos alunos.
Quero contar quantas faltas cada aluno tem.
Utilizaremos o universo tidy (tidyverse):
# Install the tidyverse only when it is not already available, so that
# re-running this script does not download and reinstall it every time.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
O pacote magrittr fornece o operador %>%, que permite encadear funções em vez de aninhá-las.
library(magrittr)
# Three tiny helpers used to contrast nested calls with piped calls.

# Adds 1 to `p`.
f <- function(p) {
  p + 1
}

# Adds 2 to `p`.
g <- function(p) {
  p + 2
}

# Takes no arguments and always yields 3.
h <- function() {
  3
}
# Nested form: h() is evaluated first, its result goes to g(), and that
# result goes to f(), giving 3 + 2 + 1 = 6.
f(g(h()));
# Piped form of the same computation: %>% passes the value on its left as the
# first argument of the call on its right, so the steps read top to bottom.
h() %>%
g() %>%
f();
[1] 6 [1] 6
Um arquivo CSV é uma matriz, com colunas separadas por vírgula.
library(readr);
read_csv
function (file, col_names = TRUE, col_types = NULL, locale = default_locale(), na = c("", "NA"), quoted_na = TRUE, comment = "", trim_ws = TRUE, skip = 0, n_max = Inf, guess_max = min(1000, n_max), progress = interactive()) { tokenizer <- tokenizer_csv(na = na, quoted_na = TRUE, comment = comment, trim_ws = trim_ws) read_delimited(file, tokenizer, col_names = col_names, col_types = col_types, locale = locale, skip = skip, comment = comment, n_max = n_max, guess_max = guess_max, progress = progress) } <environment: namespace:readr>
# Read the attendance sheet of a single class into `df`. read_csv() guesses
# each column's type and reports the specification it used.
df <- read_csv("/home/schnorr/ensino/chamada/mlp.dir/alunos_mlp_2017-2_2017-08-28_16:15:30.csv");
Parsed with column specification: cols( X1 = col_integer(), N = col_integer(), ID = col_integer(), Name = col_character(), Entry = col_character(), Situation = col_character(), Email = col_character(), Course = col_character(), Date = col_date(format = ""), Present = col_logical() ) Warning message: Missing column names filled in: 'X1' [1]
Verbos
- selecionar colunas com select()
- criar colunas com mutate()
- filtrar com filter()
- fatiar linhas com slice()
- reordenar com arrange()
- criar grupos com group_by()
# Drop the ID, Name, Email and Course columns. The result is only printed;
# `df` itself is not reassigned.
df %>%
select(-ID, -Name, -Email, -Course)
# A tibble: 40 x 6 X1 N Entry Situation Date Present <int> <int> <chr> <chr> <date> <lgl> 1 1 1 2014/2 Matriculado 2017-08-28 FALSE 2 2 2 2015/1 Matriculado 2017-08-28 TRUE 3 3 3 2014/2 Matriculado 2017-08-28 FALSE 4 4 4 2016/2 Matriculado 2017-08-28 TRUE 5 5 5 2014/1 Matriculado 2017-08-28 TRUE 6 6 6 2014/1 Matriculado 2017-08-28 TRUE 7 7 7 2015/1 Matriculado 2017-08-28 TRUE 8 8 8 2014/1 Matriculado 2017-08-28 TRUE 9 9 9 2015/1 Matriculado 2017-08-28 TRUE 10 10 10 2015/1 Matriculado 2017-08-28 TRUE # ... with 30 more rows
list.files
function (path = ".", pattern = NULL, all.files = FALSE, full.names = FALSE, recursive = FALSE, ignore.case = FALSE, include.dirs = FALSE, no.. = FALSE) .Internal(list.files(path, pattern, all.files, full.names, recursive, ignore.case, include.dirs, no..)) <bytecode: 0x56127afe2ba8> <environment: namespace:base>
library(readr);
library(magrittr);
library(dplyr);
# Collect the attendance sheets found in the course directory, with full
# paths. The pattern keeps any non-CSV file in that directory out of the list.
lista_de_arquivos <- list.files(
  "/home/schnorr/ensino/chamada/mlp.dir",
  pattern = "\\.csv$",
  full.names = TRUE
)
# lista_de_arquivos  # uncomment to inspect the files that were found

# Fail early with a clear message: with no files, do.call("rbind", list())
# would return NULL and the summaries below would fail with an obscure error.
if (length(lista_de_arquivos) == 0) {
  stop("No attendance CSV files found", call. = FALSE)
}

# Read every sheet and stack them into one table. read_csv() prints the
# guessed column specification for each file, hence suppressMessages().
df <- do.call(
  "rbind",
  suppressMessages(lapply(lista_de_arquivos, function(filename) {
    sheet <- read_csv(filename)
    # A sheet may carry an extra Duration column; drop it so that all sheets
    # have the same columns before they are row-bound.
    if ("Duration" %in% names(sheet)) {
      sheet <- select(sheet, -Duration)
    }
    sheet
  }))
)

# Number of rows per Entry value, divided by 6.
# NOTE(review): 6 is presumably the number of class sheets available when this
# was written, which would make N the number of students per entry term.
# Confirm, and consider length(lista_de_arquivos) instead of the literal.
df %>%
  group_by(Entry) %>%
  summarize(N = n() / 6)

# Per-student attendance summary:
#   Faltas - number of rows where Present is FALSE (absences)
#   Freq   - 1 minus the fraction of that student's rows that are absences
#   FF     - TRUE when Freq is below 0.75
# Only students with exactly one absence are kept, and the result is printed
# as a plain data frame.
df %>%
  group_by(Name) %>%
  summarize(
    Faltas = sum(!Present),
    Freq = 1 - Faltas / n(),
    FF = Freq < 0.75
  ) %>%
  filter(Faltas == 1) %>%
  as.data.frame()