-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdutch_surnames.R
75 lines (53 loc) · 1.49 KB
/
dutch_surnames.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# Packages whose functions are used unqualified in this script (%>%, filter,
# mutate, select, map, set_names, unnest). They were never attached, so the
# script could not run in a fresh session. httr, rvest and tools are always
# called with an explicit namespace prefix and need no attaching.
library(dplyr)
library(purrr)
library(tidyr)

# Dutch surnames side -------------------------
# Download the surname index page and parse the response body.
surnames_url <- "https://www.netherlandsgenealogy.com/surnames-all.php?tree="
x <-
  surnames_url %>%
  httr::GET() %>%
  # Fail loudly on a 4xx/5xx response instead of scraping an error page.
  httr::stop_for_status() %>%
  httr::content()

# Every ".titlebox" node except the first two.
# NOTE(review): presumably the first two boxes are page chrome rather than
# surname groups -- confirm against the live page.
surnames <-
  x %>%
  rvest::html_nodes(".titlebox")
surnames <- surnames[-(1:2)]

# Text of the <p> elements inside the remaining boxes; used below as the
# names of the result list.
surnames_index <-
  surnames %>%
  rvest::html_nodes("p") %>%
  rvest::html_text()

# For each box, collect the link texts found inside its ".sntable" element.
# Result: a list of character vectors named by `surnames_index`.
surnames <-
  surnames %>%
  set_names(surnames_index) %>%
  map(function(box) {
    box %>%
      rvest::html_nodes(".sntable") %>%
      rvest::html_nodes("a") %>%
      rvest::html_text()
  })
# CRAN db side --------------------------------
# Fetch the CRAN package metadata and keep only the author-related columns.
cran_db <- tools::CRAN_package_db()
keep_cols <- c(
  "Package", "Author", "Authors@R", "Contact", "Maintainer", "Date"
)
auths <- cran_db[, keep_cols]
# Rename by name rather than by position so that reordering `keep_cols`
# cannot silently rename the wrong column.
colnames(auths)[colnames(auths) == "Authors@R"] <- "NiceAuthors"
# Up to 20 rows drawn at random from the whole table. The previous
# `sample(20)` was only a permutation of 1..20, i.e. it always returned the
# first 20 rows in shuffled order.
sample_rows <- sample.int(nrow(cran_db), size = min(20L, nrow(cran_db)))
sample_auths <- cran_db[sample_rows, keep_cols]
# Authors@R - easy
# Rows with an Authors@R entry hold R source text that builds `person`
# objects, so the family names can be read off the evaluated result.
nice_auths <-
  auths %>%
  filter(!is.na(NiceAuthors)) %>%
  mutate(
    Surnames =
      map(NiceAuthors, function(authors_code) {
        # Since it's text, parse and evaluate it to get a vector of persons
        # (see help ?person).
        # NOTE(review): this executes code taken from downloaded package
        # metadata. Only run it on a source you trust, or replace it with a
        # parser that does not evaluate arbitrary expressions.
        persons <- eval(parse(text = authors_code))
        # format() is vectorised over a person vector: it returns one string
        # per person, here restricted to the family name.
        format(persons, include = "family")
      })
  )

# One row per (package, surname) pair.
surnames_by_pkg <-
  nice_auths %>%
  select(Package, Surnames) %>%
  unnest(Surnames)
# Author - a bit more difficult: need to remove the stuff in between [] and
# <>, split on comma, and remove "and".
# Work in progress: for now this only prints the rows that have no Authors@R
# entry; the cleaning steps still have to be added inside mutate().
auths %>%
  filter(is.na(NiceAuthors)) %>%
  mutate(
  )

# A few rows of the free-text Author column to try the clean-up pattern on.
t <- auths[c(2, 7, 13, 16, 22), "Author"]
# Drop every "[...]" and "<...>" span together with the whitespace in front
# of it. The earlier pattern used "(a-z)", which matches the literal text
# "a-z" rather than a letter, so it never matched tags such as "[aut, cre]".
gsub(
  pattern = "[[:space:]]*(\\[[^]]*\\]|<[^>]*>)",
  replacement = "",
  x = t
)