Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Save known tags in a json file #286

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 11 additions & 9 deletions R/known_tags.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ known_tags <- c(
"code", # html
"col", # html
"colgroup", # html
"color-profile", # svg
"command", #
"color-profile", # deprecated
"command", # deprecated
"data", # html
"datalist", # html
"dd", # html
Expand All @@ -45,7 +45,7 @@ known_tags <- c(
"ellipse", # svg
"em", # html
"embed", # html
"eventsource", #
"eventsource", # deprecated
"feBlend", # svg
"feColorMatrix", # svg
"feComponentTransfer",# svg
Expand Down Expand Up @@ -89,7 +89,7 @@ known_tags <- c(
"hatchpath", # svg
"head", # html
"header", # html
"hgroup", # html
"hgroup", # deprecated
"hr", # html
"html", # html
"i", # html
Expand All @@ -99,7 +99,7 @@ known_tags <- c(
"input", # html
"ins", # html
"kbd", # html
"keygen", #
"keygen", # deprecated
"label", # html
"legend", # html
"li", # html
Expand All @@ -111,6 +111,7 @@ known_tags <- c(
"mark", # html
"marker", # svg
"mask", # svg
"math", # html
"menu", # html
"meta", # html
"metadata", # svg
Expand All @@ -130,15 +131,16 @@ known_tags <- c(
"picture", # html
"polygon", # svg
"polyline", # svg
"portal", # html
"pre", # html
"progress", # html
"q", # html
"radialGradient", # svg
"rb", # html
"rb", # deprecated
"rect", # svg
"rp", # html
"rt", # html
"rtc", # html
"rtc", # deprecated
"ruby", # html
"s", # html
"samp", # html
Expand All @@ -148,7 +150,7 @@ known_tags <- c(
"set", # svg
"slot", # html
"small", # html
"solidcolor", # svg
"solidcolor", # deprecated
"source", # html
"span", # html
"stop", # svg
Expand All @@ -157,7 +159,7 @@ known_tags <- c(
"sub", # html
"summary", # html
"sup", # html
"svg", # svg
"svg", # html svg
"switch", # svg
"symbol", # svg
"table", # html
Expand Down
130 changes: 88 additions & 42 deletions scripts/generate_known_tags.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,47 +3,77 @@
## This script scrapes two Mozilla web pages for HTML and SVG tag elements.
## All HTML tags

library(rvest)
library(dplyr)

# Note: Mozilla seems to have a more up-to-date set of what is possible / not obsolete compared to W3Schools
base_url <- "https://developer.mozilla.org/en-US/docs/Web"

# Every table found on the Mozilla HTML element reference page,
# one data frame per table.
html_tag_dfs <- html_table(
  read_html(file.path(base_url, "HTML", "Element"))
)

# The final table on the page holds the obsolete/deprecated elements;
# its index is recorded so that it can be excluded below.
n_dfs <- length(html_tag_dfs)

html_tags_df <- bind_rows(html_tag_dfs[-n_dfs]) %>%
  # Some rows list several elements at once (h1-h6 share a single
  # comma-separated row), so split them into one row per element.
  mutate(name = strsplit(Element, ", ")) %>%
  tidyr::unnest(name) %>%
  select(Element = name, Description) %>%
  transmute(
    # Drop the surrounding angle brackets, e.g. "<a>" becomes "a".
    name = gsub("^<|>$", "", Element),
    # Append a link to the element's own reference page.
    desc = paste(
      Description,
      "Learn more at",
      file.path(base_url, "HTML", "Element", name)
    )
  )

svg <- read_html(file.path(base_url, "SVG", "Element"))

# The SVG page offers little structure to hook into, so the
# "SVG elements A to Z" section is walked one letter heading at a time:
# for each letter, take the hyperlinks listed directly beneath its <h3>
# and keep the last path component of every href.
svg_tags <- lapply(letters, function(letter) {
  selector <- sprintf("h3[id=%s] + div > ul > li > a", letter)
  hrefs <- html_attr(html_elements(svg, selector), "href")
  basename(hrefs)
})

# TODO: eventually it might be nice to also scrape
# the descriptions by following the url
svg_tags_df <- tibble(
  # Flatten the per-letter list into a single vector of names.
  name = unlist(svg_tags),
  desc = sprintf(
    "Creates the <%s> SVG element. Learn more at %s",
    name,
    file.path(base_url, "SVG", "Element", name)
  )
)

library(magrittr)


# Download a page and extract the tag names found by a CSS selector.
#
# Arguments:
#   url: address of the page to download.
#   css: CSS selector matching the nodes whose text holds the tag names.
#
# Returns the text of the matched nodes with one leading "<" and one
# trailing ">" removed, sorted and de-duplicated. The result is also
# printed, as the pipeline ends in print().
#
# Stops with an error when the server answers with an HTTP error status.
get_tags <- function(url, css) {
  url %>%
    httr::GET() %>%
    # Fail loudly on a 4xx/5xx reply instead of scraping the error page,
    # which would otherwise yield an empty or bogus tag list silently.
    httr::stop_for_status() %>%
    httr::content() %>%
    rvest::html_nodes(css) %>%
    rvest::html_text() %>%
    sub("^<", "", .) %>%
    sub(">$", "", .) %>%
    sort() %>%
    unique() %>%
    print()
}

## W3 Schools
## Mozilla seemed to have a more up to date set of what is possible / not obsolete
# w3html_tags <- get_tags("https://www.w3schools.com/tags/default.asp", "#htmltags tr td:first-child a:not(.notsupported)")
## Had extra tags not seen in other places `altGlyph`
# w3svg_tags <- get_tags("https://www.w3schools.com/graphics/svg_reference.asp", "#main td:first-child")

## W3 Standard
# # The original spec websites made it very hard to determine what was obsolete / shouldn't be used and what was to be used
# html_tags <- get_tags("https://www.w3.org/TR/2018/WD-html53-20181018/single-page.html", "dfn[data-dfn-type='element']")
# svg_tags <- get_tags("https://svgwg.org/svg2-draft/single-page.html", "dfn[data-dfn-type='element']")

# Save a JSON version so other languages can read them in easily
cat(
jsonlite::toJSON(html_tags_df),
file = "scripts/html_tags.json"
)

## Mozilla
# do not include the last section of obsolete tags
html_tags <- get_tags("https://developer.mozilla.org/en-US/docs/Web/HTML/Element", "article table:not(:last-child) td:first-child code")
# html_tags_obsolete <- get_tags("https://developer.mozilla.org/en-US/docs/Web/HTML/Element", "#content table:last-child td:first-child a")
cat(
jsonlite::toJSON(svg_tags_df),
file = "scripts/svg_tags.json"
)

# do not include tags that do not contain documentation articles
# Only pull from the index, as elements not in the index are considered obsolete. (ex: altGlyph or font-face)
svg_tags <- get_tags("https://developer.mozilla.org/en-US/docs/Web/SVG/Element", "article .index a:not([rel='nofollow']) code")
html_tags <- html_tags_df$name
svg_tags <- svg_tags_df$name


# Both SVG2 and HTML5
svg_tags[svg_tags %in% html_tags]
#> [1] "a" "script" "style" "svg" "title"


new_tags <- c(svg_tags, html_tags) %>%
unique() %>%
sort()

# Call using callr::r to avoid any devtools loaded htmltools::tags namespace issues
cran_tags <- callr::r(
Expand All @@ -54,27 +84,43 @@ cran_tags <- callr::r(
show = TRUE
)

new_tags <- c(svg_tags, html_tags) %>% unique() %>% sort()

# Tags which are not supported by HTML5 / SVG2
setdiff(cran_tags, new_tags)
#> "command" "eventsource" "keygen"
#> [1] "color-profile" "command" "eventsource" "hgroup"
#> [5] "keygen" "rb" "rtc" "solidcolor"


# New HTML5 tags
setdiff(html_tags, cran_tags)
#> "rb" "rtc" "slot"
#> "portal" "math"

# New SVG2 tags
setdiff(svg_tags, cran_tags)
### ...basically all svg tags
#> character(0)

# combine old and new tags so that old tags are not lost
save_tags <- c(new_tags, cran_tags) %>% unique() %>% sort()
save_tags <- c(new_tags, cran_tags) %>%
unique() %>%
sort()

save_line <- paste0(
format(paste0(" \"", save_tags, "\"", ifelse(seq_along(save_tags) == length(save_tags), "", ",")), justify = "left"), "#",
ifelse(save_tags %in% html_tags, " html", " "),
ifelse(save_tags %in% svg_tags, " svg", "")
format(
paste0(
" \"", save_tags, "\"",
ifelse(
seq_along(save_tags) == length(save_tags),
"", ","
)
),
justify = "left"
),
"#",
case_when(
save_tags %in% html_tags & save_tags %in% svg_tags ~ " html svg",
save_tags %in% html_tags ~ " html",
save_tags %in% svg_tags ~ " svg",
TRUE ~ " deprecated"
)
) %>%
sub("\\s+$", "", .)
cat(
Expand Down
1 change: 1 addition & 0 deletions scripts/html_tags.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions scripts/svg_tags.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"name":"a","desc":"Creates the <a> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/a"},{"name":"animate","desc":"Creates the <animate> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/animate"},{"name":"animateMotion","desc":"Creates the <animateMotion> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/animateMotion"},{"name":"animateTransform","desc":"Creates the <animateTransform> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/animateTransform"},{"name":"circle","desc":"Creates the <circle> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/circle"},{"name":"clipPath","desc":"Creates the <clipPath> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/clipPath"},{"name":"defs","desc":"Creates the <defs> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/defs"},{"name":"desc","desc":"Creates the <desc> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/desc"},{"name":"discard","desc":"Creates the <discard> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/discard"},{"name":"ellipse","desc":"Creates the <ellipse> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/ellipse"},{"name":"feBlend","desc":"Creates the <feBlend> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feBlend"},{"name":"feColorMatrix","desc":"Creates the <feColorMatrix> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feColorMatrix"},{"name":"feComponentTransfer","desc":"Creates the <feComponentTransfer> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feComponentTransfer"},{"name":"feComposite","desc":"Creates the <feComposite> SVG element. 
Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feComposite"},{"name":"feConvolveMatrix","desc":"Creates the <feConvolveMatrix> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feConvolveMatrix"},{"name":"feDiffuseLighting","desc":"Creates the <feDiffuseLighting> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feDiffuseLighting"},{"name":"feDisplacementMap","desc":"Creates the <feDisplacementMap> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feDisplacementMap"},{"name":"feDistantLight","desc":"Creates the <feDistantLight> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feDistantLight"},{"name":"feDropShadow","desc":"Creates the <feDropShadow> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feDropShadow"},{"name":"feFlood","desc":"Creates the <feFlood> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feFlood"},{"name":"feFuncA","desc":"Creates the <feFuncA> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feFuncA"},{"name":"feFuncB","desc":"Creates the <feFuncB> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feFuncB"},{"name":"feFuncG","desc":"Creates the <feFuncG> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feFuncG"},{"name":"feFuncR","desc":"Creates the <feFuncR> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feFuncR"},{"name":"feGaussianBlur","desc":"Creates the <feGaussianBlur> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feGaussianBlur"},{"name":"feImage","desc":"Creates the <feImage> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feImage"},{"name":"feMerge","desc":"Creates the <feMerge> SVG element. 
Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feMerge"},{"name":"feMergeNode","desc":"Creates the <feMergeNode> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feMergeNode"},{"name":"feMorphology","desc":"Creates the <feMorphology> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feMorphology"},{"name":"feOffset","desc":"Creates the <feOffset> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feOffset"},{"name":"fePointLight","desc":"Creates the <fePointLight> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/fePointLight"},{"name":"feSpecularLighting","desc":"Creates the <feSpecularLighting> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feSpecularLighting"},{"name":"feSpotLight","desc":"Creates the <feSpotLight> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feSpotLight"},{"name":"feTile","desc":"Creates the <feTile> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feTile"},{"name":"feTurbulence","desc":"Creates the <feTurbulence> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/feTurbulence"},{"name":"filter","desc":"Creates the <filter> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/filter"},{"name":"foreignObject","desc":"Creates the <foreignObject> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/foreignObject"},{"name":"g","desc":"Creates the <g> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/g"},{"name":"hatch","desc":"Creates the <hatch> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/hatch"},{"name":"hatchpath","desc":"Creates the <hatchpath> SVG element. 
Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/hatchpath"},{"name":"image","desc":"Creates the <image> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/image"},{"name":"line","desc":"Creates the <line> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/line"},{"name":"linearGradient","desc":"Creates the <linearGradient> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/linearGradient"},{"name":"marker","desc":"Creates the <marker> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/marker"},{"name":"mask","desc":"Creates the <mask> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/mask"},{"name":"metadata","desc":"Creates the <metadata> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/metadata"},{"name":"mpath","desc":"Creates the <mpath> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/mpath"},{"name":"path","desc":"Creates the <path> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/path"},{"name":"pattern","desc":"Creates the <pattern> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/pattern"},{"name":"polygon","desc":"Creates the <polygon> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/polygon"},{"name":"polyline","desc":"Creates the <polyline> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/polyline"},{"name":"radialGradient","desc":"Creates the <radialGradient> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/radialGradient"},{"name":"rect","desc":"Creates the <rect> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/rect"},{"name":"script","desc":"Creates the <script> SVG element. 
Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/script"},{"name":"set","desc":"Creates the <set> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/set"},{"name":"stop","desc":"Creates the <stop> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/stop"},{"name":"style","desc":"Creates the <style> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/style"},{"name":"svg","desc":"Creates the <svg> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/svg"},{"name":"switch","desc":"Creates the <switch> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/switch"},{"name":"symbol","desc":"Creates the <symbol> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/symbol"},{"name":"text","desc":"Creates the <text> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/text"},{"name":"textPath","desc":"Creates the <textPath> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/textPath"},{"name":"title","desc":"Creates the <title> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/title"},{"name":"tspan","desc":"Creates the <tspan> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/tspan"},{"name":"use","desc":"Creates the <use> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/use"},{"name":"view","desc":"Creates the <view> SVG element. Learn more at https://developer.mozilla.org/en-US/docs/Web/SVG/Element/view"}]