From 94a4e6958d1761d8818faa9eac7321a1587d59b3 Mon Sep 17 00:00:00 2001 From: webofceco Date: Mon, 6 May 2024 15:28:45 -0400 Subject: [PATCH] fixed styles, new functions to view tables, more modular functions --- Project.toml | 2 +- README.md | 16 +++- src/HTMLTables.jl | 18 +++- src/display.jl | 10 ++ src/get.jl | 65 +++++++++++++ src/open.jl | 16 ++++ src/read.jl | 49 +--------- src/table.jl | 182 +++++++++++++++++++++++++++++++++++++ src/themes/00-default.css | 5 + src/themes/01-red.css | 5 + src/themes/02-orange.css | 5 + src/themes/03-yellow.css | 5 + src/themes/04-green.css | 5 + src/themes/05-blue.css | 5 + src/themes/06-violet.css | 5 + src/themes/07-magenta.css | 5 + src/themes/08-brown.css | 5 + src/themes/09-gray.css | 5 + src/write.jl | 186 ++++++++++---------------------------- 19 files changed, 400 insertions(+), 194 deletions(-) create mode 100644 src/display.jl create mode 100644 src/get.jl create mode 100644 src/open.jl create mode 100644 src/table.jl diff --git a/Project.toml b/Project.toml index c2fa1c0..a9d231f 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "HTMLTables" uuid = "b1afcece-b80e-4563-b90e-36b4cc56d3fa" authors = ["Ceco E. Maples "] -version = "0.3.0" +version = "0.3.1" [deps] Cascadia = "54eefc05-d75b-58de-a785-1a3403f0919f" diff --git a/README.md b/README.md index 5e45c77..5309004 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,18 @@ # HTMLTables -Julia package for reading and writing HTML tables. +Julia package for reading, writing, and viewing HTML tables. -## Reading HTML tables -- `HTMLTables.read`: extracts data from HTML tables. +Reading HTML tables: +- `HTMLTables.get` reads an HTML table as a string. +- `HTMLTables.read` extracts data from HTML tables. -## Writing HTML tables -- `HTMLTables.write`: uses the Tables.jl interface to write an HTML table. +Writing HTML tables: +- `HTMLTables.table` uses the Tables.jl interface to write an HTML table as a string. +- `HTMLTables.write` uses the Tables.jl interface to write an HTML table in a file. + +Viewing HTML tables: +- `HTMLTables.display` displays a julia table as an HTML table in julia. +- `HTMLTables.open` opens a julia table as an HTML table in the browser. ## License Copyright © 2024 Ceco Elijah Maples diff --git a/src/HTMLTables.jl b/src/HTMLTables.jl index a1a57ca..cf06cca 100644 --- a/src/HTMLTables.jl +++ b/src/HTMLTables.jl @@ -1,21 +1,33 @@ """ HTMLTables -Julia package for reading and writing HTML tables. +Julia package for reading, writing, and viewing HTML tables. Reading HTML tables: +- `HTMLTables.get` reads an HTML table as a string. - `HTMLTables.read` extracts data from HTML tables. Writing HTML tables: -- `HTMLTables.write` uses the Tables.jl interface to write an HTML table. +- `HTMLTables.table` uses the Tables.jl interface to write an HTML table as a string. +- `HTMLTables.write` uses the Tables.jl interface to write an HTML table in a file. + +Viewing HTML tables: +- `HTMLTables.display` displays a julia table as an HTML table in julia. +- `HTMLTables.open` opens a julia table as an HTML table in the browser. """ module HTMLTables using Cascadia, Colors, ColorSchemes, Gumbo, HTTP, Tables -export read, write +export get, read, table, write, display, open +include("get.jl") include("read.jl") + +include("table.jl") include("write.jl") +include("display.jl") +include("open.jl") + end diff --git a/src/display.jl b/src/display.jl new file mode 100644 index 0000000..0f6d3c0 --- /dev/null +++ b/src/display.jl @@ -0,0 +1,10 @@ +""" + HTMLTables.display(tbl; kwargs...) + +Displays a julia table as an HTML table in julia. +""" +function display(tbl; kwargs...) + html_table::String = table(tbl; kwargs...) + + Base.display("image/svg+xml", html_table) +end \ No newline at end of file diff --git a/src/get.jl b/src/get.jl new file mode 100644 index 0000000..8084f94 --- /dev/null +++ b/src/get.jl @@ -0,0 +1,65 @@ +function isurl(source::String)::Bool + url_pattern::Regex = r"(?i)\b((?:https?|ftp):\/\/[\w-]+(\.[\w-]+)+([\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])?)\b" + + return Base.occursin(url_pattern, source) +end + +""" + HTMLTables.get(source::String; id::String="", classes::Union{Vector{String},String}="", index::Int=1) + +Returns an HTML table a source as a string. + +## Arguments + +- `source::String`: URL or path to the HTML table. +- `id::String`: The id of the HTML table. +- `classes::Union{Vector{String},String}`: The classes of the HTML table. +- `index::Int`: The index of the HTML table in the HTML document. + +""" +function get( + source::String; + id::String="", + classes::Union{Vector{String},String}="", + index::Int=1 +) + if index <= 0 + Base.throw(Base.ArgumentError("Index must be a positive integer")) + end + + if isurl(source) == true + response::HTTP.Response = HTTP.get(source) + html_content = Base.String(response.body) + else + html_content = Base.read(source, String) + end + + html_document::Gumbo.HTMLDocument = Gumbo.parsehtml(html_content) + + selector::String = "" + if Base.isempty(id) + if Base.isempty(classes) + selector *= "table" + elseif !Base.isempty(classes) && Base.isa(classes, String) + selector *= "table.$classes" + elseif !Base.isempty(classes) && Base.isa(classes, Vector{String}) + selector *= "table." * Base.join(classes, ".") + end + elseif !Base.isempty(id) + selector *= "#$id" + end + + tables::Vector{Gumbo.HTMLNode} = Base.eachmatch(Cascadia.Selector(selector), html_document.root) + + number_of_tables::Int = Base.length(tables) + + if number_of_tables == 0 + Base.throw(Base.ArgumentError("No HTML tables found")) + elseif index > number_of_tables + Base.throw(Base.ArgumentError("$number_of_tables table(s) found. Index $index does not exist.")) + end + + table::Gumbo.HTMLNode = tables[index] + + return table +end \ No newline at end of file diff --git a/src/open.jl b/src/open.jl new file mode 100644 index 0000000..d621dac --- /dev/null +++ b/src/open.jl @@ -0,0 +1,16 @@ +""" + HTMLTables.open(tbl; kwargs...) + +Opens a julia table as an HTML table in the browser. +""" +function open(tbl; kwargs...) + path::String = write(tbl; kwargs...) + + if Base.Sys.iswindows() + Base.run(`start $path`) + elseif Base.Sys.islinux() + Base.run(`xdg-open $path`) + elseif Base.Sys.isapple() + Base.run(`open $path`) + end +end \ No newline at end of file diff --git a/src/read.jl b/src/read.jl index 9ac2003..fae50e0 100644 --- a/src/read.jl +++ b/src/read.jl @@ -1,15 +1,7 @@ -function isurl(source::String)::Bool - url_pattern ::Regex = r"(?i)\b((?:https?|ftp):\/\/[\w-]+(\.[\w-]+)+([\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])?)\b" - - return Base.occursin(url_pattern, source) -end - function extractrowdata(row::Gumbo.HTMLNode)::Vector - cell_matches::Vector{Gumbo.HTMLNode} = Base.eachmatch( - Cascadia.Selector("td,th"), row - ) + cells::Vector{Gumbo.HTMLNode} = Base.eachmatch(Cascadia.Selector("td,th"), row) - return [Cascadia.nodeText(cell) for cell in cell_matches] + return [Cascadia.nodeText(cell) for cell in cells] end """ @@ -23,6 +15,7 @@ Reads a HTML table into a sink function such as `DataFrame`. - `sink`: The function that materializes the table data. - `id::String`: The id of the HTML table. - `classes::Union{Vector{String},String}`: The classes of the HTML table. +- `index::Int`: The index of the HTML table in the HTML document. ## Examples @@ -31,8 +24,6 @@ using HTMLTables, DataFrames # read an HTML table into a DataFrame df = HTMLTables.read("https://www.w3schools.com/html/html_tables.asp", DataFrame) -println(df) - ``` """ function read( @@ -42,39 +33,7 @@ function read( classes::Union{Vector{String},String}="", index::Int=1 ) - if index <= 0 - throw(ArgumentError("Index must be a positive integer")) - end - - if isurl(source) == true - response::HTTP.Response = HTTP.get(source) - html_content = Base.String(response.body) - else - html_content = Base.read(source, String) - end - - html_document::Gumbo.HTMLDocument = Gumbo.parsehtml(html_content) - - selector::String = "" - if Base.isempty(id) - if Base.isempty(classes) - selector *= "table" - elseif !Base.isempty(classes) && Base.isa(classes, String) - selector *= "table.$classes" - elseif !Base.isempty(classes) && Base.isa(classes, Vector{String}) - selector *= "table." * Base.join(classes, ".") - end - elseif !Base.isempty(id) - selector *= "#$id" - end - - tables::Vector{Gumbo.HTMLNode} = Base.eachmatch(Cascadia.Selector(selector), html_document.root) - - if Base.isempty(tables) == true - throw(ArgumentError("No HTML tables found")) - end - - table::Gumbo.HTMLNode = tables[index] + table = get(source, id=id, classes=classes, index=index) rows::Vector{Gumbo.HTMLNode} = Base.eachmatch(Cascadia.Selector("tr"), table) headers::Vector = [] diff --git a/src/table.jl b/src/table.jl new file mode 100644 index 0000000..11f60f8 --- /dev/null +++ b/src/table.jl @@ -0,0 +1,182 @@ +const THEMES::Dict{String,Vector{Union{Int,String}}} = Base.Dict( + "themes/00-default.css" => ["default", "00", 00, "0", 0], + "themes/01-red.css" => ["red", "01", 01, "1", 1], + "themes/02-orange.css" => ["orange", "02", 02, "2", 2], + "themes/03-yellow.css" => ["yellow", "03", 03, "3", 3], + "themes/04-green.css" => ["green", "04", 04, "4", 4], + "themes/05-blue.css" => ["blue", "05", 05, "5", 5], + "themes/06-violet.css" => ["violet", "06", 06, "6", 6], + "themes/07-magenta.css" => ["magenta", "07", 07, "7", 7], + "themes/08-brown.css" => ["brown", "08", 08, "8", 8], + "themes/09-gray.css" => ["gray", "09", 09, "9", 9] +) + +function loadtheme(theme::Union{Int,String})::String + if Base.isa(theme, String) || Base.isa(theme, Int) + for (css_file, themes) in THEMES + if theme in themes + theme_path = Base.Filesystem.joinpath(@__DIR__, css_file) + css_string = Base.read(theme_path, String) + return "\n" + end + end + elseif Base.Filesystem.ispath(theme) && Base.Filesystem.isfile(theme) + css_string = Base.read(theme, String) + return "\n" + else + return Base.throw(Base.ArgumentError("$theme is not a valid theme or CSS file.")) + end +end + +function rgbString(color::RGB) + r::Float64 = Colors.red(color) + g::Float64 = Colors.green(color) + b::Float64 = Colors.blue(color) + + r_int::Int = Base.round(Int, r * 255) + g_int::Int = Base.round(Int, g * 255) + b_int::Int = Base.round(Int, b * 255) + + r_string::String = Base.string(r_int) + g_string::String = Base.string(g_int) + b_string::String = Base.string(b_int) + + rgb_string::String = "rgb(" * Base.join([r_string, g_string, b_string], ",") * ")" + + return rgb_string +end + +""" + HTMLTables.table( + tbl; + header::Bool=true, + footer::Bool=true, + id::String="", + classes::Union{Vector{String},String}="", + css::Bool=true, + theme::Union{Int,String}="default", + colorscale="", + tooltips::Bool=true + ) + +Returns a julia table as an HTML table. + +## Arguments + +- `tbl`: The table to write. +- `filename`: The filename of the HTML table. +- `save_location`: The location to save the HTML table. +- `header`: Whether to include the table header. +- `footer`: Whether to include the table footer. +- `id`: The id of the HTML table. +- `classes`: The classes of the HTML table. +- `css`: Whether to include the CSS styles. +- `theme`: The theme of the HTML table. +- `colorscale`: The colorscale of the HTML table. +- `tooltips`: Whether to include tooltips. + +""" +function table( + tbl; + header::Bool=true, + footer::Bool=true, + id::String="", + classes::Union{Vector{String},String}="", + css::Bool=true, + theme::Union{Int,String}="default", + colorscale="", + tooltips::Bool=true +) + html_table::String = "" + + if theme !== "" && css + html_table *= loadtheme(theme) + end + + html_table *= " ["default", "00", 00, "0", 0], - "themes/01-red.css" => ["red", "01", 01, "1", 1], - "themes/02-orange.css" => ["orange", "02", 02, "2", 2], - "themes/03-yellow.css" => ["yellow", "03", 03, "3", 3], - "themes/04-green.css" => ["green", "04", 04, "4", 4], - "themes/05-blue.css" => ["blue", "05", 05, "5", 5], - "themes/06-violet.css" => ["violet", "06", 06, "6", 6], - "themes/07-magenta.css" => ["magenta", "07", 07, "7", 7], - "themes/08-brown.css" => ["brown", "08", 08, "8", 8], - "themes/09-gray.css" => ["gray", "09", 09, "9", 9] -) - -function loadtheme(theme::Union{Int,String})::String - if Base.isa(theme, String) || Base.isa(theme, Int) - for (css_file, themes) in THEMES - if theme in themes - theme_path = Base.Filesystem.joinpath(@__DIR__, css_file) - css_string = Base.read(theme_path, String) - return "\n" - end - end - elseif Base.Filesystem.ispath(theme) && Base.Filesystem.isfile(theme) - css_string = Base.read(theme, String) - return "\n" - else - return Base.throw(Base.ArgumentError("$theme is not a valid theme or CSS file.")) - end -end - -function rgbString(color::RGB) - r::Float64 = Colors.red(color) - g::Float64 = Colors.green(color) - b::Float64 = Colors.blue(color) - - r_int::Int = Base.round(Int, r * 255) - g_int::Int = Base.round(Int, g * 255) - b_int::Int = Base.round(Int, b * 255) - - r_string::String = Base.string(r_int) - g_string::String = Base.string(g_int) - b_string::String = Base.string(b_int) - - rgb_string::String = "rgb(" * Base.join([r_string, g_string, b_string], ",") * ")" - - return rgb_string -end - """ - HTMLTables.write() + HTMLTables.write( + tbl; + filename::String="table", + save_location::String=Base.Filesystem.pwd(), + header::Bool=true, + footer::Bool=true, + id::String="", + classes::Union{Vector{String},String}="", + css::Bool=true, + theme::Union{Int,String}="default", + colorscale="", + tooltips::Bool=true + ) + +Writes a julia table as an HTML table to an HTML file. + +## Arguments + +- `tbl`: The table to write. +- `filename`: The filename of the HTML table. +- `save_location`: The location to save the HTML table. +- `header`: Whether to include the table header. +- `footer`: Whether to include the table footer. +- `id`: The id of the HTML table. +- `classes`: The classes of the HTML table. +- `css`: Whether to include the CSS styles. +- `theme`: The theme of the HTML table. +- `colorscale`: The colorscale of the HTML table. +- `tooltips`: Whether to include tooltips. + +## Returns + +- `path`: The path to the HTML file. + """ function write( - table; + tbl; filename::String="table", save_location::String=Base.Filesystem.pwd(), header::Bool=true, @@ -62,96 +47,17 @@ function write( colorscale="", tooltips::Bool=true ) - html_table::String = "" - - if theme !== "" && css - html_table *= loadtheme(theme) - end - - html_table *= "