Skip to content

Commit

Permalink
fixed themes and read function. version 0.3
Browse files Browse the repository at this point in the history
  • Loading branch information
cecoeco committed May 6, 2024
1 parent 2f6c434 commit bda594a
Show file tree
Hide file tree
Showing 15 changed files with 102 additions and 89 deletions.
4 changes: 1 addition & 3 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
name = "HTMLTables"
uuid = "b1afcece-b80e-4563-b90e-36b4cc56d3fa"
authors = ["Ceco E. Maples <[email protected]>"]
version = "0.2.0"
version = "0.3.0"

[deps]
Cascadia = "54eefc05-d75b-58de-a785-1a3403f0919f"
ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4"
Colors = "5ae59095-9a9b-59fe-a467-6f913c188581"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Gumbo = "708ec375-b3d6-5a57-a7ce-8257bf98657a"
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
Expand All @@ -16,7 +15,6 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
Cascadia = "1"
ColorSchemes = "3"
Colors = "0.12"
DataFrames = "1"
Gumbo = "0.8"
HTTP = "1"
Tables = "1.11"
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
Julia package for reading and writing HTML tables.

## Reading HTML tables
- `HTMLTables.read`: read HTML table into a `DataFrame`.
- `HTMLTables.read`: extracts data from HTML tables.

## Writing HTML tables
- `HTMLTables.write`: write a `DataFrame` into an HTML table.
- `HTMLTables.write`: uses the Tables.jl interface to write an HTML table.

## License
Copyright © 2024 Ceco Elijah Maples
Expand Down
12 changes: 3 additions & 9 deletions src/HTMLTables.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,17 @@
Julia package for reading and writing HTML tables.
Reading HTML tables:
- `HTMLTables.read` reads HTML table element `<table></table>` into a `DataFrame`.
- `HTMLTables.read` extracts data from HTML tables.
Writing HTML tables:
- `HTMLTables.write` writes a `DataFrame` into an HTML table element `<table></table>`.
- `HTMLTables.write` uses the Tables.jl interface to write an HTML table.
"""
module HTMLTables

using Cascadia, Colors, ColorSchemes, DataFrames, Gumbo, HTTP, Tables
using Cascadia, Colors, ColorSchemes, Gumbo, HTTP, Tables

export read, write

"""
    isurl(source::String)::Bool

Return `true` when `source`, taken as a whole, is an `http`, `https`, or `ftp` URL.

The scheme is matched case-insensitively and must start the string, so a local file
path that merely contains a URL somewhere inside it is not mistaken for one. The host
part only has to be non-empty, so dot-less hosts such as `localhost` are accepted.
"""
function isurl(source::String)::Bool
    # Anchored with ^ and \z: the whole string has to be the URL, not just contain one.
    # The host is "anything up to the first /, ? or #", which also admits ports.
    url_pattern::Regex = r"^(?:https?|ftp)://[^\s/?#]+\S*\z"i

    return Base.occursin(url_pattern, source)
end

include("read.jl")
include("write.jl")

Expand Down
105 changes: 67 additions & 38 deletions src/read.jl
Original file line number Diff line number Diff line change
@@ -1,66 +1,95 @@
"""
    isurl(source::String)::Bool

Return `true` when `source`, taken as a whole, is an `http`, `https`, or `ftp` URL.

The scheme is matched case-insensitively and must start the string, so a local file
path that merely contains a URL somewhere inside it is not mistaken for one. The host
part only has to be non-empty, so dot-less hosts such as `localhost` are accepted.
"""
function isurl(source::String)::Bool
    # Anchored with ^ and \z: the whole string has to be the URL, not just contain one.
    # The host is "anything up to the first /, ? or #", which also admits ports.
    url_pattern::Regex = r"^(?:https?|ftp)://[^\s/?#]+\S*\z"i

    return Base.occursin(url_pattern, source)
end

"""
    extractrowdata(row::Gumbo.HTMLNode)::Vector

Collect the text of every `td` and `th` element matched under `row`, in match order.
"""
function extractrowdata(row::Gumbo.HTMLNode)::Vector
    # One selector covers both data cells and header cells.
    cell_selector = Cascadia.Selector("td,th")
    cells::Vector{Gumbo.HTMLNode} = Base.eachmatch(cell_selector, row)

    return Base.map(Cascadia.nodeText, cells)
end

"""
HTMLTables.read(source::String, sink; id::String="", classes::Union{Vector{String},String}="", index::Int=1)
Reads a HTML table into a `DataFrame`.
Reads an HTML table and passes its rows and headers to a sink function such as `DataFrame`.
## Arguments
- `source::String`: URL or path to the HTML table.
- `sink`: The function that materializes the table data.
- `id::String`: The id of the HTML table.
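- `classes::Union{Vector{String},String}`: The classes of the HTML table.
- `index::Int`: The 1-based position of the table to read among the tables that match; must be a positive integer.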
## Examples
```julia
using HTMLTables, DataFrames
# read an HTML table into a DataFrame
df = HTMLTables.read("https://www.w3schools.com/html/html_tables.asp", DataFrame)
println(df)
```
"""
function read(
source::String;
id::String="",
classes::Union{Vector{String},String}=""
source::String,
sink;
id::String="",
classes::Union{Vector{String},String}="",
index::Int=1
)
if isurl(source)
if index <= 0
throw(ArgumentError("Index must be a positive integer"))
end

if isurl(source) == true
response::HTTP.Response = HTTP.get(source)
html_content = Base.String(response.body)
else
html_content = Base.read(source, String)
end

html::Gumbo.HTMLDocument = Gumbo.parsehtml(html_content)
html_document::Gumbo.HTMLDocument = Gumbo.parsehtml(html_content)

selector::String = ""

if id == ""
if classes !== ""
if Base.isa(classes, String)
selector *= ".$classes"
elseif Base.isa(classes, Vector{String})
selector *= Base.join([".$cls" for cls in classes], ",")
else
Base.throw(Base.ArgumentError("classes must be a String or Vector{String}"))
end
if Base.isempty(id)
if Base.isempty(classes)
selector *= "table"
elseif !Base.isempty(classes) && Base.isa(classes, String)
selector *= "table.$classes"
elseif !Base.isempty(classes) && Base.isa(classes, Vector{String})
selector *= "table." * Base.join(classes, ".")
end
elseif id !== ""
elseif !Base.isempty(id)
selector *= "#$id"
else
Base.throw(Base.ArgumentError("id must be of type String"))
end

tables = Base.eachmatch(Cascadia.Selector(selector), html.root)

if !Base.isempty(tables)
table = tables[1]
rows = Base.eachmatch(Cascadia.Selector("tr"), table)
headers = []
data = []

for (i, row) in Base.Iterators.enumerate(rows)
cells = Base.eachmatch(Cascadia.Selector("td,th"), row)
if i == 1 && Base.isempty(headers)
headers = [Cascadia.nodeText(cell) for cell in cells]
else
Base.push!(data, [Cascadia.nodeText(cell) for cell in cells])
end
tables::Vector{Gumbo.HTMLNode} = Base.eachmatch(Cascadia.Selector(selector), html_document.root)

if Base.isempty(tables) == true
throw(ArgumentError("No HTML tables found"))
end

table::Gumbo.HTMLNode = tables[index]

rows::Vector{Gumbo.HTMLNode} = Base.eachmatch(Cascadia.Selector("tr"), table)
headers::Vector = []
data::Vector{Vector} = []

for (i, row) in Base.enumerate(rows)
rowdata = extractrowdata(row)
if (i == 1 && Base.isempty(headers)) == true
headers = rowdata
else
Base.push!(data, rowdata)
end

return DataFrames.DataFrame(data, Symbol.(headers))
else
return DataFrames.DataFrame()
end

tuples::Vector = [Base.Tuple(row) for row in data]

return sink(tuples, headers)
end
1 change: 1 addition & 0 deletions src/themes/00-default.css
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ table {
border-collapse: collapse;
border-top: 1px solid black;
border-bottom: 1px solid black;
background-color: white;
}

thead {
Expand Down
7 changes: 3 additions & 4 deletions src/themes/01-red.css
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,20 @@ table {
color: black;
width: 100%;
border-collapse: collapse;
border-top: 1px solid black;
border-bottom: 1px solid black;
}

thead {
font-weight: bold;
background-color: hsl(0, 100%, 60%);
border-bottom: 1px solid black;
}

tfoot {
height: 20px;
background-color: hsl(0, 100%, 65%);
}

thead, tfoot {
border-top: 1px solid black;
border-bottom: 1px solid black;
}

tbody tr:nth-child(even) {
Expand Down
7 changes: 3 additions & 4 deletions src/themes/02-orange.css
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,20 @@ table {
color: black;
width: 100%;
border-collapse: collapse;
border-top: 1px solid black;
border-bottom: 1px solid black;
}

thead {
font-weight: bold;
background-color: hsl(25, 100%, 60%);
border-bottom: 1px solid black;
}

tfoot {
height: 20px;
background-color: hsl(25, 100%, 65%);
}

thead, tfoot {
border-top: 1px solid black;
border-bottom: 1px solid black;
}

tbody tr:nth-child(even) {
Expand Down
7 changes: 3 additions & 4 deletions src/themes/03-yellow.css
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,20 @@ table {
color: black;
width: 100%;
border-collapse: collapse;
border-top: 1px solid black;
border-bottom: 1px solid black;
}

thead {
font-weight: bold;
background-color: hsl(60, 100%, 60%);
border-bottom: 1px solid black;
}

tfoot {
height: 20px;
background-color: hsl(60, 100%, 65%);
}

thead, tfoot {
border-top: 1px solid black;
border-bottom: 1px solid black;
}

tbody tr:nth-child(even) {
Expand Down
7 changes: 3 additions & 4 deletions src/themes/04-green.css
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,20 @@ table {
color: black;
width: 100%;
border-collapse: collapse;
border-top: 1px solid black;
border-bottom: 1px solid black;
}

thead {
font-weight: bold;
background-color: hsl(115, 100%, 60%);
border-bottom: 1px solid black;
}

tfoot {
height: 20px;
background-color: hsl(115, 100%, 65%);
}

thead, tfoot {
border-top: 1px solid black;
border-bottom: 1px solid black;
}

tbody tr:nth-child(even) {
Expand Down
7 changes: 3 additions & 4 deletions src/themes/05-blue.css
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,20 @@ table {
color: black;
width: 100%;
border-collapse: collapse;
border-top: 1px solid black;
border-bottom: 1px solid black;
}

thead {
font-weight: bold;
background-color: hsl(205, 100%, 60%);
border-bottom: 1px solid black;
}

tfoot {
height: 20px;
background-color: hsl(205, 100%, 65%);
}

thead, tfoot {
border-top: 1px solid black;
border-bottom: 1px solid black;
}

tbody tr:nth-child(even) {
Expand Down
7 changes: 3 additions & 4 deletions src/themes/06-violet.css
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,20 @@ table {
color: black;
width: 100%;
border-collapse: collapse;
border-top: 1px solid black;
border-bottom: 1px solid black;
}

thead {
font-weight: bold;
background-color: hsl(260, 100%, 60%);
border-bottom: 1px solid black;
}

tfoot {
height: 20px;
background-color: hsl(260, 100%, 65%);
}

thead, tfoot {
border-top: 1px solid black;
border-bottom: 1px solid black;
}

tbody tr:nth-child(even) {
Expand Down
7 changes: 3 additions & 4 deletions src/themes/07-magenta.css
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,20 @@ table {
color: black;
width: 100%;
border-collapse: collapse;
border-top: 1px solid black;
border-bottom: 1px solid black;
}

thead {
font-weight: bold;
background-color: hsl(320, 100%, 60%);
border-bottom: 1px solid black;
}

tfoot {
height: 20px;
background-color: hsl(320, 100%, 65%);
}

thead, tfoot {
border-top: 1px solid black;
border-bottom: 1px solid black;
}

tbody tr:nth-child(even) {
Expand Down
Loading

0 comments on commit bda594a

Please sign in to comment.