Skip to content

Commit

Permalink
header kwarg, more sinks
Browse files Browse the repository at this point in the history
  • Loading branch information
cecoeco committed Nov 1, 2024
1 parent 1daa77b commit f1a193b
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 15 deletions.
2 changes: 1 addition & 1 deletion src/HTMLTables.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@ include("themes.jl")
include("readtable.jl")
include("writetable.jl")

end
end
33 changes: 21 additions & 12 deletions src/readtable.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@ function ishtmlfile(source::String)::Bool
return Base.Filesystem.splitext(source)[2] == ".html"
end

function parse_number(html_text::String, number_type::DataType)::Union{Number,String}
function parse_number(html_text::String, number_type::Type)::Union{Number,String}
try
return parse(number_type, html_text)
return Base.parse(number_type, html_text)
catch
return html_text
end
end

function extract_row_data(row::Gumbo.HTMLNode, number_type::DataType)::Vector
function extract_row_data(row::Gumbo.HTMLNode, number_type::Type)::Vector
cells::Vector{Gumbo.HTMLNode} = Base.eachmatch(Cascadia.Selector("td,th"), row)

return [parse_number(Cascadia.nodeText(cell), number_type) for cell in cells]
Expand All @@ -27,7 +27,8 @@ end
id::String="",
class::Union{String,Vector{String}}="",
index::Int=1,
number_type::DataType=Number
header::Bool=true,
number_type::Type=Number
)
Reads an HTML table into a sink function such as `DataFrame`.
Expand All @@ -42,11 +43,14 @@ Reads an HTML table into a sink function such as `DataFrame`.
- `id::String`: the id of the HTML table in the HTML document.
- `class::Union{String,Vector{String}}`: the class of the HTML table.
- `index::Int`: the index of the HTML table in the HTML document.
- `number_type::DataType`: the return type of the numeric table data.
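- `header::Bool`: whether to include the table headers in the result; when true they are passed to `sink` as a second argument, or returned alongside the rows if no `sink` is given.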
- `number_type::Type`: the return type of the numeric table data.
## Returns
- `sink`: the sink function such as `DataFrame` with the HTML table data.
- the result of calling `sink` (for example a `DataFrame`) on the HTML table data, if `sink` is specified.
- `tuples::Vector`: the table rows, one `Tuple` per row, if `sink` is not specified and the `header` keyword argument is false.
- `(tuples, headers)`: the table rows together with the table headers, if `sink` is not specified and the `header` keyword argument is true.
## Examples
Expand All @@ -57,7 +61,7 @@ using HTMLTables, DataFrames
url = "https://www.w3schools.com/html/html_tables.asp"
df = HTMLTables.readtable(url, DataFrame)
df = readtable(url, DataFrame)
println(df)
```
Expand All @@ -82,7 +86,7 @@ using HTMLTables, DataFrames
url = "tables.html"
df = HTMLTables.readtable(url, DataFrame, index=2)
df = readtable(url, DataFrame, index=2)
println(df)
```
Expand Down Expand Up @@ -128,7 +132,7 @@ html_str = \"\"\"
</table>
\"\"\"
df = HTMLTables.readtable(html_str, DataFrame, id="htmltable", number_type=Int64)
df = DataFrame(readtable(html_str, id="htmltable", number_type=Int64))
println(df)
```
Expand All @@ -147,11 +151,12 @@ println(df)
"""
function readtable(
source,
sink;
sink=nothing;
id::String="",
class::Union{String,Vector{String}}="",
index::Int=1,
number_type::DataType=Number,
header::Bool=true,
number_type::Type=Number,
)
if Base.isa(source, IO)
source = Base.read(source, String)
Expand Down Expand Up @@ -204,5 +209,9 @@ function readtable(

tuples::Vector = [Base.Tuple(row) for row in data]

return sink(tuples, headers)
if header
return sink !== nothing ? sink(tuples, headers) : (tuples, headers)
else
return sink !== nothing ? sink(tuples) : tuples
end
end
4 changes: 2 additions & 2 deletions src/writetable.jl
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ using HTMLTables, DataFrames
df = DataFrame(x=[1, 2, 3], y=[45, 67, 89])
HTMLTables.writetable(stdout, df, styles=false)
writetable(stdout, df, styles=false)
```
```html
Expand Down Expand Up @@ -329,7 +329,7 @@ using HTMLTables, DataFrames
df = DataFrame(x=[1, 2, 3], y=[4, 11, 28])
HTMLTables.writetable("table.html", df)
writetable("table.html", df)
```
"""
Expand Down

0 comments on commit f1a193b

Please sign in to comment.