Update

0h7z · Jun 1, 2024 · e233d74 · e233d74
1 parent ab0c503
commit e233d74
Show file tree

Hide file tree

Showing 4 changed files with 60 additions and 8 deletions.
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "Exts"
 uuid = "0b12d779-4123-4875-9d6c-e33c2e29e2c9"
 authors = ["Heptazhou <zhou at 0h7z dot com>"]
-version = "0.1.2"
+version = "0.1.3"
 
 [deps]
 DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"

diff --git a/ext/DataFramesExt.jl b/ext/DataFramesExt.jl
@@ -18,14 +18,61 @@ module DataFramesExt
 using DataFrames: DataFrame
 using DelimitedFiles: readdlm, writedlm
 
-function Base.read(s::IOStream, ::Type{DataFrame})
-	t::NTuple{2, Matrix} = readdlm(s, header = true, comments = true)
-	DataFrame(t[1], vec(t[2]))
+const Maybe{T} = Union{Nothing, T}
+
+"""
+	read(f::AbstractString, DataFrame, colnames = nothing;
+		quotes = true, comments = true, comment_char = '#') -> DataFrame
+	read(s::IOStream, DataFrame, colnames = nothing;
+		quotes = true, comments = true, comment_char = '#') -> DataFrame
+
+Read a DataFrame from the given I/O stream or file where each line gives one
+row.
+
+If `colnames` is not provided (or is `nothing`), the first row of data will
+be read as header. If `colnames` is the symbol `:auto`, the column names will
+be `x1`, `x2`, and so on. Otherwise, `colnames` must be a vector of symbols
+or strings to specify column names.
+
+If `quotes` is `true`, columns enclosed within double-quote (") characters
+are allowed to contain new lines and column delimiters. Double-quote
+characters within a quoted field must be escaped with another double-quote.
+
+If `comments` is `true`, lines beginning with `comment_char` and text
+following `comment_char` in any line are ignored.
+"""
+function Base.read(f::AbstractString, ::Type{DataFrame}, xs...; kw...)
+	open(s -> read(s, DataFrame, xs...; kw...), convert(String, f)::String)
 end
+function Base.read(s::IOStream, ::Type{DataFrame},
+	colnames::Maybe{Union{Symbol, AbstractVector}} = nothing, xs...; quotes::Bool = true,
+	comments::Bool = true, comment_char::AbstractChar = '#', kw...)
+	cols, colnames = if isnothing(colnames)
+		t = readdlm(s, xs...; quotes, comments, comment_char, kw..., header = true)::NTuple{2, Matrix}
+		t[1], vec(t[2])
+	else
+		t = readdlm(s, xs...; quotes, comments, comment_char, kw..., header = false)::Matrix
+		t, colnames
+	end
+	DataFrame(cols, colnames)
+end
+
+"""
+	write(f::AbstractString, x::DataFrame; delim = '\\t', header = true) -> Int64
+	write(s::IOStream, x::DataFrame; delim = '\\t', header = true)       -> Int64
 
-function Base.write(s::IOStream, x::DataFrame)
+Write a DataFrame as text to the given I/O stream or file, using the delimiter
+`delim` (which defaults to tab, but can be anything printable, typically a
+character or string). If `header` is `true`, column names form the first row.
+
+Return the number of bytes written into the stream or file.
+"""
+function Base.write(f::AbstractString, x::DataFrame; kw...)
+	open(s -> write(s, x; kw...), convert(String, f)::String, "w")
+end
+function Base.write(s::IOStream, x::DataFrame; delim = '\t', header::Bool = true, kw...)
 	pos₀ = position(s)
-	writedlm(s, [propertynames(x)'; Matrix(x)])
+	writedlm(s, header ? [propertynames(x)'; Matrix(x)] : Matrix(x), delim; kw...)
 	position(s) - pos₀
 end
 

diff --git a/src/Exts.jl b/src/Exts.jl
@@ -23,8 +23,11 @@ export readstr
 
 using Reexport: @reexport
 
-@reexport using Base.Iterators: map as lmap
-@reexport using Base.Threads: @spawn, @threads, nthreads
+@reexport begin
+#! format: noindent
+using Base.Iterators: map as lmap
+using Base.Threads: @spawn, @threads, nthreads
+end
 
 include("BaseExt.jl")
 
@@ -41,6 +44,7 @@ function invsqrt(x::T) where T <: Real
 	F(big(x) |> inv |> sqrt)
 end
 
+# StatisticsExt
 function nanmean end
 
 end # module

diff --git a/test/runtests.jl b/test/runtests.jl
@@ -45,6 +45,7 @@ end
 	using CSV: CSV
 	@test 36 ≤ write(tmp, df) == filesize(tmp)
 	@test df == read(tmp, DataFrame)
+	@test df == read(tmp, DataFrame, [:x, :y], skipstart = 1)
 	@test df == CSV.read(tmp, DataFrame)
 end