diff --git a/Project.toml b/Project.toml index f8401d5..a725e49 100644 --- a/Project.toml +++ b/Project.toml @@ -12,13 +12,11 @@ EzXML = "8f5d6c58-4d21-5cfd-889c-e3ad7ee6a615" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" ZipArchives = "49080126-0e18-4c2a-b176-c102e4b3760c" -ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea" [compat] EzXML = "1" Tables = "1" ZipArchives = "2" -ZipFile = "0.8, 0.9, 0.10" julia = "1.6" [extras] diff --git a/src/XLSX.jl b/src/XLSX.jl index eae3ce9..d45e3ef 100644 --- a/src/XLSX.jl +++ b/src/XLSX.jl @@ -4,17 +4,11 @@ module XLSX import Artifacts import Dates import Printf.@printf -import ZipFile import ZipArchives import EzXML import Tables import Base.convert -# https://github.com/fhs/ZipFile.jl/issues/39 -if !hasmethod(Base.bytesavailable, Tuple{ZipFile.ReadableFile}) - Base.bytesavailable(f::ZipFile.ReadableFile) = f.uncompressedsize - f._pos -end - const SPREADSHEET_NAMESPACE_XPATH_ARG = [ "xpath" => "http://schemas.openxmlformats.org/spreadsheetml/2006/main" ] const EXCEL_MAX_COLS = 16_384 # total columns supported by Excel per sheet const EXCEL_MAX_ROWS = 1_048_576 # total rows supported by Excel per sheet (including headers) diff --git a/src/read.jl b/src/read.jl index 9f3e304..7c31229 100644 --- a/src/read.jl +++ b/src/read.jl @@ -132,11 +132,13 @@ function openxlsx(f::F, source::Union{AbstractString, IO}; if _read @assert source isa IO || isfile(source) "File $source not found." - xf = open_or_read_xlsx(source, _write, enable_cache, _write) + + xf = open_or_read_xlsx(source, _read, enable_cache, _write) else xf = open_empty_template() end + try f(xf) finally @@ -147,8 +149,6 @@ function openxlsx(f::F, source::Union{AbstractString, IO}; close(xf) end - # fix libuv issue on windows (#42) and other systems (#173) - GC.gc() end end @@ -169,7 +169,7 @@ function openxlsx(source::Union{AbstractString, IO}; if _read @assert source isa IO || isfile(source) "File $source not found." - return open_or_read_xlsx(source, _write, enable_cache, _write) + return open_or_read_xlsx(source, _read, enable_cache, _write) else return open_empty_template() end @@ -196,43 +196,33 @@ function open_or_read_xlsx(source::Union{IO, AbstractString}, read_files::Bool, xf = XLSXFile(source, enable_cache, read_as_template) try - for f in xf.io.files - + for i in 1:ZipArchives.zip_nentries(xf.io) + f = ZipArchives.zip_name(xf.io, i) # ignore xl/calcChain.xml in any case (#31) - if f.name == "xl/calcChain.xml" + if f == "xl/calcChain.xml" continue end - # Rather than ignore custom XML internal files here, let them get passed through to write like binaries are. - if !startswith(f.name, "customXml") && (endswith(f.name, ".xml") || endswith(f.name, ".rels")) - #if endswith(f.name, ".xml") || endswith(f.name, ".rels") + # let customXML files get passed through to write like binaries are (below). + if !startswith(f, "customXml") && (endswith(f, ".xml") || endswith(f, ".rels")) # XML file - internal_xml_file_add!(xf, f.name) + internal_xml_file_add!(xf, f) if read_files # ignore worksheet files because they'll be read thru streaming # If reading as template, it will be loaded in two places: here and WorksheetCache. - if !read_as_template && startswith(f.name, "xl/worksheets") && endswith(f.name, ".xml") + if !read_as_template && startswith(f, "xl/worksheets") && endswith(f, ".xml") continue end - # ignore custom XML internal files - # no longer needed if these files are passed through like binary files - #if startswith(f.name, "customXml") - # continue - #end - - internal_xml_file_read(xf, f.name) + internal_xml_file_read(xf, f) end - elseif read_as_template - # Binary file - # we only read binary files to save the Excel file later - # Custom XML files also now get passed through this way, too - bytes = ZipFile.read(f) - @assert sizeof(bytes) == f.uncompressedsize - xf.binary_data[f.name] = bytes + elseif read_as_template + # Binary and customXML files + # we only read these files to save the Excel file later + xf.binary_data[f] = ZipArchives.zip_readentry(xf.io, f) end end @@ -260,8 +250,10 @@ function open_or_read_xlsx(source::Union{IO, AbstractString}, read_files::Bool, end finally - if read_files + if read_files # Always close(xf) + else # Never + println("read_files isn't true!") end end @@ -444,28 +436,17 @@ function internal_xml_file_read(xf::XLSXFile, filename::String) :: EzXML.Documen @assert internal_xml_file_exists(xf, filename) "Couldn't find $filename in $(xf.source)." if !internal_xml_file_isread(xf, filename) + @assert isopen(xf) "Can't read from a closed XLSXFile." - file_not_found = true - for f in xf.io.files - if f.name == filename - xf.files[filename] = true # set file as read - - try - xf.data[filename] = EzXML.readxml(f) - catch err - @error("Failed to parse internal XML file `$filename`") - rethrow() - end - file_not_found = false - break - end + try + xf.data[filename] = EzXML.parsexml(ZipArchives.zip_readentry(xf.io, filename)) + xf.files[filename] = true # set file as read + catch err + @error("Failed to parse internal XML file `$filename`") + rethrow() end - if file_not_found - # shouldn't happen - error("$filename not found in XLSX package.") - end end return xf.data[filename] @@ -473,14 +454,13 @@ end function Base.close(xl::XLSXFile) xl.io_is_open = false - close(xl.io) - +# close(xl.io) # close all internal file streams from worksheet caches - for sheet in xl.workbook.sheets - if sheet.cache != nothing && sheet.cache.stream_state != nothing - close(sheet.cache.stream_state) - end - end +# for sheet in xl.workbook.sheets +# if sheet.cache != nothing && sheet.cache.stream_state != nothing +# close(sheet.cache.stream_state) +# end +# end end Base.isopen(xl::XLSXFile) = xl.io_is_open @@ -530,14 +510,13 @@ julia> XLSX.readdata("myfile.xlsx", "mysheet!A2:B4") ``` """ function readdata(source::Union{AbstractString, IO}, sheet::Union{AbstractString, Int}, ref) - c = openxlsx(source, enable_cache=false) do xf + c = openxlsx(source, enable_cache=true) do xf getdata(getsheet(xf, sheet), ref) end return c end - function readdata(source::Union{AbstractString, IO}, sheetref::AbstractString) - c = openxlsx(source, enable_cache=false) do xf + c = openxlsx(source, enable_cache=true) do xf getdata(xf, sheetref) end return c diff --git a/src/stream.jl b/src/stream.jl index 09eedae..7b740ea 100644 --- a/src/stream.jl +++ b/src/stream.jl @@ -47,16 +47,12 @@ end Base.show(io::IO, state::SheetRowStreamIteratorState) = print(io, "SheetRowStreamIteratorState( is open = $(state.is_open) , row = $(state.row) )") # Opens a file for streaming. -@inline function open_internal_file_stream(xf::XLSXFile, filename::String) :: Tuple{ZipFile.Reader, EzXML.StreamReader} +@inline function open_internal_file_stream(xf::XLSXFile, filename::String) :: Tuple{ZipArchives.ZipReader, EzXML.StreamReader} @assert internal_xml_file_exists(xf, filename) "Couldn't find $filename in $(xf.source)." @assert xf.source isa IO || isfile(xf.source) "Can't open internal file $filename for streaming because the XLSX file $(xf.filepath) was not found." - io = ZipFile.Reader(xf.source) - for f in io.files - if f.name == filename - return io, EzXML.StreamReader(f) - end - end + f = ZipArchives.zip_openentry(xf.io, filename) + return xf.io, EzXML.StreamReader(f) error("Couldn't find $filename in $(xf.source).") end @@ -67,7 +63,7 @@ end if isopen(s) s.is_open = false close(s.xml_stream_reader) - close(s.zip_io) +# close(s.zip_io) end nothing end @@ -102,7 +98,7 @@ function Base.iterate(itr::SheetRowStreamIterator, state::Union{Nothing, SheetRo elseif is_end_of_sheet_data(reader) # this Worksheet has no rows close(reader) - close(zip_io) +# close(zip_io) return nothing end end @@ -117,7 +113,7 @@ function Base.iterate(itr::SheetRowStreamIterator, state::Union{Nothing, SheetRo reader = state.xml_stream_reader if is_end_of_sheet_data(reader) - @assert !isopen(state) +# @assert !isopen(state) return nothing else @assert isopen(state) "Error processing Worksheet $(ws.name): Can't fetch rows from a closed workbook." @@ -133,7 +129,7 @@ function Base.iterate(itr::SheetRowStreamIterator, state::Union{Nothing, SheetRo while EzXML.iterate(reader) != nothing if is_end_of_sheet_data(reader) - close(state) +# close(state) break end @@ -142,7 +138,7 @@ function Base.iterate(itr::SheetRowStreamIterator, state::Union{Nothing, SheetRo while true if is_end_of_sheet_data(reader) - close(state) +# close(state) break elseif EzXML.nodetype(reader) == EzXML.READER_ELEMENT && nodename(reader) == "row" break diff --git a/src/types.jl b/src/types.jl index 0651d36..09f6c32 100644 --- a/src/types.jl +++ b/src/types.jl @@ -194,7 +194,7 @@ Implementations: SheetRowStreamIterator, WorksheetCache. abstract type SheetRowIterator end mutable struct SheetRowStreamIteratorState - zip_io::ZipFile.Reader + zip_io::ZipArchives.ZipReader xml_stream_reader::EzXML.StreamReader is_open::Bool # indicated if zip_io and xml_stream_reader are opened row::Int # number of current row. It´s set to 0 in the start state. @@ -284,7 +284,7 @@ sh = xf["mysheet"] # get a reference to a Worksheet mutable struct XLSXFile <: MSOfficePackage source::Union{AbstractString, IO} use_cache_for_sheet_data::Bool # indicates whether Worksheet.cache will be fed while reading worksheet cells. - io::ZipFile.Reader + io::ZipArchives.ZipReader io_is_open::Bool files::Dict{String, Bool} # maps filename => isread bool data::Dict{String, EzXML.Document} # maps filename => XMLDocument @@ -295,7 +295,7 @@ mutable struct XLSXFile <: MSOfficePackage function XLSXFile(source::Union{AbstractString, IO}, use_cache::Bool, is_writable::Bool) check_for_xlsx_file_format(source) - io = ZipFile.Reader(source) + io = ZipArchives.ZipReader(read(source)) xl = new(source, use_cache, io, true, Dict{String, Bool}(), Dict{String, EzXML.Document}(), Dict{String, Vector{UInt8}}(), EmptyWorkbook(), Vector{Relationship}(), is_writable) xl.workbook.package = xl finalizer(close, xl) diff --git a/src/worksheet.jl b/src/worksheet.jl index c25b635..04c8bda 100644 --- a/src/worksheet.jl +++ b/src/worksheet.jl @@ -6,7 +6,6 @@ function Worksheet(xf::XLSXFile, sheet_element::EzXML.Node) name = sheet_element["name"] is_hidden = haskey(sheet_element, "state") && sheet_element["state"] in ["hidden", "veryHidden"] dim = read_worksheet_dimension(xf, relationship_id, name) - return Worksheet(xf, sheetId, relationship_id, name, dim, is_hidden) end @@ -26,7 +25,6 @@ end # 18.3.1.35 - dimension (Worksheet Dimensions). This is optional, and not required. function read_worksheet_dimension(xf::XLSXFile, relationship_id, name) :: Union{Nothing, CellRange} local result::Union{Nothing, CellRange} = nothing - wb = get_workbook(xf) target_file = get_relationship_target_by_id("xl", wb, relationship_id) zip_io, reader = open_internal_file_stream(xf, target_file) @@ -48,7 +46,7 @@ function read_worksheet_dimension(xf::XLSXFile, relationship_id, name) :: Union{ end finally close(reader) - close(zip_io) +# close(zip_io) end return result diff --git a/test/runtests.jl b/test/runtests.jl index 1eda966..87653f7 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1354,14 +1354,21 @@ end @test dt_read.column_labels == dt.column_labels @test dt_read.column_label_index == dt.column_label_index end +end - # delete files created by this testset - delete_files = ["output_table.xlsx", "output_tables.xlsx"] - for f in delete_files - isfile(f) && rm(f) +@testset "rm after read" begin + @testset "with readtable" begin + f = "output_table.xlsx" + dtable = XLSX.readtable(f, "report", "F") + @test isfile(f) && rm(f)==nothing + end + @testset "with readdata" begin + f = "output_tables.xlsx" + dtable = XLSX.readdata(f, "REPORT_A", "A1:B3") + @test isfile(f) && rm(f)==nothing end end - + @testset "Styles" begin using XLSX: CellValue, id, getcell, setdata!, CellRef