Skip to content

Commit

Permalink
try to avoid loading file data into slices
Browse files Browse the repository at this point in the history
  • Loading branch information
dhowden committed Sep 27, 2019
1 parent d82a45e commit e397537
Showing 1 changed file with 30 additions and 15 deletions.
45 changes: 30 additions & 15 deletions docx.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"fmt"
"io"
"io/ioutil"
"os"
"time"
)

Expand All @@ -23,14 +24,31 @@ type contentTypeDefinition struct {

// ConvertDocx converts an MS Word docx file to text.
func ConvertDocx(r io.Reader) (string, map[string]string, error) {
meta := make(map[string]string)
var textHeader, textBody, textFooter string

b, err := ioutil.ReadAll(r)
if err != nil {
return "", nil, err
var size int64

// Common case: if the reader is a file (or trivial wrapper), avoid
// loading it all into memory.
var ra io.ReaderAt
if f, ok := r.(interface {
io.ReaderAt
Stat() (os.FileInfo, error)
}); ok {
si, err := f.Stat()
if err != nil {
return "", nil, err
}
size = si.Size()
ra = f
} else {
b, err := ioutil.ReadAll(r)
if err != nil {
return "", nil, nil
}
size = int64(len(b))
ra = bytes.NewReader(b)
}
zr, err := zip.NewReader(bytes.NewReader(b), int64(len(b)))

zr, err := zip.NewReader(ra, size)
if err != nil {
return "", nil, fmt.Errorf("error unzipping data: %v", err)
}
Expand All @@ -42,6 +60,8 @@ func ConvertDocx(r io.Reader) (string, map[string]string, error) {
return "", nil, err
}

meta := make(map[string]string)
var textHeader, textBody, textFooter string
for _, override := range contentTypeDefinition.Overrides {
f := zipFiles[override.PartName]

Expand Down Expand Up @@ -99,16 +119,11 @@ func getContentTypeDefinition(zf *zip.File) (*contentTypeDefinition, error) {
}
defer f.Close()

b, err := ioutil.ReadAll(f)
if err != nil {
return nil, err
}

var definition contentTypeDefinition; err = xml.Unmarshal(b, &definition)
if err != nil {
x := &contentTypeDefinition{}
if err := xml.NewDecoder(f).Decode(x); err != nil {
return nil, err
}
return &definition, nil
return x, nil
}

func mapZipFiles(files []*zip.File) map[string]*zip.File {
Expand Down

0 comments on commit e397537

Please sign in to comment.