diff --git a/docx.go b/docx.go index 79842bd..4fa5f03 100644 --- a/docx.go +++ b/docx.go @@ -10,15 +10,15 @@ import ( "time" ) -type TypeOverride struct { +type typeOverride struct { XMLName xml.Name `xml:"Override"` ContentType string `xml:"ContentType,attr"` PartName string `xml:"PartName,attr"` } -type Type struct { +type contentTypeDefinition struct { XMLName xml.Name `xml:"Types"` - Overrides []TypeOverride `xml:"Override"` + Overrides []typeOverride `xml:"Override"` } // ConvertDocx converts an MS Word docx file to text. @@ -37,12 +37,12 @@ func ConvertDocx(r io.Reader) (string, map[string]string, error) { zipFiles := mapZipFiles(zr.File) - types, err := getContentTypes(zipFiles["[Content_Types].xml"]) + contentTypeDefinition, err := getContentTypeDefinition(zipFiles["[Content_Types].xml"]) if err != nil { return "", nil, err } - for _, override := range types.Overrides { + for _, override := range contentTypeDefinition.Overrides { f := zipFiles[override.PartName] switch { @@ -92,28 +92,27 @@ func ConvertDocx(r io.Reader) (string, map[string]string, error) { return textHeader + "\n" + textBody + "\n" + textFooter, meta, nil } -func getContentTypes(f *zip.File) (*Type, error) { - contentTypesFile, err := f.Open() +func getContentTypeDefinition(zf *zip.File) (*contentTypeDefinition, error) { + f, err := zf.Open() if err != nil { return nil, err } - defer contentTypesFile.Close() + defer f.Close() - contentTypesFileBytes, err := ioutil.ReadAll(contentTypesFile) + b, err := ioutil.ReadAll(f) if err != nil { return nil, err } - var types Type - err = xml.Unmarshal(contentTypesFileBytes, &types) + var definition contentTypeDefinition; err = xml.Unmarshal(b, &definition) if err != nil { return nil, err } - return &types, nil + return &definition, nil } func mapZipFiles(files []*zip.File) map[string]*zip.File { - filesMap := map[string]*zip.File{} + filesMap := make(map[string]*zip.File, 2*len(files)) for _, f := range files { filesMap[f.Name] = f filesMap["/"+f.Name] = f diff --git a/docx_test/docx_test.go b/docx_test/docx_test.go index 7734d47..f96a4f6 100644 --- a/docx_test/docx_test.go +++ b/docx_test/docx_test.go @@ -6,14 +6,14 @@ import ( "testing" "code.sajari.com/docconv" - _ "code.sajari.com/docconv/docx_test/resources" ) func TestConvertDocx(t *testing.T) { - f, err := os.Open("./resources/sample.docx") + f, err := os.Open("./testdata/sample.docx") if err != nil { t.Fatalf("got error = %v, want nil", err) } + resp, _, err := docconv.ConvertDocx(f) if err != nil { t.Fatalf("got error = %v, want nil", err) @@ -31,7 +31,7 @@ func TestConvertDocx(t *testing.T) { } func TestConvertDocxWithUncommonValidStructure(t *testing.T) { - f, err := os.Open("./resources/sample_2.docx") + f, err := os.Open("./testdata/sample_2.docx") if err != nil { t.Fatalf("got error = %v, want nil", err) } diff --git a/docx_test/resources/package.go b/docx_test/resources/package.go deleted file mode 100644 index 18d6395..0000000 --- a/docx_test/resources/package.go +++ /dev/null @@ -1 +0,0 @@ -package resources diff --git a/docx_test/resources/sample.docx b/docx_test/testdata/sample.docx similarity index 100% rename from docx_test/resources/sample.docx rename to docx_test/testdata/sample.docx diff --git a/docx_test/resources/sample_2.docx b/docx_test/testdata/sample_2.docx similarity index 100% rename from docx_test/resources/sample_2.docx rename to docx_test/testdata/sample_2.docx