Skip to content

Commit

Permalink
bugfix: obj-ref within a stream needs to be resolved too
Browse files Browse the repository at this point in the history
  • Loading branch information
mazeForGit committed Jan 16, 2024
1 parent 8748593 commit 031f94b
Show file tree
Hide file tree
Showing 8 changed files with 58 additions and 4 deletions.
49 changes: 47 additions & 2 deletions pdf_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"testing"
"path/filepath"
)

var referenceFirstPage = `TEST FILE
Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam
Expand All @@ -17,6 +18,45 @@ erat, sed diam voluptua. At vero eos et accusam et
TEST
SUBTITLE`

var referenceFirstPageWithAddLine = `TEST FILE
Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam
nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam
erat, sed diam voluptua. At vero eos et accusam et
TEST
SUBTITLE`

// Test_ReadPdf_v17_linarized_xrefStream reads a linearized fixture whose
// objects live inside an object stream and checks the page count and the
// text of the first page.
//
// Objects stored within a stream are handled differently: the original
// implementation computed the stream but did not return the object from
// resolve.
//
// TODO: an extra empty line shows up in the extracted text; its origin is
// still unknown, hence the dedicated reference string.
// NOTE(review): the test name says v17 while the fixture name says pdf15 — confirm.
func Test_ReadPdf_v17_linarized_xrefStream(t *testing.T) {
	const pdfPath = "./testdata/story_Word2019-2312-1601712620132-32_Print-Adobe__pdf15_linarized_xrefStream.pdf"

	pageCount, firstPage := readPdfAndGetFirstPageAsText(pdfPath)

	// Both checks use t.Error so a wrong page count does not hide a content mismatch.
	if pageCount != 5 {
		t.Error("Asser: incorrect numPage .. want=5 <> got " + strconv.Itoa(pageCount))
	}
	if firstPage != referenceFirstPageWithAddLine {
		t.Error("Asser: content different from reference:")
		t.Error(firstPage)
	}
}
// Test_ReadPdf_v17_linarized_xref reads the linearized PDF 1.7 fixture and
// checks that it reports five pages and that the first page's text equals
// referenceFirstPage.
func Test_ReadPdf_v17_linarized_xref(t *testing.T) {
	const pdfPath = "./testdata/story_avepdf-com__pdf17_linarized_xref.pdf"

	pageCount, firstPage := readPdfAndGetFirstPageAsText(pdfPath)

	// Both checks use t.Error so a wrong page count does not hide a content mismatch.
	if pageCount != 5 {
		t.Error("Asser: incorrect numPage .. want=5 <> got " + strconv.Itoa(pageCount))
	}
	if firstPage != referenceFirstPage {
		t.Error("Asser: content different from reference:")
		t.Error(firstPage)
	}
}
//
// this pdf has an array of refs at /Contents
// standard:
Expand All @@ -27,13 +67,13 @@ SUBTITLE`
// Test_ReadPdf_v17_trailer_arrayAtPageContents reads a fixture whose page
// /Contents entry is an array of references and checks that it reports five
// pages and that the first page's text equals referenceFirstPage.
func Test_ReadPdf_v17_trailer_arrayAtPageContents(t *testing.T) {
	const pdfPath = "./testdata/story_Word2019-2312-1712620132_Print-Microsoft__pdf17_trailer_array-at-page-contents.pdf"

	pageCount, firstPage := readPdfAndGetFirstPageAsText(pdfPath)

	// Both checks use t.Error so a wrong page count does not hide a content mismatch.
	if pageCount != 5 {
		t.Error("Asser: incorrect numPage .. want=5 <> got " + strconv.Itoa(pageCount))
	}
	if firstPage != referenceFirstPage {
		t.Error("Asser: content different from reference:")
		t.Error(firstPage)
	}
}
func Test_ReadPdf_v17_StandardPDFA_trailer(t *testing.T) {
Expand All @@ -45,6 +85,7 @@ func Test_ReadPdf_v17_StandardPDFA_trailer(t *testing.T) {
}
if referenceFirstPage != content {
t.Error("Asser: content different from reference:")
t.Error(content)
}
}
func Test_ReadPdf_v17_MinSizePDFA_trailer(t *testing.T) {
Expand All @@ -56,6 +97,7 @@ func Test_ReadPdf_v17_MinSizePDFA_trailer(t *testing.T) {
}
if referenceFirstPage != content {
t.Error("Asser: content different from reference")
t.Error(content)
}
}
func Test_ReadPdf_v17_StandardNoPDFA_2trailer(t *testing.T) {
Expand All @@ -67,6 +109,7 @@ func Test_ReadPdf_v17_StandardNoPDFA_2trailer(t *testing.T) {
}
if referenceFirstPage != content {
t.Error("Asser: content different from reference")
t.Error(content)
}
}
func Test_ReadPdf_v17_MinSizeNoPDFA_2trailer(t *testing.T) {
Expand All @@ -78,13 +121,15 @@ func Test_ReadPdf_v17_MinSizeNoPDFA_2trailer(t *testing.T) {
}
if referenceFirstPage != content {
t.Error("Asser: content different from reference")
t.Error(content)
}
}
//
// read pdf and return content of first page for quick check
//
func readPdfAndGetFirstPageAsText(fileName string) (totalPages int, content string) {

fmt.Println("read file = " + fileName)

f, err := Open(fileName)
if err != nil {
return 0, err.Error()
Expand Down
13 changes: 11 additions & 2 deletions read.go
Original file line number Diff line number Diff line change
Expand Up @@ -726,8 +726,15 @@ func (v Value) Len() int {
}
return len(x)
}

//
// resolve xrefs
// in: the parent and the key or reference to resolve
// out: the reference
//
// bugfix: when the object-ref is within a stream, then nothing was returned
//
func (r *Reader) resolve(parent objptr, x interface{}) Value {

if ptr, ok := x.(objptr); ok {
if ptr.id >= uint32(len(r.xref)) {
return Value{}
Expand All @@ -739,6 +746,7 @@ func (r *Reader) resolve(parent objptr, x interface{}) Value {
// var obj object
if xref.inStream {
strm := r.resolve(parent, xref.stream)

Search:
for {
if strm.Kind() != Stream {
Expand All @@ -759,10 +767,11 @@ func (r *Reader) resolve(parent objptr, x interface{}) Value {
off, _ := b.readToken().(int64)
if uint32(id) == ptr.id {
b.seekForward(first + off)
_, err := b.readObject()
objinstream, err := b.readObject()
if err != nil {
return Value{}
}
x = objinstream
break Search
}
}
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

0 comments on commit 031f94b

Please sign in to comment.