Skip to content

Commit

Permalink
Temporarily disabled panics
Browse files Browse the repository at this point in the history
  • Loading branch information
dslipak committed Oct 6, 2020
1 parent 1467e63 commit 4a7c662
Show file tree
Hide file tree
Showing 7 changed files with 154 additions and 158 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
.idea/
.DS_Store
pdf_test.go
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Features

## Install:

`go get -u github.com/ledongthuc/pdf`
`go get -u github.com/dslipak/pdf`


## Read plain text
Expand All @@ -20,7 +20,7 @@ import (
"bytes"
"fmt"

"github.com/ledongthuc/pdf"
"github.com/dslipak/pdf"
)

func main() {
Expand Down Expand Up @@ -92,7 +92,7 @@ import (
"fmt"
"os"

"github.com/dcu/pdf"
"github.com/dslipak/pdf"
)

func main() {
Expand Down
95 changes: 61 additions & 34 deletions lex.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
package pdf

import (
"errors"
"fmt"
"io"
"strconv"
Expand Down Expand Up @@ -78,36 +79,42 @@ func (b *buffer) readByte() byte {
return c
}

func (b *buffer) errorf(format string, args ...interface{}) {
panic(fmt.Errorf(format, args...))
func (b *buffer) errorf(format string, args ...interface{}) string {
// panic(fmt.Errorf(format, args...))
return fmt.Sprintf(format, args...)
}

func (b *buffer) reload() bool {
func (b *buffer) reload() (bool, error) {
n := cap(b.buf) - int(b.offset%int64(cap(b.buf)))
n, err := b.r.Read(b.buf[:n])
if n == 0 && err != nil {
b.buf = b.buf[:0]
b.pos = 0
if b.allowEOF && err == io.EOF {
b.eof = true
return false
return false, err
}
b.errorf("malformed PDF: reading at offset %d: %v", b.offset, err)
return false
fmt.Sprint(b.errorf("malformed PDF: reading at offset %d: %v", b.offset, err))
return false, err
}
b.offset += int64(n)
b.buf = b.buf[:n]
b.pos = 0
return true
return true, err
}

func (b *buffer) seekForward(offset int64) {
func (b *buffer) seekForward(offset int64) (err error) {
for b.offset < offset {
if !b.reload() {
return
rel, err := b.reload()
if err != nil {
return err
}
if !rel {
return err
}
}
b.pos = len(b.buf) - int(b.offset-offset)
return err
}

func (b *buffer) readOffset() int64 {
Expand Down Expand Up @@ -160,7 +167,7 @@ func (b *buffer) readToken() token {
return b.readLiteralString()

case '[', ']', '{', '}':
return keyword(string(c))
return keyword(c)

case '/':
return b.readName()
Expand All @@ -174,8 +181,9 @@ func (b *buffer) readToken() token {

default:
if isDelim(c) {
b.errorf("unexpected delimiter %#q", rune(c))
return nil
// b.errorf("unexpected delimiter %#q", rune(c))
return b.errorf("unexpected delimiter %#q", rune(c))
// return nil
}
b.unreadByte()
return b.readKeyword()
Expand All @@ -200,7 +208,7 @@ func (b *buffer) readHexString() token {
}
x := unhex(c)<<4 | unhex(c2)
if x < 0 {
b.errorf("malformed hex string %c %c %s", c, c2, b.buf[b.pos:])
fmt.Sprint(b.errorf("malformed hex string %c %c %s", c, c2, b.buf[b.pos:]))
break
}
tmp = append(tmp, byte(x))
Expand Down Expand Up @@ -241,7 +249,8 @@ Loop:
case '\\':
switch c = b.readByte(); c {
default:
b.errorf("invalid escape sequence \\%c", c)
// b.errorf("invalid escape sequence \\%c", c)
fmt.Sprint(b.errorf("invalid escape sequence \\%c", c))
tmp = append(tmp, '\\', c)
case 'n':
tmp = append(tmp, '\n')
Expand Down Expand Up @@ -294,7 +303,8 @@ func (b *buffer) readName() token {
if c == '#' {
x := unhex(b.readByte())<<4 | unhex(b.readByte())
if x < 0 {
b.errorf("malformed name")
// b.errorf("malformed name")
fmt.Sprint(b.errorf("malformed name"))
}
tmp = append(tmp, byte(x))
continue
Expand Down Expand Up @@ -325,13 +335,15 @@ func (b *buffer) readKeyword() token {
case isInteger(s):
x, err := strconv.ParseInt(s, 10, 64)
if err != nil {
b.errorf("invalid integer %s", s)
// b.errorf("invalid integer %s", s)
fmt.Sprint(b.errorf("invalid integer %s", s))
}
return x
case isReal(s):
x, err := strconv.ParseFloat(s, 64)
if err != nil {
b.errorf("invalid real %s", s)
// b.errorf("invalid real %s", s)
fmt.Sprint(b.errorf("invalid real %s", s))
}
return x
}
Expand Down Expand Up @@ -409,27 +421,28 @@ type objdef struct {
obj object
}

func (b *buffer) readObject() object {
func (b *buffer) readObject() (object, error) {
tok := b.readToken()
if kw, ok := tok.(keyword); ok {
switch kw {
case "null":
return nil
return nil, nil
case "<<":
return b.readDict()
return b.readDict(), nil
case "[":
return b.readArray()
return b.readArray(), nil
}
b.errorf("unexpected keyword %q parsing object", kw)
return nil
// b.errorf("unexpected keyword %q parsing object", kw)
return nil, errors.New(b.errorf("unexpected keyword %q parsing object", kw))
// return nil
}

if str, ok := tok.(string); ok && b.key != nil && b.objptr.id != 0 {
tok = decryptString(b.key, b.useAES, b.objptr, str)
}

if !b.allowObjptr {
return tok
return tok, nil
}

if t1, ok := tok.(int64); ok && int64(uint32(t1)) == t1 {
Expand All @@ -438,26 +451,30 @@ func (b *buffer) readObject() object {
tok3 := b.readToken()
switch tok3 {
case keyword("R"):
return objptr{uint32(t1), uint16(t2)}
return objptr{uint32(t1), uint16(t2)}, nil
case keyword("obj"):
old := b.objptr
b.objptr = objptr{uint32(t1), uint16(t2)}
obj := b.readObject()
obj, err := b.readObject()
if err != nil {
return nil, err
}
if _, ok := obj.(stream); !ok {
tok4 := b.readToken()
if tok4 != keyword("endobj") {
b.errorf("missing endobj after indirect object definition")
// b.errorf("missing endobj after indirect object definition")
fmt.Sprint(b.errorf("missing endobj after indirect object definition"))
b.unreadToken(tok4)
}
}
b.objptr = old
return objdef{objptr{uint32(t1), uint16(t2)}, obj}
return objdef{objptr{uint32(t1), uint16(t2)}, obj}, err
}
b.unreadToken(tok3)
}
b.unreadToken(tok2)
}
return tok
return tok, nil
}

func (b *buffer) readArray() object {
Expand All @@ -468,7 +485,11 @@ func (b *buffer) readArray() object {
break
}
b.unreadToken(tok)
x = append(x, b.readObject())
res, err := b.readObject()
if err != nil {
return err
}
x = append(x, res)
}
return x
}
Expand All @@ -482,10 +503,15 @@ func (b *buffer) readDict() object {
}
n, ok := tok.(name)
if !ok {
b.errorf("unexpected non-name key %T(%v) parsing dictionary", tok, tok)
// b.errorf("unexpected non-name key %T(%v) parsing dictionary", tok, tok)
fmt.Sprint(b.errorf("unexpected non-name key %T(%v) parsing dictionary", tok, tok))
continue
}
x[n] = b.readObject()
res, err := b.readObject()
if err != nil {
return nil
}
x[n] = res
}

if !b.allowStream {
Expand All @@ -506,7 +532,8 @@ func (b *buffer) readDict() object {
case '\n':
// ok
default:
b.errorf("stream keyword not followed by newline")
// b.errorf("stream keyword not followed by newline")
return b.errorf("stream keyword not followed by newline")
}

return stream{x, b.objptr, b.readOffset()}
Expand Down
48 changes: 48 additions & 0 deletions pdf_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package pdf

import (
	"bytes"
	"fmt"
	"os"
	"testing"
)

const testFile = "/Users/dslipak/Documents/dslipak-20190925.pdf"

func TestReadPdf(t *testing.T) {
f, err := Open(testFile)
if err != nil {
t.Error("Doc should not be nil', got ", err)
}

totalPage := f.NumPage()
var buf bytes.Buffer

for pageIndex := 1; pageIndex <= totalPage; pageIndex++ {
p := f.Page(pageIndex)
if p.V.IsNull() {
continue
}

texts := p.Content().Text
var lastY = 0.0
line := ""

for _, text := range texts {
if lastY != text.Y {
if lastY > 0 {
buf.WriteString(line + "\n")
line = text.S
} else {
line += text.S
}
} else {
line += text.S
}

lastY = text.Y
}
buf.WriteString(line)
}
fmt.Println(buf.String())
}

Loading

0 comments on commit 4a7c662

Please sign in to comment.