diff --git a/README.md b/README.md index 1327d13..cb05883 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,4 @@ -> :tada: v2 is released! It contains breaking change. If you still want to use v1, please replace `github.com/otiai10/gosseract` with `github.com/otiai10/gosseract/v1/gosseract` and it is exactly the same thing as v1 implementation_ - -# Gosseract-OCR +# gosseract OCR [![Build Status](https://travis-ci.org/otiai10/gosseract.svg?branch=master)](https://travis-ci.org/otiai10/gosseract) [![codecov](https://codecov.io/gh/otiai10/gosseract/branch/master/graph/badge.svg)](https://codecov.io/gh/otiai10/gosseract) [![Go Report Card](https://goreportcard.com/badge/github.com/otiai10/gosseract)](https://goreportcard.com/report/github.com/otiai10/gosseract) diff --git a/all_test.go b/all_test.go index 04217be..5a99026 100644 --- a/all_test.go +++ b/all_test.go @@ -79,7 +79,7 @@ func TestClient_HTML(t *testing.T) { defer client.Close() client.SetImage("./test/data/001-gosseract.png") client.SetWhitelist("otiai10/gosseract") - out, err := client.HTML() + out, err := client.HOCRText() Expect(t, err).ToBe(nil) tokenizer := html.NewTokenizer(strings.NewReader(out)) @@ -94,13 +94,19 @@ func TestClient_HTML(t *testing.T) { Expect(t, texts).ToBe([]string{"otiai10", "/", "gosseract"}) When(t, "only invalid languages are given", func(t *testing.T) { + client := NewClient() + defer client.Close() client.SetLanguage("foo") - _, err := client.HTML() + client.SetImage("./test/data/001-gosseract.png") + _, err := client.HOCRText() Expect(t, err).Not().ToBe(nil) }) When(t, "undefined key-value is tried to be set", func(t *testing.T) { + client := NewClient() + defer client.Close() client.SetVariable("foobar", "hoge") - _, err := client.HTML() + client.SetImage("./test/data/001-gosseract.png") + _, err := client.HOCRText() Expect(t, err).Not().ToBe(nil) }) } diff --git a/client.go b/client.go index af9d3fe..79d67d2 100644 --- a/client.go +++ b/client.go @@ -69,50 +69,50 @@ func NewClient() *Client { } // Close frees allocated API. This MUST be called for ANY client constructed by "NewClient" function. -func (c *Client) Close() (err error) { +func (client *Client) Close() (err error) { // defer func() { // if e := recover(); e != nil { // err = fmt.Errorf("%v", e) // } // }() - C.Free(c.api) + C.Free(client.api) return err } // SetImage sets path to image file to be processed OCR. -func (c *Client) SetImage(imagepath string) *Client { - c.ImagePath = imagepath - return c +func (client *Client) SetImage(imagepath string) *Client { + client.ImagePath = imagepath + return client } // SetLanguage sets languages to use. English as default. -func (c *Client) SetLanguage(langs ...string) *Client { - c.Languages = langs - return c +func (client *Client) SetLanguage(langs ...string) *Client { + client.Languages = langs + return client } // SetWhitelist sets whitelist chars. // See official documentation for whitelist here https://github.com/tesseract-ocr/tesseract/wiki/ImproveQuality#dictionaries-word-lists-and-patterns -func (c *Client) SetWhitelist(whitelist string) *Client { - return c.SetVariable("tessedit_char_whitelist", whitelist) +func (client *Client) SetWhitelist(whitelist string) *Client { + return client.SetVariable("tessedit_char_whitelist", whitelist) } // SetVariable sets parameters, representing tesseract::TessBaseAPI->SetVariable. // See official documentation here https://zdenop.github.io/tesseract-doc/classtesseract_1_1_tess_base_a_p_i.html#a2e09259c558c6d8e0f7e523cbaf5adf5 -func (c *Client) SetVariable(key, value string) *Client { - c.Variables[key] = value - return c +func (client *Client) SetVariable(key, value string) *Client { + client.Variables[key] = value + return client } // SetPageSegMode sets "Page Segmentation Mode" (PSM) to detect layout of characters. // See official documentation for PSM here https://github.com/tesseract-ocr/tesseract/wiki/ImproveQuality#page-segmentation-method -func (c *Client) SetPageSegMode(mode PageSegMode) *Client { - c.PageSegMode = &mode - return c +func (client *Client) SetPageSegMode(mode PageSegMode) *Client { + client.PageSegMode = &mode + return client } // SetConfigFile sets the file path to config file. -func (c *Client) SetConfigFile(fpath string) error { +func (client *Client) SetConfigFile(fpath string) error { info, err := os.Stat(fpath) if err != nil { return err @@ -120,36 +120,36 @@ func (c *Client) SetConfigFile(fpath string) error { if info.IsDir() { return fmt.Errorf("the specified config file path seems to be a directory") } - c.ConfigFilePath = fpath + client.ConfigFilePath = fpath return nil } // It's due to the caller to free this char pointer. -func (c *Client) charLangs() *C.char { +func (client *Client) charLangs() *C.char { var langs *C.char - if len(c.Languages) != 0 { - langs = C.CString(strings.Join(c.Languages, "+")) + if len(client.Languages) != 0 { + langs = C.CString(strings.Join(client.Languages, "+")) } return langs } // It's due to the caller to free this char pointer. -func (c *Client) charConfig() *C.char { +func (client *Client) charConfig() *C.char { var config *C.char - if _, err := os.Stat(c.ConfigFilePath); err == nil { - config = C.CString(c.ConfigFilePath) + if _, err := os.Stat(client.ConfigFilePath); err == nil { + config = C.CString(client.ConfigFilePath) } return config } // Initialize tesseract::TessBaseAPI // TODO: add tessdata prefix -func (c *Client) init() error { - langs := c.charLangs() +func (client *Client) init() error { + langs := client.charLangs() defer C.free(unsafe.Pointer(langs)) - config := c.charConfig() + config := client.charConfig() defer C.free(unsafe.Pointer(config)) - res := C.Init(c.api, nil, langs, config) + res := C.Init(client.api, nil, langs, config) if res != 0 { // TODO: capture and vacuum stderr from Cgo return fmt.Errorf("failed to initialize TessBaseAPI with code %d", res) @@ -159,45 +159,45 @@ func (c *Client) init() error { // Prepare tesseract::TessBaseAPI options, // must be called after `init`. -func (c *Client) prepare() error { +func (client *Client) prepare() error { // Set Image by giving path - imagepath := C.CString(c.ImagePath) + imagepath := C.CString(client.ImagePath) defer C.free(unsafe.Pointer(imagepath)) - C.SetImage(c.api, imagepath) + C.SetImage(client.api, imagepath) - for key, value := range c.Variables { - if ok := c.bind(key, value); !ok { + for key, value := range client.Variables { + if ok := client.bind(key, value); !ok { return fmt.Errorf("failed to set variable with key(%s):value(%s)", key, value) } } - if c.PageSegMode != nil { - mode := C.int(*c.PageSegMode) - C.SetPageSegMode(c.api, mode) + if client.PageSegMode != nil { + mode := C.int(*client.PageSegMode) + C.SetPageSegMode(client.api, mode) } return nil } // Binds variable to API object. // Must be called from inside `prepare`. -func (c *Client) bind(key, value string) bool { +func (client *Client) bind(key, value string) bool { k, v := C.CString(key), C.CString(value) defer C.free(unsafe.Pointer(k)) defer C.free(unsafe.Pointer(v)) - res := C.SetVariable(c.api, k, v) + res := C.SetVariable(client.api, k, v) return bool(res) } // Text finally initialize tesseract::TessBaseAPI, execute OCR and extract text detected as string. -func (c *Client) Text() (out string, err error) { - if err = c.init(); err != nil { +func (client *Client) Text() (out string, err error) { + if err = client.init(); err != nil { return } - if err = c.prepare(); err != nil { + if err = client.prepare(); err != nil { return } - out = C.GoString(C.UTF8Text(c.api)) - if c.Trim { + out = C.GoString(C.UTF8Text(client.api)) + if client.Trim { out = strings.Trim(out, "\n") } return out, err @@ -205,13 +205,13 @@ func (c *Client) Text() (out string, err error) { // HTML finally initialize tesseract::TessBaseAPI, execute OCR and returns hOCR text. // See https://en.wikipedia.org/wiki/HOCR for more information of hOCR. -func (c *Client) HTML() (out string, err error) { - if err = c.init(); err != nil { +func (client *Client) HOCRText() (out string, err error) { + if err = client.init(); err != nil { return } - if err = c.prepare(); err != nil { + if err = client.prepare(); err != nil { return } - out = C.GoString(C.HOCRText(c.api)) + out = C.GoString(C.HOCRText(client.api)) return } diff --git a/v1/gosseract/.samples/option/digest000.txt b/v1/gosseract/.samples/option/digest000.txt deleted file mode 100644 index e07aed5..0000000 --- a/v1/gosseract/.samples/option/digest000.txt +++ /dev/null @@ -1 +0,0 @@ -tessedit_char_whitelist :0123456789 diff --git a/v1/gosseract/.samples/option/digest001.txt b/v1/gosseract/.samples/option/digest001.txt deleted file mode 100644 index 22af8d8..0000000 --- a/v1/gosseract/.samples/option/digest001.txt +++ /dev/null @@ -1 +0,0 @@ -tessedit_char_whitelist 403 diff --git a/v1/gosseract/.samples/png/sample000.png b/v1/gosseract/.samples/png/sample000.png deleted file mode 100644 index 4374af1..0000000 Binary files a/v1/gosseract/.samples/png/sample000.png and /dev/null differ diff --git a/v1/gosseract/.samples/png/sample001.png b/v1/gosseract/.samples/png/sample001.png deleted file mode 100644 index cd7cf8b..0000000 Binary files a/v1/gosseract/.samples/png/sample001.png and /dev/null differ diff --git a/v1/gosseract/.samples/png/sample002.png b/v1/gosseract/.samples/png/sample002.png deleted file mode 100644 index ed5e711..0000000 Binary files a/v1/gosseract/.samples/png/sample002.png and /dev/null differ diff --git a/v1/gosseract/.samples/png/sample003.png b/v1/gosseract/.samples/png/sample003.png deleted file mode 100644 index 399c9de..0000000 Binary files a/v1/gosseract/.samples/png/sample003.png and /dev/null differ diff --git a/v1/gosseract/LICENSE b/v1/gosseract/LICENSE deleted file mode 100644 index 79a64e9..0000000 --- a/v1/gosseract/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -The MIT License (MIT) - -Copyright (c) 2014 otiai10 - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. diff --git a/v1/gosseract/README.md b/v1/gosseract/README.md deleted file mode 100644 index d07f5ff..0000000 --- a/v1/gosseract/README.md +++ /dev/null @@ -1,67 +0,0 @@ -# Gosseract-OCR [![Build Status](https://travis-ci.org/otiai10/gosseract.svg?branch=master)](https://travis-ci.org/otiai10/gosseract) [![GoDoc](https://godoc.org/github.com/otiai10/gosseract?status.png)](https://godoc.org/github.com/otiai10/gosseract) - -[Tesseract-OCR](https://github.com/tesseract-ocr/tesseract) command for Golang - -# Quick Start - -If you have `docker` on your machine, just hit this and try with GUI - -```sh -docker run -it --rm -e PORT=8080 -p 8080:8080 otiai10/ocrserver -``` - -# Code Example - -```go -package main - -import ( - "fmt" - "github.com/otiai10/gosseract" -) - -func main() { - // This is the simplest way :) - out := gosseract.Must(gosseract.Params{ - Src: "your/img/file.png", - Languages: "eng+heb", - }) - fmt.Println(out) - - // Using client - client, _ := gosseract.NewClient() - out, _ = client.Src("your/img/file.png").Out() - fmt.Println(out) -} -``` - -# Server Application - -Here it is a ready-made solution. - -[![ocrserver](https://github.com/otiai10/ocrserver/raw/master/app/assets/favicon.png)](https://github.com/otiai10/ocrserver) -[ocrserver](https://github.com/otiai10/ocrserver): the minimum OCR server with using gosseract. - -# Installation - -1. install [tesseract-ocr](https://github.com/tesseract-ocr/tesseract) -2. install [go](http://golang.org/doc/install) -3. install [gosseract](https://godoc.org/github.com/otiai10/gosseract) - - `go get github.com/otiai10/gosseract` -4. install [mint for testing](https://godoc.org/github.com/otiai10/mint) - - `go get github.com/otiai10/mint` -5. run the tests first↓ - -# Test - -```sh -go test ./... -``` - -# Dependencies - -- [tesseract-ocr](https://github.com/tesseract-ocr/tesseract)#3.02~ -- [mint](https://github.com/otiai10/mint) to simplize tests - -# Known Issues -- https://github.com/otiai10/gosseract/issues?state=open diff --git a/v1/gosseract/all_test.go b/v1/gosseract/all_test.go deleted file mode 100644 index 234c185..0000000 --- a/v1/gosseract/all_test.go +++ /dev/null @@ -1,92 +0,0 @@ -package gosseract - -import ( - "bytes" - "image" - "image/png" - "io/ioutil" - "os" - "strings" - "testing" - - . "github.com/otiai10/mint" -) - -func Test_Must(t *testing.T) { - Expect(t, Must(Params{ - Src: "./.samples/png/sample000.png", - Languages: "eng", - })).ToBe("01:37:58\n\n") -} - -func removeWhitespace(s string) string { - return strings.TrimSpace(strings.Replace(s, " ", "", -1)) -} - -// tesseract ./.samples/png/sample000.png out -l eng ./.samples/option/digest001.txt -func Test_Must_WithDigest(t *testing.T) { - params := Params{ - Src: "./.samples/png/sample001.png", - Languages: "eng", - } - Expect(t, Must(params)).ToBe("03:41:26\n\n") - - // add optional digest - // params["digest"] = "./.samples/option/digest001.txt" - params.Whitelist = "24" - Expect(t, removeWhitespace(Must(params))).ToBe("42") -} - -func Test_NewClient(t *testing.T) { - client, e := NewClient() - Expect(t, e).ToBe(nil) - Expect(t, client).TypeOf("*gosseract.Client") -} - -func TestClient_Must(t *testing.T) { - client, _ := NewClient() - params := map[string]string{} - _, e := client.Must(params) - Expect(t, e).Not().ToBe(nil) -} - -func TestClient_Src(t *testing.T) { - client, _ := NewClient() - out, e := client.Src("./.samples/png/sample000.png").Out() - Expect(t, e).ToBe(nil) - Expect(t, out).ToBe("01:37:58\n\n") -} - -func TestClient_Image(t *testing.T) { - client, _ := NewClient() - img := fixImage("./.samples/png/sample000.png") - out, e := client.Image(img).Out() - Expect(t, e).ToBe(nil) - Expect(t, out).ToBe("01:37:58\n\n") -} - -func TestClient_Digest(t *testing.T) { - client, _ := NewClient() - img := fixImage("./.samples/png/sample001.png") - out, e := client.Image(img).Out() - Expect(t, e).ToBe(nil) - Expect(t, out).ToBe("03:41:26\n\n") - - // ./.samples/option/digest001.txt: tessedit_char_whitelist 403 - out, e = client.Digest("./.samples/option/digest001.txt").Image(img).Out() - Expect(t, e).ToBe(nil) - Expect(t, removeWhitespace(out)).ToBe("034") -} - -func fixImage(fpath string) image.Image { - f, _ := os.Open(fpath) - buf, _ := ioutil.ReadFile(f.Name()) - img, _ := png.Decode(bytes.NewReader(buf)) - return img -} - -func TestClient_Out(t *testing.T) { - client, _ := NewClient() - _, e := client.Out() - Expect(t, e.Error()).ToBe("Source is not set") -} diff --git a/v1/gosseract/benchmark_test.go b/v1/gosseract/benchmark_test.go deleted file mode 100644 index 5e58303..0000000 --- a/v1/gosseract/benchmark_test.go +++ /dev/null @@ -1,16 +0,0 @@ -package gosseract_test - -import ( - "testing" - - "github.com/otiai10/gosseract/v1/gosseract" -) - -func BenchmarkMust(b *testing.B) { - for i := 0; i < b.N; i++ { - gosseract.Must(gosseract.Params{ - Src: "./.samples/png/sample000.png", - Languages: "eng", - }) - } -} diff --git a/v1/gosseract/client.go b/v1/gosseract/client.go deleted file mode 100644 index 161b6ce..0000000 --- a/v1/gosseract/client.go +++ /dev/null @@ -1,117 +0,0 @@ -package gosseract - -import "fmt" -import "image" -import "os" -import "image/png" - -// Client is an client to use gosseract functions -type Client struct { - tesseract tesseractCmd - source path - digest path - // If the generated PNG source file needs to be deleted - needsdelete bool - Error error -} -type path struct { - value string -} - -func (p *path) Ready() bool { - return (p.value != "") -} -func (p *path) Get() string { - return p.value -} - -// NewClient provide reference to new Client -func NewClient() (c *Client, e error) { - tess, e := getTesseractCmd() - if e != nil { - return - } - c = &Client{tesseract: tess} - return -} - -// Src accepts path to target source file -func (c *Client) Src(srcPath string) *Client { - c.source = path{srcPath} - return c -} - -// Digest accepts path to target digest file -func (c *Client) Digest(digestPath string) *Client { - c.digest = path{digestPath} - return c -} - -// Image accepts image object of target -func (c *Client) Image(img image.Image) *Client { - imageFilePath, e := generateTmpFile() - if e != nil { - c.Error = e - return c - } - f, e := os.Create(imageFilePath) - // TODO: DRY - if e != nil { - c.Error = e - return c - } - defer f.Close() - png.Encode(f, img) - c.needsdelete = true - c.source = path{f.Name()} - return c -} - -// Out executes tesseract and gives results -func (c *Client) Out() (out string, e error) { - if e = c.ready(); e != nil { - return - } - // TODO: validation to call execute - out, e = c.execute() - if c.needsdelete { - os.Remove(c.source.value) - c.needsdelete = false - } - return -} - -// Must executes tesseract directly by parameter map -func (c *Client) Must(params map[string]string) (out string, e error) { - if e = c.accept(params); e != nil { - return - } - return c.Out() -} -func (c *Client) accept(params map[string]string) (e error) { - var ok bool - var src string - if src, ok = params["src"]; !ok { - return fmt.Errorf("missing parameter `src`") - } - c.source = path{src} - if digest, ok := params["digest"]; ok { - c.digest = path{digest} - } - return -} -func (c *Client) ready() (e error) { - if !c.source.Ready() { - return fmt.Errorf("Source is not set") - } - return -} -func (c *Client) execute() (res string, e error) { - args := []string{ - c.source.Get(), - } - if c.digest.Ready() { - args = append(args, c.digest.Get()) - } - return c.tesseract.Execute(args) -} diff --git a/v1/gosseract/example_test.go b/v1/gosseract/example_test.go deleted file mode 100644 index bd9facc..0000000 --- a/v1/gosseract/example_test.go +++ /dev/null @@ -1,25 +0,0 @@ -package gosseract_test - -import "github.com/otiai10/gosseract/v1/gosseract" - -import "fmt" -import "image" - -func ExampleMust() { - // TODO: it panics! error handling in *Client.accept - out := gosseract.Must(gosseract.Params{Src: "./.samples/png/sample002.png", Languages: "eng+heb"}) - fmt.Println(out) -} - -func ExampleClient_Src() { - client, _ := gosseract.NewClient() - out, _ := client.Src("./samples/png/samples000.png").Out() - fmt.Println(out) -} - -func ExampleClient_Image() { - client, _ := gosseract.NewClient() - var img image.Image // any your image instance - out, _ := client.Image(img).Out() - fmt.Println(out) -} diff --git a/v1/gosseract/goss.go b/v1/gosseract/goss.go deleted file mode 100644 index 0e53203..0000000 --- a/v1/gosseract/goss.go +++ /dev/null @@ -1,15 +0,0 @@ -package gosseract - -import "github.com/otiai10/gosseract/v1/gosseract/tesseract" - -// Params is parameters for gosseract.Must. -type Params struct { - Src string // source image file path - Whitelist string // tessedit_char_whitelist - Languages string -} - -// Must execute tesseract-OCR directly by parameter map -func Must(params Params) (out string) { - return tesseract.Simple(params.Src, params.Whitelist, params.Languages) -} diff --git a/v1/gosseract/runtime_test/all_test.sh b/v1/gosseract/runtime_test/all_test.sh deleted file mode 100755 index 6fd7c46..0000000 --- a/v1/gosseract/runtime_test/all_test.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/sh - -runtimes=`ls ./runtime_test/*.Dockerfile` - -for runtime in $runtimes; do - testcase=`basename ${runtime} | sed -e s/\.Dockerfile$//` - echo "=== ${testcase} ===" - docker build . -f ${runtime} -t gosseract/test:${testcase} 1>/dev/null - if docker run -i -t --rm gosseract/test:${testcase}; then - echo "--- ${testcase}: Pass ---" - else - echo "--- ${testcase}: Failed ---" - exit 1 - fi -done diff --git a/v1/gosseract/runtime_test/tess4.00.00dev_x_go1.9.Dockerfile b/v1/gosseract/runtime_test/tess4.00.00dev_x_go1.9.Dockerfile deleted file mode 100644 index 00a17a2..0000000 --- a/v1/gosseract/runtime_test/tess4.00.00dev_x_go1.9.Dockerfile +++ /dev/null @@ -1,19 +0,0 @@ -FROM otiai10/tesseract:4.00.00dev - -RUN apt-get update && apt-get install -y git - -RUN wget https://storage.googleapis.com/golang/go1.9.1.linux-amd64.tar.gz \ - && tar -xzf go1.9.1.linux-amd64.tar.gz -RUN mv /go /.go -ENV GOROOT=/.go - -RUN mkdir /go -ENV GOPATH=/go - -ENV PATH=${PATH}:${GOROOT}/bin:${GOPATH}/bin - -RUN go get github.com/otiai10/mint -RUN go get github.com/otiai10/gosseract -WORKDIR ${GOPATH}/src/github.com/otiai10/gosseract - -CMD go test -run Must diff --git a/v1/gosseract/tesseract.go b/v1/gosseract/tesseract.go deleted file mode 100644 index 1b87f78..0000000 --- a/v1/gosseract/tesseract.go +++ /dev/null @@ -1,84 +0,0 @@ -package gosseract - -import "fmt" -import "os/exec" -import "bytes" -import "regexp" -import "io/ioutil" - -type tesseractCmd interface { - Version() string - Execute(args []string) (string, error) -} - -// TESSERACT specifies bin name for tesseract. -const TESSERACT = "tesseract" -const tmpFILEPREFIX = "gosseract" -const outFILEEXTENSION = ".txt" - -func getTesseractCmd() (tess tesseractCmd, e error) { - commandPath, e := lookPath() - if e != nil { - return - } - v, e := version() - if e != nil { - return - } - if regexp.MustCompile("^3.02").Match([]byte(v)) { - tess = tesseract0302{version: v, commandPath: commandPath} - return - } - if regexp.MustCompile("^3.03").Match([]byte(v)) { - tess = tesseract0303{version: v, commandPath: commandPath} - return - } - if regexp.MustCompile("^3.04").Match([]byte(v)) { - tess = tesseract0304{version: v, commandPath: commandPath} - return - } - if regexp.MustCompile("^3.05").Match([]byte(v)) { - tess = tesseract0305{version: v, commandPath: commandPath} - return - } - e = fmt.Errorf("No tesseract version is found, supporting 3.02~, 3.03~, 3.04~ and 3.05~") - return -} -func lookPath() (commandPath string, e error) { - return exec.LookPath(TESSERACT) -} -func version() (v string, e error) { - v, e = execTesseractCommandWithStderr("--version") - if e != nil { - return - } - exp := regexp.MustCompile("^tesseract ([0-9\\.]+)") - matches := exp.FindStringSubmatch(v) - if len(matches) < 2 { - e = fmt.Errorf("tesseract version not found: response is `%s`", v) - return - } - v = matches[1] - return -} -func execTesseractCommandWithStderr(opt string) (res string, e error) { - cmd := exec.Command(TESSERACT, opt) - var stdout bytes.Buffer - cmd.Stdout = &stdout - var stderr bytes.Buffer - cmd.Stderr = &stderr - if e = cmd.Run(); e != nil { - return - } - res = stdout.String() + stderr.String() - return -} -func generateTmpFile() (fname string, e error) { - myTmpDir := "" // TODO: enable to choose optionally - f, e := ioutil.TempFile(myTmpDir, tmpFILEPREFIX) - if e != nil { - return - } - fname = f.Name() - return -} diff --git a/v1/gosseract/tesseract/tess.cpp b/v1/gosseract/tesseract/tess.cpp deleted file mode 100644 index 7a5b1c1..0000000 --- a/v1/gosseract/tesseract/tess.cpp +++ /dev/null @@ -1,60 +0,0 @@ -#if __FreeBSD__ >= 10 -#include "/usr/local/include/tesseract/baseapi.h" -#include "/usr/local/include/leptonica/allheaders.h" -#else -#include -#include -#endif - -extern "C" { - class TessClient { - private: - tesseract::TessBaseAPI *api; - Pix *image; - public: - TessClient() - { - api = new tesseract::TessBaseAPI(); - } - TessClient(char *imgPath) - { - image = pixRead(imgPath); - } - void setImage(char* imgPath) - { - image = pixRead(imgPath); - } - char* Exec() - { - api->SetImage(image); - char *outText = api->GetUTF8Text(); - pixDestroy(&image); - api->End(); - return outText; - } - }; - - char* simple(char* filepath, char* whitelist ,char* languages) { - char *out; - tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI(); - // Initialize tesseract-ocr with English, without specifying tessdata path - if (api->Init(NULL, languages)) { - fprintf(stderr, "Could not initialize tesseract.\n"); - exit(1); - } - - Pix *image = pixRead(filepath); - api->SetImage(image); - - if (strlen(whitelist) != 0) { - api->SetVariable("tessedit_char_whitelist", whitelist); - } - - out = api->GetUTF8Text(); - api->End(); - pixDestroy(&image); - - return out; - } - -}/* extern "C" */ diff --git a/v1/gosseract/tesseract/tess.h b/v1/gosseract/tesseract/tess.h deleted file mode 100644 index 7c76557..0000000 --- a/v1/gosseract/tesseract/tess.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifdef __cplusplus -extern "C" { -#endif - -char* simple(char* filepath, char* whitelist, char* languages); - -#ifdef __cplusplus -} -#endif/* extern "C" */ diff --git a/v1/gosseract/tesseract/test/all_test.go b/v1/gosseract/tesseract/test/all_test.go deleted file mode 100644 index dd09a61..0000000 --- a/v1/gosseract/tesseract/test/all_test.go +++ /dev/null @@ -1,13 +0,0 @@ -package tesseract_test - -import ( - "testing" - - "github.com/otiai10/gosseract/v1/gosseract/tesseract" - . "github.com/otiai10/mint" -) - -func TestDo(t *testing.T) { - Expect(t, tesseract.Simple("hoge.png", "", "eng")).ToBe("otiai10 / gosseract\n\n") - Expect(t, tesseract.Simple("sample.png", "", "eng")).ToBe("2,464 total\n\n") -} diff --git a/v1/gosseract/tesseract/test/dummy.go b/v1/gosseract/tesseract/test/dummy.go deleted file mode 100644 index 17b8ad1..0000000 --- a/v1/gosseract/tesseract/test/dummy.go +++ /dev/null @@ -1,3 +0,0 @@ -package tesseract - -// https://github.com/golang/go/issues/8279 diff --git a/v1/gosseract/tesseract/test/hoge.png b/v1/gosseract/tesseract/test/hoge.png deleted file mode 100644 index bebcfe3..0000000 Binary files a/v1/gosseract/tesseract/test/hoge.png and /dev/null differ diff --git a/v1/gosseract/tesseract/test/sample.png b/v1/gosseract/tesseract/test/sample.png deleted file mode 100644 index 99904f2..0000000 Binary files a/v1/gosseract/tesseract/test/sample.png and /dev/null differ diff --git a/v1/gosseract/tesseract/wrapper.go b/v1/gosseract/tesseract/wrapper.go deleted file mode 100644 index ef544bb..0000000 --- a/v1/gosseract/tesseract/wrapper.go +++ /dev/null @@ -1,22 +0,0 @@ -package tesseract - -/* -#if __FreeBSD__ >= 10 -#cgo LDFLAGS: -L/usr/local/lib -llept -ltesseract -#else -#cgo LDFLAGS: -llept -ltesseract -#endif - -#include "tess.h" -*/ -import "C" - -// Simple executes tesseract only with source image file path. -func Simple(imgPath string, whitelist string, languages string) string { - p := C.CString(imgPath) - w := C.CString(whitelist) - l := C.CString(languages) - - s := C.simple(p, w, l) - return C.GoString(s) -} diff --git a/v1/gosseract/tesseract0302.go b/v1/gosseract/tesseract0302.go deleted file mode 100644 index c1a746f..0000000 --- a/v1/gosseract/tesseract0302.go +++ /dev/null @@ -1,60 +0,0 @@ -package gosseract - -import "fmt" -import "os" -import "os/exec" -import "bytes" -import "io/ioutil" - -type tesseract0302 struct { - version string - resultFilePath string - commandPath string -} - -func (t tesseract0302) Version() string { - return t.version -} -func (t tesseract0302) Execute(params []string) (res string, e error) { - - // command args - var args []string - // Register source file - args = append(args, params[0]) - // generate result file path - t.resultFilePath, e = generateTmpFile() - if e != nil { - return - } - // Register result file - args = append(args, t.resultFilePath) - // Register digest file - if len(params) > 1 { - args = append(args, params[1]) - } - - // prepare command - cmd := exec.Command(TESSERACT, args...) - // execute - var stderr bytes.Buffer - cmd.Stderr = &stderr - if e = cmd.Run(); e != nil { - e = fmt.Errorf(stderr.String()) - return - } - // read result - res, e = t.readResult() - os.Remove(t.resultFilePath) - return -} -func (t tesseract0302) readResult() (res string, e error) { - fpath := t.resultFilePath + outFILEEXTENSION - file, e := os.OpenFile(fpath, 1, 1) - if e != nil { - return - } - buffer, _ := ioutil.ReadFile(file.Name()) - res = string(buffer) - os.Remove(file.Name()) - return -} diff --git a/v1/gosseract/tesseract0303.go b/v1/gosseract/tesseract0303.go deleted file mode 100644 index e95a747..0000000 --- a/v1/gosseract/tesseract0303.go +++ /dev/null @@ -1,61 +0,0 @@ -package gosseract - -import "fmt" -import "os" -import "os/exec" -import "bytes" -import "io/ioutil" - -type tesseract0303 struct { - version string - resultFilePath string - commandPath string -} - -func (t tesseract0303) Version() string { - return t.version -} - -func (t tesseract0303) Execute(params []string) (res string, e error) { - // command args - var args []string - // Register source file - args = append(args, params[0]) - // generate result file path - t.resultFilePath, e = generateTmpFile() - if e != nil { - return - } - // Register result file - args = append(args, t.resultFilePath) - // Register digest file - if len(params) > 1 { - args = append(args, params[1]) - } - - // prepare command - cmd := exec.Command(TESSERACT, args...) - // execute - var stderr bytes.Buffer - cmd.Stderr = &stderr - if e = cmd.Run(); e != nil { - e = fmt.Errorf(stderr.String()) - return - } - // read result - res, e = t.readResult() - os.Remove(t.resultFilePath) - return -} - -func (t tesseract0303) readResult() (res string, e error) { - fpath := t.resultFilePath + outFILEEXTENSION - file, e := os.OpenFile(fpath, 1, 1) - if e != nil { - return - } - buffer, _ := ioutil.ReadFile(file.Name()) - res = string(buffer) - os.Remove(file.Name()) - return -} diff --git a/v1/gosseract/tesseract0304.go b/v1/gosseract/tesseract0304.go deleted file mode 100644 index 4c53f64..0000000 --- a/v1/gosseract/tesseract0304.go +++ /dev/null @@ -1,61 +0,0 @@ -package gosseract - -import "fmt" -import "os" -import "os/exec" -import "bytes" -import "io/ioutil" - -type tesseract0304 struct { - version string - resultFilePath string - commandPath string -} - -func (t tesseract0304) Version() string { - return t.version -} - -func (t tesseract0304) Execute(params []string) (res string, e error) { - // command args - var args []string - // Register source file - args = append(args, params[0]) - // generate result file path - t.resultFilePath, e = generateTmpFile() - if e != nil { - return - } - // Register result file - args = append(args, t.resultFilePath) - // Register digest file - if len(params) > 1 { - args = append(args, params[1]) - } - - // prepare command - cmd := exec.Command(TESSERACT, args...) - // execute - var stderr bytes.Buffer - cmd.Stderr = &stderr - if e = cmd.Run(); e != nil { - e = fmt.Errorf(stderr.String()) - return - } - // read result - res, e = t.readResult() - os.Remove(t.resultFilePath) - return -} - -func (t tesseract0304) readResult() (res string, e error) { - fpath := t.resultFilePath + outFILEEXTENSION - file, e := os.OpenFile(fpath, 1, 1) - if e != nil { - return - } - buffer, _ := ioutil.ReadFile(file.Name()) - res = string(buffer) - os.Remove(file.Name()) - return -} diff --git a/v1/gosseract/tesseract0305.go b/v1/gosseract/tesseract0305.go deleted file mode 100644 index e741339..0000000 --- a/v1/gosseract/tesseract0305.go +++ /dev/null @@ -1,61 +0,0 @@ -package gosseract - -import "fmt" -import "os" -import "os/exec" -import "bytes" -import "io/ioutil" - -type tesseract0305 struct { - version string - resultFilePath string - commandPath string -} - -func (t tesseract0305) Version() string { - return t.version -} - -func (t tesseract0305) Execute(params []string) (res string, e error) { - // command args - var args []string - // Register source file - args = append(args, params[0]) - // generate result file path - t.resultFilePath, e = generateTmpFile() - if e != nil { - return - } - // Register result file - args = append(args, t.resultFilePath) - // Register digest file - if len(params) > 1 { - args = append(args, params[1]) - } - - // prepare command - cmd := exec.Command(TESSERACT, args...) - // execute - var stderr bytes.Buffer - cmd.Stderr = &stderr - if e = cmd.Run(); e != nil { - e = fmt.Errorf(stderr.String()) - return - } - // read result - res, e = t.readResult() - os.Remove(t.resultFilePath) - return -} - -func (t tesseract0305) readResult() (res string, e error) { - fpath := t.resultFilePath + outFILEEXTENSION - file, e := os.OpenFile(fpath, 1, 1) - if e != nil { - return - } - buffer, _ := ioutil.ReadFile(file.Name()) - res = string(buffer) - os.Remove(file.Name()) - return -}