otiai10/gosseract

View on GitHub
client.go

Summary

Maintainability
A
1 hr
Test Coverage
package gosseract

// #include <stdlib.h>
// #include <stdbool.h>
// #include "tessbridge.h"
import "C"
import (
    "fmt"
    "image"
    "os"
    "path/filepath"
    "runtime"
    "strings"
    "unsafe"
)

// ErrClientNotConstructed is the error reported by Client methods when the
// underlying TessBaseAPI handle is nil, i.e. the client was not created with
// NewClient or has already been closed.
var ErrClientNotConstructed = fmt.Errorf("TessBaseAPI is not constructed, please use `gosseract.NewClient`")

// Version reports the version of Tesseract-OCR.
// A temporary API handle is created for the query and freed before returning.
func Version() string {
    handle := C.Create()
    defer C.Free(handle)
    return C.GoString(C.Version(handle))
}

// ClearPersistentCache clears any library-level memory caches.
// There are a variety of expensive-to-load constant data structures (mostly
// language dictionaries) that are cached globally, surviving the Init() and
// End() of individual TessBaseAPI's. This function allows the clearing of
// these caches. A temporary API handle is created for the call and freed
// before returning.
func ClearPersistentCache() {
    handle := C.Create()
    C.ClearPersistentCache(handle)
    C.Free(handle)
}

// Client is argument builder for tesseract::TessBaseAPI.
// It accumulates the image, languages, variables and config file path, which
// are applied to the underlying API lazily by init.
type Client struct {
    // api is the TessBaseAPI handle obtained from C.Create in NewClient.
    // Close sets it to nil, and the methods treat a nil api as "not constructed".
    api C.TessBaseAPI

    // Holds a reference to the pix image to be able to destroy on client close
    // or when a new image is set
    pixImage C.PixImage

    // Trim enables trimming of the result string.
    // As results of OCR, text often contains unnecessary newlines on the head/foot of string.
    // If `Trim` is true, Text strips leading and trailing "\n" from the result.
    // NewClient sets it to true.
    Trim bool

    // TessdataPrefix can indicate directory path to `tessdata`.
    // It is set `/usr/local/share/tessdata/` or something like that, as default.
    // When non-empty it is passed to C.Init by init; see SetTessdataPrefix.
    // TODO: Implement and test
    TessdataPrefix string

    // Languages are languages to be detected. NewClient sets it to "eng".
    // Multiple languages are joined with "+" when the API is initialized.
    Languages []string

    // Variables is just a pool to evaluate "tesseract::TessBaseAPI->SetVariable" in delay.
    // TODO: Think if it should be public, or private property.
    Variables map[SettableVariable]string

    // ConfigFilePath is a file path to the configuration for Tesseract
    // See http://www.sk-spell.sk.cx/tesseract-ocr-parameters-in-302-version
    // TODO: Fix link to official page
    ConfigFilePath string

    // shouldInit is an internal flag to check if the instance should be initialized again,
    // i.e. the API must be initialized anew when the languages, the config file
    // or the tessdata prefix change. It is raised by flagForInit and cleared by
    // init once initialization succeeds.
    shouldInit bool
}

// NewClient constructs a new Client with a freshly created API handle,
// an empty variable pool, trimming enabled and "eng" as the only language.
// The caller is responsible for calling Close on the returned client.
func NewClient() *Client {
    c := new(Client)
    c.api = C.Create()
    c.Variables = make(map[SettableVariable]string)
    c.Trim = true
    c.shouldInit = true
    c.Languages = []string{"eng"}

    // Safety net: close the client from the finalizer if the user never
    // calls Close explicitly.
    runtime.SetFinalizer(c, (*Client).Close)
    return c
}

// Close frees the allocated API and destroys the image held by the client.
// This MUST be called for ANY client constructed by "NewClient" function.
// Calling it on a client whose API is already nil is a no-op; the returned
// error is always nil.
func (client *Client) Close() (err error) {
    // The finalizer installed by NewClient is no longer needed.
    runtime.SetFinalizer(client, nil)

    // Already closed, or never constructed.
    if client.api == nil {
        return nil
    }

    C.Clear(client.api)
    C.Free(client.api)
    client.api = nil

    if img := client.pixImage; img != nil {
        C.DestroyPixImage(img)
        client.pixImage = nil
    }
    return nil
}

// Version provides the version of Tesseract used by this client.
// It returns an empty string when the client's API is nil.
func (client *Client) Version() string {
    if client.api != nil {
        return C.GoString(C.Version(client.api))
    }
    return ""
}

// SetImage loads the image file at imagepath as the image to be processed by OCR.
// It returns ErrClientNotConstructed when the API is nil, and an error when the
// path is empty, cannot be stat'ed, or no image can be created from it.
// Any previously held image is destroyed before the new one is created.
func (client *Client) SetImage(imagepath string) error {
    switch {
    case client.api == nil:
        return ErrClientNotConstructed
    case imagepath == "":
        return fmt.Errorf("image path cannot be empty")
    }

    _, statErr := os.Stat(imagepath)
    if statErr != nil {
        return fmt.Errorf("cannot detect the stat of specified file: %v", statErr)
    }

    // Release the image from an earlier call, if any.
    if previous := client.pixImage; previous != nil {
        C.DestroyPixImage(previous)
        client.pixImage = nil
    }

    cpath := C.CString(imagepath)
    defer C.free(unsafe.Pointer(cpath))

    pix := C.CreatePixImageByFilePath(cpath)
    if pix == nil {
        return fmt.Errorf("failed to create PixImage from file path: %s", imagepath)
    }
    client.pixImage = pix
    return nil
}

// SetImageFromBytes uses the given encoded image data as the image to be processed by OCR.
// It returns ErrClientNotConstructed when the API is nil, and an error when
// data is empty or no image can be created from it.
// Any previously held image is destroyed before the new one is created.
func (client *Client) SetImageFromBytes(data []byte) error {
    switch {
    case client.api == nil:
        return ErrClientNotConstructed
    case len(data) == 0:
        return fmt.Errorf("image data cannot be empty")
    }

    // Release the image from an earlier call, if any.
    if previous := client.pixImage; previous != nil {
        C.DestroyPixImage(previous)
        client.pixImage = nil
    }

    size := len(data)
    pix := C.CreatePixImageFromBytes((*C.uchar)(unsafe.Pointer(&data[0])), C.int(size))
    if pix == nil {
        return fmt.Errorf("failed to create PixImage from bytes: %d", size)
    }
    client.pixImage = pix
    return nil
}

// SetLanguage replaces the languages to use and flags the client for
// re-initialization. NewClient starts with English ("eng").
// It returns ErrClientNotConstructed when the API is nil and an error when
// no language is given.
func (client *Client) SetLanguage(langs ...string) error {
    switch {
    case client.api == nil:
        return ErrClientNotConstructed
    case len(langs) == 0:
        return fmt.Errorf("languages cannot be empty")
    }

    client.Languages = langs
    // A language change requires the API to be initialized again.
    client.flagForInit()
    return nil
}

// DisableOutput points the DEBUG_FILE variable at os.DevNull through SetVariable,
// presumably to silence Tesseract's debug output (TODO confirm against the Tesseract docs).
// It returns whatever error SetVariable returns.
func (client *Client) DisableOutput() error {
    return client.SetVariable(DEBUG_FILE, os.DevNull)
}

// SetWhitelist sets whitelist chars by storing them under the
// TESSEDIT_CHAR_WHITELIST variable via SetVariable, whose error it returns.
// See official documentation for whitelist here https://tesseract-ocr.github.io/tessdoc/ImproveQuality#dictionaries-word-lists-and-patterns
func (client *Client) SetWhitelist(whitelist string) error {
    return client.SetVariable(TESSEDIT_CHAR_WHITELIST, whitelist)
}

// SetBlacklist sets blacklist chars by storing them under the
// TESSEDIT_CHAR_BLACKLIST variable via SetVariable, whose error it returns.
// See official documentation for blacklist here https://tesseract-ocr.github.io/tessdoc/ImproveQuality#dictionaries-word-lists-and-patterns
func (client *Client) SetBlacklist(blacklist string) error {
    return client.SetVariable(TESSEDIT_CHAR_BLACKLIST, blacklist)
}

// SetVariable sets parameters, representing tesseract::TessBaseAPI->SetVariable.
// See official documentation here https://zdenop.github.io/tesseract-doc/classtesseract_1_1_tess_base_a_p_i.html#a2e09259c558c6d8e0f7e523cbaf5adf5
// Because `api->SetVariable` must be called after `api->Init`, this method cannot detect unexpected key for variables.
// Check `client.setVariablesToInitializedAPI` for more information.
//
// The value is always recorded in client.Variables. It returns
// ErrClientNotConstructed when the API is nil; otherwise it returns the result
// of setVariablesToInitializedAPIIfNeeded.
func (client *Client) SetVariable(key SettableVariable, value string) error {
    if client.api == nil {
        return ErrClientNotConstructed
    }
    // Pool the value; init applies the whole pool after C.Init.
    client.Variables[key] = value

    // If the API is already initialized, push the pool to it right away.
    return client.setVariablesToInitializedAPIIfNeeded()
}

// SetPageSegMode sets "Page Segmentation Mode" (PSM) to detect layout of characters.
// The mode is passed straight to the underlying API; ErrClientNotConstructed
// is returned when the API is nil.
// See official documentation for PSM here https://tesseract-ocr.github.io/tessdoc/ImproveQuality#page-segmentation-method
// See https://github.com/otiai10/gosseract/issues/52 for more information.
func (client *Client) SetPageSegMode(mode PageSegMode) error {
    if client.api == nil {
        return ErrClientNotConstructed
    }
    psm := C.int(mode)
    C.SetPageSegMode(client.api, psm)
    return nil
}

// SetConfigFile records fpath as the config file to use and flags the client
// for re-initialization.
// It returns ErrClientNotConstructed when the API is nil, the os.Stat error
// when fpath cannot be stat'ed, and an error when fpath is a directory.
func (client *Client) SetConfigFile(fpath string) error {
    if client.api == nil {
        return ErrClientNotConstructed
    }

    switch info, err := os.Stat(fpath); {
    case err != nil:
        return err
    case info.IsDir():
        return fmt.Errorf("the specified config file path seems to be a directory")
    }

    client.ConfigFilePath = fpath
    // A config file change requires the API to be initialized again.
    client.flagForInit()
    return nil
}

// SetTessdataPrefix sets path to the models directory and flags the client
// for re-initialization.
// Environment variable TESSDATA_PREFIX is used as default.
// It returns ErrClientNotConstructed when the API is nil and an error when
// prefix is empty.
func (client *Client) SetTessdataPrefix(prefix string) error {
    switch {
    case client.api == nil:
        return ErrClientNotConstructed
    case prefix == "":
        return fmt.Errorf("tessdata prefix could not be empty")
    }

    client.TessdataPrefix = prefix
    client.flagForInit()
    return nil
}

// init prepares the underlying tesseract::TessBaseAPI for recognition.
//
// When the client is flagged for (re-)initialization it calls C.Init with the
// tessdata prefix, the "+"-joined languages and the config file path, applies
// the pooled variables, hands the current image to the API and clears the flag.
// Otherwise it only hands the current image to the already initialized API.
//
// It returns an error if C.Init reports a non-zero code, if a pooled variable
// is rejected, or if no image is set.
func (client *Client) init() error {
    const noImageMsg = "PixImage is not set, use SetImage or SetImageFromBytes before Text or HOCRText"

    if !client.shouldInit {
        // The image can be nil here even though a previous init succeeded:
        // SetImage and SetImageFromBytes destroy the old image before creating
        // the new one, so a failed call leaves pixImage nil. Never hand a nil
        // image to the C API.
        if client.pixImage == nil {
            return fmt.Errorf(noImageMsg)
        }
        C.SetPixImage(client.api, client.pixImage)
        return nil
    }

    // Each optional argument stays a nil *C.char when it is not configured;
    // the deferred C.free calls are then invoked with a nil pointer.
    var languages *C.char
    if len(client.Languages) != 0 {
        languages = C.CString(strings.Join(client.Languages, "+"))
    }
    defer C.free(unsafe.Pointer(languages))

    // The config file is only passed on when it can be stat'ed.
    var configfile *C.char
    if _, err := os.Stat(client.ConfigFilePath); err == nil {
        configfile = C.CString(client.ConfigFilePath)
    }
    defer C.free(unsafe.Pointer(configfile))

    var tessdataPrefix *C.char
    if client.TessdataPrefix != "" {
        tessdataPrefix = C.CString(client.TessdataPrefix)
    }
    defer C.free(unsafe.Pointer(tessdataPrefix))

    // errbuf receives the message reported by C.Init.
    errbuf := [512]C.char{}
    res := C.Init(client.api, tessdataPrefix, languages, configfile, &errbuf[0])
    msg := C.GoString(&errbuf[0])

    if res != 0 {
        return fmt.Errorf("failed to initialize TessBaseAPI with code %d: %s", res, msg)
    }

    if err := client.setVariablesToInitializedAPI(); err != nil {
        return err
    }

    if client.pixImage == nil {
        return fmt.Errorf(noImageMsg)
    }

    C.SetPixImage(client.api, client.pixImage)

    client.shouldInit = false

    return nil
}

// flagForInit flags the current instance to be initialized again on the next call to a function that
// requires an initialized API: when the user changes the config file, the languages
// or the tessdata prefix, the instance needs to initialize the API anew.
// It only sets shouldInit; the actual work happens in init.
func (client *Client) flagForInit() {
    client.shouldInit = true
}

// setVariablesToInitializedAPI sets all the specified variables to the TessBaseAPI structure.
// Because `api->SetVariable` must be called after `api->Init()`,
// gosseract.Client.SetVariable cannot call `api->SetVariable` directly.
// See https://zdenop.github.io/tesseract-doc/classtesseract_1_1_tess_base_a_p_i.html#a2e09259c558c6d8e0f7e523cbaf5adf5
//
// It stops at, and returns an error for, the first variable the API rejects.
// Variables are visited in map iteration order.
func (client *Client) setVariablesToInitializedAPI() error {
    for key, value := range client.Variables {
        k, v := C.CString(string(key)), C.CString(value)
        res := C.SetVariable(client.api, k, v)
        // Free the C copies right away instead of deferring inside the loop,
        // so they do not pile up until the function returns.
        C.free(unsafe.Pointer(k))
        C.free(unsafe.Pointer(v))
        if !bool(res) {
            return fmt.Errorf("failed to set variable with key(%v) and value(%v)", key, value)
        }
    }
    return nil
}

// setVariablesToInitializedAPIIfNeeded calls setVariablesToInitializedAPI only
// if the API is already initialized. It is useful when changing variables
// that do not require a new tesseract instance; otherwise it is better to
// just flag the instance for re-init (Client.flagForInit()).
func (client *Client) setVariablesToInitializedAPIIfNeeded() error {
    if client.shouldInit {
        // Initialization is still pending: init applies the pooled variables.
        return nil
    }
    return client.setVariablesToInitializedAPI()
}

// Text initializes tesseract::TessBaseAPI if needed, executes OCR and returns
// the detected text as a string. When client.Trim is set, leading and trailing
// newlines are removed from the result.
// It returns ErrClientNotConstructed when the API is nil, or the error from init.
func (client *Client) Text() (out string, err error) {
    if client.api == nil {
        return "", ErrClientNotConstructed
    }
    if initErr := client.init(); initErr != nil {
        return "", initErr
    }

    // NOTE(review): C.GoString copies the buffer returned by C.UTF8Text and the
    // buffer is never released here — confirm whether the caller is expected to free it.
    text := C.GoString(C.UTF8Text(client.api))
    if client.Trim {
        text = strings.Trim(text, "\n")
    }
    return text, nil
}

// HOCRText initializes tesseract::TessBaseAPI if needed, executes OCR and
// returns the hOCR text.
// It returns ErrClientNotConstructed when the API is nil, or the error from init.
// See https://en.wikipedia.org/wiki/HOCR for more information of hOCR.
func (client *Client) HOCRText() (out string, err error) {
    if client.api == nil {
        return "", ErrClientNotConstructed
    }
    if initErr := client.init(); initErr != nil {
        return "", initErr
    }
    return C.GoString(C.HOCRText(client.api)), nil
}

// BoundingBox contains the position, confidence and UTF8 text of the recognized word
type BoundingBox struct {
    // Box is the rectangle built from the (x1, y1)-(x2, y2) corners reported for the element.
    Box                                image.Rectangle
    // Word is the recognized text of the element.
    Word                               string
    // Confidence is the confidence value reported for the element.
    Confidence                         float64
    // BlockNum, ParNum, LineNum and WordNum are filled in by GetBoundingBoxesVerbose only;
    // GetBoundingBoxes leaves them at zero.
    BlockNum, ParNum, LineNum, WordNum int
}

// GetBoundingBoxes returns bounding boxes for each element matched at the
// given page iterator level.
// It returns ErrClientNotConstructed when the API is nil, or the error from init.
// The C array returned by the bridge is copied into Go values and freed
// before returning.
func (client *Client) GetBoundingBoxes(level PageIteratorLevel) (out []BoundingBox, err error) {
    if client.api == nil {
        return nil, ErrClientNotConstructed
    }
    if initErr := client.init(); initErr != nil {
        return nil, initErr
    }

    arr := C.GetBoundingBoxes(client.api, C.int(level))
    count := int(arr.length)
    defer C.free(unsafe.Pointer(arr.boxes))
    defer C.free(unsafe.Pointer(arr))

    boxes := make([]BoundingBox, count)
    base := unsafe.Pointer(arr.boxes)
    stride := unsafe.Sizeof(C.struct_bounding_box{})
    for i := range boxes {
        // Address of the i-th element: boxes + i*sizeof(box).
        cbox := (*C.struct_bounding_box)(unsafe.Pointer(uintptr(base) + uintptr(i)*stride))
        boxes[i] = BoundingBox{
            Box:        image.Rect(int(cbox.x1), int(cbox.y1), int(cbox.x2), int(cbox.y2)),
            Word:       C.GoString(cbox.word),
            Confidence: float64(cbox.confidence),
        }
    }
    return boxes, nil
}

// GetAvailableLanguages returns a list of available languages in the default tesspath.
// A language is every file named "<language>.traineddata" in the directory
// reported by getDataPath; the returned names have that suffix removed.
// Only the ".traineddata" suffix is stripped, so a name that itself contains a
// dot (e.g. "eng.best.traineddata") is reported in full ("eng.best").
// The error, if any, is the one returned by filepath.Glob.
func GetAvailableLanguages() ([]string, error) {
    const suffix = ".traineddata"

    languages, err := filepath.Glob(filepath.Join(getDataPath(), "*"+suffix))
    if err != nil {
        return languages, err
    }
    for i, path := range languages {
        languages[i] = strings.TrimSuffix(filepath.Base(path), suffix)
    }
    return languages, nil
}

// GetBoundingBoxesVerbose returns bounding boxes at word level with block_num, par_num, line_num and word_num
// according to the c++ api that returns a formatted TSV output. Reference: `TessBaseAPI::GetTSVText`.
// It returns ErrClientNotConstructed when the API is nil, or the error from init.
// The C array returned by the bridge is copied into Go values and freed
// before returning.
func (client *Client) GetBoundingBoxesVerbose() (out []BoundingBox, err error) {
    if client.api == nil {
        return nil, ErrClientNotConstructed
    }
    if initErr := client.init(); initErr != nil {
        return nil, initErr
    }

    arr := C.GetBoundingBoxesVerbose(client.api)
    count := int(arr.length)
    defer C.free(unsafe.Pointer(arr.boxes))
    defer C.free(unsafe.Pointer(arr))

    boxes := make([]BoundingBox, count)
    base := unsafe.Pointer(arr.boxes)
    stride := unsafe.Sizeof(C.struct_bounding_box{})
    for i := range boxes {
        // Address of the i-th element: boxes + i*sizeof(box).
        cbox := (*C.struct_bounding_box)(unsafe.Pointer(uintptr(base) + uintptr(i)*stride))
        boxes[i] = BoundingBox{
            Box:        image.Rect(int(cbox.x1), int(cbox.y1), int(cbox.x2), int(cbox.y2)),
            Word:       C.GoString(cbox.word),
            Confidence: float64(cbox.confidence),
            BlockNum:   int(cbox.block_num),
            ParNum:     int(cbox.par_num),
            LineNum:    int(cbox.line_num),
            WordNum:    int(cbox.word_num),
        }
    }
    return boxes, nil
}

// getDataPath is a useful helper to determine where the current tesseract
// installation stores trained models. It returns the value of C.GetDataPath
// converted to a Go string.
func getDataPath() string {
    return C.GoString(C.GetDataPath())
}