third-party/leptonica/src/pageseg.c
/*====================================================================*
- Copyright (C) 2001 Leptonica. All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
- 1. Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- 2. Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following
- disclaimer in the documentation and/or other materials
- provided with the distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
- CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*====================================================================*/
/*
* pageseg.c
*
* Top level page segmentation
* l_int32 pixGetRegionsBinary()
*
* Halftone region extraction
* PIX *pixGenHalftoneMask()
*
* Textline extraction
* PIX *pixGenTextlineMask()
*
* Textblock extraction
* PIX *pixGenTextblockMask()
*
* Location of page foreground
* PIX *pixFindPageForeground()
*
* Extraction of characters from image with only text
* l_int32 pixSplitIntoCharacters()
* BOXA *pixSplitComponentWithProfile()
*/
#include "allheaders.h"
/*------------------------------------------------------------------*
* Top level page segmentation *
*------------------------------------------------------------------*/
/*!
 *  pixGetRegionsBinary()
 *
 *      Input:  pixs (1 bpp, assumed to be 300 to 400 ppi)
 *              &pixhm (<optional return> halftone mask)
 *              &pixtm (<optional return> textline mask)
 *              &pixtb (<optional return> textblock mask)
 *              debug (flag: set to 1 for debug output)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) It is best to deskew the image before segmenting.
 *      (2) The debug flag enables a number of outputs.  These
 *          are included to show how to generate and save/display
 *          these results.
 *      (3) The masks are computed on a 2x reduced image and then
 *          expanded by 2x replication before being returned.
 *      (4) Every requested output is set to NULL on entry.  A mask that
 *          could not be generated is returned as NULL; the caller owns
 *          (and must destroy) each non-NULL mask that is returned.
 *          Masks that are not requested are destroyed here.
 */
l_int32
pixGetRegionsBinary(PIX     *pixs,
                    PIX    **ppixhm,
                    PIX    **ppixtm,
                    PIX    **ppixtb,
                    l_int32  debug)
{
l_int32  htfound = 0, tlfound = 0;
PIX     *pixr = NULL;
PIX     *pixt1 = NULL, *pixt2 = NULL;
PIX     *pixtext = NULL;  /* text pixels only */
PIX     *pixhm2 = NULL;   /* halftone mask; 2x reduction */
PIX     *pixhm = NULL;    /* halftone mask;  */
PIX     *pixtm2 = NULL;   /* textline mask; 2x reduction */
PIX     *pixtm = NULL;    /* textline mask */
PIX     *pixvws = NULL;   /* vertical white space mask */
PIX     *pixtb2 = NULL;   /* textblock mask; 2x reduction */
PIX     *pixtbf2 = NULL;  /* textblock mask; 2x reduction; small comps filtered */
PIX     *pixtb = NULL;    /* textblock mask */

    PROCNAME("pixGetRegionsBinary");

    if (ppixhm) *ppixhm = NULL;
    if (ppixtm) *ppixtm = NULL;
    if (ppixtb) *ppixtb = NULL;
    if (!pixs)
        return ERROR_INT("pixs not defined", procName, 1);
    if (pixGetDepth(pixs) != 1)
        return ERROR_INT("pixs not 1 bpp", procName, 1);

        /* 2x reduce, to 150 -200 ppi.  Nothing below can work without
         * the reduced image, so give up immediately if it is not made. */
    if ((pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0)) == NULL)
        return ERROR_INT("pixr not made", procName, 1);
    pixDisplayWrite(pixr, debug);

        /* Get the halftone mask */
    pixhm2 = pixGenHalftoneMask(pixr, &pixtext, &htfound, debug);

        /* Get the textline mask from the text pixels.  pixvws was
         * initialized to NULL above, so it is safe to destroy even if
         * this call fails before assigning it. */
    pixtm2 = pixGenTextlineMask(pixtext, &pixvws, &tlfound, debug);

        /* Get the textblock mask from the textline mask */
    pixtb2 = pixGenTextblockMask(pixtm2, pixvws, debug);
    pixDestroy(&pixr);
    pixDestroy(&pixtext);
    pixDestroy(&pixvws);

        /* Remove small components from the mask, where a small
         * component is defined as one with both width and height < 60 */
    pixtbf2 = pixSelectBySize(pixtb2, 60, 60, 4, L_SELECT_IF_EITHER,
                              L_SELECT_IF_GTE, NULL);
    pixDestroy(&pixtb2);
    pixDisplayWriteFormat(pixtbf2, debug, IFF_PNG);

        /* Expand all masks to full resolution, and do filling or
         * small dilations for better coverage. */
    pixhm = pixExpandReplicate(pixhm2, 2);
    pixt1 = pixSeedfillBinary(NULL, pixhm, pixs, 8);
    pixOr(pixhm, pixhm, pixt1);
    pixDestroy(&pixt1);
    pixDisplayWriteFormat(pixhm, debug, IFF_PNG);

    pixt1 = pixExpandReplicate(pixtm2, 2);
    pixtm = pixDilateBrick(NULL, pixt1, 3, 3);
    pixDestroy(&pixt1);
    pixDisplayWriteFormat(pixtm, debug, IFF_PNG);

    pixt1 = pixExpandReplicate(pixtbf2, 2);
    pixtb = pixDilateBrick(NULL, pixt1, 3, 3);
    pixDestroy(&pixt1);
    pixDisplayWriteFormat(pixtb, debug, IFF_PNG);

    pixDestroy(&pixhm2);
    pixDestroy(&pixtm2);
    pixDestroy(&pixtbf2);

        /* Debug: identify objects that are neither text nor halftone image */
    if (debug) {
        pixt1 = pixSubtract(NULL, pixs, pixtm);  /* remove text pixels */
        pixt2 = pixSubtract(NULL, pixt1, pixhm);  /* remove halftone pixels */
        pixDisplayWriteFormat(pixt2, 1, IFF_PNG);
        pixDestroy(&pixt1);
        pixDestroy(&pixt2);
    }

        /* Debug: display textline components with random colors */
    if (debug) {
        l_int32  w = 0, h = 0;
        BOXA    *boxa = NULL;
        PIXA    *pixa = NULL;
        boxa = pixConnComp(pixtm, &pixa, 8);
        pixGetDimensions(pixtm, &w, &h, NULL);
        pixt1 = pixaDisplayRandomCmap(pixa, w, h);
        pixcmapResetColor(pixGetColormap(pixt1), 0, 255, 255, 255);
        pixDisplay(pixt1, 100, 100);
        pixDisplayWriteFormat(pixt1, 1, IFF_PNG);
        pixaDestroy(&pixa);
        boxaDestroy(&boxa);
        pixDestroy(&pixt1);
    }

        /* Debug: identify the outlines of each textblock.  This also
         * creates the "pageseg" directory used by the files written
         * in the next debug section. */
    if (debug) {
        PIXCMAP  *cmap;
        PTAA     *ptaa;
        ptaa = pixGetOuterBordersPtaa(pixtb);
        lept_mkdir("pageseg");
        ptaaWrite("/tmp/pageseg/tb_outlines.ptaa", ptaa, 1);
        pixt1 = pixRenderRandomCmapPtaa(pixtb, ptaa, 1, 16, 1);
        cmap = pixGetColormap(pixt1);
        pixcmapResetColor(cmap, 0, 130, 130, 130);
        pixDisplay(pixt1, 500, 100);
        pixDisplayWriteFormat(pixt1, 1, IFF_PNG);
        pixDestroy(&pixt1);
        ptaaDestroy(&ptaa);
    }

        /* Debug: get b.b. for all mask components */
    if (debug) {
        BOXA  *bahm, *batm, *batb;
        bahm = pixConnComp(pixhm, NULL, 4);
        batm = pixConnComp(pixtm, NULL, 4);
        batb = pixConnComp(pixtb, NULL, 4);
        boxaWrite("/tmp/pageseg/htmask.boxa", bahm);
        boxaWrite("/tmp/pageseg/textmask.boxa", batm);
        boxaWrite("/tmp/pageseg/textblock.boxa", batb);
        boxaDestroy(&bahm);
        boxaDestroy(&batm);
        boxaDestroy(&batb);
    }

        /* Hand each mask to the caller if requested; otherwise free it */
    if (ppixhm)
        *ppixhm = pixhm;
    else
        pixDestroy(&pixhm);
    if (ppixtm)
        *ppixtm = pixtm;
    else
        pixDestroy(&pixtm);
    if (ppixtb)
        *ppixtb = pixtb;
    else
        pixDestroy(&pixtb);

    return 0;
}
/*------------------------------------------------------------------*
* Halftone region extraction *
*------------------------------------------------------------------*/
/*!
 *  pixGenHalftoneMask()
 *
 *      Input:  pixs (1 bpp, assumed to be 150 to 200 ppi)
 *              &pixtext (<optional return> text part of pixs)
 *              &htfound (<optional return> 1 if the mask is not empty)
 *              debug (flag: 1 for debug output)
 *      Return: pixd (halftone mask), or null on error
 *
 *  Notes:
 *      (1) Both optional outputs are initialized on entry (pixtext to
 *          NULL, htfound to 0), so they hold defined values on
 *          every return path, including errors.
 *      (2) If the halftone mask is empty, the returned pixtext is a
 *          copy of pixs; otherwise it is pixs with the pixels under
 *          the mask removed.
 */
PIX *
pixGenHalftoneMask(PIX      *pixs,
                   PIX     **ppixtext,
                   l_int32  *phtfound,
                   l_int32   debug)
{
l_int32  empty = 1;  /* stays 1 if pixZero() fails without setting it */
PIX     *pixt1, *pixt2, *pixhs, *pixhm, *pixd;

    PROCNAME("pixGenHalftoneMask");

    if (ppixtext) *ppixtext = NULL;
    if (phtfound) *phtfound = 0;
    if (!pixs)
        return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
    if (pixGetDepth(pixs) != 1)
        return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL);

        /* Compute seed for halftone parts at 8x reduction */
    pixt1 = pixReduceRankBinaryCascade(pixs, 4, 4, 3, 0);
    pixt2 = pixOpenBrick(NULL, pixt1, 5, 5);
    pixhs = pixExpandReplicate(pixt2, 8);  /* back to 2x reduction */
    pixDestroy(&pixt1);
    pixDestroy(&pixt2);
    pixDisplayWriteFormat(pixhs, debug, IFF_PNG);

        /* Compute mask for connected regions */
    pixhm = pixCloseSafeBrick(NULL, pixs, 4, 4);
    pixDisplayWriteFormat(pixhm, debug, IFF_PNG);

        /* Fill seed into mask to get halftone mask */
    pixd = pixSeedfillBinary(NULL, pixhs, pixhm, 4);

#if 0
        /* Moderate opening to remove thin lines, etc. */
    pixOpenBrick(pixd, pixd, 10, 10);
    pixDisplayWrite(pixd, debug);
#endif

        /* Check if mask is empty */
    pixZero(pixd, &empty);
    if (phtfound && !empty)
        *phtfound = 1;

        /* Optionally, get all pixels that are not under the halftone mask */
    if (ppixtext) {
        if (empty)
            *ppixtext = pixCopy(NULL, pixs);
        else
            *ppixtext = pixSubtract(NULL, pixs, pixd);
        pixDisplayWriteFormat(*ppixtext, debug, IFF_PNG);
    }

    pixDestroy(&pixhs);
    pixDestroy(&pixhm);
    return pixd;
}
/*------------------------------------------------------------------*
* Textline extraction *
*------------------------------------------------------------------*/
/*!
 *  pixGenTextlineMask()
 *
 *      Input:  pixs (1 bpp, assumed to be 150 to 200 ppi)
 *              &pixvws (<return> vertical whitespace mask)
 *              &tlfound (<optional return> 1 if the mask is not empty)
 *              debug (flag: 1 for debug output)
 *      Return: pixd (textline mask), or null on error
 *
 *  Notes:
 *      (1) The input pixs should be deskewed.
 *      (2) pixs should have no halftone pixels.
 *      (3) Both the input image and the returned textline mask
 *          are at the same resolution.
 *      (4) The outputs are initialized on entry (pixvws to NULL,
 *          tlfound to 0), so a caller can unconditionally destroy
 *          the returned pixvws even when this function fails.
 *          The caller owns the returned pixvws.
 */
PIX *
pixGenTextlineMask(PIX      *pixs,
                   PIX     **ppixvws,
                   l_int32  *ptlfound,
                   l_int32   debug)
{
l_int32  empty = 1;  /* stays 1 if pixZero() fails without setting it */
PIX     *pixt1, *pixt2, *pixvws, *pixd;

    PROCNAME("pixGenTextlineMask");

        /* Put the outputs in a defined state before any other
         * check can return. */
    if (ptlfound) *ptlfound = 0;
    if (!ppixvws)
        return (PIX *)ERROR_PTR("&pixvws not defined", procName, NULL);
    *ppixvws = NULL;
    if (!pixs)
        return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
    if (pixGetDepth(pixs) != 1)
        return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL);

        /* First we need a vertical whitespace mask.  Invert the image. */
    pixt1 = pixInvert(NULL, pixs);

        /* The whitespace mask will break textlines where there
         * is a large amount of white space below or above.
         * This can be prevented by identifying regions of the
         * inverted image that have large horizontal extent (bigger than
         * the separation between columns) and significant
         * vertical extent (bigger than the separation between
         * textlines), and subtracting this from the bg. */
    pixt2 = pixMorphCompSequence(pixt1, "o80.60", 0);
    pixSubtract(pixt1, pixt1, pixt2);
    pixDisplayWriteFormat(pixt1, debug, IFF_PNG);
    pixDestroy(&pixt2);

        /* Identify vertical whitespace by opening the remaining bg.
         * o5.1 removes thin vertical bg lines and o1.200 extracts
         * long vertical bg lines. */
    pixvws = pixMorphCompSequence(pixt1, "o5.1 + o1.200", 0);
    *ppixvws = pixvws;
    pixDisplayWriteFormat(pixvws, debug, IFF_PNG);
    pixDestroy(&pixt1);

        /* Three steps to getting text line mask:
         *   (1) close the characters and words in the textlines
         *   (2) open the vertical whitespace corridors back up
         *   (3) small opening to remove noise    */
    pixt1 = pixCloseSafeBrick(NULL, pixs, 30, 1);
    pixDisplayWrite(pixt1, debug);
    pixd = pixSubtract(NULL, pixt1, pixvws);
    pixOpenBrick(pixd, pixd, 3, 3);
    pixDisplayWriteFormat(pixd, debug, IFF_PNG);
    pixDestroy(&pixt1);

        /* Check if text line mask is empty */
    if (ptlfound) {
        pixZero(pixd, &empty);
        if (!empty)
            *ptlfound = 1;
    }

    return pixd;
}
/*------------------------------------------------------------------*
* Textblock extraction *
*------------------------------------------------------------------*/
/*!
 *  pixGenTextblockMask()
 *
 *      Input:  pixs (1 bpp, textline mask, assumed to be 150 to 200 ppi)
 *              pixvws (vertical white space mask)
 *              debug (flag: 1 for debug output)
 *      Return: pixd (textblock mask), or null on error
 *
 *  Notes:
 *      (1) The two input masks (textline and vertical white space)
 *          and the textblock mask that is returned all have the
 *          same resolution.
 *      (2) Expect some noise in the result: small "blocks" of text
 *          can survive.  A post-processing step such as
 *             pixSelectBySize(pix, 60, 60, 4, L_SELECT_IF_EITHER,
 *                             L_SELECT_IF_GTE, NULL);
 *          can be used to remove them.
 */
PIX *
pixGenTextblockMask(PIX     *pixs,
                    PIX     *pixvws,
                    l_int32  debug)
{
PIX  *pixjoined;  /* textlines merged vertically */
PIX  *pixsolid;   /* solidified blocks */
PIX  *pixcols;    /* blocks with the column gaps reopened */
PIX  *pixd;

    PROCNAME("pixGenTextblockMask");

    if (!pixs)
        return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
    if (!pixvws)
        return (PIX *)ERROR_PTR("pixvws not defined", procName, NULL);
    if (pixGetDepth(pixs) != 1)
        return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL);

        /* Merge the textlines in the vertical direction; this is the
         * first approximation to the textblock mask. */
    pixjoined = pixMorphSequence(pixs, "c1.10 + o4.1", 0);
    pixDisplayWriteFormat(pixjoined, debug, IFF_PNG);

        /* Working component by component, close each block and
         * dilate it a little so that it becomes solid; then bridge
         * small horizontal gaps between the components. */
    pixsolid = pixMorphSequenceByComponent(pixjoined, "c30.30 + d3.3",
                                           8, 0, 0, NULL);
    pixDestroy(&pixjoined);
    pixCloseSafeBrick(pixsolid, pixsolid, 10, 1);
    pixDisplayWriteFormat(pixsolid, debug, IFF_PNG);

        /* Cut the white space between columns back out of the mask */
    pixcols = pixSubtract(NULL, pixsolid, pixvws);
    pixDestroy(&pixsolid);
    pixDisplayWriteFormat(pixcols, debug, IFF_PNG);

        /* Keep only the components that are large enough */
    pixd = pixSelectBySize(pixcols, 25, 5, 8, L_SELECT_IF_BOTH,
                           L_SELECT_IF_GTE, NULL);
    pixDestroy(&pixcols);
    pixDisplayWriteFormat(pixd, debug, IFF_PNG);

    return pixd;
}
/*------------------------------------------------------------------*
* Location of page foreground *
*------------------------------------------------------------------*/
/*!
 *  pixFindPageForeground()
 *
 *      Input:  pixs (full resolution; any type or depth)
 *              threshold (for binarization; typically about 128)
 *              mindist (min distance of text from border to allow
 *                       cleaning near border; at 2x reduction, this
 *                       should be larger than 50; typically about 70)
 *              erasedist (when conditions are satisfied, erase anything
 *                         within this distance of the edge;
 *                         typically 30 at 2x reduction)
 *              pagenum (use for debugging when called repeatedly; labels
 *                       debug images that are assembled into pdfdir)
 *              showmorph (set to a negative integer to show steps in
 *                         generating masks; this is typically used
 *                         for debugging region extraction)
 *              display (set to 1 to display mask and selected region
 *                       for debugging a single page)
 *              pdfdir (subdirectory of /tmp where images showing the
 *                      result are placed when called repeatedly; use
 *                      null if no output requested)
 *      Return: box (region including foreground, with some pixel noise
 *                   removed), or null if not found
 *
 *  Notes:
 *      (1) This doesn't simply crop to the fg.  It attempts to remove
 *          pixel noise and junk at the edge of the image before cropping.
 *          The input @threshold is used if pixs is not 1 bpp.
 *      (2) There are several debugging options, determined by the
 *          last 4 arguments.
 *      (3) If you want pdf output of results when called repeatedly,
 *          the pagenum arg labels the images written, which go into
 *          /tmp/<pdfdir>/<pagenum>.png.  In that case,
 *          you would clean out the /tmp directory before calling this
 *          function on each page:
 *              lept_rmdir(pdfdir);
 *              lept_mkdir(pdfdir);
 *      (4) The analysis is done at 2x reduction; the returned box is
 *          scaled back up by 2x.  The caller owns the returned box.
 *      (5) When showmorph is 0, no intermediate morphology output is
 *          requested from either pixMorphSequence() call.
 */
BOX *
pixFindPageForeground(PIX         *pixs,
                      l_int32      threshold,
                      l_int32      mindist,
                      l_int32      erasedist,
                      l_int32      pagenum,
                      l_int32      showmorph,
                      l_int32      display,
                      const char  *pdfdir)
{
char     buf[256];
l_int32  seedflag, closeflag, nbox, nchars;
l_int32  intersects = 0;
l_int32  w = 0, h = 0;
l_int32  bx, by, bw, bh, left, right, top, bottom;
PIX     *pixb, *pixb2, *pixseed, *pixsf, *pixm, *pix1, *pixg2;
BOX     *box, *boxin;
BOX     *boxfg = NULL;
BOX     *boxd = NULL;
BOXA    *ba1, *ba2;

    PROCNAME("pixFindPageForeground");

    if (!pixs)
        return (BOX *)ERROR_PTR("pixs not defined", procName, NULL);

        /* Debug flags for the two morphological sequences.  A value
         * of -1 writes intermediate images to /tmp/seq_output_1.pdf;
         * the second sequence uses -2 so that its output goes to a
         * different file.  Both must be 0 when showmorph is 0, so that
         * nothing is written unless debugging was requested. */
    seedflag = (showmorph) ? -1 : 0;
    closeflag = (showmorph) ? -2 : 0;

        /* Binarize, downscale by 0.5, remove the noise to generate a seed,
         * and do a seedfill back from the seed into those 8-connected
         * components of the binarized image for which there was at least
         * one seed pixel.  Also clear out any components that are within
         * 10 pixels of the edge at 2x reduction. */
    pixb = pixConvertTo1(pixs, threshold);
    pixb2 = pixScale(pixb, 0.5, 0.5);
    pixseed = pixMorphSequence(pixb2, "o1.2 + c9.9 + o3.5", seedflag);
    pixsf = pixSeedfillBinary(NULL, pixseed, pixb2, 8);
    pixSetOrClearBorder(pixsf, 10, 10, 10, 10, PIX_SET);
    pixm = pixRemoveBorderConnComps(pixsf, 8);
    if (display) pixDisplay(pixm, 100, 100);

        /* Now, where is the main block of text?  We want to remove noise near
         * the edge of the image, but to do that, we have to be convinced that
         * (1) there is noise and (2) it is far enough from the text block
         * and close enough to the edge.  For each edge, if the block
         * is more than mindist from that edge, then clean 'erasedist'
         * pixels from the edge. */
    pix1 = pixMorphSequence(pixm, "c50.50", closeflag);
    ba1 = pixConnComp(pix1, NULL, 8);
    ba2 = boxaSort(ba1, L_SORT_BY_AREA, L_SORT_DECREASING, NULL);
    pixGetDimensions(pix1, &w, &h, NULL);
    nbox = boxaGetCount(ba2);
    if (nbox > 1) {
            /* ba2 is sorted by decreasing area; box 0 is the largest */
        box = boxaGetBox(ba2, 0, L_CLONE);
        boxGetGeometry(box, &bx, &by, &bw, &bh);
        left = (bx > mindist) ? erasedist : 0;
        right = (w - bx - bw > mindist) ? erasedist : 0;
        top = (by > mindist) ? erasedist : 0;
        bottom = (h - by - bh > mindist) ? erasedist : 0;
        pixSetOrClearBorder(pixm, left, right, top, bottom, PIX_CLR);
        boxDestroy(&box);
    }
    pixDestroy(&pix1);
    boxaDestroy(&ba1);
    boxaDestroy(&ba2);

        /* Locate the foreground region; don't bother cropping */
    pixClipToForeground(pixm, NULL, &boxfg);

        /* Sanity check the fg region.  Make sure it's not confined
         * to a thin boundary on the left and right sides of the image,
         * in which case it is likely to be noise. */
    if (boxfg) {
        boxin = boxCreate(0.1 * w, 0, 0.8 * w, h);
        boxIntersects(boxfg, boxin, &intersects);
        if (!intersects) {
            L_INFO("found only noise on page %d\n", procName, pagenum);
            boxDestroy(&boxfg);
        }
        boxDestroy(&boxin);
    }

    if (!boxfg) {
        L_INFO("no fg region found for page %d\n", procName, pagenum);
    } else {
        boxAdjustSides(boxfg, boxfg, -2, 2, -2, 2);  /* tiny expansion */
        boxd = boxTransform(boxfg, 0, 0, 2.0, 2.0);

            /* Write image showing box for this page.  This is to be
             * bundled up into a pdf of all the pages, which can be
             * generated by convertFilesToPdf()  */
        if (pdfdir) {
            pixg2 = pixConvert1To4Cmap(pixb);
            pixRenderBoxArb(pixg2, boxd, 3, 255, 0, 0);
            nchars = snprintf(buf, sizeof(buf), "/tmp/%s/%05d.png",
                              pdfdir, pagenum);
            if (display) pixDisplay(pixg2, 700, 100);
                /* Never write to a silently truncated path */
            if (nchars < 0 || (size_t)nchars >= sizeof(buf)) {
                L_INFO("output path too long; image not written for page %d\n",
                       procName, pagenum);
            } else {
                pixWrite(buf, pixg2, IFF_PNG);
            }
            pixDestroy(&pixg2);
        }
    }

    pixDestroy(&pixb);
    pixDestroy(&pixb2);
    pixDestroy(&pixseed);
    pixDestroy(&pixsf);
    pixDestroy(&pixm);
    boxDestroy(&boxfg);
    return boxd;
}
/*------------------------------------------------------------------*
* Extraction of characters from image with only text *
*------------------------------------------------------------------*/
/*!
 *  pixSplitIntoCharacters()
 *
 *      Input:  pixs (1 bpp, contains only deskewed text)
 *              minw (minimum component width for initial filtering; typ. 4)
 *              minh (minimum component height for initial filtering; typ. 4)
 *              &boxa (<optional return> character bounding boxes)
 *              &pixa (<optional return> character images)
 *              &pixdebug (<optional return> showing splittings)
 *
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) This is a simple function; the split points it looks for
 *          are derived from vertical pixel profiles.
 *      (2) The input image may hold an arbitrary number of text
 *          characters.
 *      (3) The boxes from which the (possibly split) components are
 *          extracted are included in the returned pixa.
 */
l_int32
pixSplitIntoCharacters(PIX     *pixs,
                       l_int32  minw,
                       l_int32  minh,
                       BOXA   **pboxa,
                       PIXA   **ppixa,
                       PIX    **ppixdebug)
{
l_int32  idx, ncc, dx, dy;
BOXA    *boxacc, *boxaall, *boxalocal, *boxapage, *boxasorted;
BOXAA   *baarows;
PIX     *pixcomp, *pixfilt, *pixclosed, *pixsplitdb;
PIXA    *pixacc, *pixadebug;

    PROCNAME("pixSplitIntoCharacters");

    if (pboxa) *pboxa = NULL;
    if (ppixa) *ppixa = NULL;
    if (ppixdebug) *ppixdebug = NULL;
    if (!pixs || pixGetDepth(pixs) != 1)
        return ERROR_INT("pixs not defined or not 1 bpp", procName, 1);

        /* Filter out the small components, then consolidate what
         * remains with a small vertical closing. */
    pixfilt = pixSelectBySize(pixs, minw, minh, 8, L_SELECT_IF_BOTH,
                              L_SELECT_IF_GT, NULL);
    pixclosed = pixMorphSequence(pixfilt, "c1.10", 0);
    pixDestroy(&pixfilt);

        /* Extract the 8-connected components; only the images (which
         * carry their own boxes) are needed from here on. */
    boxacc = pixConnComp(pixclosed, &pixacc, 8);
    pixDestroy(&pixclosed);
    boxaDestroy(&boxacc);

        /* Try to split each component, and accumulate the resulting
         * boxes in the coordinates of the full image. */
    ncc = pixaGetCount(pixacc);
    boxaall = boxaCreate(ncc);
    pixadebug = (ppixdebug) ? pixaCreate(ncc) : NULL;
    for (idx = 0; idx < ncc; idx++) {
        pixsplitdb = NULL;
        pixcomp = pixaGetPix(pixacc, idx, L_CLONE);
        boxalocal = pixSplitComponentWithProfile(pixcomp, 10, 7,
                                     (ppixdebug) ? &pixsplitdb : NULL);
        if (pixsplitdb)  /* can only be set when debugging */
            pixaAddPix(pixadebug, pixsplitdb, L_INSERT);

            /* Shift from component coordinates to image coordinates */
        pixaGetBoxGeometry(pixacc, idx, &dx, &dy, NULL, NULL);
        boxapage = boxaTransform(boxalocal, dx, dy, 1.0, 1.0);
        boxaJoin(boxaall, boxapage, 0, -1);

        pixDestroy(&pixcomp);
        boxaDestroy(&boxalocal);
        boxaDestroy(&boxapage);
    }
    pixaDestroy(&pixacc);

        /* Tile the per-component debug images, if there are any */
    if (ppixdebug) {
        if (pixaGetCount(pixadebug) > 0) {
            *ppixdebug = pixaDisplayTiledInRows(pixadebug, 32, 1500,
                                                1.0, 0, 20, 1);
        }
        pixaDestroy(&pixadebug);
    }

        /* Sort the boxes in 2D, then flatten back to a single boxa */
    baarows = boxaSort2d(boxaall, NULL, 0, 0, 5);
    boxasorted = boxaaFlattenToBoxa(baarows, NULL, L_CLONE);
    boxaaDestroy(&baarows);
    boxaDestroy(&boxaall);

        /* If requested, clip the pieces out of the input image */
    if (ppixa)
        *ppixa = pixClipRectangles(pixs, boxasorted);

        /* Either give the sorted boxes to the caller or release them */
    if (pboxa) {
        *pboxa = boxasorted;
    } else {
        boxaDestroy(&boxasorted);
    }

    return 0;
}
/*!
 *  pixSplitComponentWithProfile()
 *
 *      Input:  pixs (1 bpp, exactly one connected component)
 *              delta (distance used in extrema finding in a numa; typ. 10)
 *              mindel (minimum required difference between profile minimum
 *                      and profile values +2 and -2 away; typ. 7)
 *              &pixdebug (<optional return> debug image of splitting)
 *      Return: boxa (of c.c. after splitting), or null on error
 *
 *  Notes:
 *      (1) This will split the most obvious cases of touching characters.
 *          The split points it is searching for are narrow and deep
 *          minima in the vertical pixel projection profile, after a
 *          large vertical closing has been applied to the component.
 *      (2) If no split point is found, the returned boxa holds a single
 *          box covering all of pixs, and no debug image is made.
 *      (3) A minimum is only tested if the profile samples 2 columns to
 *          its left and right both exist; minima closer than that to
 *          either edge of pixs are never used as split points.
 *      (4) When &pixdebug is given, diagnostic text is also written
 *          to stderr.
 */
BOXA *
pixSplitComponentWithProfile(PIX     *pixs,
                             l_int32  delta,
                             l_int32  mindel,
                             PIX    **ppixdebug)
{
l_int32   w, h, n2, i, firstmin, xmin, xshift;
l_int32   nmin, nleft, nright, nsplit, isplit, ncomp;
l_int32  *array1, *array2;
BOX      *box;
BOXA     *boxad;
NUMA     *na1, *na2, *nasplit;
PIX      *pix1, *pixdb;

    PROCNAME("pixSplitComponentWithProfile");

    if (ppixdebug) *ppixdebug = NULL;
    if (!pixs || pixGetDepth(pixs) != 1)
        return (BOXA *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL);
    pixGetDimensions(pixs, &w, &h, NULL);

        /* Closing to consolidate characters vertically */
    pix1 = pixCloseSafeBrick(NULL, pixs, 1, 100);

        /* Get extrema of column projections */
    boxad = boxaCreate(2);
    na1 = pixCountPixelsByColumn(pix1);  /* w elements */
    pixDestroy(&pix1);
    na2 = numaFindExtrema(na1, delta);
    n2 = numaGetCount(na2);
    if (n2 < 3) {  /* no split possible */
        box = boxCreate(0, 0, w, h);
        boxaAddBox(boxad, box, L_INSERT);
        numaDestroy(&na1);
        numaDestroy(&na2);
        return boxad;
    }

        /* Look for sufficiently deep and narrow minima.
         * All minima of interest must be surrounded by max on each
         * side.  firstmin is the index of first possible minimum. */
    array1 = numaGetIArray(na1);
    array2 = numaGetIArray(na2);
    if (ppixdebug) numaWriteStream(stderr, na2);
    firstmin = (array1[array2[0]] > array1[array2[1]]) ? 1 : 2;
    nasplit = numaCreate(n2);  /* will hold split locations */
    for (i = firstmin; i < n2 - 1; i += 2) {
        xmin = array2[i];
        if (xmin + 2 >= w) break;  /* no more splits possible */
        if (xmin < 2) continue;  /* array1[xmin - 2] would be out of bounds */
        nmin = array1[xmin];
        nleft = array1[xmin - 2];
        nright = array1[xmin + 2];
        if (ppixdebug) {
            fprintf(stderr,
                "Splitting: xmin = %d, w = %d; nl = %d, nmin = %d, nr = %d\n",
                xmin, w, nleft, nmin, nright);
        }
        if (nleft - nmin >= mindel && nright - nmin >= mindel)  /* split */
            numaAddNumber(nasplit, xmin);
    }
    nsplit = numaGetCount(nasplit);

#if 0
    if (ppixdebug && nsplit > 0)
        gplotSimple1(na1, GPLOT_X11, "/tmp/splitroot", NULL);
#endif

    numaDestroy(&na1);
    numaDestroy(&na2);
    FREE(array1);
    FREE(array2);

    if (nsplit == 0) {  /* no splitting */
        box = boxCreate(0, 0, w, h);
        boxaAddBox(boxad, box, L_INSERT);
        numaDestroy(&nasplit);  /* was leaked on this path */
        return boxad;
    }

        /* Use split points to generate b.b. after splitting.  The
         * column at each split point is not included in either of
         * the adjacent boxes. */
    for (i = 0, xshift = 0; i < nsplit; i++) {
        numaGetIValue(nasplit, i, &isplit);
        box = boxCreate(xshift, 0, isplit - xshift, h);
        boxaAddBox(boxad, box, L_INSERT);
        xshift = isplit + 1;
    }
    box = boxCreate(xshift, 0, w - xshift, h);
    boxaAddBox(boxad, box, L_INSERT);
    numaDestroy(&nasplit);

        /* Optionally render the split boxes over a 32 bpp copy of pixs */
    if (ppixdebug) {
        pixdb = pixConvertTo32(pixs);
        ncomp = boxaGetCount(boxad);
        for (i = 0; i < ncomp; i++) {
            box = boxaGetBox(boxad, i, L_CLONE);
            pixRenderBoxBlend(pixdb, box, 1, 255, 0, 0, 0.5);
            boxDestroy(&box);
        }
        *ppixdebug = pixdb;
    }

    return boxad;
}