pranavjha/text-detector

View on GitHub
third-party/leptonica/src/selgen.c

Summary

Maintainability
Test Coverage
/*====================================================================*
 -  Copyright (C) 2001 Leptonica.  All rights reserved.
 -
 -  Redistribution and use in source and binary forms, with or without
 -  modification, are permitted provided that the following conditions
 -  are met:
 -  1. Redistributions of source code must retain the above copyright
 -     notice, this list of conditions and the following disclaimer.
 -  2. Redistributions in binary form must reproduce the above
 -     copyright notice, this list of conditions and the following
 -     disclaimer in the documentation and/or other materials
 -     provided with the distribution.
 -
 -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
 -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *====================================================================*/

/*
 *  selgen.c
 *
 *      This file contains functions that generate hit-miss Sels
 *      for doing a loose match to a small bitmap.  The hit-miss
 *      Sel is made from a given bitmap.  Several "knobs"
 *      are available to control the looseness of the match.
 *      In general, a tight match will have fewer false positives
 *      (bad matches) but more false negatives (missed patterns).
 *      The values to be used depend on the quality and variation
 *      of the image in which the pattern is to be searched,
 *      and the relative penalties of false positives and
 *      false negatives.  Default values for the three knobs --
 *      minimum distance to boundary pixels, number of extra pixels
 *      added to selected sides, and minimum acceptable runlength
 *      in eroded version -- are provided.
 *
 *      The generated hit-miss Sels can always be used in the
 *      rasterop implementation of binary morphology (in morph.h).
 *      If they are small enough (not more than 31 pixels extending
 *      in any direction from the Sel origin), they can also be used
 *      to auto-generate dwa code (fmorphauto.c).
 *
 *
 *      Generate a subsampled structuring element
 *            SEL     *pixGenerateSelWithRuns()
 *            SEL     *pixGenerateSelRandom()
 *            SEL     *pixGenerateSelBoundary()
 *
 *      Accumulate data on runs along lines
 *            NUMA    *pixGetRunCentersOnLine()
 *            NUMA    *pixGetRunsOnLine()
 *
 *      Subsample boundary pixels in relatively ordered way
 *            PTA     *pixSubsampleBoundaryPixels()
 *            PTA     *adjacentOnPixelInRaster()
 *
 *      Display generated sel with originating image
 *            PIX     *pixDisplayHitMissSel()
 */

#include "allheaders.h"


    /* default minimum distance of a hit-miss pixel element to
     * a boundary pixel of its color. */
static const l_int32  DEFAULT_DISTANCE_TO_BOUNDARY = 1;
static const l_int32  MAX_DISTANCE_TO_BOUNDARY = 4;

    /* default min runlength to accept a hit or miss element located
     * at its center */
static const l_int32  DEFAULT_MIN_RUNLENGTH = 3;


    /* default scalefactor for displaying image and hit-miss sel
     * that is derived from it */
static const l_int32  DEFAULT_SEL_SCALEFACTOR = 7;
static const l_int32  MAX_SEL_SCALEFACTOR = 31;  /* should be big enough */

#ifndef  NO_CONSOLE_IO
#define  DEBUG_DISPLAY_HM_SEL   0
#endif  /* ~NO_CONSOLE_IO */


/*-----------------------------------------------------------------*
 *           Generate a subsampled structuring element             *
 *-----------------------------------------------------------------*/
/*!
 *  pixGenerateSelWithRuns()
 *
 *      Input:  pix (1 bpp, typically small, to be used as a pattern)
 *              nhlines (number of hor lines along which elements are found)
 *              nvlines (number of vert lines along which elements are found)
 *              distance (min distance from boundary pixel; use 0 for default)
 *              minlength (min runlength to set hit or miss; use 0 for default)
 *              toppix (number of extra pixels of bg added above)
 *              botpix (number of extra pixels of bg added below)
 *              leftpix (number of extra pixels of bg added to left)
 *              rightpix (number of extra pixels of bg added to right)
 *              &pixe (<optional return> input pix expanded by extra pixels)
 *      Return: sel (hit-miss for input pattern), or null on error
 *
 *  Notes:
 *    (1) The horizontal and vertical lines along which elements are
 *        selected are roughly equally spaced.  The actual locations of
 *        the hits and misses are the centers of respective run-lengths.
 *    (2) No elements are selected that are less than 'distance' pixels away
 *        from a boundary pixel of the same color.  This makes the
 *        match much more robust to edge noise.  Valid inputs of
 *        'distance' are 0, 1, 2, 3 and 4.  If distance is either 0 or
 *        greater than 4, we reset it to the default value.
 *    (3) The 4 numbers for adding rectangles of pixels outside the fg
 *        can be use if the pattern is expected to be surrounded by bg
 *        (white) pixels.  On the other hand, if the pattern may be near
 *        other fg (black) components on some sides, use 0 for those sides.
 *    (4) The pixels added to a side allow you to have miss elements there.
 *        There is a constraint between distance, minlength, and
 *        the added pixels for this to work.  We illustrate using the
 *        default values.  If you add 5 pixels to the top, and use a
 *        distance of 1, then you end up with a vertical run of at least
 *        4 bg pixels along the top edge of the image.  If you use a
 *        minimum runlength of 3, each vertical line will always find
 *        a miss near the center of its run.  However, if you use a
 *        minimum runlength of 5, you will not get a miss on every vertical
 *        line.  As another example, if you have 7 added pixels and a
 *        distance of 2, you can use a runlength up to 5 to guarantee
 *        that the miss element is recorded.  We give a warning if the
 *        contraint does not guarantee a miss element outside the
 *        image proper.
 *    (5) The input pix, as extended by the extra pixels on selected sides,
 *        can optionally be returned.  For debugging, call
 *        pixDisplayHitMissSel() to visualize the hit-miss sel superimposed
 *        on the generating bitmap.
 */
SEL *
pixGenerateSelWithRuns(PIX     *pixs,
                       l_int32  nhlines,
                       l_int32  nvlines,
                       l_int32  distance,
                       l_int32  minlength,
                       l_int32  toppix,
                       l_int32  botpix,
                       l_int32  leftpix,
                       l_int32  rightpix,
                       PIX    **ppixe)
{
l_int32    ws, hs, w, h, x, y, xval, yval, i, j, nh, nm;
l_float32  delh, delw;
NUMA      *nah, *nam;
PIX       *pixt1, *pixt2, *pixfg, *pixbg;
PTA       *ptah, *ptam;
SEL       *seld, *sel;

    PROCNAME("pixGenerateSelWithRuns");

    if (ppixe) *ppixe = NULL;
    if (!pixs)
        return (SEL *)ERROR_PTR("pixs not defined", procName, NULL);
    if (pixGetDepth(pixs) != 1)
        return (SEL *)ERROR_PTR("pixs not 1 bpp", procName, NULL);
    if (nhlines < 1 && nvlines < 1)
        return (SEL *)ERROR_PTR("nvlines and nhlines both < 1", procName, NULL);

    if (distance <= 0)
        distance = DEFAULT_DISTANCE_TO_BOUNDARY;
    if (minlength <= 0)
        minlength = DEFAULT_MIN_RUNLENGTH;
    if (distance > MAX_DISTANCE_TO_BOUNDARY) {
        L_WARNING("distance too large; setting to max value\n", procName);
        distance = MAX_DISTANCE_TO_BOUNDARY;
    }

        /* Locate the foreground */
    pixClipToForeground(pixs, &pixt1, NULL);
    if (!pixt1)
        return (SEL *)ERROR_PTR("pixt1 not made", procName, NULL);
    ws = pixGetWidth(pixt1);
    hs = pixGetHeight(pixt1);
    w = ws;
    h = hs;

        /* Crop out a region including the foreground, and add pixels
         * on sides depending on the side flags */
    if (toppix || botpix || leftpix || rightpix) {
        x = y = 0;
        if (toppix) {
            h += toppix;
            y = toppix;
            if (toppix < distance + minlength)
                L_WARNING("no miss elements in added top pixels\n", procName);
        }
        if (botpix) {
            h += botpix;
            if (botpix < distance + minlength)
                L_WARNING("no miss elements in added bot pixels\n", procName);
        }
        if (leftpix) {
            w += leftpix;
            x = leftpix;
            if (leftpix < distance + minlength)
                L_WARNING("no miss elements in added left pixels\n", procName);
        }
        if (rightpix) {
            w += rightpix;
            if (rightpix < distance + minlength)
                L_WARNING("no miss elements in added right pixels\n", procName);
        }
        pixt2 = pixCreate(w, h, 1);
        pixRasterop(pixt2, x, y, ws, hs, PIX_SRC, pixt1, 0, 0);
    } else {
        pixt2 = pixClone(pixt1);
    }
    if (ppixe)
        *ppixe = pixClone(pixt2);
    pixDestroy(&pixt1);

        /* Identify fg and bg pixels that are at least 'distance' pixels
         * away from the boundary pixels in their set */
    seld = selCreateBrick(2 * distance + 1, 2 * distance + 1,
                          distance, distance, SEL_HIT);
    pixfg = pixErode(NULL, pixt2, seld);
    pixbg = pixDilate(NULL, pixt2, seld);
    pixInvert(pixbg, pixbg);
    selDestroy(&seld);
    pixDestroy(&pixt2);

        /* Accumulate hit and miss points */
    ptah = ptaCreate(0);
    ptam = ptaCreate(0);
    if (nhlines >= 1) {
        delh = (l_float32)h / (l_float32)(nhlines + 1);
        for (i = 0, y = 0; i < nhlines; i++) {
            y += (l_int32)(delh + 0.5);
            nah = pixGetRunCentersOnLine(pixfg, -1, y, minlength);
            nam = pixGetRunCentersOnLine(pixbg, -1, y, minlength);
            nh = numaGetCount(nah);
            nm = numaGetCount(nam);
            for (j = 0; j < nh; j++) {
                numaGetIValue(nah, j, &xval);
                ptaAddPt(ptah, xval, y);
            }
            for (j = 0; j < nm; j++) {
                numaGetIValue(nam, j, &xval);
                ptaAddPt(ptam, xval, y);
            }
            numaDestroy(&nah);
            numaDestroy(&nam);
        }
    }
    if (nvlines >= 1) {
        delw = (l_float32)w / (l_float32)(nvlines + 1);
        for (i = 0, x = 0; i < nvlines; i++) {
            x += (l_int32)(delw + 0.5);
            nah = pixGetRunCentersOnLine(pixfg, x, -1, minlength);
            nam = pixGetRunCentersOnLine(pixbg, x, -1, minlength);
            nh = numaGetCount(nah);
            nm = numaGetCount(nam);
            for (j = 0; j < nh; j++) {
                numaGetIValue(nah, j, &yval);
                ptaAddPt(ptah, x, yval);
            }
            for (j = 0; j < nm; j++) {
                numaGetIValue(nam, j, &yval);
                ptaAddPt(ptam, x, yval);
            }
            numaDestroy(&nah);
            numaDestroy(&nam);
        }
    }

        /* Make the Sel with those points */
    sel = selCreateBrick(h, w, h / 2, w / 2, SEL_DONT_CARE);
    nh = ptaGetCount(ptah);
    for (i = 0; i < nh; i++) {
        ptaGetIPt(ptah, i, &x, &y);
        selSetElement(sel, y, x, SEL_HIT);
    }
    nm = ptaGetCount(ptam);
    for (i = 0; i < nm; i++) {
        ptaGetIPt(ptam, i, &x, &y);
        selSetElement(sel, y, x, SEL_MISS);
    }

    pixDestroy(&pixfg);
    pixDestroy(&pixbg);
    ptaDestroy(&ptah);
    ptaDestroy(&ptam);
    return sel;
}


/*!
 *  pixGenerateSelRandom()
 *
 *      Input:  pix (1 bpp, typically small, to be used as a pattern)
 *              hitfract (fraction of allowable fg pixels that are hits)
 *              missfract (fraction of allowable bg pixels that are misses)
 *              distance (min distance from boundary pixel; use 0 for default)
 *              toppix (number of extra pixels of bg added above)
 *              botpix (number of extra pixels of bg added below)
 *              leftpix (number of extra pixels of bg added to left)
 *              rightpix (number of extra pixels of bg added to right)
 *              &pixe (<optional return> input pix expanded by extra pixels)
 *      Return: sel (hit-miss for input pattern), or null on error
 *
 *  Notes:
 *    (1) Either of hitfract and missfract can be zero.  If both are zero,
 *        the sel would be empty, and NULL is returned.
 *    (2) No elements are selected that are less than 'distance' pixels away
 *        from a boundary pixel of the same color.  This makes the
 *        match much more robust to edge noise.  Valid inputs of
 *        'distance' are 0, 1, 2, 3 and 4.  If distance is either 0 or
 *        greater than 4, we reset it to the default value.
 *    (3) The 4 numbers for adding rectangles of pixels outside the fg
 *        can be use if the pattern is expected to be surrounded by bg
 *        (white) pixels.  On the other hand, if the pattern may be near
 *        other fg (black) components on some sides, use 0 for those sides.
 *    (4) The input pix, as extended by the extra pixels on selected sides,
 *        can optionally be returned.  For debugging, call
 *        pixDisplayHitMissSel() to visualize the hit-miss sel superimposed
 *        on the generating bitmap.
 */
SEL *
pixGenerateSelRandom(PIX       *pixs,
                     l_float32  hitfract,
                     l_float32  missfract,
                     l_int32    distance,
                     l_int32    toppix,
                     l_int32    botpix,
                     l_int32    leftpix,
                     l_int32    rightpix,
                     PIX      **ppixe)
{
l_int32    ws, hs, w, h, x, y, i, j, thresh;
l_uint32   val;
PIX       *pixt1, *pixt2, *pixfg, *pixbg;
SEL       *seld, *sel;

    PROCNAME("pixGenerateSelRandom");

    if (ppixe) *ppixe = NULL;
    if (!pixs)
        return (SEL *)ERROR_PTR("pixs not defined", procName, NULL);
    if (pixGetDepth(pixs) != 1)
        return (SEL *)ERROR_PTR("pixs not 1 bpp", procName, NULL);
    if (hitfract <= 0.0 && missfract <= 0.0)
        return (SEL *)ERROR_PTR("no hits or misses", procName, NULL);
    if (hitfract > 1.0 || missfract > 1.0)
        return (SEL *)ERROR_PTR("fraction can't be > 1.0", procName, NULL);

    if (distance <= 0)
        distance = DEFAULT_DISTANCE_TO_BOUNDARY;
    if (distance > MAX_DISTANCE_TO_BOUNDARY) {
        L_WARNING("distance too large; setting to max value\n", procName);
        distance = MAX_DISTANCE_TO_BOUNDARY;
    }

        /* Locate the foreground */
    pixClipToForeground(pixs, &pixt1, NULL);
    if (!pixt1)
        return (SEL *)ERROR_PTR("pixt1 not made", procName, NULL);
    ws = pixGetWidth(pixt1);
    hs = pixGetHeight(pixt1);
    w = ws;
    h = hs;

        /* Crop out a region including the foreground, and add pixels
         * on sides depending on the side flags */
    if (toppix || botpix || leftpix || rightpix) {
        x = y = 0;
        if (toppix) {
            h += toppix;
            y = toppix;
        }
        if (botpix)
            h += botpix;
        if (leftpix) {
            w += leftpix;
            x = leftpix;
        }
        if (rightpix)
            w += rightpix;
        pixt2 = pixCreate(w, h, 1);
        pixRasterop(pixt2, x, y, ws, hs, PIX_SRC, pixt1, 0, 0);
    } else {
        pixt2 = pixClone(pixt1);
    }
    if (ppixe)
        *ppixe = pixClone(pixt2);
    pixDestroy(&pixt1);

        /* Identify fg and bg pixels that are at least 'distance' pixels
         * away from the boundary pixels in their set */
    seld = selCreateBrick(2 * distance + 1, 2 * distance + 1,
                          distance, distance, SEL_HIT);
    pixfg = pixErode(NULL, pixt2, seld);
    pixbg = pixDilate(NULL, pixt2, seld);
    pixInvert(pixbg, pixbg);
    selDestroy(&seld);
    pixDestroy(&pixt2);

        /* Generate the sel from a random selection of these points */
    sel = selCreateBrick(h, w, h / 2, w / 2, SEL_DONT_CARE);
    if (hitfract > 0.0) {
        thresh = (l_int32)(hitfract * (l_float64)RAND_MAX);
        for (i = 0; i < h; i++) {
            for (j = 0; j < w; j++) {
                pixGetPixel(pixfg, j, i, &val);
                if (val) {
                    if (rand() < thresh)
                        selSetElement(sel, i, j, SEL_HIT);
                }
            }
        }
    }
    if (missfract > 0.0) {
        thresh = (l_int32)(missfract * (l_float64)RAND_MAX);
        for (i = 0; i < h; i++) {
            for (j = 0; j < w; j++) {
                pixGetPixel(pixbg, j, i, &val);
                if (val) {
                    if (rand() < thresh)
                        selSetElement(sel, i, j, SEL_MISS);
                }
            }
        }
    }

    pixDestroy(&pixfg);
    pixDestroy(&pixbg);
    return sel;
}


/*!
 *  pixGenerateSelBoundary()
 *
 *      Input:  pix (1 bpp, typically small, to be used as a pattern)
 *              hitdist (min distance from fg boundary pixel)
 *              missdist (min distance from bg boundary pixel)
 *              hitskip (number of boundary pixels skipped between hits)
 *              missskip (number of boundary pixels skipped between misses)
 *              topflag (flag for extra pixels of bg added above)
 *              botflag (flag for extra pixels of bg added below)
 *              leftflag (flag for extra pixels of bg added to left)
 *              rightflag (flag for extra pixels of bg added to right)
 *              &pixe (<optional return> input pix expanded by extra pixels)
 *      Return: sel (hit-miss for input pattern), or null on error
 *
 *  Notes:
 *    (1) All fg elements selected are exactly hitdist pixels away from
 *        the nearest fg boundary pixel, and ditto for bg elements.
 *        Valid inputs of hitdist and missdist are 0, 1, 2, 3 and 4.
 *        For example, a hitdist of 0 puts the hits at the fg boundary.
 *        Usually, the distances should be > 0 avoid the effect of
 *        noise at the boundary.
 *    (2) Set hitskip < 0 if no hits are to be used.  Ditto for missskip.
 *        If both hitskip and missskip are < 0, the sel would be empty,
 *        and NULL is returned.
 *    (3) The 4 flags determine whether the sel is increased on that side
 *        to allow bg misses to be placed all along that boundary.
 *        The increase in sel size on that side is the minimum necessary
 *        to allow the misses to be placed at mindist.  For text characters,
 *        the topflag and botflag are typically set to 1, and the leftflag
 *        and rightflag to 0.
 *    (4) The input pix, as extended by the extra pixels on selected sides,
 *        can optionally be returned.  For debugging, call
 *        pixDisplayHitMissSel() to visualize the hit-miss sel superimposed
 *        on the generating bitmap.
 *    (5) This is probably the best of the three sel generators, in the
 *        sense that you have the most flexibility with the smallest number
 *        of hits and misses.
 */
SEL *
pixGenerateSelBoundary(PIX     *pixs,
                       l_int32  hitdist,
                       l_int32  missdist,
                       l_int32  hitskip,
                       l_int32  missskip,
                       l_int32  topflag,
                       l_int32  botflag,
                       l_int32  leftflag,
                       l_int32  rightflag,
                       PIX      **ppixe)
{
l_int32  ws, hs, w, h, x, y, ix, iy, i, npt;
PIX     *pixt1, *pixt2, *pixt3, *pixfg, *pixbg;
SEL     *selh, *selm, *sel_3, *sel;
PTA     *ptah, *ptam;

    PROCNAME("pixGenerateSelBoundary");

    if (ppixe) *ppixe = NULL;
    if (!pixs)
        return (SEL *)ERROR_PTR("pixs not defined", procName, NULL);
    if (pixGetDepth(pixs) != 1)
        return (SEL *)ERROR_PTR("pixs not 1 bpp", procName, NULL);
    if (hitdist < 0 || hitdist > 4 || missdist < 0 || missdist > 4)
        return (SEL *)ERROR_PTR("dist not in {0 .. 4}", procName, NULL);
    if (hitskip < 0 && missskip < 0)
        return (SEL *)ERROR_PTR("no hits or misses", procName, NULL);

        /* Locate the foreground */
    pixClipToForeground(pixs, &pixt1, NULL);
    if (!pixt1)
        return (SEL *)ERROR_PTR("pixt1 not made", procName, NULL);
    ws = pixGetWidth(pixt1);
    hs = pixGetHeight(pixt1);
    w = ws;
    h = hs;

        /* Crop out a region including the foreground, and add pixels
         * on sides depending on the side flags */
    if (topflag || botflag || leftflag || rightflag) {
        x = y = 0;
        if (topflag) {
            h += missdist + 1;
            y = missdist + 1;
        }
        if (botflag)
            h += missdist + 1;
        if (leftflag) {
            w += missdist + 1;
            x = missdist + 1;
        }
        if (rightflag)
            w += missdist + 1;
        pixt2 = pixCreate(w, h, 1);
        pixRasterop(pixt2, x, y, ws, hs, PIX_SRC, pixt1, 0, 0);
    } else {
        pixt2 = pixClone(pixt1);
    }
    if (ppixe)
        *ppixe = pixClone(pixt2);
    pixDestroy(&pixt1);

        /* Identify fg and bg pixels that are exactly hitdist and
         * missdist (rsp) away from the boundary pixels in their set.
         * Then get a subsampled set of these points. */
    sel_3 = selCreateBrick(3, 3, 1, 1, SEL_HIT);
    if (hitskip >= 0) {
        selh = selCreateBrick(2 * hitdist + 1, 2 * hitdist + 1,
                              hitdist, hitdist, SEL_HIT);
        pixt3 = pixErode(NULL, pixt2, selh);
        pixfg = pixErode(NULL, pixt3, sel_3);
        pixXor(pixfg, pixfg, pixt3);
        ptah = pixSubsampleBoundaryPixels(pixfg, hitskip);
        pixDestroy(&pixt3);
        pixDestroy(&pixfg);
        selDestroy(&selh);
    }
    if (missskip >= 0) {
        selm = selCreateBrick(2 * missdist + 1, 2 * missdist + 1,
                              missdist, missdist, SEL_HIT);
        pixt3 = pixDilate(NULL, pixt2, selm);
        pixbg = pixDilate(NULL, pixt3, sel_3);
        pixXor(pixbg, pixbg, pixt3);
        ptam = pixSubsampleBoundaryPixels(pixbg, missskip);
        pixDestroy(&pixt3);
        pixDestroy(&pixbg);
        selDestroy(&selm);
    }
    selDestroy(&sel_3);
    pixDestroy(&pixt2);

        /* Generate the hit-miss sel from these point */
    sel = selCreateBrick(h, w, h / 2, w / 2, SEL_DONT_CARE);
    if (hitskip >= 0) {
        npt = ptaGetCount(ptah);
        for (i = 0; i < npt; i++) {
            ptaGetIPt(ptah, i, &ix, &iy);
            selSetElement(sel, iy, ix, SEL_HIT);
        }
    }
    if (missskip >= 0) {
        npt = ptaGetCount(ptam);
        for (i = 0; i < npt; i++) {
            ptaGetIPt(ptam, i, &ix, &iy);
            selSetElement(sel, iy, ix, SEL_MISS);
        }
    }

    ptaDestroy(&ptah);
    ptaDestroy(&ptam);
    return sel;
}


/*-----------------------------------------------------------------*
 *              Accumulate data on runs along lines                *
 *-----------------------------------------------------------------*/
/*!
 *  pixGetRunCentersOnLine()
 *
 *      Input:  pixs (1 bpp)
 *              x, y (set one of these to -1; see notes)
 *              minlength (minimum length of acceptable run)
 *      Return: numa of fg runs, or null on error
 *
 *  Notes:
 *      (1) Action: this function computes the fg (black) and bg (white)
 *          pixel runlengths along the specified horizontal or vertical line,
 *          and returns a Numa of the "center" pixels of each fg run
 *          whose length equals or exceeds the minimum length.
 *      (2) This only works on horizontal and vertical lines.
 *      (3) For horizontal runs, set x = -1 and y to the value
 *          for all points along the raster line.  For vertical runs,
 *          set y = -1 and x to the value for all points along the
 *          pixel column.
 *      (4) For horizontal runs, the points in the Numa are the x
 *          values in the center of fg runs that are of length at
 *          least 'minlength'.  For vertical runs, the points in the
 *          Numa are the y values in the center of fg runs, again
 *          of length 'minlength' or greater.
 *      (5) If there are no fg runs along the line that satisfy the
 *          minlength constraint, the returned Numa is empty.  This
 *          is not an error.
 */
NUMA *
pixGetRunCentersOnLine(PIX     *pixs,
                       l_int32  x,
                       l_int32  y,
                       l_int32  minlength)
{
l_int32   w, h, i, r, nruns, len;
NUMA     *naruns, *nad;

    PROCNAME("pixGetRunCentersOnLine");

    if (!pixs)
        return (NUMA *)ERROR_PTR("pixs not defined", procName, NULL);
    if (pixGetDepth(pixs) != 1)
        return (NUMA *)ERROR_PTR("pixs not 1 bpp", procName, NULL);
    if (x != -1 && y != -1)
        return (NUMA *)ERROR_PTR("x or y must be -1", procName, NULL);
    if (x == -1 && y == -1)
        return (NUMA *)ERROR_PTR("x or y cannot both be -1", procName, NULL);

    if ((nad = numaCreate(0)) == NULL)
        return (NUMA *)ERROR_PTR("nad not made", procName, NULL);
    w = pixGetWidth(pixs);
    h = pixGetHeight(pixs);
    if (x == -1) {  /* horizontal run */
        if (y < 0 || y >= h)
            return nad;
        naruns = pixGetRunsOnLine(pixs, 0, y, w - 1, y);
    } else {  /* vertical run */
        if (x < 0 || x >= w)
            return nad;
        naruns = pixGetRunsOnLine(pixs, x, 0, x, h - 1);
    }
    nruns = numaGetCount(naruns);

        /* extract run center values; the first run is always bg */
    r = 0;  /* cumulative distance along line */
    for (i = 0; i < nruns; i++) {
        if (i % 2 == 0) {  /* bg run */
            numaGetIValue(naruns, i, &len);
            r += len;
            continue;
        } else {
            numaGetIValue(naruns, i, &len);
            if (len >= minlength)
                numaAddNumber(nad, r + len / 2);
            r += len;
        }
    }

    numaDestroy(&naruns);
    return nad;
}


/*!
 *  pixGetRunsOnLine()
 *
 *      Input:  pixs (1 bpp)
 *              x1, y1, x2, y2
 *      Return: numa, or null on error
 *
 *  Notes:
 *      (1) Action: this function uses the bresenham algorithm to compute
 *          the pixels along the specified line.  It returns a Numa of the
 *          runlengths of the fg (black) and bg (white) runs, always
 *          starting with a white run.
 *      (2) If the first pixel on the line is black, the length of the
 *          first returned run (which is white) is 0.
 */
NUMA *
pixGetRunsOnLine(PIX     *pixs,
                 l_int32  x1,
                 l_int32  y1,
                 l_int32  x2,
                 l_int32  y2)
{
l_int32   w, h, x, y, npts;
l_int32   i, runlen, preval;
l_uint32  val;
NUMA     *numa;
PTA      *pta;

    PROCNAME("pixGetRunsOnLine");

    if (!pixs)
        return (NUMA *)ERROR_PTR("pixs not defined", procName, NULL);
    if (pixGetDepth(pixs) != 1)
        return (NUMA *)ERROR_PTR("pixs not 1 bpp", procName, NULL);

    w = pixGetWidth(pixs);
    h = pixGetHeight(pixs);
    if (x1 < 0 || x1 >= w)
        return (NUMA *)ERROR_PTR("x1 not valid", procName, NULL);
    if (x2 < 0 || x2 >= w)
        return (NUMA *)ERROR_PTR("x2 not valid", procName, NULL);
    if (y1 < 0 || y1 >= h)
        return (NUMA *)ERROR_PTR("y1 not valid", procName, NULL);
    if (y2 < 0 || y2 >= h)
        return (NUMA *)ERROR_PTR("y2 not valid", procName, NULL);

    if ((pta = generatePtaLine(x1, y1, x2, y2)) == NULL)
        return (NUMA *)ERROR_PTR("pta not made", procName, NULL);
    if ((npts = ptaGetCount(pta)) == 0)
        return (NUMA *)ERROR_PTR("pta has no pts", procName, NULL);

    if ((numa = numaCreate(0)) == NULL)
        return (NUMA *)ERROR_PTR("numa not made", procName, NULL);

    for (i = 0; i < npts; i++) {
        ptaGetIPt(pta, i, &x, &y);
        pixGetPixel(pixs, x, y, &val);
        if (i == 0) {
            if (val == 1) {  /* black pixel; append white run of size 0 */
                numaAddNumber(numa, 0);
            }
            preval = val;
            runlen = 1;
            continue;
        }
        if (val == preval) {  /* extend current run */
            preval = val;
            runlen++;
        } else {  /* end previous run */
            numaAddNumber(numa, runlen);
            preval = val;
            runlen = 1;
        }
    }
    numaAddNumber(numa, runlen);  /* append last run */

    ptaDestroy(&pta);
    return numa;
}


/*-----------------------------------------------------------------*
 *        Subsample boundary pixels in relatively ordered way      *
 *-----------------------------------------------------------------*/
/*!
 *  pixSubsampleBoundaryPixels()
 *
 *      Input:  pixs (1 bpp, with only boundary pixels in fg)
 *              skip (number to skip between samples as you traverse boundary)
 *      Return: pta, or null on error
 *
 *  Notes:
 *      (1) If skip = 0, we take all the fg pixels.
 *      (2) We try to traverse the boundaries in a regular way.
 *          Some pixels may be missed, and these are then subsampled
 *          randomly with a fraction determined by 'skip'.
 *      (3) The most natural approach is to use a depth first (stack-based)
 *          method to find the fg pixels.  However, the pixel runs are
 *          4-connected and there are relatively few branches.  So
 *          instead of doing a proper depth-first search, we get nearly
 *          the same result using two nested while loops: the outer
 *          one continues a raster-based search for the next fg pixel,
 *          and the inner one does a reasonable job running along
 *          each 4-connected coutour.
 */
PTA *
pixSubsampleBoundaryPixels(PIX     *pixs,
                           l_int32  skip)
{
l_int32  x, y, xn, yn, xs, ys, xa, ya, count;
PIX     *pixt;
PTA     *pta;

    PROCNAME("pixSubsampleBoundaryPixels");

    if (!pixs)
        return (PTA *)ERROR_PTR("pixs not defined", procName, NULL);
    if (pixGetDepth(pixs) != 1)
        return (PTA *)ERROR_PTR("pixs not 1 bpp", procName, NULL);
    if (skip < 0)
        return (PTA *)ERROR_PTR("skip < 0", procName, NULL);

    if (skip == 0)
        return ptaGetPixelsFromPix(pixs, NULL);

    pta = ptaCreate(0);
    pixt = pixCopy(NULL, pixs);
    xs = ys = 0;
    while (nextOnPixelInRaster(pixt, xs, ys, &xn, &yn)) {  /* new series */
        xs = xn;
        ys = yn;

            /* Add first point in this series */
        ptaAddPt(pta, xs, ys);

            /* Trace out boundary, erasing all and saving every (skip + 1)th */
        x = xs;
        y = ys;
        pixSetPixel(pixt, x, y, 0);
        count = 0;
        while (adjacentOnPixelInRaster(pixt, x, y, &xa, &ya)) {
            x = xa;
            y = ya;
            pixSetPixel(pixt, x, y, 0);
            if (count == skip) {
                ptaAddPt(pta, x, y);
                count = 0;
            } else {
                count++;
            }
        }
    }

    pixDestroy(&pixt);
    return pta;
}


/*!
 *  adjacentOnPixelInRaster()
 *
 *      Input:  pixs (1 bpp)
 *              x, y (current pixel)
 *              xa, ya (adjacent ON pixel, found by simple CCW search)
 *      Return: 1 if a pixel is found; 0 otherwise or on error
 *
 *  Notes:
 *      (1) Search is in 4-connected directions first; then on diagonals.
 *          This allows traversal along a 4-connected boundary.
 */
l_int32
adjacentOnPixelInRaster(PIX      *pixs,
                        l_int32   x,
                        l_int32   y,
                        l_int32  *pxa,
                        l_int32  *pya)
{
l_int32   w, h, i, xa, ya, found;
l_int32   xdel[] = {-1, 0, 1, 0, -1, 1, 1, -1};
l_int32   ydel[] = {0, 1, 0, -1, 1, 1, -1, -1};
l_uint32  val;

    PROCNAME("adjacentOnPixelInRaster");

    if (!pixs)
        return ERROR_INT("pixs not defined", procName, 0);
    if (pixGetDepth(pixs) != 1)
        return ERROR_INT("pixs not 1 bpp", procName, 0);
    w = pixGetWidth(pixs);
    h = pixGetHeight(pixs);
    found = 0;
    for (i = 0; i < 8; i++) {
        xa = x + xdel[i];
        ya = y + ydel[i];
        if (xa < 0 || xa >= w || ya < 0 || ya >= h)
            continue;
        pixGetPixel(pixs, xa, ya, &val);
        if (val == 1) {
            found = 1;
            *pxa = xa;
            *pya = ya;
            break;
        }
    }
    return found;
}



/*-----------------------------------------------------------------*
 *          Display generated sel with originating image           *
 *-----------------------------------------------------------------*/
/*!
 *  pixDisplayHitMissSel()
 *
 *      Input:  pixs (1 bpp)
 *              sel (hit-miss in general)
 *              scalefactor (an integer >= 1; use 0 for default)
 *              hitcolor (RGB0 color for center of hit pixels)
 *              misscolor (RGB0 color for center of miss pixels)
 *      Return: pixd (RGB showing both pixs and sel), or null on error
 *  Notes:
 *    (1) We don't allow scalefactor to be larger than MAX_SEL_SCALEFACTOR
 *    (2) The colors are conveniently given as 4 bytes in hex format,
 *        such as 0xff008800.  The least significant byte is ignored.
 */
PIX *
pixDisplayHitMissSel(PIX      *pixs,
                     SEL      *sel,
                     l_int32   scalefactor,
                     l_uint32  hitcolor,
                     l_uint32  misscolor)
{
l_int32    i, j, type;
l_float32  fscale;
PIX       *pixt, *pixd;
PIXCMAP   *cmap;

    PROCNAME("pixDisplayHitMissSel");

    if (!pixs)
        return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
    if (pixGetDepth(pixs) != 1)
        return (PIX *)ERROR_PTR("pixs not 1 bpp", procName, NULL);
    if (!sel)
        return (PIX *)ERROR_PTR("sel not defined", procName, NULL);

    if (scalefactor <= 0)
        scalefactor = DEFAULT_SEL_SCALEFACTOR;
    if (scalefactor > MAX_SEL_SCALEFACTOR) {
        L_WARNING("scalefactor too large; using max value\n", procName);
        scalefactor = MAX_SEL_SCALEFACTOR;
    }

        /* Generate a version of pixs with a colormap */
    pixt = pixConvert1To8(NULL, pixs, 0, 1);
    cmap = pixcmapCreate(8);
    pixcmapAddColor(cmap, 255, 255, 255);
    pixcmapAddColor(cmap, 0, 0, 0);
    pixcmapAddColor(cmap, hitcolor >> 24, (hitcolor >> 16) & 0xff,
                    (hitcolor >> 8) & 0xff);
    pixcmapAddColor(cmap, misscolor >> 24, (misscolor >> 16) & 0xff,
                    (misscolor >> 8) & 0xff);
    pixSetColormap(pixt, cmap);

        /* Color the hits and misses */
    for (i = 0; i < sel->sy; i++) {
        for (j = 0; j < sel->sx; j++) {
            selGetElement(sel, i, j, &type);
            if (type == SEL_DONT_CARE)
                continue;
            if (type == SEL_HIT)
                pixSetPixel(pixt, j, i, 2);
            else  /* type == SEL_MISS */
                pixSetPixel(pixt, j, i, 3);
        }
    }

        /* Scale it up */
    fscale = (l_float32)scalefactor;
    pixd = pixScaleBySampling(pixt, fscale, fscale);

    pixDestroy(&pixt);
    return pixd;
}