pranavjha/text-detector

View on GitHub
third-party/leptonica/src/boxfunc2.c

Summary

Maintainability
Test Coverage
/*====================================================================*
 -  Copyright (C) 2001 Leptonica.  All rights reserved.
 -
 -  Redistribution and use in source and binary forms, with or without
 -  modification, are permitted provided that the following conditions
 -  are met:
 -  1. Redistributions of source code must retain the above copyright
 -     notice, this list of conditions and the following disclaimer.
 -  2. Redistributions in binary form must reproduce the above
 -     copyright notice, this list of conditions and the following
 -     disclaimer in the documentation and/or other materials
 -     provided with the distribution.
 -
 -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
 -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *====================================================================*/

/*
 *   boxfunc2.c
 *
 *      Boxa/Box transform (shift, scale) and orthogonal rotation
 *           BOXA            *boxaTransform()
 *           BOX             *boxTransform()
 *           BOXA            *boxaTransformOrdered()
 *           BOX             *boxTransformOrdered()
 *           BOXA            *boxaRotateOrth()
 *           BOX             *boxRotateOrth()
 *
 *      Boxa sort
 *           BOXA            *boxaSort()
 *           BOXA            *boxaBinSort()
 *           BOXA            *boxaSortByIndex()
 *           BOXAA           *boxaSort2d()
 *           BOXAA           *boxaSort2dByIndex()
 *
 *      Boxa statistics
 *           BOX             *boxaGetRankSize()
 *           BOX             *boxaGetMedian()
 *           l_int32          boxaGetAverageSize()
 *
 *      Boxa array extraction
 *           l_int32          boxaExtractAsNuma()
 *           l_int32          boxaExtractAsPta()
 *
 *      Other Boxaa functions
 *           l_int32          boxaaGetExtent()
 *           BOXA            *boxaaFlattenToBoxa()
 *           BOXA            *boxaaFlattenAligned()
 *           BOXAA           *boxaEncapsulateAligned()
 *           l_int32          boxaaAlignBox()
 */

#include <math.h>
#include "allheaders.h"

    /* For more than this number of c.c. in a binarized image of
     * semi-perimeter (w + h) about 5000 or less, the O(n) binsort
     * is faster than the O(nlogn) shellsort.  */
static const l_int32   MIN_COMPS_FOR_BIN_SORT = 200;


/*---------------------------------------------------------------------*
 *      Boxa/Box transform (shift, scale) and orthogonal rotation      *
 *---------------------------------------------------------------------*/
/*!
 *  boxaTransform()
 *
 *      Input:  boxa
 *              shiftx, shifty
 *              scalex, scaley
 *      Return: boxad, or null on error
 *
 *  Notes:
 *      (1) This is a very simple function that first shifts, then scales.
 */
BOXA *
boxaTransform(BOXA      *boxas,
              l_int32    shiftx,
              l_int32    shifty,
              l_float32  scalex,
              l_float32  scaley)
{
l_int32  i, n;
BOX     *boxs, *boxd;
BOXA    *boxad;

    PROCNAME("boxaTransform");

    if (!boxas)
        return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
    n = boxaGetCount(boxas);
    if ((boxad = boxaCreate(n)) == NULL)
        return (BOXA *)ERROR_PTR("boxad not made", procName, NULL);
    for (i = 0; i < n; i++) {
        if ((boxs = boxaGetBox(boxas, i, L_CLONE)) == NULL)
            return (BOXA *)ERROR_PTR("boxs not found", procName, NULL);
        boxd = boxTransform(boxs, shiftx, shifty, scalex, scaley);
        boxDestroy(&boxs);
        boxaAddBox(boxad, boxd, L_INSERT);
    }

    return boxad;
}


/*!
 *  boxTransform()
 *
 *      Input:  box
 *              shiftx, shifty
 *              scalex, scaley
 *      Return: boxd, or null on error
 *
 *  Notes:
 *      (1) This is a very simple function that first shifts, then scales.
 *      (2) If the box is invalid, a new invalid box is returned.
 */
BOX *
boxTransform(BOX       *box,
             l_int32    shiftx,
             l_int32    shifty,
             l_float32  scalex,
             l_float32  scaley)
{
    PROCNAME("boxTransform");

    if (!box)
        return (BOX *)ERROR_PTR("box not defined", procName, NULL);
    if (box->w <= 0 || box->h <= 0)
        return boxCreate(0, 0, 0, 0);
    else
        return boxCreate((l_int32)(scalex * (box->x + shiftx) + 0.5),
                         (l_int32)(scaley * (box->y + shifty) + 0.5),
                         (l_int32)(L_MAX(1.0, scalex * box->w + 0.5)),
                         (l_int32)(L_MAX(1.0, scaley * box->h + 0.5)));
}


/*!
 *  boxaTransformOrdered()
 *
 *      Input:  boxa
 *              shiftx, shifty
 *              scalex, scaley
 *              xcen, ycen (center of rotation)
 *              angle (in radians; clockwise is positive)
 *              order (one of 6 combinations: L_TR_SC_RO, ...)
 *      Return: boxd, or null on error
 *
 *  Notes:
 *      (1) This allows a sequence of linear transforms on each box.
 *          the transforms are from the affine set, composed of
 *          shift, scaling and rotation, and the order of the
 *          transforms is specified.
 *      (2) Although these operations appear to be on an infinite
 *          2D plane, in practice the region of interest is clipped
 *          to a finite image.  The center of rotation is usually taken
 *          with respect to the image (either the UL corner or the
 *          center).  A translation can have two very different effects:
 *            (a) Moves the boxes across the fixed image region.
 *            (b) Moves the image origin, causing a change in the image
 *                region and an opposite effective translation of the boxes.
 *          This function should only be used for (a), where the image
 *          region is fixed on translation.  If the image region is
 *          changed by the translation, use instead the functions
 *          in affinecompose.c, where the image region and rotation
 *          center can be computed from the actual clipping due to
 *          translation of the image origin.
 *      (3) See boxTransformOrdered() for usage and implementation details.
 */
BOXA *
boxaTransformOrdered(BOXA      *boxas,
                     l_int32    shiftx,
                     l_int32    shifty,
                     l_float32  scalex,
                     l_float32  scaley,
                     l_int32    xcen,
                     l_int32    ycen,
                     l_float32  angle,
                     l_int32    order)
{
l_int32  i, n;
BOX     *boxs, *boxd;
BOXA    *boxad;

    PROCNAME("boxaTransformOrdered");

    if (!boxas)
        return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
    n = boxaGetCount(boxas);
    if ((boxad = boxaCreate(n)) == NULL)
        return (BOXA *)ERROR_PTR("boxad not made", procName, NULL);
    for (i = 0; i < n; i++) {
        if ((boxs = boxaGetBox(boxas, i, L_CLONE)) == NULL)
            return (BOXA *)ERROR_PTR("boxs not found", procName, NULL);
        boxd = boxTransformOrdered(boxs, shiftx, shifty, scalex, scaley,
                                   xcen, ycen, angle, order);
        boxDestroy(&boxs);
        boxaAddBox(boxad, boxd, L_INSERT);
    }

    return boxad;
}


/*!
 *  boxTransformOrdered()
 *
 *      Input:  boxs
 *              shiftx, shifty
 *              scalex, scaley
 *              xcen, ycen (center of rotation)
 *              angle (in radians; clockwise is positive)
 *              order (one of 6 combinations: L_TR_SC_RO, ...)
 *      Return: boxd, or null on error
 *
 *  Notes:
 *      (1) This allows a sequence of linear transforms, composed of
 *          shift, scaling and rotation, where the order of the
 *          transforms is specified.
 *      (2) The rotation is taken about a point specified by (xcen, ycen).
 *          Let the components of the vector from the center of rotation
 *          to the box center be (xdif, ydif):
 *            xdif = (bx + 0.5 * bw) - xcen
 *            ydif = (by + 0.5 * bh) - ycen
 *          Then the box center after rotation has new components:
 *            bxcen = xcen + xdif * cosa + ydif * sina
 *            bycen = ycen + ydif * cosa - xdif * sina
 *          where cosa and sina are the cos and sin of the angle,
 *          and the enclosing box for the rotated box has size:
 *            rw = |bw * cosa| + |bh * sina|
 *            rh = |bh * cosa| + |bw * sina|
 *          where bw and bh are the unrotated width and height.
 *          Then the box UL corner (rx, ry) is
 *            rx = bxcen - 0.5 * rw
 *            ry = bycen - 0.5 * rh
 *      (3) The center of rotation specified by args @xcen and @ycen
 *          is the point BEFORE any translation or scaling.  If the
 *          rotation is not the first operation, this function finds
 *          the actual center at the time of rotation.  It does this
 *          by making the following assumptions:
 *             (1) Any scaling is with respect to the UL corner, so
 *                 that the center location scales accordingly.
 *             (2) A translation does not affect the center of
 *                 the image; it just moves the boxes.
 *          We always use assumption (1).  However, assumption (2)
 *          will be incorrect if the apparent translation is due
 *          to a clipping operation that, in effect, moves the
 *          origin of the image.  In that case, you should NOT use
 *          these simple functions.  Instead, use the functions
 *          in affinecompose.c, where the rotation center can be
 *          computed from the actual clipping due to translation
 *          of the image origin.
 */
BOX *
boxTransformOrdered(BOX       *boxs,
                    l_int32    shiftx,
                    l_int32    shifty,
                    l_float32  scalex,
                    l_float32  scaley,
                    l_int32    xcen,
                    l_int32    ycen,
                    l_float32  angle,
                    l_int32    order)
{
l_int32    bx, by, bw, bh, tx, ty, tw, th;
l_int32    xcent, ycent;  /* transformed center of rotation due to scaling */
l_float32  sina, cosa, xdif, ydif, rx, ry, rw, rh;
BOX       *boxd;

    PROCNAME("boxTransformOrdered");

    if (!boxs)
        return (BOX *)ERROR_PTR("boxs not defined", procName, NULL);
    if (order != L_TR_SC_RO && order != L_SC_RO_TR && order != L_RO_TR_SC &&
        order != L_TR_RO_SC && order != L_RO_SC_TR && order != L_SC_TR_RO)
        return (BOX *)ERROR_PTR("order invalid", procName, NULL);

    boxGetGeometry(boxs, &bx, &by, &bw, &bh);
    if (bw <= 0 || bh <= 0)  /* invalid */
        return boxCreate(0, 0, 0, 0);
    if (angle != 0.0) {
        sina = sin(angle);
        cosa = cos(angle);
    }

    if (order == L_TR_SC_RO) {
        tx = (l_int32)(scalex * (bx + shiftx) + 0.5);
        ty = (l_int32)(scaley * (by + shifty) + 0.5);
        tw = (l_int32)(L_MAX(1.0, scalex * bw + 0.5));
        th = (l_int32)(L_MAX(1.0, scaley * bh + 0.5));
        xcent = (l_int32)(scalex * xcen + 0.5);
        ycent = (l_int32)(scaley * ycen + 0.5);
        if (angle == 0.0) {
            boxd = boxCreate(tx, ty, tw, th);
        } else {
            xdif = tx + 0.5 * tw - xcent;
            ydif = ty + 0.5 * th - ycent;
            rw = L_ABS(tw * cosa) + L_ABS(th * sina);
            rh = L_ABS(th * cosa) + L_ABS(tw * sina);
            rx = xcent + xdif * cosa - ydif * sina - 0.5 * rw;
            ry = ycent + ydif * cosa + xdif * sina - 0.5 * rh;
            boxd = boxCreate((l_int32)rx, (l_int32)ry, (l_int32)rw,
                             (l_int32)rh);
        }
    } else if (order == L_SC_TR_RO) {
        tx = (l_int32)(scalex * bx + shiftx + 0.5);
        ty = (l_int32)(scaley * by + shifty + 0.5);
        tw = (l_int32)(L_MAX(1.0, scalex * bw + 0.5));
        th = (l_int32)(L_MAX(1.0, scaley * bh + 0.5));
        xcent = (l_int32)(scalex * xcen + 0.5);
        ycent = (l_int32)(scaley * ycen + 0.5);
        if (angle == 0.0) {
            boxd = boxCreate(tx, ty, tw, th);
        } else {
            xdif = tx + 0.5 * tw - xcent;
            ydif = ty + 0.5 * th - ycent;
            rw = L_ABS(tw * cosa) + L_ABS(th * sina);
            rh = L_ABS(th * cosa) + L_ABS(tw * sina);
            rx = xcent + xdif * cosa - ydif * sina - 0.5 * rw;
            ry = ycent + ydif * cosa + xdif * sina - 0.5 * rh;
            boxd = boxCreate((l_int32)rx, (l_int32)ry, (l_int32)rw,
                             (l_int32)rh);
        }
    } else if (order == L_RO_TR_SC) {
        if (angle == 0.0) {
            rx = bx;
            ry = by;
            rw = bw;
            rh = bh;
        } else {
            xdif = bx + 0.5 * bw - xcen;
            ydif = by + 0.5 * bh - ycen;
            rw = L_ABS(bw * cosa) + L_ABS(bh * sina);
            rh = L_ABS(bh * cosa) + L_ABS(bw * sina);
            rx = xcen + xdif * cosa - ydif * sina - 0.5 * rw;
            ry = ycen + ydif * cosa + xdif * sina - 0.5 * rh;
        }
        tx = (l_int32)(scalex * (rx + shiftx) + 0.5);
        ty = (l_int32)(scaley * (ry + shifty) + 0.5);
        tw = (l_int32)(L_MAX(1.0, scalex * rw + 0.5));
        th = (l_int32)(L_MAX(1.0, scaley * rh + 0.5));
        boxd = boxCreate(tx, ty, tw, th);
    } else if (order == L_RO_SC_TR) {
        if (angle == 0.0) {
            rx = bx;
            ry = by;
            rw = bw;
            rh = bh;
        } else {
            xdif = bx + 0.5 * bw - xcen;
            ydif = by + 0.5 * bh - ycen;
            rw = L_ABS(bw * cosa) + L_ABS(bh * sina);
            rh = L_ABS(bh * cosa) + L_ABS(bw * sina);
            rx = xcen + xdif * cosa - ydif * sina - 0.5 * rw;
            ry = ycen + ydif * cosa + xdif * sina - 0.5 * rh;
        }
        tx = (l_int32)(scalex * rx + shiftx + 0.5);
        ty = (l_int32)(scaley * ry + shifty + 0.5);
        tw = (l_int32)(L_MAX(1.0, scalex * rw + 0.5));
        th = (l_int32)(L_MAX(1.0, scaley * rh + 0.5));
        boxd = boxCreate(tx, ty, tw, th);
    } else if (order == L_TR_RO_SC) {
        tx = bx + shiftx;
        ty = by + shifty;
        if (angle == 0.0) {
            rx = tx;
            ry = ty;
            rw = bw;
            rh = bh;
        } else {
            xdif = tx + 0.5 * bw - xcen;
            ydif = ty + 0.5 * bh - ycen;
            rw = L_ABS(bw * cosa) + L_ABS(bh * sina);
            rh = L_ABS(bh * cosa) + L_ABS(bw * sina);
            rx = xcen + xdif * cosa - ydif * sina - 0.5 * rw;
            ry = ycen + ydif * cosa + xdif * sina - 0.5 * rh;
        }
        tx = (l_int32)(scalex * rx + 0.5);
        ty = (l_int32)(scaley * ry + 0.5);
        tw = (l_int32)(L_MAX(1.0, scalex * rw + 0.5));
        th = (l_int32)(L_MAX(1.0, scaley * rh + 0.5));
        boxd = boxCreate(tx, ty, tw, th);
    } else {  /* order == L_SC_RO_TR) */
        tx = (l_int32)(scalex * bx + 0.5);
        ty = (l_int32)(scaley * by + 0.5);
        tw = (l_int32)(L_MAX(1.0, scalex * bw + 0.5));
        th = (l_int32)(L_MAX(1.0, scaley * bh + 0.5));
        xcent = (l_int32)(scalex * xcen + 0.5);
        ycent = (l_int32)(scaley * ycen + 0.5);
        if (angle == 0.0) {
            rx = tx;
            ry = ty;
            rw = tw;
            rh = th;
        } else {
            xdif = tx + 0.5 * tw - xcent;
            ydif = ty + 0.5 * th - ycent;
            rw = L_ABS(tw * cosa) + L_ABS(th * sina);
            rh = L_ABS(th * cosa) + L_ABS(tw * sina);
            rx = xcent + xdif * cosa - ydif * sina - 0.5 * rw;
            ry = ycent + ydif * cosa + xdif * sina - 0.5 * rh;
        }
        tx = (l_int32)(rx + shiftx + 0.5);
        ty = (l_int32)(ry + shifty + 0.5);
        tw = (l_int32)(rw + 0.5);
        th = (l_int32)(rh + 0.5);
        boxd = boxCreate(tx, ty, tw, th);
    }

    return boxd;
}


/*!
 *  boxaRotateOrth()
 *
 *      Input:  boxa
 *              w, h (of image in which the boxa is embedded)
 *              rotation (0 = noop, 1 = 90 deg, 2 = 180 deg, 3 = 270 deg;
 *                        all rotations are clockwise)
 *      Return: boxad, or null on error
 *
 *  Notes:
 *      (1) See boxRotateOrth() for details.
 */
BOXA *
boxaRotateOrth(BOXA    *boxas,
               l_int32  w,
               l_int32  h,
               l_int32  rotation)
{
l_int32  i, n;
BOX     *boxs, *boxd;
BOXA    *boxad;

    PROCNAME("boxaRotateOrth");

    if (!boxas)
        return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
    if (rotation < 0 || rotation > 3)
        return (BOXA *)ERROR_PTR("rotation not in {0,1,2,3}", procName, NULL);
    if (rotation == 0)
        return boxaCopy(boxas, L_COPY);

    n = boxaGetCount(boxas);
    if ((boxad = boxaCreate(n)) == NULL)
        return (BOXA *)ERROR_PTR("boxad not made", procName, NULL);
    for (i = 0; i < n; i++) {
        if ((boxs = boxaGetBox(boxas, i, L_CLONE)) == NULL)
            return (BOXA *)ERROR_PTR("boxs not found", procName, NULL);
        boxd = boxRotateOrth(boxs, w, h, rotation);
        boxDestroy(&boxs);
        boxaAddBox(boxad, boxd, L_INSERT);
    }

    return boxad;
}


/*!
 *  boxRotateOrth()
 *
 *      Input:  box
 *              w, h (of image in which the box is embedded)
 *              rotation (0 = noop, 1 = 90 deg, 2 = 180 deg, 3 = 270 deg;
 *                        all rotations are clockwise)
 *      Return: boxd, or null on error
 *
 *  Notes:
 *      (1) Rotate the image with the embedded box by the specified amount.
 *      (2) After rotation, the rotated box is always measured with
 *          respect to the UL corner of the image.
 */
BOX *
boxRotateOrth(BOX     *box,
              l_int32  w,
              l_int32  h,
              l_int32  rotation)
{
l_int32  bx, by, bw, bh, xdist, ydist;

    PROCNAME("boxRotateOrth");

    if (!box)
        return (BOX *)ERROR_PTR("box not defined", procName, NULL);
    if (rotation < 0 || rotation > 3)
        return (BOX *)ERROR_PTR("rotation not in {0,1,2,3}", procName, NULL);
    if (rotation == 0)
        return boxCopy(box);

    boxGetGeometry(box, &bx, &by, &bw, &bh);
    if (bw <= 0 || bh <= 0)  /* invalid */
        return boxCreate(0, 0, 0, 0);
    ydist = h - by - bh;  /* below box */
    xdist = w - bx - bw;  /* to right of box */
    if (rotation == 1)   /* 90 deg cw */
        return boxCreate(ydist, bx, bh, bw);
    else if (rotation == 2)  /* 180 deg cw */
        return boxCreate(xdist, ydist, bw, bh);
    else  /*  rotation == 3, 270 deg cw */
        return boxCreate(by, xdist, bh, bw);
}


/*---------------------------------------------------------------------*
 *                              Boxa sort                              *
 *---------------------------------------------------------------------*/
/*!
 *  boxaSort()
 *
 *      Input:  boxa
 *              sorttype (L_SORT_BY_X, L_SORT_BY_Y,
 *                        L_SORT_BY_RIGHT, L_SORT_BY_BOT,
 *                        L_SORT_BY_WIDTH, L_SORT_BY_HEIGHT,
 *                        L_SORT_BY_MIN_DIMENSION, L_SORT_BY_MAX_DIMENSION,
 *                        L_SORT_BY_PERIMETER, L_SORT_BY_AREA,
 *                        L_SORT_BY_ASPECT_RATIO)
 *              sortorder  (L_SORT_INCREASING, L_SORT_DECREASING)
 *              &naindex (<optional return> index of sorted order into
 *                        original array)
 *      Return: boxad (sorted version of boxas), or null on error
 */
BOXA *
boxaSort(BOXA    *boxas,
         l_int32  sorttype,
         l_int32  sortorder,
         NUMA   **pnaindex)
{
l_int32    i, n, x, y, w, h, size;
BOXA      *boxad;
NUMA      *na, *naindex;

    PROCNAME("boxaSort");

    if (pnaindex) *pnaindex = NULL;
    if (!boxas)
        return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
    if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y &&
        sorttype != L_SORT_BY_RIGHT && sorttype != L_SORT_BY_BOT &&
        sorttype != L_SORT_BY_WIDTH && sorttype != L_SORT_BY_HEIGHT &&
        sorttype != L_SORT_BY_MIN_DIMENSION &&
        sorttype != L_SORT_BY_MAX_DIMENSION &&
        sorttype != L_SORT_BY_PERIMETER &&
        sorttype != L_SORT_BY_AREA &&
        sorttype != L_SORT_BY_ASPECT_RATIO)
        return (BOXA *)ERROR_PTR("invalid sort type", procName, NULL);
    if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING)
        return (BOXA *)ERROR_PTR("invalid sort order", procName, NULL);

        /* Use O(n) binsort if possible */
    n = boxaGetCount(boxas);
    if (n > MIN_COMPS_FOR_BIN_SORT &&
        ((sorttype == L_SORT_BY_X) || (sorttype == L_SORT_BY_Y) ||
         (sorttype == L_SORT_BY_WIDTH) || (sorttype == L_SORT_BY_HEIGHT) ||
         (sorttype == L_SORT_BY_PERIMETER)))
        return boxaBinSort(boxas, sorttype, sortorder, pnaindex);

        /* Build up numa of specific data */
    if ((na = numaCreate(n)) == NULL)
        return (BOXA *)ERROR_PTR("na not made", procName, NULL);
    for (i = 0; i < n; i++) {
        boxaGetBoxGeometry(boxas, i, &x, &y, &w, &h);
        switch (sorttype)
        {
        case L_SORT_BY_X:
            numaAddNumber(na, x);
            break;
        case L_SORT_BY_Y:
            numaAddNumber(na, y);
            break;
        case L_SORT_BY_RIGHT:
            numaAddNumber(na, x + w - 1);
            break;
        case L_SORT_BY_BOT:
            numaAddNumber(na, y + h - 1);
            break;
        case L_SORT_BY_WIDTH:
            numaAddNumber(na, w);
            break;
        case L_SORT_BY_HEIGHT:
            numaAddNumber(na, h);
            break;
        case L_SORT_BY_MIN_DIMENSION:
            size = L_MIN(w, h);
            numaAddNumber(na, size);
            break;
        case L_SORT_BY_MAX_DIMENSION:
            size = L_MAX(w, h);
            numaAddNumber(na, size);
            break;
        case L_SORT_BY_PERIMETER:
            size = w + h;
            numaAddNumber(na, size);
            break;
        case L_SORT_BY_AREA:
            size = w * h;
            numaAddNumber(na, size);
            break;
        case L_SORT_BY_ASPECT_RATIO:
            numaAddNumber(na, (l_float32)w / (l_float32)h);
            break;
        default:
            L_WARNING("invalid sort type\n", procName);
        }
    }

        /* Get the sort index for data array */
    if ((naindex = numaGetSortIndex(na, sortorder)) == NULL)
        return (BOXA *)ERROR_PTR("naindex not made", procName, NULL);

        /* Build up sorted boxa using sort index */
    boxad = boxaSortByIndex(boxas, naindex);

    if (pnaindex)
        *pnaindex = naindex;
    else
        numaDestroy(&naindex);
    numaDestroy(&na);
    return boxad;
}


/*!
 *  boxaBinSort()
 *
 *      Input:  boxa
 *              sorttype (L_SORT_BY_X, L_SORT_BY_Y, L_SORT_BY_WIDTH,
 *                        L_SORT_BY_HEIGHT, L_SORT_BY_PERIMETER)
 *              sortorder  (L_SORT_INCREASING, L_SORT_DECREASING)
 *              &naindex (<optional return> index of sorted order into
 *                        original array)
 *      Return: boxad (sorted version of boxas), or null on error
 *
 *  Notes:
 *      (1) For a large number of boxes (say, greater than 1000), this
 *          O(n) binsort is much faster than the O(nlogn) shellsort.
 *          For 5000 components, this is over 20x faster than boxaSort().
 *      (2) Consequently, boxaSort() calls this function if it will
 *          likely go much faster.
 */
BOXA *
boxaBinSort(BOXA    *boxas,
            l_int32  sorttype,
            l_int32  sortorder,
            NUMA   **pnaindex)
{
l_int32  i, n, x, y, w, h;
BOXA    *boxad;
NUMA    *na, *naindex;

    PROCNAME("boxaBinSort");

    if (pnaindex) *pnaindex = NULL;
    if (!boxas)
        return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
    if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y &&
        sorttype != L_SORT_BY_WIDTH && sorttype != L_SORT_BY_HEIGHT &&
        sorttype != L_SORT_BY_PERIMETER)
        return (BOXA *)ERROR_PTR("invalid sort type", procName, NULL);
    if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING)
        return (BOXA *)ERROR_PTR("invalid sort order", procName, NULL);

        /* Generate Numa of appropriate box dimensions */
    n = boxaGetCount(boxas);
    if ((na = numaCreate(n)) == NULL)
        return (BOXA *)ERROR_PTR("na not made", procName, NULL);
    for (i = 0; i < n; i++) {
        boxaGetBoxGeometry(boxas, i, &x, &y, &w, &h);
        switch (sorttype)
        {
        case L_SORT_BY_X:
            numaAddNumber(na, x);
            break;
        case L_SORT_BY_Y:
            numaAddNumber(na, y);
            break;
        case L_SORT_BY_WIDTH:
            numaAddNumber(na, w);
            break;
        case L_SORT_BY_HEIGHT:
            numaAddNumber(na, h);
            break;
        case L_SORT_BY_PERIMETER:
            numaAddNumber(na, w + h);
            break;
        default:
            L_WARNING("invalid sort type\n", procName);
        }
    }

        /* Get the sort index for data array */
    if ((naindex = numaGetBinSortIndex(na, sortorder)) == NULL)
        return (BOXA *)ERROR_PTR("naindex not made", procName, NULL);

        /* Build up sorted boxa using the sort index */
    boxad = boxaSortByIndex(boxas, naindex);

    if (pnaindex)
        *pnaindex = naindex;
    else
        numaDestroy(&naindex);
    numaDestroy(&na);
    return boxad;
}


/*!
 *  boxaSortByIndex()
 *
 *      Input:  boxas
 *              naindex (na that maps from the new boxa to the input boxa)
 *      Return: boxad (sorted), or null on error
 */
BOXA *
boxaSortByIndex(BOXA  *boxas,
                NUMA  *naindex)
{
l_int32  i, n, index;
BOX     *box;
BOXA    *boxad;

    PROCNAME("boxaSortByIndex");

    if (!boxas)
        return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
    if (!naindex)
        return (BOXA *)ERROR_PTR("naindex not defined", procName, NULL);

    n = boxaGetCount(boxas);
    boxad = boxaCreate(n);
    for (i = 0; i < n; i++) {
        numaGetIValue(naindex, i, &index);
        box = boxaGetBox(boxas, index, L_COPY);
        boxaAddBox(boxad, box, L_INSERT);
    }

    return boxad;
}


/*!
 *  boxaSort2d()
 *
 *      Input:  boxas
 *              &naa (<optional return> numaa with sorted indices
 *                    whose values are the indices of the input array)
 *              delta1 (min overlap that permits aggregation of a box
 *                      onto a boxa of horizontally-aligned boxes; pass 1)
 *              delta2 (min overlap that permits aggregation of a box
 *                      onto a boxa of horizontally-aligned boxes; pass 2)
 *              minh1 (components less than this height either join an
 *                     existing boxa or are set aside for pass 2)
 *      Return: baa (2d sorted version of boxa), or null on error
 *
 *  Notes:
 *      (1) The final result is a sort where the 'fast scan' direction is
 *          left to right, and the 'slow scan' direction is from top
 *          to bottom.  Each boxa in the baa represents a sorted set
 *          of boxes from left to right.
 *      (2) Three passes are used to aggregate the boxas, which can correspond
 *          to characters or words in a line of text.  In pass 1, only
 *          taller components, which correspond to xheight or larger,
 *          are permitted to start a new boxa.  In pass 2, the remaining
 *          vertically-challenged components are allowed to join an
 *          existing boxa or start a new one.  In pass 3, boxa whose extent
 *          is overlapping are joined.  After that, the boxes in each
 *          boxa are sorted horizontally, and finally the boxa are
 *          sorted vertically.
 *      (3) If delta1 < 0, the first pass allows aggregation when
 *          boxes in the same boxa do not overlap vertically.
 *          The distance by which they can miss and still be aggregated
 *          is the absolute value |delta1|.   Similar for delta2 on
 *          the second pass.
 *      (4) On the first pass, any component of height less than minh1
 *          cannot start a new boxa; it's put aside for later insertion.
 *      (5) On the second pass, any small component that doesn't align
 *          with an existing boxa can start a new one.
 *      (6) This can be used to identify lines of text from
 *          character or word bounding boxes.
 */
BOXAA *
boxaSort2d(BOXA    *boxas,
           NUMAA  **pnaad,
           l_int32  delta1,
           l_int32  delta2,
           l_int32  minh1)
{
l_int32  i, index, h, nt, ne, n, m, ival;
BOX     *box;
BOXA    *boxa, *boxae, *boxan, *boxa1, *boxa2, *boxa3, *boxav, *boxavs;
BOXAA   *baa, *baa1, *baad;
NUMA    *naindex, *nae, *nan, *nah, *nav, *na1, *na2, *nad, *namap;
NUMAA   *naa, *naa1, *naad;

    PROCNAME("boxaSort2d");

    if (pnaad) *pnaad = NULL;
    if (!boxas)
        return (BOXAA *)ERROR_PTR("boxas not defined", procName, NULL);

        /* Sort from left to right */
    if ((boxa = boxaSort(boxas, L_SORT_BY_X, L_SORT_INCREASING, &naindex))
                    == NULL)
        return (BOXAA *)ERROR_PTR("boxa not made", procName, NULL);

        /* First pass: assign taller boxes to boxa by row */
    nt = boxaGetCount(boxa);
    baa = boxaaCreate(0);
    naa = numaaCreate(0);
    boxae = boxaCreate(0);  /* save small height boxes here */
    nae = numaCreate(0);  /* keep track of small height boxes */
    for (i = 0; i < nt; i++) {
        box = boxaGetBox(boxa, i, L_CLONE);
        boxGetGeometry(box, NULL, NULL, NULL, &h);
        if (h < minh1) {  /* save for 2nd pass */
            boxaAddBox(boxae, box, L_INSERT);
            numaAddNumber(nae, i);
        } else {
            n = boxaaGetCount(baa);
            boxaaAlignBox(baa, box, delta1, &index);
            if (index < n) {  /* append to an existing boxa */
                boxaaAddBox(baa, index, box, L_INSERT);
            } else {  /* doesn't align, need new boxa */
                boxan = boxaCreate(0);
                boxaAddBox(boxan, box, L_INSERT);
                boxaaAddBoxa(baa, boxan, L_INSERT);
                nan = numaCreate(0);
                numaaAddNuma(naa, nan, L_INSERT);
            }
            numaGetIValue(naindex, i, &ival);
            numaaAddNumber(naa, index, ival);
        }
    }
    boxaDestroy(&boxa);
    numaDestroy(&naindex);

        /* Second pass: feed in small height boxes */
    ne = boxaGetCount(boxae);
    for (i = 0; i < ne; i++) {
        box = boxaGetBox(boxae, i, L_CLONE);
        n = boxaaGetCount(baa);
        boxaaAlignBox(baa, box, delta2, &index);
        if (index < n) {  /* append to an existing boxa */
            boxaaAddBox(baa, index, box, L_INSERT);
        } else {  /* doesn't align, need new boxa */
            boxan = boxaCreate(0);
            boxaAddBox(boxan, box, L_INSERT);
            boxaaAddBoxa(baa, boxan, L_INSERT);
            nan = numaCreate(0);
            numaaAddNuma(naa, nan, L_INSERT);
        }
        numaGetIValue(nae, i, &ival);  /* location in original boxas */
        numaaAddNumber(naa, index, ival);
    }

        /* Third pass: merge some boxa whose extent is overlapping.
         * Think of these boxa as text lines, where the bounding boxes
         * of the text lines can overlap, but likely won't have
         * a huge overlap.
         * First do a greedy find of pairs of overlapping boxa, where
         * the two boxa overlap by at least 50% of the smaller, and
         * the smaller is not more than half the area of the larger.
         * For such pairs, call the larger one the primary boxa.  The
         * boxes in the smaller one are appended to those in the primary
         * in pass 3a, and the primaries are extracted in pass 3b.
         * In this way, all boxes in the original baa are saved. */
    n = boxaaGetCount(baa);
    boxaaGetExtent(baa, NULL, NULL, NULL, &boxa3);
    boxa1 = boxaHandleOverlaps(boxa3, L_REMOVE_SMALL, 1000, 0.5, 0.5, &namap);
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa3);
    for (i = 0; i < n; i++) {  /* Pass 3a: join selected copies of boxa */
        numaGetIValue(namap, i, &ival);
        if (ival >= 0) {  /* join current to primary boxa[ival] */
            boxa1 = boxaaGetBoxa(baa, i, L_COPY);
            boxa2 = boxaaGetBoxa(baa, ival, L_CLONE);
            boxaJoin(boxa2, boxa1, 0, -1);
            boxaDestroy(&boxa2);
            boxaDestroy(&boxa1);
            na1 = numaaGetNuma(naa, i, L_COPY);
            na2 = numaaGetNuma(naa, ival, L_CLONE);
            numaJoin(na2, na1, 0, -1);
            numaDestroy(&na1);
            numaDestroy(&na2);
        }
    }
    baa1 = boxaaCreate(n);
    naa1 = numaaCreate(n);
    for (i = 0; i < n; i++) {  /* Pass 3b: save primary boxa */
        numaGetIValue(namap, i, &ival);
        if (ival == -1) {
            boxa1 = boxaaGetBoxa(baa, i, L_CLONE);
            boxaaAddBoxa(baa1, boxa1, L_INSERT);
            na1 = numaaGetNuma(naa, i, L_CLONE);
            numaaAddNuma(naa1, na1, L_INSERT);
        }
    }
    numaDestroy(&namap);
    boxaaDestroy(&baa);
    baa = baa1;
    numaaDestroy(&naa);
    naa = naa1;

        /* Sort the boxes in each boxa horizontally */
    m = boxaaGetCount(baa);
    for (i = 0; i < m; i++) {
        boxa1 = boxaaGetBoxa(baa, i, L_CLONE);
        boxa2 = boxaSort(boxa1, L_SORT_BY_X, L_SORT_INCREASING, &nah);
        boxaaReplaceBoxa(baa, i, boxa2);
        na1 = numaaGetNuma(naa, i, L_CLONE);
        na2 = numaSortByIndex(na1, nah);
        numaaReplaceNuma(naa, i, na2);
        boxaDestroy(&boxa1);
        numaDestroy(&na1);
        numaDestroy(&nah);
    }

        /* Sort the boxa vertically within boxaa, using the first box
         * in each boxa. */
    m = boxaaGetCount(baa);
    boxav = boxaCreate(m);  /* holds first box in each boxa in baa */
    naad = numaaCreate(m);
    if (pnaad)
        *pnaad = naad;
    baad = boxaaCreate(m);
    for (i = 0; i < m; i++) {
        boxa1 = boxaaGetBoxa(baa, i, L_CLONE);
        box = boxaGetBox(boxa1, 0, L_CLONE);
        boxaAddBox(boxav, box, L_INSERT);
        boxaDestroy(&boxa1);
    }
    boxavs = boxaSort(boxav, L_SORT_BY_Y, L_SORT_INCREASING, &nav);
    for (i = 0; i < m; i++) {
        numaGetIValue(nav, i, &index);
        boxa = boxaaGetBoxa(baa, index, L_CLONE);
        boxaaAddBoxa(baad, boxa, L_INSERT);
        nad = numaaGetNuma(naa, index, L_CLONE);
        numaaAddNuma(naad, nad, L_INSERT);
    }


/*    fprintf(stderr, "box count = %d, numaa count = %d\n", nt,
            numaaGetNumberCount(naad)); */

    boxaaDestroy(&baa);
    boxaDestroy(&boxav);
    boxaDestroy(&boxavs);
    boxaDestroy(&boxae);
    numaDestroy(&nav);
    numaDestroy(&nae);
    numaaDestroy(&naa);
    if (!pnaad)
        numaaDestroy(&naad);

    return baad;
}


/*!
 *  boxaSort2dByIndex()
 *
 *      Input:  boxas
 *              naa (numaa that maps from the new baa to the input boxa)
 *      Return: baa (sorted boxaa), or null on error
 */
BOXAA *
boxaSort2dByIndex(BOXA   *boxas,
                  NUMAA  *naa)
{
l_int32  ntot, boxtot, i, j, n, nn, index;
BOX     *box;
BOXA    *boxa;
BOXAA   *baa;
NUMA    *na;

    PROCNAME("boxaSort2dByIndex");

    if (!boxas)
        return (BOXAA *)ERROR_PTR("boxas not defined", procName, NULL);
    if (!naa)
        return (BOXAA *)ERROR_PTR("naindex not defined", procName, NULL);

        /* Check counts */
    ntot = numaaGetNumberCount(naa);
    boxtot = boxaGetCount(boxas);
    if (ntot != boxtot)
        return (BOXAA *)ERROR_PTR("element count mismatch", procName, NULL);

    n = numaaGetCount(naa);
    baa = boxaaCreate(n);
    for (i = 0; i < n; i++) {
        na = numaaGetNuma(naa, i, L_CLONE);
        nn = numaGetCount(na);
        boxa = boxaCreate(nn);
        for (j = 0; j < nn; j++) {
            numaGetIValue(na, i, &index);
            box = boxaGetBox(boxas, index, L_COPY);
            boxaAddBox(boxa, box, L_INSERT);
        }
        boxaaAddBoxa(baa, boxa, L_INSERT);
        numaDestroy(&na);
    }

    return baa;
}


/*---------------------------------------------------------------------*
 *                        Boxa array extraction                        *
 *---------------------------------------------------------------------*/
/*!
 *  boxaExtractAsNuma()
 *
 *      Input:  boxa
 *              &nal (<optional return> array of left locations)
 *              &nat (<optional return> array of top locations)
 *              &nar (<optional return> array of right locations)
 *              &nab (<optional return> array of bottom locations)
 *              &naw (<optional return> array of widths)
 *              &nah (<optional return> array of heights)
 *              keepinvalid (1 to keep invalid boxes; 0 to remove them)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) If you are counting or sorting values, such as determining
 *          rank order, you must remove invalid boxes.
 *      (2) If you are parametrizing the values, or doing an evaluation
 *          where the position in the boxa sequence is important, you
 *          must replace the invalid boxes with valid ones before
 *          doing the extraction. This is easily done with boxaFillSequence().
 */
l_int32
boxaExtractAsNuma(BOXA    *boxa,
                  NUMA   **pnal,
                  NUMA   **pnat,
                  NUMA   **pnar,
                  NUMA   **pnab,
                  NUMA   **pnaw,
                  NUMA   **pnah,
                  l_int32  keepinvalid)
{
l_int32  i, n, left, top, right, bot, w, h;

    PROCNAME("boxaExtractAsNuma");

    if (!pnal && !pnat && !pnar && !pnab && !pnaw && !pnah)
        return ERROR_INT("no output requested", procName, 1);
    if (pnal) *pnal = NULL;
    if (pnat) *pnat = NULL;
    if (pnar) *pnar = NULL;
    if (pnab) *pnab = NULL;
    if (pnaw) *pnaw = NULL;
    if (pnah) *pnah = NULL;
    if (!boxa)
        return ERROR_INT("boxa not defined", procName, 1);
    if (!keepinvalid && boxaGetValidCount(boxa) == 0)
        return ERROR_INT("no valid boxes", procName, 1);

    n = boxaGetCount(boxa);
    if (pnal) *pnal = numaCreate(n);
    if (pnat) *pnat = numaCreate(n);
    if (pnar) *pnar = numaCreate(n);
    if (pnab) *pnab = numaCreate(n);
    if (pnaw) *pnaw = numaCreate(n);
    if (pnah) *pnah = numaCreate(n);
    for (i = 0; i < n; i++) {
        boxaGetBoxGeometry(boxa, i, &left, &top, &w, &h);
        if (!keepinvalid && (w <= 0 || h <= 0))
            continue;
        right = left + w - 1;
        bot = top + h - 1;
        if (pnal) numaAddNumber(*pnal, left);
        if (pnat) numaAddNumber(*pnat, top);
        if (pnar) numaAddNumber(*pnar, right);
        if (pnab) numaAddNumber(*pnab, bot);
        if (pnaw) numaAddNumber(*pnaw, w);
        if (pnah) numaAddNumber(*pnah, h);
    }

    return 0;
}


/*!
 *  boxaExtractAsPta()
 *
 *      Input:  boxa
 *              &ptal (<optional return> array of left locations vs. index)
 *              &ptat (<optional return> array of top locations vs. index)
 *              &ptar (<optional return> array of right locations vs. index)
 *              &ptab (<optional return> array of bottom locations vs. index)
 *              &ptaw (<optional return> array of widths vs. index)
 *              &ptah (<optional return> array of heights vs. index)
 *              keepinvalid (1 to keep invalid boxes; 0 to remove them)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) For most applications, such as counting, sorting, fitting
 *          to some parametrized form, plotting or filtering in general,
 *          you should remove the invalid boxes.  Each pta saves the
 *          box index in the x array, so replacing invalid boxes by
 *          filling with boxaFillSequence(), which is required for
 *          boxaExtractAsNuma(), is not necessary.
 *      (2) If invalid boxes are retained, each one will result in
 *          entries (typically 0) in all selected output pta.
 */
l_int32
boxaExtractAsPta(BOXA    *boxa,
                 PTA    **pptal,
                 PTA    **pptat,
                 PTA    **pptar,
                 PTA    **pptab,
                 PTA    **pptaw,
                 PTA    **pptah,
                 l_int32  keepinvalid)
{
l_int32  i, n, left, top, right, bot, w, h;

    PROCNAME("boxaExtractAsPta");

    if (!pptal && !pptar && !pptat && !pptab && !pptaw && !pptah)
        return ERROR_INT("no output requested", procName, 1);
    if (pptal) *pptal = NULL;
    if (pptat) *pptat = NULL;
    if (pptar) *pptar = NULL;
    if (pptab) *pptab = NULL;
    if (pptaw) *pptaw = NULL;
    if (pptah) *pptah = NULL;
    if (!boxa)
        return ERROR_INT("boxa not defined", procName, 1);
    if (!keepinvalid && boxaGetValidCount(boxa) == 0)
        return ERROR_INT("no valid boxes", procName, 1);

    n = boxaGetCount(boxa);
    if (pptal) *pptal = ptaCreate(n);
    if (pptat) *pptat = ptaCreate(n);
    if (pptar) *pptar = ptaCreate(n);
    if (pptab) *pptab = ptaCreate(n);
    if (pptaw) *pptaw = ptaCreate(n);
    if (pptah) *pptah = ptaCreate(n);
    for (i = 0; i < n; i++) {
        boxaGetBoxGeometry(boxa, i, &left, &top, &w, &h);
        if (!keepinvalid && (w <= 0 || h <= 0))
            continue;
        right = left + w - 1;
        bot = top + h - 1;
        if (pptal) ptaAddPt(*pptal, i, left);
        if (pptat) ptaAddPt(*pptat, i, top);
        if (pptar) ptaAddPt(*pptar, i, right);
        if (pptab) ptaAddPt(*pptab, i, bot);
        if (pptaw) ptaAddPt(*pptaw, i, w);
        if (pptah) ptaAddPt(*pptah, i, h);
    }

    return 0;
}


/*---------------------------------------------------------------------*
 *                            Boxa statistics                          *
 *---------------------------------------------------------------------*/
/*!
 *  boxaGetRankSize()
 *
 *      Input:  boxa
 *              fract (use 0.0 for smallest, 1.0 for largest)
 *      Return: box (with rank values for x, y, w, h), or null on error
 *              or if the boxa is empty (has no valid boxes)
 *
 *  Notes:
 *      (1) This function does not assume that all boxes in the boxa are valid
 *      (2) The four box parameters are sorted independently.
 *          For rank order, the width and height are sorted in increasing
 *          order.  But what does it mean to sort x and y in "rank order"?
 *          If the boxes are of comparable size and somewhat
 *          aligned (e.g., from multiple images), it makes some sense
 *          to give a "rank order" for x and y by sorting them in
 *          decreasing order.  But in general, the interpretation of a rank
 *          order on x and y is highly application dependent.  In summary:
 *             - x and y are sorted in decreasing order
 *             - w and h are sorted in increasing order
 */
BOX *
boxaGetRankSize(BOXA      *boxa,
                l_float32  fract)
{
l_float32  xval, yval, wval, hval;
NUMA      *nax, *nay, *naw, *nah;
BOX       *box;

    PROCNAME("boxaGetRankSize");

    if (!boxa)
        return (BOX *)ERROR_PTR("boxa not defined", procName, NULL);
    if (fract < 0.0 || fract > 1.0)
        return (BOX *)ERROR_PTR("fract not in [0.0 ... 1.0]", procName, NULL);
    if (boxaGetValidCount(boxa) == 0)
        return (BOX *)ERROR_PTR("no valid boxes in boxa", procName, NULL);

        /* Use only the valid boxes */
    boxaExtractAsNuma(boxa, &nax, &nay, NULL, NULL, &naw, &nah, 0);

    numaGetRankValue(nax, 1.0 - fract, NULL, 1, &xval);
    numaGetRankValue(nay, 1.0 - fract, NULL, 1, &yval);
    numaGetRankValue(naw, fract, NULL, 1, &wval);
    numaGetRankValue(nah, fract, NULL, 1, &hval);
    box = boxCreate((l_int32)xval, (l_int32)yval, (l_int32)wval, (l_int32)hval);

    numaDestroy(&nax);
    numaDestroy(&nay);
    numaDestroy(&naw);
    numaDestroy(&nah);
    return box;
}


/*!
 *  boxaGetMedian()
 *
 *      Input:  boxa
 *      Return: box (with median values for x, y, w, h), or null on error
 *              or if the boxa is empty.
 *
 *  Notes:
 *      (1) See boxaGetRankSize()
 */
BOX *
boxaGetMedian(BOXA  *boxa)
{
    PROCNAME("boxaGetMedian");

    if (!boxa)
        return (BOX *)ERROR_PTR("boxa not defined", procName, NULL);
    if (boxaGetCount(boxa) == 0)
        return (BOX *)ERROR_PTR("boxa is empty", procName, NULL);

    return boxaGetRankSize(boxa, 0.5);
}


/*!
 *  boxaGetAverageSize()
 *
 *      Input:  boxa
 *              &w  (<optional return> average width)
 *              &h  (<optional return> average height)
 *      Return: 0 if OK, 1 on error or if the boxa is empty
 */
l_int32
boxaGetAverageSize(BOXA       *boxa,
                   l_float32  *pw,
                   l_float32  *ph)
{
l_int32    i, n, bw, bh;
l_float32  sumw, sumh;

    PROCNAME("boxaGetAverageSize");

    if (pw) *pw = 0.0;
    if (ph) *ph = 0.0;
    if (!boxa)
        return ERROR_INT("boxa not defined", procName, 1);
    if ((n = boxaGetCount(boxa)) == 0)
        return ERROR_INT("boxa is empty", procName, 1);

    sumw = sumh = 0.0;
    for (i = 0; i < n; i++) {
        boxaGetBoxGeometry(boxa, i, NULL, NULL, &bw, &bh);
        sumw += bw;
        sumh += bh;
    }

    if (pw) *pw = sumw / n;
    if (ph) *ph = sumh / n;
    return 0;
}


/*---------------------------------------------------------------------*
 *                        Other Boxaa functions                        *
 *---------------------------------------------------------------------*/
/*!
 *  boxaaGetExtent()
 *
 *      Input:  baa
 *              &w  (<optional return> width)
 *              &h  (<optional return> height)
 *              &box (<optional return>, minimum box containing all boxa
 *                    in boxaa)
 *              &boxa (<optional return>, boxa containing all boxes in each
 *                     boxa in the boxaa)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) The returned w and h are the minimum size image
 *          that would contain all boxes untranslated.
 *      (2) Each box in the returned boxa is the minimum box required to
 *          hold all the boxes in the respective boxa of baa.
 *      (3) If there are no valid boxes in a boxa, the box corresponding
 *          to its extent has all fields set to 0 (an invalid box).
 */
l_int32
boxaaGetExtent(BOXAA    *baa,
               l_int32  *pw,
               l_int32  *ph,
               BOX     **pbox,
               BOXA    **pboxa)
{
l_int32  i, n, x, y, w, h, xmax, ymax, xmin, ymin, found;
BOX     *box1;
BOXA    *boxa, *boxa1;

    PROCNAME("boxaaGetExtent");

    if (!pw && !ph && !pbox && !pboxa)
        return ERROR_INT("no ptrs defined", procName, 1);
    if (pw) *pw = 0;
    if (ph) *ph = 0;
    if (pbox) *pbox = NULL;
    if (pboxa) *pboxa = NULL;
    if (!baa)
        return ERROR_INT("baa not defined", procName, 1);

    n = boxaaGetCount(baa);
    if (n == 0)
        return ERROR_INT("no boxa in baa", procName, 1);

    boxa = boxaCreate(n);
    xmax = ymax = 0;
    xmin = ymin = 100000000;
    found = FALSE;
    for (i = 0; i < n; i++) {
        boxa1 = boxaaGetBoxa(baa, i, L_CLONE);
        boxaGetExtent(boxa1, NULL, NULL, &box1);
        boxaDestroy(&boxa1);
        boxGetGeometry(box1, &x, &y, &w, &h);
        if (w > 0 && h > 0) {  /* a valid extent box */
            found = TRUE;  /* found at least one valid extent box */
            xmin = L_MIN(xmin, x);
            ymin = L_MIN(ymin, y);
            xmax = L_MAX(xmax, x + w);
            ymax = L_MAX(ymax, y + h);
        }
        boxaAddBox(boxa, box1, L_INSERT);
    }
    if (found == FALSE)  /* no valid extent boxes */
        xmin = ymin = 0;

    if (pw) *pw = xmax;
    if (ph) *ph = ymax;
    if (pbox)
        *pbox = boxCreate(xmin, ymin, xmax - xmin, ymax - ymin);
    if (pboxa)
        *pboxa = boxa;
    else
        boxaDestroy(&boxa);
    return 0;
}


/*!
 *  boxaaFlattenToBoxa()
 *
 *      Input:  baa
 *              &naindex  (<optional return> the boxa index in the baa)
 *              copyflag  (L_COPY or L_CLONE)
 *      Return: boxa, or null on error
 *
 *  Notes:
 *      (1) This 'flattens' the baa to a boxa, taking the boxes in
 *          order in the first boxa, then the second, etc.
 *      (2) If a boxa is empty, we generate an invalid, placeholder box
 *          of zero size.  This is useful when converting from a baa
 *          where each boxa has either 0 or 1 boxes, and it is necessary
 *          to maintain a 1:1 correspondence between the initial
 *          boxa array and the resulting box array.
 *      (3) If &naindex is defined, we generate a Numa that gives, for
 *          each box in the baa, the index of the boxa to which it belongs.
 */
BOXA *
boxaaFlattenToBoxa(BOXAA   *baa,
                   NUMA   **pnaindex,
                   l_int32  copyflag)
{
l_int32  i, j, m, n;
BOXA    *boxa, *boxat;
BOX     *box;
NUMA    *naindex;

    PROCNAME("boxaaFlattenToBoxa");

    if (pnaindex) *pnaindex = NULL;
    if (!baa)
        return (BOXA *)ERROR_PTR("baa not defined", procName, NULL);
    if (copyflag != L_COPY && copyflag != L_CLONE)
        return (BOXA *)ERROR_PTR("invalid copyflag", procName, NULL);
    if (pnaindex) {
        naindex = numaCreate(0);
        *pnaindex = naindex;
    }

    n = boxaaGetCount(baa);
    boxa = boxaCreate(n);
    for (i = 0; i < n; i++) {
        boxat = boxaaGetBoxa(baa, i, L_CLONE);
        m = boxaGetCount(boxat);
        if (m == 0) {  /* placeholder box */
            box = boxCreate(0, 0, 0, 0);
            boxaAddBox(boxa, box, L_INSERT);
            if (pnaindex)
                numaAddNumber(naindex, i);  /* save 'row' number */
        } else {
            for (j = 0; j < m; j++) {
                box = boxaGetBox(boxat, j, copyflag);
                boxaAddBox(boxa, box, L_INSERT);
                if (pnaindex)
                    numaAddNumber(naindex, i);  /* save 'row' number */
            }
        }
        boxaDestroy(&boxat);
    }

    return boxa;
}


/*!
 *  boxaaFlattenAligned()
 *
 *      Input:  baa
 *              num (number extracted from each)
 *              fillerbox (<optional> that fills if necessary)
 *              copyflag  (L_COPY or L_CLONE)
 *      Return: boxa, or null on error
 *
 *  Notes:
 *      (1) This 'flattens' the baa to a boxa, taking the first @num
 *          boxes from each boxa.
 *      (2) In each boxa, if there are less than @num boxes, we preserve
 *          the alignment between the input baa and the output boxa
 *          by inserting one or more fillerbox(es) or, if @fillerbox == NULL,
 *          one or more invalid placeholder boxes.
 */
BOXA *
boxaaFlattenAligned(BOXAA   *baa,
                    l_int32  num,
                    BOX     *fillerbox,
                    l_int32  copyflag)
{
l_int32  i, j, m, n, mval, nshort;
BOXA    *boxat, *boxad;
BOX     *box;

    PROCNAME("boxaaFlattenAligned");

    if (!baa)
        return (BOXA *)ERROR_PTR("baa not defined", procName, NULL);
    if (copyflag != L_COPY && copyflag != L_CLONE)
        return (BOXA *)ERROR_PTR("invalid copyflag", procName, NULL);

    n = boxaaGetCount(baa);
    boxad = boxaCreate(n);
    for (i = 0; i < n; i++) {
        boxat = boxaaGetBoxa(baa, i, L_CLONE);
        m = boxaGetCount(boxat);
        mval = L_MIN(m, num);
        nshort = num - mval;
        for (j = 0; j < mval; j++) {  /* take the first @num if possible */
            box = boxaGetBox(boxat, j, copyflag);
            boxaAddBox(boxad, box, L_INSERT);
        }
        for (j = 0; j < nshort; j++) {  /* add fillers if necessary */
            if (fillerbox) {
                boxaAddBox(boxad, fillerbox, L_COPY);
            } else {
                box = boxCreate(0, 0, 0, 0);  /* invalid placeholder box */
                boxaAddBox(boxad, box, L_INSERT);
            }
        }
        boxaDestroy(&boxat);
    }

    return boxad;
}


/*!
 *  boxaEncapsulateAligned()
 *
 *      Input:  boxa
 *              num (number put into each boxa in the baa)
 *              copyflag  (L_COPY or L_CLONE)
 *      Return: baa, or null on error
 *
 *  Notes:
 *      (1) This puts @num boxes from the input @boxa into each of a
 *          set of boxa within an output baa.
 *      (2) This assumes that the boxes in @boxa are in sets of @num each.
 */
BOXAA *
boxaEncapsulateAligned(BOXA    *boxa,
                       l_int32  num,
                       l_int32  copyflag)
{
l_int32  i, j, n, nbaa, index;
BOX     *box;
BOXA    *boxat;
BOXAA   *baa;

    PROCNAME("boxaEncapsulateAligned");

    if (!boxa)
        return (BOXAA *)ERROR_PTR("boxa not defined", procName, NULL);
    if (copyflag != L_COPY && copyflag != L_CLONE)
        return (BOXAA *)ERROR_PTR("invalid copyflag", procName, NULL);

    n = boxaGetCount(boxa);
    nbaa = n / num;
    if (num * nbaa != n)
        L_ERROR("inconsistent alignment: num doesn't divide n\n", procName);
    baa = boxaaCreate(nbaa);
    for (i = 0, index = 0; i < nbaa; i++) {
        boxat = boxaCreate(num);
        for (j = 0; j < num; j++, index++) {
            box = boxaGetBox(boxa, index, copyflag);
            boxaAddBox(boxat, box, L_INSERT);
        }
        boxaaAddBoxa(baa, boxat, L_INSERT);
    }

    return baa;
}


/*!
 *  boxaaAlignBox()
 *
 *      Input:  baa
 *              box (to be aligned with the bext boxa in the baa, if possible)
 *              delta (amount by which consecutive components can miss
 *                     in overlap and still be included in the array)
 *              &index (of boxa with best overlap, or if none match,
 *                      this is the index of the next boxa to be generated)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) This is not greedy.  It finds the boxa whose vertical
 *          extent has the closest overlap with the input box.
 */
l_int32
boxaaAlignBox(BOXAA    *baa,
              BOX      *box,
              l_int32   delta,
              l_int32  *pindex)
{
l_int32  i, n, m, y, yt, h, ht, ovlp, maxovlp, maxindex;
BOX     *boxt;
BOXA    *boxa;

    PROCNAME("boxaaAlignBox");

    if (!baa)
        return ERROR_INT("baa not defined", procName, 1);
    if (!box)
        return ERROR_INT("box not defined", procName, 1);
    if (!pindex)
        return ERROR_INT("&index not defined", procName, 1);

    n = boxaaGetCount(baa);
    boxGetGeometry(box, NULL, &y, NULL, &h);
    maxovlp = -10000000;
    for (i = 0; i < n; i++) {
        boxa = boxaaGetBoxa(baa, i, L_CLONE);
        if ((m = boxaGetCount(boxa)) == 0) {
            L_WARNING("no boxes in boxa\n", procName);
            continue;
        }
        boxaGetExtent(boxa, NULL, NULL, &boxt);
        boxGetGeometry(boxt, NULL, &yt, NULL, &ht);
        boxDestroy(&boxt);
        boxaDestroy(&boxa);

            /* Overlap < 0 means the components do not overlap vertically */
        if (yt >= y)
            ovlp = y + h - 1 - yt;
        else
            ovlp = yt + ht - 1 - y;
        if (ovlp > maxovlp) {
            maxovlp = ovlp;
            maxindex = i;
        }
    }

    if (maxovlp + delta >= 0)
        *pindex = maxindex;
    else
        *pindex = n;
    return 0;
}