pranavjha/text-detector

View on GitHub
third-party/leptonica/prog/pdf2mtiff

Summary

Maintainability
Test Coverage
#!/bin/bash
#   pdf2mtiff
#
#     Rasterizes a PDF file, saving as a single multipage g4 compressed
#     tiff file
#
#     input:  PDF file
#             output file
#     output: ccitt-g4 compressed multipage tiff file
#
#   N.B. Requires ghostscript

# Name this script was invoked as, with any leading directories stripped;
# used to prefix the diagnostics below.
scriptname=${0##*/}

# Exactly two arguments are required: the input PDF and the output tiff.
# Diagnostics go to stderr so they never mix with regular output.
if [ "$#" -ne 2 ]
then
  printf 'usage:  %s  pdfin tiffout\n' "$scriptname" >&2
  # 255 is the status bash reported for the former out-of-range `exit -1`.
  exit 255
fi

pdfin=$1
tiffout=$2

# assert (input pdf filename ends in .pdf)
# A pattern match on the whole name is used rather than comparing the text
# after the last dot: it stays well-formed for empty names and names with
# whitespace, and it rejects a dot-less file that is literally called "pdf".
# The match is case-sensitive, so "FOO.PDF" is rejected.
case "$pdfin" in
  *.pdf)
    ;;
  *)
    printf '%s :  %s does not end in .pdf\n' "$scriptname" "$pdfin" >&2
    exit 255
    ;;
esac

# Rasterize ${pdfin} with ghostscript into one g4-compressed tiff per page,
# then have writemtiff bundle those pages into ${tiffout}.
#
# This section used to contain two passes (300 ppi, then 600 ppi) that were
# meant as alternatives, but both ran, so every PDF was rasterized twice.
# Only one pass is made now.  The 600 ppi pass ran last, so that is
# presumably what ended up in ${tiffout}; it is kept as the default.
# NOTE(review): this assumes writemtiff overwrites its output file rather
# than appending to it -- confirm against writemtiff.
# Set PDF2MTIFF_PPI in the environment to choose another resolution,
# e.g. PDF2MTIFF_PPI=300.
ppi=${PDF2MTIFF_PPI:-600}
case "$ppi" in
  '' | *[!0-9]*)
    printf '%s: PDF2MTIFF_PPI must be a positive integer, got: %s\n' \
      "${0##*/}" "$ppi" >&2
    exit 1
    ;;
esac

# Use a private scratch directory instead of the shared, predictable
# /tmp/junkpdf* names, so concurrent runs cannot clobber each other's pages
# and there is nothing stale to remove up front.  The trap removes the
# directory on every exit path.
workdir=$(mktemp -d "${TMPDIR:-/tmp}/pdf2mtiff.XXXXXX") || exit
trap 'rm -rf -- "$workdir"' EXIT

# The "0 neg 0 neg translate" line fed to gs on stdin (the "-" argument) is
# the mysterious "primer".  If gs fails, stop with its exit status instead
# of handing an incomplete set of pages to writemtiff.
printf '%s\n' '0 neg 0 neg translate' \
  | /usr/bin/gs -sDEVICE=tiffg4 "-sOutputFile=${workdir}/junkpdf%03d.tif" \
      "-r${ppi}x${ppi}" -q - "${pdfin}" \
  || exit

# writemtiff is called as before (directory, file-name pattern, output file);
# it is looked up in the current directory, so the script must be run from
# the directory that contains it.  Its exit status is the script's status.
./writemtiff "${workdir}" junkpdf "${tiffout}"