CloudSlang/cs-actions

View on GitHub
cs-tesseract/src/main/java/io/cloudslang/content/tesseract/services/PdfService.java

Summary

Maintainability
C
1 day
Test Coverage
/*
 * Copyright 2022-2024 Open Text
 * This program and the accompanying materials
 * are made available under the terms of the Apache License v2.0 which accompany this distribution.
 *
 * The Apache License is available at
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package io.cloudslang.content.tesseract.services;

import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.RandomStringUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import static io.cloudslang.content.tesseract.utils.Constants.*;
import static java.util.Objects.requireNonNull;
import static org.apache.commons.lang3.StringUtils.EMPTY;

/**
 * Renders pages of a PDF file to temporary image files next to the source file and passes each
 * image to {@code OcrService.extractTextFromImage}. The temporary images are removed again before
 * {@link #imageConvert} returns, whether it succeeds or fails.
 */
public class PdfService {

    /**
     * Renders the selected pages of a PDF to temporary images, runs
     * {@code OcrService.extractTextFromImage} on each of them and returns the combined result.
     *
     * <p>Page selection (all page numbers are 1-based):
     * <ul>
     *   <li>if {@code pageIndex} is non-empty it wins: it is a comma-separated list of page
     *       numbers and {@code fromPage}/{@code toPage} are ignored;</li>
     *   <li>otherwise pages {@code fromPage}..{@code toPage} (inclusive) are used, where an empty
     *       {@code fromPage} means the first page and an empty {@code toPage} means the last
     *       page.</li>
     * </ul>
     *
     * @param sourcePath path of the PDF file; a {@code null} or empty path yields an empty result
     * @param dataPath   forwarded unchanged to {@code OcrService.extractTextFromImage}
     * @param lang       forwarded unchanged to {@code OcrService.extractTextFromImage}
     * @param dpi        rendering resolution, parsed with {@link Integer#parseInt(String)}
     * @param textBlocks parsed with {@link Boolean#parseBoolean(String)}; when {@code true} the
     *                   per-page results are collected in a JSON array stored under the
     *                   {@code PAGE} key of a JSON object, and that object is returned serialized;
     *                   otherwise the per-page results are simply concatenated
     * @param deskew     forwarded unchanged to {@code OcrService.extractTextFromImage}
     * @param fromPage   first page to process (1-based) or empty
     * @param toPage     last page to process (1-based, inclusive) or empty
     * @param pageIndex  comma-separated list of 1-based page numbers or empty
     * @return the combined OCR output, or an empty string when {@code sourcePath} is empty
     * @throws Exception with message {@code FILE_NOT_EXISTS} when the file is missing; also
     *                   propagates parsing, rendering, OCR and file-deletion failures
     */
    public static String imageConvert(String sourcePath, String dataPath, String lang, String dpi, String textBlocks, String deskew, String fromPage, String toPage, String pageIndex)
            throws Exception {
        if (StringUtils.isEmpty(sourcePath)) {
            return EMPTY;
        }

        File pdf = new File(sourcePath);
        String destination = resolveDestination(pdf);
        boolean asTextBlocks = Boolean.parseBoolean(textBlocks);

        // The list is owned here (not by the converter) so that images rendered before a
        // mid-conversion failure are still visible to the cleanup in the finally block.
        List<File> images = new ArrayList<>();
        try {
            convertPdfToImage(pdf, destination, dpi, fromPage, toPage, pageIndex, images);

            StringBuilder result = new StringBuilder();
            JsonArray outputArray = new JsonArray();
            for (File image : images) {
                if (asTextBlocks) {
                    outputArray.add(OcrService.extractTextFromImage(image.getAbsolutePath(), dataPath, lang,
                            textBlocks, deskew));
                } else {
                    result.append(OcrService.extractTextFromImage(image.getAbsolutePath(), dataPath, lang,
                            textBlocks, deskew));
                }
                // Free disk space as soon as a page has been processed.
                FileUtils.forceDelete(image);
            }
            if (asTextBlocks) {
                JsonObject outputObject = new JsonObject();
                outputObject.add(PAGE, outputArray);
                result.append(outputObject.toString());
            }
            return result.toString();
        } finally {
            // Best-effort cleanup of whatever is left; must not mask an exception thrown above.
            for (File image : images) {
                FileUtils.deleteQuietly(image);
            }
        }
    }

    /**
     * Returns the directory prefix (ending with {@link File#separator}) under which temporary
     * images are created: the parent directory of {@code pdf}, or an empty prefix when the path
     * has no parent component.
     */
    private static String resolveDestination(File pdf) {
        String parent = pdf.getParent();
        if (parent == null) {
            return EMPTY;
        }
        return parent.endsWith(File.separator) ? parent : parent + File.separator;
    }

    /**
     * Opens {@code file} with PDFBox and renders the selected pages into image files, appending
     * every created file to {@code fileList}.
     *
     * @throws Exception with message {@code FILE_NOT_EXISTS} when {@code file} does not exist;
     *                   {@link NumberFormatException} for non-numeric dpi/page values;
     *                   {@link IOException} when loading, rendering or writing fails
     */
    private static void convertPdfToImage(File file, String destination, String dpi, String fromPage,
                                          String toPage, String pageIndex, List<File> fileList)
            throws Exception {
        if (!file.exists()) {
            throw new Exception(FILE_NOT_EXISTS);
        }

        // try-with-resources: the document is closed even when parsing or rendering fails.
        try (PDDocument doc = PDDocument.load(file)) {
            PDFRenderer renderer = new PDFRenderer(doc);
            String fileName = file.getName().replace(PDF_EXTENSION, EMPTY);
            int resolution = Integer.parseInt(dpi);

            if (!StringUtils.isEmpty(pageIndex)) {
                renderPageList(destination, resolution, renderer, fileList, fileName, pageIndex);
            } else {
                // Convert the 1-based inclusive range into a 0-based half-open range.
                int first = StringUtils.isEmpty(fromPage) ? 0 : Integer.parseInt(fromPage) - 1;
                int end = StringUtils.isEmpty(toPage) ? doc.getNumberOfPages() : Integer.parseInt(toPage);
                renderPageRange(destination, resolution, renderer, fileList, fileName, first, end);
            }
        }
    }

    /** Renders each 1-based page number listed in the comma-separated {@code pageIndex}. */
    private static void renderPageList(String destination, int dpi, PDFRenderer renderer, List<File> fileList,
                                       String fileName, String pageIndex) throws IOException {
        for (String entry : pageIndex.split(COMMA)) {
            // trim() tolerates lists written as "1, 2, 3".
            int zeroBasedPage = Integer.parseInt(entry.trim()) - 1;
            fileCreation(destination, dpi, renderer, fileList, fileName, zeroBasedPage);
        }
    }

    /** Renders the 0-based pages {@code first} (inclusive) to {@code end} (exclusive). */
    private static void renderPageRange(String destination, int dpi, PDFRenderer renderer, List<File> fileList,
                                        String fileName, int first, int end) throws IOException {
        for (int page = first; page < end; page++) {
            fileCreation(destination, dpi, renderer, fileList, fileName, page);
        }
    }

    /**
     * Renders one 0-based page at the given DPI and writes it to a new, randomly suffixed file
     * under {@code destination}; the file is appended to {@code fileList}.
     */
    private static void fileCreation(String destination, int dpi, PDFRenderer renderer, List<File> fileList,
                                     String fileName, int pageNumber) throws IOException {
        // Locale.ROOT keeps the random suffix ASCII regardless of the JVM default locale.
        String suffix = RandomStringUtils.randomAlphanumeric(15).toUpperCase(Locale.ROOT);
        File fileTemp = new File(destination + fileName + UNDERSCORE + suffix + PNG_EXTENSION);

        BufferedImage image = renderer.renderImageWithDPI(pageNumber, dpi);

        // Register before writing so a partially written file is still cleaned up by the caller.
        fileList.add(fileTemp);
        if (!ImageIO.write(image, PNG, fileTemp)) {
            throw new IOException("No image writer available for format: " + PNG);
        }
    }
}