/*
 * Decompiled with CFR 0.152.
 */
package org.apache.pdfbox;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Map;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.PDDocumentNameDictionary;
import org.apache.pdfbox.pdmodel.PDEmbeddedFilesNameTreeNode;
import org.apache.pdfbox.pdmodel.common.COSObjectable;
import org.apache.pdfbox.pdmodel.common.filespecification.PDComplexFileSpecification;
import org.apache.pdfbox.pdmodel.common.filespecification.PDEmbeddedFile;
import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
import org.apache.pdfbox.pdmodel.encryption.StandardDecryptionMaterial;
import org.apache.pdfbox.util.PDFText2HTML;
import org.apache.pdfbox.util.PDFTextStripper;

/**
 * Command-line tool that extracts the text of a PDF document and writes it,
 * as plain text or HTML, to a file or to the console. PDF files embedded in
 * the document are extracted as well and appended to the same output.
 *
 * <p>See {@link #usage()} for the supported options.
 */
public class ExtractText {
    private static final String PASSWORD = "-password";
    private static final String ENCODING = "-encoding";
    private static final String CONSOLE = "-console";
    private static final String START_PAGE = "-startPage";
    private static final String END_PAGE = "-endPage";
    private static final String SORT = "-sort";
    private static final String IGNORE_BEADS = "-ignoreBeads";
    private static final String DEBUG = "-debug";
    private static final String HTML = "-html";
    private static final String FORCE = "-force";
    private static final String NONSEQ = "-nonSeq";

    /** When set (via {@code -debug}), progress and timing messages go to {@code System.err}. */
    private boolean debug = false;

    /** Instances are only created by {@link #main(String[])}. */
    private ExtractText() {
    }

    /**
     * Program entry point.
     *
     * @param args command-line arguments, see {@link #usage()}
     * @throws Exception if loading the document or extracting its text fails
     */
    public static void main(String[] args) throws Exception {
        ExtractText extractor = new ExtractText();
        extractor.startExtraction(args);
    }

    /**
     * Parses the command line, loads the PDF and writes its text to the
     * selected destination.
     *
     * <p>If no PDF file is given, or an option is missing its value, the usage
     * text is printed and the JVM exits with status 1.
     *
     * @param args command-line arguments, see {@link #usage()}
     * @throws Exception if the document cannot be loaded or decrypted, if text
     *         extraction is not permitted, or if writing the output fails
     */
    public void startExtraction(String[] args) throws Exception {
        boolean toConsole = false;
        boolean toHTML = false;
        boolean force = false;
        boolean sort = false;
        boolean separateBeads = true;
        boolean useNonSeqParser = false;
        String password = "";
        String encoding = null;
        String pdfFile = null;
        String outputFile = null;
        String ext = ".txt";
        int startPage = 1;
        int endPage = Integer.MAX_VALUE;

        for (int i = 0; i < args.length; ++i) {
            String arg = args[i];
            if (arg.equals(PASSWORD)) {
                password = ExtractText.optionValue(args, ++i);
            } else if (arg.equals(ENCODING)) {
                encoding = ExtractText.optionValue(args, ++i);
            } else if (arg.equals(START_PAGE)) {
                startPage = Integer.parseInt(ExtractText.optionValue(args, ++i));
            } else if (arg.equals(END_PAGE)) {
                endPage = Integer.parseInt(ExtractText.optionValue(args, ++i));
            } else if (arg.equals(HTML)) {
                toHTML = true;
                ext = ".html";
            } else if (arg.equals(SORT)) {
                sort = true;
            } else if (arg.equals(IGNORE_BEADS)) {
                separateBeads = false;
            } else if (arg.equals(DEBUG)) {
                this.debug = true;
            } else if (arg.equals(CONSOLE)) {
                toConsole = true;
            } else if (arg.equals(FORCE)) {
                force = true;
            } else if (arg.equals(NONSEQ)) {
                useNonSeqParser = true;
            } else if (pdfFile == null) {
                // The first non-option argument is the input PDF ...
                pdfFile = arg;
            } else {
                // ... any later one is taken as the output file (last one wins).
                outputFile = arg;
            }
        }

        if (pdfFile == null) {
            ExtractText.usage();
            return;
        }

        Writer output = null;
        PDDocument document = null;
        try {
            long startTime = this.startProcessing("Loading PDF " + pdfFile);
            if (outputFile == null) {
                outputFile = ExtractText.defaultOutputFile(pdfFile, ext);
            }

            if (useNonSeqParser) {
                document = PDDocument.loadNonSeq(new File(pdfFile), null, password);
            } else {
                document = PDDocument.load(pdfFile, force);
                if (document.isEncrypted()) {
                    StandardDecryptionMaterial sdm = new StandardDecryptionMaterial(password);
                    document.openProtection(sdm);
                }
            }

            AccessPermission ap = document.getCurrentAccessPermission();
            if (!ap.canExtractContent()) {
                throw new IOException("You do not have permission to extract text");
            }
            this.stopProcessing("Time for loading: ", startTime);

            // HTML output defaults to UTF-8 when no encoding was requested.
            if (encoding == null && toHTML) {
                encoding = "UTF-8";
            }

            output = toConsole
                    ? ExtractText.openConsoleWriter(encoding)
                    : ExtractText.openFileWriter(outputFile, encoding);

            PDFTextStripper stripper = toHTML ? new PDFText2HTML(encoding) : new PDFTextStripper(encoding);
            stripper.setForceParsing(force);
            stripper.setSortByPosition(sort);
            stripper.setShouldSeparateByBeads(separateBeads);
            stripper.setStartPage(startPage);
            stripper.setEndPage(endPage);

            startTime = this.startProcessing("Starting text extraction");
            if (this.debug) {
                System.err.println("Writing to " + outputFile);
            }
            stripper.writeText(document, output);
            this.extractEmbeddedPdfs(document, stripper, output);
            this.stopProcessing("Time for extraction: ", startTime);
        } finally {
            // Nested so that a failure while finishing the writer can never
            // prevent the document from being closed.
            try {
                if (output != null) {
                    if (toConsole) {
                        // System.out belongs to the whole process: flush, do not close.
                        output.flush();
                    } else {
                        output.close();
                    }
                }
            } finally {
                if (document != null) {
                    document.close();
                }
            }
        }
    }

    /**
     * Returns the value that follows an option on the command line. If the
     * value is missing, the usage text is printed and the JVM exits.
     *
     * @param args  the full command line
     * @param index position at which the option's value is expected
     * @return the argument at {@code index}
     */
    private static String optionValue(String[] args, int index) {
        if (index >= args.length) {
            ExtractText.usage();
        }
        return args[index];
    }

    /**
     * Derives the output path used when none was given on the command line.
     * The last four characters of the input name (normally {@code ".pdf"})
     * are replaced by {@code ext}; names of four characters or fewer are too
     * short to carry such a suffix, so {@code ext} is simply appended.
     *
     * @param pdfFile the input file name as given on the command line
     * @param ext     extension of the output file, including the leading dot
     * @return absolute path of the output file, never {@code null}
     */
    private static String defaultOutputFile(String pdfFile, String ext) {
        String base = pdfFile.length() > 4 ? pdfFile.substring(0, pdfFile.length() - 4) : pdfFile;
        return new File(base + ext).getAbsolutePath();
    }

    /**
     * Creates a writer on {@code System.out}.
     *
     * @param encoding charset name to encode with, or {@code null} for the platform default
     * @return a writer wrapping {@code System.out}
     * @throws IOException if the named encoding is not supported
     */
    private static Writer openConsoleWriter(String encoding) throws IOException {
        return encoding != null
                ? new OutputStreamWriter(System.out, encoding)
                : new OutputStreamWriter(System.out);
    }

    /**
     * Opens {@code outputFile} for writing text.
     *
     * @param outputFile path of the file to create or overwrite
     * @param encoding   charset name to encode with, or {@code null} for the platform default
     * @return a writer on the file; the caller is responsible for closing it
     * @throws IOException if the file cannot be opened or the encoding is not supported
     */
    private static Writer openFileWriter(String outputFile, String encoding) throws IOException {
        OutputStream stream = new FileOutputStream(outputFile);
        boolean wrapped = false;
        try {
            Writer writer = encoding != null
                    ? new OutputStreamWriter(stream, encoding)
                    : new OutputStreamWriter(stream);
            wrapped = true;
            return writer;
        } finally {
            // Do not leak the file handle when the encoding name is rejected.
            if (!wrapped) {
                stream.close();
            }
        }
    }

    /**
     * Appends the text of every embedded file whose subtype is
     * {@code application/pdf} to {@code output}, using the same stripper
     * configuration as for the main document.
     *
     * @param document the document whose embedded files are inspected
     * @param stripper the configured text stripper
     * @param output   destination of the extracted text
     * @throws IOException if an embedded file cannot be read or its text cannot be written
     */
    private void extractEmbeddedPdfs(PDDocument document, PDFTextStripper stripper, Writer output)
            throws IOException {
        PDDocumentCatalog catalog = document.getDocumentCatalog();
        PDDocumentNameDictionary names = catalog.getNames();
        if (names == null) {
            return;
        }
        PDEmbeddedFilesNameTreeNode embeddedFiles = names.getEmbeddedFiles();
        if (embeddedFiles == null) {
            return;
        }
        Map<String, COSObjectable> embeddedFileNames = embeddedFiles.getNames();
        if (embeddedFileNames == null) {
            return;
        }
        for (Map.Entry<String, COSObjectable> ent : embeddedFileNames.entrySet()) {
            if (this.debug) {
                System.err.println("Processing embedded file " + ent.getKey() + ":");
            }
            PDComplexFileSpecification spec = (PDComplexFileSpecification)ent.getValue();
            PDEmbeddedFile file = spec.getEmbeddedFile();
            if (file == null || !"application/pdf".equals(file.getSubtype())) {
                continue;
            }
            if (this.debug) {
                System.err.println("  is PDF (size=" + file.getSize() + ")");
            }
            InputStream fis = file.createInputStream();
            PDDocument subDoc;
            try {
                subDoc = PDDocument.load(fis);
            } finally {
                fis.close();
            }
            try {
                stripper.writeText(subDoc, output);
            } finally {
                subDoc.close();
            }
        }
    }

    /**
     * Marks the start of a processing stage.
     *
     * @param message printed to {@code System.err} when debug output is enabled
     * @return the current time in milliseconds, to be passed to
     *         {@link #stopProcessing(String, long)}
     */
    private long startProcessing(String message) {
        if (this.debug) {
            System.err.println(message);
        }
        return System.currentTimeMillis();
    }

    /**
     * Prints the time elapsed since {@code startTime}, in seconds, when debug
     * output is enabled; does nothing otherwise.
     *
     * @param message   prefix of the printed line
     * @param startTime value returned by {@link #startProcessing(String)}
     */
    private void stopProcessing(String message, long startTime) {
        if (this.debug) {
            long stopTime = System.currentTimeMillis();
            float elapsedTime = (float)(stopTime - startTime) / 1000.0f;
            System.err.println(message + elapsedTime + " seconds");
        }
    }

    /**
     * Prints the command-line help to {@code System.err} and terminates the
     * JVM with exit status 1.
     */
    private static void usage() {
        System.err.println("Usage: java -jar pdfbox-app-x.y.z.jar ExtractText [OPTIONS] <PDF file> [Text File]\n"
                + "  -password  <password>        Password to decrypt document\n"
                + "  -encoding  <output encoding> (ISO-8859-1,UTF-16BE,UTF-16LE,...)\n"
                + "  -console                     Send text to console instead of file\n"
                + "  -html                        Output in HTML format instead of raw text\n"
                + "  -sort                        Sort the text before writing\n"
                + "  -ignoreBeads                 Disables the separation by beads\n"
                + "  -force                       Enables pdfbox to ignore corrupt objects\n"
                + "  -debug                       Enables debug output about the time consumption of every stage\n"
                + "  -startPage <number>          The first page to start extraction(1 based)\n"
                + "  -endPage <number>            The last page to extract(inclusive)\n"
                + "  -nonSeq                      Enables the new non-sequential parser\n"
                + "  <PDF file>                   The PDF document to use\n"
                + "  [Text File]                  The file to write the text to\n");
        System.exit(1);
    }
}

