/*
 * Decompiled with CFR 0.152.
 */
package org.apache.solr.handler.dataimport;

import java.io.File;
import java.io.InputStream;
import java.io.StringWriter;
import java.io.Writer;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamResult;
import org.apache.commons.io.IOUtils;
import org.apache.solr.handler.dataimport.Context;
import org.apache.solr.handler.dataimport.DataImportHandlerException;
import org.apache.solr.handler.dataimport.DataSource;
import org.apache.solr.handler.dataimport.EntityProcessorBase;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.EmptyParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.html.HtmlMapper;
import org.apache.tika.parser.html.IdentityHtmlMapper;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.ContentHandlerDecorator;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Entity processor that reads one document from the entity's data source, runs it
 * through an Apache Tika parser and emits a single row containing the extracted
 * body text and any requested metadata fields.
 *
 * <p>Entity attributes read by this processor:
 * <ul>
 *   <li>{@code url} - passed to the data source to obtain the document stream</li>
 *   <li>{@code tikaConfig} - optional Tika config file; a relative path is resolved
 *       against the core's config directory. When absent,
 *       {@code solr-default-tika-config.xml} is loaded from the resource class loader</li>
 *   <li>{@code format} - one of {@code text} (default), {@code html}, {@code xml}, {@code none}</li>
 *   <li>{@code htmlMapper} - {@code default} (default) or {@code identity}</li>
 *   <li>{@code parser} - parser class name; defaults to {@link #AUTO_PARSER}</li>
 *   <li>{@code extractEmbedded} - {@code "true"} to parse embedded documents with the main parser</li>
 *   <li>{@code spatialMetadataField} - optional row key that receives {@code "lat,lon"}</li>
 * </ul>
 *
 * <p>Each initialised run yields exactly one row; subsequent calls to
 * {@link #nextRow()} return {@code null} until {@link #init(Context)} is called again.
 */
public class TikaEntityProcessor extends EntityProcessorBase {
    /** Parser registered in the parse context when embedded documents must not be extracted. */
    private static final Parser EMPTY_PARSER = new EmptyParser();

    /** Class name that selects Tika's auto-detecting parser. */
    static final String AUTO_PARSER = "org.apache.tika.parser.AutoDetectParser";

    /** Namespace URI stripped from elements by the HTML output handler. */
    private static final String XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml";

    private TikaConfig tikaConfig;
    private String format = "text";
    private boolean done = false;
    private boolean extractEmbedded = false;
    private String parser;
    private String htmlMapper;
    private String spatialMetadataField;

    /**
     * Delegates to the superclass initialisation and re-arms the processor so that
     * the next {@link #nextRow()} call produces a row.
     *
     * @param context the import context for the current entity
     */
    public void init(Context context) {
        super.init(context);
        this.done = false;
    }

    /**
     * Loads the Tika configuration and reads and validates the entity attributes.
     *
     * @param context the import context for the current entity
     * @throws DataImportHandlerException if {@code format} or {@code htmlMapper} has an
     *         unsupported value; a failure to load the Tika configuration is handed to
     *         {@code DataImportHandlerException.wrapAndThrow}
     */
    protected void firstInit(Context context) {
        super.firstInit(context);
        try {
            String tikaConfigFile = context.getResolvedEntityAttribute("tikaConfig");
            if (tikaConfigFile == null) {
                // No explicit config: fall back to the default shipped on the resource class path.
                ClassLoader classLoader = context.getSolrCore().getResourceLoader().getClassLoader();
                try (InputStream is = classLoader.getResourceAsStream("solr-default-tika-config.xml")) {
                    this.tikaConfig = new TikaConfig(is);
                }
            } else {
                File configFile = new File(tikaConfigFile);
                if (!configFile.isAbsolute()) {
                    // Relative paths are resolved against the core's config directory.
                    configFile = new File(context.getSolrCore().getResourceLoader().getConfigDir(), tikaConfigFile);
                }
                this.tikaConfig = new TikaConfig(configFile);
            }
        } catch (Exception e) {
            DataImportHandlerException.wrapAndThrow(500, e, "Unable to load Tika Config");
        }

        this.extractEmbedded = "true".equals(context.getResolvedEntityAttribute("extractEmbedded"));

        this.format = context.getResolvedEntityAttribute("format");
        if (this.format == null) {
            this.format = "text";
        }
        if (!("html".equals(this.format) || "xml".equals(this.format) || "text".equals(this.format) || "none".equals(this.format))) {
            throw new DataImportHandlerException(500, "'format' can be one of text|html|xml|none");
        }

        this.htmlMapper = context.getResolvedEntityAttribute("htmlMapper");
        if (this.htmlMapper == null) {
            this.htmlMapper = "default";
        }
        if (!"default".equals(this.htmlMapper) && !"identity".equals(this.htmlMapper)) {
            throw new DataImportHandlerException(500, "'htmlMapper', if present, must be 'default' or 'identity'");
        }

        this.parser = context.getResolvedEntityAttribute("parser");
        if (this.parser == null) {
            this.parser = AUTO_PARSER;
        }

        this.spatialMetadataField = context.getResolvedEntityAttribute("spatialMetadataField");
    }

    /**
     * Parses the document referenced by the {@code url} attribute and returns it as a row.
     *
     * <p>The row contains every entity field flagged {@code meta="true"} whose column name
     * has a value in the Tika metadata, the extracted content under {@code "text"} (unless
     * the format is {@code none}) and, if configured, the spatial field. The input stream
     * is always closed before this method returns or throws.
     *
     * @return the row for the document, or {@code null} if a row was already produced
     *         since the last {@link #init(Context)}
     * @throws DataImportHandlerException with code 301 when parsing fails and
     *         {@code onError} is {@code "skip"}
     */
    public Map<String, Object> nextRow() {
        if (this.done) {
            return null;
        }
        Map<String, Object> row = new HashMap<>();
        DataSource dataSource = this.context.getDataSource();
        InputStream is = (InputStream) dataSource.getData(this.context.getResolvedEntityAttribute("url"));
        Metadata metadata = new Metadata();
        StringWriter sw = new StringWriter();
        try {
            ContentHandler contentHandler = this.createContentHandler(sw);
            Parser tikaParser = this.createParser();
            try {
                ParseContext parseContext = new ParseContext();
                if ("identity".equals(this.htmlMapper)) {
                    parseContext.set(HtmlMapper.class, IdentityHtmlMapper.INSTANCE);
                }
                // The Parser entry in the parse context is what handles embedded documents:
                // the real parser recurses into them, the empty parser ignores them.
                parseContext.set(Parser.class, this.extractEmbedded ? tikaParser : EMPTY_PARSER);
                tikaParser.parse(is, contentHandler, metadata, parseContext);
            } catch (Exception e) {
                if ("skip".equals(this.onError)) {
                    throw new DataImportHandlerException(301, "Document skipped :" + e.getMessage());
                }
                DataImportHandlerException.wrapAndThrow(500, e, "Unable to read content");
            }
        } finally {
            // Close on every path, including parse failures and skipped documents.
            IOUtils.closeQuietly(is);
        }

        this.copyMetadataFields(metadata, row);
        if (!"none".equals(this.format)) {
            row.put("text", sw.toString());
        }
        this.tryToAddLatLon(metadata, row);
        this.done = true;
        return row;
    }

    /**
     * Builds the SAX content handler matching the configured {@code format}.
     *
     * @param writer sink for the extracted content (unused for format {@code none})
     * @return the handler, or {@code null} if the format is not one of the four values
     *         accepted by {@link #firstInit(Context)} or if {@code wrapAndThrow} returns
     *         normally (it is expected to throw)
     */
    private ContentHandler createContentHandler(Writer writer) {
        try {
            if ("html".equals(this.format)) {
                return TikaEntityProcessor.getHtmlHandler(writer);
            }
            if ("xml".equals(this.format)) {
                return TikaEntityProcessor.getXmlContentHandler(writer);
            }
            if ("text".equals(this.format)) {
                return TikaEntityProcessor.getTextContentHandler(writer);
            }
            if ("none".equals(this.format)) {
                return new DefaultHandler();
            }
        } catch (TransformerConfigurationException e) {
            DataImportHandlerException.wrapAndThrow(500, e, "Unable to create content handler");
        }
        return null;
    }

    /**
     * Creates the Tika parser: an {@link AutoDetectParser} backed by the loaded Tika
     * configuration when the configured class name is {@link #AUTO_PARSER}, otherwise an
     * instance of the configured class obtained from the core's resource loader.
     *
     * @return the parser to use for the current document
     */
    private Parser createParser() {
        if (this.parser.equals(AUTO_PARSER)) {
            return new AutoDetectParser(this.tikaConfig);
        }
        return (Parser) this.context.getSolrCore().getResourceLoader().newInstance(this.parser, Parser.class);
    }

    /**
     * Copies into the row every entity field marked {@code meta="true"} for which the
     * Tika metadata holds a value under the field's {@code column} name.
     *
     * @param metadata metadata populated by the parser
     * @param row      row being assembled
     */
    private void copyMetadataFields(Metadata metadata, Map<String, Object> row) {
        for (Map<String, String> field : this.context.getAllEntityFields()) {
            if (!"true".equals(field.get("meta"))) {
                continue;
            }
            String col = field.get("column");
            String value = metadata.get(col);
            if (value != null) {
                row.put(col, value);
            }
        }
    }

    /**
     * Adds a {@code "latitude,longitude"} value under the configured spatial field when
     * that field is configured and the metadata carries both coordinates.
     *
     * @param metadata metadata populated by the parser
     * @param row      row being assembled
     */
    private void tryToAddLatLon(Metadata metadata, Map<String, Object> row) {
        if (this.spatialMetadataField == null) {
            return;
        }
        String latString = metadata.get(Metadata.LATITUDE);
        String lonString = metadata.get(Metadata.LONGITUDE);
        if (latString != null && lonString != null) {
            row.put(this.spatialMetadataField, String.format(Locale.ROOT, "%s,%s", latString, lonString));
        }
    }

    /**
     * Creates a handler that serialises SAX events as HTML into {@code writer}, dropping
     * {@code head} start/end tags, the XHTML namespace URI and all prefix mappings.
     *
     * @param writer destination for the serialised HTML
     * @return the decorating content handler
     * @throws TransformerConfigurationException if the transformer handler cannot be created
     */
    private static ContentHandler getHtmlHandler(Writer writer) throws TransformerConfigurationException {
        SAXTransformerFactory factory = (SAXTransformerFactory) TransformerFactory.newInstance();
        TransformerHandler handler = factory.newTransformerHandler();
        handler.getTransformer().setOutputProperty("method", "html");
        handler.setResult(new StreamResult(writer));
        return new ContentHandlerDecorator(handler) {

            public void startElement(String uri, String localName, String name, Attributes atts) throws SAXException {
                if (XHTML_NAMESPACE.equals(uri)) {
                    uri = null;
                }
                if (!"head".equals(localName)) {
                    super.startElement(uri, localName, name, atts);
                }
            }

            public void endElement(String uri, String localName, String name) throws SAXException {
                if (XHTML_NAMESPACE.equals(uri)) {
                    uri = null;
                }
                if (!"head".equals(localName)) {
                    super.endElement(uri, localName, name);
                }
            }

            public void startPrefixMapping(String prefix, String uri) {
                // Intentionally empty: prefix mappings are not forwarded to the serializer.
            }

            public void endPrefixMapping(String prefix) {
                // Intentionally empty: prefix mappings are not forwarded to the serializer.
            }
        };
    }

    /**
     * Creates a handler that writes the document body as plain text.
     *
     * @param writer destination for the extracted text
     * @return a {@link BodyContentHandler} wrapping {@code writer}
     */
    private static ContentHandler getTextContentHandler(Writer writer) {
        return new BodyContentHandler(writer);
    }

    /**
     * Creates a handler that serialises SAX events as XML into {@code writer}.
     *
     * @param writer destination for the serialised XML
     * @return the transformer handler
     * @throws TransformerConfigurationException if the transformer handler cannot be created
     */
    private static ContentHandler getXmlContentHandler(Writer writer) throws TransformerConfigurationException {
        SAXTransformerFactory factory = (SAXTransformerFactory) TransformerFactory.newInstance();
        TransformerHandler handler = factory.newTransformerHandler();
        handler.getTransformer().setOutputProperty("method", "xml");
        handler.setResult(new StreamResult(writer));
        return handler;
    }
}

