/*
 * Decompiled with CFR 0.152.
 */
package org.opensearch.neuralsearch.processor;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.cluster.service.ClusterService;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexService;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.analysis.AnalysisRegistry;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.indices.IndicesService;
import org.opensearch.ingest.AbstractProcessor;
import org.opensearch.ingest.IngestDocument;
import org.opensearch.neuralsearch.processor.chunker.Chunker;
import org.opensearch.neuralsearch.processor.chunker.ChunkerFactory;
import org.opensearch.neuralsearch.processor.chunker.ChunkerParameterParser;

/**
 * Ingest processor that splits text fields of a document into chunks and writes the
 * resulting list of chunks to a target field, as directed by a field map.
 *
 * <p>The field map maps source field names either to a target field name (leaf mapping)
 * or to a nested map (which is applied to a nested object, or to every object in a list
 * of objects, found under the source field).
 *
 * <p>The chunking algorithm is selected by the single key of the algorithm map; when the
 * algorithm map is empty, {@code fixed_token_length} with no parameters is used.
 */
public final class TextChunkingProcessor extends AbstractProcessor {
    public static final String TYPE = "text_chunking";
    public static final String FIELD_MAP_FIELD = "field_map";
    public static final String ALGORITHM_FIELD = "algorithm";
    private static final String DEFAULT_ALGORITHM = "fixed_token_length";

    /** Parameter key holding the (remaining) number of chunks that may still be produced. */
    private static final String MAX_CHUNK_LIMIT_FIELD = "max_chunk_limit";
    /** Runtime parameter key holding the max token count resolved for the target index. */
    private static final String MAX_TOKEN_COUNT_FIELD = "max_token_count";
    /** Parameter key under which the analysis registry is handed to the chunker factory. */
    private static final String ANALYSIS_REGISTRY_FIELD = "analysis_registry";
    /** Key of the target index name inside the document's source-and-metadata map. */
    private static final String INDEX_NAME_FIELD = "_index";
    private static final int DEFAULT_MAX_CHUNK_LIMIT = 100;
    /** Sentinel value of {@code max_chunk_limit} that turns the limit off. */
    private static final int DISABLED_MAX_CHUNK_LIMIT = -1;

    // Both fields are assigned exactly once, by parseAlgorithmMap during construction.
    private int maxChunkLimit;
    private Chunker chunker;
    private final Map<String, Object> fieldMap;
    private final ClusterService clusterService;
    private final IndicesService indicesService;
    private final AnalysisRegistry analysisRegistry;
    private final Environment environment;

    /**
     * Creates the processor and builds its chunker from the algorithm configuration.
     *
     * @param tag              processor tag, passed to the superclass
     * @param description      processor description, passed to the superclass
     * @param fieldMap         source-field to target-field (or nested map) mapping
     * @param algorithmMap     at most one entry: algorithm name to its parameter object
     * @param environment      used to read node-level settings
     * @param clusterService   used to look up index metadata
     * @param indicesService   used to look up index-level settings
     * @param analysisRegistry handed to the chunker factory as a parameter
     * @throws IllegalArgumentException if the algorithm configuration is invalid
     */
    public TextChunkingProcessor(String tag, String description, Map<String, Object> fieldMap, Map<String, Object> algorithmMap, Environment environment, ClusterService clusterService, IndicesService indicesService, AnalysisRegistry analysisRegistry) {
        super(tag, description);
        this.fieldMap = fieldMap;
        this.environment = environment;
        this.clusterService = clusterService;
        this.indicesService = indicesService;
        this.analysisRegistry = analysisRegistry;
        this.parseAlgorithmMap(algorithmMap);
    }

    /**
     * Returns the processor type name.
     *
     * @return {@link #TYPE}
     */
    public String getType() {
        return TYPE;
    }

    /**
     * Validates the algorithm configuration, then initialises {@code maxChunkLimit} and
     * {@code chunker} from it.
     *
     * @param algorithmMap at most one entry: algorithm name to its parameter object
     * @throws IllegalArgumentException if more than one algorithm is given, the parameters
     *         are not an object, the algorithm is not supported, or {@code max_chunk_limit}
     *         is negative and not {@code -1}
     */
    private void parseAlgorithmMap(Map<String, Object> algorithmMap) {
        if (algorithmMap.size() > 1) {
            throw new IllegalArgumentException(String.format(Locale.ROOT, "Unable to create %s processor as [%s] contains multiple algorithms", TYPE, ALGORITHM_FIELD));
        }

        final String algorithmKey;
        final Map<String, Object> chunkerParameters;
        if (algorithmMap.isEmpty()) {
            algorithmKey = DEFAULT_ALGORITHM;
            chunkerParameters = new HashMap<>();
        } else {
            Map.Entry<String, Object> algorithmEntry = algorithmMap.entrySet().iterator().next();
            algorithmKey = algorithmEntry.getKey();
            Object algorithmValue = algorithmEntry.getValue();
            if (!(algorithmValue instanceof Map)) {
                throw new IllegalArgumentException(String.format(Locale.ROOT, "Unable to create %s processor as parameters for [%s] algorithm must be an object", TYPE, algorithmKey));
            }
            @SuppressWarnings("unchecked") // instanceof Map checked just above
            Map<String, Object> userParameters = (Map<String, Object>) algorithmValue;
            // Work on a copy: the analysis registry is added below, and that must neither leak
            // into the caller's configuration nor fail when the caller's map is unmodifiable.
            chunkerParameters = new HashMap<>(userParameters);
        }

        if (!ChunkerFactory.CHUNKER_ALGORITHMS.contains(algorithmKey)) {
            throw new IllegalArgumentException(String.format(Locale.ROOT, "Chunking algorithm [%s] is not supported. Supported chunking algorithms are %s", algorithmKey, ChunkerFactory.CHUNKER_ALGORITHMS));
        }

        this.maxChunkLimit = ChunkerParameterParser.parseIntegerParameter(chunkerParameters, MAX_CHUNK_LIMIT_FIELD, DEFAULT_MAX_CHUNK_LIMIT);
        // NOTE(review): 0 passes this check although the message says "positive" - confirm intent.
        if (this.maxChunkLimit < 0 && this.maxChunkLimit != DISABLED_MAX_CHUNK_LIMIT) {
            throw new IllegalArgumentException(String.format(Locale.ROOT, "Parameter [%s] must be positive or %s to disable this parameter", MAX_CHUNK_LIMIT_FIELD, DISABLED_MAX_CHUNK_LIMIT));
        }

        chunkerParameters.put(ANALYSIS_REGISTRY_FIELD, this.analysisRegistry);
        this.chunker = ChunkerFactory.create(algorithmKey, chunkerParameters);
    }

    /**
     * Tells whether {@code value} is a {@link List} whose elements are all strings.
     * An empty list qualifies; {@code null} and lists with a {@code null} element do not.
     */
    private boolean isListOfString(Object value) {
        if (!(value instanceof List)) {
            return false;
        }
        for (Object element : (List<?>) value) {
            if (!(element instanceof String)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Resolves the max token count to use for the document being processed.
     *
     * <p>The index-level value is used when the document names an index that has metadata
     * in the cluster state. Otherwise (no {@code _index} entry, or no metadata for that
     * index) the node-level value of {@code IndexSettings.MAX_TOKEN_COUNT_SETTING} is used.
     *
     * @param sourceAndMetadataMap the document's source-and-metadata map
     * @return the max token count to pass to the chunker
     */
    private int getMaxTokenCount(Map<String, Object> sourceAndMetadataMap) {
        Object indexName = sourceAndMetadataMap.get(INDEX_NAME_FIELD);
        if (indexName == null) {
            // No target index on the document: fall back instead of failing with an NPE.
            return this.defaultMaxTokenCount();
        }
        IndexMetadata indexMetadata = this.clusterService.state().metadata().index(indexName.toString());
        if (Objects.isNull(indexMetadata)) {
            return this.defaultMaxTokenCount();
        }
        IndexService indexService = this.indicesService.indexServiceSafe(indexMetadata.getIndex());
        return indexService.getIndexSettings().getMaxTokenCount();
    }

    /** Reads the max token count from the node-level settings of the environment. */
    private int defaultMaxTokenCount() {
        return IndexSettings.MAX_TOKEN_COUNT_SETTING.get(this.environment.settings());
    }

    /**
     * Validates the mapped source fields, then chunks them and stores the chunk lists in
     * the mapped target fields. The document is modified in place.
     *
     * @param ingestDocument the document to process
     * @return the same {@code ingestDocument} instance
     * @throws IllegalArgumentException if a mapped source field has an unsupported value
     */
    public IngestDocument execute(IngestDocument ingestDocument) {
        Map<String, Object> sourceAndMetadataMap = ingestDocument.getSourceAndMetadata();
        this.validateFieldsValue(sourceAndMetadataMap);

        // Fresh per-document parameters: the chunk budget is decremented while chunking.
        Map<String, Object> runtimeParameters = new HashMap<>();
        runtimeParameters.put(MAX_TOKEN_COUNT_FIELD, this.getMaxTokenCount(sourceAndMetadataMap));
        runtimeParameters.put(MAX_CHUNK_LIMIT_FIELD, this.maxChunkLimit);

        this.chunkMapType(sourceAndMetadataMap, this.fieldMap, runtimeParameters, 0);
        return ingestDocument;
    }

    /**
     * Checks every top-level source field named in the field map. Absent ({@code null})
     * fields are skipped; present fields must be a string, a list or a map.
     *
     * @throws IllegalArgumentException if a field is of another type or its nested content
     *         is invalid
     */
    private void validateFieldsValue(Map<String, Object> sourceAndMetadataMap) {
        for (Map.Entry<String, Object> embeddingFieldsEntry : this.fieldMap.entrySet()) {
            String sourceKey = embeddingFieldsEntry.getKey();
            Object sourceValue = sourceAndMetadataMap.get(sourceKey);
            if (Objects.isNull(sourceValue)) {
                continue;
            }
            if (sourceValue instanceof List || sourceValue instanceof Map) {
                this.validateNestedTypeValue(sourceKey, sourceValue, 1);
            } else if (!(sourceValue instanceof String)) {
                throw new IllegalArgumentException(String.format(Locale.ROOT, "field [%s] is neither string nor nested type, cannot process it", sourceKey));
            }
        }
    }

    /**
     * Recursively validates a nested value: lists are checked element-wise, maps are
     * checked value-wise (skipping {@code null} values), anything else must be a string.
     *
     * @param sourceKey   top-level field name, used in error messages
     * @param sourceValue the value to validate
     * @param maxDepth    current nesting depth, compared against the node-level
     *                    {@code MapperService.INDEX_MAPPING_DEPTH_LIMIT_SETTING}
     * @throws IllegalArgumentException if the depth limit is exceeded or a non-string
     *         value is found
     */
    private void validateNestedTypeValue(String sourceKey, Object sourceValue, int maxDepth) {
        long depthLimit = MapperService.INDEX_MAPPING_DEPTH_LIMIT_SETTING.get(this.environment.settings());
        if (maxDepth > depthLimit) {
            throw new IllegalArgumentException(String.format(Locale.ROOT, "map type field [%s] reached max depth limit, cannot process it", sourceKey));
        }
        if (sourceValue instanceof List) {
            this.validateListTypeValue(sourceKey, sourceValue, maxDepth);
        } else if (sourceValue instanceof Map) {
            for (Object nestedValue : ((Map<?, ?>) sourceValue).values()) {
                if (nestedValue != null) {
                    this.validateNestedTypeValue(sourceKey, nestedValue, maxDepth + 1);
                }
            }
        } else if (!(sourceValue instanceof String)) {
            throw new IllegalArgumentException(String.format(Locale.ROOT, "map type field [%s] has non-string type, cannot process it", sourceKey));
        }
    }

    /**
     * Validates the elements of a list: maps are validated recursively one level deeper,
     * every other element must be a non-null string.
     *
     * @param sourceKey   top-level field name, used in error messages
     * @param sourceValue the list to validate (must be a {@link List})
     * @param maxDepth    nesting depth of the list itself
     * @throws IllegalArgumentException if an element is {@code null} or is neither a map
     *         nor a string
     */
    private void validateListTypeValue(String sourceKey, Object sourceValue, int maxDepth) {
        for (Object value : (List<?>) sourceValue) {
            if (value instanceof Map) {
                this.validateNestedTypeValue(sourceKey, value, maxDepth + 1);
            } else if (value == null) {
                throw new IllegalArgumentException(String.format(Locale.ROOT, "list type field [%s] has null, cannot process it", sourceKey));
            } else if (!(value instanceof String)) {
                throw new IllegalArgumentException(String.format(Locale.ROOT, "list type field [%s] has non-string value, cannot process it", sourceKey));
            }
        }
    }

    /**
     * Applies a (possibly nested) field map to a source map.
     *
     * <p>For a leaf mapping the source value is chunked and the result is stored under the
     * target key; a source value that is neither a string nor a list of strings yields an
     * empty list. For a nested mapping the method recurses into the source object, or into
     * each map element when the source is a list; other source values are ignored.
     *
     * @param sourceAndMetadataMap map to read source fields from and write chunks to
     * @param fieldMap             field map for this nesting level
     * @param runtimeParameters    per-document chunker parameters, updated while chunking
     * @param chunkCount           running count threaded through the recursion; this method
     *                             does not modify it
     * @return the chunk count, unchanged from {@code chunkCount}
     */
    private int chunkMapType(Map<String, Object> sourceAndMetadataMap, Map<String, Object> fieldMap, Map<String, Object> runtimeParameters, int chunkCount) {
        int updatedChunkCount = chunkCount;
        for (Map.Entry<String, Object> fieldMapEntry : fieldMap.entrySet()) {
            String originalKey = fieldMapEntry.getKey();
            Object targetKey = fieldMapEntry.getValue();
            Object sourceObject = sourceAndMetadataMap.get(originalKey);

            if (!(targetKey instanceof Map)) {
                // Leaf mapping: chunk the value and store the result under the target field.
                sourceAndMetadataMap.put(String.valueOf(targetKey), this.chunkLeafType(sourceObject, runtimeParameters));
                continue;
            }

            @SuppressWarnings("unchecked") // instanceof Map checked just above
            Map<String, Object> nestedFieldMap = (Map<String, Object>) targetKey;
            if (sourceObject instanceof List) {
                for (Object source : (List<?>) sourceObject) {
                    if (source instanceof Map) {
                        @SuppressWarnings("unchecked")
                        Map<String, Object> nestedSource = (Map<String, Object>) source;
                        updatedChunkCount = this.chunkMapType(nestedSource, nestedFieldMap, runtimeParameters, updatedChunkCount);
                    }
                }
            } else if (sourceObject instanceof Map) {
                @SuppressWarnings("unchecked")
                Map<String, Object> nestedSource = (Map<String, Object>) sourceObject;
                updatedChunkCount = this.chunkMapType(nestedSource, nestedFieldMap, runtimeParameters, updatedChunkCount);
            }
        }
        return updatedChunkCount;
    }

    /**
     * Chunks one string and, unless the chunk limit is disabled ({@code -1}), reduces the
     * remaining {@code max_chunk_limit} in {@code runTimeParameters} by the number of
     * chunks produced.
     *
     * @param content           text to chunk
     * @param runTimeParameters per-document chunker parameters; updated in place
     * @return the chunks returned by the chunker
     */
    private List<String> chunkString(String content, Map<String, Object> runTimeParameters) {
        List<String> contentResult = this.chunker.chunk(content, runTimeParameters);
        int runtimeMaxChunkLimit = ChunkerParameterParser.parseIntegerParameter(runTimeParameters, MAX_CHUNK_LIMIT_FIELD, this.maxChunkLimit);
        if (runtimeMaxChunkLimit != DISABLED_MAX_CHUNK_LIMIT) {
            runTimeParameters.put(MAX_CHUNK_LIMIT_FIELD, runtimeMaxChunkLimit - contentResult.size());
        }
        return contentResult;
    }

    /**
     * Chunks every string of the list in order and concatenates the results.
     *
     * @param contentList       strings to chunk
     * @param runTimeParameters per-document chunker parameters; updated in place
     * @return all chunks, in input order
     */
    private List<String> chunkList(List<String> contentList, Map<String, Object> runTimeParameters) {
        List<String> result = new ArrayList<>();
        for (String content : contentList) {
            result.addAll(this.chunkString(content, runTimeParameters));
        }
        return result;
    }

    /**
     * Chunks a leaf value: a string or a list of strings. Any other value (including
     * {@code null}) produces an empty list.
     *
     * @param value             the leaf value read from the source map
     * @param runTimeParameters per-document chunker parameters; updated in place
     * @return the chunks, or an empty list when the value is not chunkable
     */
    private List<String> chunkLeafType(Object value, Map<String, Object> runTimeParameters) {
        if (value instanceof String) {
            return this.chunkString(value.toString(), runTimeParameters);
        }
        if (this.isListOfString(value)) {
            @SuppressWarnings("unchecked") // every element verified by isListOfString
            List<String> contentList = (List<String>) value;
            return this.chunkList(contentList, runTimeParameters);
        }
        return new ArrayList<>();
    }
}

