/*
 * Decompiled with CFR 0.152.
 */
package morfologik.tools;

import com.beust.jcommander.Parameter;
import com.beust.jcommander.Parameters;
import com.beust.jcommander.ParametersDelegate;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import morfologik.fsa.FSA;
import morfologik.fsa.builders.FSABuilder;
import morfologik.fsa.builders.FSASerializer;
import morfologik.stemming.BufferUtils;
import morfologik.stemming.DictionaryMetadata;
import morfologik.stemming.ISequenceEncoder;
import morfologik.tools.BinaryInput;
import morfologik.tools.CliTool;
import morfologik.tools.ExitStatus;
import morfologik.tools.ExitStatusException;
import morfologik.tools.SerializationFormat;
import morfologik.tools.ValidateFileExists;

/**
 * Command-line tool that compiles a text dictionary into a serialized automaton.
 *
 * <p>The input file is read as newline-separated byte sequences, each holding two or three
 * columns (base, inflected and an optional tag) divided by the separator byte declared in the
 * dictionary metadata. The metadata file must exist at the location returned by
 * {@link DictionaryMetadata#getExpectedMetadataLocation}. The output is written next to the
 * metadata file, with the {@code .info} suffix replaced by {@code .dict}.
 */
@Parameters(commandNames={"dict_compile"}, commandDescription="Compiles a morphological dictionary automaton.")
public class DictCompile
extends CliTool {
    /** Path to the text dictionary to compile. */
    @Parameter(names={"-i", "--input"}, description="The input file (base,inflected,tag). An associated metadata (*.info) file must exist.", required=true, validateValueWith=ValidateFileExists.class)
    private Path input;
    /** When {@code true} (the default), separator counts of all rows are checked before encoding. */
    @Parameter(names={"--validate"}, arity=1, description="Validate input to make sure it makes sense.")
    private boolean validate = true;
    /** Serialization format used to write the automaton. */
    @Parameter(names={"-f", "--format"}, description="Automaton serialization format.")
    private SerializationFormat format = SerializationFormat.FSA5;
    /** When {@code false}, an already existing output file aborts the compilation. */
    @Parameter(names={"--overwrite"}, description="Overwrite the output file if it exists.")
    private boolean overwrite;
    /** Options controlling how the raw input lines are read. */
    @ParametersDelegate
    private final BinaryInput binaryInput;

    /** Creates an instance whose options are expected to be filled in by the command-line parser. */
    DictCompile() {
        this.binaryInput = new BinaryInput();
    }

    /**
     * Creates a fully configured instance for programmatic use. The serialization format keeps
     * its default value ({@link SerializationFormat#FSA5}).
     *
     * @param input path to the input dictionary; must not be {@code null}
     * @param overwrite whether an existing output file may be overwritten
     * @param validate whether separator counts of input rows are validated before encoding
     * @param acceptBom forwarded to {@link BinaryInput}
     * @param acceptCr forwarded to {@link BinaryInput}
     * @param ignoreEmpty forwarded to {@link BinaryInput}
     */
    public DictCompile(Path input, boolean overwrite, boolean validate, boolean acceptBom, boolean acceptCr, boolean ignoreEmpty) {
        this.input = DictCompile.checkNotNull(input);
        this.overwrite = overwrite;
        this.validate = validate;
        this.binaryInput = new BinaryInput(acceptBom, acceptCr, ignoreEmpty);
    }

    /**
     * Reads the metadata and the input rows, encodes every row, builds the automaton and
     * serializes it to the output file.
     *
     * @return {@link ExitStatus#SUCCESS} when the dictionary was written, or
     *         {@link ExitStatus#ERROR_OTHER} when the metadata file is missing
     * @throws ExitStatusException if the output exists and overwriting was not requested, or if
     *         the input rows are malformed
     * @throws Exception on any I/O failure while reading the inputs or writing the output
     */
    @Override
    public ExitStatus call() throws Exception {
        Path metadataPath = DictionaryMetadata.getExpectedMetadataLocation(this.input);
        if (!Files.isRegularFile(metadataPath)) {
            System.err.println("Dictionary metadata file for the input does not exist: " + metadataPath);
            System.err.println("The metadata file (with at least the column separator and byte encoding) is required. Check out the examples.");
            return ExitStatus.ERROR_OTHER;
        }

        // The output lives next to the metadata file: "name.info" -> "name.dict".
        Path output = metadataPath.resolveSibling(metadataPath.getFileName().toString().replaceAll("\\.info$", ".dict"));
        if (!this.overwrite && Files.exists(output)) {
            throw new ExitStatusException(ExitStatus.ERROR_CONFIRMATION_REQUIRED, "Output dictionary file already exists: %s, use %s to override.", output, "--overwrite");
        }

        DictionaryMetadata metadata;
        try (InputStream is = new BufferedInputStream(Files.newInputStream(metadataPath))) {
            metadata = DictionaryMetadata.read(is);
        }

        List<byte[]> sequences = this.binaryInput.readBinarySequences(this.input, (byte) '\n');
        // The decoder is only used to obtain the charset for rendering rows in error messages.
        CharsetDecoder charsetDecoder = metadata.getDecoder().onMalformedInput(CodingErrorAction.REPORT).onUnmappableCharacter(CodingErrorAction.REPORT);
        byte separator = metadata.getSeparator();
        ISequenceEncoder sequenceEncoder = metadata.getSequenceEncoderType().get();

        // Honor the --validate option; it used to be parsed but never consulted.
        if (this.validate) {
            DictCompile.validateSeparatorCounts(sequences, separator, metadata, charsetDecoder);
        }
        DictCompile.encodeRows(sequences, separator, sequenceEncoder, charsetDecoder);

        Collections.sort(sequences, FSABuilder.LEXICAL_ORDERING);
        FSA fsa = FSABuilder.build(sequences);
        FSASerializer serializer = this.format.getSerializer();
        try (OutputStream os = new BufferedOutputStream(Files.newOutputStream(output))) {
            serializer.serialize(fsa, os);
        }
        return ExitStatus.SUCCESS;
    }

    /**
     * Checks that the first row has one or two separators and that every following row has the
     * same number of separators as the first one. An empty input passes trivially.
     *
     * @throws ExitStatusException with {@link ExitStatus#ERROR_OTHER} on the first offending row
     */
    private static void validateSeparatorCounts(List<byte[]> sequences, byte separator, DictionaryMetadata metadata, CharsetDecoder charsetDecoder) throws ExitStatusException {
        if (sequences.isEmpty()) {
            return;
        }
        Iterator<byte[]> rows = sequences.iterator();
        byte[] row = rows.next();
        int separatorCount = DictCompile.countOf(separator, row);
        if (separatorCount < 1 || separatorCount > 2) {
            // Show the separator literally when it is an identifier character, otherwise as a hex byte.
            String declared = Character.isJavaIdentifierPart(metadata.getSeparatorAsChar())
                ? "'" + Character.toString(metadata.getSeparatorAsChar()) + "'"
                : "0x" + Integer.toHexString(separator & 0xFF);
            throw new ExitStatusException(ExitStatus.ERROR_OTHER, "Invalid input. Each row must consist of [base,inflected,tag?] columns, where ',' is a separator character (declared as: %s). This row contains %d separator characters: %s", declared, separatorCount, new String(row, charsetDecoder.charset()));
        }
        while (rows.hasNext()) {
            row = rows.next();
            int count = DictCompile.countOf(separator, row);
            if (count != separatorCount) {
                throw new ExitStatusException(ExitStatus.ERROR_OTHER, "The number of separators (%d) is inconsistent with previous lines: %s", count, new String(row, charsetDecoder.charset()));
            }
        }
    }

    /**
     * Replaces every row in {@code sequences}, in place, with its assembled form: the second
     * column, the separator, the encoder's output for the (second column, first column) pair
     * and, only when the tag is non-empty, another separator followed by the tag.
     *
     * @throws ExitStatusException if a row contains no separator at all (possible only when
     *         validation was skipped)
     */
    private static void encodeRows(List<byte[]> sequences, byte separator, ISequenceEncoder sequenceEncoder, CharsetDecoder charsetDecoder) throws ExitStatusException {
        // Scratch buffers, grown on demand and reused across rows.
        ByteBuffer encoded = ByteBuffer.allocate(0);
        ByteBuffer base = ByteBuffer.allocate(0);
        ByteBuffer inflected = ByteBuffer.allocate(0);
        ByteBuffer tag = ByteBuffer.allocate(0);
        ByteBuffer assembled = ByteBuffer.allocate(0);

        int max = sequences.size();
        for (int i = 0; i < max; ++i) {
            byte[] row = sequences.get(i);
            int sep1 = DictCompile.indexOf(separator, row, 0);
            if (sep1 < 0) {
                // Without this guard the put() below would fail with an opaque index error.
                throw new ExitStatusException(ExitStatus.ERROR_OTHER, "Invalid input. No separator character found in row: %s", new String(row, charsetDecoder.charset()));
            }
            int sep2 = DictCompile.indexOf(separator, row, sep1 + 1);
            if (sep2 < 0) {
                // Two-column row: the second column runs to the end of the row.
                sep2 = row.length;
            }

            base = BufferUtils.clearAndEnsureCapacity(base, sep1);
            base.put(row, 0, sep1);
            base.flip();

            int inflectedLength = sep2 - (sep1 + 1);
            inflected = BufferUtils.clearAndEnsureCapacity(inflected, inflectedLength);
            inflected.put(row, sep1 + 1, inflectedLength);
            inflected.flip();

            // Clamp to zero: for two-column rows the raw difference is -1.
            int tagLength = Math.max(0, row.length - (sep2 + 1));
            tag = BufferUtils.clearAndEnsureCapacity(tag, tagLength);
            if (tagLength > 0) {
                tag.put(row, sep2 + 1, tagLength);
            }
            tag.flip();

            encoded = sequenceEncoder.encode(encoded, inflected, base);

            assembled = BufferUtils.clearAndEnsureCapacity(assembled, inflected.remaining() + 1 + encoded.remaining() + 1 + tag.remaining());
            assembled.put(inflected);
            assembled.put(separator);
            assembled.put(encoded);
            if (tag.hasRemaining()) {
                assembled.put(separator);
                assembled.put(tag);
            }
            assembled.flip();
            sequences.set(i, BufferUtils.toArray(assembled));
        }
    }

    /** Returns the number of occurrences of {@code separator} in {@code row}. */
    private static int countOf(byte separator, byte[] row) {
        int cnt = 0;
        for (byte b : row) {
            if (b == separator) {
                ++cnt;
            }
        }
        return cnt;
    }

    /**
     * Returns the index of the first occurrence of {@code separator} in {@code row} at or after
     * {@code fromIndex}, or {@code -1} if there is none.
     */
    private static int indexOf(byte separator, byte[] row, int fromIndex) {
        for (int i = fromIndex; i < row.length; ++i) {
            if (row[i] == separator) {
                return i;
            }
        }
        return -1;
    }

    /** Command-line entry point; delegates to the inherited two-argument {@code main}. */
    public static void main(String[] args) {
        DictCompile.main(args, (CliTool)new DictCompile());
    }
}

