/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.tagging.disambiguation;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.languagetool.AnalyzedSentence;
import org.languagetool.AnalyzedToken;
import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.JLanguageTool;
import org.languagetool.tagging.disambiguation.AbstractDisambiguator;
import org.languagetool.tools.StringTools;

/**
 * Disambiguator that marks expressions listed in a resource file by adding
 * {@code <tag>} / {@code </tag>} readings to the tokens of a sentence.
 *
 * <p>Every line of the resource that is neither empty nor starts with {@code #}
 * must consist of exactly two tab-separated parts: the expression and its tag.
 * When an expression is found in a sentence, the token at which the scan started
 * receives an additional reading {@code <tag>} and the token at which the match
 * completed receives an additional reading {@code </tag>}. A match that consists
 * of a single token receives both readings.
 *
 * <p>The resource is loaded on the first call to {@link #disambiguate(AnalyzedSentence)}.
 */
public class MultiWordChunker extends AbstractDisambiguator {
    private final String filename;
    private final boolean allowFirstCapitalized;
    /**
     * First space-separated part of each expression containing a space, mapped to the
     * largest number of parts among the expressions sharing that first part.
     * Also serves as the "initialised" guard of {@link #lazyInit()}: it is volatile and
     * assigned last, so a thread that reads it as non-null also sees the other two maps.
     */
    private volatile Map<String, Integer> mStartSpace;
    /** First character of each expression without a space, mapped to the largest such expression length. */
    private Map<String, Integer> mStartNoSpace;
    /** Complete expression mapped to its tag. */
    private Map<String, String> mFull;

    /**
     * Creates a chunker that does not retry capitalized words in lowercase.
     *
     * @param filename resource path handed to the data broker when the list is first needed
     */
    public MultiWordChunker(String filename) {
        this(filename, false);
    }

    /**
     * @param filename resource path handed to the data broker when the list is first needed
     * @param allowFirstCapitalized if {@code true}, a lookup key for which
     *        {@code StringTools.isCapitalizedWord} holds is looked up a second time in lowercase
     */
    public MultiWordChunker(String filename, boolean allowFirstCapitalized) {
        this.filename = filename;
        this.allowFirstCapitalized = allowFirstCapitalized;
    }

    /**
     * Loads the expression list on first use and builds the three lookup maps.
     *
     * @throws RuntimeException if a line does not consist of exactly two tab-separated
     *         parts, or if reading the resource fails with an {@link IOException}
     */
    private void lazyInit() {
        if (this.mStartSpace != null) {
            return;
        }
        Map<String, Integer> startSpace = new HashMap<>();
        Map<String, Integer> startNoSpace = new HashMap<>();
        Map<String, String> full = new HashMap<>();
        try (InputStream stream = JLanguageTool.getDataBroker().getFromResourceDirAsStream(this.filename)) {
            for (String posToken : this.loadWords(stream)) {
                String[] tokenAndTag = posToken.split("\t");
                if (tokenAndTag.length != 2) {
                    throw new RuntimeException("Invalid format in " + this.filename + ": '" + posToken + "', expected two tab-separated parts");
                }
                // Lines are trimmed by loadWords, so the expression is non-empty and
                // does not start with a space.
                String expression = tokenAndTag[0];
                if (expression.indexOf(' ') > 0) {
                    String[] parts = expression.split(" ");
                    recordMaxLength(startSpace, parts[0], parts.length);
                } else {
                    recordMaxLength(startNoSpace, expression.substring(0, 1), expression.length());
                }
                full.put(expression, tokenAndTag[1]);
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        // Publish the guard field last: disambiguate() dereferences all three maps as
        // soon as mStartSpace is non-null.
        this.mStartNoSpace = startNoSpace;
        this.mFull = full;
        this.mStartSpace = startSpace;
    }

    /** Stores {@code length} under {@code key} unless a value at least as large is already stored. */
    private static void recordMaxLength(Map<String, Integer> lengths, String key, int length) {
        Integer known = lengths.get(key);
        if (known == null || known < length) {
            lengths.put(key, length);
        }
    }

    /**
     * Adds opening and closing tag readings for every listed expression found in the sentence.
     *
     * <p>The array returned by {@code input.getTokens()} is updated in place and then wrapped
     * in a new {@link AnalyzedSentence}.
     * NOTE(review): whether that array is a copy or the sentence's internal storage is not
     * visible here - confirm before relying on {@code input} being left untouched.
     *
     * @param input the sentence to process
     * @return a new sentence built from the (possibly re-tagged) tokens
     * @throws RuntimeException if the expression list cannot be loaded or is malformed
     */
    @Override
    public final AnalyzedSentence disambiguate(AnalyzedSentence input) {
        this.lazyInit();
        AnalyzedTokenReadings[] output = input.getTokens();
        for (int i = 0; i < output.length; ++i) {
            String tok = output[i].getToken();
            if (tok.length() < 1) {
                continue;
            }
            // The lookup key is this token joined with the next one when no whitespace
            // token separates them.
            if (i + 1 < output.length && !output[i + 1].isWhitespace()) {
                tok = tok + output[i + 1].getToken();
            }
            // At most two passes: the key as written, then (optionally) in lowercase.
            for (int attempt = 0; attempt < 2; ++attempt) {
                boolean lowercaseFirst = attempt == 1;
                StringBuilder tokens = new StringBuilder();
                Integer maxParts = this.mStartSpace.get(tok);
                if (maxParts != null) {
                    this.tagSpaceSeparated(output, i, maxParts, lowercaseFirst, tokens);
                }
                // NOTE(review): this second scan appends to the buffer the first scan may
                // already have filled. That is kept as is, because clearing the buffer would
                // change which entries match (and could add the same reading twice) - confirm
                // the intended behaviour before changing it.
                if (this.mStartNoSpace.containsKey(tok.substring(0, 1))) {
                    this.tagContiguous(output, i, lowercaseFirst, tokens);
                }
                if (lowercaseFirst || !this.allowFirstCapitalized || !StringTools.isCapitalizedWord(tok)) {
                    break;
                }
                // NOTE(review): toLowerCase() uses the JVM default locale - confirm that
                // this is intended.
                tok = tok.toLowerCase();
            }
        }
        return new AnalyzedSentence(output);
    }

    /**
     * Scans forward from {@code start}, joining non-whitespace tokens (a single space is
     * inserted for a whitespace token that follows a non-whitespace one) and tagging
     * {@code output[start]} / {@code output[j]} whenever the text built so far is a listed
     * expression. Stops at the end of the sentence or at the whitespace token at which
     * {@code maxParts} separators have been counted.
     */
    private void tagSpaceSeparated(AnalyzedTokenReadings[] output, int start, int maxParts,
            boolean lowercaseFirst, StringBuilder tokens) {
        int separators = 0;
        for (int j = start; j < output.length; ++j) {
            if (!output[j].isWhitespace()) {
                appendToken(tokens, output[j], lowercaseFirst && j == start);
                String toks = tokens.toString();
                if (this.mFull.containsKey(toks)) {
                    output[start] = this.prepareNewReading(toks, output[start].getToken(), output[start], false);
                    // The closing tag belongs to the token that completed the match. The
                    // previous code indexed with a counter that was still 0 while j == start,
                    // which put the closing tag on the first token of the sentence.
                    output[j] = this.prepareNewReading(toks, output[j].getToken(), output[j], true);
                }
            } else {
                // Only the first of several consecutive whitespace tokens counts.
                // NOTE(review): the guard is "j > 1", so a whitespace token at index 1 is
                // never counted - confirm whether "j > 0" was meant.
                if (j > 1 && !output[j - 1].isWhitespace()) {
                    tokens.append(' ');
                    ++separators;
                }
                if (separators == maxParts) {
                    break;
                }
            }
        }
    }

    /**
     * Scans forward from {@code start} up to the next whitespace token, appending each token
     * to {@code tokens} and tagging {@code output[start]} / {@code output[j]} whenever the
     * text built so far is a listed expression.
     */
    private void tagContiguous(AnalyzedTokenReadings[] output, int start, boolean lowercaseFirst,
            StringBuilder tokens) {
        for (int j = start; j < output.length && !output[j].isWhitespace(); ++j) {
            appendToken(tokens, output[j], lowercaseFirst && j == start);
            String toks = tokens.toString();
            if (this.mFull.containsKey(toks)) {
                output[start] = this.prepareNewReading(toks, output[start].getToken(), output[start], false);
                output[j] = this.prepareNewReading(toks, output[j].getToken(), output[j], true);
            }
        }
    }

    /** Appends the token text to {@code target}, lowercased first when {@code lowercase} is set. */
    private static void appendToken(StringBuilder target, AnalyzedTokenReadings reading, boolean lowercase) {
        String text = reading.getToken();
        target.append(lowercase ? text.toLowerCase() : text);
    }

    /**
     * Returns a copy of {@code token} with one extra reading whose tag string is
     * {@code <tag>} (or {@code </tag>} when {@code isLast} is set), where {@code tag} is the
     * entry stored for {@code tokens} in the expression map.
     *
     * @param tokens the matched expression; used as map key and passed as the third
     *        {@code AnalyzedToken} constructor argument
     * @param tok the text of the token being annotated
     * @param token the readings to extend
     * @param isLast whether to build the closing form of the tag
     */
    private AnalyzedTokenReadings prepareNewReading(String tokens, String tok, AnalyzedTokenReadings token, boolean isLast) {
        StringBuilder sb = new StringBuilder();
        sb.append('<');
        if (isLast) {
            sb.append('/');
        }
        sb.append(this.mFull.get(tokens));
        sb.append('>');
        AnalyzedToken tokenStart = new AnalyzedToken(tok, sb.toString(), tokens);
        return this.setAndAnnotate(token, tokenStart);
    }

    /**
     * Builds a new {@link AnalyzedTokenReadings} from the readings and start position of
     * {@code oldReading}, adds {@code newReading}, and carries over the whitespace-before
     * flag and chunk tags. The historical annotations are extended with a line describing
     * the change.
     */
    private AnalyzedTokenReadings setAndAnnotate(AnalyzedTokenReadings oldReading, AnalyzedToken newReading) {
        String old = oldReading.toString();
        String prevAnot = oldReading.getHistoricalAnnotations();
        AnalyzedTokenReadings newAtr = new AnalyzedTokenReadings(oldReading.getReadings(), oldReading.getStartPos());
        newAtr.setWhitespaceBefore(oldReading.isWhitespaceBefore());
        newAtr.addReading(newReading);
        newAtr.setHistoricalAnnotations(this.annotateToken(prevAnot, old, newAtr.toString()));
        newAtr.setChunkTags(oldReading.getChunkTags());
        return newAtr;
    }

    /** Appends a {@code MULTIWORD_CHUNKER: old -> new} line to the previous annotations. */
    private String annotateToken(String prevAnot, String oldReading, String newReading) {
        return prevAnot + "\nMULTIWORD_CHUNKER: " + oldReading + " -> " + newReading;
    }

    /**
     * Reads the stream as UTF-8 and returns its trimmed lines, leaving out empty lines and
     * lines whose first character is {@code #}. The stream is closed when reading ends.
     *
     * @throws RuntimeException wrapping any {@link IOException} raised while reading
     */
    private List<String> loadWords(InputStream stream) {
        List<String> lines = new ArrayList<>();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                line = line.trim();
                if (line.isEmpty() || line.charAt(0) == '#') {
                    continue;
                }
                lines.add(line);
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        return lines;
    }
}

