/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.rules.ngrams;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.ResourceBundle;
import java.util.Set;
import org.languagetool.AnalyzedSentence;
import org.languagetool.AnalyzedToken;
import org.languagetool.Experimental;
import org.languagetool.Language;
import org.languagetool.languagemodel.LanguageModel;
import org.languagetool.rules.Categories;
import org.languagetool.rules.ITSIssueType;
import org.languagetool.rules.Rule;
import org.languagetool.rules.RuleMatch;
import org.languagetool.rules.ngrams.GoogleToken;
import org.languagetool.rules.ngrams.Probability;
import org.languagetool.rules.patterns.PatternRule;
import org.languagetool.rules.patterns.PatternToken;
import org.languagetool.synthesis.Synthesizer;
import org.languagetool.tokenizers.Tokenizer;

/**
 * Rule that flags three-token phrases (trigrams) whose pseudo-probability, as
 * reported by a {@link LanguageModel}, is below a configurable threshold.
 *
 * <p>For every flagged trigram the rule tries to replace the middle token by
 * other inflected forms (see {@link #REPLACEMENTS}). If alternatives were
 * considered but none of them is at least as probable as the original phrase,
 * the match is dropped; otherwise the more probable variants are offered as
 * suggestions.
 */
@Experimental
public class NgramProbabilityRule
extends Rule {
    public static final String RULE_ID = "NGRAM_RULE";
    /** Compile-time switch for the diagnostic output written by {@link #debug(String, Object...)}. */
    private static final boolean DEBUG = false;
    /**
     * POS-tag based replacements: if a reading of the middle token fully matches
     * {@code tagRegex}, forms synthesized for {@code alternativeTag} are tried instead.
     */
    private static final List<Replacement> REPLACEMENTS = Collections.unmodifiableList(Arrays.asList(
            new Replacement("VBG", "VB"),
            new Replacement("VBG", "VBN"),
            new Replacement("VB", "VBG"),
            new Replacement("VB", "VBZ"),
            new Replacement("VB", "VBN"),
            new Replacement("VBZ", "VB"),
            new Replacement("VBZ", "VBP"),
            new Replacement("NNS", "NN"),
            new Replacement("NN", "NNS")));
    /** Pattern based replacements; currently none are configured. */
    private static final List<AdvancedReplacement> ADV_REPLACEMENTS = Collections.emptyList();
    private final LanguageModel lm;
    private final Language language;
    /** Trigrams with a pseudo-probability below this value are treated as potential errors. */
    private double minProbability = 1.0E-14;

    /**
     * @param messages resource bundle passed to the base rule and used to resolve the category
     * @param languageModel source of the n-gram pseudo-probabilities, must not be {@code null}
     * @param language language providing the tokenizer and synthesizer, must not be {@code null}
     * @throws NullPointerException if {@code languageModel} or {@code language} is {@code null}
     */
    public NgramProbabilityRule(ResourceBundle messages, LanguageModel languageModel, Language language) {
        super(messages);
        this.setCategory(Categories.TYPOS.getCategory(messages));
        this.setLocQualityIssueType(ITSIssueType.NonConformance);
        this.lm = Objects.requireNonNull(languageModel);
        this.language = Objects.requireNonNull(language);
    }

    @Override
    public String getId() {
        return RULE_ID;
    }

    /**
     * Sets the threshold below which a trigram's pseudo-probability triggers a match.
     *
     * @param minProbability the new threshold
     */
    @Experimental
    public void setMinProbability(double minProbability) {
        this.minProbability = minProbability;
    }

    /**
     * Checks every trigram (previous token, current token, next token) of the sentence
     * and reports those that are less probable than the configured minimum.
     *
     * @param sentence the sentence to check
     * @return the matches found, possibly empty
     * @throws IOException declared because the language model, synthesizer and pattern
     *         rule calls made here may throw it
     */
    @Override
    public RuleMatch[] match(AnalyzedSentence sentence) throws IOException {
        List<GoogleToken> tokens = GoogleToken.getGoogleTokens(sentence, true, this.getGoogleStyleWordTokenizer());
        List<RuleMatch> matches = new ArrayList<>();
        GoogleToken prevPrevToken = null;
        GoogleToken prevToken = null;
        int i = 0;
        for (GoogleToken googleToken : tokens) {
            String token = googleToken.token;
            // A trigram is only evaluated if two tokens precede the current one and one follows it.
            if (prevPrevToken != null && prevToken != null && i < tokens.size() - 1) {
                GoogleToken next = tokens.get(i + 1);
                Probability p = this.lm.getPseudoProbability(Arrays.asList(prevToken.token, token, next.token));
                // Use the token text explicitly instead of relying on GoogleToken.toString().
                String ngram = prevToken.token + " " + token + " " + next.token;
                if (p.getProb() < this.minProbability) {
                    Alternatives betterAlternatives = this.getBetterAlternatives(prevToken, token, next, googleToken, p, sentence);
                    // Report if no alternative could be tried at all, or if at least one
                    // alternative is at least as probable as the original phrase.
                    if (!betterAlternatives.alternativesConsidered || !betterAlternatives.alternatives.isEmpty()) {
                        String message = "The phrase '" + ngram + "' rarely occurs in the reference corpus (" + p.getOccurrences() + " times)";
                        RuleMatch match = new RuleMatch(this, sentence, prevToken.startPos, next.endPos, message);
                        List<String> suggestions = new ArrayList<>();
                        for (Alternative betterAlternative : betterAlternatives.alternatives) {
                            suggestions.add(prevToken.token + " " + betterAlternative.token + " " + next.token);
                        }
                        match.setSuggestedReplacements(suggestions);
                        if (this.acceptMatch(match, p, sentence)) {
                            matches.add(match);
                        }
                    } else {
                        this.debug("Ignoring match as all alternatives are less probable: '%s' in '%s'\n", ngram, sentence.getText());
                    }
                }
            }
            prevPrevToken = prevToken;
            prevToken = googleToken;
            ++i;
        }
        return matches.toArray(new RuleMatch[0]);
    }

    /**
     * Hook that lets subclasses veto a match before it is reported.
     * This implementation accepts every match.
     *
     * @param match the candidate match
     * @param p the probability of the flagged trigram
     * @param sentence the sentence being checked
     * @return {@code true} to keep the match
     */
    protected boolean acceptMatch(RuleMatch match, Probability p, AnalyzedSentence sentence) {
        return true;
    }

    /**
     * Collects alternatives for the middle token of a low-probability trigram, using
     * both the POS-tag based and the pattern based replacement tables.
     *
     * @param prevToken the token before the middle token
     * @param token the text of the middle token
     * @param next the token after the middle token
     * @param googleToken the middle token itself
     * @param p the probability of the original trigram
     * @param sentence the sentence being checked
     * @return the alternatives found, plus whether any replacement was applicable at all
     */
    private Alternatives getBetterAlternatives(GoogleToken prevToken, String token, GoogleToken next, GoogleToken googleToken, Probability p, AnalyzedSentence sentence) throws IOException {
        List<Alternative> betterAlternatives = new ArrayList<>();
        boolean alternativesConsidered = false;
        for (Replacement replacement : REPLACEMENTS) {
            Optional<List<Alternative>> alternatives = this.getBetterAlternatives(replacement, prevToken, googleToken, next, p);
            if (alternatives.isPresent()) {
                betterAlternatives.addAll(alternatives.get());
                alternativesConsidered = true;
            }
        }
        for (AdvancedReplacement advReplacement : ADV_REPLACEMENTS) {
            PatternRule rule = new PatternRule("tmpId", this.language, advReplacement.patternTokens, "unused_description", "unused_message", "unused_shortMessage");
            for (RuleMatch match : rule.match(sentence)) {
                // Only pattern matches that strictly enclose the middle token are used.
                // NOTE(review): both comparisons are strict, so a token starting or ending
                // exactly at the match boundary is skipped - confirm this is intended.
                boolean tokenInsideMatch = googleToken.startPos > match.getFromPos() && googleToken.endPos < match.getToPos();
                if (!tokenInsideMatch) {
                    continue;
                }
                String replacement = advReplacement.alternativeText.replace("$1", token);
                List<String> newNgram = new ArrayList<>();
                newNgram.add(prevToken.token);
                Collections.addAll(newNgram, replacement.split(" "));
                newNgram.add(next.token);
                Probability newProb = this.lm.getPseudoProbability(newNgram);
                // NOTE(review): the factor presumably compensates for the replacement n-gram
                // being longer than the original trigram - confirm before tuning it.
                if (newProb.getProb() * 1000000.0 > p.getProb()) {
                    betterAlternatives.add(new Alternative(replacement, newProb));
                    this.debug("More probable: %s\n", replacement);
                } else {
                    this.debug("Less probable: %s\n", replacement);
                }
                alternativesConsidered = true;
            }
        }
        return new Alternatives(betterAlternatives, alternativesConsidered);
    }

    /**
     * Applies a single POS-tag based replacement to the middle token.
     *
     * @param replacement the replacement to apply
     * @param prevToken the token before the middle token
     * @param token the middle token
     * @param next the token after the middle token
     * @param p the probability of the original trigram
     * @return an empty {@code Optional} if the replacement is not applicable (no reading
     *         matches its tag regex, or the language has no synthesizer); otherwise the
     *         possibly empty list of forms that are at least as probable as the original
     */
    private Optional<List<Alternative>> getBetterAlternatives(Replacement replacement, GoogleToken prevToken, GoogleToken token, GoogleToken next, Probability p) throws IOException {
        Optional<AnalyzedToken> reading = this.getByPosTag(token.getPosTags(), replacement.tagRegex);
        if (!reading.isPresent()) {
            return Optional.empty();
        }
        Synthesizer synthesizer = this.language.getSynthesizer();
        if (synthesizer == null) {
            return Optional.empty();
        }
        List<Alternative> betterAlternatives = new ArrayList<>();
        // The original trigram is only needed for the debug output.
        List<String> ngram = Arrays.asList(prevToken.token, token.token, next.token);
        String[] forms = synthesizer.synthesize(new AnalyzedToken(token.token, "not_used", reading.get().getLemma()), replacement.alternativeTag);
        for (String alternativeToken : forms) {
            // Skip the form that equals the original word. Compare against the token text:
            // comparing the String with the GoogleToken object itself is never true and
            // would let the unchanged phrase through as a "better" alternative.
            if (alternativeToken.equals(token.token)) {
                continue;
            }
            List<String> alternativeNgram = Arrays.asList(prevToken.token, alternativeToken, next.token);
            Probability alternativeProbability = this.lm.getPseudoProbability(alternativeNgram);
            if (alternativeProbability.getProb() >= p.getProb()) {
                this.debug("More probable alternative to '%s': %s\n", ngram, alternativeNgram);
                betterAlternatives.add(new Alternative(alternativeToken, alternativeProbability));
            } else {
                this.debug("Less probable alternative to '%s': %s\n", ngram, alternativeNgram);
            }
        }
        return Optional.of(betterAlternatives);
    }

    /**
     * Returns the first reading whose POS tag is non-null and fully matches the given regex.
     *
     * @param tokens the readings to search
     * @param wantedPosTagRegex regular expression the whole POS tag has to match
     * @return the first matching reading, or an empty {@code Optional}
     */
    private Optional<AnalyzedToken> getByPosTag(Set<AnalyzedToken> tokens, String wantedPosTagRegex) {
        for (AnalyzedToken token : tokens) {
            String posTag = token.getPOSTag();
            if (posTag != null && posTag.matches(wantedPosTagRegex)) {
                return Optional.of(token);
            }
        }
        return Optional.empty();
    }

    @Override
    public String getDescription() {
        return "Assume errors for phrases (ngrams) that occur rarely in a reference index";
    }

    /**
     * Returns the tokenizer used to split the sentence before the n-gram lookup.
     * This implementation uses the language's word tokenizer; subclasses may override it.
     */
    protected Tokenizer getGoogleStyleWordTokenizer() {
        return this.language.getWordTokenizer();
    }

    /**
     * Prints a formatted diagnostic message to standard output, but only when
     * {@link #DEBUG} is enabled; otherwise this is a no-op.
     */
    private void debug(String message, Object ... vars) {
        if (DEBUG) {
            System.out.printf(message, vars);
        }
    }

    /** Result of an alternatives search: the alternatives found and whether any were tried. */
    static class Alternatives {
        final List<Alternative> alternatives;
        /** {@code true} if at least one replacement was applicable, even if it yielded nothing. */
        final boolean alternativesConsidered;

        Alternatives(List<Alternative> alternatives, boolean alternativesConsidered) {
            this.alternatives = alternatives;
            this.alternativesConsidered = alternativesConsidered;
        }
    }

    /** A replacement text for the middle token together with the probability of the resulting n-gram. */
    static class Alternative {
        final String token;
        final Probability p;

        Alternative(String token, Probability p) {
            this.token = token;
            this.p = p;
        }
    }

    /** Replacement driven by a token pattern; {@code $1} in the text is replaced by the middle token. */
    static class AdvancedReplacement {
        final List<PatternToken> patternTokens;
        final String alternativeText;

        AdvancedReplacement(List<PatternToken> patternTokens, String alternativeText) {
            this.patternTokens = patternTokens;
            this.alternativeText = alternativeText;
        }
    }

    /** Replacement driven by POS tags: readings matching {@code tagRegex} are re-synthesized with {@code alternativeTag}. */
    static class Replacement {
        final String tagRegex;
        final String alternativeTag;

        Replacement(String tagRegex, String alternativeTag) {
            this.tagRegex = tagRegex;
            this.alternativeTag = alternativeTag;
        }
    }
}

