/*
 * Decompiled with CFR 0.152.
 */
package edu.berkeley.nlp.lm.io;

import edu.berkeley.nlp.lm.WordIndexer;
import edu.berkeley.nlp.lm.io.ArpaLmReaderCallback;
import edu.berkeley.nlp.lm.io.IOUtils;
import edu.berkeley.nlp.lm.io.LmReader;
import edu.berkeley.nlp.lm.util.Logger;
import edu.berkeley.nlp.lm.values.ProbBackoffPair;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Reads a language model file in ARPA text format and reports its contents
 * to an {@link ArpaLmReaderCallback}.
 *
 * <p>The file is read in two phases: the header ({@code ngram N=count}
 * lines up to the {@code \1-grams:} marker) and then the n-gram sections.
 * Every n-gram line is expected to look like
 * {@code logProb<TAB>w1 w2 ... wN[<TAB>backoff]}. Words are mapped to ids
 * through the supplied {@link WordIndexer}; sections above the configured
 * maximum order are not read.
 *
 * <p>Instances keep mutable parsing state (reader, counters), so a single
 * instance must not run {@link #parse} concurrently.
 *
 * @param <W> word type handled by the word indexer
 */
public class ArpaLmReader<W>
implements LmReader<ProbBackoffPair, ArpaLmReaderCallback<ProbBackoffPair>> {
    public static final String START_SYMBOL = "<s>";
    public static final String END_SYMBOL = "</s>";
    public static final String UNK_SYMBOL = "<unk>";

    /** Prefix of the header lines that carry the per-order n-gram counts. */
    private static final String NGRAM_COUNT_PREFIX = "ngram ";
    /** Marker that ends the header and opens the unigram section. */
    private static final String UNIGRAM_SECTION_MARKER = "\\1-grams:";
    /** Prefix of the marker that terminates the n-gram data. */
    private static final String END_MARKER = "\\end";
    /** A progress message is logged every this many data lines. */
    private static final int PROGRESS_INTERVAL = 100000;

    private BufferedReader reader;
    private int currentNGramLength = 1;
    int currentNGramCount = 0;
    /** One more than the number of lines consumed so far through {@link #readLine()}. */
    private int lineNumber = 1;
    private final WordIndexer<W> wordIndexer;
    private final int maxOrder;
    private final String file;

    /**
     * Reads the next line from the underlying reader and advances the line counter.
     *
     * @return the next line, or {@code null} at end of file
     * @throws IOException if the underlying reader fails
     */
    protected String readLine() throws IOException {
        ++this.lineNumber;
        return this.reader.readLine();
    }

    /**
     * @param file path of the ARPA file; it is opened when {@link #parse} is called
     * @param wordIndexer indexer used to turn word strings into ids
     * @param maxNgramOrder highest n-gram order to read; higher-order sections are skipped
     */
    public ArpaLmReader(String file, WordIndexer<W> wordIndexer, int maxNgramOrder) {
        this.file = file;
        this.wordIndexer = wordIndexer;
        this.maxOrder = maxNgramOrder;
    }

    /**
     * Parses the whole file, feeding the header counts and every n-gram to
     * {@code callback}, then sets the start, end and unknown-word symbols on
     * the word indexer.
     *
     * @param callback receiver of the parsed counts and n-grams
     * @throws RuntimeException on I/O failure or malformed input
     */
    @Override
    public void parse(ArpaLmReaderCallback<ProbBackoffPair> callback) {
        this.currentNGramLength = 1;
        this.currentNGramCount = 0;
        this.lineNumber = 1;
        this.reader = IOUtils.openInHard(this.file);
        Logger.startTrack("Parsing ARPA language model file", new Object[0]);
        try {
            List<Long> numNGrams = this.parseHeader();
            callback.initWithLengths(numNGrams);
            this.parseNGrams(callback);
        }
        finally {
            // Safety net: releases the file even when the header is malformed
            // or a callback throws. Closing an already-closed reader is a no-op.
            this.closeReader();
        }
        Logger.endTrack();
        callback.cleanup();
        // Look up (or add) the special symbols by string, then register them on the indexer.
        this.wordIndexer.setStartSymbol(this.wordIndexer.getWord(this.wordIndexer.getOrAddIndexFromString(START_SYMBOL)));
        this.wordIndexer.setEndSymbol(this.wordIndexer.getWord(this.wordIndexer.getOrAddIndexFromString(END_SYMBOL)));
        this.wordIndexer.setUnkSymbol(this.wordIndexer.getWord(this.wordIndexer.getOrAddIndexFromString(UNK_SYMBOL)));
    }

    /**
     * Consumes the header up to and including the {@code \1-grams:} line.
     *
     * @return the n-gram counts declared in the header, one per order,
     *         truncated to at most {@code maxOrder} entries
     * @throws RuntimeException if a count line is malformed, reading fails, or
     *         the file ends before the unigram section starts
     */
    protected List<Long> parseHeader() {
        List<Long> numEachNgrams = new ArrayList<Long>();
        String headerLine = null;
        try {
            while ((headerLine = this.readLine()) != null) {
                if (headerLine.startsWith(NGRAM_COUNT_PREFIX)) {
                    int equalsIndex = headerLine.indexOf('=');
                    if (equalsIndex < 0) {
                        throw new RuntimeException("Bad ARPA header line " + headerLine + " (" + this.describePosition() + "): missing '='");
                    }
                    // trim() tolerates whitespace around the count, e.g. "ngram 1= 5".
                    long currNumNGrams = Long.parseLong(headerLine.substring(equalsIndex + 1).trim());
                    if (numEachNgrams.size() < this.maxOrder) {
                        numEachNgrams.add(currNumNGrams);
                    }
                }
                if (headerLine.contains(UNIGRAM_SECTION_MARKER)) {
                    return numEachNgrams;
                }
            }
        }
        catch (NumberFormatException e) {
            throw new RuntimeException("Bad n-gram count in ARPA header line " + headerLine + " (" + this.describePosition() + ")", e);
        }
        catch (IOException e) {
            throw new RuntimeException(e);
        }
        // Only reachable when the file ended before the unigram marker was seen.
        throw new RuntimeException("Reached end of " + this.file + " without finding the " + UNIGRAM_SECTION_MARKER + " section.");
    }

    /**
     * Reads the n-gram sections that follow the header, notifying
     * {@code callback} when each order starts and finishes and for every
     * n-gram line. Stops as soon as a section above {@code maxOrder} begins.
     * The reader is closed on every exit path.
     *
     * @param callback receiver of the parsed n-grams
     * @throws RuntimeException on I/O failure or malformed input
     */
    protected void parseNGrams(ArpaLmReaderCallback<ProbBackoffPair> callback) {
        int currLine = 0;
        Logger.startTrack("Reading 1-grams", new Object[0]);
        callback.handleNgramOrderStarted(this.currentNGramLength);
        try {
            String line = null;
            // Scratch buffer reused for every line of the current order.
            int[] ngramScratch = new int[this.currentNGramLength];
            // Go through readLine() so the line counter stays in sync with the header phase.
            while ((line = this.readLine()) != null) {
                if (currLine % PROGRESS_INTERVAL == 0) {
                    Logger.logs("Read " + currLine + " lines");
                }
                ++currLine;
                if (line.length() == 0) {
                    continue;
                }
                if (line.charAt(0) == '\\') {
                    if (line.startsWith(END_MARKER)) {
                        continue;
                    }
                    // Any other backslash line opens the next order's section.
                    Logger.logs(this.currentNGramCount + " " + this.currentNGramLength + "-gram read.");
                    Logger.endTrack();
                    callback.handleNgramOrderFinished(this.currentNGramLength);
                    ++this.currentNGramLength;
                    if (this.currentNGramLength > this.maxOrder) {
                        // Remaining sections are not wanted; finally still closes the reader.
                        return;
                    }
                    ngramScratch = new int[this.currentNGramLength];
                    this.currentNGramCount = 0;
                    callback.handleNgramOrderStarted(this.currentNGramLength);
                    Logger.startTrack("Reading " + this.currentNGramLength + "-grams", new Object[0]);
                    continue;
                }
                this.parseLine(callback, line, ngramScratch);
            }
        }
        catch (IOException e) {
            throw new RuntimeException(e);
        }
        finally {
            this.closeReader();
        }
        Logger.endTrack();
        callback.handleNgramOrderFinished(this.currentNGramLength);
    }

    /**
     * Parses one n-gram line of the form
     * {@code logProb<TAB>words[<TAB>backoff]} and hands it to the callback.
     * A missing backoff field is reported as {@code 0.0f}.
     *
     * @param callback receiver of the parsed n-gram
     * @param line the raw line
     * @param ngram scratch array sized to the current order; overwritten with the word ids
     * @throws RuntimeException if the line is malformed or its log-probability is positive
     */
    private void parseLine(ArpaLmReaderCallback<ProbBackoffPair> callback, String line, int[] ngram) {
        int firstTab = line.indexOf('\t');
        if (firstTab < 0) {
            throw this.badLine(line, "expected a tab between the log-probability and the n-gram");
        }
        int secondTab = line.indexOf('\t', firstTab + 1);
        boolean hasBackOff = secondTab >= 0;
        int length = line.length();
        this.parseNGram(line, firstTab + 1, hasBackOff ? secondTab : length, ngram);
        float logProbability = Float.parseFloat(line.substring(0, firstTab));
        float backoff = 0.0f;
        if (hasBackOff) {
            backoff = Float.parseFloat(line.substring(secondTab + 1, length));
        }
        if (logProbability > 0.0f) {
            throw new RuntimeException("Bad ARPA line " + line);
        }
        callback.call(ngram, 0, ngram.length, new ProbBackoffPair(logProbability, backoff), line);
        ++this.currentNGramCount;
    }

    /**
     * Splits the n-gram field {@code string[start, stringLength)} on single
     * spaces and stores the id of each word in {@code retVal}.
     *
     * @param string the full line
     * @param start index of the first character of the n-gram field
     * @param stringLength exclusive end index of the n-gram field
     * @param retVal destination array; must receive exactly {@code retVal.length} words
     * @throws RuntimeException if the field does not contain exactly {@code retVal.length} words
     */
    private void parseNGram(String string, int start, int stringLength, int[] retVal) {
        int k = 0;
        int wordStart = start;
        while (true) {
            int nextSpace = string.indexOf(' ', wordStart);
            // A space at or beyond the field end belongs to the backoff column, not the n-gram.
            boolean lastWord = nextSpace < 0 || nextSpace >= stringLength;
            int wordEnd = lastWord ? stringLength : nextSpace;
            if (k >= retVal.length) {
                // Checked before indexing so that a malformed line does not add words to the indexer.
                throw this.badLine(string, "more than " + retVal.length + " words in a " + retVal.length + "-gram section");
            }
            retVal[k++] = this.wordIndexer.getOrAddIndexFromString(string.substring(wordStart, wordEnd));
            if (lastWord) {
                break;
            }
            wordStart = nextSpace + 1;
        }
        if (k != retVal.length) {
            // Otherwise stale ids from the previous line would remain in the reused scratch array.
            throw this.badLine(string, "only " + k + " words in a " + retVal.length + "-gram section");
        }
    }

    /** Closes the reader if one is open, rethrowing any I/O failure unchecked. */
    private void closeReader() {
        if (this.reader == null) {
            return;
        }
        try {
            this.reader.close();
        }
        catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /** Describes the most recently read line as "file, line N" for error messages. */
    private String describePosition() {
        // lineNumber starts at 1 and is incremented before each read, so the
        // line just returned by readLine() is lineNumber - 1.
        return this.file + ", line " + (this.lineNumber - 1);
    }

    /** Builds the exception used for malformed n-gram lines. */
    private RuntimeException badLine(String line, String reason) {
        return new RuntimeException("Bad ARPA line " + line + " (" + this.describePosition() + "): " + reason);
    }
}

