/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.pipeline;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.Annotator;
import edu.stanford.nlp.pipeline.ChunkAnnotationUtils;
import edu.stanford.nlp.process.WordToSentenceProcessor;
import edu.stanford.nlp.util.ArrayUtils;
import edu.stanford.nlp.util.CoreMap;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Set;

/**
 * Annotator that groups the tokens of an {@link Annotation} into sentences.
 *
 * <p>It reads {@code CoreAnnotations.TokensAnnotation} and
 * {@code CoreAnnotations.TextAnnotation} from the document, delegates the
 * actual splitting to a {@link WordToSentenceProcessor}, and stores the
 * resulting list of sentence annotations under
 * {@code CoreAnnotations.SentencesAnnotation}.
 */
public class WordsToSentencesAnnotator implements Annotator {
    /** Performs the token-list to sentence-list split. */
    private final WordToSentenceProcessor<CoreLabel> wts;
    /** When true, a progress message is printed to {@code System.err}. */
    private final boolean verbose;
    /**
     * When true, every group returned by the processor (including empty ones)
     * counts as one line, and each sentence is tagged with its line number.
     */
    private final boolean countLineNumbers;

    /** Creates a non-verbose annotator backed by a default-constructed processor. */
    public WordsToSentencesAnnotator() {
        this(false);
    }

    /**
     * Creates an annotator backed by a default-constructed processor.
     *
     * @param verbose whether to print a progress message to {@code System.err}
     */
    public WordsToSentencesAnnotator(boolean verbose) {
        this(verbose, false, new WordToSentenceProcessor<CoreLabel>());
    }

    /**
     * Creates an annotator whose processor is configured with the given
     * boundary settings. The arguments are forwarded unchanged to the
     * {@link WordToSentenceProcessor} constructor; see that class for their
     * exact semantics.
     *
     * @param verbose whether to print a progress message to {@code System.err}
     * @param boundaryTokenRegex forwarded to the processor
     * @param boundaryToDiscard forwarded to the processor
     * @param htmlElementsToDiscard forwarded to the processor
     * @param newlineIsSentenceBreak converted with
     *        {@code WordToSentenceProcessor.stringToNewlineIsSentenceBreak}
     *        and forwarded to the processor
     */
    public WordsToSentencesAnnotator(boolean verbose, String boundaryTokenRegex, Set<String> boundaryToDiscard, Set<String> htmlElementsToDiscard, String newlineIsSentenceBreak) {
        this(verbose, false, new WordToSentenceProcessor<CoreLabel>(boundaryTokenRegex, boundaryToDiscard, htmlElementsToDiscard, WordToSentenceProcessor.stringToNewlineIsSentenceBreak(newlineIsSentenceBreak)));
    }

    /**
     * Same as the five-argument constructor, with two further processor
     * settings.
     *
     * @param verbose whether to print a progress message to {@code System.err}
     * @param boundaryTokenRegex forwarded to the processor
     * @param boundaryToDiscard forwarded to the processor
     * @param htmlElementsToDiscard forwarded to the processor
     * @param newlineIsSentenceBreak converted with
     *        {@code WordToSentenceProcessor.stringToNewlineIsSentenceBreak}
     *        and forwarded to the processor
     * @param boundaryMultiTokenRegex compiled with
     *        {@link TokenSequencePattern#compile} when non-null; a null value
     *        is forwarded as a null pattern
     * @param tokenRegexesToDiscard forwarded to the processor
     */
    public WordsToSentencesAnnotator(boolean verbose, String boundaryTokenRegex, Set<String> boundaryToDiscard, Set<String> htmlElementsToDiscard, String newlineIsSentenceBreak, String boundaryMultiTokenRegex, Set<String> tokenRegexesToDiscard) {
        // The processor must be parameterized on CoreLabel: generics are
        // invariant, so a WordToSentenceProcessor<CoreMap> cannot be passed
        // to the private constructor below.
        this(verbose, false, new WordToSentenceProcessor<CoreLabel>(boundaryTokenRegex, boundaryToDiscard, htmlElementsToDiscard, WordToSentenceProcessor.stringToNewlineIsSentenceBreak(newlineIsSentenceBreak), boundaryMultiTokenRegex != null ? TokenSequencePattern.compile(boundaryMultiTokenRegex) : null, tokenRegexesToDiscard));
    }

    /** Shared constructor that simply stores its arguments. */
    private WordsToSentencesAnnotator(boolean verbose, boolean countLineNumbers, WordToSentenceProcessor<CoreLabel> wts) {
        this.verbose = verbose;
        this.countLineNumbers = countLineNumbers;
        this.wts = wts;
    }

    /**
     * Creates an annotator with line counting enabled, backed by a processor
     * built from the given set of tokens.
     *
     * @param verbose whether to print a progress message to {@code System.err}
     * @param nlToken tokens handed to the processor as an immutable set
     *        (presumably the tokens that mark a newline; see
     *        {@link WordToSentenceProcessor})
     * @return the new annotator
     */
    public static WordsToSentencesAnnotator newlineSplitter(boolean verbose, String ... nlToken) {
        WordToSentenceProcessor<CoreLabel> wts = new WordToSentenceProcessor<CoreLabel>(ArrayUtils.asImmutableSet(nlToken));
        return new WordsToSentencesAnnotator(verbose, true, wts);
    }

    /**
     * Creates an annotator backed by a processor constructed with the flag
     * {@code true} and with line counting disabled.
     * NOTE(review): the flag presumably makes the processor treat the whole
     * input as one sentence; confirm against {@link WordToSentenceProcessor}.
     *
     * @param verbose whether to print a progress message to {@code System.err}
     * @return the new annotator
     */
    public static WordsToSentencesAnnotator nonSplitter(boolean verbose) {
        WordToSentenceProcessor<CoreLabel> wts = new WordToSentenceProcessor<CoreLabel>(true);
        return new WordsToSentencesAnnotator(verbose, false, wts);
    }

    /**
     * Splits the document's tokens into sentences and stores them under
     * {@code CoreAnnotations.SentencesAnnotation}.
     *
     * <p>Each sentence annotation receives its text (the substring of the
     * document text spanned by its first and last token), character offsets,
     * token list, token begin/end indices, sentence index and, when line
     * counting is enabled, its line number. Annotations attached to a
     * section-start token are copied onto every sentence up to and including
     * the one whose last token carries a section-end annotation.
     *
     * @param annotation the document to annotate; must contain tokens
     * @throws IllegalArgumentException if the document has no tokens annotation
     * @throws IllegalStateException if the processor returns an empty sentence
     *         while line counting is disabled
     */
    @Override
    public void annotate(Annotation annotation) {
        if (this.verbose) {
            System.err.print("Sentence splitting ...");
        }
        if (!annotation.has(CoreAnnotations.TokensAnnotation.class)) {
            throw new IllegalArgumentException("WordsToSentencesAnnotator: unable to find words/tokens in: " + annotation);
        }
        String text = annotation.get(CoreAnnotations.TextAnnotation.class);
        List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class);

        int tokenOffset = 0;
        int lineNumber = 0;
        // Annotations of the section currently open, or null outside a section.
        CoreMap sectionAnnotations = null;
        List<CoreMap> sentences = new ArrayList<CoreMap>();

        for (List<CoreLabel> sentenceTokens : this.wts.process(tokens)) {
            if (this.countLineNumbers) {
                // Counted before the empty check so that empty groups still
                // advance the line number.
                ++lineNumber;
            }
            if (sentenceTokens.isEmpty()) {
                if (this.countLineNumbers) {
                    continue;
                }
                throw new IllegalStateException("unexpected empty sentence: " + sentenceTokens);
            }

            CoreLabel sentenceStartToken = sentenceTokens.get(0);
            CoreLabel sentenceEndToken = sentenceTokens.get(sentenceTokens.size() - 1);

            int begin = sentenceStartToken.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
            int end = sentenceEndToken.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
            String sentenceText = text.substring(begin, end);

            Annotation sentence = new Annotation(sentenceText);
            sentence.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, begin);
            sentence.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, end);
            sentence.set(CoreAnnotations.TokensAnnotation.class, sentenceTokens);
            sentence.set(CoreAnnotations.TokenBeginAnnotation.class, tokenOffset);
            tokenOffset += sentenceTokens.size();
            sentence.set(CoreAnnotations.TokenEndAnnotation.class, tokenOffset);
            sentence.set(CoreAnnotations.SentenceIndexAnnotation.class, sentences.size());
            if (this.countLineNumbers) {
                sentence.set(CoreAnnotations.LineNumberAnnotation.class, lineNumber);
            }

            // A section-start marker on the first token opens a new section.
            CoreMap sectionStart = sentenceStartToken.get(CoreAnnotations.SectionStartAnnotation.class);
            if (sectionStart != null) {
                sectionAnnotations = sectionStart;
            }
            if (sectionAnnotations != null) {
                ChunkAnnotationUtils.copyUnsetAnnotations(sectionAnnotations, sentence);
            }
            // A section-end marker on the last token closes the section after
            // this sentence has received the section's annotations.
            if (sentenceEndToken.get(CoreAnnotations.SectionEndAnnotation.class) != null) {
                sectionAnnotations = null;
            }
            sentences.add(sentence);
        }
        annotation.set(CoreAnnotations.SentencesAnnotation.class, sentences);
    }

    /**
     * @return a singleton set containing {@code TOKENIZE_REQUIREMENT}
     */
    @Override
    public Set<Annotator.Requirement> requires() {
        return Collections.singleton(TOKENIZE_REQUIREMENT);
    }

    /**
     * @return a singleton set containing {@code SSPLIT_REQUIREMENT}
     */
    @Override
    public Set<Annotator.Requirement> requirementsSatisfied() {
        return Collections.singleton(SSPLIT_REQUIREMENT);
    }
}

