/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.international.spanish.process;

import edu.stanford.nlp.international.spanish.SpanishVerbStripper;
import edu.stanford.nlp.international.spanish.process.SpanishLexer;
import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.process.AbstractTokenizer;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.LexedTokenFactory;
import edu.stanford.nlp.process.Tokenizer;
import edu.stanford.nlp.process.TokenizerFactory;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.PropertiesUtils;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.regex.Pattern;

public class SpanishTokenizer<T extends HasWord>
extends AbstractTokenizer<T> {
    /** Redwood logging channel for this class; used by {@code main} to print the usage text. */
    private static final Redwood.RedwoodChannels log = Redwood.channels(SpanishTokenizer.class);
    /** Underlying lexer that supplies the raw tokens. */
    private final SpanishLexer lexer;
    /** Whether tokens tagged {@code "comp"} are split by {@code processCompound}. */
    private final boolean splitCompounds;
    /** Whether tokens tagged {@code "vb_pn_attached"} are split by {@code processVerb}. */
    private final boolean splitVerbs;
    /** Whether tokens tagged {@code "contraction"} are split by {@code processContraction}. */
    private final boolean splitContractions;
    /** True when at least one of the three split flags is set. */
    private final boolean splitAny;
    /** Queue of pieces produced by a split that are still waiting to be returned; only allocated when {@link #splitAny} is true. */
    private List<CoreLabel> compoundBuffer;
    /** Verb/pronoun separator; only initialised when {@link #splitVerbs} is true. */
    private SpanishVerbStripper verbStripper;
    /** Option string applied by {@link #ancoraFactory()} and by the single-argument {@code factory} overload. */
    public static final String ANCORA_OPTIONS = "ptb3Ellipsis=true,normalizeParentheses=true,ptb3Dashes=false,splitAll=true";
    /** Matches a literal hyphen. */
    private static final Pattern pDash = Pattern.compile("\\-");
    /** Matches a run of one or more whitespace characters. */
    private static final Pattern pSpace = Pattern.compile("\\s+");

    /**
     * Builds a tokenizer that reads from {@code r} through a new {@link SpanishLexer}.
     *
     * @param r                 character source handed to the lexer
     * @param tf                token factory handed to the lexer
     * @param lexerProperties   options handed to the lexer
     * @param splitCompounds    split tokens the lexer tags as {@code "comp"}
     * @param splitVerbs        split tokens the lexer tags as {@code "vb_pn_attached"}
     * @param splitContractions split tokens the lexer tags as {@code "contraction"}
     */
    public SpanishTokenizer(Reader r, LexedTokenFactory<T> tf, Properties lexerProperties, boolean splitCompounds, boolean splitVerbs, boolean splitContractions) {
        this.lexer = new SpanishLexer(r, tf, lexerProperties);

        this.splitCompounds = splitCompounds;
        this.splitVerbs = splitVerbs;
        this.splitContractions = splitContractions;
        this.splitAny = splitCompounds || splitVerbs || splitContractions;

        // The pending-token buffer is only needed when some kind of splitting is enabled.
        if (this.splitAny) {
            this.compoundBuffer = Generics.newArrayList(4);
        }
        // The verb stripper is only fetched when verb splitting was requested.
        if (splitVerbs) {
            this.verbStripper = SpanishVerbStripper.getInstance();
        }
    }

    /**
     * Produces the next token, draining pieces left over from an earlier split before
     * asking the lexer for more input. Tokens whose word is empty are skipped.
     *
     * @return the next token, or {@code null} once the lexer returns {@code null}
     * @throws RuntimeIOException if the lexer throws an {@link IOException}
     */
    @Override
    protected T getNext() {
        try {
            HasWord token;
            do {
                // Buffered pieces of a previously split token take priority over fresh lexer output.
                if (this.splitAny && !this.compoundBuffer.isEmpty()) {
                    token = (HasWord)this.compoundBuffer.remove(0);
                } else {
                    token = (HasWord)this.lexer.next();
                }
            } while (token != null && token.word().isEmpty());

            if (this.splitAny && token instanceof CoreLabel) {
                CoreLabel label = (CoreLabel)token;
                if (label.containsKey(CoreAnnotations.ParentAnnotation.class)) {
                    // The parent annotation tells us which kind of split (if any) applies.
                    if (this.splitCompounds && ((String)label.get(CoreAnnotations.ParentAnnotation.class)).equals("comp")) {
                        token = this.processCompound(label);
                    } else if (this.splitVerbs && ((String)label.get(CoreAnnotations.ParentAnnotation.class)).equals("vb_pn_attached")) {
                        token = this.processVerb(label);
                    } else if (this.splitContractions && ((String)label.get(CoreAnnotations.ParentAnnotation.class)).equals("contraction")) {
                        token = this.processContraction(label);
                    }
                }
            }
            return (T)token;
        }
        catch (IOException ioe) {
            throw new RuntimeIOException(ioe);
        }
    }

    /**
     * Copies {@code cl} and overwrites the copy's word, value, original text and
     * character span.
     *
     * @param cl            label to copy; it is not modified
     * @param part          text stored as word, value and original text of the copy
     * @param beginPosition begin position stored on the copy
     * @param endPosition   end position stored on the copy
     * @return the new label
     */
    private static CoreLabel copyCoreLabel(CoreLabel cl, String part, int beginPosition, int endPosition) {
        CoreLabel piece = new CoreLabel(cl);

        // Text-bearing attributes all take the same string.
        piece.setWord(part);
        piece.setValue(part);

        // Character span of the piece.
        piece.setBeginPosition(beginPosition);
        piece.setEndPosition(endPosition);

        piece.set(CoreAnnotations.OriginalTextAnnotation.class, part);
        return piece;
    }

    /**
     * Convenience overload of the four-argument {@code copyCoreLabel} whose end position
     * is {@code beginPosition + part.length()}.
     *
     * @param cl            label to copy
     * @param part          text of the new label
     * @param beginPosition begin position of the new label
     * @return the new label
     */
    private static CoreLabel copyCoreLabel(CoreLabel cl, String part, int beginPosition) {
        int endPosition = beginPosition + part.length();
        return SpanishTokenizer.copyCoreLabel(cl, part, beginPosition, endPosition);
    }

    /**
     * Splits a contraction into two tokens. The first piece is returned; the second is
     * appended to {@link #compoundBuffer} so that a later {@code getNext()} call returns it.
     *
     * <p>Handled forms (matched case-insensitively):
     * <ul>
     *   <li>{@code del}, {@code al}: the word minus its last character, then {@code "el"}
     *       ({@code "EL"} when the last character is not lower case);</li>
     *   <li>{@code conmigo}, {@code consigo}: the first three characters, then the fourth
     *       character followed by {@code "\u00ed"};</li>
     *   <li>{@code contigo}: the first three characters, then characters 3-4.</li>
     * </ul>
     *
     * <p>The first piece spans from the original begin position to the start of the second
     * piece; the second piece's span is derived from a per-form offset and length, so the
     * spans index into the original contraction rather than matching the length of the
     * expanded text.
     *
     * @param cl the contraction token; its parent annotation is removed
     * @return the label for the first piece
     * @throws IllegalArgumentException if the word is not one of the handled contractions
     */
    private CoreLabel processContraction(CoreLabel cl) {
        String second;
        String first;
        cl.remove(CoreAnnotations.ParentAnnotation.class);
        String word = cl.word();
        int secondOffset = 0;
        int secondLength = 0;
        // Locale.ROOT keeps the match independent of the JVM default locale. With a
        // Turkish or Azeri default, "CONMIGO".toLowerCase() contains a dotless 'ı',
        // matches none of the cases below and ends in the IllegalArgumentException.
        String lowered = word.toLowerCase(Locale.ROOT);
        switch (lowered) {
            case "del":
            case "al": {
                first = word.substring(0, lowered.length() - 1);
                char lastChar = word.charAt(lowered.length() - 1);
                // Mirror the casing of the contraction's final letter in the article.
                second = Character.isLowerCase(lastChar) ? "el" : "EL";
                secondOffset = 1;
                secondLength = lowered.length() - 1;
                break;
            }
            case "conmigo":
            case "consigo": {
                first = word.substring(0, 3);
                second = word.charAt(3) + "\u00ed";
                secondOffset = 3;
                secondLength = 4;
                break;
            }
            case "contigo": {
                first = word.substring(0, 3);
                second = word.substring(3, 5);
                secondOffset = 3;
                secondLength = 4;
                break;
            }
            default: {
                throw new IllegalArgumentException("Invalid contraction provided to processContraction");
            }
        }
        int secondStart = cl.beginPosition() + secondOffset;
        int secondEnd = secondStart + secondLength;
        this.compoundBuffer.add(SpanishTokenizer.copyCoreLabel(cl, second, secondStart, secondEnd));
        return SpanishTokenizer.copyCoreLabel(cl, first, cl.beginPosition(), secondStart);
    }

    /**
     * Separates attached pronouns from a verb token using {@link #verbStripper}.
     * The stem is returned; each pronoun is appended to {@link #compoundBuffer} with
     * consecutive offsets starting right after the original stem.
     *
     * @param cl the verb token; its parent annotation is removed
     * @return the stem label, or {@code cl} itself when the stripper returns {@code null}
     */
    private CoreLabel processVerb(CoreLabel cl) {
        cl.remove(CoreAnnotations.ParentAnnotation.class);
        SpanishVerbStripper.StrippedVerb split = this.verbStripper.separatePronouns(cl.word());
        if (split != null) {
            // The stem's span is measured with the stem as it appeared in the input.
            int stemEnd = cl.beginPosition() + split.getOriginalStem().length();

            int pronounStart = stemEnd;
            for (String pronoun : split.getPronouns()) {
                this.compoundBuffer.add(SpanishTokenizer.copyCoreLabel(cl, pronoun, pronounStart));
                pronounStart += pronoun.length();
            }

            // The word/value come from getStem(); the original text is then reset to the original stem.
            CoreLabel stemLabel = SpanishTokenizer.copyCoreLabel(cl, split.getStem(), cl.beginPosition(), stemEnd);
            stemLabel.setOriginalText(split.getOriginalStem());
            return stemLabel;
        }
        return cl;
    }

    /**
     * Splits a hyphenated compound into its parts, with each hyphen becoming a token of
     * its own. The first piece is returned and the rest are left in
     * {@link #compoundBuffer} for later {@code getNext()} calls.
     *
     * <p>Offsets are accumulated from the lengths of the pieces, so they line up with the
     * input only when the word contains no whitespace of its own.
     *
     * @param cl the compound token; its parent annotation is removed
     * @return the first piece, or {@code cl} itself if the word yields no non-empty piece
     */
    private CoreLabel processCompound(CoreLabel cl) {
        cl.remove(CoreAnnotations.ParentAnnotation.class);
        // Surround every hyphen with spaces, then split on whitespace.
        String[] parts = pSpace.split(pDash.matcher(cl.word()).replaceAll(" - "));
        int offset = cl.beginPosition();
        for (String part : parts) {
            if (part.isEmpty()) {
                // A word starting with a hyphen produces a leading empty string; emitting
                // it would create an empty-word token, which getNext() otherwise filters out.
                continue;
            }
            this.compoundBuffer.add(SpanishTokenizer.copyCoreLabel(cl, part, offset));
            offset += part.length();
        }
        if (this.compoundBuffer.isEmpty()) {
            // Nothing usable came out of the split (e.g. a whitespace-only word); return
            // the token unchanged instead of failing on remove(0).
            return cl;
        }
        return this.compoundBuffer.remove(0);
    }

    /**
     * Creates a tokenizer factory that uses the given token factory and option string.
     *
     * @param factory token factory handed to every tokenizer the result creates
     * @param options comma-separated options, parsed by {@code SpanishTokenizerFactory.setOptions}
     * @return a new tokenizer factory
     */
    public static <T extends HasWord> TokenizerFactory<T> factory(LexedTokenFactory<T> factory, String options) {
        // Parameterized instantiation instead of the raw type keeps the call type-checked.
        return new SpanishTokenizerFactory<T>(factory, options);
    }

    /**
     * Creates a tokenizer factory that uses the given token factory and
     * {@link #ANCORA_OPTIONS}.
     *
     * @param factory token factory handed to every tokenizer the result creates
     * @return a new tokenizer factory
     */
    public static <T extends HasWord> TokenizerFactory<T> factory(LexedTokenFactory<T> factory) {
        // Parameterized instantiation instead of the raw type keeps the call type-checked.
        return new SpanishTokenizerFactory<T>(factory, ANCORA_OPTIONS);
    }

    /**
     * Creates a {@link CoreLabel} tokenizer factory with {@link #ANCORA_OPTIONS} applied.
     *
     * @return a new tokenizer factory
     */
    public static TokenizerFactory<CoreLabel> ancoraFactory() {
        TokenizerFactory<CoreLabel> ancora = SpanishTokenizerFactory.newCoreLabelTokenizerFactory();
        ancora.setOptions(ANCORA_OPTIONS);
        return ancora;
    }

    /**
     * Creates a {@link CoreLabel} tokenizer factory with no options set.
     *
     * @return a new tokenizer factory
     */
    public static TokenizerFactory<CoreLabel> coreLabelFactory() {
        TokenizerFactory<CoreLabel> plain = SpanishTokenizerFactory.newCoreLabelTokenizerFactory();
        return plain;
    }

    /**
     * Same as {@link #coreLabelFactory()}.
     *
     * @return a new {@link CoreLabel} tokenizer factory with no options set
     */
    public static TokenizerFactory<CoreLabel> factory() {
        TokenizerFactory<CoreLabel> plain = SpanishTokenizer.coreLabelFactory();
        return plain;
    }

    /**
     * Builds the command-line help text: a usage line, a blank line, an "Options:" header
     * and one line per option, each terminated by the platform line separator.
     *
     * @return the help text
     */
    private static String usage() {
        final String nl = System.lineSeparator();
        final String[] optionLines = {
            "   -help          : Print this message.",
            "   -ancora        : Tokenization style of AnCora (fixed).",
            "   -lowerCase     : Apply lowercasing.",
            "   -encoding type : Encoding format.",
            "   -options str   : Orthographic options (see SpanishLexer.java)",
            "   -tokens        : Output tokens as line-separated instead of space-separated.",
            "   -onePerLine    : Output tokens one per line.",
        };

        StringBuilder text = new StringBuilder();
        text.append(String.format("Usage: java %s [OPTIONS] < file%n%n", SpanishTokenizer.class.getName()));
        text.append("Options:").append(nl);
        for (String line : optionLines) {
            text.append(line).append(nl);
        }
        return text.toString();
    }

    /**
     * Declares the command-line flags understood by {@code main} together with the number
     * of arguments each one takes; the map is passed to
     * {@code StringUtils.argsToProperties}.
     *
     * @return a fresh map from flag name (without the leading dash) to argument count
     */
    private static Map<String, Integer> argOptionDefs() {
        Map<String, Integer> argOptionDefs = Generics.newHashMap();
        argOptionDefs.put("help", 0);
        // NOTE(review): "ftb" is neither read by main() nor listed in usage(); kept so
        // existing invocations that pass it are parsed exactly as before.
        argOptionDefs.put("ftb", 0);
        argOptionDefs.put("ancora", 0);
        argOptionDefs.put("lowerCase", 0);
        argOptionDefs.put("encoding", 1);
        argOptionDefs.put("options", 1);
        argOptionDefs.put("tokens", 0);
        // "onePerLine" is documented in usage() and read as a boolean in main(), so it is
        // declared as a zero-argument flag like the other switches.
        argOptionDefs.put("onePerLine", 0);
        return argOptionDefs;
    }

    /**
     * Command-line entry point: tokenizes standard input and writes the tokens to
     * standard output, then reports line/token counts and throughput on standard error.
     *
     * <p>Flags (see {@code usage()}): {@code -help}, {@code -ancora}, {@code -options str},
     * {@code -tokens}, {@code -encoding type} (default {@code UTF-8}), {@code -lowerCase},
     * {@code -onePerLine}. Unless {@code -tokens} is given, the {@code tokenizeNLs} option is
     * added so that input line breaks come through as {@code *NL*} tokens and are echoed as
     * line breaks.
     *
     * @param args command-line arguments
     * @throws RuntimeIOException if the requested encoding is not supported
     */
    public static void main(String[] args) {
        Properties options = StringUtils.argsToProperties(args, SpanishTokenizer.argOptionDefs());
        if (options.containsKey("help")) {
            log.info(SpanishTokenizer.usage());
            return;
        }
        TokenizerFactory<CoreLabel> tf = SpanishTokenizer.coreLabelFactory();

        // Assemble the comma-separated option string for the tokenizer factory.
        String orthoOptions = options.containsKey("ancora") ? ANCORA_OPTIONS : "";
        if (options.containsKey("options")) {
            // Append the value of -options. The previous code appended the whole
            // Properties object here, which injected its toString() into the option list
            // whenever -ancora and -options were combined.
            String userOptions = options.getProperty("options");
            orthoOptions = orthoOptions.isEmpty() ? userOptions : orthoOptions + ',' + userOptions;
        }
        boolean tokens = PropertiesUtils.getBool(options, "tokens", false);
        if (!tokens) {
            orthoOptions = orthoOptions.isEmpty() ? "tokenizeNLs" : orthoOptions + ",tokenizeNLs";
        }
        tf.setOptions(orthoOptions);

        String encoding = options.getProperty("encoding", "UTF-8");
        boolean toLower = PropertiesUtils.getBool(options, "lowerCase", false);
        Locale es = new Locale("es");
        boolean onePerLine = PropertiesUtils.getBool(options, "onePerLine", false);
        int nLines = 0;
        int nTokens = 0;
        long startTime = System.nanoTime();
        try {
            Tokenizer<CoreLabel> tokenizer = tf.getTokenizer(new InputStreamReader(System.in, encoding));
            // True once a token has been printed on the current output line.
            boolean printSpace = false;
            while (tokenizer.hasNext()) {
                ++nTokens;
                String word = tokenizer.next().word();
                if (word.equals("*NL*")) {
                    // Newline token: end the current output line.
                    ++nLines;
                    System.out.println();
                    if (!onePerLine) {
                        printSpace = false;
                    }
                    continue;
                }
                String outputToken = toLower ? word.toLowerCase(es) : word;
                if (onePerLine) {
                    System.out.println(outputToken);
                    continue;
                }
                if (printSpace) {
                    System.out.print(" ");
                }
                System.out.print(outputToken);
                printSpace = true;
            }
        }
        catch (UnsupportedEncodingException e) {
            throw new RuntimeIOException("Bad character encoding", e);
        }
        long elapsedTime = System.nanoTime() - startTime;
        double linesPerSec = (double)nLines / ((double)elapsedTime / 1.0E9);
        System.err.printf("Done! Tokenized %d lines (%d tokens) at %.2f lines/sec%n", nLines, nTokens, linesPerSec);
    }

    /**
     * Factory for {@link SpanishTokenizer} instances. It holds the token factory, the
     * properties forwarded to the lexer and the three split switches, all of which can be
     * configured through a comma-separated option string (see {@link #setOptions(String)}).
     */
    public static class SpanishTokenizerFactory<T extends HasWord>
    implements TokenizerFactory<T>,
    Serializable {
        private static final long serialVersionUID = 946818805507187330L;
        /** Token factory handed to every tokenizer this factory creates. */
        protected final LexedTokenFactory<T> factory;
        /** Options that are not split switches; forwarded to the tokenizer's lexer. */
        protected Properties lexerProperties = new Properties();
        /** Forwarded to the tokenizer as {@code splitCompounds}. */
        protected boolean splitCompoundOption = false;
        /** Forwarded to the tokenizer as {@code splitVerbs}. */
        protected boolean splitVerbOption = false;
        /** Forwarded to the tokenizer as {@code splitContractions}. */
        protected boolean splitContractionOption = false;

        /**
         * Creates a factory that builds {@link CoreLabel} tokens and has no options set.
         *
         * @return a new factory
         */
        public static TokenizerFactory<CoreLabel> newCoreLabelTokenizerFactory() {
            return new SpanishTokenizerFactory<CoreLabel>(new CoreLabelTokenFactory());
        }

        /**
         * Creates a factory with the given token factory and option string.
         *
         * @param factory token factory handed to every tokenizer
         * @param options comma-separated options; see {@link #setOptions(String)}
         * @return a new factory
         */
        public static <T extends HasWord> SpanishTokenizerFactory<T> newSpanishTokenizerFactory(LexedTokenFactory<T> factory, String options) {
            return new SpanishTokenizerFactory<T>(factory, options);
        }

        private SpanishTokenizerFactory(LexedTokenFactory<T> factory) {
            this.factory = factory;
        }

        private SpanishTokenizerFactory(LexedTokenFactory<T> factory, String options) {
            this.factory = factory;
            this.setOptions(options);
        }

        /** Returns a new tokenizer over {@code r}, typed as an iterator. */
        @Override
        public Iterator<T> getIterator(Reader r) {
            return this.getTokenizer(r);
        }

        /** Returns a new tokenizer over {@code r} configured with this factory's current settings. */
        @Override
        public Tokenizer<T> getTokenizer(Reader r) {
            return new SpanishTokenizer<T>(r, this.factory, this.lexerProperties, this.splitCompoundOption, this.splitVerbOption, this.splitContractionOption);
        }

        /**
         * Applies a comma-separated list of options, each either {@code key} (meaning
         * {@code key=true}) or {@code key=value}.
         *
         * <p>{@code splitAll}, {@code splitCompounds}, {@code splitVerbs} and
         * {@code splitContractions} set the corresponding split switches; every other key
         * is stored in {@link #lexerProperties}. Settings accumulate across calls.
         * Whitespace around an option is ignored, empty entries (including an empty
         * option string) are skipped, and entries with more than one {@code '='} or with
         * no key at all are reported on standard error and skipped.
         *
         * @param options the option string; {@code null} is ignored
         */
        @Override
        public void setOptions(String options) {
            if (options == null) {
                return;
            }
            for (String rawOption : options.split(",")) {
                String option = rawOption.trim();
                if (option.isEmpty()) {
                    // An empty entry would otherwise be registered as a lexer property
                    // with an empty key.
                    continue;
                }
                String[] fields = option.split("=");
                if (fields.length < 1 || fields.length > 2) {
                    System.err.printf("%s: Bad option %s%n", this.getClass().getName(), option);
                    continue;
                }
                String key = fields[0];
                // A bare key (or "key=" with nothing after it) is shorthand for key=true.
                String value = fields.length == 2 ? fields[1] : "true";
                switch (key) {
                    case "splitAll": {
                        boolean enabled = Boolean.parseBoolean(value);
                        this.splitCompoundOption = enabled;
                        this.splitVerbOption = enabled;
                        this.splitContractionOption = enabled;
                        break;
                    }
                    case "splitCompounds": {
                        this.splitCompoundOption = Boolean.parseBoolean(value);
                        break;
                    }
                    case "splitVerbs": {
                        this.splitVerbOption = Boolean.parseBoolean(value);
                        break;
                    }
                    case "splitContractions": {
                        this.splitContractionOption = Boolean.parseBoolean(value);
                        break;
                    }
                    default: {
                        this.lexerProperties.setProperty(key, value);
                        break;
                    }
                }
            }
        }

        /**
         * Applies {@code extraOptions} to this factory and then returns a new tokenizer
         * over {@code r}. The extra options stay in effect for later calls as well.
         */
        @Override
        public Tokenizer<T> getTokenizer(Reader r, String extraOptions) {
            this.setOptions(extraOptions);
            return this.getTokenizer(r);
        }
    }
}

