/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.international.spanish.process;

import edu.stanford.nlp.international.spanish.SpanishVerbStripper;
import edu.stanford.nlp.international.spanish.process.SpanishLexer;
import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.process.AbstractTokenizer;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.LexedTokenFactory;
import edu.stanford.nlp.process.Tokenizer;
import edu.stanford.nlp.process.TokenizerFactory;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.PropertiesUtils;
import edu.stanford.nlp.util.StringUtils;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;

/**
 * Tokenizer for Spanish text built on top of {@link SpanishLexer}.
 *
 * <p>Tokens are read from the lexer one at a time. When any of the split
 * options is enabled, a {@link CoreLabel} token that carries a
 * {@link CoreAnnotations.ParentAnnotation} is post-processed: compounds are
 * split on hyphens, verbs are separated from their pronouns via
 * {@link SpanishVerbStripper}, and the contractions "del", "al", "conmigo",
 * "contigo" and "consigo" are expanded. The extra tokens produced by a split
 * are queued and returned by subsequent calls before the lexer is read again.
 *
 * @param <T> type of token produced
 */
public class SpanishTokenizer<T extends HasWord> extends AbstractTokenizer<T> {
    /** Default option string, used by the factory methods that take no options. */
    public static final String ANCORA_OPTS = "ptb3Ellipsis=true,normalizeParentheses=true,ptb3Dashes=false,splitAll=true";

    // Values of ParentAnnotation that getNext() reacts to (presumably set by SpanishLexer).
    private static final String COMPOUND_ANNOTATION = "comp";
    private static final String VERB_ANNOTATION = "vb_pn_attached";
    private static final String CONTRACTION_ANNOTATION = "contraction";

    private final SpanishLexer lexer;
    private final boolean splitCompounds;
    private final boolean splitVerbs;
    private final boolean splitContractions;
    /** True when at least one of the three split options is enabled. */
    private final boolean splitAny;
    /** Pending tokens produced by a split; only allocated when {@link #splitAny} is true. */
    private List<CoreLabel> compoundBuffer;
    private final SpanishVerbStripper verbStripper;

    /**
     * Creates a tokenizer reading from {@code r}.
     *
     * @param r source of text
     * @param tf factory handed to the lexer for creating tokens
     * @param lexerProperties options handed to the lexer
     * @param splitCompounds whether to split tokens marked as compounds
     * @param splitVerbs whether to separate pronouns from tokens marked as verbs with attached pronouns
     * @param splitContractions whether to expand tokens marked as contractions
     */
    public SpanishTokenizer(Reader r, LexedTokenFactory<T> tf, Properties lexerProperties, boolean splitCompounds, boolean splitVerbs, boolean splitContractions) {
        this.lexer = new SpanishLexer(r, tf, lexerProperties);
        this.splitCompounds = splitCompounds;
        this.splitVerbs = splitVerbs;
        this.splitContractions = splitContractions;
        this.splitAny = splitCompounds || splitVerbs || splitContractions;
        if (this.splitAny) {
            this.compoundBuffer = Generics.newLinkedList();
        }
        this.verbStripper = SpanishVerbStripper.getInstance();
    }

    /**
     * Returns the next token, or {@code null} when the lexer returns {@code null}.
     * Tokens whose word is empty are skipped. Queued split tokens are returned
     * before the lexer is consulted again.
     *
     * @throws RuntimeIOException if the lexer throws an {@link IOException}
     */
    @Override
    protected T getNext() {
        try {
            HasWord nextToken;
            do {
                if (this.splitAny && !this.compoundBuffer.isEmpty()) {
                    nextToken = this.compoundBuffer.remove(0);
                } else {
                    nextToken = (HasWord) this.lexer.next();
                }
            } while (nextToken != null && nextToken.word().length() == 0);

            if (this.splitAny && nextToken instanceof CoreLabel) {
                CoreLabel cl = (CoreLabel) nextToken;
                if (cl.containsKey(CoreAnnotations.ParentAnnotation.class)) {
                    String parent = cl.get(CoreAnnotations.ParentAnnotation.class);
                    if (this.splitCompounds && COMPOUND_ANNOTATION.equals(parent)) {
                        nextToken = this.processCompound(cl);
                    } else if (this.splitVerbs && VERB_ANNOTATION.equals(parent)) {
                        nextToken = this.processVerb(cl);
                    } else if (this.splitContractions && CONTRACTION_ANNOTATION.equals(parent)) {
                        nextToken = this.processContraction(cl);
                    }
                }
            }

            // The lexer and the buffer are trusted to yield tokens of type T.
            @SuppressWarnings("unchecked")
            T result = (T) nextToken;
            return result;
        }
        catch (IOException e) {
            throw new RuntimeIOException(e);
        }
    }

    /**
     * Copies {@code cl} and overwrites the copy's word, value and original text
     * with {@code part}. All other annotations are carried over from {@code cl}.
     */
    private CoreLabel copyCoreLabel(CoreLabel cl, String part) {
        CoreLabel newLabel = new CoreLabel(cl);
        newLabel.setWord(part);
        newLabel.setValue(part);
        newLabel.set(CoreAnnotations.OriginalTextAnnotation.class, part);
        return newLabel;
    }

    /**
     * Expands a contraction into two tokens: "del" -> "de" + "el", "al" -> "a" + "el",
     * "conmigo" -> "con" + "mí", "contigo" -> "con" + "ti", "consigo" -> "con" + "sí".
     * The first token is returned; the second is queued in the buffer. The
     * casing of the input is carried over to the pieces.
     *
     * @throws IllegalArgumentException if the word is not one of the contractions above
     */
    private CoreLabel processContraction(CoreLabel cl) {
        cl.remove(CoreAnnotations.ParentAnnotation.class);
        String word = cl.word();
        // Locale.ROOT keeps the match independent of the JVM default locale
        // (e.g. under a Turkish locale "CONMIGO" would lowercase to "conmıgo").
        String lowered = word.toLowerCase(Locale.ROOT);

        String first;
        String second;
        if (lowered.equals("del") || lowered.equals("al")) {
            int lastIndex = word.length() - 1;
            first = word.substring(0, lastIndex);
            second = Character.isLowerCase(word.charAt(lastIndex)) ? "el" : "EL";
        } else if (lowered.equals("conmigo") || lowered.equals("consigo")) {
            first = word.substring(0, 3);
            // Accented i, upper- or lowercase to match the vowel it replaces,
            // so that "CONMIGO" yields "MÍ" rather than the mixed-case "Mí".
            String accented = Character.isUpperCase(word.charAt(4)) ? "\u00cd" : "\u00ed";
            second = word.charAt(3) + accented;
        } else if (lowered.equals("contigo")) {
            first = word.substring(0, 3);
            second = word.substring(3, 5);
        } else {
            throw new IllegalArgumentException("Invalid contraction provided to processContraction");
        }
        this.compoundBuffer.add(this.copyCoreLabel(cl, second));
        return this.copyCoreLabel(cl, first);
    }

    /**
     * Separates attached pronouns from a verb token. The stripped verb is
     * returned and each pronoun is queued in the buffer. If the verb stripper
     * returns {@code null}, the token is returned unsplit.
     */
    private CoreLabel processVerb(CoreLabel cl) {
        cl.remove(CoreAnnotations.ParentAnnotation.class);
        Pair<String, List<String>> parts = this.verbStripper.separatePronouns(cl.word());
        if (parts == null) {
            return cl;
        }
        for (String pronoun : parts.second()) {
            this.compoundBuffer.add(this.copyCoreLabel(cl, pronoun));
        }
        return this.copyCoreLabel(cl, parts.first());
    }

    /**
     * Splits a compound token on hyphens and whitespace; each hyphen becomes a
     * token of its own. The first piece is returned and the remaining pieces
     * are queued in the buffer.
     *
     * <p>Empty pieces (which {@code split} produces for a word starting with a
     * hyphen) are dropped so that no empty-word token is emitted. If no piece
     * remains, the token is returned unsplit.
     */
    private CoreLabel processCompound(CoreLabel cl) {
        cl.remove(CoreAnnotations.ParentAnnotation.class);
        String[] parts = cl.word().replaceAll("\\-", " - ").split("\\s+");
        CoreLabel head = null;
        for (String part : parts) {
            if (part.isEmpty()) {
                continue;
            }
            CoreLabel piece = this.copyCoreLabel(cl, part);
            if (head == null) {
                head = piece;
            } else {
                this.compoundBuffer.add(piece);
            }
        }
        return head != null ? head : cl;
    }

    /** Returns a factory producing {@link CoreLabel} tokenizers configured with {@link #ANCORA_OPTS}. */
    public static TokenizerFactory<CoreLabel> coreLabelFactory() {
        return SpanishTokenizerFactory.newCoreLabelTokenizerFactory();
    }

    /**
     * Returns a tokenizer factory using the given token factory and option string.
     *
     * @param factory creates the tokens
     * @param options comma-separated options, see {@link SpanishTokenizerFactory#setOptions(String)}
     */
    public static <T extends HasWord> TokenizerFactory<T> factory(LexedTokenFactory<T> factory, String options) {
        return new SpanishTokenizerFactory<T>(factory, options);
    }

    /** Returns a tokenizer factory using the given token factory and {@link #ANCORA_OPTS}. */
    public static <T extends HasWord> TokenizerFactory<T> factory(LexedTokenFactory<T> factory) {
        return new SpanishTokenizerFactory<T>(factory);
    }

    /** Builds the command-line help text printed by {@link #main(String[])}. */
    private static String usage() {
        StringBuilder sb = new StringBuilder();
        String nl = System.getProperty("line.separator");
        sb.append(String.format("Usage: java %s [OPTIONS] < file%n%n", SpanishTokenizer.class.getName()));
        sb.append("Options:").append(nl);
        sb.append("   -help          : Print this message.").append(nl);
        sb.append("   -ancora        : Tokenization style of AnCora (fixed).").append(nl);
        sb.append("   -lowerCase     : Apply lowercasing.").append(nl);
        sb.append("   -encoding type : Encoding format.").append(nl);
        sb.append("   -orthoOpts str : Orthographic options (see SpanishLexer.java)").append(nl);
        sb.append("   -lines         : Keep tokens as space-separated, not line separated.").append(nl);
        return sb.toString();
    }

    /**
     * Maps each command-line flag to the number of arguments it takes.
     * "ancora" is registered because {@link #usage()} advertises it and
     * {@link #main(String[])} checks for it; "ftb" is retained so that command
     * lines that already pass it are parsed as before.
     */
    private static Map<String, Integer> argOptionDefs() {
        Map<String, Integer> argOptionDefs = Generics.newHashMap();
        argOptionDefs.put("help", 0);
        argOptionDefs.put("ftb", 0);
        argOptionDefs.put("ancora", 0);
        argOptionDefs.put("lowerCase", 0);
        argOptionDefs.put("encoding", 1);
        argOptionDefs.put("orthoOpts", 1);
        argOptionDefs.put("lines", 0);
        return argOptionDefs;
    }

    /**
     * Command-line entry point: tokenizes standard input and writes the tokens
     * to standard output, one per line or (with {@code -lines}) space-separated
     * with the input's line breaks preserved. A summary is written to standard
     * error when done.
     *
     * @param args command-line flags, see {@link #usage()}
     */
    public static void main(String[] args) {
        Properties options = StringUtils.argsToProperties(args, SpanishTokenizer.argOptionDefs());
        if (options.containsKey("help")) {
            System.err.println(SpanishTokenizer.usage());
            return;
        }

        TokenizerFactory<CoreLabel> tf = SpanishTokenizer.coreLabelFactory();
        if (options.containsKey("ancora")) {
            tf.setOptions(ANCORA_OPTS);
        }
        tf.setOptions(options.getProperty("orthoOpts", ""));
        // Newlines are requested as tokens so that input line breaks can be echoed.
        tf.setOptions("tokenizeNLs");

        boolean lines = options.containsKey("lines");
        String encoding = options.getProperty("encoding", "UTF-8");
        boolean toLower = PropertiesUtils.getBool(options, "lowerCase", false);
        Locale es = new Locale("es");

        int nLines = 0;
        int nTokens = 0;
        long startTime = System.nanoTime();
        try {
            Tokenizer<CoreLabel> tokenizer = tf.getTokenizer(new InputStreamReader(System.in, encoding));
            boolean printSpace = false;
            while (tokenizer.hasNext()) {
                ++nTokens;
                String word = tokenizer.next().word();
                if (word.equals("*NL*")) {
                    ++nLines;
                    printSpace = false;
                    System.out.println();
                    continue;
                }
                if (printSpace) {
                    if (lines) {
                        System.out.print(" ");
                    } else {
                        System.out.println();
                    }
                }
                System.out.print(toLower ? word.toLowerCase(es) : word);
                printSpace = true;
            }
        }
        catch (UnsupportedEncodingException e) {
            // Nothing was tokenized; report the problem instead of printing statistics.
            System.err.printf("%s: Unsupported encoding %s%n", SpanishTokenizer.class.getName(), encoding);
            return;
        }
        long elapsedTime = System.nanoTime() - startTime;
        double linesPerSec = (double) nLines / ((double) elapsedTime / 1.0E9);
        System.err.printf("Done! Tokenized %d lines (%d tokens) at %.2f lines/sec%n", nLines, nTokens, linesPerSec);
    }

    /**
     * Factory for {@link SpanishTokenizer} instances. Holds the token factory,
     * the lexer options and the three split flags that are passed to every
     * tokenizer it creates.
     *
     * @param <T> type of token produced
     */
    public static class SpanishTokenizerFactory<T extends HasWord> implements TokenizerFactory<T>, Serializable {
        private static final long serialVersionUID = 946818805507187330L;
        protected final LexedTokenFactory<T> factory;
        protected Properties lexerProperties = new Properties();
        protected boolean splitCompoundOption = false;
        protected boolean splitVerbOption = false;
        protected boolean splitContractionOption = false;

        /** Returns a factory producing {@link CoreLabel} tokens, configured with {@link SpanishTokenizer#ANCORA_OPTS}. */
        public static TokenizerFactory<CoreLabel> newCoreLabelTokenizerFactory() {
            return new SpanishTokenizerFactory<CoreLabel>(new CoreLabelTokenFactory(), SpanishTokenizer.ANCORA_OPTS);
        }

        /**
         * Returns a factory using the given token factory and option string.
         *
         * @param factory creates the tokens
         * @param options comma-separated options, see {@link #setOptions(String)}
         */
        public static <T extends HasWord> SpanishTokenizerFactory<T> newSpanishTokenizerFactory(LexedTokenFactory<T> factory, String options) {
            return new SpanishTokenizerFactory<T>(factory, options);
        }

        private SpanishTokenizerFactory(LexedTokenFactory<T> factory) {
            this(factory, SpanishTokenizer.ANCORA_OPTS);
        }

        private SpanishTokenizerFactory(LexedTokenFactory<T> factory, String options) {
            this.factory = factory;
            this.setOptions(options);
        }

        @Override
        public Iterator<T> getIterator(Reader r) {
            return this.getTokenizer(r);
        }

        @Override
        public Tokenizer<T> getTokenizer(Reader r) {
            return new SpanishTokenizer<T>(r, this.factory, this.lexerProperties, this.splitCompoundOption, this.splitVerbOption, this.splitContractionOption);
        }

        /**
         * Applies a comma-separated list of options, each of the form
         * {@code key} (meaning {@code key=true}) or {@code key=value}.
         *
         * <p>The keys {@code splitAll}, {@code splitCompounds}, {@code splitVerbs}
         * and {@code splitContractions} set the split flags of this factory;
         * every other key is stored in the lexer properties. Options
         * accumulate across calls. A {@code null} string and empty list
         * entries are ignored; a malformed entry (empty key, or more than one
         * {@code =}-separated value) is reported on standard error and skipped.
         *
         * @param options comma-separated option string, may be {@code null}
         */
        @Override
        public void setOptions(String options) {
            if (options == null) {
                return;
            }
            for (String option : options.split(",")) {
                if (option.isEmpty()) {
                    // Empty entries (e.g. from an empty option string) carry no information.
                    continue;
                }
                String[] fields = option.split("=");
                if (fields.length < 1 || fields.length > 2 || fields[0].isEmpty()) {
                    System.err.printf("%s: Bad option %s%n", this.getClass().getName(), option);
                    continue;
                }
                String key = fields[0];
                // A bare key (also "key=", since split drops the trailing empty field) means "true".
                String value = fields.length == 2 ? fields[1] : "true";
                boolean enabled = Boolean.parseBoolean(value);

                if (key.equals("splitAll")) {
                    this.splitCompoundOption = enabled;
                    this.splitVerbOption = enabled;
                    this.splitContractionOption = enabled;
                } else if (key.equals("splitCompounds")) {
                    this.splitCompoundOption = enabled;
                } else if (key.equals("splitVerbs")) {
                    this.splitVerbOption = enabled;
                } else if (key.equals("splitContractions")) {
                    this.splitContractionOption = enabled;
                } else {
                    this.lexerProperties.setProperty(key, value);
                }
            }
        }

        /**
         * Applies {@code extraOptions} to this factory (the change persists for
         * later calls) and then creates a tokenizer reading from {@code r}.
         */
        @Override
        public Tokenizer<T> getTokenizer(Reader r, String extraOptions) {
            this.setOptions(extraOptions);
            return this.getTokenizer(r);
        }
    }
}

