/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.pipeline;

import edu.stanford.nlp.ie.AbstractSequenceClassifier;
import edu.stanford.nlp.ie.crf.CRFClassifier;
import edu.stanford.nlp.ling.ChineseCoreAnnotations;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.Annotator;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.PropertiesUtils;
import edu.stanford.nlp.util.Timing;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Properties;
import java.util.Set;

/**
 * Annotator that splits Chinese text into word tokens using a sequence classifier
 * loaded through {@link CRFClassifier}.
 *
 * <p>For every sentence found under {@code SentencesAnnotation} (or for the whole
 * annotation when no sentences are present) the annotator:
 * <ol>
 *   <li>builds one {@link CoreLabel} per non-whitespace, non-control character and stores
 *       the list under {@code ChineseCoreAnnotations.CharactersAnnotation};</li>
 *   <li>runs the segmenter on the raw text and stores the resulting word tokens, with
 *       character offsets, under {@code CoreAnnotations.TokensAnnotation}.</li>
 * </ol>
 *
 * <p>NOTE(review): {@link #millisecondsAnnotating} is a static counter updated without any
 * synchronization, and only when verbose output is enabled.
 */
public class ChineseSegmenterAnnotator
implements Annotator {
    /** The loaded segmentation model; assigned by one of the {@code loadModel} methods. */
    private AbstractSequenceClassifier<?> segmenter = null;
    /** Timer used for the verbose progress messages. */
    private final Timing timer = new Timing();
    /** Accumulated time reported by the timer across {@link #annotate} calls (verbose mode only). */
    private static long millisecondsAnnotating = 0L;
    /** Whether progress and debug output is written to {@code System.err}. */
    private final boolean verbose;
    private static final String DEFAULT_SEG_LOC = "/u/nlp/data/gale/segtool/stanford-seg/classifiers-2010/05202008-ctb6.processed-chris6.lex.gz";
    private static final String DEFAULT_SER_DICTIONARY = "/u/nlp/data/gale/segtool/stanford-seg/classifiers/dict-chris6.ser.gz";
    private static final String DEFAULT_SIGHAN_CORPORA_DICT = "/u/nlp/data/gale/segtool/stanford-seg/releasedata";

    /** Creates a verbose annotator using the default model and dictionaries. */
    public ChineseSegmenterAnnotator() {
        this(DEFAULT_SEG_LOC, true);
    }

    /**
     * Creates an annotator using the default model and dictionaries.
     *
     * @param verbose whether to print progress and debug output to {@code System.err}
     */
    public ChineseSegmenterAnnotator(boolean verbose) {
        this(DEFAULT_SEG_LOC, verbose);
    }

    /**
     * Creates an annotator using the given model and the default dictionaries.
     *
     * @param segLoc location of the serialized segmentation model
     * @param verbose whether to print progress and debug output to {@code System.err}
     */
    public ChineseSegmenterAnnotator(String segLoc, boolean verbose) {
        this(segLoc, verbose, DEFAULT_SER_DICTIONARY, DEFAULT_SIGHAN_CORPORA_DICT);
    }

    /**
     * Creates an annotator from explicit model and dictionary locations.
     *
     * @param segLoc location of the serialized segmentation model
     * @param verbose whether to print progress and debug output to {@code System.err}
     * @param serDictionary value passed to the classifier as the {@code serDictionary} property
     * @param sighanCorporaDict value passed to the classifier as the {@code sighanCorporaDict} property
     */
    public ChineseSegmenterAnnotator(String segLoc, boolean verbose, String serDictionary, String sighanCorporaDict) {
        this.verbose = verbose;
        Properties props = new Properties();
        props.setProperty("serDictionary", serDictionary);
        props.setProperty("sighanCorporaDict", sighanCorporaDict);
        this.loadModel(segLoc, props);
    }

    /**
     * Creates an annotator configured from prefixed properties.
     *
     * <p>Every property whose key starts with {@code name + "."} is considered. The
     * {@code name.model} property gives the model location; all other matching properties
     * are forwarded to the classifier with the prefix stripped (this includes
     * {@code name.verbose}, which is additionally read here and defaults to {@code true}).
     *
     * @param name the property prefix identifying this annotator
     * @param props the properties to read the configuration from
     * @throws RuntimeException if no {@code name.model} property is present, or if the
     *         model cannot be loaded
     */
    public ChineseSegmenterAnnotator(String name, Properties props) {
        String prefix = name + ".";
        String model = null;
        Properties modelProps = new Properties();
        for (String key : props.stringPropertyNames()) {
            if (!key.startsWith(prefix)) {
                continue;
            }
            String modelKey = key.substring(prefix.length());
            if (modelKey.equals("model")) {
                model = props.getProperty(key);
            } else {
                modelProps.setProperty(modelKey, props.getProperty(key));
            }
        }
        this.verbose = PropertiesUtils.getBool(props, name + ".verbose", true);
        if (model == null) {
            throw new RuntimeException("Expected a property " + name + ".model");
        }
        this.loadModel(model, modelProps);
    }

    /**
     * Loads the segmentation model via {@code CRFClassifier.getClassifierNoExceptions}.
     * Currently not called from within this class.
     *
     * @param segLoc location of the serialized segmentation model
     */
    private void loadModel(String segLoc) {
        if (this.verbose) {
            this.timer.start();
            System.err.print("Loading Segmentation Model [" + segLoc + "]...");
        }
        this.segmenter = CRFClassifier.getClassifierNoExceptions(segLoc);
        if (this.verbose) {
            this.timer.stop("done.");
        }
    }

    /**
     * Loads the segmentation model with the given classifier properties.
     *
     * @param segLoc location of the serialized segmentation model
     * @param props properties handed to the classifier when loading
     * @throws RuntimeException if loading fails; checked exceptions are wrapped, runtime
     *         exceptions are propagated unchanged
     */
    private void loadModel(String segLoc, Properties props) {
        if (this.verbose) {
            this.timer.start();
            System.err.print("Loading Segmentation Model [" + segLoc + "]...");
        }
        try {
            this.segmenter = CRFClassifier.getClassifier(segLoc, props);
        }
        catch (RuntimeException e) {
            throw e;
        }
        catch (Exception e) {
            throw new RuntimeException(e);
        }
        if (this.verbose) {
            this.timer.stop("done.");
        }
    }

    /**
     * Segments every sentence of the annotation, or the annotation itself when it carries
     * no {@code SentencesAnnotation}.
     *
     * @param annotation the annotation to add character and token annotations to
     */
    @Override
    public void annotate(Annotation annotation) {
        if (this.verbose) {
            this.timer.start();
            System.err.print("Adding Segmentation annotation...");
        }
        List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
        if (sentences != null) {
            for (CoreMap sentence : sentences) {
                this.doOneSentence(sentence);
            }
        } else {
            // No sentence split available: treat the whole document as one unit.
            this.doOneSentence(annotation);
        }
        if (this.verbose) {
            millisecondsAnnotating += this.timer.stop("done.");
        }
    }

    /**
     * Runs character splitting followed by segmentation on a single text unit.
     *
     * @param annotation a map carrying a {@code TextAnnotation}
     */
    public void doOneSentence(CoreMap annotation) {
        this.splitCharacters(annotation);
        this.runSegmentation(annotation);
    }

    /**
     * Builds one {@link CoreLabel} per character of the text and stores the list under
     * {@code ChineseCoreAnnotations.CharactersAnnotation}.
     *
     * <p>Whitespace and ISO control characters produce no label. The first character of
     * the text and every character following such a skipped character is marked with a
     * {@code ChineseSegAnnotation} of {@code "1"}; all others get {@code "0"}. Offsets are
     * {@code char} indices into the original text.
     *
     * @param annotation a map carrying a {@code TextAnnotation}
     */
    public void splitCharacters(CoreMap annotation) {
        String origText = annotation.get(CoreAnnotations.TextAnnotation.class);
        List<CoreLabel> chars = new ArrayList<CoreLabel>();
        boolean seg = true;
        for (int i = 0; i < origText.length(); ++i) {
            char c = origText.charAt(i);
            if (Character.isWhitespace(c) || Character.isISOControl(c)) {
                // Skipped characters act as a hard boundary for the next kept character.
                seg = true;
                continue;
            }
            CoreLabel wi = new CoreLabel();
            wi.set(CoreAnnotations.ChineseCharAnnotation.class, String.valueOf(c));
            wi.set(CoreAnnotations.ChineseSegAnnotation.class, seg ? "1" : "0");
            wi.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, i);
            wi.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, i + 1);
            chars.add(wi);
            seg = false;
        }
        annotation.set(ChineseCoreAnnotations.CharactersAnnotation.class, chars);
        if (this.verbose) {
            System.err.println("output: " + chars);
        }
    }

    /**
     * Segments the text and stores the resulting word tokens under
     * {@code CoreAnnotations.TokensAnnotation}.
     *
     * <p>Each token's begin/end character offsets are copied from the first and last
     * character labels it covers, and the first character label of every word is marked
     * with a {@code ChineseSegAnnotation} of {@code "1"}. Words are aligned to the
     * character list purely by length, so this relies on the segmenter returning words
     * whose concatenation matches the kept characters one-to-one.
     *
     * @param annotation a map carrying a {@code TextAnnotation} and the character list
     *        produced by {@link #splitCharacters(CoreMap)}
     * @throws IndexOutOfBoundsException if the segmenter output covers more characters
     *         than the character list holds
     */
    public void runSegmentation(CoreMap annotation) {
        String text = annotation.get(CoreAnnotations.TextAnnotation.class);
        List<CoreLabel> sentChars = annotation.get(ChineseCoreAnnotations.CharactersAnnotation.class);
        List<CoreLabel> tokens = new ArrayList<CoreLabel>();
        annotation.set(CoreAnnotations.TokensAnnotation.class, tokens);
        List<String> words = this.segmenter.segmentString(text);
        if (this.verbose) {
            System.err.println(text);
            System.err.println("--->");
            System.err.println(words);
        }
        int pos = 0;
        for (String w : words) {
            if (w.isEmpty()) {
                // An empty word covers no characters; indexing with it would read
                // sentChars at pos - 1 (-1 at the start) or past the end of the list.
                continue;
            }
            CoreLabel first = sentChars.get(pos);
            first.set(CoreAnnotations.ChineseSegAnnotation.class, "1");
            CoreLabel token = new CoreLabel();
            token.setWord(w);
            token.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, first.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
            pos += w.length();
            CoreLabel last = sentChars.get(pos - 1);
            token.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, last.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
            tokens.add(token);
        }
    }

    /** This annotator has no prerequisites. */
    @Override
    public Set<Annotator.Requirement> requires() {
        return Collections.emptySet();
    }

    /** This annotator satisfies the tokenize requirement. */
    @Override
    public Set<Annotator.Requirement> requirementsSatisfied() {
        return Collections.singleton(TOKENIZE_REQUIREMENT);
    }
}

