/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.wordseg;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.process.ChineseDocumentToSentenceProcessor;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.ObjectOutputStream;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

/**
 * A lexicon of strings bucketed by length, used for dictionary lookups.
 *
 * <p>Entries are stored in {@code MAX_LEXICON_LENGTH + 1} sets indexed by entry
 * length (measured with {@link String#length()}, i.e. in UTF-16 code units):
 * <ul>
 *   <li>an entry shorter than {@link #MAX_LEXICON_LENGTH} is stored whole in the
 *       bucket whose index equals its length;</li>
 *   <li>an entry of length {@link #MAX_LEXICON_LENGTH} or more is truncated to its
 *       first {@link #MAX_LEXICON_LENGTH} characters and stored in the last bucket.</li>
 * </ul>
 * As a consequence, {@link #contains(String)} is an exact match for short words
 * but only a prefix match (on the first {@link #MAX_LEXICON_LENGTH} characters)
 * for long ones.
 *
 * <p>A dictionary can be populated from plain-text files (one entry per line) and
 * from previously serialized dictionaries (file names ending in {@code ser.gz}).
 */
public class ChineseDictionary {
    // Currently not referenced anywhere in this class.
    private static final boolean DEBUG = false;

    /** Longest entry prefix that is kept; longer entries are truncated to this many characters. */
    public static final int MAX_LEXICON_LENGTH = 6;

    private static final Redwood.RedwoodChannels logger = Redwood.channels(ChineseDictionary.class);

    /**
     * Buckets indexed by entry length, {@code 0..MAX_LEXICON_LENGTH}. The array is
     * deliberately created as a {@code HashSet[]} because it is written out as-is by
     * {@link #serializeDictionary(String)}; changing the array's runtime type would
     * change the serialized form.
     */
    @SuppressWarnings("unchecked")
    private final Set<String>[] words_ = new HashSet[MAX_LEXICON_LENGTH + 1];

    /** Optional normalizer applied to every entry read from a plain-text dictionary; may be null. */
    private final ChineseDocumentToSentenceProcessor cdtos_;

    /** Matches any of the middle-dot look-alike characters that are unified or stripped when expanding. */
    private static final Pattern midDot = Pattern.compile("[\u00b7\u0387\u2022\u2024\u2027\u2219\u22c5\u30fb]");

    /**
     * Writes the bucket array to {@code serializePath} with Java serialization.
     *
     * @param serializePath destination handed to {@code IOUtils.writeStreamFromString}
     * @throws RuntimeIOException if opening, writing or closing the stream fails
     */
    private void serializeDictionary(String serializePath) {
        logger.info("Serializing dictionaries to " + serializePath + " ... ");
        // try-with-resources guarantees the stream is closed even when writeObject throws.
        try (ObjectOutputStream oos = IOUtils.writeStreamFromString(serializePath)) {
            oos.writeObject(this.words_);
        }
        catch (Exception e) {
            logger.error("Failed", e);
            throw new RuntimeIOException(e);
        }
        logger.info("done.");
    }

    /**
     * Reads a bucket array previously written by {@link #serializeDictionary(String)}.
     *
     * @param serializePath location handed to
     *        {@code IOUtils.readObjectFromURLOrClasspathOrFileSystem}
     * @return the deserialized buckets, with at least {@code MAX_LEXICON_LENGTH + 1} slots
     * @throws RuntimeException if the object cannot be read, is not an array of sets,
     *         or has too few buckets
     */
    @SuppressWarnings("unchecked")
    private static Set<String>[] loadDictionary(String serializePath) {
        Set<String>[] dict;
        try {
            // Unchecked: the element type cannot be verified at runtime; a wrong array
            // type still fails here with a ClassCastException that is wrapped below.
            dict = (Set<String>[]) IOUtils.readObjectFromURLOrClasspathOrFileSystem(serializePath);
        }
        catch (Exception e) {
            logger.error("Failed to load Chinese dictionary " + serializePath, e);
            throw new RuntimeException(e);
        }
        // Fail here with a clear message rather than with an index/null error in the caller.
        if (dict == null || dict.length <= MAX_LEXICON_LENGTH) {
            throw new RuntimeException("Malformed Chinese dictionary " + serializePath
                    + ": expected at least " + (MAX_LEXICON_LENGTH + 1) + " length buckets");
        }
        return dict;
    }

    /**
     * Loads a single dictionary file without normalization or mid-dot expansion.
     *
     * @param dict path of the dictionary file
     */
    public ChineseDictionary(String dict) {
        this(new String[]{dict});
    }

    /**
     * Loads several dictionary files without normalization or mid-dot expansion.
     *
     * @param dicts paths of the dictionary files
     */
    public ChineseDictionary(String[] dicts) {
        this(dicts, null);
    }

    /**
     * Loads several dictionary files without mid-dot expansion.
     *
     * @param dicts paths of the dictionary files
     * @param cdtos normalizer applied to plain-text entries, or null for none
     */
    public ChineseDictionary(String[] dicts, ChineseDocumentToSentenceProcessor cdtos) {
        this(dicts, cdtos, false);
    }

    /**
     * Loads the dictionary files named in a comma-separated list.
     *
     * @param serDicts comma-separated dictionary paths
     * @param cdtos normalizer applied to plain-text entries, or null for none
     * @param expandMidDot whether to unify and additionally strip middle-dot characters
     *        in plain-text entries
     */
    public ChineseDictionary(String serDicts, ChineseDocumentToSentenceProcessor cdtos, boolean expandMidDot) {
        this(serDicts.split(","), cdtos, expandMidDot);
    }

    /**
     * Loads every file in {@code dicts} into this dictionary. A path ending in
     * {@code ser.gz} is read as a serialized dictionary and merged bucket by bucket;
     * any other path is read as a plain-text list with one entry per line.
     *
     * @param dicts paths of the dictionary files
     * @param cdtos normalizer applied to plain-text entries, or null for none
     * @param expandMidDot whether to unify and additionally strip middle-dot characters
     *        in plain-text entries (not applied to serialized dictionaries)
     */
    public ChineseDictionary(String[] dicts, ChineseDocumentToSentenceProcessor cdtos, boolean expandMidDot) {
        logger.info(String.format("Loading Chinese dictionaries from %d file%s:%n", dicts.length, dicts.length == 1 ? "" : "s"));
        for (String dict : dicts) {
            logger.info("  " + dict);
        }
        for (int i = 0; i <= MAX_LEXICON_LENGTH; ++i) {
            this.words_[i] = Generics.newHashSet();
        }
        this.cdtos_ = cdtos;
        for (String dict : dicts) {
            if (dict.endsWith("ser.gz")) {
                Set<String>[] dictwords = ChineseDictionary.loadDictionary(dict);
                for (int i = 0; i <= MAX_LEXICON_LENGTH; ++i) {
                    this.words_[i].addAll(dictwords[i]);
                    // Drop the reference so each loaded bucket can be collected once merged.
                    dictwords[i] = null;
                }
            } else {
                this.addDict(dict, expandMidDot);
            }
        }
        int total = 0;
        for (Set<String> bucket : this.words_) {
            total += bucket.size();
        }
        logger.info(String.format("Done. Unique words in ChineseDictionary is: %d.%n", total));
    }

    /**
     * Adds every line of a plain-text file (read as utf-8) as an entry.
     *
     * <p>With {@code expandMidDot}, each middle-dot look-alike is first replaced by
     * U+00B7, and a line that contains such a dot is added a second time with all
     * dots removed.
     *
     * @param dict path of the plain-text dictionary
     * @param expandMidDot whether to unify and additionally strip middle-dot characters
     */
    private void addDict(String dict, boolean expandMidDot) {
        String content = IOUtils.slurpFileNoExceptions(dict, "utf-8");
        String[] lines = content.split("\n");
        logger.info("  " + dict + ": " + lines.length + " entries");
        for (String rawLine : lines) {
            String line = rawLine.trim();
            if (expandMidDot) {
                // Reuse the precompiled pattern instead of recompiling the regex per line.
                line = midDot.matcher(line).replaceAll("\u00b7");
            }
            this.addOneDict(line);
            if (expandMidDot && midDot.matcher(line).find()) {
                this.addOneDict(midDot.matcher(line).replaceAll(""));
            }
        }
    }

    /**
     * Adds one entry to the bucket matching its length; empty strings are ignored.
     * Entries of {@code MAX_LEXICON_LENGTH} characters or more are truncated to that
     * length before being stored in the last bucket.
     *
     * <p>The bucket is chosen from the length <em>before</em> normalization.
     * NOTE(review): if the normalizer can change a string's length, the stored form
     * would sit in a bucket that {@link #contains(String)} does not consult for it;
     * confirm that normalization is length-preserving.
     *
     * @param item the raw entry
     */
    private void addOneDict(String item) {
        int length = item.length();
        if (length == 0) {
            return;
        }
        // substring already yields an independent string on current JDKs, so the
        // defensive "new String(...)" copy is unnecessary.
        String entry = length < MAX_LEXICON_LENGTH ? item : item.substring(0, MAX_LEXICON_LENGTH);
        if (this.cdtos_ != null) {
            entry = this.cdtos_.normalization(entry);
        }
        this.words_[Math.min(length, MAX_LEXICON_LENGTH)].add(entry);
    }

    /**
     * Tests whether a word is in the dictionary. Words shorter than
     * {@link #MAX_LEXICON_LENGTH} must match an entry exactly; longer words match when
     * their first {@link #MAX_LEXICON_LENGTH} characters equal a stored (truncated) entry.
     * The word is looked up as given; no normalization is applied here.
     *
     * @param word the word to look up; must not be null
     * @return true if the word (or its truncated prefix) is present
     */
    public boolean contains(String word) {
        int length = word.length();
        if (length < MAX_LEXICON_LENGTH) {
            return this.words_[length].contains(word);
        }
        return this.words_[MAX_LEXICON_LENGTH].contains(word.substring(0, MAX_LEXICON_LENGTH));
    }

    /**
     * Builds a dictionary from plain-text lists, with normalization and mid-dot
     * expansion enabled, and serializes it.
     *
     * <p>Recognized flags, each taking one argument: {@code -inputDicts} (comma-separated
     * input paths) and {@code -output} (destination path). Hard-coded defaults are used
     * for any flag that is absent.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) {
        String inputDicts = "/u/nlp/data/chinese-dictionaries/plain/ne_wikipedia-utf8.txt,/u/nlp/data/chinese-dictionaries/plain/newsexplorer_entities_utf8.txt,/u/nlp/data/chinese-dictionaries/plain/Ch-name-list-utf8.txt,/u/nlp/data/chinese-dictionaries/plain/wikilex-20070908-zh-en.txt,/u/nlp/data/chinese-dictionaries/plain/adso-1.25-050405-monolingual-clean.utf8.txt,/u/nlp/data/chinese-dictionaries/plain/lexicon_108k_normalized.txt,/u/nlp/data/chinese-dictionaries/plain/lexicon_mandarintools_normalized.txt,/u/nlp/data/chinese-dictionaries/plain/harbin-ChineseNames_utf8.txt,/u/nlp/data/chinese-dictionaries/plain/lexicon_HowNet_normalized.txt";
        String output = "/u/nlp/data/gale/segtool/stanford-seg/classifiers/dict-chris6.ser.gz";
        Map<String, Integer> flagMap = Generics.newHashMap();
        flagMap.put("-inputDicts", 1);
        flagMap.put("-output", 1);
        Map<String, String[]> argsMap = StringUtils.argsToMap(args, flagMap);
        if (argsMap.containsKey("-inputDicts")) {
            inputDicts = argsMap.get("-inputDicts")[0];
        }
        if (argsMap.containsKey("-output")) {
            output = argsMap.get("-output")[0];
        }
        String[] dicts = inputDicts.split(",");
        ChineseDocumentToSentenceProcessor cdtos = new ChineseDocumentToSentenceProcessor(null);
        boolean expandMidDot = true;
        ChineseDictionary dict = new ChineseDictionary(dicts, cdtos, expandMidDot);
        dict.serializeDictionary(output);
    }
}

