/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.wordseg;

import edu.stanford.nlp.io.EncodingPrintWriter;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.objectbank.ObjectBank;
import edu.stanford.nlp.sequences.SeqClassifierFlags;
import edu.stanford.nlp.trees.international.pennchinese.ChineseUtils;
import java.io.File;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class ChineseStringUtils {
    private static final boolean DEBUG = false;
    private static final boolean DEBUG_MORE = false;
    static Pattern[] puncsPat = null;
    static Character[] puncs = null;
    static Character[] colons = new Character[]{Character.valueOf('\ufe55'), Character.valueOf(':'), Character.valueOf('\uff1a')};
    static Pattern[] colonsPat = null;
    static Pattern[] colonsWhitePat = null;
    private static final Pattern percentsPat = Pattern.compile("[\\s\\p{Zs}]*([\uff05%])[\\s\\p{Zs}]*");
    private static final String percentStr = "[\\s\\p{Zs}]+([\uff05%])";
    private static Pattern percentsWhitePat;

    public static boolean isLetterASCII(char c) {
        return c <= '\u007f' && Character.isLetter(c);
    }

    public static String combineSegmentedSentence(List<CoreLabel> doc, SeqClassifierFlags flags) {
        int testContentIdx = 0;
        StringBuilder ans = new StringBuilder();
        StringBuilder unmod_ans = new StringBuilder();
        StringBuilder unmod_normed_ans = new StringBuilder();
        CoreLabel wi = null;
        Iterator<CoreLabel> wordIter = doc.iterator();
        while (wordIter.hasNext()) {
            char currChar;
            char prevChar;
            boolean seg;
            CoreLabel pwi = wi;
            wi = wordIter.next();
            boolean originalWhiteSpace = "1".equals(wi.get(CoreAnnotations.SpaceBeforeAnnotation.class));
            if (((String)wi.get(CoreAnnotations.AnswerAnnotation.class)).equals("1") && !"0".equals(String.valueOf(wi.get(CoreAnnotations.PositionAnnotation.class)))) {
                seg = true;
                if (flags.keepEnglishWhitespaces && testContentIdx > 0) {
                    prevChar = ((String)pwi.get(CoreAnnotations.OriginalCharAnnotation.class)).charAt(0);
                    currChar = ((String)wi.get(CoreAnnotations.OriginalCharAnnotation.class)).charAt(0);
                    if (ChineseStringUtils.isLetterASCII(prevChar) && ChineseStringUtils.isLetterASCII(currChar) && !originalWhiteSpace) {
                        seg = false;
                    }
                }
                if (flags.keepAllWhitespaces && originalWhiteSpace) {
                    seg = true;
                }
                if (seg) {
                    if (originalWhiteSpace) {
                        ans.append('\u1924');
                    } else {
                        ans.append(' ');
                    }
                }
                unmod_ans.append(' ');
                unmod_normed_ans.append(' ');
            } else {
                seg = false;
                if (!(testContentIdx <= 0 || (prevChar = ((String)pwi.get(CoreAnnotations.OriginalCharAnnotation.class)).charAt(0)) < '\u0080' == (currChar = ((String)wi.get(CoreAnnotations.OriginalCharAnnotation.class)).charAt(0)) < '\u0080' || ChineseUtils.isNumber(prevChar) && ChineseUtils.isNumber(currChar))) {
                    seg = true;
                }
                if (flags.keepEnglishWhitespaces && testContentIdx > 0) {
                    prevChar = ((String)pwi.get(CoreAnnotations.OriginalCharAnnotation.class)).charAt(0);
                    currChar = ((String)wi.get(CoreAnnotations.OriginalCharAnnotation.class)).charAt(0);
                    if ((ChineseStringUtils.isLetterASCII(prevChar) && ChineseStringUtils.isLetterASCII(currChar) || ChineseStringUtils.isLetterASCII(prevChar) && ChineseUtils.isNumber(currChar) || ChineseUtils.isNumber(prevChar) && ChineseStringUtils.isLetterASCII(currChar)) && "1".equals(wi.get(CoreAnnotations.SpaceBeforeAnnotation.class))) {
                        seg = true;
                    }
                }
                if (flags.keepAllWhitespaces && !"0".equals(String.valueOf(wi.get(CoreAnnotations.PositionAnnotation.class))) && "1".equals(wi.get(CoreAnnotations.SpaceBeforeAnnotation.class))) {
                    seg = true;
                }
                if (seg) {
                    if (originalWhiteSpace) {
                        ans.append('\u1924');
                    } else {
                        ans.append(' ');
                    }
                }
            }
            ans.append((String)wi.get(CoreAnnotations.OriginalCharAnnotation.class));
            unmod_ans.append((String)wi.get(CoreAnnotations.OriginalCharAnnotation.class));
            unmod_normed_ans.append((String)wi.get(CoreAnnotations.CharAnnotation.class));
            ++testContentIdx;
        }
        String ansStr = ans.toString();
        if (flags.sighanPostProcessing) {
            if (!flags.keepAllWhitespaces) {
                ansStr = ansStr.replaceAll("\u1924", " ");
            }
            ansStr = ChineseStringUtils.postProcessingAnswer(ansStr, flags);
        }
        ansStr = ansStr.replaceAll("\u1924", " ");
        return ansStr;
    }

    private static String postProcessingAnswer(String ans, SeqClassifierFlags flags) {
        if (flags.useHk) {
            return ChineseStringUtils.postProcessingAnswerHK(ans);
        }
        if (flags.useAs) {
            return ChineseStringUtils.postProcessingAnswerAS(ans);
        }
        if (flags.usePk) {
            return ChineseStringUtils.postProcessingAnswerPK(ans, flags.keepAllWhitespaces);
        }
        if (flags.useMsr) {
            return ChineseStringUtils.postProcessingAnswerMSR(ans);
        }
        return ChineseStringUtils.postProcessingAnswerCTB(ans, flags.keepAllWhitespaces, flags.suppressMidDotPostprocessing);
    }

    private static String separatePuncs(String ans) {
        int i;
        if (puncs == null) {
            puncs = new Character[]{Character.valueOf('\u3001'), Character.valueOf('\u3002'), Character.valueOf('\u3003'), Character.valueOf('\u3008'), Character.valueOf('\u3009'), Character.valueOf('\u300a'), Character.valueOf('\u300b'), Character.valueOf('\u300c'), Character.valueOf('\u300d'), Character.valueOf('\u300e'), Character.valueOf('\u300f'), Character.valueOf('\u3010'), Character.valueOf('\u3011'), Character.valueOf('\u3014'), Character.valueOf('\u3015')};
        }
        if (puncsPat == null) {
            puncsPat = new Pattern[puncs.length];
            for (i = 0; i < puncs.length; ++i) {
                Character punc = puncs[i];
                ChineseStringUtils.puncsPat[i] = Pattern.compile("[\\s\\p{Zs}]*" + punc + "[\\s\\p{Zs}]*");
            }
        }
        for (i = 0; i < puncsPat.length; ++i) {
            Pattern p = puncsPat[i];
            Character punc = puncs[i];
            Matcher m = p.matcher(ans);
            ans = m.replaceAll(" " + punc + " ");
        }
        ans = ans.trim();
        return ans;
    }

    private static String separatePuncs(Character[] puncs_in, String ans) {
        int i;
        if (puncs == null) {
            puncs = puncs_in;
        }
        if (puncsPat == null) {
            puncsPat = new Pattern[puncs.length];
            for (i = 0; i < puncs.length; ++i) {
                Character punc = puncs[i];
                ChineseStringUtils.puncsPat[i] = punc.charValue() == '(' || punc.charValue() == ')' ? Pattern.compile("[\\s\\p{Zs}]*\\" + punc + "[\\s\\p{Zs}]*") : Pattern.compile("[\\s\\p{Zs}]*" + punc + "[\\s\\p{Zs}]*");
            }
        }
        for (i = 0; i < puncsPat.length; ++i) {
            Pattern p = puncsPat[i];
            Character punc = puncs[i];
            Matcher m = p.matcher(ans);
            ans = m.replaceAll(" " + punc + " ");
        }
        ans = ans.trim();
        return ans;
    }

    private static String gluePunc(Character punc, String ans) {
        Pattern p = Pattern.compile("[\\s\\p{Zs}]*" + punc);
        Matcher m = p.matcher(ans);
        ans = m.replaceAll(String.valueOf(punc));
        p = Pattern.compile(punc + "[\\s\\p{Zs}]*");
        m = p.matcher(ans);
        ans = m.replaceAll(String.valueOf(punc));
        ans = ans.trim();
        return ans;
    }

    private static String processColons(String ans, String numPat) {
        Matcher m;
        Pattern p;
        Character colon;
        int i;
        if (colonsPat == null) {
            colonsPat = new Pattern[colons.length];
            for (i = 0; i < colons.length; ++i) {
                colon = colons[i];
                ChineseStringUtils.colonsPat[i] = Pattern.compile("[\\s\\p{Zs}]*" + colon + "[\\s\\p{Zs}]*");
            }
        }
        for (i = 0; i < colons.length; ++i) {
            colon = colons[i];
            p = colonsPat[i];
            m = p.matcher(ans);
            ans = m.replaceAll(" " + colon + " ");
        }
        if (colonsWhitePat == null) {
            colonsWhitePat = new Pattern[colons.length];
            for (i = 0; i < colons.length; ++i) {
                colon = colons[i];
                ChineseStringUtils.colonsWhitePat[i] = Pattern.compile("(" + numPat + ")" + "[\\s\\p{Zs}]+" + colon + "[\\s\\p{Zs}]+" + "(" + numPat + ")");
            }
        }
        for (i = 0; i < colons.length; ++i) {
            colon = colons[i];
            p = colonsWhitePat[i];
            m = p.matcher(ans);
            while (m.find()) {
                ans = m.replaceAll("$1" + colon + "$2");
                m = p.matcher(ans);
            }
        }
        ans = ans.trim();
        return ans;
    }

    private static String processPercents(String ans, String numPat) {
        Matcher m = percentsPat.matcher(ans);
        ans = m.replaceAll(" $1 ");
        if (percentsWhitePat == null) {
            percentsWhitePat = Pattern.compile("(" + numPat + ")" + percentStr);
        }
        Matcher m2 = percentsWhitePat.matcher(ans);
        ans = m2.replaceAll("$1$2");
        ans = ans.trim();
        return ans;
    }

    private static String processDots(String ans, String numPat) {
        String dots = "[\ufe52\u2027\uff0e.]";
        Pattern p = Pattern.compile("(" + numPat + ")" + "[\\s\\p{Zs}]+" + "(" + dots + ")" + "[\\s\\p{Zs}]+" + "(" + numPat + ")");
        Matcher m = p.matcher(ans);
        while (m.find()) {
            ans = m.replaceAll("$1$2$3");
            m = p.matcher(ans);
        }
        p = Pattern.compile("(" + numPat + ")(" + dots + ")" + "[\\s\\p{Zs}]+" + "(" + numPat + ")");
        m = p.matcher(ans);
        while (m.find()) {
            ans = m.replaceAll("$1$2$3");
            m = p.matcher(ans);
        }
        p = Pattern.compile("(" + numPat + ")" + "[\\s\\p{Zs}]+" + "(" + dots + ")(" + numPat + ")");
        m = p.matcher(ans);
        while (m.find()) {
            ans = m.replaceAll("$1$2$3");
            m = p.matcher(ans);
        }
        ans = ans.trim();
        return ans;
    }

    private static String processCommas(String ans) {
        String numPat = "[0-9\uff10-\uff19]";
        String nonNumPat = "[^0-9\uff10-\uff19]";
        String commas = ",";
        ans = ans.replaceAll(",", " , ");
        ans = ans.replaceAll("  ", " ");
        Pattern p = Pattern.compile("(" + numPat + ")" + "[\\s\\p{Zs}]*" + "(" + commas + ")" + "[\\s\\p{Zs}]*" + "(" + numPat + "{3}" + nonNumPat + ")");
        Matcher m = p.matcher(ans);
        if (m.find()) {
            ans = m.replaceAll("$1$2$3");
        }
        ans = ans.trim();
        return ans;
    }

    static String postProcessingAnswerCTB(String ans, boolean keepAllWhitespaces, boolean suppressMidDotPostprocessing) {
        Character[] puncs = new Character[]{Character.valueOf('\u3001'), Character.valueOf('\u3002'), Character.valueOf('\u3003'), Character.valueOf('\u3008'), Character.valueOf('\u3009'), Character.valueOf('\u300a'), Character.valueOf('\u300b'), Character.valueOf('\u300c'), Character.valueOf('\u300d'), Character.valueOf('\u300e'), Character.valueOf('\u300f'), Character.valueOf('\u3010'), Character.valueOf('\u3011'), Character.valueOf('\u3014'), Character.valueOf('\u3015'), Character.valueOf('('), Character.valueOf(')'), Character.valueOf('\"'), Character.valueOf('<'), Character.valueOf('>')};
        String numPat = "[0-9\uff10-\uff19]+";
        ans = ChineseStringUtils.separatePuncs(puncs, ans);
        if (!suppressMidDotPostprocessing) {
            ans = ChineseStringUtils.gluePunc(Character.valueOf('\u30fb'), ans);
        }
        ans = ChineseStringUtils.processColons(ans, numPat);
        ans = ChineseStringUtils.processPercents(ans, numPat);
        ans = ChineseStringUtils.processDots(ans, numPat);
        ans = ChineseStringUtils.processCommas(ans);
        ans = ans.trim();
        return ans;
    }

    private static String postProcessingAnswerPK(String ans, boolean keepAllWhitespaces) {
        Character[] puncs = new Character[]{Character.valueOf('\u3001'), Character.valueOf('\u3002'), Character.valueOf('\u3003'), Character.valueOf('\u3008'), Character.valueOf('\u3009'), Character.valueOf('\u300a'), Character.valueOf('\u300b'), Character.valueOf('\u300c'), Character.valueOf('\u300d'), Character.valueOf('\u300e'), Character.valueOf('\u300f'), Character.valueOf('\u3010'), Character.valueOf('\u3011'), Character.valueOf('\u3014'), Character.valueOf('\u3015'), Character.valueOf('\u2103')};
        ans = ChineseStringUtils.separatePuncs(puncs, ans);
        String numPat = "[0-9\uff10-\uff19\uff0e\u00b7\u4e00\u5341\u767e]+";
        if (!keepAllWhitespaces) {
            ans = ChineseStringUtils.processColons(ans, numPat);
            ans = ChineseStringUtils.processPercents(ans, numPat);
            ans = ChineseStringUtils.processDots(ans, numPat);
            ans = ChineseStringUtils.processCommas(ans);
            String[] puncPatterns = new String[]{"\u2014[\\s\\p{Zs}]*\u2014[\\s\\p{Zs}]*\u2014", "\u2026[\\s\\p{Zs}]*\u2026"};
            String[] correctPunc = new String[]{"\u2014\u2014\u2014", "\u2026\u2026"};
            for (int i = 0; i < puncPatterns.length; ++i) {
                Pattern p = Pattern.compile("[\\s\\p{Zs}]*" + puncPatterns[i] + "[\\s\\p{Zs}]*");
                Matcher m = p.matcher(ans);
                ans = m.replaceAll(" " + correctPunc[i] + " ");
            }
        }
        ans = ans.trim();
        return ans;
    }

    private static String postProcessingAnswerMSR(String ans) {
        ans = ChineseStringUtils.separatePuncs(ans);
        return ans;
    }

    private static String postProcessingAnswerAS(String ans) {
        ans = ChineseStringUtils.separatePuncs(ans);
        String numPat = "[\uff10-\uff19\u4e00\u4e8c\u4e09\u56db\u4e94\u516d\u4e03\u516b\u4e5d\u5341\u767e\u5343]+";
        ans = ChineseStringUtils.processColons(ans, numPat);
        ans = ChineseStringUtils.processPercents(ans, numPat);
        ans = ChineseStringUtils.processDots(ans, numPat);
        ans = ChineseStringUtils.processCommas(ans);
        return ans;
    }

    private static String postProcessingAnswerHK(String ans) {
        Character[] puncs = new Character[]{Character.valueOf('\u3001'), Character.valueOf('\u3002'), Character.valueOf('\u3003'), Character.valueOf('\u3008'), Character.valueOf('\u3009'), Character.valueOf('\u300a'), Character.valueOf('\u300b'), Character.valueOf('\u300c'), Character.valueOf('\u300d'), Character.valueOf('\u300e'), Character.valueOf('\u300f'), Character.valueOf('\u3010'), Character.valueOf('\u3011'), Character.valueOf('\u3014'), Character.valueOf('\u3015'), Character.valueOf('\u2103')};
        ans = ChineseStringUtils.separatePuncs(puncs, ans);
        String numPat = "[0-9]+";
        ans = ChineseStringUtils.processColons(ans, numPat);
        String[] puncPatterns = new String[]{"\u2014[\\s\\p{Zs}]*\u2014[\\s\\p{Zs}]*\u2014", "\u2026[\\s\\p{Zs}]*\u2026"};
        String[] correctPunc = new String[]{"\u2014\u2014\u2014", "\u2026\u2026"};
        for (int i = 0; i < puncPatterns.length; ++i) {
            Pattern p = Pattern.compile("[\\s\\p{Zs}]*" + puncPatterns[i] + "[\\s\\p{Zs}]*");
            Matcher m = p.matcher(ans);
            ans = m.replaceAll(" " + correctPunc[i] + " ");
        }
        ans = ans.trim();
        return ans;
    }

    public static void main(String[] args) {
        String input = args[0];
        String enc = args[1];
        for (String line : ObjectBank.getLineIterator(new File(input), enc)) {
            EncodingPrintWriter.out.println(ChineseStringUtils.processPercents(line, "[0-9\uff10-\uff19]+"), "UTF-8");
        }
    }
}

