/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.international.arabic.process;

import edu.stanford.nlp.international.arabic.ArabicMorphoFeatureSpecification;
import edu.stanford.nlp.international.morph.MorphoFeatureSpecification;
import edu.stanford.nlp.international.morph.MorphoFeatures;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.util.Generics;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;

/**
 * Static helpers that convert between whitespace-delimited (optionally
 * segmentation-marked) tokens and a per-character IOB-style labelling
 * represented as a list of {@link CoreLabel}s.
 *
 * <p>Each produced datum carries its text in both {@code TextAnnotation} and
 * {@code CharAnnotation}, its label in both {@code AnswerAnnotation} and
 * {@code GoldAnswerAnnotation}, and a running index set via
 * {@code setIndex}.
 */
public class IOBUtils {
    /** Label of the first character of a segment. */
    private static final String BeginSymbol = "BEGIN";
    /** Label of a non-initial character of a segment. */
    private static final String ContinuationSymbol = "CONT";
    /** Label of a token that is emitted whole, as a single datum. */
    private static final String NosegSymbol = "NOSEG";
    /** Label of the datum that stands for whitespace between tokens. */
    private static final String BoundarySymbol = ".##.";
    /** Text of the datum that stands for whitespace between tokens. */
    private static final String BoundaryChar = ".#.";
    /** Label of a final character that {@link #IOBToString} rewrites to U+0629. */
    private static final String RewriteTahSymbol = "REWTA";
    /** Label of an initial character that {@link #IOBToString} rewrites to U+0627 U+0644. */
    private static final String RewriteTareefSymbol = "REWAL";

    private static final Pattern isPunc = Pattern.compile("\\p{Punct}+");
    private static final Pattern isDigit = Pattern.compile("\\p{Digit}+");
    private static final Pattern notUnicodeArabic = Pattern.compile("\\P{InArabic}+");

    /** Segments that trigger insertion of a prefix/suffix marker in {@link #IOBToString}. */
    private static final Set<String> arAffixSet = Collections.unmodifiableSet(
            Generics.newHashSet(Arrays.asList(
                    "\u0644 \u0641 \u0648 \u0645\u0627 \u0647 \u0647\u0627 \u0647\u0645 \u0647\u0646 \u0646\u0627 \u0643\u0645 \u062a\u0646 \u062a\u0645 \u0649 \u064a \u0647\u0645\u0627 \u0643 \u0628 \u0645 \u0633"
                            .split("\\s+"))));

    /** Not instantiable: static utility class. */
    private IOBUtils() {
    }

    /**
     * Returns the text used for the boundary (whitespace) datum.
     *
     * @return the boundary character string, {@code ".#."}
     */
    public static String getBoundaryCharacter() {
        return BoundaryChar;
    }

    /**
     * Converts a list of tokens into a flat list of labelled datums.
     *
     * <p>A token for which {@link #shouldNotSegment} holds becomes one
     * {@code NOSEG} datum; any other token becomes one datum per character.
     * A boundary datum is inserted before the next token whenever the
     * previous one was a {@code NOSEG} token or did not end with the
     * segmentation marker.
     *
     * @param tokenList         tokens to convert; the text is read from {@code word()}
     * @param segMarker         character marking a segmentation point at the start
     *                          and/or end of a token, or {@code null} if tokens carry
     *                          no markers
     * @param applyRewriteRules whether to emit the {@code REWTA}/{@code REWAL} labels
     * @return the labelled datums, indexed consecutively from 0
     */
    public static List<CoreLabel> StringToIOB(List<CoreLabel> tokenList, Character segMarker, boolean applyRewriteRules) {
        List<CoreLabel> iobList = new ArrayList<CoreLabel>(tokenList.size() * 7 + tokenList.size());
        // String.valueOf((Object) null) would yield the literal "null" and every
        // token starting or ending with "null" would be treated as marked.
        String strSegMarker = segMarker == null ? null : String.valueOf(segMarker);
        boolean addWhitespace = false;
        int charIndex = 0;
        String lastToken = "";
        for (CoreLabel tokenLabel : tokenList) {
            String rawToken = tokenLabel.word();
            TokenType tokType = IOBUtils.getTokenType(rawToken, strSegMarker);
            String token = IOBUtils.stripSegmentationMarkers(rawToken, tokType);
            if (token.isEmpty()) {
                // The token consisted only of markers (e.g. "--"): keep it verbatim.
                token = rawToken;
                tokType = TokenType.NoMarker;
            }
            if (token.isEmpty()) {
                // Nothing to label (e.g. the single "" produced by splitting an empty string).
                continue;
            }
            if (addWhitespace) {
                iobList.add(IOBUtils.createDatum(BoundaryChar, BoundarySymbol, charIndex++));
                addWhitespace = false;
            }
            if (IOBUtils.shouldNotSegment(token)) {
                iobList.add(IOBUtils.createDatum(token, NosegSymbol, charIndex++));
                addWhitespace = true;
            } else {
                // tokenToDatums hands back the next free index so numbering stays consecutive.
                charIndex = IOBUtils.tokenToDatums(iobList, token, tokenLabel, lastToken, charIndex, applyRewriteRules);
                // A token that ends with the marker continues into the next token.
                addWhitespace = tokType == TokenType.BeginMarker || tokType == TokenType.NoMarker;
            }
            lastToken = token;
        }
        return iobList;
    }

    /**
     * Appends one datum per character of {@code token} to {@code iobList}.
     *
     * <p>The first character is labelled {@code BEGIN} and the rest
     * {@code CONT}. With {@code applyRewriteRules} set, the last character of
     * a multi-character token is labelled {@code REWTA} when the tag's NGEN
     * feature is {@code "F"}, its NNUM feature is {@code "SG"} and the raw
     * token ends with U+062A followed by {@code '-'}; the first character is
     * labelled {@code REWAL} when the previous token is U+0644 and the raw
     * token starts with {@code '-'} followed by U+0644.
     *
     * <p>NOTE(review): both rules test for a literal {@code '-'}, independent
     * of the segmentation marker passed to {@code StringToIOB}.
     *
     * @param iobList           output list, appended to
     * @param token             marker-stripped, non-empty token text
     * @param tokenLabel        the source token; supplies the raw word and the tag
     * @param lastToken         marker-stripped text of the previous token, or {@code ""}
     * @param charIndex         index to assign to the first datum
     * @param applyRewriteRules whether to evaluate the rewrite rules
     * @return the index following the last datum appended
     */
    private static int tokenToDatums(List<CoreLabel> iobList, String token, CoreLabel tokenLabel, String lastToken, int charIndex, boolean applyRewriteRules) {
        String lastLabel = ContinuationSymbol;
        String firstLabel = BeginSymbol;
        if (applyRewriteRules) {
            String rawToken = tokenLabel.word();
            String tag = tokenLabel.tag();
            ArabicMorphoFeatureSpecification featureSpec = new ArabicMorphoFeatureSpecification();
            featureSpec.activate(MorphoFeatureSpecification.MorphoFeatureType.NGEN);
            featureSpec.activate(MorphoFeatureSpecification.MorphoFeatureType.NNUM);
            MorphoFeatures features = featureSpec.strToFeatures(tag);
            boolean isFeminine = "F".equals(features.getValue(MorphoFeatureSpecification.MorphoFeatureType.NGEN));
            boolean isSingular = "SG".equals(features.getValue(MorphoFeatureSpecification.MorphoFeatureType.NNUM));
            if (isFeminine && isSingular && rawToken.endsWith("\u062a-")) {
                lastLabel = RewriteTahSymbol;
            }
            if (lastToken.equals("\u0644") && rawToken.startsWith("-\u0644")) {
                firstLabel = RewriteTareefSymbol;
            }
        }
        int numChars = token.length();
        for (int j = 0; j < numChars; ++j) {
            String charLabel;
            if (j == 0) {
                // The first character always takes firstLabel, even in a one-character token.
                charLabel = firstLabel;
            } else if (j == numChars - 1) {
                charLabel = lastLabel;
            } else {
                charLabel = ContinuationSymbol;
            }
            iobList.add(IOBUtils.createDatum(String.valueOf(token.charAt(j)), charLabel, charIndex++));
        }
        return charIndex;
    }

    /**
     * Tells whether a token is emitted whole instead of character by character.
     *
     * @param token marker-stripped token text
     * @return {@code true} if the token contains at least one digit,
     *         punctuation character, or character outside the Arabic Unicode block
     */
    private static boolean shouldNotSegment(String token) {
        return isDigit.matcher(token).find() || isPunc.matcher(token).find() || notUnicodeArabic.matcher(token).find();
    }

    /**
     * Removes the leading and/or trailing marker character indicated by {@code tokType}.
     *
     * @param tok     raw token text
     * @param tokType marker layout of {@code tok}
     * @return {@code tok} without its marker characters; {@code tok} itself for {@code NoMarker}
     */
    private static String stripSegmentationMarkers(String tok, TokenType tokType) {
        int beginOffset = tokType == TokenType.BeginMarker || tokType == TokenType.BothMarker ? 1 : 0;
        int endOffset = tokType == TokenType.EndMarker || tokType == TokenType.BothMarker ? tok.length() - 1 : tok.length();
        return tokType == TokenType.NoMarker ? tok : tok.substring(beginOffset, endOffset);
    }

    /**
     * Builds a single labelled datum.
     *
     * @param token text stored as both the text and the char annotation
     * @param label label stored as both the answer and the gold answer annotation
     * @param index value passed to {@code setIndex}
     * @return the new datum
     */
    private static CoreLabel createDatum(String token, String label, int index) {
        CoreLabel newTok = new CoreLabel();
        newTok.set(CoreAnnotations.TextAnnotation.class, token);
        newTok.set(CoreAnnotations.CharAnnotation.class, token);
        newTok.set(CoreAnnotations.AnswerAnnotation.class, label);
        newTok.set(CoreAnnotations.GoldAnswerAnnotation.class, label);
        newTok.setIndex(index);
        return newTok;
    }

    /**
     * Classifies a token by where the segmentation marker appears in it.
     *
     * @param token     raw token text
     * @param segMarker marker string, or {@code null} if no marker is in use
     * @return {@code NoMarker} when {@code segMarker} is {@code null} or the token
     *         is exactly the marker; otherwise the layout matching the token's
     *         first and last characters
     */
    private static TokenType getTokenType(String token, String segMarker) {
        if (segMarker == null || token.equals(segMarker)) {
            return TokenType.NoMarker;
        }
        boolean startsWithMarker = token.startsWith(segMarker);
        boolean endsWithMarker = token.endsWith(segMarker);
        if (startsWithMarker && endsWithMarker) {
            return TokenType.BothMarker;
        }
        if (startsWithMarker) {
            return TokenType.BeginMarker;
        }
        if (endsWithMarker) {
            return TokenType.EndMarker;
        }
        return TokenType.NoMarker;
    }

    /**
     * Converts a whitespace-delimited string with no segmentation markers.
     *
     * @param string text to convert
     * @return the labelled datums
     */
    public static List<CoreLabel> StringToIOB(String string) {
        return IOBUtils.StringToIOB(string, null);
    }

    /**
     * Splits {@code str} on whitespace and converts the tokens without rewrite rules.
     *
     * @param str       text to convert; leading and trailing whitespace is ignored
     * @param segMarker segmentation marker character, or {@code null} for none
     * @return the labelled datums; empty when {@code str} contains no tokens
     */
    public static List<CoreLabel> StringToIOB(String str, Character segMarker) {
        List<CoreLabel> toks = Sentence.toCoreLabelList(str.trim().split("\\s+"));
        return IOBUtils.StringToIOB(toks, segMarker, false);
    }

    /**
     * Renders a labelled character sequence back into a string.
     *
     * <p>A {@code BEGIN} that follows {@code BEGIN} or {@code CONT} starts a new
     * segment inside a word and is preceded by a space; the optional markers
     * are attached around that space when the neighbouring segment is in the
     * affix set. {@code REWTA} emits U+0629 plus a space and {@code REWAL}
     * emits a space plus U+0627 U+0644, each with the corresponding marker
     * when one is configured.
     *
     * @param labeledSequence datums carrying {@code CharAnnotation} and {@code AnswerAnnotation}
     * @param prefixMarker    string appended after a prefix segment; {@code null} or empty to disable
     * @param suffixMarker    string inserted before a suffix segment; {@code null} or empty to disable
     * @return the rendered string, trimmed
     * @throws RuntimeException if a datum carries an unknown label
     */
    public static String IOBToString(List<CoreLabel> labeledSequence, String prefixMarker, String suffixMarker) {
        StringBuilder sb = new StringBuilder();
        String lastLabel = "";
        boolean usePrefixMarker = prefixMarker != null && prefixMarker.length() > 0;
        boolean useSuffixMarker = suffixMarker != null && suffixMarker.length() > 0;
        int sequenceLength = labeledSequence.size();
        for (int i = 0; i < sequenceLength; ++i) {
            CoreLabel labeledChar = labeledSequence.get(i);
            String token = (String) labeledChar.get(CoreAnnotations.CharAnnotation.class);
            String label = (String) labeledChar.get(CoreAnnotations.AnswerAnnotation.class);
            if (label.equals(BeginSymbol)) {
                if (lastLabel.equals(ContinuationSymbol) || lastLabel.equals(BeginSymbol)) {
                    // Segment break inside a word: "<prefix><marker> <marker><suffix>".
                    if (usePrefixMarker && IOBUtils.addPrefixMarker(i, labeledSequence)) {
                        sb.append(prefixMarker);
                    }
                    sb.append(" ");
                    if (useSuffixMarker && IOBUtils.addSuffixMarker(i, labeledSequence)) {
                        sb.append(suffixMarker);
                    }
                }
                sb.append(token);
            } else if (label.equals(ContinuationSymbol)) {
                sb.append(token);
            } else if (label.equals(NosegSymbol)) {
                // A boundary datum has already contributed the separating space.
                if (!lastLabel.equals(BoundarySymbol)) {
                    sb.append(" ");
                }
                sb.append(token);
            } else if (label.equals(BoundarySymbol)) {
                sb.append(" ");
            } else if (label.equals(RewriteTahSymbol)) {
                sb.append("\u0629 ");
                if (useSuffixMarker) {
                    sb.append(suffixMarker);
                }
            } else if (label.equals(RewriteTareefSymbol)) {
                if (usePrefixMarker) {
                    sb.append(prefixMarker);
                }
                sb.append(" \u0627\u0644");
            } else {
                throw new RuntimeException("Unknown label: " + label);
            }
            lastLabel = label;
        }
        return sb.toString().trim();
    }

    /**
     * Decides whether the segment ending just before {@code focus} is a known affix.
     *
     * <p>NOTE(review): characters are appended while walking backwards, so a
     * multi-character segment is looked up in reverse character order, and a
     * boundary datum's text is included when the walk stops on it. Behaviour
     * is kept as is; confirm whether this is intended before changing it.
     *
     * @param focus           index of the {@code BEGIN} datum starting the next segment
     * @param labeledSequence the full labelled sequence
     * @return {@code true} if the collected text is in the affix set
     */
    private static boolean addPrefixMarker(int focus, List<CoreLabel> labeledSequence) {
        StringBuilder sb = new StringBuilder();
        for (int i = focus - 1; i >= 0; --i) {
            String token = (String) labeledSequence.get(i).get(CoreAnnotations.CharAnnotation.class);
            String label = (String) labeledSequence.get(i).get(CoreAnnotations.AnswerAnnotation.class);
            sb.append(token);
            if (label.equals(BeginSymbol) || label.equals(BoundarySymbol)) {
                break;
            }
        }
        return arAffixSet.contains(sb.toString());
    }

    /**
     * Decides whether the segment starting at {@code focus} is a known affix
     * that runs up to a boundary datum or the end of the sequence.
     *
     * @param focus           index of the {@code BEGIN} datum starting the segment
     * @param labeledSequence the full labelled sequence
     * @return {@code false} if another {@code BEGIN} occurs before a boundary;
     *         otherwise whether the collected text is in the affix set
     */
    private static boolean addSuffixMarker(int focus, List<CoreLabel> labeledSequence) {
        StringBuilder sb = new StringBuilder();
        for (int i = focus; i < labeledSequence.size(); ++i) {
            String token = (String) labeledSequence.get(i).get(CoreAnnotations.CharAnnotation.class);
            String label = (String) labeledSequence.get(i).get(CoreAnnotations.AnswerAnnotation.class);
            if (label.equals(BoundarySymbol)) {
                break;
            }
            if (i != focus && label.equals(BeginSymbol)) {
                // Another segment follows inside the same word, so this one is not word-final.
                return false;
            }
            sb.append(token);
        }
        return arAffixSet.contains(sb.toString());
    }

    /** Where the segmentation marker occurs in a raw token. */
    private enum TokenType {
        BeginMarker,
        EndMarker,
        BothMarker,
        NoMarker
    }
}

