/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.international.arabic.process;

import edu.stanford.nlp.international.arabic.ArabicMorphoFeatureSpecification;
import edu.stanford.nlp.international.arabic.process.ArabicDocumentReaderAndWriter;
import edu.stanford.nlp.international.morph.MorphoFeatureSpecification;
import edu.stanford.nlp.international.morph.MorphoFeatures;
import edu.stanford.nlp.ling.CoreAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.util.CollectionUtils;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Pair;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;

/**
 * Static helpers that convert between token sequences and per-character
 * IOB-labelled {@link CoreLabel} sequences, and back to strings.
 *
 * <p>Each character datum carries its character in
 * {@code CoreAnnotations.CharAnnotation} and its label in
 * {@code CoreAnnotations.AnswerAnnotation} / {@code GoldAnswerAnnotation}.
 * Word boundaries are represented by a datum whose character is
 * {@link #getBoundaryCharacter()}.
 */
public class IOBUtils {
    /** Label of the first character of a segment. */
    public static final String BeginSymbol = "BEGIN";
    /** Label of a non-initial character of a segment. */
    public static final String ContinuationSymbol = "CONT";
    /** Label of a whole token that is emitted as a single, unsegmented datum. */
    public static final String NosegSymbol = "NOSEG";
    /** Label of a character that was (or must be) orthographically rewritten. */
    public static final String RewriteSymbol = "REW";
    /** Accepted by {@code IOBToString} as a rewrite label; never produced in this class. */
    public static final String RewriteTahSymbol = "REWTA";
    /** Accepted by {@code IOBToString} as a rewrite label; never produced in this class. */
    public static final String RewriteTareefSymbol = "REWAL";
    /** Label given to the datum that separates two words. */
    private static final String BoundarySymbol = ".##.";
    /** Character value given to the datum that separates two words. */
    private static final String BoundaryChar = ".#.";
    private static final Pattern isPunc = Pattern.compile("\\p{Punct}+");
    private static final Pattern isDigit = Pattern.compile("\\p{Digit}+");
    private static final Pattern notUnicodeArabic = Pattern.compile("\\P{InArabic}+");
    /** Segments treated as prefixes when locating the head segment of a word. */
    private static final Set<String> arPrefixSet;
    /** Segments treated as suffixes when locating the head segment of a word. */
    private static final Set<String> arSuffixSet;

    static {
        String arabicPrefixString = "\u0644 \u0641 \u0648 \u0645 \u0645\u0627 \u062d \u062d\u0627 \u0647 \u0647\u0627 \u0643 \u0628 \u0633";
        arPrefixSet = Collections.unmodifiableSet(Generics.newHashSet(Arrays.asList(arabicPrefixString.split("\\s+"))));
        String arabicSuffixString = "\u0644 \u0648 \u0645\u0627 \u0647 \u0647\u0627 \u0647\u0645 \u0647\u0646 \u0646\u0627 \u0643\u0645 \u062a\u0646 \u062a\u0645 \u0649 \u064a \u0647\u0645\u0627 \u0643 \u0628 \u0634";
        arSuffixSet = Collections.unmodifiableSet(Generics.newHashSet(Arrays.asList(arabicSuffixString.split("\\s+"))));
    }

    /** Static utility class; not instantiable. */
    private IOBUtils() {
    }

    /**
     * Returns the character value used for word-boundary datums.
     *
     * @return the boundary character string
     */
    public static String getBoundaryCharacter() {
        return BoundaryChar;
    }

    /**
     * Converts a token list to a character-level IOB sequence without
     * stripping rewrites.
     *
     * @param tokenList the tokens to convert
     * @param segMarker the segmentation marker attached to token edges, or {@code null} for none
     * @param applyRewriteRules whether to apply the orthographic rewrite rules
     * @return the character-level labelled sequence
     */
    public static List<CoreLabel> StringToIOB(List<CoreLabel> tokenList, Character segMarker, boolean applyRewriteRules) {
        return IOBUtils.StringToIOB(tokenList, segMarker, applyRewriteRules, false);
    }

    /**
     * Converts a token list to a character-level IOB sequence.
     *
     * <p>A boundary datum is inserted before a token whenever the previous
     * token was unsegmentable, or had no marker or only a leading marker.
     * After each word is complete, every datum of that word receives its
     * offset within the word as index and the concatenated word as word text.
     *
     * @param tokenList the tokens to convert
     * @param segMarker the segmentation marker attached to token edges, or {@code null} for none
     * @param applyRewriteRules whether to apply the orthographic rewrite rules
     * @param stripRewrites forwarded to the rewrite rules; when set, most rules keep the
     *        plain labels instead of {@link #RewriteSymbol}
     * @return the character-level labelled sequence
     */
    public static List<CoreLabel> StringToIOB(List<CoreLabel> tokenList, Character segMarker, boolean applyRewriteRules, boolean stripRewrites) {
        List<CoreLabel> iobList = new ArrayList<>(tokenList.size() * 7 + tokenList.size());
        // String.valueOf on a null Character yields the literal "null", which would make
        // tokens starting or ending with "null" look marked. Keep null as null so that
        // getTokenType reports NoMarker for every token.
        String strSegMarker = segMarker == null ? null : String.valueOf(segMarker);
        boolean addWhitespace = false;
        String lastToken = "";
        String currentWord = "";
        int wordStartIndex = 0;
        for (CoreLabel cl : tokenList) {
            if (addWhitespace) {
                // Close the previous word before emitting the boundary datum.
                IOBUtils.fillInWordStatistics(iobList, currentWord, wordStartIndex);
                currentWord = "";
                CoreLabel boundaryDatum = IOBUtils.createDatum(cl, BoundaryChar, BoundarySymbol);
                boundaryDatum.setIndex(0);
                boundaryDatum.setWord("");
                iobList.add(boundaryDatum);
                // The next word starts right after the boundary datum.
                wordStartIndex = iobList.size();
                addWhitespace = false;
            }
            String token = cl.word();
            TokenType tokType = IOBUtils.getTokenType(token, strSegMarker);
            token = IOBUtils.stripSegmentationMarkers(token, tokType);
            assert (token.length() != 0);
            if (IOBUtils.shouldNotSegment(token)) {
                iobList.add(IOBUtils.createDatum(cl, token, NosegSymbol));
                addWhitespace = true;
            } else {
                IOBUtils.tokenToDatums(iobList, cl, token, tokType, cl, lastToken, applyRewriteRules, stripRewrites);
                addWhitespace = tokType == TokenType.BeginMarker || tokType == TokenType.NoMarker;
            }
            currentWord = currentWord + token;
            lastToken = token;
        }
        IOBUtils.fillInWordStatistics(iobList, currentWord, wordStartIndex);
        return iobList;
    }

    /**
     * Stamps every datum from {@code wordStartIndex} to the end of the list
     * with its offset inside the word and with the full word text.
     */
    private static void fillInWordStatistics(List<CoreLabel> iobList, String currentWord, int wordStartIndex) {
        for (int j = wordStartIndex; j < iobList.size(); ++j) {
            CoreLabel tok = iobList.get(j);
            tok.setIndex(j - wordStartIndex);
            tok.setWord(currentWord);
        }
    }

    /**
     * Appends one datum per character of {@code token} to {@code iobList}.
     * The first character is labelled {@link #BeginSymbol}, the others
     * {@link #ContinuationSymbol}, unless a rewrite rule or a mismatch with the
     * token's rewritten-form annotation selects {@link #RewriteSymbol}.
     *
     * @param iobList the output list
     * @param cl the source label (rewritten-form and domain annotations are read from it)
     * @param token the token text with segmentation markers already removed
     * @param tokType the marker configuration of the raw token
     * @param tokenLabel the label whose raw word and tag drive the rewrite rules
     * @param lastToken the marker-stripped text of the preceding token
     * @param applyRewriteRules whether to apply the rewrite rules
     * @param stripRewrites when set, most rules leave the plain labels in place
     */
    private static void tokenToDatums(List<CoreLabel> iobList, CoreLabel cl, String token, TokenType tokType, CoreLabel tokenLabel, String lastToken, boolean applyRewriteRules, boolean stripRewrites) {
        if (token.isEmpty()) {
            return;
        }
        String lastLabel = ContinuationSymbol;
        String firstLabel = BeginSymbol;
        String rewritten = cl.get(ArabicDocumentReaderAndWriter.RewrittenArabicAnnotation.class);
        boolean crossRefRewrites = true;
        if (rewritten == null) {
            rewritten = token;
            crossRefRewrites = false;
        } else {
            rewritten = IOBUtils.stripSegmentationMarkers(rewritten, tokType);
        }
        if (applyRewriteRules) {
            String rawToken = tokenLabel.word();
            String tag = tokenLabel.tag();
            ArabicMorphoFeatureSpecification featureSpec = new ArabicMorphoFeatureSpecification();
            featureSpec.activate(MorphoFeatureSpecification.MorphoFeatureType.NGEN);
            featureSpec.activate(MorphoFeatureSpecification.MorphoFeatureType.NNUM);
            featureSpec.activate(MorphoFeatureSpecification.MorphoFeatureType.DEF);
            featureSpec.activate(MorphoFeatureSpecification.MorphoFeatureType.TENSE);
            MorphoFeatures features = featureSpec.strToFeatures(tag);

            // Rule 1: final TEH (U+062A) / TEH MARBUTA (U+0629) before a marker.
            boolean feminineSingular = "F".equals(features.getValue(MorphoFeatureSpecification.MorphoFeatureType.NGEN))
                    && "SG".equals(features.getValue(MorphoFeatureSpecification.MorphoFeatureType.NNUM));
            if (feminineSingular && rawToken.endsWith("\u062a-") && !stripRewrites) {
                lastLabel = RewriteSymbol;
            } else if (rawToken.endsWith("\u0629-")) {
                assert (token.endsWith("\u0629"));
                token = token.substring(0, token.length() - 1) + "\u062a";
                // NOTE(review): unlike the other rules this branch labels REW even when
                // stripRewrites is set - confirm this is intended.
                lastLabel = RewriteSymbol;
            }

            // Rule 2: definite token following the single-letter token LAM (U+0644).
            if (lastToken.equals("\u0644") && "D".equals(features.getValue(MorphoFeatureSpecification.MorphoFeatureType.DEF))) {
                if (rawToken.startsWith("-\u0627\u0644")) {
                    if (!token.startsWith("\u0627")) {
                        System.err.println("Bad REWAL: " + rawToken + " / " + token);
                    }
                    // Drop the leading character from both forms so they stay aligned.
                    token = token.substring(1);
                    if (!rewritten.isEmpty()) {
                        rewritten = rewritten.substring(1);
                    }
                    if (!stripRewrites) {
                        firstLabel = RewriteSymbol;
                    }
                } else if (rawToken.startsWith("-\u0644")) {
                    if (!token.startsWith("\u0644")) {
                        System.err.println("Bad REWAL: " + rawToken + " / " + token);
                    }
                    if (!stripRewrites) {
                        firstLabel = RewriteSymbol;
                    }
                } else {
                    System.err.println("Ignoring REWAL: " + rawToken + " / " + token);
                }
            }

            // Rules 3 and 4: final ALEF MAKSURA (U+0649) before a marker becomes
            // ALEF (U+0627) or YEH (U+064A).
            if (rawToken.endsWith("\u0649-")) {
                String stem = token.substring(0, token.length() - 1);
                // NOTE(review): if getValue never returns null for an absent feature the
                // YEH branch is unreachable - verify against MorphoFeatures.getValue.
                if (features.getValue(MorphoFeatureSpecification.MorphoFeatureType.TENSE) != null) {
                    token = stem + "\u0627";
                } else {
                    token = stem + "\u064a";
                }
                if (!stripRewrites) {
                    lastLabel = RewriteSymbol;
                }
            } else if ((rawToken.equals("\u0639\u0644\u064a-") || rawToken.equals("-\u0639\u0644\u064a-")) && !stripRewrites) {
                lastLabel = RewriteSymbol;
            }
        }
        if (token.isEmpty()) {
            System.err.println("Rewriting resulted in empty token: " + tokenLabel.word());
            // Nothing left to emit; reading charAt(0) below would throw.
            return;
        }
        String firstChar = String.valueOf(token.charAt(0));
        iobList.add(IOBUtils.createDatum(cl, firstChar, firstLabel));
        int numChars = token.length();
        if (crossRefRewrites && rewritten.length() != numChars) {
            System.err.printf("Rewritten annotation doesn't have correct length: %s>>>%s%n", token, rewritten);
            crossRefRewrites = false;
        }
        for (int j = 1; j < numChars; ++j) {
            boolean isLastChar = j == numChars - 1;
            String charLabel = isLastChar ? lastLabel : ContinuationSymbol;
            String thisChar = String.valueOf(token.charAt(j));
            if (crossRefRewrites && !String.valueOf(rewritten.charAt(j)).equals(thisChar)) {
                charLabel = RewriteSymbol;
            }
            // A word-internal ALEF MAKSURA is always labelled as a rewrite.
            if (ContinuationSymbol.equals(charLabel) && thisChar.equals("\u0649") && !isLastChar) {
                charLabel = RewriteSymbol;
            }
            iobList.add(IOBUtils.createDatum(cl, thisChar, charLabel));
        }
    }

    /**
     * Returns true when the token contains a digit, a punctuation character,
     * or a character outside the Unicode Arabic block.
     */
    private static boolean shouldNotSegment(String token) {
        return isDigit.matcher(token).find() || isPunc.matcher(token).find() || notUnicodeArabic.matcher(token).find();
    }

    /**
     * Removes one leading and/or one trailing character from {@code tok}
     * according to {@code tokType}; returns {@code tok} unchanged for
     * {@code NoMarker}.
     */
    private static String stripSegmentationMarkers(String tok, TokenType tokType) {
        if (tokType == TokenType.NoMarker) {
            return tok;
        }
        int beginOffset = tokType == TokenType.BeginMarker || tokType == TokenType.BothMarker ? 1 : 0;
        int endOffset = tokType == TokenType.EndMarker || tokType == TokenType.BothMarker ? tok.length() - 1 : tok.length();
        return tok.substring(beginOffset, endOffset);
    }

    /**
     * Builds a new datum whose text and character are {@code token} and whose
     * answer and gold answer are {@code label}. The domain annotation is
     * copied from {@code cl} when present.
     */
    private static CoreLabel createDatum(CoreLabel cl, String token, String label) {
        CoreLabel newTok = new CoreLabel();
        newTok.set(CoreAnnotations.TextAnnotation.class, token);
        newTok.set(CoreAnnotations.CharAnnotation.class, token);
        newTok.set(CoreAnnotations.AnswerAnnotation.class, label);
        newTok.set(CoreAnnotations.GoldAnswerAnnotation.class, label);
        if (cl != null && cl.containsKey(CoreAnnotations.DomainAnnotation.class)) {
            newTok.set(CoreAnnotations.DomainAnnotation.class, cl.get(CoreAnnotations.DomainAnnotation.class));
        }
        return newTok;
    }

    /**
     * Classifies a raw token by where the segmentation marker occurs. A
     * {@code null} marker, or a token that is exactly the marker, yields
     * {@code NoMarker}.
     */
    private static TokenType getTokenType(String token, String segMarker) {
        if (segMarker == null || token.equals(segMarker)) {
            return TokenType.NoMarker;
        }
        boolean startsWithMarker = token.startsWith(segMarker);
        boolean endsWithMarker = token.endsWith(segMarker);
        if (startsWithMarker && endsWithMarker) {
            return TokenType.BothMarker;
        }
        if (startsWithMarker) {
            return TokenType.BeginMarker;
        }
        if (endsWithMarker) {
            return TokenType.EndMarker;
        }
        return TokenType.NoMarker;
    }

    /**
     * Converts a whitespace-delimited string to an IOB sequence, with no
     * segmentation marker and no rewrite rules.
     *
     * @param string the input text
     * @return the character-level labelled sequence
     */
    public static List<CoreLabel> StringToIOB(String string) {
        return IOBUtils.StringToIOB(string, null);
    }

    /**
     * Converts a whitespace-delimited string to an IOB sequence without
     * applying rewrite rules.
     *
     * @param str the input text; it is trimmed and split on runs of whitespace
     * @param segMarker the segmentation marker attached to token edges, or {@code null} for none
     * @return the character-level labelled sequence
     */
    public static List<CoreLabel> StringToIOB(String str, Character segMarker) {
        List<CoreLabel> toks = Sentence.toCoreLabelList(str.trim().split("\\s+"));
        return IOBUtils.StringToIOB(toks, segMarker, false);
    }

    /**
     * Renders a labelled sequence with segments separated by spaces, prefix
     * and suffix markers attached, and rewrites applied.
     *
     * @param labeledSequence the labelled characters; marker annotations are written onto
     *        its elements when either marker is non-empty
     * @param prefixMarker the marker appended after prefix segments, or {@code null}/empty for none
     * @param suffixMarker the marker prepended to suffix segments, or {@code null}/empty for none
     * @return the rendered, trimmed string
     */
    public static String IOBToString(List<CoreLabel> labeledSequence, String prefixMarker, String suffixMarker) {
        return IOBUtils.IOBToString(labeledSequence, prefixMarker, suffixMarker, true, true);
    }

    /**
     * Renders a labelled sequence with {@code segmentationMarker} inserted at
     * segment boundaries (no spaces between segments) and rewrites applied.
     *
     * @param labeledSequence the labelled characters
     * @param segmentationMarker the marker to insert, or {@code null}/empty for none
     * @return the rendered, trimmed string
     */
    public static String IOBToString(List<CoreLabel> labeledSequence, String segmentationMarker) {
        return IOBUtils.IOBToString(labeledSequence, segmentationMarker, null, false, true);
    }

    /**
     * Renders a labelled sequence with no markers, no spaces between
     * segments, and no rewrites applied.
     *
     * @param labeledSequence the labelled characters
     * @return the rendered, trimmed string
     */
    public static String IOBToString(List<CoreLabel> labeledSequence) {
        return IOBUtils.IOBToString(labeledSequence, null, null, false, false);
    }

    /**
     * Shared implementation of the public {@code IOBToString} overloads.
     *
     * @param addSpace whether segments of one word are separated by a space
     * @param applyRewrites whether characters labelled as rewrites are mapped back
     * @throws RuntimeException if a datum carries a label this class does not know
     */
    private static String IOBToString(List<CoreLabel> labeledSequence, String prefixMarker, String suffixMarker, boolean addSpace, boolean applyRewrites) {
        StringBuilder sb = new StringBuilder();
        String lastLabel = "";
        final boolean addPrefixMarker = prefixMarker != null && prefixMarker.length() > 0;
        final boolean addSuffixMarker = suffixMarker != null && suffixMarker.length() > 0;
        if (addPrefixMarker || addSuffixMarker) {
            IOBUtils.annotateMarkers(labeledSequence);
        }
        int sequenceLength = labeledSequence.size();
        for (int i = 0; i < sequenceLength; ++i) {
            CoreLabel labeledChar = labeledSequence.get(i);
            String token = labeledChar.get(CoreAnnotations.CharAnnotation.class);
            // Escape input characters that collide with the markers being inserted.
            if (addPrefixMarker && token.equals(prefixMarker)) {
                token = "#pm#";
            }
            if (addSuffixMarker && token.equals(suffixMarker)) {
                token = "#sm#";
            }
            String label = labeledChar.get(CoreAnnotations.AnswerAnnotation.class);
            if (token.equals(BoundaryChar)) {
                sb.append(" ");
            } else if (label.equals(BeginSymbol)) {
                // A BEGIN that follows another in-word character starts a new segment.
                if (lastLabel.equals(ContinuationSymbol) || lastLabel.equals(BeginSymbol) || lastLabel.equals(RewriteSymbol)) {
                    if (addPrefixMarker && (!addSpace || IOBUtils.addPrefixMarker(i, labeledSequence))) {
                        sb.append(prefixMarker);
                    }
                    if (addSpace) {
                        sb.append(" ");
                    }
                    if (addSuffixMarker && (!addSpace || IOBUtils.addSuffixMarker(i, labeledSequence))) {
                        sb.append(suffixMarker);
                    }
                }
                sb.append(token);
            } else if (label.equals(ContinuationSymbol) || label.equals(BoundarySymbol)) {
                sb.append(token);
            } else if (label.equals(NosegSymbol)) {
                if (!lastLabel.equals(BoundarySymbol) && addSpace) {
                    sb.append(" ");
                }
                sb.append(token);
            } else if (label.equals(RewriteSymbol) || label.equals(RewriteTareefSymbol) || label.equals(RewriteTahSymbol)) {
                switch (token) {
                    case "\u062a":
                    case "\u0647":
                        // TEH / HEH -> TEH MARBUTA
                        sb.append(applyRewrites ? "\u0629" : token);
                        break;
                    case "\u0644":
                        // LAM -> segment break followed by ALEF LAM
                        sb.append((addPrefixMarker ? prefixMarker : "") + (addSpace ? " " : "") + (applyRewrites ? "\u0627\u0644" : "\u0644"));
                        break;
                    case "\u064a":
                    case "\u0627":
                        // YEH / ALEF -> ALEF MAKSURA
                        sb.append(applyRewrites ? "\u0649" : token);
                        break;
                    case "\u0649":
                        // ALEF MAKSURA -> YEH
                        sb.append(applyRewrites ? "\u064a" : token);
                        break;
                    default:
                        sb.append(token);
                        break;
                }
            } else {
                throw new RuntimeException("Unknown label: " + label);
            }
            lastLabel = label;
        }
        return sb.toString().trim();
    }

    /**
     * Splits the sequence into words at boundary labels, collects the segment
     * strings of each word, and annotates each word's datums with the prefix
     * and suffix marker flags.
     */
    private static void annotateMarkers(List<CoreLabel> labeledSequence) {
        StringBuilder segment = new StringBuilder();
        List<String> segments = new ArrayList<>();
        int wordBegin = 0;
        for (int i = 0; i < labeledSequence.size(); ++i) {
            String token = labeledSequence.get(i).get(CoreAnnotations.CharAnnotation.class);
            String label = labeledSequence.get(i).get(CoreAnnotations.AnswerAnnotation.class);
            switch (label) {
                case BeginSymbol:
                    // A BEGIN that is not word-initial closes the previous segment.
                    if (i != wordBegin) {
                        segments.add(segment.toString());
                        segment.setLength(0);
                    }
                    segment.append(token);
                    break;
                case BoundarySymbol:
                    segments.add(segment.toString());
                    segment.setLength(0);
                    IOBUtils.annotateMarkersOnWord(labeledSequence, wordBegin, i, segments);
                    segments.clear();
                    wordBegin = i + 1;
                    break;
                default:
                    segment.append(token);
                    break;
            }
        }
        segments.add(segment.toString());
        IOBUtils.annotateMarkersOnWord(labeledSequence, wordBegin, labeledSequence.size(), segments);
    }

    /**
     * Sets the prefix and suffix marker flags on every datum in
     * {@code [wordBegin, wordEnd)}. Only BEGIN datums can be flagged true: a
     * non-initial segment at or before the head's first bound gets the prefix
     * flag, and a segment at or after the head's second bound gets the suffix
     * flag.
     */
    private static void annotateMarkersOnWord(List<CoreLabel> labeledSequence, int wordBegin, int wordEnd, List<String> segments) {
        Pair<Integer, Integer> headBounds = IOBUtils.getHeadBounds(segments);
        int headStart = headBounds.first;
        int headEnd = headBounds.second;
        int currentIndex = 0;
        for (int i = wordBegin; i < wordEnd; ++i) {
            CoreLabel datum = labeledSequence.get(i);
            String label = datum.get(CoreAnnotations.AnswerAnnotation.class);
            datum.set(PrefixMarkerAnnotation.class, Boolean.FALSE);
            datum.set(SuffixMarkerAnnotation.class, Boolean.FALSE);
            if (!label.equals(BeginSymbol)) {
                continue;
            }
            if (currentIndex <= headStart && currentIndex != 0) {
                datum.set(PrefixMarkerAnnotation.class, Boolean.TRUE);
            }
            if (currentIndex >= headEnd) {
                datum.set(SuffixMarkerAnnotation.class, Boolean.TRUE);
            }
            ++currentIndex;
        }
    }

    /**
     * Locates the head segment of a word. Known suffixes are skipped from the
     * right and known prefixes from the left; among the remaining segments the
     * longest one (the leftmost on ties) is the head.
     *
     * @param segments the segments of one word, in order; must not be empty
     * @return {@code (head, head + 1)}, or {@code (k, k)} when every segment is a known
     *         affix, where {@code k} is the point at which the two scans met
     * @throws RuntimeException if the two scans cross by more than one position
     */
    private static Pair<Integer, Integer> getHeadBounds(List<String> segments) {
        final int NOT_FOUND = -1;
        int potentialSuffix = segments.size() - 1;
        int nonSuffix = NOT_FOUND;
        int potentialPrefix = 0;
        int nonPrefix = NOT_FOUND;
        // Advance both scans one step per iteration until each has hit a non-affix
        // segment or the scans have crossed.
        do {
            if (nonSuffix == NOT_FOUND) {
                if (arSuffixSet.contains(segments.get(potentialSuffix))) {
                    --potentialSuffix;
                } else {
                    nonSuffix = potentialSuffix;
                }
            }
            if (potentialSuffix < potentialPrefix) {
                break;
            }
            if (nonPrefix == NOT_FOUND) {
                if (arPrefixSet.contains(segments.get(potentialPrefix))) {
                    ++potentialPrefix;
                } else {
                    nonPrefix = potentialPrefix;
                }
            }
        } while (potentialSuffix >= potentialPrefix && (nonSuffix == NOT_FOUND || nonPrefix == NOT_FOUND));
        if (potentialSuffix < potentialPrefix) {
            // Every segment is a known affix.
            if (potentialSuffix + 1 != potentialPrefix) {
                throw new RuntimeException("Suffix pointer moved too far!");
            }
            return Pair.makePair(potentialSuffix + 1, potentialSuffix + 1);
        }
        int headIndex = nonPrefix;
        for (int i = nonPrefix + 1; i <= nonSuffix; ++i) {
            if (segments.get(i).length() > segments.get(headIndex).length()) {
                headIndex = i;
            }
        }
        return Pair.makePair(headIndex, headIndex + 1);
    }

    /** Reads the prefix marker flag of the datum at {@code focus}. */
    private static boolean addPrefixMarker(int focus, List<CoreLabel> labeledSequence) {
        return labeledSequence.get(focus).get(PrefixMarkerAnnotation.class);
    }

    /** Reads the suffix marker flag of the datum at {@code focus}. */
    private static boolean addSuffixMarker(int focus, List<CoreLabel> labeledSequence) {
        return labeledSequence.get(focus).get(SuffixMarkerAnnotation.class);
    }

    /**
     * Sets the domain annotation of every label in the list.
     *
     * @param tokenList the labels to annotate
     * @param domain the domain value to store
     */
    public static void labelDomain(List<CoreLabel> tokenList, String domain) {
        for (CoreLabel cl : tokenList) {
            cl.set(CoreAnnotations.DomainAnnotation.class, domain);
        }
    }

    /** Annotation key: whether a suffix marker is attached to this segment start. */
    private static class SuffixMarkerAnnotation
    implements CoreAnnotation<Boolean> {
        private SuffixMarkerAnnotation() {
        }

        @Override
        public Class<Boolean> getType() {
            return Boolean.class;
        }
    }

    /** Annotation key: whether a prefix marker is attached before this segment start. */
    private static class PrefixMarkerAnnotation
    implements CoreAnnotation<Boolean> {
        private PrefixMarkerAnnotation() {
        }

        @Override
        public Class<Boolean> getType() {
            return Boolean.class;
        }
    }

    /** Where the segmentation marker occurs on a raw token. */
    private enum TokenType {
        BeginMarker,
        EndMarker,
        BothMarker,
        NoMarker
    }
}

