/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.patterns.surface;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.patterns.surface.ConstantsAndVariables;
import edu.stanford.nlp.patterns.surface.PatternFactory;
import edu.stanford.nlp.patterns.surface.PatternToken;
import edu.stanford.nlp.patterns.surface.SurfacePattern;
import edu.stanford.nlp.patterns.surface.Token;
import edu.stanford.nlp.util.CollectionUtils;
import edu.stanford.nlp.util.Execution;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.Triple;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;

public class SurfacePatternFactory
extends PatternFactory {
    // The annotated fields below are populated from a Properties object by
    // Execution.fillOptions(SurfacePatternFactory.class, props) in setUp.

    /**
     * When true, getContext reads the target token's POS tag (truncated to at
     * most two characters) and emits a pattern variant whose PatternToken is
     * built with its second constructor argument set to true.
     */
    @Execution.Option(name="usePOS4Pattern")
    public static boolean usePOS4Pattern = true;
    /**
     * When true, getContext emits a pattern variant whose PatternToken is built
     * with its second constructor argument set to false. setUp rejects a
     * configuration in which this and usePOS4Pattern are both false.
     */
    @Execution.Option(name="addPatWithoutPOS")
    public static boolean addPatWithoutPOS = true;
    /**
     * Minimum number of collected context tokens getContext requires before it
     * emits a PREV, NEXT or PREVNEXT pattern.
     */
    @Execution.Option(name="minWindow4Pattern")
    public static int minWindow4Pattern = 2;
    /** getContext tries every context window size from 1 up to this value, inclusive. */
    @Execution.Option(name="maxWindow4Pattern")
    public static int maxWindow4Pattern = 4;
    /** When true, getContext collects context from the tokens before the target. */
    @Execution.Option(name="usePreviousContext")
    public static boolean usePreviousContext = true;
    /** When true, getContext collects context from the tokens after the target. */
    @Execution.Option(name="useNextContext")
    public static boolean useNextContext = false;
    /**
     * A context with no non-stop-word tokens is only used by getContext when its
     * stop-word count is strictly greater than this value.
     */
    @Execution.Option(name="numMinStopWordsToAdd")
    public static int numMinStopWordsToAdd = 3;
    /**
     * Passed through to the PatternToken constructor in getContext, together
     * with the target token's GrandparentAnnotation value.
     */
    @Execution.Option(name="useTargetParserParentRestriction")
    public static boolean useTargetParserParentRestriction = false;
    /**
     * When true, getContextTokenStr adds an OR-restriction on the context
     * token's NamedEntityTagAnnotation whenever that tag is non-null and not "O".
     */
    @Execution.Option(name="useContextNERRestriction")
    public static boolean useContextNERRestriction = false;
    /**
     * When true, setUp binds fw to "$FILLER" and getContext skips context tokens
     * whose lower-cased word is contained in fillerWords.
     */
    @Execution.Option(name="useFillerWordsInPat")
    public static boolean useFillerWordsInPat = true;
    /**
     * Shared token recreated by every setUp call; bound to "$FILLER" with
     * setNumOcc(0, 2) only when useFillerWordsInPat is true. getContext
     * interleaves it with the context tokens when it is not empty.
     */
    static Token fw;
    /**
     * Shared token recreated by every setUp call; bound to "$STOPWORD" with
     * setNumOcc(0, 2) only when useStopWordsBeforeTerm is true (that flag is not
     * declared in this class body; presumably inherited from PatternFactory).
     * getContext places it next to the target token when it is not empty.
     */
    static Token sw;

    /**
     * Loads the option fields of this class and of {@link SurfacePattern} from
     * {@code props}, validates them, and recreates the shared {@code fw} and
     * {@code sw} tokens.
     *
     * @param props configuration handed to {@code Execution.fillOptions}
     * @throws RuntimeException if, after loading, both {@code addPatWithoutPOS}
     *         and {@code usePOS4Pattern} are false
     */
    public static void setUp(Properties props) {
        Execution.fillOptions(SurfacePatternFactory.class, props);
        Execution.fillOptions(SurfacePattern.class, props);

        boolean someTargetVariantEnabled = addPatWithoutPOS || usePOS4Pattern;
        if (!someTargetVariantEnabled) {
            throw new RuntimeException("addPatWithoutPOS and usePOS4Pattern both cannot be false ");
        }

        // Each shared token is published first and configured afterwards,
        // exactly in that order.
        Token fillerToken = new Token();
        fw = fillerToken;
        bindEnvVarIfEnabled(fillerToken, useFillerWordsInPat, "$FILLER");

        Token stopWordToken = new Token();
        sw = stopWordToken;
        bindEnvVarIfEnabled(stopWordToken, useStopWordsBeforeTerm, "$STOPWORD");
    }

    /** Binds {@code target} to {@code envVar}, occurring 0 to 2 times, when {@code enabled}; otherwise leaves it untouched. */
    private static void bindEnvVarIfEnabled(Token target, boolean enabled, String envVar) {
        if (!enabled) {
            return;
        }
        target.setEnvBindRestriction(envVar);
        target.setNumOcc(0, 2);
    }

    /**
     * Builds the surface patterns around the token at index {@code i} of
     * {@code sent}.
     *
     * <p>For every window size from 1 to {@code maxWindow4Pattern} the method
     * collects up to that many context tokens before the target (when
     * {@code usePreviousContext}) and after it (when {@code useNextContext}),
     * then emits {@link Genre#PREV}, {@link Genre#NEXT} and
     * {@link Genre#PREVNEXT} patterns subject to the {@code minWindow4Pattern}
     * and stop-word thresholds. Each pattern is emitted once per enabled target
     * variant ({@code addPatWithoutPOS} / {@code usePOS4Pattern}).
     *
     * <p>If {@code usePOS4Pattern} is on but the target token has no POS tag,
     * the POS-restricted variant is skipped for this token instead of failing
     * with a {@link NullPointerException}.
     *
     * @param sent the sentence tokens
     * @param i index of the target token in {@code sent}
     * @return the union of all PREV, NEXT and PREVNEXT patterns that were built
     */
    public static Set<SurfacePattern> getContext(List<CoreLabel> sent, int i) {
        Set<SurfacePattern> prevpatterns = new HashSet<SurfacePattern>();
        Set<SurfacePattern> nextpatterns = new HashSet<SurfacePattern>();
        Set<SurfacePattern> prevnextpatterns = new HashSet<SurfacePattern>();
        CoreLabel token = sent.get(i);

        // Only the first two characters of the POS tag are used.
        String tag = null;
        boolean havePosTag = false;
        if (usePOS4Pattern) {
            String fulltag = token.tag();
            if (fulltag != null) {
                tag = fulltag.substring(0, Math.min(fulltag.length(), 2));
                havePosTag = true;
            }
        }
        String nerTag = (String)token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
        // Loop-invariant, so it is looked up once rather than per window size.
        String grandparent = (String)token.get(CoreAnnotations.GrandparentAnnotation.class);

        for (int maxWin = 1; maxWin <= maxWindow4Pattern; ++maxWin) {
            List<Token> previousTokens = new ArrayList<Token>();
            List<String> originalPrev = new ArrayList<String>();
            List<Token> nextTokens = new ArrayList<Token>();
            List<String> originalNext = new ArrayList<String>();
            int numStopWordsprev = 0;
            int numStopWordsnext = 0;
            int numNonStopWordsNext = 0;
            int numNonStopWordsPrev = 0;
            boolean useprev = false;
            boolean usenext = false;

            // Fresh target tokens per window size; they are never shared between iterations.
            PatternToken twithoutPOS = null;
            if (addPatWithoutPOS) {
                twithoutPOS = new PatternToken(tag, false, numWordsCompound > 1, numWordsCompound, nerTag, useTargetNERRestriction, useTargetParserParentRestriction, grandparent);
            }
            PatternToken twithPOS = null;
            if (havePosTag) {
                twithPOS = new PatternToken(tag, true, numWordsCompound > 1, numWordsCompound, nerTag, useTargetNERRestriction, useTargetParserParentRestriction, grandparent);
            }

            if (usePreviousContext) {
                int j = i - 1;
                int numTokens = 0;
                while (numTokens < maxWin && j >= 0) {
                    CoreLabel tokenj = sent.get(j);
                    String tokenjStr = useLemmaContextTokens ? tokenj.lemma() : tokenj.word();
                    // Filler words are skipped and do not count towards the window.
                    if (useFillerWordsInPat && fillerWords.contains(tokenj.word().toLowerCase())) {
                        --j;
                        continue;
                    }
                    Triple<Boolean, Token, String> triple = getContextTokenStr(tokenj);
                    boolean isLabeledO = triple.first;
                    if (!isLabeledO) {
                        // The token carries a label: use its generalized form.
                        previousTokens.add(0, triple.second);
                        originalPrev.add(0, triple.third);
                        ++numNonStopWordsPrev;
                    } else {
                        // A URL-like token invalidates the whole previous context.
                        if (tokenj.word().startsWith("http")) {
                            useprev = false;
                            previousTokens.clear();
                            originalPrev.clear();
                            break;
                        }
                        previousTokens.add(0, SurfacePattern.getContextToken(tokenj));
                        originalPrev.add(0, tokenjStr);
                        if (SurfacePatternFactory.doNotUse(tokenjStr, ConstantsAndVariables.getStopWords())) {
                            ++numStopWordsprev;
                        } else {
                            ++numNonStopWordsPrev;
                        }
                    }
                    ++numTokens;
                    --j;
                }
            }

            if (useNextContext) {
                int numTokens = 0;
                int j = i + 1;
                while (numTokens < maxWin && j < sent.size()) {
                    CoreLabel tokenj = sent.get(j);
                    String tokenjStr = useLemmaContextTokens ? tokenj.lemma() : tokenj.word();
                    // Filler words are skipped and do not count towards the window.
                    if (useFillerWordsInPat && fillerWords.contains(tokenj.word().toLowerCase())) {
                        ++j;
                        continue;
                    }
                    Triple<Boolean, Token, String> triple = getContextTokenStr(tokenj);
                    boolean isLabeledO = triple.first;
                    if (!isLabeledO) {
                        ++numNonStopWordsNext;
                        nextTokens.add(triple.second);
                        originalNext.add(triple.third);
                    } else {
                        // A URL-like token invalidates the whole next context.
                        if (tokenj.word().startsWith("http")) {
                            usenext = false;
                            nextTokens.clear();
                            originalNext.clear();
                            break;
                        }
                        nextTokens.add(SurfacePattern.getContextToken(tokenj));
                        originalNext.add(tokenjStr);
                        if (SurfacePatternFactory.doNotUse(tokenjStr, ConstantsAndVariables.getStopWords())) {
                            ++numStopWordsnext;
                        } else {
                            ++numNonStopWordsNext;
                        }
                    }
                    ++j;
                    ++numTokens;
                }
            }

            Token[] prevContext = null;
            if (previousTokens.size() >= minWindow4Pattern && (numNonStopWordsPrev > 0 || numStopWordsprev > numMinStopWordsToAdd)) {
                // Layout: t1 [fw] t2 [fw] ... [sw] <target>
                List<Token> prevContextList = new ArrayList<Token>();
                List<String> prevOriginal = new ArrayList<String>();
                for (Token p : previousTokens) {
                    prevContextList.add(p);
                    if (!fw.isEmpty()) {
                        prevContextList.add(fw);
                    }
                }
                for (String p : originalPrev) {
                    prevOriginal.add(p);
                    if (!fw.isEmpty()) {
                        prevOriginal.add(" FW ");
                    }
                }
                if (!sw.isEmpty()) {
                    prevContextList.add(sw);
                    prevOriginal.add(" SW ");
                }
                // Contexts containing non-ASCII text are discarded.
                if (SurfacePatternFactory.isASCII(StringUtils.join(prevOriginal))) {
                    prevContext = prevContextList.toArray(new Token[0]);
                    // The window-size requirement is already guaranteed by the enclosing condition.
                    addTargetVariants(prevpatterns, prevContext, twithoutPOS, twithPOS, null, Genre.PREV);
                    useprev = true;
                }
            }

            Token[] nextContext = null;
            // NOTE(review): unlike the previous context, the next context is not ASCII-checked - confirm this is intended.
            if (nextTokens.size() > 0 && (numNonStopWordsNext > 0 || numStopWordsnext > numMinStopWordsToAdd)) {
                // Layout: <target> [sw] [fw] t1 [fw] t2 ...
                List<Token> nextContextList = new ArrayList<Token>();
                List<String> nextOriginal = new ArrayList<String>();
                if (!sw.isEmpty()) {
                    nextContextList.add(sw);
                    nextOriginal.add(" SW ");
                }
                for (Token n : nextTokens) {
                    if (!fw.isEmpty()) {
                        nextContextList.add(fw);
                    }
                    nextContextList.add(n);
                }
                for (String n : originalNext) {
                    if (!fw.isEmpty()) {
                        nextOriginal.add(" FW ");
                    }
                    nextOriginal.add(n);
                }
                if (nextTokens.size() >= minWindow4Pattern) {
                    nextContext = nextContextList.toArray(new Token[0]);
                    addTargetVariants(nextpatterns, null, twithoutPOS, twithPOS, nextContext, Genre.NEXT);
                }
                // NOTE(review): usenext is set even when nextContext stays null (fewer than
                // minWindow4Pattern next tokens), so a PREVNEXT pattern may carry a null next context.
                usenext = true;
            }

            if (useprev && usenext && previousTokens.size() + nextTokens.size() >= minWindow4Pattern) {
                addTargetVariants(prevnextpatterns, prevContext, twithoutPOS, twithPOS, nextContext, Genre.PREVNEXT);
            }
        }
        return CollectionUtils.unionAsSet(prevpatterns, nextpatterns, prevnextpatterns);
    }

    /** Adds one pattern per non-null target variant (without POS first, then with POS) to {@code out}. */
    private static void addTargetVariants(Set<SurfacePattern> out, Token[] prevContext, PatternToken twithoutPOS, PatternToken twithPOS, Token[] nextContext, Genre genre) {
        if (twithoutPOS != null) {
            out.add(new SurfacePattern(prevContext, twithoutPOS, nextContext, genre));
        }
        if (twithPOS != null) {
            out.add(new SurfacePattern(prevContext, twithPOS, nextContext, genre));
        }
    }

    /**
     * Computes the generalized form of a context token.
     *
     * <p>For every entry of {@code ConstantsAndVariables.getGeneralizeClasses()}
     * whose annotation value on {@code tokenj} differs from
     * {@code ConstantsAndVariables.backgroundSymbol}, an OR-restriction is added
     * to the returned token and the entry's key is appended to the returned
     * string, separated by {@code "|"}. When {@code useContextNERRestriction}
     * is on, a non-null NER tag other than {@code "O"} is added the same way.
     *
     * @param tokenj the context token to inspect
     * @return a triple of: {@code true} if no restriction was added (the token
     *         is unlabeled), the token holding the collected restrictions, and
     *         the {@code "|"}-joined names of those restrictions (empty if none)
     * @throws NullPointerException if {@code tokenj} has no value for one of the
     *         generalize classes
     */
    static Triple<Boolean, Token, String> getContextTokenStr(CoreLabel tokenj) {
        Token strgeneric = new Token();
        String strOriginal = "";
        boolean isLabeledO = true;

        for (Map.Entry<String, Class> e : ConstantsAndVariables.getGeneralizeClasses().entrySet()) {
            Object label = tokenj.get(e.getValue());
            if (label == null) {
                // Same exception type as the implicit dereference it replaces, but it
                // now names the annotation that is missing.
                throw new NullPointerException("Token has no value for generalize class '" + e.getKey() + "' (" + e.getValue() + ")");
            }
            if (label.equals(ConstantsAndVariables.backgroundSymbol)) {
                continue;
            }
            isLabeledO = false;
            strOriginal = strOriginal.isEmpty() ? e.getKey() : strOriginal + "|" + e.getKey();
            strgeneric.addORRestriction(e.getValue(), e.getKey());
        }

        if (useContextNERRestriction) {
            String nerTag = (String)tokenj.get(CoreAnnotations.NamedEntityTagAnnotation.class);
            // A missing NER tag is tolerated here and simply adds no restriction.
            if (nerTag != null && !nerTag.equals("O")) {
                isLabeledO = false;
                strOriginal = strOriginal.isEmpty() ? nerTag : strOriginal + "|" + nerTag;
                strgeneric.addORRestriction(CoreAnnotations.NamedEntityTagAnnotation.class, nerTag);
            }
        }
        return new Triple<Boolean, Token, String>(isLabeledO, strgeneric, strOriginal);
    }

    /**
     * Tells whether {@code text} consists solely of US-ASCII characters.
     *
     * @param text the string to check; must not be null
     * @return {@code true} if every char of {@code text} is in the range
     *         U+0000..U+007F (an empty string qualifies), {@code false} otherwise
     */
    public static boolean isASCII(String text) {
        // A string survives a US-ASCII encode/decode round trip unchanged exactly
        // when none of its chars lies above 0x7F, so scanning the chars suffices.
        for (int idx = 0, len = text.length(); idx < len; ++idx) {
            if (text.charAt(idx) > 0x7F) {
                return false;
            }
        }
        return true;
    }

    /**
     * Identifies which side(s) of the target token a pattern built by
     * getContext takes its context from.
     */
    public static enum Genre {
        /** Pattern built from the previous context only (next context passed as null). */
        PREV,
        /** Pattern built from the next context only (previous context passed as null). */
        NEXT,
        /** Pattern built with both the previous and the next context variables. */
        PREVNEXT;

    }
}

