/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.ie.regexp;

import edu.stanford.nlp.ie.AbstractSequenceClassifier;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.sequences.DocumentReaderAndWriter;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Generics;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * A sequence classifier that labels token spans by matching per-token regular
 * expressions loaded from a tab-separated mapping.
 *
 * <p>Each mapping line has two to four tab-separated columns:
 * <ol>
 *   <li>whitespace-separated regular expressions, one per token of the span;</li>
 *   <li>the type to assign to every token of a matching span;</li>
 *   <li>(optional) a comma-separated list of existing NER types that this entry
 *       is allowed to overwrite;</li>
 *   <li>(optional) a priority parsed as a {@code double}; defaults to 0.</li>
 * </ol>
 *
 * <p>Entries are applied in the order defined by {@link Entry#compareTo(Entry)}:
 * higher priority first, and among equal priorities longer patterns first.
 */
public class RegexNERSequenceClassifier extends AbstractSequenceClassifier<CoreLabel> {
    /** POS regex used when the three-argument constructor is called. */
    public static final String DEFAULT_VALID_POS = "^(NN|JJ)";

    /** Mapping entries, sorted by {@link Entry#compareTo(Entry)}. */
    private final List<Entry> entries;
    private final boolean overwriteMyLabels;
    /** Types of all entries; only populated when {@link #overwriteMyLabels} is true. */
    private final Set<String> myLabels;
    private final boolean ignoreCase;
    /** Pattern a POS tag in a matched span must satisfy, or null to skip the POS check. */
    private final Pattern validPosPattern;

    /**
     * Creates a classifier that uses {@link #DEFAULT_VALID_POS} as the POS filter.
     *
     * @param mapping source of the mapping, handed to {@code IOUtils.readerFromString}
     * @param ignoreCase whether token regexes are compiled case-insensitively
     * @param overwriteMyLabels whether NER types produced by this mapping may be overwritten
     */
    public RegexNERSequenceClassifier(String mapping, boolean ignoreCase, boolean overwriteMyLabels) {
        this(mapping, ignoreCase, overwriteMyLabels, DEFAULT_VALID_POS);
    }

    /**
     * Creates a classifier from the given mapping.
     *
     * @param mapping source of the mapping, handed to {@code IOUtils.readerFromString}
     * @param ignoreCase whether token regexes are compiled case-insensitively
     * @param overwriteMyLabels whether NER types produced by this mapping may be overwritten
     * @param validPosRegex regex that at least one POS tag of a matched span must
     *        satisfy; null or the empty string disables the POS check
     * @throws RuntimeException if the mapping cannot be read or is malformed
     */
    public RegexNERSequenceClassifier(String mapping, boolean ignoreCase, boolean overwriteMyLabels, String validPosRegex) {
        super(new Properties());
        if (validPosRegex != null && !validPosRegex.equals("")) {
            this.validPosPattern = Pattern.compile(validPosRegex);
        } else {
            this.validPosPattern = null;
        }
        this.entries = this.readEntries(mapping, ignoreCase);
        this.ignoreCase = ignoreCase;
        this.overwriteMyLabels = overwriteMyLabels;
        this.myLabels = Generics.newHashSet();
        if (this.overwriteMyLabels) {
            for (Entry entry : this.entries) {
                this.myLabels.add(entry.type);
            }
        }
    }

    /**
     * Checks whether any token in {@code [start, end)} has a POS tag matching
     * {@link #validPosPattern}.
     *
     * @return true if no POS pattern is configured or some tag in the span matches
     * @throws IllegalArgumentException if a token without a tag is reached
     *         before a matching tag is found
     */
    private boolean containsValidPos(List<CoreLabel> tokens, int start, int end) {
        if (this.validPosPattern == null) {
            return true;
        }
        for (int i = start; i < end; ++i) {
            String tag = tokens.get(i).tag();
            if (tag == null) {
                throw new IllegalArgumentException("The regex ner was asked to check for valid tags on an untagged sequence.  Either tag the sequence, perhaps with the pos annotator, or create the regex ner with an empty pos tag, perhaps with the flag regexner.validpospattern=");
            }
            if (this.validPosPattern.matcher(tag).find()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Applies every mapping entry, in sorted order, to the document. For each
     * matching span that passes the POS check, the entry's type is stored in
     * {@code AnswerAnnotation} on every token of the span.
     *
     * @param document tokens to label; modified in place
     * @return the same {@code document} instance
     */
    @Override
    public List<CoreLabel> classify(List<CoreLabel> document) {
        for (Entry entry : this.entries) {
            int start = 0;
            while ((start = findStartIndex(entry, document, start, this.myLabels)) != -1) {
                int end = start + entry.regex.size();
                if (this.containsValidPos(document, start, end)) {
                    for (int i = start; i < end; ++i) {
                        document.get(i).set(CoreAnnotations.AnswerAnnotation.class, entry.type);
                    }
                }
                // Resume one token later so overlapping candidate spans are also tried.
                ++start;
            }
        }
        return document;
    }

    /** No-op: this classifier is not trained. */
    @Override
    public void train(Collection<List<CoreLabel>> docs, DocumentReaderAndWriter<CoreLabel> readerAndWriter) {
    }

    /** No-op. */
    @Override
    public void printProbsDocument(List<CoreLabel> document) {
    }

    /** No-op. */
    @Override
    public void serializeClassifier(String serializePath) {
    }

    /** No-op. */
    @Override
    public void loadClassifier(ObjectInputStream in, Properties props) throws IOException, ClassCastException, ClassNotFoundException {
    }

    /**
     * Reads and sorts all entries of the mapping.
     *
     * @param mapping source of the mapping, handed to {@code IOUtils.readerFromString}
     * @param ignoreCase whether token regexes are compiled case-insensitively
     * @return the parsed entries, sorted by {@link Entry#compareTo(Entry)}
     * @throws RuntimeException if the mapping cannot be read or a line is malformed
     */
    private List<Entry> readEntries(String mapping, boolean ignoreCase) {
        List<Entry> entries = new ArrayList<Entry>();
        BufferedReader rd = null;
        try {
            rd = IOUtils.readerFromString(mapping);
            int lineCount = 0;
            String line;
            while ((line = rd.readLine()) != null) {
                ++lineCount;
                entries.add(this.parseEntry(line, lineCount, mapping, ignoreCase));
            }
        } catch (IOException e) {
            // Fail loudly: continuing would yield a classifier with a silently
            // empty or truncated rule set.
            throw new RuntimeException("Couldn't read regexner mapping from " + mapping, e);
        } finally {
            // Close on every path, including when a malformed line aborts parsing.
            if (rd != null) {
                try {
                    rd.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing a reader fails.
                }
            }
        }
        Collections.sort(entries);
        return entries;
    }

    /**
     * Parses one tab-separated mapping line into an {@link Entry}.
     *
     * @param line the raw line
     * @param lineCount 1-based line number, used only for diagnostics
     * @param mapping name of the mapping, used only for diagnostics
     * @param ignoreCase whether token regexes are compiled case-insensitively
     * @throws RuntimeException if the line does not have two to four columns
     * @throws IllegalArgumentException if the priority is not a number or a regex is invalid
     */
    private Entry parseEntry(String line, int lineCount, String mapping, boolean ignoreCase) {
        String[] split = line.split("\t");
        if (split.length < 2 || split.length > 4) {
            throw new RuntimeException("Provided mapping file is in wrong format");
        }
        String[] regexes = split[0].trim().split("\\s+");
        String type = split[1].trim();
        // The background symbol and "no label at all" may always be overwritten.
        Set<String> overwritableTypes = Generics.newHashSet();
        overwritableTypes.add(this.flags.backgroundSymbol);
        overwritableTypes.add(null);
        double priority = 0.0;
        List<Pattern> tokens = new ArrayList<Pattern>();
        try {
            if (split.length >= 3) {
                overwritableTypes.addAll(Arrays.asList(split[2].trim().split(",")));
            }
            if (split.length == 4) {
                priority = Double.parseDouble(split[3].trim());
            }
            int flags = ignoreCase ? Pattern.CASE_INSENSITIVE : 0;
            for (String str : regexes) {
                tokens.add(Pattern.compile(str, flags));
            }
        } catch (IllegalArgumentException e) {
            // Covers both NumberFormatException (bad priority) and
            // PatternSyntaxException (bad regex); the original exception is rethrown.
            System.err.println("ERROR: Invalid line " + lineCount + " in regexner file " + mapping + ": \"" + line + "\"!");
            throw e;
        }
        return new Entry(tokens, type, overwritableTypes, priority);
    }

    /**
     * Finds the first index at or after {@code searchStart} where every pattern
     * of {@code entry} matches the corresponding token's word, the token has no
     * {@code AnswerAnnotation} yet, and its current NER tag may be overwritten
     * (it is in the entry's overwritable types, in {@code myLabels}, or is "O").
     *
     * @return the start index of the match, or -1 if there is none
     */
    private static int findStartIndex(Entry entry, List<CoreLabel> document, int searchStart, Set<String> myLabels) {
        List<Pattern> regex = entry.regex;
        int lastStart = document.size() - regex.size();
        for (int start = searchStart; start <= lastStart; ++start) {
            boolean matched = true;
            for (int i = 0; i < regex.size(); ++i) {
                CoreLabel token = document.get(start + i);
                String NERType = (String)token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
                String currentType = (String)token.get(CoreAnnotations.AnswerAnnotation.class);
                boolean overwritable = entry.overwritableTypes.contains(NERType)
                        || myLabels.contains(NERType)
                        || "O".equals(NERType);
                if (!regex.get(i).matcher(token.word()).matches() || currentType != null || !overwritable) {
                    matched = false;
                    break;
                }
            }
            if (matched) {
                return start;
            }
        }
        return -1;
    }

    /** Delegates to {@link #classify(List)}; {@code doc} and {@code sent} are not used. */
    @Override
    public List<CoreLabel> classifyWithGlobalInformation(List<CoreLabel> tokenSeq, CoreMap doc, CoreMap sent) {
        return this.classify(tokenSeq);
    }

    /**
     * One line of the mapping: a sequence of per-token patterns and the type to
     * assign when they all match.
     */
    private static class Entry implements Comparable<Entry> {
        public final List<Pattern> regex;
        public final String type;
        public final Set<String> overwritableTypes;
        public final double priority;

        public Entry(List<Pattern> regex, String type, Set<String> overwritableTypes, double priority) {
            this.regex = regex;
            this.type = type.intern();
            this.overwritableTypes = overwritableTypes;
            this.priority = priority;
        }

        /**
         * Orders entries by descending priority, then by descending number of
         * token patterns.
         */
        @Override
        public int compareTo(Entry other) {
            if (this.priority > other.priority) {
                return -1;
            }
            if (this.priority < other.priority) {
                return 1;
            }
            // Sizes are non-negative, so this subtraction cannot overflow.
            return other.regex.size() - this.regex.size();
        }
    }
}

