/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.patterns.surface;

import edu.stanford.nlp.ie.crf.CRFClassifier;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.io.RegExFileFilter;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.patterns.surface.AnnotatedTextReader;
import edu.stanford.nlp.patterns.surface.ConstantsAndVariables;
import edu.stanford.nlp.patterns.surface.CreatePatterns;
import edu.stanford.nlp.patterns.surface.Data;
import edu.stanford.nlp.patterns.surface.LearnImportantFeatures;
import edu.stanford.nlp.patterns.surface.Pattern;
import edu.stanford.nlp.patterns.surface.PatternFactory;
import edu.stanford.nlp.patterns.surface.PatternsAnnotations;
import edu.stanford.nlp.patterns.surface.PatternsForEachToken;
import edu.stanford.nlp.patterns.surface.ScorePatterns;
import edu.stanford.nlp.patterns.surface.ScorePatternsF1;
import edu.stanford.nlp.patterns.surface.ScorePatternsFreqBased;
import edu.stanford.nlp.patterns.surface.ScorePatternsRatioModifiedFreq;
import edu.stanford.nlp.patterns.surface.ScorePhrases;
import edu.stanford.nlp.patterns.surface.SentenceIndex;
import edu.stanford.nlp.patterns.surface.Token;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.stats.Counters;
import edu.stanford.nlp.stats.TwoDimensionalCounter;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
import edu.stanford.nlp.util.ArrayUtils;
import edu.stanford.nlp.util.CollectionUtils;
import edu.stanford.nlp.util.CollectionValuedMap;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.EditDistance;
import edu.stanford.nlp.util.Execution;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.PriorityQueue;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.Triple;
import edu.stanford.nlp.util.TypesafeMap;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.sql.SQLException;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;
import javax.json.Json;
import javax.json.JsonArray;
import javax.json.JsonArrayBuilder;
import javax.json.JsonObjectBuilder;
import javax.json.JsonReader;
import javax.json.JsonValue;
import org.joda.time.Interval;
import org.joda.time.Period;

public class GetPatternsFromDataMultiClass<E extends Pattern>
implements Serializable {
    // Serialization version identifier for this Serializable class.
    private static final long serialVersionUID = 1L;
    // Returned as-is by getPatsForEachToken(); null until assigned elsewhere in the class.
    private PatternsForEachToken<E> patsForEachToken = null;
    // NOTE(review): presumably label -> words known to belong to other classes; not assigned in the code reviewed here — confirm.
    public Map<String, Set<String>> wordsForOtherClass = null;
    // NOTE(review): presumably tracks which patterns were already written to justification output — confirm against its users.
    Map<String, Boolean> writtenPatInJustification = new HashMap<String, Boolean>();
    // NOTE(review): presumably label -> patterns learned so far (with scores) — confirm.
    Map<String, Counter<E>> learnedPatterns = new HashMap<String, Counter<E>>();
    // NOTE(review): presumably label -> words learned so far (with scores) — confirm.
    Map<String, Counter<String>> learnedWords = new HashMap<String, Counter<String>>();
    // Re-created in setUpConstructor with one empty TwoDimensionalCounter per answer label.
    public Map<String, TwoDimensionalCounter<String, E>> wordsPatExtracted = new HashMap<String, TwoDimensionalCounter<String, E>>();
    // Configuration properties handed to the constructors; used to fill options of the helper classes.
    Properties props;
    // Created in setUpConstructor from props and constVars.
    public ScorePhrases scorePhrases;
    // Created in setUpConstructor; holds the seed sets, answer classes and the inverted index.
    public ConstantsAndVariables<E> constVars;
    // Created in setUpConstructor from props and constVars.
    public CreatePatterns createPats;
    // Formats numbers with at most two fractional digits.
    DecimalFormat df = new DecimalFormat("#.##");
    // NOTE(review): looks like a flag for a one-off "compute all patterns" step — confirm where it is cleared.
    private boolean notComputedAllPatternsYet = true;
    // Shared CoreNLP pipeline; lazily created by tokenize(...) on first use and reused by later calls.
    static StanfordCoreNLP pipeline = null;
    // NOTE(review): presumably label -> (pattern, word) co-occurrence counts — confirm.
    public Map<String, TwoDimensionalCounter<E, String>> patternsandWords = null;
    // NOTE(review): presumably label -> current weight of each pattern — confirm.
    public Map<String, Counter<E>> currentPatternWeights = null;
    // NOTE(review): presumably counts invocations of a statistics routine defined elsewhere in this class — confirm.
    static AtomicInteger numCallsToCalStats = new AtomicInteger();
    // NOTE(review): presumably caches per-phrase scores keyed by score measure — confirm.
    public TwoDimensionalCounter<String, ConstantsAndVariables.ScorePhraseMeasures> phInPatScoresCache = new TwoDimensionalCounter();

    /**
     * Single-label convenience constructor: delegates to the overload that takes an
     * explicit annotation class, supplying {@code PatternsAnnotations.PatternLabel1}.
     *
     * @param props              configuration properties
     * @param sents              sentence id to token list
     * @param seedSet            seed words for the single label
     * @param labelUsingSeedSets whether the sentences should be labelled with the seed words
     * @param answerLabel        name of the label being learned
     */
    public GetPatternsFromDataMultiClass(
            Properties props,
            Map<String, List<CoreLabel>> sents,
            Set<String> seedSet,
            boolean labelUsingSeedSets,
            String answerLabel)
            throws IOException, InstantiationException, IllegalAccessException, IllegalArgumentException,
                   InvocationTargetException, NoSuchMethodException, SecurityException, InterruptedException,
                   ExecutionException, ClassNotFoundException {
        this(props, sents, seedSet, labelUsingSeedSets, PatternsAnnotations.PatternLabel1.class, answerLabel);
    }

    /**
     * Single-label constructor with an explicit annotation class. Wraps the label's
     * seed set, annotation class and an empty ignore-class map into per-label maps,
     * uses no generalize classes, and hands everything to {@code setUpConstructor}.
     *
     * @param props              configuration properties
     * @param sents              sentence id to token list
     * @param seedSet            seed words for the single label
     * @param labelUsingSeedSets whether the sentences should be labelled with the seed words
     * @param answerClass        annotation class under which the label is stored on tokens
     * @param answerLabel        name of the label being learned
     */
    public GetPatternsFromDataMultiClass(
            Properties props,
            Map<String, List<CoreLabel>> sents,
            Set<String> seedSet,
            boolean labelUsingSeedSets,
            Class answerClass,
            String answerLabel)
            throws IOException, InstantiationException, IllegalAccessException, IllegalArgumentException,
                   InvocationTargetException, NoSuchMethodException, SecurityException, InterruptedException,
                   ExecutionException, ClassNotFoundException {
        this.props = props;

        Map<String, Set<String>> seedsByLabel = new HashMap<>();
        seedsByLabel.put(answerLabel, seedSet);

        Map<String, Class<? extends TypesafeMap.Key<String>>> annotationByLabel = new HashMap<>();
        annotationByLabel.put(answerLabel, answerClass);

        Map<String, Map<Class, Object>> ignoredByLabel = new HashMap<>();
        ignoredByLabel.put(answerLabel, new HashMap<Class, Object>());

        Map<String, Class> noGeneralizeClasses = new HashMap<>();

        this.setUpConstructor(sents, seedsByLabel, labelUsingSeedSets, annotationByLabel, noGeneralizeClasses, ignoredByLabel);
    }

    /**
     * Single-label convenience constructor with generalize/ignore classes: delegates
     * to the overload that takes an explicit annotation class, supplying
     * {@code PatternsAnnotations.PatternLabel1}.
     *
     * @param props              configuration properties
     * @param sents              sentence id to token list
     * @param seedSet            seed words for the single label
     * @param labelUsingSeedSets whether the sentences should be labelled with the seed words
     * @param answerLabel        name of the label being learned
     * @param generalizeClasses  generalize classes, passed through unchanged
     * @param ignoreClasses      ignore classes for the single label, passed through unchanged
     */
    public GetPatternsFromDataMultiClass(
            Properties props,
            Map<String, List<CoreLabel>> sents,
            Set<String> seedSet,
            boolean labelUsingSeedSets,
            String answerLabel,
            Map<String, Class> generalizeClasses,
            Map<Class, Object> ignoreClasses)
            throws IOException, InstantiationException, IllegalAccessException, IllegalArgumentException,
                   InvocationTargetException, NoSuchMethodException, SecurityException, InterruptedException,
                   ExecutionException, ClassNotFoundException {
        this(props, sents, seedSet, labelUsingSeedSets, PatternsAnnotations.PatternLabel1.class, answerLabel, generalizeClasses, ignoreClasses);
    }

    /**
     * Single-label constructor with an explicit annotation class and caller-supplied
     * generalize/ignore classes. The per-label arguments are wrapped into maps keyed
     * by {@code answerLabel} before being handed to {@code setUpConstructor}.
     *
     * @param props              configuration properties
     * @param sents              sentence id to token list
     * @param seedSet            seed words for the single label
     * @param labelUsingSeedSets whether the sentences should be labelled with the seed words
     * @param answerClass        annotation class under which the label is stored on tokens
     * @param answerLabel        name of the label being learned
     * @param generalizeClasses  generalize classes, passed through unchanged
     * @param ignoreClasses      ignore classes registered for {@code answerLabel}
     */
    public GetPatternsFromDataMultiClass(
            Properties props,
            Map<String, List<CoreLabel>> sents,
            Set<String> seedSet,
            boolean labelUsingSeedSets,
            Class answerClass,
            String answerLabel,
            Map<String, Class> generalizeClasses,
            Map<Class, Object> ignoreClasses)
            throws IOException, InstantiationException, IllegalAccessException, IllegalArgumentException,
                   InvocationTargetException, NoSuchMethodException, SecurityException, InterruptedException,
                   ExecutionException, ClassNotFoundException {
        this.props = props;

        Map<String, Set<String>> seedsByLabel = new HashMap<>();
        seedsByLabel.put(answerLabel, seedSet);

        Map<String, Class<? extends TypesafeMap.Key<String>>> annotationByLabel = new HashMap<>();
        annotationByLabel.put(answerLabel, answerClass);

        Map<String, Map<Class, Object>> ignoredByLabel = new HashMap<>();
        ignoredByLabel.put(answerLabel, ignoreClasses);

        this.setUpConstructor(sents, seedsByLabel, labelUsingSeedSets, annotationByLabel, generalizeClasses, ignoredByLabel);
    }

    /**
     * Multi-label constructor that assigns annotation classes automatically. The
     * n-th label (in {@code seedSets.keySet()} iteration order, counting from 1) is
     * bound to the class {@code PatternsAnnotations$PatternLabel<n>}, looked up by
     * name, and gets an empty ignore-class map. No generalize classes are used.
     *
     * @param props              configuration properties
     * @param sents              sentence id to token list
     * @param seedSets           label to seed words
     * @param labelUsingSeedSets whether the sentences should be labelled with the seed words
     * @throws ClassNotFoundException if no {@code PatternLabel<n>} class exists for some label index
     */
    @SuppressWarnings("unchecked")
    public GetPatternsFromDataMultiClass(
            Properties props,
            Map<String, List<CoreLabel>> sents,
            Map<String, Set<String>> seedSets,
            boolean labelUsingSeedSets)
            throws IOException, InstantiationException, IllegalAccessException, IllegalArgumentException,
                   InvocationTargetException, NoSuchMethodException, SecurityException, ClassNotFoundException,
                   InterruptedException, ExecutionException {
        this.props = props;

        Map<String, Class<? extends TypesafeMap.Key<String>>> annotationByLabel = new HashMap<>();
        Map<String, Map<Class, Object>> ignoredByLabel = new HashMap<>();
        Map<String, Class> noGeneralizeClasses = new HashMap<>();

        int labelNumber = 0;
        for (String label : seedSets.keySet()) {
            ++labelNumber;
            String annotationClassName = "edu.stanford.nlp.patterns.surface.PatternsAnnotations$PatternLabel" + labelNumber;
            Class<? extends TypesafeMap.Key<String>> annotationClass =
                    (Class<? extends TypesafeMap.Key<String>>) Class.forName(annotationClassName);
            annotationByLabel.put(label, annotationClass);
            ignoredByLabel.put(label, new HashMap<Class, Object>());
        }

        this.setUpConstructor(sents, seedSets, labelUsingSeedSets, annotationByLabel, noGeneralizeClasses, ignoredByLabel);
    }

    /**
     * Multi-label constructor with caller-supplied annotation classes. Delegates to
     * the full overload with fresh, empty generalize-class and ignore-class maps.
     *
     * @param props              configuration properties
     * @param sents              sentence id to token list
     * @param seedSets           label to seed words
     * @param labelUsingSeedSets whether the sentences should be labelled with the seed words
     * @param answerClass        label to the annotation class that stores it on tokens
     */
    public GetPatternsFromDataMultiClass(
            Properties props,
            Map<String, List<CoreLabel>> sents,
            Map<String, Set<String>> seedSets,
            boolean labelUsingSeedSets,
            Map<String, Class<? extends TypesafeMap.Key<String>>> answerClass)
            throws IOException, InstantiationException, IllegalAccessException, IllegalArgumentException,
                   InvocationTargetException, NoSuchMethodException, SecurityException, InterruptedException,
                   ExecutionException, ClassNotFoundException {
        this(props, sents, seedSets, labelUsingSeedSets, answerClass, new HashMap<String, Class>(), new HashMap<String, Map<Class, Object>>());
    }

    public GetPatternsFromDataMultiClass(Properties props, Map<String, List<CoreLabel>> sents, Map<String, Set<String>> seedSets, boolean labelUsingSeedSets, Map<String, Class<? extends TypesafeMap.Key<String>>> answerClass, Map<String, Class> generalizeClasses, Map<String, Map<Class, Object>> ignoreClasses) throws IOException, InstantiationException, IllegalAccessException, IllegalArgumentException, InvocationTargetException, NoSuchMethodException, SecurityException, InterruptedException, ExecutionException, ClassNotFoundException {
        this.props = props;
        if (ignoreClasses.isEmpty()) {
            for (String label : seedSets.keySet()) {
                ignoreClasses.put(label, new HashMap());
            }
        }
        this.setUpConstructor(sents, seedSets, labelUsingSeedSets, answerClass, generalizeClasses, ignoreClasses);
    }

    /**
     * Shared initialisation behind every public constructor. In order, it:
     * <ol>
     *   <li>publishes {@code sents} as {@code Data.sents} and fills the static options of
     *       {@code Data}, {@code ConstantsAndVariables} and {@code PatternFactory} from {@code props};</li>
     *   <li>creates {@link #constVars}, configures Redwood log channels from its debug level, and
     *       creates {@link #scorePhrases} and {@link #createPats};</li>
     *   <li>loads or creates the inverted sentence index (a temporary directory is created when no
     *       index directory is configured);</li>
     *   <li>labels the sentences with the seed sets, either file by file (batch mode, each file is
     *       written back after labelling) or on the in-memory {@code Data.sents};</li>
     *   <li>if word-class evaluation is enabled, learns (when missing) and loads per-label cluster
     *       weights from the external feature-weights file;</li>
     *   <li>if semantic-odds evaluation is enabled, computes per-label dictionary odds weights from
     *       the n-grams of the seed sets and of the other-semantic-class words.</li>
     * </ol>
     *
     * @param sents              sentence id to token list
     * @param seedSets           label to seed words
     * @param labelUsingSeedSets whether the seed words are used to label the sentences
     * @param answerClass        label to the annotation class that stores it on tokens
     * @param generalizeClasses  generalize classes handed to {@code ConstantsAndVariables}
     * @param ignoreClasses      label to ignore classes handed to {@code ConstantsAndVariables}
     * @throws RuntimeException if both writeMatchedTokensFiles and batchProcessSents are set, or a
     *                          batch sentence file is missing
     * @throws IOException      if a temporary file/directory cannot be created or a file cannot be read/written
     */
    private void setUpConstructor(Map<String, List<CoreLabel>> sents, Map<String, Set<String>> seedSets, boolean labelUsingSeedSets, Map<String, Class<? extends TypesafeMap.Key<String>>> answerClass, Map<String, Class> generalizeClasses, Map<String, Map<Class, Object>> ignoreClasses) throws IOException, InstantiationException, IllegalAccessException, IllegalArgumentException, InvocationTargetException, NoSuchMethodException, SecurityException, InterruptedException, ExecutionException, ClassNotFoundException {
        Data.sents = sents;
        Execution.fillOptions(Data.class, this.props);
        Execution.fillOptions(ConstantsAndVariables.class, this.props);
        PatternFactory.setUp(this.props);
        this.constVars = new ConstantsAndVariables(this.props, seedSets, answerClass, generalizeClasses, ignoreClasses);
        if (this.constVars.writeMatchedTokensFiles && this.constVars.batchProcessSents) {
            throw new RuntimeException("writeMatchedTokensFiles and batchProcessSents cannot be true at the same time (not implemented; also doesn't make sense to save a large sentences json file)");
        }

        // Each debug level below its threshold hides one more log channel; justification needs level 3+.
        if (this.constVars.debug < 1) {
            Redwood.hideChannelsEverywhere(ConstantsAndVariables.minimaldebug);
        }
        if (this.constVars.debug < 2) {
            Redwood.hideChannelsEverywhere(new Object[]{Redwood.DBG});
        }
        this.constVars.justify = this.constVars.debug >= 3;
        if (this.constVars.debug < 4) {
            Redwood.hideChannelsEverywhere(ConstantsAndVariables.extremedebug);
        }
        Redwood.log(new Object[]{Redwood.DBG, "Running with debug output"});
        Redwood.log(ConstantsAndVariables.extremedebug, "Running with extreme debug output");

        this.wordsPatExtracted = new HashMap<String, TwoDimensionalCounter<String, E>>();
        for (String label : answerClass.keySet()) {
            this.wordsPatExtracted.put(label, new TwoDimensionalCounter<String, E>());
        }
        this.scorePhrases = new ScorePhrases(this.props, this.constVars);
        this.createPats = new CreatePatterns(this.props, this.constVars);
        assert (!this.constVars.doNotApplyPatterns || !PatternFactory.useStopWordsBeforeTerm && PatternFactory.numWordsCompound <= 1) : " Cannot have both doNotApplyPatterns and (useStopWordsBeforeTerm true or numWordsCompound > 1)!";

        if (this.constVars.invertedIndexDirectory == null) {
            File f = File.createTempFile("inv", "index");
            // createTempFile leaves a regular file at this path, so mkdir() alone can never
            // succeed; remove the placeholder first so a real directory is created.
            if (!f.delete() || !f.mkdir()) {
                throw new IOException("Could not create temporary inverted index directory " + f.getAbsolutePath());
            }
            f.deleteOnExit();
            this.constVars.invertedIndexDirectory = f.getAbsolutePath();
        }

        Set<String> extremelySmallStopWordsList = CollectionUtils.asSet(new String[]{".", ",", "in", "on", "of", "a", "the", "an"});
        // Maps a token to the (key, value) pairs of its non-background generalize-class annotations.
        Function<CoreLabel, Map<String, String>> transformCoreLabelToString = l -> {
            Map<String, String> add = new HashMap<String, String>();
            for (Class gn : ConstantsAndVariables.getGeneralizeClasses().values()) {
                Object b = l.get(gn);
                if (b != null && !b.toString().equals(ConstantsAndVariables.backgroundSymbol)) {
                    add.put(Token.getKeyForClass(gn), b.toString());
                }
            }
            return add;
        };

        boolean createIndex = false;
        if (this.constVars.loadInvertedIndex) {
            this.constVars.invertedIndex = SentenceIndex.loadIndex(this.constVars.invertedIndexClass, this.props, extremelySmallStopWordsList, this.constVars.invertedIndexDirectory, transformCoreLabelToString);
        } else {
            this.constVars.invertedIndex = SentenceIndex.createIndex(this.constVars.invertedIndexClass, null, this.props, extremelySmallStopWordsList, this.constVars.invertedIndexDirectory, transformCoreLabelToString);
            createIndex = true;
        }

        int totalNumSents = 0;
        boolean computeDataFreq = false;
        if (Data.rawFreq == null) {
            Data.rawFreq = new ClassicCounter<String>();
            computeDataFreq = true;
        }

        if (this.constVars.batchProcessSents) {
            for (File f : Data.sentsFiles) {
                if (!f.exists()) {
                    throw new RuntimeException("File " + f + " does not exist. Something is wrong. Contact the author with full details.");
                }
                Redwood.log(new Object[]{Redwood.DBG, "Reading file from " + f.getAbsolutePath()});
                @SuppressWarnings("unchecked")
                Map<String, List<CoreLabel>> sentsf = (Map<String, List<CoreLabel>>) IOUtils.readObjectFromFile(f);
                for (String sentId : sentsf.keySet()) {
                    Data.sentId2File.put(sentId, f);
                }
                totalNumSents += sentsf.size();
                if (computeDataFreq) {
                    Data.computeRawFreqIfNull(sentsf, PatternFactory.numWordsCompound);
                }
                Redwood.log(new Object[]{Redwood.DBG, "Initializing sents from " + f + " with " + sentsf.size() + " sentences, either by labeling with the seed set or just setting the right classes"});
                this.labelSentsWithSeedSets(sentsf, seedSets, labelUsingSeedSets);
                this.constVars.invertedIndex.add(sentsf, true);
                Redwood.log(new Object[]{Redwood.DBG, "Saving the labeled seed sents (if given the option) to the same file " + f});
                IOUtils.writeObjectToFile((Object)sentsf, f);
            }
        } else {
            totalNumSents = Data.sents.size();
            if (computeDataFreq) {
                Data.computeRawFreqIfNull(Data.sents, PatternFactory.numWordsCompound);
            }
            Redwood.log(new Object[]{Redwood.DBG, "Initializing sents " + Data.sents.size() + " sentences, either by labeling with the seed set or just setting the right classes"});
            this.labelSentsWithSeedSets(Data.sents, seedSets, labelUsingSeedSets);
            if (createIndex) {
                this.constVars.invertedIndex.add(Data.sents, true);
            }
        }
        Redwood.log(new Object[]{Redwood.DBG, "Done loading/creating inverted index of tokens and labeling data with total of " + this.constVars.invertedIndex.size() + " sentences"});

        if (this.constVars.usePatternEvalWordClass || this.constVars.usePhraseEvalWordClass) {
            if (this.constVars.externalFeatureWeightsFile == null) {
                // Only the unique path is needed as a prefix; the per-label files are created below.
                File f = File.createTempFile("tempfeat", ".txt");
                f.delete();
                f.deleteOnExit();
                this.constVars.externalFeatureWeightsFile = f.getAbsolutePath();
            }
            for (String label : seedSets.keySet()) {
                String externalFeatureWeightsFileLabel = this.constVars.externalFeatureWeightsFile + "_" + label;
                File f = new File(externalFeatureWeightsFileLabel);
                if (!f.exists()) {
                    Redwood.log(new Object[]{Redwood.DBG, "externalweightsfile for the label " + label + " does not exist: learning weights!"});
                    LearnImportantFeatures learnImportantFeatures = new LearnImportantFeatures();
                    Execution.fillOptions((Object)learnImportantFeatures, this.props);
                    learnImportantFeatures.answerClass = answerClass.get(label);
                    learnImportantFeatures.answerLabel = label;
                    learnImportantFeatures.setUp();
                    learnImportantFeatures.getTopFeatures(new ConstantsAndVariables.DataSentsIterator(this.constVars.batchProcessSents), this.constVars.perSelectRand, this.constVars.perSelectNeg, externalFeatureWeightsFileLabel);
                }
                // Lines of interest look like "Cluster-<n>:<weight>"; everything else is skipped.
                Counter<Integer> distSimWeightsLabel = new ClassicCounter<Integer>();
                for (String line : IOUtils.readLines(externalFeatureWeightsFileLabel)) {
                    String[] t = line.split(":");
                    if (!t[0].startsWith("Cluster")) continue;
                    Integer clusterNum = Integer.parseInt(t[0].replace("Cluster-", ""));
                    distSimWeightsLabel.setCount(clusterNum, Double.parseDouble(t[1]));
                }
                this.constVars.distSimWeights.put(label, distSimWeightsLabel);
            }
        }

        if (this.constVars.usePatternEvalSemanticOdds || this.constVars.usePhraseEvalSemanticOdds) {
            Counter<String> otherSemanticClassFreq = new ClassicCounter<String>();
            // The word list is optional (the labelling code checks it for null too); treat a
            // missing list as empty instead of failing with a NullPointerException.
            Set<String> otherSemanticWords = this.constVars.getOtherSemanticClassesWords();
            if (otherSemanticWords != null) {
                for (String s : otherSemanticWords) {
                    for (String ngram : StringUtils.getNgrams(Arrays.asList(s.split("\\s+")), 1, PatternFactory.numWordsCompound)) {
                        otherSemanticClassFreq.incrementCount(ngram);
                    }
                }
            }
            otherSemanticClassFreq = Counters.add(otherSemanticClassFreq, 1.0);

            // Add-one n-gram counts of each label's own seed dictionary.
            Map<String, Counter<String>> labelDictNgram = new HashMap<String, Counter<String>>();
            for (String label : seedSets.keySet()) {
                Counter<String> classFreq = new ClassicCounter<String>();
                for (String s : seedSets.get(label)) {
                    for (String ngram : StringUtils.getNgrams(Arrays.asList(s.split("\\s+")), 1, PatternFactory.numWordsCompound)) {
                        classFreq.incrementCount(ngram);
                    }
                }
                classFreq = Counters.add(classFreq, 1.0);
                labelDictNgram.put(label, classFreq);
            }

            // Odds of an n-gram for a label = its count in that label / its count everywhere else.
            for (String label : seedSets.keySet()) {
                Counter<String> otherLabelFreq = new ClassicCounter<String>();
                for (String label2 : seedSets.keySet()) {
                    if (label.equals(label2)) continue;
                    otherLabelFreq.addAll(labelDictNgram.get(label2));
                }
                otherLabelFreq.addAll(otherSemanticClassFreq);
                Counter<String> dictOddsWeightsLabel = Counters.divisionNonNaN(labelDictNgram.get(label), otherLabelFreq);
                this.constVars.dictOddsWeights.put(label, dictOddsWeightsLabel);
            }
        }
    }

    /**
     * Labels one collection of sentences: marks seed words of every answer label, optionally marks
     * the non-final words of multi-word seeds as "OTHERSEM" negatives, marks the configured
     * other-semantic-class words, and finally resolves overlapping labels when that option is on.
     */
    private void labelSentsWithSeedSets(Map<String, List<CoreLabel>> sentences, Map<String, Set<String>> seedSets, boolean labelUsingSeedSets) throws InterruptedException, ExecutionException, IOException {
        for (String label : this.constVars.getAnswerClass().keySet()) {
            Set<String> seed = new HashSet<String>();
            if (seedSets != null && labelUsingSeedSets && seedSets.containsKey(label)) {
                seed = seedSets.get(label);
            }
            GetPatternsFromDataMultiClass.runLabelSeedWords(sentences, this.constVars.getAnswerClass().get(label), label, seed, this.constVars);
            if (!this.constVars.addIndvWordsFromPhrasesExceptLastAsNeg) continue;
            // Every word of a seed phrase except the last one, unless it is itself a seed.
            Set<String> otherseed = new HashSet<String>();
            for (String phrase : seed) {
                String[] t = phrase.split("\\s+");
                for (int i = 0; i < t.length - 1; ++i) {
                    if (!seed.contains(t[i])) {
                        otherseed.add(t[i]);
                    }
                }
            }
            GetPatternsFromDataMultiClass.runLabelSeedWords(sentences, PatternsAnnotations.OtherSemanticLabel.class, "OTHERSEM", otherseed, this.constVars);
        }
        if (this.constVars.getOtherSemanticClassesWords() != null) {
            GetPatternsFromDataMultiClass.runLabelSeedWords(sentences, PatternsAnnotations.OtherSemanticLabel.class, "OTHERSEM", this.constVars.getOtherSemanticClassesWords(), this.constVars);
        }
        if (this.constVars.removeOverLappingLabelsFromSeed) {
            this.removeOverLappingLabels(sentences);
        }
    }

    /**
     * Accessor for the per-token pattern store held by this instance.
     *
     * @return the current {@code patsForEachToken} reference (may be {@code null})
     */
    public PatternsForEachToken getPatsForEachToken() {
        return patsForEachToken;
    }

    /**
     * Resolves tokens that matched phrases of several labels by keeping only the label whose
     * matched phrase is longest (the first one encountered wins ties) and resetting every other
     * answer-class annotation on that token to the background symbol. Unless the winner is
     * "OTHERSEM", the other-semantic-label annotation is reset to background as well.
     * Tokens without a longest-match annotation, or whose matches are all empty, are left untouched.
     *
     * @param sents sentence id to token list; the tokens are modified in place
     */
    public void removeOverLappingLabels(Map<String, List<CoreLabel>> sents) {
        for (List<CoreLabel> tokens : sents.values()) {
            for (CoreLabel l : tokens) {
                Map<String, String> longestMatchingMap = l.get(PatternsAnnotations.LongestMatchedPhraseForEachLabel.class);
                if (longestMatchingMap == null) {
                    // Token was never matched against any seed set: nothing to disambiguate.
                    continue;
                }
                String longestMatchingString = "";
                String longestMatchingLabel = null;
                for (Map.Entry<String, String> en : longestMatchingMap.entrySet()) {
                    if (en.getValue().length() > longestMatchingString.length()) {
                        longestMatchingLabel = en.getKey();
                        longestMatchingString = en.getValue();
                    }
                }
                if (longestMatchingLabel == null) continue;
                if (!"OTHERSEM".equals(longestMatchingLabel)) {
                    l.set(PatternsAnnotations.OtherSemanticLabel.class, ConstantsAndVariables.backgroundSymbol);
                }
                for (Map.Entry<String, Class<? extends TypesafeMap.Key<String>>> labelEn : this.constVars.getAnswerClass().entrySet()) {
                    if (labelEn.getKey().equals(longestMatchingLabel)) {
                        l.set(labelEn.getValue(), labelEn.getKey());
                    } else {
                        l.set(labelEn.getValue(), ConstantsAndVariables.backgroundSymbol);
                    }
                }
            }
        }
    }

    /**
     * Runs a freshly built CoreNLP pipeline (pos and lemma, plus parse and/or ner when requested)
     * over sentences that are already tokenized, and returns their tokens keyed by
     * {@code prefix} followed by each sentence's document id annotation.
     *
     * @param sentsCM                          the pre-tokenized sentences to annotate
     * @param posModelPath                     POS model path, or {@code null} to leave "pos.model" unset
     * @param useTargetNERRestriction          adds the "ner" annotator when true
     * @param prefix                           string prepended to every sentence key
     * @param useTargetParserParentRestriction adds the "parse" annotator and records grandparent tags when true
     * @param numThreads                       value stored under both "nthreads" and "threads"
     * @return map from sentence key to that sentence's tokens
     */
    public static Map<String, List<CoreLabel>> runPOSNEROnTokens(List<CoreMap> sentsCM, String posModelPath, boolean useTargetNERRestriction, String prefix, boolean useTargetParserParentRestriction, String numThreads) {
        Annotation doc = new Annotation(sentsCM);

        List<String> annotatorNames = new ArrayList<String>(Arrays.asList("pos", "lemma"));
        if (useTargetParserParentRestriction) {
            annotatorNames.add("parse");
        }
        if (useTargetNERRestriction) {
            annotatorNames.add("ner");
        }

        Properties pipelineProps = new Properties();
        pipelineProps.setProperty("annotators", StringUtils.join(annotatorNames, ","));
        pipelineProps.setProperty("parse.maxlen", "80");
        pipelineProps.setProperty("nthreads", numThreads);
        pipelineProps.setProperty("threads", numThreads);
        if (posModelPath != null) {
            pipelineProps.setProperty("pos.model", posModelPath);
        }

        StanfordCoreNLP tagger = new StanfordCoreNLP(pipelineProps, false);
        Redwood.log(new Object[]{Redwood.DBG, "Annotating text"});
        tagger.annotate(doc);
        Redwood.log(new Object[]{Redwood.DBG, "Done annotating text"});

        Map<String, List<CoreLabel>> tokensBySentence = new HashMap<String, List<CoreLabel>>();
        List<CoreMap> annotatedSentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
        for (CoreMap sentence : annotatedSentences) {
            if (useTargetParserParentRestriction) {
                Tree parse = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
                GetPatternsFromDataMultiClass.inferParentParseTag(parse);
            }
            String docId = sentence.get(CoreAnnotations.DocIDAnnotation.class);
            List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
            tokensBySentence.put(prefix + docId, tokens);
        }
        return tokensBySentence;
    }

    /**
     * Reads lines from {@code textReader}, annotates them with the shared CoreNLP pipeline
     * (tokenize, ssplit, pos, lemma, plus parse/ner when requested) and stores each resulting
     * sentence's tokens in {@code sents} under the key {@code sentIDPrefix + sentenceIndex}.
     * <p>
     * The pipeline is created on the first call and reused afterwards, so the annotator-related
     * arguments of later calls have no effect on it.
     * <p>
     * In batch mode at most {@code numMaxSentencesPerBatchFile} lines (but always at least one,
     * so callers looping on {@code hasNext()} make progress) are consumed per call; lines beyond
     * the limit stay in the iterator for the next call. The sentences are then serialized to
     * {@code saveSentencesSerDirFile/sents_<n>}, registered in {@code Data.sentsFiles} and
     * {@code Data.sentId2File}, and {@code sents} is cleared. Outside batch mode all remaining
     * lines are consumed and {@code sents} keeps the result.
     *
     * @param textReader                       source of input lines
     * @param posModelPath                     POS model path, or {@code null} to leave "pos.model" unset
     * @param lowercase                        lower-cases every line before annotation when true
     * @param useTargetNERRestriction          adds the "ner" annotator when the pipeline is first built
     * @param sentIDPrefix                     prefix of the generated sentence ids
     * @param useTargetParserParentRestriction adds the "parse" annotator and records grandparent tags when true
     * @param numThreads                       value of the pipeline's "threads" property
     * @param batchProcessSents                whether sentences are written to batch files
     * @param numMaxSentencesPerBatchFile      maximum number of input lines per batch file
     * @param saveSentencesSerDirFile          directory receiving the batch files
     * @param sents                            output map, filled with the tokenized sentences
     * @param numFilesTillNow                  number of batch files written before this call
     * @return the number of batch files written so far, including the one from this call
     * @throws IOException if the batch file cannot be written
     */
    public static int tokenize(Iterator<String> textReader, String posModelPath, boolean lowercase, boolean useTargetNERRestriction, String sentIDPrefix, boolean useTargetParserParentRestriction, String numThreads, boolean batchProcessSents, int numMaxSentencesPerBatchFile, File saveSentencesSerDirFile, Map<String, List<CoreLabel>> sents, int numFilesTillNow) throws InterruptedException, ExecutionException, IOException {
        if (pipeline == null) {
            Properties props = new Properties();
            List<String> anns = new ArrayList<String>();
            anns.add("tokenize");
            anns.add("ssplit");
            anns.add("pos");
            anns.add("lemma");
            if (useTargetParserParentRestriction) {
                anns.add("parse");
            }
            if (useTargetNERRestriction) {
                anns.add("ner");
            }
            props.setProperty("annotators", StringUtils.join(anns, ","));
            props.setProperty("parse.maxlen", "80");
            props.setProperty("threads", numThreads);
            props.put("tokenize.options", "ptb3Escaping=false,normalizeParentheses=false,escapeForwardSlashAsterisk=false");
            if (posModelPath != null) {
                props.setProperty("pos.model", posModelPath);
            }
            pipeline = new StanfordCoreNLP(props);
        }

        // Check the batch limit BEFORE pulling the next line: consuming a line and then breaking
        // would silently drop it, since the caller continues with the same iterator.
        int maxLines = Math.max(1, numMaxSentencesPerBatchFile);
        StringBuilder text = new StringBuilder();
        int numLines = 0;
        while (textReader.hasNext()) {
            if (batchProcessSents && numLines >= maxLines) break;
            String line = textReader.next();
            ++numLines;
            if (lowercase) {
                // NOTE(review): uses the default locale; confirm whether Locale.ROOT is wanted here.
                line = line.toLowerCase();
            }
            text.append(line).append("\n");
        }

        Annotation doc = new Annotation(text.toString());
        pipeline.annotate(doc);
        Redwood.log(new Object[]{Redwood.DBG, "Done annotating text"});

        int i = -1;
        List<CoreMap> annotatedSentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
        for (CoreMap s : annotatedSentences) {
            ++i;
            if (useTargetParserParentRestriction) {
                GetPatternsFromDataMultiClass.inferParentParseTag((Tree)s.get(TreeCoreAnnotations.TreeAnnotation.class));
            }
            sents.put(sentIDPrefix + i, (List<CoreLabel>)s.get(CoreAnnotations.TokensAnnotation.class));
        }

        if (sents.size() > 0 && batchProcessSents) {
            File file = new File(saveSentencesSerDirFile + "/sents_" + ++numFilesTillNow);
            IOUtils.writeObjectToFile(sents, file);
            Data.sentsFiles.add(file);
            for (String sentid : sents.keySet()) {
                assert (!Data.sentId2File.containsKey(sentid)) : "Data.sentId2File already contains " + sentid + ". Make sure sentIds are unique!";
                Data.sentId2File.put(sentid, file);
            }
            // The sentences now live on disk; release them from the caller's map.
            sents.clear();
        }
        return numFilesTillNow;
    }

    /**
     * Walks the parse tree recursively and, for every leaf, stores the value of the node two
     * levels above it in the leaf label's {@code GrandparentAnnotation}.
     *
     * @param tree root of the (sub)tree to process; leaf labels are expected to be {@code CoreLabel}s
     */
    static void inferParentParseTag(Tree tree) {
        final String ancestorTag = tree.value();
        for (Tree kid : tree.children()) {
            for (Tree grandKid : kid.children()) {
                if (grandKid.isLeaf()) {
                    CoreLabel leafLabel = (CoreLabel) grandKid.label();
                    leafLabel.set(CoreAnnotations.GrandparentAnnotation.class, ancestorTag);
                }
            }
            // Descend so that deeper leaves get tagged with their own grandparent.
            inferParentParseTag(kid);
        }
    }

    /**
     * Finds the start positions in {@code l2} at which the token sequence {@code l1} occurs as a
     * contiguous run. A token of {@code l1} matches position {@code i} when it equals
     * {@code l2[i]} or {@code subl2[i]}; when fuzzy comparison applies, an edit-distance score of
     * at most 1 against either of them also counts. Fuzzy comparison is skipped when
     * {@code l2[i]} or {@code subl2[i]} is in {@code englishWords}, or when any of the compared
     * tokens is no longer than {@code minLen4Fuzzy}. Matches do not overlap: the search resumes
     * after the end of each match.
     *
     * @param l1               the token sequence to look for
     * @param l2               the token sequence searched in
     * @param subl2            alternative form of each {@code l2} token; must be at least as long as {@code l2}
     * @param englishWords     words that must match exactly
     * @param seenFuzzyMatches cache of "l1token#l2token" pairs already accepted as fuzzy matches;
     *                         newly accepted pairs are added to it
     * @param minLen4Fuzzy     tokens of this length or shorter are only compared exactly
     * @return start indices of all matches (empty when there are none or {@code l1} is empty),
     *         or {@code null} when {@code l1} is longer than {@code l2}
     */
    public static List<Integer> getSubListIndex(String[] l1, String[] l2, String[] subl2, Set<String> englishWords, HashSet<String> seenFuzzyMatches, int minLen4Fuzzy) {
        if (l1.length > l2.length) {
            return null;
        }
        ArrayList<Integer> allIndices = new ArrayList<Integer>();
        if (l1.length == 0) {
            // An empty pattern would never advance the scan below (endless loop); it matches nothing.
            return allIndices;
        }
        // NOTE(review): the boolean flag presumably enables transpositions — confirm against EditDistance.
        EditDistance editDistance = new EditDistance(true);
        boolean matched = false;
        int index = -1;
        int lastUnmatchedIndex = 0;
        int i = 0;
        while (i < l2.length) {
            int j = 0;
            while (j < l1.length) {
                boolean d1 = false;
                boolean d2 = false;
                boolean compareFuzzy = true;
                if (englishWords.contains(l2[i]) || englishWords.contains(subl2[i]) || l2[i].length() <= minLen4Fuzzy || subl2[i].length() <= minLen4Fuzzy) {
                    compareFuzzy = false;
                }
                if (!compareFuzzy || l1[j].length() <= minLen4Fuzzy) {
                    // Exact comparison only.
                    d1 = l1[j].equals(l2[i]);
                    if (!d1) {
                        d2 = subl2[i].equals(l1[j]);
                    }
                } else {
                    String combo = l1[j] + "#" + l2[i];
                    if (l1[j].equals(l2[i]) || seenFuzzyMatches.contains(combo)) {
                        d1 = true;
                    } else {
                        d1 = editDistance.score(l1[j], l2[i]) <= 1.0;
                        if (!d1) {
                            String combo2 = l1[j] + "#" + subl2[i];
                            if (l1[j].equals(subl2[i]) || seenFuzzyMatches.contains(combo2)) {
                                d2 = true;
                            } else {
                                d2 = editDistance.score(l1[j], subl2[i]) <= 1.0;
                                if (d2) {
                                    seenFuzzyMatches.add(combo2);
                                }
                            }
                        } else {
                            seenFuzzyMatches.add(combo);
                        }
                    }
                }
                if (d1 || d2) {
                    // Token matched: remember where, and advance both cursors.
                    index = i++;
                    if (++j == l1.length) {
                        matched = true;
                        break;
                    }
                } else {
                    // Mismatch: restart the pattern one position after the previous restart point.
                    j = 0;
                    lastUnmatchedIndex = i = lastUnmatchedIndex + 1;
                    index = -1;
                    if (lastUnmatchedIndex == l2.length) break;
                }
                if (i < l2.length) continue;
                // Ran off the end of l2 in the middle of a partial match.
                index = -1;
                break;
            }
            if (i != l2.length && !matched) continue;
            if (index >= 0) {
                // index is the position of the last matched token; convert to the start position.
                allIndices.add(index - l1.length + 1);
            }
            matched = false;
            lastUnmatchedIndex = index;
        }
        return allIndices;
    }

    /**
     * Labels the given sentences with the seed words of {@code label}, splitting
     * the sentence ids into contiguous chunks that are processed in parallel by
     * {@code LabelWithSeedWords} tasks. The maps returned by the tasks are merged
     * back into {@code sents}.
     *
     * <p>The chunk size is the ceiling of {@code sents.size() / numThreads}, so
     * every sentence id is assigned to exactly one task. (A truncating division by
     * {@code numThreads - 1} can leave trailing sentences unassigned, or assign
     * none at all when there are fewer sentences than threads.)
     *
     * @param sents sentence id to tokens; updated in place with the task results
     * @param answerclass annotation class handed to each labeling task
     * @param label the label whose seed words are applied
     * @param seedWords seed phrases for {@code label}
     * @param constVars configuration; supplies the thread count, fuzzy-match
     *        settings, English word list and context-matching options
     * @throws RuntimeException wrapping any failure of a labeling task
     */
    public static void runLabelSeedWords(Map<String, List<CoreLabel>> sents, Class answerclass, String label, Set<String> seedWords, final ConstantsAndVariables constVars) throws InterruptedException, ExecutionException, IOException {
        // Maps a token to the string used for matching: lemma or word, optionally lower-cased.
        Function<CoreLabel, String> stringTransformationFunction = new Function<CoreLabel, String>(){

            @Override
            public String apply(CoreLabel l) {
                String s = PatternFactory.useLemmaContextTokens ? l.lemma() : l.word();
                if (constVars.matchLowerCaseContext) {
                    s = s.toLowerCase();
                }
                return s;
            }
        };
        ArrayList<String> keyset = new ArrayList<String>(sents.keySet());
        Redwood.log(new Object[]{Redwood.DBG, "Labeling " + keyset.size() + " sentences with " + seedWords.size() + " seeds for the label " + label});
        // Created first so that a non-positive thread count is rejected before it is used as a divisor.
        ExecutorService executor = Executors.newFixedThreadPool(constVars.numThreads);
        // Ceiling division: numThreads chunks of this size always cover the whole key list.
        int num = (keyset.size() + constVars.numThreads - 1) / constVars.numThreads;
        Redwood.log(ConstantsAndVariables.extremedebug, "keyset size is " + keyset.size());
        ArrayList<Future<Map<String, List<CoreLabel>>>> list = new ArrayList<Future<Map<String, List<CoreLabel>>>>();
        for (int i = 0; i < constVars.numThreads; ++i) {
            int from = i * num;
            if (from >= keyset.size()) {
                // All sentence ids are already assigned; do not submit empty tasks.
                break;
            }
            int to = Math.min(keyset.size(), from + num);
            List<String> list2 = keyset.subList(from, to);
            Redwood.log(ConstantsAndVariables.extremedebug, "assigning from " + from + " till " + to);
            LabelWithSeedWords task = new LabelWithSeedWords(seedWords, sents, list2, answerclass, label, constVars.minLen4FuzzyForPattern, ConstantsAndVariables.backgroundSymbol, constVars.getEnglishWords(), stringTransformationFunction);
            Future<Map<String, List<CoreLabel>>> submit = executor.submit(task);
            list.add(submit);
        }
        for (Future<Map<String, List<CoreLabel>>> future : list) {
            try {
                sents.putAll(future.get());
            }
            catch (Exception e) {
                // Cancel the remaining tasks before surfacing the failure.
                executor.shutdownNow();
                throw new RuntimeException(e);
            }
        }
        executor.shutdown();
    }

    /**
     * Computes all patterns for a batch of sentences when
     * {@code constVars.computeAllPatterns} is set; otherwise only logs that
     * patterns are read from an existing directory.
     *
     * <p>While computing, the properties {@code createTable},
     * {@code deleteExisting} and {@code createPatLuceneIndex} are set to the
     * string form of {@code deleteExistingIndex}. In every case all three are
     * reset to {@code "false"} before returning.
     *
     * @param sents sentence id to tokens for this batch
     * @param deleteExistingIndex value written to the three index properties
     *        while the patterns are computed
     */
    public void processSents(Map<String, List<CoreLabel>> sents, Boolean deleteExistingIndex) throws IOException, ClassNotFoundException {
        String[] indexFlags = new String[]{"createTable", "deleteExisting", "createPatLuceneIndex"};
        if (!this.constVars.computeAllPatterns) {
            Redwood.log(new Object[]{Redwood.DBG, "Reading patterns from existing dir"});
        } else {
            String flagValue = deleteExistingIndex.toString();
            for (String flag : indexFlags) {
                this.props.setProperty(flag, flagValue);
            }
            Redwood.log(new Object[]{Redwood.DBG, "Computing all patterns"});
            this.createPats.getAllPatterns(sents, this.props, this.constVars.storePatsForEachToken);
        }
        // Later batches must not recreate or wipe what was just built.
        for (String flag : indexFlags) {
            this.props.setProperty(flag, "false");
        }
    }

    /**
     * Loads previously saved patterns from {@code constVars.allPatternsDir} when
     * patterns are not being recomputed and the per-token pattern store is the
     * in-memory one. Does nothing when {@code constVars.computeAllPatterns} is set.
     */
    void readSavedPatternsAndIndex() throws IOException, ClassNotFoundException {
        if (this.constVars.computeAllPatterns) {
            return;
        }
        assert (this.constVars.allPatternsDir != null) : "allPatternsDir flag cannot be emoty if computeAllPatterns is false!";
        boolean keptInMemory = this.constVars.storePatsForEachToken.equals((Object)ConstantsAndVariables.PatternForEachTokenWay.MEMORY);
        if (keptInMemory) {
            this.patsForEachToken.load(this.constVars.allPatternsDir);
        }
    }

    /**
     * Runs one pattern-learning step for {@code label}: gathers pattern/phrase
     * statistics over all sentence batches, scores the candidate patterns with the
     * scorer selected by {@code constVars.patternScoring}, and returns the chosen
     * patterns with their scores.
     *
     * <p>Decompiler note (CFR "WARNING - void declaration"): the local that
     * collects patterns chosen earlier in this call but later superseded was not
     * recovered. It shows up below as {@code var23_32}, {@code var23_33} and
     * {@code hashSet}, and the {@code void var23_32;} declaration is not valid
     * Java as emitted.
     *
     * @param label the label for which patterns are learned
     * @param alreadyIdentifiedPatterns patterns selected previously; removed from
     *        the candidates and checked against each candidate during selection
     * @param p0 passed only to the {@code ScorePatternsF1} scorer
     * @param p0Set passed only to the {@code ScorePatternsF1} scorer
     * @param ignorePatterns patterns dropped from the scored candidates when the
     *        set is non-null and non-empty
     * @return the chosen patterns and their scores; with the F1 scorer, the scored
     *         counter reduced to its single top entry
     * @throws RuntimeException if the scoring method is not implemented, the kNN
     *         scorer cannot be instantiated, or the F1 maximum is NaN
     */
    public Counter<E> getPatterns(String label, Set<E> alreadyIdentifiedPatterns, E p0, Counter<String> p0Set, Set<E> ignorePatterns) throws IOException, ClassNotFoundException {
        ScorePatterns scorePatterns;
        // Pattern -> phrase counts, filled by calculateSufficientStats for positive,
        // negative and unlabeled phrases respectively.
        TwoDimensionalCounter patternsandWords4Label = new TwoDimensionalCounter();
        TwoDimensionalCounter negPatternsandWords4Label = new TwoDimensionalCounter();
        TwoDimensionalCounter unLabeledPatternsandWords4Label = new TwoDimensionalCounter();
        HashSet<String> allCandidatePhrases = new HashSet<String>();
        ConstantsAndVariables.DataSentsIterator sentsIter = new ConstantsAndVariables.DataSentsIterator(this.constVars.batchProcessSents);
        boolean firstCallToProcessSents = true;
        while (sentsIter.hasNext()) {
            Object sentsPair = sentsIter.next();
            if (this.notComputedAllPatternsYet) {
                // Only the first batch is processed with deleteExistingIndex == true.
                this.processSents((Map)((Pair)sentsPair).first(), firstCallToProcessSents);
                firstCallToProcessSents = false;
                if (this.patsForEachToken == null) {
                    this.patsForEachToken = PatternsForEachToken.getPatternsInstance(this.props, this.constVars.storePatsForEachToken);
                    this.readSavedPatternsAndIndex();
                }
            }
            this.calculateSufficientStats((Map)((Pair)sentsPair).first(), this.patsForEachToken, label, patternsandWords4Label, negPatternsandWords4Label, unLabeledPatternsandWords4Label, allCandidatePhrases);
        }
        this.notComputedAllPatternsYet = false;
        if (this.constVars.computeAllPatterns) {
            if (this.constVars.storePatsForEachToken.equals((Object)ConstantsAndVariables.PatternForEachTokenWay.DB)) {
                this.patsForEachToken.createIndexIfUsingDBAndNotExists();
            }
            this.patsForEachToken.save(this.constVars.allPatternsDir);
        }
        this.patsForEachToken.close();
        // Patterns are now computed (or loaded); later calls must not recompute them.
        this.constVars.computeAllPatterns = false;
        if (this.patternsandWords == null) {
            this.patternsandWords = new HashMap<String, TwoDimensionalCounter<E, String>>();
        }
        if (this.currentPatternWeights == null) {
            this.currentPatternWeights = new HashMap<String, Counter<E>>();
        }
        Counter currentPatternWeights4Label = new ClassicCounter();
        // Drop patterns that fail the minimum support requirements from all three tables.
        Set removePats = this.enforceMinSupportRequirements(patternsandWords4Label, unLabeledPatternsandWords4Label);
        Counters.removeKeys(patternsandWords4Label, removePats);
        Counters.removeKeys(unLabeledPatternsandWords4Label, removePats);
        Counters.removeKeys(negPatternsandWords4Label, removePats);
        Class patternscoringclass = GetPatternsFromDataMultiClass.getPatternScoringClass(this.constVars.patternScoring);
        if (patternscoringclass != null && patternscoringclass.equals(ScorePatternsF1.class)) {
            // F1 scoring short-circuits the selection below: keep only the single best
            // non-zero pattern and return it.
            // NOTE(review): alreadyIdentifiedPatterns is not null-checked on this path,
            // unlike on the general path further down -- confirm Counters.removeKeys accepts null.
            ScorePatternsF1 scorePatterns2 = new ScorePatternsF1(this.constVars, this.constVars.patternScoring, label, allCandidatePhrases, patternsandWords4Label, negPatternsandWords4Label, unLabeledPatternsandWords4Label, this.props, p0Set, p0);
            Counter finalPat = ((ScorePatterns)scorePatterns2).score();
            Counters.removeKeys(finalPat, alreadyIdentifiedPatterns);
            Counters.retainNonZeros(finalPat);
            Counters.retainTop(finalPat, 1);
            if (Double.isNaN(Counters.max(finalPat))) {
                throw new RuntimeException("how is the value NaN");
            }
            Redwood.log(ConstantsAndVariables.minimaldebug, "Selected Pattern: " + finalPat);
            return finalPat;
        }
        // Instantiate the scorer that matches the configured scoring method.
        if (patternscoringclass != null && patternscoringclass.equals(ScorePatternsRatioModifiedFreq.class)) {
            scorePatterns = new ScorePatternsRatioModifiedFreq(this.constVars, this.constVars.patternScoring, label, allCandidatePhrases, patternsandWords4Label, negPatternsandWords4Label, unLabeledPatternsandWords4Label, this.phInPatScoresCache, this.scorePhrases, this.props);
        } else if (patternscoringclass != null && patternscoringclass.equals(ScorePatternsFreqBased.class)) {
            scorePatterns = new ScorePatternsFreqBased(this.constVars, this.constVars.patternScoring, label, allCandidatePhrases, patternsandWords4Label, negPatternsandWords4Label, unLabeledPatternsandWords4Label, this.props);
        } else if (this.constVars.patternScoring.equals((Object)PatternScoring.kNN)) {
            // The kNN scorer is looked up reflectively by class name.
            try {
                Class<?> clazz = Class.forName("edu.stanford.nlp.patterns.surface.ScorePatternsKNN");
                Constructor<?> ctor = clazz.getConstructor(ConstantsAndVariables.class, PatternScoring.class, String.class, Set.class, TwoDimensionalCounter.class, TwoDimensionalCounter.class, TwoDimensionalCounter.class, ScorePhrases.class, Properties.class);
                scorePatterns = (ScorePatterns)ctor.newInstance(new Object[]{this.constVars, this.constVars.patternScoring, label, allCandidatePhrases, patternsandWords4Label, negPatternsandWords4Label, unLabeledPatternsandWords4Label, this.scorePhrases, this.props});
            }
            catch (ClassNotFoundException e) {
                throw new RuntimeException("kNN pattern scoring is not released yet. Stay tuned.");
            }
            catch (NoSuchMethodException e) {
                throw new RuntimeException("newinstance of kNN not created", e);
            }
            catch (InvocationTargetException e) {
                throw new RuntimeException("newinstance of kNN not created", e);
            }
            catch (IllegalAccessException e) {
                throw new RuntimeException("newinstance of kNN not created", e);
            }
            catch (InstantiationException e) {
                throw new RuntimeException("newinstance of kNN not created", e);
            }
        } else {
            throw new RuntimeException((Object)((Object)this.constVars.patternScoring) + " is not implemented (check spelling?). ");
        }
        scorePatterns.setUp(this.props);
        currentPatternWeights4Label = scorePatterns.score();
        Redwood.log(ConstantsAndVariables.extremedebug, "patterns counter size is " + currentPatternWeights4Label.size());
        if (ignorePatterns != null && !ignorePatterns.isEmpty()) {
            Counters.removeKeys(currentPatternWeights4Label, ignorePatterns);
            Redwood.log(ConstantsAndVariables.extremedebug, "Removing patterns from ignorePatterns of size  " + ignorePatterns.size() + ". New patterns size " + currentPatternWeights4Label.size());
        }
        if (alreadyIdentifiedPatterns != null && !alreadyIdentifiedPatterns.isEmpty()) {
            Redwood.log(ConstantsAndVariables.extremedebug, "Patterns size is " + currentPatternWeights4Label.size());
            Counters.removeKeys(currentPatternWeights4Label, alreadyIdentifiedPatterns);
            Redwood.log(ConstantsAndVariables.extremedebug, "Removing already identified patterns of size  " + alreadyIdentifiedPatterns.size() + ". New patterns size " + currentPatternWeights4Label.size());
        }
        // Take candidates from the priority queue until numPatterns are chosen or the
        // queue is exhausted.
        PriorityQueue q = Counters.toPriorityQueue(currentPatternWeights4Label);
        int num = 0;
        ClassicCounter<Pattern> chosenPat = new ClassicCounter<Pattern>();
        HashSet removePatterns = new HashSet();
        HashSet<Pattern> removeIdentifiedPatterns = null;
        while (num < this.constVars.numPatterns && !q.isEmpty()) {
            // Decompiler artifact: unrecovered local (see the method comment).
            void var23_32;
            int n;
            Pattern pat = (Pattern)q.removeFirst();
            if (currentPatternWeights4Label.getCount(pat) < this.constVars.thresholdSelectPattern) {
                // The first candidate below the threshold ends the selection.
                Redwood.log(new Object[]{Redwood.DBG, "The max weight of candidate patterns is " + this.df.format(currentPatternWeights4Label.getCount(pat)) + " so not adding anymore patterns"});
                break;
            }
            boolean notchoose = false;
            // Skip candidates that have no unlabeled-phrase support.
            if (!unLabeledPatternsandWords4Label.containsFirstKey(pat) || ((ClassicCounter)unLabeledPatternsandWords4Label.getCounter(pat)).isEmpty()) {
                Redwood.log(ConstantsAndVariables.extremedebug, "Removing pattern " + pat + " because it has no unlab support; pos words: " + patternsandWords4Label.getCounter(pat));
                notchoose = true;
                continue;
            }
            // Decompiler artifact: second name for the unrecovered local, reset per candidate.
            Object var23_33 = null;
            // Compare the candidate with patterns identified in earlier iterations.
            if (!notchoose && alreadyIdentifiedPatterns != null) {
                for (Pattern pattern : alreadyIdentifiedPatterns) {
                    if (Pattern.subsumes(this.constVars.patternType, pat, pattern)) {
                        Redwood.log(ConstantsAndVariables.extremedebug, "Not choosing pattern " + pat + " because it is contained in or contains the already chosen pattern " + pattern);
                        notchoose = true;
                        break;
                    }
                    // equalContext result: Integer.MAX_VALUE -> no action for this pair;
                    // negative -> the earlier pattern is queued for removal;
                    // anything else -> the candidate is rejected.
                    n = pat.equalContext(pattern);
                    if (n == Integer.MAX_VALUE) continue;
                    if (n < 0) {
                        if (removeIdentifiedPatterns == null) {
                            removeIdentifiedPatterns = new HashSet<Pattern>();
                        }
                        removeIdentifiedPatterns.add(pattern);
                        continue;
                    }
                    notchoose = true;
                    break;
                }
            }
            // Compare the candidate with patterns already chosen in this call.
            if (!notchoose) {
                for (Pattern pattern : chosenPat.keySet()) {
                    // n is reused here as a 0/1 flag: 1 means an already chosen pattern is to be dropped.
                    n = 0;
                    if (!Pattern.sameGenre(this.constVars.patternType, pat, pattern)) continue;
                    if (Pattern.subsumes(this.constVars.patternType, pat, pattern)) {
                        Redwood.log(ConstantsAndVariables.extremedebug, "Not choosing pattern " + pat + " because it is contained in or contains the already chosen pattern " + pattern);
                        notchoose = true;
                        break;
                    }
                    if (Pattern.subsumes(this.constVars.patternType, pattern, pat)) {
                        int rest = pat.equalContext(pattern);
                        if (rest == Integer.MAX_VALUE) {
                            Redwood.log(ConstantsAndVariables.extremedebug, "Not choosing pattern " + pattern + " because it is contained in or contains another chosen pattern in this iteration " + pat);
                            n = 1;
                        } else if (rest < 0) {
                            n = 1;
                        } else {
                            notchoose = true;
                            break;
                        }
                    }
                    if (n == 0) continue;
                    // NOTE(review): decompiler output -- the new HashSet is assigned to a
                    // throwaway local rather than to var23_32, and the element added is
                    // 'pat' although the log messages speak of removing the already
                    // chosen pattern; verify against the original source.
                    if (var23_32 == null) {
                        HashSet hashSet = new HashSet();
                    }
                    var23_32.add(pat);
                    // The chosen count is decremented for each pattern queued for removal.
                    --num;
                }
            }
            if (notchoose) {
                Redwood.log(new Object[]{Redwood.DBG, "Not choosing " + pat + " for whatever reason!"});
                continue;
            }
            if (var23_32 != null) {
                Redwood.log(ConstantsAndVariables.extremedebug, "Removing already chosen patterns in this iteration " + var23_32 + " in favor of " + pat);
                Counters.removeKeys(chosenPat, var23_32);
            }
            if (removeIdentifiedPatterns != null) {
                Redwood.log(ConstantsAndVariables.extremedebug, "Removing already identified patterns " + removeIdentifiedPatterns + " in favor of " + pat);
                removePatterns.addAll(removeIdentifiedPatterns);
            }
            chosenPat.setCount(pat, currentPatternWeights4Label.getCount(pat));
            ++num;
        }
        // Forget previously learned patterns that were superseded by the new choices.
        this.removeLearnedPatterns(label, removePatterns);
        Redwood.log(new Object[]{Redwood.DBG, "final size of the patterns is " + chosenPat.size()});
        Redwood.log(ConstantsAndVariables.minimaldebug, "\n\n## Selected Patterns for " + label + "##\n");
        List chosenPatSorted = Counters.toSortedListWithCounts(chosenPat);
        for (Pair pair : chosenPatSorted) {
            Redwood.log(ConstantsAndVariables.minimaldebug, ((Pattern)pair.first()).toString() + ":" + this.df.format(pair.second) + "\n");
        }
        // When an output directory is configured, write the chosen patterns and their
        // supporting phrases to <outDir>/<identifier>/<label>/patterns.json.
        if (this.constVars.outDir != null && !this.constVars.outDir.isEmpty()) {
            // Pattern -> phrases for the positive, negative (collectionValuedMap) and
            // unlabeled (collectionValuedMap2) tables.
            CollectionValuedMap posWords = new CollectionValuedMap();
            for (Map.Entry entry : patternsandWords4Label.entrySet()) {
                posWords.addAll(entry.getKey(), entry.getValue().keySet());
            }
            CollectionValuedMap collectionValuedMap = new CollectionValuedMap();
            for (Map.Entry entry : negPatternsandWords4Label.entrySet()) {
                collectionValuedMap.addAll(entry.getKey(), entry.getValue().keySet());
            }
            CollectionValuedMap collectionValuedMap2 = new CollectionValuedMap();
            for (Map.Entry entry : unLabeledPatternsandWords4Label.entrySet()) {
                collectionValuedMap2.addAll(entry.getKey(), entry.getValue().keySet());
            }
            String string = this.constVars.outDir + "/" + this.constVars.identifier + "/" + label;
            Redwood.log(ConstantsAndVariables.minimaldebug, "Saving output in " + string);
            IOUtils.ensureDir(new File(string));
            String string2 = string + "/patterns" + ".json";
            JsonArrayBuilder obj = Json.createArrayBuilder();
            if (this.writtenPatInJustification.containsKey(label) && this.writtenPatInJustification.get(label).booleanValue()) {
                // The file was already written for this label during this run: read the
                // existing array back so that this iteration's entry is appended to it.
                JsonReader jsonReader = Json.createReader((InputStream)new BufferedInputStream(new FileInputStream(string2)));
                JsonArray objarr = jsonReader.readArray();
                jsonReader.close();
                // NOTE(review): the element type JsonObjectBuilder looks like a decompiler
                // mis-typing; the elements are passed on as JsonValue.
                for (JsonObjectBuilder o : objarr) {
                    obj.add((JsonValue)o);
                }
            } else {
                obj = Json.createArrayBuilder();
            }
            // One JSON object per call, keyed by the simple string form of each pattern.
            JsonObjectBuilder objThisIter = Json.createObjectBuilder();
            for (Pair pat : chosenPatSorted) {
                String w;
                JsonObjectBuilder o;
                o = Json.createObjectBuilder();
                JsonArrayBuilder pos = Json.createArrayBuilder();
                JsonArrayBuilder neg = Json.createArrayBuilder();
                JsonArrayBuilder unlab = Json.createArrayBuilder();
                Iterator iterator = posWords.get(pat.first()).iterator();
                while (iterator.hasNext()) {
                    w = (String)iterator.next();
                    pos.add(w);
                }
                iterator = collectionValuedMap.get(pat.first()).iterator();
                while (iterator.hasNext()) {
                    w = (String)iterator.next();
                    neg.add(w);
                }
                iterator = collectionValuedMap2.get(pat.first()).iterator();
                while (iterator.hasNext()) {
                    w = (String)iterator.next();
                    unlab.add(w);
                }
                o.add("Positive", pos);
                o.add("Negative", neg);
                o.add("Unlabeled", unlab);
                o.add("Score", pat.second().doubleValue());
                objThisIter.add(((Pattern)pat.first()).toStringSimple(), o);
            }
            obj.add((JsonValue)objThisIter.build());
            IOUtils.ensureDir(new File(string2).getParentFile());
            IOUtils.writeStringToFile(obj.build().toString(), string2, "utf8");
            this.writtenPatInJustification.put(label, true);
        }
        if (this.constVars.justify) {
            // Log the supporting phrases of every chosen pattern.
            Redwood.log(new Object[]{Redwood.DBG, "Justification for Patterns:"});
            for (Pattern pattern : chosenPat.keySet()) {
                Redwood.log(new Object[]{Redwood.DBG, "\nPattern: " + pattern.toString()});
                Redwood.log(new Object[]{Redwood.DBG, "Positive Words:" + Counters.toSortedString(patternsandWords4Label.getCounter(pattern), ((ClassicCounter)patternsandWords4Label.getCounter(pattern)).size(), "%1$s:%2$f", ";")});
                Redwood.log(new Object[]{Redwood.DBG, "Negative Words:" + Counters.toSortedString(negPatternsandWords4Label.getCounter(pattern), ((ClassicCounter)negPatternsandWords4Label.getCounter(pattern)).size(), "%1$s:%2$f", ";")});
                Redwood.log(new Object[]{Redwood.DBG, "Unlabeled Words:" + Counters.toSortedString(unLabeledPatternsandWords4Label.getCounter(pattern), ((ClassicCounter)unLabeledPatternsandWords4Label.getCounter(pattern)).size(), "%1$s:%2$f", ";")});
            }
        }
        // Remember this label's positive statistics and full candidate scores on the instance.
        this.patternsandWords.put(label, patternsandWords4Label);
        this.currentPatternWeights.put(label, currentPatternWeights4Label);
        return chosenPat;
    }

    /**
     * Maps a pattern scoring measure to the scorer class that implements it.
     *
     * @param patternScoring the configured scoring measure
     * @return {@code ScorePatternsF1}, {@code ScorePatternsRatioModifiedFreq} or
     *         {@code ScorePatternsFreqBased}, or {@code null} when the measure
     *         belongs to none of the groups listed here
     */
    public static Class getPatternScoringClass(PatternScoring patternScoring) {
        if (patternScoring.equals((Object)PatternScoring.F1SeedPattern)) {
            return ScorePatternsF1.class;
        }
        PatternScoring[] ratioMeasures = new PatternScoring[]{
            PatternScoring.PosNegUnlabOdds,
            PatternScoring.PosNegOdds,
            PatternScoring.RatioAll,
            PatternScoring.PhEvalInPat,
            PatternScoring.PhEvalInPatLogP,
            PatternScoring.LOGREG,
            PatternScoring.LOGREGlogP,
            PatternScoring.SqrtAllRatio
        };
        for (PatternScoring measure : ratioMeasures) {
            if (patternScoring.equals((Object)measure)) {
                return ScorePatternsRatioModifiedFreq.class;
            }
        }
        PatternScoring[] frequencyMeasures = new PatternScoring[]{
            PatternScoring.RlogF,
            PatternScoring.RlogFPosNeg,
            PatternScoring.RlogFUnlabNeg,
            PatternScoring.RlogFNeg,
            PatternScoring.YanGarber02,
            PatternScoring.LinICML03
        };
        for (PatternScoring measure : frequencyMeasures) {
            if (patternScoring.equals((Object)measure)) {
                return ScorePatternsFreqBased.class;
            }
        }
        return null;
    }

    /**
     * Draws {@code n} distinct elements of {@code c} at random (without
     * replacement) and groups them into batches, one batch per worker task.
     *
     * <p>The batch size is {@code n} for a single thread and
     * {@code n / (numThreads - 1)} otherwise, but never less than 1. Without that
     * lower bound a sample smaller than {@code numThreads - 1} yields an empty
     * first batch followed by one batch holding every sampled element. The random
     * generator is seeded from the {@code numCallsToCalStats} counter, which is
     * incremented on every call.
     *
     * @param c the elements to sample from
     * @param n number of elements to sample; must satisfy {@code 0 <= n <= c.size()}
     * @param numThreads number of worker threads the batches are meant for
     * @return the non-empty batches, in the order they were filled
     * @throws IllegalArgumentException if {@code n} is negative or exceeds {@code c.size()}
     */
    public static <E> List<List<E>> splitIntoNumThreads(List<E> c, int n, int numThreads) {
        if (n < 0) {
            throw new IllegalArgumentException("n < 0: " + n);
        }
        if (n > c.size()) {
            throw new IllegalArgumentException("n > size of collection: " + n + ", " + c.size());
        }
        List<List<E>> resultAll = new ArrayList<List<E>>(numThreads);
        // Clamp to 1 so that small samples are spread over one-element batches
        // instead of producing an empty batch plus a single oversized one.
        int num = Math.max(1, numThreads == 1 ? n : n / (numThreads - 1));
        System.out.println("shuffled " + c.size() + " sentences and selecting " + num + " sentences per thread");
        List<E> result = new ArrayList<E>(num);
        int totalitems = 0;
        int nitem = 0;
        Random r = new Random(numCallsToCalStats.incrementAndGet());
        // Marks positions of c that were already drawn (all false initially).
        boolean[] added = new boolean[c.size()];
        while (totalitems < n) {
            // Rejection sampling: redraw until an unused position comes up.
            int index;
            while (added[index = r.nextInt(c.size())]) {
            }
            added[index] = true;
            E c1 = c.get(index);
            if (nitem == num) {
                // Current batch is full; start the next one.
                resultAll.add(result);
                result = new ArrayList<E>(num);
                nitem = 0;
            }
            result.add(c1);
            ++totalitems;
            ++nitem;
        }
        if (!result.isEmpty()) {
            resultAll.add(result);
        }
        return resultAll;
    }

    /**
     * Collects, in parallel, the pattern/phrase pairs observed in a random sample
     * of the given sentences and adds them to the three supplied counters.
     *
     * <p>The sample holds {@code constVars.sampleSentencesForSufficientStats} times
     * the number of sentences (all of them when the fraction is exactly 1.0). The
     * sampled ids are split into batches by {@code splitIntoNumThreads}; each batch
     * is handled by a {@code CalculateSufficientStatsThreads} task whose result
     * triple feeds the positive, negative and unlabeled counters in that order.
     *
     * @throws RuntimeException wrapping any failure of a worker task
     */
    private void calculateSufficientStats(Map<String, List<CoreLabel>> sents, PatternsForEachToken patternsForEachToken, String label, TwoDimensionalCounter<E, String> patternsandWords4Label, TwoDimensionalCounter<E, String> negPatternsandWords4Label, TwoDimensionalCounter<E, String> unLabeledPatternsandWords4Label, Set<String> allCandidatePhrases) {
        Redwood.log(new Object[]{Redwood.DBG, "calculating sufficient stats"});
        patternsForEachToken.setupSearch();
        Class<TypesafeMap.Key<String>> answerClass4Label = this.constVars.getAnswerClass().get(label);

        // Decide how many sentences to sample.
        int sampleSize;
        if (this.constVars.sampleSentencesForSufficientStats == 1.0) {
            sampleSize = sents.size();
        } else {
            sampleSize = (int)Math.round(this.constVars.sampleSentencesForSufficientStats * (double)sents.size());
        }
        List<List<String>> sampledSentIds = GetPatternsFromDataMultiClass.splitIntoNumThreads(CollectionUtils.toList(sents.keySet()), sampleSize, this.constVars.numThreads);
        Redwood.log(new Object[]{Redwood.DBG, "sampled " + sampleSize + " sentences (" + this.constVars.sampleSentencesForSufficientStats * 100.0 + "%)"});

        // Fan out: one task per batch of sentence ids.
        ExecutorService executor = Executors.newFixedThreadPool(this.constVars.numThreads);
        ArrayList pending = new ArrayList();
        for (List<String> batchIds : sampledSentIds) {
            CalculateSufficientStatsThreads worker = new CalculateSufficientStatsThreads(patternsForEachToken, batchIds, sents, label, answerClass4Label);
            pending.add(executor.submit(worker));
        }

        // Fan in: merge every task's pairs into the shared counters.
        for (Object handle : pending) {
            try {
                Triple stats = (Triple)((Future)handle).get();
                this.addStats(patternsandWords4Label, (List)stats.first());
                this.addStats(negPatternsandWords4Label, (List)stats.second());
                this.addStats(unLabeledPatternsandWords4Label, (List)stats.third());
            }
            catch (Exception e) {
                executor.shutdownNow();
                throw new RuntimeException(e);
            }
        }
        executor.shutdown();
    }

    /**
     * Increments, for every (pattern, phrase) pair in {@code v}, the matching
     * cell of the two-dimensional counter {@code pw}.
     */
    private void addStats(TwoDimensionalCounter<E, String> pw, List<Pair<E, String>> v) {
        Iterator<Pair<E, String>> pairs = v.iterator();
        while (pairs.hasNext()) {
            Pair<E, String> patternAndPhrase = pairs.next();
            E pattern = patternAndPhrase.first();
            String phrase = patternAndPhrase.second();
            pw.incrementCount(pattern, phrase);
        }
    }

    /**
     * Determines which patterns lack sufficient support.
     *
     * <p>A pattern is rejected when its number of distinct positive phrases is
     * below {@code constVars.minPosPhraseSupportForPat}, or its number of distinct
     * unlabeled phrases is below {@code constVars.minUnlabPhraseSupportForPat}.
     * Only patterns present as keys in the respective counter are examined.
     *
     * @return the set of patterns to remove; the counters themselves are not modified
     */
    private Set<E> enforceMinSupportRequirements(TwoDimensionalCounter<E, String> patternsandWords4Label, TwoDimensionalCounter<E, String> unLabeledPatternsandWords4Label) {
        Set<E> rejected = new HashSet<E>();

        for (Map.Entry<E, ClassicCounter<String>> posEntry : patternsandWords4Label.entrySet()) {
            boolean enoughPositive = posEntry.getValue().size() >= this.constVars.minPosPhraseSupportForPat;
            if (!enoughPositive) {
                rejected.add(posEntry.getKey());
            }
        }
        int rejectedForPositive = rejected.size();
        Redwood.log(new Object[]{Redwood.DBG, "Removing " + rejectedForPositive + " patterns that do not meet minPosPhraseSupportForPat requirement of >= " + this.constVars.minPosPhraseSupportForPat});

        for (Map.Entry<E, ClassicCounter<String>> unlabEntry : unLabeledPatternsandWords4Label.entrySet()) {
            boolean enoughUnlabeled = unlabEntry.getValue().size() >= this.constVars.minUnlabPhraseSupportForPat;
            if (!enoughUnlabeled) {
                rejected.add(unlabEntry.getKey());
            }
        }
        // Only patterns newly rejected by the second pass are counted in this message.
        Redwood.log(new Object[]{Redwood.DBG, "Removing " + (rejected.size() - rejectedForPositive) + " patterns that do not meet minUnlabPhraseSupportForPat requirement of >= " + this.constVars.minUnlabPhraseSupportForPat});
        return rejected;
    }

    /**
     * Removes a single pattern from the learned patterns of {@code label} and,
     * if extraction records exist for the label, from every word's record of the
     * patterns that extracted it.
     */
    void removeLearnedPattern(String label, E p) {
        this.learnedPatterns.get(label).remove(p);
        if (!this.wordsPatExtracted.containsKey(label)) {
            return;
        }
        for (Map.Entry<String, ClassicCounter<E>> wordEntry : this.wordsPatExtracted.get(label).entrySet()) {
            ClassicCounter<E> extractingPatterns = wordEntry.getValue();
            extractingPatterns.remove(p);
        }
    }

    /**
     * Removes all of {@code pats} from the learned patterns of {@code label} and,
     * if extraction records exist for the label, from every word's record of the
     * patterns that extracted it.
     */
    void removeLearnedPatterns(String label, Collection<E> pats) {
        Counters.removeKeys(this.learnedPatterns.get(label), pats);
        if (!this.wordsPatExtracted.containsKey(label)) {
            return;
        }
        for (Map.Entry<String, ClassicCounter<E>> wordEntry : this.wordsPatExtracted.get(label).entrySet()) {
            ClassicCounter<E> extractingPatterns = wordEntry.getValue();
            Counters.removeKeys(extractingPatterns, pats);
        }
    }

    /**
     * Returns a new counter holding the given scores after an optional logistic
     * squashing step and an optional min-max rescaling step. The input counter is
     * not modified.
     *
     * <p>With {@code softmax}, each score {@code s} is first capped at 7.0 and then
     * replaced by {@code 1 / (1 + exp(-s))}, or by {@code 1 / (1 + exp(s))} when
     * {@code oneMinusSoftMax} is set. With {@code minMaxNorm}, each resulting value
     * {@code v} becomes {@code (v - min + 1e-10) / (max - min)}; when all values are
     * equal they are left at that common value.
     *
     * <p>The running maximum starts at {@link Double#NEGATIVE_INFINITY}.
     * {@link Double#MIN_VALUE} is the smallest <em>positive</em> double, so seeding
     * with it leaves {@code max} at 4.9e-324 when every squashed score is 0.0 and
     * makes the rescaling step divide by that value.
     *
     * @param scores the raw scores
     * @param minMaxNorm whether to rescale by the observed minimum and maximum
     * @param softmax whether to apply the logistic transformation first
     * @param oneMinusSoftMax with {@code softmax}, use the complementary form
     * @return a new counter with the transformed scores
     */
    public static Counter<String> normalizeSoftMaxMinMaxScores(Counter<String> scores, boolean minMaxNorm, boolean softmax, boolean oneMinusSoftMax) {
        double minScore = Double.MAX_VALUE;
        double maxScore = Double.NEGATIVE_INFINITY;
        ClassicCounter<String> newscores = new ClassicCounter<String>();
        if (softmax) {
            for (Map.Entry<String, Double> entry : scores.entrySet()) {
                // Cap the input at 7.0 before exponentiating, as the scorer has always done.
                double capped = Math.min(7.0, entry.getValue());
                double exponent = oneMinusSoftMax ? capped : -1.0 * capped;
                double score = 1.0 / (1.0 + Math.exp(exponent));
                if (score < minScore) {
                    minScore = score;
                }
                if (score > maxScore) {
                    maxScore = score;
                }
                newscores.setCount(entry.getKey(), score);
            }
        } else {
            newscores.addAll(scores);
            minScore = Counters.min(newscores);
            maxScore = Counters.max(newscores);
        }
        if (minMaxNorm) {
            for (Map.Entry<String, Double> entry : newscores.entrySet()) {
                // The small epsilon keeps the minimum element from becoming exactly 0.
                double score = minScore == maxScore ? minScore : (entry.getValue() - minScore + 1.0E-10) / (maxScore - minScore);
                newscores.setCount(entry.getKey(), score);
            }
        }
        return newscores;
    }

    /**
     * Labels every occurrence of the given phrases in the given sentences with {@code label}.
     *
     * <p>Decompiler note: CFR reported "Could not resolve type clashes" for this method, so
     * several locals are raw-typed or carry a declared type that does not match their use.
     *
     * @param label the label to assign to matching tokens
     * @param sents sentence id to token list; tokens are annotated in place
     * @param identifiedWords phrases to look for; each is split on whitespace into tokens
     * @param outFile if non-null, {@code sents} is serialized to this file after labeling
     * @param matchedTokensByPat per pattern, triples of (sentence id, start index, end index);
     *        only read when {@code constVars.restrictToMatched} is set
     * @throws IOException propagated from writing {@code outFile}
     */
    public void labelWords(String label, Map<String, List<CoreLabel>> sents, Set<String> identifiedWords, String outFile, CollectionValuedMap<E, Triple<String, Integer, Integer>> matchedTokensByPat) throws IOException {
        Date startTime = new Date();
        Redwood.log(new Object[]{Redwood.DBG, "Labeling " + sents.size() + " sentences with " + identifiedWords.size() + " phrases for label " + label});
        CollectionValuedMap tokensMatchedPatterns = null;
        if (this.constVars.restrictToMatched) {
            // Flatten the per-pattern triples into: sentence id -> every token index in [second, third].
            tokensMatchedPatterns = new CollectionValuedMap();
            for (Object en : matchedTokensByPat.entrySet()) {
                for (Triple en2 : (Collection)en.getValue()) {
                    for (int i = ((Integer)en2.second()).intValue(); i <= (Integer)en2.third(); ++i) {
                        tokensMatchedPatterns.add(en2.first(), i);
                    }
                }
            }
        }
        // Sentence id -> (token index -> recomputed context), handed to patsForEachToken at the end.
        HashMap tempPatsForSents = new HashMap();
        for (Map.Entry<String, List<CoreLabel>> sentEn : sents.entrySet()) {
            boolean sentenceChanged = false;
            // NOTE(review): the phrases are re-tokenized for every sentence although they do not
            // depend on the sentence.
            HashSet<String[]> identifiedWordsTokens = new HashSet<String[]>();
            for (String s : identifiedWords) {
                String[] toks = s.split("\\s+");
                identifiedWordsTokens.add(toks);
            }
            // The sentence as an array of its token words.
            Object[] sent = new String[sentEn.getValue().size()];
            int i = 0;
            // Token indices whose pattern context has to be recomputed.
            HashSet<Integer> contextWordsRecalculatePats = new HashSet<Integer>();
            for (CoreLabel l : sentEn.getValue()) {
                sent[i] = l.word();
                ++i;
            }
            for (Object[] ph : identifiedWordsTokens) {
                // Presumably the start indices at which the phrase occurs in the sentence, or null
                // when it does not occur -- TODO confirm against ArrayUtils.getSubListIndex.
                List<Integer> ints = ArrayUtils.getSubListIndex(ph, sent);
                if (ints == null) continue;
                for (Integer idx : ints) {
                    boolean donotuse = false;
                    if (this.constVars.restrictToMatched) {
                        // Skip the occurrence unless every one of its tokens was matched by a pattern.
                        for (int j = 0; j < ph.length; ++j) {
                            if (tokensMatchedPatterns.get(sentEn.getKey()).contains(idx + j)) continue;
                            Redwood.log(ConstantsAndVariables.extremedebug, "not labeling " + sentEn.getValue().get(idx + j).word());
                            donotuse = true;
                            break;
                        }
                    }
                    if (donotuse) continue;
                    String phStr = StringUtils.join(ph, " ");
                    Redwood.log(ConstantsAndVariables.extremedebug, "Labeling because of phrase " + phStr);
                    for (int j = 0; j < ph.length; ++j) {
                        int index = idx + j;
                        CoreLabel l = sentEn.getValue().get(index);
                        // Nothing is written to the token unless pattern results are used as labels.
                        if (!this.constVars.usePatternResultAsLabel) continue;
                        sentenceChanged = true;
                        l.set(this.constVars.getAnswerClass().get(label), label);
                        // Record which phrase caused this token to receive the label.
                        CollectionValuedMap<String, String> matched = new CollectionValuedMap<String, String>();
                        matched.add(label, phStr);
                        if (!l.containsKey(PatternsAnnotations.MatchedPhrases.class)) {
                            l.set(PatternsAnnotations.MatchedPhrases.class, matched);
                        } else {
                            ((CollectionValuedMap)l.get(PatternsAnnotations.MatchedPhrases.class)).addAll(matched);
                        }
                        // Mark the window [index - numWordsCompound, index + numWordsCompound + 1],
                        // clipped to the sentence, for context recomputation.
                        for (int k = Math.max(0, index - PatternFactory.numWordsCompound); k < sentEn.getValue().size() && k <= index + PatternFactory.numWordsCompound + 1; ++k) {
                            contextWordsRecalculatePats.add(k);
                        }
                    }
                }
            }
            if (this.patsForEachToken != null) {
                // Decompiler artifact: the iterator is declared over CoreLabel but yields Integer indices.
                Iterator<CoreLabel> iterator = contextWordsRecalculatePats.iterator();
                while (iterator.hasNext()) {
                    int index = (Integer)((Object)iterator.next());
                    if (!tempPatsForSents.containsKey(sentEn.getKey())) {
                        tempPatsForSents.put(sentEn.getKey(), new HashMap());
                    }
                    ((Map)tempPatsForSents.get(sentEn.getKey())).put(index, Pattern.getContext(this.constVars.patternType, sentEn.getValue(), index));
                }
            }
            // Only sentences that actually received a label are pushed to the inverted index.
            if (!sentenceChanged) continue;
            this.constVars.invertedIndex.update(sentEn.getValue(), sentEn.getKey());
        }
        if (this.patsForEachToken != null) {
            this.patsForEachToken.updatePatterns(tempPatsForSents);
        }
        this.constVars.invertedIndex.finishUpdating();
        if (outFile != null) {
            Redwood.log(ConstantsAndVariables.minimaldebug, "Writing results to " + outFile);
            IOUtils.writeObjectToFile(sents, outFile);
        }
        Date endTime = new Date();
        Redwood.log(new Object[]{Redwood.DBG, "Done labeling provided sents in " + GetPatternsFromDataMultiClass.elapsedTime(startTime, endTime)});
    }

    /**
     * Runs the learning loop with every optional argument left unset: delegates to the
     * six-argument overload passing {@code null} for the seed patterns, the output file
     * names and the ignore patterns.
     *
     * @throws IOException propagated from the six-argument overload
     * @throws ClassNotFoundException propagated from the six-argument overload
     */
    public void iterateExtractApply() throws IOException, ClassNotFoundException {
        this.iterateExtractApply(null, null, null, null, null, null);
    }

    /**
     * Runs the bootstrapping loop for every label in the label dictionary.
     *
     * <p>For {@code constVars.numIterationsForPatterns} rounds, each label is processed once
     * per round through {@link #iterateExtractApply4Label}. A round in which no label learned
     * any pattern either ends the loop or, when {@code constVars.tuneThresholdKeepRunning} is
     * set, multiplies {@code constVars.thresholdSelectPattern} by 0.8 and continues. Afterwards
     * the optional justification JSON files are written and all learned patterns and words are
     * printed to standard output.
     *
     * @param p0 optional seed pattern per label; may be null
     * @param p0Set optional seed word counter per label; may be null
     * @param wordsOutputFile prefix for the learned-words files ({@code _<label>} is appended);
     *        when null, {@code <outDir>/<identifier>/<label>/learnedwords.txt} is used
     * @param sentsOutFile prefix for the serialized sentences ({@code _<label>} is appended);
     *        when null no sentence file name is passed on
     * @param patternsOutFile prefix for the learned-patterns files ({@code _<label>} is appended);
     *        when null, {@code <outDir>/<identifier>/<label>/learnedpatterns.txt} is used
     * @param ignorePatterns optional patterns to ignore per label; may be null
     * @throws IOException if an output file cannot be created, written or closed
     * @throws ClassNotFoundException propagated from {@link #iterateExtractApply4Label}
     */
    public void iterateExtractApply(Map<String, E> p0, Map<String, Counter<String>> p0Set, String wordsOutputFile, String sentsOutFile, String patternsOutFile, Map<String, Set<E>> ignorePatterns) throws IOException, ClassNotFoundException {
        Map<String, CollectionValuedMap<E, Triple<String, Integer, Integer>>> matchedTokensByPatAllLabels = new HashMap<String, CollectionValuedMap<E, Triple<String, Integer, Integer>>>();
        Map<String, TwoDimensionalCounter<String, E>> termsAllLabels = new HashMap<String, TwoDimensionalCounter<String, E>>();
        Map<String, Set<String>> ignoreWordsAll = new HashMap<String, Set<String>>();
        for (String label : this.constVars.getLabelDictionary().keySet()) {
            matchedTokensByPatAllLabels.put(label, new CollectionValuedMap<E, Triple<String, Integer, Integer>>());
            termsAllLabels.put(label, new TwoDimensionalCounter<String, E>());
            if (!this.constVars.useOtherLabelsWordsasNegative) {
                continue;
            }
            // The dictionary words of every other label are negatives for this label.
            Set<String> otherLabelsWords = new HashSet<String>();
            for (Map.Entry<String, Set<String>> en : this.constVars.getLabelDictionary().entrySet()) {
                if (en.getKey().equals(label)) {
                    continue;
                }
                otherLabelsWords.addAll(en.getValue());
            }
            ignoreWordsAll.put(label, otherLabelsWords);
        }
        Redwood.log(ConstantsAndVariables.minimaldebug, "Iterating " + this.constVars.numIterationsForPatterns + " times.");
        Map<String, BufferedWriter> wordsOutput = new HashMap<String, BufferedWriter>();
        Map<String, BufferedWriter> patternsOutput = new HashMap<String, BufferedWriter>();
        // Set once the body finished normally; decides whether a failure in close() is reported.
        boolean completed = false;
        try {
            for (String label : this.constVars.getLabelDictionary().keySet()) {
                IOUtils.ensureDir(new File(this.constVars.outDir + "/" + this.constVars.identifier + "/" + label));
                String wordsOutputFileLabel = wordsOutputFile == null ? this.constVars.outDir + "/" + this.constVars.identifier + "/" + label + "/learnedwords.txt" : wordsOutputFile + "_" + label;
                wordsOutput.put(label, new BufferedWriter(new FileWriter(wordsOutputFileLabel)));
                Redwood.log(ConstantsAndVariables.minimaldebug, "Saving the learned words for label " + label + " in " + wordsOutputFileLabel);
                String patternsOutputFileLabel = patternsOutFile == null ? this.constVars.outDir + "/" + this.constVars.identifier + "/" + label + "/learnedpatterns.txt" : patternsOutFile + "_" + label;
                patternsOutput.put(label, new BufferedWriter(new FileWriter(patternsOutputFileLabel)));
                Redwood.log(ConstantsAndVariables.minimaldebug, "Saving the learned patterns for label " + label + " in " + patternsOutputFileLabel);
            }
            for (int i = 0; i < this.constVars.numIterationsForPatterns; ++i) {
                Redwood.log(ConstantsAndVariables.minimaldebug, "\n\n################################ Iteration " + (i + 1) + " ##############################");
                // True as soon as any label learned at least one pattern in this round.
                boolean keepRunning = false;
                Map<String, Counter<String>> learnedWordsThisIter = new HashMap<String, Counter<String>>();
                for (String label : this.constVars.getLabelDictionary().keySet()) {
                    Redwood.log(ConstantsAndVariables.minimaldebug, "\n###Learning for label " + label + " ######");
                    String sentout = sentsOutFile == null ? null : sentsOutFile + "_" + label;
                    Pair<Counter<E>, Counter<String>> learnedPatWords4label = this.iterateExtractApply4Label(label, p0 != null ? p0.get(label) : null, p0Set != null ? p0Set.get(label) : null, wordsOutput.get(label), sentout, patternsOutput.get(label), ignorePatterns != null ? ignorePatterns.get(label) : null, 1, ignoreWordsAll.get(label), matchedTokensByPatAllLabels.get(label), termsAllLabels.get(label));
                    learnedWordsThisIter.put(label, learnedPatWords4label.second());
                    if (learnedPatWords4label.first().size() > 0) {
                        keepRunning = true;
                    }
                }
                if (this.constVars.useOtherLabelsWordsasNegative) {
                    // Words just learned for one label become negatives for all other labels.
                    for (String label : this.constVars.getLabelDictionary().keySet()) {
                        for (Map.Entry<String, Counter<String>> en : learnedWordsThisIter.entrySet()) {
                            if (en.getKey().equals(label)) {
                                continue;
                            }
                            ignoreWordsAll.get(label).addAll(en.getValue().keySet());
                        }
                    }
                }
                if (keepRunning) {
                    continue;
                }
                if (!this.constVars.tuneThresholdKeepRunning) {
                    Redwood.log(ConstantsAndVariables.minimaldebug, "No patterns learned for all labels. Ending iterations.");
                    break;
                }
                this.constVars.thresholdSelectPattern = 0.8 * this.constVars.thresholdSelectPattern;
                Redwood.log(ConstantsAndVariables.minimaldebug, "\n\nTuning thresholds to keep running. New Pattern threshold is  " + this.constVars.thresholdSelectPattern);
            }
            if (this.constVars.outDir != null && !this.constVars.outDir.isEmpty()) {
                Redwood.log(ConstantsAndVariables.minimaldebug, "Writing justification files");
                // Accumulates across labels: ids of all sentences with at least one matched span.
                Set<String> allMatchedSents = new HashSet<String>();
                for (String label : this.constVars.getLabelDictionary().keySet()) {
                    CollectionValuedMap<E, Triple<String, Integer, Integer>> tokensMatchedPat = matchedTokensByPatAllLabels.get(label);
                    IOUtils.ensureDir(new File(this.constVars.outDir + "/" + this.constVars.identifier + "/" + label));
                    if (!this.constVars.writeMatchedTokensFiles) {
                        continue;
                    }
                    String matchedtokensfilename = this.constVars.outDir + "/" + this.constVars.identifier + "/" + label + "/tokensmatchedpatterns" + ".json";
                    JsonObjectBuilder pats = Json.createObjectBuilder();
                    for (Map.Entry<E, Collection<Triple<String, Integer, Integer>>> patEntry : tokensMatchedPat.entrySet()) {
                        // Regroup this pattern's matches by sentence id -> (start, end) pairs.
                        CollectionValuedMap<String, Pair<Integer, Integer>> matchedStrs = new CollectionValuedMap<String, Pair<Integer, Integer>>();
                        for (Triple<String, Integer, Integer> match : patEntry.getValue()) {
                            allMatchedSents.add(match.first());
                            matchedStrs.add(match.first(), new Pair<Integer, Integer>(match.second(), match.third()));
                        }
                        JsonObjectBuilder spansBySentence = Json.createObjectBuilder();
                        for (Map.Entry<String, Collection<Pair<Integer, Integer>>> sen : matchedStrs.entrySet()) {
                            JsonArrayBuilder obj = Json.createArrayBuilder();
                            for (Pair<Integer, Integer> span : sen.getValue()) {
                                JsonArrayBuilder startend = Json.createArrayBuilder();
                                startend.add(span.first().intValue());
                                startend.add(span.second().intValue());
                                obj.add(startend);
                            }
                            spansBySentence.add(sen.getKey(), obj);
                        }
                        pats.add(((Pattern)patEntry.getKey()).toStringSimple(), spansBySentence);
                    }
                    IOUtils.writeStringToFile(pats.build().toString(), matchedtokensfilename, "utf8");
                    // The shared sentences file is rewritten after each label with everything
                    // collected so far.
                    JsonObjectBuilder senttokens = Json.createObjectBuilder();
                    for (String sentId : allMatchedSents) {
                        JsonArrayBuilder sent = Json.createArrayBuilder();
                        for (CoreLabel l : Data.sents.get(sentId)) {
                            sent.add(l.word());
                        }
                        senttokens.add(sentId, sent);
                    }
                    String sentfilename = this.constVars.outDir + "/" + this.constVars.identifier + "/sentences" + ".json";
                    IOUtils.writeStringToFile(senttokens.build().toString(), sentfilename, "utf8");
                }
            }
            System.out.println("\n\nAll patterns learned:");
            for (Map.Entry<String, Counter<E>> entry : this.learnedPatterns.entrySet()) {
                System.out.println(entry.getKey() + ":\t\t" + StringUtils.join(entry.getValue().keySet()));
            }
            System.out.println("\n\nAll words learned:");
            for (Map.Entry<String, Counter<String>> entry : this.learnedWords.entrySet()) {
                System.out.println(entry.getKey() + ":\t\t" + entry.getValue().keySet() + "\n\n");
            }
            completed = true;
        } finally {
            // Close every writer that was opened, even when the loop above failed part-way.
            IOException closeFailure = null;
            for (String label : this.constVars.getLabelDictionary().keySet()) {
                BufferedWriter wordsWriter = wordsOutput.get(label);
                BufferedWriter patternsWriter = patternsOutput.get(label);
                try {
                    if (wordsWriter != null) {
                        wordsWriter.close();
                    }
                } catch (IOException e) {
                    if (closeFailure == null) {
                        closeFailure = e;
                    }
                }
                try {
                    if (patternsWriter != null) {
                        patternsWriter.close();
                    }
                } catch (IOException e) {
                    if (closeFailure == null) {
                        closeFailure = e;
                    }
                }
            }
            // Report a close failure only if it would not hide an exception already in flight.
            if (completed && closeFailure != null) {
                throw closeFailure;
            }
        }
    }

    /**
     * Learns patterns and phrases for one label over {@code numIter} rounds.
     *
     * <p>Each round obtains patterns from {@code getPatterns}, adds them to the learned patterns
     * of the label, asks {@code scorePhrases.learnNewPhrases} for new phrases and, when
     * {@code constVars.usePatternResultAsLabel} is set, labels the sentences of every batch with
     * the phrases identified so far and adds them to the learned words of the label.
     *
     * @param label the label being learned
     * @param p0 optional seed pattern, forwarded to {@code getPatterns}
     * @param p0Set optional seed word counter, forwarded to {@code getPatterns}
     * @param wordsOutput if non-null, the identified words are appended to it after each round
     *        in which any word has been identified
     * @param sentsOutFile if non-null, base name of the serialized sentences; round {@code i}
     *        uses {@code sentsOutFile + "_" + i + "iter.ser"}
     * @param patternsOut if non-null, receives the patterns gathered over all rounds
     * @param ignorePatterns patterns to skip, forwarded to {@code getPatterns}
     * @param numIter maximum number of rounds
     * @param ignoreWords words to skip, forwarded to {@code learnNewPhrases}
     * @param matchedTokensByPat per pattern, the matched token spans; forwarded to
     *        {@code learnNewPhrases} and {@code labelWords}
     * @param terms forwarded to {@code learnNewPhrases}
     * @return the patterns and the words identified by this call
     * @throws IOException if writing any output fails
     * @throws ClassNotFoundException declared by the original signature; the throwing call is
     *         not identifiable from this method alone
     */
    public Pair<Counter<E>, Counter<String>> iterateExtractApply4Label(String label, E p0, Counter<String> p0Set, BufferedWriter wordsOutput, String sentsOutFile, BufferedWriter patternsOut, Set<E> ignorePatterns, int numIter, Set<String> ignoreWords, CollectionValuedMap<E, Triple<String, Integer, Integer>> matchedTokensByPat, TwoDimensionalCounter<String, E> terms) throws IOException, ClassNotFoundException {
        if (!this.learnedPatterns.containsKey(label)) {
            this.learnedPatterns.put(label, new ClassicCounter());
        }
        if (!this.learnedWords.containsKey(label)) {
            this.learnedWords.put(label, new ClassicCounter());
        }
        ClassicCounter<String> identifiedWords = new ClassicCounter<String>();
        ClassicCounter<E> patterns = new ClassicCounter<E>();
        for (int i = 0; i < numIter; ++i) {
            Counter<E> patternThisIter = this.getPatterns(label, this.learnedPatterns.get(label).keySet(), p0, p0Set, ignorePatterns);
            patterns.addAll(patternThisIter);
            this.learnedPatterns.get(label).addAll(patternThisIter);
            // Derive the file name from the caller's base name each round. Reassigning the
            // parameter made the suffixes pile up ("x_0iter.ser_1iter.ser") for numIter > 1.
            String sentsOutFileThisIter = sentsOutFile == null ? null : sentsOutFile + "_" + i + "iter.ser";
            ClassicCounter<String> scoreForAllWordsThisIteration = new ClassicCounter<String>();
            identifiedWords.addAll(this.scorePhrases.learnNewPhrases(label, this.patsForEachToken, patterns, this.learnedPatterns.get(label), matchedTokensByPat, scoreForAllWordsThisIteration, terms, this.wordsPatExtracted.get(label), this.patternsandWords.get(label), this.constVars.identifier, ignoreWords));
            if (identifiedWords.size() > 0) {
                if (this.constVars.usePatternResultAsLabel) {
                    if (this.constVars.getLabelDictionary().containsKey(label)) {
                        ConstantsAndVariables.DataSentsIterator sentsIter = new ConstantsAndVariables.DataSentsIterator(this.constVars.batchProcessSents);
                        while (sentsIter.hasNext()) {
                            // Each batch is a pair of (sentences, file the batch was read from).
                            Pair sentsf = (Pair)sentsIter.next();
                            Redwood.log(new Object[]{Redwood.DBG, "labeling sentences from " + sentsf.second()});
                            this.labelWords(label, (Map)sentsf.first(), identifiedWords.keySet(), sentsOutFileThisIter, matchedTokensByPat);
                            // Write the relabeled batch back only when its backing file exists.
                            if (!((File)sentsf.second()).exists()) {
                                continue;
                            }
                            IOUtils.writeObjectToFile(sentsf.first(), (File)sentsf.second());
                        }
                    } else {
                        throw new RuntimeException("why is the answer label null?");
                    }
                    this.learnedWords.get(label).addAll(identifiedWords);
                }
                if (wordsOutput != null) {
                    wordsOutput.write("\n" + Counters.toSortedString(identifiedWords, identifiedWords.size(), "%1$s", "\n"));
                    wordsOutput.flush();
                }
            }
            // The stopping logic below only applies when nothing at all has been learned.
            if (patterns.size() != 0 || identifiedWords.size() != 0) {
                continue;
            }
            if (this.learnedWords.get(label).size() >= this.constVars.maxExtractNumWords) {
                System.out.println("Ending because no new words identified and total words learned till now >= max words " + this.constVars.maxExtractNumWords);
                break;
            }
            if (!this.constVars.tuneThresholdKeepRunning) {
                break;
            }
            this.constVars.thresholdSelectPattern = 0.8 * this.constVars.thresholdSelectPattern;
            System.out.println("\n\nTuning thresholds to keep running. New Pattern threshold is  " + this.constVars.thresholdSelectPattern);
        }
        if (patternsOut != null) {
            this.writePatternsToFile(patterns, patternsOut);
        }
        return new Pair<Counter<E>, Counter<String>>(patterns, identifiedWords);
    }

    /**
     * Writes one line per pattern in the form {@code <pattern>\t<count>\n}.
     *
     * @param pattern the patterns and their counts
     * @param outFile destination; neither flushed nor closed here
     * @throws IOException if writing fails
     */
    void writePatternsToFile(Counter<E> pattern, BufferedWriter outFile) throws IOException {
        for (Map.Entry<E, Double> scored : pattern.entrySet()) {
            Pattern pat = (Pattern)scored.getKey();
            String row = pat.toString() + "\t" + scored.getValue() + "\n";
            outFile.write(row);
        }
    }

    /**
     * Writes one line per word in the form {@code <word>\t<count>\n}.
     *
     * @param words the words and their counts
     * @param outFile destination; neither flushed nor closed here
     * @throws IOException if writing fails
     */
    void writeWordsToFile(Counter<String> words, BufferedWriter outFile) throws IOException {
        for (Map.Entry<String, Double> scored : words.entrySet()) {
            StringBuilder row = new StringBuilder();
            row.append(scored.getKey()).append("\t").append(scored.getValue()).append("\n");
            outFile.write(row.toString());
        }
    }

    /**
     * Reads a word counter from a file with one {@code <word>\t<score>} entry per line.
     *
     * <p>Blank lines are skipped. A non-blank line without a tab-separated score is rejected
     * with a message naming the line and the file, rather than failing with a bare
     * {@code ArrayIndexOutOfBoundsException}.
     *
     * @param file the file to read
     * @return a counter mapping each word to its parsed score
     * @throws IllegalArgumentException if a line has no score column, or (as
     *         {@code NumberFormatException}) if the score is not a number
     */
    Counter<String> readLearnedWordsFromFile(File file) {
        ClassicCounter<String> words = new ClassicCounter<String>();
        for (String line : IOUtils.readLines(file)) {
            if (line.trim().isEmpty()) {
                continue;
            }
            String[] t = line.split("\t");
            if (t.length < 2) {
                throw new IllegalArgumentException("Expected <word>\\t<score> but found \"" + line + "\" in " + file);
            }
            words.setCount(t[0], Double.parseDouble(t[1]));
        }
        return words;
    }

    /**
     * Returns the counter stored in {@code learnedWords} for the given label.
     *
     * @param label the label to look up
     * @return the stored counter, or whatever the map yields for an unknown label
     */
    public Counter<String> getLearnedWords(String label) {
        return this.learnedWords.get(label);
    }

    /**
     * Returns the counter stored in {@code learnedPatterns} for the given label.
     *
     * @param label the label to look up
     * @return the stored counter, or whatever the map yields for an unknown label
     */
    public Counter<E> getLearnedPatterns(String label) {
        return this.learnedPatterns.get(label);
    }

    /**
     * Returns the label-to-patterns map itself (not a copy), so changes made by the caller
     * are visible to this object.
     *
     * @return the internal {@code learnedPatterns} map
     */
    public Map<String, Counter<E>> getLearnedPatterns() {
        return this.learnedPatterns;
    }

    /**
     * Stores {@code words} as the learned words of {@code label}, replacing any previous entry.
     * Note the argument order: the counter comes first, the label second.
     *
     * @param words the counter to store (kept by reference)
     * @param label the label it belongs to
     */
    public void setLearnedWords(Counter<String> words, String label) {
        this.learnedWords.put(label, words);
    }

    /**
     * Stores {@code patterns} as the learned patterns of {@code label}, replacing any previous
     * entry. Note the argument order: the counter comes first, the label second.
     *
     * @param patterns the counter to store (kept by reference)
     * @param label the label it belongs to
     */
    public void setLearnedPatterns(Counter<E> patterns, String label) {
        this.learnedPatterns.put(label, patterns);
    }

    /**
     * Counts entity-level true positives, false positives and false negatives for one document
     * by comparing runs of equal gold tags with runs of equal guessed tags.
     *
     * <p>A gold run is a true positive when a guessed run with the same tag starts at the same
     * token and ends at the same token; otherwise it is a false negative. A guessed run that is
     * not such an exact match is a false positive. The {@code word*} counters receive the text
     * accumulated for the run, the {@code entity*} counters receive its tag.
     *
     * @param doc the tokens to evaluate; gold tags are read from {@code GoldAnswerAnnotation}
     * @param entityTP incremented by tag for each exactly matched run
     * @param entityFP incremented by tag for each spurious guessed run
     * @param entityFN incremented by tag for each missed gold run
     * @param background the tag that marks "no entity"
     * @param wordTP receives the text of true-positive runs
     * @param wordTN not used by this method
     * @param wordFP receives the text of false-positive runs
     * @param wordFN receives the text of false-negative runs
     * @param whichClassToCompare annotation key under which the guessed tag is stored
     * @return {@code false} as soon as a token lacks a gold or guessed tag (counters may then
     *         already hold counts from earlier tokens); {@code true} otherwise
     */
    public static boolean countResultsPerEntity(List<CoreLabel> doc, Counter<String> entityTP, Counter<String> entityFP, Counter<String> entityFN, String background, Counter<String> wordTP, Counter<String> wordTN, Counter<String> wordFP, Counter<String> wordFN, Class<? extends TypesafeMap.Key<String>> whichClassToCompare) {
        int index = 0;
        // Token index at which the current gold / guessed run started.
        int goldIndex = 0;
        int guessIndex = 0;
        String lastGold = background;
        String lastGuess = background;
        // Text of the run currently being accumulated.
        String str = "";
        for (CoreLabel line : doc) {
            String gold = (String)line.get(CoreAnnotations.GoldAnswerAnnotation.class);
            String guess = (String)line.get(whichClassToCompare);
            if (gold == null || guess == null) {
                return false;
            }
            // A non-background gold run ended just before this token.
            if (lastGold != null && !lastGold.equals(gold) && !lastGold.equals(background)) {
                if (lastGuess.equals(lastGold) && !lastGuess.equals(guess) && goldIndex == guessIndex) {
                    wordTP.incrementCount(str);
                    entityTP.incrementCount(lastGold, 1.0);
                } else {
                    wordFN.incrementCount(str);
                    entityFN.incrementCount(lastGold, 1.0);
                    str = "";
                }
            }
            // A non-background guessed run ended just before this token.
            if (lastGuess != null && !lastGuess.equals(guess) && !lastGuess.equals(background)) {
                if (!lastGuess.equals(lastGold) || lastGuess.equals(guess) || goldIndex != guessIndex || lastGold.equals(gold)) {
                    entityFP.incrementCount(lastGuess, 1.0);
                    wordFP.incrementCount(str);
                }
                str = "";
            }
            if (lastGuess != null && lastGold != null && lastGold.equals(background) && lastGuess.equals(background)) {
                str = "";
            }
            // Start a new run whenever the tag changes. This must follow the checks above,
            // which still need the previous run's tag and start index.
            if (lastGold == null || !lastGold.equals(gold)) {
                lastGold = gold;
                goldIndex = index;
            }
            if (lastGuess == null || !lastGuess.equals(guess)) {
                lastGuess = guess;
                guessIndex = index;
            }
            ++index;
            if (str.isEmpty()) {
                str = line.word();
                continue;
            }
            str = str + " " + line.word();
        }
        // Account for runs that extend to the end of the document.
        if (lastGold != null && !lastGold.equals(background)) {
            if (lastGold.equals(lastGuess) && goldIndex == guessIndex) {
                entityTP.incrementCount(lastGold, 1.0);
                wordTP.incrementCount(str);
            } else {
                entityFN.incrementCount(lastGold, 1.0);
                wordFN.incrementCount(str);
            }
            str = "";
        }
        if (lastGuess != null && !lastGuess.equals(background)) {
            if (!lastGold.equals(lastGuess) || goldIndex != guessIndex) {
                entityFP.incrementCount(lastGuess, 1.0);
                wordFP.incrementCount(str);
            }
            str = "";
        }
        return true;
    }

    /**
     * Counts token-level results for one document, after first delegating to
     * {@code CRFClassifier.countResults} with the entity counters.
     *
     * <p>Per token: gold equals guess and is not background counts as a true positive; gold
     * not background with a background guess counts as a false negative; gold background with
     * a non-background guess counts as a false positive; both background counts as a true
     * negative. Any other combination (two different non-background tags) is rejected.
     *
     * <p>NOTE(review): the entity counters are updated both by {@code CRFClassifier.countResults}
     * and by the loop below. Whether that double update is intended cannot be told from this
     * method -- confirm against {@code CRFClassifier.countResults}.
     *
     * @param doc the tokens to evaluate; gold tags are read from {@code GoldAnswerAnnotation}
     * @param entityTP incremented by tag for each true-positive token
     * @param entityFP incremented by tag for each false-positive token
     * @param entityFN incremented by tag for each false-negative token
     * @param background the tag that marks "no entity" (compared ignoring case)
     * @param wordTP receives the word of each true-positive token
     * @param wordTN receives the word of each true-negative token; may be null, in which case
     *        true negatives are not recorded
     * @param wordFP receives the word of each false-positive token
     * @param wordFN receives the word of each false-negative token
     * @param whichClassToCompare annotation key under which the guessed tag is stored
     * @throws RuntimeException if gold and guess are two different non-background tags
     */
    public static void countResultsPerToken(List<CoreLabel> doc, Counter<String> entityTP, Counter<String> entityFP, Counter<String> entityFN, String background, Counter<String> wordTP, Counter<String> wordTN, Counter<String> wordFP, Counter<String> wordFN, Class<? extends TypesafeMap.Key<String>> whichClassToCompare) {
        CRFClassifier.countResults(doc, entityTP, entityFP, entityFN, background);
        for (CoreLabel line : doc) {
            String gold = (String)line.get(CoreAnnotations.GoldAnswerAnnotation.class);
            String guess = (String)line.get(whichClassToCompare);
            assert (gold != null) : "gold is null";
            assert (guess != null) : "guess is null";
            boolean sameTag = gold.equals(guess);
            boolean goldIsBackground = gold.equalsIgnoreCase(background);
            boolean guessIsBackground = guess.equalsIgnoreCase(background);
            if (sameTag && !goldIsBackground) {
                entityTP.incrementCount(gold);
                wordTP.incrementCount(line.word());
                continue;
            }
            if (!sameTag && !goldIsBackground && guessIsBackground) {
                entityFN.incrementCount(gold);
                wordFN.incrementCount(line.word());
                continue;
            }
            if (!sameTag && !guessIsBackground && goldIsBackground) {
                wordFP.incrementCount(line.word());
                entityFP.incrementCount(guess);
                continue;
            }
            if (goldIsBackground && guessIsBackground) {
                // True negative. This branch used to repeat the true-positive condition and was
                // therefore unreachable, so wordTN was never updated.
                if (wordTN != null) {
                    wordTN.incrementCount(line.word());
                }
                continue;
            }
            throw new RuntimeException("don't know reached here. not meant for more than one entity label: " + gold + " and " + guess);
        }
    }

    /**
     * Dispatches to the per-entity or the per-token counting routine; all other arguments are
     * passed through unchanged.
     *
     * @param evalPerEntity {@code true} to use {@code countResultsPerEntity}, {@code false} to
     *        use {@code countResultsPerToken}
     */
    public static void countResults(List<CoreLabel> doc, Counter<String> entityTP, Counter<String> entityFP, Counter<String> entityFN, String background, Counter<String> wordTP, Counter<String> wordTN, Counter<String> wordFP, Counter<String> wordFN, Class<? extends TypesafeMap.Key<String>> whichClassToCompare, boolean evalPerEntity) {
        if (!evalPerEntity) {
            GetPatternsFromDataMultiClass.countResultsPerToken(doc, entityTP, entityFP, entityFN, background, wordTP, wordTN, wordFP, wordFN, whichClassToCompare);
            return;
        }
        // The boolean result of the per-entity variant is deliberately ignored here.
        GetPatternsFromDataMultiClass.countResultsPerEntity(doc, entityTP, entityFP, entityFN, background, wordTP, wordTN, wordFP, wordFN, whichClassToCompare);
    }

    /**
     * Writes each sentence on one line as {@code <sentence id>\t<tokens>}, wrapping every run
     * of tokens that carries a label in {@code <label> ... </label>} tags.
     *
     * <p>Labels that end before a token are closed (in reverse order) before labels that start
     * at that token are opened. Labels still open on the last token of a sentence are closed at
     * the end of the line, so that every opening tag has a matching closing tag.
     *
     * @param sents sentence id to token list
     * @param writer destination; neither flushed nor closed here
     * @throws IOException if writing fails
     */
    private void writeLabelDataSents(Map<String, List<CoreLabel>> sents, BufferedWriter writer) throws IOException {
        for (Map.Entry<String, List<CoreLabel>> sent : sents.entrySet()) {
            writer.write(sent.getKey() + "\t");
            // For each label: did the previous token carry it?
            HashMap<String, Boolean> lastWordLabeled = new HashMap<String, Boolean>();
            for (String label : this.constVars.getLabelDictionary().keySet()) {
                lastWordLabeled.put(label, false);
            }
            for (CoreLabel s : sent.getValue()) {
                String str = "";
                ArrayList<String> listEndedLabels = new ArrayList<String>();
                ArrayList<String> startingLabels = new ArrayList<String>();
                for (Map.Entry<String, Class<TypesafeMap.Key<String>>> as : this.constVars.getAnswerClass().entrySet()) {
                    String label = as.getKey();
                    boolean lastwordlabeled = (Boolean)lastWordLabeled.get(label);
                    if (s.get(as.getValue()).equals(label)) {
                        if (!lastwordlabeled) {
                            startingLabels.add(label);
                        }
                        lastWordLabeled.put(label, true);
                        continue;
                    }
                    if (lastwordlabeled) {
                        listEndedLabels.add(label);
                    }
                    lastWordLabeled.put(label, false);
                }
                // Close ended labels in reverse order, then open the new ones.
                for (int i = listEndedLabels.size() - 1; i >= 0; --i) {
                    str = str + " </" + (String)listEndedLabels.get(i) + ">";
                }
                for (String label : startingLabels) {
                    str = str + " <" + label + "> ";
                }
                str = str + " " + s.word();
                writer.write(str.trim() + " ");
            }
            // Close whatever is still open after the last token. Previously such tags were
            // left unclosed, producing unbalanced markup.
            ArrayList<String> stillOpen = new ArrayList<String>();
            for (Map.Entry<String, Class<TypesafeMap.Key<String>>> as : this.constVars.getAnswerClass().entrySet()) {
                if (Boolean.TRUE.equals(lastWordLabeled.get(as.getKey()))) {
                    stillOpen.add(as.getKey());
                }
            }
            for (int i = stillOpen.size() - 1; i >= 0; --i) {
                writer.write("</" + stillOpen.get(i) + "> ");
            }
            writer.write("\n");
        }
    }

    /**
     * Writes all sentence batches to {@code outFile} in the inline-tag format produced by
     * {@code writeLabelDataSents}.
     *
     * @param outFile path of the file to create or overwrite
     * @throws IOException if the file cannot be opened, written or closed
     * @throws ClassNotFoundException declared by the original signature; presumably raised
     *         while iterating over the batches
     */
    public void writeLabeledData(String outFile) throws IOException, ClassNotFoundException {
        BufferedWriter writer = new BufferedWriter(new FileWriter(outFile));
        try {
            ConstantsAndVariables.DataSentsIterator sentsIter = new ConstantsAndVariables.DataSentsIterator(this.constVars.batchProcessSents);
            while (sentsIter.hasNext()) {
                Object sentsf = sentsIter.next();
                this.writeLabelDataSents((Map)((Pair)sentsf).first(), writer);
            }
        } finally {
            // Release the file handle even when reading or writing a batch fails.
            writer.close();
        }
    }

    /**
     * Writes all sentence batches to {@code outFile} in the column format produced by
     * {@code writeColumnOutputSents}.
     *
     * @param outFile path of the file to create or overwrite
     * @throws IOException if the file cannot be opened, written or closed
     * @throws ClassNotFoundException declared by the original signature; presumably raised
     *         while iterating over the batches
     */
    public void writeColumnOutput(String outFile) throws IOException, ClassNotFoundException {
        BufferedWriter writer = new BufferedWriter(new FileWriter(outFile));
        try {
            ConstantsAndVariables.DataSentsIterator sentsIter = new ConstantsAndVariables.DataSentsIterator(this.constVars.batchProcessSents);
            while (sentsIter.hasNext()) {
                Object sentsf = sentsIter.next();
                this.writeColumnOutputSents((Map)((Pair)sentsf).first(), writer);
            }
        } finally {
            // Release the file handle even when reading or writing a batch fails.
            writer.close();
        }
    }

    /**
     * Writes the sentences in two-column form: one {@code <word>\t<labels>} line per token and
     * an empty line after each sentence. The label column holds the comma-joined labels the
     * token carries, or {@code O} when it carries none.
     *
     * @param sents sentence id to token list (the ids are not written)
     * @param writer destination; neither flushed nor closed here
     * @throws IOException if writing fails
     */
    private void writeColumnOutputSents(Map<String, List<CoreLabel>> sents, BufferedWriter writer) throws IOException {
        for (Map.Entry<String, List<CoreLabel>> sentence : sents.entrySet()) {
            for (CoreLabel token : sentence.getValue()) {
                writer.write(token.word() + "\t");
                HashSet<String> tokenLabels = new HashSet<String>();
                for (Map.Entry<String, Class<TypesafeMap.Key<String>>> answer : this.constVars.getAnswerClass().entrySet()) {
                    String candidate = answer.getKey();
                    if (token.get(answer.getValue()).equals(candidate)) {
                        tokenLabels.add(candidate);
                    }
                }
                if (!tokenLabels.isEmpty()) {
                    writer.write(StringUtils.join(tokenLabels, ",") + "\n");
                } else {
                    writer.write("O\n");
                }
            }
            writer.write("\n");
        }
    }

    /**
     * Evaluates the labeling of the test sentences separately for every answer class.
     *
     * <p>For each label, every token is copied into a fresh {@code CoreLabel} whose guessed and
     * gold tags are reduced to "this label" or the background symbol; the copies are then
     * counted with {@code countResults}. False positives and false negatives are printed, and
     * precision, recall and F1 are logged.
     *
     * @param testSentences sentence id to token list; tokens must carry both the label's answer
     *        annotation and a {@code GoldAnswerAnnotation}
     * @param evalPerEntity {@code true} for entity-level counting, {@code false} for token-level
     * @throws IOException declared by the original signature
     */
    public void evaluate(Map<String, List<CoreLabel>> testSentences, boolean evalPerEntity) throws IOException {
        for (Map.Entry<String, Class<TypesafeMap.Key<String>>> anscl : this.constVars.getAnswerClass().entrySet()) {
            String label = anscl.getKey();
            ClassicCounter<String> entityTP = new ClassicCounter<String>();
            ClassicCounter<String> entityFP = new ClassicCounter<String>();
            ClassicCounter<String> entityFN = new ClassicCounter<String>();
            ClassicCounter<String> wordTP = new ClassicCounter<String>();
            ClassicCounter<String> wordTN = new ClassicCounter<String>();
            ClassicCounter<String> wordFP = new ClassicCounter<String>();
            ClassicCounter<String> wordFN = new ClassicCounter<String>();
            for (List<CoreLabel> doc : testSentences.values()) {
                ArrayList<CoreLabel> binarized = new ArrayList<CoreLabel>();
                for (CoreLabel token : doc) {
                    CoreLabel copy = new CoreLabel();
                    copy.setWord(token.word());
                    // Collapse the guessed tag to: this label vs. background.
                    String guessTag = token.get(anscl.getValue()).equals(label) ? label : ConstantsAndVariables.backgroundSymbol;
                    copy.set(CoreAnnotations.AnswerAnnotation.class, guessTag);
                    // Collapse the gold tag the same way.
                    String goldTag = ((String)token.get(CoreAnnotations.GoldAnswerAnnotation.class)).equals(label) ? label : ConstantsAndVariables.backgroundSymbol;
                    copy.set(CoreAnnotations.GoldAnswerAnnotation.class, goldTag);
                    binarized.add(copy);
                }
                GetPatternsFromDataMultiClass.countResults(binarized, entityTP, entityFP, entityFN, ConstantsAndVariables.backgroundSymbol, wordTP, wordTN, wordFP, wordFN, CoreAnnotations.AnswerAnnotation.class, evalPerEntity);
            }
            System.out.println("False Positives: " + Counters.toSortedString(wordFP, wordFP.size(), "%s:%.2f", ";"));
            System.out.println("False Negatives: " + Counters.toSortedString(wordFN, wordFN.size(), "%s:%.2f", ";"));
            Redwood.log(new Object[]{Redwood.DBG, "\nFor label " + label + " True Positives: " + entityTP + "\tFalse Positives: " + entityFP + "\tFalse Negatives: " + entityFN});
            Counter<String> guessedTotal = Counters.add(entityTP, entityFP);
            Counter<String> precision = Counters.division(entityTP, guessedTotal);
            Counter<String> goldTotal = Counters.add(entityTP, entityFN);
            Counter<String> recall = Counters.division(entityTP, goldTotal);
            Redwood.log(ConstantsAndVariables.minimaldebug, "\nFor label " + label + " Precision: " + precision + ", Recall: " + recall + ", F1 score:  " + GetPatternsFromDataMultiClass.FScore(precision, recall, 1.0));
        }
    }

    /**
     * Computes the F-beta score per key as
     * {@code (1 + beta^2) * precision * recall / (beta^2 * precision + recall)}, using
     * {@code Counters.divisionNonNaN} for the final division.
     *
     * @param precision precision per key
     * @param recall recall per key
     * @param beta weight of recall relative to precision
     * @return the F-beta score per key
     */
    public static <D> Counter<D> FScore(Counter<D> precision, Counter<D> recall, double beta) {
        double betasq = beta * beta;
        Counter<D> numerator = Counters.scale(Counters.product(precision, recall), 1.0 + betasq);
        Counter<D> denominator = Counters.add(Counters.scale(precision, betasq), recall);
        return Counters.divisionNonNaN(numerator, denominator);
    }

    /**
     * Expands a list of paths separated by {@code ,} or {@code ;} into files.
     *
     * <p>Each element is handled as follows: a directory contributes every file found
     * recursively below it; an existing file contributes itself; anything else is treated as
     * {@code <directory>/<regex>}, contributing the entries of the directory whose names are
     * accepted by the regex filter. A pattern without a {@code /} is resolved against the
     * current directory.
     *
     * @param file the separated list of directories, files or file-name patterns
     * @return all files found, in the order the elements were given
     * @throws RuntimeException if the directory part of a pattern cannot be listed
     */
    public static List<File> getAllFiles(String file) {
        ArrayList<File> allFiles = new ArrayList<File>();
        for (String tokfile : file.split("[,;]")) {
            File filef = new File(tokfile);
            if (filef.isDirectory()) {
                Redwood.log(new Object[]{Redwood.DBG, "Will read from directory " + filef});
                for (File f : IOUtils.iterFilesRecursive(filef, java.util.regex.Pattern.compile(".*"))) {
                    Redwood.log(ConstantsAndVariables.extremedebug, "Will read from file " + f);
                    allFiles.add(f);
                }
                continue;
            }
            if (filef.exists()) {
                Redwood.log(new Object[]{Redwood.DBG, "Will read from file " + filef});
                allFiles.add(filef);
                continue;
            }
            // Neither a directory nor an existing file: the last path element is a regex.
            Redwood.log(new Object[]{Redwood.DBG, "trying to read from file " + filef});
            RegExFileFilter fileFilter = new RegExFileFilter(java.util.regex.Pattern.compile(filef.getName()));
            int lastSlash = tokfile.lastIndexOf("/");
            // No slash: look in the current directory (substring(0, -1) used to throw).
            // Leading slash only: look in the root directory.
            File dir = lastSlash > 0 ? new File(tokfile.substring(0, lastSlash)) : new File(lastSlash == 0 ? "/" : ".");
            File[] files = dir.listFiles(fileFilter);
            if (files == null) {
                // listFiles returns null when dir is not a readable directory; fail with
                // context instead of a NullPointerException.
                throw new RuntimeException("Cannot list directory " + dir + " while resolving file pattern " + tokfile);
            }
            allFiles.addAll(Arrays.asList(files));
        }
        return allFiles;
    }

    /**
     * Computes precision and recall of the words learned for {@code label} against a gold list.
     *
     * <p>A learned word counts as correct only when the gold list maps it to {@code true};
     * learned words missing from the gold list are reported on standard error and counted as
     * incorrect. Recall is measured against the number of gold words marked {@code true}.
     *
     * @param label the label whose learned words are evaluated
     * @param goldWords4Label gold word to "is a correct word for this label"
     * @return the pair (precision, recall); a value is NaN when its denominator is zero
     */
    private Pair<Double, Double> getPrecisionRecall(String label, Map<String, Boolean> goldWords4Label) {
        Set<String> learned = this.getLearnedWords(label).keySet();
        int goldPositives = 0;
        for (Map.Entry<String, Boolean> goldEntry : goldWords4Label.entrySet()) {
            if (goldEntry.getValue().booleanValue()) {
                ++goldPositives;
            }
        }
        int hits = 0;
        int misses = 0;
        HashSet<String> missingFromGold = new HashSet<String>();
        for (String word : learned) {
            if (goldWords4Label.containsKey(word)) {
                if (goldWords4Label.get(word).booleanValue()) {
                    ++hits;
                } else {
                    ++misses;
                }
            } else {
                // Unknown to the gold list: assumed negative.
                missingFromGold.add(word);
                ++misses;
            }
        }
        if (!missingFromGold.isEmpty()) {
            System.err.println("Gold entity list does not contain words " + missingFromGold + " for label " + label + ". Assuming them as negative.");
        }
        double precision = (double)hits / (double)(hits + misses);
        double recall = (double)hits / (double)goldPositives;
        return new Pair<Double, Double>(precision, recall);
    }

    /**
     * Computes the F-beta score of a single precision/recall pair.
     *
     * <p>When the denominator {@code beta^2 * precision + recall} is zero the score is reported as
     * {@code 0.0} rather than NaN, matching the counter-based overload, which divides with
     * {@code Counters.divisionNonNaN}. NaN inputs still propagate to the result.
     *
     * @param precision precision value
     * @param recall recall value
     * @param beta weight of recall relative to precision
     * @return {@code (1 + beta^2) * precision * recall / (beta^2 * precision + recall)}, or
     *         {@code 0.0} when that denominator is zero
     */
    public double FScore(double precision, double recall, double beta) {
        double betasq = beta * beta;
        double denominator = betasq * precision + recall;
        if (denominator == 0.0) {
            return 0.0;
        }
        return (1.0 + betasq) * precision * recall / denominator;
    }

    /**
     * Collects the labels for which the given token carries a value other than the background
     * symbol.
     *
     * @param l the token to inspect
     * @return the label names whose answer-class annotation on {@code l} differs from
     *         {@code ConstantsAndVariables.backgroundSymbol}
     */
    public Set<String> getNonBackgroundLabels(CoreLabel l) {
        Set<String> nonBackground = new HashSet<String>();
        for (Map.Entry<String, Class<TypesafeMap.Key<String>>> answerClass : this.constVars.getAnswerClass().entrySet()) {
            boolean isBackground = l.get(answerClass.getValue()).equals(ConstantsAndVariables.backgroundSymbol);
            if (!isBackground) {
                nonBackground.add(answerClass.getKey());
            }
        }
        return nonBackground;
    }

    /**
     * Reads the seed words named by the {@code seedWordsFiles} property.
     *
     * @param props properties holding the {@code seedWordsFiles} entry
     * @return seed words per label, as produced by the String overload of this method
     */
    public static Map<String, Set<String>> readSeedWords(Properties props) {
        String seedWordsFiles = props.getProperty("seedWordsFiles");
        return GetPatternsFromDataMultiClass.readSeedWords(seedWordsFiles);
    }

    /**
     * Reads seed words for each label from the files named in the specification.
     *
     * <p>The specification has the form
     * {@code label1,filewithlistofwords1;label2,filewithlistofwords2;...}. Every line of a seed
     * file is trimmed; empty lines and lines starting with {@code #} are skipped.
     *
     * @param seedWordsFiles the label/file specification; must not be null
     * @return a map from label to the set of seed words read for it
     * @throws RuntimeException if the specification is null or an entry is not of the form
     *         {@code label,file}
     */
    public static Map<String, Set<String>> readSeedWords(String seedWordsFiles) {
        if (seedWordsFiles == null) {
            throw new RuntimeException("Needs both seedWordsFiles and file parameters to run this class!\nseedWordsFiles has format: label1,filewithlistofwords1;label2,filewithlistofwords2;...");
        }
        HashMap<String, Set<String>> seedWords = new HashMap<String, Set<String>>();
        for (String seedFile : seedWordsFiles.split(";")) {
            String[] t = seedFile.split(",");
            // Fail with a message naming the bad entry instead of an index error on t[1].
            if (t.length < 2) {
                throw new RuntimeException("Cannot parse seedWordsFiles entry '" + seedFile + "'.\nseedWordsFiles has format: label1,filewithlistofwords1;label2,filewithlistofwords2;...");
            }
            String label = t[0];
            String seedWordsFile = t[1];
            HashSet<String> seedWords4Label = new HashSet<String>();
            for (String line : IOUtils.readLines(seedWordsFile)) {
                line = line.trim();
                if (line.isEmpty() || line.startsWith("#")) {
                    continue;
                }
                seedWords4Label.add(line);
            }
            seedWords.put(label, seedWords4Label);
            Redwood.log(ConstantsAndVariables.minimaldebug, "Number of seed words for label " + label + " is " + seedWords4Label.size());
        }
        return seedWords;
    }

    /**
     * Builds a model from the given properties, optionally learns patterns and words, writes the
     * requested outputs and optionally evaluates the result.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>read seed words ({@code seedWordsFiles}) and optional {@code answerClasses};</li>
     *   <li>load training sentences from {@code file}, either by tokenizing text
     *       ({@code fileFormat} unset, {@code text} or {@code txt}) or by reading serialized maps
     *       ({@code ser}); with {@code batchProcessSents} the sentences stay in files registered
     *       in {@code Data.sentsFiles} instead of memory;</li>
     *   <li>when {@code evaluate} is set and {@code evalFileWithGoldLabels} is given, load the
     *       evaluation sentences according to {@code evalFileFormat};</li>
     *   <li>construct the model, optionally load saved patterns/words
     *       ({@code loadSavedPatternsWordsDir}) and run learning ({@code learn});</li>
     *   <li>write marked/column output and, with {@code savePatternsWordsDir}, the learned
     *       patterns and phrases under {@code patternsWordsDir};</li>
     *   <li>with {@code evaluate}, report precision/recall/F1 against
     *       {@code goldEntitiesEvalFiles} and evaluate on the evaluation sentences.</li>
     * </ol>
     *
     * @param props configuration properties
     * @return the constructed (and possibly trained) model
     * @throws RuntimeException if there is no training data or {@code fileFormat} is not recognized
     */
    public static <E extends Pattern> GetPatternsFromDataMultiClass<E> run(Properties props) throws IOException, ClassNotFoundException, IllegalAccessException, InterruptedException, ExecutionException, InstantiationException, NoSuchMethodException, InvocationTargetException, SQLException {
        HashMap<String, Set<E>> ignorePatterns = new HashMap<String, Set<E>>();
        HashMap p0 = new HashMap();
        HashMap<String, Counter<String>> p0Set = new HashMap<String, Counter<String>>();
        String fileFormat = props.getProperty("fileFormat");
        Map<String, Set<String>> seedWords = GetPatternsFromDataMultiClass.readSeedWords(props);
        HashMap answerClasses = new HashMap();
        String ansClasses = props.getProperty("answerClasses");
        if (ansClasses != null) {
            // answerClasses has the form label1,className1;label2,className2;...
            for (String l : ansClasses.split(";")) {
                String[] t = l.split(",");
                String label = t[0];
                String cl = t[1];
                Class<?> answerClass = ClassLoader.getSystemClassLoader().loadClass(cl);
                answerClasses.put(label, answerClass);
            }
        }
        HashMap<String, List<CoreLabel>> sents = null;
        boolean batchProcessSents = Boolean.parseBoolean(props.getProperty("batchProcessSents", "false"));
        int numMaxSentencesPerBatchFile = Integer.parseInt(props.getProperty("numMaxSentencesPerBatchFile", String.valueOf(Integer.MAX_VALUE)));
        if (!batchProcessSents) {
            sents = new HashMap<String, List<CoreLabel>>();
        } else {
            // In batch mode sentences live in files; sents stays null.
            Data.sentsFiles = new ArrayList<File>();
            Data.sentId2File = new ConcurrentHashMap<String, File>();
        }
        String file = props.getProperty("file");
        String posModelPath = props.getProperty("posModelPath");
        boolean lowercase = Boolean.parseBoolean(props.getProperty("lowercaseText"));
        boolean useTargetNERRestriction = Boolean.parseBoolean(props.getProperty("useTargetNERRestriction"));
        boolean useTargetParserParentRestriction = Boolean.parseBoolean(props.getProperty("useTargetParserParentRestriction"));
        boolean useContextNERRestriction = Boolean.parseBoolean(props.getProperty("useContextNERRestriction"));
        boolean evaluate = Boolean.parseBoolean(props.getProperty("evaluate"));
        boolean addEvalSentsToTrain = Boolean.parseBoolean(props.getProperty("addEvalSentsToTrain"));
        String evalFileWithGoldLabels = props.getProperty("evalFileWithGoldLabels");
        if (!(file != null || evalFileWithGoldLabels != null && addEvalSentsToTrain)) {
            throw new RuntimeException("No training data! file is " + file + " and evalFileWithGoldLabels is " + evalFileWithGoldLabels + " and addEvalSentsToTrain is " + addEvalSentsToTrain);
        }
        File saveSentencesSerDir = null;
        File tempSaveSentencesDir = null;
        if (file != null) {
            String saveSentencesSerDirstr = props.getProperty("saveSentencesSerDir");
            if (saveSentencesSerDirstr != null) {
                saveSentencesSerDir = new File(saveSentencesSerDirstr);
                IOUtils.ensureDir(saveSentencesSerDir);
                if (!batchProcessSents) {
                    IOUtils.writeObjectToFile(sents, saveSentencesSerDirstr + "/sents_all.ser");
                }
            }
            // Reserve a unique temp name, then replace the file with a directory of that name.
            String systemdir = System.getProperty("java.io.tmpdir");
            tempSaveSentencesDir = File.createTempFile("sents", ".tmp", new File(systemdir));
            tempSaveSentencesDir.deleteOnExit();
            tempSaveSentencesDir.delete();
            tempSaveSentencesDir.mkdir();
            int numFilesTillNow = 0;
            if (fileFormat == null || fileFormat.equalsIgnoreCase("text") || fileFormat.equalsIgnoreCase("txt")) {
                HashMap sentsthis = new HashMap();
                for (File f : GetPatternsFromDataMultiClass.getAllFiles(file)) {
                    Redwood.log(new Object[]{Redwood.DBG, "Annotating text in " + f});
                    Iterator<String> reader = IOUtils.readLines(f).iterator();
                    // tokenize consumes lines from the iterator; keep calling until it is drained.
                    while (reader.hasNext()) {
                        numFilesTillNow = GetPatternsFromDataMultiClass.tokenize(reader, posModelPath, lowercase, useTargetNERRestriction || useContextNERRestriction, f.getName() + "-" + numFilesTillNow + "-", useTargetParserParentRestriction, props.getProperty("numThreads"), batchProcessSents, numMaxSentencesPerBatchFile, saveSentencesSerDir == null ? tempSaveSentencesDir : saveSentencesSerDir, sentsthis, numFilesTillNow);
                    }
                    if (batchProcessSents) continue;
                    sents.putAll(sentsthis);
                }
                if (!batchProcessSents) {
                    IOUtils.writeObjectToFile(sents, (saveSentencesSerDir == null ? tempSaveSentencesDir : saveSentencesSerDir) + "/sents_" + numFilesTillNow);
                }
            } else if (fileFormat.equalsIgnoreCase("ser")) {
                for (File f : GetPatternsFromDataMultiClass.getAllFiles(file)) {
                    Redwood.log(new Object[]{Redwood.DBG, "reading from ser file " + f});
                    if (!batchProcessSents) {
                        sents.putAll((Map)IOUtils.readObjectFromFile(f));
                        continue;
                    }
                    // Batch mode: copy the serialized file into the temp dir under a flattened name.
                    File newf = new File(tempSaveSentencesDir.getAbsolutePath() + "/" + f.getAbsolutePath().replaceAll(java.util.regex.Pattern.quote("/"), "_"));
                    IOUtils.cp(f, newf);
                    Data.sentsFiles.add(newf);
                }
            } else {
                throw new RuntimeException("Cannot identify the file format. Valid values are text (or txt) and ser, where the serialized file is of the type Map<String, List<CoreLabel>>.");
            }
        }
        Map<String, List<CoreLabel>> evalsents = new HashMap<String, List<CoreLabel>>();
        File saveEvalSentencesSerFileFile = null;
        if (evaluate && evalFileWithGoldLabels != null) {
            String saveEvalSentencesSerFile = props.getProperty("saveEvalSentencesSerFile");
            if (saveEvalSentencesSerFile == null) {
                String systemdir = System.getProperty("java.io.tmpdir");
                saveEvalSentencesSerFileFile = File.createTempFile("evalsents", ".tmp", new File(systemdir));
            } else {
                saveEvalSentencesSerFileFile = new File(saveEvalSentencesSerFile);
            }
            HashMap<String, Class<? extends TypesafeMap.Key<String>>> setClassForTheseLabels = new HashMap<String, Class<? extends TypesafeMap.Key<String>>>();
            List<File> allFiles = GetPatternsFromDataMultiClass.getAllFiles(evalFileWithGoldLabels);
            int numFile = 0;
            String evalFileFormat = props.getProperty("evalFileFormat");
            if (evalFileFormat == null || evalFileFormat.equalsIgnoreCase("text") || evalFileFormat.equalsIgnoreCase("txt")) {
                for (File f : allFiles) {
                    Redwood.log(new Object[]{Redwood.DBG, "Annotating text in " + f + ". Num file " + ++numFile});
                    // Close the reader here so file handles are not leaked across many eval files.
                    BufferedReader evalReader = new BufferedReader(new FileReader(f));
                    List<CoreMap> sentsCMs;
                    try {
                        sentsCMs = AnnotatedTextReader.parseFile(evalReader, seedWords.keySet(), setClassForTheseLabels, true, f.getName());
                    } finally {
                        evalReader.close();
                    }
                    evalsents.putAll(GetPatternsFromDataMultiClass.runPOSNEROnTokens(sentsCMs, posModelPath, useTargetNERRestriction || useContextNERRestriction, "", useTargetParserParentRestriction, props.getProperty("numThreads")));
                }
            } else if (evalFileFormat.equalsIgnoreCase("ser")) {
                // The eval data has its own format property; fileFormat describes the training
                // data and may be null here.
                for (File f : allFiles) {
                    evalsents.putAll((Map)IOUtils.readObjectFromFile(f));
                }
            }
            Redwood.log(new Object[]{Redwood.DBG, "Adding " + evalsents.size() + " eval sents to the training set"});
            IOUtils.writeObjectToFile(evalsents, saveEvalSentencesSerFileFile);
            if (batchProcessSents) {
                Data.sentsFiles.add(saveEvalSentencesSerFileFile);
                for (String k : evalsents.keySet()) {
                    Data.sentId2File.put(k, saveEvalSentencesSerFileFile);
                }
            } else {
                sents.putAll(evalsents);
            }
        }
        boolean learn = Boolean.parseBoolean(props.getProperty("learn", "true"));
        boolean labelUsingSeedSets = Boolean.parseBoolean(props.getProperty("labelUsingSeedSets", "true"));
        GetPatternsFromDataMultiClass model = new GetPatternsFromDataMultiClass(props, sents, seedWords, labelUsingSeedSets);
        Execution.fillOptions(model, props);
        String sentsOutFile = props.getProperty("sentsOutFile");
        String wordsOutputFile = props.getProperty("wordsOutputFile");
        String patternOutFile = props.getProperty("patternOutFile");
        boolean loadSavedPatternsWordsDir = Boolean.parseBoolean(props.getProperty("loadSavedPatternsWordsDir"));
        boolean labelSentsUsingModel = Boolean.parseBoolean(props.getProperty("labelSentsUsingModel", "true"));
        boolean applyPatsUsingModel = Boolean.parseBoolean(props.getProperty("applyPatsUsingModel", "true"));
        if (loadSavedPatternsWordsDir) {
            GetPatternsFromDataMultiClass.loadFromSavedPatternsWordsDir(model, props, labelSentsUsingModel, applyPatsUsingModel);
        }
        if (learn) {
            model.iterateExtractApply(p0, p0Set, wordsOutputFile, sentsOutFile, patternOutFile, ignorePatterns);
        }
        if (model.constVars.markedOutputTextFile != null) {
            model.writeLabeledData(model.constVars.markedOutputTextFile);
        }
        if (model.constVars.columnOutputFile != null) {
            model.writeColumnOutput(model.constVars.columnOutputFile);
        }
        boolean savePatternsWordsDir = Boolean.parseBoolean(props.getProperty("savePatternsWordsDir"));
        if (savePatternsWordsDir) {
            String patternsWordsDir = props.getProperty("patternsWordsDir");
            for (String label : model.constVars.getLabelDictionary().keySet()) {
                IOUtils.ensureDir(new File(patternsWordsDir + "/" + label));
                Counter<E> pats = model.getLearnedPatterns(label);
                IOUtils.writeObjectToFile(pats, patternsWordsDir + "/" + label + "/patterns.ser");
                BufferedWriter w = new BufferedWriter(new FileWriter(patternsWordsDir + "/" + label + "/phrases.txt"));
                try {
                    model.writeWordsToFile(model.getLearnedWords(label), w);
                } finally {
                    // Release the file handle even if writing fails part-way.
                    w.close();
                }
            }
        }
        if (evaluate) {
            String goldEntitiesEvalFiles = props.getProperty("goldEntitiesEvalFiles");
            if (goldEntitiesEvalFiles != null) {
                // goldEntitiesEvalFiles has the form label1,goldfile1;label2,goldfile2;...
                for (String gfile : goldEntitiesEvalFiles.split(";")) {
                    String[] t = gfile.split(",");
                    String label = t[0];
                    String goldfile = t[1];
                    HashMap<String, Boolean> goldWords4Label = new HashMap<String, Boolean>();
                    for (String line : IOUtils.readLines(goldfile)) {
                        if ((line = line.trim()).isEmpty()) continue;
                        // A trailing '#' marks the entry as a negative example.
                        if (line.endsWith("#")) {
                            goldWords4Label.put(line.substring(0, line.length() - 1), false);
                            continue;
                        }
                        goldWords4Label.put(line, true);
                    }
                    // This method is static, so the instance method must be invoked on the model.
                    Pair<Double, Double> pr = model.getPrecisionRecall(label, goldWords4Label);
                    Redwood.log(ConstantsAndVariables.minimaldebug, "\nFor label " + label + ": Number of gold entities is " + goldWords4Label.size() + ", Precision is " + model.df.format(pr.first() * 100.0) + ", Recall is " + model.df.format(pr.second() * 100.0) + ", F1 is " + model.df.format(model.FScore(pr.first(), pr.second(), 1.0) * 100.0) + "\n\n");
                }
            }
            if (saveEvalSentencesSerFileFile != null && saveEvalSentencesSerFileFile.exists()) {
                if (batchProcessSents) {
                    evalsents = (Map)IOUtils.readObjectFromFile(saveEvalSentencesSerFileFile);
                }
                boolean evalPerEntity = Boolean.parseBoolean(props.getProperty("evalPerEntity", "true"));
                model.evaluate(evalsents, evalPerEntity);
            }
            if (evalsents.size() == 0 && goldEntitiesEvalFiles == null) {
                System.err.println("No eval sentences or list of gold entities provided to evaluate! Make sure evalFileWithGoldLabels or goldEntitiesEvalFiles is set, or turn off the evaluate flag");
            }
        }
        if (model.constVars.saveInvertedIndex) {
            model.constVars.invertedIndex.saveIndex(model.constVars.invertedIndexDirectory);
        }
        if (model.constVars.storePatsForEachToken.equals((Object)ConstantsAndVariables.PatternForEachTokenWay.LUCENE)) {
            model.patsForEachToken.close();
        }
        return model;
    }

    /**
     * Restores previously saved patterns and phrases for every label of the model and optionally
     * applies them to the model's sentences.
     *
     * <p>For each label, {@code patternsWordsDir/label/patterns.ser} and
     * {@code patternsWordsDir/label/phrases.txt} are loaded when present. The sentences are then
     * iterated; depending on the flags the loaded patterns are applied and/or the sentences are
     * labeled with the loaded words. Labeled sentences are written back to their backing file when
     * that file exists.
     *
     * @param model the model to populate
     * @param props properties providing {@code patternsWordsDir} and {@code sentsOutFile}
     * @param labelSentsUsingModel whether to label the sentences with the loaded words
     * @param applyPatsUsingModel whether to apply the loaded patterns to the sentences
     */
    private static <E extends Pattern> void loadFromSavedPatternsWordsDir(GetPatternsFromDataMultiClass<E> model, Properties props, boolean labelSentsUsingModel, boolean applyPatsUsingModel) throws IOException, ClassNotFoundException {
        String patternsWordsDir = props.getProperty("patternsWordsDir");
        String sentsOutFile = props.getProperty("sentsOutFile");
        for (String label : model.constVars.getLabelDictionary().keySet()) {
            assert (new File(patternsWordsDir + "/" + label).exists());
            File patf = new File(patternsWordsDir + "/" + label + "/patterns.ser");
            if (patf.exists()) {
                Counter patterns = (Counter)IOUtils.readObjectFromFile(patf);
                // Register the counter exactly once, including when it is empty, so later
                // getLearnedPatterns(label) calls see what was loaded.
                model.setLearnedPatterns(patterns, label);
                Redwood.log(new Object[]{Redwood.DBG, "Loaded " + patterns.size() + " patterns from " + patf});
            }
            File wordf = new File(patternsWordsDir + "/" + label + "/phrases.txt");
            if (wordf.exists()) {
                Counter<String> words = model.readLearnedWordsFromFile(wordf);
                model.setLearnedWords(words, label);
                Redwood.log(new Object[]{Redwood.DBG, "Loaded " + words.size() + " phrases from " + wordf});
            }
            CollectionValuedMap matchedTokensByPat = new CollectionValuedMap();
            ConstantsAndVariables.DataSentsIterator sentsIter = new ConstantsAndVariables.DataSentsIterator(model.constVars.batchProcessSents);
            TwoDimensionalCounter wordsandLemmaPatExtracted = new TwoDimensionalCounter();
            while (sentsIter.hasNext()) {
                // Each element pairs a sentence map (first) with its backing file (second).
                Pair sents = (Pair)sentsIter.next();
                if (model.constVars.restrictToMatched || applyPatsUsingModel) {
                    // NOTE(review): the same sentences are added to the inverted index twice;
                    // kept as-is because the index's add semantics are not visible here - confirm.
                    model.constVars.invertedIndex.add((Map)sents.first(), true);
                    model.constVars.invertedIndex.add((Map)sents.first(), true);
                    model.scorePhrases.applyPats(model.getLearnedPatterns(label), label, wordsandLemmaPatExtracted, matchedTokensByPat);
                }
                if (!labelSentsUsingModel) continue;
                Redwood.log(new Object[]{Redwood.DBG, "labeling sentences from " + sents.second() + " with the already learned words"});
                assert (sents.first() != null) : "Why are sents null";
                model.labelWords(label, (Map)sents.first(), model.getLearnedWords(label).keySet(), sentsOutFile, matchedTokensByPat);
                if (!((File)sents.second()).exists()) continue;
                // Persist the sentence map itself: readers of these files cast the content to a
                // Map, so writing the enclosing Pair would make the file unreadable for them.
                IOUtils.writeObjectToFile(sents.first(), (File)sents.second());
            }
        }
    }

    /**
     * Formats the time between two dates as days, hours, minutes and seconds.
     *
     * @param d1 start of the interval
     * @param d2 end of the interval
     * @return text of the form {@code "D days, H hours, M minutes, S seconds"}, built from the
     *         corresponding fields of the period between the two instants
     */
    public static String elapsedTime(Date d1, Date d2) {
        Period elapsed = new Interval(d1.getTime(), d2.getTime()).toPeriod();
        StringBuilder text = new StringBuilder();
        text.append(elapsed.getDays()).append(" days, ");
        text.append(elapsed.getHours()).append(" hours, ");
        text.append(elapsed.getMinutes()).append(" minutes, ");
        text.append(elapsed.getSeconds()).append(" seconds");
        return text.toString();
    }

    /**
     * Command-line entry point: converts the arguments to properties and runs the pipeline.
     *
     * <p>An out-of-memory error prints a hint about heap size and batch processing before the
     * stack trace; any other exception only prints its stack trace.
     *
     * @param args command-line arguments, resolved into properties
     */
    public static void main(String[] args) {
        try {
            GetPatternsFromDataMultiClass.run(StringUtils.argsToPropertiesWithResolve(args));
        }
        catch (OutOfMemoryError oom) {
            System.out.println("Out of memory! Either change the memory alloted by running as java -mx20g ... for example if you want to allot 20G. Or consider using batchProcessSents and numMaxSentencesPerBatchFile flags");
            oom.printStackTrace();
        }
        catch (Exception failure) {
            failure.printStackTrace();
        }
    }

    /**
     * Task that, for one label and a batch of sentence ids, pairs every pattern of every eligible
     * token with a phrase and sorts the pairs into positive, negative and unlabeled lists.
     */
    private class CalculateSufficientStatsThreads
    implements Callable {
        private final Map<String, List<CoreLabel>> sents;
        private final PatternsForEachToken patternsForEachToken;
        private final Collection<String> sentIds;
        private final String label;
        private final Class answerClass4Label;

        /**
         * @param patternsForEachToken source of the patterns of each token, looked up by sentence id
         * @param sentIds ids of the sentences this task processes
         * @param sents sentences by id
         * @param label label the statistics are collected for
         * @param answerClass4Label annotation key whose value on a token is compared with the label
         */
        public CalculateSufficientStatsThreads(PatternsForEachToken patternsForEachToken, Collection<String> sentIds, Map<String, List<CoreLabel>> sents, String label, Class answerClass4Label) {
            this.patternsForEachToken = patternsForEachToken;
            this.sentIds = sentIds;
            this.sents = sents;
            this.label = label;
            this.answerClass4Label = answerClass4Label;
        }

        /**
         * Collects (pattern, phrase) pairs for all tokens of the configured sentences.
         *
         * @return a triple of (positive, negative, unlabeled) pair lists
         * @throws RuntimeException if a sentence or a token has no pattern entry
         */
        // NOTE(review): the declared element type is Pair<Integer, String> while the lists built
        // below hold Pair<Pattern, String>; this looks like a decompilation artifact - confirm.
        public Triple<List<Pair<Integer, String>>, List<Pair<Integer, String>>, List<Pair<Integer, String>>> call() throws Exception {
            ArrayList<Pair<Pattern, String>> posWords = new ArrayList<Pair<Pattern, String>>();
            ArrayList<Pair<Pattern, String>> negWords = new ArrayList<Pair<Pattern, String>>();
            ArrayList<Pair<Pattern, String>> unlabWords = new ArrayList<Pair<Pattern, String>>();
            for (String sentId : this.sentIds) {
                Map pat4Sent = this.patternsForEachToken.getPatternsForAllTokens(sentId);
                if (pat4Sent == null) {
                    throw new RuntimeException("How come there are no patterns for " + sentId);
                }
                List<CoreLabel> sent = this.sents.get(sentId);
                for (int i = 0; i < sent.size(); ++i) {
                    Map longestMatchingPhrases;
                    CoreLabel token = sent.get(i);
                    String tokenWordOrLemma = token.word();
                    String longestMatchingPhrase = null;
                    // With useMatchingPhrase: use the longest matched phrase recorded for this
                    // label when it exists and is longer than the word; otherwise use the word.
                    longestMatchingPhrase = GetPatternsFromDataMultiClass.this.constVars.useMatchingPhrase ? ((longestMatchingPhrase = (String)(longestMatchingPhrases = (Map)token.get(PatternsAnnotations.LongestMatchedPhraseForEachLabel.class)).get(this.label)) != null && longestMatchingPhrase.length() > tokenWordOrLemma.length() ? longestMatchingPhrase : tokenWordOrLemma) : tokenWordOrLemma;
                    // The missing-pattern check runs before any filter, so it also fires for
                    // tokens that would be skipped below.
                    Set pats = pat4Sent.get(i);
                    if (pats == null) {
                        throw new RuntimeException("Why are patterns null for sentence " + sentId + " and token " + i + ". pat4Sent has token ids " + pat4Sent.keySet() + (GetPatternsFromDataMultiClass.this.constVars.batchProcessSents ? "" : ". The sentence is " + Data.sents.get(sentId)) + ". If you have switched batchProcessSents, recompute the patterns.");
                    }
                    // Skip tokens whose word matches the ignore regex.
                    if (PatternFactory.ignoreWordRegex.matcher(token.word()).matches()) continue;
                    String tag = token.tag();
                    // If tag prefixes are configured for this label, the token's tag must start
                    // with one of them.
                    if (GetPatternsFromDataMultiClass.this.constVars.allowedTagsInitials != null && GetPatternsFromDataMultiClass.this.constVars.allowedTagsInitials.containsKey(this.label)) {
                        boolean use = false;
                        for (String allowed : GetPatternsFromDataMultiClass.this.constVars.allowedTagsInitials.get(this.label)) {
                            if (!tag.startsWith(allowed)) continue;
                            use = true;
                            break;
                        }
                        if (!use) continue;
                    }
                    String nertag = token.ner();
                    // If NER tags are configured for this label, the token's NER tag must be one of them.
                    if (GetPatternsFromDataMultiClass.this.constVars.allowedNERsforLabels != null && GetPatternsFromDataMultiClass.this.constVars.allowedNERsforLabels.containsKey(this.label) && !GetPatternsFromDataMultiClass.this.constVars.allowedNERsforLabels.get(this.label).contains(nertag)) continue;
                    // Token already carries this label: every pattern is a positive, paired with
                    // the phrase chosen above.
                    if (token.get(this.answerClass4Label).equals(this.label)) {
                        for (Pattern s : pats) {
                            posWords.add(new Pair<Pattern, String>(s, longestMatchingPhrase));
                        }
                        continue;
                    }
                    // Otherwise the token is negative when one of the label's ignore classes is
                    // true on it, or its word or lemma is among the other semantic classes' words.
                    boolean negToken = false;
                    Map<Class, Object> ignore = GetPatternsFromDataMultiClass.this.constVars.getIgnoreWordswithClassesDuringSelection().get(this.label);
                    for (Class igCl : ignore.keySet()) {
                        if (!((Boolean)token.get(igCl)).booleanValue()) continue;
                        negToken = true;
                        break;
                    }
                    if (!negToken && (GetPatternsFromDataMultiClass.this.constVars.getOtherSemanticClassesWords().contains(token.word()) || GetPatternsFromDataMultiClass.this.constVars.getOtherSemanticClassesWords().contains(token.lemma()))) {
                        negToken = true;
                    }
                    // Negative and unlabeled pairs use the plain word, not the longest phrase.
                    for (Pattern sindex : pats) {
                        if (negToken) {
                            negWords.add(new Pair<Pattern, String>(sindex, tokenWordOrLemma));
                            continue;
                        }
                        unlabWords.add(new Pair<Pattern, String>(sindex, tokenWordOrLemma));
                    }
                }
            }
            return new Triple<List<Pair<Integer, String>>, List<Pair<Integer, String>>, List<Pair<Integer, String>>>(posWords, negWords, unlabWords);
        }
    }

    /**
     * Task that labels the tokens of a batch of sentences for one label by matching seed phrases.
     *
     * <p>Each seed phrase is split on whitespace and located in a sentence via
     * {@code getSubListIndex}. Matched tokens receive the label plus bookkeeping annotations
     * (seed-labeled flag, longest matched phrase, all matched phrases); every other token
     * receives the background symbol.
     */
    public static class LabelWithSeedWords
    implements Callable<Map<String, List<CoreLabel>>> {
        Set<String[]> seedwordsTokens = new HashSet<String[]>();
        Map<String, List<CoreLabel>> sents;
        List<String> keyset;
        Class labelClass;
        HashSet<String> seenFuzzyMatches = new HashSet();
        String label;
        int minLen4FuzzyForPattern;
        String backgroundSymbol = "O";
        Set<String> dictWords = null;
        Function<CoreLabel, String> stringTransformation;

        /**
         * @param seedwords seed phrases for the label; each is split on whitespace into tokens
         * @param sents sentences by id
         * @param keyset ids of the sentences this task labels
         * @param labelclass annotation key under which the label or background symbol is stored
         * @param label the label to assign to matched tokens
         * @param minLen4FuzzyForPattern passed through to {@code getSubListIndex}
         * @param backgroundSymbol value stored on tokens that match no seed phrase
         * @param dictWords passed through to {@code getSubListIndex}
         * @param stringTransformation computes the processed text stored on every token
         */
        public LabelWithSeedWords(Set<String> seedwords, Map<String, List<CoreLabel>> sents, List<String> keyset, Class labelclass, String label, int minLen4FuzzyForPattern, String backgroundSymbol, Set<String> dictWords, Function<CoreLabel, String> stringTransformation) {
            this.sents = sents;
            this.keyset = keyset;
            this.labelClass = labelclass;
            this.label = label;
            this.minLen4FuzzyForPattern = minLen4FuzzyForPattern;
            this.backgroundSymbol = backgroundSymbol;
            this.dictWords = dictWords;
            this.stringTransformation = stringTransformation;
            for (String seed : seedwords) {
                this.seedwordsTokens.add(seed.split("\\s+"));
            }
        }

        /**
         * Labels every sentence named in {@code keyset}.
         *
         * @return the processed sentences by id (the same token objects, annotated in place)
         * @throws RuntimeException if a token has no lemma
         */
        @Override
        public Map<String, List<CoreLabel>> call() throws Exception {
            Map<String, List<CoreLabel>> labeledSents = new HashMap<String, List<CoreLabel>>();
            for (String sentId : this.keyset) {
                List<CoreLabel> sentence = this.sents.get(sentId);
                String[] words = new String[sentence.size()];
                String[] lemmas = new String[sentence.size()];

                // Pass 1: store the processed text on each token and collect words and lemmas.
                int pos = 0;
                for (CoreLabel tok : sentence) {
                    tok.set(PatternsAnnotations.ProcessedTextAnnotation.class, this.stringTransformation.apply(tok));
                    words[pos] = tok.word();
                    String lemma = tok.lemma();
                    if (lemma == null) {
                        throw new RuntimeException("how come lemma is null");
                    }
                    lemmas[pos] = lemma;
                    pos++;
                }

                // Pass 2: find every seed phrase occurrence and record it per token position.
                boolean[] isSeedToken = new boolean[words.length];
                CollectionValuedMap<Integer, String> phrasesAt = new CollectionValuedMap<Integer, String>();
                HashMap<Integer, String> longestAt = new HashMap<Integer, String>();
                for (Object[] seedTokens : this.seedwordsTokens) {
                    List<Integer> starts = GetPatternsFromDataMultiClass.getSubListIndex((String[])seedTokens, words, lemmas, this.dictWords, this.seenFuzzyMatches, this.minLen4FuzzyForPattern);
                    if (starts != null && !starts.isEmpty()) {
                        for (int start : starts) {
                            for (int offset = 0; offset < seedTokens.length; ++offset) {
                                int at = start + offset;
                                String phrase = StringUtils.join(seedTokens, " ");
                                phrasesAt.add(at, phrase);
                                String previous = (String)longestAt.get(at);
                                String keep;
                                if (previous != null && previous.length() > phrase.length()) {
                                    keep = previous;
                                } else {
                                    keep = phrase;
                                }
                                longestAt.put(at, keep);
                                isSeedToken[at] = true;
                            }
                        }
                    }
                }

                // Pass 3: write the annotations onto the tokens.
                int idx = 0;
                for (CoreLabel tok : sentence) {
                    // NOTE(review): the instance test checks the stored value against the
                    // annotation key class itself; preserved exactly as found - confirm intent.
                    if (!(tok.containsKey(PatternsAnnotations.MatchedPhrases.class) && PatternsAnnotations.MatchedPhrases.class.isInstance(tok.get(PatternsAnnotations.MatchedPhrases.class)))) {
                        tok.set(PatternsAnnotations.MatchedPhrases.class, new CollectionValuedMap());
                    }
                    if (!tok.containsKey(PatternsAnnotations.LongestMatchedPhraseForEachLabel.class)) {
                        tok.set(PatternsAnnotations.LongestMatchedPhraseForEachLabel.class, new HashMap());
                    }
                    if (isSeedToken[idx]) {
                        tok.set(this.labelClass, this.label);
                        if (!tok.containsKey(PatternsAnnotations.SeedLabeledOrNot.class)) {
                            tok.set(PatternsAnnotations.SeedLabeledOrNot.class, new HashMap());
                        }
                        ((Map)tok.get(PatternsAnnotations.SeedLabeledOrNot.class)).put(this.labelClass, true);
                        // Keep whichever is longer: the phrase already stored for this label or
                        // the longest phrase matched in this run.
                        String stored = (String)((Map)tok.get(PatternsAnnotations.LongestMatchedPhraseForEachLabel.class)).get(this.label);
                        String winner;
                        if (stored != null && stored.length() > ((String)longestAt.get(idx)).length()) {
                            winner = stored;
                        } else {
                            winner = (String)longestAt.get(idx);
                        }
                        ((Map)tok.get(PatternsAnnotations.LongestMatchedPhraseForEachLabel.class)).put(this.label, winner);
                        ((CollectionValuedMap)tok.get(PatternsAnnotations.MatchedPhrases.class)).addAll(this.label, phrasesAt.get(idx));
                        Redwood.log(ConstantsAndVariables.extremedebug, "labeling " + tok.word() + " or its lemma " + tok.lemma() + " as " + this.label + " because of the dict phrases " + (Set)phrasesAt.get(idx));
                    } else {
                        tok.set(this.labelClass, this.backgroundSymbol);
                    }
                    idx++;
                }
                labeledSents.put(sentId, sentence);
            }
            return labeledSents;
        }
    }

    /**
     * Names of the selectable word-scoring methods. How each method is computed is defined by
     * the code that consumes these constants, not by this declaration.
     */
    static enum WordScoring {
        BPB,
        WEIGHTEDNORM;

    }

    /**
     * Names of the selectable pattern-scoring methods. How each method is computed is defined by
     * the code that consumes these constants, not by this declaration.
     */
    public static enum PatternScoring {
        F1SeedPattern,
        RlogF,
        RlogFPosNeg,
        RlogFUnlabNeg,
        RlogFNeg,
        PhEvalInPat,
        PhEvalInPatLogP,
        PosNegOdds,
        YanGarber02,
        PosNegUnlabOdds,
        RatioAll,
        LOGREG,
        LOGREGlogP,
        SqrtAllRatio,
        LinICML03,
        kNN,
        Precision,
        Recall,
        FMeasure;

    }
}

