/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.patterns.surface;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.tokensregex.Env;
import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern;
import edu.stanford.nlp.patterns.surface.ApplyPatterns;
import edu.stanford.nlp.patterns.surface.ConstantsAndVariables;
import edu.stanford.nlp.patterns.surface.Data;
import edu.stanford.nlp.patterns.surface.GetPatternsFromDataMultiClass;
import edu.stanford.nlp.patterns.surface.Pattern;
import edu.stanford.nlp.patterns.surface.PatternsForEachToken;
import edu.stanford.nlp.patterns.surface.PhraseScorer;
import edu.stanford.nlp.patterns.surface.ScorePhrasesAverageFeatures;
import edu.stanford.nlp.patterns.surface.SurfacePattern;
import edu.stanford.nlp.patterns.surface.Token;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.stats.Counters;
import edu.stanford.nlp.stats.TwoDimensionalCounter;
import edu.stanford.nlp.stats.TwoDimensionalCounterInterface;
import edu.stanford.nlp.util.CollectionUtils;
import edu.stanford.nlp.util.CollectionValuedMap;
import edu.stanford.nlp.util.Execution;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.Triple;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import javax.json.Json;
import javax.json.JsonArray;
import javax.json.JsonArrayBuilder;
import javax.json.JsonObjectBuilder;
import javax.json.JsonReader;
import javax.json.JsonValue;

public class ScorePhrases<E extends Pattern> {
    Map<String, Boolean> writtenInJustification = new HashMap<String, Boolean>();
    ConstantsAndVariables<E> constVars = null;
    @Execution.Option(name="phraseScorerClass")
    Class<? extends PhraseScorer> phraseScorerClass = ScorePhrasesAverageFeatures.class;
    PhraseScorer phraseScorer = null;

    /**
     * Creates a phrase-scoring driver.
     *
     * <p>Options from {@code props} are applied to this object first (the
     * {@code phraseScorerClass} field carries an {@code @Execution.Option}
     * annotation), then the scorer is instantiated reflectively and the same
     * properties are applied to the new scorer instance.
     *
     * @param props properties handed to {@code Execution.fillOptions} for both
     *              this object and the created scorer
     * @param cv    shared constants/variables, stored and passed to the scorer's
     *              constructor
     * @throws RuntimeException wrapping any reflective failure while creating
     *                          the scorer
     */
    public ScorePhrases(Properties props, ConstantsAndVariables cv) {
        // Must run before instantiation: the scorer class is read afterwards.
        Execution.fillOptions(this, props);
        this.constVars = cv;
        this.phraseScorer = this.instantiatePhraseScorer();
        Execution.fillOptions(this.phraseScorer, props);
    }

    /** Instantiates {@code phraseScorerClass} through its one-argument {@code ConstantsAndVariables} constructor. */
    private PhraseScorer instantiatePhraseScorer() {
        try {
            return this.phraseScorerClass
                .getConstructor(ConstantsAndVariables.class)
                .newInstance(this.constVars);
        }
        catch (NoSuchMethodException missingConstructor) {
            throw new RuntimeException(missingConstructor);
        }
        catch (InstantiationException notInstantiable) {
            throw new RuntimeException(notInstantiable);
        }
        catch (IllegalAccessException notAccessible) {
            throw new RuntimeException(notAccessible);
        }
        catch (InvocationTargetException constructorFailed) {
            throw new RuntimeException(constructorFailed);
        }
    }

    /**
     * Selects phrases from {@code newdt}, visiting them in the order produced by
     * the iterator of {@code Counters.toPriorityQueue(newdt)}.
     *
     * <p>Selection stops once {@code constVars.numWordsToAdd} phrases were
     * accepted or a phrase scores below {@code thresholdWordExtract}. A phrase
     * is skipped when it is a key of
     * {@code useThresholdNumPatternsForTheseWords} and has fewer non-redundant
     * patterns than {@code constVars.thresholdNumPatternsApplied}, or when it
     * fuzzily matches an entry of {@code ignoreWords} (in which case it is also
     * added to {@code ignoreWords}).
     *
     * @param newdt                                phrase scores
     * @param terms                                phrase-to-pattern counts used for the redundancy check
     * @param useThresholdNumPatternsForTheseWords phrases subject to the pattern-count threshold
     * @param ignoreWords                          phrases to reject on fuzzy match; may be {@code null}; mutated
     * @param thresholdWordExtract                 minimum score a phrase needs
     * @return the accepted phrases with their scores from {@code newdt}
     */
    public Counter<String> chooseTopWords(Counter<String> newdt, TwoDimensionalCounter<String, E> terms, Counter<String> useThresholdNumPatternsForTheseWords, Set<String> ignoreWords, double thresholdWordExtract) {
        Iterator<String> ranked = Counters.toPriorityQueue(newdt).iterator();
        Counter<String> selected = new ClassicCounter<String>();
        while (ranked.hasNext()) {
            if (!(selected.size() < this.constVars.numWordsToAdd)) {
                break;
            }
            String candidate = ranked.next();
            double score = newdt.getCount(candidate);
            if (score < thresholdWordExtract) {
                // The candidate that ends the loop has been consumed and is not reported below.
                break;
            }
            assert (score != Double.POSITIVE_INFINITY);

            boolean tooFewPatterns = useThresholdNumPatternsForTheseWords.containsKey(candidate)
                && this.numNonRedundantPatterns(terms, candidate) < this.constVars.thresholdNumPatternsApplied;
            if (tooFewPatterns) {
                Redwood.log("extremePatDebug", "Not adding " + candidate + " because the number of non redundant patterns are below threshold: " + terms.getCounter(candidate).keySet());
                continue;
            }

            String matchedFuzzy = null;
            if (this.constVars.minLen4FuzzyForPattern > 0 && ignoreWords != null) {
                matchedFuzzy = ConstantsAndVariables.containsFuzzy(ignoreWords, candidate, this.constVars.minLen4FuzzyForPattern);
            }
            if (matchedFuzzy != null) {
                Redwood.log("extremePatDebug", "not adding " + candidate + " because it matched " + matchedFuzzy + " in common English word");
                ignoreWords.add(candidate);
            } else {
                Redwood.log("extremePatDebug", "adding word " + candidate);
                selected.setCount(candidate, score);
            }
        }

        // Report up to five of the remaining candidates for debugging.
        StringBuilder nextFive = new StringBuilder();
        for (int shown = 0; shown < 5 && ranked.hasNext(); ++shown) {
            String runnerUp = ranked.next();
            nextFive.append(";\t").append(runnerUp).append(":").append(newdt.getCount(runnerUp));
        }
        Redwood.log(Redwood.DBG, "Next five phrases were " + nextFive);
        return selected;
    }

    /**
     * Removes every first-level key listed in {@code removeKeysCollection}
     * from {@code counter}.
     *
     * @param counter              counter to modify in place
     * @param removeKeysCollection keys to remove
     */
    public static <E, F> void removeKeys(TwoDimensionalCounter<E, F> counter, Collection<E> removeKeysCollection) {
        Iterator<E> doomed = removeKeysCollection.iterator();
        while (doomed.hasNext()) {
            counter.remove(doomed.next());
        }
    }

    /**
     * Counts the patterns recorded for {@code w} whose string form neither
     * contains, nor is contained in, the string form of any pattern that comes
     * later in the key set's array order.
     *
     * @param terms phrase-to-pattern counts
     * @param w     phrase whose patterns are inspected
     * @return the number of such patterns (an integer value returned as double)
     */
    private double numNonRedundantPatterns(TwoDimensionalCounter<String, E> terms, String w) {
        Object[] patterns = terms.getCounter(w).keySet().toArray();
        String[] rendered = new String[patterns.length];
        for (int k = 0; k < patterns.length; ++k) {
            rendered[k] = patterns[k].toString();
        }

        int nonRedundant = 0;
        nextPattern:
        for (int i = 0; i < rendered.length; ++i) {
            for (int j = i + 1; j < rendered.length; ++j) {
                if (rendered[j].contains(rendered[i]) || rendered[i].contains(rendered[j])) {
                    // Overlaps a later pattern: do not count this one.
                    continue nextPattern;
                }
            }
            ++nonRedundant;
        }
        return nonRedundant;
    }

    /**
     * Learns new phrases for {@code label} and adds them to the label
     * dictionary held by {@code constVars}.
     *
     * <p>If {@code Data.processedDataFreq} has not been set yet, it is
     * initialised to an empty counter here and the private worker is asked to
     * fill it.
     *
     * @return the phrases selected in this call, with their scores
     * @throws IOException            propagated from the worker
     * @throws ClassNotFoundException propagated from the worker
     */
    public Counter<String> learnNewPhrases(String label, PatternsForEachToken patternsForEachToken, Counter<E> patternsLearnedThisIter, Counter<E> allSelectedPatterns, CollectionValuedMap<E, Triple<String, Integer, Integer>> tokensMatchedPatterns, Counter<String> scoreForAllWordsThisIteration, TwoDimensionalCounter<String, E> terms, TwoDimensionalCounter<String, E> wordsPatExtracted, TwoDimensionalCounter<E, String> patternsAndWords4Label, String identifier, Set<String> ignoreWords) throws IOException, ClassNotFoundException {
        boolean computeProcDataFreq = Data.processedDataFreq == null;
        if (computeProcDataFreq) {
            Data.processedDataFreq = new ClassicCounter<String>();
            assert (Data.rawFreq != null);
        }

        Set<String> alreadyKnown = this.constVars.getLabelDictionary().get(label);
        Counter<String> learned = this.learnNewPhrasesPrivate(
            label, patternsForEachToken, patternsLearnedThisIter, allSelectedPatterns,
            alreadyKnown, tokensMatchedPatterns, scoreForAllWordsThisIteration,
            terms, wordsPatExtracted, patternsAndWords4Label, identifier, ignoreWords,
            computeProcDataFreq);

        this.constVars.addLabelDictionary(label, learned.keySet());
        return learned;
    }

    /**
     * Applies a single pattern to the given sentences, splitting the sentence
     * ids across worker tasks, and merges every task's result into the two
     * output arguments.
     *
     * <p>The sentence ids are split into contiguous, disjoint chunks that
     * together cover every id exactly once (ceiling division). The previous
     * chunking divided by {@code numThreads - 1} with floor division, which
     * handed the complete id list to both tasks when two threads were
     * configured and could leave trailing sentences unprocessed when many
     * threads were configured.
     *
     * @param sents                     sentences keyed by sentence id
     * @param label                     label whose environment is used to compile the pattern
     * @param pattern                   pattern to apply
     * @param wordsandLemmaPatExtracted receives the (word, lemma)-to-pattern counts of all tasks
     * @param matchedTokensByPat        receives the matched-token triples of all tasks
     * @throws RuntimeException wrapping any failure while waiting for or merging a task result
     */
    @SuppressWarnings({"unchecked", "rawtypes"})
    void runParallelApplyPats(Map<String, List<CoreLabel>> sents, String label, E pattern, TwoDimensionalCounter<Pair<String, String>, E> wordsandLemmaPatExtracted, CollectionValuedMap<E, Triple<String, Integer, Integer>> matchedTokensByPat) {
        Redwood.log(Redwood.DBG, "Applying pattern " + pattern + " to a total of " + sents.size() + " sentences ");
        List<String> notAllowedClasses = new ArrayList<String>();
        List<String> sentids = CollectionUtils.toList(sents.keySet());
        if (this.constVars.doNotExtractPhraseAnyWordLabeledOtherClass) {
            for (String l : this.constVars.getAnswerClass().keySet()) {
                if (l.equals(label)) {
                    continue;
                }
                notAllowedClasses.add(l + ":" + l);
            }
            notAllowedClasses.add("OTHERSEM:OTHERSEM");
        }

        Map<TokenSequencePattern, E> patternsLearnedThisIterConverted = new HashMap<TokenSequencePattern, E>();
        TokenSequencePattern pat = TokenSequencePattern.compile(this.constVars.env.get(label), ((SurfacePattern)pattern).toString(notAllowedClasses));
        patternsLearnedThisIterConverted.put(pat, pattern);

        // Small inputs are handled by a single task.
        int total = sentids.size();
        int numThreads = total < 50 ? 1 : Math.max(1, this.constVars.numThreads);
        // Ceiling division: numThreads chunks of this size always cover all ids.
        int chunkSize = (total + numThreads - 1) / numThreads;

        ExecutorService executor = Executors.newFixedThreadPool(numThreads);
        boolean finished = false;
        try {
            List<Future<?>> pending = new ArrayList<Future<?>>();
            for (int start = 0; start < total; start += chunkSize) {
                List<String> chunk = sentids.subList(start, Math.min(total, start + chunkSize));
                ApplyPatterns task = new ApplyPatterns(sents, chunk, patternsLearnedThisIterConverted, label, this.constVars.removeStopWordsFromSelectedPhrases, this.constVars.removePhrasesWithStopWords, this.constVars);
                pending.add(executor.submit(task));
            }
            for (Future<?> future : pending) {
                try {
                    Pair result = (Pair)future.get();
                    Redwood.log(ConstantsAndVariables.extremedebug, "Pattern " + pat + " extracted phrases " + result.first());
                    wordsandLemmaPatExtracted.addAll((TwoDimensionalCounterInterface)result.first());
                    matchedTokensByPat.addAll((CollectionValuedMap)result.second());
                }
                catch (InterruptedException e) {
                    // Keep the interrupt visible to callers further up the stack.
                    Thread.currentThread().interrupt();
                    throw new RuntimeException(e);
                }
                catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }
            finished = true;
        }
        finally {
            // Never leave the pool running: cancel outstanding work on failure.
            if (finished) {
                executor.shutdown();
            } else {
                executor.shutdownNow();
            }
        }
    }

    /**
     * Collects, for every pattern, the sentences whose ids are listed for it.
     *
     * <p>When {@code constVars.batchProcessSents} is set, the sentences are
     * read from the files that {@code Data.sentId2File} associates with the
     * requested ids; otherwise they are taken from {@code Data.sents}.
     *
     * @param sentids sentence ids to fetch, grouped by pattern
     * @return for each pattern in {@code sentids}, a map from sentence id to
     *         its tokens (empty when none of its ids was found)
     * @throws RuntimeException wrapping an {@code IOException} or
     *                          {@code ClassNotFoundException} raised while
     *                          reading a sentence file
     */
    @SuppressWarnings("unchecked")
    protected Map<E, Map<String, List<CoreLabel>>> getSentences(Map<E, Set<String>> sentids) {
        try {
            boolean batch = this.constVars.batchProcessSents;
            Set<File> files = new HashSet<File>();
            Map<E, Map<String, List<CoreLabel>>> sentsAll = new HashMap<E, Map<String, List<CoreLabel>>>();
            // Reverse index: sentence id -> patterns that asked for it.
            CollectionValuedMap<String, E> sentIds2Pats = new CollectionValuedMap<String, E>();
            for (Map.Entry<E, Set<String>> entry : sentids.entrySet()) {
                E pat = entry.getKey();
                if (!sentsAll.containsKey(pat)) {
                    sentsAll.put(pat, new HashMap<String, List<CoreLabel>>());
                }
                for (String sentId : entry.getValue()) {
                    sentIds2Pats.add(sentId, pat);
                    if (!batch) {
                        continue;
                    }
                    File file = Data.sentId2File.get(sentId);
                    assert (file != null) : "How come no file for sentence " + sentId;
                    files.add(file);
                }
            }
            if (batch) {
                for (File file : files) {
                    Map<String, List<CoreLabel>> sentsf = (Map<String, List<CoreLabel>>)IOUtils.readObjectFromFile(file);
                    this.distributeSentences(sentsf, sentIds2Pats, sentsAll);
                }
            } else {
                this.distributeSentences(Data.sents, sentIds2Pats, sentsAll);
            }
            return sentsAll;
        }
        catch (ClassNotFoundException e) {
            throw new RuntimeException(e);
        }
        catch (IOException e1) {
            throw new RuntimeException(e1);
        }
    }

    /** Copies each sentence of {@code source} into the per-pattern map of every pattern that requested its id. */
    private void distributeSentences(Map<String, List<CoreLabel>> source, CollectionValuedMap<String, E> sentIds2Pats, Map<E, Map<String, List<CoreLabel>>> sentsAll) {
        for (Map.Entry<String, List<CoreLabel>> sentence : source.entrySet()) {
            for (E pat : sentIds2Pats.get(sentence.getKey())) {
                sentsAll.get(pat).put(sentence.getKey(), sentence.getValue());
            }
        }
    }

    /**
     * Applies every pattern in {@code patterns} to the sentences that the
     * inverted index returns for it, accumulating results in the two output
     * arguments.
     *
     * <p>Before matching, the variables of {@code Token.env} are copied into
     * each environment held in {@code constVars.env}.
     *
     * @param patterns                  patterns to apply (only the keys are used)
     * @param label                     label the patterns belong to
     * @param wordsandLemmaPatExtracted receives (word, lemma)-to-pattern counts
     * @param matchedTokensByPat        receives matched-token triples per pattern
     */
    public void applyPats(Counter<E> patterns, String label, TwoDimensionalCounter<Pair<String, String>, E> wordsandLemmaPatExtracted, CollectionValuedMap<E, Triple<String, Integer, Integer>> matchedTokensByPat) {
        for (Env labelEnv : this.constVars.env.values()) {
            labelEnv.getVariables().putAll(Token.env.getVariables());
        }

        Map<E, Map<String, List<CoreLabel>>> sentencesForPatterns =
            this.getSentences(this.constVars.invertedIndex.queryIndex(patterns.keySet()));
        for (Map.Entry<E, Map<String, List<CoreLabel>>> perPattern : sentencesForPatterns.entrySet()) {
            E pat = perPattern.getKey();
            Map<String, List<CoreLabel>> candidateSents = perPattern.getValue();
            this.runParallelApplyPats(candidateSents, label, pat, wordsandLemmaPatExtracted, matchedTokensByPat);
        }

        Redwood.log(Redwood.DBG, "# words/lemma and pattern pairs are " + wordsandLemmaPatExtracted.size());
    }

    /**
     * Counts (word, lemma)/pattern pairs from the patterns already stored for
     * each token, without running the patterns over the text.
     *
     * <p>For every token position of every sentence, each pattern in
     * {@code patternsLearnedThisIter} that is also stored for that position
     * increments the count of the token's (word, lemma) pair.
     *
     * @param sents                     sentences keyed by sentence id
     * @param patternsForEachToken      source of the stored per-token patterns
     * @param patternsLearnedThisIter   patterns to look for (only the keys are used)
     * @param wordsandLemmaPatExtracted receives the counts
     * @throws RuntimeException if no patterns are stored for a sentence
     */
    @SuppressWarnings("unchecked")
    private void statsWithoutApplyingPatterns(Map<String, List<CoreLabel>> sents, PatternsForEachToken patternsForEachToken, Counter<E> patternsLearnedThisIter, TwoDimensionalCounter<Pair<String, String>, E> wordsandLemmaPatExtracted) {
        for (Map.Entry<String, List<CoreLabel>> sentEn : sents.entrySet()) {
            // Token index within the sentence -> patterns stored for that token.
            Map<Integer, Set<E>> pat4Sent = patternsForEachToken.getPatternsForAllTokens(sentEn.getKey());
            if (pat4Sent == null) {
                throw new RuntimeException("How come there are no patterns for " + sentEn.getKey());
            }
            for (Map.Entry<Integer, Set<E>> en : pat4Sent.entrySet()) {
                Set<E> p1 = en.getValue();
                // Built lazily: most tokens match none of the learned patterns.
                Pair<String, String> wordAndLemma = null;
                for (E index : patternsLearnedThisIter.keySet()) {
                    if (!p1.contains(index)) {
                        continue;
                    }
                    if (wordAndLemma == null) {
                        CoreLabel token = sentEn.getValue().get(en.getKey());
                        wordAndLemma = new Pair<String, String>(token.word(), token.lemma());
                    }
                    wordsandLemmaPatExtracted.incrementCount(wordAndLemma, index);
                }
            }
        }
    }

    /*
     * WARNING - void declaration
     */
    private Counter<String> learnNewPhrasesPrivate(String label, PatternsForEachToken patternsForEachToken, Counter<E> patternsLearnedThisIter, Counter<E> allSelectedPatterns, Set<String> alreadyIdentifiedWords, CollectionValuedMap<E, Triple<String, Integer, Integer>> matchedTokensByPat, Counter<String> scoreForAllWordsThisIteration, TwoDimensionalCounter<String, E> terms, TwoDimensionalCounter<String, E> wordsPatExtracted, TwoDimensionalCounter<E, String> patternsAndWords4Label, String identifier, Set<String> ignoreWords, boolean computeProcDataFreq) throws IOException, ClassNotFoundException {
        TwoDimensionalCounter wordsandLemmaPatExtracted = new TwoDimensionalCounter();
        if (this.constVars.doNotApplyPatterns) {
            Iterator<Pair<Map<String, List<CoreLabel>>, File>> sentsIter = new ConstantsAndVariables.DataSentsIterator(this.constVars.batchProcessSents);
            while (((ConstantsAndVariables.DataSentsIterator)sentsIter).hasNext()) {
                Object object = ((ConstantsAndVariables.DataSentsIterator)sentsIter).next();
                this.statsWithoutApplyingPatterns((Map)((Pair)object).first(), patternsForEachToken, patternsLearnedThisIter, wordsandLemmaPatExtracted);
            }
        } else if (patternsLearnedThisIter.size() > 0) {
            this.applyPats(patternsLearnedThisIter, label, wordsandLemmaPatExtracted, matchedTokensByPat);
        }
        if (computeProcDataFreq) {
            if (!this.phraseScorer.wordFreqNorm.equals((Object)PhraseScorer.Normalization.NONE)) {
                Redwood.log(new Object[]{Redwood.DBG, "computing processed freq"});
                for (Map.Entry entry : Data.rawFreq.entrySet()) {
                    double in = (Double)entry.getValue();
                    if (this.phraseScorer.wordFreqNorm.equals((Object)PhraseScorer.Normalization.SQRT)) {
                        in = Math.sqrt(in);
                    } else if (this.phraseScorer.wordFreqNorm.equals((Object)PhraseScorer.Normalization.LOG)) {
                        in = 1.0 + Math.log(in);
                    } else {
                        throw new RuntimeException("can't understand the normalization");
                    }
                    Data.processedDataFreq.setCount((String)entry.getKey(), in);
                }
            } else {
                Data.processedDataFreq = Data.rawFreq;
            }
        }
        if (this.constVars.wordScoring.equals((Object)GetPatternsFromDataMultiClass.WordScoring.WEIGHTEDNORM)) {
            void var16_23;
            for (Pair<Map<String, List<CoreLabel>>, File> pair : wordsandLemmaPatExtracted.firstKeySet()) {
                if (!this.constVars.getOtherSemanticClassesWords().contains(pair.first()) && !this.constVars.getOtherSemanticClassesWords().contains(pair.second())) {
                    terms.addAll((String)((Object)pair.first()), wordsandLemmaPatExtracted.getCounter(pair));
                }
                wordsPatExtracted.addAll((String)((Object)pair.first()), wordsandLemmaPatExtracted.getCounter(pair));
            }
            ScorePhrases.removeKeys(terms, ConstantsAndVariables.getStopWords());
            Counter<String> phraseScores = this.phraseScorer.scorePhrases(label, terms, wordsPatExtracted, allSelectedPatterns, alreadyIdentifiedWords, false);
            if (ignoreWords != null && !ignoreWords.isEmpty()) {
                Set<String> set = CollectionUtils.unionAsSet(ignoreWords, this.constVars.getOtherSemanticClassesWords());
            } else {
                Set<String> set = this.constVars.getOtherSemanticClassesWords();
            }
            var16_23.addAll((Collection)this.constVars.getLabelDictionary().get(label));
            Counter<String> finalwords = this.chooseTopWords(phraseScores, terms, phraseScores, (Set<String>)var16_23, this.constVars.thresholdWordExtract);
            scoreForAllWordsThisIteration.clear();
            Counters.addInPlace(scoreForAllWordsThisIteration, phraseScores);
            Redwood.log(ConstantsAndVariables.minimaldebug, "\n\n## Selected Words for " + label + " : " + Counters.toSortedString(finalwords, finalwords.size(), "%1$s:%2$.2f", "\t"));
            if (this.constVars.outDir != null && !this.constVars.outDir.isEmpty()) {
                String outputdir = this.constVars.outDir + "/" + identifier + "/" + label;
                IOUtils.ensureDir(new File(outputdir));
                TwoDimensionalCounter<String, String> reasonForWords = new TwoDimensionalCounter<String, String>();
                for (String word : finalwords.keySet()) {
                    for (Pattern l : ((ClassicCounter)wordsPatExtracted.getCounter((Object)word)).keySet()) {
                        Iterator iterator = ((ClassicCounter)patternsAndWords4Label.getCounter(l)).iterator();
                        while (iterator.hasNext()) {
                            String w2 = (String)iterator.next();
                            reasonForWords.incrementCount(word, w2);
                        }
                    }
                }
                Redwood.log(ConstantsAndVariables.minimaldebug, "Saving output in " + (String)outputdir);
                String filename = (String)outputdir + "/words.json";
                JsonArrayBuilder obj = Json.createArrayBuilder();
                if (this.writtenInJustification.containsKey(label) && this.writtenInJustification.get(label).booleanValue()) {
                    JsonReader jsonReader = Json.createReader((InputStream)new BufferedInputStream(new FileInputStream(filename)));
                    JsonArray objarr = jsonReader.readArray();
                    for (JsonValue o : objarr) {
                        obj.add(o);
                    }
                    jsonReader.close();
                }
                JsonArrayBuilder objThisIter = Json.createArrayBuilder();
                for (String w : reasonForWords.firstKeySet()) {
                    JsonObjectBuilder objinner = Json.createObjectBuilder();
                    JsonArrayBuilder l = Json.createArrayBuilder();
                    for (String w2 : ((ClassicCounter)reasonForWords.getCounter((Object)w)).keySet()) {
                        l.add(w2);
                    }
                    JsonArrayBuilder pats = Json.createArrayBuilder();
                    Iterator iterator = ((ClassicCounter)wordsPatExtracted.getCounter((Object)w)).iterator();
                    while (iterator.hasNext()) {
                        Pattern p = (Pattern)iterator.next();
                        pats.add(p.toStringSimple());
                    }
                    objinner.add("reasonwords", l);
                    objinner.add("patterns", pats);
                    objinner.add("score", finalwords.getCount(w));
                    objinner.add("entity", w);
                    objThisIter.add((JsonValue)objinner.build());
                }
                obj.add(objThisIter);
                IOUtils.writeStringToFile(obj.build().toString(), filename, "utf8");
                this.writtenInJustification.put(label, true);
            }
            if (this.constVars.justify) {
                Redwood.log(new Object[]{Redwood.DBG, "\nJustification for phrases:\n"});
                for (String word : finalwords.keySet()) {
                    Redwood.log(new Object[]{Redwood.DBG, "Phrase " + word + " extracted because of patterns: \t" + Counters.toSortedString(wordsPatExtracted.getCounter((Object)word), ((ClassicCounter)wordsPatExtracted.getCounter((Object)word)).size(), "%1$s:%2$f", "\n")});
                }
            }
            return finalwords;
        }
        if (this.constVars.wordScoring.equals((Object)GetPatternsFromDataMultiClass.WordScoring.BPB)) {
            Counters.addInPlace(terms, wordsPatExtracted);
            ClassicCounter<String> maxPatWeightTerms = new ClassicCounter<String>();
            HashMap hashMap = new HashMap();
            for (Map.Entry<String, ClassicCounter<E>> en : terms.entrySet()) {
                ClassicCounter<Pattern> weights = new ClassicCounter<Pattern>();
                for (Pattern k : en.getValue().keySet()) {
                    weights.setCount(k, patternsLearnedThisIter.getCount(k));
                }
                maxPatWeightTerms.setCount(en.getKey(), Counters.max(weights));
                hashMap.put(en.getKey(), Counters.argmax(weights));
            }
            Counters.removeKeys(maxPatWeightTerms, alreadyIdentifiedWords);
            double maxvalue = Counters.max(maxPatWeightTerms);
            Set<String> words = Counters.keysAbove(maxPatWeightTerms, maxvalue - 1.0E-10);
            String bestw = null;
            if (words.size() > 1) {
                double max = Double.NEGATIVE_INFINITY;
                for (String w : words) {
                    if (!(terms.getCount(w, hashMap.get(w)) > max)) continue;
                    max = terms.getCount(w, hashMap.get(w));
                    bestw = w;
                }
            } else if (words.size() == 1) {
                bestw = words.iterator().next();
            } else {
                return new ClassicCounter<String>();
            }
            Redwood.log(ConstantsAndVariables.minimaldebug, "Selected Words: " + bestw);
            return Counters.asCounter(Arrays.asList(bestw));
        }
        throw new RuntimeException("wordscoring " + (Object)((Object)this.constVars.wordScoring) + " not identified");
    }

    /**
     * Returns whatever the configured phrase scorer reports as its learned
     * scores.
     *
     * @return the result of {@code phraseScorer.getLearnedScores()}
     */
    Counter<String> getLearnedScores() {
        PhraseScorer scorer = this.phraseScorer;
        return scorer.getLearnedScores();
    }
}

