edu.stanford.nlp.dcoref
Class SieveCoreferenceSystem

java.lang.Object
  extended by edu.stanford.nlp.dcoref.SieveCoreferenceSystem

public class SieveCoreferenceSystem
extends java.lang.Object

Multi-pass Sieve coreference resolution system (see EMNLP 2010 paper).

The main entry point for the API is coref(Document document). The output is a map from each CorefChain ID to its corresponding CorefChain.

Author:
Jenny Finkel, Mihai Surdeanu, Karthik Raghunathan, Heeyoung Lee, Sudarshan Rangarajan

Field Summary
 java.lang.String conllMentionEvalScript
          Path for the official CoNLL scorer
 int currentSieve
          Current sieve index
 java.util.List<Pair<java.lang.Integer,java.lang.Integer>> linksCountInPass
          Counter for links in passes (Pair<correct links, total links>)
static java.util.logging.Logger logger
           
 java.util.List<CorefScorer> scoreBcubed
           
 java.util.List<CorefScorer> scoreMUC
           
 java.util.List<CorefScorer> scorePairwise
          Scores for each pass
 Semantics semantics
          Semantic knowledge: WordNet
 java.lang.String[] sieveClassNames
           
 LogisticClassifier<java.lang.String,java.lang.String> singletonPredictor
           
 
Constructor Summary
SieveCoreferenceSystem(java.util.Properties props)
           
 
Method Summary
static boolean checkClusters(java.util.logging.Logger logger, java.lang.String tag, Document document)
           
 java.util.Map<java.lang.Integer,CorefChain> coref(Document document)
          Extracts coreference clusters.
static void debugPrintMentions(java.io.PrintStream out, java.lang.String tag, java.util.List<java.util.List<Mention>> mentions)
           
 Dictionaries dictionaries()
           
 boolean doScore()
           
static java.util.List<java.util.List<Mention>> filterMentionsWithSingletonClusters(Document document, java.util.List<java.util.List<Mention>> mentions)
          Remove singleton clusters
static java.lang.String formatPennTree(Tree parseTree)
          For printing tree in a better format
static java.lang.String getConllEvalSummary(java.lang.String conllMentionEvalScript, java.lang.String goldFile, java.lang.String predictFile)
           
static java.util.List<Pair<IntTuple,IntTuple>> getLinks(java.util.Map<java.lang.Integer,CorefChain> result)
           
static LogisticClassifier<java.lang.String,java.lang.String> getSingletonPredictorFromSerializedFile(java.lang.String serializedFile)
           
 void initScorers()
           
static void main(java.lang.String[] args)
          Needs the following properties: -props 'Location of coref.properties'
 void optimizeSieveOrdering(MentionExtractor mentionExtractor, java.util.Properties props, java.lang.String timestamp)
          Given a set of sieves, selects an optimal ordering for the sieves by iterating over the sieves, selecting the one that gives the best score, and adding sieves one at a time until no more sieves are left.
static void printConllOutput(Document document, java.io.PrintWriter writer, boolean gold)
           
static void printConllOutput(Document document, java.io.PrintWriter writer, boolean gold, boolean filterSingletons)
           
static void printConllOutput(Document document, java.io.PrintWriter writer, java.util.List<java.util.List<Mention>> orderedMentions, boolean gold)
           
 void printF1(boolean printF1First)
           
protected static void printList(java.util.logging.Logger logger, java.lang.String... args)
           
static void printRawDoc(Document document, boolean gold)
          Print raw document for analysis
 void printTopK(java.util.logging.Logger logger, Document document, Semantics semantics)
          Print logs for error analysis
static double runAndScoreCoref(SieveCoreferenceSystem corefSystem, MentionExtractor mentionExtractor, java.util.Properties props, java.lang.String timeStamp)
           
static void runAndScoreCorefDist(java.lang.String runDistCmd, java.util.Properties props, java.lang.String propsFile)
          Runs and scores coreference in distributed mode
static void runConllEval(java.lang.String conllMentionEvalScript, java.lang.String goldFile, java.lang.String predictFile, java.lang.String evalFile, java.lang.String errFile)
           
 Semantics semantics()
           
static java.lang.String signature(java.util.Properties props)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

logger

public static final java.util.logging.Logger logger

conllMentionEvalScript

public final java.lang.String conllMentionEvalScript
Path for the official CoNLL scorer


sieveClassNames

public java.lang.String[] sieveClassNames

semantics

public final Semantics semantics
Semantic knowledge: WordNet


singletonPredictor

public LogisticClassifier<java.lang.String,java.lang.String> singletonPredictor

currentSieve

public int currentSieve
Current sieve index


linksCountInPass

public java.util.List<Pair<java.lang.Integer,java.lang.Integer>> linksCountInPass
Counter for links in passes (Pair<correct links, total links>)


scorePairwise

public java.util.List<CorefScorer> scorePairwise
Scores for each pass


scoreBcubed

public java.util.List<CorefScorer> scoreBcubed

scoreMUC

public java.util.List<CorefScorer> scoreMUC
Constructor Detail

SieveCoreferenceSystem

public SieveCoreferenceSystem(java.util.Properties props)
                       throws java.lang.Exception
Throws:
java.lang.Exception
Method Detail

signature

public static java.lang.String signature(java.util.Properties props)

initScorers

public void initScorers()

doScore

public boolean doScore()

dictionaries

public Dictionaries dictionaries()

semantics

public Semantics semantics()

main

public static void main(java.lang.String[] args)
                 throws java.lang.Exception
Needs the following properties: -props 'Location of coref.properties'

Throws:
java.lang.Exception

runAndScoreCoref

public static double runAndScoreCoref(SieveCoreferenceSystem corefSystem,
                                      MentionExtractor mentionExtractor,
                                      java.util.Properties props,
                                      java.lang.String timeStamp)
                               throws java.lang.Exception
Throws:
java.lang.Exception

runAndScoreCorefDist

public static void runAndScoreCorefDist(java.lang.String runDistCmd,
                                        java.util.Properties props,
                                        java.lang.String propsFile)
                                 throws java.lang.Exception
Runs and scores coreference in distributed mode

Throws:
java.lang.Exception

optimizeSieveOrdering

public void optimizeSieveOrdering(MentionExtractor mentionExtractor,
                                  java.util.Properties props,
                                  java.lang.String timestamp)
                           throws java.lang.Exception
Given a set of sieves, selects an optimal ordering for the sieves by iterating over the sieves, selecting the one that gives the best score, and adding sieves one at a time until no more sieves are left.

Throws:
java.lang.Exception

coref

public java.util.Map<java.lang.Integer,CorefChain> coref(Document document)
                                                  throws java.lang.Exception
Extracts coreference clusters. This is the main API entry point for coreference resolution. Returns a map from each CorefChain ID to its corresponding CorefChain.

Throws:
java.lang.Exception

getSingletonPredictorFromSerializedFile

public static LogisticClassifier<java.lang.String,java.lang.String> getSingletonPredictorFromSerializedFile(java.lang.String serializedFile)

filterMentionsWithSingletonClusters

public static java.util.List<java.util.List<Mention>> filterMentionsWithSingletonClusters(Document document,
                                                                                          java.util.List<java.util.List<Mention>> mentions)
Remove singleton clusters


runConllEval

public static void runConllEval(java.lang.String conllMentionEvalScript,
                                java.lang.String goldFile,
                                java.lang.String predictFile,
                                java.lang.String evalFile,
                                java.lang.String errFile)
                         throws java.io.IOException
Throws:
java.io.IOException

getConllEvalSummary

public static java.lang.String getConllEvalSummary(java.lang.String conllMentionEvalScript,
                                                   java.lang.String goldFile,
                                                   java.lang.String predictFile)
                                            throws java.io.IOException
Throws:
java.io.IOException

printTopK

public void printTopK(java.util.logging.Logger logger,
                      Document document,
                      Semantics semantics)
Print logs for error analysis


printF1

public void printF1(boolean printF1First)

printList

protected static void printList(java.util.logging.Logger logger,
                                java.lang.String... args)

formatPennTree

public static java.lang.String formatPennTree(Tree parseTree)
For printing tree in a better format


printConllOutput

public static void printConllOutput(Document document,
                                    java.io.PrintWriter writer,
                                    boolean gold)

printConllOutput

public static void printConllOutput(Document document,
                                    java.io.PrintWriter writer,
                                    boolean gold,
                                    boolean filterSingletons)

printConllOutput

public static void printConllOutput(Document document,
                                    java.io.PrintWriter writer,
                                    java.util.List<java.util.List<Mention>> orderedMentions,
                                    boolean gold)

printRawDoc

public static void printRawDoc(Document document,
                               boolean gold)
                        throws java.io.FileNotFoundException
Print raw document for analysis

Throws:
java.io.FileNotFoundException

getLinks

public static java.util.List<Pair<IntTuple,IntTuple>> getLinks(java.util.Map<java.lang.Integer,CorefChain> result)

debugPrintMentions

public static void debugPrintMentions(java.io.PrintStream out,
                                      java.lang.String tag,
                                      java.util.List<java.util.List<Mention>> mentions)

checkClusters

public static boolean checkClusters(java.util.logging.Logger logger,
                                    java.lang.String tag,
                                    Document document)


Stanford NLP Group