|
|||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||||
java.lang.Object
  edu.stanford.nlp.ling.tokensregex.PhraseTable
public class PhraseTable
Table used to look up multi-word phrases. This class provides functions for efficiently looking up all instances of known phrases in a document. Phrases can be added to the phrase table using the addPhrase, addPhrases, or readPhrases methods.
| Nested Class Summary | |
|---|---|
static class |
PhraseTable.Phrase
A phrase is a multiword expression |
static class |
PhraseTable.PhraseMatch
Represents a matched phrase |
static class |
PhraseTable.PhraseStringCollection
|
static class |
PhraseTable.StringList
|
static class |
PhraseTable.TokenList
|
static interface |
PhraseTable.WordList
|
| Field Summary | |
|---|---|
boolean |
caseInsensitive
|
boolean |
ignorePunctuation
|
boolean |
ignorePunctuationTokens
|
boolean |
normalize
|
static java.util.Comparator<PhraseTable.PhraseMatch> |
PHRASEMATCH_LENGTH_ENDPOINTS_COMPARATOR
|
Annotator |
tokenizer
|
| Constructor Summary | |
|---|---|
PhraseTable()
|
|
PhraseTable(boolean normalize,
boolean caseInsensitive,
boolean ignorePunctuation)
|
|
PhraseTable(int initSize)
|
|
| Method Summary | |
|---|---|
boolean |
addPhrase(java.util.List<java.lang.String> tokens)
|
boolean |
addPhrase(java.util.List<java.lang.String> tokens,
java.lang.String tag)
|
boolean |
addPhrase(java.util.List<java.lang.String> tokens,
java.lang.String tag,
java.lang.Object phraseData)
|
boolean |
addPhrase(java.lang.String phraseText)
|
boolean |
addPhrase(java.lang.String phraseText,
java.lang.String tag)
|
boolean |
addPhrase(java.lang.String phraseText,
java.lang.String tag,
java.lang.Object phraseData)
|
void |
addPhrases(java.util.Collection<java.lang.String> phraseTexts)
|
void |
addPhrases(java.util.Map<java.lang.String,java.lang.String> taggedPhraseTexts)
|
protected int |
checkWordListMatch(PhraseTable.Phrase phrase,
PhraseTable.WordList tokens,
int tokenStart,
int tokenEnd,
int checkStart,
boolean matchEnd)
|
void |
clear()
|
java.util.List<PhraseTable.PhraseMatch> |
findAllMatches(java.util.List<PhraseTable.Phrase> acceptablePhrases,
PhraseTable.WordList tokens)
Given a list of tokens, returns a list of spans (PhraseMatch) that correspond to phrases in the table (filtered by the list of acceptable phrases) |
java.util.List<PhraseTable.PhraseMatch> |
findAllMatches(java.util.List<PhraseTable.Phrase> acceptablePhrases,
PhraseTable.WordList tokens,
int tokenStart,
int tokenEnd,
boolean needNormalization)
|
java.util.List<PhraseTable.PhraseMatch> |
findAllMatches(java.util.List<PhraseTable.Phrase> acceptablePhrases,
java.lang.String text)
Given a segment of text, returns a list of spans (PhraseMatch) that correspond to phrases in the table (filtered by the list of acceptable phrases) |
java.util.List<PhraseTable.PhraseMatch> |
findAllMatches(PhraseTable.WordList tokens)
Given a list of tokens, returns a list of spans (PhraseMatch) that correspond to phrases in the table |
java.util.List<PhraseTable.PhraseMatch> |
findAllMatches(PhraseTable.WordList tokens,
int tokenStart,
int tokenEnd,
boolean needNormalization)
|
java.util.List<PhraseTable.PhraseMatch> |
findAllMatches(java.lang.String text)
Given a segment of text, returns a list of spans (PhraseMatch) that correspond to phrases in the table |
protected java.util.List<PhraseTable.PhraseMatch> |
findMatches(java.util.Collection<PhraseTable.Phrase> acceptablePhrases,
PhraseTable.WordList tokens,
int tokenStart,
int tokenEnd,
boolean needNormalization,
boolean findAll,
boolean matchEnd)
|
java.util.List<PhraseTable.PhraseMatch> |
findMatches(PhraseTable.WordList tokens)
|
java.util.List<PhraseTable.PhraseMatch> |
findMatches(PhraseTable.WordList tokens,
int tokenStart,
int tokenEnd,
boolean needNormalization)
|
java.util.List<PhraseTable.PhraseMatch> |
findMatches(java.lang.String text)
|
java.util.List<PhraseTable.PhraseMatch> |
findMatches(java.lang.String text,
int tokenStart,
int tokenEnd,
boolean needNormalization)
|
protected java.util.List<PhraseTable.PhraseMatch> |
findMatchesNormalized(java.util.Collection<PhraseTable.Phrase> acceptablePhrases,
PhraseTable.WordList tokens,
int tokenStart,
int tokenEnd,
boolean findAll,
boolean matchEnd)
|
java.util.List<PhraseTable.PhraseMatch> |
findNonOverlappingPhrases(java.util.List<PhraseTable.PhraseMatch> phraseMatches)
|
static PhraseTable.Phrase |
getLongestPhrase(java.util.List<PhraseTable.Phrase> phrases)
|
java.lang.String |
getNormalizedForm(java.lang.String word)
|
java.util.Iterator<PhraseTable.Phrase> |
iterator()
|
PhraseTable.Phrase |
lookup(PhraseTable.WordList wordList)
|
PhraseTable.Phrase |
lookup(java.lang.String phrase)
|
PhraseTable.Phrase |
lookupNormalized(java.lang.String phrase)
|
void |
readPhrases(java.lang.String filename,
boolean checkTag)
|
void |
readPhrases(java.lang.String filename,
boolean checkTag,
java.util.regex.Pattern delimiterPattern)
|
void |
readPhrases(java.lang.String filename,
boolean checkTag,
java.lang.String delimiterRegex)
|
void |
readPhrases(java.lang.String filename,
int phraseColIndex,
int tagColIndex)
|
void |
setNormalizationCacheSize(int cacheSize)
|
java.lang.String[] |
splitText(java.lang.String phraseText)
|
PhraseTable.WordList |
toNormalizedWordList(java.lang.String phraseText)
|
static java.lang.String |
toString(PhraseTable.WordList wordList)
|
PhraseTable.WordList |
toWordList(java.lang.String phraseText)
|
| Methods inherited from class java.lang.Object |
|---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Field Detail |
|---|
public boolean normalize
public boolean caseInsensitive
public boolean ignorePunctuation
public boolean ignorePunctuationTokens
public Annotator tokenizer
public static final java.util.Comparator<PhraseTable.PhraseMatch> PHRASEMATCH_LENGTH_ENDPOINTS_COMPARATOR
| Constructor Detail |
|---|
public PhraseTable()
public PhraseTable(int initSize)
public PhraseTable(boolean normalize,
boolean caseInsensitive,
boolean ignorePunctuation)
| Method Detail |
|---|
public void clear()
public void setNormalizationCacheSize(int cacheSize)
public void readPhrases(java.lang.String filename,
boolean checkTag)
throws java.io.IOException
java.io.IOException
public void readPhrases(java.lang.String filename,
boolean checkTag,
java.lang.String delimiterRegex)
throws java.io.IOException
java.io.IOException
public void readPhrases(java.lang.String filename,
boolean checkTag,
java.util.regex.Pattern delimiterPattern)
throws java.io.IOException
java.io.IOException
public void readPhrases(java.lang.String filename,
int phraseColIndex,
int tagColIndex)
throws java.io.IOException
java.io.IOException
public static PhraseTable.Phrase getLongestPhrase(java.util.List<PhraseTable.Phrase> phrases)
public java.lang.String[] splitText(java.lang.String phraseText)
public PhraseTable.WordList toWordList(java.lang.String phraseText)
public PhraseTable.WordList toNormalizedWordList(java.lang.String phraseText)
public void addPhrases(java.util.Collection<java.lang.String> phraseTexts)
public void addPhrases(java.util.Map<java.lang.String,java.lang.String> taggedPhraseTexts)
public boolean addPhrase(java.lang.String phraseText)
public boolean addPhrase(java.lang.String phraseText,
java.lang.String tag)
public boolean addPhrase(java.lang.String phraseText,
java.lang.String tag,
java.lang.Object phraseData)
public boolean addPhrase(java.util.List<java.lang.String> tokens)
public boolean addPhrase(java.util.List<java.lang.String> tokens,
java.lang.String tag)
public boolean addPhrase(java.util.List<java.lang.String> tokens,
java.lang.String tag,
java.lang.Object phraseData)
public java.lang.String getNormalizedForm(java.lang.String word)
public PhraseTable.Phrase lookup(java.lang.String phrase)
public PhraseTable.Phrase lookupNormalized(java.lang.String phrase)
public PhraseTable.Phrase lookup(PhraseTable.WordList wordList)
public java.util.List<PhraseTable.PhraseMatch> findAllMatches(java.lang.String text)
text - Input text to search over
public java.util.List<PhraseTable.PhraseMatch> findAllMatches(PhraseTable.WordList tokens)
tokens - List of tokens to search over
public java.util.List<PhraseTable.PhraseMatch> findAllMatches(java.util.List<PhraseTable.Phrase> acceptablePhrases,
java.lang.String text)
acceptablePhrases - The phrases to look for (must be a subset of the phrases already in the table)
text - Input text to search over
public java.util.List<PhraseTable.PhraseMatch> findAllMatches(java.util.List<PhraseTable.Phrase> acceptablePhrases,
PhraseTable.WordList tokens)
acceptablePhrases - The phrases to look for (must be a subset of the phrases already in the table)
tokens - List of tokens to search over
public java.util.List<PhraseTable.PhraseMatch> findAllMatches(PhraseTable.WordList tokens,
int tokenStart,
int tokenEnd,
boolean needNormalization)
public java.util.List<PhraseTable.PhraseMatch> findAllMatches(java.util.List<PhraseTable.Phrase> acceptablePhrases,
PhraseTable.WordList tokens,
int tokenStart,
int tokenEnd,
boolean needNormalization)
public java.util.List<PhraseTable.PhraseMatch> findMatches(java.lang.String text)
public java.util.List<PhraseTable.PhraseMatch> findMatches(PhraseTable.WordList tokens)
public java.util.List<PhraseTable.PhraseMatch> findMatches(PhraseTable.WordList tokens,
int tokenStart,
int tokenEnd,
boolean needNormalization)
public java.util.List<PhraseTable.PhraseMatch> findMatches(java.lang.String text,
int tokenStart,
int tokenEnd,
boolean needNormalization)
protected int checkWordListMatch(PhraseTable.Phrase phrase,
PhraseTable.WordList tokens,
int tokenStart,
int tokenEnd,
int checkStart,
boolean matchEnd)
public java.util.List<PhraseTable.PhraseMatch> findNonOverlappingPhrases(java.util.List<PhraseTable.PhraseMatch> phraseMatches)
protected java.util.List<PhraseTable.PhraseMatch> findMatches(java.util.Collection<PhraseTable.Phrase> acceptablePhrases,
PhraseTable.WordList tokens,
int tokenStart,
int tokenEnd,
boolean needNormalization,
boolean findAll,
boolean matchEnd)
protected java.util.List<PhraseTable.PhraseMatch> findMatchesNormalized(java.util.Collection<PhraseTable.Phrase> acceptablePhrases,
PhraseTable.WordList tokens,
int tokenStart,
int tokenEnd,
boolean findAll,
boolean matchEnd)
public java.util.Iterator<PhraseTable.Phrase> iterator()
public static java.lang.String toString(PhraseTable.WordList wordList)
|
|||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||||