|
|||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||||
java.lang.Object
  edu.stanford.nlp.ling.tokensregex.MultiWordStringMatcher
public class MultiWordStringMatcher
Finds multi word strings in a piece of text
| Nested Class Summary | |
|---|---|
static class |
MultiWordStringMatcher.LongestStringComparator
|
static class |
MultiWordStringMatcher.MatchType
if matchType is EXCT: match exact string
if matchType is EXCTWS: match exact string, except whitespace can match multiple whitespaces
if matchType is LWS: match case insensitive string, except whitespace can match multiple whitespaces
if matchType is LNRM: disregards punctuation, does case insensitive match
if matchType is REGEX: interprets string as regex already |
| Field Summary | |
|---|---|
static java.util.Comparator<java.lang.String> |
LONGEST_STRING_COMPARATOR
|
| Constructor Summary | |
|---|---|
MultiWordStringMatcher(MultiWordStringMatcher.MatchType matchType)
|
|
MultiWordStringMatcher(java.lang.String matchTypeStr)
|
|
| Method Summary | |
|---|---|
java.util.regex.Pattern |
createPattern(java.lang.String targetString)
|
static java.util.List<IntPair> |
findOffsets(java.util.regex.Pattern pattern,
java.lang.String text)
Finds pattern in text and returns offsets |
static java.util.List<IntPair> |
findOffsets(java.util.regex.Pattern pattern,
java.lang.String text,
int start,
int end)
Finds pattern in text span from character start to end (exclusive) and returns offsets |
java.util.List<IntPair> |
findTargetStringOffsets(java.lang.String text,
java.lang.String targetString)
Finds target string in text and returns offsets (matches based on set matchType) |
java.util.List<IntPair> |
findTargetStringOffsets(java.lang.String text,
java.lang.String targetString,
int start,
int end)
Finds target string in text span from character start to end (exclusive) and returns offsets (matches based on set matchType) |
protected java.util.List<IntPair> |
findTargetStringOffsetsExct(java.lang.String text,
java.lang.String targetString,
int start,
int end)
Finds target string in text span from character start to end (exclusive) and returns offsets (does EXCT string matching) |
protected java.util.List<IntPair> |
findTargetStringOffsetsRegex(java.lang.String text,
java.lang.String targetString,
int start,
int end)
Finds target string in text and returns offsets using regular expressions (matches based on set matchType) |
java.lang.String |
getExctWsRegex(java.lang.String targetString)
|
java.lang.String |
getLnrmRegex(java.lang.String targetString)
|
java.lang.String |
getLWsRegex(java.lang.String targetString)
|
MultiWordStringMatcher.MatchType |
getMatchType()
|
java.util.regex.Pattern |
getPattern(java.lang.String targetString)
|
java.util.regex.Pattern |
getPattern(java.lang.String[] targetStrings)
|
java.lang.String |
getRegex(java.lang.String targetString)
|
java.lang.String |
getRegex(java.lang.String[] targetStrings)
|
protected java.lang.String |
markTargetString(java.lang.String text,
java.lang.String targetString,
java.lang.String beginMark,
java.lang.String endMark,
boolean markOnlyIfSpace)
|
java.lang.String |
putSpacesAroundTargetString(java.lang.String text,
java.lang.String targetString)
Finds target string in text and puts spaces around it so that it will be matched when we match against tokens |
void |
setMatchType(MultiWordStringMatcher.MatchType matchType)
|
| Methods inherited from class java.lang.Object |
|---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Field Detail |
|---|
public static final java.util.Comparator<java.lang.String> LONGEST_STRING_COMPARATOR
| Constructor Detail |
|---|
public MultiWordStringMatcher(MultiWordStringMatcher.MatchType matchType)
public MultiWordStringMatcher(java.lang.String matchTypeStr)
| Method Detail |
|---|
public MultiWordStringMatcher.MatchType getMatchType()
public void setMatchType(MultiWordStringMatcher.MatchType matchType)
public java.lang.String putSpacesAroundTargetString(java.lang.String text,
java.lang.String targetString)
text - String in which to look for the target string
targetString - Target string to look for
protected java.lang.String markTargetString(java.lang.String text,
java.lang.String targetString,
java.lang.String beginMark,
java.lang.String endMark,
boolean markOnlyIfSpace)
protected java.util.List<IntPair> findTargetStringOffsetsExct(java.lang.String text,
java.lang.String targetString,
int start,
int end)
text - String in which to look for the target string
targetString - Target string to look for
start - position to start search
end - position to end search
public java.util.regex.Pattern getPattern(java.lang.String[] targetStrings)
public java.lang.String getRegex(java.lang.String[] targetStrings)
public java.util.regex.Pattern getPattern(java.lang.String targetString)
public java.util.regex.Pattern createPattern(java.lang.String targetString)
public java.lang.String getRegex(java.lang.String targetString)
public java.lang.String getExctWsRegex(java.lang.String targetString)
public java.lang.String getLWsRegex(java.lang.String targetString)
public java.lang.String getLnrmRegex(java.lang.String targetString)
protected java.util.List<IntPair> findTargetStringOffsetsRegex(java.lang.String text,
java.lang.String targetString,
int start,
int end)
text - String in which to find target string
targetString - Target string to look for
start - position to start search
end - position to end search
public static java.util.List<IntPair> findOffsets(java.util.regex.Pattern pattern,
java.lang.String text)
pattern - pattern to look for
text - String in which to look for the pattern
public static java.util.List<IntPair> findOffsets(java.util.regex.Pattern pattern,
java.lang.String text,
int start,
int end)
pattern - pattern to look for
text - String in which to look for the pattern
start - position to start search
end - position to end search
public java.util.List<IntPair> findTargetStringOffsets(java.lang.String text,
java.lang.String targetString)
text - String in which to look for the target string
targetString - Target string to look for
public java.util.List<IntPair> findTargetStringOffsets(java.lang.String text,
java.lang.String targetString,
int start,
int end)
text - String in which to look for the target string
targetString - Target string to look for
start - position to start search
end - position to end search
|
|||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||||