public class IOBUtils
extends java.lang.Object
| Modifier and Type | Field and Description |
|---|---|
static java.lang.String |
BeginSymbol |
static java.lang.String |
ContinuationSymbol |
static java.lang.String |
NosegSymbol |
static java.lang.String |
RewriteSymbol |
static java.lang.String |
RewriteTahSymbol
Deprecated. Use RewriteSymbol instead.
|
static java.lang.String |
RewriteTareefSymbol
Deprecated. Use RewriteSymbol instead.
|
| Modifier and Type | Method and Description |
|---|---|
static java.lang.String |
getBoundaryCharacter() |
static java.lang.String |
IOBToString(java.util.List<CoreLabel> labeledSequence)
Convert a list of labeled characters to a String.
|
static java.lang.String |
IOBToString(java.util.List<CoreLabel> labeledSequence,
java.lang.String segmentationMarker)
Convert a list of labeled characters to a String.
|
static java.lang.String |
IOBToString(java.util.List<CoreLabel> labeledSequence,
java.lang.String prefixMarker,
java.lang.String suffixMarker)
Convert a list of labeled characters to a String.
|
static void |
labelDomain(java.util.List<CoreLabel> tokenList,
java.lang.String domain) |
static java.util.List<CoreLabel> |
StringToIOB(java.util.List<CoreLabel> tokenList,
java.lang.Character segMarker,
boolean applyRewriteRules)
Convert a String to a list of characters suitable for labeling in an IOB
segmentation model.
|
static java.util.List<CoreLabel> |
StringToIOB(java.util.List<CoreLabel> tokenList,
java.lang.Character segMarker,
boolean applyRewriteRules,
boolean stripRewrites)
Convert a String to a list of characters suitable for labeling in an IOB
segmentation model.
|
static java.util.List<CoreLabel> |
StringToIOB(java.lang.String string)
This version turns an unsegmented string into an IOB input, i.e.,
it is meant for processing raw text.
|
static java.util.List<CoreLabel> |
StringToIOB(java.lang.String str,
java.lang.Character segMarker) |
public static final java.lang.String BeginSymbol
public static final java.lang.String ContinuationSymbol
public static final java.lang.String NosegSymbol
public static final java.lang.String RewriteSymbol
public static final java.lang.String RewriteTahSymbol
public static final java.lang.String RewriteTareefSymbol
public static java.lang.String getBoundaryCharacter()
public static java.util.List<CoreLabel> StringToIOB(java.util.List<CoreLabel> tokenList, java.lang.Character segMarker, boolean applyRewriteRules)
Parameters:
tokenList -
segMarker -
applyRewriteRules - add rewrite labels (for training data)
public static java.util.List<CoreLabel> StringToIOB(java.util.List<CoreLabel> tokenList, java.lang.Character segMarker, boolean applyRewriteRules, boolean stripRewrites)
Parameters:
tokenList -
segMarker -
applyRewriteRules - add rewrite labels (for training data)
stripRewrites - revert training data to old Green & DeNero model (remove rewrite labels but still rewrite to try to preserve raw text)
public static java.util.List<CoreLabel> StringToIOB(java.lang.String string)
public static java.util.List<CoreLabel> StringToIOB(java.lang.String str, java.lang.Character segMarker)
public static java.lang.String IOBToString(java.util.List<CoreLabel> labeledSequence, java.lang.String prefixMarker, java.lang.String suffixMarker)
public static java.lang.String IOBToString(java.util.List<CoreLabel> labeledSequence, java.lang.String segmentationMarker)
public static java.lang.String IOBToString(java.util.List<CoreLabel> labeledSequence)
public static void labelDomain(java.util.List<CoreLabel> tokenList, java.lang.String domain)