|
|||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||||
java.lang.Object
  edu.stanford.nlp.international.arabic.process.ArabicSegmenter
public class ArabicSegmenter
Arabic word segmentation model based on conditional random fields (CRF). This is a re-implementation (with extensions) of the model described in (Green and DeNero, 2012).
This package includes a JFlex-based orthographic normalization package that runs on the input prior to processing by the CRF-based segmentation model. The normalization options are configurable, but must be consistent for both training and test data.
| Constructor Summary | |
|---|---|
ArabicSegmenter(ArabicSegmenter other)
Copy constructor. |
|
ArabicSegmenter(java.util.Properties props)
|
|
| Method Summary | |
|---|---|
void |
finishTraining()
|
void |
initializeTraining(double numTrees)
|
void |
loadSegmenter(java.lang.String filename)
|
void |
loadSegmenter(java.lang.String filename,
java.util.Properties p)
|
static void |
main(java.lang.String[] args)
|
ThreadsafeProcessor<java.lang.String,java.lang.String> |
newInstance()
Return a new threadsafe instance. |
java.lang.String |
process(java.lang.String nextInput)
Set the input item that will be processed when a thread is allocated to this processor. |
long |
segment(java.io.BufferedReader br,
java.io.PrintWriter pwOut)
Segment all strings from an input. |
java.util.List<HasWord> |
segment(java.lang.String line)
|
java.lang.String |
segmentString(java.lang.String line)
|
void |
serializeSegmenter(java.lang.String filename)
|
void |
train()
Train a segmenter from raw text. |
void |
train(java.util.Collection<Tree> trees)
|
void |
train(java.util.List<TaggedWord> sentence)
|
void |
train(Tree tree)
|
| Methods inherited from class java.lang.Object |
|---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Constructor Detail |
|---|
public ArabicSegmenter(java.util.Properties props)
public ArabicSegmenter(ArabicSegmenter other)
other -
| Method Detail |
|---|
public void initializeTraining(double numTrees)
Specified by: initializeTraining in interface WordSegmenter
public void train(java.util.Collection<Tree> trees)
Specified by: train in interface WordSegmenter
public void train(Tree tree)
Specified by: train in interface WordSegmenter
public void train(java.util.List<TaggedWord> sentence)
Specified by: train in interface WordSegmenter
public void finishTraining()
Specified by: finishTraining in interface WordSegmenter
public java.lang.String process(java.lang.String nextInput)
ThreadsafeProcessor
Specified by: process in interface ThreadsafeProcessor<java.lang.String,java.lang.String>
public ThreadsafeProcessor<java.lang.String,java.lang.String> newInstance()
ThreadsafeProcessor
Specified by: newInstance in interface ThreadsafeProcessor<java.lang.String,java.lang.String>
public java.util.List<HasWord> segment(java.lang.String line)
Specified by: segment in interface WordSegmenter
public java.lang.String segmentString(java.lang.String line)
public long segment(java.io.BufferedReader br,
java.io.PrintWriter pwOut)
br - input stream to segment
pwOut - output stream to write the segmented text
public void train()
public void serializeSegmenter(java.lang.String filename)
public void loadSegmenter(java.lang.String filename,
java.util.Properties p)
public void loadSegmenter(java.lang.String filename)
Specified by: loadSegmenter in interface WordSegmenter
public static void main(java.lang.String[] args)
args -
|
|||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||||