public class CleanXmlAnnotator extends Object implements Annotator
Nested classes/interfaces inherited from interface Annotator: Annotator.Requirement

| Modifier and Type | Field and Description |
|---|---|
| `static boolean` | `DEFAULT_ALLOW_FLAWS` |
| `static String` | `DEFAULT_DATE_TAGS` |
| `static String` | `DEFAULT_DOC_ANNOTATIONS_PATTERNS` |
| `static String` | `DEFAULT_DOCID_TAGS` |
| `static String` | `DEFAULT_DOCTYPE_TAGS` |
| `static String` | `DEFAULT_SECTION_ANNOTATIONS_PATTERNS` |
| `static String` | `DEFAULT_SECTION_TAGS` |
| `static String` | `DEFAULT_SENTENCE_ENDERS` |
| `static String` | `DEFAULT_SINGLE_SENTENCE_TAGS` |
| `static String` | `DEFAULT_SPEAKER_TAGS` |
| `static String` | `DEFAULT_TOKEN_ANNOTATIONS_PATTERNS` |
| `static String` | `DEFAULT_UTTERANCE_TURN_TAGS` |
| `static String` | `DEFAULT_XML_TAGS` |
Fields inherited from interface Annotator: BINARIZED_TREES_REQUIREMENT, CLEAN_XML_REQUIREMENT, DETERMINISTIC_COREF_REQUIREMENT, GENDER_REQUIREMENT, GUTIME_REQUIREMENT, HEIDELTIME_REQUIREMENT, LEMMA_REQUIREMENT, NER_REQUIREMENT, NUMBER_REQUIREMENT, PARSE_AND_TAG, PARSE_REQUIREMENT, PARSE_TAG_BINARIZED_TREES, POS_REQUIREMENT, QUANTIFIABLE_ENTITY_NORMALIZATION_REQUIREMENT, RELATION_EXTRACTOR_REQUIREMENT, SSPLIT_REQUIREMENT, STANFORD_CLEAN_XML, STANFORD_DETERMINISTIC_COREF, STANFORD_GENDER, STANFORD_LEMMA, STANFORD_NER, STANFORD_PARSE, STANFORD_POS, STANFORD_REGEXNER, STANFORD_RELATION, STANFORD_SENTIMENT, STANFORD_SSPLIT, STANFORD_TOKENIZE, STANFORD_TRUECASE, STEM_REQUIREMENT, SUTIME_REQUIREMENT, TIME_WORDS_REQUIREMENT, TOKENIZE_AND_SSPLIT, TOKENIZE_REQUIREMENT, TOKENIZE_SSPLIT_NER, TOKENIZE_SSPLIT_PARSE, TOKENIZE_SSPLIT_PARSE_NER, TOKENIZE_SSPLIT_POS, TOKENIZE_SSPLIT_POS_LEMMA, TRUECASE_REQUIREMENT

| Constructor and Description |
|---|
| `CleanXmlAnnotator()` |
| `CleanXmlAnnotator(String xmlTagsToRemove, String sentenceEndingTags, String dateTags, boolean allowFlawedXml)` |
| Modifier and Type | Method and Description |
|---|---|
| `void` | `annotate(Annotation annotation)` Given an Annotation, perform a task on this Annotation. |
| `List<CoreLabel>` | `process(Annotation annotation, List<CoreLabel> tokens)` |
| `List<CoreLabel>` | `process(List<CoreLabel> tokens)` |
| `Set<Annotator.Requirement>` | `requirementsSatisfied()` Returns a set of requirements for which tasks this annotator can provide. |
| `Set<Annotator.Requirement>` | `requires()` Returns the set of tasks which this annotator requires in order to perform. |
| `void` | `setDiscourseTags(String utteranceTurnTags, String speakerTags)` |
| `void` | `setDocAnnotationPatterns(String conf)` |
| `void` | `setDocIdTagMatcher(String docIdTags)` |
| `void` | `setDocTypeTagMatcher(String docTypeTags)` |
| `void` | `setSectionAnnotationPatterns(String conf)` |
| `void` | `setSectionTagMatcher(String sectionTags)` |
| `void` | `setSingleSentenceTagMatcher(String tags)` |
| `void` | `setSsplitDiscardTokensMatcher(String tags)` |
| `void` | `setTokenAnnotationPatterns(String conf)` |
public static final String DEFAULT_XML_TAGS
public static final String DEFAULT_SENTENCE_ENDERS
public static final String DEFAULT_SINGLE_SENTENCE_TAGS
public static final String DEFAULT_DATE_TAGS
public static final String DEFAULT_DOCID_TAGS
public static final String DEFAULT_DOCTYPE_TAGS
public static final String DEFAULT_UTTERANCE_TURN_TAGS
public static final String DEFAULT_SPEAKER_TAGS
public static final String DEFAULT_DOC_ANNOTATIONS_PATTERNS
public static final String DEFAULT_TOKEN_ANNOTATIONS_PATTERNS
public static final String DEFAULT_SECTION_TAGS
public static final String DEFAULT_SECTION_ANNOTATIONS_PATTERNS
public static final boolean DEFAULT_ALLOW_FLAWS
public void setSsplitDiscardTokensMatcher(String tags)
public void setSingleSentenceTagMatcher(String tags)
public void setDocIdTagMatcher(String docIdTags)
public void setDocTypeTagMatcher(String docTypeTags)
public void setSectionTagMatcher(String sectionTags)
public void setDocAnnotationPatterns(String conf)
public void setTokenAnnotationPatterns(String conf)
public void setSectionAnnotationPatterns(String conf)
public void annotate(Annotation annotation)
Description copied from interface: Annotator
public List<CoreLabel> process(Annotation annotation, List<CoreLabel> tokens)
public Set<Annotator.Requirement> requires()
Description copied from interface: Annotator
public Set<Annotator.Requirement> requirementsSatisfied()
Description copied from interface: Annotator
Specified by: requirementsSatisfied in interface Annotator