public class AceToken extends Object
| Modifier and Type | Field and Description |
|---|---|
static int |
CASE_ALLCAPS |
static int |
CASE_ALLCAPSORDOTS |
static int |
CASE_ALLDIGITS |
static int |
CASE_ALLDIGITSORDOTS |
static int |
CASE_CAPINI |
static int |
CASE_INCAP |
static int |
CASE_OTHER |
static StringDictionary |
LEMMAS
Dictionary for all lemmas in the corpus
|
static StringDictionary |
OTHERS
Dictionary for all other strings in the corpus
|
static Map<Integer,ArrayList<Integer>> |
PROX_CLASSES
Map of all proximity classes
|
static StringDictionary |
WORDS
Dictionary for all words in the corpus
|
| Constructor and Description |
|---|
AceToken(String word,
String lemma,
String pos,
String chunk,
String nerc,
String start,
String end,
int sentence)
Constructs an AceToken from a tokenized line generated by Tokey
|
| Modifier and Type | Method and Description |
|---|---|
int |
adjustPhrasePositions(int offsetToSubtract,
String word)
Recomputes start/end phrase positions by removing SGML tag strings. This is
required because ACE annotations skip over SGML tags when computing
positions in the stream; hence the annotations do not match our preprocessing
positions, which count everything.
|
String |
display()
Pretty display
|
static boolean |
exists(Map<String,String> dict,
String elem)
Verifies if the given string exists in the given dictionary
|
int |
getByteEnd() |
Span |
getByteOffset() |
int |
getByteStart() |
int |
getCase() |
int |
getChunk() |
int |
getLemma() |
String |
getLiteral() |
String |
getMassiBbn() |
String |
getMassiClass() |
String |
getMassiWnss() |
int |
getNerc() |
int |
getPos() |
int |
getRawByteEnd() |
Span |
getRawByteOffset() |
int |
getRawByteStart() |
int |
getSentence() |
int[] |
getSuffixes() |
int |
getWord() |
static boolean |
isFirstName(String lower) |
static boolean |
isLastName(String lower) |
static boolean |
isLocation(String lower) |
static boolean |
isSgml(String s) |
static String |
isTriggerWord(String lower) |
static void |
loadGazetteers(String dataPath) |
static void |
loadProximityClasses(String proxFileName)
Loads all proximity classes from the hard disk. The WORDS map must be
created before calling this method!
|
static String |
removeSpaces(String s) |
void |
setMassiBbn(String i) |
void |
setMassiClass(String i) |
void |
setMassiWnss(String i) |
String |
toString() |
public static final StringDictionary WORDS
public static final StringDictionary LEMMAS
public static final StringDictionary OTHERS
public static final Map<Integer,ArrayList<Integer>> PROX_CLASSES
public static final int CASE_OTHER
public static final int CASE_ALLCAPS
public static final int CASE_ALLCAPSORDOTS
public static final int CASE_CAPINI
public static final int CASE_INCAP
public static final int CASE_ALLDIGITS
public static final int CASE_ALLDIGITSORDOTS
public static void loadGazetteers(String dataPath) throws FileNotFoundException, IOException
Throws: FileNotFoundException, IOException
public static boolean isLocation(String lower)
public static boolean isFirstName(String lower)
public static boolean isLastName(String lower)
public static boolean exists(Map<String,String> dict, String elem)
public static void loadProximityClasses(String proxFileName) throws IOException
Throws: IOException
public String getLiteral()
public int getWord()
public int getCase()
public int[] getSuffixes()
public int getLemma()
public int getPos()
public int getChunk()
public int getNerc()
public Span getByteOffset()
public int getByteStart()
public int getByteEnd()
public int getSentence()
public Span getRawByteOffset()
public int getRawByteStart()
public int getRawByteEnd()
public void setMassiClass(String i)
public String getMassiClass()
public void setMassiBbn(String i)
public String getMassiBbn()
public void setMassiWnss(String i)
public String getMassiWnss()
public static boolean isSgml(String s)
public int adjustPhrasePositions(int offsetToSubtract,
String word)
public String display()