edu.stanford.nlp.ie.machinereading.domains.ace.reader
Class AceToken

java.lang.Object
  extended by edu.stanford.nlp.ie.machinereading.domains.ace.reader.AceToken

public class AceToken
extends java.lang.Object


Field Summary
static int CASE_ALLCAPS
           
static int CASE_ALLCAPSORDOTS
           
static int CASE_ALLDIGITS
           
static int CASE_ALLDIGITSORDOTS
           
static int CASE_CAPINI
           
static int CASE_INCAP
           
static int CASE_OTHER
           
static StringDictionary LEMMAS
          Dictionary for all lemmas in the corpus
static StringDictionary OTHERS
          Dictionary for all other strings in the corpus
static java.util.HashMap<java.lang.Integer,java.util.ArrayList<java.lang.Integer>> PROX_CLASSES
          Map of all proximity classes
static StringDictionary WORDS
          Dictionary for all words in the corpus
 
Constructor Summary
AceToken(java.lang.String word, java.lang.String lemma, java.lang.String pos, java.lang.String chunk, java.lang.String nerc, java.lang.String start, java.lang.String end, int sentence)
          Constructs an AceToken from a tokenized line generated by Tokey
 
Method Summary
 int adjustPhrasePositions(int offsetToSubtract, java.lang.String word)
          Recomputes start/end phrase positions by removing SGML tag strings. This is required because ACE annotations skip over SGML tags when computing positions in the stream; hence the annotations do not match our preprocessing positions, which count everything.
 java.lang.String display()
          Pretty display
static boolean exists(java.util.HashMap<java.lang.String,java.lang.String> dict, java.lang.String elem)
          Verifies if the given string exists in the given dictionary
 int getByteEnd()
           
 Span getByteOffset()
           
 int getByteStart()
           
 int getCase()
           
 int getChunk()
           
 int getLemma()
           
 java.lang.String getLiteral()
           
 java.lang.String getMassiBbn()
           
 java.lang.String getMassiClass()
           
 java.lang.String getMassiWnss()
           
 int getNerc()
           
 int getPos()
           
 int getRawByteEnd()
           
 Span getRawByteOffset()
           
 int getRawByteStart()
           
 int getSentence()
           
 int[] getSuffixes()
           
 int getWord()
           
static boolean isFirstName(java.lang.String lower)
           
static boolean isLastName(java.lang.String lower)
           
static boolean isLocation(java.lang.String lower)
           
static boolean isSgml(java.lang.String s)
           
static java.lang.String isTriggerWord(java.lang.String lower)
           
static void loadGazetteers(java.lang.String dataPath)
           
static void loadProximityClasses(java.lang.String proxFileName)
          Loads all proximity classes from the hard disk. The WORDS map must be created before calling this method!
static java.lang.String removeSpaces(java.lang.String s)
           
 void setMassiBbn(java.lang.String i)
           
 void setMassiClass(java.lang.String i)
           
 void setMassiWnss(java.lang.String i)
           
 java.lang.String toString()
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
 

Field Detail

WORDS

public static StringDictionary WORDS
Dictionary for all words in the corpus


LEMMAS

public static StringDictionary LEMMAS
Dictionary for all lemmas in the corpus


OTHERS

public static StringDictionary OTHERS
Dictionary for all other strings in the corpus


PROX_CLASSES

public static java.util.HashMap<java.lang.Integer,java.util.ArrayList<java.lang.Integer>> PROX_CLASSES
Map of all proximity classes


CASE_OTHER

public static final int CASE_OTHER
See Also:
Constant Field Values

CASE_ALLCAPS

public static final int CASE_ALLCAPS
See Also:
Constant Field Values

CASE_ALLCAPSORDOTS

public static final int CASE_ALLCAPSORDOTS
See Also:
Constant Field Values

CASE_CAPINI

public static final int CASE_CAPINI
See Also:
Constant Field Values

CASE_INCAP

public static final int CASE_INCAP
See Also:
Constant Field Values

CASE_ALLDIGITS

public static final int CASE_ALLDIGITS
See Also:
Constant Field Values

CASE_ALLDIGITSORDOTS

public static final int CASE_ALLDIGITSORDOTS
See Also:
Constant Field Values
Constructor Detail

AceToken

public AceToken(java.lang.String word,
                java.lang.String lemma,
                java.lang.String pos,
                java.lang.String chunk,
                java.lang.String nerc,
                java.lang.String start,
                java.lang.String end,
                int sentence)
Constructs an AceToken from a tokenized line generated by Tokey

Method Detail

loadGazetteers

public static void loadGazetteers(java.lang.String dataPath)
                           throws java.io.FileNotFoundException,
                                  java.io.IOException
Throws:
java.io.FileNotFoundException
java.io.IOException

isLocation

public static boolean isLocation(java.lang.String lower)

isFirstName

public static boolean isFirstName(java.lang.String lower)

isLastName

public static boolean isLastName(java.lang.String lower)

isTriggerWord

public static java.lang.String isTriggerWord(java.lang.String lower)

exists

public static boolean exists(java.util.HashMap<java.lang.String,java.lang.String> dict,
                             java.lang.String elem)
Verifies if the given string exists in the given dictionary


loadProximityClasses

public static void loadProximityClasses(java.lang.String proxFileName)
                                 throws java.io.IOException
Loads all proximity classes from the hard disk. The WORDS map must be created before calling this method!

Throws:
java.io.IOException

getLiteral

public java.lang.String getLiteral()

getWord

public int getWord()

getCase

public int getCase()

getSuffixes

public int[] getSuffixes()

getLemma

public int getLemma()

getPos

public int getPos()

getChunk

public int getChunk()

getNerc

public int getNerc()

getByteOffset

public Span getByteOffset()

getByteStart

public int getByteStart()

getByteEnd

public int getByteEnd()

getSentence

public int getSentence()

getRawByteOffset

public Span getRawByteOffset()

getRawByteStart

public int getRawByteStart()

getRawByteEnd

public int getRawByteEnd()

setMassiClass

public void setMassiClass(java.lang.String i)

getMassiClass

public java.lang.String getMassiClass()

setMassiBbn

public void setMassiBbn(java.lang.String i)

getMassiBbn

public java.lang.String getMassiBbn()

setMassiWnss

public void setMassiWnss(java.lang.String i)

getMassiWnss

public java.lang.String getMassiWnss()

isSgml

public static boolean isSgml(java.lang.String s)

removeSpaces

public static java.lang.String removeSpaces(java.lang.String s)

adjustPhrasePositions

public int adjustPhrasePositions(int offsetToSubtract,
                                 java.lang.String word)
Recomputes start/end phrase positions by removing SGML tag strings. This is required because ACE annotations skip over SGML tags when computing positions in the stream; hence the annotations do not match our preprocessing positions, which count everything.


display

public java.lang.String display()
Pretty display


toString

public java.lang.String toString()
Overrides:
toString in class java.lang.Object


Stanford NLP Group