|
|||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||||
java.lang.Object
  edu.stanford.nlp.classify.GeneralDataset<L,F>
Type Parameters:
L - The type of the labels in the Dataset
F - The type of the features in the Dataset

public abstract class GeneralDataset<L,F>
The purpose of this abstract class is to unify Dataset and RVFDataset.
| Field Summary | |
|---|---|
protected int[][] |
data
|
Index<F> |
featureIndex
|
Index<L> |
labelIndex
|
protected int[] |
labels
|
protected int |
size
|
| Constructor Summary | |
|---|---|
GeneralDataset()
|
|
| Method Summary | ||
|---|---|---|
abstract void |
add(Datum<L,F> d)
|
|
void |
addAll(java.lang.Iterable<? extends Datum<L,F>> data)
Adds all Datums in the given collection of data to this dataset |
|
void |
applyFeatureCountThreshold(int k)
Applies a feature count threshold to the Dataset. |
|
void |
applyFeatureMaxCountThreshold(int k)
Applies a max feature count threshold to the Dataset. |
|
void |
clear()
Resets the Dataset so that it is empty and ready to collect data. |
|
void |
clear(int numDatums)
Resets the Dataset so that it is empty and ready to collect data. |
|
Index<F> |
featureIndex()
|
|
int[][] |
getDataArray()
|
|
abstract Datum<L,F> |
getDatum(int index)
|
|
float[] |
getFeatureCounts()
Get the total count (over all data instances) of each feature |
|
int[] |
getLabelsArray()
|
|
abstract RVFDatum<L,F> |
getRVFDatum(int index)
|
|
abstract double[][] |
getValuesArray()
|
|
protected abstract void |
initialize(int numDatums)
This method takes care of resetting values of the dataset such that it is empty with an initial capacity of numDatums. |
|
java.util.Iterator<RVFDatum<L,F>> |
iterator()
|
|
Index<L> |
labelIndex()
|
|
java.util.Iterator<L> |
labelIterator()
Returns an iterator over the class labels of the Dataset |
|
java.lang.String[] |
makeSvmLabelMap()
Maps our labels to labels that are compatible with svm_light |
|
GeneralDataset<L,F> |
mapDataset(GeneralDataset<L,F> dataset)
|
|
<L2> GeneralDataset<L2,F> |
mapDataset(GeneralDataset<L,F> dataset,
Index<L2> newLabelIndex,
java.util.Map<L,L2> labelMapping,
L2 defaultLabel)
|
|
static <L,L2,F> Datum<L2,F> |
mapDatum(Datum<L,F> d,
java.util.Map<L,L2> labelMapping,
L2 defaultLabel)
|
|
int |
numClasses()
|
|
int |
numFeatures()
|
|
int |
numFeatureTokens()
returns the number of feature tokens in the Dataset. |
|
int |
numFeatureTypes()
returns the number of distinct feature types in the Dataset. |
|
void |
printSVMLightFormat()
Dumps the Dataset as a training/test file for SVMLight. |
|
void |
printSVMLightFormat(java.io.PrintWriter pw)
Print SVM Light Format file. |
|
void |
randomize(int randomSeed)
Randomizes the data array in place. |
|
GeneralDataset<L,F> |
sampleDataset(int randomSeed,
double sampleFrac,
boolean sampleWithReplacement)
|
|
int |
size()
Returns the number of examples (Datums) in the Dataset. |
|
abstract Pair<GeneralDataset<L,F>,GeneralDataset<L,F>> |
split(double p)
|
|
abstract Pair<GeneralDataset<L,F>,GeneralDataset<L,F>> |
split(int start,
int end)
|
|
abstract void |
summaryStatistics()
Print some statistics summarizing the dataset |
|
protected void |
trimData()
|
|
protected void |
trimLabels()
|
|
protected double[][] |
trimToSize(double[][] i)
|
|
protected int[] |
trimToSize(int[] i)
|
|
protected int[][] |
trimToSize(int[][] i)
|
|
| Methods inherited from class java.lang.Object |
|---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Field Detail |
|---|
public Index<L> labelIndex
public Index<F> featureIndex
protected int[] labels
protected int[][] data
protected int size
| Constructor Detail |
|---|
public GeneralDataset()
| Method Detail |
|---|
public Index<L> labelIndex()
public Index<F> featureIndex()
public int numFeatures()
public int numClasses()
public int[] getLabelsArray()
public int[][] getDataArray()
public abstract double[][] getValuesArray()
public void clear()
public void clear(int numDatums)
numDatums - initial capacity of dataset
protected abstract void initialize(int numDatums)
numDatums - initial capacity of dataset
public abstract RVFDatum<L,F> getRVFDatum(int index)
public abstract Datum<L,F> getDatum(int index)
public abstract void add(Datum<L,F> d)
public float[] getFeatureCounts()
public void applyFeatureCountThreshold(int k)
public void applyFeatureMaxCountThreshold(int k)
public int numFeatureTokens()
public int numFeatureTypes()
public void addAll(java.lang.Iterable<? extends Datum<L,F>> data)
data - collection of datums you would like to add to the dataset
public abstract Pair<GeneralDataset<L,F>,GeneralDataset<L,F>> split(int start,
int end)
public abstract Pair<GeneralDataset<L,F>,GeneralDataset<L,F>> split(double p)
public int size()
Returns the number of examples (Datums) in the Dataset.
protected void trimData()
protected void trimLabels()
protected int[] trimToSize(int[] i)
protected int[][] trimToSize(int[][] i)
protected double[][] trimToSize(double[][] i)
public void randomize(int randomSeed)
randomSeed -
public GeneralDataset<L,F> sampleDataset(int randomSeed,
double sampleFrac,
boolean sampleWithReplacement)
public abstract void summaryStatistics()
public java.util.Iterator<L> labelIterator()
public GeneralDataset<L,F> mapDataset(GeneralDataset<L,F> dataset)
dataset -
public static <L,L2,F> Datum<L2,F> mapDatum(Datum<L,F> d,
java.util.Map<L,L2> labelMapping,
L2 defaultLabel)
public <L2> GeneralDataset<L2,F> mapDataset(GeneralDataset<L,F> dataset,
Index<L2> newLabelIndex,
java.util.Map<L,L2> labelMapping,
L2 defaultLabel)
dataset -
public void printSVMLightFormat()
public java.lang.String[] makeSvmLabelMap()
public void printSVMLightFormat(java.io.PrintWriter pw)
public java.util.Iterator<RVFDatum<L,F>> iterator()
Specified by: iterator in interface java.lang.Iterable<RVFDatum<L,F>>
|
|||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | ||||||||