/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.classify;

import edu.stanford.nlp.classify.GeneralDataset;
import edu.stanford.nlp.classify.RVFDataset;
import edu.stanford.nlp.classify.WeightedDataset;
import edu.stanford.nlp.ling.BasicDatum;
import edu.stanford.nlp.ling.Datum;
import edu.stanford.nlp.ling.RVFDatum;
import edu.stanford.nlp.objectbank.ObjectBank;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.stats.TwoDimensionalCounter;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.HashIndex;
import edu.stanford.nlp.util.Index;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.ScoredComparator;
import edu.stanford.nlp.util.ScoredObject;
import java.io.File;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class Dataset<L, F>
extends GeneralDataset<L, F> {
    /** Serialization version identifier for this class. */
    private static final long serialVersionUID = -3883164942879961091L;
    /**
     * Running count of lines passed to {@code svmLightLineToDatum}; it is only read to build that
     * method's error message. It is static and is not reset anywhere in the code visible here, so
     * it keeps counting across files and calls.
     */
    private static int line1 = 0;

    /** Creates an empty dataset with an initial capacity of 10 datums. */
    public Dataset() {
        this(10);
    }

    /**
     * Creates an empty dataset with fresh label and feature indices.
     *
     * @param numDatums initial length of the label and data arrays
     */
    public Dataset(int numDatums) {
        this.initialize(numDatums);
    }

    /**
     * Creates an empty dataset that uses the supplied indices.
     *
     * @param numDatums initial length of the label and data arrays
     * @param featureIndex feature index to use (stored by reference)
     * @param labelIndex label index to use (stored by reference)
     */
    public Dataset(int numDatums, Index<F> featureIndex, Index<L> labelIndex) {
        // initialize() installs fresh indices first; they are replaced right below.
        this.initialize(numDatums);
        this.featureIndex = featureIndex;
        this.labelIndex = labelIndex;
    }

    /** Creates an empty dataset with an initial capacity of 10 that uses the supplied indices. */
    public Dataset(Index<F> featureIndex, Index<L> labelIndex) {
        this(10, featureIndex, labelIndex);
    }

    /**
     * Wraps existing arrays as a dataset whose size is {@code data.length}.
     *
     * @param labelIndex label index (stored by reference)
     * @param labels label index value for each datum (stored by reference)
     * @param featureIndex feature index (stored by reference)
     * @param data feature index values for each datum (stored by reference)
     */
    public Dataset(Index<L> labelIndex, int[] labels, Index<F> featureIndex, int[][] data) {
        this(labelIndex, labels, featureIndex, data, data.length);
    }

    /**
     * Wraps existing arrays as a dataset. Nothing is copied or validated here; the arrays and
     * indices are stored exactly as passed.
     *
     * @param labelIndex label index
     * @param labels label index value for each datum
     * @param featureIndex feature index
     * @param data feature index values for each datum
     * @param size number of datums recorded as present in the arrays
     */
    public Dataset(Index<L> labelIndex, int[] labels, Index<F> featureIndex, int[][] data, int size) {
        this.labelIndex = labelIndex;
        this.labels = labels;
        this.featureIndex = featureIndex;
        this.data = data;
        this.size = size;
    }

    /**
     * Splits this dataset so that the leading {@code percentDev} fraction of the datums becomes
     * the dev set and the remainder becomes the training set.
     *
     * @param percentDev fraction of the datums (counted from index 0) to place in the dev set
     * @return a pair of (train, dev)
     */
    @Override
    public Pair<GeneralDataset<L, F>, GeneralDataset<L, F>> split(double percentDev) {
        int devEnd = (int)(percentDev * (double)this.size());
        return this.split(0, devEnd);
    }

    /**
     * Splits this dataset into a dev set made of the datums in {@code [start, end)} and a
     * training set made of everything else, keeping the original order. Both results share this
     * dataset's indices and per-datum feature arrays. When this object is a
     * {@link WeightedDataset} the weights are split the same way.
     *
     * @param start index of the first dev datum (inclusive)
     * @param end index just past the last dev datum (exclusive)
     * @return a pair of (train, dev)
     */
    @Override
    public Pair<GeneralDataset<L, F>, GeneralDataset<L, F>> split(int start, int end) {
        int total = this.size();
        int devCount = end - start;
        int trainCount = total - devCount;
        int tailCount = total - end;

        int[][] devRows = new int[devCount][];
        int[] devLabelIds = new int[devCount];
        int[][] trainRows = new int[trainCount][];
        int[] trainLabelIds = new int[trainCount];

        // Dev gets the [start, end) slice.
        System.arraycopy(this.data, start, devRows, 0, devCount);
        System.arraycopy(this.labels, start, devLabelIds, 0, devCount);
        // Train gets the head [0, start) followed by the tail [end, total).
        System.arraycopy(this.data, 0, trainRows, 0, start);
        System.arraycopy(this.data, end, trainRows, start, tailCount);
        System.arraycopy(this.labels, 0, trainLabelIds, 0, start);
        System.arraycopy(this.labels, end, trainLabelIds, start, tailCount);

        if (!(this instanceof WeightedDataset)) {
            Dataset<L, F> devSet = new Dataset<L, F>(this.labelIndex, devLabelIds, this.featureIndex, devRows, devCount);
            Dataset<L, F> trainSet = new Dataset<L, F>(this.labelIndex, trainLabelIds, this.featureIndex, trainRows, trainCount);
            return new Pair<GeneralDataset<L, F>, GeneralDataset<L, F>>(trainSet, devSet);
        }

        float[] trainWeights = new float[trainCount];
        float[] devWeights = new float[devCount];
        float[] sourceWeights = ((WeightedDataset<L, F>)this).weights;
        System.arraycopy(sourceWeights, start, devWeights, 0, devCount);
        System.arraycopy(sourceWeights, 0, trainWeights, 0, start);
        System.arraycopy(sourceWeights, end, trainWeights, start, tailCount);
        WeightedDataset<L, F> weightedDev = new WeightedDataset<L, F>(this.labelIndex, devLabelIds, this.featureIndex, devRows, devCount, devWeights);
        WeightedDataset<L, F> weightedTrain = new WeightedDataset<L, F>(this.labelIndex, trainLabelIds, this.featureIndex, trainRows, trainCount, trainWeights);
        return new Pair<GeneralDataset<L, F>, GeneralDataset<L, F>>(weightedTrain, weightedDev);
    }

    /**
     * Returns a new dataset holding a random sample, drawn without replacement, of roughly
     * {@code p * size()} datums. The result shares this dataset's indices and per-datum feature
     * arrays. For a given seed the sample is reproducible.
     *
     * @param p fraction of the datums to keep; must yield a sample size in {@code [0, size()]}
     * @param seed seed for the random number generator
     * @return the sampled dataset
     * @throws IllegalArgumentException if the requested sample is negative or larger than this
     *         dataset (the latter could never be satisfied without replacement)
     */
    public Dataset<L, F> getRandomSubDataset(double p, int seed) {
        int total = this.size();
        int newSize = (int)(p * (double)total);
        if (newSize < 0 || newSize > total) {
            // Without this guard an oversized request would loop forever below, because the set
            // of distinct indices can never grow past the dataset size.
            throw new IllegalArgumentException("Cannot sample " + newSize + " of " + total + " datums (p=" + p + ")");
        }
        Set<Integer> indicesToKeep = Generics.newHashSet();
        Random r = new Random(seed);
        // Rejection sampling: duplicates are absorbed by the set until enough distinct indices exist.
        while (indicesToKeep.size() < newSize) {
            indicesToKeep.add(r.nextInt(total));
        }
        int[][] newData = new int[newSize][];
        int[] newLabels = new int[newSize];
        int i = 0;
        for (int j : indicesToKeep) {
            newData[i] = this.data[j];
            newLabels[i] = this.labels[j];
            ++i;
        }
        return new Dataset<L, F>(this.labelIndex, newLabels, this.featureIndex, newData);
    }

    /**
     * Returns the per-datum feature values. This class stores only feature indices, and this
     * implementation always returns {@code null}.
     *
     * @return always {@code null}
     */
    @Override
    public double[][] getValuesArray() {
        return null;
    }

    /**
     * Reads an SVM-light formatted file into a dataset that uses fresh indices.
     *
     * @param filename path of the file to read
     * @return the dataset built from the file
     */
    public static Dataset<String, String> readSVMLightFormat(String filename) {
        Index<String> features = new HashIndex<String>();
        Index<String> labels = new HashIndex<String>();
        return Dataset.readSVMLightFormat(filename, features, labels);
    }

    /**
     * Reads an SVM-light formatted file into a dataset that uses fresh indices, also collecting
     * the raw lines.
     *
     * @param filename path of the file to read
     * @param lines list that every line read is appended to; may be {@code null}
     * @return the dataset built from the file
     */
    public static Dataset<String, String> readSVMLightFormat(String filename, List<String> lines) {
        Index<String> features = new HashIndex<String>();
        Index<String> labels = new HashIndex<String>();
        return Dataset.readSVMLightFormat(filename, features, labels, lines);
    }

    /**
     * Reads an SVM-light formatted file into a dataset that uses the supplied indices.
     *
     * @param filename path of the file to read
     * @param featureIndex feature index for the new dataset
     * @param labelIndex label index for the new dataset
     * @return the dataset built from the file
     */
    public static Dataset<String, String> readSVMLightFormat(String filename, Index<String> featureIndex, Index<String> labelIndex) {
        return Dataset.readSVMLightFormat(filename, featureIndex, labelIndex, null);
    }

    /**
     * Reads an SVM-light formatted file line by line, converting each line with
     * {@link #svmLightLineToDatum(String)} and adding it to a dataset that uses the supplied
     * indices.
     *
     * @param filename path of the file to read
     * @param featureIndex feature index for the new dataset
     * @param labelIndex label index for the new dataset
     * @param lines if non-null, every line read is appended to this list before it is parsed
     * @return the dataset built from the file
     * @throws RuntimeException wrapping any exception raised while reading or parsing
     */
    public static Dataset<String, String> readSVMLightFormat(String filename, Index<String> featureIndex, Index<String> labelIndex, List<String> lines) {
        try {
            Dataset<String, String> loaded = new Dataset<String, String>(10, featureIndex, labelIndex);
            boolean keepLines = lines != null;
            for (String rawLine : ObjectBank.getLineIterator(new File(filename))) {
                if (keepLines) {
                    lines.add(rawLine);
                }
                loaded.add(Dataset.svmLightLineToDatum(rawLine));
            }
            return loaded;
        }
        catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Converts one SVM-light line ({@code label name:value name:value ... # comment}) into a
     * datum. Everything from the first {@code #} onward is discarded. Each feature name is
     * repeated {@code (int) value} times in the resulting feature list, and one extra constant
     * feature named {@code String.valueOf(Integer.MAX_VALUE)} is appended to every datum.
     *
     * <p>A token that does not split into exactly two parts on {@code ':'} is reported on
     * standard error with the running line counter, after which parsing of that token continues.
     *
     * @param l one line of the file
     * @return a datum whose label is the first whitespace-separated token of the line
     */
    public static Datum<String, String> svmLightLineToDatum(String l) {
        ++line1;
        String[] tokens = l.replaceAll("#.*", "").split("\\s+");
        List<String> featureList = new ArrayList<String>();
        for (int t = 1; t < tokens.length; ++t) {
            String[] nameAndValue = tokens[t].split(":");
            if (nameAndValue.length != 2) {
                System.err.println("Dataset error: line " + line1);
            }
            // The value is parsed as a double and truncated to a repetition count.
            int repetitions = (int)Double.parseDouble(nameAndValue[1]);
            for (int r = 0; r < repetitions; ++r) {
                featureList.add(nameAndValue[0]);
            }
        }
        featureList.add(String.valueOf(Integer.MAX_VALUE));
        return new BasicDatum<String, String>(featureList, tokens[0]);
    }

    /**
     * Counts, for every feature, the number of datums in which it occurs at least once (its
     * document frequency). Repeated occurrences inside one datum count once.
     *
     * @return a counter mapping each feature to the number of datums containing it
     */
    public Counter<F> getFeatureCounter() {
        Counter<F> featureCounts = new ClassicCounter<F>();
        int numDatums = this.size();
        for (int i = 0; i < numDatums; ++i) {
            // Only asFeatures() is needed, so the Datum interface suffices; no downcast to
            // BasicDatum that would fail if a subclass's getDatum returned another type.
            Datum<L, F> datum = this.getDatum(i);
            Set<F> distinctFeatures = Generics.newHashSet(datum.asFeatures());
            for (F feature : distinctFeatures) {
                featureCounts.incrementCount(feature, 1.0);
            }
        }
        return featureCounts;
    }

    /**
     * Builds a real-valued datum whose feature values are TF-IDF weights divided by their sum.
     * Features absent from {@code featureDocCounts} are dropped. For each remaining feature,
     * tf is its number of occurrences in {@code datum} and
     * idf is {@code ln((size() + 1) / (docCount + 0.5))}.
     *
     * @param datum the datum to convert
     * @param featureDocCounts number of datums containing each feature
     * @return a datum with the same label and normalized TF-IDF feature values
     */
    public RVFDatum<L, F> getL1NormalizedTFIDFDatum(Datum<L, F> datum, Counter<F> featureDocCounts) {
        Counter<F> weights = new ClassicCounter<F>();
        // Term frequencies, restricted to features that have a document count.
        for (F feature : datum.asFeatures()) {
            if (featureDocCounts.containsKey(feature)) {
                weights.incrementCount(feature, 1.0);
            }
        }
        // Replace each term frequency by tf * idf and accumulate the normalizer.
        double normalizer = 0.0;
        for (F feature : weights.keySet()) {
            double idf = Math.log((double)(this.size() + 1) / (featureDocCounts.getCount(feature) + 0.5));
            double weighted = weights.getCount(feature) * idf;
            weights.setCount(feature, weighted);
            normalizer += weighted;
        }
        for (F feature : weights.keySet()) {
            weights.setCount(feature, weights.getCount(feature) / normalizer);
        }
        return new RVFDatum<L, F>(weights, datum.label());
    }

    /**
     * Converts every datum of this dataset with
     * {@link #getL1NormalizedTFIDFDatum(Datum, Counter)}, using document counts taken from this
     * dataset, and collects the results in a new real-valued dataset constructed with this
     * dataset's indices.
     *
     * @return the TF-IDF weighted dataset
     */
    public RVFDataset<L, F> getL1NormalizedTFIDFDataset() {
        RVFDataset<L, F> weighted = new RVFDataset<L, F>(this.size(), this.featureIndex, this.labelIndex);
        Counter<F> docCounts = this.getFeatureCounter();
        int position = 0;
        while (position < this.size()) {
            weighted.add(this.getL1NormalizedTFIDFDatum(this.getDatum(position), docCounts));
            ++position;
        }
        return weighted;
    }

    /**
     * Adds a datum, using its feature collection and label.
     *
     * @param d the datum to add
     */
    @Override
    public void add(Datum<L, F> d) {
        this.add(d.asFeatures(), d.label());
    }

    /**
     * Adds a datum given as features and a label; features not yet in the feature index are
     * added to it.
     *
     * @param features the datum's features
     * @param label the datum's label
     */
    public void add(Collection<F> features, L label) {
        this.add(features, label, true);
    }

    /**
     * Adds a datum given as features and a label.
     *
     * @param features the datum's features
     * @param label the datum's label (added to the label index if new)
     * @param addNewFeatures if true, unseen features are added to the feature index; if false,
     *        features missing from the index are dropped from this datum
     */
    public void add(Collection<F> features, L label, boolean addNewFeatures) {
        // Make room first: the helpers below write into slot [size].
        this.ensureSize();
        this.addLabel(label);
        this.addFeatures(features, addNewFeatures);
        ++this.size;
    }

    /**
     * Adds a datum given directly as index values. The values are not checked against the
     * indices and the {@code features} array is stored by reference, not copied.
     *
     * @param features feature index values of the datum
     * @param label label index value of the datum
     */
    public void add(int[] features, int label) {
        this.ensureSize();
        this.addLabelIndex(label);
        this.addFeatureIndices(features);
        ++this.size;
    }

    /**
     * Grows the label and data arrays when the label array is full, so that slot {@code [size]}
     * can be written. Capacity is doubled, with a minimum of 1 so that a dataset created with
     * (or trimmed to) zero capacity can still accept its first datum.
     */
    protected void ensureSize() {
        if (this.labels.length != this.size) {
            return;
        }
        // Doubling 0 stays 0, which would make the next write to labels[size] fail.
        int newCapacity = Math.max(this.size * 2, 1);
        int[] newLabels = new int[newCapacity];
        System.arraycopy(this.labels, 0, newLabels, 0, this.size);
        this.labels = newLabels;
        int[][] newData = new int[newCapacity][];
        System.arraycopy(this.data, 0, newData, 0, this.size);
        this.data = newData;
    }

    /**
     * Registers {@code label} in the label index and records its index value as the label of
     * the datum currently being added (slot {@code [size]}).
     *
     * @param label the label of the datum being added
     */
    protected void addLabel(L label) {
        this.labelIndex.add(label);
        this.labels[this.size] = this.labelIndex.indexOf(label);
    }

    /**
     * Records an already-indexed label value for the datum currently being added (slot
     * {@code [size]}). The value is stored as given, without consulting the label index.
     *
     * @param label label index value
     */
    protected void addLabelIndex(int label) {
        this.labels[this.size] = label;
    }

    /**
     * Stores the features of the datum currently being added, adding unseen features to the
     * feature index.
     *
     * @param features the datum's features
     */
    protected void addFeatures(Collection<F> features) {
        this.addFeatures(features, true);
    }

    /**
     * Converts {@code features} to feature index values and stores them as the data of the datum
     * currently being added (slot {@code [size]}). Order and duplicates are preserved.
     *
     * @param features the datum's features
     * @param addNewFeatures if true, features not yet in the feature index are added to it;
     *        if false, such features are left out of the stored datum
     */
    protected void addFeatures(Collection<F> features, boolean addNewFeatures) {
        int[] intFeatures = new int[features.size()];
        int count = 0;
        for (F feature : features) {
            if (addNewFeatures) {
                this.featureIndex.add(feature);
            }
            // Look the feature up once; a negative result means it is not in the index.
            int index = this.featureIndex.indexOf(feature);
            if (index >= 0) {
                intFeatures[count++] = index;
            }
        }
        // Trim to the number of features actually kept.
        this.data[this.size] = Arrays.copyOf(intFeatures, count);
    }

    /**
     * Stores already-indexed feature values as the data of the datum currently being added
     * (slot {@code [size]}). The array is stored by reference, not copied.
     *
     * @param features feature index values of the datum
     */
    protected void addFeatureIndices(int[] features) {
        this.data[this.size] = features;
    }

    /**
     * Resets this dataset to an empty state: fresh label and feature indices, arrays of the
     * requested length, and a size of zero.
     *
     * @param numDatums length of the newly allocated label and data arrays
     */
    @Override
    protected final void initialize(int numDatums) {
        this.size = 0;
        this.labelIndex = new HashIndex<L>();
        this.featureIndex = new HashIndex<F>();
        this.labels = new int[numDatums];
        this.data = new int[numDatums][];
    }

    /**
     * Rebuilds the datum stored at {@code index} by mapping its feature and label index values
     * back to objects.
     *
     * @param index position of the datum
     * @return a datum with the stored features and label
     */
    @Override
    public Datum<L, F> getDatum(int index) {
        Collection<F> features = this.featureIndex.objects(this.data[index]);
        L label = this.labelIndex.get(this.labels[index]);
        return new BasicDatum<L, F>(features, label);
    }

    /**
     * Rebuilds the datum stored at {@code index} as a real-valued datum in which each feature's
     * value is the number of times it occurs in the stored datum.
     *
     * @param index position of the datum
     * @return a real-valued datum with occurrence counts as feature values
     */
    @Override
    public RVFDatum<L, F> getRVFDatum(int index) {
        Counter<F> occurrences = new ClassicCounter<F>();
        for (F feature : this.featureIndex.objects(this.data[index])) {
            occurrences.incrementCount(feature);
        }
        L label = this.labelIndex.get(this.labels[index]);
        return new RVFDatum<L, F>(occurrences, label);
    }

    /** Prints {@link #toSummaryStatistics()} to standard error. */
    @Override
    public void summaryStatistics() {
        String summary = this.toSummaryStatistics();
        System.err.println(summary);
    }

    /**
     * Describes this dataset: the number of datums, the number of labels followed by the
     * comma-separated labels in brackets, and the number of feature types.
     *
     * @return the three-line summary, each line terminated by {@code '\n'}
     */
    public String toSummaryStatistics() {
        StringBuilder out = new StringBuilder();
        out.append("numDatums: ").append(this.size).append('\n');
        out.append("numLabels: ").append(this.labelIndex.size()).append(" [");
        // The separator is empty before the first label and ", " before every later one.
        String separator = "";
        for (L label : this.labelIndex) {
            out.append(separator).append(label);
            separator = ", ";
        }
        out.append("]\n");
        out.append("numFeatures (Phi(X) types): ").append(this.featureIndex.size()).append('\n');
        return out.toString();
    }

    /**
     * Removes rare features according to pattern-specific count thresholds and rewrites the data
     * to use a new, compacted feature index.
     *
     * <p>For each feature, the first pair whose pattern matches the feature's string form
     * decides: the feature is kept only if its count (from {@code getFeatureCounts()}) is at
     * least that pair's threshold. Features matched by no pattern are always kept.
     *
     * @param thresholds (pattern, minimum count) pairs, consulted in list order
     */
    public void applyFeatureCountThreshold(List<Pair<Pattern, Integer>> thresholds) {
        float[] counts = this.getFeatureCounts();
        Index<F> keptFeatures = new HashIndex<F>();
        for (F feature : this.featureIndex) {
            boolean keep = true;
            for (Pair<Pattern, Integer> threshold : thresholds) {
                Matcher matcher = threshold.first().matcher(feature.toString());
                if (matcher.matches()) {
                    // Only the first matching pattern is consulted.
                    float count = counts[this.featureIndex.indexOf(feature)];
                    keep = count >= (float)threshold.second().intValue();
                    break;
                }
            }
            if (keep) {
                keptFeatures.add(feature);
            }
        }

        // Old feature id -> new feature id (negative when the feature was dropped).
        int[] featMap = new int[this.featureIndex.size()];
        for (int oldId = 0; oldId < featMap.length; ++oldId) {
            featMap[oldId] = keptFeatures.indexOf(this.featureIndex.get(oldId));
        }

        // The old index is no longer needed; release it while the data is rewritten.
        this.featureIndex = null;
        for (int row = 0; row < this.size; ++row) {
            int[] oldRow = this.data[row];
            int[] buffer = new int[oldRow.length];
            int kept = 0;
            for (int oldId : oldRow) {
                int newId = featMap[oldId];
                if (newId >= 0) {
                    buffer[kept++] = newId;
                }
            }
            this.data[row] = Arrays.copyOf(buffer, kept);
        }
        this.featureIndex = keptFeatures;
    }

    /**
     * Prints the dataset as a dense, tab-separated 0/1 matrix: a header row listing every
     * feature, then one row per datum consisting of the datum's label followed by one column per
     * feature ({@code 1} if the datum has the feature, {@code 0} otherwise).
     *
     * @param pw writer to print to
     */
    public void printFullFeatureMatrix(PrintWriter pw) {
        String sep = "\t";
        int numFeatures = this.featureIndex.size();
        for (int f = 0; f < numFeatures; ++f) {
            pw.print(sep + this.featureIndex.get(f));
        }
        pw.println();
        // Iterate over the datums actually present (size), not the array capacity: slots past
        // size hold no data.
        for (int i = 0; i < this.size; ++i) {
            // The row label is the label of datum i, not the i-th entry of the label index.
            pw.print(this.labelIndex.get(this.labels[i]));
            boolean[] present = new boolean[numFeatures];
            for (int feature : this.data[i]) {
                present[feature] = true;
            }
            for (int j = 0; j < numFeatures; ++j) {
                pw.print(sep + (present[j] ? '1' : '0'));
            }
            // End the row so that each datum is on its own line.
            pw.println();
        }
    }

    /** Prints the sparse feature matrix to standard output through an auto-flushing writer. */
    @Override
    public void printSparseFeatureMatrix() {
        PrintWriter stdout = new PrintWriter(System.out, true);
        this.printSparseFeatureMatrix(stdout);
    }

    /**
     * Prints one line per datum: the datum's label followed by each of its features, every
     * feature preceded by a tab.
     *
     * @param pw writer to print to
     */
    @Override
    public void printSparseFeatureMatrix(PrintWriter pw) {
        String sep = "\t";
        int row = 0;
        while (row < this.size) {
            pw.print(this.labelIndex.get(this.labels[row]));
            int[] featureIds = this.data[row];
            for (int k = 0; k < featureIds.length; ++k) {
                pw.print(sep + this.featureIndex.get(featureIds[k]));
            }
            pw.println();
            ++row;
        }
    }

    /**
     * Switches this dataset to a different label index: the label array is trimmed with
     * {@code trimToSize}, every stored label value is translated to the value
     * {@code newLabelIndex.indexOf} gives for the same label object, and the new index is
     * installed.
     *
     * @param newLabelIndex the label index to use from now on
     */
    public void changeLabelIndex(Index<L> newLabelIndex) {
        int[] trimmed = this.trimToSize(this.labels);
        this.labels = trimmed;
        for (int pos = 0; pos < trimmed.length; ++pos) {
            L label = this.labelIndex.get(trimmed[pos]);
            trimmed[pos] = newLabelIndex.indexOf(label);
        }
        this.labelIndex = newLabelIndex;
    }

    /**
     * Switches this dataset to a different feature index. The data and label arrays are trimmed
     * with {@code trimToSize}, every stored feature value is translated to its value in
     * {@code newFeatureIndex}, and features that the new index does not contain are dropped from
     * the datums.
     *
     * @param newFeatureIndex the feature index to use from now on
     */
    public void changeFeatureIndex(Index<F> newFeatureIndex) {
        this.data = this.trimToSize(this.data);
        this.labels = this.trimToSize(this.labels);
        int rowCount = this.data.length;
        int[][] remapped = new int[rowCount][];
        for (int row = 0; row < rowCount; ++row) {
            int[] oldRow = this.data[row];
            int[] buffer = new int[oldRow.length];
            int kept = 0;
            for (int oldId : oldRow) {
                int newId = newFeatureIndex.indexOf(this.featureIndex.get(oldId));
                if (newId >= 0) {
                    buffer[kept++] = newId;
                }
            }
            remapped[row] = Arrays.copyOf(buffer, kept);
        }
        this.data = remapped;
        this.featureIndex = newFeatureIndex;
    }

    /**
     * Keeps only the {@code numFeatures} features with the highest scores returned by
     * {@link #getInformationGains()}.
     *
     * @param numFeatures maximum number of features to keep
     */
    public void selectFeaturesBinaryInformationGain(int numFeatures) {
        this.selectFeatures(numFeatures, this.getInformationGains());
    }

    /**
     * Keeps only the highest-scoring features and rewrites the data against a new feature index
     * that lists the kept features in descending score order.
     *
     * @param numFeatures maximum number of features to keep
     * @param scores score of each feature, indexed by its current feature index value
     */
    public void selectFeatures(int numFeatures, double[] scores) {
        List<ScoredObject<F>> ranked = new ArrayList<ScoredObject<F>>();
        for (int featureId = 0; featureId < scores.length; ++featureId) {
            ranked.add(new ScoredObject<F>(this.featureIndex.get(featureId), scores[featureId]));
        }
        Collections.sort(ranked, ScoredComparator.DESCENDING_COMPARATOR);

        Index<F> selected = new HashIndex<F>();
        int limit = Math.min(ranked.size(), numFeatures);
        for (int rank = 0; rank < limit; ++rank) {
            selected.add(ranked.get(rank).object());
        }

        for (int row = 0; row < this.size; ++row) {
            int[] oldRow = this.data[row];
            int[] buffer = new int[oldRow.length];
            int kept = 0;
            for (int oldId : oldRow) {
                int newId = selected.indexOf(this.featureIndex.get(oldId));
                // -1 means the feature was not selected.
                if (newId != -1) {
                    buffer[kept++] = newId;
                }
            }
            this.data[row] = Arrays.copyOf(buffer, kept);
        }
        this.featureIndex = selected;
    }

    /**
     * Computes, for every feature, the information gain of the binary event "feature present in
     * the datum" with respect to the label:
     * {@code IG(f) = H(L) - [P(f) H(L | f) + P(not f) H(L | not f)]}, with entropies in bits.
     *
     * <p>Features that occur in no datum or in every datum get a gain of 0. As a side effect the
     * label array is trimmed with {@code trimToSize}.
     *
     * @return the information gain of each feature, indexed by feature index value
     */
    public double[] getInformationGains() {
        this.labels = this.trimToSize(this.labels);
        final double log2 = Math.log(2.0);
        final double numDatums = (double)this.size();
        final int numFeatures = this.featureIndex.size();
        final int numLabels = this.labelIndex.size();

        // Document-level counts: datums per feature, per label, and per (feature, label) pair.
        ClassicCounter<F> featureCounter = new ClassicCounter<F>();
        ClassicCounter<L> labelCounter = new ClassicCounter<L>();
        TwoDimensionalCounter<F, L> condCounter = new TwoDimensionalCounter<F, L>();
        for (int i = 0; i < this.labels.length; ++i) {
            L label = this.labelIndex.get(this.labels[i]);
            labelCounter.incrementCount(label);
            // Presence flags, so that a feature repeated within a datum is counted once.
            boolean[] doc = new boolean[numFeatures];
            for (int featureId : this.data[i]) {
                doc[featureId] = true;
            }
            for (int j = 0; j < doc.length; ++j) {
                if (!doc[j]) {
                    continue;
                }
                F feature = this.featureIndex.get(j);
                featureCounter.incrementCount(feature);
                condCounter.incrementCount(feature, label, 1.0);
            }
        }

        // H(L). Labels present in the index but absent from the data contribute nothing;
        // evaluating 0 * log(0) would otherwise turn the whole result into NaN.
        double entropy = 0.0;
        for (int j = 0; j < numLabels; ++j) {
            double labelCount = labelCounter.getCount(this.labelIndex.get(j));
            if (labelCount == 0.0) {
                continue;
            }
            double p = labelCount / numDatums;
            entropy -= p * (Math.log(p) / log2);
        }

        double[] ig = new double[numFeatures];
        Arrays.fill(ig, entropy);
        for (int i = 0; i < numFeatures; ++i) {
            F feature = this.featureIndex.get(i);
            double featureCount = featureCounter.getCount(feature);
            double notFeatureCount = numDatums - featureCount;
            if (featureCount == 0.0 || notFeatureCount == 0.0) {
                // A feature that is always or never present carries no information.
                ig[i] = 0.0;
                continue;
            }
            double pFeature = featureCount / numDatums;
            double pNotFeature = 1.0 - pFeature;
            double sumFeature = 0.0;
            double sumNotFeature = 0.0;
            for (int j = 0; j < numLabels; ++j) {
                L label = this.labelIndex.get(j);
                double featureLabelCount = condCounter.getCount(feature, label);
                // Datums with this label that lack the feature: label total minus those with
                // the feature (not dataset size minus those with the feature).
                double notFeatureLabelCount = labelCounter.getCount(label) - featureLabelCount;
                if (featureLabelCount != 0.0) {
                    double p = featureLabelCount / featureCount;
                    sumFeature += p * (Math.log(p) / log2);
                }
                if (notFeatureLabelCount != 0.0) {
                    double pNot = notFeatureLabelCount / notFeatureCount;
                    sumNotFeature += pNot * (Math.log(pNot) / log2);
                }
            }
            // The sums are negated conditional entropies, so adding them subtracts H(L | .).
            ig[i] += pFeature * sumFeature + pNotFeature * sumNotFeature;
        }
        return ig;
    }

    /**
     * Replaces the label array of this dataset. The array is stored by reference, not copied.
     *
     * @param labels new label index values, one per datum
     * @throws IllegalArgumentException if {@code labels.length} differs from {@code size()}
     */
    public void updateLabels(int[] labels) {
        if (labels.length != this.size()) {
            throw new IllegalArgumentException("size of labels array does not match dataset size");
        }
        this.labels = labels;
    }

    /** Returns a short description of the form {@code "Dataset of size N"}. */
    public String toString() {
        return "Dataset of size " + this.size;
    }

    /**
     * Builds a three-line summary of this dataset: the number of data points, the number of
     * active feature tokens and the number of active feature types.
     *
     * @return the summary text
     */
    public String toSummaryString() {
        StringWriter sw = new StringWriter();
        PrintWriter pw = new PrintWriter(sw);
        pw.println("Number of data points: " + this.size());
        pw.println("Number of active feature tokens: " + this.numFeatureTokens());
        pw.println("Number of active feature types:" + this.numFeatureTypes());
        pw.flush();
        // The text accumulates in the StringWriter; PrintWriter.toString() is only the default
        // Object identity string.
        return sw.toString();
    }

    /**
     * Prints one line in SVM-light format: the class number, then for each feature key of
     * {@code c} in ascending order {@code (key + 1):count}, with every entry followed by a space.
     *
     * @param pw writer to print to
     * @param c feature values keyed by zero-based feature number
     * @param classNo class number written at the start of the line
     */
    public static void printSVMLightFormat(PrintWriter pw, ClassicCounter<Integer> c, int classNo) {
        Integer[] sortedFeatures = c.keySet().toArray(new Integer[c.keySet().size()]);
        Arrays.sort(sortedFeatures);
        StringBuilder row = new StringBuilder();
        row.append(classNo).append(' ');
        for (Integer feature : sortedFeatures) {
            // Keys are zero-based here; the printed feature numbers are one-based.
            int featureNumber = feature.intValue() + 1;
            row.append(featureNumber).append(':').append(c.getCount(feature)).append(' ');
        }
        pw.println(row.toString());
    }
}

