/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.classify;

import edu.stanford.nlp.classify.Classifier;
import edu.stanford.nlp.classify.ClassifierFactory;
import edu.stanford.nlp.classify.Dataset;
import edu.stanford.nlp.classify.GeneralDataset;
import edu.stanford.nlp.classify.LinearClassifier;
import edu.stanford.nlp.classify.LinearClassifierFactory;
import edu.stanford.nlp.classify.LogPrior;
import edu.stanford.nlp.classify.LogisticClassifierFactory;
import edu.stanford.nlp.classify.NBLinearClassifierFactory;
import edu.stanford.nlp.classify.RVFClassifier;
import edu.stanford.nlp.classify.RVFDataset;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.ling.BasicDatum;
import edu.stanford.nlp.ling.Datum;
import edu.stanford.nlp.ling.RVFDatum;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.objectbank.ObjectBank;
import edu.stanford.nlp.optimization.Minimizer;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.Tokenizer;
import edu.stanford.nlp.process.TokenizerFactory;
import edu.stanford.nlp.process.WordShapeClassifier;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.stats.Counters;
import edu.stanford.nlp.stats.Distribution;
import edu.stanford.nlp.stats.TwoDimensionalCounter;
import edu.stanford.nlp.util.ErasureUtils;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.ReflectionLoading;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.Timing;
import java.io.File;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.PrintWriter;
import java.io.Serializable;
import java.io.StringReader;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Enumeration;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

public class ColumnDataClassifier {
    // Default numeric feature value. NOTE(review): the methods visible here pass the literal 1.0 directly — confirm where this constant is used.
    private static final double DEFAULT_VALUE = 1.0;
    // Whitespace regexp used by makeSVMLightLineInfos to split SVMLight lines into fields.
    private static final String DEFAULT_IGNORE_REGEXP = "\\s+";
    // Per-column settings, indexed by column number; an entry may be null (such columns yield no features in makeDatum).
    private final Flags[] flags;
    // Settings that apply to the whole data set (gold answer column, feature format, ranking options, ...).
    private final Flags globalFlags;
    private Classifier<String, String> classifier;
    // Created lazily by ptbTokenize on first use.
    private TokenizerFactory<Word> ptbFactory;
    // Splits a (non-CSV) input line into its tab-separated fields.
    private static final Pattern tab = Pattern.compile("\\t");
    // Running state of the ranking evaluation, updated by writeAnswer and consumed by
    // finishRanking / writeResultsSummary. These are static, so they are shared by all instances.
    private static int numGroups = 0;
    private static String lastGroup = "";
    private static int numInGroup = 0;
    private static double bestProb = 0.0;
    private static double bestSim = 0.0;
    private static boolean currentHighestProbCorrect = false;
    private static boolean foundAnswerInGroup = false;
    // Three-decimal format used for probabilities and scores in the printed reports.
    private static final NumberFormat nf = new DecimalFormat("0.000");
    // NOTE(review): presumably a cache of substrings per word used by n-gram feature generation — confirm against makeNGramFeatures.
    private static final Map<String, Collection<String>> wordToSubstrings = new ConcurrentHashMap<String, Collection<String>>();
    // NOTE(review): presumably the writer behind newFeaturePrinter / printFeatures / closeFeaturePrinter — confirm.
    private static PrintWriter cliqueWriter;

    /**
     * Builds a datum from one raw line of input.
     *
     * @param line a single line of a data file; it is split into fields by
     *             {@code splitLineToFields}
     * @return the datum produced by {@link #makeDatumFromStrings(String[])} for those fields
     */
    public Datum<String, String> makeDatumFromLine(String line) {
        String[] fields = this.splitLineToFields(line);
        return this.makeDatumFromStrings(fields);
    }

    /**
     * Builds a datum from the already-split fields of one data line.
     *
     * <p>Real-valued data sets are delegated to {@code makeRVFDatumFromStrings}. Otherwise, in
     * "feature format" every column except the gold answer column is used verbatim as a feature
     * (prefixed with {@code "<column>:"} when column ids are significant); in all other cases the
     * per-column flags drive feature extraction via {@code makeDatum}.
     *
     * @param strings the fields of one line; must contain the gold answer column
     * @return a datum whose label is the value of the gold answer column
     */
    public Datum<String, String> makeDatumFromStrings(String[] strings) {
        if (this.globalFlags.usesRealValues) {
            return this.makeRVFDatumFromStrings(strings);
        }
        if (!this.globalFlags.featureFormat) {
            return this.makeDatum(strings);
        }
        List<String> features = new ArrayList<String>();
        for (int column = 0; column < strings.length; ++column) {
            if (column == this.globalFlags.goldAnswerColumn) {
                // The answer column is the label, never a feature.
                continue;
            }
            String feature = this.globalFlags.significantColumnId
                    ? String.format("%d:%s", column, strings[column])
                    : strings[column];
            features.add(feature);
        }
        return new BasicDatum<String, String>(features, strings[this.globalFlags.goldAnswerColumn]);
    }

    /**
     * Builds a real-valued datum from the already-split fields of one data line.
     *
     * <p>In "feature format" each column other than the gold answer column becomes a feature:
     * columns whose flags request a real value or a log / logit / sqrt transform are parsed as
     * numbers via {@code addFeatureValue}; every other column is an indicator feature with
     * count 1.0. Outside feature format the work is delegated to {@code makeRVFDatum}.
     *
     * @param strings the fields of one line; must contain the gold answer column
     * @return a real-valued datum labelled with the value of the gold answer column
     */
    private RVFDatum<String, String> makeRVFDatumFromStrings(String[] strings) {
        if (!this.globalFlags.featureFormat) {
            return this.makeRVFDatum(strings);
        }
        ClassicCounter<String> theFeatures = new ClassicCounter<String>();
        for (int i = 0; i < strings.length; ++i) {
            if (i == this.globalFlags.goldAnswerColumn) {
                continue;
            }
            // A line may have more columns than there are per-column flag entries; such columns
            // have no flags and are treated as plain indicator features instead of indexing
            // past the end of the flags array.
            Flags columnFlags = i < this.flags.length ? this.flags[i] : null;
            boolean numericColumn = columnFlags != null
                    && (columnFlags.isRealValued || columnFlags.logTransform || columnFlags.logitTransform || columnFlags.sqrtTransform);
            if (numericColumn) {
                ColumnDataClassifier.addFeatureValue(strings[i], columnFlags, theFeatures);
            } else {
                theFeatures.setCount(strings[i], 1.0);
            }
        }
        return new RVFDatum<String, String>(theFeatures, strings[this.globalFlags.goldAnswerColumn]);
    }

    /**
     * Reads a training file and returns only the resulting data set.
     *
     * @param fileName path of the training file
     * @return the first element of the pair produced by
     *         {@link #readAndReturnTrainingExamples(String)}
     */
    public GeneralDataset<String, String> readTrainingExamples(String fileName) {
        Pair<GeneralDataset<String, String>, List<String[]>> dataInfo = this.readAndReturnTrainingExamples(fileName);
        return dataInfo.first();
    }

    /**
     * Reads a training file, optionally applies the feature count threshold, and prints summary
     * statistics for the resulting data set.
     *
     * <p>If {@code printFeatures} is configured, a feature printer for the "train" phase is
     * opened before reading.
     *
     * <p>NOTE(review): the file is read with {@code inTestPhase == true}, so the per-line fields
     * are retained in the returned pair, and the SVMLight reader is selected by
     * {@code Flags.testFromSVMLight} rather than {@code Flags.trainFromSVMLight} — confirm this
     * is intended.
     *
     * @param fileName path of the training file
     * @return the data set together with the split fields of every line read
     */
    public Pair<GeneralDataset<String, String>, List<String[]>> readAndReturnTrainingExamples(String fileName) {
        boolean dumpFeatures = this.globalFlags.printFeatures != null;
        if (dumpFeatures) {
            ColumnDataClassifier.newFeaturePrinter(this.globalFlags.printFeatures, "train", Flags.encoding);
        }
        Pair<GeneralDataset<String, String>, List<String[]>> dataInfo = this.readDataset(fileName, true);
        GeneralDataset<String, String> trainingData = dataInfo.first();
        int minSupport = this.globalFlags.featureMinimumSupport;
        if (minSupport > 1) {
            System.err.println("Removing Features with counts < " + minSupport);
            trainingData.applyFeatureCountThreshold(minSupport);
        }
        trainingData.summaryStatistics();
        return dataInfo;
    }

    /**
     * Reads a test file, keeping the split fields of every line alongside the data set.
     *
     * @param filename path of the test file
     * @return the data set and, per item, the fields of the line it was built from
     */
    public Pair<GeneralDataset<String, String>, List<String[]>> readTestExamples(String filename) {
        final boolean inTestPhase = true;
        return this.readDataset(filename, inTestPhase);
    }

    /**
     * Converts raw SVMLight lines into arrays of fields.
     *
     * <p>For each line, the first {@code '#'} and the text following it are removed, and the
     * remainder is split on runs of whitespace.
     *
     * @param lines the raw lines, in file order
     * @return one field array per input line, in the same order
     */
    private static List<String[]> makeSVMLightLineInfos(List<String> lines) {
        List<String[]> lineInfos = new ArrayList<String[]>(lines.size());
        for (String rawLine : lines) {
            String withoutComment = rawLine.replaceFirst("#.*$", "");
            String[] fields = withoutComment.split(DEFAULT_IGNORE_REGEXP);
            lineInfos.add(fields);
        }
        return lineInfos;
    }

    /**
     * Reads a data file into a data set, reporting progress and timing on standard error.
     *
     * <p>The file is read in SVMLight format when the SVMLight flag for the current phase is set
     * ({@code Flags.testFromSVMLight} in the test phase, {@code Flags.trainFromSVMLight}
     * otherwise); otherwise it is read line by line and each line is split into fields and
     * turned into a datum. In the test phase the fields of every line are also collected.
     *
     * @param filename    path of the file to read
     * @param inTestPhase whether the file is being read as test data
     * @return the data set and the per-line fields; the second element is {@code null} when
     *         {@code inTestPhase} is false
     * @throws RuntimeException wrapping any failure while reading a column-format file,
     *                          including a line with fewer than two fields
     */
    private Pair<GeneralDataset<String, String>, List<String[]>> readDataset(String filename, boolean inTestPhase) {
        Timing tim = new Timing();
        System.err.print("Reading dataset from " + filename + " ... ");
        GeneralDataset<String, String> dataset;
        List<String[]> lineInfos = null;
        boolean svmLightInput = inTestPhase ? Flags.testFromSVMLight : Flags.trainFromSVMLight;
        if (svmLightInput) {
            // Raw lines are only collected when they will be needed for test output.
            List<String> lines = inTestPhase ? new ArrayList<String>() : null;
            if (this.globalFlags.usesRealValues) {
                dataset = RVFDataset.readSVMLightFormat(filename, lines);
            } else {
                dataset = Dataset.readSVMLightFormat(filename, lines);
            }
            if (lines != null) {
                lineInfos = ColumnDataClassifier.makeSVMLightLineInfos(lines);
            }
        } else {
            try {
                if (inTestPhase) {
                    lineInfos = new ArrayList<String[]>();
                }
                if (this.globalFlags.usesRealValues) {
                    dataset = new RVFDataset<String, String>();
                } else {
                    dataset = new Dataset<String, String>();
                }
                int lineNo = 0;
                int fewestColumns = Integer.MAX_VALUE;
                int mostColumns = 0;
                for (String line : ObjectBank.getLineIterator(new File(filename), Flags.encoding)) {
                    ++lineNo;
                    String[] fields = this.splitLineToFields(line);
                    if (fields.length < 2) {
                        throw new RuntimeException("Line format error at line " + lineNo + ": " + line);
                    }
                    fewestColumns = Math.min(fewestColumns, fields.length);
                    mostColumns = Math.max(mostColumns, fields.length);
                    if (inTestPhase) {
                        lineInfos.add(fields);
                    }
                    dataset.add(this.makeDatumFromStrings(fields));
                }
                if (lineNo > 0 && fewestColumns != mostColumns) {
                    System.err.println();
                    System.err.println("WARNING: Number of tab-separated columns in " + filename + " varies between " + fewestColumns + " and " + mostColumns);
                }
            }
            catch (Exception e) {
                // Note that this also wraps the line-format RuntimeException thrown above.
                throw new RuntimeException("Dataset could not be processed", e);
            }
        }
        System.err.println("done [" + tim.toSecondsString() + "s, " + dataset.size() + " items].");
        return new Pair<GeneralDataset<String, String>, List<String[]>>(dataset, lineInfos);
    }

    /**
     * Splits one line of input into its fields.
     *
     * <p>With {@code Flags.csvFormat} the line is split on commas with double-quote quoting, and
     * a field that is still wrapped in a pair of double quotes has that pair removed. Otherwise
     * the line is split on tab characters.
     *
     * @param line the raw line
     * @return the fields of the line
     */
    private String[] splitLineToFields(String line) {
        if (!Flags.csvFormat) {
            return tab.split(line);
        }
        String[] strings = StringUtils.splitOnCharWithQuoting(line, ',', '\"', '\"');
        for (int i = 0; i < strings.length; ++i) {
            String field = strings[i];
            // The length check matters: a field that is a single '"' both starts and ends with
            // a quote, and stripping a "pair" from it would ask for substring(1, 0).
            if (field.length() >= 2 && field.startsWith("\"") && field.endsWith("\"")) {
                strings[i] = field.substring(1, field.length() - 1);
            }
        }
        return strings;
    }

    /**
     * Prints the evaluation summary to standard error and returns the two headline scores.
     *
     * <p>For every label the TP / FN / FP / TN counts are read from {@code contingency} (keys
     * {@code "<label>|TP"} etc.) and accuracy, precision, recall and F1 are printed. Precision
     * and recall are taken to be 1.0 when their denominator is zero. When ranking evaluation is
     * active (a grouping column and a ranking accuracy class are configured) the ranking
     * statistics stored under the {@code "Ranking|..."} keys are printed as well.
     *
     * @param num         number of test examples
     * @param contingency counts accumulated by {@code writeAnswer} and {@code finishRanking}
     * @param labels      the class labels to report on
     * @return pair of (accuracy / micro-averaged F1, macro-averaged F1)
     */
    private Pair<Double, Double> writeResultsSummary(int num, Counter<String> contingency, Collection<String> labels) {
        boolean rankingEval = this.globalFlags.groupingColumn >= 0 && this.globalFlags.rankingAccuracyClass != null;
        System.err.println();
        System.err.print(num + " examples");
        if (rankingEval) {
            System.err.print(" and " + numGroups + " ranking groups");
        }
        System.err.println(" in test set");

        int numClasses = 0;
        double truePositiveTotal = 0.0;
        double f1Total = 0.0;
        for (String label : labels) {
            ++numClasses;
            int tp = (int)contingency.getCount(label + "|TP");
            int fn = (int)contingency.getCount(label + "|FN");
            int fp = (int)contingency.getCount(label + "|FP");
            int tn = (int)contingency.getCount(label + "|TN");
            double precision = tp + fp == 0 ? 1.0 : (double)tp / (double)(tp + fp);
            double recall = tp + fn == 0 ? 1.0 : (double)tp / (double)(tp + fn);
            double f1 = precision == 0.0 && recall == 0.0 ? 0.0 : 2.0 * precision * recall / (precision + recall);
            double acc = ((double)tp + (double)tn) / (double)num;
            f1Total += f1;
            truePositiveTotal += (double)tp;
            System.err.println("Cls " + label + ": TP=" + tp + " FN=" + fn + " FP=" + fp + " TN=" + tn + "; Acc " + nf.format(acc) + " P " + nf.format(precision) + " R " + nf.format(recall) + " F1 " + nf.format(f1));
        }

        if (rankingEval) {
            double cor = (int)contingency.getCount("Ranking|Correct");
            double err = (int)contingency.getCount("Ranking|Error");
            double rankacc = cor + err == 0.0 ? 0.0 : cor / (cor + err);
            System.err.print("Ranking accuracy: " + nf.format(rankacc));
            double cov = (int)contingency.getCount("Ranking|Covered");
            double coverr = (int)contingency.getCount("Ranking|Uncovered");
            double covacc = cov + coverr == 0.0 ? 0.0 : cov / (cov + coverr);
            if (coverr > 0.5) {
                // Some groups had no correct answer: also report accuracy over the covered groups only.
                double ce = (int)(contingency.getCount("Ranking|Error") - contingency.getCount("Ranking|Uncovered"));
                double crankacc = cor + ce == 0.0 ? 0.0 : cor / (cor + ce);
                System.err.println(" (on " + nf.format(covacc) + " of groups with correct answer: " + nf.format(crankacc) + ')');
            } else {
                System.err.println();
            }
            if (this.globalFlags.rankingScoreColumn >= 0) {
                double totalSim = contingency.getCount("Ranking|Score");
                double ranksim = cor + err == 0.0 ? 0.0 : totalSim / (cor + err);
                System.err.println("Ranking average score: " + nf.format(ranksim));
            }
        }

        DecimalFormat nf2 = new DecimalFormat("0.00000");
        double microAccuracy = truePositiveTotal / (double)num;
        System.err.println("Accuracy/micro-averaged F1: " + nf2.format(microAccuracy));
        double macroF1 = f1Total / (double)numClasses;
        System.err.println("Macro-averaged F1: " + nf2.format(macroF1));
        return new Pair<Double, Double>(microAccuracy, macroF1);
    }

    /**
     * Prints the result line for one test item to standard output and updates the evaluation
     * counts.
     *
     * <p>The printed line is the displayed column (if configured), the gold answer, and either
     * every label with its probability in descending order ({@code Flags.displayAllAnswers}) or
     * the classifier's answer with the probabilities of that answer and of the gold answer.
     * For every label of the classifier exactly one of the {@code |TP}, {@code |FN},
     * {@code |FP}, {@code |TN} counts is incremented. When ranking evaluation is active, the
     * static per-group ranking state is updated, and the previous group is closed via
     * {@code finishRanking} whenever the grouping column changes value.
     *
     * @param strs        the fields of the test line
     * @param clAnswer    the label chosen by the classifier
     * @param cntr        the probability distribution over labels for this item
     * @param contingency the counter receiving the evaluation counts
     * @param c           the classifier, used for its label set
     * @param sim         the ranking score of this item
     */
    private void writeAnswer(String[] strs, String clAnswer, Distribution<String> cntr, Counter<String> contingency, Classifier<String, String> c, double sim) {
        String goldAnswer = strs[this.globalFlags.goldAnswerColumn];
        String printedText = this.globalFlags.displayedColumn >= 0 ? strs[this.globalFlags.displayedColumn] : "";

        String results;
        if (Flags.displayAllAnswers) {
            TreeSet<Pair<Double, String>> byProbability = new TreeSet<Pair<Double, String>>();
            for (String label : cntr.keySet()) {
                byProbability.add(new Pair<Double, String>(cntr.probabilityOf(label), label));
            }
            StringBuilder builder = new StringBuilder();
            for (Pair<Double, String> entry : byProbability.descendingSet()) {
                if (builder.length() > 0) {
                    builder.append("\t");
                }
                builder.append(entry.first().toString()).append('\t').append(entry.second());
            }
            results = builder.toString();
        } else {
            results = clAnswer + '\t' + nf.format(cntr.probabilityOf(clAnswer)) + '\t' + nf.format(cntr.probabilityOf(goldAnswer));
        }

        StringBuilder line = new StringBuilder();
        if (!printedText.isEmpty()) {
            line.append(printedText).append('\t');
        }
        line.append(goldAnswer).append('\t').append(results);
        System.out.println(line.toString());

        // One contingency cell per label: is it the gold answer, and is it the guess?
        for (String label : c.labels()) {
            boolean isGold = label.equals(goldAnswer);
            boolean isGuess = label.equals(clAnswer);
            String cell;
            if (isGold) {
                cell = isGuess ? "|TP" : "|FN";
            } else {
                cell = isGuess ? "|FP" : "|TN";
            }
            contingency.incrementCount(label + cell);
        }

        if (this.globalFlags.groupingColumn < 0 || this.globalFlags.rankingAccuracyClass == null) {
            return;
        }
        String group = strs[this.globalFlags.groupingColumn];
        if (!group.equals(lastGroup)) {
            // First item of a new group: close the previous group and restart the tracking.
            this.finishRanking(contingency, bestSim);
            ++numGroups;
            lastGroup = group;
            bestProb = cntr.probabilityOf(this.globalFlags.rankingAccuracyClass);
            bestSim = sim;
            numInGroup = 1;
            currentHighestProbCorrect = goldAnswer.equals(this.globalFlags.rankingAccuracyClass);
            foundAnswerInGroup = this.globalFlags.rankingAccuracyClass.equals(goldAnswer);
        } else {
            ++numInGroup;
            double prob = cntr.probabilityOf(this.globalFlags.rankingAccuracyClass);
            if (prob > bestProb) {
                bestProb = prob;
                bestSim = sim;
                currentHighestProbCorrect = goldAnswer.equals(this.globalFlags.rankingAccuracyClass);
            }
            if (this.globalFlags.rankingAccuracyClass.equals(goldAnswer)) {
                foundAnswerInGroup = true;
            }
        }
    }

    /**
     * Closes the current ranking group and records its outcome in {@code contingency}.
     *
     * <p>Does nothing when no item has been seen in the group. Otherwise one of
     * {@code Ranking|Correct} / {@code Ranking|Error} and one of {@code Ranking|Covered} /
     * {@code Ranking|Uncovered} is incremented, and {@code sim} is added to
     * {@code Ranking|Score}. With the {@code justify} flag a one-line description of the group
     * is printed to standard error first.
     *
     * @param contingency the counter receiving the ranking counts
     * @param sim         the score of the highest ranked item of the group
     */
    private void finishRanking(Counter<String> contingency, double sim) {
        if (numInGroup <= 0) {
            return;
        }
        if (this.globalFlags.justify) {
            StringBuilder report = new StringBuilder();
            report.append("Previous group of " + numInGroup + ": ");
            if (!foundAnswerInGroup) {
                report.append("no correct answer; ");
            }
            report.append("highest ranked guess was: " + (currentHighestProbCorrect ? "correct" : "incorrect"));
            report.append(" (sim. = " + nf.format(sim) + ')');
            System.err.println(report.toString());
        }
        contingency.incrementCount(currentHighestProbCorrect ? "Ranking|Correct" : "Ranking|Error");
        contingency.incrementCount(foundAnswerInGroup ? "Ranking|Covered" : "Ranking|Uncovered");
        contingency.incrementCount("Ranking|Score", sim);
    }

    /**
     * Classifies every item of a test set, prints one result line per item and finally the
     * evaluation summary.
     *
     * <p>A header describing the output columns is printed to standard error first. For each
     * item the label scores are turned into a distribution; if a biased hyperplane is
     * configured, the highest-scoring label whose probability exceeds its threshold is chosen,
     * otherwise (or if no label qualifies) the classifier's own answer is used.
     *
     * @param cl        the classifier to evaluate
     * @param test      the test data set
     * @param lineInfos the fields of the line behind each test item, indexed like {@code test}
     * @return pair of (accuracy / micro-averaged F1, macro-averaged F1) as computed by
     *         {@code writeResultsSummary}
     */
    private Pair<Double, Double> testExamples(Classifier<String, String> cl, GeneralDataset<String, String> test, List<String[]> lineInfos) {
        System.err.print("Output format: ");
        if (this.globalFlags.displayedColumn >= 0) {
            System.err.printf("dataColumn%d\t", this.globalFlags.displayedColumn);
        }
        System.err.print("goldAnswer\t");
        System.err.println(Flags.displayAllAnswers
                ? "[P(class) class]+ {sorted by probability}"
                : "classifierAnswer\tP(clAnswer)\tP(goldAnswer)");

        Counter<String> contingency = new ClassicCounter<String>();
        int sz = test.size();
        for (int item = 0; item < sz; ++item) {
            String[] simpleLineInfo = lineInfos.get(item);
            Datum<String, String> d = this.globalFlags.usesRealValues ? test.getRVFDatum(item) : test.getDatum(item);

            if (this.globalFlags.justify) {
                System.err.println("### Test item " + item);
                for (String field : simpleLineInfo) {
                    System.err.print(field);
                    System.err.print('\t');
                }
                System.err.println();
                if (cl instanceof LinearClassifier) {
                    ((LinearClassifier)cl).justificationOf(d);
                }
                System.err.println();
            }

            Counter<String> logScores = this.globalFlags.usesRealValues ? ((RVFClassifier)ErasureUtils.uncheckedCast(cl)).scoresOf(d) : cl.scoresOf(d);
            Distribution<String> dist = Distribution.distributionFromLogisticCounter(logScores);

            String answer = null;
            if (this.globalFlags.biasedHyperplane != null) {
                // Walk the labels from highest to lowest score and take the first one that clears its threshold.
                List<String> rankedLabels = new ArrayList<String>(logScores.keySet());
                Collections.sort(rankedLabels, Counters.toComparatorDescending(logScores));
                for (String label : rankedLabels) {
                    double prob = dist.probabilityOf(label);
                    double threshold = this.globalFlags.biasedHyperplane.getCount(label);
                    if (prob > threshold) {
                        answer = label;
                        break;
                    }
                }
            }
            if (answer == null) {
                answer = this.globalFlags.usesRealValues ? ((RVFClassifier)ErasureUtils.uncheckedCast(cl)).classOf(d) : cl.classOf(d);
            }

            double sim = 0.0;
            if (this.globalFlags.rankingScoreColumn >= 0) {
                try {
                    sim = Double.parseDouble(simpleLineInfo[this.globalFlags.rankingScoreColumn]);
                }
                catch (NumberFormatException ignored) {
                    // A score that is not a number leaves sim at 0.0.
                }
            }
            this.writeAnswer(simpleLineInfo, answer, dist, contingency, cl, sim);
        }

        if (this.globalFlags.groupingColumn >= 0 && this.globalFlags.rankingAccuracyClass != null) {
            // The last group is never followed by a group change, so close it here.
            this.finishRanking(contingency, bestSim);
        }
        if (this.globalFlags.printFeatures != null) {
            ColumnDataClassifier.closeFeaturePrinter();
        }
        return this.writeResultsSummary(test.size(), contingency, cl.labels());
    }

    /**
     * Builds a datum from the fields of one line using the per-column flags.
     *
     * <p>The optional global {@code "CLASS"} feature is added without a prefix; the features
     * extracted from column {@code i} are added with the prefix {@code "i-"}. If
     * {@code printFeatures} is configured the resulting features are also written out.
     *
     * @param strs the fields of one line; must cover every flagged column and the gold answer
     *             column
     * @return a datum labelled with the value of the gold answer column
     */
    private Datum<String, String> makeDatum(String[] strs) {
        ArrayList<String> theFeatures = new ArrayList<String>();

        Set<String> globalFeatures = Generics.newHashSet();
        if (this.globalFlags.useClassFeature) {
            globalFeatures.add("CLASS");
        }
        this.addAllInterningAndPrefixing(theFeatures, globalFeatures, "");

        int column = 0;
        while (column < this.flags.length) {
            // A set per column, so a feature produced twice for the same column is added once.
            Set<String> columnFeatures = Generics.newHashSet();
            this.makeDatum(strs[column], this.flags[column], columnFeatures, strs[this.globalFlags.goldAnswerColumn]);
            this.addAllInterningAndPrefixing(theFeatures, columnFeatures, column + "-");
            ++column;
        }

        if (this.globalFlags.printFeatures != null) {
            ColumnDataClassifier.printFeatures(strs, theFeatures);
        }
        return new BasicDatum<String, String>(theFeatures, strs[this.globalFlags.goldAnswerColumn]);
    }

    /**
     * Builds a real-valued datum from the fields of one line using the per-column flags.
     *
     * <p>The optional global {@code "CLASS"} feature is added with count 1.0 and no prefix; the
     * features extracted from column {@code i} are added with the prefix {@code "i-"}. If
     * {@code printFeatures} is configured the resulting features are also written out.
     *
     * @param strs the fields of one line; must cover every flagged column and the gold answer
     *             column
     * @return a real-valued datum labelled with the value of the gold answer column
     */
    private RVFDatum<String, String> makeRVFDatum(String[] strs) {
        ClassicCounter<String> theFeatures = new ClassicCounter<String>();

        ClassicCounter<String> globalFeatures = new ClassicCounter<String>();
        if (this.globalFlags.useClassFeature) {
            globalFeatures.setCount("CLASS", 1.0);
        }
        this.addAllInterningAndPrefixingRVF(theFeatures, globalFeatures, "");

        int column = 0;
        while (column < this.flags.length) {
            ClassicCounter<String> columnFeatures = new ClassicCounter<String>();
            this.makeDatum(strs[column], this.flags[column], columnFeatures, strs[this.globalFlags.goldAnswerColumn]);
            this.addAllInterningAndPrefixingRVF(theFeatures, columnFeatures, column + "-");
            ++column;
        }

        if (this.globalFlags.printFeatures != null) {
            ColumnDataClassifier.printFeatures(strs, theFeatures);
        }
        return new RVFDatum<String, String>(theFeatures, strs[this.globalFlags.goldAnswerColumn]);
    }

    /**
     * Adds every feature of {@code addend}, with its count, to {@code accumulator}.
     *
     * <p>Feature names are prefixed with {@code prefix} (when it is non-empty) and interned when
     * the global {@code intern} flag is set. Counts are added to any count already present.
     *
     * @param accumulator the counter to add to
     * @param addend      the features and counts to add
     * @param prefix      the prefix for each feature name; must not be null, may be empty
     */
    private void addAllInterningAndPrefixingRVF(ClassicCounter<String> accumulator, ClassicCounter<String> addend, String prefix) {
        assert (prefix != null);
        for (String protoFeat : addend.keySet()) {
            // Look the count up under the original name, before the name is prefixed.
            double count = addend.getCount(protoFeat);
            String featureName = prefix.isEmpty() ? protoFeat : prefix + protoFeat;
            accumulator.incrementCount(this.intern(featureName), count);
        }
    }

    /**
     * Adds every feature of {@code addend} to {@code accumulator}.
     *
     * <p>Feature names are prefixed with {@code prefix} (when it is non-empty) and interned when
     * the global {@code intern} flag is set.
     *
     * @param accumulator the collection to add to
     * @param addend      the features to add
     * @param prefix      the prefix for each feature name; must not be null, may be empty
     */
    private void addAllInterningAndPrefixing(Collection<String> accumulator, Collection<String> addend, String prefix) {
        assert (prefix != null);
        for (String protoFeat : addend) {
            String featureName = prefix.isEmpty() ? protoFeat : prefix + protoFeat;
            accumulator.add(this.intern(featureName));
        }
    }

    /**
     * Parses {@code cWord} as a number and adds it as a single real-valued feature.
     *
     * <p>Depending on the flags the feature is {@code "Log"} (natural log), {@code "Logit"}
     * ({@code log(v / (1 - v))}), {@code "Sqrt"} or the untransformed {@code "Value"}; the first
     * flag set, in that order, wins. A log or logit result that is infinite or NaN is reported
     * on standard error and no feature is added.
     *
     * @param cWord     the textual value of the column
     * @param flags     the flags of the column, selecting the transform
     * @param featuresC the feature container, as accepted by {@code addFeature}
     * @throws NumberFormatException if {@code cWord} is not a parsable number
     */
    private static void addFeatureValue(String cWord, Flags flags, Object featuresC) {
        double value = Double.parseDouble(cWord);

        String featureName;
        double featureValue;
        // Non-null only for the transforms whose result is range-checked.
        String outOfRangeWarning = null;
        if (flags.logTransform) {
            featureName = "Log";
            featureValue = Math.log(value);
            outOfRangeWarning = "WARNING: Log transform attempted on out of range value; feature ignored";
        } else if (flags.logitTransform) {
            featureName = "Logit";
            featureValue = Math.log(value / (1.0 - value));
            outOfRangeWarning = "WARNING: Logit transform attempted on out of range value; feature ignored";
        } else if (flags.sqrtTransform) {
            featureName = "Sqrt";
            featureValue = Math.sqrt(value);
        } else {
            featureName = "Value";
            featureValue = value;
        }

        boolean unusable = Double.isInfinite(featureValue) || Double.isNaN(featureValue);
        if (outOfRangeWarning != null && unusable) {
            System.err.println(outOfRangeWarning);
            return;
        }
        ColumnDataClassifier.addFeature(featuresC, featureName, featureValue);
    }

    /**
     * Adds a feature to a feature container that is either a {@code Counter} or a
     * {@code Collection}.
     *
     * <p>For a counter the feature's count is set to {@code value}; for a collection the
     * feature is added and {@code value} is ignored.
     *
     * @param features   the feature container
     * @param newFeature the feature to add
     * @param value      the value to store when the container is a counter
     * @throws RuntimeException if {@code features} is neither a counter nor a collection
     */
    private static <F> void addFeature(Object features, F newFeature, double value) {
        if (features instanceof Counter) {
            Counter<F> counter = (Counter<F>)ErasureUtils.uncheckedCast(features);
            counter.setCount(newFeature, value);
            return;
        }
        if (features instanceof Collection) {
            Collection<F> collection = (Collection<F>)ErasureUtils.uncheckedCast(features);
            collection.add(newFeature);
            return;
        }
        throw new RuntimeException("addFeature was called with a features object that is neither a counter nor a collection!");
    }

    /**
     * Extracts the features of a single column value, as selected by that column's flags, and
     * adds them to {@code featuresC}.
     *
     * <p>Nothing is added when {@code flags} is null. Otherwise the value is first replaced by
     * the contents of the file it names ({@code filename}) and/or lowercased
     * ({@code lowercase}), and then each enabled feature family is generated in turn: the whole
     * string ({@code S-}), binned length ({@code Len-}), binned numeric value ({@code Val-}),
     * binned character counts ({@code Char-}), split-word features ({@code SW-}, {@code LSW-},
     * {@code SWP-}, {@code ASWP-}, {@code ASWT-}, {@code SW#}, {@code SFW-}/{@code SLW-},
     * split n-grams, {@code SSHAPE-}), word shape ({@code SHAPE-}), character n-grams, and the
     * real-valued feature.
     *
     * <p>All lowercasing uses {@code Locale.ENGLISH} so that feature names do not depend on the
     * default locale of the JVM.
     *
     * @param cWord     the value of the column
     * @param flags     the flags of the column; may be null
     * @param featuresC the feature container, as accepted by {@code addFeature}
     * @param goldAns   the gold answer of the line, used only to update the optional
     *                  binned-length / binned-value counters
     */
    private void makeDatum(String cWord, Flags flags, Object featuresC, String goldAns) {
        if (flags == null) {
            return;
        }
        if (flags.filename) {
            cWord = IOUtils.slurpFileNoExceptions(cWord);
        }
        if (flags.lowercase) {
            cWord = cWord.toLowerCase(Locale.ENGLISH);
        }
        if (flags.useString) {
            ColumnDataClassifier.addFeature(featuresC, "S-" + cWord, 1.0);
        }

        if (flags.binnedLengths != null) {
            int len = cWord.length();
            String featureName = null;
            // The bin after the last boundary is open-ended, hence the <= in the loop condition.
            for (int i = 0; i <= flags.binnedLengths.length; ++i) {
                if (i == flags.binnedLengths.length || len <= flags.binnedLengths[i]) {
                    featureName = "Len-" + (i == 0 ? 0 : flags.binnedLengths[i - 1] + 1) + '-' + (i == flags.binnedLengths.length ? "Inf" : Integer.toString(flags.binnedLengths[i]));
                    if (flags.binnedLengthsCounter != null) {
                        flags.binnedLengthsCounter.incrementCount(featureName, goldAns);
                    }
                    break;
                }
            }
            ColumnDataClassifier.addFeature(featuresC, featureName, 1.0);
        }

        if (flags.binnedValues != null) {
            double val = flags.binnedValuesNaN;
            try {
                val = Double.parseDouble(cWord);
            }
            catch (NumberFormatException ignored) {
                // Not a number: keep the configured substitute value.
            }
            String featureName = null;
            // A NaN value fails every <= comparison and so ends up in the last, open-ended bin.
            for (int i = 0; i <= flags.binnedValues.length; ++i) {
                if (i == flags.binnedValues.length || val <= flags.binnedValues[i]) {
                    featureName = "Val-(" + (i == 0 ? "-Inf" : Double.toString(flags.binnedValues[i - 1])) + ',' + (i == flags.binnedValues.length ? "Inf" : Double.toString(flags.binnedValues[i])) + ']';
                    if (flags.binnedValuesCounter != null) {
                        flags.binnedValuesCounter.incrementCount(featureName, goldAns);
                    }
                    break;
                }
            }
            ColumnDataClassifier.addFeature(featuresC, featureName, 1.0);
        }

        if (flags.countChars != null) {
            // Count the occurrences of each tracked character (the array starts out all zero).
            int[] cnts = new int[flags.countChars.length];
            int len = cWord.length();
            for (int pos = 0; pos < len; ++pos) {
                char ch = cWord.charAt(pos);
                for (int j = 0; j < cnts.length; ++j) {
                    if (ch == flags.countChars[j]) {
                        cnts[j] = cnts[j] + 1;
                    }
                }
            }
            // One binned-count feature per tracked character.
            for (int j = 0; j < cnts.length; ++j) {
                String featureName = null;
                for (int i = 0; i <= flags.countCharsBins.length; ++i) {
                    if (i == flags.countCharsBins.length || cnts[j] <= flags.countCharsBins[i]) {
                        featureName = "Char-" + flags.countChars[j] + '-' + (i == 0 ? 0 : flags.countCharsBins[i - 1] + 1) + '-' + (i == flags.countCharsBins.length ? "Inf" : Integer.toString(flags.countCharsBins[i]));
                        break;
                    }
                }
                ColumnDataClassifier.addFeature(featuresC, featureName, 1.0);
            }
        }

        if (flags.splitWordsPattern != null || flags.splitWordsTokenizerPattern != null || flags.splitWordsWithPTBTokenizer) {
            // Tokenizer precedence: tokenizer pattern, then split pattern, then the PTB tokenizer.
            Object[] bits;
            if (flags.splitWordsTokenizerPattern != null) {
                bits = ColumnDataClassifier.regexpTokenize(flags.splitWordsTokenizerPattern, flags.splitWordsIgnorePattern, cWord);
            } else if (flags.splitWordsPattern != null) {
                bits = ColumnDataClassifier.splitTokenize(flags.splitWordsPattern, flags.splitWordsIgnorePattern, cWord);
            } else {
                bits = this.ptbTokenize(cWord);
            }
            if (flags.showTokenization) {
                System.err.print("Tokenization: ");
                System.err.println(Arrays.toString(bits));
            }
            for (int i = 0; i < bits.length; ++i) {
                String word = (String)bits[i];
                if (flags.useSplitWords) {
                    ColumnDataClassifier.addFeature(featuresC, "SW-" + word, 1.0);
                }
                if (flags.useLowercaseSplitWords) {
                    // Same fixed locale as the whole-column lowercasing above, so the feature
                    // name is identical on every machine.
                    ColumnDataClassifier.addFeature(featuresC, "LSW-" + word.toLowerCase(Locale.ENGLISH), 1.0);
                }
                if (flags.useSplitWordPairs && i + 1 < bits.length) {
                    ColumnDataClassifier.addFeature(featuresC, "SWP-" + word + '-' + (String)bits[i + 1], 1.0);
                }
                if (flags.useAllSplitWordPairs) {
                    // Unordered pairs: the two words are always written in sorted order.
                    for (int j = i + 1; j < bits.length; ++j) {
                        String other = (String)bits[j];
                        if (word.compareTo(other) < 0) {
                            ColumnDataClassifier.addFeature(featuresC, "ASWP-" + word + '-' + other, 1.0);
                        } else {
                            ColumnDataClassifier.addFeature(featuresC, "ASWP-" + other + '-' + word, 1.0);
                        }
                    }
                }
                if (flags.useAllSplitWordTriples) {
                    // Unordered triples, likewise written in sorted order.
                    for (int j = i + 1; j < bits.length; ++j) {
                        for (int k = j + 1; k < bits.length; ++k) {
                            String[] triple = new String[]{word, (String)bits[j], (String)bits[k]};
                            Arrays.sort(triple);
                            ColumnDataClassifier.addFeature(featuresC, "ASWT-" + triple[0] + '-' + triple[1] + '-' + triple[2], 1.0);
                        }
                    }
                }
                if (flags.useSplitWordNGrams) {
                    StringBuilder sb = new StringBuilder("SW#");
                    // Words below the minimum n-gram length are accumulated without emitting a feature.
                    for (int j = i; j < i + flags.minWordNGramLeng - 1 && j < bits.length; ++j) {
                        sb.append('-');
                        sb.append((String)bits[j]);
                    }
                    int maxIndex = flags.maxWordNGramLeng > 0 ? Math.min(bits.length, i + flags.maxWordNGramLeng) : bits.length;
                    for (int j = i + flags.minWordNGramLeng - 1; j < maxIndex; ++j) {
                        // An n-gram never extends onto or across a boundary token.
                        if (flags.wordNGramBoundaryRegexp != null && flags.wordNGramBoundaryPattern.matcher((CharSequence)bits[j]).matches()) {
                            break;
                        }
                        sb.append('-');
                        sb.append((String)bits[j]);
                        ColumnDataClassifier.addFeature(featuresC, sb.toString(), 1.0);
                    }
                }
                if (flags.useSplitFirstLastWords) {
                    // A single-token value only gets the first-word feature.
                    if (i == 0) {
                        ColumnDataClassifier.addFeature(featuresC, "SFW-" + word, 1.0);
                    } else if (i == bits.length - 1) {
                        ColumnDataClassifier.addFeature(featuresC, "SLW-" + word, 1.0);
                    }
                }
                if (flags.useSplitNGrams || flags.useSplitPrefixSuffixNGrams) {
                    Collection<String> featureNames = this.makeNGramFeatures(word, flags, true, "S#");
                    for (String featureName : featureNames) {
                        ColumnDataClassifier.addFeature(featuresC, featureName, 1.0);
                    }
                }
                if (flags.splitWordShape > -1) {
                    String shape = WordShapeClassifier.wordShape(word, flags.splitWordShape);
                    ColumnDataClassifier.addFeature(featuresC, "SSHAPE-" + shape, 1.0);
                }
            }
        }

        if (flags.wordShape > -1) {
            String shape = WordShapeClassifier.wordShape(cWord, flags.wordShape);
            ColumnDataClassifier.addFeature(featuresC, "SHAPE-" + shape, 1.0);
        }
        if (flags.useNGrams || flags.usePrefixSuffixNGrams) {
            Collection<String> featureNames = this.makeNGramFeatures(cWord, flags, false, "#");
            for (String featureName : featureNames) {
                ColumnDataClassifier.addFeature(featuresC, featureName, 1.0);
            }
        }
        if (flags.isRealValued || flags.logTransform || flags.logitTransform || flags.sqrtTransform) {
            ColumnDataClassifier.addFeatureValue(cWord, flags, featuresC);
        }
    }

    /**
     * Tokenizes a string with a {@code PTBTokenizer} and returns the text of each token.
     * The tokenizer factory is created on first use and stored on this instance.
     *
     * @param cWord the text to tokenize
     * @return the {@code word()} of every token, in the order the tokenizer produced them
     */
    private String[] ptbTokenize(String cWord) {
        if (this.ptbFactory == null) {
            this.ptbFactory = PTBTokenizer.factory();
        }
        Tokenizer<Word> tok = this.ptbFactory.getTokenizer(new StringReader(cWord));
        List<Word> tokens = tok.tokenize();
        String[] texts = new String[tokens.size()];
        int next = 0;
        for (Word token : tokens) {
            texts[next++] = token.word();
        }
        return texts;
    }

    /**
     * Returns the canonical (interned) instance of {@code s} when the global
     * {@code intern} flag is set; otherwise returns {@code s} unchanged.
     *
     * @param s the string to (optionally) intern
     * @return {@code s.intern()} or {@code s} itself
     */
    private String intern(String s) {
        return this.globalFlags.intern ? s.intern() : s;
    }

    /**
     * Builds the character n-gram feature names for one string.
     *
     * <p>The input is optionally lower-cased ({@code flags.lowercaseNGrams}) and optionally
     * reduced to the part matched by {@code flags.partialNGramPattern} (group 1 if the pattern
     * has a capturing group, otherwise the whole match). For every substring whose length lies
     * between {@code flags.minNGramLeng} and {@code flags.maxNGramLeng} the method emits
     * <ul>
     *   <li>{@code featPrefix + "B-" + substring} when prefix/suffix n-grams are on and the
     *       substring starts the word,</li>
     *   <li>{@code featPrefix + "E-" + substring} when prefix/suffix n-grams are on and the
     *       substring ends the word,</li>
     *   <li>{@code featPrefix + "-" + substring} when internal n-grams are on.</li>
     * </ul>
     *
     * <p>When {@code flags.cacheNGrams} is set the result is stored in {@code wordToSubstrings}.
     * The cache key includes every setting that influences the generated names (prefix, both
     * n-gram switches and the length bounds), so that a word cached for one column or for the
     * non-split variant is never returned for a call made with different settings.
     *
     * @param input      the string to take n-grams of
     * @param flags      the per-column flags controlling n-gram generation
     * @param useSplit   if true use the {@code useSplit*} switches, otherwise the plain ones
     * @param featPrefix prefix placed in front of every generated feature name
     * @return the feature names; this may be the cached collection and must not be modified
     */
    private Collection<String> makeNGramFeatures(String input, Flags flags, boolean useSplit, String featPrefix) {
        boolean internalNGrams = useSplit ? flags.useSplitNGrams : flags.useNGrams;
        boolean prefixSuffixNGrams = useSplit ? flags.useSplitPrefixSuffixNGrams : flags.usePrefixSuffixNGrams;
        String toNGrams = input;
        if (flags.lowercaseNGrams) {
            toNGrams = toNGrams.toLowerCase(Locale.ENGLISH);
        }
        if (flags.partialNGramRegexp != null) {
            Matcher m = flags.partialNGramPattern.matcher(toNGrams);
            if (m.find()) {
                toNGrams = m.groupCount() > 0 ? m.group(1) : m.group();
            }
        }
        Collection<String> subs = null;
        String cacheKey = null;
        if (flags.cacheNGrams) {
            // The word goes last so the fixed-format settings in front of it cannot be confused with it.
            cacheKey = featPrefix + '|' + internalNGrams + '|' + prefixSuffixNGrams + '|' + flags.minNGramLeng + '|' + flags.maxNGramLeng + '|' + toNGrams;
            subs = wordToSubstrings.get(cacheKey);
        }
        if (subs == null) {
            subs = new ArrayList<String>();
            String strN = featPrefix + '-';
            String strB = featPrefix + "B-";
            String strE = featPrefix + "E-";
            int wleng = toNGrams.length();
            for (int i = 0; i < wleng; ++i) {
                int min = Math.min(wleng, i + flags.maxNGramLeng);
                for (int j = i + flags.minNGramLeng; j <= min; ++j) {
                    String gram = toNGrams.substring(i, j);
                    if (prefixSuffixNGrams) {
                        if (i == 0) {
                            subs.add(this.intern(strB + gram));
                        }
                        if (j == wleng) {
                            subs.add(this.intern(strE + gram));
                        }
                    }
                    if (internalNGrams) {
                        subs.add(this.intern(strN + gram));
                    }
                }
            }
            if (flags.cacheNGrams) {
                wordToSubstrings.put(cacheKey, subs);
            }
        }
        return subs;
    }

    /**
     * Opens the feature dump file {@code prefix + '.' + suffix} and makes it the current
     * {@code cliqueWriter}. A writer that is already open is closed first.
     *
     * <p>Feature printing is best-effort: if the file cannot be opened a warning is written to
     * {@code System.err}, {@code cliqueWriter} is left {@code null} and processing continues
     * without feature output.
     *
     * @param prefix   first part of the output file name
     * @param suffix   second part of the output file name, appended after a dot
     * @param encoding character encoding passed to {@code IOUtils.getPrintWriter}
     */
    private static void newFeaturePrinter(String prefix, String suffix, String encoding) {
        if (cliqueWriter != null) {
            ColumnDataClassifier.closeFeaturePrinter();
        }
        String path = prefix + '.' + suffix;
        try {
            cliqueWriter = IOUtils.getPrintWriter(path, encoding);
        }
        catch (IOException ioe) {
            // Do not fail the run over a diagnostic file, but do not hide the problem either.
            System.err.println("Warning: could not open feature file " + path + " (" + ioe + "); features will not be printed");
            cliqueWriter = null;
        }
    }

    /**
     * Closes the current feature writer, if any, and clears {@code cliqueWriter}.
     * Does nothing when no writer is open (for example because opening the file failed).
     */
    private static void closeFeaturePrinter() {
        if (cliqueWriter == null) {
            return;
        }
        cliqueWriter.close();
        cliqueWriter = null;
    }

    /**
     * Writes one line to the feature writer: the input columns separated by tabs, followed by
     * a tab-separated {@code name}/{@code count} pair for every feature in the counter.
     * Nothing is written when no feature writer is open.
     *
     * @param wi       the columns of the input line
     * @param features the features of the datum with their counts
     */
    private static void printFeatures(String[] wi, ClassicCounter<String> features) {
        if (cliqueWriter == null) {
            return;
        }
        String separator = "";
        for (String column : wi) {
            cliqueWriter.print(separator);
            cliqueWriter.print(column);
            separator = "\t";
        }
        for (String name : features.keySet()) {
            cliqueWriter.print("\t");
            cliqueWriter.print(name);
            cliqueWriter.print("\t");
            cliqueWriter.print(features.getCount(name));
        }
        cliqueWriter.println();
    }

    /**
     * Writes one line to the feature writer: the input columns separated by tabs, followed by
     * every feature name, each preceded by a tab.
     * Nothing is written when no feature writer is open.
     *
     * @param wi       the columns of the input line
     * @param features the feature names of the datum
     */
    private static void printFeatures(String[] wi, List<String> features) {
        if (cliqueWriter == null) {
            return;
        }
        int columns = wi.length;
        if (columns > 0) {
            cliqueWriter.print(wi[0]);
        }
        for (int col = 1; col < columns; ++col) {
            cliqueWriter.print("\t");
            cliqueWriter.print(wi[col]);
        }
        for (String name : features) {
            cliqueWriter.print("\t");
            cliqueWriter.print(name);
        }
        cliqueWriter.println();
    }

    /**
     * Trains a linear classifier with an L1 penalty, searching for a penalty strength that
     * leaves roughly {@code globalFlags.limitFeatures} features above
     * {@code globalFlags.featureWeightThreshold}.
     *
     * <p>The search is a bisection between {@code l1regmin} and {@code l1regmax}, starting at
     * {@code l1reg} (or at {@code l1regmax} when {@code l1reg} is not positive). After each
     * training run:
     * <ul>
     *   <li>too few features (or a failed run): the current value becomes the upper bound and
     *       the penalty is moved halfway towards the lower bound;</li>
     *   <li>too many features: the current value becomes the lower bound and the penalty is
     *       moved halfway towards the upper bound;</li>
     *   <li>a count within the tolerance (5) of the target: the search stops.</li>
     * </ul>
     * The search also stops once a step would change the penalty by less than 0.05.
     *
     * <p>A training run that fails with the "L-BFGS chose a non-descent direction" error is
     * logged and treated like a run that produced too few features, so the next iteration
     * retries with a different penalty. Any other {@code RuntimeException} is rethrown.
     *
     * <p>Side effect: {@code globalFlags.l1reg} is set to the final penalty value.
     *
     * @param train the training data
     * @return the classifier from the last successful training run, or {@code null} if no run
     *         succeeded before the search stopped
     */
    private Classifier<String, String> makeClassifierAdaptL1(GeneralDataset<String, String> train) {
        assert (this.globalFlags.useAdaptL1 && this.globalFlags.limitFeatures > 0);
        Classifier lc = null;
        double l1reg = this.globalFlags.l1reg;
        double l1regmax = this.globalFlags.l1regmax;
        double l1regmin = this.globalFlags.l1regmin;
        if (this.globalFlags.l1reg <= 0.0) {
            System.err.println("WARNING: useAdaptL1 set and limitFeatures to " + this.globalFlags.limitFeatures + ", but invalid value of l1reg=" + this.globalFlags.l1reg + ", defaulting to " + this.globalFlags.l1regmax);
            l1reg = l1regmax;
        } else {
            System.err.println("TRAIN: useAdaptL1 set and limitFeatures to " + this.globalFlags.limitFeatures + ", l1reg=" + this.globalFlags.l1reg + ", l1regmax=" + this.globalFlags.l1regmax + ", l1regmin=" + this.globalFlags.l1regmin);
        }
        // Optional restriction of the feature count to a comma-separated list of labels.
        Set<String> limitFeatureLabels = null;
        if (this.globalFlags.limitFeaturesLabels != null) {
            String[] labels = this.globalFlags.limitFeaturesLabels.split(",");
            limitFeatureLabels = Generics.newHashSet();
            for (String label : labels) {
                limitFeatureLabels.add(label.trim());
            }
        }
        double l1regtop = l1regmax;
        double l1regbottom = l1regmin;
        int limitFeatureTol = 5;
        double l1regminchange = 0.05;
        while (true) {
            System.err.println("Training: l1reg=" + l1reg + ", threshold=" + this.globalFlags.featureWeightThreshold + ", target=" + this.globalFlags.limitFeatures);
            Minimizer minim = (Minimizer)ReflectionLoading.loadByReflection("edu.stanford.nlp.optimization.OWLQNMinimizer", l1reg);
            LinearClassifierFactory lcf = new LinearClassifierFactory(minim, this.globalFlags.tolerance, this.globalFlags.useSum, this.globalFlags.prior, this.globalFlags.sigma, this.globalFlags.epsilon);
            // Stays negative when training fails, which sends the search down the "too few features" path.
            int featureCount = -1;
            try {
                Classifier c = lcf.trainClassifier((GeneralDataset)train);
                lc = c;
                LinearClassifier linear = (LinearClassifier)c;
                featureCount = linear.getFeatureCount(limitFeatureLabels, this.globalFlags.featureWeightThreshold, false);
                System.err.println("Training Done: l1reg=" + l1reg + ", threshold=" + this.globalFlags.featureWeightThreshold + ", features=" + featureCount + ", target=" + this.globalFlags.limitFeatures);
                List topFeatures = linear.getTopFeatures(limitFeatureLabels, this.globalFlags.featureWeightThreshold, false, this.globalFlags.limitFeatures, true);
                String classifierDesc = linear.topFeaturesToString(topFeatures);
                System.err.println("Printing top " + this.globalFlags.limitFeatures + " features with weights above " + this.globalFlags.featureWeightThreshold);
                if (this.globalFlags.limitFeaturesLabels != null) {
                    System.err.println("  Limited to labels: " + this.globalFlags.limitFeaturesLabels);
                }
                System.err.println(classifierDesc);
            }
            catch (RuntimeException ex) {
                String message = ex.getMessage();
                if (message == null || !message.startsWith("L-BFGS chose a non-descent direction")) {
                    throw ex;
                }
                // Known optimizer failure: report it and let the search pick another l1reg.
                System.err.println("Error in optimization, will try again with different l1reg");
                ex.printStackTrace(System.err);
            }
            if (featureCount < 0 || featureCount < this.globalFlags.limitFeatures - limitFeatureTol) {
                // Too few features (or training failed): weaken the penalty.
                l1regtop = l1reg;
                l1reg = 0.5 * (l1reg + l1regbottom);
                if (l1regtop - l1reg < l1regminchange) {
                    System.err.println("Stopping: old l1reg  " + l1regtop + "- new l1reg " + l1reg + ", difference less than " + l1regminchange);
                    break;
                }
            } else if (featureCount > this.globalFlags.limitFeatures + limitFeatureTol) {
                // Too many features: strengthen the penalty.
                l1regbottom = l1reg;
                l1reg = 0.5 * (l1reg + l1regtop);
                if (l1reg - l1regbottom < l1regminchange) {
                    System.err.println("Stopping: new l1reg  " + l1reg + "- old l1reg " + l1regbottom + ", difference less than " + l1regminchange);
                    break;
                }
            } else {
                System.err.println("Stopping: # of features within " + limitFeatureTol + " of target");
                break;
            }
        }
        // Remember the value that was finally used.
        this.globalFlags.l1reg = l1reg;
        return lc;
    }

    /**
     * Trains a classifier on the given data, choosing the training method from the global flags.
     * The first matching case wins:
     * <ol>
     *   <li>{@code useClassifierFactory} set: the named factory is loaded by reflection
     *       (with {@code classifierFactoryArgs} as its single argument when given);</li>
     *   <li>{@code useNB}: {@code NBLinearClassifierFactory}, with sigma forced to 0 when
     *       {@code prior} is 0;</li>
     *   <li>{@code useBinary}: {@code LogisticClassifierFactory} with the configured
     *       {@code l1reg}, tolerance, prior and {@code biased} setting;</li>
     *   <li>{@code biased}: {@code LogisticClassifierFactory} with the configured prior;</li>
     *   <li>{@code useAdaptL1} with a positive {@code limitFeatures}:
     *       {@link #makeClassifierAdaptL1};</li>
     *   <li>otherwise: {@code LinearClassifierFactory}, using an {@code OWLQNMinimizer} when
     *       {@code l1reg} is positive, and conjugate gradient when {@code useQN} is off.</li>
     * </ol>
     *
     * @param train the training data
     * @return the trained classifier
     */
    public Classifier<String, String> makeClassifier(GeneralDataset<String, String> train) {
        if (this.globalFlags.useClassifierFactory != null) {
            ClassifierFactory cf = this.globalFlags.classifierFactoryArgs != null ? (ClassifierFactory)ReflectionLoading.loadByReflection(this.globalFlags.useClassifierFactory, this.globalFlags.classifierFactoryArgs) : (ClassifierFactory)ReflectionLoading.loadByReflection(this.globalFlags.useClassifierFactory, new Object[0]);
            return cf.trainClassifier(train);
        }
        if (this.globalFlags.useNB) {
            double sigma = this.globalFlags.prior == 0 ? 0.0 : this.globalFlags.sigma;
            return new NBLinearClassifierFactory(sigma, this.globalFlags.useClassFeature).trainClassifier((GeneralDataset)train);
        }
        if (this.globalFlags.useBinary) {
            LogisticClassifierFactory<String, String> lcf = new LogisticClassifierFactory<String, String>();
            LogPrior prior = new LogPrior(this.globalFlags.prior, this.globalFlags.sigma, this.globalFlags.epsilon);
            return lcf.trainClassifier(train, this.globalFlags.l1reg, this.globalFlags.tolerance, prior, this.globalFlags.biased);
        }
        if (this.globalFlags.biased) {
            LogisticClassifierFactory<String, String> lcf = new LogisticClassifierFactory<String, String>();
            LogPrior prior = new LogPrior(this.globalFlags.prior, this.globalFlags.sigma, this.globalFlags.epsilon);
            return lcf.trainClassifier(train, prior, true);
        }
        if (this.globalFlags.useAdaptL1 && this.globalFlags.limitFeatures > 0) {
            return this.makeClassifierAdaptL1(train);
        }
        LinearClassifierFactory lcf;
        if (this.globalFlags.l1reg > 0.0) {
            Minimizer minim = (Minimizer)ReflectionLoading.loadByReflection("edu.stanford.nlp.optimization.OWLQNMinimizer", this.globalFlags.l1reg);
            lcf = new LinearClassifierFactory(minim, this.globalFlags.tolerance, this.globalFlags.useSum, this.globalFlags.prior, this.globalFlags.sigma, this.globalFlags.epsilon);
        } else {
            lcf = new LinearClassifierFactory(this.globalFlags.tolerance, this.globalFlags.useSum, this.globalFlags.prior, this.globalFlags.sigma, this.globalFlags.epsilon, this.globalFlags.QNsize);
        }
        if (!this.globalFlags.useQN) {
            lcf.useConjugateGradientAscent();
        }
        return lcf.trainClassifier((GeneralDataset)train);
    }

    /**
     * Tokenizes a string by repeatedly matching patterns at the start of the remaining text.
     *
     * <p>At each position, in this order:
     * <ol>
     *   <li>if {@code ignoreRegexp} matches a non-empty prefix, that prefix is discarded;</li>
     *   <li>otherwise, if {@code tokenizerRegexp} matches a non-empty prefix, that prefix
     *       becomes a token;</li>
     *   <li>otherwise a warning is written to {@code System.err} and the first character
     *       becomes a token of its own.</li>
     * </ol>
     * A zero-length match of either pattern is treated as "no match": it would consume no
     * input and the loop would otherwise never terminate.
     *
     * @param tokenizerRegexp pattern describing one token
     * @param ignoreRegexp    pattern describing text to skip between tokens, or {@code null}
     * @param inWord          the text to tokenize
     * @return the tokens in order of appearance; empty for an empty input
     */
    private static String[] regexpTokenize(Pattern tokenizerRegexp, Pattern ignoreRegexp, String inWord) {
        ArrayList<String> al = new ArrayList<String>();
        String word = inWord;
        while (word.length() > 0) {
            if (ignoreRegexp != null) {
                Matcher mig = ignoreRegexp.matcher(word);
                if (mig.lookingAt() && mig.end() > 0) {
                    word = word.substring(mig.end());
                    continue;
                }
            }
            Matcher m = tokenizerRegexp.matcher(word);
            int consumed;
            if (m.lookingAt() && m.end() > 0) {
                consumed = m.end();
            } else {
                System.err.println("Warning: regexpTokenize pattern " + tokenizerRegexp + " didn't match on |" + word.substring(0, 1) + "| of |" + word + '|');
                // Always make progress: emit the offending character as its own token.
                consumed = 1;
            }
            al.add(word.substring(0, consumed));
            word = word.substring(consumed);
        }
        return al.toArray(new String[al.size()]);
    }

    /**
     * Splits a string around matches of {@code splitRegexp} and drops every piece that is
     * matched in full by {@code ignoreRegexp}.
     *
     * @param splitRegexp  the delimiter pattern
     * @param ignoreRegexp pattern of pieces to discard, or {@code null} to keep everything
     * @param cWord        the text to split
     * @return the remaining pieces in their original order; the array produced by the split
     *         itself when nothing was discarded
     */
    private static String[] splitTokenize(Pattern splitRegexp, Pattern ignoreRegexp, String cWord) {
        String[] pieces = splitRegexp.split(cWord);
        if (ignoreRegexp == null) {
            return pieces;
        }
        ArrayList<String> kept = new ArrayList<String>(pieces.length);
        for (int idx = 0; idx < pieces.length; ++idx) {
            String piece = pieces[idx];
            if (!ignoreRegexp.matcher(piece).matches()) {
                kept.add(piece);
            }
        }
        if (kept.size() == pieces.length) {
            return pieces;
        }
        return kept.toArray(new String[kept.size()]);
    }

    private Flags[] setProperties(Properties props) {
        Flags[] myFlags;
        Pattern prefix;
        boolean myUsesRealValues = false;
        try {
            prefix = Pattern.compile("([0-9]+)\\.(.*)");
        }
        catch (PatternSyntaxException pse) {
            throw new RuntimeException(pse);
        }
        String loadPath = props.getProperty("loadClassifier");
        if (loadPath != null) {
            System.err.println("Loading classifier from " + loadPath + "...");
            ObjectInputStream ois = null;
            try {
                ois = IOUtils.readStreamFromString(loadPath);
                this.classifier = (Classifier)ErasureUtils.uncheckedCast(ois.readObject());
                myFlags = (Flags[])ois.readObject();
                assert (this.flags.length > 0);
                System.err.println("Done.");
            }
            catch (Exception e) {
                throw new RuntimeIOException("Error deserializing " + loadPath, e);
            }
            finally {
                IOUtils.closeIgnoringExceptions(ois);
            }
        } else {
            myFlags = new Flags[]{new Flags()};
        }
        Enumeration<?> e = props.propertyNames();
        while (e.hasMoreElements()) {
            String key = (String)e.nextElement();
            String val = props.getProperty(key);
            int col = 0;
            System.err.println(key + " = " + val);
            Matcher matcher = prefix.matcher(key);
            if (matcher.matches()) {
                col = Integer.parseInt(matcher.group(1));
                key = matcher.group(2);
            }
            if (col >= myFlags.length) {
                Flags[] newFl = new Flags[col + 1];
                System.arraycopy(myFlags, 0, newFl, 0, myFlags.length);
                myFlags = newFl;
            }
            if (myFlags[col] == null) {
                myFlags[col] = new Flags();
            }
            if (key.equals("useString")) {
                myFlags[col].useString = Boolean.parseBoolean(val);
                continue;
            }
            if (key.equals("binnedLengths")) {
                if (val == null) continue;
                String[] binnedLengthStrs = val.split("[, ]+");
                myFlags[col].binnedLengths = new int[binnedLengthStrs.length];
                for (int i = 0; i < myFlags[col].binnedLengths.length; ++i) {
                    myFlags[col].binnedLengths[i] = Integer.parseInt(binnedLengthStrs[i]);
                }
                continue;
            }
            if (key.equals("binnedLengthsStatistics")) {
                if (!Boolean.parseBoolean(val)) continue;
                myFlags[col].binnedLengthsCounter = new TwoDimensionalCounter();
                continue;
            }
            if (key.equals("countChars")) {
                myFlags[col].countChars = val.toCharArray();
                continue;
            }
            if (key.equals("countCharsBins")) {
                if (val == null) continue;
                String[] binnedCountStrs = val.split("[, ]+");
                myFlags[col].countCharsBins = new int[binnedCountStrs.length];
                for (int i = 0; i < binnedCountStrs.length; ++i) {
                    myFlags[col].countCharsBins[i] = Integer.parseInt(binnedCountStrs[i]);
                }
                continue;
            }
            if (key.equals("binnedValues")) {
                if (val == null) continue;
                String[] binnedValuesStrs = val.split("[, ]+");
                myFlags[col].binnedValues = new double[binnedValuesStrs.length];
                for (int i = 0; i < myFlags[col].binnedValues.length; ++i) {
                    myFlags[col].binnedValues[i] = Double.parseDouble(binnedValuesStrs[i]);
                }
                continue;
            }
            if (key.equals("binnedValuesNaN")) {
                myFlags[col].binnedValuesNaN = Double.parseDouble(val);
                continue;
            }
            if (key.equals("binnedValuesStatistics")) {
                if (!Boolean.parseBoolean(val)) continue;
                myFlags[col].binnedValuesCounter = new TwoDimensionalCounter();
                continue;
            }
            if (key.equals("useNGrams")) {
                myFlags[col].useNGrams = Boolean.parseBoolean(val);
                continue;
            }
            if (key.equals("usePrefixSuffixNGrams")) {
                myFlags[col].usePrefixSuffixNGrams = Boolean.parseBoolean(val);
                continue;
            }
            if (key.equals("useSplitNGrams")) {
                myFlags[col].useSplitNGrams = Boolean.parseBoolean(val);
                continue;
            }
            if (key.equals("wordShape")) {
                myFlags[col].wordShape = WordShapeClassifier.lookupShaper(val);
                continue;
            }
            if (key.equals("splitWordShape")) {
                myFlags[col].splitWordShape = WordShapeClassifier.lookupShaper(val);
                continue;
            }
            if (key.equals("useSplitPrefixSuffixNGrams")) {
                myFlags[col].useSplitPrefixSuffixNGrams = Boolean.parseBoolean(val);
                continue;
            }
            if (key.equals("lowercaseNGrams")) {
                myFlags[col].lowercaseNGrams = Boolean.parseBoolean(val);
                continue;
            }
            if (key.equals("lowercase")) {
                myFlags[col].lowercase = Boolean.parseBoolean(val);
                continue;
            }
            if (key.equals("useLowercaseSplitWords")) {
                myFlags[col].useLowercaseSplitWords = Boolean.parseBoolean(val);
                continue;
            }
            if (key.equals("useSum")) {
                myFlags[col].useSum = Boolean.parseBoolean(val);
                continue;
            }
            if (key.equals("tolerance")) {
                myFlags[col].tolerance = Double.parseDouble(val);
                continue;
            }
            if (key.equals("printFeatures")) {
                myFlags[col].printFeatures = val;
                continue;
            }
            if (key.equals("printClassifier")) {
                myFlags[col].printClassifier = val;
                continue;
            }
            if (key.equals("printClassifierParam")) {
                myFlags[col].printClassifierParam = Integer.parseInt(val);
                continue;
            }
            if (key.equals("exitAfterTrainingFeaturization")) {
                myFlags[col].exitAfterTrainingFeaturization = Boolean.parseBoolean(val);
                continue;
            }
            if (key.equals("intern") || key.equals("intern2")) {
                myFlags[col].intern = Boolean.parseBoolean(val);
                continue;
            }
            if (key.equals("cacheNGrams")) {
                myFlags[col].cacheNGrams = Boolean.parseBoolean(val);
                continue;
            }
            if (key.equals("useClassifierFactory")) {
                myFlags[col].useClassifierFactory = val;
                continue;
            }
            if (key.equals("classifierFactoryArgs")) {
                myFlags[col].classifierFactoryArgs = val;
                continue;
            }
            if (key.equals("useNB")) {
                myFlags[col].useNB = Boolean.parseBoolean(val);
                continue;
            }
            if (key.equals("useBinary")) {
                myFlags[col].useBinary = Boolean.parseBoolean(val);
                continue;
            }
            if (key.equals("l1reg")) {
                myFlags[col].l1reg = Double.parseDouble(val);
                continue;
            }
            if (key.equals("useAdaptL1")) {
                myFlags[col].useAdaptL1 = Boolean.parseBoolean(val);
                continue;
            }
            if (key.equals("limitFeatures")) {
                myFlags[col].limitFeatures = Integer.parseInt(val);
                continue;
            }
            if (key.equals("l1regmin")) {
                myFlags[col].l1regmin = Double.parseDouble(val);
                continue;
            }
            if (key.equals("l1regmax")) {
                myFlags[col].l1regmax = Double.parseDouble(val);
                continue;
            }
            if (key.equals("limitFeaturesLabels")) {
                myFlags[col].limitFeaturesLabels = val;
                continue;
            }
            if (key.equals("featureWeightThreshold")) {
                myFlags[col].featureWeightThreshold = Double.parseDouble(val);
                continue;
            }
            if (key.equals("useClassFeature")) {
                myFlags[col].useClassFeature = Boolean.parseBoolean(val);
                continue;
            }
            if (key.equals("featureMinimumSupport")) {
                myFlags[col].featureMinimumSupport = Integer.parseInt(val);
                continue;
            }
            if (key.equals("prior")) {
                if (val.equalsIgnoreCase("no")) {
                    myFlags[col].prior = LogPrior.LogPriorType.NULL.ordinal();
                    continue;
                }
                if (val.equalsIgnoreCase("huber")) {
                    myFlags[col].prior = LogPrior.LogPriorType.HUBER.ordinal();
                    continue;
                }
                if (val.equalsIgnoreCase("quadratic")) {
                    myFlags[col].prior = LogPrior.LogPriorType.QUADRATIC.ordinal();
                    continue;
                }
                if (val.equalsIgnoreCase("quartic")) {
                    myFlags[col].prior = LogPrior.LogPriorType.QUARTIC.ordinal();
                    continue;
                }
                try {
                    myFlags[col].prior = Integer.parseInt(val);
                }
                catch (NumberFormatException nfe) {
                    System.err.println("Unknown prior " + val + "; using none.");
                }
                continue;
            }
            if (key.equals("sigma")) {
                myFlags[col].sigma = Double.parseDouble(val);
                continue;
            }
            if (key.equals("epsilon")) {
                myFlags[col].epsilon = Double.parseDouble(val);
                continue;
            }
            if (key.equals("maxNGramLeng")) {
                myFlags[col].maxNGramLeng = Integer.parseInt(val);
                continue;
            }
            if (key.equals("minNGramLeng")) {
                myFlags[col].minNGramLeng = Integer.parseInt(val);
                continue;
            }
            if (key.equals("partialNGramRegexp")) {
                myFlags[col].partialNGramRegexp = val;
                try {
                    myFlags[col].partialNGramPattern = Pattern.compile(myFlags[col].partialNGramRegexp);
                }
                catch (PatternSyntaxException pse) {
                    System.err.println("Ill-formed partialNGramPattern: " + myFlags[col].partialNGramPattern);
                    myFlags[col].partialNGramRegexp = null;
                }
                continue;
            }
            if (key.equals("splitWordsRegexp")) {
                try {
                    myFlags[col].splitWordsPattern = Pattern.compile(val);
                }
                catch (PatternSyntaxException pse) {
                    System.err.println("Ill-formed splitWordsRegexp: " + val);
                }
                continue;
            }
            if (key.equals("splitWordsTokenizerRegexp")) {
                try {
                    myFlags[col].splitWordsTokenizerPattern = Pattern.compile(val);
                }
                catch (PatternSyntaxException pse) {
                    System.err.println("Ill-formed splitWordsTokenizerRegexp: " + val);
                }
                continue;
            }
            if (key.equals("splitWordsIgnoreRegexp")) {
                String trimVal = val.trim();
                if (trimVal.isEmpty()) {
                    myFlags[col].splitWordsIgnorePattern = null;
                    continue;
                }
                try {
                    myFlags[col].splitWordsIgnorePattern = Pattern.compile(trimVal);
                }
                catch (PatternSyntaxException pse) {
                    System.err.println("Ill-formed splitWordsIgnoreRegexp: " + trimVal);
                }
                continue;
            }
            if (key.equals("useSplitWords")) {
                myFlags[col].useSplitWords = Boolean.parseBoolean(val);
                continue;
            }
            if (key.equals("useSplitWordPairs")) {
                myFlags[col].useSplitWordPairs = Boolean.parseBoolean(val);
                continue;
            }
            if (key.equals("useAllSplitWordPairs")) {
                myFlags[col].useAllSplitWordPairs = Boolean.parseBoolean(val);
                continue;
            }
            if (key.equals("useAllSplitWordTriples")) {
                myFlags[col].useAllSplitWordTriples = Boolean.parseBoolean(val);
                continue;
            }
            if (key.equals("useSplitWordNGrams")) {
                myFlags[col].useSplitWordNGrams = Boolean.parseBoolean(val);
                continue;
            }
            if (key.equals("maxWordNGramLeng")) {
                myFlags[col].maxWordNGramLeng = Integer.parseInt(val);
                continue;
            }
            if (key.equals("minWordNGramLeng")) {
                myFlags[col].minWordNGramLeng = Integer.parseInt(val);
                if (myFlags[col].minWordNGramLeng >= 1) continue;
                System.err.println("minWordNGramLeng set to " + myFlags[col].minWordNGramLeng + ", resetting to 1");
                myFlags[col].minWordNGramLeng = 1;
                continue;
            }
            if (key.equals("wordNGramBoundaryRegexp")) {
                myFlags[col].wordNGramBoundaryRegexp = val;
                try {
                    myFlags[col].wordNGramBoundaryPattern = Pattern.compile(myFlags[col].wordNGramBoundaryRegexp);
                }
                catch (PatternSyntaxException pse) {
                    System.err.println("Ill-formed wordNGramBoundary regexp: " + myFlags[col].wordNGramBoundaryRegexp);
                    myFlags[col].wordNGramBoundaryRegexp = null;
                }
                continue;
            }
            if (key.equals("useSplitFirstLastWords")) {
                myFlags[col].useSplitFirstLastWords = Boolean.parseBoolean(val);
                continue;
            }
            if (key.equals("loadClassifier")) {
                myFlags[col].loadClassifier = val;
                continue;
            }
            if (key.equals("serializeTo")) {
                Flags.serializeTo = val;
                continue;
            }
            if (key.equals("printTo")) {
                Flags.printTo = val;
                continue;
            }
            if (key.equals("trainFile")) {
                Flags.trainFile = val;
                continue;
            }
            if (key.equals("displayAllAnswers")) {
                Flags.displayAllAnswers = Boolean.parseBoolean(val);
                continue;
            }
            if (key.equals("testFile")) {
                myFlags[col].testFile = val;
                continue;
            }
            if (key.equals("trainFromSVMLight")) {
                Flags.trainFromSVMLight = Boolean.parseBoolean(val);
                continue;
            }
            if (key.equals("testFromSVMLight")) {
                Flags.testFromSVMLight = Boolean.parseBoolean(val);
                continue;
            }
            if (key.equals("encoding")) {
                Flags.encoding = val;
                continue;
            }
            if (key.equals("printSVMLightFormatTo")) {
                Flags.printSVMLightFormatTo = val;
                continue;
            }
            if (key.equals("displayedColumn")) {
                myFlags[col].displayedColumn = Integer.parseInt(val);
                continue;
            }
            if (key.equals("groupingColumn")) {
                myFlags[col].groupingColumn = Integer.parseInt(val);
                continue;
            }
            if (key.equals("rankingScoreColumn")) {
                myFlags[col].rankingScoreColumn = Integer.parseInt(val);
                continue;
            }
            if (key.equals("rankingAccuracyClass")) {
                myFlags[col].rankingAccuracyClass = val;
                continue;
            }
            if (key.equals("goldAnswerColumn")) {
                myFlags[col].goldAnswerColumn = Integer.parseInt(val);
                continue;
            }
            if (key.equals("useQN")) {
                myFlags[col].useQN = Boolean.parseBoolean(val);
                continue;
            }
            if (key.equals("QNsize")) {
                myFlags[col].QNsize = Integer.parseInt(val);
                continue;
            }
            if (key.equals("featureFormat")) {
                myFlags[col].featureFormat = Boolean.parseBoolean(val);
                continue;
            }
            if (key.equals("significantColumnId")) {
                myFlags[col].significantColumnId = Boolean.parseBoolean(val);
                continue;
            }
            if (key.equals("justify")) {
                myFlags[col].justify = Boolean.parseBoolean(val);
                continue;
            }
            if (key.equals("realValued")) {
                myFlags[col].isRealValued = Boolean.parseBoolean(val);
                myUsesRealValues = myUsesRealValues || myFlags[col].isRealValued;
                continue;
            }
            if (key.equals("logTransform")) {
                myFlags[col].logTransform = Boolean.parseBoolean(val);
                myUsesRealValues = myUsesRealValues || myFlags[col].logTransform;
                continue;
            }
            if (key.equals("logitTransform")) {
                myFlags[col].logitTransform = Boolean.parseBoolean(val);
                myUsesRealValues = myUsesRealValues || myFlags[col].logitTransform;
                continue;
            }
            if (key.equals("sqrtTransform")) {
                myFlags[col].sqrtTransform = Boolean.parseBoolean(val);
                myUsesRealValues = myUsesRealValues || myFlags[col].sqrtTransform;
                continue;
            }
            if (key.equals("filename")) {
                myFlags[col].filename = Boolean.parseBoolean(val);
                continue;
            }
            if (key.equals("biased")) {
                myFlags[col].biased = Boolean.parseBoolean(val);
                continue;
            }
            if (key.equals("biasedHyperplane")) {
                if (val == null || val.trim().length() <= 0) continue;
                String[] bits = val.split("[, ]+");
                myFlags[col].biasedHyperplane = new ClassicCounter();
                for (int i = 0; i < bits.length; i += 2) {
                    myFlags[col].biasedHyperplane.setCount(bits[i], Double.parseDouble(bits[i + 1]));
                }
                continue;
            }
            if (key.equals("crossValidationFolds")) {
                myFlags[col].crossValidationFolds = Integer.parseInt(val);
                continue;
            }
            if (key.equals("shuffleTrainingData")) {
                myFlags[col].shuffleTrainingData = Boolean.parseBoolean(val);
                continue;
            }
            if (key.equals("shuffleSeed")) {
                myFlags[col].shuffleSeed = Long.parseLong(val);
                continue;
            }
            if (key.equals("csvFormat")) {
                Flags cfr_ignored_0 = myFlags[col];
                Flags.csvFormat = Boolean.parseBoolean(val);
                continue;
            }
            if (key.equals("splitWordsWithPTBTokenizer")) {
                System.out.println("splitting with ptb tokenizer");
                myFlags[col].splitWordsWithPTBTokenizer = Boolean.parseBoolean(val);
                continue;
            }
            if (key.equals("showTokenization")) {
                myFlags[col].showTokenization = Boolean.parseBoolean(val);
                continue;
            }
            if (key.isEmpty() || key.equals("prop")) continue;
            System.err.println("Unknown property: |" + key + '|');
        }
        myFlags[0].usesRealValues = myUsesRealValues;
        return myFlags;
    }

    /**
     * Creates a classifier configured from a properties file.
     *
     * <p>Delegates to {@link #ColumnDataClassifier(Properties)} with the result of
     * {@code StringUtils.propFileToProperties(filename)}.
     *
     * @param filename handed to {@code StringUtils.propFileToProperties} to obtain the properties
     */
    public ColumnDataClassifier(String filename) {
        this(StringUtils.propFileToProperties(filename));
    }

    /**
     * Creates a classifier configured from the given properties.
     *
     * <p>The properties are converted into a {@code Flags} array by {@code setProperties};
     * that array becomes {@code flags} and its element 0 becomes {@code globalFlags}.
     *
     * @param props the configuration passed to {@code setProperties}
     */
    public ColumnDataClassifier(Properties props) {
        Flags[] parsedFlags = this.setProperties(props);
        this.flags = parsedFlags;
        this.globalFlags = parsedFlags[0];
    }

    /**
     * Command-line entry point.
     *
     * <p>Echoes the invocation to stderr, builds a {@code ColumnDataClassifier} from the
     * command-line arguments, and then:
     * <ul>
     *   <li>prints a usage message and returns when there is neither a test file, a
     *       serialization target, nor at least 2 cross-validation folds, or when there is
     *       neither a training file nor a {@code loadClassifier} value;</li>
     *   <li>calls {@code trainClassifier()} when no {@code loadClassifier} value is set, and
     *       returns if that call reports {@code false};</li>
     *   <li>calls {@code testClassifier(testFile)} when a test file was given.</li>
     * </ul>
     *
     * @param args command-line arguments, converted with {@code StringUtils.argsToProperties}
     * @throws IOException propagated from {@code trainClassifier()}
     */
    public static void main(String[] args) throws IOException {
        System.err.println(StringUtils.toInvocationString("ColumnDataClassifier", args));
        ColumnDataClassifier cdc = new ColumnDataClassifier(StringUtils.argsToProperties(args));
        String testFile = cdc.globalFlags.testFile;

        // Only plain field reads below, so evaluating both conditions up front is side-effect free.
        boolean nothingRequested = testFile == null
                && Flags.serializeTo == null
                && cdc.globalFlags.crossValidationFolds < 2;
        boolean noClassifierSource = Flags.trainFile == null
                && cdc.globalFlags.loadClassifier == null;
        if (nothingRequested || noClassifierSource) {
            System.err.println("usage: java edu.stanford.nlp.classify.ColumnDataClassifier -prop propFile");
            System.err.println("  and/or: -trainFile trainFile -testFile testFile|-serializeTo modelFile [-useNGrams|-sigma sigma|...]");
            return;
        }

        if (cdc.globalFlags.loadClassifier == null) {
            boolean trained = cdc.trainClassifier();
            if (!trained) {
                return;
            }
        }

        if (testFile != null) {
            cdc.testClassifier(testFile);
        }
    }

    /**
     * Reads the training data named by {@code Flags.trainFile} and builds {@code this.classifier}.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>read the training examples and their per-line info;</li>
     *   <li>if {@code shuffleTrainingData} is set, shuffle both together, seeded with
     *       {@code shuffleSeed} or, when that is 0, {@code System.nanoTime()};</li>
     *   <li>print any non-null binned value / binned length counters to stderr;</li>
     *   <li>if {@code Flags.printSVMLightFormatTo} is set, write the data set there plus
     *       {@code .featureIndex} and {@code .labelIndex} side files;</li>
     *   <li>if {@code crossValidationFolds > 1}, run {@code crossValidate};</li>
     *   <li>unless {@code exitAfterTrainingFeaturization} is set, build and print the classifier
     *       and, if {@code Flags.serializeTo} is set, serialize the classifier followed by the
     *       flags array.</li>
     * </ol>
     *
     * <p>Output resources are closed in {@code finally} blocks, and {@code globalFlags.testFile}
     * is restored even when serialization fails, so an I/O error neither leaks a stream nor
     * leaves the in-memory flags altered.
     *
     * @return {@code false} when {@code exitAfterTrainingFeaturization} is set (no classifier is
     *         built); {@code true} otherwise
     * @throws IOException if opening or writing one of the output files fails
     */
    private boolean trainClassifier() throws IOException {
        Pair<GeneralDataset<String, String>, List<String[]>> dataInfo = this.readAndReturnTrainingExamples(Flags.trainFile);
        GeneralDataset<String, String> train = dataInfo.first();
        List<String[]> lineInfos = dataInfo.second();
        if (this.globalFlags.shuffleTrainingData) {
            long seed = this.globalFlags.shuffleSeed != 0L ? this.globalFlags.shuffleSeed : System.nanoTime();
            train.shuffleWithSideInformation(seed, lineInfos);
        }
        for (int i = 0; i < this.flags.length; ++i) {
            if (this.flags[i] == null || this.flags[i].binnedValuesCounter == null) continue;
            System.err.println("BinnedValuesStatistics for column " + i);
            System.err.println(this.flags[i].binnedValuesCounter.toString());
        }
        for (int i = 0; i < this.flags.length; ++i) {
            if (this.flags[i] == null || this.flags[i].binnedLengthsCounter == null) continue;
            System.err.println("BinnedLengthsStatistics for column " + i);
            System.err.println(this.flags[i].binnedLengthsCounter.toString());
        }
        if (Flags.printSVMLightFormatTo != null) {
            PrintWriter pw = IOUtils.getPrintWriter(Flags.printSVMLightFormatTo, Flags.encoding);
            try {
                train.printSVMLightFormat(pw);
            } finally {
                // Close even if printing throws, so the file handle is not leaked.
                IOUtils.closeIgnoringExceptions(pw);
            }
            train.featureIndex().saveToFilename(Flags.printSVMLightFormatTo + ".featureIndex");
            train.labelIndex().saveToFilename(Flags.printSVMLightFormatTo + ".labelIndex");
        }
        if (this.globalFlags.crossValidationFolds > 1) {
            this.crossValidate(train, lineInfos);
        }
        if (this.globalFlags.exitAfterTrainingFeaturization) {
            return false;
        }
        this.classifier = this.makeClassifier(train);
        this.printClassifier(this.classifier);
        String serializeTo = Flags.serializeTo;
        if (serializeTo != null) {
            System.err.println("Serializing classifier to " + serializeTo + "...");
            ObjectOutputStream oos = IOUtils.writeStreamFromString(serializeTo);
            String savedTestFile = this.globalFlags.testFile;
            try {
                oos.writeObject(this.classifier);
                // testFile is cleared only while the flags are written, so the serialized
                // flags do not carry the current run's test file.
                this.globalFlags.testFile = null;
                oos.writeObject(this.flags);
                // Explicit close on the success path so a failing flush/close is reported.
                oos.close();
            } finally {
                this.globalFlags.testFile = savedTestFile;
                // No-op after a successful close; releases the stream if a write threw.
                IOUtils.closeIgnoringExceptions(oos);
            }
            System.err.println("Done.");
        }
        return true;
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    /**
     * Reports a textual description of the given classifier.
     *
     * <p>For a {@code LinearClassifier} the description comes from
     * {@code toString(printClassifier, printClassifierParam)} using the global flags; for any
     * other classifier it is plain {@code toString()}. When {@code Flags.printTo} is set the
     * description is written to that file (an {@code IOException} is printed as a stack trace
     * and otherwise ignored) and a short notice goes to stderr; when it is not set the
     * description itself is printed to stderr.
     *
     * @param classifier the classifier to describe
     */
    private void printClassifier(Classifier classifier) {
        String description;
        if (classifier instanceof LinearClassifier) {
            LinearClassifier linear = (LinearClassifier)classifier;
            description = linear.toString(this.globalFlags.printClassifier, this.globalFlags.printClassifierParam);
        } else {
            description = classifier.toString();
        }

        if (Flags.printTo == null) {
            System.err.print("Built this classifier: ");
            System.err.println(description);
            return;
        }

        PrintWriter out = null;
        try {
            out = IOUtils.getPrintWriter(Flags.printTo, Flags.encoding);
            out.write(description);
            out.println();
        }
        catch (IOException ioe) {
            ioe.printStackTrace();
        }
        finally {
            IOUtils.closeIgnoringExceptions(out);
        }
        System.err.println("Built classifier described in file " + Flags.printTo);
    }

    /**
     * Evaluates {@code this.classifier} on the examples read from the given test file.
     *
     * <p>If the global {@code printFeatures} flag is set, {@code newFeaturePrinter} is first
     * called with it, the tag {@code "test"} and {@code Flags.encoding}. The test examples
     * and their per-line info are then read and handed to {@code testExamples}.
     *
     * @param testFile passed to {@code readTestExamples}
     */
    private void testClassifier(String testFile) {
        String printFeatures = this.globalFlags.printFeatures;
        if (printFeatures != null) {
            ColumnDataClassifier.newFeaturePrinter(printFeatures, "test", Flags.encoding);
        }
        Pair<GeneralDataset<String, String>, List<String[]>> loaded = this.readTestExamples(testFile);
        GeneralDataset<String, String> examples = loaded.first();
        List<String[]> perLineInfo = loaded.second();
        this.testExamples(this.classifier, examples, perLineInfo);
    }

    /**
     * Runs cross-validation over {@code dataset} using the global {@code crossValidationFolds}
     * setting as the number of folds.
     *
     * <p>For each fold the data set is split with {@code splitOutFold}, a classifier is built
     * on the first part and printed, and {@code testExamples} is run on the second part
     * together with the matching slice of {@code lineInfos}. That slice spans
     * {@code lineInfos.size() / numFolds} entries per fold, with the last fold extended to the
     * end of the list.
     * NOTE(review): this presumably mirrors the boundaries used by {@code splitOutFold} —
     * confirm against {@code GeneralDataset}.
     *
     * <p>The two values returned by {@code testExamples} are averaged over the folds and the
     * averages are printed to stderr.
     *
     * @param dataset   the data to split into folds
     * @param lineInfos per-example line information, sliced in step with the folds
     * @return a pair of (average accuracy, average macro-averaged F1) as labelled in the
     *         stderr output
     */
    public Pair<Double, Double> crossValidate(GeneralDataset<String, String> dataset, List<String[]> lineInfos) {
        final int numFolds = this.globalFlags.crossValidationFolds;
        double totalAccuracy = 0.0;
        double totalMacroF1 = 0.0;

        for (int fold = 0; fold < numFolds; ++fold) {
            System.err.println();
            System.err.println("### Fold " + fold);

            Pair<GeneralDataset<String, String>, GeneralDataset<String, String>> parts = dataset.splitOutFold(fold, numFolds);
            GeneralDataset<String, String> foldTrain = parts.first();
            GeneralDataset<String, String> foldTest = parts.second();

            Classifier<String, String> foldClassifier = this.makeClassifier(foldTrain);
            this.printClassifier(foldClassifier);

            // Slice of lineInfos for the held-out part; the final fold absorbs the remainder.
            int foldSize = lineInfos.size() / numFolds;
            int from = foldSize * fold;
            boolean lastFold = fold == numFolds - 1;
            int to = lastFold ? lineInfos.size() : from + foldSize;
            List<String[]> foldLineInfos = lineInfos.subList(from, to);

            Pair<Double, Double> scores = this.testExamples(foldClassifier, foldTest, foldLineInfos);
            totalAccuracy += scores.first().doubleValue();
            totalMacroF1 += scores.second().doubleValue();
        }

        double averageAccuracy = totalAccuracy / (double)numFolds;
        double averageMacroF1 = totalMacroF1 / (double)numFolds;

        DecimalFormat fiveDecimals = new DecimalFormat("0.00000");
        System.err.println("Average accuracy/micro-averaged F1: " + fiveDecimals.format(averageAccuracy));
        System.err.println("Average macro-averaged F1: " + fiveDecimals.format(averageMacroF1));
        System.err.println();
        return new Pair<Double, Double>(averageAccuracy, averageMacroF1);
    }

    /**
     * Returns the class that {@code this.classifier} assigns to the given example.
     *
     * @param example the datum to classify
     * @return the result of {@code classifier.classOf(example)}
     * @throws RuntimeException if {@code this.classifier} is {@code null}
     */
    public String classOf(Datum<String, String> example) {
        if (this.classifier != null) {
            return this.classifier.classOf(example);
        }
        throw new RuntimeException("Classifier is not initialized");
    }

    /**
     * Option holder for {@code ColumnDataClassifier}.
     *
     * <p>The enclosing class keeps an array of these ({@code flags}) and uses element 0 as
     * {@code globalFlags}. Fields declared {@code static} are shared by every instance and,
     * being static, are not part of the default Java serialized form of this class.
     *
     * <p>Initializer convention: boolean fields without an initializer start as {@code false}
     * and reference fields without one start as {@code null} (the Java defaults); numeric
     * defaults are always spelled out.
     */
    static class Flags
    implements Serializable {
        private static final long serialVersionUID = -7076671761070232566L;

        boolean useNGrams;
        boolean usePrefixSuffixNGrams;
        boolean lowercaseNGrams;
        boolean lowercase;
        boolean useSplitNGrams;
        boolean useSplitPrefixSuffixNGrams;
        boolean cacheNGrams;
        int maxNGramLeng = -1;
        int minNGramLeng = 2;
        String partialNGramRegexp;
        Pattern partialNGramPattern;
        boolean useSum;
        double tolerance = 1.0E-4;
        String printFeatures;
        String printClassifier;
        int printClassifierParam = 100;
        boolean exitAfterTrainingFeaturization;
        boolean intern;

        Pattern splitWordsPattern;
        Pattern splitWordsTokenizerPattern;
        Pattern splitWordsIgnorePattern = Pattern.compile("\\s+");
        boolean useSplitWords;
        boolean useSplitWordPairs;
        boolean useSplitFirstLastWords;
        boolean useLowercaseSplitWords;
        int wordShape = -1;
        int splitWordShape = -1;
        boolean useString;
        boolean useClassFeature;

        int[] binnedLengths;
        TwoDimensionalCounter<String, String> binnedLengthsCounter;
        double[] binnedValues;
        TwoDimensionalCounter<String, String> binnedValuesCounter;
        double binnedValuesNaN = -1.0;
        boolean isRealValued;
        public static final String realValuedFeaturePrefix = "Value";
        boolean logitTransform;
        boolean logTransform;
        boolean sqrtTransform;
        char[] countChars;
        int[] countCharsBins = new int[]{0, 1};
        ClassicCounter<String> biasedHyperplane;
        boolean justify;
        boolean featureFormat;
        boolean significantColumnId;

        String useClassifierFactory;
        String classifierFactoryArgs;
        boolean useNB;
        boolean useQN = true;
        int QNsize = 15;
        int prior = LogPrior.LogPriorType.QUADRATIC.ordinal();
        double sigma = 1.0;
        double epsilon = 0.01;
        int featureMinimumSupport = 0;

        int displayedColumn = 1;
        int groupingColumn = -1;
        int rankingScoreColumn = -1;
        String rankingAccuracyClass;
        int goldAnswerColumn = 0;
        boolean biased;

        boolean useSplitWordNGrams;
        int maxWordNGramLeng = -1;
        int minWordNGramLeng = 1;
        boolean useBinary;
        double l1reg = 0.0;
        String wordNGramBoundaryRegexp;
        Pattern wordNGramBoundaryPattern;
        boolean useAdaptL1;
        int limitFeatures = 0;
        String limitFeaturesLabels;
        double l1regmin = 0.0;
        double l1regmax = 500.0;
        double featureWeightThreshold = 0.0;

        String testFile;
        String loadClassifier;

        // Shared (static) settings: one value for all Flags instances.
        static String trainFile;
        static String serializeTo;
        static String printTo;
        static boolean trainFromSVMLight;
        static boolean testFromSVMLight;
        static String encoding;
        static String printSVMLightFormatTo;
        static boolean displayAllAnswers = false;

        boolean usesRealValues;
        boolean filename;
        boolean useAllSplitWordPairs;
        boolean useAllSplitWordTriples;
        boolean showTokenization;
        int crossValidationFolds = -1;
        boolean shuffleTrainingData;
        long shuffleSeed = 0L;
        static boolean csvFormat = false;
        boolean splitWordsWithPTBTokenizer;

        Flags() {
        }

        /**
         * Returns a short summary showing only {@code goldAnswerColumn}, {@code useString},
         * {@code useNGrams} and {@code usePrefixSuffixNGrams}.
         */
        @Override
        public String toString() {
            StringBuilder summary = new StringBuilder("Flags[");
            summary.append("goldAnswerColumn = ").append(this.goldAnswerColumn);
            summary.append(", useString = ").append(this.useString);
            summary.append(", useNGrams = ").append(this.useNGrams);
            summary.append(", usePrefixSuffixNGrams = ").append(this.usePrefixSuffixNGrams);
            summary.append(']');
            return summary.toString();
        }
    }
}

