/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.international.arabic.process;

import edu.stanford.nlp.ie.crf.CRFClassifier;
import edu.stanford.nlp.international.arabic.process.ArabicDocumentReaderAndWriter;
import edu.stanford.nlp.international.arabic.process.ArabicTokenizer;
import edu.stanford.nlp.international.arabic.process.IOBUtils;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.objectbank.ObjectBank;
import edu.stanford.nlp.process.TokenizerFactory;
import edu.stanford.nlp.process.WordSegmenter;
import edu.stanford.nlp.sequences.DocumentReaderAndWriter;
import edu.stanford.nlp.sequences.SeqClassifierFlags;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.PropertiesUtils;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.concurrent.MulticoreWrapper;
import edu.stanford.nlp.util.concurrent.ThreadsafeProcessor;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.Serializable;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Properties;

/**
 * Arabic word segmenter backed by a {@link CRFClassifier} over character-level
 * IOB labels.
 *
 * <p>Input is optionally run through an {@link ArabicTokenizer}, converted to a
 * character sequence with {@link IOBUtils}, labelled by the classifier, and
 * converted back to a string in which segmented prefixes/suffixes may carry
 * marker strings.
 *
 * <p>Training through the {@link WordSegmenter} tree/sentence callbacks is not
 * supported; use {@link #train()} (driven by {@code flags.trainFile}) or
 * {@link #loadSegmenter(String)} instead.
 *
 * <p>Instances created through {@link #newInstance()} share the classifier and
 * flags of the instance they were copied from.
 */
public class ArabicSegmenter
implements WordSegmenter,
Serializable,
ThreadsafeProcessor<String, String> {
    private static final long serialVersionUID = -4791848633597417788L;

    // Names of the command-line / property options consumed by this class
    // (as opposed to the ones forwarded to SeqClassifierFlags).
    private final String optTokenized = "tokenized";
    private final String optTokenizer = "orthoOptions";
    private final String optPrefix = "prefixMarker";
    private final String optSuffix = "suffixMarker";
    // Not read by any code in this class: "nthreads" is referenced via string literals.
    private final String optThreads = "nthreads";

    // Not serialized with this object; persisted separately via serializeSegmenter().
    private transient CRFClassifier<CoreLabel> classifier;
    private final SeqClassifierFlags flags;
    // null when the input is declared to be pre-tokenized.
    private final TokenizerFactory<CoreLabel> tf;
    private final String prefixMarker;
    private final String suffixMarker;
    private final boolean isTokenized;
    private final String tokenizerOptions;

    /**
     * Builds a segmenter from the given properties.
     *
     * <p>NOTE: {@code props} is modified in place. The options handled by this
     * class ({@code orthoOptions}, {@code tokenized}, {@code prefixMarker},
     * {@code suffixMarker}, {@code nthreads}) are removed and
     * {@code featureFactory} is set before the remainder is handed to
     * {@link SeqClassifierFlags}. {@link #getSegmenter(Properties)} relies on
     * this, since it passes the same object on to
     * {@link #loadSegmenter(String, Properties)}.
     *
     * @param props segmenter and classifier options
     * @throws RuntimeException if {@code orthoOptions} contains {@code removeSegMarker}
     */
    public ArabicSegmenter(Properties props) {
        this.isTokenized = props.containsKey(this.optTokenized);
        this.tokenizerOptions = props.getProperty(this.optTokenizer, null);
        // Must run after isTokenized and tokenizerOptions are assigned.
        this.tf = this.getTokenizerFactory();
        this.prefixMarker = props.getProperty(this.optPrefix, "");
        this.suffixMarker = props.getProperty(this.optSuffix, "");

        // Strip the segmenter-only options so that only classifier flags remain.
        props.remove(this.optTokenizer);
        props.remove(this.optTokenized);
        props.remove(this.optPrefix);
        props.remove(this.optSuffix);
        props.remove("nthreads");

        props.put("featureFactory", "edu.stanford.nlp.international.arabic.process.ArabicSegmenterFeatureFactory");
        this.flags = new SeqClassifierFlags(props);
        this.classifier = new CRFClassifier<CoreLabel>(this.flags);
    }

    /**
     * Copy constructor used by {@link #newInstance()}.
     *
     * <p>The copy gets its own tokenizer factory but shares {@code flags} and
     * the classifier instance with {@code other}.
     *
     * @param other segmenter to copy the configuration from
     */
    public ArabicSegmenter(ArabicSegmenter other) {
        this.isTokenized = other.isTokenized;
        this.tokenizerOptions = other.tokenizerOptions;
        this.prefixMarker = other.prefixMarker;
        this.suffixMarker = other.suffixMarker;
        this.flags = other.flags;
        // Must run after isTokenized and tokenizerOptions are assigned.
        this.tf = this.getTokenizerFactory();
        this.classifier = other.classifier;
    }

    /**
     * Creates the tokenizer factory matching {@code isTokenized} and
     * {@code tokenizerOptions}.
     *
     * @return a configured factory, or {@code null} when the input is pre-tokenized
     * @throws RuntimeException if the user options contain {@code removeSegMarker}
     */
    private TokenizerFactory<CoreLabel> getTokenizerFactory() {
        if (this.isTokenized) {
            return null;
        }
        TokenizerFactory<CoreLabel> tokFactory;
        String effectiveOptions;
        if (this.tokenizerOptions == null) {
            // No user options: ATB factory with the default marker-removal options.
            tokFactory = ArabicTokenizer.atbFactory();
            effectiveOptions = "removeProMarker,removeMorphMarker";
        } else {
            if (this.tokenizerOptions.contains("removeSegMarker")) {
                throw new RuntimeException("Option 'removeSegMarker' cannot be used with ArabicSegmenter");
            }
            tokFactory = ArabicTokenizer.factory();
            effectiveOptions = this.tokenizerOptions;
        }
        tokFactory.setOptions(effectiveOptions);
        // Report the options actually applied (previously printed "null" for the defaults).
        System.err.println("Loaded ArabicTokenizer with options: " + effectiveOptions);
        return tokFactory;
    }

    /**
     * Unsupported.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void initializeTraining(double numTrees) {
        throw new UnsupportedOperationException("Training is not supported!");
    }

    /**
     * Unsupported.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void train(Collection<Tree> trees) {
        throw new UnsupportedOperationException("Training is not supported!");
    }

    /**
     * Unsupported.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void train(Tree tree) {
        throw new UnsupportedOperationException("Training is not supported!");
    }

    /**
     * Unsupported.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void train(List<TaggedWord> sentence) {
        throw new UnsupportedOperationException("Training is not supported!");
    }

    /**
     * Unsupported.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void finishTraining() {
        throw new UnsupportedOperationException("Training is not supported!");
    }

    /**
     * {@link ThreadsafeProcessor} entry point; delegates to {@link #segmentString(String)}.
     *
     * @param nextInput line to segment
     * @return the segmented line
     */
    @Override
    public String process(String nextInput) {
        return this.segmentString(nextInput);
    }

    /**
     * Returns a copy of this segmenter built with the copy constructor.
     *
     * @return a new segmenter sharing this instance's classifier and flags
     */
    @Override
    public ThreadsafeProcessor<String, String> newInstance() {
        return new ArabicSegmenter(this);
    }

    /**
     * Segments a line and returns the whitespace-separated result as words.
     *
     * @param line text to segment
     * @return the segmented words; empty when the segmented output is blank
     */
    @Override
    public List<HasWord> segment(String line) {
        // Trim first: split() on a string that is empty or starts with
        // whitespace would otherwise produce a spurious empty first token.
        String segmentedString = this.segmentString(line).trim();
        if (segmentedString.length() == 0) {
            return Sentence.toWordList(new String[0]);
        }
        return Sentence.toWordList(segmentedString.split("\\s+"));
    }

    /**
     * Segments a line of text.
     *
     * <p>When a tokenizer factory is configured the line is tokenized first;
     * otherwise the raw string is converted directly. The resulting sequence is
     * labelled by the classifier and rendered back to a string using the
     * configured prefix and suffix markers.
     *
     * @param line text to segment
     * @return the segmented text
     */
    public String segmentString(String line) {
        List<CoreLabel> charSequence;
        if (this.tf == null) {
            charSequence = IOBUtils.StringToIOB(line);
        } else {
            List<CoreLabel> tokens = this.tf.getTokenizer(new StringReader(line)).tokenize();
            charSequence = IOBUtils.StringToIOB(tokens, null, false);
        }
        List<CoreLabel> labelled = this.classifier.classify(charSequence);
        return IOBUtils.IOBToString(labelled, this.prefixMarker, this.suffixMarker);
    }

    /**
     * Segments every line read from {@code br} and prints the result to {@code pwOut}.
     *
     * <p>An {@link IOException} while reading ends processing early: the stack
     * trace is printed and the count accumulated so far is returned.
     *
     * @param br    source of input lines
     * @param pwOut destination for segmented lines
     * @return the total number of input characters read (line terminators excluded)
     */
    public long segment(BufferedReader br, PrintWriter pwOut) {
        long nSegmented = 0L;
        try {
            for (String line = br.readLine(); line != null; line = br.readLine()) {
                nSegmented += (long)line.length();
                pwOut.println(this.segmentString(line));
            }
        }
        catch (IOException e) {
            // Best effort: keep what was already written and report the failure.
            e.printStackTrace();
        }
        return nSegmented;
    }

    /**
     * Trains the classifier on {@code flags.trainFile}, reading it with an
     * {@link ArabicDocumentReaderAndWriter} configured for input that has both
     * segmentation markers and tags.
     */
    public void train() {
        boolean hasSegmentationMarkers = true;
        boolean hasTags = true;
        ArabicDocumentReaderAndWriter docReader = new ArabicDocumentReaderAndWriter(hasSegmentationMarkers, hasTags, this.tf);
        ObjectBank<List<CoreLabel>> lines = this.classifier.makeObjectBankFromFile(this.flags.trainFile, docReader);
        this.classifier.train(lines, (DocumentReaderAndWriter<CoreLabel>)docReader);
        System.err.println("Finished training.");
    }

    /**
     * Classifies {@code flags.testFile} and prints overall and per-label
     * accuracy. Labels whose character equals the IOB boundary character are
     * not scored.
     *
     * @param pwOut destination for the evaluation report
     */
    private void evaluate(PrintWriter pwOut) {
        System.err.println("Starting evaluation...");
        boolean hasSegmentationMarkers = true;
        boolean hasTags = true;
        ArabicDocumentReaderAndWriter docReader = new ArabicDocumentReaderAndWriter(hasSegmentationMarkers, hasTags, this.tf);
        ObjectBank<List<CoreLabel>> lines = this.classifier.makeObjectBankFromFile(this.flags.testFile, docReader);

        ClassicCounter<String> labelTotal = new ClassicCounter<String>();
        ClassicCounter<String> labelCorrect = new ClassicCounter<String>();
        int total = 0;
        int correct = 0;
        for (List<CoreLabel> line : lines) {
            List<CoreLabel> classified = this.classifier.classify(line);
            for (CoreLabel label : classified) {
                String observation = (String)label.get(CoreAnnotations.CharAnnotation.class);
                if (observation.equals(IOBUtils.getBoundaryCharacter())) {
                    continue;
                }
                ++total;
                String hypothesis = (String)label.get(CoreAnnotations.AnswerAnnotation.class);
                String reference = (String)label.get(CoreAnnotations.GoldAnswerAnnotation.class);
                labelTotal.incrementCount(reference);
                if (hypothesis.equals(reference)) {
                    ++correct;
                    labelCorrect.incrementCount(reference);
                }
            }
        }

        // Guard against 0/0 (NaN) when the test file yields no scored datums.
        double accuracy = total == 0 ? 0.0 : (double)correct / (double)total * 100.0;
        pwOut.println("EVALUATION RESULTS");
        pwOut.printf("#datums:\t%d%n", total);
        pwOut.printf("#correct:\t%d%n", correct);
        pwOut.printf("accuracy:\t%.2f%n", accuracy);
        pwOut.println("==================");
        pwOut.println("PER LABEL ACCURACIES");
        for (String refLabel : labelTotal.keySet()) {
            // Every key was incremented at least once, so nTotal is never zero here.
            double nTotal = labelTotal.getCount(refLabel);
            double nCorrect = labelCorrect.getCount(refLabel);
            double acc = nCorrect / nTotal * 100.0;
            pwOut.printf(" %s\t%.2f%n", refLabel, acc);
        }
    }

    /**
     * Placeholder for evaluation against raw text.
     *
     * @param pwOut unused
     * @throws RuntimeException always; not implemented
     */
    private void evaluateRawText(PrintWriter pwOut) {
        throw new RuntimeException("Not yet implemented!");
    }

    /**
     * Serializes the underlying classifier to {@code filename}.
     *
     * @param filename destination path
     */
    public void serializeSegmenter(String filename) {
        this.classifier.serializeClassifier(filename);
    }

    /**
     * Loads a serialized classifier from {@code filename} and installs it as
     * this segmenter's classifier.
     *
     * <p>The current classifier is replaced only if loading succeeds.
     *
     * @param filename path of the serialized classifier
     * @param p        properties passed to the classifier constructor and loader
     * @throws RuntimeException if the model cannot be read or deserialized; the
     *         underlying exception is attached as the cause
     */
    public void loadSegmenter(String filename, Properties p) {
        CRFClassifier<CoreLabel> loaded = new CRFClassifier<CoreLabel>(p);
        try {
            loaded.loadClassifier(new File(filename), p);
        }
        catch (ClassCastException e) {
            throw new RuntimeException("Failed to load segmenter " + filename, e);
        }
        catch (IOException e) {
            throw new RuntimeException("Failed to load segmenter " + filename, e);
        }
        catch (ClassNotFoundException e) {
            throw new RuntimeException("Failed to load segmenter " + filename, e);
        }
        this.classifier = loaded;
    }

    /**
     * Loads a serialized classifier from {@code filename} using empty properties.
     *
     * @param filename path of the serialized classifier
     * @throws RuntimeException if the model cannot be read or deserialized
     */
    @Override
    public void loadSegmenter(String filename) {
        this.loadSegmenter(filename, new Properties());
    }

    /**
     * Builds the command-line help text.
     *
     * @return the usage message
     */
    private static String usage() {
        String nl = System.getProperty("line.separator");
        StringBuilder sb = new StringBuilder();
        sb.append("Usage: java ").append(ArabicSegmenter.class.getName()).append(" OPTS < file_to_segment").append(nl);
        sb.append(nl).append(" Options:").append(nl);
        sb.append("  -help                : Print this message.").append(nl);
        sb.append("  -orthoOptions str    : Comma-separated list of orthographic normalization options to pass to ArabicTokenizer.").append(nl);
        sb.append("  -tokenized           : Text is already tokenized. Do not run internal tokenizer.").append(nl);
        sb.append("  -trainFile file      : Gold segmented IOB training file.").append(nl);
        sb.append("  -testFile  file      : Gold segmented IOB evaluation file.").append(nl);
        sb.append("  -textFile  file      : Raw input file to be segmented.").append(nl);
        sb.append("  -loadClassifier file : Load serialized classifier from file.").append(nl);
        sb.append("  -prefixMarker char   : Mark segmented prefixes with specified character.").append(nl);
        sb.append("  -suffixMarker char   : Mark segmented suffixes with specified character.").append(nl);
        sb.append("  -nthreads num        : Number of threads  (default: 1)").append(nl);
        sb.append(nl).append(" Otherwise, all flags correspond to those present in SeqClassifierFlags.java.").append(nl);
        return sb.toString();
    }

    /**
     * Maps each option handled by {@link #main(String[])} to the number of
     * arguments it takes.
     *
     * @return option name to argument count
     */
    private static Map<String, Integer> optionArgDefs() {
        Map<String, Integer> optionArgDefs = Generics.newHashMap();
        optionArgDefs.put("help", 0);
        optionArgDefs.put("orthoOptions", 1);
        optionArgDefs.put("tokenized", 0);
        optionArgDefs.put("trainFile", 1);
        optionArgDefs.put("testFile", 1);
        optionArgDefs.put("textFile", 1);
        optionArgDefs.put("loadClassifier", 1);
        optionArgDefs.put("prefixMarker", 1);
        optionArgDefs.put("suffixMarker", 1);
        optionArgDefs.put("nthreads", 1);
        return optionArgDefs;
    }

    /**
     * Command-line entry point. Evaluates on {@code -testFile} when given;
     * otherwise segments {@code -textFile} (or standard input) to standard output.
     *
     * @param args command-line options; see {@link #usage()}
     */
    public static void main(String[] args) {
        Properties options = StringUtils.argsToProperties(args, ArabicSegmenter.optionArgDefs());
        if (options.containsKey("help") || args.length == 0) {
            System.err.println(ArabicSegmenter.usage());
            System.exit(-1);
        }
        // Read before getSegmenter(): the constructor removes "nthreads" from options.
        int nThreads = PropertiesUtils.getInt(options, "nthreads", 1);
        ArabicSegmenter segmenter = ArabicSegmenter.getSegmenter(options);
        try {
            PrintWriter pwOut = new PrintWriter(System.out, true);
            if (segmenter.flags.testFile != null) {
                if (segmenter.flags.answerFile == null) {
                    segmenter.evaluate(pwOut);
                } else {
                    segmenter.evaluateRawText(pwOut);
                }
            } else {
                InputStream in = segmenter.flags.textFile == null
                        ? System.in
                        : new FileInputStream(segmenter.flags.textFile);
                BufferedReader br = null;
                try {
                    // Decode stdin with the configured encoding too, not the platform default.
                    br = new BufferedReader(new InputStreamReader(in, segmenter.flags.inputEncoding));
                    double charsPerSec = ArabicSegmenter.decode(segmenter, br, pwOut, nThreads);
                    System.err.printf("Done! Processed input text at %.2f input characters/second%n", charsPerSec);
                }
                finally {
                    // Close the input even if the encoding is rejected or decoding throws.
                    if (br != null) {
                        IOUtils.closeIgnoringExceptions(br);
                    } else {
                        IOUtils.closeIgnoringExceptions(in);
                    }
                }
            }
        }
        catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
        catch (FileNotFoundException e) {
            System.err.printf("%s: Could not open %s%n", ArabicSegmenter.class.getName(), segmenter.flags.textFile);
        }
    }

    /**
     * Segments all lines from {@code br} to {@code pwOut}, using a
     * {@link MulticoreWrapper} when more than one thread is requested.
     *
     * @param segmenter segmenter to run
     * @param br        source of input lines
     * @param pwOut     destination for segmented lines
     * @param nThreads  number of threads; expected to be positive
     * @return throughput in input characters per second
     */
    private static double decode(ArabicSegmenter segmenter, BufferedReader br, PrintWriter pwOut, int nThreads) {
        assert (nThreads > 0);
        long nChars = 0L;
        long startTime = System.nanoTime();
        if (nThreads > 1) {
            MulticoreWrapper<String, String> wrapper = new MulticoreWrapper<String, String>(nThreads, segmenter);
            try {
                String line;
                while ((line = br.readLine()) != null) {
                    nChars += (long)line.length();
                    wrapper.put(line);
                    // Drain whatever results are already available.
                    while (wrapper.peek()) {
                        pwOut.println(wrapper.poll());
                    }
                }
                wrapper.join();
                // Drain the results that completed during join().
                while (wrapper.peek()) {
                    pwOut.println(wrapper.poll());
                }
            }
            catch (IOException e) {
                // Best effort: report the read failure and fall through to the timing code.
                e.printStackTrace();
            }
        } else {
            nChars = segmenter.segment(br, pwOut);
        }
        long duration = System.nanoTime() - startTime;
        double elapsedSeconds = (double)duration / 1.0E9;
        return (double)nChars / elapsedSeconds;
    }

    /**
     * Builds a segmenter from command-line options and makes it ready to use:
     * loads the model named by {@code loadClassifier}, or else trains on
     * {@code trainFile} (serializing to {@code serializeTo} if set). Prints the
     * usage message and exits with status -1 when neither is specified.
     *
     * @param options parsed command-line options; modified by the constructor
     * @return the loaded or trained segmenter
     */
    private static ArabicSegmenter getSegmenter(Properties options) {
        ArabicSegmenter segmenter = new ArabicSegmenter(options);
        if (segmenter.flags.inputEncoding == null) {
            segmenter.flags.inputEncoding = System.getProperty("file.encoding");
        }
        if (segmenter.flags.loadClassifier != null) {
            segmenter.loadSegmenter(segmenter.flags.loadClassifier, options);
        } else if (segmenter.flags.trainFile != null) {
            segmenter.train();
            if (segmenter.flags.serializeTo != null) {
                segmenter.serializeSegmenter(segmenter.flags.serializeTo);
                System.err.println("Serialized segmenter to: " + segmenter.flags.serializeTo);
            }
        } else {
            System.err.println("No training file or trained model specified!");
            System.err.println(ArabicSegmenter.usage());
            System.exit(-1);
        }
        return segmenter;
    }
}

