/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.international.arabic.process;

import edu.stanford.nlp.international.arabic.process.IOBUtils;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.objectbank.IteratorFromReaderFactory;
import edu.stanford.nlp.objectbank.LineIterator;
import edu.stanford.nlp.objectbank.TokenizerFactory;
import edu.stanford.nlp.process.SerializableFunction;
import edu.stanford.nlp.sequences.DocumentReaderAndWriter;
import edu.stanford.nlp.sequences.SeqClassifierFlags;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;

/**
 * A {@link DocumentReaderAndWriter} for Arabic text that turns every item produced by a
 * {@link LineIterator} factory into a list of {@link CoreLabel}s via
 * {@code IOBUtils.StringToIOB}, and prints classifier answers as tab-separated columns.
 *
 * <p>Three input modes are supported, selected by the constructor arguments:
 * <ul>
 *   <li>tagged input ({@code hasTags == true}): whitespace-separated {@code word|||tag}
 *       tokens; each word is optionally normalised with the tokenizer factory;</li>
 *   <li>untagged input without a tokenizer factory: the raw line is handed to
 *       {@code IOBUtils.StringToIOB(String, Character)};</li>
 *   <li>untagged input with a tokenizer factory: the line is tokenized first.</li>
 * </ul>
 *
 * <p>The per-line function is deliberately kept as an anonymous class with its own
 * {@code serialVersionUID}, and the instance fields are left as they were, so that the
 * serialized shape of this class does not change.
 */
public class ArabicDocumentReaderAndWriter
implements DocumentReaderAndWriter<CoreLabel> {
    private static final long serialVersionUID = 6730676681967976015L;

    /** Segment marker handed to {@code IOBUtils} when the input carries segmentation markers. */
    private static final Character DEFAULT_SEG_MARKER = Character.valueOf('-');

    /** Literal separator between a word and its tag in tagged input. */
    private static final String TAG_DELIMITER = "|||";

    /** Compiled once: literal match of {@link #TAG_DELIMITER}. */
    private static final Pattern TAG_DELIMITER_PATTERN = Pattern.compile(Pattern.quote(TAG_DELIMITER));

    /** Compiled once: separator between tokens on a tagged line. */
    private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");

    private final IteratorFromReaderFactory<List<CoreLabel>> factory;
    /** Optional tokenizer factory; {@code null} means "do not tokenize". */
    private final TokenizerFactory<CoreLabel> tf;
    /** {@link #DEFAULT_SEG_MARKER} when the input has segmentation markers, otherwise {@code null}. */
    private final Character segMarker;
    // Not read by the code in this class; retained so the set of instance fields is unchanged.
    private final String tagDelimiter = TAG_DELIMITER;
    private final boolean inputHasTags;

    /**
     * Creates a reader for untagged input with no tokenizer factory.
     *
     * @param hasSegMarkers whether the input contains segmentation markers
     */
    public ArabicDocumentReaderAndWriter(boolean hasSegMarkers) {
        this(hasSegMarkers, null);
    }

    /**
     * Creates a reader for untagged input.
     *
     * @param hasSegMarkers whether the input contains segmentation markers
     * @param tokFactory tokenizer factory applied to each line, or {@code null} for none
     */
    public ArabicDocumentReaderAndWriter(boolean hasSegMarkers, TokenizerFactory<CoreLabel> tokFactory) {
        this(hasSegMarkers, false, tokFactory);
    }

    /**
     * Creates a reader.
     *
     * @param hasSegMarkers whether the input contains segmentation markers; when {@code true}
     *        the marker {@code '-'} is passed to {@code IOBUtils}, otherwise {@code null}
     * @param hasTags whether each whitespace-separated token has the form {@code word|||tag}
     * @param tokFactory tokenizer factory applied to the text, or {@code null} for none
     */
    public ArabicDocumentReaderAndWriter(boolean hasSegMarkers, boolean hasTags, TokenizerFactory<CoreLabel> tokFactory) {
        this.tf = tokFactory;
        this.inputHasTags = hasTags;
        this.segMarker = hasSegMarkers ? DEFAULT_SEG_MARKER : null;
        this.factory = LineIterator.getFactory(new SerializableFunction<String, List<CoreLabel>>(){
            private static final long serialVersionUID = 5243251505653686497L;

            /**
             * Converts one input line into its IOB representation, choosing the
             * conversion according to the enclosing reader's configuration.
             */
            @Override
            public List<CoreLabel> apply(String in) {
                if (ArabicDocumentReaderAndWriter.this.inputHasTags) {
                    return IOBUtils.StringToIOB(this.parseTaggedLine(in), ArabicDocumentReaderAndWriter.this.segMarker, true);
                }
                if (ArabicDocumentReaderAndWriter.this.tf == null) {
                    return IOBUtils.StringToIOB(in, ArabicDocumentReaderAndWriter.this.segMarker);
                }
                List<CoreLabel> line = ArabicDocumentReaderAndWriter.this.tf.getTokenizer(new StringReader(in)).tokenize();
                return IOBUtils.StringToIOB(line, ArabicDocumentReaderAndWriter.this.segMarker, false);
            }

            /**
             * Splits a tagged line into {@code word|||tag} tokens and builds one
             * {@link CoreLabel} (value, word and tag set) per token.
             *
             * @throws IllegalArgumentException if a non-empty token lacks the delimiter or the tag
             */
            private List<CoreLabel> parseTaggedLine(String in) {
                String[] toks = WHITESPACE_PATTERN.split(in);
                List<CoreLabel> input = new ArrayList<CoreLabel>(toks.length);
                for (String wordTag : toks) {
                    // Leading whitespace (or a blank line) yields an empty first token; it carries no data.
                    if (wordTag.isEmpty()) {
                        continue;
                    }
                    String[] wordTagPair = TAG_DELIMITER_PATTERN.split(wordTag);
                    // Fail with a useful message even when assertions are disabled, instead of
                    // an ArrayIndexOutOfBoundsException on the element accesses below.
                    if (wordTagPair.length < 2) {
                        throw new IllegalArgumentException("Malformed token (expected word" + TAG_DELIMITER + "tag): " + wordTag);
                    }
                    assert (wordTagPair.length == 2);
                    String word = wordTagPair[0];
                    if (ArabicDocumentReaderAndWriter.this.tf != null) {
                        List<CoreLabel> lexList = ArabicDocumentReaderAndWriter.this.tf.getTokenizer(new StringReader(word)).tokenize();
                        // The tokenizer produced nothing for this word: drop the token entirely.
                        if (lexList.isEmpty()) {
                            continue;
                        }
                        if (lexList.size() > 1) {
                            System.err.printf("%s: Raw token generates multiple segments: %s%n", this.getClass().getName(), word);
                        }
                        // Only the first segment is kept, even when several were produced.
                        word = lexList.get(0).value();
                    }
                    CoreLabel cl = new CoreLabel();
                    cl.setValue(word);
                    cl.setWord(word);
                    cl.setTag(wordTagPair[1]);
                    input.add(cl);
                }
                return input;
            }
        });
    }

    /** No-op: this reader takes all of its configuration from the constructor. */
    @Override
    public void init(SeqClassifierFlags flags) {
    }

    /**
     * Returns an iterator over the documents read from {@code r}, one list of
     * {@link CoreLabel}s per item produced by the underlying line-iterator factory.
     *
     * @param r the reader to consume
     * @return an iterator of converted documents
     */
    @Override
    public Iterator<List<CoreLabel>> getIterator(Reader r) {
        return this.factory.getIterator(r);
    }

    /**
     * Prints a header row followed by one tab-separated row per label containing its
     * answer, gold answer and character annotations.
     *
     * @param doc the labels to print
     * @param pw the writer receiving the output
     */
    @Override
    public void printAnswers(List<CoreLabel> doc, PrintWriter pw) {
        pw.println("Answer\tGoldAnswer\tCharacter");
        for (CoreLabel word : doc) {
            pw.printf("%s\t%s\t%s%n", word.get(CoreAnnotations.AnswerAnnotation.class), word.get(CoreAnnotations.GoldAnswerAnnotation.class), word.get(CoreAnnotations.CharAnnotation.class));
        }
    }
}

