package de.dfki.km.perspecting.obie.model;

import cern.colt.Arrays;
import de.dfki.km.perspecting.obie.connection.KnowledgeBase;
import de.dfki.km.perspecting.obie.workflow.Pipeline;
import gnu.trove.TIntHashSet;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Logger;
import org.apache.commons.io.FileUtils;

/* loaded from: input_file:de/dfki/km/perspecting/obie/model/TextCorpus.class */
/**
 * A collection of text documents stored as files directly inside one directory.
 *
 * <p>The corpus can be traversed file by file with {@link #forEach(DocumentProcedure)} and
 * converted into a token-per-line labeled corpus with
 * {@link #labelRDFTypes(File, KnowledgeBase, Pipeline, String, Set, Map)}.
 */
public class TextCorpus {
    private static final String SPACE = " ";
    private static final Logger LOG = Logger.getLogger(TextCorpus.class.getName());

    /** Directory whose files make up this corpus. */
    protected File corpus;

    /**
     * Creates a corpus over the given directory.
     *
     * @param file directory containing the documents of the corpus
     */
    public TextCorpus(File file) {
        this.corpus = file;
    }

    /**
     * Applies the given procedure to every file of the corpus and collects the results.
     *
     * <p>Each file is opened with a {@link FileReader} (platform default charset) that is closed
     * again as soon as the procedure returns or throws.
     *
     * @param documentProcedure callback invoked with a reader on the file and the file's
     *                          absolute path
     * @return the values returned by the procedure, in the order the files were visited
     * @throws Exception if a file cannot be opened or the procedure fails; processing stops at
     *                   the first failure
     */
    public List<?> forEach(DocumentProcedure<?> documentProcedure) throws Exception {
        List<Object> results = new ArrayList<Object>();
        for (File document : getFiles(this.corpus)) {
            LOG.info("processing file: " + document.getName());
            FileReader reader = new FileReader(document);
            try {
                results.add(documentProcedure.process(reader, document.getAbsolutePath()));
            } finally {
                // Release the file handle even when the procedure throws.
                reader.close();
            }
        }
        return results;
    }

    /**
     * Runs the pipeline on every document of this corpus and writes one line per token to
     * {@code file}, in the form {@code token SPACE posTag SPACE nounPhraseTag SPACE label}.
     *
     * <p>The label is {@code "O"} unless the types of the token map to exactly one distinct
     * value in {@code map}; in that case the label is the first two characters of the token's
     * noun phrase tag followed by that value. Tokens whose types map to several distinct values
     * are logged as ambiguous and keep the label {@code "O"}.
     *
     * <p>The output writer is always closed, also when processing a document fails. A file that
     * was partially written before the failure is left on disk.
     *
     * @param file          output file for the labeled tokens; an existing file is overwritten
     * @param knowledgeBase not used by this method
     * @param pipeline      pipeline that creates each document and executes its steps on it
     * @param str           passed through unchanged as the second argument of
     *                      {@code Pipeline.createDocument}
     * @param set           not used by this method
     * @param map           maps each type of a token to the label value written to the output;
     *                      must contain an entry for every type that occurs
     * @return a labeled corpus backed by {@code file} and referring to this corpus
     * @throws Exception if the output file cannot be written or any document fails to process;
     *                   in particular a {@link NullPointerException} is raised when a type has no
     *                   entry in {@code map}
     */
    public LabeledTextCorpus labelRDFTypes(File file, KnowledgeBase knowledgeBase, final Pipeline pipeline, final String str, Set<Integer> set, final Map<Integer, Integer> map) throws Exception {
        final BufferedWriter writer = new BufferedWriter(new FileWriter(file));
        try {
            forEach(new DocumentProcedure<String>() {
                @Override
                public String process(Reader reader, String path) throws Exception {
                    // The supplied reader is not consumed; the pipeline reads the file itself.
                    Document document = pipeline.createDocument(new File(path), str);

                    // Each executed step returns the index of the step to run next.
                    int step = 0;
                    while (pipeline.hasNext(step)) {
                        step = pipeline.execute(step, document);
                    }

                    Iterator<Token> tokens = document.iterator();
                    while (tokens.hasNext()) {
                        Token token = tokens.next();
                        String text = token.toString();
                        String partOfSpeechTag = token.getPartOfSpeechTag();
                        String nounPhraseTag = token.getNounPhraseTag();

                        // Collapse the token's types to the distinct label values they map to.
                        TIntHashSet labelIds = new TIntHashSet();
                        Iterator<Integer> types = token.getTypes(0.0d).iterator();
                        while (types.hasNext()) {
                            labelIds.add(map.get(types.next()).intValue());
                        }
                        int[] ids = labelIds.toArray();

                        String label = "O";
                        if (labelIds.size() == 1) {
                            // NOTE(review): assumes the noun phrase tag has at least two
                            // characters (presumably a chunk prefix such as "B-") - confirm.
                            label = nounPhraseTag.substring(0, 2) + Integer.toString(ids[0]);
                            LOG.info("labeled " + Arrays.toString(ids) + " for " + token);
                        } else {
                            LOG.info("skipped ambiguous labels " + Arrays.toString(ids) + " for " + token);
                        }

                        writer.append(text).append(SPACE)
                                .append(partOfSpeechTag).append(SPACE)
                                .append(nounPhraseTag).append(SPACE)
                                .append(label);
                        writer.newLine();
                    }
                    return path;
                }
            });
        } finally {
            // Flushes buffered output and releases the file handle on success and on failure.
            writer.close();
        }
        return new LabeledTextCorpus(file, this);
    }

    /**
     * Lists the files located directly in the given directory, without descending into
     * sub-directories and without filtering by extension.
     */
    private Collection<File> getFiles(File file) {
        return FileUtils.listFiles(file, (String[]) null, false);
    }

    /**
     * Returns the directory backing this corpus.
     *
     * @return the corpus directory passed to the constructor
     */
    public File getCorpus() {
        return this.corpus;
    }
}
