package cc.mallet.extract.test;

import cc.mallet.extract.BIOTokenizationFilter;
import cc.mallet.extract.DefaultTokenizationFilter;
import cc.mallet.extract.DocumentExtraction;
import cc.mallet.extract.HierarchicalTokenizationFilter;
import cc.mallet.extract.LabeledSpan;
import cc.mallet.extract.LabeledSpans;
import cc.mallet.extract.StringTokenization;
import cc.mallet.extract.Tokenization;
import cc.mallet.types.Label;
import cc.mallet.types.LabelAlphabet;
import cc.mallet.types.LabelSequence;
import cc.mallet.util.CharSequenceLexer;
import java.util.regex.Pattern;
import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
import junit.textui.TestRunner;

/* loaded from: input_file:WEB-INF/lib/mallet-2.0.7.jar:cc/mallet/extract/test/TestDocumentExtraction.class */
/**
 * JUnit 3 tests that check the XML produced by {@link DocumentExtraction#toXmlString()}
 * for one fixed sentence, using the default, BIO, and hierarchical tokenization filters
 * as well as hand-built nested spans.
 */
public class TestDocumentExtraction extends TestCase {

    /** Sentence tokenized and labeled by every test in this class. */
    private static final String DOCUMENT = "the quick brown fox leapt over the lazy dog";

    /**
     * Creates a test case bound to the test method with the given name.
     *
     * @param str name of the test method to run
     */
    public TestDocumentExtraction(String str) {
        super(str);
    }

    /**
     * Builds a suite from this class.
     *
     * @return a {@link TestSuite} constructed from {@code TestDocumentExtraction.class}
     */
    public static Test suite() {
        return new TestSuite(TestDocumentExtraction.class);
    }

    /** Flat labels, "O" as background tag, no explicit tokenization filter. */
    public void testToXml() {
        LabelAlphabet dict = new LabelAlphabet();
        StringTokenization tokens = new StringTokenization(DOCUMENT, new CharSequenceLexer());

        // Lookup order is kept as O, ANIMAL, VERB so alphabet indices are unchanged.
        Label outside = dict.lookupLabel("O");
        Label animal = dict.lookupLabel("ANIMAL");
        Label verb = dict.lookupLabel("VERB");

        Label[] perToken = {outside, animal, animal, animal, verb, outside, outside, animal, animal};
        LabelSequence tags = new LabelSequence(perToken);

        String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<doc>the <ANIMAL>quick brown fox </ANIMAL><VERB>leapt </VERB>over the <ANIMAL>lazy dog</ANIMAL></doc>\r\n";
        DocumentExtraction extraction = new DocumentExtraction("Test", dict, tokens, tags, "O");
        assertEquals(expected, extraction.toXmlString());
    }

    /**
     * Labels with B-/I- prefixes run through {@link BIOTokenizationFilter}; the expected
     * XML carries the tag names without those prefixes and opens a new ANIMAL element at
     * the second B-ANIMAL token.
     */
    public void testToXmlBIO() {
        LabelAlphabet dict = new LabelAlphabet();
        StringTokenization tokens = new StringTokenization(DOCUMENT, new CharSequenceLexer());

        // Lookup order is kept as O, B-ANIMAL, ANIMAL, B-VERB, I-VERB.
        Label outside = dict.lookupLabel("O");
        Label beginAnimal = dict.lookupLabel("B-ANIMAL");
        Label animal = dict.lookupLabel("ANIMAL");
        Label beginVerb = dict.lookupLabel("B-VERB");
        Label insideVerb = dict.lookupLabel("I-VERB");

        Label[] perToken = {
            outside, beginAnimal, animal, beginAnimal, beginVerb, insideVerb, outside, animal, animal
        };
        LabelSequence tags = new LabelSequence(perToken);

        String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<doc>the <ANIMAL>quick brown </ANIMAL><ANIMAL>fox </ANIMAL><VERB>leapt over </VERB>the <ANIMAL>lazy dog</ANIMAL></doc>\r\n";
        DocumentExtraction extraction =
                new DocumentExtraction("Test", dict, tokens, tags, null, "O", new BIOTokenizationFilter());
        assertEquals(expected, extraction.toXmlString());
    }

    /**
     * Spans built by {@link DefaultTokenizationFilter} plus two manually added sub-spans;
     * the expected XML nests MAMMAL and ADJ inside the enclosing ANIMAL elements.
     */
    public void testNestedToXML() {
        LabelAlphabet dict = new LabelAlphabet();
        StringTokenization tokens = new StringTokenization(DOCUMENT, new CharSequenceLexer());

        Label outside = dict.lookupLabel("O");
        Label animal = dict.lookupLabel("ANIMAL");
        Label verb = dict.lookupLabel("VERB");
        Label adjective = dict.lookupLabel("ADJ");
        Label mammal = dict.lookupLabel("MAMMAL");

        Label[] perToken = {outside, animal, animal, animal, verb, outside, animal, animal, animal};
        LabelSequence tags = new LabelSequence(perToken);

        DefaultTokenizationFilter filter = new DefaultTokenizationFilter();
        LabeledSpans spans = filter.constructLabeledSpans(dict, DOCUMENT, outside, tokens, tags);

        // Extra spans layered on top of the filter's output: tokens [3,4) and [7,8).
        spans.add(new LabeledSpan(tokens.subspan(3, 4), mammal, false));
        spans.add(new LabeledSpan(tokens.subspan(7, 8), adjective, false));

        String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<doc>the <ANIMAL>quick brown <MAMMAL>fox </MAMMAL></ANIMAL><VERB>leapt </VERB>over <ANIMAL>the <ADJ>lazy </ADJ>dog</ANIMAL></doc>\r\n";
        DocumentExtraction extraction =
                new DocumentExtraction("Test", dict, (Tokenization) tokens, spans, (LabeledSpans) null, "O");
        assertEquals(expected, extraction.toXmlString());
    }

    /**
     * Pipe-separated labels run through {@link HierarchicalTokenizationFilter}, first
     * without a pattern and then with the pattern {@code AD.*}; in the second expected
     * string the ADJ element is absent.
     */
    public void testNestedXMLTokenizationFilter() {
        LabelAlphabet dict = new LabelAlphabet();
        StringTokenization tokens = new StringTokenization(DOCUMENT, new CharSequenceLexer());

        // Lookup order mirrors the order of first use in the label sequence.
        Label outside = dict.lookupLabel("O");
        Label animal = dict.lookupLabel("ANIMAL");
        Label animalMammal = dict.lookupLabel("ANIMAL|MAMMAL");
        Label verb = dict.lookupLabel("VERB");
        Label animalAdj = dict.lookupLabel("ANIMAL|ADJ");
        Label animalAdjMammal = dict.lookupLabel("ANIMAL|ADJ|MAMMAL");

        Label[] perToken = {
            outside, animal, animal, animalMammal, verb, outside, animal, animalAdj, animalAdjMammal
        };
        LabelSequence labelSequence = new LabelSequence(perToken);

        String expectedAllLevels = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<doc>the <ANIMAL>quick brown <MAMMAL>fox </MAMMAL></ANIMAL><VERB>leapt </VERB>over <ANIMAL>the <ADJ>lazy <MAMMAL>dog</MAMMAL></ADJ></ANIMAL></doc>\r\n";
        DocumentExtraction allLevels = new DocumentExtraction(
                "Test", dict, tokens, labelSequence, null, "O", new HierarchicalTokenizationFilter());
        assertEquals(expectedAllLevels, allLevels.toXmlString());

        String expectedWithPattern = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<doc>the <ANIMAL>quick brown <MAMMAL>fox </MAMMAL></ANIMAL><VERB>leapt </VERB>over <ANIMAL>the lazy <MAMMAL>dog</MAMMAL></ANIMAL></doc>\r\n";
        DocumentExtraction withPattern = new DocumentExtraction(
                "Test", dict, tokens, labelSequence, null, "O",
                new HierarchicalTokenizationFilter(Pattern.compile("AD.*")));
        assertEquals(expectedWithPattern, withPattern.toXmlString());
    }

    /**
     * Command-line entry point. With no arguments the full {@link #suite()} is run;
     * otherwise each argument is treated as a test method name and only those are run.
     *
     * @param strArr optional test method names
     * @throws Throwable declared for compatibility with the existing signature
     */
    public static void main(String[] strArr) throws Throwable {
        if (strArr.length == 0) {
            TestRunner.run(suite());
            return;
        }
        TestSuite selected = new TestSuite();
        for (int i = 0; i < strArr.length; i++) {
            selected.addTest(new TestDocumentExtraction(strArr[i]));
        }
        TestRunner.run(selected);
    }
}
