HTML5Filter generates incorrect "known" Code types: "a" instead of "link" (TYPE_LINK), etc.

Issue #746 new
Mihai Nita created an issue

The code types should be the predefined values, such as "link" (TYPE_LINK), "bold" (TYPE_BOLD), "image" (TYPE_IMAGE), and so on, rather than the raw HTML element names ("a", "b", ...).

See http://docs.oasis-open.org/xliff/v1.2/os/xliff-core.html#ctype, and the net.sf.okapi.common.resource.Code class.

HtmlFilter produces the correct types. Running the code below on the same input with both filters gives this output:

===== HtmlFilter =====
Text Unit : Click [#$dp1]here</a> for <b>more</b>!
    Code: { id:1 type:link tagType:OPENING data:[#$dp1] outerData:[#$dp1] }
    Code: { id:1 type:link tagType:CLOSING data:</a> outerData:</a> }
    Code: { id:2 type:bold tagType:OPENING data:<b> outerData:<b> }
    Code: { id:2 type:bold tagType:CLOSING data:</b> outerData:</b> }
===== HTML5Filter =====
Text Unit : Click <a href="foo.com">here</a> for <b>more</b>!
    Code: { id:1 type:a tagType:OPENING data:<a href="foo.com"> outerData:<a href="foo.com"> }
    Code: { id:1 type:a tagType:CLOSING data:</a> outerData:</a> }
    Code: { id:2 type:b tagType:OPENING data:<b> outerData:<b> }
    Code: { id:2 type:b tagType:CLOSING data:</b> outerData:</b> }

Code to reproduce the problem:

package com.mihnita.okapicodebug;

import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

import net.sf.okapi.common.Event;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.filters.IFilter;
import net.sf.okapi.common.resource.Code;
import net.sf.okapi.common.resource.ITextUnit;
import net.sf.okapi.common.resource.RawDocument;
import net.sf.okapi.common.resource.TextFragment;
import net.sf.okapi.filters.html.HtmlFilter;
import net.sf.okapi.filters.its.html5.HTML5Filter;

/**
 * Reproduction harness: runs the same HTML snippet through {@link HtmlFilter}
 * and {@link HTML5Filter} and prints the inline codes found in every text unit,
 * so the code types reported by the two filters can be compared side by side.
 *
 * <p>Nothing is asserted; the result is meant to be read from standard output.
 */
@RunWith(JUnit4.class)
@SuppressWarnings("static-method")
public class TestHtmlEvents {

    /**
     * Prints a single inline code on its own line, showing its id, type,
     * tag type, data and outer data.
     *
     * @param code the inline code to print
     */
    private static void dumpCode(Code code) {
        // Assemble the whole line first so it is emitted with a single print call.
        String line = "    Code: {"
                + " id:" + code.getId()
                + " type:" + code.getType()
                + " tagType:" + code.getTagType()
                + " data:" + code.getData()
                + " outerData:" + code.getOuterData()
                + " }\n";
        System.out.print(line);
    }

    /**
     * Feeds {@code testData} to {@code filter} and, for every text-unit event,
     * prints the event with its resource followed by each code found in an
     * unsegmented copy of the text unit's source content.
     *
     * @param filter   the filter to run
     * @param testData the raw document content handed to the filter
     */
    public void testProper(IFilter filter, String testData) {
        try (RawDocument document = new RawDocument(testData, LocaleId.ENGLISH)) {
            filter.open(document, true);

            while (filter.hasNext()) {
                Event event = filter.next();
                if (!event.isTextUnit()) {
                    // Only text units are reported; every other event is skipped.
                    continue;
                }

                System.out.println(event + " : " + event.getResource());

                ITextUnit textUnit = event.getTextUnit();
                TextFragment fragment = textUnit.getSource().getUnSegmentedContentCopy();
                for (Code inlineCode : fragment.getCodes()) {
                    dumpCode(inlineCode);
                }
            }
        }
    }

    /**
     * Prints a banner identifying the filter, then dumps what that filter
     * extracts from {@code testData}.
     */
    private void runWithBanner(String banner, IFilter filter, String testData) {
        System.out.println(banner);
        testProper(filter, testData);
    }

    /**
     * Runs the sample paragraph through {@link HtmlFilter} first and
     * {@link HTML5Filter} second, printing the codes each one produces.
     */
    @Test
    public void test() {
        String testData = "<p>Click <a href='foo.com'>here</a> for <b>more</b>!</p>";

        try (IFilter htmlFilter = new HtmlFilter()) {
            runWithBanner("===== HtmlFilter =====", htmlFilter, testData);
        }

        try (IFilter html5Filter = new HTML5Filter()) {
            runWithBanner("===== HTML5Filter =====", html5Filter, testData);
        }
    }
}

Comments (0)

  1. Log in to comment