HTML5Filter generates incorrect "known" Code types: "a" instead of "link" (TYPE_LINK), etc.
Issue #746
new
They should be "link" (`TYPE_LINK`), "bold" (`TYPE_BOLD`), "image" (`TYPE_IMAGE`), and so on.
See http://docs.oasis-open.org/xliff/v1.2/os/xliff-core.html#ctype and the `net.sf.okapi.common.resource.Code` class.
The `HtmlFilter` filter produces the correct types.
Running the code below gives this:
===== HtmlFilter =====
Text Unit : Click [#$dp1]here</a> for <b>more</b>!
Code: { id:1 type:link tagType:OPENING data:[#$dp1] outerData:[#$dp1] }
Code: { id:1 type:link tagType:CLOSING data:</a> outerData:</a> }
Code: { id:2 type:bold tagType:OPENING data:<b> outerData:<b> }
Code: { id:2 type:bold tagType:CLOSING data:</b> outerData:</b> }
===== HTML5Filter =====
Text Unit : Click <a href="foo.com">here</a> for <b>more</b>!
Code: { id:1 type:a tagType:OPENING data:<a href="foo.com"> outerData:<a href="foo.com"> }
Code: { id:1 type:a tagType:CLOSING data:</a> outerData:</a> }
Code: { id:2 type:b tagType:OPENING data:<b> outerData:<b> }
Code: { id:2 type:b tagType:CLOSING data:</b> outerData:</b> }
Code to reproduce the problem:
package com.mihnita.okapicodebug;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import net.sf.okapi.common.Event;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.filters.IFilter;
import net.sf.okapi.common.resource.Code;
import net.sf.okapi.common.resource.ITextUnit;
import net.sf.okapi.common.resource.RawDocument;
import net.sf.okapi.common.resource.TextFragment;
import net.sf.okapi.filters.html.HtmlFilter;
import net.sf.okapi.filters.its.html5.HTML5Filter;
/**
 * Reproduction harness that runs the same HTML snippet through {@link HtmlFilter}
 * and {@link HTML5Filter} and prints the inline codes of every text unit to
 * standard output so that their reported types can be compared side by side.
 *
 * <p>Nothing is asserted here; the output is meant to be inspected manually.
 */
@RunWith(JUnit4.class)
@SuppressWarnings("static-method")
public class TestHtmlEvents {

    /**
     * Writes a one-line description of an inline code to standard output.
     *
     * @param code the inline code whose id, type, tag type, data and outer data are printed
     */
    private static void dumpCode(Code code) {
        String description = " Code: {"
                + " id:" + code.getId()
                + " type:" + code.getType()
                + " tagType:" + code.getTagType()
                + " data:" + code.getData()
                + " outerData:" + code.getOuterData()
                + " }\n";
        System.out.print(description);
    }

    /**
     * Feeds {@code testData} to {@code filter} and, for each text-unit event,
     * prints the event together with its resource, followed by every code found
     * in the unsegmented copy of the source content.
     *
     * @param filter   the filter to open on the test document; it is not closed here
     * @param testData the raw markup to parse, treated as English content
     */
    public void testProper(IFilter filter, String testData) {
        try (RawDocument document = new RawDocument(testData, LocaleId.ENGLISH)) {
            filter.open(document, true);
            while (filter.hasNext()) {
                Event event = filter.next();
                if (!event.isTextUnit()) {
                    // Only text units are dumped; all other events are skipped.
                    continue;
                }
                System.out.println(event + " : " + event.getResource());
                ITextUnit textUnit = event.getTextUnit();
                TextFragment content = textUnit.getSource().getUnSegmentedContentCopy();
                for (Code inlineCode : content.getCodes()) {
                    dumpCode(inlineCode);
                }
            }
        }
    }

    /**
     * Runs the sample paragraph through both filters, printing a banner before
     * each run so the two dumps can be told apart in the output.
     */
    @Test
    public void test() {
        final String testData = "<p>Click <a href='foo.com'>here</a> for <b>more</b>!</p>";

        try (IFilter htmlFilter = new HtmlFilter()) {
            System.out.println("===== HtmlFilter =====");
            testProper(htmlFilter, testData);
        }

        try (IFilter html5Filter = new HTML5Filter()) {
            System.out.println("===== HTML5Filter =====");
            testProper(html5Filter, testData);
        }
    }
}