OpenXML Filter: DOCX: extraction fails on replacing breaks in fields with paragraphs

Issue #1172 resolved
Denis Konovalyenko created an issue

The conditional parameter bPreferenceAddLineSeparatorAsCharacter has to be set to true.

The following exception can be observed during extraction of the attached sample document:

java.lang.IllegalStateException: Unexpected structure

    at net.sf.okapi.filters.openxml.SkippableElements$Default.skip(SkippableElements.java:138)
    at net.sf.okapi.filters.openxml.RunParser.parseContent(RunParser.java:621)
    at net.sf.okapi.filters.openxml.RunParser.parseContentFrom(RunParser.java:524)
    at net.sf.okapi.filters.openxml.RunParser.parseComplexField(RunParser.java:406)
    at net.sf.okapi.filters.openxml.RunParser.startRunParsing(RunParser.java:237)
    at net.sf.okapi.filters.openxml.RunParser.parse(RunParser.java:161)
    at net.sf.okapi.filters.openxml.BlockParser.processRun(BlockParser.java:335)
    at net.sf.okapi.filters.openxml.BlockParser.parse(BlockParser.java:246)
    at net.sf.okapi.filters.openxml.StyledTextPart.process(StyledTextPart.java:280)
    at net.sf.okapi.filters.openxml.StyledTextPart.open(StyledTextPart.java:236)
    at net.sf.okapi.filters.openxml.StyledTextPart.open(StyledTextPart.java:130)
    at net.sf.okapi.filters.openxml.OpenXMLFilter.nextInDocument(OpenXMLFilter.java:444)
    at net.sf.okapi.filters.openxml.OpenXMLFilter.next(OpenXMLFilter.java:252)
    at net.sf.okapi.filters.openxml.OpenXMLFilter.next(OpenXMLFilter.java:261)

UI:

Document part:

    <w:p w14:paraId="273EE7B9" w14:textId="77777777" w:rsidR="002906B8" w:rsidRPr="002906B8"
         w:rsidRDefault="002906B8" w:rsidP="002906B8">
      <w:pPr>
        <w:pStyle w:val="NoSpacing"/>
      </w:pPr>
      <w:r w:rsidRPr="002906B8">
        <w:fldChar w:fldCharType="begin"/>
      </w:r>
      <w:r w:rsidRPr="002906B8">
        <w:instrText xml:space="preserve"> HYPERLINK "http://okapiframework.org/" \t "_blank" </w:instrText>
      </w:r>
      <w:r w:rsidRPr="002906B8">
        <w:fldChar w:fldCharType="separate"/>
      </w:r>
    </w:p>
    <w:p w14:paraId="334C5D4E" w14:textId="3325B7FC" w:rsidR="002906B8" w:rsidRPr="002906B8"
         w:rsidRDefault="00322FAF" w:rsidP="002906B8">
      <w:pPr>
        <w:pStyle w:val="NoSpacing"/>
      </w:pPr>
      <w:r>
        <w:t>A hyperlink</w:t>
      </w:r>
      <w:r w:rsidR="002906B8" w:rsidRPr="002906B8">
        <w:br/>
      </w:r>
      <w:r w:rsidR="002906B8" w:rsidRPr="002906B8">
        <w:br/>
        <w:t xml:space="preserve">with details</w:t>
      </w:r>
    </w:p>
    <w:p w14:paraId="1A04A42E" w14:textId="6DC6E614" w:rsidR="00C63EB3" w:rsidRDefault="002906B8"
         w:rsidP="00CF25CC">
      <w:pPr>
        <w:pStyle w:val="NoSpacing"/>
      </w:pPr>
      <w:r w:rsidRPr="002906B8">
        <w:fldChar w:fldCharType="end"/>
      </w:r>
      <w:bookmarkStart w:id="0" w:name="_GoBack"/>
      <w:bookmarkEnd w:id="0"/>
    </w:p>

Comments (4)

  1. Log in to comment