Wiki

Clone wiki

Aspose Java for Docx4j / Convert-to-Formats

Aspose.Words

// Open the source Word document from disk.
final Document doc = new Document("data/document.doc");

// Export the same document once per target format.

// HTML
doc.save("data/html/Aspose_DocToHTML.html", SaveFormat.HTML);

// PDF
doc.save("data/Aspose_DocToPDF.pdf", SaveFormat.PDF);

// Plain text (TXT)
doc.save("data/Aspose_DocToTxt.txt", SaveFormat.TEXT);

// JPEG image
doc.save("data/Aspose_DocToJPG.jpg", SaveFormat.JPEG);

Docx4j

// Document loading (required).
// When no input path is supplied, a dummy document is created instead of loading a file.
WordprocessingMLPackage wordMLPackage;
if (inputfilepath==null) {
    // Create a docx
    System.out.println("No imput path passed, creating dummy document");
    wordMLPackage = WordprocessingMLPackage.createPackage();
    SampleDocument.createContent(wordMLPackage.getMainDocumentPart());
} else {
    System.out.println("Loading file from " + inputfilepath);
    wordMLPackage = Docx4J.load(new java.io.File(inputfilepath));
}

// Base path from which the image directory and the .html output file are derived.
// The dummy-document branch above has no input path, so fall back to a fixed
// relative name instead of dereferencing null in the substring() call below.
String outputBasePath = (inputfilepath != null) ? inputfilepath : "dummy_document";

// HTML exporter setup (required)
// .. the HTMLSettings object
HTMLSettings htmlSettings = Docx4J.createHTMLSettings();

// Image directory path: "<base path>_files".
htmlSettings.setImageDirPath(outputBasePath + "_files");
// Image target URI: the last path segment of the base path (text after the final "/"),
// followed by "_files".
htmlSettings.setImageTargetUri(outputBasePath.substring(outputBasePath.lastIndexOf("/")+1)
        + "_files");
htmlSettings.setWmlPackage(wordMLPackage);


/* CSS reset, see http://itumbcom.blogspot.com.au/2013/06/css-reset-how-complex-it-should-be.html
 *
 * motivated by vertical space in tables in Firefox and Google Chrome.

    If you have unwanted vertical space, in Chrome this may be coming from -webkit-margin-before and -webkit-margin-after
    (in Firefox, margin-top is set to 1em in html.css)

    Setting margin: 0 on p is enough to fix it.

    See further http://www.css-101.org/articles/base-styles-sheet-for-webkit-based-browsers/
*/
String userCSS = "html, body, div, span, h1, h2, h3, h4, h5, h6, p, a, img,  ol, ul, li, table, caption, tbody, tfoot, thead, tr, th, td " +
        "{ margin: 0; padding: 0; border: 0;}" +
        "body {line-height: 1;} ";
htmlSettings.setUserCSS(userCSS);

// If you want XHTML output
Docx4jProperties.setProperty("docx4j.Convert.Out.HTML.OutputMethodXML", true);

// Output to an OutputStream; the stream is closed in the finally block so the
// file handle is released even if the export throws.
OutputStream os = new FileOutputStream(outputBasePath + ".html");
try {
    //Don't care what type of exporter you use
    //      Docx4J.toHTML(htmlSettings, os, Docx4J.FLAG_NONE);
    //Prefer the exporter, that uses a xsl transformation
    Docx4J.toHTML(htmlSettings, os, Docx4J.FLAG_EXPORT_PREFER_XSL);
    //Prefer the exporter, that doesn't use a xsl transformation (= uses a visitor)
    //      Docx4J.toHTML(htmlSettings, os, Docx4J.FLAG_EXPORT_PREFER_NONXSL);
} finally {
    os.close();
}

System.out.println("Saved: " + outputBasePath + ".html ");

Download Source Code

Updated