Wiki
Clone wiki
Aspose for Apache POI / Convert-Word-Doc-to-Other-Formats
Apache POI WP
// Convert a legacy Word (.doc) file to HTML using Apache POI's WordToHtmlConverter.

// Load the source document. try-with-resources closes the input stream even
// when loading fails; the original snippet never closed it.
HWPFDocumentCore wordDocument;
try (FileInputStream inputStream = new FileInputStream("data/document.doc")) {
    wordDocument = WordToHtmlUtils.loadDoc(inputStream);
}

// Build the HTML DOM: the converter fills an empty W3C document with the
// HTML representation of the Word content.
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
        DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
wordToHtmlConverter.processDocument(wordDocument);
Document htmlDocument = wordToHtmlConverter.getDocument();

// Serialize the DOM into an in-memory buffer first, so the output file is
// only created once the transformation has succeeded.
ByteArrayOutputStream out = new ByteArrayOutputStream();
DOMSource domSource = new DOMSource(htmlDocument);
StreamResult streamResult = new StreamResult(out);

TransformerFactory tf = TransformerFactory.newInstance();
Transformer serializer = tf.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "html");
serializer.transform(domSource, streamResult);

// Write the serialized HTML to disk. try-with-resources closes the file
// handle even if the write throws.
try (FileOutputStream outputStream = new FileOutputStream("data/ApacheDocToHTML.html")) {
    outputStream.write(out.toByteArray());
}
Aspose.Words
// Load the document from disk. Document doc = new Document("data/document.doc"); doc.save("data/html/AsposeDocToHTML.html",SaveFormat.HTML); //Save the document in HTML format. doc.save("data/AsposeDocToPDF.pdf",SaveFormat.PDF); //Save the document in PDF format. doc.save("data/AsposeDocToTxt.txt",SaveFormat.TEXT); //Save the document in TXT format. doc.save("data/AsposeDocToJPG.jpg",SaveFormat.JPEG); //Save the document in JPEG format.
Download Source Code
Updated