Apache POI WP

// Convert the Word file data/document.doc to HTML with Apache POI and
// write the result to data/ApacheDocToHTML.html.
Document htmlDocument;

// try-with-resources guarantees the input file handle is released even when
// loading or converting the document throws.
try (FileInputStream inputStream = new FileInputStream("data/document.doc")) {
    HWPFDocumentCore wordDocument = WordToHtmlUtils.loadDoc(inputStream);

    // The converter builds its HTML output into a fresh, empty DOM document.
    WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
            DocumentBuilderFactory.newInstance().newDocumentBuilder()
                    .newDocument());
    wordToHtmlConverter.processDocument(wordDocument);
    htmlDocument = wordToHtmlConverter.getDocument();
}

// Serialize the DOM into memory first, so the output file is only created
// once the transformation has completed successfully.
ByteArrayOutputStream out = new ByteArrayOutputStream();
DOMSource domSource = new DOMSource(htmlDocument);
StreamResult streamResult = new StreamResult(out);

TransformerFactory tf = TransformerFactory.newInstance();
Transformer serializer = tf.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "html");
serializer.transform(domSource, streamResult);
// No out.close() here: closing a ByteArrayOutputStream has no effect.

// try-with-resources closes the output file even if the write fails.
try (FileOutputStream outputStream = new FileOutputStream("data/ApacheDocToHTML.html")) {
    outputStream.write(out.toByteArray());
}

Aspose.Words

// Open the source Word file from disk.
Document document = new Document("data/document.doc");

// Export the same document once per target format.

// HTML output
document.save("data/html/AsposeDocToHTML.html", SaveFormat.HTML);

// PDF output
document.save("data/AsposeDocToPDF.pdf", SaveFormat.PDF);

// Plain-text output
document.save("data/AsposeDocToTxt.txt", SaveFormat.TEXT);

// JPEG image output
document.save("data/AsposeDocToJPG.jpg", SaveFormat.JPEG);

Download Source Code

Last edited Mar 26, 2014 at 7:12 AM by shoaibkhan, version 1