Wiki

Clone wiki

Aspose for Apache POI / Extract-Images-from-Word-Document

Apache POI WP

// Extract every picture stored in a Word (.doc) file using Apache POI HWPF
// and write each one to data/apacheImages/ with an "Apache_" file-name prefix.
HWPFDocument doc;
// try-with-resources guarantees the input stream is released even if the
// document fails to parse.
// NOTE(review): assumes HWPFDocument has fully consumed the stream once its
// constructor returns, so closing here is safe - confirm for the POI version in use.
try (FileInputStream inputStream = new FileInputStream("data/document.doc"))
{
    doc = new HWPFDocument(inputStream);
}
List<Picture> pics = doc.getPicturesTable().getAllPictures();

for (Picture pic : pics)
{
    // The output file name is the fixed prefix plus the name POI suggests
    // for the picture; the stream is closed even when write() throws.
    try (FileOutputStream outputStream = new FileOutputStream(
            "data/apacheImages/" + "Apache_" + pic.suggestFullFileName()))
    {
        outputStream.write(pic.getContent());
    }
}

Aspose.Words

// Extract every image held by a shape in a Word document using Aspose.Words
// and save each one under data/asposeImages/ as "Aspose.Images.<index><ext>".
Document doc = new Document("data/document.doc");

// NOTE(review): the second argument (true) presumably requests a deep,
// whole-tree search for SHAPE nodes - confirm against the Aspose.Words docs.
NodeCollection shapes = doc.getChildNodes(NodeType.SHAPE, true);
int imageIndex = 0;
for (Shape shape : (Iterable<Shape>) shapes)
{
    // Shapes without image data are skipped and do not consume an index.
    if (!shape.hasImage())
    {
        continue;
    }

    ImageData imageData = shape.getImageData();
    String extension = FileFormatUtil.imageTypeToExtension(imageData.getImageType());

    // Plain concatenation keeps the index locale-neutral. MessageFormat's {0}
    // would apply the default locale's number format and insert grouping
    // separators (e.g. "1,000") into the file name for large indexes.
    String imageFileName = "Aspose.Images." + imageIndex + extension;
    imageData.save("data/asposeImages/" + imageFileName);

    imageIndex++;
}

Download Source Code

Updated