Question:
I have a PDF document with multiple annotations that use the same image. Each annotation contains its own copy of the image, which increases the size of the PDF. How can I consolidate the duplicate images?
Answer:
You can post-process the PDF to go through all annotations and replace all image references with a pointer to the first one.
/*
* This code assumes every annotation reuses the same image.
* Replace all these image references with a pointer to the first one.
* When PDFDoc.Save is called all the extra/duplicate images will be discarded.
*
* TODO: Add your own heuristic/conditional to decide which ones to merge. E.g. pre-populated list of annotation IDs
*/
// Walk every annotation on every page and point the first image XObject found
// in each annotation's appearance resources at one shared image object.
//
// Names used from outside this snippet:
//   doc                 - the document whose pages are iterated
//   sdfdoc              - used to look up the shared image by object number
//                         (presumably doc's underlying SDF document - confirm)
//   imageToReuseObjNum  - object number of the image to keep; 0 is treated as
//                         "not chosen yet", in which case the first image
//                         encountered becomes the shared one
for (PageIterator pitr = doc.getPageIterator(); pitr.hasNext(); )
{
    Page page = pitr.next();
    int num_annots = page.getNumAnnots();
    for (int i = 0; i < num_annots; ++i)
    {
        Annot annot = page.getAnnot(i);
        // Skip only this annotation. A 'return' here would abandon every
        // remaining annotation and page as soon as one invalid entry is seen.
        if (!annot.isValid()) continue;

        Obj ap = annot.getAppearance();
        if (ap == null) continue;

        // Appearance stream -> /Resources -> /XObject dictionary.
        Obj objResources = ap.findObj("Resources");
        if (objResources == null || !objResources.isDict()) continue;
        Obj objXObjects = objResources.findObj("XObject");
        if (objXObjects == null || !objXObjects.isDict()) continue;

        for (DictIterator itr = objXObjects.getDictIterator(); itr.hasNext(); itr.next())
        {
            Obj objXobject = itr.value();
            // The object number is needed below, so only indirect objects qualify.
            if (!objXobject.isIndirect()) continue;

            // Must be '||': with '&&' a missing Subtype entry would be
            // dereferenced (NullPointerException) instead of skipped.
            Obj objsubType = objXobject.findObj("Subtype");
            if (objsubType == null || !objsubType.isName()) continue;
            if (!objsubType.getName().equals("Image")) continue;

            long objNum = objXobject.getObjNum();
            if (imageToReuseObjNum == 0)
            {
                // First image encountered: this is the one every other annotation will share.
                imageToReuseObjNum = objNum;
                System.out.println("First annot image " + imageToReuseObjNum);
            }
            else if (objNum != imageToReuseObjNum)
            {
                // Re-point this resource entry at the shared image; entries that
                // already reference it are left untouched.
                System.out.println("Replacing reference to " + objNum + " with " + imageToReuseObjNum);
                objXObjects.put(itr.key().getName(), sdfdoc.getObj(imageToReuseObjNum));
            }
            // Only the first image XObject of each appearance is handled. Leaving
            // the loop here also means the iterator is not used again after the
            // dictionary has been modified.
            break;
        }
    }
}