I’ve used the following Python code to iterate over the page resource dictionaries and collect all unique font objects. It then loops over them and gets their names, swapping in a replacement font where necessary.
def embedFonts(infile):
    """Swap known Times/Courier fonts in a PDF for newly created ones, in place.

    Walks the resource dictionary of every page and collects each distinct
    font, keyed by the object number of its SDF object. Fonts whose name
    matches a Times or Courier variant are then swapped, via ``SDFDoc.Swap``,
    with a font created by ``Font.Create(..., True)``. Fonts with any other
    name are reported on stdout and left untouched. Finally the document is
    saved back over ``infile`` with ``SDFDoc.e_remove_unused``.

    Args:
        infile: Path of the PDF to process. The file is overwritten.
    """
    print("embedding fonts...")
    doc = PDFDoc(infile)
    try:
        doc.InitSecurityHandler()

        # Key by object number so a font shared by several pages is only
        # recorded (and later swapped) once; the first occurrence wins.
        fonts_by_num = {}
        page_itr = doc.GetPageIterator()
        while page_itr.HasNext():
            resources = page_itr.Current().GetResourceDict()
            font_dict = None
            if resources is not None:
                font_dict = resources.FindObj("Font")
            if font_dict is not None:
                font_itr = font_dict.GetDictIterator()
                while font_itr.HasNext():
                    font = Font(font_itr.Value())
                    fonts_by_num.setdefault(font.GetSDFObj().GetObjNum(), font)
                    font_itr.Next()
            page_itr.Next()

        sdf_doc = doc.GetSDFDoc()
        times_roman = Font.Create(sdf_doc, Font.e_times_roman, True)
        #Helvetica = Font.CreateCIDTrueTypeFont(doc.GetSDFDoc(), "c:/windows/fonts/helvetica.ttf", True, True)
        courier_new = Font.Create(sdf_doc, Font.e_courier, True)

        # NOTE(review): the same replacement object is swapped once per
        # matching font. Confirm SDFDoc.Swap behaves as intended when more
        # than one font in the document maps to the same replacement.
        for font in fonts_by_num.values():
            name = font.GetName()
            # Times names are matched as substrings, Courier names exactly.
            if "Times-Roman" in name or "TimesNewRomanPSMT" in name:
                replacement = times_roman
            elif name in ("CourierNewPSMT", "Courier"):
                replacement = courier_new
            else:
                print("Unknown font: {0}".format(name))
                continue
            sdf_doc.Swap(replacement.GetSDFObj().GetObjNum(),
                         font.GetSDFObj().GetObjNum())

        doc.Save(infile, SDFDoc.e_remove_unused)
    finally:
        # Release the document even if iteration, swapping or saving fails.
        doc.Close()
Spencer Rathbun