Q:
I am testing TIFF extraction for a large number of PDFs (6K+) and there is a
memory issue. After taking 11h 8min to extract the TIFFs for only 4253
PDFs, it runs into a virtual memory issue. Please advise.
The current VB.NET code is as follows:
' Batch-exports every page of every PDF found in strProcessingFolder as a TIFF
' file. Each PDF gets its own sub-folder under strPrefixOutputFolder, and each
' page is written as "<pdf name><page number>.tif".
'
' Every PDFDoc is closed as soon as its pages have been exported, and the
' shared PDFDraw is disposed at the end. Merely assigning Nothing to the
' variables does not release the documents, which is what let memory grow
' over thousands of files.
PDFNet.Initialize()
PDFNet.SetResourcesPath("../../../../resources")

Dim strProcessingFolder As String = "C:\Input\"
Dim strPrefixOutputFolder As String = "C:\PDFTron\Output\"
Dim StartTime As DateTime
Dim EndTime As DateTime
Dim draw As PDFDraw = New PDFDraw
Try
    ' TIFF 300 DPI, G4 Compression, 1bpp, B/W Dithered
    ' (only the DPI and the "BPC" = 1 hint are set explicitly below)
    draw.SetDPI(300)
    StartTime = Now()

    ' Only enumerate PDF files: any other file in the folder would otherwise
    ' be turned into a "<name>.pdf" path that may not exist.
    Dim dirs As String() =
        System.IO.Directory.GetFiles(strProcessingFolder, "*.pdf")

    For Each strPDFFileNameFullPath As String In dirs
        Dim strPDFFileName As String =
            System.IO.Path.GetFileNameWithoutExtension(strPDFFileNameFullPath)
        Dim strOutputFolder As String = strPrefixOutputFolder & strPDFFileName & "\"
        System.IO.Directory.CreateDirectory(strOutputFolder)

        ' Open the file that was actually enumerated instead of rebuilding
        ' its path from the folder and the bare file name.
        Dim doc As PDFDoc = New PDFDoc(strPDFFileNameFullPath)
        Try
            doc.InitSecurityHandler()

            Dim encoder_param As SDF.Obj = PDFTRON.SDF.Obj.CreateDict()
            encoder_param.Put("BPC", PDFTRON.SDF.Obj.CreateNumber(1))

            Dim pend As PageIterator = doc.PageEnd()
            Dim itr As PageIterator = doc.PageBegin()
            While Not itr.Equals(pend)
                Dim outname As String = String.Format("{0}{1}{2:d}.tif", strOutputFolder,
                    strPDFFileName, itr.GetPageNumber())
                draw.Export(itr.Current(), outname, "TIFF",
                    encoder_param)
                itr.Next()
            End While
        Finally
            ' Release this document before moving on to the next file, even
            ' if exporting one of its pages failed.
            doc.Close()
        End Try
    Next

    EndTime = Now()
    Console.WriteLine(StartTime.ToString & " " & EndTime.ToString)
    MsgBox(StartTime.ToString & " " & EndTime.ToString)
    Console.WriteLine("Done")
Catch e As PDFNetException
    Console.WriteLine(e.Message)
Finally
    ' Explicitly clean-up allocated memory
    draw.Dispose()
End Try
-----
A:
Under .NET, you need to call doc.Close() in order to make sure that
memory is released on time. You could call this method when the document
is no longer in use (e.g. after draw.Dispose()):
' Explicitly clean-up allocated memory
' Dispose the PDFDraw rasterizer once no more pages will be exported with it.
draw.Dispose()
' Close the document once it is no longer in use.
' NOTE(review): when a new PDFDoc is created for every file inside a loop,
' Close() has to be called for each document (e.g. at the end of each
' iteration); calling it once after the loop only closes the last one.
doc.Close()