Extract Text From PDF

Here is an example showing how to extract text from a PDF file using DDX. The extracted text is written to the destination file.

<!---
  Runs the DDX instructions in doc_text.ddx against pdf-file1.pdf with
  cfpdf action="processddx", then reads the generated result file and dumps it.
  NOTE(review): the struct keys Doc1 and Out1 presumably match the source/result
  names declared inside doc_text.ddx (not shown here) - confirm against that file.
--->

<!--- The ddx file --->
<cfset ddxfile = ExpandPath("doc_text.ddx")>
<!--- The source pdf file --->
<cfset sourcefile1 = ExpandPath("pdf-file1.pdf")>
<!--- The destination file --->
<cfset destinationfile = ExpandPath("ddx_result_doc_text.xml")>

<!--- Input files handed to the DDX processor, keyed by name. --->
<cfset inputStruct = StructNew()>
<cfset inputStruct.Doc1 = sourcefile1>

<!--- Output files the DDX processor writes, keyed by name. --->
<cfset outputStruct = StructNew()>
<cfset outputStruct.Out1 = destinationfile>

<!--- ddxVar receives one status entry per output key (read below as ddxVar.Out1). --->
<cfpdf action="processddx" ddxfile="#ddxfile#" inputfiles="#inputStruct#" outputfiles="#outputStruct#" name="ddxVar">

<cfoutput>The ddx operation was #ddxVar.Out1#</cfoutput><br>

<!--- Only read the result file when the operation reported success. --->
<cfif ddxVar.Out1 eq "successful">
    <cffile action="read" file="#destinationfile#" variable="filedata">
    <cfdump var="#filedata#">
</cfif>

You can download the files using the download button.

Populate Multiple PDF Forms and Merge

Here is an example that populates multiple PDF forms and then merges them into a single PDF.

<!---
  Populates two copies of an expense-claim PDF form, flattens each populated
  form, and merges the two flattened files into one PDF.
--->

<!--- Blank source forms. --->
<cfset sourceform1 = ExpandPath('expense_claim_form1.pdf')>
<cfset sourceform2 = ExpandPath('expense_claim_form2.pdf')>

<!--- Populated (still interactive) forms. --->
<cfset resultfile1 = ExpandPath('result1.pdf')>
<cfset resultfile2 = ExpandPath('result2.pdf')>

<!--- Flattened copies of the populated forms. --->
<cfset flatfile1 = ExpandPath('flat1.pdf')>
<cfset flatfile2 = ExpandPath('flat2.pdf')>

<!--- The merged output. --->
<cfset finalresultfile = ExpandPath('finalresult.pdf')>

<!--- Field values for form 1. --->
<cfset myname1     = "source_test1">
<cfset myempcode1  = "source_code1">
<cfset mylocation1 = "source_loc1">
<cfset mymgrname1  = "source_mgr1">
<cfset mymgremail1 = "source1@email.com">

<!--- Field values for form 2. --->
<cfset myname2     = "source_test2">
<cfset myempcode2  = "source_code2">
<cfset mylocation2 = "source_loc2">
<cfset mymgrname2  = "source_mgr2">
<cfset mymgremail2 = "source2@email.com">

<!--- Populate form 1; the fields sit under the nested subforms form1 > expense. --->
<cfpdfform action="populate" source="#sourceform1#" destination="#resultfile1#" overwrite="true">
    <cfpdfsubform name="form1">
        <cfpdfsubform name="expense">
            <cfpdfformparam name="name"     value="#myname1#">
            <cfpdfformparam name="empcode"  value="#myempcode1#">
            <cfpdfformparam name="location" value="#mylocation1#">
            <cfpdfformparam name="mgrname"  value="#mymgrname1#">
            <cfpdfformparam name="mgremail" value="#mymgremail1#">
        </cfpdfsubform>
    </cfpdfsubform>
</cfpdfform>

<!--- Populate form 2 the same way with the second set of values. --->
<cfpdfform action="populate" source="#sourceform2#" destination="#resultfile2#" overwrite="true">
    <cfpdfsubform name="form1">
        <cfpdfsubform name="expense">
            <cfpdfformparam name="name"     value="#myname2#">
            <cfpdfformparam name="empcode"  value="#myempcode2#">
            <cfpdfformparam name="location" value="#mylocation2#">
            <cfpdfformparam name="mgrname"  value="#mymgrname2#">
            <cfpdfformparam name="mgremail" value="#mymgremail2#">
        </cfpdfsubform>
    </cfpdfsubform>
</cfpdfform>

<!--- Write a flattened copy of each populated form before merging. --->
<cfpdf action="write" source="#resultfile1#" destination="#flatfile1#" flatten="yes" overwrite="true">
<cfpdf action="write" source="#resultfile2#" destination="#flatfile2#" flatten="yes" overwrite="true">

<!--- Merge the two flattened files, in order, into the final result. --->
<cfpdf action="merge" source="#flatfile1#,#flatfile2#" destination="#finalresultfile#" overwrite="true">

<cfoutput>Done...</cfoutput>

cfpdf merge example

Here is an example that merges selected pages from a single source PDF file. You can download the entire example (including the source PDF and the CFM file) using the download button.

<!---
  Copies selected pages of source_file1.pdf into result_file1.pdf with
  cfpdf action="merge", printing the page count of the source and of the result.
  If the source has fewer pages than the highest requested page, the merge is
  skipped and a message is shown instead.
--->
<cfset sourcefile = ExpandPath('source_file1.pdf')>
<cfset destfile = ExpandPath('result_file1.pdf')>

<!--- Pages to copy into the result. Plain page numbers only; the check below does not parse ranges. --->
<cfset pagesToMerge = "7, 10, 11">

<!--- Find the highest requested page so it can be compared with the source page count. --->
<cfset highestPage = 0>
<cfloop list="#pagesToMerge#" index="pageNumber">
    <cfset highestPage = Max(highestPage, Val(Trim(pageNumber)))>
</cfloop>

<!--- getinfo on the source file. --->
<cfpdf action="getinfo" source="#sourcefile#" name="myVar">
<br><cfoutput>The total pages of the original source file = #myVar.TotalPages#</cfoutput><br><br>

<cfif myVar.TotalPages lt highestPage>
    <!--- The source is too short for the requested pages, so do not attempt the merge. --->
    <cfoutput>The source file must have at least #highestPage# pages to merge pages #pagesToMerge#.</cfoutput><br><br>
<cfelse>
    <!--- perform the merge operation. merge 3 pages - 7, 10 and 11 into a single pdf file. --->
    <cfpdf action="merge" source="#sourcefile#" pages="#pagesToMerge#" destination="#destfile#" overwrite="true">

    <!--- getinfo on the result file and verify that the total number of pages is 3. --->
    <cfpdf action="getinfo" source="#destfile#" name="myVar">
    <cfoutput>The pages in the resultant merged file = #myVar.TotalPages#</cfoutput><br><br>
</cfif>

BlogCFC was created by Raymond Camden. This blog is running version 5.9.002. Contact Blog Owner