Sample code for 30+ languages & platforms
DataFlex

Extract Metadata XML from PDF

See more PDF Signatures Examples

Demonstrates how to extract the metadata XML from a PDF.

Chilkat DataFlex Downloads

DataFlex
Use ChilkatAx-win32.pkg

// Test
//
// Loads "qa_data/pdf/blank_with_metadata.pdf", extracts its metadata XML into
// a Chilkat StringBuilder, loads that text into a Chilkat Xml object, prints the
// XML with Showln, and then reads the individual metadata attributes/elements
// into local variables.
//
// Every Chilkat call that reports success/failure is checked; on failure the
// object's LastErrorText is printed and the procedure returns early.
//
// Note: the parsed values are only stored in locals (sRdf_li and
// sRdf_li_xml_lang are overwritten several times); this procedure does not
// print or return them.
Procedure Test
    Boolean iSuccess
    Handle hoPdf
    Variant vSbXml
    Handle hoSbXml
    Handle hoXml
    String sX_xmpmeta_xmlns_x
    String sX_xmpmeta_x_xmptk
    String sRdf_RDF_xmlns_rdf
    String sRdf_Description_rdf_about
    String sRdf_Description_xmlns_xmp
    String sRdf_Description_xmlns_dc
    String sRdf_Description_xmlns_xmpMM
    String sRdf_Description_xmlns_pdf
    String sRdf_Description_xmlns_xmpRights
    String sXmp_ModifyDate
    String sXmp_CreateDate
    String sXmp_MetadataDate
    String sXmp_CreatorTool
    String sDc_format
    String sRdf_li_xml_lang
    String sRdf_li
    Integer i
    Integer iCount_i
    String sXmpMM_DocumentID
    String sXmpMM_InstanceID
    String sPdf_Producer
    String sPdf_Keywords
    String sXmpRights_WebStatement
    String sTemp1

    Move False To iSuccess

    // This example requires the Chilkat API to have been previously unlocked.
    // See Global Unlock Sample for sample code.

    Get Create (RefClass(cComChilkatPdf)) To hoPdf
    If (Not(IsComObjectCreated(hoPdf))) Begin
        Send CreateComObject of hoPdf
    End

    // Load the PDF; stop with the error text if it cannot be loaded.
    Get ComLoadFile Of hoPdf "qa_data/pdf/blank_with_metadata.pdf" To iSuccess
    If (iSuccess = False) Begin
        Get ComLastErrorText Of hoPdf To sTemp1
        Showln sTemp1
        Procedure_Return
    End

    Get Create (RefClass(cComChilkatStringBuilder)) To hoSbXml
    If (Not(IsComObjectCreated(hoSbXml))) Begin
        Send CreateComObject of hoSbXml
    End
    // Note: Not all PDF files have metadata.  Metadata is optional.
    // The StringBuilder is passed to the COM method via its pvComObject variant.
    Get pvComObject of hoSbXml to vSbXml
    Get ComGetMetadata Of hoPdf vSbXml To iSuccess
    If (iSuccess = False) Begin
        Get ComLastErrorText Of hoPdf To sTemp1
        Showln sTemp1
        Procedure_Return
    End

    Get Create (RefClass(cComChilkatXml)) To hoXml
    If (Not(IsComObjectCreated(hoXml))) Begin
        Send CreateComObject of hoXml
    End
    Get pvComObject of hoSbXml to vSbXml
    Get ComLoadSb Of hoXml vSbXml True To iSuccess
    // Do not continue with an unparsed document: report why the metadata text
    // could not be loaded as XML, consistent with the checks above.
    If (iSuccess = False) Begin
        Get ComLastErrorText Of hoXml To sTemp1
        Showln sTemp1
        Procedure_Return
    End

    Get ComGetXml Of hoXml To sTemp1
    Showln sTemp1

    // Sample PDF metadata XML:
    // (Code for parsing follows)

    // Use this online tool to generate parsing code from sample XML: 
    // Generate Parsing Code from XML

    // <?xml version="1.0" encoding="utf-8"?>
    // <x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="Adobe XMP Core 9.1-c001 79.675d0f7, 2023/06/11-19:21:16        ">
    //     <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    //         <rdf:Description rdf:about="" xmlns:xmp="http://ns.adobe.com/xap/1.0/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xmpMM="http://ns.adobe.com/xap/1.0/mm/" xmlns:pdf="http://ns.adobe.com/pdf/1.3/" xmlns:xmpRights="http://ns.adobe.com/xap/1.0/rights/">
    //             <xmp:ModifyDate>2024-11-27T11:33:12-06:00</xmp:ModifyDate>
    //             <xmp:CreateDate>2024-11-27T11:28:23-06:00</xmp:CreateDate>
    //             <xmp:MetadataDate>2024-11-27T11:33:12-06:00</xmp:MetadataDate>
    //             <xmp:CreatorTool>Adobe Acrobat Pro (32-bit) 24.3.20112</xmp:CreatorTool>
    //             <dc:format>application/pdf</dc:format>
    //             <dc:title>
    //                 <rdf:Alt>
    //                     <rdf:li xml:lang="x-default">Blank</rdf:li>
    //                 </rdf:Alt>
    //             </dc:title>
    //             <dc:creator>
    //                 <rdf:Bag>
    //                     <rdf:li>Chilkat Software</rdf:li>
    //                 </rdf:Bag>
    //             </dc:creator>
    //             <dc:description>
    //                 <rdf:Alt>
    //                     <rdf:li xml:lang="x-default">Blank Document</rdf:li>
    //                 </rdf:Alt>
    //             </dc:description>
    //             <dc:subject>
    //                 <rdf:Bag>
    //                     <rdf:li>blank</rdf:li>
    //                     <rdf:li>metadata</rdf:li>
    //                     <rdf:li>document</rdf:li>
    //                 </rdf:Bag>
    //             </dc:subject>
    //             <xmpMM:DocumentID>uuid:34535ffa-b632-43f1-b1fd-80cea6fdc351</xmpMM:DocumentID>
    //             <xmpMM:InstanceID>uuid:69ace620-4c54-407f-8d45-6eebc90f34c2</xmpMM:InstanceID>
    //             <pdf:Producer>Adobe Acrobat Pro (32-bit) 24.3.20112</pdf:Producer>
    //             <pdf:Keywords>blank; metadata; document</pdf:Keywords>
    //             <xmpRights:WebStatement>https://www.chilkatsoft.com/</xmpRights:WebStatement>
    //         </rdf:Description>
    //     </rdf:RDF>
    // </x:xmpmeta>

    // Attributes of the root element (x:xmpmeta).
    Get ComGetAttrValue Of hoXml "xmlns:x" To sX_xmpmeta_xmlns_x
    Get ComGetAttrValue Of hoXml "x:xmptk" To sX_xmpmeta_x_xmptk

    // Attributes of nested elements: the path names the element, and the
    // trailing "(name)" segment names the attribute to read.
    Get ComChilkatPath Of hoXml "rdf:RDF|(xmlns:rdf)" To sRdf_RDF_xmlns_rdf
    Get ComChilkatPath Of hoXml "rdf:RDF|rdf:Description|(rdf:about)" To sRdf_Description_rdf_about
    Get ComChilkatPath Of hoXml "rdf:RDF|rdf:Description|(xmlns:xmp)" To sRdf_Description_xmlns_xmp
    Get ComChilkatPath Of hoXml "rdf:RDF|rdf:Description|(xmlns:dc)" To sRdf_Description_xmlns_dc
    Get ComChilkatPath Of hoXml "rdf:RDF|rdf:Description|(xmlns:xmpMM)" To sRdf_Description_xmlns_xmpMM
    Get ComChilkatPath Of hoXml "rdf:RDF|rdf:Description|(xmlns:pdf)" To sRdf_Description_xmlns_pdf
    Get ComChilkatPath Of hoXml "rdf:RDF|rdf:Description|(xmlns:xmpRights)" To sRdf_Description_xmlns_xmpRights

    // Simple element content under rdf:Description.
    Get ComGetChildContent Of hoXml "rdf:RDF|rdf:Description|xmp:ModifyDate" To sXmp_ModifyDate
    Get ComGetChildContent Of hoXml "rdf:RDF|rdf:Description|xmp:CreateDate" To sXmp_CreateDate
    Get ComGetChildContent Of hoXml "rdf:RDF|rdf:Description|xmp:MetadataDate" To sXmp_MetadataDate
    Get ComGetChildContent Of hoXml "rdf:RDF|rdf:Description|xmp:CreatorTool" To sXmp_CreatorTool
    Get ComGetChildContent Of hoXml "rdf:RDF|rdf:Description|dc:format" To sDc_format

    // dc:title, dc:creator and dc:description each reuse sRdf_li /
    // sRdf_li_xml_lang, so only the most recently read value is retained.
    Get ComChilkatPath Of hoXml "rdf:RDF|rdf:Description|dc:title|rdf:Alt|rdf:li|(xml:lang)" To sRdf_li_xml_lang
    Get ComGetChildContent Of hoXml "rdf:RDF|rdf:Description|dc:title|rdf:Alt|rdf:li" To sRdf_li
    Get ComGetChildContent Of hoXml "rdf:RDF|rdf:Description|dc:creator|rdf:Bag|rdf:li" To sRdf_li
    Get ComChilkatPath Of hoXml "rdf:RDF|rdf:Description|dc:description|rdf:Alt|rdf:li|(xml:lang)" To sRdf_li_xml_lang
    Get ComGetChildContent Of hoXml "rdf:RDF|rdf:Description|dc:description|rdf:Alt|rdf:li" To sRdf_li

    // Iterate over every rdf:li under dc:subject. The Xml object's I property
    // is set to the loop index before each lookup that uses "[i]" in the path.
    Move 0 To i
    Get ComNumChildrenHavingTag Of hoXml "rdf:RDF|rdf:Description|dc:subject|rdf:Bag|rdf:li" To iCount_i
    While (i < iCount_i)
        Set ComI Of hoXml To i
        Get ComGetChildContent Of hoXml "rdf:RDF|rdf:Description|dc:subject|rdf:Bag|rdf:li[i]" To sRdf_li
        Move (i + 1) To i
    Loop

    Get ComGetChildContent Of hoXml "rdf:RDF|rdf:Description|xmpMM:DocumentID" To sXmpMM_DocumentID
    Get ComGetChildContent Of hoXml "rdf:RDF|rdf:Description|xmpMM:InstanceID" To sXmpMM_InstanceID
    Get ComGetChildContent Of hoXml "rdf:RDF|rdf:Description|pdf:Producer" To sPdf_Producer
    Get ComGetChildContent Of hoXml "rdf:RDF|rdf:Description|pdf:Keywords" To sPdf_Keywords
    Get ComGetChildContent Of hoXml "rdf:RDF|rdf:Description|xmpRights:WebStatement" To sXmpRights_WebStatement


End_Procedure