Unicode C
XML Path Performance Optimizations

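Explains why tag-plus-index paths such as "licenses|license[i]|id" get slower as the index grows in the Chilkat XML API, and shows two faster alternatives for nodes whose children all share one tag: index-only paths ("licenses|[i]|id") and NumChildrenAt in place of NumChildrenHavingTag.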
Chilkat Unicode C Downloads

Unicode C
#include <stdio.h>
#include <wchar.h>

#include <C_CkXmlW.h>

//  Demonstrates the performance difference between tag-plus-index paths
//  ("licenses|license[i]|id") and index-only paths ("licenses|[i]|id") when
//  reading child content with the Chilkat XML API.
//
//  Loads "qa_output/large.xml", prints the number of "license" children, prints
//  the "id" content of the first few children using both path styles, and finally
//  prints the child count obtained with NumChildrenAt.
//
//  Takes no arguments and returns nothing.  All results (or the Chilkat error text
//  if the file cannot be loaded) are written to stdout.  The XML object is disposed
//  on every exit path.
void ChilkatSample(void)
{
    //  Number of children printed by each demonstration loop.
    enum { SAMPLE_COUNT = 10 };

    BOOL success;
    HCkXmlW xml;
    int licCount;
    const wchar_t *s;
    int i;

    xml = CkXmlW_Create();

    //  Let's load XML containing the following:

    //  <?xml version="1.0" encoding="utf-8"?>
    //  <xyz>
    //      <licenses>
    //          <license>
    //              <id>1234</id>
    //          </license>
    //          <license>
    //              <id>1234</id>
    //          </license>
    //  ...
    //  My sample XML contains 64,000 "license" nodes ..
    //  ...
    //          <license>
    //              <id>1234</id>
    //          </license>
    //          <license>
    //              <id>1234</id>
    //          </license>
    //      </licenses>
    //  </xyz>
    //
    success = CkXmlW_LoadXmlFile(xml,L"qa_output/large.xml");
    if (!success) {
        //  "%ls" is the portable conversion for a wchar_t string in wprintf
        //  ("%s" means a narrow char string in standard C).
        wprintf(L"%ls\n",CkXmlW_lastErrorText(xml));
        CkXmlW_Dispose(xml);
        return;
    }

    //  Iterating over the individual "license" nodes with this code snippet is
    //  extremely slow:
    licCount = CkXmlW_NumChildrenHavingTag(xml,L"licenses|license");
    wprintf(L"license count = %d\n",licCount);

    //  If SAMPLE_COUNT is replaced by licCount, then it becomes apparent that this
    //  loop gets slower with each iteration.
    for (i = 0; i < SAMPLE_COUNT; i++) {
        //  The "[i]" in the path below refers to the value set here.
        CkXmlW_putI(xml,i);
        s = CkXmlW_getChildContent(xml,L"licenses|license[i]|id");
        //  Guard against a NULL result (e.g. the path does not exist) so that a
        //  null pointer is never handed to the formatter.
        wprintf(L"%d: %ls\n",i,(s != NULL) ? s : L"(not found)");
    }

    //  The reason it is extremely slow is that the "license[i]" part of the path passed to GetChildContent
    //  says: find the i'th child of "licenses" having the tag "license".  Chilkat cannot assume that all
    //  children of an XML node have the same tag.  Therefore it's not possible to directly access the i'th child.
    //  Internally, Chilkat must start at the 1st child and iterate until it reaches the i'th child having the
    //  tag "license".

    //  For example, imagine if the XML was like this:

    //  <?xml version="1.0" encoding="utf-8"?>
    //  <xyz>
    //      <licenses>
    //          <license>
    //              <id>1234</id>
    //          </license>
    //          <somethingElse>
    //              <a>abc</a>
    //          </somethingElse>
    //          <license>
    //              <id>1234</id>
    //          </license>
    //  ...

    //  In the above XML, the 1st "license" is the 1st child of "licenses", but the 2nd "license"
    //  is the 3rd child of "licenses".

    //  If you already know that all children have the same tag, there is a shortcut that allows
    //  for direct access to that child.  Just leave off the tag name, like this:

    //  If SAMPLE_COUNT is replaced by licCount, then we can see the time for each
    //  loop is the same, and it's fast.
    for (i = 0; i < SAMPLE_COUNT; i++) {
        CkXmlW_putI(xml,i);
        s = CkXmlW_getChildContent(xml,L"licenses|[i]|id");
        wprintf(L"%d: %ls\n",i,(s != NULL) ? s : L"(not found)");
    }

    //  When we pass just the index "[i]", we're saying: Get the i'th child regardless of tag.
    //  This is extremely fast because internally we can just access the i'th child directly.

    //  Another performance improvement is to call NumChildrenAt rather than NumChildrenHavingTag.
    //  For example:
    licCount = CkXmlW_NumChildrenAt(xml,L"licenses");
    wprintf(L"licCount = %d\n",licCount);

    //  NumChildrenAt returns the total number of children at the tag path.  If we already know
    //  all children will have the same tag, we can just get the count.

    CkXmlW_Dispose(xml);
}