Unicode C
Unicode C
XML Path Performance Optimizations
See more XML Examples
Discusses some important things to know about using Chilkat paths in the Chilkat XML API. Chilkat Unicode C Downloads
#include <C_CkXmlW.h>
/*
 * Demonstrates how the form of a Chilkat XML path affects lookup speed.
 *
 * Loads "qa_output/large.xml", prints the number of "license" children, and
 * then reads the first few <id> values twice: once through a tag-qualified
 * index ("license[i]", slow) and once through a bare index ("[i]", fast).
 * Finally it prints the child count obtained via NumChildrenAt.
 *
 * If the file cannot be loaded, the Chilkat error text is printed and the
 * function returns after disposing of the XML object.
 */
void ChilkatSample(void)
{
    BOOL success;
    HCkXmlW xml;
    int licCount;
    const wchar_t *s;
    int i;
    // Number of entries printed by each demonstration loop.
    const int sampleCount = 10;

    xml = CkXmlW_Create();

    // Let's load XML containing the following:

    // <?xml version="1.0" encoding="utf-8"?>
    // <xyz>
    //     <licenses>
    //         <license>
    //             <id>1234</id>
    //         </license>
    //         <license>
    //             <id>1234</id>
    //         </license>
    //         ...
    //         My sample XML contains 64,000 "license" nodes ..
    //         ...
    //         <license>
    //             <id>1234</id>
    //         </license>
    //         <license>
    //             <id>1234</id>
    //         </license>
    //     </licenses>
    // </xyz>
    //
    success = CkXmlW_LoadXmlFile(xml, L"qa_output/large.xml");
    if (!success) {
        // %ls is the wide-string conversion in both standard C and the
        // Microsoft CRT; a plain %s in wprintf denotes a narrow string in
        // standard C, which would not match a const wchar_t * argument.
        wprintf(L"%ls\n", CkXmlW_lastErrorText(xml));
        CkXmlW_Dispose(xml);
        return;
    }

    // Iterating over the individual "license" nodes with this code snippet is
    // extremely slow:
    licCount = CkXmlW_NumChildrenHavingTag(xml, L"licenses|license");
    wprintf(L"license count = %d\n", licCount);

    // If sampleCount is changed to licCount, then it becomes apparent that
    // this loop gets slower with each iteration.
    for (i = 0; i < sampleCount; i++) {
        // The "[i]" in the path is substituted from the XML object's I property.
        CkXmlW_putI(xml, i);
        s = CkXmlW_getChildContent(xml, L"licenses|license[i]|id");
        // Chilkat string getters are documented to return NULL on failure
        // (e.g. the path does not resolve); never hand NULL to a string
        // conversion.
        wprintf(L"%d: %ls\n", i, s ? s : L"(null)");
    }

    // The reason it is extremely slow is that the "license[i]" part of the path passed to GetChildContent
    // says: find the i'th child of "licenses" having the tag "license". Chilkat cannot assume that all
    // children of an XML node have the same tag. Therefore it's not possible to directly access the i'th child.
    // Internally, Chilkat must start at the 1st child and iterate until it reaches the i'th child having the
    // tag "license".

    // For example, imagine if the XML was like this:

    // <?xml version="1.0" encoding="utf-8"?>
    // <xyz>
    //     <licenses>
    //         <license>
    //             <id>1234</id>
    //         </license>
    //         <somethingElse>
    //             <a>abc</a>
    //         </somethingElse>
    //         <license>
    //             <id>1234</id>
    //         </license>
    //         ...

    // In the above XML, the 1st "license" is the 1st child of "licenses", but the 2nd "license"
    // is the 3rd child of "licenses".

    // If you already know that all children have the same tag, there is a shortcut that allows
    // for direct access to that child. Just leave off the tag name, like this:

    // If sampleCount is changed to licCount, then we can see the time for each
    // loop is the same, and it's fast.
    for (i = 0; i < sampleCount; i++) {
        CkXmlW_putI(xml, i);
        s = CkXmlW_getChildContent(xml, L"licenses|[i]|id");
        wprintf(L"%d: %ls\n", i, s ? s : L"(null)");
    }

    // When we pass just the index "[i]", we're saying: Get the i'th child regardless of tag.
    // This is extremely fast because internally we can just access the i'th child directly.

    // Another performance improvement is to call NumChildrenAt rather than NumChildrenHavingTag.
    // For example:
    licCount = CkXmlW_NumChildrenAt(xml, L"licenses");
    wprintf(L"licCount = %d\n", licCount);

    // NumChildrenAt returns the total number of children at the tag path. If we already know
    // all children will have the same tag, we can just get the count

    CkXmlW_Dispose(xml);
}