Objective-C
Efficiently Process a Huge XML File

Demonstrates a technique for processing a huge XML file (can be any size, even many gigabytes).
Note: This example requires Chilkat v9.5.0.80 or greater.
Chilkat Objective-C Downloads

Objective-C
#import <CkoFileAccess.h>
#import <CkoXml.h>
#import <CkoStringBuilder.h>
#import <NSString.h>

BOOL success = NO;

//  Goal: process an XML file that may be too large to hold in memory at once.
//
//  XML parsers come in two flavors: DOM and SAX.
//
//  A DOM (Document Object Model) parser loads the whole document into memory and
//  gives the application convenient, random-access navigation of the tree.
//  The Chilkat Xml class is a DOM parser.  Since the full document must be resident,
//  multi-gigabyte files usually cannot be loaded because of memory constraints.
//
//  A SAX parser reads the XML as an input stream and never builds a DOM.
//  It copes with any file size, but is generally less pleasant to program against.
//
//  The approach shown here is a hybrid: the file is streamed as plain text, and each
//  fragment of interest is cut out and loaded on its own into the Chilkat Xml (DOM) parser.
//
//  Suppose the file is several GB in size but has a simple, repetitive structure:
//
//  <Transactions>
//      <Transaction id="1">
//           ...
//      </Transaction>
//      <Transaction id="2">
//           ...
//      </Transaction>
//      <Transaction id="3">
//           ...
//      </Transaction>
//  ...
//  </Transactions>
//
//  The code below extracts each <Transaction ...> ... </Transaction> element and loads it
//  into an Xml object by itself, where it can be examined or modified independently.
//  At no point is the complete file held in memory.

CkoFileAccess *fac = [[CkoFileAccess alloc] init];

//  Open the source file for reading; without it there is nothing to process.
success = [fac OpenForRead: @"qa_data/xml/transactions.xml"];
if (!success) {
    NSLog(@"%@", fac.LastErrorText);
    return;
}

CkoXml *xml = [[CkoXml alloc] init];
CkoStringBuilder *sb = [[CkoStringBuilder alloc] init];
BOOL firstIteration = YES;
int retval = 1;
int numTransactions = 0;

//  The begin marker is "XML tag aware".  If the begin marker begins with "<"
//  and ends with ">", then it is assumed to be an XML tag and it will also match
//  substrings where the ">" is replaced by a whitespace char (for example,
//  "<Transaction id=...").
NSString *beginMarker = @"<Transaction>";
NSString *endMarker = @"</Transaction>";

while (retval == 1) {
    //  A huge file can yield millions of fragments.  Give each iteration its own
    //  autorelease pool so temporaries created per fragment do not accumulate
    //  until the enclosing pool drains.
    @autoreleasepool {
        [sb Clear];
        //  The retval can have the following values:
        //  0: No more fragments exist.
        //  1: Captured the next fragment.  The text from beginMarker to endMarker, including the markers, is returned in sb.
        //  -1: Error.
        retval = [[fac ReadNextFragment: firstIteration beginMarker: beginMarker endMarker: endMarker charset: @"utf-8" sb: sb] intValue];
        firstIteration = NO;

        if (retval == 1) {
            //  Counts every fragment extracted from the file, whether or not it parses.
            numTransactions = numTransactions + 1;
            success = [xml LoadSb: sb autoTrim: YES];
            if (!success) {
                //  The fragment is not well-formed XML.  Report it and move on to
                //  the next fragment rather than working with a stale/invalid xml object.
                NSLog(@"%@", xml.LastErrorText);
            }
            else {
                //  Your application may now do what it needs with this particular XML fragment...
            }
        }
    }
}

//  Report a read error before closing, so the error text refers to the failed read.
if (retval < 0) {
    NSLog(@"%@",fac.LastErrorText);
}

//  Release the file handle opened by OpenForRead.
[fac FileClose];

NSLog(@"%@%d",@"numTransactions: ",numTransactions);