SQL Server
SQL Server
Efficiently Process a Huge XML File
See more XML Examples
Demonstrates a technique for processing a huge XML file (can be any size, even many gigabytes).Note: This example requires Chilkat v9.5.0.80 or greater.
Chilkat SQL Server Downloads
-- Important: See this note about string length limitations for strings returned by sp_OAMethod calls.
--
CREATE PROCEDURE ChilkatSample
AS
BEGIN
DECLARE @hr int
-- Important: Do not use nvarchar(max). See the warning about using nvarchar(max).
DECLARE @sTmp0 nvarchar(4000)
DECLARE @success int
SELECT @success = 0
-- This example shows a way to efficiently process a gigantic XML file -- one that may be too large
-- to fit in memory.
--
-- Two types of XML parsers exist: DOM parsers and SAX parsers.
-- A DOM parser is a Document Object Model parser, where the entire XML is loaded into memory
-- and the application has the luxury of interacting with the XML in a convenient, random-access
-- way. The Chilkat Xml class is a DOM parser. Because the entire XML is loaded into memory,
-- huge XML files (on the order of gigabytes) are usually not loadable for memory constraints.
-- A SAX parser is such that the XML file is parsed as an input stream. No DOM exists.
-- Using a SAX parser is generally less palatable than using a DOM parser, for many reasons.
--
-- The technique described here is a hybrid. It streams the XML file as unstructured text
-- to extract fragments that are individually treated as separate XML documents loaded into
-- the Chilkat Xml parser.
--
-- For example, imagine your XML file is several GBs in size, but has a relatively simple structure, such as:
--
-- <Transactions>
-- <Transaction id="1">
-- ...
-- </Transaction>
-- <Transaction id="2">
-- ...
-- </Transaction>
-- <Transaction id="3">
-- ...
-- </Transaction>
-- ...
-- </Transactions>
-- In the following code, each <Transaction ...> ... </Transaction>
-- is extracted and loaded separately into an Xml object, where it can be manipulated
-- independently. The entire XML file is never entirely loaded into memory.
DECLARE @fac int
EXEC @hr = sp_OACreate 'Chilkat.FileAccess', @fac OUT
IF @hr <> 0
BEGIN
PRINT 'Failed to create ActiveX component'
RETURN
END
EXEC sp_OAMethod @fac, 'OpenForRead', @success OUT, 'qa_data/xml/transactions.xml'
IF @success = 0
BEGIN
EXEC sp_OAGetProperty @fac, 'LastErrorText', @sTmp0 OUT
PRINT @sTmp0
EXEC @hr = sp_OADestroy @fac
RETURN
END
DECLARE @xml int
EXEC @hr = sp_OACreate 'Chilkat.Xml', @xml OUT
DECLARE @sb int
EXEC @hr = sp_OACreate 'Chilkat.StringBuilder', @sb OUT
DECLARE @firstIteration int
SELECT @firstIteration = 1
DECLARE @retval int
SELECT @retval = 1
DECLARE @numTransactions int
SELECT @numTransactions = 0
-- The begin marker is "XML tag aware". If the begin marker begins with "<"
-- and ends with ">", then it is assumed to be an XML tag and it will also match
-- substrings where the ">" can be a whitespace char.
DECLARE @beginMarker nvarchar(4000)
SELECT @beginMarker = '<Transaction>'
DECLARE @endMarker nvarchar(4000)
SELECT @endMarker = '</Transaction>'
WHILE @retval = 1
BEGIN
EXEC sp_OAMethod @sb, 'Clear', NULL
-- The retval can have the following values:
-- 0: No more fragments exist.
-- 1: Captured the next fragment. The text from beginMarker to endMarker, including the markers, are returned in sb.
-- -1: Error.
EXEC sp_OAMethod @fac, 'ReadNextFragment', @retval OUT, @firstIteration, @beginMarker, @endMarker, 'utf-8', @sb
SELECT @firstIteration = 0
IF @retval = 1
BEGIN
SELECT @numTransactions = @numTransactions + 1
EXEC sp_OAMethod @xml, 'LoadSb', @success OUT, @sb, 1
-- Your application may now do what it needs with this particular XML fragment...
END
END
IF @retval < 0
BEGIN
EXEC sp_OAGetProperty @fac, 'LastErrorText', @sTmp0 OUT
PRINT @sTmp0
END
PRINT 'numTransactions: ' + @numTransactions
EXEC @hr = sp_OADestroy @fac
EXEC @hr = sp_OADestroy @xml
EXEC @hr = sp_OADestroy @sb
END
GO