SQL Server
SQL Server
HTML Table to CSV
See more HTML-to-XML/Text Examples
Demonstrates a method for converting an HTML table to a CSV file.Note: This example requires Chilkat v9.5.0.77 or greater.
Chilkat SQL Server Downloads
-- Important: See this note about string length limitations for strings returned by sp_OAMethod calls.
--
CREATE PROCEDURE ChilkatSample
AS
BEGIN
DECLARE @hr int
-- Important: Do not use nvarchar(max). See the warning about using nvarchar(max).
DECLARE @sTmp0 nvarchar(4000)
DECLARE @success int
SELECT @success = 0
-- This example requires the Chilkat API to have been previously unlocked.
-- See Global Unlock Sample for sample code.
-- First download the HTML containing the table
DECLARE @http int
EXEC @hr = sp_OACreate 'Chilkat.Http', @http OUT
IF @hr <> 0
BEGIN
PRINT 'Failed to create ActiveX component'
RETURN
END
DECLARE @bdHtml int
EXEC @hr = sp_OACreate 'Chilkat.BinData', @bdHtml OUT
EXEC sp_OAMethod @http, 'QuickGetBd', @success OUT, 'https://example-code.com/data/etf_table.html', @bdHtml
IF @success <> 1
BEGIN
EXEC sp_OAGetProperty @http, 'LastErrorText', @sTmp0 OUT
PRINT @sTmp0
EXEC @hr = sp_OADestroy @http
EXEC @hr = sp_OADestroy @bdHtml
RETURN
END
-- Convert to XML.
DECLARE @htx int
EXEC @hr = sp_OACreate 'Chilkat.HtmlToXml', @htx OUT
EXEC sp_OAMethod @htx, 'SetHtmlBd', @success OUT, @bdHtml
DECLARE @sbXml int
EXEC @hr = sp_OACreate 'Chilkat.StringBuilder', @sbXml OUT
EXEC sp_OAMethod @htx, 'ToXmlSb', @success OUT, @sbXml
DECLARE @xml int
EXEC @hr = sp_OACreate 'Chilkat.Xml', @xml OUT
EXEC sp_OAMethod @xml, 'LoadSb', @success OUT, @sbXml, 1
-- Remove attributes and sub-trees we don't need.
-- (In other words, we're getting rid of clutter...)
DECLARE @numRemoved int
EXEC sp_OAMethod @xml, 'PruneTag', @numRemoved OUT, 'thead'
EXEC sp_OAMethod @xml, 'PruneAttribute', @numRemoved OUT, 'style'
EXEC sp_OAMethod @xml, 'PruneAttribute', @numRemoved OUT, 'class'
-- Scrub the element and attribute content.
EXEC sp_OAMethod @xml, 'Scrub', NULL, 'ContentTrimEnds,ContentTrimInside,AttrTrimEnds,AttrTrimInside'
-- Let's see what we have...
EXEC sp_OAMethod @xml, 'GetXml', @sTmp0 OUT
PRINT @sTmp0
-- We have the following XML.
-- Copy this XML into the online tool at Generate Parsing Code from XML
-- as a starting point for accessing the data..
-- <?xml version="1.0" encoding="utf-8"?>
-- <root>
-- <html>
-- <head>
-- <meta http-equiv="content-type" content="text/html; charset=UTF-8"/>
-- </head>
-- <body text="#000000" bgcolor="#FFFFFF">
-- <div>
-- <div>
-- <table role="grid" data-scrollx="true" data-sortdirection="desc" data-sorton="-1"/>
-- </div>
-- </div>
-- <div>
-- <table id="topHoldingsTable" role="grid" data-scrollx="true" data-sortdirection="desc" data-sorton="-1">
-- <tbody>
-- <tr role="row">
-- <td>
-- <text>ITUB4</text>
-- </td>
-- <td>
-- <text>ITAU UNIBANCO HOLDING PREF SA</text>
-- </td>
-- <td>
-- <text>Financials</text>
-- </td>
-- <td>
-- <text>Brazil</text>
-- </td>
-- <td>
-- <text>10.94</text>
-- </td>
-- <td>
-- <text>998,954,813.73</text>
-- </td>
-- </tr>
-- <tr role="row">
-- <td>
-- <text>BBDC4</text>
-- </td>
-- <td>
-- <text>BANCO BRADESCO PREF SA</text>
-- </td>
-- <td>
-- <text>Financials</text>
-- </td>
-- <td>
-- <text>Brazil</text>
-- </td>
-- <td>
-- <text>9.01</text>
-- </td>
-- <td>
-- <text>822,164,622.75</text>
-- </td>
-- </tr>
-- ...
-- ...
-- ...
-- </tbody>
-- </table>
-- </div>
-- </body>
-- </html>
-- </root>
--
-- This is the code generated by the online tool:
--
DECLARE @i int
DECLARE @count_i int
DECLARE @table_role nvarchar(4000)
DECLARE @table_data_scrollx nvarchar(4000)
DECLARE @table_data_sortdirection nvarchar(4000)
DECLARE @table_data_sorton nvarchar(4000)
DECLARE @table_id nvarchar(4000)
DECLARE @j int
DECLARE @count_j int
DECLARE @tr_role nvarchar(4000)
DECLARE @k int
DECLARE @count_k int
DECLARE @tagPath nvarchar(4000)
DECLARE @text nvarchar(4000)
SELECT @i = 0
EXEC sp_OAMethod @xml, 'NumChildrenHavingTag', @count_i OUT, 'html|body|div'
WHILE @i < @count_i
BEGIN
EXEC sp_OASetProperty @xml, 'I', @i
EXEC sp_OAMethod @xml, 'ChilkatPath', @table_role OUT, 'html|body|div[i]|div|table|(role)'
EXEC sp_OAMethod @xml, 'ChilkatPath', @table_data_scrollx OUT, 'html|body|div[i]|div|table|(data-scrollx)'
EXEC sp_OAMethod @xml, 'ChilkatPath', @table_data_sortdirection OUT, 'html|body|div[i]|div|table|(data-sortdirection)'
EXEC sp_OAMethod @xml, 'ChilkatPath', @table_data_sorton OUT, 'html|body|div[i]|div|table|(data-sorton)'
EXEC sp_OAMethod @xml, 'ChilkatPath', @table_id OUT, 'html|body|div[i]|table|(id)'
EXEC sp_OAMethod @xml, 'ChilkatPath', @table_role OUT, 'html|body|div[i]|table|(role)'
EXEC sp_OAMethod @xml, 'ChilkatPath', @table_data_scrollx OUT, 'html|body|div[i]|table|(data-scrollx)'
EXEC sp_OAMethod @xml, 'ChilkatPath', @table_data_sortdirection OUT, 'html|body|div[i]|table|(data-sortdirection)'
EXEC sp_OAMethod @xml, 'ChilkatPath', @table_data_sorton OUT, 'html|body|div[i]|table|(data-sorton)'
SELECT @j = 0
EXEC sp_OAMethod @xml, 'NumChildrenHavingTag', @count_j OUT, 'html|body|div[i]|table|tbody|tr'
WHILE @j < @count_j
BEGIN
EXEC sp_OASetProperty @xml, 'J', @j
EXEC sp_OAMethod @xml, 'ChilkatPath', @tr_role OUT, 'html|body|div[i]|table|tbody|tr[j]|(role)'
SELECT @k = 0
EXEC sp_OAMethod @xml, 'NumChildrenHavingTag', @count_k OUT, 'html|body|div[i]|table|tbody|tr[j]|td'
WHILE @k < @count_k
BEGIN
EXEC sp_OASetProperty @xml, 'K', @k
EXEC sp_OAMethod @xml, 'GetChildContent', @text OUT, 'html|body|div[i]|table|tbody|tr[j]|td[k]|text'
SELECT @k = @k + 1
END
SELECT @j = @j + 1
END
SELECT @i = @i + 1
END
-- Let's modify the above code to build the CSV.
DECLARE @csv int
EXEC @hr = sp_OACreate 'Chilkat.Csv', @csv OUT
EXEC sp_OAMethod @csv, 'SetColumnName', @success OUT, 0, 'Ticker'
EXEC sp_OAMethod @csv, 'SetColumnName', @success OUT, 1, 'Name'
EXEC sp_OAMethod @csv, 'SetColumnName', @success OUT, 2, 'Sector'
EXEC sp_OAMethod @csv, 'SetColumnName', @success OUT, 3, 'Country'
EXEC sp_OAMethod @csv, 'SetColumnName', @success OUT, 4, 'Weight'
EXEC sp_OAMethod @csv, 'SetColumnName', @success OUT, 5, 'Notional Vaue'
SELECT @i = 0
EXEC sp_OAMethod @xml, 'NumChildrenHavingTag', @count_i OUT, 'html|body|div'
WHILE @i < @count_i
BEGIN
EXEC sp_OASetProperty @xml, 'I', @i
SELECT @j = 0
EXEC sp_OAMethod @xml, 'NumChildrenHavingTag', @count_j OUT, 'html|body|div[i]|table|tbody|tr'
WHILE @j < @count_j
BEGIN
EXEC sp_OASetProperty @xml, 'J', @j
SELECT @k = 0
EXEC sp_OAMethod @xml, 'NumChildrenHavingTag', @count_k OUT, 'html|body|div[i]|table|tbody|tr[j]|td'
WHILE @k < @count_k
BEGIN
EXEC sp_OASetProperty @xml, 'K', @k
EXEC sp_OAMethod @xml, 'GetChildContent', @sTmp0 OUT, 'html|body|div[i]|table|tbody|tr[j]|td[k]|text'
EXEC sp_OAMethod @csv, 'SetCell', @success OUT, @j, @k, @sTmp0
SELECT @k = @k + 1
END
SELECT @j = @j + 1
END
SELECT @i = @i + 1
END
EXEC sp_OAMethod @csv, 'SaveFile', @success OUT, 'qa_output/brasil_etf.csv'
DECLARE @csvStr nvarchar(4000)
EXEC sp_OAMethod @csv, 'SaveToString', @csvStr OUT
PRINT @csvStr
-- Our CSV looks like this:
-- Ticker,Name,Sector,Country,Weight,Notional Vaue
-- ITUB4,ITAU UNIBANCO HOLDING PREF SA,Financials,Brazil,10.94,"998,954,813.73"
-- BBDC4,BANCO BRADESCO PREF SA,Financials,Brazil,9.01,"822,164,622.75"
-- VALE3,CIA VALE DO RIO DOCE SH,Materials,Brazil,8.60,"785,290,260.07"
-- PETR4,PETROLEO BRASILEIRO PREF SA,Energy,Brazil,5.68,"518,124,434.10"
-- PETR3,PETROBRAS,Energy,Brazil,4.86,"443,254,438.53"
-- B3SA3,B3 BRASIL BOLSA BALCAO SA,Financials,Brazil,4.57,"417,636,740.16"
-- ABEV3,AMBEV SA,Consumer Staples,Brazil,4.57,"417,216,913.63"
-- BBAS3,BANCO DO BRASIL SA,Financials,Brazil,3.25,"296,921,232.15"
-- ITSA4,ITAUSA INVESTIMENTOS ITAU PREF SA,Financials,Brazil,2.90,"265,153,684.52"
-- LREN3,LOJAS RENNER SA,Consumer Discretionary,Brazil,2.25,"205,832,175.98"
--
EXEC @hr = sp_OADestroy @http
EXEC @hr = sp_OADestroy @bdHtml
EXEC @hr = sp_OADestroy @htx
EXEC @hr = sp_OADestroy @sbXml
EXEC @hr = sp_OADestroy @xml
EXEC @hr = sp_OADestroy @csv
END
GO