Sample code for 30+ languages & platforms
Visual FoxPro

HTML Table to CSV

See more HTML-to-XML/Text Examples

Demonstrates a method for converting an HTML table to a CSV file.

Note: This example requires Chilkat v9.5.0.77 or greater.

Chilkat Visual FoxPro Downloads

Visual FoxPro
* Status flag and the Chilkat COM objects created by this script.
LOCAL lnSuccess, loHttp, loBdHtml, loHtx, loSbXml, loXml, loCsv
* Loop indices (i = div, j = tr, k = td) and their upper bounds.
LOCAL i, j, k, lnCount_i, lnCount_j, lnCount_k
* Attribute values read from the <table> and <tr> elements.
LOCAL lcTable_role, lcTable_data_scrollx, lcTable_data_sortdirection
LOCAL lcTable_data_sorton, lcTable_id, lcTr_role
* Remaining scratch values: prune count, cell text, and the final CSV text.
LOCAL lnNumRemoved, lcTagPath, lcText, lcCsvStr

* NOTE: the Chilkat API must already have been unlocked before this runs.
* See Global Unlock Sample for sample code.
lnSuccess = 0

* Step 1: fetch the HTML page that contains the table into a BinData object.
loHttp = CREATEOBJECT('Chilkat.Http')
loBdHtml = CREATEOBJECT('Chilkat.BinData')
lnSuccess = loHttp.QuickGetBd("https://example-code.com/data/etf_table.html", loBdHtml)

* Anything other than 1 is treated as failure: report it, clean up, and stop.
IF NOT (lnSuccess = 1)
    ? loHttp.LastErrorText
    RELEASE loHttp, loBdHtml
    CANCEL
ENDIF

* Convert the downloaded HTML to XML and load it into a Chilkat.Xml object.
* The conversion and load results are checked the same way as the download
* above: on failure, print the object's LastErrorText, release what has been
* created so far, and stop. Otherwise the rest of the script would run
* against an empty document and quietly produce an empty CSV.
loHtx = CreateObject('Chilkat.HtmlToXml')
loHtx.SetHtmlBd(loBdHtml)

loSbXml = CreateObject('Chilkat.StringBuilder')
lnSuccess = loHtx.ToXmlSb(loSbXml)
IF (lnSuccess <> 1) THEN
    ? loHtx.LastErrorText
    RELEASE loHttp
    RELEASE loBdHtml
    RELEASE loHtx
    RELEASE loSbXml
    CANCEL
ENDIF

loXml = CreateObject('Chilkat.Xml')
lnSuccess = loXml.LoadSb(loSbXml,1)
IF (lnSuccess <> 1) THEN
    ? loXml.LastErrorText
    RELEASE loHttp
    RELEASE loBdHtml
    RELEASE loHtx
    RELEASE loSbXml
    RELEASE loXml
    CANCEL
ENDIF

* Remove attributes and sub-trees we don't need.
* (In other words, we're getting rid of clutter...)
* The returned counts are stored but not used afterwards.
lnNumRemoved = loXml.PruneTag("thead")
lnNumRemoved = loXml.PruneAttribute("style")
lnNumRemoved = loXml.PruneAttribute("class")

* Scrub the element and attribute content.
loXml.Scrub("ContentTrimEnds,ContentTrimInside,AttrTrimEnds,AttrTrimInside")

* Let's see what we have...
? loXml.GetXml()

* We have the following XML.
* Copy this XML into the online tool "Generate Parsing Code from XML"
* to get a starting point for accessing the data.

* <?xml version="1.0" encoding="utf-8"?>
* <root>
*     <html>
*         <head>
*             <meta http-equiv="content-type" content="text/html; charset=UTF-8"/>
*         </head>
*         <body text="#000000" bgcolor="#FFFFFF">
*             <div>
*                 <div>
*                     <table role="grid" data-scrollx="true" data-sortdirection="desc" data-sorton="-1"/>
*                 </div>
*             </div>
*             <div>
*                 <table id="topHoldingsTable" role="grid" data-scrollx="true" data-sortdirection="desc" data-sorton="-1">
*                     <tbody>
*                         <tr role="row">
*                             <td>
*                                 <text>ITUB4</text>
*                             </td>
*                             <td>
*                                 <text>ITAU UNIBANCO HOLDING PREF SA</text>
*                             </td>
*                             <td>
*                                 <text>Financials</text>
*                             </td>
*                             <td>
*                                 <text>Brazil</text>
*                             </td>
*                             <td>
*                                 <text>10.94</text>
*                             </td>
*                             <td>
*                                 <text>998,954,813.73</text>
*                             </td>
*                         </tr>
*                         <tr role="row">
*                             <td>
*                                 <text>BBDC4</text>
*                             </td>
*                             <td>
*                                 <text>BANCO BRADESCO PREF SA</text>
*                             </td>
*                             <td>
*                                 <text>Financials</text>
*                             </td>
*                             <td>
*                                 <text>Brazil</text>
*                             </td>
*                             <td>
*                                 <text>9.01</text>
*                             </td>
*                             <td>
*                                 <text>822,164,622.75</text>
*                             </td>
*                         </tr>
* 			...
* 			...
* 			...
*                     </tbody>
*                 </table>
*             </div>
*         </body>
*     </html>
* </root>

* 
* This is the code generated by the online tool:
* 

* Walk every <div> under html|body, then every <tr> of a table directly inside
* that div, then every <td> of that row. The values read here are only assigned
* to locals; this pass demonstrates how each piece of the document is reached.
* loXml.I / .J / .K are set to the current indices before each path lookup
* (presumably what the [i], [j], [k] placeholders in the path strings refer to).
lnCount_i = loXml.NumChildrenHavingTag("html|body|div")
FOR i = 0 TO lnCount_i - 1
    loXml.I = i

    * Attributes of a table nested one <div> deeper.
    lcTable_role = loXml.ChilkatPath("html|body|div[i]|div|table|(role)")
    lcTable_data_scrollx = loXml.ChilkatPath("html|body|div[i]|div|table|(data-scrollx)")
    lcTable_data_sortdirection = loXml.ChilkatPath("html|body|div[i]|div|table|(data-sortdirection)")
    lcTable_data_sorton = loXml.ChilkatPath("html|body|div[i]|div|table|(data-sorton)")

    * Attributes of a table directly inside this <div> (these overwrite the values above).
    lcTable_id = loXml.ChilkatPath("html|body|div[i]|table|(id)")
    lcTable_role = loXml.ChilkatPath("html|body|div[i]|table|(role)")
    lcTable_data_scrollx = loXml.ChilkatPath("html|body|div[i]|table|(data-scrollx)")
    lcTable_data_sortdirection = loXml.ChilkatPath("html|body|div[i]|table|(data-sortdirection)")
    lcTable_data_sorton = loXml.ChilkatPath("html|body|div[i]|table|(data-sorton)")

    lnCount_j = loXml.NumChildrenHavingTag("html|body|div[i]|table|tbody|tr")
    FOR j = 0 TO lnCount_j - 1
        loXml.J = j
        lcTr_role = loXml.ChilkatPath("html|body|div[i]|table|tbody|tr[j]|(role)")

        lnCount_k = loXml.NumChildrenHavingTag("html|body|div[i]|table|tbody|tr[j]|td")
        FOR k = 0 TO lnCount_k - 1
            loXml.K = k
            lcText = loXml.GetChildContent("html|body|div[i]|table|tbody|tr[j]|td[k]|text")
        ENDFOR
    ENDFOR
ENDFOR

* Let's modify the above code to build the CSV.
* Only the text of each <td> is needed, so the attribute lookups are dropped.
LOCAL lnRow

loCsv = CreateObject('Chilkat.Csv')
loCsv.SetColumnName(0,"Ticker")
loCsv.SetColumnName(1,"Name")
loCsv.SetColumnName(2,"Sector")
loCsv.SetColumnName(3,"Country")
loCsv.SetColumnName(4,"Weight")
loCsv.SetColumnName(5,"Notional Value")

* lnRow is the next CSV row to fill. It keeps counting across tables, so that
* if more than one <div> holds a table with rows, a later table appends to the
* CSV instead of overwriting rows 0..n of an earlier one (j restarts at 0 for
* every table and therefore cannot be used as the CSV row index).
lnRow = 0

i = 0
lnCount_i = loXml.NumChildrenHavingTag("html|body|div")
DO WHILE i < lnCount_i
    loXml.I = i
    j = 0
    lnCount_j = loXml.NumChildrenHavingTag("html|body|div[i]|table|tbody|tr")
    DO WHILE j < lnCount_j
        loXml.J = j
        k = 0
        lnCount_k = loXml.NumChildrenHavingTag("html|body|div[i]|table|tbody|tr[j]|td")
        DO WHILE k < lnCount_k
            loXml.K = k
            * Column k of the current CSV row gets the text of the k-th <td>.
            loCsv.SetCell(lnRow,k,loXml.GetChildContent("html|body|div[i]|table|tbody|tr[j]|td[k]|text"))
            k = k + 1
        ENDDO
        lnRow = lnRow + 1
        j = j + 1
    ENDDO
    i = i + 1
ENDDO

* Writing the file can fail (for example if the qa_output directory does not
* exist). Report the failure instead of ignoring it, but keep going so the
* CSV text is still printed below.
lnSuccess = loCsv.SaveFile("qa_output/brasil_etf.csv")
IF (lnSuccess <> 1) THEN
    ? loCsv.LastErrorText
ENDIF

lcCsvStr = loCsv.SaveToString()
? lcCsvStr

* Our CSV looks like this:
* Ticker,Name,Sector,Country,Weight,Notional Value
* ITUB4,ITAU UNIBANCO HOLDING PREF SA,Financials,Brazil,10.94,"998,954,813.73"
* BBDC4,BANCO BRADESCO PREF SA,Financials,Brazil,9.01,"822,164,622.75"
* VALE3,CIA VALE DO RIO DOCE SH,Materials,Brazil,8.60,"785,290,260.07"
* PETR4,PETROLEO BRASILEIRO PREF SA,Energy,Brazil,5.68,"518,124,434.10"
* PETR3,PETROBRAS,Energy,Brazil,4.86,"443,254,438.53"
* B3SA3,B3 BRASIL BOLSA BALCAO SA,Financials,Brazil,4.57,"417,636,740.16"
* ABEV3,AMBEV SA,Consumer Staples,Brazil,4.57,"417,216,913.63"
* BBAS3,BANCO DO BRASIL SA,Financials,Brazil,3.25,"296,921,232.15"
* ITSA4,ITAUSA INVESTIMENTOS ITAU PREF SA,Financials,Brazil,2.90,"265,153,684.52"
* LREN3,LOJAS RENNER SA,Consumer Discretionary,Brazil,2.25,"205,832,175.98"
* 

* Done: drop the references to every Chilkat object created above.
RELEASE loHttp, loBdHtml, loHtx, loSbXml, loXml, loCsv