Visual FoxPro
Visual FoxPro
HTML Table to CSV
See more HTML-to-XML/Text Examples
Demonstrates a method for converting an HTML table to a CSV file. Note: This example requires Chilkat v9.5.0.77 or greater.
Chilkat Visual FoxPro Downloads
* Working variables for the script, grouped by role.

* Chilkat COM object references
LOCAL loHttp, loBdHtml, loHtx, loSbXml, loXml, loCsv

* Status / count values returned by Chilkat calls
LOCAL lnSuccess, lnNumRemoved

* Loop indices, each paired with the child count that bounds it
LOCAL i, lnCount_i
LOCAL j, lnCount_j
LOCAL k, lnCount_k

* Attribute and text values read out of the XML
LOCAL lcTable_role, lcTable_data_scrollx, lcTable_data_sortdirection
LOCAL lcTable_data_sorton, lcTable_id, lcTr_role
LOCAL lcTagPath, lcText

* The finished CSV as a string
LOCAL lcCsvStr

* Start in the "not successful" state (the script treats 1 as success).
lnSuccess = 0
* This example requires the Chilkat API to have been previously unlocked.
* See Global Unlock Sample for sample code.

* Step 1: fetch the HTML page that contains the table into a BinData object.
loBdHtml = CreateObject('Chilkat.BinData')
loHttp = CreateObject('Chilkat.Http')
lnSuccess = loHttp.QuickGetBd("https://example-code.com/data/etf_table.html",loBdHtml)
IF lnSuccess <> 1
    * The download failed: show the HTTP object's error text, clean up, and stop.
    ? loHttp.LastErrorText
    RELEASE loHttp, loBdHtml
    CANCEL
ENDIF
* Step 2: convert the downloaded HTML to XML and load it into an Xml object.
* ToXmlSb and LoadSb report success as 1 (the same convention checked for
* QuickGetBd); stop with the failing object's LastErrorText instead of
* silently continuing with an empty or partial document.
loHtx = CreateObject('Chilkat.HtmlToXml')
loSbXml = CreateObject('Chilkat.StringBuilder')
loXml = CreateObject('Chilkat.Xml')

loHtx.SetHtmlBd(loBdHtml)
lnSuccess = loHtx.ToXmlSb(loSbXml)
IF lnSuccess <> 1
    ? loHtx.LastErrorText
    RELEASE loHttp, loBdHtml, loHtx, loSbXml, loXml
    CANCEL
ENDIF

* The second argument (1) is passed through unchanged from the original call.
lnSuccess = loXml.LoadSb(loSbXml,1)
IF lnSuccess <> 1
    ? loXml.LastErrorText
    RELEASE loHttp, loBdHtml, loHtx, loSbXml, loXml
    CANCEL
ENDIF

* Remove attributes and sub-trees we don't need.
* (In other words, we're getting rid of clutter...)
* Each Prune* call's return value is kept in lnNumRemoved; the script does not use it further.
lnNumRemoved = loXml.PruneTag("thead")
lnNumRemoved = loXml.PruneAttribute("style")
lnNumRemoved = loXml.PruneAttribute("class")

* Scrub the element and attribute content.
loXml.Scrub("ContentTrimEnds,ContentTrimInside,AttrTrimEnds,AttrTrimInside")

* Let's see what we have...
? loXml.GetXml()
* We have the following XML.
* Copy this XML into the online tool at Generate Parsing Code from XML
* as a starting point for accessing the data.
* <?xml version="1.0" encoding="utf-8"?>
* <root>
* <html>
* <head>
* <meta http-equiv="content-type" content="text/html; charset=UTF-8"/>
* </head>
* <body text="#000000" bgcolor="#FFFFFF">
* <div>
* <div>
* <table role="grid" data-scrollx="true" data-sortdirection="desc" data-sorton="-1"/>
* </div>
* </div>
* <div>
* <table id="topHoldingsTable" role="grid" data-scrollx="true" data-sortdirection="desc" data-sorton="-1">
* <tbody>
* <tr role="row">
* <td>
* <text>ITUB4</text>
* </td>
* <td>
* <text>ITAU UNIBANCO HOLDING PREF SA</text>
* </td>
* <td>
* <text>Financials</text>
* </td>
* <td>
* <text>Brazil</text>
* </td>
* <td>
* <text>10.94</text>
* </td>
* <td>
* <text>998,954,813.73</text>
* </td>
* </tr>
* <tr role="row">
* <td>
* <text>BBDC4</text>
* </td>
* <td>
* <text>BANCO BRADESCO PREF SA</text>
* </td>
* <td>
* <text>Financials</text>
* </td>
* <td>
* <text>Brazil</text>
* </td>
* <td>
* <text>9.01</text>
* </td>
* <td>
* <text>822,164,622.75</text>
* </td>
* </tr>
* ...
* ...
* ...
* </tbody>
* </table>
* </div>
* </body>
* </html>
* </root>
*
* This is the code generated by the online tool:
*
* Walk every <div> under html|body, then every table row, then every cell.
* The Xml object's I, J and K properties supply the values for the [i], [j]
* and [k] placeholders used inside the path strings.
lnCount_i = loXml.NumChildrenHavingTag("html|body|div")
FOR i = 0 TO lnCount_i - 1
    loXml.I = i

    * Attributes of a table nested one <div> deeper (div|div|table).
    lcTable_role = loXml.ChilkatPath("html|body|div[i]|div|table|(role)")
    lcTable_data_scrollx = loXml.ChilkatPath("html|body|div[i]|div|table|(data-scrollx)")
    lcTable_data_sortdirection = loXml.ChilkatPath("html|body|div[i]|div|table|(data-sortdirection)")
    lcTable_data_sorton = loXml.ChilkatPath("html|body|div[i]|div|table|(data-sorton)")

    * Attributes of a table directly inside the <div> (div|table).
    * Note: these reuse the same variables, replacing the values read above.
    lcTable_id = loXml.ChilkatPath("html|body|div[i]|table|(id)")
    lcTable_role = loXml.ChilkatPath("html|body|div[i]|table|(role)")
    lcTable_data_scrollx = loXml.ChilkatPath("html|body|div[i]|table|(data-scrollx)")
    lcTable_data_sortdirection = loXml.ChilkatPath("html|body|div[i]|table|(data-sortdirection)")
    lcTable_data_sorton = loXml.ChilkatPath("html|body|div[i]|table|(data-sorton)")

    * Rows of the table directly inside this <div>.
    lnCount_j = loXml.NumChildrenHavingTag("html|body|div[i]|table|tbody|tr")
    FOR j = 0 TO lnCount_j - 1
        loXml.J = j
        lcTr_role = loXml.ChilkatPath("html|body|div[i]|table|tbody|tr[j]|(role)")

        * Cells of this row; each cell's content is in its <text> child.
        lnCount_k = loXml.NumChildrenHavingTag("html|body|div[i]|table|tbody|tr[j]|td")
        FOR k = 0 TO lnCount_k - 1
            loXml.K = k
            lcText = loXml.GetChildContent("html|body|div[i]|table|tbody|tr[j]|td[k]|text")
        ENDFOR
    ENDFOR
ENDFOR
* Step 3: build the CSV from the table rows, using the same traversal as the
* generated parsing code but writing each cell into a Chilkat.Csv object.
LOCAL lnRow

loCsv = CreateObject('Chilkat.Csv')
loCsv.SetColumnName(0,"Ticker")
loCsv.SetColumnName(1,"Name")
loCsv.SetColumnName(2,"Sector")
loCsv.SetColumnName(3,"Country")
loCsv.SetColumnName(4,"Weight")
loCsv.SetColumnName(5,"Notional Value")

* lnRow is the next CSV row to fill. It keeps counting across <div>s so that
* rows from a second table are appended rather than written over the rows of
* the first (the per-table index j restarts at 0 for every table).
lnRow = 0
lnCount_i = loXml.NumChildrenHavingTag("html|body|div")
FOR i = 0 TO lnCount_i - 1
    loXml.I = i
    lnCount_j = loXml.NumChildrenHavingTag("html|body|div[i]|table|tbody|tr")
    FOR j = 0 TO lnCount_j - 1
        loXml.J = j
        lnCount_k = loXml.NumChildrenHavingTag("html|body|div[i]|table|tbody|tr[j]|td")
        FOR k = 0 TO lnCount_k - 1
            loXml.K = k
            loCsv.SetCell(lnRow,k,loXml.GetChildContent("html|body|div[i]|table|tbody|tr[j]|td[k]|text"))
        ENDFOR
        * Advance once per <tr>, even if it had no cells, so a single table
        * maps to exactly the same CSV row numbers as before.
        lnRow = lnRow + 1
    ENDFOR
ENDFOR

* Write the CSV to disk. A failure here is reported but does not stop the
* script, so the CSV text is still printed below.
lnSuccess = loCsv.SaveFile("qa_output/brasil_etf.csv")
IF lnSuccess <> 1
    ? loCsv.LastErrorText
ENDIF

lcCsvStr = loCsv.SaveToString()
? lcCsvStr

* Our CSV looks like this:
* Ticker,Name,Sector,Country,Weight,Notional Value
* ITUB4,ITAU UNIBANCO HOLDING PREF SA,Financials,Brazil,10.94,"998,954,813.73"
* BBDC4,BANCO BRADESCO PREF SA,Financials,Brazil,9.01,"822,164,622.75"
* VALE3,CIA VALE DO RIO DOCE SH,Materials,Brazil,8.60,"785,290,260.07"
* PETR4,PETROLEO BRASILEIRO PREF SA,Energy,Brazil,5.68,"518,124,434.10"
* PETR3,PETROBRAS,Energy,Brazil,4.86,"443,254,438.53"
* B3SA3,B3 BRASIL BOLSA BALCAO SA,Financials,Brazil,4.57,"417,636,740.16"
* ABEV3,AMBEV SA,Consumer Staples,Brazil,4.57,"417,216,913.63"
* BBAS3,BANCO DO BRASIL SA,Financials,Brazil,3.25,"296,921,232.15"
* ITSA4,ITAUSA INVESTIMENTOS ITAU PREF SA,Financials,Brazil,2.90,"265,153,684.52"
* LREN3,LOJAS RENNER SA,Consumer Discretionary,Brazil,2.25,"205,832,175.98"
*
* Release every Chilkat object created by this script.
RELEASE loHttp, loBdHtml, loHtx, loSbXml, loXml, loCsv