VB.NET
VB.NET
HTML Table to CSV
See more HTML-to-XML/Text Examples
Demonstrates a method for converting an HTML table to a CSV file. Note: This example requires Chilkat v9.5.0.77 or greater.
Chilkat VB.NET Downloads
Dim success As Boolean = False

' This example requires the Chilkat API to have been previously unlocked.
' See Global Unlock Sample for sample code.

' Step 1: download the HTML page containing the table into a binary buffer.
Dim http As New Chilkat.Http
Dim bdHtml As New Chilkat.BinData
success = http.QuickGetBd("https://example-code.com/data/etf_table.html",bdHtml)
If Not success Then
    Debug.WriteLine(http.LastErrorText)
    Exit Sub
End If

' Step 2: convert the HTML to XML text held in a Chilkat StringBuilder.
Dim htx As New Chilkat.HtmlToXml
htx.SetHtmlBd(bdHtml)
Dim sbXml As New Chilkat.StringBuilder
success = htx.ToXmlSb(sbXml)
If Not success Then
    ' Without this check a failed conversion would continue with empty XML.
    Debug.WriteLine(htx.LastErrorText)
    Exit Sub
End If

' Step 3: load the XML text into an Xml object so it can be pruned and walked.
Dim xml As New Chilkat.Xml
success = xml.LoadSb(sbXml,True)
If Not success Then
    Debug.WriteLine(xml.LastErrorText)
    Exit Sub
End If

' Remove attributes and sub-trees we don't need.
' (In other words, we're getting rid of clutter...)
' The removal counts are informational only; nothing below depends on them.
Dim numRemoved As Integer = xml.PruneTag("thead")
numRemoved = xml.PruneAttribute("style")
numRemoved = xml.PruneAttribute("class")

' Scrub the element and attribute content.
xml.Scrub("ContentTrimEnds,ContentTrimInside,AttrTrimEnds,AttrTrimInside")

' Let's see what we have...
Debug.WriteLine(xml.GetXml())
' We have the following XML.
' Copy this XML into the online "Generate Parsing Code from XML" tool
' to get a starting point for accessing the data.
' <?xml version="1.0" encoding="utf-8"?>
' <root>
' <html>
' <head>
' <meta http-equiv="content-type" content="text/html; charset=UTF-8"/>
' </head>
' <body text="#000000" bgcolor="#FFFFFF">
' <div>
' <div>
' <table role="grid" data-scrollx="true" data-sortdirection="desc" data-sorton="-1"/>
' </div>
' </div>
' <div>
' <table id="topHoldingsTable" role="grid" data-scrollx="true" data-sortdirection="desc" data-sorton="-1">
' <tbody>
' <tr role="row">
' <td>
' <text>ITUB4</text>
' </td>
' <td>
' <text>ITAU UNIBANCO HOLDING PREF SA</text>
' </td>
' <td>
' <text>Financials</text>
' </td>
' <td>
' <text>Brazil</text>
' </td>
' <td>
' <text>10.94</text>
' </td>
' <td>
' <text>998,954,813.73</text>
' </td>
' </tr>
' <tr role="row">
' <td>
' <text>BBDC4</text>
' </td>
' <td>
' <text>BANCO BRADESCO PREF SA</text>
' </td>
' <td>
' <text>Financials</text>
' </td>
' <td>
' <text>Brazil</text>
' </td>
' <td>
' <text>9.01</text>
' </td>
' <td>
' <text>822,164,622.75</text>
' </td>
' </tr>
' ...
' ...
' ...
' </tbody>
' </table>
' </div>
' </body>
' </html>
' </root>
'
' This is the code generated by the online tool:
'
' Loop indices and child counts for the three nesting levels (div, tr, td).
' The CSV-building code further down reuses these variables.
Dim i As Integer
Dim count_i As Integer
' Attribute values read from each table element.
Dim table_role As String
Dim table_data_scrollx As String
Dim table_data_sortdirection As String
Dim table_data_sorton As String
Dim table_id As String
Dim j As Integer
Dim count_j As Integer
Dim tr_role As String
Dim k As Integer
Dim count_k As Integer
Dim tagPath As String
Dim text As String

' Walk every div under html|body. The [i], [j] and [k] placeholders in the
' path strings are driven by the xml.I, xml.J and xml.K properties set below.
count_i = xml.NumChildrenHavingTag("html|body|div")
For i = 0 To count_i - 1
    xml.I = i

    ' Attributes of a table nested one div deeper (div|div|table).
    table_role = xml.ChilkatPath("html|body|div[i]|div|table|(role)")
    table_data_scrollx = xml.ChilkatPath("html|body|div[i]|div|table|(data-scrollx)")
    table_data_sortdirection = xml.ChilkatPath("html|body|div[i]|div|table|(data-sortdirection)")
    table_data_sorton = xml.ChilkatPath("html|body|div[i]|div|table|(data-sorton)")

    ' Attributes of a table that is a direct child of the div (div|table).
    ' These assignments overwrite the values read just above.
    table_id = xml.ChilkatPath("html|body|div[i]|table|(id)")
    table_role = xml.ChilkatPath("html|body|div[i]|table|(role)")
    table_data_scrollx = xml.ChilkatPath("html|body|div[i]|table|(data-scrollx)")
    table_data_sortdirection = xml.ChilkatPath("html|body|div[i]|table|(data-sortdirection)")
    table_data_sorton = xml.ChilkatPath("html|body|div[i]|table|(data-sorton)")

    ' Each tr under the table body.
    count_j = xml.NumChildrenHavingTag("html|body|div[i]|table|tbody|tr")
    For j = 0 To count_j - 1
        xml.J = j
        tr_role = xml.ChilkatPath("html|body|div[i]|table|tbody|tr[j]|(role)")

        ' Each td in the row; the cell content is in a child "text" element.
        count_k = xml.NumChildrenHavingTag("html|body|div[i]|table|tbody|tr[j]|td")
        For k = 0 To count_k - 1
            xml.K = k
            text = xml.GetChildContent("html|body|div[i]|table|tbody|tr[j]|td[k]|text")
        Next
    Next
Next
' Let's modify the above code to build the CSV.
' The loop variables (i, j, k and count_i, count_j, count_k) are the ones
' declared for the generated traversal code above.
Dim csv As New Chilkat.Csv
csv.SetColumnName(0,"Ticker")
csv.SetColumnName(1,"Name")
csv.SetColumnName(2,"Sector")
csv.SetColumnName(3,"Country")
csv.SetColumnName(4,"Weight")
csv.SetColumnName(5,"Notional Value")

' csvRow is the next CSV row to fill. It runs across all tables, so rows from
' a second populated table are appended instead of overwriting rows 0..n of
' the first one (which is what indexing the CSV by the per-table j would do).
Dim csvRow As Integer = 0

count_i = xml.NumChildrenHavingTag("html|body|div")
For i = 0 To count_i - 1
    xml.I = i

    count_j = xml.NumChildrenHavingTag("html|body|div[i]|table|tbody|tr")
    For j = 0 To count_j - 1
        xml.J = j

        ' Copy each td's text into the matching column of the current CSV row.
        count_k = xml.NumChildrenHavingTag("html|body|div[i]|table|tbody|tr[j]|td")
        For k = 0 To count_k - 1
            xml.K = k
            csv.SetCell(csvRow,k,xml.GetChildContent("html|body|div[i]|table|tbody|tr[j]|td[k]|text"))
        Next

        csvRow = csvRow + 1
    Next
Next

' Report a failed save instead of ignoring it. The CSV text is still printed
' below so the result can be inspected even when the file could not be written.
Dim saved As Boolean = csv.SaveFile("qa_output/brasil_etf.csv")
If Not saved Then
    Debug.WriteLine(csv.LastErrorText)
End If

Dim csvStr As String = csv.SaveToString()
Debug.WriteLine(csvStr)

' Our CSV looks like this:
' Ticker,Name,Sector,Country,Weight,Notional Value
' ITUB4,ITAU UNIBANCO HOLDING PREF SA,Financials,Brazil,10.94,"998,954,813.73"
' BBDC4,BANCO BRADESCO PREF SA,Financials,Brazil,9.01,"822,164,622.75"
' VALE3,CIA VALE DO RIO DOCE SH,Materials,Brazil,8.60,"785,290,260.07"
' PETR4,PETROLEO BRASILEIRO PREF SA,Energy,Brazil,5.68,"518,124,434.10"
' PETR3,PETROBRAS,Energy,Brazil,4.86,"443,254,438.53"
' B3SA3,B3 BRASIL BOLSA BALCAO SA,Financials,Brazil,4.57,"417,636,740.16"
' ABEV3,AMBEV SA,Consumer Staples,Brazil,4.57,"417,216,913.63"
' BBAS3,BANCO DO BRASIL SA,Financials,Brazil,3.25,"296,921,232.15"
' ITSA4,ITAUSA INVESTIMENTOS ITAU PREF SA,Financials,Brazil,2.90,"265,153,684.52"
' LREN3,LOJAS RENNER SA,Consumer Discretionary,Brazil,2.25,"205,832,175.98"
'