Sample code for 30+ languages & platforms
PowerShell

HTML Table to CSV

See more HTML-to-XML/Text Examples

Demonstrates a method for converting an HTML table to a CSV file.

Note: This example requires Chilkat v9.5.0.77 or greater.

Chilkat PowerShell Downloads

PowerShell
Add-Type -Path "C:\chilkat\ChilkatDotNet47-x64\ChilkatDotNet47.dll"

$success = $false

# This example requires the Chilkat API to have been previously unlocked.
# See Global Unlock Sample for sample code.

# First download the HTML containing the table
$http = New-Object Chilkat.Http
$bdHtml = New-Object Chilkat.BinData

$success = $http.QuickGetBd("https://example-code.com/data/etf_table.html",$bdHtml)
if ($success -ne $true) {
    $($http.LastErrorText)
    exit
}

# Convert to XML.
$htx = New-Object Chilkat.HtmlToXml
$htx.SetHtmlBd($bdHtml)

$sbXml = New-Object Chilkat.StringBuilder
$htx.ToXmlSb($sbXml)

$xml = New-Object Chilkat.Xml
$xml.LoadSb($sbXml,$true)

# Remove attributes and sub-trees we don't need.
# (In other words, we're getting rid of clutter...)
$numRemoved = $xml.PruneTag("thead")
$numRemoved = $xml.PruneAttribute("style")
$numRemoved = $xml.PruneAttribute("class")

# Scrub the element and attribute content.
$xml.Scrub("ContentTrimEnds,ContentTrimInside,AttrTrimEnds,AttrTrimInside")

# Let's see what we have...
$($xml.GetXml())

# We have the following XML.
# Copy this XML into the online tool at Generate Parsing Code from XML
# as a starting point for accessing the data..

# <?xml version="1.0" encoding="utf-8"?>
# <root>
#     <html>
#         <head>
#             <meta http-equiv="content-type" content="text/html; charset=UTF-8"/>
#         </head>
#         <body text="#000000" bgcolor="#FFFFFF">
#             <div>
#                 <div>
#                     <table role="grid" data-scrollx="true" data-sortdirection="desc" data-sorton="-1"/>
#                 </div>
#             </div>
#             <div>
#                 <table id="topHoldingsTable" role="grid" data-scrollx="true" data-sortdirection="desc" data-sorton="-1">
#                     <tbody>
#                         <tr role="row">
#                             <td>
#                                 <text>ITUB4</text>
#                             </td>
#                             <td>
#                                 <text>ITAU UNIBANCO HOLDING PREF SA</text>
#                             </td>
#                             <td>
#                                 <text>Financials</text>
#                             </td>
#                             <td>
#                                 <text>Brazil</text>
#                             </td>
#                             <td>
#                                 <text>10.94</text>
#                             </td>
#                             <td>
#                                 <text>998,954,813.73</text>
#                             </td>
#                         </tr>
#                         <tr role="row">
#                             <td>
#                                 <text>BBDC4</text>
#                             </td>
#                             <td>
#                                 <text>BANCO BRADESCO PREF SA</text>
#                             </td>
#                             <td>
#                                 <text>Financials</text>
#                             </td>
#                             <td>
#                                 <text>Brazil</text>
#                             </td>
#                             <td>
#                                 <text>9.01</text>
#                             </td>
#                             <td>
#                                 <text>822,164,622.75</text>
#                             </td>
#                         </tr>
# 			...
# 			...
# 			...
#                     </tbody>
#                 </table>
#             </div>
#         </body>
#     </html>
# </root>

# 
# This is the code generated by the online tool:
# 

$i = 0
$count_i = $xml.NumChildrenHavingTag("html|body|div")
while ($i -lt $count_i) {
    $xml.I = $i
    $table_role = $xml.ChilkatPath("html|body|div[i]|div|table|(role)")
    $table_data_scrollx = $xml.ChilkatPath("html|body|div[i]|div|table|(data-scrollx)")
    $table_data_sortdirection = $xml.ChilkatPath("html|body|div[i]|div|table|(data-sortdirection)")
    $table_data_sorton = $xml.ChilkatPath("html|body|div[i]|div|table|(data-sorton)")
    $table_id = $xml.ChilkatPath("html|body|div[i]|table|(id)")
    $table_role = $xml.ChilkatPath("html|body|div[i]|table|(role)")
    $table_data_scrollx = $xml.ChilkatPath("html|body|div[i]|table|(data-scrollx)")
    $table_data_sortdirection = $xml.ChilkatPath("html|body|div[i]|table|(data-sortdirection)")
    $table_data_sorton = $xml.ChilkatPath("html|body|div[i]|table|(data-sorton)")
    $j = 0
    $count_j = $xml.NumChildrenHavingTag("html|body|div[i]|table|tbody|tr")
    while ($j -lt $count_j) {
        $xml.J = $j
        $tr_role = $xml.ChilkatPath("html|body|div[i]|table|tbody|tr[j]|(role)")
        $k = 0
        $count_k = $xml.NumChildrenHavingTag("html|body|div[i]|table|tbody|tr[j]|td")
        while ($k -lt $count_k) {
            $xml.K = $k
            $text = $xml.GetChildContent("html|body|div[i]|table|tbody|tr[j]|td[k]|text")
            $k = $k + 1
        }

        $j = $j + 1
    }

    $i = $i + 1
}

# Let's modify the above code to build the CSV.
$csv = New-Object Chilkat.Csv
$csv.SetColumnName(0,"Ticker")
$csv.SetColumnName(1,"Name")
$csv.SetColumnName(2,"Sector")
$csv.SetColumnName(3,"Country")
$csv.SetColumnName(4,"Weight")
$csv.SetColumnName(5,"Notional Vaue")

$i = 0
$count_i = $xml.NumChildrenHavingTag("html|body|div")
while ($i -lt $count_i) {
    $xml.I = $i
    $j = 0
    $count_j = $xml.NumChildrenHavingTag("html|body|div[i]|table|tbody|tr")
    while ($j -lt $count_j) {
        $xml.J = $j
        $k = 0
        $count_k = $xml.NumChildrenHavingTag("html|body|div[i]|table|tbody|tr[j]|td")
        while ($k -lt $count_k) {
            $xml.K = $k
            $csv.SetCell($j,$k,$xml.GetChildContent("html|body|div[i]|table|tbody|tr[j]|td[k]|text"))
            $k = $k + 1
        }

        $j = $j + 1
    }

    $i = $i + 1
}

$csv.SaveFile("qa_output/brasil_etf.csv")
$csvStr = $csv.SaveToString()
$($csvStr)

# Our CSV looks like this:
# Ticker,Name,Sector,Country,Weight,Notional Vaue
# ITUB4,ITAU UNIBANCO HOLDING PREF SA,Financials,Brazil,10.94,"998,954,813.73"
# BBDC4,BANCO BRADESCO PREF SA,Financials,Brazil,9.01,"822,164,622.75"
# VALE3,CIA VALE DO RIO DOCE SH,Materials,Brazil,8.60,"785,290,260.07"
# PETR4,PETROLEO BRASILEIRO PREF SA,Energy,Brazil,5.68,"518,124,434.10"
# PETR3,PETROBRAS,Energy,Brazil,4.86,"443,254,438.53"
# B3SA3,B3 BRASIL BOLSA BALCAO SA,Financials,Brazil,4.57,"417,636,740.16"
# ABEV3,AMBEV SA,Consumer Staples,Brazil,4.57,"417,216,913.63"
# BBAS3,BANCO DO BRASIL SA,Financials,Brazil,3.25,"296,921,232.15"
# ITSA4,ITAUSA INVESTIMENTOS ITAU PREF SA,Financials,Brazil,2.90,"265,153,684.52"
# LREN3,LOJAS RENNER SA,Consumer Discretionary,Brazil,2.25,"205,832,175.98"
#