HTML Table to CSV

Demonstrates a method for converting an HTML table to a CSV file.
Note: This example requires Chilkat v9.5.0.77 or greater.
Chilkat Go Downloads

    success := false

    // This example requires the Chilkat API to have been previously unlocked.
    // See Global Unlock Sample for sample code.

    // First download the HTML containing the table
    http := chilkat.NewHttp()
    bdHtml := chilkat.NewBinData()

    success = http.QuickGetBd("https://example-code.com/data/etf_table.html",bdHtml)
    if success != true {
        fmt.Println(http.LastErrorText())
        http.DisposeHttp()
        bdHtml.DisposeBinData()
        return
    }

    // Convert to XML.
    htx := chilkat.NewHtmlToXml()
    htx.SetHtmlBd(bdHtml)

    sbXml := chilkat.NewStringBuilder()
    htx.ToXmlSb(sbXml)

    xml := chilkat.NewXml()
    xml.LoadSb(sbXml,true)

    // Remove attributes and sub-trees we don't need.
    // (In other words, we're getting rid of clutter...)
    numRemoved := xml.PruneTag("thead")
    numRemoved = xml.PruneAttribute("style")
    numRemoved = xml.PruneAttribute("class")

    // Scrub the element and attribute content.
    xml.Scrub("ContentTrimEnds,ContentTrimInside,AttrTrimEnds,AttrTrimInside")

    // Let's see what we have...
    fmt.Println(*xml.GetXml())

    // We have the following XML.
    // Copy this XML into the online tool at Generate Parsing Code from XML
    // as a starting point for accessing the data..

    // <?xml version="1.0" encoding="utf-8"?>
    // <root>
    //     <html>
    //         <head>
    //             <meta http-equiv="content-type" content="text/html; charset=UTF-8"/>
    //         </head>
    //         <body text="#000000" bgcolor="#FFFFFF">
    //             <div>
    //                 <div>
    //                     <table role="grid" data-scrollx="true" data-sortdirection="desc" data-sorton="-1"/>
    //                 </div>
    //             </div>
    //             <div>
    //                 <table id="topHoldingsTable" role="grid" data-scrollx="true" data-sortdirection="desc" data-sorton="-1">
    //                     <tbody>
    //                         <tr role="row">
    //                             <td>
    //                                 <text>ITUB4</text>
    //                             </td>
    //                             <td>
    //                                 <text>ITAU UNIBANCO HOLDING PREF SA</text>
    //                             </td>
    //                             <td>
    //                                 <text>Financials</text>
    //                             </td>
    //                             <td>
    //                                 <text>Brazil</text>
    //                             </td>
    //                             <td>
    //                                 <text>10.94</text>
    //                             </td>
    //                             <td>
    //                                 <text>998,954,813.73</text>
    //                             </td>
    //                         </tr>
    //                         <tr role="row">
    //                             <td>
    //                                 <text>BBDC4</text>
    //                             </td>
    //                             <td>
    //                                 <text>BANCO BRADESCO PREF SA</text>
    //                             </td>
    //                             <td>
    //                                 <text>Financials</text>
    //                             </td>
    //                             <td>
    //                                 <text>Brazil</text>
    //                             </td>
    //                             <td>
    //                                 <text>9.01</text>
    //                             </td>
    //                             <td>
    //                                 <text>822,164,622.75</text>
    //                             </td>
    //                         </tr>
    // 			...
    // 			...
    // 			...
    //                     </tbody>
    //                 </table>
    //             </div>
    //         </body>
    //     </html>
    // </root>

    // 
    // This is the code generated by the online tool:
    // 
    var i int
    var count_i int
    var table_role *string = new(string)
    var table_data_scrollx *string = new(string)
    var table_data_sortdirection *string = new(string)
    var table_data_sorton *string = new(string)
    var table_id *string = new(string)
    var j int
    var count_j int
    var tr_role *string = new(string)
    var k int
    var count_k int
    var tagPath string
    var text *string = new(string)

    i = 0
    count_i = xml.NumChildrenHavingTag("html|body|div")
    for i < count_i {
        xml.SetI(i)
        table_role = xml.ChilkatPath("html|body|div[i]|div|table|(role)")
        table_data_scrollx = xml.ChilkatPath("html|body|div[i]|div|table|(data-scrollx)")
        table_data_sortdirection = xml.ChilkatPath("html|body|div[i]|div|table|(data-sortdirection)")
        table_data_sorton = xml.ChilkatPath("html|body|div[i]|div|table|(data-sorton)")
        table_id = xml.ChilkatPath("html|body|div[i]|table|(id)")
        table_role = xml.ChilkatPath("html|body|div[i]|table|(role)")
        table_data_scrollx = xml.ChilkatPath("html|body|div[i]|table|(data-scrollx)")
        table_data_sortdirection = xml.ChilkatPath("html|body|div[i]|table|(data-sortdirection)")
        table_data_sorton = xml.ChilkatPath("html|body|div[i]|table|(data-sorton)")
        j = 0
        count_j = xml.NumChildrenHavingTag("html|body|div[i]|table|tbody|tr")
        for j < count_j {
            xml.SetJ(j)
            tr_role = xml.ChilkatPath("html|body|div[i]|table|tbody|tr[j]|(role)")
            k = 0
            count_k = xml.NumChildrenHavingTag("html|body|div[i]|table|tbody|tr[j]|td")
            for k < count_k {
                xml.SetK(k)
                text = xml.GetChildContent("html|body|div[i]|table|tbody|tr[j]|td[k]|text")
                k = k + 1
            }

            j = j + 1
        }

        i = i + 1
    }

    // Let's modify the above code to build the CSV.
    csv := chilkat.NewCsv()
    csv.SetColumnName(0,"Ticker")
    csv.SetColumnName(1,"Name")
    csv.SetColumnName(2,"Sector")
    csv.SetColumnName(3,"Country")
    csv.SetColumnName(4,"Weight")
    csv.SetColumnName(5,"Notional Vaue")

    i = 0
    count_i = xml.NumChildrenHavingTag("html|body|div")
    for i < count_i {
        xml.SetI(i)
        j = 0
        count_j = xml.NumChildrenHavingTag("html|body|div[i]|table|tbody|tr")
        for j < count_j {
            xml.SetJ(j)
            k = 0
            count_k = xml.NumChildrenHavingTag("html|body|div[i]|table|tbody|tr[j]|td")
            for k < count_k {
                xml.SetK(k)
                csv.SetCell(j,k,*xml.GetChildContent("html|body|div[i]|table|tbody|tr[j]|td[k]|text"))
                k = k + 1
            }

            j = j + 1
        }

        i = i + 1
    }

    csv.SaveFile("qa_output/brasil_etf.csv")
    csvStr := csv.SaveToString()
    fmt.Println(*csvStr)

    // Our CSV looks like this:
    // Ticker,Name,Sector,Country,Weight,Notional Vaue
    // ITUB4,ITAU UNIBANCO HOLDING PREF SA,Financials,Brazil,10.94,"998,954,813.73"
    // BBDC4,BANCO BRADESCO PREF SA,Financials,Brazil,9.01,"822,164,622.75"
    // VALE3,CIA VALE DO RIO DOCE SH,Materials,Brazil,8.60,"785,290,260.07"
    // PETR4,PETROLEO BRASILEIRO PREF SA,Energy,Brazil,5.68,"518,124,434.10"
    // PETR3,PETROBRAS,Energy,Brazil,4.86,"443,254,438.53"
    // B3SA3,B3 BRASIL BOLSA BALCAO SA,Financials,Brazil,4.57,"417,636,740.16"
    // ABEV3,AMBEV SA,Consumer Staples,Brazil,4.57,"417,216,913.63"
    // BBAS3,BANCO DO BRASIL SA,Financials,Brazil,3.25,"296,921,232.15"
    // ITSA4,ITAUSA INVESTIMENTOS ITAU PREF SA,Financials,Brazil,2.90,"265,153,684.52"
    // LREN3,LOJAS RENNER SA,Consumer Discretionary,Brazil,2.25,"205,832,175.98"
    // 

    http.DisposeHttp()
    bdHtml.DisposeBinData()
    htx.DisposeHtmlToXml()
    sbXml.DisposeStringBuilder()
    xml.DisposeXml()
    csv.DisposeCsv()