Chilkat HOME Android™ AutoIt C C# C++ Chilkat2-Python CkPython Classic ASP DataFlex Delphi DLL Go Java Node.js Objective-C PHP Extension Perl PowerBuilder PowerShell PureBasic Ruby SQL Server Swift Tcl Unicode C Unicode C++ VB.NET VBScript Visual Basic 6.0 Visual FoxPro Xojo Plugin
(Tcl) HTML Table to CSV
Demonstrates a method for converting an HTML table to a CSV file. Note: This example requires Chilkat v9.5.0.77 or greater.
load ./chilkat.dll

# This example requires the Chilkat API to have been previously unlocked.
# See Global Unlock Sample for sample code.

# First download the HTML containing the table
set http [new_CkHttp]
set bdHtml [new_CkBinData]
set success [CkHttp_QuickGetBd $http "https://example-code.com/data/etf_table.html" $bdHtml]
if {$success != 1} then {
    puts [CkHttp_lastErrorText $http]
    delete_CkHttp $http
    delete_CkBinData $bdHtml
    exit
}

# Convert to XML.
set htx [new_CkHtmlToXml]
CkHtmlToXml_SetHtmlBd $htx $bdHtml
set sbXml [new_CkStringBuilder]
set success [CkHtmlToXml_ToXmlSb $htx $sbXml]
if {$success != 1} then {
    # Do not continue with an empty/partial XML document.
    puts [CkHtmlToXml_lastErrorText $htx]
    delete_CkHttp $http
    delete_CkBinData $bdHtml
    delete_CkHtmlToXml $htx
    delete_CkStringBuilder $sbXml
    exit
}

set xml [new_CkXml]
set success [CkXml_LoadSb $xml $sbXml 1]
if {$success != 1} then {
    # The converted text could not be parsed as XML; nothing below would work.
    puts [CkXml_lastErrorText $xml]
    delete_CkHttp $http
    delete_CkBinData $bdHtml
    delete_CkHtmlToXml $htx
    delete_CkStringBuilder $sbXml
    delete_CkXml $xml
    exit
}

# Remove attributes and sub-trees we don't need.
# (In other words, we're getting rid of clutter...)
set numRemoved [CkXml_PruneTag $xml "thead"]
set numRemoved [CkXml_PruneAttribute $xml "style"]
set numRemoved [CkXml_PruneAttribute $xml "class"]

# Scrub the element and attribute content.
CkXml_Scrub $xml "ContentTrimEnds,ContentTrimInside,AttrTrimEnds,AttrTrimInside"

# Let's see what we have...
puts [CkXml_getXml $xml]

# We have the following XML.
# Copy this XML into the online tool at Generate Parsing Code from XML
# as a starting point for accessing the data.
# <?xml version="1.0" encoding="utf-8"?>
# <root>
#     <html>
#         <head>
#             <meta http-equiv="content-type" content="text/html; charset=UTF-8"/>
#         </head>
#         <body text="#000000" bgcolor="#FFFFFF">
#             <div>
#                 <div>
#                     <table role="grid" data-scrollx="true" data-sortdirection="desc" data-sorton="-1"/>
#                 </div>
#             </div>
#             <div>
#                 <table id="topHoldingsTable" role="grid" data-scrollx="true" data-sortdirection="desc" data-sorton="-1">
#                     <tbody>
#                         <tr role="row">
#                             <td>
#                                 <text>ITUB4</text>
#                             </td>
#                             <td>
#                                 <text>ITAU UNIBANCO HOLDING PREF SA</text>
#                             </td>
#                             <td>
#                                 <text>Financials</text>
#                             </td>
#                             <td>
#                                 <text>Brazil</text>
#                             </td>
#                             <td>
#                                 <text>10.94</text>
#                             </td>
#                             <td>
#                                 <text>998,954,813.73</text>
#                             </td>
#                         </tr>
#                         <tr role="row">
#                             <td>
#                                 <text>BBDC4</text>
#                             </td>
#                             <td>
#                                 <text>BANCO BRADESCO PREF SA</text>
#                             </td>
#                             <td>
#                                 <text>Financials</text>
#                             </td>
#                             <td>
#                                 <text>Brazil</text>
#                             </td>
#                             <td>
#                                 <text>9.01</text>
#                             </td>
#                             <td>
#                                 <text>822,164,622.75</text>
#                             </td>
#                         </tr>
#                         ...
#                         ...
#                         ...
#                     </tbody>
#                 </table>
#             </div>
#         </body>
#     </html>
# </root>
#
# This is the code generated by the online tool:
#
# NOTE: ChilkatPath strings that contain [i], [j] or [k] are brace-quoted.
# Inside double quotes Tcl would treat [i] as command substitution and fail
# with: invalid command name "i". The placeholders must reach Chilkat
# literally; their values are supplied through CkXml_put_I / put_J / put_K.

set count_i [CkXml_NumChildrenHavingTag $xml "html|body|div"]
for {set i 0} {$i < $count_i} {incr i} {
    CkXml_put_I $xml $i

    # Attributes of a table nested one div deeper (div[i]|div|table).
    set table_role [CkXml_chilkatPath $xml {html|body|div[i]|div|table|(role)}]
    set table_data_scrollx [CkXml_chilkatPath $xml {html|body|div[i]|div|table|(data-scrollx)}]
    set table_data_sortdirection [CkXml_chilkatPath $xml {html|body|div[i]|div|table|(data-sortdirection)}]
    set table_data_sorton [CkXml_chilkatPath $xml {html|body|div[i]|div|table|(data-sorton)}]

    # Attributes of a table directly under div[i]. These reuse the variable
    # names above, so the nested-table values are overwritten here.
    set table_id [CkXml_chilkatPath $xml {html|body|div[i]|table|(id)}]
    set table_role [CkXml_chilkatPath $xml {html|body|div[i]|table|(role)}]
    set table_data_scrollx [CkXml_chilkatPath $xml {html|body|div[i]|table|(data-scrollx)}]
    set table_data_sortdirection [CkXml_chilkatPath $xml {html|body|div[i]|table|(data-sortdirection)}]
    set table_data_sorton [CkXml_chilkatPath $xml {html|body|div[i]|table|(data-sorton)}]

    # Walk each row of the table under div[i] ...
    set count_j [CkXml_NumChildrenHavingTag $xml {html|body|div[i]|table|tbody|tr}]
    for {set j 0} {$j < $count_j} {incr j} {
        CkXml_put_J $xml $j
        set tr_role [CkXml_chilkatPath $xml {html|body|div[i]|table|tbody|tr[j]|(role)}]

        # ... and each cell of that row.
        set count_k [CkXml_NumChildrenHavingTag $xml {html|body|div[i]|table|tbody|tr[j]|td}]
        for {set k 0} {$k < $count_k} {incr k} {
            CkXml_put_K $xml $k
            set text [CkXml_getChildContent $xml {html|body|div[i]|table|tbody|tr[j]|td[k]|text}]
        }
    }
}

# Let's modify the above code to build the CSV.
# Build the CSV: one CSV row per <tr>, one CSV column per <td>.
set csv [new_CkCsv]
CkCsv_SetColumnName $csv 0 "Ticker"
CkCsv_SetColumnName $csv 1 "Name"
CkCsv_SetColumnName $csv 2 "Sector"
CkCsv_SetColumnName $csv 3 "Country"
CkCsv_SetColumnName $csv 4 "Weight"
CkCsv_SetColumnName $csv 5 "Notional Value"

# ChilkatPath strings containing [i], [j] or [k] are brace-quoted so that Tcl
# does not perform command substitution on them (double quotes would make Tcl
# try to run a command named "i"). The index values are supplied to Chilkat
# through CkXml_put_I / put_J / put_K.

# Running CSV row index. It is kept separate from j, which restarts at 0 for
# every div; using j directly would let rows from a later table overwrite
# rows already written from an earlier one.
set row 0

set count_i [CkXml_NumChildrenHavingTag $xml "html|body|div"]
for {set i 0} {$i < $count_i} {incr i} {
    CkXml_put_I $xml $i

    set count_j [CkXml_NumChildrenHavingTag $xml {html|body|div[i]|table|tbody|tr}]
    for {set j 0} {$j < $count_j} {incr j} {
        CkXml_put_J $xml $j

        set count_k [CkXml_NumChildrenHavingTag $xml {html|body|div[i]|table|tbody|tr[j]|td}]
        for {set k 0} {$k < $count_k} {incr k} {
            CkXml_put_K $xml $k
            CkCsv_SetCell $csv $row $k [CkXml_getChildContent $xml {html|body|div[i]|table|tbody|tr[j]|td[k]|text}]
        }

        incr row
    }
}

# Report a failed save (e.g. missing output directory) instead of ignoring it.
# Execution continues so the CSV is still printed and the objects are released.
set success [CkCsv_SaveFile $csv "qa_output/brasil_etf.csv"]
if {$success != 1} then {
    puts [CkCsv_lastErrorText $csv]
}

set csvStr [CkCsv_saveToString $csv]
puts "$csvStr"

# Our CSV looks like this:

# Ticker,Name,Sector,Country,Weight,Notional Value
# ITUB4,ITAU UNIBANCO HOLDING PREF SA,Financials,Brazil,10.94,"998,954,813.73"
# BBDC4,BANCO BRADESCO PREF SA,Financials,Brazil,9.01,"822,164,622.75"
# VALE3,CIA VALE DO RIO DOCE SH,Materials,Brazil,8.60,"785,290,260.07"
# PETR4,PETROLEO BRASILEIRO PREF SA,Energy,Brazil,5.68,"518,124,434.10"
# PETR3,PETROBRAS,Energy,Brazil,4.86,"443,254,438.53"
# B3SA3,B3 BRASIL BOLSA BALCAO SA,Financials,Brazil,4.57,"417,636,740.16"
# ABEV3,AMBEV SA,Consumer Staples,Brazil,4.57,"417,216,913.63"
# BBAS3,BANCO DO BRASIL SA,Financials,Brazil,3.25,"296,921,232.15"
# ITSA4,ITAUSA INVESTIMENTOS ITAU PREF SA,Financials,Brazil,2.90,"265,153,684.52"
# LREN3,LOJAS RENNER SA,Consumer Discretionary,Brazil,2.25,"205,832,175.98"
#

# Release every Chilkat object created by this script.
delete_CkHttp $http
delete_CkBinData $bdHtml
delete_CkHtmlToXml $htx
delete_CkStringBuilder $sbXml
delete_CkXml $xml
delete_CkCsv $csv
© 2000-2025 Chilkat Software, Inc. All Rights Reserved.