Chilkat HOME Android™ AutoIt C C# C++ Chilkat2-Python CkPython Classic ASP DataFlex Delphi DLL Go Java Node.js Objective-C PHP Extension Perl PowerBuilder PowerShell PureBasic Ruby SQL Server Swift Tcl Unicode C Unicode C++ VB.NET VBScript Visual Basic 6.0 Visual FoxPro Xojo Plugin
(Unicode C) HTML Table to CSV: Demonstrates a method for converting an HTML table to a CSV file. Note: This example requires Chilkat v9.5.0.77 or greater.
#include <wchar.h>

#include <C_CkHttpW.h>
#include <C_CkBinDataW.h>
#include <C_CkHtmlToXmlW.h>
#include <C_CkStringBuilderW.h>
#include <C_CkXmlW.h>
#include <C_CkCsvW.h>

/*
 * Downloads an HTML page containing a table, converts the HTML to XML,
 * strips attributes/sub-trees that are not needed, and copies every
 * <td> text of every table row into a CSV that is saved to
 * "qa_output/brasil_etf.csv" and printed to stdout.
 *
 * Errors from the download, the HTML-to-XML conversion, the XML load and
 * the CSV save are reported through the owning object's lastErrorText and
 * end the sample early. Every Chilkat object created here is disposed on
 * every exit path.
 */
void ChilkatSample(void)
{
    HCkHttpW http;
    HCkBinDataW bdHtml;
    HCkHtmlToXmlW htx;
    HCkStringBuilderW sbXml;
    HCkXmlW xml;
    HCkCsvW csv;
    BOOL success;
    int i;
    int count_i;
    int j;
    int count_j;
    int k;
    int count_k;
    int row;
    const wchar_t *text;
    const wchar_t *csvStr;

    // This example requires the Chilkat API to have been previously unlocked.
    // See Global Unlock Sample for sample code.

    // Create every object up front so a single cleanup path can dispose them all.
    http = CkHttpW_Create();
    bdHtml = CkBinDataW_Create();
    htx = CkHtmlToXmlW_Create();
    sbXml = CkStringBuilderW_Create();
    xml = CkXmlW_Create();
    csv = CkCsvW_Create();

    // First download the HTML containing the table.
    // Note: wide strings are printed with %ls; %s in a wide format string
    // means a narrow char* in standard C (only MSVC treats it as wide).
    success = CkHttpW_QuickGetBd(http,L"https://example-code.com/data/etf_table.html",bdHtml);
    if (success != TRUE) {
        wprintf(L"%ls\n",CkHttpW_lastErrorText(http));
        goto cleanup;
    }

    // Convert to XML.
    CkHtmlToXmlW_SetHtmlBd(htx,bdHtml);
    success = CkHtmlToXmlW_ToXmlSb(htx,sbXml);
    if (success != TRUE) {
        wprintf(L"%ls\n",CkHtmlToXmlW_lastErrorText(htx));
        goto cleanup;
    }

    success = CkXmlW_LoadSb(xml,sbXml,TRUE);
    if (success != TRUE) {
        wprintf(L"%ls\n",CkXmlW_lastErrorText(xml));
        goto cleanup;
    }

    // Remove attributes and sub-trees we don't need.
    // (In other words, we're getting rid of clutter...)
    // The removal counts returned by the Prune* calls are not needed here.
    CkXmlW_PruneTag(xml,L"thead");
    CkXmlW_PruneAttribute(xml,L"style");
    CkXmlW_PruneAttribute(xml,L"class");

    // Scrub the element and attribute content.
    CkXmlW_Scrub(xml,L"ContentTrimEnds,ContentTrimInside,AttrTrimEnds,AttrTrimInside");

    // Let's see what we have...
    wprintf(L"%ls\n",CkXmlW_getXml(xml));

    // We have the following XML.
    // Copy this XML into the online tool at Generate Parsing Code from XML
    // as a starting point for accessing the data..

    // <?xml version="1.0" encoding="utf-8"?>
    // <root>
    //     <html>
    //         <head>
    //             <meta http-equiv="content-type" content="text/html; charset=UTF-8"/>
    //         </head>
    //         <body text="#000000" bgcolor="#FFFFFF">
    //             <div>
    //                 <div>
    //                     <table role="grid" data-scrollx="true" data-sortdirection="desc" data-sorton="-1"/>
    //                 </div>
    //             </div>
    //             <div>
    //                 <table id="topHoldingsTable" role="grid" data-scrollx="true" data-sortdirection="desc" data-sorton="-1">
    //                     <tbody>
    //                         <tr role="row">
    //                             <td>
    //                                 <text>ITUB4</text>
    //                             </td>
    //                             <td>
    //                                 <text>ITAU UNIBANCO HOLDING PREF SA</text>
    //                             </td>
    //                             <td>
    //                                 <text>Financials</text>
    //                             </td>
    //                             <td>
    //                                 <text>Brazil</text>
    //                             </td>
    //                             <td>
    //                                 <text>10.94</text>
    //                             </td>
    //                             <td>
    //                                 <text>998,954,813.73</text>
    //                             </td>
    //                         </tr>
    //                         <tr role="row">
    //                             <td>
    //                                 <text>BBDC4</text>
    //                             </td>
    //                             <td>
    //                                 <text>BANCO BRADESCO PREF SA</text>
    //                             </td>
    //                             <td>
    //                                 <text>Financials</text>
    //                             </td>
    //                             <td>
    //                                 <text>Brazil</text>
    //                             </td>
    //                             <td>
    //                                 <text>9.01</text>
    //                             </td>
    //                             <td>
    //                                 <text>822,164,622.75</text>
    //                             </td>
    //                         </tr>
    // ...
    // ...
    // ...
    //                     </tbody>
    //                 </table>
    //             </div>
    //         </body>
    //     </html>
    // </root>
    //
    // The traversal below is adapted from the code generated by the online tool:
    // it walks html|body|div[i]|table|tbody|tr[j]|td[k] and, instead of just
    // reading each <text> value, stores it in the CSV.

    CkCsvW_SetColumnName(csv,0,L"Ticker");
    CkCsvW_SetColumnName(csv,1,L"Name");
    CkCsvW_SetColumnName(csv,2,L"Sector");
    CkCsvW_SetColumnName(csv,3,L"Country");
    CkCsvW_SetColumnName(csv,4,L"Weight");
    CkCsvW_SetColumnName(csv,5,L"Notional Value");

    // Running CSV row index. Using the per-table <tr> index instead would make
    // rows from a second table overwrite rows from the first.
    row = 0;

    count_i = CkXmlW_NumChildrenHavingTag(xml,L"html|body|div");
    for (i = 0; i < count_i; i++) {
        // putI/putJ/putK bind the [i], [j], [k] placeholders used in the paths.
        CkXmlW_putI(xml,i);

        count_j = CkXmlW_NumChildrenHavingTag(xml,L"html|body|div[i]|table|tbody|tr");
        for (j = 0; j < count_j; j++) {
            CkXmlW_putJ(xml,j);

            count_k = CkXmlW_NumChildrenHavingTag(xml,L"html|body|div[i]|table|tbody|tr[j]|td");
            for (k = 0; k < count_k; k++) {
                CkXmlW_putK(xml,k);

                text = CkXmlW_getChildContent(xml,L"html|body|div[i]|table|tbody|tr[j]|td[k]|text");
                if (text == NULL) {
                    // Presumably returned when a <td> has no <text> child
                    // (an empty cell); store an empty CSV cell in that case.
                    text = L"";
                }
                CkCsvW_SetCell(csv,row,k,text);
            }

            row++;
        }
    }

    success = CkCsvW_SaveFile(csv,L"qa_output/brasil_etf.csv");
    if (success != TRUE) {
        wprintf(L"%ls\n",CkCsvW_lastErrorText(csv));
        goto cleanup;
    }

    csvStr = CkCsvW_saveToString(csv);
    wprintf(L"%ls\n",csvStr);

    // Our CSV looks like this:

    // Ticker,Name,Sector,Country,Weight,Notional Value
    // ITUB4,ITAU UNIBANCO HOLDING PREF SA,Financials,Brazil,10.94,"998,954,813.73"
    // BBDC4,BANCO BRADESCO PREF SA,Financials,Brazil,9.01,"822,164,622.75"
    // VALE3,CIA VALE DO RIO DOCE SH,Materials,Brazil,8.60,"785,290,260.07"
    // PETR4,PETROLEO BRASILEIRO PREF SA,Energy,Brazil,5.68,"518,124,434.10"
    // PETR3,PETROBRAS,Energy,Brazil,4.86,"443,254,438.53"
    // B3SA3,B3 BRASIL BOLSA BALCAO SA,Financials,Brazil,4.57,"417,636,740.16"
    // ABEV3,AMBEV SA,Consumer Staples,Brazil,4.57,"417,216,913.63"
    // BBAS3,BANCO DO BRASIL SA,Financials,Brazil,3.25,"296,921,232.15"
    // ITSA4,ITAUSA INVESTIMENTOS ITAU PREF SA,Financials,Brazil,2.90,"265,153,684.52"
    // LREN3,LOJAS RENNER SA,Consumer Discretionary,Brazil,2.25,"205,832,175.98"
    //

cleanup:
    CkHttpW_Dispose(http);
    CkBinDataW_Dispose(bdHtml);
    CkHtmlToXmlW_Dispose(htx);
    CkStringBuilderW_Dispose(sbXml);
    CkXmlW_Dispose(xml);
    CkCsvW_Dispose(csv);
}
© 2000-2025 Chilkat Software, Inc. All Rights Reserved.