Unicode C
Unicode C
HTML Table to CSV
See more HTML-to-XML/Text Examples
Demonstrates a method for converting an HTML table to a CSV file. Note: This example requires Chilkat v9.5.0.77 or greater.
Chilkat Unicode C Downloads
#include <stdio.h>
#include <wchar.h>

#include <C_CkHttpW.h>
#include <C_CkBinDataW.h>
#include <C_CkHtmlToXmlW.h>
#include <C_CkStringBuilderW.h>
#include <C_CkXmlW.h>
#include <C_CkCsvW.h>
/*
 * Prints one wide string followed by a newline.
 *
 * "%ls" is used instead of "%s": in a standard C wide-character format string
 * "%s" expects a char*, while "%ls" expects a wchar_t* (and is also accepted
 * by MSVC). A NULL argument is printed as an empty line instead of being
 * handed to wprintf.
 */
static void printWideLine(const wchar_t *s)
{
    wprintf(L"%ls\n", s ? s : L"");
}

/*
 * Copies the <text> content of every <td> found under
 *     html|body|div[i]|table|tbody|tr[j]|td[k]
 * into csv, one CSV row per <tr>.
 *
 * The CSV row index is a running counter across all div/table pairs, so rows
 * from a second table are appended after the first table's rows rather than
 * overwriting them.
 *
 * Returns the number of <tr> rows that were visited.
 */
static int appendTableRows(HCkXmlW xml, HCkCsvW csv)
{
    int row = 0;
    int divCount;
    int i;

    divCount = CkXmlW_NumChildrenHavingTag(xml, L"html|body|div");
    for (i = 0; i < divCount; i++) {
        int rowCount;
        int j;

        /* [i], [j] and [k] in a path are substituted from the I/J/K properties. */
        CkXmlW_putI(xml, i);
        rowCount = CkXmlW_NumChildrenHavingTag(xml, L"html|body|div[i]|table|tbody|tr");
        for (j = 0; j < rowCount; j++) {
            int cellCount;
            int k;

            CkXmlW_putJ(xml, j);
            cellCount = CkXmlW_NumChildrenHavingTag(xml, L"html|body|div[i]|table|tbody|tr[j]|td");
            for (k = 0; k < cellCount; k++) {
                const wchar_t *text;

                CkXmlW_putK(xml, k);
                text = CkXmlW_getChildContent(xml, L"html|body|div[i]|table|tbody|tr[j]|td[k]|text");
                /* Guard against a NULL return (e.g. a <td> without <text>). */
                CkCsvW_SetCell(csv, row, k, text ? text : L"");
            }
            row++;
        }
    }
    return row;
}

/*
 * Downloads an HTML page containing a table, converts the HTML to XML,
 * strips clutter from the XML, and writes the table body out as CSV
 * (to the file qa_output/brasil_etf.csv and to stdout).
 *
 * This example requires the Chilkat API to have been previously unlocked.
 * See Global Unlock Sample for sample code.
 *
 * On any failure in the download/convert/load steps the relevant
 * LastErrorText is printed and the function returns after releasing
 * every Chilkat object it created.
 */
void ChilkatSample(void)
{
    static const wchar_t *const columnNames[] = {
        L"Ticker",
        L"Name",
        L"Sector",
        L"Country",
        L"Weight",
        L"Notional Value"
    };
    const int columnCount = (int)(sizeof columnNames / sizeof columnNames[0]);

    BOOL success = FALSE;
    int col;

    /* All objects are created up front so a single cleanup path can release them. */
    HCkHttpW http = CkHttpW_Create();
    HCkBinDataW bdHtml = CkBinDataW_Create();
    HCkHtmlToXmlW htx = CkHtmlToXmlW_Create();
    HCkStringBuilderW sbXml = CkStringBuilderW_Create();
    HCkXmlW xml = CkXmlW_Create();
    HCkCsvW csv = CkCsvW_Create();

    // First download the HTML containing the table
    success = CkHttpW_QuickGetBd(http, L"https://example-code.com/data/etf_table.html", bdHtml);
    if (success != TRUE) {
        printWideLine(CkHttpW_lastErrorText(http));
        goto cleanup;
    }

    // Convert to XML.
    success = CkHtmlToXmlW_SetHtmlBd(htx, bdHtml);
    if (success == TRUE) {
        success = CkHtmlToXmlW_ToXmlSb(htx, sbXml);
    }
    if (success != TRUE) {
        printWideLine(CkHtmlToXmlW_lastErrorText(htx));
        goto cleanup;
    }

    success = CkXmlW_LoadSb(xml, sbXml, TRUE);
    if (success != TRUE) {
        printWideLine(CkXmlW_lastErrorText(xml));
        goto cleanup;
    }

    // Remove attributes and sub-trees we don't need.
    // (In other words, we're getting rid of clutter...)
    // The returned removal counts are not needed here.
    (void)CkXmlW_PruneTag(xml, L"thead");
    (void)CkXmlW_PruneAttribute(xml, L"style");
    (void)CkXmlW_PruneAttribute(xml, L"class");

    // Scrub the element and attribute content.
    CkXmlW_Scrub(xml, L"ContentTrimEnds,ContentTrimInside,AttrTrimEnds,AttrTrimInside");

    // Let's see what we have...
    printWideLine(CkXmlW_getXml(xml));

    // We have the following XML.
    // Copy this XML into the online tool at Generate Parsing Code from XML
    // as a starting point for accessing the data.
    //
    // <?xml version="1.0" encoding="utf-8"?>
    // <root>
    //   <html>
    //     <head>
    //       <meta http-equiv="content-type" content="text/html; charset=UTF-8"/>
    //     </head>
    //     <body text="#000000" bgcolor="#FFFFFF">
    //       <div>
    //         <div>
    //           <table role="grid" data-scrollx="true" data-sortdirection="desc" data-sorton="-1"/>
    //         </div>
    //       </div>
    //       <div>
    //         <table id="topHoldingsTable" role="grid" data-scrollx="true" data-sortdirection="desc" data-sorton="-1">
    //           <tbody>
    //             <tr role="row">
    //               <td><text>ITUB4</text></td>
    //               <td><text>ITAU UNIBANCO HOLDING PREF SA</text></td>
    //               <td><text>Financials</text></td>
    //               <td><text>Brazil</text></td>
    //               <td><text>10.94</text></td>
    //               <td><text>998,954,813.73</text></td>
    //             </tr>
    //             <tr role="row">
    //               <td><text>BBDC4</text></td>
    //               <td><text>BANCO BRADESCO PREF SA</text></td>
    //               <td><text>Financials</text></td>
    //               <td><text>Brazil</text></td>
    //               <td><text>9.01</text></td>
    //               <td><text>822,164,622.75</text></td>
    //             </tr>
    //             ...
    //           </tbody>
    //         </table>
    //       </div>
    //     </body>
    //   </html>
    // </root>
    //
    // The traversal in appendTableRows() is the code generated by the online
    // tool for this XML, reduced to the parts needed to build the CSV.

    for (col = 0; col < columnCount; col++) {
        CkCsvW_SetColumnName(csv, col, columnNames[col]);
    }

    (void)appendTableRows(xml, csv);

    // Report a failed save, but still print the CSV below.
    success = CkCsvW_SaveFile(csv, L"qa_output/brasil_etf.csv");
    if (success != TRUE) {
        printWideLine(CkCsvW_lastErrorText(csv));
    }

    printWideLine(CkCsvW_saveToString(csv));

    // Our CSV looks like this:
    // Ticker,Name,Sector,Country,Weight,Notional Value
    // ITUB4,ITAU UNIBANCO HOLDING PREF SA,Financials,Brazil,10.94,"998,954,813.73"
    // BBDC4,BANCO BRADESCO PREF SA,Financials,Brazil,9.01,"822,164,622.75"
    // VALE3,CIA VALE DO RIO DOCE SH,Materials,Brazil,8.60,"785,290,260.07"
    // PETR4,PETROLEO BRASILEIRO PREF SA,Energy,Brazil,5.68,"518,124,434.10"
    // PETR3,PETROBRAS,Energy,Brazil,4.86,"443,254,438.53"
    // B3SA3,B3 BRASIL BOLSA BALCAO SA,Financials,Brazil,4.57,"417,636,740.16"
    // ABEV3,AMBEV SA,Consumer Staples,Brazil,4.57,"417,216,913.63"
    // BBAS3,BANCO DO BRASIL SA,Financials,Brazil,3.25,"296,921,232.15"
    // ITSA4,ITAUSA INVESTIMENTOS ITAU PREF SA,Financials,Brazil,2.90,"265,153,684.52"
    // LREN3,LOJAS RENNER SA,Consumer Discretionary,Brazil,2.25,"205,832,175.98"
    //

cleanup:
    CkHttpW_Dispose(http);
    CkBinDataW_Dispose(bdHtml);
    CkHtmlToXmlW_Dispose(htx);
    CkStringBuilderW_Dispose(sbXml);
    CkXmlW_Dispose(xml);
    CkCsvW_Dispose(csv);
}