Chilkat HOME Android™ AutoIt C C# C++ Chilkat2-Python CkPython Classic ASP DataFlex Delphi DLL Go Java Node.js Objective-C PHP Extension Perl PowerBuilder PowerShell PureBasic Ruby SQL Server Swift Tcl Unicode C Unicode C++ VB.NET VBScript Visual Basic 6.0 Visual FoxPro Xojo Plugin
(PHP Extension) HTML Table to CSVDemonstrates a method for converting an HTML table to a CSV file. Note: This example requires Chilkat v9.5.0.77 or greater.
<?php // The version number (9_5_0) should match version of the Chilkat extension used, omitting the micro-version number. // For example, if using Chilkat v9.5.0.48, then include as shown here: include("chilkat_9_5_0.php"); // This example requires the Chilkat API to have been previously unlocked. // See Global Unlock Sample for sample code. // First download the HTML containing the table $http = new CkHttp(); $bdHtml = new CkBinData(); $success = $http->QuickGetBd('https://example-code.com/data/etf_table.html',$bdHtml); if ($success != true) { print $http->lastErrorText() . "\n"; exit; } // Convert to XML. $htx = new CkHtmlToXml(); $htx->SetHtmlBd($bdHtml); $sbXml = new CkStringBuilder(); $htx->ToXmlSb($sbXml); $xml = new CkXml(); $xml->LoadSb($sbXml,true); // Remove attributes and sub-trees we don't need. // (In other words, we're getting rid of clutter...) $numRemoved = $xml->PruneTag('thead'); $numRemoved = $xml->PruneAttribute('style'); $numRemoved = $xml->PruneAttribute('class'); // Scrub the element and attribute content. $xml->Scrub('ContentTrimEnds,ContentTrimInside,AttrTrimEnds,AttrTrimInside'); // Let's see what we have... print $xml->getXml() . "\n"; // We have the following XML. // Copy this XML into the online tool at Generate Parsing Code from XML // as a starting point for accessing the data.. // <?xml version="1.0" encoding="utf-8"?> // <root> // <html> // <head> // <meta http-equiv="content-type" content="text/html; charset=UTF-8"/> // </head> // <body text="#000000" bgcolor="#FFFFFF"> // <div> // <div> // <table role="grid" data-scrollx="true" data-sortdirection="desc" data-sorton="-1"/> // </div> // </div> // <div> // <table id="topHoldingsTable" role="grid" data-scrollx="true" data-sortdirection="desc" data-sorton="-1"> // <tbody> // <tr role="row"> // <td> // <text>ITUB4</text> // </td> // <td> // <text>ITAU UNIBANCO HOLDING PREF SA</text> // </td> // <td> // <text>Financials</text> // </td> // <td> // <text>Brazil</text> // </td> // <td> // <text>10.94</text> // </td> // <td> // <text>998,954,813.73</text> // </td> // </tr> // <tr role="row"> // <td> // <text>BBDC4</text> // </td> // <td> // <text>BANCO BRADESCO PREF SA</text> // </td> // <td> // <text>Financials</text> // </td> // <td> // <text>Brazil</text> // </td> // <td> // <text>9.01</text> // </td> // <td> // <text>822,164,622.75</text> // </td> // </tr> // ... // ... // ... // </tbody> // </table> // </div> // </body> // </html> // </root> // // This is the code generated by the online tool: // $i = 0; $count_i = $xml->NumChildrenHavingTag('html|body|div'); while ($i < $count_i) { $xml->put_I($i); $table_role = $xml->chilkatPath('html|body|div[i]|div|table|(role)'); $table_data_scrollx = $xml->chilkatPath('html|body|div[i]|div|table|(data-scrollx)'); $table_data_sortdirection = $xml->chilkatPath('html|body|div[i]|div|table|(data-sortdirection)'); $table_data_sorton = $xml->chilkatPath('html|body|div[i]|div|table|(data-sorton)'); $table_id = $xml->chilkatPath('html|body|div[i]|table|(id)'); $table_role = $xml->chilkatPath('html|body|div[i]|table|(role)'); $table_data_scrollx = $xml->chilkatPath('html|body|div[i]|table|(data-scrollx)'); $table_data_sortdirection = $xml->chilkatPath('html|body|div[i]|table|(data-sortdirection)'); $table_data_sorton = $xml->chilkatPath('html|body|div[i]|table|(data-sorton)'); $j = 0; $count_j = $xml->NumChildrenHavingTag('html|body|div[i]|table|tbody|tr'); while ($j < $count_j) { $xml->put_J($j); $tr_role = $xml->chilkatPath('html|body|div[i]|table|tbody|tr[j]|(role)'); $k = 0; $count_k = $xml->NumChildrenHavingTag('html|body|div[i]|table|tbody|tr[j]|td'); while ($k < $count_k) { $xml->put_K($k); $text = $xml->getChildContent('html|body|div[i]|table|tbody|tr[j]|td[k]|text'); $k = $k + 1; } $j = $j + 1; } $i = $i + 1; } // Let's modify the above code to build the CSV. $csv = new CkCsv(); $csv->SetColumnName(0,'Ticker'); $csv->SetColumnName(1,'Name'); $csv->SetColumnName(2,'Sector'); $csv->SetColumnName(3,'Country'); $csv->SetColumnName(4,'Weight'); $csv->SetColumnName(5,'Notional Vaue'); $i = 0; $count_i = $xml->NumChildrenHavingTag('html|body|div'); while ($i < $count_i) { $xml->put_I($i); $j = 0; $count_j = $xml->NumChildrenHavingTag('html|body|div[i]|table|tbody|tr'); while ($j < $count_j) { $xml->put_J($j); $k = 0; $count_k = $xml->NumChildrenHavingTag('html|body|div[i]|table|tbody|tr[j]|td'); while ($k < $count_k) { $xml->put_K($k); $csv->SetCell($j,$k,$xml->getChildContent('html|body|div[i]|table|tbody|tr[j]|td[k]|text')); $k = $k + 1; } $j = $j + 1; } $i = $i + 1; } $csv->SaveFile('qa_output/brasil_etf.csv'); $csvStr = $csv->saveToString(); print $csvStr . "\n"; // Our CSV looks like this: // Ticker,Name,Sector,Country,Weight,Notional Vaue // ITUB4,ITAU UNIBANCO HOLDING PREF SA,Financials,Brazil,10.94,"998,954,813.73" // BBDC4,BANCO BRADESCO PREF SA,Financials,Brazil,9.01,"822,164,622.75" // VALE3,CIA VALE DO RIO DOCE SH,Materials,Brazil,8.60,"785,290,260.07" // PETR4,PETROLEO BRASILEIRO PREF SA,Energy,Brazil,5.68,"518,124,434.10" // PETR3,PETROBRAS,Energy,Brazil,4.86,"443,254,438.53" // B3SA3,B3 BRASIL BOLSA BALCAO SA,Financials,Brazil,4.57,"417,636,740.16" // ABEV3,AMBEV SA,Consumer Staples,Brazil,4.57,"417,216,913.63" // BBAS3,BANCO DO BRASIL SA,Financials,Brazil,3.25,"296,921,232.15" // ITSA4,ITAUSA INVESTIMENTOS ITAU PREF SA,Financials,Brazil,2.90,"265,153,684.52" // LREN3,LOJAS RENNER SA,Consumer Discretionary,Brazil,2.25,"205,832,175.98" // ?> |
© 2000-2025 Chilkat Software, Inc. All Rights Reserved.