Delphi DLL
Delphi DLL
Avoid URLs Matching Any of a Set of Patterns
See more Spider Examples
Demonstrates how to use "avoid patterns" to prevent spidering any URL that matches a wildcarded pattern. This example avoids URLs containing the substrings "java", "python", or "perl".
Chilkat Delphi DLL Downloads
uses
Winapi.Windows, Winapi.Messages, System.SysUtils, System.Variants, System.Classes, Vcl.Graphics,
Vcl.Controls, Vcl.Forms, Vcl.Dialogs, Vcl.StdCtrls, Spider;
...
// Crawls up to MaxPages pages of www.chilkatsoft.com, skipping any URL that
// matches one of the registered avoid patterns, and logs each crawled URL
// (or the failure reason) to Memo1.
//
// Sender: the control that triggered the click; not used.
procedure TForm1.Button1Click(Sender: TObject);
const
  MaxPages = 10;
var
  spider: HCkSpider;
  i: Integer;
begin
  spider := CkSpider_Create();
  try
    // The spider object crawls a single web site at a time. As you'll see
    // in later examples, you can collect outbound links and use them to
    // crawl the web. For now, we'll simply spider 10 pages of chilkatsoft.com
    CkSpider_Initialize(spider, 'www.chilkatsoft.com');

    // Add the 1st URL:
    CkSpider_AddUnspidered(spider, 'http://www.chilkatsoft.com/');

    // Avoid URLs matching these patterns:
    CkSpider_AddAvoidPattern(spider, '*java*');
    CkSpider_AddAvoidPattern(spider, '*python*');
    CkSpider_AddAvoidPattern(spider, '*perl*');

    // Begin crawling the site by calling CrawlNext repeatedly.
    for i := 1 to MaxPages do
    begin
      if CkSpider_CrawlNext(spider) then
      begin
        // Show the URL of the page just spidered.
        // The HTML is available in the LastHtml property
        Memo1.Lines.Add(CkSpider__lastUrl(spider));
      end
      else if CkSpider_getNumUnspidered(spider) = 0 then
      begin
        // The queue is empty: nothing left to crawl, so stop instead of
        // repeating this message (and sleeping) on every remaining pass.
        Memo1.Lines.Add('No more URLs to spider');
        Break;
      end
      else
      begin
        // URLs remain, so this was a real failure; report it and move on
        // to the next queued URL.
        Memo1.Lines.Add(CkSpider__lastErrorText(spider));
      end;

      // Sleep 1 second before spidering the next URL. Skipped after the
      // final pass because there is no next URL to wait for.
      if i < MaxPages then
        CkSpider_SleepMs(spider, 1000);
    end;
  finally
    // Release the spider handle even if an exception was raised above.
    CkSpider_Dispose(spider);
  end;
end;