Sample code for 30+ languages & platforms
Delphi DLL

Avoiding Outbound Links Matching Patterns

See more Spider Examples

The spider accumulates outbound links when crawling. Your program may specify any number of "avoid patterns" to prevent any link matching at least one of the wildcarded patterns from being added.

Chilkat Delphi DLL Downloads

Delphi DLL
uses
    Winapi.Windows, Winapi.Messages, System.SysUtils, System.Variants, System.Classes, Vcl.Graphics,
    Vcl.Controls, Vcl.Forms, Vcl.Dialogs, Vcl.StdCtrls, Spider;

...

// Demonstrates "avoid outbound link patterns" with the Chilkat spider:
// crawls one page and lists its outbound links, then crawls the same page
// again with three wildcard avoid patterns registered and lists the
// (now filtered) outbound links. All output goes to Memo1.
procedure TForm1.Button1Click(Sender: TObject);
var
  success: Boolean;
  spider: HCkSpider;

  // Appends every outbound link currently held by the spider to Memo1.
  procedure ShowOutboundLinks;
  var
    i: Integer;
  begin
    for i := 0 to CkSpider_getNumOutboundLinks(spider) - 1 do
      Memo1.Lines.Add(CkSpider__getOutboundLink(spider,i));
  end;

begin
  spider := CkSpider_Create();
  // try..finally guarantees the spider handle is released even if a crawl
  // fails or a VCL call raises.
  try
    // --------------------------------------------------------------------
    // Note: The URLs in this example are no longer valid.
    // You should replace the URLs with URLs from a site of your
    // own choosing -- preferably your own site if testing.
    // (Google's Directory no longer exists.)
    // --------------------------------------------------------------------

    // First, we'll get the outbound links for a page in the
    // Google directory.  Then we'll add some avoid patterns
    // and then re-fetch, to see it work...

    CkSpider_Initialize(spider,'directory.google.com');
    CkSpider_AddUnspidered(spider,'http://directory.google.com/Top/Recreation/Food/Cheese/');

    success := CkSpider_CrawlNext(spider);
    if not success then
    begin
      // Report why the crawl did not happen instead of silently showing
      // an empty link list.
      Memo1.Lines.Add(CkSpider__lastErrorText(spider));
      Exit;
    end;

    // Display the outbound links
    ShowOutboundLinks;

    // The output:
    // http://www.cheese.com/
    // http://www.cheesediaries.com/
    // http://www.WisDairy.com/
    // http://www.newenglandcheese.com
    // http://www.ilovecheese.com
    // http://www.cheesefromspain.com
    // http://www.realcaliforniacheese.com/
    // http://www.frencheese.co.uk/
    // http://www.cheesesociety.org/
    // http://www.specialcheese.com/queso.htm
    // http://www.franceway.com/cheese/intro.htm
    // http://www.foodsubs.com/Chesfirm.html
    // http://www.cheeseboard.co.uk/
    // http://www.thecheeseweb.com/
    // http://www.vtcheese.com/
    // http://www.coldbacon.com/cheese.html
    // http://www.norwegiancheeses.co.uk/
    // http://www.reluctantgourmet.com/cheese.htm
    // http://www.lancewood.co.za/
    // http://www.switzerlandcheese.ca
    // http://www.frenchcheese.dk/
    // http://www.dolcevita.com/cuisine/cheese/cheese.htm
    // http://cheeseisland.net/
    // http://www.cheestrings.ca/
    // http://www.dreamcheese.co.uk
    // http://hgic.clemson.edu/factsheets/HGIC3506.htm
    // http://www.epicurious.com/cooking/how_to/food_dictionary/entry?id=1815
    // http://www.mousetrapcheese.co.uk
    // http://taquitos.net/yum/gc.shtml
    // http://www.greek-recipe.com/static/greek-cheese
    // http://www.park.org/Netherlands/pavilions/food_and_markets/cheese/introduction.html
    // http://www.dairyfarmers.org/engl/recipes/4_1.asp
    // http://www.prairieridgecheese.com/wischeesguid.html
    // http://dmoz.org/cgi-bin/add.cgi?where=Recreation/Food/Cheese
    // http://dmoz.org/about.html
    // http://dmoz.org/cgi-bin/apply.cgi?where=Recreation/Food/Cheese

    // Do it again, but this time with avoid patterns.
    CkSpider_Initialize(spider,'directory.google.com');
    CkSpider_AddUnspidered(spider,'http://directory.google.com/Top/Recreation/Food/Cheese/');

    // Add some avoid patterns. A link matching at least one of these
    // wildcarded patterns is not added to the outbound links.
    CkSpider_AddAvoidOutboundLinkPattern(spider,'*dmoz.org*');
    CkSpider_AddAvoidOutboundLinkPattern(spider,'*?id=*');
    CkSpider_AddAvoidOutboundLinkPattern(spider,'*.co.uk*');

    success := CkSpider_CrawlNext(spider);
    if not success then
    begin
      Memo1.Lines.Add(CkSpider__lastErrorText(spider));
      Exit;
    end;

    Memo1.Lines.Add('-----------------------');

    // Display the outbound links
    ShowOutboundLinks;

    // Output:
    // http://www.cheese.com/
    // http://www.cheesediaries.com/
    // http://www.WisDairy.com/
    // http://www.newenglandcheese.com
    // http://www.ilovecheese.com
    // http://www.cheesefromspain.com
    // http://www.realcaliforniacheese.com/
    // http://www.cheesesociety.org/
    // http://www.specialcheese.com/queso.htm
    // http://www.franceway.com/cheese/intro.htm
    // http://www.foodsubs.com/Chesfirm.html
    // http://www.thecheeseweb.com/
    // http://www.vtcheese.com/
    // http://www.coldbacon.com/cheese.html
    // http://www.reluctantgourmet.com/cheese.htm
    // http://www.lancewood.co.za/
    // http://www.switzerlandcheese.ca
    // http://www.frenchcheese.dk/
    // http://www.dolcevita.com/cuisine/cheese/cheese.htm
    // http://cheeseisland.net/
    // http://www.cheestrings.ca/
    // http://hgic.clemson.edu/factsheets/HGIC3506.htm
    // http://taquitos.net/yum/gc.shtml
    // http://www.greek-recipe.com/static/greek-cheese
    // http://www.park.org/Netherlands/pavilions/food_and_markets/cheese/introduction.html
    // http://www.dairyfarmers.org/engl/recipes/4_1.asp
    // http://www.prairieridgecheese.com/wischeesguid.html
  finally
    CkSpider_Dispose(spider);
  end;
end;