Sample code for 30+ languages & platforms
.NET Core C#

Avoid URLs Matching Any of a Set of Patterns

See more Spider Examples

Demonstrates how to use "avoid patterns" to prevent spidering any URL that matches a wildcarded pattern. This example avoids URLs containing the substrings "java", "python", or "perl".

Chilkat .NET Core C# Downloads

.NET Core C#
// Crawls up to 10 pages of a single site, skipping every URL that matches one
// of the registered avoid patterns, and writes each spidered URL to the debug
// output. Stops early once there are no unspidered URLs left.
bool success = false;

Chilkat.Spider spider = new Chilkat.Spider();

// The spider object crawls a single web site at a time.  As you'll see
// in later examples, you can collect outbound links and use them to 
// crawl the web.  For now, we'll simply spider 10 pages of chilkatsoft.com
spider.Initialize("www.chilkatsoft.com");

// Add the 1st URL:
spider.AddUnspidered("http://www.chilkatsoft.com/");

// Avoid URLs matching these patterns (URLs containing "java", "python", or "perl"):
spider.AddAvoidPattern("*java*");
spider.AddAvoidPattern("*python*");
spider.AddAvoidPattern("*perl*");

// Begin crawling the site by calling CrawlNext repeatedly.
int i;
for (i = 0; i <= 9; i++) {

    success = spider.CrawlNext();
    if (success) {
        // Show the URL of the page just spidered.
        Debug.WriteLine(spider.LastUrl);
        // The HTML is available in the LastHtml property
    }
    else if (spider.NumUnspidered == 0) {
        // CrawlNext failed because nothing is left to crawl. Stop here rather
        // than repeating the same failed call (and the 1 second sleep) for
        // every remaining iteration.
        Debug.WriteLine("No more URLs to spider");
        break;
    }
    else {
        // URLs remain, so this was a real error. Report it and move on to
        // the next URL.
        Debug.WriteLine(spider.LastErrorText);
    }

    // Sleep 1 second before spidering the next URL.
    spider.SleepMs(1000);
}