Swift
Swift
Avoid URLs Matching Any of a Set of Patterns
See more Spider Examples
Demonstrates how to use "avoid patterns" to prevent spidering any URL that matches a wildcarded pattern. This example avoids URLs containing the substrings "java", "python", or "perl".
Chilkat Swift Downloads
/// Spiders up to 10 pages of www.chilkatsoft.com, skipping any URL that
/// matches one of the registered wildcard "avoid patterns", and prints the
/// URL of every page that was crawled.
///
/// Crawling stops early once the spider reports that no unspidered URLs
/// remain. A failure on an individual page is printed and the loop moves on
/// to the next URL.
func chilkatTest() {
    // The original force-unwrapped the initializer result; bail out with a
    // message instead of crashing if the object cannot be created.
    guard let spider = CkoSpider() else {
        print("Failed to create CkoSpider")
        return
    }

    // The spider object crawls a single web site at a time. Outbound links
    // can be collected and used to crawl further, but here we simply spider
    // up to 10 pages of chilkatsoft.com.
    spider.initialize(domain: "www.chilkatsoft.com")

    // Seed the crawl with the 1st URL.
    spider.addUnspidered(url: "http://www.chilkatsoft.com/")

    // Avoid URLs matching these wildcard patterns.
    for pattern in ["*java*", "*python*", "*perl*"] {
        spider.addAvoidPattern(pattern: pattern)
    }

    // Begin crawling the site by calling crawlNext repeatedly.
    // (C-style `for i = 0; i <= 9; i++` loops and `++` were removed in
    // Swift 3; a half-open range gives the same 10 iterations.)
    let maxPages = 10
    for _ in 0..<maxPages {
        if spider.crawlNext() {
            // Show the URL of the page just spidered.
            // The HTML is available in the LastHtml property.
            print("\(spider.lastUrl ?? "")")
        } else if spider.numUnspidered.intValue == 0 {
            // Nothing left in the queue: further crawlNext calls would only
            // fail again, so stop instead of sleeping through the remaining
            // iterations.
            print("No more URLs to spider")
            break
        } else {
            // A real error occurred; report it and try the next URL.
            print("\(spider.lastErrorText ?? "")")
        }

        // Sleep 1 second before spidering the next URL.
        spider.sleepMs(millisec: 1000)
    }
}