SQL Server
SQL Server
Getting Started Spidering a Site
See more Spider Examples
This is a very simple "getting started" example for spidering a web site. As you'll see in future examples, the Chilkat Spider library can be used to crawl the Web. For now, we'll concentrate on spidering a single site.
Chilkat SQL Server Downloads
-- Important: See this note about string length limitations for strings returned by sp_OAMethod calls.
--
CREATE PROCEDURE ChilkatSample
AS
BEGIN
    -- Purpose: create a Chilkat.Spider COM object through OLE Automation,
    -- seed it with the chilkatsoft.com home page, and call CrawlNext up to
    -- 10 times, printing each spidered URL (or the error text on failure).
    -- The loop ends early once the spider reports no unspidered URLs left.
    -- Takes no parameters; all output is emitted via PRINT.

    DECLARE @hr int          -- HRESULT-style status returned by sp_OACreate / sp_OADestroy
    DECLARE @iTmp0 int       -- scratch holder for integer properties
    -- Important: Do not use nvarchar(max). See the warning about using nvarchar(max).
    DECLARE @sTmp0 nvarchar(4000)   -- scratch holder for string properties
    DECLARE @success int
    SELECT @success = 0

    DECLARE @spider int      -- OLE object token for the spider instance
    EXEC @hr = sp_OACreate 'Chilkat.Spider', @spider OUT
    IF @hr <> 0
    BEGIN
        -- Nothing was created, so there is nothing to destroy.
        PRINT 'Failed to create ActiveX component'
        RETURN
    END

    -- The spider object crawls a single web site at a time. As you'll see
    -- in later examples, you can collect outbound links and use them to
    -- crawl the web. For now, we'll simply spider 10 pages of chilkatsoft.com
    EXEC sp_OAMethod @spider, 'Initialize', NULL, 'www.chilkatsoft.com'

    -- Add the 1st URL:
    EXEC sp_OAMethod @spider, 'AddUnspidered', NULL, 'http://www.chilkatsoft.com/'

    -- Begin crawling the site by calling CrawlNext repeatedly.
    DECLARE @i int
    SELECT @i = 0
    WHILE @i <= 9
    BEGIN
        -- Reset the flag so that a call which fails without writing the
        -- output parameter is not mistaken for the previous iteration's success.
        SELECT @success = 0
        EXEC sp_OAMethod @spider, 'CrawlNext', @success OUT

        IF @success = 1
        BEGIN
            -- Show the URL of the page just spidered.
            EXEC sp_OAGetProperty @spider, 'LastUrl', @sTmp0 OUT
            PRINT @sTmp0
            -- The HTML is available in the LastHtml property
        END
        ELSE
        BEGIN
            -- Did we get an error or are there no more URLs to crawl?
            EXEC sp_OAGetProperty @spider, 'NumUnspidered', @iTmp0 OUT
            IF @iTmp0 = 0
            BEGIN
                PRINT 'No more URLs to spider'
                -- The queue is empty: further CrawlNext calls cannot succeed,
                -- so stop instead of sleeping through the remaining iterations.
                BREAK
            END
            ELSE
            BEGIN
                EXEC sp_OAGetProperty @spider, 'LastErrorText', @sTmp0 OUT
                PRINT @sTmp0
            END
        END

        -- Sleep 1 second before spidering the next URL.
        EXEC sp_OAMethod @spider, 'SleepMs', NULL, 1000
        SELECT @i = @i + 1
    END

    -- Release the COM object created by sp_OACreate.
    EXEC @hr = sp_OADestroy @spider
END
GO