SQL Server
SQL Server
Avoiding Outbound Links Matching Patterns
See more Spider Examples
The spider accumulates outbound links when crawling. Your program may specify any number of "avoid patterns"; any link that matches at least one of these wildcard patterns is not added to the outbound links. Chilkat SQL Server Downloads
-- Important: See this note about string length limitations for strings returned by sp_OAMethod calls.
--
-- ChilkatSample
--
-- Demonstrates Chilkat.Spider "avoid outbound link patterns":
--   1. Crawls one page and prints every outbound link that was collected.
--   2. Re-initializes the spider, registers three wildcard avoid patterns,
--      crawls the same page again and prints the (now filtered) links.
--
-- Takes no parameters and returns no result set; all output is via PRINT.
-- If the COM object cannot be created, or a crawl reports failure, a message
-- is printed and the procedure returns early (releasing the COM object when
-- one was created).
CREATE PROCEDURE ChilkatSample
AS
BEGIN
    DECLARE @hr int
    DECLARE @iTmp0 int
    -- Important: Do not use nvarchar(max). See the warning about using nvarchar(max).
    DECLARE @sTmp0 nvarchar(4000)
    DECLARE @success int
    SELECT @success = 0
    DECLARE @i int

    DECLARE @spider int
    EXEC @hr = sp_OACreate 'Chilkat.Spider', @spider OUT
    IF @hr <> 0
    BEGIN
        PRINT 'Failed to create ActiveX component'
        RETURN
    END

    -- --------------------------------------------------------------------
    -- Note: The URLs in this example are no longer valid.
    -- You should replace the URLs with URLs from a site of your
    -- own choosing -- preferably your own site if testing.
    -- (Google's Directory no longer exists.)
    -- --------------------------------------------------------------------

    -- First, we'll get the outbound links for a page in the
    -- Google directory. Then we'll add some avoid patterns
    -- and then re-fetch, to see it work...
    EXEC sp_OAMethod @spider, 'Initialize', NULL, 'directory.google.com'
    EXEC sp_OAMethod @spider, 'AddUnspidered', NULL, 'http://directory.google.com/Top/Recreation/Food/Cheese/'

    EXEC sp_OAMethod @spider, 'CrawlNext', @success OUT
    -- A zero (or missing) result means the crawl did not succeed; report it
    -- instead of silently printing an empty list, and release the COM object.
    IF COALESCE(@success, 0) = 0
    BEGIN
        PRINT 'CrawlNext failed'
        SELECT @sTmp0 = NULL
        EXEC sp_OAGetProperty @spider, 'LastErrorText', @sTmp0 OUT
        IF @sTmp0 IS NOT NULL
            PRINT @sTmp0
        EXEC @hr = sp_OADestroy @spider
        RETURN
    END

    -- Display the outbound links
    EXEC sp_OAGetProperty @spider, 'NumOutboundLinks', @iTmp0 OUT
    SELECT @i = 0
    WHILE @i <= @iTmp0 - 1
    BEGIN
        EXEC sp_OAMethod @spider, 'GetOutboundLink', @sTmp0 OUT, @i
        PRINT @sTmp0
        SELECT @i = @i + 1
    END

    -- The output:
    -- http://www.cheese.com/
    -- http://www.cheesediaries.com/
    -- http://www.WisDairy.com/
    -- http://www.newenglandcheese.com
    -- http://www.ilovecheese.com
    -- http://www.cheesefromspain.com
    -- http://www.realcaliforniacheese.com/
    -- http://www.frencheese.co.uk/
    -- http://www.cheesesociety.org/
    -- http://www.specialcheese.com/queso.htm
    -- http://www.franceway.com/cheese/intro.htm
    -- http://www.foodsubs.com/Chesfirm.html
    -- http://www.cheeseboard.co.uk/
    -- http://www.thecheeseweb.com/
    -- http://www.vtcheese.com/
    -- http://www.coldbacon.com/cheese.html
    -- http://www.norwegiancheeses.co.uk/
    -- http://www.reluctantgourmet.com/cheese.htm
    -- http://www.lancewood.co.za/
    -- http://www.switzerlandcheese.ca
    -- http://www.frenchcheese.dk/
    -- http://www.dolcevita.com/cuisine/cheese/cheese.htm
    -- http://cheeseisland.net/
    -- http://www.cheestrings.ca/
    -- http://www.dreamcheese.co.uk
    -- http://hgic.clemson.edu/factsheets/HGIC3506.htm
    -- http://www.epicurious.com/cooking/how_to/food_dictionary/entry?id=1815
    -- http://www.mousetrapcheese.co.uk
    -- http://taquitos.net/yum/gc.shtml
    -- http://www.greek-recipe.com/static/greek-cheese
    -- http://www.park.org/Netherlands/pavilions/food_and_markets/cheese/introduction.html
    -- http://www.dairyfarmers.org/engl/recipes/4_1.asp
    -- http://www.prairieridgecheese.com/wischeesguid.html
    -- http://dmoz.org/cgi-bin/add.cgi?where=Recreation/Food/Cheese
    -- http://dmoz.org/about.html
    -- http://dmoz.org/cgi-bin/apply.cgi?where=Recreation/Food/Cheese

    -- Do it again, but this time with avoid patterns.
    EXEC sp_OAMethod @spider, 'Initialize', NULL, 'directory.google.com'
    EXEC sp_OAMethod @spider, 'AddUnspidered', NULL, 'http://directory.google.com/Top/Recreation/Food/Cheese/'

    -- Add some avoid patterns:
    EXEC sp_OAMethod @spider, 'AddAvoidOutboundLinkPattern', NULL, '*dmoz.org*'
    EXEC sp_OAMethod @spider, 'AddAvoidOutboundLinkPattern', NULL, '*?id=*'
    EXEC sp_OAMethod @spider, 'AddAvoidOutboundLinkPattern', NULL, '*.co.uk*'

    -- Reset the flag so a stale success value from the first crawl cannot
    -- mask a failure of this one.
    SELECT @success = 0
    EXEC sp_OAMethod @spider, 'CrawlNext', @success OUT
    IF COALESCE(@success, 0) = 0
    BEGIN
        PRINT 'CrawlNext failed'
        SELECT @sTmp0 = NULL
        EXEC sp_OAGetProperty @spider, 'LastErrorText', @sTmp0 OUT
        IF @sTmp0 IS NOT NULL
            PRINT @sTmp0
        EXEC @hr = sp_OADestroy @spider
        RETURN
    END

    PRINT '-----------------------'

    -- Display the outbound links
    EXEC sp_OAGetProperty @spider, 'NumOutboundLinks', @iTmp0 OUT
    SELECT @i = 0
    WHILE @i <= @iTmp0 - 1
    BEGIN
        EXEC sp_OAMethod @spider, 'GetOutboundLink', @sTmp0 OUT, @i
        PRINT @sTmp0
        SELECT @i = @i + 1
    END

    -- Output:
    -- http://www.cheese.com/
    -- http://www.cheesediaries.com/
    -- http://www.WisDairy.com/
    -- http://www.newenglandcheese.com
    -- http://www.ilovecheese.com
    -- http://www.cheesefromspain.com
    -- http://www.realcaliforniacheese.com/
    -- http://www.cheesesociety.org/
    -- http://www.specialcheese.com/queso.htm
    -- http://www.franceway.com/cheese/intro.htm
    -- http://www.foodsubs.com/Chesfirm.html
    -- http://www.thecheeseweb.com/
    -- http://www.vtcheese.com/
    -- http://www.coldbacon.com/cheese.html
    -- http://www.reluctantgourmet.com/cheese.htm
    -- http://www.lancewood.co.za/
    -- http://www.switzerlandcheese.ca
    -- http://www.frenchcheese.dk/
    -- http://www.dolcevita.com/cuisine/cheese/cheese.htm
    -- http://cheeseisland.net/
    -- http://www.cheestrings.ca/
    -- http://hgic.clemson.edu/factsheets/HGIC3506.htm
    -- http://taquitos.net/yum/gc.shtml
    -- http://www.greek-recipe.com/static/greek-cheese
    -- http://www.park.org/Netherlands/pavilions/food_and_markets/cheese/introduction.html
    -- http://www.dairyfarmers.org/engl/recipes/4_1.asp
    -- http://www.prairieridgecheese.com/wischeesguid.html

    -- Release the COM object created by sp_OACreate.
    EXEC @hr = sp_OADestroy @spider
END
GO