Sample code for 30+ languages & platforms
SQL Server

Avoiding Outbound Links Matching Patterns

See more Spider Examples

The spider accumulates outbound links when crawling. Your program may specify any number of "avoid patterns" to prevent any link matching at least one of the wildcarded patterns from being added.

Chilkat SQL Server Downloads

SQL Server
-- Important: Strings returned by sp_OAMethod calls are subject to string length limitations (see the Chilkat note on this topic).
--
CREATE PROCEDURE ChilkatSample
AS
BEGIN
    -- Purpose: demonstrates Chilkat.Spider "avoid outbound link patterns".
    --   1. Crawls one page and prints every outbound link that was accumulated.
    --   2. Re-initializes the spider, registers three wildcard avoid patterns,
    --      crawls the same page again and prints the (now filtered) link list.
    -- Takes no parameters and returns no result set; all output goes through PRINT.
    -- The procedure relies on the sp_OA* OLE Automation procedures, which must be
    -- enabled on the server ("Ole Automation Procedures" configuration option).
    -- If the COM object cannot be created, or a crawl fails, a message is printed
    -- and the procedure returns early (releasing the COM object if it was created).

    DECLARE @hr int
    DECLARE @iTmp0 int
    -- Important: Do not use nvarchar(max).  See the warning about using nvarchar(max).
    DECLARE @sTmp0 nvarchar(4000)
    DECLARE @success int
    SELECT @success = 0

    DECLARE @spider int
    EXEC @hr = sp_OACreate 'Chilkat.Spider', @spider OUT
    IF @hr <> 0
    BEGIN
        PRINT 'Failed to create ActiveX component'
        RETURN
    END

    -- --------------------------------------------------------------------
    -- Note: The URLs in this example are no longer valid.
    -- You should replace the URLs with URLs from a site of your
    -- own choosing -- preferably your own site if testing.
    -- (Google's Directory no longer exists.)
    -- --------------------------------------------------------------------

    -- First, we'll get the outbound links for a page in the
    -- Google directory.  Then we'll add some avoid patterns
    -- and then re-fetch, to see it work...

    EXEC sp_OAMethod @spider, 'Initialize', NULL, 'directory.google.com'
    EXEC sp_OAMethod @spider, 'AddUnspidered', NULL, 'http://directory.google.com/Top/Recreation/Food/Cheese/'

    -- Reset the flag first so a failed sp_OAMethod call cannot leave a stale 1 behind.
    SELECT @success = 0
    EXEC sp_OAMethod @spider, 'CrawlNext', @success OUT
    -- COALESCE guards against NULL: "NULL <> 1" would be UNKNOWN and skip the check.
    IF COALESCE(@success, 0) <> 1
    BEGIN
        PRINT 'CrawlNext failed'
        EXEC sp_OAGetProperty @spider, 'LastErrorText', @sTmp0 OUT
        PRINT @sTmp0
        -- Release the COM object before leaving so it is not leaked.
        EXEC @hr = sp_OADestroy @spider
        RETURN
    END

    -- Display the outbound links
    DECLARE @i int

    EXEC sp_OAGetProperty @spider, 'NumOutboundLinks', @iTmp0 OUT
    SELECT @i = 0
    -- Link indexes are 0-based: 0 .. NumOutboundLinks - 1.
    WHILE @i < @iTmp0
      BEGIN
        EXEC sp_OAMethod @spider, 'GetOutboundLink', @sTmp0 OUT, @i
        PRINT @sTmp0
        SELECT @i = @i + 1
      END

    -- The output:
    -- http://www.cheese.com/
    -- http://www.cheesediaries.com/
    -- http://www.WisDairy.com/
    -- http://www.newenglandcheese.com
    -- http://www.ilovecheese.com
    -- http://www.cheesefromspain.com
    -- http://www.realcaliforniacheese.com/
    -- http://www.frencheese.co.uk/
    -- http://www.cheesesociety.org/
    -- http://www.specialcheese.com/queso.htm
    -- http://www.franceway.com/cheese/intro.htm
    -- http://www.foodsubs.com/Chesfirm.html
    -- http://www.cheeseboard.co.uk/
    -- http://www.thecheeseweb.com/
    -- http://www.vtcheese.com/
    -- http://www.coldbacon.com/cheese.html
    -- http://www.norwegiancheeses.co.uk/
    -- http://www.reluctantgourmet.com/cheese.htm
    -- http://www.lancewood.co.za/
    -- http://www.switzerlandcheese.ca
    -- http://www.frenchcheese.dk/
    -- http://www.dolcevita.com/cuisine/cheese/cheese.htm
    -- http://cheeseisland.net/
    -- http://www.cheestrings.ca/
    -- http://www.dreamcheese.co.uk
    -- http://hgic.clemson.edu/factsheets/HGIC3506.htm
    -- http://www.epicurious.com/cooking/how_to/food_dictionary/entry?id=1815
    -- http://www.mousetrapcheese.co.uk
    -- http://taquitos.net/yum/gc.shtml
    -- http://www.greek-recipe.com/static/greek-cheese
    -- http://www.park.org/Netherlands/pavilions/food_and_markets/cheese/introduction.html
    -- http://www.dairyfarmers.org/engl/recipes/4_1.asp
    -- http://www.prairieridgecheese.com/wischeesguid.html
    -- http://dmoz.org/cgi-bin/add.cgi?where=Recreation/Food/Cheese
    -- http://dmoz.org/about.html
    -- http://dmoz.org/cgi-bin/apply.cgi?where=Recreation/Food/Cheese

    -- Do it again, but this time with avoid patterns.
    EXEC sp_OAMethod @spider, 'Initialize', NULL, 'directory.google.com'
    EXEC sp_OAMethod @spider, 'AddUnspidered', NULL, 'http://directory.google.com/Top/Recreation/Food/Cheese/'

    -- Add some avoid patterns:
    -- a link matching at least one of these wildcarded patterns is not added
    -- to the outbound link list.
    EXEC sp_OAMethod @spider, 'AddAvoidOutboundLinkPattern', NULL, '*dmoz.org*'
    EXEC sp_OAMethod @spider, 'AddAvoidOutboundLinkPattern', NULL, '*?id=*'
    EXEC sp_OAMethod @spider, 'AddAvoidOutboundLinkPattern', NULL, '*.co.uk*'

    SELECT @success = 0
    EXEC sp_OAMethod @spider, 'CrawlNext', @success OUT
    IF COALESCE(@success, 0) <> 1
    BEGIN
        PRINT 'CrawlNext failed'
        EXEC sp_OAGetProperty @spider, 'LastErrorText', @sTmp0 OUT
        PRINT @sTmp0
        EXEC @hr = sp_OADestroy @spider
        RETURN
    END

    PRINT '-----------------------'

    -- Display the outbound links
    EXEC sp_OAGetProperty @spider, 'NumOutboundLinks', @iTmp0 OUT
    SELECT @i = 0
    WHILE @i < @iTmp0
      BEGIN
        EXEC sp_OAMethod @spider, 'GetOutboundLink', @sTmp0 OUT, @i
        PRINT @sTmp0
        SELECT @i = @i + 1
      END

    -- Output:
    -- http://www.cheese.com/
    -- http://www.cheesediaries.com/
    -- http://www.WisDairy.com/
    -- http://www.newenglandcheese.com
    -- http://www.ilovecheese.com
    -- http://www.cheesefromspain.com
    -- http://www.realcaliforniacheese.com/
    -- http://www.cheesesociety.org/
    -- http://www.specialcheese.com/queso.htm
    -- http://www.franceway.com/cheese/intro.htm
    -- http://www.foodsubs.com/Chesfirm.html
    -- http://www.thecheeseweb.com/
    -- http://www.vtcheese.com/
    -- http://www.coldbacon.com/cheese.html
    -- http://www.reluctantgourmet.com/cheese.htm
    -- http://www.lancewood.co.za/
    -- http://www.switzerlandcheese.ca
    -- http://www.frenchcheese.dk/
    -- http://www.dolcevita.com/cuisine/cheese/cheese.htm
    -- http://cheeseisland.net/
    -- http://www.cheestrings.ca/
    -- http://hgic.clemson.edu/factsheets/HGIC3506.htm
    -- http://taquitos.net/yum/gc.shtml
    -- http://www.greek-recipe.com/static/greek-cheese
    -- http://www.park.org/Netherlands/pavilions/food_and_markets/cheese/introduction.html
    -- http://www.dairyfarmers.org/engl/recipes/4_1.asp
    -- http://www.prairieridgecheese.com/wischeesguid.html

    -- Release the COM object created by sp_OACreate.
    EXEC @hr = sp_OADestroy @spider


END
GO