Chilkat HOME .NET Core C# Android™ AutoIt C C# C++ Chilkat2-Python CkPython Classic ASP DataFlex Delphi ActiveX Delphi DLL Go Java Lianja Mono C# Node.js Objective-C PHP ActiveX PHP Extension Perl PowerBuilder PowerShell PureBasic Ruby SQL Server Swift 2 Swift 3,4,5... Tcl Unicode C Unicode C++ VB.NET VBScript Visual Basic 6.0 Visual FoxPro Xojo Plugin
(SQL Server) Avoiding Outbound Links Matching Patterns

The spider accumulates outbound links when crawling. Your program may specify any number of "avoid patterns": any link that matches at least one of these wildcard patterns is not added to the list of outbound links.
-- Important: See this note about string length limitations for strings returned by sp_OAMethod calls.
--
-- ChilkatSample
--
-- Demonstrates the Chilkat Spider "avoid outbound link patterns" feature:
--   1. Creates a Chilkat.Spider COM object through sp_OACreate.
--   2. Crawls a single page and prints every outbound link that was collected.
--   3. Re-initializes the spider, registers three avoid patterns, crawls the
--      same page again and prints the (now filtered) outbound links.
--   4. Destroys the COM object.
--
-- Takes no parameters and returns no result set; all output goes through PRINT.
-- If the COM object cannot be created, or a crawl fails, a diagnostic message
-- is printed and the procedure returns early.
CREATE PROCEDURE ChilkatSample
AS
BEGIN
    DECLARE @hr int
    DECLARE @iTmp0 int
    -- Important: Do not use nvarchar(max).  See the warning about using nvarchar(max).
    DECLARE @sTmp0 nvarchar(4000)

    DECLARE @spider int
    -- Use "Chilkat_9_5_0.Spider" for versions of Chilkat < 10.0.0
    EXEC @hr = sp_OACreate 'Chilkat.Spider', @spider OUT
    IF @hr <> 0
    BEGIN
        PRINT 'Failed to create ActiveX component'
        RETURN
    END

    -- --------------------------------------------------------------------
    -- Note: The URLs in this example are no longer valid.
    -- You should replace the URLs with URLs from a site of your
    -- own choosing -- preferably your own site if testing.
    -- (Google's Directory no longer exists.)
    -- --------------------------------------------------------------------

    -- First, we'll get the outbound links for a page in the
    -- Google directory.  Then we'll add some avoid patterns
    -- and then re-fetch, to see it work...
    EXEC sp_OAMethod @spider, 'Initialize', NULL, 'directory.google.com'
    EXEC sp_OAMethod @spider, 'AddUnspidered', NULL, 'http://directory.google.com/Top/Recreation/Food/Cheese/'

    -- Start from a known "failed" value so that a call which never writes
    -- @success is treated as a failure rather than silently ignored.
    DECLARE @success int
    SET @success = 0
    EXEC sp_OAMethod @spider, 'CrawlNext', @success OUT
    IF COALESCE(@success, 0) <> 1
    BEGIN
        -- Report why the crawl did not succeed, then release the COM object.
        EXEC sp_OAGetProperty @spider, 'LastErrorText', @sTmp0 OUT
        PRINT @sTmp0
        EXEC @hr = sp_OADestroy @spider
        RETURN
    END

    -- Display the outbound links
    DECLARE @i int
    EXEC sp_OAGetProperty @spider, 'NumOutboundLinks', @iTmp0 OUT
    SET @i = 0
    WHILE @i <= @iTmp0 - 1
    BEGIN
        EXEC sp_OAMethod @spider, 'GetOutboundLink', @sTmp0 OUT, @i
        PRINT @sTmp0
        SET @i = @i + 1
    END

    -- The output:

    -- http://www.cheese.com/
    -- http://www.cheesediaries.com/
    -- http://www.WisDairy.com/
    -- http://www.newenglandcheese.com
    -- http://www.ilovecheese.com
    -- http://www.cheesefromspain.com
    -- http://www.realcaliforniacheese.com/
    -- http://www.frencheese.co.uk/
    -- http://www.cheesesociety.org/
    -- http://www.specialcheese.com/queso.htm
    -- http://www.franceway.com/cheese/intro.htm
    -- http://www.foodsubs.com/Chesfirm.html
    -- http://www.cheeseboard.co.uk/
    -- http://www.thecheeseweb.com/
    -- http://www.vtcheese.com/
    -- http://www.coldbacon.com/cheese.html
    -- http://www.norwegiancheeses.co.uk/
    -- http://www.reluctantgourmet.com/cheese.htm
    -- http://www.lancewood.co.za/
    -- http://www.switzerlandcheese.ca
    -- http://www.frenchcheese.dk/
    -- http://www.dolcevita.com/cuisine/cheese/cheese.htm
    -- http://cheeseisland.net/
    -- http://www.cheestrings.ca/
    -- http://www.dreamcheese.co.uk
    -- http://hgic.clemson.edu/factsheets/HGIC3506.htm
    -- http://www.epicurious.com/cooking/how_to/food_dictionary/entry?id=1815
    -- http://www.mousetrapcheese.co.uk
    -- http://taquitos.net/yum/gc.shtml
    -- http://www.greek-recipe.com/static/greek-cheese
    -- http://www.park.org/Netherlands/pavilions/food_and_markets/cheese/introduction.html
    -- http://www.dairyfarmers.org/engl/recipes/4_1.asp
    -- http://www.prairieridgecheese.com/wischeesguid.html
    -- http://dmoz.org/cgi-bin/add.cgi?where=Recreation/Food/Cheese
    -- http://dmoz.org/about.html
    -- http://dmoz.org/cgi-bin/apply.cgi?where=Recreation/Food/Cheese

    -- Do it again, but this time with avoid patterns.
    EXEC sp_OAMethod @spider, 'Initialize', NULL, 'directory.google.com'
    EXEC sp_OAMethod @spider, 'AddUnspidered', NULL, 'http://directory.google.com/Top/Recreation/Food/Cheese/'

    -- Add some avoid patterns:
    EXEC sp_OAMethod @spider, 'AddAvoidOutboundLinkPattern', NULL, '*dmoz.org*'
    EXEC sp_OAMethod @spider, 'AddAvoidOutboundLinkPattern', NULL, '*?id=*'
    EXEC sp_OAMethod @spider, 'AddAvoidOutboundLinkPattern', NULL, '*.co.uk*'

    -- Reset before the call so the result of the first crawl cannot mask a
    -- failure of this one.
    SET @success = 0
    EXEC sp_OAMethod @spider, 'CrawlNext', @success OUT
    IF COALESCE(@success, 0) <> 1
    BEGIN
        EXEC sp_OAGetProperty @spider, 'LastErrorText', @sTmp0 OUT
        PRINT @sTmp0
        EXEC @hr = sp_OADestroy @spider
        RETURN
    END

    PRINT '-----------------------'

    -- Display the outbound links
    EXEC sp_OAGetProperty @spider, 'NumOutboundLinks', @iTmp0 OUT
    SET @i = 0
    WHILE @i <= @iTmp0 - 1
    BEGIN
        EXEC sp_OAMethod @spider, 'GetOutboundLink', @sTmp0 OUT, @i
        PRINT @sTmp0
        SET @i = @i + 1
    END

    -- Output:

    -- http://www.cheese.com/
    -- http://www.cheesediaries.com/
    -- http://www.WisDairy.com/
    -- http://www.newenglandcheese.com
    -- http://www.ilovecheese.com
    -- http://www.cheesefromspain.com
    -- http://www.realcaliforniacheese.com/
    -- http://www.cheesesociety.org/
    -- http://www.specialcheese.com/queso.htm
    -- http://www.franceway.com/cheese/intro.htm
    -- http://www.foodsubs.com/Chesfirm.html
    -- http://www.thecheeseweb.com/
    -- http://www.vtcheese.com/
    -- http://www.coldbacon.com/cheese.html
    -- http://www.reluctantgourmet.com/cheese.htm
    -- http://www.lancewood.co.za/
    -- http://www.switzerlandcheese.ca
    -- http://www.frenchcheese.dk/
    -- http://www.dolcevita.com/cuisine/cheese/cheese.htm
    -- http://cheeseisland.net/
    -- http://www.cheestrings.ca/
    -- http://hgic.clemson.edu/factsheets/HGIC3506.htm
    -- http://taquitos.net/yum/gc.shtml
    -- http://www.greek-recipe.com/static/greek-cheese
    -- http://www.park.org/Netherlands/pavilions/food_and_markets/cheese/introduction.html
    -- http://www.dairyfarmers.org/engl/recipes/4_1.asp
    -- http://www.prairieridgecheese.com/wischeesguid.html

    -- Release the COM object now that both crawls are done.
    EXEC @hr = sp_OADestroy @spider
END
GO
© 2000-2024 Chilkat Software, Inc. All Rights Reserved.