Chilkat Examples

Chilkat | HOME | .NET Core C# | Android™ | AutoIt | C | C# | C++ | Chilkat2-Python | CkPython | Classic ASP | DataFlex | Delphi ActiveX | Delphi DLL | Go | Java | Lianja | Mono C# | Node.js | Objective-C | PHP ActiveX | PHP Extension | Perl | PowerBuilder | PowerShell | PureBasic | Ruby | SQL Server | Swift 2 | Swift 3,4,5... | Tcl | Unicode C | Unicode C++ | VB.NET | VBScript | Visual Basic 6.0 | Visual FoxPro | Xojo Plugin

SQL Server Examples

Web API Categories

ASN.1
AWS KMS
AWS Misc
Amazon EC2
Amazon Glacier
Amazon S3
Amazon S3 (new)
Amazon SES
Amazon SNS
Amazon SQS
Azure Cloud Storage
Azure Key Vault
Azure Service Bus
Azure Table Service
Base64
Bounced Email
Box
CAdES
CSR
CSV
Certificates
Cloud Signature CSC
Code Signing
Compression
DKIM / DomainKey
DNS
DSA
Diffie-Hellman
Digital Signatures
Dropbox
Dynamics CRM
EBICS
ECC
Ed25519
Email Object
Encryption
FTP
FileAccess
Firebase
GMail REST API
GMail SMTP/IMAP/POP
Geolocation
Google APIs
Google Calendar
Google Cloud SQL
Google Cloud Storage
Google Drive
Google Photos
Google Sheets
Google Tasks
Gzip
HTML-to-XML/Text
HTTP

HTTP Misc
IMAP
JSON
JSON Web Encryption (JWE)
JSON Web Signatures (JWS)
JSON Web Token (JWT)
Java KeyStore (JKS)
MHT / HTML Email
MIME
MS Storage Providers
Microsoft Graph
Misc
NTLM
OAuth1
OAuth2
OIDC
Office365
OneDrive
OpenSSL
Outlook
Outlook Calendar
Outlook Contact
PDF Signatures
PEM
PFX/P12
PKCS11
POP3
PRNG
REST
REST Misc
RSA
SCP
SCard
SFTP
SMTP
SSH
SSH Key
SSH Tunnel
ScMinidriver
SharePoint
SharePoint Online
Signing in the Cloud
Socket/SSL/TLS
Spider
Stream
Tar Archive
ULID/UUID
Upload
WebSocket
XAdES
XML
XML Digital Signatures
XMP
Zip
curl
uncategorized

 

 

 

(SQL Server) A Simple Web Crawler

This demonstrates a very simple web crawler using the Chilkat Spider component.

Chilkat ActiveX Downloads

ActiveX for 32-bit and 64-bit Windows

-- Important: Strings returned by sp_OAMethod calls are subject to string length limitations (see the Chilkat note on this topic).
--
-- ChilkatSample: a very simple web crawler built on the Chilkat Spider ActiveX
-- component, driven through SQL Server's OLE Automation procedures (sp_OA*).
--
-- Takes no parameters and returns no result set.  Each crawled URL is written
-- with PRINT.  Starting from a single seed URL it crawls up to 5 pages per
-- site, then queues outbound links whose base domain has not been seen yet.
--
-- The main loop runs until the seed-URL queue is empty.
CREATE PROCEDURE ChilkatSample
AS
BEGIN
    -- T-SQL variables are scoped to the whole procedure (not to BEGIN/END
    -- blocks), so every variable is declared exactly once, up front.
    DECLARE @hr int
    -- Important: Do not use nvarchar(max).  See the warning about using nvarchar(max).
    DECLARE @sTmp0 nvarchar(4000)
    DECLARE @url nvarchar(4000)
    DECLARE @domain nvarchar(4000)
    DECLARE @baseDomain nvarchar(4000)

    -- Object tokens returned by sp_OACreate.
    DECLARE @spider int
    DECLARE @seenDomains int
    DECLARE @seedUrls int

    DECLARE @success int
    DECLARE @i int
    -- Each loop bound / flag gets its own variable so that one call's output
    -- can never clobber the condition of an enclosing loop.
    DECLARE @numSeedUrls int
    DECLARE @numOutboundLinks int
    DECLARE @lastFromCache int
    DECLARE @alreadySeen int

    -- Use "Chilkat_9_5_0.Spider" for versions of Chilkat < 10.0.0
    EXEC @hr = sp_OACreate 'Chilkat.Spider', @spider OUT
    IF @hr <> 0
    BEGIN
        PRINT 'Failed to create ActiveX component'
        RETURN
    END

    -- Use "Chilkat_9_5_0.StringArray" for versions of Chilkat < 10.0.0
    EXEC @hr = sp_OACreate 'Chilkat.StringArray', @seenDomains OUT
    IF @hr <> 0
    BEGIN
        PRINT 'Failed to create ActiveX component'
        -- Release what was already created before bailing out.
        EXEC @hr = sp_OADestroy @spider
        RETURN
    END

    -- Use "Chilkat_9_5_0.StringArray" for versions of Chilkat < 10.0.0
    EXEC @hr = sp_OACreate 'Chilkat.StringArray', @seedUrls OUT
    IF @hr <> 0
    BEGIN
        PRINT 'Failed to create ActiveX component'
        -- Release what was already created before bailing out.
        EXEC @hr = sp_OADestroy @spider
        EXEC @hr = sp_OADestroy @seenDomains
        RETURN
    END

    -- Presumably makes both arrays ignore duplicate entries -- confirm against
    -- the Chilkat StringArray reference.
    EXEC sp_OASetProperty @seenDomains, 'Unique', 1
    EXEC sp_OASetProperty @seedUrls, 'Unique', 1

    -- You will need to change the start URL to something else...
    EXEC sp_OAMethod @seedUrls, 'Append', @success OUT, 'http://something.whateverYouWant.com/'

    -- Set outbound URL exclude patterns
    -- URLs matching any of these patterns will not be added to the
    -- collection of outbound links.
    EXEC sp_OAMethod @spider, 'AddAvoidOutboundLinkPattern', NULL, '*?id=*'
    EXEC sp_OAMethod @spider, 'AddAvoidOutboundLinkPattern', NULL, '*.mypages.*'
    EXEC sp_OAMethod @spider, 'AddAvoidOutboundLinkPattern', NULL, '*.personal.*'
    EXEC sp_OAMethod @spider, 'AddAvoidOutboundLinkPattern', NULL, '*.comcast.*'
    EXEC sp_OAMethod @spider, 'AddAvoidOutboundLinkPattern', NULL, '*.aol.*'
    EXEC sp_OAMethod @spider, 'AddAvoidOutboundLinkPattern', NULL, '*~*'

    -- Use a cache so we don't have to re-fetch URLs previously fetched.
    EXEC sp_OASetProperty @spider, 'CacheDir', 'c:/spiderCache/'
    EXEC sp_OASetProperty @spider, 'FetchFromCache', 1
    EXEC sp_OASetProperty @spider, 'UpdateCache', 1

    -- Main loop: keep going while there are seed URLs left to process.
    EXEC sp_OAGetProperty @seedUrls, 'Count', @numSeedUrls OUT
    WHILE @numSeedUrls > 0
      BEGIN

        EXEC sp_OAMethod @seedUrls, 'Pop', @url OUT
        EXEC sp_OAMethod @spider, 'Initialize', NULL, @url

        -- Spider 5 URLs of this domain.
        -- but first, save the base domain in seenDomains
        EXEC sp_OAMethod @spider, 'GetUrlDomain', @domain OUT, @url
        EXEC sp_OAMethod @spider, 'GetBaseDomain', @sTmp0 OUT, @domain
        EXEC sp_OAMethod @seenDomains, 'Append', @success OUT, @sTmp0

        SELECT @i = 0
        WHILE @i <= 4
          BEGIN
            -- Reset first so a failed call cannot leave a stale 1 behind.
            SELECT @success = 0
            EXEC sp_OAMethod @spider, 'CrawlNext', @success OUT
            IF @success = 1
              BEGIN

                -- Display the URL we just crawled.
                EXEC sp_OAGetProperty @spider, 'LastUrl', @sTmp0 OUT
                PRINT @sTmp0

                -- If the last URL was retrieved from cache,
                -- we won't wait.  Otherwise we'll wait 1 second
                -- before fetching the next URL.
                EXEC sp_OAGetProperty @spider, 'LastFromCache', @lastFromCache OUT
                IF @lastFromCache <> 1
                  BEGIN
                    EXEC sp_OAMethod @spider, 'SleepMs', NULL, 1000
                  END

              END
            ELSE
              BEGIN
                -- Nothing more to crawl for this site (or the call failed).
                BREAK
              END

            SELECT @i = @i + 1
          END

        -- Add the outbound links to seedUrls, except
        -- for the domains we've already seen.
        EXEC sp_OAGetProperty @spider, 'NumOutboundLinks', @numOutboundLinks OUT
        SELECT @i = 0
        WHILE @i < @numOutboundLinks
          BEGIN

            EXEC sp_OAMethod @spider, 'GetOutboundLink', @url OUT, @i
            EXEC sp_OAMethod @spider, 'GetUrlDomain', @domain OUT, @url
            EXEC sp_OAMethod @spider, 'GetBaseDomain', @baseDomain OUT, @domain
            EXEC sp_OAMethod @seenDomains, 'Contains', @alreadySeen OUT, @baseDomain
            IF @alreadySeen = 0
              BEGIN
                -- Don't let our list of seedUrls grow too large.
                EXEC sp_OAGetProperty @seedUrls, 'Count', @numSeedUrls OUT
                IF @numSeedUrls < 1000
                  BEGIN
                    EXEC sp_OAMethod @seedUrls, 'Append', @success OUT, @url
                  END
              END

            SELECT @i = @i + 1
          END

        -- Re-read the queue size so the outer loop condition reflects the
        -- Pop above and any Appends made while processing outbound links.
        EXEC sp_OAGetProperty @seedUrls, 'Count', @numSeedUrls OUT

      END

    EXEC @hr = sp_OADestroy @spider
    EXEC @hr = sp_OADestroy @seenDomains
    EXEC @hr = sp_OADestroy @seedUrls


END
GO

 

© 2000-2024 Chilkat Software, Inc. All Rights Reserved.