(Ruby) Google Drive - Build a Local Cache of Metadata

This example demonstrates how to download the metadata for all files in a Google Drive account to create a local filesystem cache with the information. The cache can be used to fetch information without having to query Google Drive.

Note: This example requires Chilkat v11.0.0 or greater.

Chilkat Ruby Downloads

Install from rubygems.org:
gem install chilkat
Or download the Chilkat Ruby library for Windows, macOS, Linux, or Alpine Linux.

require 'chilkat'

# This example assumes the Chilkat API has already been unlocked.
# See Global Unlock Sample for sample code.

# This example uses a previously obtained access token having permission for the
# Google Drive scope.
gAuth = Chilkat::CkAuthGoogle.new()
gAuth.put_AccessToken("GOOGLE_DRIVE_ACCESS_TOKEN")

rest = Chilkat::CkRest.new()

# Connect using TLS on port 443.  bAutoReconnect is passed as the auto-reconnect flag.
bAutoReconnect = true
success = rest.Connect("www.googleapis.com",443,true,bAutoReconnect)
if (success != true)
    # Without a connection none of the requests below can succeed, so stop here
    # and report why the connect failed.
    print rest.lastErrorText() + "\n"
    exit
end

# Provide the authentication credentials (i.e. the access token)
success = rest.SetAuthGoogle(gAuth)
if (success != true)
    print rest.lastErrorText() + "\n"
    exit
end

# -------------------------------------------------------------------
# Set up the local cache object: where its root directory is, and how many
# directory levels it uses.
#   Level 0 (small caches):  all cache files are in the root directory.
#   Level 1 (medium caches): cache files are in 256 sub-directories under the root.
#   Level 2 (large caches):  cache files are in 256x256 sub-directories, two levels
#                            down from the root.
gdCache = Chilkat::CkCache.new
gdCache.put_Level(0)
# Choose a root directory that makes sense on your operating system.
gdCache.AddRoot("C:/ckCache/googleDrive")

# The cache is being rebuilt from scratch, so delete its entire contents first.
purgedCount = gdCache.DeleteAll()

# Expiration date/time: 7 days after the current system date/time.
dtExpire = Chilkat::CkDateTime.new
dtExpire.SetFromCurrentSystemTime()
dtExpire.AddDays(7)

# Ask for ALL possible metadata fields.
# (If no fields are indicated, only the basic fields are returned.)
allFields = "appProperties,capabilities,contentHints,createdTime,description,explicitlyTrashed,fileExtension,folderColorRgb,fullFileExtension,headRevisionId,iconLink,id,imageMediaMetadata,isAppAuthorized,kind,lastModifyingUser,md5Checksum,mimeType,modifiedByMeTime,modifiedTime,name,originalFilename,ownedByMe,owners,parents,permissions,properties,quotaBytesUsed,shared,sharedWithMeTime,sharingUser,size,spaces,starred,thumbnailLink,trashed,version,videoMediaMetadata,viewedByMe,viewedByMeTime,viewersCanCopyContent,webContentLink,webViewLink,writersCanShare"

# Keep a master list of fileIds as we iterate over all the files in this Google Drive account.
# This master list is also saved to the cache under the key "AllGoogleDriveFileIds"
# once the paths have been built.
jsonMaster = Chilkat::CkJsonObject.new()

# jsonMasterArr is attached to jsonMaster as the "fileIds" member; the IDs added through
# it below are later read back with jsonMaster ("fileIds[i]").
jsonMasterArr = Chilkat::CkJsonArray.new()
jsonMaster.AppendArray2("fileIds",jsonMasterArr)

# Also keep a list of file paths.
jsonMasterPaths = Chilkat::CkJsonArray.new()
jsonMaster.AppendArray2("filePaths",jsonMasterPaths)

# The default page size is 100, with a max of 1000.
rest.AddQueryParam("pageSize","200")

json = Chilkat::CkJsonObject.new()

# Send the request for the 1st page.
jsonResponse = rest.fullRequestNoBody("GET","/drive/v3/files")

pageNumber = 1

# Only process a response that was received successfully AND has HTTP status 200.
# This must use && rather than the keyword "and": "x = a and b" parses as "(x = a) and b",
# which would silently drop the status-code test from the loop condition.
bContinueLoop = (rest.get_LastMethodSuccess() == true) && (rest.get_ResponseStatusCode() == 200)

while bContinueLoop == true

    print "---- Page " + pageNumber.to_s() + " ----" + "\n"
    json.Load(jsonResponse)

    numFiles = json.SizeOfArray("files")
    i = 0
    while i < numFiles
        # Add this file ID to the master list.
        # (put_I sets the index that the "[i]" in the JSON path refers to.)
        json.put_I(i)
        jsonMasterArr.AddStringAt(-1,json.stringOf("files[i].id"))

        i = i + 1
    end

    # Get the next page of files.
    # If the "nextPageToken" is present in the JSON response, then use it in the "pageToken" parameter
    # for the next request.   If no "nextPageToken" was present, then this was the last page of files.
    pageToken = json.stringOf("nextPageToken")
    bContinueLoop = false
    bHasMorePages = json.get_LastMethodSuccess()
    if (bHasMorePages == true)
        rest.ClearAllQueryParams()
        rest.AddQueryParam("pageSize","200")
        rest.AddQueryParam("pageToken",pageToken)
        jsonResponse = rest.fullRequestNoBody("GET","/drive/v3/files")
        bContinueLoop = (rest.get_LastMethodSuccess() == true) && (rest.get_ResponseStatusCode() == 200)
        pageNumber = pageNumber + 1
    end

end

# Check to see if the above loop exited because a request could not be sent/received...
if (rest.get_LastMethodSuccess() == false)
    print rest.lastErrorText() + "\n"
    exit
end

# ...or because a response came back with an error status.
# A successful response will have a status code equal to 200.
if (rest.get_ResponseStatusCode() != 200)
    print "response status code = " + rest.get_ResponseStatusCode().to_s() + "\n"
    print "response status text = " + rest.responseStatusText() + "\n"
    print "response header: " + rest.responseHeader() + "\n"
    print "response JSON: " + jsonResponse + "\n"
    exit
end

# Iterate over the file IDs and download the metadata for each, saving each to the cache...
# Also, keep in-memory hash entries of the name and parents[0] so we can quickly
# build the path-->fileId cache entries. (Given that the Google Drive REST API uses
# fileIds, this gives us an easy way to lookup a fileId based on a filePath.)
hashTable = Chilkat::CkHashtable.new()
# Set the capacity of the hash table to something reasonable for the number of files
# to be hashed.
hashTable.ClearWithNewCapacity(521)

sbPathForFileId = Chilkat::CkStringBuilder.new()

# Used for storing the file name and parents[0] in the hashTable.
saFileInfo = Chilkat::CkStringArray.new()
saFileInfo.put_Unique(false)

numFiles = jsonMaster.SizeOfArray("fileIds")
i = 0
while i < numFiles
    # put_I sets the index that the "[i]" in the JSON path refers to.
    jsonMaster.put_I(i)
    fileId = jsonMaster.stringOf("fileIds[i]")
    sbPathForFileId.SetString("/drive/v3/files/")
    sbPathForFileId.Append(fileId)

    rest.ClearAllQueryParams()
    rest.AddQueryParam("fields",allFields)
    jsonResponse = rest.fullRequestNoBody("GET",sbPathForFileId.getAsString())

    # Stop immediately on a failed request or a non-200 response, BEFORE anything is
    # written: a failed/error response must not be stored in the cache or the hashtable.
    # The checks after the loop report the error and exit.
    if ((rest.get_LastMethodSuccess() != true) or (rest.get_ResponseStatusCode() != 200))
        break
    end

    # Save this file's metadata to the local cache.
    # The lookup key is the fileId.
    gdCache.SaveTextDt(fileId,dtExpire,"",jsonResponse)

    # Get this file's name and parents[0], and put this information
    # in our in-memory hashtable to be used below..
    json.Load(jsonResponse)

    fileName = json.stringOf("name")
    parentId = json.stringOf("parents[0]")

    saFileInfo.Clear()
    saFileInfo.Append(fileName)
    saFileInfo.Append(parentId)
    hashTable.AddStr(fileId,saFileInfo.serialize())

    # to_s keeps the progress line from raising when a member is absent
    # (e.g. a file with no "parents" entry) and the lookup yields nil.
    print fileName.to_s + ", " + parentId.to_s + "\n"

    i = i + 1
end

# Check to see if the above loop exited because a request could not be sent/received...
if (rest.get_LastMethodSuccess() == false)
    print rest.lastErrorText() + "\n"
    exit
end

# ...or because a response came back with an error status.
# A successful response will have a status code equal to 200.
if (rest.get_ResponseStatusCode() != 200)
    print "response status code = " + rest.get_ResponseStatusCode().to_s() + "\n"
    print "response status text = " + rest.responseStatusText() + "\n"
    print "response header: " + rest.responseHeader() + "\n"
    print "response JSON: " + jsonResponse + "\n"
    exit
end

# With every fileId's metadata cached, work out a directory path for each fileId.
#
# (Technically, a fileId can have multiple parents, which means it can be in multiple
# directories at once.  Only the 0'th parent ID in the parents list is followed here.)
#
# A file directly in "My Drive" gets a path that is just its filename.
# A file in sub-directories gets a relative path such as "subdir1/subdir2/something.pdf"

print "---- building paths ----" + "\n"

pathBuilder = Chilkat::CkStringBuilder.new
idCount = jsonMaster.SizeOfArray("fileIds")
(0...idCount).each do |idx|
    # put_I sets the index that the "[i]" in the JSON path refers to.
    jsonMaster.put_I(idx)
    pathBuilder.Clear()

    leafId = jsonMaster.stringOf("fileIds[i]")

    # Walk upward from the file through its parents[0] chain, prepending each name
    # so the finished path reads from the top-most folder down to the file.
    currentId = leafId
    loop do
        saFileInfo.Clear()
        saFileInfo.AppendSerialized(hashTable.lookupStr(currentId))
        # Entry 0 is this file's (or directory's) name; put it at the front of the path.
        pathBuilder.Prepend(saFileInfo.getString(0))
        # Entry 1 is the parent's fileId.
        currentId = saFileInfo.getString(1)
        # A parent that is not in the hashtable is the fileId for "My Drive": the path is complete.
        break if hashTable.Contains(currentId) == false
        pathBuilder.Prepend("/")
    end

    fullPath = pathBuilder.getAsString()
    print idx.to_s + ": " + fullPath + "\n"

    # Record the filePath --> fileId mapping in the local cache.
    gdCache.SaveTextDt(fullPath,dtExpire,"",leafId)

    jsonMasterPaths.AddStringAt(-1,fullPath)
end

# Write the master list of file IDs and file paths to the local cache (non-compact JSON).
jsonMaster.put_EmitCompact(false)
masterText = jsonMaster.emit()
gdCache.SaveTextNoExpire("AllGoogleDriveFileIds","",masterText)
print "JSON Master Record:" + "\n"
print masterText + "\n"

# An application can load the JSON master record and iterate over all the files
# in Google Drive by file ID, or by path.
# The JSON Master Cache Record looks something like this:
# {
#   "fileIds": [
#     "0B53Q6OSTWYolQlExSlBQT1phZXM",
#     "0B53Q6OSTWYolVHRPVkxtYWFtZkk",
#     "0B53Q6OSTWYolRGZEV3ZGUTZfNFk",
#     "0B53Q6OSTWYolS2FXSjliMXQxSU0",
#     "0B53Q6OSTWYolZUhxckMzb0dRMzg",
#     "0B53Q6OSTWYolbUF6WS1Gei1oalk",
#     "0B53Q6OSTWYola296ODZUSm5GYU0",
#     "0B53Q6OSTWYolbTE3c3J5RHBUcHM",
#     "0B53Q6OSTWYolTmhybWJSUGd5Q2c",
#     "0B53Q6OSTWYolY2tPU1BnYW02T2c",
#     "0B53Q6OSTWYolTTBBR2NvUE81Zzg"
#   ],
#   "filePaths": [
#     "testFolder/abc/123/pigs.json",
#     "testFolder/starfish20.jpg",
#     "testFolder/penguins2.jpg",
#     "testFolder/starfish.jpg",
#     "testFolder/abc/123/starfish.jpg",
#     "testFolder/abc/123/penguins.jpg",
#     "testFolder/abc/123",
#     "testFolder/abc",
#     "testFolder/testHello.txt",
#     "testFolder",
#     "helloWorld.txt"
#   ]
# }

print "Entire cache rebuilt..." + "\n"