CkPython
Google Drive - Build a Local Cache of Metadata

This example demonstrates how to download the metadata for all files in a Google Drive account to create a local filesystem cache with the information. The cache can be used to fetch information without having to query Google Drive.
Chilkat CkPython Downloads

Download Chilkat for CkPython
CkPython
import sys
import chilkat

# This example requires the Chilkat API to have been previously unlocked.
# See Global Unlock Sample for sample code.

# This example uses a previously obtained access token having permission for the
# Google Drive scope.
gAuth = chilkat.CkAuthGoogle()
gAuth.put_AccessToken("GOOGLE_DRIVE_ACCESS_TOKEN")

rest = chilkat.CkRest()

# Connect using TLS.
bAutoReconnect = True
success = rest.Connect("www.googleapis.com",443,True,bAutoReconnect)
if not success:
    # Report the connection failure here instead of letting it surface later
    # as a failed request with a less specific error.
    print(rest.lastErrorText())
    sys.exit()

# Provide the authentication credentials (i.e. the access token)
rest.SetAuthGoogle(gAuth)

# -------------------------------------------------------------------
# Set up the local cache: choose how many directory levels it uses and where its root lives.
#   Level 0 (small caches):  all cache files are in the root directory.
#   Level 1 (medium caches): cache files are located in 256 sub-directories from the root.
#   Level 2 (large caches):  cache files are located in 256x256 sub-directories two levels down.
gdCache = chilkat.CkCache()
gdCache.put_Level(0)
# Pick a root directory that makes sense on your operating system.
gdCache.AddRoot("C:/ckCache/googleDrive")

# This script rebuilds the cache from scratch, so begin by deleting whatever it currently holds.
numCacheFilesDeleted = gdCache.DeleteAll()

# Expiration date/time applied to the cache entries: 7 days from the current date/time.
dtExpire = chilkat.CkDateTime()
dtExpire.SetFromCurrentSystemTime()
dtExpire.AddDays(7)

# Request ALL possible metadata fields.
# If no fields are indicated, then only the basic fields are returned.
allFields = ",".join([
    "appProperties",
    "capabilities",
    "contentHints",
    "createdTime",
    "description",
    "explicitlyTrashed",
    "fileExtension",
    "folderColorRgb",
    "fullFileExtension",
    "headRevisionId",
    "iconLink",
    "id",
    "imageMediaMetadata",
    "isAppAuthorized",
    "kind",
    "lastModifyingUser",
    "md5Checksum",
    "mimeType",
    "modifiedByMeTime",
    "modifiedTime",
    "name",
    "originalFilename",
    "ownedByMe",
    "owners",
    "parents",
    "permissions",
    "properties",
    "quotaBytesUsed",
    "shared",
    "sharedWithMeTime",
    "sharingUser",
    "size",
    "spaces",
    "starred",
    "thumbnailLink",
    "trashed",
    "version",
    "videoMediaMetadata",
    "viewedByMe",
    "viewedByMeTime",
    "viewersCanCopyContent",
    "webContentLink",
    "webViewLink",
    "writersCanShare",
])

# The master record collects every fileId seen while paging through this Google Drive account.
# It is later saved to the cache under the key "AllGoogleDriveFileIds".
jsonMaster = chilkat.CkJsonObject()

jsonMasterArr = chilkat.CkJsonArray()
jsonMaster.AppendArray2("fileIds",jsonMasterArr)

# The same record also holds a list of file paths.
jsonMasterPaths = chilkat.CkJsonArray()
jsonMaster.AppendArray2("filePaths",jsonMasterPaths)

# Ask for 200 files per page (the default page size is 100, with a max of 1000).
rest.AddQueryParam("pageSize","200")

json = chilkat.CkJsonObject()

# Request the 1st page.
pageNumber = 1
jsonResponse = rest.fullRequestNoBody("GET","/drive/v3/files")
bContinueLoop = rest.get_LastMethodSuccess() and (rest.get_ResponseStatusCode() == 200)

while bContinueLoop:

    print("---- Page {0} ----".format(pageNumber))
    json.Load(jsonResponse)

    # Add the ID of every file listed on this page to the master list.
    for i in range(json.SizeOfArray("files")):
        json.put_I(i)
        jsonMasterArr.AddStringAt(-1,json.stringOf("files[i].id"))

    # A "nextPageToken" in the JSON response means another page exists; it is sent back
    # as the "pageToken" parameter of the next request.  No token means this was the last page.
    pageToken = json.stringOf("nextPageToken")
    bHasMorePages = json.get_LastMethodSuccess()
    if not bHasMorePages:
        bContinueLoop = False
        break

    rest.ClearAllQueryParams()
    rest.AddQueryParam("pageSize","200")
    rest.AddQueryParam("pageToken",pageToken)
    jsonResponse = rest.fullRequestNoBody("GET","/drive/v3/files")
    bContinueLoop = rest.get_LastMethodSuccess() and (rest.get_ResponseStatusCode() == 200)
    pageNumber = pageNumber + 1

# Did the paging loop stop because a request could not be completed?
if not rest.get_LastMethodSuccess():
    print(rest.lastErrorText())
    sys.exit()

# Did the paging loop stop because of an error response?
# A successful response will have a status code equal to 200.
statusCode = rest.get_ResponseStatusCode()
if statusCode != 200:
    print("response status code = " + str(statusCode))
    print("response status text = " + rest.responseStatusText())
    print("response header: " + rest.responseHeader())
    print("response JSON: " + jsonResponse)
    sys.exit()

# Iterate over the file IDs and download the metadata for each, saving each to the cache...
# Also, keep in-memory hash entries of the name and parents[0] so we can quickly
# build the path-->fileId cache entries. (Given that the Google Drive REST API uses
# fileIds, this gives us an easy way to lookup a fileId based on a filePath.)
hashTable = chilkat.CkHashtable()
# Set the capacity of the hash table to something reasonable for the number of files
# to be hashed.
hashTable.ClearWithNewCapacity(521)

sbPathForFileId = chilkat.CkStringBuilder()

# Used for storing the file name and parents[0] in the hashTable.
# Entry 0 is the name and entry 1 is the parent fileId.
saFileInfo = chilkat.CkStringArray()
saFileInfo.put_Unique(False)

numFiles = jsonMaster.SizeOfArray("fileIds")
for i in range(numFiles):
    jsonMaster.put_I(i)
    fileId = jsonMaster.stringOf("fileIds[i]")
    sbPathForFileId.SetString("/drive/v3/files/")
    sbPathForFileId.Append(fileId)

    rest.ClearAllQueryParams()
    rest.AddQueryParam("fields",allFields)
    jsonResponse = rest.fullRequestNoBody("GET",sbPathForFileId.getAsString())

    # Stop at the first failed request, BEFORE touching the cache, so that an error
    # body (or a missing response) is never stored under this fileId.
    if not rest.get_LastMethodSuccess():
        print(rest.lastErrorText())
        sys.exit()

    # A successful response will have a status code equal to 200.
    if rest.get_ResponseStatusCode() != 200:
        print("response status code = " + str(rest.get_ResponseStatusCode()))
        print("response status text = " + rest.responseStatusText())
        print("response header: " + rest.responseHeader())
        print("response JSON: " + jsonResponse)
        sys.exit()

    # Save this file's metadata to the local cache.
    # The lookup key is the fileId.
    if not gdCache.SaveTextDt(fileId,dtExpire,"",jsonResponse):
        # A cache that cannot be written defeats the purpose of this script.
        print(gdCache.lastErrorText())
        sys.exit()

    # Get this file's name and parents[0], and put this information
    # in our in-memory hashtable to be used below..
    json.Load(jsonResponse)

    # The string getters yield None when the member is absent (e.g. an item with no
    # "parents" entry); normalize to "" so the concatenation below cannot raise.
    fileName = json.stringOf("name") or ""
    parentId = json.stringOf("parents[0]") or ""

    saFileInfo.Clear()
    saFileInfo.Append(fileName)
    saFileInfo.Append(parentId)
    hashTable.AddStr(fileId,saFileInfo.serialize())

    print(fileName + ", " + parentId)

# Now that all the fileId's are in the cache, let's build the directory path
# for each fileID.

# (Technically, a fileId can have multiple parents, which means it can be in multiple directories
# at once.  This is only going to build directory paths following the 0'th parent ID in the parents list.)

# The directory path for files in "My Drive" will be just the filename.
# For files in sub-directories, the path will be relative, such as "subdir1/subdir2/something.pdf"
#
# NOTE(review): two items that resolve to the same path share one cache key, so the
# later one overwrites the earlier path-->fileId entry -- confirm this is acceptable.

print("---- building paths ----")

sbPath = chilkat.CkStringBuilder()
numFiles = jsonMaster.SizeOfArray("fileIds")
for i in range(numFiles):
    jsonMaster.put_I(i)
    fileId = jsonMaster.stringOf("fileIds[i]")

    sbPath.Clear()

    # Walk up the parent chain starting at the file itself, prepending each name.
    # A separate variable is used for the walk so fileId keeps the original file's ID.
    ancestorId = fileId
    while True:
        saFileInfo.Clear()
        saFileInfo.AppendSerialized(hashTable.lookupStr(ancestorId))
        # Prepend this file or directory name.
        sbPath.Prepend(saFileInfo.getString(0))
        # Get the parent fileId
        ancestorId = saFileInfo.getString(1)
        # If there is no parent, or the parent is not in the hashtable (then it's the
        # fileId for "My Drive"), we are finished.
        if not ancestorId or not hashTable.Contains(ancestorId):
            break
        sbPath.Prepend("/")

    filePath = sbPath.getAsString()
    print(str(i) + ": " + filePath)

    # Store the filePath --> fileId mapping in our local cache.
    if not gdCache.SaveTextDt(filePath,dtExpire,"",fileId):
        print(gdCache.lastErrorText())
        sys.exit()

    jsonMasterPaths.AddStringAt(-1,filePath)

# Save the master list of file IDs and file paths to the local cache.
jsonMaster.put_EmitCompact(False)
strJsonMaster = jsonMaster.emit()
success = gdCache.SaveTextNoExpire("AllGoogleDriveFileIds","",strJsonMaster)
if not success:
    # Do not report a rebuilt cache when the master record could not be written.
    print(gdCache.lastErrorText())
    sys.exit()

print("JSON Master Record:")
print(strJsonMaster)

# An application can load the JSON master record and iterate over all the files
# in Google Drive by file ID, or by path.
# The JSON Master Cache Record looks something like this:
# {
#   "fileIds": [
#     "0B53Q6OSTWYolQlExSlBQT1phZXM",
#     "0B53Q6OSTWYolVHRPVkxtYWFtZkk",
#     "0B53Q6OSTWYolRGZEV3ZGUTZfNFk",
#     "0B53Q6OSTWYolS2FXSjliMXQxSU0",
#     "0B53Q6OSTWYolZUhxckMzb0dRMzg",
#     "0B53Q6OSTWYolbUF6WS1Gei1oalk",
#     "0B53Q6OSTWYola296ODZUSm5GYU0",
#     "0B53Q6OSTWYolbTE3c3J5RHBUcHM",
#     "0B53Q6OSTWYolTmhybWJSUGd5Q2c",
#     "0B53Q6OSTWYolY2tPU1BnYW02T2c",
#     "0B53Q6OSTWYolTTBBR2NvUE81Zzg"
#   ],
#   "filePaths": [
#     "testFolder/abc/123/pigs.json",
#     "testFolder/starfish20.jpg",
#     "testFolder/penguins2.jpg",
#     "testFolder/starfish.jpg",
#     "testFolder/abc/123/starfish.jpg",
#     "testFolder/abc/123/penguins.jpg",
#     "testFolder/abc/123",
#     "testFolder/abc",
#     "testFolder/testHello.txt",
#     "testFolder",
#     "helloWorld.txt"
#   ]
# }

print("Entire cache rebuilt...")