Chilkat2-Python
Chilkat2-Python
Google Drive - Build a Local Cache of Metadata
See more Google Drive Examples
This example demonstrates how to download the metadata for all files in a Google Drive account and store it in a local filesystem cache. The cache can then be used to look up file information without having to query Google Drive.
Chilkat Chilkat2-Python Downloads
import sys
import chilkat2
# This example requires the Chilkat API to have been previously unlocked.
# See Global Unlock Sample for sample code.

# This example uses a previously obtained access token having permission for the
# Google Drive scope.
gAuth = chilkat2.AuthGoogle()
gAuth.AccessToken = "GOOGLE_DRIVE_ACCESS_TOKEN"

rest = chilkat2.Rest()

# Connect using TLS (port 443, third argument True), with auto-reconnect enabled.
bAutoReconnect = True
success = rest.Connect("www.googleapis.com", 443, True, bAutoReconnect)
if not success:
    # Without a connection every request below would fail; report why and stop here.
    print(rest.LastErrorText)
    sys.exit()

# Provide the authentication credentials (i.e. the access token).
rest.SetAuthGoogle(gAuth)
# -------------------------------------------------------------------
# Set up the local cache object and the expiration time used for the entries
# written by this script.
#
# The cache is configured with a root directory and a number of cache levels:
#   level 0 (small caches):  all cache files are in the root directory.
#   level 1 (medium caches): cache files are located in 256 sub-directories from the root.
#   level 2 (large caches):  cache files are located in 256x256 sub-directories two levels down from the root.
gdCache = chilkat2.Cache()
gdCache.Level = 0
# Use a root directory that makes sense on your operating system.
gdCache.AddRoot("C:/ckCache/googleDrive")
# Because the cache is being re-built from scratch, first delete its entire contents.
# NOTE(review): numCacheFilesDeleted is not read again in this script; it is
# presumably the number of cache files removed -- confirm against the Cache docs.
numCacheFilesDeleted = gdCache.DeleteAll()
# Create a date/time object set to 7 days from the current date/time.
# It is passed as the expiration to the SaveTextDt calls further down.
dtExpire = chilkat2.CkDateTime()
dtExpire.SetFromCurrentSystemTime()
dtExpire.AddDays(7)
# Request ALL possible metadata fields for each file.
# (If no fields are indicated, then only the basic fields are returned.)
# The names are joined into the comma-separated form used for the "fields" query parameter.
allFields = ",".join((
    "appProperties",
    "capabilities",
    "contentHints",
    "createdTime",
    "description",
    "explicitlyTrashed",
    "fileExtension",
    "folderColorRgb",
    "fullFileExtension",
    "headRevisionId",
    "iconLink",
    "id",
    "imageMediaMetadata",
    "isAppAuthorized",
    "kind",
    "lastModifyingUser",
    "md5Checksum",
    "mimeType",
    "modifiedByMeTime",
    "modifiedTime",
    "name",
    "originalFilename",
    "ownedByMe",
    "owners",
    "parents",
    "permissions",
    "properties",
    "quotaBytesUsed",
    "shared",
    "sharedWithMeTime",
    "sharingUser",
    "size",
    "spaces",
    "starred",
    "thumbnailLink",
    "trashed",
    "version",
    "videoMediaMetadata",
    "viewedByMe",
    "viewedByMeTime",
    "viewersCanCopyContent",
    "webContentLink",
    "webViewLink",
    "writersCanShare",
))

# Master record: one JSON object holding two parallel arrays that are filled in
# as the script walks the Google Drive account.
#   "fileIds"   -- every file ID found in the account
#   "filePaths" -- the path built for each of those file IDs
# The record is saved to the cache at the end under the key "AllGoogleDriveFileIds".
jsonMaster = chilkat2.JsonObject()
jsonMasterArr = chilkat2.JsonArray()
jsonMasterPaths = chilkat2.JsonArray()
jsonMaster.AppendArray2("fileIds", jsonMasterArr)
jsonMaster.AppendArray2("filePaths", jsonMasterPaths)
# Page through the account's file listing, collecting every file ID into jsonMasterArr.
# The default page size is 100, with a max of 1000.
rest.AddQueryParam("pageSize", "200")

json = chilkat2.JsonObject()

# Send the request for the 1st page, then keep going for as long as the most
# recent request succeeded with a 200 response.
pageNumber = 1
jsonResponse = rest.FullRequestNoBody("GET", "/drive/v3/files")
while rest.LastMethodSuccess and (rest.ResponseStatusCode == 200):
    print("---- Page " + str(pageNumber) + " ----")
    pageNumber += 1

    json.Load(jsonResponse)

    # Add each file ID on this page to the master list.
    # ("files[i]" is resolved using the index stored in json.I.)
    for idx in range(json.SizeOfArray("files")):
        json.I = idx
        jsonMasterArr.AddStringAt(-1, json.StringOf("files[i].id"))

    # If "nextPageToken" is present in the JSON response, use it as the "pageToken"
    # parameter of the next request. If it is absent, this was the last page.
    pageToken = json.StringOf("nextPageToken")
    if not json.LastMethodSuccess:
        break

    rest.ClearAllQueryParams()
    rest.AddQueryParam("pageSize", "200")
    rest.AddQueryParam("pageToken", pageToken)
    jsonResponse = rest.FullRequestNoBody("GET", "/drive/v3/files")

# Check to see if the above loop exited with errors...
# First case: the request itself failed.
if not rest.LastMethodSuccess:
    print(rest.LastErrorText)
    sys.exit()

# Second case: a response arrived, but a successful response has status code 200.
if rest.ResponseStatusCode != 200:
    print("response status code = " + str(rest.ResponseStatusCode))
    print("response status text = " + rest.ResponseStatusText)
    print("response header: " + rest.ResponseHeader)
    print("response JSON: " + jsonResponse)
    sys.exit()
# Iterate over the file IDs and download the metadata for each, saving each to the cache.
# Also keep in-memory hash entries of the name and parents[0] so the path-->fileId
# cache entries can be built quickly afterwards. (Given that the Google Drive REST API
# uses fileIds, this gives an easy way to look up a fileId based on a filePath.)
hashTable = chilkat2.Hashtable()
# Set the capacity of the hash table to something reasonable for the number of files
# to be hashed.
hashTable.ClearWithNewCapacity(521)

sbPathForFileId = chilkat2.StringBuilder()

# Used for storing the file name and parents[0] in the hashTable.
saFileInfo = chilkat2.StringArray()
saFileInfo.Unique = False

numFiles = jsonMaster.SizeOfArray("fileIds")
for i in range(numFiles):
    # "fileIds[i]" is resolved using the index stored in jsonMaster.I.
    jsonMaster.I = i
    fileId = jsonMaster.StringOf("fileIds[i]")

    sbPathForFileId.SetString("/drive/v3/files/")
    sbPathForFileId.Append(fileId)

    rest.ClearAllQueryParams()
    rest.AddQueryParam("fields", allFields)
    jsonResponse = rest.FullRequestNoBody("GET", sbPathForFileId.GetAsString())
    if (rest.LastMethodSuccess != True) or (rest.ResponseStatusCode != 200):
        # Stop immediately, BEFORE touching the cache: a failed or non-200 response
        # must not be stored as this file's metadata. The checks after the loop
        # report the error and exit.
        break

    # Save this file's metadata to the local cache.
    # The lookup key is the fileId.
    gdCache.SaveTextDt(fileId, dtExpire, "", jsonResponse)

    # Get this file's name and parents[0], and put this information
    # in the in-memory hashtable to be used when building paths.
    json.Load(jsonResponse)
    # NOTE(review): StringOf presumably yields None when the member is absent
    # (e.g. a file that has no "parents" entry) -- confirm against the JsonObject
    # reference. Falling back to "" keeps Append and the print below from failing.
    fileName = json.StringOf("name") or ""
    parentId = json.StringOf("parents[0]") or ""

    saFileInfo.Clear()
    saFileInfo.Append(fileName)
    saFileInfo.Append(parentId)
    hashTable.AddStr(fileId, saFileInfo.Serialize())

    print(fileName + ", " + parentId)

# Check to see if the above loop exited with errors...
# First case: the request itself failed.
if rest.LastMethodSuccess == False:
    print(rest.LastErrorText)
    sys.exit()

# Second case: a response arrived, but a successful response has status code 200.
if rest.ResponseStatusCode != 200:
    print("response status code = " + str(rest.ResponseStatusCode))
    print("response status text = " + rest.ResponseStatusText)
    print("response header: " + rest.ResponseHeader)
    print("response JSON: " + jsonResponse)
    sys.exit()
# With every fileId's name and first parent held in hashTable, build the
# directory path for each fileId.
# (Technically, a fileId can have multiple parents, which means it can be in multiple
# directories at once. Only the 0'th parent ID in the parents list is followed here.)
# The directory path for files in "My Drive" will be just the filename.
# For files in sub-directories, the path will be relative, such as "subdir1/subdir2/something.pdf"
#
print("---- building paths ----")

sbPath = chilkat2.StringBuilder()

numFiles = jsonMaster.SizeOfArray("fileIds")
for i in range(numFiles):
    # "fileIds[i]" is resolved using the index stored in jsonMaster.I.
    jsonMaster.I = i
    leafId = jsonMaster.StringOf("fileIds[i]")

    # Walk from the file up through its chain of parents, building the path
    # right-to-left by prepending each name.
    sbPath.Clear()
    nodeId = leafId
    while True:
        saFileInfo.Clear()
        saFileInfo.AppendSerialized(hashTable.LookupStr(nodeId))

        # Prepend this file or directory name (entry 0 of the stored info).
        sbPath.Prepend(saFileInfo.GetString(0))

        # Move up to the parent fileId (entry 1 of the stored info).
        nodeId = saFileInfo.GetString(1)

        # If this fileId is not in the hashtable, then it's the fileId for "My Drive",
        # and the path is complete.
        if not hashTable.Contains(nodeId):
            break
        sbPath.Prepend("/")

    fullPath = sbPath.GetAsString()
    print(str(i) + ": " + fullPath)

    # Store the filePath --> fileId mapping in the local cache, and record the
    # path in the master list.
    gdCache.SaveTextDt(fullPath, dtExpire, "", leafId)
    jsonMasterPaths.AddStringAt(-1, fullPath)
# Write the master list of file IDs and file paths to the local cache (with no
# expiration) under the key "AllGoogleDriveFileIds", emitted in non-compact form.
jsonMaster.EmitCompact = False
strJsonMaster = jsonMaster.Emit()
gdCache.SaveTextNoExpire("AllGoogleDriveFileIds", "", strJsonMaster)

print("JSON Master Record:", strJsonMaster, sep="\n")

# An application can load the JSON master record and iterate over all the files
# in Google Drive by file ID, or by path.
# The JSON Master Cache Record looks something like this:
# {
#   "fileIds": [
#     "0B53Q6OSTWYolQlExSlBQT1phZXM",
#     "0B53Q6OSTWYolVHRPVkxtYWFtZkk",
#     "0B53Q6OSTWYolRGZEV3ZGUTZfNFk",
#     "0B53Q6OSTWYolS2FXSjliMXQxSU0",
#     "0B53Q6OSTWYolZUhxckMzb0dRMzg",
#     "0B53Q6OSTWYolbUF6WS1Gei1oalk",
#     "0B53Q6OSTWYola296ODZUSm5GYU0",
#     "0B53Q6OSTWYolbTE3c3J5RHBUcHM",
#     "0B53Q6OSTWYolTmhybWJSUGd5Q2c",
#     "0B53Q6OSTWYolY2tPU1BnYW02T2c",
#     "0B53Q6OSTWYolTTBBR2NvUE81Zzg"
#   ],
#   "filePaths": [
#     "testFolder/abc/123/pigs.json",
#     "testFolder/starfish20.jpg",
#     "testFolder/penguins2.jpg",
#     "testFolder/starfish.jpg",
#     "testFolder/abc/123/starfish.jpg",
#     "testFolder/abc/123/penguins.jpg",
#     "testFolder/abc/123",
#     "testFolder/abc",
#     "testFolder/testHello.txt",
#     "testFolder",
#     "helloWorld.txt"
#   ]
# }

print("Entire cache rebuilt...")