Skip to content

Instantly share code, notes, and snippets.

Last active December 20, 2019 22:08
Show Gist options
  • Save bolaurent/802d4e9cd7941e13cb5051bece0682c1 to your computer and use it in GitHub Desktop.
Save bolaurent/802d4e9cd7941e13cb5051bece0682c1 to your computer and use it in GitHub Desktop.
Migrate GoogleDocs to Salesforce Files Connect ContentVersion records
# Enable 'Create Audit Fields' []
# Create a custom field Original_Record_Id__c on ContentVersion (you must be in Lightning to do this)
# Find the ID of your ExternalDataSource for Google Drive and update EXTERNAL_SOURCE_ID
# Execute this query and save result to GoogleDocs.csv.
# Id,Name,Url,OwnerId,ParentId,CreatedById,CreatedDate,LastModifiedById,LastModifiedDate
# FROM GoogleDoc
# Then run this script; it will consume GoogleDocs.csv and write ContentVersion.csv.
# Check ContentVersion.csv; are there any rows with no ExternalDocumentInfo2?
# That is the Id in Google docs; it means that you probably need to add some
# regex patterns.
# Check the length of ExternalDocumentInfo2 (in the column named Length).
# If there are some rows that are extra long or short, check whether the
# GoogleDoc actually works. Salesforce did not impose any validation rules
# or sanity checks on the urls that users could save there.
# Some of the old format urls fail when migrated to ContentVersion;
# We put the old GoogleDoc record Id and the url into the Description,
# which might help people find their way.
import csv
import re
patterns = [
cvsFieldNames = [
def main():
patternsByName = {item:re.compile(item) for item in patterns}
with open('ContentVersion.csv', 'w', newline='') as csvOutfile:
writer = csv.DictWriter(csvOutfile, fieldnames=cvsFieldNames)
with open('GoogleDocs.csv', newline='') as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
url = row['Url']
gdocid = ''
matchedpattern = ''
for patternname, pattern in patternsByName.items():
m = pattern.match(url)
if m:
gdocid =
matchedpattern = patternname
'ContentLocation' : 'E',
'Origin' : 'H',
'OwnerId' : row['OwnerId'],
'Title' : row['Name'],
'PathOnClient' : row['Name'],
'ExternalDataSourceId' : EXTERNAL_SOURCE_ID,
'FirstPublishLocationId' : row['ParentId'],
'Original_Record_Id__c' : row['Id'],
'CreatedById' : row['CreatedById'],
'CreatedDate' : row['CreatedDate'],
'LastModifiedById' : row['LastModifiedById'],
'LastModifiedDate' : row['LastModifiedDate'],
'ExternalDocumentInfo1' : row['Url'],
'ExternalDocumentInfo2' : gdocid,
'Pattern' : matchedpattern,
'Length' : len(gdocid) if gdocid is not None else 0,
'Description' : 'Migrated from GoogleDoc Id {}, url {}'.format(row['Id'], row['Url']) +
"\n If the 'Open' link does not work, try pasting this link into your browser."
if __name__ == "__main__":
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment