Skip to content

Instantly share code, notes, and snippets.

@bolaurent
Last active December 20, 2019 22:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bolaurent/802d4e9cd7941e13cb5051bece0682c1 to your computer and use it in GitHub Desktop.
Save bolaurent/802d4e9cd7941e13cb5051bece0682c1 to your computer and use it in GitHub Desktop.
Migrate Salesforce.com GoogleDocs to Salesforce Files Connect ContentVersion records
#!/usr/local/bin/python3
# Enable 'Create Audit Fields' [https://help.salesforce.com/articleView?id=000334139&type=1&mode=1]
# Create a custom field Original_Record_Id__c on ContentVersion (you must be in Lightning to do this)
# Find the ID of your ExternalDataSource for Google Drive and update EXTERNAL_SOURCE_ID
# Execute this query and save result to GoogleDocs.csv.
# SELECT
# Id,Name,Url,OwnerId,ParentId,CreatedById,CreatedDate,LastModifiedById,LastModifiedDate
# FROM GoogleDoc
# ORDER BY Id
# Then run this script; it will consume GoogleDocs.csv and write ContentVersion.csv.
# Check ContentVersion.csv; are there any rows with no ExternalDocumentInfo2?
# That is the Id in Google docs; it means that you probably need to add some
# regex patterns.
#
# Check the length of ExternalDocumentInfo2 (in the column named Length).
# If there are some rows that are extra long or short, check whether the
# GoogleDoc actually works. Salesforce did not impose any validation rules
# or sanity checks on the urls that users could save there.
# Some of the old format urls fail when migrated to ContentVersion;
# We put the old GoogleDoc record Id and the url into the Description,
# which might help people find their way.
import csv
import re
EXTERNAL_SOURCE_ID = '0XC3z000000GmbvGAC'
patterns = [
'.*/d/(.+)/.*',
'.+/d/(.+)',
'.*key=(.+)#.*',
'.+folders/(.+)',
'.+\?docid=(.+)',
'.+/document/d/(.+)',
'.+&key=(.+)',
'.+\?key=(.+)',
'.+/folderview\?id=(.+)&.+',
'.+/open\?id=(.+)',
'.+&id=(.+)'
]
cvsFieldNames = [
'ContentLocation',
'Origin',
'OwnerId',
'Title',
'PathOnClient',
'ExternalDataSourceId',
'FirstPublishLocationId',
'Original_Record_Id__c',
'CreatedById',
'CreatedDate',
'LastModifiedById',
'LastModifiedDate',
'Description',
'ExternalDocumentInfo1',
'ExternalDocumentInfo2',
'Pattern',
'Length'
]
def main():
patternsByName = {item:re.compile(item) for item in patterns}
with open('ContentVersion.csv', 'w', newline='') as csvOutfile:
writer = csv.DictWriter(csvOutfile, fieldnames=cvsFieldNames)
writer.writeheader()
with open('GoogleDocs.csv', newline='') as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
url = row['Url']
gdocid = ''
matchedpattern = ''
for patternname, pattern in patternsByName.items():
m = pattern.match(url)
if m:
gdocid = m.group(1)
matchedpattern = patternname
break
writer.writerow({
'ContentLocation' : 'E',
'Origin' : 'H',
'OwnerId' : row['OwnerId'],
'Title' : row['Name'],
'PathOnClient' : row['Name'],
'ExternalDataSourceId' : EXTERNAL_SOURCE_ID,
'FirstPublishLocationId' : row['ParentId'],
'Original_Record_Id__c' : row['Id'],
'CreatedById' : row['CreatedById'],
'CreatedDate' : row['CreatedDate'],
'LastModifiedById' : row['LastModifiedById'],
'LastModifiedDate' : row['LastModifiedDate'],
'ExternalDocumentInfo1' : row['Url'],
'ExternalDocumentInfo2' : gdocid,
'Pattern' : matchedpattern,
'Length' : len(gdocid) if gdocid is not None else 0,
'Description' : 'Migrated from GoogleDoc Id {}, url {}'.format(row['Id'], row['Url']) +
"\n If the 'Open' link does not work, try pasting this link into your browser."
})
if __name__ == "__main__":
main()
pass
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment