Migrate Salesforce.com GoogleDocs to Salesforce Files Connect ContentVersion records
#!/usr/local/bin/python3 | |
# Enable 'Create Audit Fields' [https://help.salesforce.com/articleView?id=000334139&type=1&mode=1] | |
# Create a custom field Original_Record_Id__c on ContentVersion (you must be in Lightning to do this) | |
# Find the ID of your ExternalDataSource for Google Drive and update EXTERNAL_SOURCE_ID | |
# Execute this query and save result to GoogleDocs.csv. | |
# SELECT | |
# Id,Name,Url,OwnerId,ParentId,CreatedById,CreatedDate,LastModifiedById,LastModifiedDate | |
# FROM GoogleDoc | |
# ORDER BY Id | |
# Then run this script; it will consume GoogleDocs.csv and write ContentVersion.csv. | |
# Check ContentVersion.csv; are there any rows with no ExternalDocumentInfo2? | |
# That is the Id in Google docs; it means that you probably need to add some | |
# regex patterns. | |
# | |
# Check the length of ExternalDocumentInfo2 (in the column named Length). | |
# If there are some rows that are extra long or short, check whether the | |
# GoogleDoc actually works. Salesforce did not impose any validation rules | |
# or sanity checks on the urls that users could save there. | |
# Some of the old format urls fail when migrated to ContentVersion; | |
# We put the old GoogleDoc record Id and the url into the Description, | |
# which might help people find their way. | |
import csv | |
import re | |
EXTERNAL_SOURCE_ID = '0XC3z000000GmbvGAC' | |
patterns = [ | |
'.*/d/(.+)/.*', | |
'.+/d/(.+)', | |
'.*key=(.+)#.*', | |
'.+folders/(.+)', | |
'.+\?docid=(.+)', | |
'.+/document/d/(.+)', | |
'.+&key=(.+)', | |
'.+\?key=(.+)', | |
'.+/folderview\?id=(.+)&.+', | |
'.+/open\?id=(.+)', | |
'.+&id=(.+)' | |
] | |
cvsFieldNames = [ | |
'ContentLocation', | |
'Origin', | |
'OwnerId', | |
'Title', | |
'PathOnClient', | |
'ExternalDataSourceId', | |
'FirstPublishLocationId', | |
'Original_Record_Id__c', | |
'CreatedById', | |
'CreatedDate', | |
'LastModifiedById', | |
'LastModifiedDate', | |
'Description', | |
'ExternalDocumentInfo1', | |
'ExternalDocumentInfo2', | |
'Pattern', | |
'Length' | |
] | |
def main(): | |
patternsByName = {item:re.compile(item) for item in patterns} | |
with open('ContentVersion.csv', 'w', newline='') as csvOutfile: | |
writer = csv.DictWriter(csvOutfile, fieldnames=cvsFieldNames) | |
writer.writeheader() | |
with open('GoogleDocs.csv', newline='') as csvfile: | |
reader = csv.DictReader(csvfile) | |
for row in reader: | |
url = row['Url'] | |
gdocid = '' | |
matchedpattern = '' | |
for patternname, pattern in patternsByName.items(): | |
m = pattern.match(url) | |
if m: | |
gdocid = m.group(1) | |
matchedpattern = patternname | |
break | |
writer.writerow({ | |
'ContentLocation' : 'E', | |
'Origin' : 'H', | |
'OwnerId' : row['OwnerId'], | |
'Title' : row['Name'], | |
'PathOnClient' : row['Name'], | |
'ExternalDataSourceId' : EXTERNAL_SOURCE_ID, | |
'FirstPublishLocationId' : row['ParentId'], | |
'Original_Record_Id__c' : row['Id'], | |
'CreatedById' : row['CreatedById'], | |
'CreatedDate' : row['CreatedDate'], | |
'LastModifiedById' : row['LastModifiedById'], | |
'LastModifiedDate' : row['LastModifiedDate'], | |
'ExternalDocumentInfo1' : row['Url'], | |
'ExternalDocumentInfo2' : gdocid, | |
'Pattern' : matchedpattern, | |
'Length' : len(gdocid) if gdocid is not None else 0, | |
'Description' : 'Migrated from GoogleDoc Id {}, url {}'.format(row['Id'], row['Url']) + | |
"\n If the 'Open' link does not work, try pasting this link into your browser." | |
}) | |
if __name__ == "__main__": | |
main() | |
pass |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment