Skip to content

Instantly share code, notes, and snippets.

@peterlyeung
Created April 1, 2015 16:46
Show Gist options
  • Save peterlyeung/03ae4c853d48ac64a3b4 to your computer and use it in GitHub Desktop.
Save peterlyeung/03ae4c853d48ac64a3b4 to your computer and use it in GitHub Desktop.
Python script to parse SFDC metadata on reports to determine which fields are being used.
#!/usr/bin/python
#---------------------------------------------------------------------------
# This python script traverses the reports directory of the SFDC metadata.
# It goes through each directory and reports, and pulls out fields
# used in the report. A file called reporting_fields_used.csv is generated.
#---------------------------------------------------------------------------
import os
import sys
import xml.etree.ElementTree as ET
from StringIO import StringIO
# start traversing in the directory the script is in
walk_dir = "."
# generate a file with this name
list_file_path = ('reporting_fields_used.csv')
print('Generating file: ' + list_file_path)
# open file as output stream
with open(list_file_path, 'wb') as list_file:
#for subdir in subdirs:
#print('\t- subdirectory ' + subdir)
# print the header of output file once
list_file.write("file,report,type,field\n")
# walk through directories using os.walk
for root, subdirs, files in os.walk(walk_dir):
for filename in files:
file_path = os.path.join(root, filename)
# only open up .report files
if ".report" in file_path:
with open(file_path, 'rb') as f:
f_content = f.read()
# read the XML from the file handle and then create the root from string
# be careful here. Since we are traversing directories, we have another variable called
# root for the root directory. So we call the XML root "xmlroot".
xmlroot = ET.fromstring(f_content)
# SFDC report metadata XML looks something like this
#<Report xmlns="http://soap.sforce.com/2006/04/metadata">
# <columns>
# <field>USERS.NAME</field>
# </columns>
# <filter>
# <booleanFilter>1 AND (2 OR 3)</booleanFilter>
# <criteriaItems>
# <column>Address_vod__c.Primary_vod__c</column>
# <operator>equals</operator>
# <value>1</value>
# </criteriaItems>
# <groupingsDown>
# <dateGranularity>Day</dateGranularity>
# <field>ACCOUNT.NAME</field>
# <sortOrder>Asc</sortOrder>
# </groupingsDown
# parse out reporting columns from columns/field
for columns in xmlroot.findall('./{http://soap.sforce.com/2006/04/metadata}columns'):
#print ('columns: %s' % columns)
for field in columns:
s = '%s,%s,%s,%s\n' % (file_path,filename, "columns|field", field.text)
list_file.write(s)
#print ('%s' % s)
# parse out reporting columns from GroupingsDown/field
for columns in xmlroot.findall('./{http://soap.sforce.com/2006/04/metadata}groupingsDown/.'):
#print ('columns: %s' % columns)
for child in columns:
if "field" in child.tag:
#print ('child %s' % child.text )
s = '%s,%s,%s,%s\n' % (file_path,filename, "groupingsDown|field", child.text)
list_file.write(s)
#print ('%s' % s)
# parse out reporting columns from GroupingsAcross/field
for columns in xmlroot.findall('./{http://soap.sforce.com/2006/04/metadata}groupingsAcross/.'):
#print ('columns: %s' % columns)
for child in columns:
if "field" in child.tag:
#print ('child %s' % child.text )
s = '%s,%s,%s,%s\n' % (file_path,filename, "groupingsAcross|field", child.text)
list_file.write(s)
#print ('%s' % s)
# parse out reporting columns from filter/criteriaItems/column
for filters in xmlroot.findall('./{http://soap.sforce.com/2006/04/metadata}filter/.'):
# print ('columns: %s' % filters)
for criteriaItems in filters:
if "criteriaItems" in criteriaItems.tag:
# print ('criteria item: %s , %s' % (criteriaItems.tag , criteriaItems.text))
for child in criteriaItems:
# if column
if "column" in child.tag:
# print ('child %s' % child.text)
s = '%s,%s,%s,%s\n' % (file_path,filename, "filter|criteriaItem|column", child.text)
list_file.write(s)
# print ('%s' % s)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment