Skip to content

Instantly share code, notes, and snippets.

@nikmolnar
Last active October 28, 2015 20:28
Show Gist options
  • Save nikmolnar/6f7e61cf25fd3227aeb8 to your computer and use it in GitHub Desktop.
Save nikmolnar/6f7e61cf25fd3227aeb8 to your computer and use it in GitHub Desktop.
from openpyxl import Workbook
import os
# Header row of the generated spreadsheet, in output order. main() looks up
# each parsed readme value by the lower-cased form of these names.
COLUMNS = [
    'Name',
    'Data Lead',
    'Description',
    'Date Obtained',
    'Time Period of Content Date',
    'Publication Date',
    'URL',
    'Contact',
    'Processing Notes',
    'Keywords',
    'Relative Path',
    'Absolute Path',
]
def parse_readme(f):
    """Parse a ``##``-sectioned readme into a dict of section name -> text.

    A line whose stripped form starts with ``##`` opens a new section. The
    section name is that line with surrounding spaces, ``#`` characters and
    line endings removed, then lower-cased. Every following line (stripped)
    up to the next header is collected, and the collected lines are joined
    with newlines to form the section's value. Lines that appear before the
    first header are discarded, as are lines under a header whose name is
    empty.

    Args:
        f: Iterable of lines, typically a file object. Byte lines are
            decoded as Windows-1252; lines that are already text are used
            unchanged, so files opened in either binary or text mode work.

    Returns:
        dict mapping lower-cased section names to their text.

    Raises:
        UnicodeDecodeError: If a byte line contains a byte that has no
            Windows-1252 mapping.
    """
    values = {}
    key = None
    value = []
    for line in f:
        # Only byte strings need decoding; calling .decode() on an already
        # decoded text line fails on Python 3.
        if isinstance(line, bytes):
            line = line.decode('Windows-1252')
        stripped = line.strip()
        if stripped.startswith('##'):
            # A new header closes the section collected so far.
            if key:
                values[key] = '\n'.join(value)
            key = line.strip(' #\r\n').lower()
            value = []
            continue
        value.append(stripped)
    # Flush the final section, which has no following header to close it.
    if key:
        values[key] = '\n'.join(value)
    return values
def main():
    """Collect sectioned ``readme.txt`` files into the spreadsheet ``data.xlsx``.

    Walks the current working directory recursively. Every file named
    ``readme.txt`` (case-insensitive) whose content starts with ``##`` is
    parsed with ``parse_readme`` and written as one row, with one cell per
    entry in ``COLUMNS``; sections missing from a readme become empty cells.
    Other files called ``readme.txt`` are reported and skipped. The workbook
    is saved to ``data.xlsx`` in the current working directory.
    """
    workbook = Workbook()
    sheet = workbook.active
    sheet.append(COLUMNS)

    for root, _dirs, files in os.walk('.'):
        for name in (n for n in files if n.lower() == 'readme.txt'):
            path = os.path.join(root, name)
            # Binary mode hands parse_readme raw bytes to decode itself, so
            # the platform's default text encoding is never involved. The
            # with-block closes the handle for both skipped and parsed files.
            with open(path, 'rb') as f:
                if not f.read().strip().startswith(b'##'):  # Ignore other files called 'readme.txt'
                    print('Skipping {0}'.format(path))
                    continue
                print('Processing {0}'.format(path))
                f.seek(0)  # The sniffing read above consumed the file.
                values = parse_readme(f)

            values['relative path'] = os.path.dirname(path)
            # abspath also normalises the path, dropping the './' segment
            # that os.walk('.') puts at the front of every path.
            values['absolute path'] = os.path.dirname(os.path.abspath(path))
            sheet.append([values.get(c.lower(), '') for c in COLUMNS])

    workbook.save('data.xlsx')
# Build the spreadsheet only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment