# nikmolnar/readme_to_excel.py

## readme_to_excel.py
from openpyxl import Workbook
import os

# Spreadsheet column headers, in output order. main() writes this list as the
# header row and fills each cell by looking up the lower-cased column name in
# the dict of parsed README sections.
COLUMNS = [
    'Name',
    'Data Lead',
    'Description',
    'Date Obtained',
    'Time Period of Content Date',
    'Publication Date',
    'URL',
    'Contact',
    'Processing Notes',
    'Keywords',
    'Relative Path',
    'Absolute Path',
]


def parse_readme(f):
    """Parse a sectioned README into a dict of lower-cased heading -> body text.

    A section starts at any line whose stripped text begins with ``##``. The
    heading is that line with surrounding spaces, ``#`` characters and line
    endings removed, then lower-cased. Every following line (stripped) up to
    the next heading belongs to that section; the lines are joined with
    newlines. Lines before the first heading, and sections whose heading is
    empty, are discarded. If a heading repeats, the last occurrence wins.

    :param f: Iterable of lines, e.g. an open file. Lines may be byte strings
        (decoded as Windows-1252) or already-decoded text.
    :return: dict mapping each lower-cased heading to its section text.
    """
    values = {}
    key = None
    value = []

    for line in f:
        # Only byte strings need decoding. A text-mode file on Python 3 yields
        # ``str``, which has no ``decode`` method and previously crashed here.
        if isinstance(line, bytes):
            line = line.decode('Windows-1252')

        if line.strip().startswith('##'):
            # A new heading closes the section collected so far.
            if key:
                values[key] = '\n'.join(value)

            key = line.strip(' #\r\n').lower()
            value = []
            continue

        value.append(line.strip())

    # Flush the final section, which has no heading after it.
    if key:
        values[key] = '\n'.join(value)

    return values


def main():
    """Collect every sectioned ``readme.txt`` under the current directory into ``data.xlsx``.

    Walks the tree rooted at ``.``. For each file named ``readme.txt``
    (case-insensitive) whose content starts with ``##``, one row is appended
    to the workbook: the parsed sections in ``COLUMNS`` order, plus the
    relative and absolute directory of the file. Any other ``readme.txt`` is
    reported and skipped.
    """
    workbook = Workbook()
    sheet = workbook.active

    sheet.append(COLUMNS)

    for root, dirs, files in os.walk('.'):
        for name in (n for n in files if n.lower() == 'readme.txt'):
            path = os.path.join(root, name)

            # Binary mode: parse_readme() decodes each line as Windows-1252
            # itself. The context manager guarantees the handle is closed.
            with open(path, 'rb') as f:
                if not f.read().strip().startswith(b'##'):  # Ignore other files called 'readme.txt'
                    print('Skipping {0}'.format(path))
                    # Previously fell through and added a row for the skipped file.
                    continue

                print('Processing {0}'.format(path))

                f.seek(0)
                values = parse_readme(f)

            values['relative path'] = os.path.dirname(path)
            values['absolute path'] = os.path.dirname(os.path.join(os.getcwd(), path))

            sheet.append([values.get(c.lower(), '') for c in COLUMNS])

    workbook.save('data.xlsx')


# Run the export only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
	from openpyxl import Workbook
	import os

	# Spreadsheet column headers, in output order. main() writes this list as
	# the header row and fills each cell by looking up the lower-cased column
	# name in the dict of parsed README sections.
	COLUMNS = [
		'Name',
		'Data Lead',
		'Description',
		'Date Obtained',
		'Time Period of Content Date',
		'Publication Date',
		'URL',
		'Contact',
		'Processing Notes',
		'Keywords',
		'Relative Path',
		'Absolute Path',
	]


	def parse_readme(f):
		"""Parse a sectioned README into a dict of lower-cased heading -> body text.

		A section starts at any line whose stripped text begins with ``##``. The
		heading is that line with surrounding spaces, ``#`` characters and line
		endings removed, then lower-cased. Every following line (stripped) up to
		the next heading belongs to that section; the lines are joined with
		newlines. Lines before the first heading, and sections whose heading is
		empty, are discarded. If a heading repeats, the last occurrence wins.

		:param f: Iterable of lines, e.g. an open file. Lines may be byte strings
			(decoded as Windows-1252) or already-decoded text.
		:return: dict mapping each lower-cased heading to its section text.
		"""
		values = {}
		key = None
		value = []

		for line in f:
			# Only byte strings need decoding. A text-mode file on Python 3
			# yields ``str``, which has no ``decode`` method.
			if isinstance(line, bytes):
				line = line.decode('Windows-1252')

			if line.strip().startswith('##'):
				# A new heading closes the section collected so far.
				if key:
					values[key] = '\n'.join(value)

				key = line.strip(' #\r\n').lower()
				value = []
				continue

			value.append(line.strip())

		# Flush the final section, which has no heading after it.
		if key:
			values[key] = '\n'.join(value)

		return values


	def main():
		"""Collect every sectioned ``readme.txt`` under the current directory into ``data.xlsx``.

		Walks the tree rooted at ``.``. For each file named ``readme.txt``
		(case-insensitive) whose content starts with ``##``, one row is appended
		to the workbook: the parsed sections in ``COLUMNS`` order, plus the
		relative and absolute directory of the file. Any other ``readme.txt`` is
		reported and skipped.
		"""
		workbook = Workbook()
		sheet = workbook.active

		sheet.append(COLUMNS)

		for root, dirs, files in os.walk('.'):
			for name in (n for n in files if n.lower() == 'readme.txt'):
				path = os.path.join(root, name)

				# Binary mode: parse_readme() decodes each line as Windows-1252
				# itself. The context manager guarantees the handle is closed.
				with open(path, 'rb') as f:
					if not f.read().strip().startswith(b'##'):  # Ignore other files called 'readme.txt'
						print('Skipping {0}'.format(path))
						# Without this, a row was still added for the skipped file.
						continue

					print('Processing {0}'.format(path))

					f.seek(0)
					values = parse_readme(f)

				values['relative path'] = os.path.dirname(path)
				values['absolute path'] = os.path.dirname(os.path.join(os.getcwd(), path))

				sheet.append([values.get(c.lower(), '') for c in COLUMNS])

		workbook.save('data.xlsx')


	# Run the export only when this file is executed as a script, not on import.
	if __name__ == '__main__':
		main()