@mwek
Last active March 15, 2018 15:03
Greenhouse "Past interviews" view to CSV
#!/usr/bin/env python3
# Usage:
# 1. Go to https://app.greenhouse.io/interviews?type=past and copy the source code.
# 2. Run "pbpaste | ./greenhouse.py"
# 3. Analyze the CSV in the program of your choice.
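#
# Example session (illustrative; the rows depend on your own Greenhouse data):
#   $ pbpaste | ./greenhouse.py
#   candidate,position,date,type,status
#   ...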
import csv
from datetime import datetime, timedelta
from io import StringIO
from sys import stdin

from bs4 import BeautifulSoup

def to_date(time_string):
    """Convert Greenhouse's relative timestamps ("2 days ago", "3 hrs ago",
    "Yesterday") into an absolute "Mon DD, YYYY" date; leave anything else as-is."""
    if time_string == 'Yesterday':
        time_string = '1 day ago'
    if time_string.endswith('ago'):
        number, timetype = time_string.split(maxsplit=1)
        if timetype.startswith('day'):
            diff = timedelta(days=int(number))
        elif timetype.startswith('hr'):
            diff = timedelta(hours=int(number))
        else:
            return time_string
        return (datetime.now() - diff).strftime('%b %d, %Y')
    return time_string

def get_status(soup):
    """Map the feedback icon in an interview row to a human-readable status."""
    if soup.find('a', class_='awaiting-feedback'):
        return 'Awaiting feedback'
    if soup.find('span', class_='two-thumbs-down'):
        return 'Strong no'
    if soup.find('span', class_='thumbs-down'):
        return 'No'
    if soup.find('span', class_='no-decision'):
        return 'No decision'
    if soup.find('span', class_='thumbs-up'):
        return 'Yes'
    if soup.find('span', class_='two-thumbs-up'):
        return 'Strong yes'
    return ''

def parse_interview(soup):
    """Extract one interview row into a dict; return None for empty rows."""
    interview = {}
    candidate = soup.find('div', class_='candidate')
    if candidate:
        fields = candidate.stripped_strings
        interview['candidate'] = next(fields, '')
        interview['position'] = next(fields, '')
    details = soup.find('div', class_='interview-details')
    if details:
        fields = details.stripped_strings
        interview['date'] = to_date(next(fields, ''))
        interview['type'] = next(fields, '')
        interview['application'] = next(fields, '')
        interview['location'] = next(fields, '')
    interview['status'] = get_status(soup)
    return interview if any(interview.values()) else None

def parse_html(html):
    """Parse the copied page source and return a list of interview dicts."""
    soup = BeautifulSoup(html, 'html.parser')
    interviews_section = soup.find('div', id='interviews_section') or soup
    rows = interviews_section.find_all('li', class_='row')
    # Drop rows that produced no data (parse_interview returns None for those).
    return [i for i in map(parse_interview, rows) if i]

def to_csv(interviews):
    """Serialize the interviews to CSV, keeping only the columns listed below."""
    fieldnames = ['candidate', 'position', 'date', 'type', 'status']
    with StringIO() as output:
        writer = csv.DictWriter(output, fieldnames=fieldnames, extrasaction='ignore')
        writer.writeheader()
        for interview in interviews:
            writer.writerow(interview)
        # getvalue() must run before the context manager closes the buffer.
        return output.getvalue()

if __name__ == '__main__':
    print(to_csv(parse_html(stdin.read())))
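
For a quick sanity check of the pipeline, here is a minimal sketch that runs parse_html and to_csv on a hand-written fragment. The fragment (and the sample name) is purely illustrative: it only mimics the id/class names the script looks for (interviews_section, row, candidate, interview-details, thumbs-up); the real Greenhouse markup is richer and may change. It assumes the functions above are in scope, e.g. pasted at the bottom of the script or imported in a REPL.

sample = """
<div id="interviews_section"><ul>
  <li class="row">
    <div class="candidate"><p>Jane Doe</p><p>Software Engineer</p></div>
    <div class="interview-details">
      <span>Mar 1, 2018</span><span>Phone Screen</span>
      <span>Application</span><span>Onsite</span>
    </div>
    <span class="thumbs-up"></span>
  </li>
</ul></div>
"""
print(to_csv(parse_html(sample)))
# Expected: the header "candidate,position,date,type,status" followed by one
# row for Jane Doe with status "Yes".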