Skip to content

Instantly share code, notes, and snippets.

@drewdaemon
Created August 17, 2019 17:59
Show Gist options
  • Save drewdaemon/6ec1c6b0f215ce98474f3bde901e1735 to your computer and use it in GitHub Desktop.
Save drewdaemon/6ec1c6b0f215ce98474f3bde901e1735 to your computer and use it in GitHub Desktop.
import { APIGatewayProxyHandler } from 'aws-lambda';
import 'source-map-support/register';
import * as cheerio from 'cheerio';
import Axios from 'axios';
/**
 * Origin of the site being scraped. Used as the base for the faculty page URL
 * and for thumbnail image URLs.
 */
const BASE_URL = 'https://www.cs.utah.edu';

/**
 * CSS selectors used to pull faculty data out of the fetched page.
 *
 * `FACULTY_ENTRIES` is evaluated against the whole document; every other
 * selector is evaluated with a single matched entry as its context.
 *
 * Declared `as const` so the table is readonly and a misspelled key is a
 * compile-time error instead of a silently empty query.
 */
const SELECTORS = {
  FACULTY_ENTRIES: 'table#people:nth-child(3) tr.professor',
  NAME: '#info > tbody > tr:nth-child(1) > td > h8',
  OFFICE: '#info > tbody > tr:nth-child(2) > td:nth-child(2)',
  PHONE: '#info > tbody > tr:nth-child(3) > td:nth-child(2)',
  EMAIL: '#info > tbody > tr:nth-child(4) > td:nth-child(2)',
  RESEARCH_INTERESTS: '#info2 > tbody > tr:nth-child(2)',
  THUMBNAIL: 'img',
} as const;
/** One faculty record as produced by `extractFaculty`. */
interface FacultyEntry {
  name: string;
  /** Absolute image URL, or `null` when the entry has no usable `img[src]`. */
  thumbnail: string | null;
  contactInfo: {
    office: string;
    phone: string;
    email: string;
  };
  researchInterests: string;
}

/**
 * Resolves an image `src` attribute against `BASE_URL`.
 *
 * Root-relative paths resolve to the same URL plain concatenation would give,
 * while relative and already-absolute values are handled correctly too.
 *
 * @param src Raw `src` attribute value; `undefined` when the attribute is absent.
 * @returns The absolute URL, or `null` if `src` is missing, empty, or unparsable.
 */
function resolveThumbnailUrl(src: string | undefined): string | null {
  if (!src) {
    return null;
  }
  try {
    return new URL(src, BASE_URL).href;
  } catch {
    // `new URL` throws on malformed input; treat that as "no thumbnail".
    return null;
  }
}

/**
 * Parses the faculty listing HTML and extracts one record per matched entry.
 *
 * Text fields are the raw text content of the matched nodes (not trimmed);
 * a field whose selector matches nothing comes back as an empty string.
 *
 * @param html Markup of the faculty listing page.
 * @returns The extracted records in document order; empty if nothing matches
 *   `SELECTORS.FACULTY_ENTRIES`.
 */
function extractFaculty(html: string): FacultyEntry[] {
  const $ = cheerio.load(html);

  return $(SELECTORS.FACULTY_ENTRIES)
    .toArray()
    .map((entry): FacultyEntry => ({
      name: $(SELECTORS.NAME, entry).text(),
      thumbnail: resolveThumbnailUrl($(SELECTORS.THUMBNAIL, entry).attr('src')),
      contactInfo: {
        office: $(SELECTORS.OFFICE, entry).text(),
        phone: $(SELECTORS.PHONE, entry).text(),
        email: $(SELECTORS.EMAIL, entry).text(),
      },
      researchInterests: $(SELECTORS.RESEARCH_INTERESTS, entry).text(),
    }));
}
/**
 * Lambda handler (API Gateway proxy integration) that fetches the faculty
 * listing page from `BASE_URL` and returns the extracted records as JSON.
 *
 * The incoming event and context are not used.
 *
 * @returns `200` with body `{ "faculty": [...] }` on success. If fetching or
 *   parsing fails, the error is logged and a `502` with a JSON `message` is
 *   returned instead of letting the invocation fail with an opaque error.
 */
export const harvest: APIGatewayProxyHandler = async () => {
  const headers = { 'Content-Type': 'application/json' };

  try {
    const res = await Axios.get<string>(`${BASE_URL}/people/faculty/`);
    const faculty = extractFaculty(res.data);

    return {
      statusCode: 200,
      headers,
      body: JSON.stringify({ faculty }),
    };
  } catch (err) {
    // Log with context so the failure is diagnosable from the function logs.
    console.error('Failed to harvest faculty listing', err);

    return {
      statusCode: 502,
      headers,
      body: JSON.stringify({ message: 'Failed to retrieve faculty listing from upstream site.' }),
    };
  }
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment