Skip to content

Instantly share code, notes, and snippets.

@ncortines
Last active February 24, 2023 09:54
Show Gist options
  • Save ncortines/3cf66e252366ae2a478056e26c605e3f to your computer and use it in GitHub Desktop.
Save ncortines/3cf66e252366ae2a478056e26c605e3f to your computer and use it in GitHub Desktop.
Script used to scrape data from performancemanager8.successfactors.com
// script used to scrape data from performancemanager8.successfactors.com
// it downloads photos from the organization's chart
// photo file name contains firstname, lastname, position and hire date
(() => {
// id of the org-chart node where the traversal starts (it is handed to processNode below)
// it does not necessarily have to be the top-most node of the organization
const THE_BOSS = 'TREFA'
// delay, in milliseconds, before a failed org data request is retried (it is handed to wait/setTimeout)
const RETRY_INT = 1000
// internal website api used to read the org chart
// NOTE(review): HOCOrgChartTreeProvider is not defined in this script - presumably a global
// exposed by the website itself, so the script has to run inside that page - confirm
const api = new HOCOrgChartTreeProvider()
// returns a promise that settles (with no value) once "howLong" milliseconds have gone by
const wait = (howLong) => {
  return new Promise((done) => {
    setTimeout(done, howLong)
  })
}
// the hire date is not part of the org data, so it takes a separate api call per employee
// resolves with the "hireDate" of the first entry in the response's "value" list,
// or with an empty string when the response carries no such entry / no hire date
const fetchEmployeeHireDate = (nodeid) => {
  const endpoint = `/odatav4/workforce/Workforce.svc/v1/WorkProfile?$expand=workforcePersonProfile(%24expand%3Dpronouns%2Cemails%2Cphones)&$filter=(legacyId%20eq%20%27${nodeid}%27)`
  const pickHireDate = (data) => {
    const firstProfile = data && data.value && data.value[0]
    return (firstProfile && firstProfile.hireDate) || ''
  }
  return fetch(endpoint)
    .then((response) => response.json())
    .then(pickHireDate)
}
// invisible link element, attached to the page once and reused for every file:
// "clicking" it from code is what lets us download files automatically
const a = document.createElement('a')
a.style = 'display: none'
document.body.appendChild(a)
// automates the photo file download by emulating a user's "click" on the hidden link:
// the blob gets a temporary object url, the link is pointed at it with "fileName" as the
// suggested file name, the link is clicked, and the temporary url is released again
const downloadFile = (blob, fileName) => {
  const objectUrl = URL.createObjectURL(blob)
  Object.assign(a, { href: objectUrl, download: fileName })
  a.click()
  URL.revokeObjectURL(objectUrl)
}
// requests the employee's photo from "url" and resolves with the response body as a binary object (blob)
const downloadPhoto = (url) => {
  const pendingResponse = fetch(url)
  return pendingResponse.then((response) => response.blob())
}
// fetch employee's organization data
// wrapping the call in a native Promise looks redundant but it is not:
// what "api.getOrgData" returns is not a real promise object, and the promise
// composition done by the rest of the script fails unless its outcome is
// forwarded into a proper one
const getOrgData = (nodeid) => new Promise((resolve, reject) => {
  const request = api.getOrgData(nodeid, undefined, 999, undefined)
  request.then(resolve).catch(reject)
})
// fetch employee's organization data - if the request fails, wait RETRY_INT milliseconds and try again
// "tries" is the number of retries still allowed (3 by default, i.e. up to 1 + 3 requests in total)
// "attempt" is the 1-based number of the current request - it is only used for accurate logging and
// callers are not expected to pass it
// resolves with the org data; rejects with the last error once all the retries are used up
const getNode = (nodeid, tries = 3, attempt = 1) => {
  return getOrgData(nodeid)
    .catch(error => {
      if (tries > 0) {
        console.warn(`could not fetch data for node id ${nodeid} - retrying after ${RETRY_INT} ms`)
        return wait(RETRY_INT)
          .then(() => getNode(nodeid, tries - 1, attempt + 1))
      }
      // report the number of requests actually made instead of a hard-coded figure
      console.error(`could not fetch data for node id ${nodeid} after ${attempt} attempts!`)
      return Promise.reject(error)
    })
}
// process employee's organizational data - store the employee in "results" (keyed by node id) and then
// traverse through the other employees reporting to this employee ("node.children")
// "visited" holds the node ids that were already requested during this traversal: some employees report
// to more than one other employee, so without it their whole sub-tree would be fetched again once per
// manager, and a reporting cycle would never terminate - callers only pass "nodeid" and "results"
// resolves once this node and everything below it has been stored in "results"
const processNode = (nodeid, results, visited = new Set()) => {
  if (visited.has(nodeid)) {
    return Promise.resolve()
  }
  visited.add(nodeid)
  return getNode(nodeid)
    .then(node => {
      const { firstname, lastname, employeetitle, pic } = node
      results[nodeid] = { firstname, lastname, employeetitle, pic }
      if (node.children && node.children.length > 0) {
        // we'll fetch children in parallel (faster) - getNode retries if the server returns an error for a particular request
        return Promise.all(node.children.map(child => processNode(child.nodeid, results, visited)))
      }
    })
}
// requests the employee's hire date and photo at the same time, then saves the photo under a
// file name that identifies the employee: firstname, lastname, title and hire date
const downloadEmployeeFile = (nodeid, node) => {
  const pending = [fetchEmployeeHireDate(nodeid), downloadPhoto(node.pic)]
  return Promise.all(pending).then(([hireDate, blob]) => {
    const fileName = `${node.firstname}_${node.lastname}__${node.employeetitle}__${hireDate}.jpeg`
    return downloadFile(blob, fileName)
  })
}
// we need a flat structure to index employees by employee id
// this is needed in order to remove duplicate entries, as some employees report to more
// than one other employee
const results = {}
// "results" is a plain object, so it has no "size" property - the number of employees is its number of keys
const countResults = () => Object.keys(results).length
// here starts the processing
processNode(THE_BOSS, results)
  .then(() => {
    console.info(`${countResults()} nodes fetched - fetching hire dates and photos`)
    // now "results" object contains a de-duplicated, nodeid-keyed structure which can be iterated to fetch the missing data
    // and finally save the files to the downloads folder
    // we will request the missing data for each employee sequentially (slower), as the server does not seem to be handling
    // very well multiple parallel requests for employees photos
    return Object.entries(results).reduce((promise, [nodeid, node]) => {
      return promise.then(() => downloadEmployeeFile(nodeid, node))
    }, Promise.resolve())
  })
  .then(() => {
    console.info(`all ${countResults()} files saved`)
  })
  .catch(error => {
    // the rejection value is not guaranteed to be an Error: fall back to the value itself when it has no message
    console.error(`something went wrong: ${(error && error.message) || error}`)
  })
})()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment