Last active
February 24, 2023 09:54
-
-
Save ncortines/3cf66e252366ae2a478056e26c605e3f to your computer and use it in GitHub Desktop.
Script used to scrape data from performancemanager8.successfactors.com
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// script used to scrape data from performancemanager8.successfactors.com
// it downloads photos from the organization's chart
// photo file name contains firstname, lastname, position and hire date
(() => {
// root node - does not necessarily have to be the top-most
const THE_BOSS = 'TREFA'
// retry interval in case of server error - handed to setTimeout, so it is in milliseconds
const RETRY_INT = 1000
// internal website api
// NOTE: HOCOrgChartTreeProvider is not defined in this script - it has to be provided by the page the script runs in
const api = new HOCOrgChartTreeProvider()
// returns a promise that resolves (with no value) once "howLong" milliseconds have elapsed
const wait = howLong =>
  new Promise(resolve => setTimeout(resolve, howLong))
// fetching the employee's hire date requires a separate api call
// resolves with the "hireDate" of the first returned work profile, or '' when the response carries none
const fetchEmployeeHireDate = (nodeid) => {
  // the id lands inside a quoted OData string literal in the query string: double any single quote
  // (OData escaping) and percent-encode the rest so special characters cannot break the filter
  const legacyId = encodeURIComponent(String(nodeid).replace(/'/g, "''")).replace(/'/g, '%27')
  return fetch(`/odatav4/workforce/Workforce.svc/v1/WorkProfile?$expand=workforcePersonProfile(%24expand%3Dpronouns%2Cemails%2Cphones)&$filter=(legacyId%20eq%20%27${legacyId}%27)`)
    .then(r => r.json())
    .then(data => (data && data.value && data.value[0] && data.value[0].hireDate) || '')
}
// hidden link that will allow us to download files automatically
const a = document.createElement('a')
// hide it before attaching it to the page so it never gets rendered
a.style.display = 'none'
document.body.appendChild(a)
// this function is used to automate the photo file download by emulating a user's link "click" action
//   blob     - binary content to save
//   fileName - file name assigned to the link's "download" attribute
const downloadFile = (blob, fileName) => {
  const url = URL.createObjectURL(blob)
  try {
    a.href = url
    a.download = fileName
    a.click()
  } finally {
    // always release the object url, even if the click fails
    URL.revokeObjectURL(url)
  }
}
// fetch employee's photo and resolve with it as a binary object (the result of the response's "blob()")
const downloadPhoto = url =>
  fetch(url).then(response => response.blob())
// fetch employee's organization data
// resolves / rejects with whatever "api.getOrgData" settles with
const getOrgData = nodeid => {
  // this promisification seems redundant but it is not
  // what "api.getOrgData" returns is not a real promise object
  // other script promise composition actions will fail unless
  // it's properly promisified
  return new Promise((resolve, reject) => {
    api.getOrgData(nodeid, undefined, 999, undefined)
      .then(resolve)
      .catch(reject)
  })
}
// fetch employee's organization data - retry if server responds with error
//   nodeid - id of the employee to fetch
//   tries  - how many retries are still allowed (default 3, i.e. up to 4 requests in total)
// waits RETRY_INT between attempts; rejects with the last error once no retries are left
const getNode = (nodeid, tries = 3) => {
  return getOrgData(nodeid)
    .catch(error => {
      if (tries > 0) {
        console.warn(`could not fetch data for node id ${nodeid} - retrying after 1 second`)
        // hand the reduced budget to the next attempt instead of mutating the parameter
        return wait(RETRY_INT)
          .then(() => getNode(nodeid, tries - 1))
      }
      console.error(`could not fetch data for node id ${nodeid} after 3 tries!`)
      return Promise.reject(error)
    })
}
// process employee's organizational data - record the employee in "results" (keyed by node id) and
// traverse through other employees reporting to this employee ("node.children")
//   nodeid  - id of the employee to process
//   results - plain object, filled in place with { firstname, lastname, employeetitle, pic } entries
// resolves once this employee and the employees below have been processed
const processNode = (nodeid, results) => {
  // an employee reached again (several managers, or a reporting loop) is already recorded and its
  // reports are handled by the first visit - fetching it again is wasted work and would never end on a loop
  if (Object.prototype.hasOwnProperty.call(results, nodeid)) {
    return Promise.resolve()
  }
  return getNode(nodeid)
    .then(node => {
      const { firstname, lastname, employeetitle, pic } = node
      results[nodeid] = { firstname, lastname, employeetitle, pic }
      if (node.children && node.children.length > 0) {
        // we'll fetch children in parallel (faster) - if server returns an error for particular request we will retry
        return Promise.all(node.children.map(child => processNode(child.nodeid, results)))
      }
    })
}
// fetch the employee's hire date and the photo (both requests are started together), then download
// the file with an appropriate name for identification
//   nodeid - employee id used to look up the hire date
//   node   - entry providing firstname, lastname, employeetitle and pic (the photo url)
const downloadEmployeeFile = (nodeid, node) =>
  Promise.all([ fetchEmployeeHireDate(nodeid), downloadPhoto(node.pic) ])
    .then(([ hireDate, blob ]) => downloadFile(blob, `${node.firstname}_${node.lastname}__${node.employeetitle}__${hireDate}.jpeg`))
// we need a flat structure to index employees by employee id
// this is needed in order to remove duplicate entries, as some employees report to more
// than one other employee
const results = {}
// here starts the processing
processNode(THE_BOSS, results)
  .then(() => {
    // "results" is a plain object (it has no "size"), so the node count is the number of its keys
    console.info(`${Object.keys(results).length} nodes fetched - fetching hire dates and photos`)
    // now "results" object contains a de-duplicated, nodeid-keyed structure which can be iterated to fetch the missing data
    // and finally save the files to the downloads folder
    // we will request the missing data for each employee sequentially (slower), as the server does not seem to be handling
    // very well multiple parallel requests for employees photos
    return Object.entries(results).reduce((promise, [nodeid, node]) => {
      return promise.then(() => downloadEmployeeFile(nodeid, node))
    }, Promise.resolve())
  })
  .then(() => {
    console.info(`all ${Object.keys(results).length} files saved`)
  })
  .catch(error => {
    // a rejection value is not guaranteed to be an Error - fall back to the value itself
    console.error(`something went wrong: ${(error && error.message) || error}`)
  })
})()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment