Skip to content

Instantly share code, notes, and snippets.

@Kyonru
Last active June 4, 2018 12:37
Show Gist options
  • Save Kyonru/5fda92bcaf0bff6eaf44ce84f1708d3a to your computer and use it in GitHub Desktop.
Save Kyonru/5fda92bcaf0bff6eaf44ce84f1708d3a to your computer and use it in GitHub Desktop.
Extract the results of the lotteries in the Dominican Republic from http://leidsa.com/
/**
 * Extract the body of the html page.
 *
 * The result starts at the opening body tag (which is kept, attributes
 * included) and stops right before the closing body tag.
 *
 * @param {string} html full html of the page
 * @returns {string} the body portion of the page
 */
export const extractBody = (html) => {
  // Accept `<body>` as well as `<body class="...">`; fall back to the start of
  // the document when there is no body tag at all.
  const openTag = /<body[\s>]/i.exec(html);
  const startIndex = openTag ? openTag.index : 0;

  // Only look for the closing tag after the opening one. When it is missing
  // keep everything up to the end instead of handing substring() a -1, which
  // would swap the arguments and return the text *before* the body.
  const closeTag = /<\/body\s*>/i.exec(html.slice(startIndex));
  const endIndex = closeTag ? startIndex + closeTag.index : html.length;

  return html.substring(startIndex, endIndex);
};
/**
 * Extract the principal ("numeros-ganadores-pc") section of the page.
 *
 * @param {string} body full body of the html
 * @returns {{chunk: (string|undefined), newBody: string}} the section found
 *   (undefined when the opening div is absent) and the body with that
 *   section removed
 */
export const extractPrincipal = (body) => {
  const { chunk, newString: newBody } = extract(
    body,
    '<div class="form-group numeros-ganadores-pc">',
    '</table>'
  );
  return { chunk, newBody };
};
/**
 * Extract all the lotteries.
 *
 * Repeatedly pulls the principal section and the next
 * `<div class="panel panel-default">` … `</table>` section out of the body,
 * converting each one to an object, until no panel is left.
 *
 * @param {string} body full html body
 * @param {array} chunkArr accumulator of lotteries; parsed entries are pushed
 *   onto it
 * @returns {array} the parsed lotteries with `undefined` entries removed
 */
export const extractChunks = (body, chunkArr = []) => {
  const startOfChunk = '<div class="panel panel-default">';
  const endOfChunk = '</table>';
  let remaining = body;

  for (;;) {
    const lengthBeforePass = remaining.length;

    const principal = extractPrincipal(remaining);
    if (principal.chunk) {
      remaining = principal.newBody;
      chunkArr.push(parsePrincipalChunkToObject(principal.chunk, remaining));
    }

    const extraction = extract(remaining, startOfChunk, endOfChunk);
    if (extraction.startIndex === -1) {
      break;
    }
    // extract() may hand back the text in front of the panel rather than the
    // panel itself; only real panels are parsed.
    if (extraction.chunk.includes(startOfChunk)) {
      chunkArr.push(parseChunkToObject(extraction.chunk));
    }
    // A pass that removed nothing would repeat forever (e.g. a panel whose
    // closing </table> is missing), so stop instead of looping endlessly.
    if (extraction.newString.length >= lengthBeforePass) {
      break;
    }
    remaining = extraction.newString;
  }

  return chunkArr.filter((item) => item !== undefined);
};
/**
 * Parse the principal numbers of the leidsa page to the common format.
 *
 * @param {string} chunk container of the principal numbers
 * @param {string} body full body of the html, searched for the
 *   `<p class="resultados-del-dia">` paragraph that carries the date
 * @returns {{logo: string, numbers: array, time: string, date: (string|undefined)}}
 *   `date` is undefined when the date paragraph or its "Resultados del "
 *   text cannot be found
 */
export const parsePrincipalChunkToObject = (chunk, body) => {
  const numberStart = '<span class="numeros-ganadores';
  const numberEnd = '</span>';
  const numberExtra = ' numero-ganador-principal';
  const symbol = '">';
  const dateStart = '<p class="resultados-del-dia">';
  const dateEnd = '<div class="form-group numeros-ganadores-pc">';

  const rawDate = extract(body, dateStart, dateEnd).chunk;
  // When the date paragraph is absent there is nothing to search in; passing
  // undefined on to extract() would throw and abort the whole parse.
  const date = rawDate === undefined
    ? undefined
    : extract(rawDate, 'Resultados del ', '</p>').chunk;

  // The number markers are cut in the middle of the opening <span ...> tag, so
  // the rest of the class attribute and the closing `">` are stripped here.
  const stripMarkup = (number) => number.replace(numberExtra, '').replace(symbol, '');

  return {
    logo: 'https://i.imgur.com/zm0wRDQ.png',
    numbers: extractNumbers(chunk, [], numberStart, numberEnd, stripMarkup),
    time: '',
    date
  };
};
/**
 * Parse the html of one lottery panel to an object.
 *
 * @param {string} chunk html of a single lottery panel
 * @returns {{logo: *, numbers: *, date: *, time: *}} values produced by
 *   extractImage, extractNumbers and extractDate for that panel
 */
export const parseChunkToObject = (chunk) => {
  const logo = extractImage(chunk);
  const numbers = extractNumbers(chunk);
  // Parse the date section once and reuse both fields instead of running the
  // same extraction twice.
  const { date, time } = extractDate(chunk);
  return { logo, numbers, date, time };
};
/**
 * Extract the numbers of the chunk.
 *
 * Each `numberStart` … `numberEnd` portion is cut out of the chunk in turn;
 * the start marker is stripped and the non-empty remainder is passed through
 * `filter` before being stored.
 *
 * @param {string} chunk portion of the string which has the numbers
 * @param {array} numbers accumulator; found numbers are pushed onto it
 * @param {string} numberStart start limit
 * @param {string} numberEnd end limit
 * @param {func} filter applied to every number string before it is stored
 * @returns {array} the `numbers` array
 */
const extractNumbers = (
  chunk,
  numbers = [],
  numberStart = '<td class="numeros-ganadores-loterias">',
  numberEnd = '</td>',
  filter = (number) => number
) => {
  let remaining = chunk;

  for (;;) {
    const extraction = extract(remaining, numberStart, numberEnd);
    if (extraction.startIndex === -1) {
      break;
    }
    // extract() may return the text in front of the marker rather than the
    // marked portion; only portions that carry the marker hold a number.
    if (extraction.chunk.includes(numberStart)) {
      const number = extraction.chunk.replace(numberStart, '');
      if (number.length > 0) {
        numbers.push(filter(number));
      }
    }
    // Nothing was removed (e.g. the closing marker is missing): another pass
    // would see the very same input, so stop instead of repeating forever.
    if (extraction.newString.length >= remaining.length) {
      break;
    }
    remaining = extraction.newString;
  }

  return numbers;
};
/**
 * Extract the logo image url of a lottery panel.
 *
 * Looks for the `<img class="logo-loteria" src="...png"/>` tag and returns the
 * value of its src attribute.
 *
 * @param {string} chunk html of a single lottery panel
 * @returns {string} the image url, ending in `.png`
 */
const extractImage = (chunk) => {
  const tagPrefix = '<img class="logo-loteria" src="';
  const tagSuffix = '.png"/>';
  const { chunk: imageTag } = extract(chunk, tagPrefix, tagSuffix);
  // The extracted portion stops before the suffix, so the extension is put
  // back after the tag prefix has been stripped.
  const pathWithoutExtension = imageTag.replace(tagPrefix, '');
  return `${pathWithoutExtension}.png`;
};
/**
 * Extract the date and the draw time of a lottery panel.
 *
 * The time is the text inside the first `<strong>` element with the word
 * "Sorteo" and the first colon removed; the date is the text between
 * `</strong>` and `</p>`.
 *
 * @param {String} chunk html of a single lottery panel
 * @returns {{time: string, date: string}} trimmed time and untrimmed date
 */
const extractDate = (chunk) => {
  const strongOpen = '<strong>';
  const strongClose = '</strong>';
  const paragraphClose = '</p>';

  const dateSection = extract(chunk, strongClose, paragraphClose).chunk;
  const date = dateSection.replace(strongClose, '');

  const timeSection = extract(chunk, strongOpen, strongClose).chunk;
  // Strip, in order, the opening tag, the "Sorteo" label and the colon.
  const time = [strongOpen, 'Sorteo', ':']
    .reduce((text, token) => text.replace(token, ''), timeSection)
    .trim();

  return { time, date };
};
/**
 * Extract a portion of the string given the limits.
 *
 * The portion starts at the first occurrence of `start` (the marker itself is
 * part of the chunk) and stops right before the first `end` marker that
 * follows it; the `end` marker is left in `newString`. When no `end` marker
 * follows, the chunk runs to the end of the string.
 *
 * @param {string} string complete body to be searched in
 * @param {string} start start of the searched string
 * @param {string} end final part of the searched string
 * @returns {{chunk: (string|undefined), newString: string, startIndex: number, endIndex: number}}
 *   `chunk` is undefined and `newString` is the untouched input when `start`
 *   is not found; `startIndex` / `endIndex` are -1 when the respective marker
 *   is not found
 */
const extract = (string, start, end) => {
  const startIndex = string.indexOf(start);
  if (startIndex === -1) {
    return {
      chunk: undefined,
      newString: string,
      startIndex,
      endIndex: string.indexOf(end)
    };
  }

  // Search for the end marker only behind the start marker. Searching from the
  // beginning could find an earlier (or no) end marker, and substring() would
  // then silently swap its arguments and return unrelated text.
  const endIndex = string.indexOf(end, startIndex + start.length);
  const chunkEnd = endIndex === -1 ? string.length : endIndex;

  return {
    chunk: string.slice(startIndex, chunkEnd),
    // Remove exactly the extracted range rather than the first textual match.
    newString: string.slice(0, startIndex) + string.slice(chunkEnd),
    startIndex,
    endIndex
  };
};
/**
 * Convert the html text of the page into an array of lotteries.
 *
 * @param {string} html full html of the page
 * @returns {array} the lotteries found in the body of the page
 */
export const htmlToJson = (html) => {
  const body = extractBody(html);
  const lotteries = extractChunks(body);
  return lotteries;
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment