Skip to content

Instantly share code, notes, and snippets.

@brianjhanson
Last active September 8, 2017 20:13
Show Gist options
  • Save brianjhanson/13fc0182da19f4baaa5bb918e67c149b to your computer and use it in GitHub Desktop.
Save brianjhanson/13fc0182da19f4baaa5bb918e67c149b to your computer and use it in GitHub Desktop.
Scrape FAQs
// Every element matching `.faq-question`; the script builds one entry per match.
var $questions = jQuery('.faq-question');

// Accumulator for the scraped output: category records and entry records.
var json = {
  categories: [],
  entries: []
};

// Position in json.categories of the category currently being filled.
// Starts at -1 so that the first question numbered "1." advances it to 0.
var categoryIndex = -1;

// Ids stamped onto every generated entry by buildEntry.
// NOTE(review): presumably the target CMS section / entry-type ids; confirm before reuse.
var sectionId = 21;
var typeId = 28;
/**
 * Reports whether a question label starts with the literal prefix "1.".
 *
 * The match is anchored at the very first character, so labels such as
 * "10. ..." or " 1. ..." (leading space) do not count.
 *
 * @param {string} string - Question text as read from the page.
 * @returns {boolean} true when the text begins with "1.", false otherwise.
 */
function isFirst(string) {
  var leadingOne = string.match(/^1\./);
  return leadingOne !== null;
}
/**
 * Converts a human-readable title into a URL-style slug.
 *
 * Steps: lower-case, trim, turn every run of whitespace (spaces, tabs,
 * newlines) into a single hyphen, drop every character that is not a word
 * character or hyphen, then collapse repeated hyphens and strip hyphens from
 * both ends. The last two steps keep removed punctuation ("A & B") and
 * surrounding markup whitespace from leaving "--" or leading/trailing "-"
 * in the slug.
 *
 * @param {string} Text - Title to convert.
 * @returns {string} The slug; an empty string when nothing sluggable remains.
 */
function convertToSlug(Text) {
  return Text
    .toLowerCase()
    .trim()
    .replace(/\s+/g, '-')
    .replace(/[^\w-]+/g, '')
    .replace(/-{2,}/g, '-')
    .replace(/^-+|-+$/g, '');
}
/**
 * Builds the export record for one FAQ category.
 *
 * Everything except the slug and title is a fixed literal (group 3, parent 1,
 * locale en_us, fixed created/updated timestamps, enabled).
 *
 * @param {{title: string, slug: string}} category - Heading text and its slug.
 * @returns {Object} A record tagged "@model": "CategoryModel" with
 *   `attributes` and `content` sections.
 */
function buildCategory(category) {
  var attributes = {
    "groupId": 3,
    "parentId": 1,
    "locale": "en_us",
    "slug": category.slug,
    "dateCreated": "2016-01-13 01:25:57",
    "dateUpdated": "2016-01-13 01:25:57",
    "enabled": true
  };

  var content = {
    "title": category.title,
    "fields": []
  };

  return {
    "@model": "CategoryModel",
    "attributes": attributes,
    "content": content
  };
}
/**
 * Builds the export record for one FAQ question/answer pair.
 *
 * The section and type ids come from the file-level `sectionId` and `typeId`
 * variables, and the slug is derived from the question via `convertToSlug`.
 * All dates, the author id and the locale are fixed literals.
 *
 * @param {string} question - Question text; used as the title and slug source.
 * @param {*} answer - Answer body, stored unchanged under `content.fields.body`.
 * @param {string} category - Slug of the category this entry is related to.
 * @returns {Object} A record tagged "@model": "EntryModel" whose
 *   `content.related.category` points at the category by slug within group 3.
 */
function buildEntry(question, answer, category) {
  var attributes = {
    "sectionId": sectionId,
    "typeId": typeId,
    "authorId": 1,
    "locale": "en_us",
    "slug": convertToSlug(question),
    "postDate": "2015-02-27 16:43:52",
    "expiryDate": null,
    "dateCreated": "2015-02-27 16:43:52",
    "dateUpdated": "2015-02-27 16:51:42",
    "enabled": true
  };

  // Relation to the owning category, looked up by slug rather than by id.
  var categoryLink = {
    "@model": "CategoryModel",
    "matchBy": "slug",
    "matchValue": [category],
    "matchCriteria": {
      "groupId": 3
    }
  };

  var content = {
    "title": question,
    "fields": {
      "body": answer
    },
    "related": {
      "category": categoryLink
    }
  };

  return {
    "@model": "EntryModel",
    "attributes": attributes,
    "content": content
  };
}
// Every element matching `.faq-header`; each one becomes a category record.
var $categories = jQuery('.faq-header');

// Not read anywhere else in the visible script; left in place so the global stays defined.
var categories = [];

// Use each heading's text as the category title and derive its slug from that text.
$categories.each(function () {
  var headingText = jQuery(this).text();
  var record = buildCategory({
    title: headingText,
    slug: convertToSlug(headingText)
  });
  json.categories.push(record);
});
// Walk every question and emit one entry per question.
// Questions are assigned to categories by position: each question whose label
// starts with "1." moves on to the next category in json.categories.
$questions.each(function () {
  var $question = jQuery(this);

  // Trim first: whitespace around the markup would otherwise hide the leading
  // "1." from isFirst and stop the number prefix below from being stripped.
  var question = $question.text().trim();

  if (isFirst(question)) {
    categoryIndex++;
  }

  var category = json.categories[categoryIndex];
  if (!category) {
    // Happens when a question appears before any "1." label, or when there are
    // more "1." resets than scraped headers. Skip instead of failing on
    // `category.attributes`; the length check below exposes the gap.
    console.warn('Skipping question with no matching category: ', question);
    return; // returning undefined continues the jQuery .each() loop
  }

  // The answer element is the one labelled (aria-labelledby) by the id of the
  // question's grandparent. The id is quoted so values that are not plain
  // identifiers still form a valid selector.
  var answerId = $question.parent().parent().attr('id');
  var $answer = jQuery('[aria-labelledby="' + answerId + '"]');

  json.entries.push(buildEntry(
    question.replace(/^\d+\.\s*/, ''), // drop the "N. " numbering from the title
    $answer.html(),
    category.attributes.slug
  ));
});

// These numbers should be the same (make sure we got everything)
console.log('questions length: ', $questions.length);
console.log('faq length: ', json.entries.length);

// copy() is the DevTools console helper that puts a value on the clipboard.
// copy(json.categories); // copy just the categories
// copy(json.entries); // copy just the entries
copy(json); // Copy everything
@brianjhanson
Copy link
Author

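Just paste this directly into the Chrome console on the FAQ page to use. The page must already have jQuery loaded; the resulting JSON is copied to your clipboard.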

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment