Skip to content

Instantly share code, notes, and snippets.

@WyrdNexus
Last active April 25, 2018 23:43
Show Gist options
  • Save WyrdNexus/4131557e76872edfdfba8bf9e5da24ee to your computer and use it in GitHub Desktop.
Save WyrdNexus/4131557e76872edfdfba8bf9e5da24ee to your computer and use it in GitHub Desktop.
JavaScript SimpleScraper Bookmarklet
// Bookmarklet build of SimpleScraper: a minified copy of the SimpleScraper constructor plus its driver
// script (conditional jQuery <script> injection, h1/meta parsing, console dump), wrapped in one IIFE
// behind a `javascript:` prefix.
// NOTE(review): the line breaks inside this minified text look like wrapping artifacts; one of them falls
// inside the "12px solid transparent" string literal — rejoin into a single line before use (TODO confirm
// against the original gist).
// NOTE(review): the jQuery "load" handler only appends an empty <script> element, and the scraper is
// constructed right after the `if` block without waiting for that load event.
javascript:(function(){ function SimpleScraper(t){var e=this;this.report=null,this.collapseButton=null,this.result=[],this.init=function(){this.selector=this.findContainer(),void 0!==this.report&&null!=this.report&&this.report.length||(this.buildReport(),this.buildNav(this))},this.findContainer=function(){for(var e=0,i=t.length;e<i;e++)if($(t[e]).length)return t[e];return"body"},this.buildNav=function(t){var e=function(e){var i=$("<div></div>");return i.data("state",1),e in t.css.button&&i.css(t.css.button[e]),e in t&&i.click(t[e]),i};e("collapse"),e("config");t.report.append()},this.collapse=function(){$(this).data("state")?($(e.report).find("div").hide(),$(e.report).css({width:"36px",height:"36px"}),$(this).css({"border-right-color":"transparent","border-left-color":"black"}).show(),$(this).data("state",0)):($(e.report).find("div").show(),$(e.report).css({width:"500px",height:"auto"}),$(this).css({"border-right-color":"black","border-left-color":"transparent"}).show(),$(this).data("state",1))},this.buildConfigButton=function(){var t=$("<div></div>"),e=this.asset("gear");t.append(e),t.css(this.css.button.config),t.data("state",1),t.click(this.config),$(this.report).append(t)},this.config=function(){$(this).data("state")?($(e.report).find("div").hide(),$(e.report).css({width:"36px",height:"36px"}),$(this).css({"border-right-color":"transparent","border-left-color":"black"}).show(),$(this).data("state",0)):($(e.report).find("div").show(),$(e.report).css({width:"500px",height:"auto"}),$(this).css({"border-right-color":"black","border-left-color":"transparent"}).show(),$(this).data("state",1))},this.element=function(t,e){var i=$("<div></div>");return t in this.css&&i.css(this.css[t]),void 0!==e.inner&&e.inner.length&&i.html(e.inner),i},this.buildReport=function(){this.report=this.element("report",{}),this.report.attr("id","simpleScraper-report-container"),$(this.selector).append(this.report)},this.addTitle=function(t){var 
e=this.element("entry",{}),i=this.element("wide",{inner:t});i.css({"font-size":"1.4em","padding-top":"0.6em","border-top":"2px solid black",width:"100%",color:"#005"}),e.append(i),this.report.append(e)},this.addEntry=function(t,e){var i=this.element("entry",{}),r=void 0!==t&&t.length,s=void 0!==e&&e.length;if(r&&s){var n=this.element("label",{inner:t}),o=this.element("value",{inner:e});i.append(n)}else if(s)o=this.element("wide",{inner:e});else{if(!r)return;o=this.element("wide",{inner:t})}i.append(o),this.report.append(i)},this.parse=function(t,i){e.addTitle(t),$(t).each(function(){var r=$(this),s=e.optionExtract(i.value,r),n=e.optionExtract(i.label,r);e.record(t,{label:n,value:s}),e.addEntry(n,s)})},this.optionExtract=function(t,i){if(void 0!==t&&t.length)return"string"==typeof t?t:e.extract(t,i)},this.extract=function(t,e){for(var i="N/A",r=0,s=t.length;r<s;r++){var n;switch(t[r]){case"html":n=e.html();break;default:n=e.attr(t[r])}if(void 0!==n&&null!==n&&n.length>0){i=n;break}}return i},this.record=function(t,i){t in e.result||(e.result[t]=[],console.log("\n\n---"+t)),e.result[t].push(i)},this.console=function(){var t="";for(var i in e.result){t+="\n\n-----\n"+i;for(var r in e.result[i])t+="\n "+e.result[i][r].label+":"+e.result[i][r].value}console.log(t)},this.asset=function(t){if(t in this.assets){var e=this.assets[t],i=$(e.el);for(var r in e.content)i.append(r);return i.css(e.css),i}return null},this.css={report:{position:"fixed",top:"20px",left:"20px",width:"500px",display:"flex","flex-direction":"column","background-color":"#ccc",opacity:"0.7",border:"3px solid black","z-index":"999999"},entry:{width:"100%",display:"flex","flex-direction":"row","border-bottom":"1px solid grey"},label:{flex:"0 0 20%","border-right":"1px dashed grey","font-weight":"bold",padding:"5px"},value:{flex:"1 1 auto","font-size":"0.9em","line-height":"1em",padding:"5px"},wide:{padding:"5px","font-weight":"bold"},button:{collapse:{height:"0",width:"0",border:"12px solid 
transparent","border-right-color":"black",cursor:"pointer",position:"absolute",top:"3px",right:"3px"},config:{height:"24px",width:"24px",cursor:"pointer",position:"absolute",top:"3px",right:"36px"}}},this.assets={gear:{tag:'<svg version="1.1" xmlns="http://www.w3.org/2000/svg"></svg>',content:['<path d="M620,305.666v-51.333l-31.5-5.25c-2.333-8.75-5.833-16.917-9.917-23.917L597.25,199.5l-36.167-36.75l-26.25,18.083 c-7.583-4.083-15.75-7.583-23.916-9.917L505.667,140h-51.334l-5.25,31.5c-8.75,2.333-16.333,5.833-23.916,9.916L399.5,163.333 L362.75,199.5l18.667,25.666c-4.083,7.584-7.583,15.75-9.917,24.5l-31.5,4.667v51.333l31.5,5.25 c2.333,8.75,5.833,16.334,9.917,23.917l-18.667,26.25l36.167,36.167l26.25-18.667c7.583,4.083,15.75,7.583,24.5,9.917l5.25,30.916 h51.333l5.25-31.5c8.167-2.333,16.333-5.833,23.917-9.916l26.25,18.666l36.166-36.166l-18.666-26.25 c4.083-7.584,7.583-15.167,9.916-23.917L620,305.666z M480,333.666c-29.75,0-53.667-23.916-53.667-53.666s24.5-53.667,53.667-53.667 S533.667,250.25,533.667,280S509.75,333.666,480,333.666z"/>'],css:{height:"20px",width:"20px"}},search:{tag:'<svg xmlns="http://www.w3.org/2000/svg"></svg>',content:['<g stroke-width="2" stroke="#6c6c6c" fill="none"><path d="M11.29 11.71l-4-4"/><circle cx="5" cy="5" r="4"/></g>'],css:{height:"20px",width:"20px"}}},this.init()}if("undefined"==typeof $){var script=document.createElement("script");script.setAttribute("src","https://ajax.googleapis.com/ajax/libs/jquery/1.6.4/jquery.min.js"),script.addEventListener("load",function(){var t=document.createElement("script");document.body.appendChild(t)},!1),document.body.appendChild(script)}var seoReport;(seoReport=new SimpleScraper([".page-content","#page"])).parse("h1",{label:"",value:["html"]}),seoReport.parse("meta",{label:["name","property","itemprop","http-equiv"],value:["content","value"]}),seoReport.console(); })();
// IN DEV: readable (unminified) work-in-progress source for the bookmarklet above.
/**
 * SimpleScraper renders a floating report panel on the current page and fills
 * it with values scraped from the elements matched by jQuery selectors.
 *
 * Requires a global jQuery-compatible `$`. The constructor immediately calls
 * `init()`, which builds the panel and its navigation buttons.
 *
 * @constructor
 * @param {string[]} selectors Candidate container selectors in priority
 *     order. The panel is appended to the first one that matches anything on
 *     the page; `'body'` is used when none match.
 */
function SimpleScraper(selectors) {
  var main = this;

  this.report = null;
  this.collapseButton = null;
  // Dictionary keyed by the parsed selector; each value is an array of
  // {label, value} records. A plain object is used because the keys are
  // arbitrary selector strings, not array indices.
  this.result = {};

  /** Picks the container and builds the panel + buttons if not built yet. */
  this.init = function () {
    this.selector = this.findContainer();
    // `== null` deliberately matches both null and undefined.
    if (this.report == null || !this.report.length) {
      this.buildReport();
      this.buildNav(this);
    }
  };

  /**
   * Finds the first candidate selector that matches at least one element.
   * @returns {string} The matching selector, or 'body' when none match.
   */
  this.findContainer = function () {
    for (var i = 0, len = selectors.length; i < len; i++) {
      if ($(selectors[i]).length) {
        return selectors[i];
      }
    }
    return 'body';
  };

  /**
   * Builds the navigation buttons (collapse, config) and appends them to the
   * report panel.
   * @param {SimpleScraper} container Scraper whose css/handlers/report are used.
   */
  this.buildNav = function (container) {
    // Creates one button: styled from css.button[name] and wired to the
    // handler method of the same name, with an optional icon asset inside.
    var buildButton = function (name, iconName) {
      var el = $('<div></div>');
      el.data('state', 1);
      if (Object.prototype.hasOwnProperty.call(container.css.button, name)) {
        el.css(container.css.button[name]);
      }
      if (typeof container[name] === 'function') {
        el.click(container[name]);
      }
      if (iconName) {
        var icon = container.asset(iconName);
        if (icon) {
          el.append(icon);
        }
      }
      return el;
    };

    var buttons = [
      buildButton('collapse'),
      buildButton('config', 'gear')
    ];
    container.collapseButton = buttons[0];

    // The buttons must actually be attached; calling append() with no
    // arguments would leave them detached from the DOM.
    for (var i = 0, len = buttons.length; i < len; i++) {
      container.report.append(buttons[i]);
    }
  };

  /**
   * Shared click behaviour for the nav buttons: toggles the panel between the
   * expanded (500px wide) and collapsed (36x36px) states. The state flag is
   * stored on the clicked button via jQuery data ('state': 1 = expanded).
   * @param {Element} button The clicked button element.
   */
  function toggleReport(button) {
    var btn = $(button);
    var report = $(main.report);

    if (btn.data('state')) {
      // Hide everything, then re-show only the clicked button.
      report.find('div').hide();
      report.css({'width': '36px', 'height': '36px'});
      btn.css({
        'border-right-color': 'transparent',
        'border-left-color': 'black'
      }).show();
      btn.data('state', 0);
    } else {
      report.find('div').show();
      report.css({'width': '500px', 'height': 'auto'});
      btn.css({
        'border-right-color': 'black',
        'border-left-color': 'transparent'
      }).show();
      btn.data('state', 1);
    }
  }

  /** Click handler for the collapse button; `this` is the clicked element. */
  this.collapse = function () {
    toggleReport(this);
  };

  /** Builds a standalone config button with the gear icon and appends it. */
  this.buildConfigButton = function () {
    var el = $('<div></div>');
    var icon = this.asset('gear');
    if (icon) {
      el.append(icon);
    }
    el.css(this.css.button.config);
    el.data('state', 1);
    el.click(this.config);
    $(this.report).append(el);
  };

  /**
   * Click handler for the config button; `this` is the clicked element.
   * Currently performs the same toggle as `collapse`.
   */
  this.config = function () {
    toggleReport(this);
  };

  /**
   * Creates a styled <div>.
   * @param {string} type Key into `this.css`; unknown keys get no styling.
   * @param {{inner: (string|undefined)}} options `inner` is inserted with
   *     jQuery `.html()`, i.e. it is treated as markup, not plain text.
   * @returns {Object} The jQuery-wrapped element.
   */
  this.element = function (type, options) {
    var el = $('<div></div>');
    if (Object.prototype.hasOwnProperty.call(this.css, type)) {
      el.css(this.css[type]);
    }
    if (typeof options.inner !== 'undefined' && options.inner.length) {
      el.html(options.inner);
    }
    return el;
  };

  /** Creates the report panel and appends it to the chosen container. */
  this.buildReport = function () {
    this.report = this.element('report', {});
    this.report.attr('id', 'simpleScraper-report-container');
    $(this.selector).append(this.report);
  };

  /**
   * Appends a section heading row to the panel.
   * @param {string} title Heading text (the selector being parsed).
   */
  this.addTitle = function (title) {
    var entryEl = this.element('entry', {});
    var titleEl = this.element('wide', {inner: title});
    titleEl.css({
      'font-size': '1.4em',
      'padding-top': '0.6em',
      'border-top': '2px solid black',
      'width': '100%',
      'color': '#005'
    });
    entryEl.append(titleEl);
    this.report.append(entryEl);
  };

  /**
   * Appends one row to the panel. With both label and value the row has two
   * cells; with only one of them it has a single wide cell; with neither,
   * nothing is added.
   * @param {string=} label
   * @param {string=} value
   */
  this.addEntry = function (label, value) {
    var hasLabel = label != null && label.length > 0;
    var hasValue = value != null && value.length > 0;
    if (!hasLabel && !hasValue) {
      return;
    }

    var entryEl = this.element('entry', {});
    if (hasLabel && hasValue) {
      entryEl.append(this.element('label', {inner: label}));
      entryEl.append(this.element('value', {inner: value}));
    } else {
      entryEl.append(this.element('wide', {inner: hasValue ? value : label}));
    }
    this.report.append(entryEl);
  };

  /**
   * Scrapes every element matching `selector`, recording and displaying a
   * label/value pair for each.
   * @param {string} selector jQuery selector of the elements to scrape.
   * @param {{label: (string|string[]), value: (string|string[])}} options
   *     Each option is either a literal string or a list of sources to try
   *     (see `extract`).
   */
  this.parse = function (selector, options) {
    main.addTitle(selector);
    $(selector).each(function () {
      var el = $(this);
      var value = main.optionExtract(options.value, el);
      var label = main.optionExtract(options.label, el);
      main.record(selector, {
        'label': label,
        'value': value
      });
      main.addEntry(label, value);
    });
  };

  /**
   * Resolves one option against an element.
   * @returns {(string|undefined)} The literal string, the extracted value, or
   *     undefined when the option is missing or empty.
   */
  this.optionExtract = function (option, el) {
    if (typeof option === 'undefined' || !option.length) {
      return undefined;
    }
    if (typeof option === 'string') {
      return option;
    }
    return main.extract(option, el);
  };

  /**
   * Tries each source in order and returns the first non-empty result.
   * The source 'html' reads the element's inner HTML; any other name is read
   * as an attribute.
   * @param {string[]} search Sources to try, in priority order.
   * @param {Object} subject jQuery-wrapped element.
   * @returns {string} First non-empty value, or 'N/A'.
   */
  this.extract = function (search, subject) {
    for (var i = 0, len = search.length; i < len; i++) {
      var found = search[i] === 'html'
        ? subject.html()
        : subject.attr(search[i]);
      if (typeof found !== 'undefined' && found !== null && found.length > 0) {
        return found;
      }
    }
    return 'N/A';
  };

  /**
   * Stores a scraped record under its selector (logging the selector the
   * first time it is seen).
   */
  this.record = function (selector, result) {
    if (!Object.prototype.hasOwnProperty.call(main.result, selector)) {
      main.result[selector] = [];
      console.log('\n\n---' + selector);
    }
    main.result[selector].push(result);
  };

  /** Logs every recorded result, grouped by selector, as one string. */
  this.console = function () {
    var output = '';
    var keys = Object.keys(main.result);
    for (var k = 0; k < keys.length; k++) {
      var records = main.result[keys[k]];
      output += '\n\n-----\n' + keys[k];
      for (var i = 0; i < records.length; i++) {
        var label = records[i].label;
        // Omit the "label:" prefix when no label was extracted, rather than
        // printing the literal text "undefined:".
        var prefix = (label != null && label.length) ? label + ':' : '';
        output += '\n ' + prefix + records[i].value;
      }
    }
    console.log(output);
  };

  /**
   * Builds an icon element from `this.assets`.
   * @param {string} name Asset key.
   * @returns {(Object|null)} jQuery-wrapped element, or null if unknown.
   */
  this.asset = function (name) {
    if (!Object.prototype.hasOwnProperty.call(this.assets, name)) {
      return null;
    }
    var asset = this.assets[name];
    var inner = asset.content.join('');
    var closeAt = asset.tag.lastIndexOf('</');
    var el;
    if (closeAt === -1) {
      el = $(asset.tag);
      el.append(inner);
    } else {
      // Splice the content inside the wrapper tag and create the element from
      // the combined markup in one go, so the children are parsed together
      // with their <svg> parent.
      el = $(asset.tag.slice(0, closeAt) + inner + asset.tag.slice(closeAt));
    }
    el.css(asset.css);
    return el;
  };

  // Inline style maps applied through jQuery .css(); keyed by element type.
  this.css = {
    report: {
      'position': 'fixed',
      'top': '20px',
      'left': '20px',
      'width': '500px',
      'display': 'flex',
      'flex-direction': 'column',
      'background-color': '#ccc',
      'opacity': '0.7',
      'border': '3px solid black',
      'z-index': '999999'
    },
    entry: {
      'width': '100%',
      'display': 'flex',
      'flex-direction': 'row',
      'border-bottom': '1px solid grey'
    },
    label: {
      'flex': '0 0 20%',
      'border-right': '1px dashed grey',
      'font-weight': 'bold',
      'padding': '5px'
    },
    value: {
      'flex': '1 1 auto',
      'font-size': '0.9em',
      'line-height': '1em',
      'padding': '5px'
    },
    wide: {
      'padding': '5px',
      'font-weight': 'bold'
    },
    button: {
      collapse: {
        'height': '0',
        'width': '0',
        'border': '12px solid transparent',
        'border-right-color': 'black',
        'cursor': 'pointer',
        'position': 'absolute',
        'top': '3px',
        'right': '3px'
      },
      config: {
        'height': '24px',
        'width': '24px',
        'cursor': 'pointer',
        'position': 'absolute',
        'top': '3px',
        'right': '36px'
      }
    }
  };

  // Icon definitions: wrapper tag, inner markup fragments, and inline styles.
  this.assets = {
    gear: {
      tag: '<svg version="1.1" xmlns="http://www.w3.org/2000/svg"></svg>',
      content: [
        '<path d="M620,305.666v-51.333l-31.5-5.25c-2.333-8.75-5.833-16.917-9.917-23.917L597.25,199.5l-36.167-36.75l-26.25,18.083 c-7.583-4.083-15.75-7.583-23.916-9.917L505.667,140h-51.334l-5.25,31.5c-8.75,2.333-16.333,5.833-23.916,9.916L399.5,163.333 L362.75,199.5l18.667,25.666c-4.083,7.584-7.583,15.75-9.917,24.5l-31.5,4.667v51.333l31.5,5.25 c2.333,8.75,5.833,16.334,9.917,23.917l-18.667,26.25l36.167,36.167l26.25-18.667c7.583,4.083,15.75,7.583,24.5,9.917l5.25,30.916 h51.333l5.25-31.5c8.167-2.333,16.333-5.833,23.917-9.916l26.25,18.666l36.166-36.166l-18.666-26.25 c4.083-7.584,7.583-15.167,9.916-23.917L620,305.666z M480,333.666c-29.75,0-53.667-23.916-53.667-53.666s24.5-53.667,53.667-53.667 S533.667,250.25,533.667,280S509.75,333.666,480,333.666z"/>'
      ],
      css: {
        'height': '20px',
        'width': '20px'
      }
    },
    search: {
      tag: '<svg xmlns="http://www.w3.org/2000/svg"></svg>',
      content: [
        '<g stroke-width="2" stroke="#6c6c6c" fill="none"><path d="M11.29 11.71l-4-4"/><circle cx="5" cy="5" r="4"/></g>'
      ],
      css: {
        'height': '20px',
        'width': '20px'
      }
    }
  };

  this.init();
}
// Driver script: builds the SEO report for the current page.
// `seoReport` stays a top-level variable so it can be inspected from the
// browser console after the run.
var seoReport;

/**
 * Creates the scraper, parses the page's <h1> and <meta> elements into the
 * report panel, and dumps the collected results to the console.
 * Must only run once `$` (jQuery) is available.
 */
function runSeoReport() {
  seoReport = new SimpleScraper(['.page-content', '#page']);
  seoReport.parse('h1', {
    label: '',
    value: ['html']
  });
  seoReport.parse('meta', {
    label: ['name', 'property', 'itemprop', 'http-equiv'],
    value: ['content', 'value']
  });
  seoReport.console();
}

if (typeof $ === 'undefined') {
  // No `$` on this page: inject jQuery and wait for it to finish loading
  // before scraping, since SimpleScraper calls `$` in its constructor.
  var script = document.createElement('script');
  script.setAttribute('src', 'https://ajax.googleapis.com/ajax/libs/jquery/1.6.4/jquery.min.js');
  script.addEventListener('load', runSeoReport, false);
  document.body.appendChild(script);
} else {
  runSeoReport();
}
/**
 * SimpleScraper renders a floating report panel on the current page and fills
 * it with values scraped from the elements matched by jQuery selectors.
 *
 * Requires a global jQuery-compatible `$`. The constructor immediately calls
 * `init()`, which builds the panel and its collapse button.
 *
 * @constructor
 * @param {string[]} selectors Candidate container selectors in priority
 *     order. The panel is appended to the first one that matches anything on
 *     the page; `'body'` is used when none match.
 */
function SimpleScraper(selectors) {
  var main = this;

  this.report = null;
  this.collapseButton = null;
  // Dictionary keyed by the parsed selector; each value is an array of
  // {label, value} records. A plain object is used because the keys are
  // arbitrary selector strings, not array indices.
  this.result = {};

  /** Picks the container and builds the panel + button if not built yet. */
  this.init = function () {
    this.selector = this.findContainer();
    // `== null` deliberately matches both null and undefined.
    if (this.report == null || !this.report.length) {
      this.buildReport();
      this.buildCollapseButton();
    }
  };

  /**
   * Finds the first candidate selector that matches at least one element.
   * @returns {string} The matching selector, or 'body' when none match.
   */
  this.findContainer = function () {
    for (var i = 0, len = selectors.length; i < len; i++) {
      if ($(selectors[i]).length) {
        return selectors[i];
      }
    }
    return 'body';
  };

  /** Creates the collapse button and appends it to the report panel. */
  this.buildCollapseButton = function () {
    var el = $('<div></div>');
    el.css(this.css.collapse);
    el.data('state', 1);
    el.click(this.collapse);
    $(this.report).append(el);
  };

  /**
   * Click handler for the collapse button; `this` is the clicked element.
   * Toggles the panel between expanded (500px wide) and collapsed (30x30px).
   * The state flag lives on the button via jQuery data ('state': 1 = expanded).
   */
  this.collapse = function () {
    var btn = $(this);
    var report = $(main.report);

    if (btn.data('state')) {
      // Hide everything, then re-show only the button itself.
      report.find('div').hide();
      report.css({'width': '30px', 'height': '30px'});
      btn.css({
        'border-right-color': 'transparent',
        'border-left-color': 'black'
      }).show();
      btn.data('state', 0);
    } else {
      report.find('div').show();
      report.css({'width': '500px', 'height': 'auto'});
      btn.css({
        'border-right-color': 'black',
        'border-left-color': 'transparent'
      }).show();
      btn.data('state', 1);
    }
  };

  /**
   * Creates a styled <div>.
   * @param {string} type Key into `this.css`; unknown keys get no styling.
   * @param {{inner: (string|undefined)}} options `inner` is inserted with
   *     jQuery `.html()`, i.e. it is treated as markup, not plain text.
   * @returns {Object} The jQuery-wrapped element.
   */
  this.element = function (type, options) {
    var el = $('<div></div>');
    if (Object.prototype.hasOwnProperty.call(this.css, type)) {
      el.css(this.css[type]);
    }
    if (typeof options.inner !== 'undefined' && options.inner.length) {
      el.html(options.inner);
    }
    return el;
  };

  /** Creates the report panel and appends it to the chosen container. */
  this.buildReport = function () {
    this.report = this.element('report', {});
    this.report.attr('id', 'simpleScraper-report-container');
    $(this.selector).append(this.report);
  };

  /**
   * Appends a section heading row to the panel.
   * @param {string} title Heading text (the selector being parsed).
   */
  this.addTitle = function (title) {
    var entryEl = this.element('entry', {});
    var titleEl = this.element('wide', {inner: title});
    titleEl.css({
      'font-size': '1.4em',
      'padding-top': '0.6em',
      'border-top': '2px solid black',
      'width': '100%',
      'color': '#005'
    });
    entryEl.append(titleEl);
    this.report.append(entryEl);
  };

  /**
   * Appends one row to the panel. With both label and value the row has two
   * cells; with only one of them it has a single wide cell; with neither,
   * nothing is added.
   * @param {string=} label
   * @param {string=} value
   */
  this.addEntry = function (label, value) {
    var hasLabel = label != null && label.length > 0;
    var hasValue = value != null && value.length > 0;
    if (!hasLabel && !hasValue) {
      return;
    }

    var entryEl = this.element('entry', {});
    if (hasLabel && hasValue) {
      entryEl.append(this.element('label', {inner: label}));
      entryEl.append(this.element('value', {inner: value}));
    } else {
      entryEl.append(this.element('wide', {inner: hasValue ? value : label}));
    }
    this.report.append(entryEl);
  };

  /**
   * Scrapes every element matching `selector`, recording and displaying a
   * label/value pair for each.
   * @param {string} selector jQuery selector of the elements to scrape.
   * @param {{label: (string|string[]), value: (string|string[])}} options
   *     Each option is either a literal string or a list of sources to try
   *     (see `extract`).
   */
  this.parse = function (selector, options) {
    main.addTitle(selector);
    $(selector).each(function () {
      var el = $(this);
      var value = main.optionExtract(options.value, el);
      var label = main.optionExtract(options.label, el);
      main.record(selector, {
        'label': label,
        'value': value
      });
      main.addEntry(label, value);
    });
  };

  /**
   * Resolves one option against an element.
   * @returns {(string|undefined)} The literal string, the extracted value, or
   *     undefined when the option is missing or empty.
   */
  this.optionExtract = function (option, el) {
    if (typeof option === 'undefined' || !option.length) {
      return undefined;
    }
    if (typeof option === 'string') {
      return option;
    }
    return main.extract(option, el);
  };

  /**
   * Tries each source in order and returns the first non-empty result.
   * The source 'html' reads the element's inner HTML; any other name is read
   * as an attribute.
   * @param {string[]} search Sources to try, in priority order.
   * @param {Object} subject jQuery-wrapped element.
   * @returns {string} First non-empty value, or 'N/A'.
   */
  this.extract = function (search, subject) {
    for (var i = 0, len = search.length; i < len; i++) {
      var found = search[i] === 'html'
        ? subject.html()
        : subject.attr(search[i]);
      if (typeof found !== 'undefined' && found !== null && found.length > 0) {
        return found;
      }
    }
    return 'N/A';
  };

  /**
   * Stores a scraped record under its selector (logging the selector the
   * first time it is seen).
   */
  this.record = function (selector, result) {
    if (!Object.prototype.hasOwnProperty.call(main.result, selector)) {
      main.result[selector] = [];
      console.log('\n\n---' + selector);
    }
    main.result[selector].push(result);
  };

  /** Logs every recorded result, grouped by selector, as one string. */
  this.console = function () {
    var output = '';
    var keys = Object.keys(main.result);
    for (var k = 0; k < keys.length; k++) {
      var records = main.result[keys[k]];
      output += '\n\n-----\n' + keys[k];
      for (var i = 0; i < records.length; i++) {
        var label = records[i].label;
        // Omit the "label:" prefix when no label was extracted, rather than
        // printing the literal text "undefined:".
        var prefix = (label != null && label.length) ? label + ':' : '';
        output += '\n ' + prefix + records[i].value;
      }
    }
    console.log(output);
  };

  // Inline style maps applied through jQuery .css(); keyed by element type.
  this.css = {
    report: {
      'position': 'fixed',
      'top': '20px',
      'left': '20px',
      'width': '500px',
      'display': 'flex',
      'flex-direction': 'column',
      'background-color': '#ccc',
      'opacity': '0.7',
      'border': '3px solid black',
      'z-index': '999999'
    },
    entry: {
      'width': '100%',
      'display': 'flex',
      'flex-direction': 'row',
      'border-bottom': '1px solid grey'
    },
    label: {
      'flex': '0 0 20%',
      'border-right': '1px dashed grey',
      'font-weight': 'bold',
      'padding': '5px'
    },
    value: {
      'flex': '1 1 auto',
      'font-size': '0.9em',
      'line-height': '1em',
      'padding': '5px'
    },
    wide: {
      'padding': '5px',
      'font-weight': 'bold'
    },
    collapse: {
      'height': '0',
      'width': '0',
      'border': '12px solid transparent',
      'border-right-color': 'black',
      'cursor': 'pointer',
      'position': 'absolute',
      'top': '3px',
      'right': '3px'
    }
  };

  this.init();
}
// Driver script: create the scraper (its panel goes into the first matching
// container, falling back to <body>), scrape each configured selector in
// order, then print everything that was collected to the console.
var seoReport = new SimpleScraper(['.page-content', '#page']);
[
  ['h1', {label: '', value: ['html']}],
  ['meta', {label: ['name', 'property', 'itemprop', 'http-equiv'], value: ['content', 'value']}]
].forEach(function (job) {
  // job[0] is the selector, job[1] the label/value extraction options.
  seoReport.parse(job[0], job[1]);
});
seoReport.console();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment