Skip to content

Instantly share code, notes, and snippets.

@ZeeCoder
Created December 14, 2018 12:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ZeeCoder/664855aad71a67a686a1790ff3ca9193 to your computer and use it in GitHub Desktop.
Save ZeeCoder/664855aad71a67a686a1790ff3ca9193 to your computer and use it in GitHub Desktop.
Instagram Comment Scraper
javascript:(async () => {
  /*
   * Instagram comment scraper bookmarklet.
   *
   * Asks the user for a page-load interval, repeatedly clicks the "load more"
   * button found via the "li button" selector, reads name/message pairs from
   * the comment list and shows them as semicolon-separated CSV inside a
   * full-screen overlay that offers "Copy CSV" and "Close" buttons.
   *
   * Only block comments and explicit semicolons are used, so the script still
   * parses if its line breaks are collapsed into a single line.
   */
  const VERSION = "2";
  const UI_ROOT_ID = "EVENTSTAG-UI";
  const LOAD_MORE_SELECTOR = "li button";

  /* Resolves after `ms` milliseconds. */
  const delay = (ms) =>
    new Promise((resolve) => {
      setTimeout(resolve, ms);
    });

  const startTime = new Date();

  /* Creates an element, applies inline styles and optional text content. */
  const createElement = (tagName, style = {}, text = null) => {
    const element = document.createElement(tagName);
    Object.assign(element.style, style);
    if (text !== null) {
      element.textContent = text;
    }
    return element;
  };

  /*
   * Overlay UI. `state` fields are shown only while they hold a string;
   * any other value hides the matching element.
   */
  const ui = {
    view: {},
    state: { progress: "Loading...", rateLimit: "", csv: "" },

    /* Removes a previously rendered overlay, if one exists. */
    destroy() {
      const pastUI = document.querySelector(`#${UI_ROOT_ID}`);
      if (pastUI) {
        pastUI.parentElement.removeChild(pastUI);
      }
    },

    /* Builds the overlay from scratch and attaches it to document.body. */
    initialRender() {
      this.destroy();

      const buttonStyle = {
        fontSize: "20px",
        height: "40px",
        padding: "0 15px",
        margin: "20px",
        cursor: "pointer",
      };
      const view = this.view;

      view.background = createElement("div", {
        position: "fixed",
        left: "0",
        top: "0",
        width: "100%",
        height: "100%",
        background: "rgba(0,0,0,.8)",
        color: "white",
        display: "flex",
      });
      view.background.id = UI_ROOT_ID;

      view.buttons = createElement("div", { display: "none" });
      view.copyButton = createElement("button", buttonStyle, "Copy CSV");
      view.closeButton = createElement("button", buttonStyle, "Close");
      view.csv = createElement("div", {
        background: "white",
        color: "black",
        margin: "20px",
        flex: "1",
        padding: "20px",
        display: "none",
        whiteSpace: "pre-line",
        overflow: "auto",
      });
      view.progress = createElement("div", {
        position: "absolute",
        right: "20px",
        top: "0",
        whiteSpace: "nowrap",
        fontSize: "20px",
        display: "none",
        lineHeight: "80px",
      });
      view.rateLimit = createElement("div", {
        position: "absolute",
        left: "50%",
        top: "0",
        color: "red",
        whiteSpace: "nowrap",
        fontSize: "20px",
        display: "none",
        lineHeight: "80px",
        transform: "translateX(-50%)",
      });
      /* Off-screen textarea: gives execCommand("copy") something to select. */
      view.csvInput = createElement("textarea", {
        position: "absolute",
        left: "100vw",
      });

      view.closeButton.onclick = () => this.destroy();
      view.copyButton.onclick = () => {
        view.csvInput.focus();
        view.csvInput.select();
        document.execCommand("copy");
        alert("CSV copied");
      };

      view.background.appendChild(view.buttons);
      view.background.appendChild(view.progress);
      view.background.appendChild(view.rateLimit);
      view.buttons.appendChild(view.copyButton);
      view.buttons.appendChild(view.closeButton);
      view.background.appendChild(view.csv);
      view.background.appendChild(view.csvInput);
      document.body.appendChild(view.background);

      this.render();
    },

    /* Merges `newState` into the current state and syncs the DOM with it. */
    render(newState = {}) {
      Object.assign(this.state, newState);
      const { view, state } = this;

      if (typeof state.progress === "string") {
        view.progress.textContent = state.progress;
        view.progress.style.display = "block";
      } else {
        view.progress.style.display = "none";
      }

      if (typeof state.rateLimit === "string") {
        view.rateLimit.textContent = state.rateLimit;
        view.rateLimit.style.display = "block";
      } else {
        view.rateLimit.style.display = "none";
      }

      if (typeof state.csv === "string") {
        view.csvInput.value = state.csv;
        view.csv.textContent = state.csv;
        view.buttons.style.display = "block";
        view.csv.style.display = "block";
      } else {
        view.buttons.style.display = "none";
        view.csv.style.display = "none";
      }
    },
  };

  /*
   * The button is looked up once and that same node is clicked on every page.
   * It is deliberately not re-queried before clicking: the selector is broad
   * and could match an unrelated button once the real one is gone.
   */
  const loadMoreButton = document.querySelector(LOAD_MORE_SELECTOR);

  const isLoadMoreButtonAvailable = () =>
    Boolean(document.querySelector(LOAD_MORE_SELECTOR));

  /*
   * Polls every 100 ms until the cached button is enabled again. Also stops
   * when the button has left the document, because a detached node would
   * otherwise be polled forever.
   */
  const waitForEnabledLoadButton = async () => {
    while (loadMoreButton.disabled && document.body.contains(loadMoreButton)) {
      await delay(100);
    }
  };

  /*
   * Comment list items. The first two matches are skipped; presumably they
   * are not regular comments (e.g. caption / load-more row) - TODO confirm
   * against the current page markup.
   */
  const getCommentNodes = () =>
    [...document.querySelectorAll("article section + div li")].slice(2);

  /* Reads { name, message } from a comment node; missing parts stay "". */
  const getCommentFromNode = (node) => {
    const nameNode = node.querySelector("a");
    const messageNode = node.querySelector("h3 + span");
    return {
      name: nameNode ? nameNode.textContent : "",
      message: messageNode ? messageNode.textContent : "",
    };
  };

  const removeNode = (node) => node.parentElement.removeChild(node);

  /* Clicks the load-more button (when there is one) and waits for it to settle. */
  const loadNextCommentPage = async () => {
    if (!loadMoreButton || !isLoadMoreButtonAvailable()) {
      return;
    }
    loadMoreButton.click();
    await waitForEnabledLoadButton();
  };

  let extractingFirstCommentPage = true;

  /*
   * Returns the comments currently in the DOM and removes every comment node
   * except the first one. On every page after the first, the last comment is
   * dropped from the result; presumably it is the node kept from the previous
   * page and was therefore already reported - TODO confirm.
   */
  const extractCommentsFromPage = () => {
    const nodes = getCommentNodes();
    const comments = nodes
      .filter((node) => node.id !== "ET-DUMMY-COMMENT")
      .map((node) => getCommentFromNode(node));

    nodes.slice(1).forEach((node) => removeNode(node));

    if (extractingFirstCommentPage) {
      extractingFirstCommentPage = false;
      return comments;
    }
    return comments.slice(0, comments.length - 1);
  };

  /*
   * Loads and extracts comment pages until a page yields no comments.
   * `handleComments` receives each non-empty batch; `frequency` is the pause
   * in milliseconds between pages. Written as a plain async loop so that any
   * error rejects the returned promise instead of leaving it pending.
   */
  const getComments = async ({ frequency = 300, handleComments }) => {
    for (;;) {
      await loadNextCommentPage();
      const comments = extractCommentsFromPage();
      if (!comments.length) {
        return;
      }
      handleComments(comments);
      await delay(frequency);
    }
  };

  /*
   * Quotes a CSV field when it contains the separator, a double quote or a
   * line break (a raw line break would otherwise split the row); embedded
   * double quotes are doubled.
   */
  const getCleanCsvField = (text) => {
    if (!/[";\r\n]/.test(text)) {
      return text;
    }
    return `"${text.replace(/"/g, '""')}"`;
  };

  /* One "name;message" line per comment, joined with "\n". */
  const getCsvFromComments = (comments) =>
    comments
      .map(({ name, message }) =>
        [getCleanCsvField(name), getCleanCsvField(message)].join(";")
      )
      .join("\n");

  /*
   * Prompts until the user enters an integer >= 300 and returns it, or
   * returns null as soon as the prompt is cancelled.
   */
  const getFrequencyFromUser = () => {
    for (;;) {
      const answer = prompt(
        `(v${VERSION}) At what frequency should the comment pages be loaded? (300 or more) ` +
          "Recommended values: 300 for less than 1000 comments, otherwise 1000 or more",
        "300"
      );
      if (answer === null) {
        return null;
      }

      const parsed = Number.parseInt(answer, 10);
      if (Number.isNaN(parsed)) {
        alert("Error, please try again! (Error: Invalid number)");
      } else if (parsed < 300) {
        alert("The given frequency has to be 300 or more.");
      } else {
        return parsed;
      }
    }
  };

  const frequency = getFrequencyFromUser();
  if (frequency === null) {
    return;
  }

  ui.initialRender();

  let pageCount = 1;
  ui.render({ progress: `Loading comment page#${pageCount}...` });

  /*
   * Shows a warning when no batch has arrived for `triggerAfterMs`
   * milliseconds; every restart clears the warning and re-arms the timer.
   */
  const rateLimitDetector = {
    triggerAfterMs: 10 * 1e3,
    timeout: null,
    restart() {
      ui.render({ rateLimit: "" });
      if (this.timeout) {
        clearTimeout(this.timeout);
      }
      this.timeout = setTimeout(() => {
        ui.render({ rateLimit: "Rate limit may have reached." });
      }, this.triggerAfterMs);
    },
    stop() {
      if (this.timeout) {
        clearTimeout(this.timeout);
      }
      ui.render({ rateLimit: "" });
    },
  };

  const csvParts = [];
  rateLimitDetector.restart();

  await getComments({
    frequency,
    handleComments: (batch) => {
      rateLimitDetector.restart();
      /* Each new batch is placed in front of the ones collected so far. */
      csvParts.unshift(getCsvFromComments(batch));
      pageCount++;
      ui.render({
        csv: csvParts.join("\n"),
        progress: `Loading comment page#${pageCount}`,
      });
    },
  });

  await delay(300);
  rateLimitDetector.stop();

  const stopTime = new Date();
  const elapsedSeconds = (stopTime - startTime) / 1e3;
  ui.render({
    progress: `Done in ${elapsedSeconds} seconds. Loaded ${pageCount} pages.`,
  });
})().catch((error) => {
  /* Surface failures in the console instead of leaving a silent rejection. */
  console.error(error);
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment