Skip to content

Instantly share code, notes, and snippets.

@skeptrunedev
Last active March 15, 2024 03:42
Show Gist options
  • Save skeptrunedev/f53588fc98d4d87cb077467a69c578f0 to your computer and use it in GitHub Desktop.
Save skeptrunedev/f53588fc98d4d87cb077467a69c578f0 to your computer and use it in GitHub Desktop.
Trieve Brettdg Scrape
/**
 * Pause for a fixed amount of time.
 *
 * @param {number} ms - Delay in milliseconds, handed to setTimeout as-is.
 * @returns {Promise<void>} Promise that resolves with no value once the timer fires.
 */
const sleepPromise = (ms) =>
  new Promise((done) => {
    setTimeout(() => done(), ms);
  });
/**
 * Scroll the window to the bottom of the document as it is currently rendered.
 *
 * The target offset is `document.body.scrollHeight`, read at call time.
 *
 * @returns {Promise<void>} Resolves once the scroll call has been issued.
 */
const scrollDown = async () => {
  const { scrollHeight: pageBottom } = document.body;
  window.scrollTo(0, pageBottom);
};
/**
 * Store one scraped post in the file-level `posts` array.
 *
 * @param {string} chunk_html - Content of the post (the caller passes the post's innerText).
 * @param {string} link - URL of the post.
 * @param {string} tracking_id - Identifier stored alongside the post (the caller passes the URL).
 * @returns {Promise<void>} Resolves once the record has been added.
 */
const createChunk = async (chunk_html, link, tracking_id) => {
  // Arrays have no `append` method; `push` is what adds an element to the end.
  posts.push({ chunk_html, link, tracking_id });
};
// NOTE(review): `counter` is not read or updated anywhere in the visible code — confirm whether it is still needed.
let counter = 0;
// Post URLs that createChunk has already accepted. The same DOM nodes are seen
// again on every pass, so this keeps a post from being stored more than once.
const scrapedTrackingIds = new Set();

/**
 * Walk the timeline container currently in the DOM and hand every post whose
 * poster name reads "@brettdg" to createChunk.
 *
 * Does nothing when the timeline container is not found. Posts whose URL was
 * already stored by an earlier pass are skipped. Errors thrown by createChunk
 * are logged and do not stop the pass.
 *
 * NOTE(review): the selectors depend on what look like generated class names
 * (e.g. `css-175oi2r`); presumably they break when the page markup changes.
 *
 * @returns {Promise<void>} Resolves when every child of the timeline has been examined.
 */
const scrapeTimeline = async () => {
  const timeline = document.querySelector("#react-root > div > div > div.css-175oi2r.r-1f2l425.r-13qz1uu.r-417010.r-18u37iz > main > div > div > div > div > div > div:nth-child(3) > div > div > section > div > div");
  if (!timeline) {
    // querySelector returns null on a miss; there is nothing to scrape yet.
    return;
  }

  // Iterate a snapshot so DOM changes during an await cannot shift the indices.
  for (const post of Array.from(timeline.children)) {
    const link = post.querySelector("div.css-175oi2r.r-18u37iz.r-1wbh5a2.r-13hce6t > div > div.css-175oi2r.r-18u37iz.r-1q142lx > a");
    const posterName = post.querySelector("div.css-175oi2r.r-18u37iz.r-1wbh5a2.r-13hce6t > div > div.css-175oi2r.r-1wbh5a2.r-dnmrzs");
    const postContent = post.querySelector("div.css-175oi2r.r-18u37iz > div.css-175oi2r.r-1iusvr4.r-16y2uox.r-1777fci.r-kzbkwu > div:nth-child(2)");

    if (posterName?.innerText !== "@brettdg") {
      continue;
    }

    const href = link?.href;
    if (href !== undefined && scrapedTrackingIds.has(href)) {
      continue;
    }

    console.log(href, posterName.innerText, postContent?.innerText);
    try {
      await createChunk(postContent?.innerText, href, href);
      // Only mark the post as done after it was stored, so a failed attempt
      // is retried on the next pass.
      if (href !== undefined) {
        scrapedTrackingIds.add(href);
      }
    } catch (e) {
      console.error(e);
    }
  }
};
// NOTE(review): API_URL, DATASET_ID and API_KEY are all empty strings and are
// not referenced anywhere in the visible code — presumably left over from an
// upload step; confirm before removing.
const API_URL = "";
const DATASET_ID = "";
const API_KEY = ""
// Collected post records; createChunk is the only visible code that writes to it.
let posts = [];
// Scrape the visible posts, scroll to the bottom, wait 1.5 s, and repeat.
// There is no exit condition: the loop runs until the page or script is stopped.
while (true) {
  try {
    // Await both steps so the scroll only happens after the current pass has
    // finished, and so a rejection is reported here instead of being lost.
    await scrapeTimeline();
    await scrollDown();
  } catch (err) {
    // Keep looping after a failed pass; the next iteration tries again.
    console.error(err);
  }
  await sleepPromise(1500);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment