-
-
Save adrianhorning08/8b1d7d98cb2548dd76031d3d3cb3537b to your computer and use it in GitHub Desktop.
LinkedIn Jobs Scrape
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Convert an array of flat JSON objects to CSV and trigger a browser download.
 *
 * Fixes over the naive version:
 *  - Escapes fields containing quotes or newlines, not just commas (RFC 4180);
 *    previously a value like `He said "hi"` produced corrupt CSV.
 *  - Coerces non-string values with String(); `value?.includes(",")` threw a
 *    TypeError for booleans/objects because `?.` only guards null/undefined.
 *  - Emits every row in header order, so objects whose keys differ in order
 *    or presence no longer shift values into the wrong columns.
 *  - Bails out gracefully on an empty array instead of crashing on jsonData[0].
 *
 * @param {Object[]} jsonData - Array of flat records; headers come from the first item.
 * @param {string} fileName - Suggested download file name.
 */
function createCSV(jsonData, fileName) {
  if (!Array.isArray(jsonData) || jsonData.length === 0) {
    console.warn("createCSV: nothing to export");
    return;
  }
  // Quote + double-up embedded quotes when the field contains a delimiter,
  // a quote, or a line break (RFC 4180). Nullish becomes an empty field.
  const escapeField = (value) => {
    if (typeof value === "number") return String(value);
    const text = value == null ? "" : String(value);
    return /[",\n\r]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
  };
  const headers = Object.keys(jsonData[0]);
  const csvData = [headers.join(",")];
  for (const item of jsonData) {
    // Map over headers (not the item's own keys) to keep columns aligned.
    csvData.push(headers.map((key) => escapeField(item[key])).join(","));
  }
  // Create a Blob containing the CSV data
  const csvBlob = new Blob([csvData.join("\n")], {
    type: "text/csv;charset=utf-8",
  });
  // Create a URL for the Blob
  const csvUrl = URL.createObjectURL(csvBlob);
  // Create a link element and click it to start the download
  const link = document.createElement("a");
  link.href = csvUrl;
  link.target = "_blank";
  link.download = fileName;
  document.body.appendChild(link);
  link.click();
  // Remove the link and revoke the Blob URL
  document.body.removeChild(link);
  URL.revokeObjectURL(csvUrl);
}
/**
 * Scroll the LinkedIn job-results list to the bottom, pausing so lazily
 * loaded cards can render. Resolves once a full pass over the list no
 * longer increases its scrollHeight (i.e. nothing new was loaded).
 */
async function scrollDown() {
  const list = document.querySelector(".jobs-search-results-list");
  await new Promise((resolve) => {
    const step = 1000;
    let scrolled = 0;
    const ticker = setInterval(() => {
      const heightBefore = list.scrollHeight;
      list.scrollBy(0, step);
      scrolled += step;
      // Keep scrolling until we have covered the list's current height.
      if (scrolled < heightBefore) return;
      scrolled = 0;
      if (list.scrollHeight > heightBefore) {
        // New content appeared; start another pass.
        return;
      }
      // Height stopped growing: we are done.
      clearInterval(ticker);
      resolve();
    }, 300);
  });
}
/**
 * Collapse a multi-line string onto a single line: trims each line, drops
 * blank lines, and joins the remainder with single spaces.
 *
 * @param {string|null|undefined} text - Raw text, possibly nullish.
 * @returns {string|undefined} Collapsed text, or undefined for nullish input.
 */
function getRidOfUnnecessaryLinesAndSpaces(text) {
  if (text == null) return undefined;
  const trimmedLines = text.split("\n").map((line) => line.trim());
  const nonEmptyLines = trimmedLines.filter((line) => line.length > 0);
  return nonEmptyLines.join(" ");
}
/**
 * Extract one job posting's fields from a result-list card (jobDiv) plus the
 * detail pane LinkedIn renders for the currently selected job.
 *
 * NOTE(review): positional child indexes (0, 3, 5) into the primary
 * description track LinkedIn's current markup and will drift with redesigns.
 * Missing fields fall back to "".
 *
 * @param {Element} jobDiv - A ".job-card-container" element.
 * @returns {Object} Flat record of the job's fields.
 */
function jsonify(jobDiv) {
  const titleEl = document.querySelector(
    "h2.job-details-jobs-unified-top-card__job-title"
  );
  const anchor = jobDiv.querySelector("a.ember-view");
  const locationText = jobDiv.querySelector("ul").textContent;
  const primary = document.querySelector(
    ".job-details-jobs-unified-top-card__primary-description div"
  );
  const primaryKids = primary?.children;
  const company = primaryKids?.[0]?.textContent;
  const companyLink = primaryKids?.[0]?.getAttribute("href");
  const posted = primaryKids?.[3]?.textContent;
  const applicants = primaryKids?.[5]?.textContent;
  const insightEls = document.querySelectorAll(
    ".job-details-jobs-unified-top-card__job-insight"
  );
  // First insight row reads "salary · employment type · level"; the salary
  // segment is optional, which shifts the remaining segments left by one.
  const firstInsight = insightEls?.[0]?.textContent?.split("·");
  const salary = firstInsight?.find((part) => part?.includes("$"));
  const employmentType = salary ? firstInsight?.[1] : firstInsight?.[0];
  const level = salary ? firstInsight?.[2] : firstInsight?.[1];
  // Second insight row reads "company size · industry".
  const secondInsight = insightEls?.[1]?.textContent?.split("·");
  const description = document.querySelector(
    ".jobs-description-content__text"
  ).textContent;
  return {
    title: titleEl?.textContent?.trim() || "",
    company: company?.trim() || "",
    link: `https://www.linkedin.com${anchor?.getAttribute("href")}`,
    jobLocation: locationText?.trim() || "",
    companyLink: companyLink || "",
    posted: getRidOfUnnecessaryLinesAndSpaces(posted?.trim()) || "",
    applicants: applicants?.trim() || "",
    salary: salary?.trim() || "",
    employmentType: employmentType?.trim() || "",
    level: level?.trim() || "",
    companySize: secondInsight?.[0]?.trim() || "",
    companyIndustry: secondInsight?.[1]?.trim() || "",
    description: description?.trim() || "",
  };
}
/**
 * Walk every page of a LinkedIn jobs search, clicking each job card so the
 * detail pane renders, scraping it with jsonify(), and finally downloading
 * all results as a CSV via createCSV().
 *
 * Must be started on page 1 of the search results (see TODO below): the
 * pagination logic advances by looking for the button labeled currentPage + 1.
 */
async function scrapeLinkedinJobs() {
  const allJobs = [];
  // Declared outside the pagination loop so the post-loop "was a next page
  // found?" check (i === pages.length) can read the final loop index.
  let i = 0;
  // TODO: important that they start this on page 1
  let currentPage = 1;
  let hasNextPage = true;
  while (hasNextPage) {
    console.log(
      `If you need jobs scraped, or any other data, email me: adrian@thewebscrapingguy.com`
    );
    // Fixed 3s wait for the page to load after a pagination click.
    // NOTE(review): timing-based, not event-based — slow networks may race.
    await new Promise((resolve, reject) => {
      setTimeout(() => {
        resolve();
      }, 3000);
    });
    // Scroll the result list so all lazily loaded cards exist in the DOM.
    await scrollDown();
    // collect the job data
    const jobDivs = document.querySelectorAll(".job-card-container");
    // Click each card sequentially; the 1s pause lets the detail pane render
    // before jsonify() reads it from the document.
    for (let index = 0; index < jobDivs.length; index++) {
      const jobDiv = jobDivs[index];
      jobDiv.click();
      await new Promise((resolve, reject) => setTimeout(resolve, 1000));
      const job = jsonify(jobDiv);
      allJobs.push(job);
    }
    // get the pages to paginate
    const pages = document.querySelectorAll(
      ".artdeco-pagination__pages--number li button"
    );
    // Find the button for the next page by its aria-label digits and click it.
    for (i = 0; i < pages.length; i++) {
      const page = pages[i];
      const pageNumber = page.getAttribute("aria-label").replace(/\D/g, "");
      // if the page number is equal to the current page + 1, then click it
      if (Number(pageNumber) === currentPage + 1) {
        console.log("advance to next page");
        hasNextPage = true;
        currentPage++;
        page.click();
        break;
      }
    }
    // Loop ran to completion without breaking => no next-page button existed
    // (also covers pages.length === 0), so stop paginating.
    if (i === pages.length) {
      hasNextPage = false;
    }
  }
  console.log("allJobs", allJobs);
  createCSV(allJobs, `linkedinJobs-${new Date().getTime()}.csv`);
  console.log(`Congrats! 🎉 You scraped ${allJobs.length} jobs!`);
  console.log(
    `If you need jobs scraped, or any other data, email me: adrian@thewebscrapingguy.com`
  );
}
// Kick off the scrape (top-level await: paste into the browser console).
await scrapeLinkedinJobs();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment