Skip to content

Instantly share code, notes, and snippets.

@dennyjohnk
Last active July 12, 2021 18:26
Show Gist options
  • Save dennyjohnk/0d05082f7cef036bca12bd6472829449 to your computer and use it in GitHub Desktop.
Save dennyjohnk/0d05082f7cef036bca12bd6472829449 to your computer and use it in GitHub Desktop.
const cheerio = require("cheerio");
const fs = require("fs");
const fetch = require("node-fetch");
const csvWriter = require("csv-write-stream");

// Output file and its column titles, in the order the cells are written.
const csvFilename = "./post.csv";
const csvHeaders = [
  "SL No",
  "First Name",
  "Last Name",
  "Company",
  "City",
  "Zip",
  "Phone",
  "Address",
];

// One writer piped into one file stream owns post.csv for the whole run.
// Opening several independent streams on the same file (a raw header stream,
// a header writer, and one append stream per row) lets their writes land at
// overlapping offsets and corrupts the header.
// `headers` is required so rows can be written as plain arrays; automatic
// header emission is disabled and the titles are written explicitly so the
// file has a header line even when no row is scraped.
const writer = csvWriter({ headers: csvHeaders, sendHeaders: false });
writer.pipe(fs.createWriteStream(csvFilename));
writer.write(csvHeaders);

/**
 * Downloads a detail page and parses it with cheerio.
 *
 * @param {string} url - Address taken from a link in the saved search page.
 * @returns {Promise<Function>} Resolves with the cheerio query function for
 *   the downloaded page; rejects on network errors or a non-2xx response.
 */
function loadDetailPage(url) {
  return fetch(url)
    .then((resp) => {
      // Without this check an HTTP error page would be scraped as if it were data.
      if (!resp.ok) {
        throw new Error(`Request for ${url} failed with HTTP ${resp.status}`);
      }
      return resp.text();
    })
    .then((html) => cheerio.load(html));
}

/**
 * Concatenates the text of every <span> inside the first `.agent_address`
 * element, logging each piece as it is read.
 *
 * @param {Function} $page - cheerio query function of a detail page.
 * @returns {string} The joined span texts ("" when nothing matches).
 */
function readAddress($page) {
  let address = "";
  $page(".agent_address")
    .eq(0)
    .find("span")
    .each((_, span) => {
      const text = $page(span).text();
      console.log(text);
      address += text;
    });
  return address;
}

/**
 * Collects the `href` of every link inside the third
 * `.preview-contact-number` element (presumably `tel:` links - verify
 * against a live page).
 *
 * @param {Function} $page - cheerio query function of a detail page.
 * @returns {string} The hrefs separated by single spaces; links without an
 *   href are skipped instead of being emitted as the text "undefined".
 */
function readPhoneLinks($page) {
  return $page(".preview-contact-number")
    .eq(2)
    .find("a")
    .toArray()
    .map((anchor) => $page(anchor).attr("href"))
    .filter(Boolean)
    .join(" ");
}

fs.readFile(
  "../Realtor Search _ Chicago Association of REALTORS®.html",
  "utf8",
  function (err, html) {
    if (err) throw err;
    const $ = cheerio.load(html);
    const pending = [];

    $("tr").each((index, row) => {
      const cells = $(row).find("td");
      const cellText = (position) => cells.eq(position).text();

      // Every link in the eighth cell yields one CSV line built from that
      // link's own detail page, so concurrent downloads never share state.
      cells
        .eq(7)
        .find("a")
        .each((_, link) => {
          const detailUrl = $(link).attr("href");
          const task = loadDetailPage(detailUrl)
            .then(($page) => {
              writer.write([
                index,
                cellText(1),
                cellText(2),
                cellText(3),
                cellText(4),
                cellText(5),
                readPhoneLinks($page),
                readAddress($page),
              ]);
            })
            .catch((error) => {
              // A failed page must not abort the remaining downloads.
              console.error(`Row ${index} (${detailUrl}) skipped:`, error);
            });
          pending.push(task);
        });
    });

    // Downloads run concurrently, so lines appear in completion order; the
    // "SL No" column keeps the original table position. Each task handles its
    // own failure, so this only waits for everything before closing the file.
    Promise.all(pending).then(() => writer.end());
  }
);
@talentedandrew
Copy link

talentedandrew commented Jul 12, 2021

const cheerio = require("cheerio");
const fs = require("fs");
const fetch = require("node-fetch");
const csvWriter = require("csv-write-stream");

// Saved search-results page to read and CSV file to produce.
const sourceHtmlPath = "../Realtor Search _ Chicago Association of REALTORS®.html";
const csvFilename = "./post.csv";

// Column titles, in the order the cells of each row are written.
const csvHeaders = [
  "SL No",
  "First Name",
  "Last Name",
  "Company",
  "City",
  "Zip",
  "Phone",
  "Address",
];

/**
 * Downloads one detail page and extracts the contact data from it.
 *
 * @param {string} url - Address taken from a link in the saved search page.
 * @returns {Promise<{phone: string, address: string}>} `address` is the
 *   concatenated text of the <span>s in the first `.agent_address` element;
 *   `phone` is the space-separated hrefs of the links in the third
 *   `.preview-contact-number` element (presumably `tel:` links - verify
 *   against a live page).
 * @throws {Error} On network failure or a non-2xx HTTP response.
 */
async function fetchAgentDetails(url) {
  const resp = await fetch(url);
  // Without this check an HTTP error page would be scraped as if it were data.
  if (!resp.ok) {
    throw new Error(`Request for ${url} failed with HTTP ${resp.status}`);
  }

  // The page is parsed once and queried for both pieces of data.
  const $page = cheerio.load(await resp.text());

  let address = "";
  for (const span of $page(".agent_address").eq(0).find("span").toArray()) {
    const text = $page(span).text();
    console.log(text);
    address += text;
  }

  const phone = $page(".preview-contact-number")
    .eq(2)
    .find("a")
    .toArray()
    .map((anchor) => $page(anchor).attr("href"))
    // Links without an href are skipped rather than written as "undefined".
    .filter(Boolean)
    .join(" ");

  return { phone, address };
}

/**
 * Reads the saved search page, follows the link(s) in the eighth cell of
 * every table row one at a time, and writes one CSV line per followed link.
 *
 * @returns {Promise<void>} Settles once every row has been processed and the
 *   CSV writer has been ended; rejects if the source page cannot be read.
 */
async function main() {
  const html = await fs.promises.readFile(sourceHtmlPath, "utf8");
  const $ = cheerio.load(html);

  // A single writer/stream pair owns post.csv for the whole run; opening a
  // separate stream per row (plus extra header streams) on the same file lets
  // writes overlap and corrupts the header. `headers` is required to write
  // rows as arrays; the titles are written explicitly so the header line
  // exists even when no row is scraped.
  const writer = csvWriter({ headers: csvHeaders, sendHeaders: false });
  writer.pipe(fs.createWriteStream(csvFilename));
  writer.write(csvHeaders);

  try {
    const rows = $("tr").toArray();

    // entries() supplies the [index, element] pairs; destructuring the
    // elements themselves throws "not iterable" on the very first row.
    for (const [index, row] of rows.entries()) {
      const cells = $(row).find("td");
      const cellText = (position) => cells.eq(position).text();
      const links = cells.eq(7).find("a").toArray();

      for (const link of links) {
        const detailUrl = $(link).attr("href");
        try {
          const { phone, address } = await fetchAgentDetails(detailUrl);
          writer.write([
            index,
            cellText(1),
            cellText(2),
            cellText(3),
            cellText(4),
            cellText(5),
            phone,
            address,
          ]);
        } catch (error) {
          // A failed page must not abort the remaining rows.
          console.error(`Row ${index} (${detailUrl}) skipped:`, error);
        }
      }
    }
  } finally {
    // Always flush and close the CSV, even if scraping stops unexpectedly.
    writer.end();
  }
}

main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment