Skip to content

Instantly share code, notes, and snippets.

@nlssn
Created January 29, 2024 18:56
Show Gist options
  • Save nlssn/48a28f1c88a5a93052a26d3e4c240703 to your computer and use it in GitHub Desktop.
Save nlssn/48a28f1c88a5a93052a26d3e4c240703 to your computer and use it in GitHub Desktop.
import puppeteer from "puppeteer";
import * as cheerio from "cheerio";
import fs from "fs";
import jsonexport from "jsonexport";
/**
 * Script entry point: open one product page in a browser, pull the embedded
 * `JetshopData` object out of its inline <script> tag, turn it into a product
 * row and write that row to "test.csv".
 *
 * `createProduct` and `saveJsonToCsv` are `const` bindings declared further
 * down this file; they are only called after the first `await`, by which time
 * the module has finished evaluating.
 */
(async () => {
  const productUrl =
    "https://www.batofiske.se/fiskemetoder/trolling-havsfiske/havsfiskebeten/hven-pilk-original-150gr-6239";
  const dataMarker = "var JetshopData=";

  // Launch the browser (with a visible window, since headless is false).
  const browser = await puppeteer.launch({ headless: false });

  try {
    const page = await browser.newPage();

    // Wait until the network is idle so that the page has finished loading.
    await page.goto(productUrl, { waitUntil: "networkidle0" });

    // Hand the rendered HTML to cheerio for querying.
    const html = await page.content();
    const $ = cheerio.load(html);

    // Find the inline script that assigns JetshopData. If several scripts
    // match, the last one wins.
    let jetshopScript = null;
    $("script").each((index, element) => {
      const scriptText = $(element).text();
      if (scriptText.includes(`${dataMarker}{`)) {
        jetshopScript = scriptText;
      }
    });

    if (jetshopScript === null) {
      throw new Error(`No <script> containing "${dataMarker}{" was found on ${productUrl}`);
    }

    // Reduce the statement `var JetshopData={...};` to the bare object text:
    // drop anything that looks like an HTML tag, drop the assignment prefix,
    // and drop the terminating semicolon when there is one.
    let jsonStr = jetshopScript
      .trim()
      .replace(/(<([^>]+)>)/gi, "")
      .replace(dataMarker, "");
    if (jsonStr.endsWith(";")) {
      jsonStr = jsonStr.slice(0, -1);
    }

    const data = JSON.parse(jsonStr);
    const product = createProduct(data);
    await saveJsonToCsv([product], "test.csv");
  } finally {
    // Always release the browser, even when scraping or parsing failed.
    await browser.close();
  }
})().catch((error) => {
  console.error("Scraping failed:", error);
  process.exitCode = 1;
});
/**
 * Construct a flat product object from the scraped shop data.
 *
 * Reads `data.ProductInfo` and copies the article number, names, VAT and
 * price fields through unchanged. Two fields are transformed:
 *  - `Images` becomes a single "|"-separated string of absolute image URLs
 *    (an empty string when the product has no image list).
 *  - `StockStatus` becomes a boolean that is true only when the shop reports
 *    exactly "I lager" (Swedish for "in stock").
 *
 * @param {object} data - Parsed shop data; must have a `ProductInfo` object.
 * @returns {object} The product row.
 * @throws {TypeError} If `data.ProductInfo` is missing.
 */
const createProduct = (data) => {
  const {
    ArticleNumber,
    Name,
    SubName,
    Images,
    StockStatus,
    Vat,
    Price,
    PriceIncVat,
    DiscountPrice,
    DiscountPriceIncVat,
  } = data.ProductInfo;

  // A product without an image list should still produce a row.
  const imageFiles = Array.isArray(Images) ? Images : [];

  return {
    ArticleNumber,
    Name,
    SubName,
    Images: pipedStr(niceImageUrls(imageFiles)),
    StockStatus: StockStatus === "I lager",
    Vat,
    Price,
    PriceIncVat,
    DiscountPrice,
    DiscountPriceIncVat,
  };
};
/**
 * Construct a variation object linked to its parent product.
 *
 * Only the parent link is filled in; `data` is accepted but not read.
 *
 * @param {*} parentId - Identifier of the parent product.
 * @param {*} data - Variation source data (currently unused).
 * @returns {{ parentId: * }} Object holding just the parent identifier.
 */
const createVariation = (parentId, data) => ({ parentId });
/**
 * Join the elements of an array into a single "|"-separated string.
 *
 * This is a pure formatting helper and writes nothing to the console.
 *
 * @param {Array} array - Values to join.
 * @returns {string} The joined string; "" for an empty array.
 */
const pipedStr = (array) => array.join("|");
/**
 * Turn image file references into absolute image URLs.
 *
 * For each entry, everything from the first "?timestamp=" onwards is dropped
 * and the shop's original-size image base URL is prepended.
 *
 * @param {string[]} urls - Image file names, optionally with a "?timestamp=" suffix.
 * @returns {string[]} Absolute URLs, in the same order as the input.
 */
const niceImageUrls = (urls) => {
  const toAbsolute = (rawUrl) => {
    const [fileName] = rawUrl.split("?timestamp=");
    return `https://www.batofiske.se/pub_images/original/${fileName}`;
  };
  return urls.map((rawUrl) => toAbsolute(rawUrl));
};
/**
 * Convert JSON data to CSV and write it to a file.
 *
 * The data is passed to `jsonexport` with vertical output disabled and the
 * resulting text is written synchronously to `csvFilePath`. Failures in either
 * step are logged to the console and not re-thrown.
 *
 * @param {object|Array} jsonObject - Data to convert.
 * @param {string} csvFilePath - Destination file path.
 * @returns {Promise<void>}
 */
const saveJsonToCsv = async (jsonObject, csvFilePath) => {
  const exportOptions = { verticalOutput: false };

  try {
    const csvText = await jsonexport(jsonObject, exportOptions);
    fs.writeFileSync(csvFilePath, csvText);
    console.log("CSV file saved successfully!");
  } catch (err) {
    console.error("Error saving CSV file:", err);
  }
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment