Skip to content

Instantly share code, notes, and snippets.

@pereayats
Created May 16, 2024 17:03
Show Gist options
  • Save pereayats/ebf4ff427af82434b9296b436642f29b to your computer and use it in GitHub Desktop.
Save pereayats/ebf4ff427af82434b9296b436642f29b to your computer and use it in GitHub Desktop.
const axios = require('axios')
const cheerio = require('cheerio')
const fs = require('fs')
/**
 * Pause for a fixed amount of time.
 *
 * @param {number} t - Delay in milliseconds, handed straight to `setTimeout`.
 * @returns {Promise<void>} Promise that resolves (with no value) once the timer fires.
 */
function waitUntil(t) {
  const startTimer = (resolve) => {
    setTimeout(resolve, t);
  };
  return new Promise(startTimer);
}
/**
 * Download the body of `url` with an HTTP GET request.
 *
 * This is a best-effort fetch: any failure is logged and reported to the
 * caller as `null` instead of being thrown, so callers only need a
 * truthiness check on the result.
 *
 * @param {string} url - Address to request.
 * @param {{ timeout?: number }} [options] - Optional settings.
 * @param {number} [options.timeout=30000] - Milliseconds before the request
 *   is aborted, so a stalled connection cannot hang the scraper forever.
 * @returns {Promise<*>} The `data` field of the axios response, or `null`
 *   when the request failed.
 */
async function getWebsiteContent(url, { timeout = 30000 } = {}) {
  try {
    const response = await axios.get(url, { timeout });
    return response.data;
  } catch (error) {
    // Keep the "null on failure" contract, but leave a trace of what went wrong.
    console.warn(`getWebsiteContent: request to ${url} failed: ${error?.message ?? error}`);
    return null;
  }
}
/**
 * Collect the category page links listed on the sasi.unionworks.co.uk
 * category index.
 *
 * Only anchors inside `.container .card` whose href contains
 * `sasi.unionworks.co.uk/category` are kept, each distinct href once, in
 * document order.
 *
 * @returns {Promise<string[]>} Category hrefs; an empty array (or whatever
 *   was gathered so far) when the page cannot be fetched or parsing throws.
 */
async function scrapeAppsCategories() {
  const categoryLinks = [];
  const indexUrl = 'https://sasi.unionworks.co.uk/categories';

  const isNewCategoryLink = (href) =>
    Boolean(href) &&
    href.includes('sasi.unionworks.co.uk/category') &&
    categoryLinks.indexOf(href) === -1;

  try {
    const page = await getWebsiteContent(indexUrl);
    if (!page) return categoryLinks;

    const $ = cheerio.load(page);
    $('.container .card a').each((index, anchor) => {
      const target = $(anchor).attr('href');
      if (isNewCategoryLink(target)) categoryLinks.push(target);
    });
  } catch (error) {
    // Deliberately ignored: a failure simply ends the scan and the links
    // gathered up to that point are returned below.
  }
  return categoryLinks;
}
/**
 * Add every Shopify App Store link (`small a` anchors whose href contains
 * `apps.shopify.com`) of a parsed listing page to `found`.
 *
 * @param {Function} $ - Cheerio root returned by `cheerio.load`.
 * @param {Set<string>} found - Accumulator; a Set keeps first-seen order and drops repeats.
 */
function collectShopifyAppLinks($, found) {
  $('small a').each((index, anchor) => {
    const href = $(anchor).attr('href');
    if (href && href.includes('apps.shopify.com')) found.add(href);
  });
}

/**
 * Read the highest page number shown in the pagination links of a parsed page.
 *
 * @param {Function} $ - Cheerio root returned by `cheerio.load`.
 * @returns {number} Largest integer label found, or 1 when there is none.
 */
function findLastPageNumber($) {
  let lastPage = 1;
  $('.pagination .page-item a').each((index, anchor) => {
    // Non-numeric labels (arrows, ellipses) become NaN and are skipped.
    const pageNumber = Number($(anchor).text().trim());
    if (Number.isInteger(pageNumber) && pageNumber > lastPage) lastPage = pageNumber;
  });
  return lastPage;
}

/**
 * Gather the distinct Shopify App Store links listed under one category,
 * following its pagination.
 *
 * The first page is fetched from `url`; the remaining pages are requested
 * with a `page=N` query parameter. A page that fails to download is skipped.
 *
 * @param {string} url - Category page URL.
 * @returns {Promise<string[]>} Unique app links in first-seen order; empty
 *   when the first page cannot be fetched. If parsing throws, the links
 *   gathered so far are returned.
 */
async function findAppLinksByCategory(url) {
  const found = new Set();
  try {
    const firstPage = await getWebsiteContent(url);
    if (!firstPage) return [];

    const $ = cheerio.load(firstPage);
    collectShopifyAppLinks($, found);

    const lastPage = findLastPageNumber($);
    // Do not emit a second "?" when the category URL already has a query string.
    const separator = url.includes('?') ? '&' : '?';
    for (let page = 2; page <= lastPage; page++) {
      const html = await getWebsiteContent(`${url}${separator}page=${page}`);
      if (html) collectShopifyAppLinks(cheerio.load(html), found);
    }
  } catch (error) {
    console.warn(`findAppLinksByCategory: stopped early on ${url}: ${error?.message ?? error}`);
  }
  return [...found];
}
/**
 * Return the lower-cased last path segment of a link, ignoring any query
 * string, fragment, or trailing slash.
 *
 * @param {string} link - URL or path to inspect.
 * @returns {string} The final path segment in lower case.
 */
function lastPathSegment(link) {
  const [path] = String(link).split(/[?#]/);
  return path.replace(/\/+$/, '').split('/').pop().toLowerCase();
}

/**
 * Scrape the name, vendor, and icon of one Shopify app listing page.
 *
 * Fields set on the result (each only when the matching element exists):
 * - `_id`, `name`, `url` from the page's `h1` (the last `h1` wins),
 * - `vendor_name`, `vendor_url`, `vendor_id` from the first link whose href
 *   contains `/partners/`,
 * - `icon_url` from the first listing image that has a `src`.
 *
 * @param {string} url - App listing URL.
 * @returns {Promise<object|null>} The collected info, or `null` when the
 *   page cannot be fetched or parsing throws.
 */
async function getAppInfo(url) {
  try {
    const html_content = await getWebsiteContent(url);
    if (!html_content) return null;

    const $ = cheerio.load(html_content);
    const info = {};

    $('h1').each((index, heading) => {
      info._id = lastPathSegment(url);
      // text() gives the decoded name; html() would leave entities such as "&amp;".
      info.name = $(heading).text().trim();
      info.url = url;
    });

    $('a[href*="/partners/"]').each((index, anchor) => {
      if (info.vendor_url) return; // only the first partner link counts
      info.vendor_name = $(anchor).text().trim();
      // Resolving against the store origin handles relative and absolute hrefs alike.
      info.vendor_url = new URL($(anchor).attr('href'), 'https://apps.shopify.com').href;
      info.vendor_id = lastPathSegment(info.vendor_url);
    });

    $('img[src*="cdn.shopify.com/app-store/listing_images"]').each((index, image) => {
      const src = $(image).attr('src');
      if (!info.icon_url && src) info.icon_url = src;
    });

    return info;
  } catch (error) {
    console.warn(`getAppInfo: could not read ${url}: ${error?.message ?? error}`);
    return null;
  }
}
/*
 * Entry point: scrape every category, collect the app links of each one,
 * fetch the details of every app, and write the result to
 * `shopify_apps.json` in the current working directory.
 *
 * `main` is the promise of that run. A failure is logged and turns into a
 * non-zero exit code instead of an unhandled rejection.
 */
const main = (async () => {
  const categories = await scrapeAppsCategories();

  // A Set keeps first-seen order while ensuring an app listed in several
  // categories is fetched (and written) only once.
  const apps = new Set();
  for (const category of categories) {
    await waitUntil(1000); // pause between category requests
    const links = await findAppLinksByCategory(category);
    for (const link of links) apps.add(link);
  }

  const output = [];
  for (const app of apps) {
    await waitUntil(5000); // longer pause between app page requests
    const info = await getAppInfo(app);
    if (info) output.push(info);
  }

  fs.writeFileSync('shopify_apps.json', JSON.stringify(output, null, 2));
})().catch((error) => {
  console.error('Scraping run failed:', error);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment