Skip to content

Instantly share code, notes, and snippets.

@adrianhajdin
Created September 30, 2023 10:15
Show Gist options
  • Save adrianhajdin/686326bc20e24810128637a9053c49a0 to your computer and use it in GitHub Desktop.
Save adrianhajdin/686326bc20e24810128637a9053c49a0 to your computer and use it in GitHub Desktop.
Web Scraping Full Course 2023 | Build and Deploy eCommerce Price Tracker
import { NextResponse } from "next/server";
import { getLowestPrice, getHighestPrice, getAveragePrice, getEmailNotifType } from "@/lib/utils";
import { connectToDB } from "@/lib/mongoose";
import Product from "@/lib/models/product.model";
import { scrapeAmazonProduct } from "@/lib/scraper";
import { generateEmailBody, sendEmail } from "@/lib/nodemailer";
// Route-level configuration constants exported next to the GET handler.
export const maxDuration = 300; // This function can run for a maximum of 300 seconds
// NOTE(review): presumably Next.js route segment options that force dynamic
// rendering and disable caching for this route — confirm against the Next.js docs.
export const dynamic = "force-dynamic";
export const revalidate = 0;
/**
 * GET handler that refreshes every tracked product.
 *
 * For each stored product it re-scrapes the product page, appends the scraped
 * price to the price history, recomputes lowest/highest/average price, writes
 * the result back to the database and, when `getEmailNotifType` reports a
 * notification, e-mails every user attached to the product.
 *
 * @param request - Incoming request (not read by the handler).
 * @returns JSON response `{ message: "Ok", data }` where `data` holds one entry
 *   per product (`undefined` for products that could not be scraped or updated).
 * @throws Error prefixed with "Failed to get all products: " when any step fails.
 */
export async function GET(request: Request) {
  try {
    // Wait for the connection before issuing queries. Awaiting is harmless if
    // connectToDB is synchronous, and avoids querying before the connection
    // is established if it is asynchronous.
    await connectToDB();

    const products = await Product.find({});

    if (!products) throw new Error("No product fetched");

    // ======================== 1 SCRAPE LATEST PRODUCT DETAILS & UPDATE DB
    const updatedProducts = await Promise.all(
      products.map(async (currentProduct) => {
        // Scrape product
        const scrapedProduct = await scrapeAmazonProduct(currentProduct.url);

        // The scraper yields nothing on failure; skip this product.
        if (!scrapedProduct) return;

        const updatedPriceHistory = [
          ...currentProduct.priceHistory,
          {
            price: scrapedProduct.currentPrice,
          },
        ];

        const product = {
          ...scrapedProduct,
          priceHistory: updatedPriceHistory,
          lowestPrice: getLowestPrice(updatedPriceHistory),
          highestPrice: getHighestPrice(updatedPriceHistory),
          averagePrice: getAveragePrice(updatedPriceHistory),
        };

        // Update Products in DB
        const updatedProduct = await Product.findOneAndUpdate(
          {
            url: product.url,
          },
          product
        );

        // No document matched the URL (e.g. it was removed meanwhile): there is
        // nobody to notify and nothing to report for this product.
        if (!updatedProduct) return;

        // ======================== 2 CHECK EACH PRODUCT'S STATUS & SEND EMAIL ACCORDINGLY
        const emailNotifType = getEmailNotifType(
          scrapedProduct,
          currentProduct
        );

        // Tolerate documents that have no `users` array at all.
        const subscribers = updatedProduct.users ?? [];

        if (emailNotifType && subscribers.length > 0) {
          const productInfo = {
            title: updatedProduct.title,
            url: updatedProduct.url,
          };
          // Construct emailContent
          const emailContent = await generateEmailBody(productInfo, emailNotifType);
          // Get array of user emails
          const userEmails = subscribers.map((user: any) => user.email);
          // Send email notification
          await sendEmail(emailContent, userEmails);
        }

        return updatedProduct;
      })
    );

    return NextResponse.json({
      message: "Ok",
      data: updatedProducts,
    });
  } catch (error: unknown) {
    // Anything can be thrown; only Error instances are guaranteed a message.
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`Failed to get all products: ${reason}`);
  }
}
/**
 * Builds the subject and HTML body of a notification e-mail for a product.
 *
 * The product title and URL originate from scraped pages, so they are
 * HTML-escaped before being placed into the markup. The subject is plain text
 * and uses the raw (shortened) title.
 *
 * @param product - Title and URL of the product the e-mail is about.
 * @param type - Which notification to generate.
 * @returns The `{ subject, body }` pair for the requested notification.
 * @throws Error "Invalid notification type." when `type` is not a known notification.
 */
export async function generateEmailBody(
  product: EmailProductInfo,
  type: NotificationType
) {
  const THRESHOLD_PERCENTAGE = 40;

  // Escapes the characters that are significant in HTML text and attributes.
  const escapeHtml = (value: string): string =>
    String(value)
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&#39;");

  // Shorten the product title
  const shortenedTitle =
    product.title.length > 20
      ? `${product.title.substring(0, 20)}...`
      : product.title;

  // Escaped variants used everywhere inside the HTML body.
  const safeTitle = escapeHtml(product.title);
  const safeUrl = escapeHtml(product.url);

  let subject = "";
  let body = "";

  switch (type) {
    case Notification.WELCOME:
      subject = `Welcome to Price Tracking for ${shortenedTitle}`;
      body = `
<div>
<h2>Welcome to PriceWise 🚀</h2>
<p>You are now tracking ${safeTitle}.</p>
<p>Here's an example of how you'll receive updates:</p>
<div style="border: 1px solid #ccc; padding: 10px; background-color: #f8f8f8;">
<h3>${safeTitle} is back in stock!</h3>
<p>We're excited to let you know that ${safeTitle} is now back in stock.</p>
<p>Don't miss out - <a href="${safeUrl}" target="_blank" rel="noopener noreferrer">buy it now</a>!</p>
<img src="https://i.ibb.co/pwFBRMC/Screenshot-2023-09-26-at-1-47-50-AM.png" alt="Product Image" style="max-width: 100%;" />
</div>
<p>Stay tuned for more updates on ${safeTitle} and other products you're tracking.</p>
</div>
`;
      break;
    case Notification.CHANGE_OF_STOCK:
      subject = `${shortenedTitle} is now back in stock!`;
      body = `
<div>
<h4>Hey, ${safeTitle} is now restocked! Grab yours before they run out again!</h4>
<p>See the product <a href="${safeUrl}" target="_blank" rel="noopener noreferrer">here</a>.</p>
</div>
`;
      break;
    case Notification.LOWEST_PRICE:
      subject = `Lowest Price Alert for ${shortenedTitle}`;
      body = `
<div>
<h4>Hey, ${safeTitle} has reached its lowest price ever!!</h4>
<p>Grab the product <a href="${safeUrl}" target="_blank" rel="noopener noreferrer">here</a> now.</p>
</div>
`;
      break;
    case Notification.THRESHOLD_MET:
      subject = `Discount Alert for ${shortenedTitle}`;
      body = `
<div>
<h4>Hey, ${safeTitle} is now available at a discount more than ${THRESHOLD_PERCENTAGE}%!</h4>
<p>Grab it right away from <a href="${safeUrl}" target="_blank" rel="noopener noreferrer">here</a>.</p>
</div>
`;
      break;
    default:
      throw new Error("Invalid notification type.");
  }

  return { subject, body };
}
@tailwind base;
@tailwind components;
@tailwind utilities;
/* Global reset: no default margin/padding, border-box sizing and smooth
   scrolling on every element. */
* {
margin: 0;
padding: 0;
box-sizing: border-box;
scroll-behavior: smooth;
}
/* Base layer: the body uses the `font-inter` utility. */
@layer base {
body {
@apply font-inter;
}
}
/* Project-specific utility classes composed from Tailwind utilities via @apply.
   NOTE(review): names such as `secondary`, `primary`, `white-100`, `white-200`,
   `rounded-10`, `shadow-xs`, `font-inter` and `font-spaceGrotesk` are not
   Tailwind defaults — presumably supplied by the project's Tailwind theme; confirm. */
@layer utilities {
/* GENERIC BUTTON & TEXT STYLES */
.btn {
@apply py-4 px-4 bg-secondary hover:bg-opacity-70 rounded-[30px] text-white text-lg font-semibold;
}
.head-text {
@apply mt-4 text-6xl leading-[72px] font-bold tracking-[-1.2px] text-gray-900;
}
.section-text {
@apply text-secondary text-[32px] font-semibold;
}
.small-text {
@apply flex gap-2 text-sm font-medium text-primary;
}
.paragraph-text {
@apply text-xl leading-[30px] text-gray-600;
}
/* HERO CAROUSEL */
.hero-carousel {
@apply relative sm:px-10 py-5 sm:pt-20 pb-5 max-w-[560px] h-[700px] w-full bg-[#F2F4F7] rounded-[30px] sm:mx-auto;
}
.carousel {
@apply flex flex-col-reverse h-[700px];
}
/* The three rules below are applied with !important.
   NOTE(review): presumably to win over the carousel library's own styles — confirm. */
.carousel .control-dots {
@apply static !important;
}
.carousel .control-dots .dot {
@apply w-[10px] h-[10px] bg-[#D9D9D9] rounded-full bottom-0 !important;
}
.carousel .control-dots .dot.selected {
@apply bg-[#475467] !important;
}
/* TRENDING SECTION */
.trending-section {
@apply flex flex-col gap-10 px-6 md:px-20 py-24;
}
/* PRODUCT DETAILS PAGE STYLES */
.product-container {
@apply flex flex-col gap-16 flex-wrap px-6 md:px-20 py-24;
}
.product-image {
@apply flex-grow xl:max-w-[50%] max-w-full py-16 border border-[#CDDBFF] rounded-[17px];
}
.product-info {
@apply flex items-center flex-wrap gap-10 py-6 border-y border-y-[#E4E4E4];
}
.product-hearts {
@apply flex items-center gap-2 px-3 py-2 bg-[#FFF0F0] rounded-10;
}
.product-stars {
@apply flex items-center gap-2 px-3 py-2 bg-[#FBF3EA] rounded-[27px];
}
.product-reviews {
@apply flex items-center gap-2 px-3 py-2 bg-white-200 rounded-[27px];
}
/* MODAL */
.dialog-container {
@apply fixed inset-0 z-10 overflow-y-auto bg-black bg-opacity-60;
}
.dialog-content {
@apply p-6 bg-white inline-block w-full max-w-md my-8 overflow-hidden text-left align-middle transition-all transform shadow-xl rounded-2xl;
}
.dialog-head_text {
@apply text-secondary text-lg leading-[24px] font-semibold mt-4;
}
.dialog-input_container {
@apply px-5 py-3 mt-3 flex items-center gap-2 border border-gray-300 rounded-[27px];
}
/* NOTE(review): this rule applies both `border-none` and `border border-gray-300`;
   confirm which border treatment is intended. */
.dialog-input {
@apply flex-1 pl-1 border-none text-gray-500 text-base focus:outline-none border border-gray-300 rounded-[27px] shadow-xs;
}
.dialog-btn {
@apply px-5 py-3 text-white text-base font-semibold border border-secondary bg-secondary rounded-lg mt-8;
}
/* NAVBAR */
.nav {
@apply flex justify-between items-center px-6 md:px-20 py-4;
}
.nav-logo {
@apply font-spaceGrotesk text-[21px] text-secondary font-bold;
}
/* PRICE INFO */
.price-info_card {
@apply flex-1 min-w-[200px] flex flex-col gap-2 border-l-[3px] rounded-10 bg-white-100 px-5 py-4;
}
/* PRODUCT CARD */
.product-card {
@apply sm:w-[292px] sm:max-w-[292px] w-full flex-1 flex flex-col gap-4 rounded-md;
}
.product-card_img-container {
@apply flex-1 relative flex flex-col gap-5 p-4 rounded-md;
}
.product-card_img {
@apply max-h-[250px] object-contain w-full h-full bg-transparent;
}
.product-title {
@apply text-secondary text-xl leading-6 font-semibold truncate;
}
/* SEARCHBAR INPUT */
.searchbar-input {
@apply flex-1 min-w-[200px] w-full p-3 border border-gray-300 rounded-lg shadow-xs text-base text-gray-500 focus:outline-none;
}
/* Search button; the disabled: variants dim it and block pointer interaction. */
.searchbar-btn {
@apply bg-gray-900 border border-gray-900 rounded-lg shadow-xs px-5 py-3 text-white text-base font-semibold hover:opacity-90 disabled:pointer-events-none disabled:cursor-not-allowed disabled:opacity-40;
}
}
"use server"
import axios from 'axios';
import * as cheerio from 'cheerio';
import { extractCurrency, extractDescription, extractPrice } from '../utils';
/**
 * Downloads a product page and extracts the product details from its HTML.
 *
 * @param url - Address of the product page to scrape.
 * @returns The scraped product data, or `undefined` when `url` is empty or
 *   when fetching/parsing fails (the error is logged, not rethrown).
 */
export async function scrapeAmazonProduct(url: string) {
  if (!url) return;

  // BrightData proxy configuration
  // String() turns an unset environment variable into the text "undefined".
  const username = String(process.env.BRIGHT_DATA_USERNAME);
  const password = String(process.env.BRIGHT_DATA_PASSWORD);
  const port = 22225;
  // Random integer in [0, 1000000) appended to the username for each request.
  const session_id = (1000000 * Math.random()) | 0;

  // NOTE(review): host/port/auth are passed as top-level request options;
  // confirm that axios actually routes the request through the proxy with this
  // shape (axios documents a dedicated `proxy` option).
  const options = {
    auth: {
      username: `${username}-session-${session_id}`,
      password,
    },
    host: 'brd.superproxy.io',
    port,
    rejectUnauthorized: false,
  };

  try {
    // Fetch the product page
    const response = await axios.get(url, options);
    const $ = cheerio.load(response.data);

    // Extract the product title
    const title = $('#productTitle').text().trim();

    // Candidate elements are tried in order; the first one with text wins.
    const currentPrice = extractPrice(
      $('.priceToPay span.a-price-whole'),
      // Fixed selector: was '.a.size.base.a-color-price', which looks for the
      // separate classes "a", "size" and "base" instead of "a-size-base".
      $('.a-size-base.a-color-price'),
      $('.a-button-selected .a-color-base'),
    );

    const originalPrice = extractPrice(
      $('#priceblock_ourprice'),
      $('.a-price.a-text-price span.a-offscreen'),
      $('#listPrice'),
      $('#priceblock_dealprice'),
      $('.a-size-base.a-color-price')
    );

    const outOfStock =
      $('#availability span').text().trim().toLowerCase() === 'currently unavailable';

    // The attribute holds a JSON object whose keys are image URLs; fall back
    // to an empty object so JSON.parse always receives valid JSON text.
    const images =
      $('#imgBlkFront').attr('data-a-dynamic-image') ||
      $('#landingImage').attr('data-a-dynamic-image') ||
      '{}';
    const imageUrls = Object.keys(JSON.parse(images));

    const currency = extractCurrency($('.a-price-symbol'));
    // Strip "-" and "%" so that e.g. "-25%" becomes "25".
    const discountRate = $('.savingsPercentage').text().replace(/[-%]/g, "");
    const description = extractDescription($);

    // Construct data object with scraped information
    const data = {
      url,
      currency: currency || '$',
      image: imageUrls[0],
      title,
      // Each price falls back to the other one when it is missing (0/NaN).
      currentPrice: Number(currentPrice) || Number(originalPrice),
      originalPrice: Number(originalPrice) || Number(currentPrice),
      priceHistory: [],
      discountRate: Number(discountRate),
      // category, reviewsCount and stars are hard-coded, not scraped.
      category: 'category',
      reviewsCount: 100,
      stars: 4.5,
      isOutOfStock: outOfStock,
      description,
      lowestPrice: Number(currentPrice) || Number(originalPrice),
      highestPrice: Number(originalPrice) || Number(currentPrice),
      averagePrice: Number(currentPrice) || Number(originalPrice),
    };

    return data;
  } catch (error: unknown) {
    // Best effort: log and resolve to undefined so callers can skip the product.
    console.log(error);
  }
}
// Next.js configuration object exported for the framework to load.
/** @type {import('next').NextConfig} */
const nextConfig = {
experimental: {
// NOTE(review): experimental flags; presumably needed for "use server"
// actions and for using mongoose from server components — confirm against
// the Next.js version in use.
serverActions: true,
serverComponentsExternalPackages: ['mongoose']
},
images: {
// NOTE(review): presumably the remote host allowed for next/image sources — confirm.
domains: ['m.media-amazon.com']
}
}
module.exports = nextConfig
https://drive.google.com/file/d/1v6h993BgYX6axBoIXFbZ9HQAgqbR4PSH/view?usp=sharing
/** @type {import('tailwindcss').Config} */
module.exports = {
content: [
"./pages/**/*.{js,ts,jsx,tsx,mdx}",
"./components/**/*.{js,ts,jsx,tsx,mdx}",
"./app/**/*.{js,ts,jsx,tsx,mdx}",
],
theme: {
extend: {
colors: {
primary: {
DEFAULT: "#E43030",
"orange": "#D48D3B",
"green": "#3E9242"
},
secondary: "#282828",
"gray-200": "#EAECF0",
"gray-300": "D0D5DD",
"gray-500": "#667085",
"gray-600": "#475467",
"gray-700": "#344054",
"gray-900": "#101828",
"white-100": "#F4F4F4",
"white-200": "#EDF0F8",
"black-100": "#3D4258",
"neutral-black": "#23263B",
},
boxShadow: {
xs: "0px 1px 2px 0px rgba(16, 24, 40, 0.05)",
},
maxWidth: {
"10xl": '1440px'
},
fontFamily: {
inter: ['Inter', 'sans-serif'],
spaceGrotesk: ['Space Grotesk', 'sans-serif'],
},
borderRadius: {
10: "10px"
}
},
},
plugins: [],
};
/** A single recorded price point in a product's price history. */
export type PriceHistoryItem = {
price: number;
};
/** A user attached to a product, identified by e-mail address. */
export type User = {
email: string;
};
/**
 * A tracked product: identifying data, current and original price, the
 * recorded price history with derived statistics, and the users attached to it.
 */
export type Product = {
  /** Identifier of the stored record, when the product has been persisted. */
  _id?: string;
  url: string;
  currency: string;
  image: string;
  title: string;
  currentPrice: number;
  originalPrice: number;
  /** Recorded price points; may be empty. */
  priceHistory: PriceHistoryItem[];
  highestPrice: number;
  lowestPrice: number;
  averagePrice: number;
  discountRate: number;
  description: string;
  category: string;
  reviewsCount: number;
  stars: number;
  /** Primitive `boolean` (not the boxed `Boolean` wrapper type). */
  isOutOfStock: boolean;
  users?: User[];
};
/** The kinds of notification an e-mail can be generated for. */
export type NotificationType =
| "WELCOME"
| "CHANGE_OF_STOCK"
| "LOWEST_PRICE"
| "THRESHOLD_MET";
/** Subject line and body of an e-mail. */
export type EmailContent = {
subject: string;
body: string;
};
/** The product fields (title and URL) used when composing an e-mail. */
export type EmailProductInfo = {
title: string;
url: string;
};
import { PriceHistoryItem, Product } from "@/types";
// Lookup object of notification identifiers; every key maps to the identical string.
const Notification = {
WELCOME: 'WELCOME',
CHANGE_OF_STOCK: 'CHANGE_OF_STOCK',
LOWEST_PRICE: 'LOWEST_PRICE',
THRESHOLD_MET: 'THRESHOLD_MET',
}
// Discount rate (in percent) at or above which a THRESHOLD_MET notification is reported.
const THRESHOLD_PERCENTAGE = 40;
// Extracts and returns the price from a list of possible elements.
/**
 * Returns the price text of the first candidate element that contains one.
 *
 * Each candidate must expose a `text()` method. Its text is stripped of every
 * character except digits and dots; when the result contains a number with
 * exactly two decimals, that first match is returned (this untangles text in
 * which the same price is repeated back to back), otherwise the stripped text.
 * Candidates whose text has no digit at all are skipped so that later
 * candidates still get a chance.
 *
 * @param elements - Candidate elements, in priority order.
 * @returns The extracted price text, or `''` when no candidate holds a price.
 */
export function extractPrice(...elements: any): string {
  for (const element of elements) {
    const priceText = element.text().trim();
    if (!priceText) continue;

    const cleanPrice = priceText.replace(/[^\d.]/g, '');
    // Text such as "Currently unavailable" leaves no digits behind: keep looking
    // instead of reporting an empty price.
    if (!/\d/.test(cleanPrice)) continue;

    const firstPrice = cleanPrice.match(/\d+\.\d{2}/)?.[0];
    return firstPrice || cleanPrice;
  }
  return '';
}
/**
 * Reads the currency symbol from an element.
 *
 * @param element - Object exposing a `text()` method.
 * @returns The first character of the element's trimmed text, or `""` when
 *   the text is empty.
 */
export function extractCurrency(element: any) {
  const trimmedText = element.text().trim();
  const symbol = trimmedText.substring(0, 1);
  return symbol || "";
}
/**
 * Builds the product description from the first selector that matches.
 *
 * @param $ - Query function: called with a selector it yields a collection
 *   (with `length` and `map(...).get()`), called with an element it yields an
 *   object exposing `text()`.
 * @returns The trimmed texts of the matched elements joined by newlines, or
 *   `""` when none of the selectors matches anything.
 */
export function extractDescription($: any) {
  // Candidate locations of the description, in priority order.
  const candidateSelectors = [
    ".a-unordered-list .a-list-item",
    ".a-expander-content p",
    // Add more selectors here if needed
  ];

  for (const candidate of candidateSelectors) {
    const matches = $(candidate);
    if (!(matches.length > 0)) continue;

    const lines = matches
      .map((_index: any, node: any) => $(node).text().trim())
      .get();
    return lines.join("\n");
  }

  // Nothing matched.
  return "";
}
/**
 * Finds the highest price in a price history.
 *
 * @param priceList - Recorded price points.
 * @returns The largest `price`, or `0` when the list is empty or missing
 *   (previously this case threw a TypeError).
 */
export function getHighestPrice(priceList: PriceHistoryItem[]) {
  if (!priceList || priceList.length === 0) return 0;

  let highest = priceList[0].price;
  for (const { price } of priceList) {
    if (price > highest) highest = price;
  }
  return highest;
}
/**
 * Finds the lowest price in a price history.
 *
 * @param priceList - Recorded price points.
 * @returns The smallest `price`, or `0` when the list is empty or missing
 *   (previously this case threw a TypeError).
 */
export function getLowestPrice(priceList: PriceHistoryItem[]) {
  if (!priceList || priceList.length === 0) return 0;

  let lowest = priceList[0].price;
  for (const { price } of priceList) {
    if (price < lowest) lowest = price;
  }
  return lowest;
}
/**
 * Computes the arithmetic mean of the prices in a price history.
 *
 * @param priceList - Recorded price points.
 * @returns The mean price, or `0` when the mean is not a usable number
 *   (for example for an empty list).
 */
export function getAveragePrice(priceList: PriceHistoryItem[]) {
  let total = 0;
  for (const entry of priceList) {
    total = total + entry.price;
  }

  const mean = total / priceList.length;
  // An empty list yields NaN here; report 0 instead.
  return mean || 0;
}
/**
 * Decides which notification, if any, a freshly scraped product warrants.
 *
 * Checks, in order: a new lowest price compared with the stored history, a
 * change from out-of-stock to in-stock, and a discount rate at or above
 * `THRESHOLD_PERCENTAGE`.
 *
 * @param scrapedProduct - The product as just scraped.
 * @param currentProduct - The product as currently stored.
 * @returns The matching notification identifier, or `null` when none applies.
 */
export const getEmailNotifType = (
  scrapedProduct: Product,
  currentProduct: Product
) => {
  // A product that has no recorded prices yet has no baseline to undercut;
  // skipping the comparison also avoids looking up a minimum of an empty list.
  const priceHistory = currentProduct.priceHistory ?? [];

  if (
    priceHistory.length > 0 &&
    scrapedProduct.currentPrice < getLowestPrice(priceHistory)
  ) {
    return Notification.LOWEST_PRICE as keyof typeof Notification;
  }
  if (!scrapedProduct.isOutOfStock && currentProduct.isOutOfStock) {
    return Notification.CHANGE_OF_STOCK as keyof typeof Notification;
  }
  if (scrapedProduct.discountRate >= THRESHOLD_PERCENTAGE) {
    return Notification.THRESHOLD_MET as keyof typeof Notification;
  }

  return null;
};
/**
 * Formats a number for display without any fraction digits, using the
 * runtime's default locale.
 *
 * @param num - Value to format; defaults to `0`.
 * @returns The locale-formatted string.
 */
export const formatNumber = (num: number = 0) => {
  const wholeNumberOptions = {
    minimumFractionDigits: 0,
    maximumFractionDigits: 0,
  };
  const formatted = num.toLocaleString(undefined, wholeNumberOptions);
  return formatted;
};
@viibhuGupta
Copy link

viibhuGupta commented May 19, 2024

Hi guys, I am facing a problem: in the DB, the image and description are not being stored, although all the other details are stored.

` product = {
...scrapedProduct,
priceHistory : updatePriceHistory,
lowestPrice : getLowestPrice(updatePriceHistory),
highestPrice : getHighestPrice(updatePriceHistory),
averagePrice : getAveragePrice(updatePriceHistory),

        }
        console.log(product); //  here i am getting all the details `   

**When I console.log this, I get all the details about the product.**

` const newProduct = await Product.findOneAndUpdate(
{ url : scrapedProduct.url},
product,
{ upsert : true , new : true}
)

       console.log(newProduct); // here i am not getting all details `   

**When I log this, I am not getting details like the images, description and users.**

This is the Db Images what i am getting

Screenshot_20240519_125924

this is code and repo link

https://github.com/viibhuGupta/eCommerce-Price-Tracker/blob/main/lib/actions/index.ts

@neelp03
Copy link

neelp03 commented May 22, 2024

I was able to extract the description and categories!

My next goal is to get all the images of the product and show it similar to how the carousel is on home page but without the autoplay
Here is my repo if anyone want to see how: https://github.com/neelp03/pricetracker

Image 1 Image 2

FYI headless UI deprecated a lot of the old components.

It is easy to transition to the new version, but it might be a little tricky to find the correct components since a few have been renamed completely. See my repo linked above to see how to implement the track modal with the new version!

@shashankxrm
Copy link

MongooseError: Operation `products.find()` buffering timed out after 10000ms at Timeout.<anonymous> (/vercel/path0/node_modules/mongoose/lib/drivers/node-mongodb-native/collection.js:185:23) at listOnTimeout (node:internal/timers:573:17) at process.processTimers (node:internal/timers:514:7) MongooseError: Operation `products.find()` buffering timed out after 10000ms at Timeout.<anonymous> (/vercel/path0/node_modules/mongoose/lib/drivers/node-mongodb-native/collection.js:185:23) at listOnTimeout (node:internal/timers:573:17) at process.processTimers (node:internal/timers:514:7)

I am getting this error when i try to scrape any product.

@ManuelAlejandroG
Copy link

Anyone having issues with the logos and favicon? It is not displaying properly on VS Code for me....

@nsaicharan
Copy link

cors I am facing this issue and I am using this in India. Does it create problems due to different country? Please anybody suggest the solution, how you successfully did it?

@vivek9124vivek Maybe you missed "use server" in the file where you made axios request. Or you didn't enable serverActions in next.config.js.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment