Skip to content

Instantly share code, notes, and snippets.

@micaiahwallace
micaiahwallace / run.ts
Created January 28, 2022 03:22
run.ts
import { fetchCachedBlogPosts } from './coordinators/fetchCachedBlogPosts'
import { scrapeNodeBlog } from './coordinators/scrapeNodeBlog'
import { formatDate } from './logic/formatDate'
import { ioWriteFileJson } from './services/ioWriteFile'
import { log } from './services/log'
import { sendTwilioMessage } from './services/sendTwilioMessage'
// Print the startup banner: tool title plus the version field read from package.json
const version = require('../package.json').version
log(`=== Node News Scraper (${version}) ===`)
@micaiahwallace
micaiahwallace / scrapeNodeBlog.ts
Created January 28, 2022 03:20
scrapeNodeBlog.ts
import { fetchNewBlogPosts } from './fetchNewBlogPosts'
import { watchListAndNotify } from './watchListAndNotify'
import { findNewBlogPosts } from '../logic/findNewBlogPosts'
import { log } from '../services/log'
import { BlogPost } from '../types'
interface ScrapeNodeBlogArgs {
blogUrl: string
maxNotify: number
writePostCache: (posts: BlogPost[]) => Promise<void>
@micaiahwallace
micaiahwallace / watchListAndNotify.ts
Created January 28, 2022 03:18
watchListAndNotify.ts
/**
 * Callback bundle for a list watcher/notifier, generic over the item type.
 * Every member is supplied by the caller; this interface only fixes their shapes.
 */
export interface ListWatchNotifierOptions<Item extends {}> {
/** Resolves to the "old" item list (presumably a previously stored snapshot; confirm against the caller). */
fetchOldItems: () => Promise<Item[]>
/** Resolves to the "new" item list (presumably freshly fetched from the remote source; confirm against the caller). */
fetchNewItems: () => Promise<Item[]>
/** Receives the previous and next item lists and returns the subset of items presumably meant to be notified about. */
getNotifiableItems: (previous: Item[], next: Item[]) => Item[]
/** Asynchronous handler invoked with a single item (presumably once per notifiable item; verify in the consumer). */
notify: (item: Item) => Promise<void>
/** Logging sink; accepts any number of arguments of any type and returns nothing. */
log: (...messages: any[]) => void
}
/**
* Watch a list of items from a remote source and notify based on the rules specified
@micaiahwallace
micaiahwallace / findNewBlogPosts.ts
Created January 28, 2022 03:17
findNewBlogPosts.ts
import { BlogPost } from '../types'
export const findNewBlogPosts = (
before: BlogPost[],
after: BlogPost[],
max: number
): BlogPost[] => {
const newPosts: BlogPost[] = []
after.forEach((post) => {
const matchedCachePostByTime = before.find(
@micaiahwallace
micaiahwallace / validateBlogPost.ts
Created January 28, 2022 03:15
validateBlogPost.ts
import { BlogPost } from '../types'
export const getValidatedBlogPost = (post: any): BlogPost | undefined => {
if (typeof post !== 'object') return undefined
if (typeof post.title !== 'string') return undefined
if (typeof post.time !== 'string') return undefined
if (typeof post.summary !== 'string') return undefined
return {
title: post.title,
time: post.time,
@micaiahwallace
micaiahwallace / fetchCachedBlogPosts.ts
Created January 28, 2022 03:11
fetchCachedBlogPosts.ts
import { getValidatedBlogPosts } from '../logic/validateBlogPost'
import { ioGetFileJsonArray } from '../services/ioGetFile'
import { BlogPost } from '../types'
/**
 * Load blog posts from a JSON cache file.
 *
 * Reads the file through `ioGetFileJsonArray` and passes the raw result to
 * `getValidatedBlogPosts`, whose output is returned as-is.
 *
 * @param file - Path of the cache file handed to `ioGetFileJsonArray`.
 * @returns The value produced by `getValidatedBlogPosts` for the file contents.
 */
export const fetchCachedBlogPosts = async (
  file: string
): Promise<BlogPost[]> => getValidatedBlogPosts(await ioGetFileJsonArray(file))
@micaiahwallace
micaiahwallace / fetchNewBlogPosts.ts
Created January 28, 2022 03:10
fetchNewBlogPosts.ts
import { extractNodeBlogPosts } from '../logic/extractNodeBlogPosts'
import { extractNodeNextPage } from '../logic/extractNodeNextPage'
import { ioFetchUrlText } from '../services/ioFetchUrlText'
import { BlogPost } from '../types'
export const fetchNewBlogPosts = async (
blogUrl: string
): Promise<BlogPost[]> => {
const html = await ioFetchUrlText(blogUrl)
const posts = extractNodeBlogPosts(html)
@micaiahwallace
micaiahwallace / ioFetchUrlText.ts
Created January 28, 2022 03:09
ioFetchUrlText.ts
import axios from 'axios'
/**
 * Perform an HTTP GET with axios and return the response body.
 *
 * @param url - Address to request.
 * @returns The `data` field of the axios response, requested as text.
 */
export const ioFetchUrlText = async (url: string): Promise<string> => {
  // validateStatus: null turns off axios's status-code check, so the request
  // resolves (instead of rejecting) regardless of the HTTP status returned.
  const requestConfig = { responseType: 'text', validateStatus: null } as const
  const { data } = await axios.get(url, requestConfig)
  return data
}
@micaiahwallace
micaiahwallace / extractNodeNextPage.ts
Created January 28, 2022 03:09
extractNodeNextPage.ts
import Cheerio from 'cheerio'
export const extractNodeNextPage = (html: string): string | undefined => {
const $ = Cheerio.load(html)
let returnLink: string | undefined
$('nav.pagination > a').each((_i, linkEl) => {
if ($(linkEl).text().toLowerCase().includes('older')) {
returnLink = linkEl.attribs.href
}
})
@micaiahwallace
micaiahwallace / extractNodeBlogPosts.ts
Created January 28, 2022 03:07
extractNodeBlogPosts.ts
import Cheerio from 'cheerio'
import { BlogPost } from '../types'
export const extractNodeBlogPosts = (html: string): BlogPost[] => {
const posts: BlogPost[] = []
const $ = Cheerio.load(html)
$('ul.blog-index > li').each((_i, li) => {
const title = $(li).find('> a').eq(0).text()
const time = $(li).find('time').attr('datetime') ?? ''
const summary = $(li).find('div.summary h4').text()