Last active
September 28, 2018 06:30
-
-
Save huytd/ca7b66129026572fcd8d6a6c38c29f80 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import axios from 'axios';
import * as Parser from 'rss-parser';
import * as cheerio from 'cheerio';
/** Module-level RSS parser instance used to download and parse each feed. */
const rss = new Parser();
/** A feed entry reduced to the two fields needed to fetch its article page. */
interface Article {
  /** Title of the feed item. */
  title: string;
  /** Link of the feed item; this is the URL that gets downloaded and scraped. */
  url: string;
}
/** An article page that contained at least one `span.quote` element. */
interface ParsedArticle {
  /** Title copied from the originating feed item. */
  title: string;
  /** URL the page was downloaded from. */
  url: string;
  /**
   * Text of the `<p>` elements that are direct parents of the quote spans,
   * with runs of two or more whitespace characters collapsed to one space.
   */
  desc: string;
  /** Trimmed text of `.timestamp span`, kept as the raw string from the page. */
  date: string;
  /** Trimmed text of every `span.quote` (presumably stock tickers). */
  quotes: string[];
}
/** A value of type `T`, or `null` when no value is available. */
type Optional<T> = T | null;
/**
 * Downloads every RSS feed in `sources` concurrently and flattens their items
 * into one list of articles.
 *
 * Items without a link are skipped, because the link is the only thing later
 * used to download the article page. A missing title becomes an empty string
 * so the result always satisfies `Article`.
 *
 * @param sources Feed URLs to fetch.
 * @returns Articles of all feeds, grouped in the order of `sources`.
 * @throws Rejects as soon as any single feed fails to download or parse.
 */
async function getArticlesUrls(sources: string[]): Promise<Article[]> {
  const perFeed = await Promise.all(
    sources.map(async (src: string): Promise<Article[]> => {
      const feed = await rss.parseURL(src);
      const articles: Article[] = [];
      // `items` may be absent on a feed that parsed but carries no entries.
      for (const item of feed.items || []) {
        if (!item.link) continue;
        articles.push({ title: item.title || '', url: item.link });
      }
      return articles;
    })
  );
  return perFeed.reduce(
    (all: Article[], articles: Article[]) => all.concat(articles),
    []
  );
}
/**
 * Downloads one article page and extracts the `span.quote` elements together
 * with their surrounding paragraph text and the page timestamp.
 *
 * @param article Feed entry whose `url` is fetched.
 * @returns The extracted data, or `null` when the page has no `span.quote`.
 * @throws Rejects when the HTTP request fails.
 */
async function parseArticle(article: Article): Promise<Optional<ParsedArticle>> {
  const response = await axios.get(article.url);
  const $ = cheerio.load(response.data);

  const quotes = $('span.quote');
  if (quotes.length === 0) return null;

  // Use the element argument rather than `this`, so the callback can be an
  // arrow function and does not depend on how cheerio binds it.
  const stocks: string[] = quotes
    .map((_idx, el) => $(el).text().trim())
    .get();

  return {
    title: article.title,
    url: article.url,
    // Text of the <p> parents of the quotes, with whitespace runs collapsed.
    desc: quotes.parent('p').text().replace(/\s\s+/g, ' '),
    date: $('.timestamp span').text().trim(),
    quotes: stocks
  };
}
/**
 * Parses all `articles` concurrently and keeps only those that yielded data.
 *
 * @param articles Feed entries to download and scrape.
 * @returns The non-null parse results, in the same order as `articles`.
 * @throws Rejects as soon as any single article fails to download.
 */
async function parseArticles(articles: Article[]): Promise<ParsedArticle[]> {
  const parsed = await Promise.all(
    articles.map((item: Article) => parseArticle(item))
  );
  // The type guard narrows Optional<ParsedArticle>[] to ParsedArticle[].
  return parsed.filter(
    (item: Optional<ParsedArticle>): item is ParsedArticle => item !== null
  );
}
/**
 * Script entry point: collects the articles of the MarketWatch feeds, scrapes
 * each article page, and prints the parsed results.
 *
 * Any failure is logged and reported through a non-zero exit code instead of
 * being left as an unhandled promise rejection.
 */
(async function main(): Promise<void> {
  const sources: string[] = [
    "http://feeds.marketwatch.com/marketwatch/topstories/",
    "http://feeds.marketwatch.com/marketwatch/marketpulse/"
  ];
  const articles = await getArticlesUrls(sources);
  const result = await parseArticles(articles);
  console.log(result);
})().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment