Skip to content

Instantly share code, notes, and snippets.

@huytd
Last active September 28, 2018 06:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save huytd/ca7b66129026572fcd8d6a6c38c29f80 to your computer and use it in GitHub Desktop.
Save huytd/ca7b66129026572fcd8d6a6c38c29f80 to your computer and use it in GitHub Desktop.
import axios from 'axios';
import * as Parser from 'rss-parser';
import * as cheerio from 'cheerio';
// Shared rss-parser instance; getArticlesUrls uses it to fetch and parse each feed.
const rss = new Parser();
/** A feed entry that has been discovered but not yet downloaded. */
interface Article {
// Title taken from the feed item.
title: string;
// Link taken from the feed item; this is the page parseArticle downloads.
url: string;
}
/** The result of downloading an article page and extracting its quote data. */
interface ParsedArticle {
// Title carried over from the originating Article.
title: string,
// URL carried over from the originating Article.
url: string,
// Text of the <p> elements that directly contain a `span.quote`, with runs of
// two or more whitespace characters collapsed to a single space.
desc: string,
// Trimmed text of the page's `.timestamp span` element(s), kept as a raw string.
date: string,
// Trimmed text of each `span.quote` element found on the page.
quotes: string[]
}
/** A value of type T, or `null` when there is no value. */
type Optional<T> = T | null;
/**
 * Fetches every RSS feed in `sources` concurrently and flattens their items
 * into a single list of article references.
 *
 * Feed items without a link are skipped, because there is no page to download
 * for them; a missing title falls back to an empty string.
 *
 * @param sources - Feed URLs to fetch with the shared `rss` parser.
 * @returns Articles from all feeds, ordered by feed and then by item.
 * @throws Rejects if any feed fails to download or parse.
 */
async function getArticlesUrls(sources: string[]): Promise<Article[]> {
  // Mapping with an async callback already yields an array of promises, so
  // the only thing that needs awaiting is the `Promise.all` below.
  const perFeed = sources.map(async (src: string): Promise<Article[]> => {
    const feed = await rss.parseURL(src);
    const articles: Article[] = [];
    for (const item of feed.items) {
      // Title and link are optional on feed items; never let `undefined`
      // leak into an Article, where `url` is later passed to the HTTP client.
      if (!item.link) continue;
      articles.push({ title: item.title || '', url: item.link });
    }
    return articles;
  });

  const raw = await Promise.all(perFeed);
  return raw.reduce(
    (all: Article[], articles: Article[]) => all.concat(articles),
    []
  );
}
/**
 * Downloads an article page and extracts the quote data it contains.
 *
 * @param article - Title and URL of the page to fetch.
 * @returns The parsed article, or `null` when the page has no `span.quote`
 *   elements.
 * @throws Rejects with the error from `axios.get` when the request fails.
 */
async function parseArticle(article: Article): Promise<Optional<ParsedArticle>> {
  const response = await axios.get(article.url);
  const $ = cheerio.load(response.data);

  const quotes = $('span.quote');
  if (!quotes.length) return null;

  // Text of the <p> elements that directly contain a quote, with runs of two
  // or more whitespace characters collapsed to a single space.
  const desc = quotes.parent('p').text().replace(/\s\s+/g, ' ');
  const published = $('.timestamp span').text().trim();
  // Use the element argument rather than `this`, so the callback can be an
  // arrow function and does not depend on how cheerio binds the receiver.
  const stocks: string[] = quotes.map((_idx, el) => $(el).text().trim()).get();

  // A typed declaration (instead of a type assertion) lets the compiler
  // verify that every ParsedArticle field is present and correctly typed.
  const parsed: ParsedArticle = {
    title: article.title,
    url: article.url,
    desc,
    date: published,
    quotes: stocks
  };
  return parsed;
}
/**
 * Parses all given articles concurrently and keeps only the ones that
 * produced a result.
 *
 * @param articles - Articles to download and parse.
 * @returns The successfully parsed articles, in the same relative order as
 *   the input; articles for which `parseArticle` resolved to `null` are dropped.
 * @throws Rejects if parsing any single article rejects.
 */
async function parseArticles(articles: Article[]): Promise<ParsedArticle[]> {
  const pending: Promise<Optional<ParsedArticle>>[] = articles.map(
    (entry: Article) => parseArticle(entry)
  );
  const parsed = await Promise.all(pending);

  const kept: ParsedArticle[] = [];
  for (const candidate of parsed) {
    if (candidate) {
      kept.push(candidate);
    }
  }
  return kept;
}
/**
 * Entry point: collects article links from the MarketWatch feeds, parses each
 * article page, and prints the parsed results.
 *
 * Any failure while fetching or parsing is reported on stderr and turns into
 * a non-zero exit code instead of an unhandled promise rejection.
 */
(async function main(): Promise<void> {
  const sources: string[] = [
    "http://feeds.marketwatch.com/marketwatch/topstories/",
    "http://feeds.marketwatch.com/marketwatch/marketpulse/"
  ];
  const articles = await getArticlesUrls(sources);
  const result = await parseArticles(articles);
  console.log(result);
})().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment