Skip to content

Instantly share code, notes, and snippets.

@BolajiOlajide
Created December 7, 2017 01:10
Show Gist options
  • Save BolajiOlajide/ba218772f9df19320776da13e8e10355 to your computer and use it in GitHub Desktop.
Save BolajiOlajide/ba218772f9df19320776da13e8e10355 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from bs4 import BeautifulSoup\n",
"import requests"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"url = requests.get('https://bolajiolajide.github.io')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"source = BeautifulSoup(url.text, 'html.parser')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"post_feed = source.find('div', class_=\"post-feed\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"posts = post_feed.find_all('article', class_=\"post-card\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"single_post = posts[0]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"title = single_post.find('h2', class_='post-card-title')"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"u'Recursion For Beginners'"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"title.text"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"excerpt = single_post.find('section', class_=\"post-card-excerpt\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"u'What is recursion? Recursion is a method where the solution to a problem depends on solutions to smaller instances of the same problem (as opposed to iteration). The approach can be applied to'"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"excerpt.p.text"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"author = single_post.find('span', class_=\"post-card-author\")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"u'Bolaji Olajide'"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"author.a.text"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"def get_post_details(article):\n",
" title = article.find('h2', class_='post-card-title').text\n",
" excerpt = article.find('section', class_='post-card-excerpt').p.text\n",
" author = article.find('span', class_='post-card-author').a.text\n",
" return {\n",
" \"title\": title,\n",
" \"excerpt\": excerpt,\n",
" \"author\": author\n",
" }"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# Build the list of post details with a list comprehension instead of an explicit loop\n",
"blog_posts = [get_post_details(post) for post in posts]"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'author': u'Bolaji Olajide',\n",
" 'excerpt': u'What is recursion? Recursion is a method where the solution to a problem depends on solutions to smaller instances of the same problem (as opposed to iteration). The approach can be applied to',\n",
" 'title': u'Recursion For Beginners'},\n",
" {'author': u'Bolaji Olajide',\n",
" 'excerpt': u'One of the amazing things about the Python language is the use of decorators to alter functionality. Decorators are used to extend functions without actually modifying them directly.',\n",
" 'title': u'Creating a Simple Python Decorator'},\n",
" {'author': u'Bolaji Olajide',\n",
" 'excerpt': u'Learning is the process of acquiring new or modifying existing knowledge, behaviors, skills, values, or preferences.',\n",
" 'title': u'The Art of Learning\\u200a\\u2014\\u200aMy Learning Process'}]"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"blog_posts"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# Build the same list with an explicit for loop, for comparison with the list comprehension\n",
"blog_posts = []\n",
"for post in posts:\n",
" blog_posts.append(get_post_details(post))"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'author': u'Bolaji Olajide',\n",
" 'excerpt': u'What is recursion? Recursion is a method where the solution to a problem depends on solutions to smaller instances of the same problem (as opposed to iteration). The approach can be applied to',\n",
" 'title': u'Recursion For Beginners'},\n",
" {'author': u'Bolaji Olajide',\n",
" 'excerpt': u'One of the amazing things about the Python language is the use of decorators to alter functionality. Decorators are used to extend functions without actually modifying them directly.',\n",
" 'title': u'Creating a Simple Python Decorator'},\n",
" {'author': u'Bolaji Olajide',\n",
" 'excerpt': u'Learning is the process of acquiring new or modifying existing knowledge, behaviors, skills, values, or preferences.',\n",
" 'title': u'The Art of Learning\\u200a\\u2014\\u200aMy Learning Process'}]"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"blog_posts"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.14"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment