Skip to content

Instantly share code, notes, and snippets.

@Proteusiq
Created May 1, 2021 07:15
Show Gist options
  • Save Proteusiq/17ef30f8e231f221ef0200642378e0d9 to your computer and use it in GitHub Desktop.
Save Proteusiq/17ef30f8e231f221ef0200642378e0d9 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import httpx\n",
"from bs4 import BeautifulSoup\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Ethical web scraping\n",
"# Read Terms of Use and robots.txt. \n",
"# e.g. Goodreads `Use of Our Service` <https://www.goodreads.com/about/terms>\n",
"# https://www.goodreads.com/robots.txt\n",
"# Identify yourself in user-age and how webmaster can reach-out\n",
"# Repect web traffic by slowing down requests\n",
"\n",
"URI = 'https://www.goodreads.com/review/list/117367560'\n",
"params = {'shelf': 'to-read',\n",
" 'per_page': 100}\n",
"\n",
"headers = {'user-agent': 'Prayson W. Daniel: email: prayson***@****.com'}\n",
"with httpx.Client(headers=headers) as client:\n",
" r = client.get(URI, params=params)\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"soup = BeautifulSoup(r.text, 'lxml')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Using select [CSS selector]"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'/review/list/117367560?page=2&shelf=to-read'"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Pagination\n",
"# all elements tagged [a] that have an attribute [href] with values containg word page\n",
"# and href ends with \"shelf=to-read\"\n",
"next_page, = [a.get('href') for a in soup.select('a[href*=\"page=\"] + a[href$=\"shelf=to-read\"]')]\n",
"next_page"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# get Prayson's \"Want to Read\" list of books\n",
"# all elements with CSS [table] that have an attribute named [id] with value [books]\n",
"table, = soup.select('table[id=books]')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# all elements with CSS class attribute named with value [bookalike]\n",
"books = table.select('.bookalike') # class=\"bookalike\" => .bookalike\n",
" # id=\"rates\" => #rates"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# limit to fist hit, return the value of title from attribute a\n",
"titles = [book.select('td[class=\"field title\"]', limit=1)[0].a['title'] \n",
" for book in books\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# select all <a> with href that starts with /author\n",
"authors = [book.select('a[href^=\"/author\"]', limit=1)[0].text\n",
" for book in books\n",
"]\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# select all [span] that are directly within an element named [div], \n",
"# but not [span] with title starting with \"Goodreads\"\n",
"added_dates = [book.select('div > span[title]:not(span[title^=\"Goodreads\"])')[0].get('title')\n",
" for book in books\n",
" ]\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"field_names = [\"num_pages\", \"avg_rating\", \"num_ratings\", \"date_pub_edition\"]\n",
"\n",
"# I know! An overkill usage of list comprehesion :(\n",
"multiple_data = [[(book\n",
" .select(f'td[class=\"field {field_name}\"]')[0]\n",
" .get_text(strip=True, separator=\" \")\n",
" .replace(f\"{field_name.replace('_', ' ')}\", \"\")\n",
" .strip()\n",
" ) for field_name in field_names\n",
" ] for book in books]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# Data cleaning: Hail Pandas!\n",
"\n",
"dataf = pd.DataFrame(zip(titles, authors, added_dates, multiple_data),\n",
" columns=['title', 'author', 'added_date', 'multiple_data']\n",
" )\n",
"\n",
"dataf =(pd.merge(\n",
" dataf, (dataf\n",
" .get('multiple_data')\n",
" .apply(pd.Series)\n",
" .rename(columns={index:value for index, value in enumerate(field_names)})\n",
" ),\n",
" left_index=True,\n",
" right_index=True)\n",
" .drop(columns='multiple_data')\n",
" .assign(num_pages=lambda d: d['num_pages'].str[:-3],\n",
" num_ratings=lambda d: d['num_ratings'].str.replace(',',''))\n",
" .astype({\n",
" 'author':'category',\n",
" 'added_date': 'datetime64[ns]',\n",
" 'num_pages':'int32',\n",
" 'avg_rating': 'float32',\n",
" 'num_ratings': 'int64',\n",
" 'date_pub_edition': 'datetime64[ns]'\n",
" })\n",
")\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>title</th>\n",
" <th>author</th>\n",
" <th>added_date</th>\n",
" <th>num_pages</th>\n",
" <th>avg_rating</th>\n",
" <th>num_ratings</th>\n",
" <th>date_pub_edition</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>The Little Book of Talent: 52 Tips for Improving Your Skills</td>\n",
" <td>Coyle, Daniel</td>\n",
" <td>2021-03-13</td>\n",
" <td>160</td>\n",
" <td>4.00</td>\n",
" <td>7104</td>\n",
" <td>2012-08-21</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>The Talent Code: Unlocking the Secret of Skill in Sports, Art, Music, Math, and Just About Everything Else</td>\n",
" <td>Coyle, Daniel</td>\n",
" <td>2021-03-13</td>\n",
" <td>288</td>\n",
" <td>4.05</td>\n",
" <td>20789</td>\n",
" <td>2009-04-28</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Culture Code the Exp</td>\n",
" <td>Coyle, Daniel</td>\n",
" <td>2021-03-13</td>\n",
" <td>280</td>\n",
" <td>4.31</td>\n",
" <td>17010</td>\n",
" <td>2018-07-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Misbehaving: The Making of Behavioral Economics</td>\n",
" <td>Thaler, Richard H.</td>\n",
" <td>2021-02-19</td>\n",
" <td>358</td>\n",
" <td>4.19</td>\n",
" <td>15928</td>\n",
" <td>2016-06-14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Weapons of Math Destruction: How Big Data Increases Inequality and Threatens Democracy</td>\n",
" <td>O'Neil, Cathy</td>\n",
" <td>2021-02-09</td>\n",
" <td>259</td>\n",
" <td>3.88</td>\n",
" <td>18107</td>\n",
" <td>2016-09-06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>The Hard Thing About Hard Things: Building a Business When There Are No Easy Answers</td>\n",
" <td>Horowitz, Ben</td>\n",
" <td>2020-11-27</td>\n",
" <td>304</td>\n",
" <td>4.24</td>\n",
" <td>64435</td>\n",
" <td>2014-03-04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>How Innovation Works: Serendipity, Energy and the Saving of Time</td>\n",
" <td>Ridley, Matt</td>\n",
" <td>2020-11-27</td>\n",
" <td>416</td>\n",
" <td>4.16</td>\n",
" <td>1224</td>\n",
" <td>2020-05-19</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Say What You Mean: A Mindful Approach to Nonviolent Communication</td>\n",
" <td>Sofer, Oren Jay</td>\n",
" <td>2020-11-27</td>\n",
" <td>272</td>\n",
" <td>4.24</td>\n",
" <td>908</td>\n",
" <td>2018-12-11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Creativity: Flow and the Psychology of Discovery and Invention</td>\n",
" <td>Csikszentmihalyi, Mihaly</td>\n",
" <td>2020-11-27</td>\n",
" <td>466</td>\n",
" <td>4.08</td>\n",
" <td>4963</td>\n",
" <td>2009-10-13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>How to Feed a Dictator: Saddam Hussein, Idi Amin, Enver Hoxha, Fidel Castro, and Pol Pot Through the Eyes of Their Cooks</td>\n",
" <td>Szabłowski, Witold</td>\n",
" <td>2020-11-27</td>\n",
" <td>288</td>\n",
" <td>4.19</td>\n",
" <td>1366</td>\n",
" <td>2020-04-28</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Win Bigly: Persuasion in a World Where Facts Don't Matter</td>\n",
" <td>Adams, Scott</td>\n",
" <td>2020-10-30</td>\n",
" <td>304</td>\n",
" <td>3.74</td>\n",
" <td>4236</td>\n",
" <td>2017-11-16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>Impossible to Ignore: Creating Memorable Content to Influence Decisions</td>\n",
" <td>Simon, Carmen</td>\n",
" <td>2020-10-30</td>\n",
" <td>288</td>\n",
" <td>3.85</td>\n",
" <td>359</td>\n",
" <td>2016-05-11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>The Art of Persuasion: Winning Without Intimidation</td>\n",
" <td>Burg, Bob</td>\n",
" <td>2020-10-30</td>\n",
" <td>240</td>\n",
" <td>4.08</td>\n",
" <td>465</td>\n",
" <td>2011-09-20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>Scaling Up: How a Few Companies Make It...and Why the Rest Don't (Rockefeller Habits 2.0)</td>\n",
" <td>Harnish, Verne</td>\n",
" <td>2020-10-30</td>\n",
" <td>246</td>\n",
" <td>4.21</td>\n",
" <td>3639</td>\n",
" <td>2014-10-21</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>Fanatical Prospecting: The Ultimate Guide to Opening Sales Conversations and Filling the Pipeline by Leveraging Social Selling, Telephone, Email, Text, and Cold Calling</td>\n",
" <td>Blount, Jeb</td>\n",
" <td>2020-10-30</td>\n",
" <td>304</td>\n",
" <td>4.30</td>\n",
" <td>2996</td>\n",
" <td>2015-10-05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>Principles: Life and Work</td>\n",
" <td>Dalio, Ray</td>\n",
" <td>2020-10-30</td>\n",
" <td>592</td>\n",
" <td>4.10</td>\n",
" <td>39262</td>\n",
" <td>2017-09-19</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>Pitch Anything: An Innovative Method for Presenting, Persuading, and Winning the Deal</td>\n",
" <td>Klaff, Oren</td>\n",
" <td>2020-10-30</td>\n",
" <td>225</td>\n",
" <td>4.08</td>\n",
" <td>8815</td>\n",
" <td>2011-02-16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>Way of the Wolf: Become a Master Closer with Straight Line Selling</td>\n",
" <td>Belfort, Jordan</td>\n",
" <td>2020-10-30</td>\n",
" <td>240</td>\n",
" <td>4.05</td>\n",
" <td>4570</td>\n",
" <td>2017-09-26</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>Exactly What to Say: The Magic Words for Influence and Impact</td>\n",
" <td>Jones, Phil M.</td>\n",
" <td>2020-10-30</td>\n",
" <td>148</td>\n",
" <td>3.74</td>\n",
" <td>4136</td>\n",
" <td>2017-07-26</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>Essentialism: The Disciplined Pursuit of Less</td>\n",
" <td>McKeown, Greg</td>\n",
" <td>2020-10-30</td>\n",
" <td>260</td>\n",
" <td>4.03</td>\n",
" <td>66505</td>\n",
" <td>2014-04-15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>Never Eat Alone: And Other Secrets to Success, One Relationship at a Time</td>\n",
" <td>Ferrazzi, Keith</td>\n",
" <td>2020-10-30</td>\n",
" <td>309</td>\n",
" <td>3.85</td>\n",
" <td>43404</td>\n",
" <td>2005-02-22</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>Made to Stick: Why Some Ideas Survive and Others Die</td>\n",
" <td>Heath, Chip</td>\n",
" <td>2020-10-22</td>\n",
" <td>291</td>\n",
" <td>3.96</td>\n",
" <td>79826</td>\n",
" <td>2007-01-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>Atomic Habits: An Easy &amp; Proven Way to Build Good Habits &amp; Break Bad Ones</td>\n",
" <td>Clear, James</td>\n",
" <td>2020-10-06</td>\n",
" <td>319</td>\n",
" <td>4.36</td>\n",
" <td>197725</td>\n",
" <td>2018-10-16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>Built to Last: Successful Habits of Visionary Companies</td>\n",
" <td>Collins, James C.</td>\n",
" <td>2020-09-17</td>\n",
" <td>368</td>\n",
" <td>4.02</td>\n",
" <td>63281</td>\n",
" <td>2004-11-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>TED Talks: The Official TED Guide to Public Speaking</td>\n",
" <td>Anderson, Chris J.</td>\n",
" <td>2020-09-17</td>\n",
" <td>288</td>\n",
" <td>4.07</td>\n",
" <td>10901</td>\n",
" <td>2016-05-03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>Brands and Bulls**t: Excel at the Former and Avoid the Latter. A Branding Primer for Millennial Marketers in a Digital Age.</td>\n",
" <td>Schroeder, Bernhard</td>\n",
" <td>2020-09-11</td>\n",
" <td>180</td>\n",
" <td>3.91</td>\n",
" <td>78</td>\n",
" <td>2017-10-30</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>Fail Fast or Win Big: The Start-Up Plan for Starting Now</td>\n",
" <td>Schroeder, Bernhard</td>\n",
" <td>2020-09-11</td>\n",
" <td>208</td>\n",
" <td>3.88</td>\n",
" <td>81</td>\n",
" <td>2015-02-18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>Scrum: The Art of Doing Twice the Work in Half the Time</td>\n",
" <td>Sutherland, Jeff</td>\n",
" <td>2020-09-07</td>\n",
" <td>237</td>\n",
" <td>4.18</td>\n",
" <td>16332</td>\n",
" <td>2014-09-30</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>Crossing the Chasm: Marketing and Selling High-Tech Products to Mainstream Customers</td>\n",
" <td>Moore, Geoffrey A.</td>\n",
" <td>2020-09-07</td>\n",
" <td>211</td>\n",
" <td>4.00</td>\n",
" <td>24252</td>\n",
" <td>2006-07-25</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>The Last Girl: My Story of Captivity, and My Fight Against the Islamic State</td>\n",
" <td>Murad, Nadia</td>\n",
" <td>2020-08-23</td>\n",
" <td>12</td>\n",
" <td>4.46</td>\n",
" <td>16513</td>\n",
" <td>2017-11-07</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" title \\\n",
"0 The Little Book of Talent: 52 Tips for Improving Your Skills \n",
"1 The Talent Code: Unlocking the Secret of Skill in Sports, Art, Music, Math, and Just About Everything Else \n",
"2 Culture Code the Exp \n",
"3 Misbehaving: The Making of Behavioral Economics \n",
"4 Weapons of Math Destruction: How Big Data Increases Inequality and Threatens Democracy \n",
"5 The Hard Thing About Hard Things: Building a Business When There Are No Easy Answers \n",
"6 How Innovation Works: Serendipity, Energy and the Saving of Time \n",
"7 Say What You Mean: A Mindful Approach to Nonviolent Communication \n",
"8 Creativity: Flow and the Psychology of Discovery and Invention \n",
"9 How to Feed a Dictator: Saddam Hussein, Idi Amin, Enver Hoxha, Fidel Castro, and Pol Pot Through the Eyes of Their Cooks \n",
"10 Win Bigly: Persuasion in a World Where Facts Don't Matter \n",
"11 Impossible to Ignore: Creating Memorable Content to Influence Decisions \n",
"12 The Art of Persuasion: Winning Without Intimidation \n",
"13 Scaling Up: How a Few Companies Make It...and Why the Rest Don't (Rockefeller Habits 2.0) \n",
"14 Fanatical Prospecting: The Ultimate Guide to Opening Sales Conversations and Filling the Pipeline by Leveraging Social Selling, Telephone, Email, Text, and Cold Calling \n",
"15 Principles: Life and Work \n",
"16 Pitch Anything: An Innovative Method for Presenting, Persuading, and Winning the Deal \n",
"17 Way of the Wolf: Become a Master Closer with Straight Line Selling \n",
"18 Exactly What to Say: The Magic Words for Influence and Impact \n",
"19 Essentialism: The Disciplined Pursuit of Less \n",
"20 Never Eat Alone: And Other Secrets to Success, One Relationship at a Time \n",
"21 Made to Stick: Why Some Ideas Survive and Others Die \n",
"22 Atomic Habits: An Easy & Proven Way to Build Good Habits & Break Bad Ones \n",
"23 Built to Last: Successful Habits of Visionary Companies \n",
"24 TED Talks: The Official TED Guide to Public Speaking \n",
"25 Brands and Bulls**t: Excel at the Former and Avoid the Latter. A Branding Primer for Millennial Marketers in a Digital Age. \n",
"26 Fail Fast or Win Big: The Start-Up Plan for Starting Now \n",
"27 Scrum: The Art of Doing Twice the Work in Half the Time \n",
"28 Crossing the Chasm: Marketing and Selling High-Tech Products to Mainstream Customers \n",
"29 The Last Girl: My Story of Captivity, and My Fight Against the Islamic State \n",
"\n",
" author added_date num_pages avg_rating num_ratings \\\n",
"0 Coyle, Daniel 2021-03-13 160 4.00 7104 \n",
"1 Coyle, Daniel 2021-03-13 288 4.05 20789 \n",
"2 Coyle, Daniel 2021-03-13 280 4.31 17010 \n",
"3 Thaler, Richard H. 2021-02-19 358 4.19 15928 \n",
"4 O'Neil, Cathy 2021-02-09 259 3.88 18107 \n",
"5 Horowitz, Ben 2020-11-27 304 4.24 64435 \n",
"6 Ridley, Matt 2020-11-27 416 4.16 1224 \n",
"7 Sofer, Oren Jay 2020-11-27 272 4.24 908 \n",
"8 Csikszentmihalyi, Mihaly 2020-11-27 466 4.08 4963 \n",
"9 Szabłowski, Witold 2020-11-27 288 4.19 1366 \n",
"10 Adams, Scott 2020-10-30 304 3.74 4236 \n",
"11 Simon, Carmen 2020-10-30 288 3.85 359 \n",
"12 Burg, Bob 2020-10-30 240 4.08 465 \n",
"13 Harnish, Verne 2020-10-30 246 4.21 3639 \n",
"14 Blount, Jeb 2020-10-30 304 4.30 2996 \n",
"15 Dalio, Ray 2020-10-30 592 4.10 39262 \n",
"16 Klaff, Oren 2020-10-30 225 4.08 8815 \n",
"17 Belfort, Jordan 2020-10-30 240 4.05 4570 \n",
"18 Jones, Phil M. 2020-10-30 148 3.74 4136 \n",
"19 McKeown, Greg 2020-10-30 260 4.03 66505 \n",
"20 Ferrazzi, Keith 2020-10-30 309 3.85 43404 \n",
"21 Heath, Chip 2020-10-22 291 3.96 79826 \n",
"22 Clear, James 2020-10-06 319 4.36 197725 \n",
"23 Collins, James C. 2020-09-17 368 4.02 63281 \n",
"24 Anderson, Chris J. 2020-09-17 288 4.07 10901 \n",
"25 Schroeder, Bernhard 2020-09-11 180 3.91 78 \n",
"26 Schroeder, Bernhard 2020-09-11 208 3.88 81 \n",
"27 Sutherland, Jeff 2020-09-07 237 4.18 16332 \n",
"28 Moore, Geoffrey A. 2020-09-07 211 4.00 24252 \n",
"29 Murad, Nadia 2020-08-23 12 4.46 16513 \n",
"\n",
" date_pub_edition \n",
"0 2012-08-21 \n",
"1 2009-04-28 \n",
"2 2018-07-01 \n",
"3 2016-06-14 \n",
"4 2016-09-06 \n",
"5 2014-03-04 \n",
"6 2020-05-19 \n",
"7 2018-12-11 \n",
"8 2009-10-13 \n",
"9 2020-04-28 \n",
"10 2017-11-16 \n",
"11 2016-05-11 \n",
"12 2011-09-20 \n",
"13 2014-10-21 \n",
"14 2015-10-05 \n",
"15 2017-09-19 \n",
"16 2011-02-16 \n",
"17 2017-09-26 \n",
"18 2017-07-26 \n",
"19 2014-04-15 \n",
"20 2005-02-22 \n",
"21 2007-01-02 \n",
"22 2018-10-16 \n",
"23 2004-11-02 \n",
"24 2016-05-03 \n",
"25 2017-10-30 \n",
"26 2015-02-18 \n",
"27 2014-09-30 \n",
"28 2006-07-25 \n",
"29 2017-11-07 "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"with pd.option_context('display.max_colwidth', -1):\n",
" display(dataf)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Scrapper",
"language": "python",
"name": "scrapper"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment