Skip to content

Instantly share code, notes, and snippets.

@anshulkgupta93
Created December 31, 2015 12:21
Show Gist options
  • Save anshulkgupta93/39689db8b337c3ccf247 to your computer and use it in GitHub Desktop.
Save anshulkgupta93/39689db8b337c3ccf247 to your computer and use it in GitHub Desktop.
Web Scraping in R and Python
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<table>\n",
"<thead><tr><th></th><th scope=col>Player</th><th scope=col>Span</th><th scope=col>Mat</th><th scope=col>Inns</th><th scope=col>NO</th><th scope=col>Runs</th><th scope=col>HS</th><th scope=col>Ave</th><th scope=col>100</th><th scope=col>50</th><th scope=col>0</th><th scope=col></th></tr></thead>\n",
"<tbody>\n",
"\t<tr><th scope=row>1</th><td>KC Sangakkara</td><td>2000-2015</td><td>134</td><td>233</td><td>17</td><td>12400</td><td>319</td><td>57.40</td><td>38</td><td>52</td><td>11</td><td></td></tr>\n",
"\t<tr><th scope=row>2</th><td>DPMD Jayawardene</td><td>1997-2014</td><td>149</td><td>252</td><td>15</td><td>11814</td><td>374</td><td>49.84</td><td>34</td><td>50</td><td>15</td><td></td></tr>\n",
"\t<tr><th scope=row>3</th><td>ST Jayasuriya</td><td>1991-2007</td><td>110</td><td>188</td><td>14</td><td>6973</td><td>340</td><td>40.07</td><td>14</td><td>31</td><td>15</td><td></td></tr>\n",
"\t<tr><th scope=row>4</th><td>PA de Silva</td><td>1984-2002</td><td>93</td><td>159</td><td>11</td><td>6361</td><td>267</td><td>42.97</td><td>20</td><td>22</td><td>7</td><td></td></tr>\n",
"\t<tr><th scope=row>5</th><td>MS Atapattu</td><td>1990-2007</td><td>90</td><td>156</td><td>15</td><td>5502</td><td>249</td><td>39.02</td><td>16</td><td>17</td><td>22</td><td></td></tr>\n",
"\t<tr><th scope=row>6</th><td>TM Dilshan</td><td>1999-2013</td><td>87</td><td>145</td><td>11</td><td>5492</td><td>193</td><td>40.98</td><td>16</td><td>23</td><td>14</td><td></td></tr>\n",
"\t<tr><th scope=row>7</th><td>TT Samaraweera</td><td>2001-2013</td><td>81</td><td>132</td><td>20</td><td>5462</td><td>231</td><td>48.76</td><td>14</td><td>30</td><td>11</td><td></td></tr>\n",
"\t<tr><th scope=row>8</th><td>A Ranatunga</td><td>1982-2000</td><td>93</td><td>155</td><td>12</td><td>5105</td><td>135*</td><td>35.69</td><td>4</td><td>38</td><td>12</td><td></td></tr>\n",
"\t<tr><th scope=row>9</th><td>HP Tillakaratne</td><td>1989-2004</td><td>83</td><td>131</td><td>25</td><td>4545</td><td>204*</td><td>42.87</td><td>11</td><td>20</td><td>9</td><td></td></tr>\n",
"\t<tr><th scope=row>10</th><td>AD Mathews</td><td>2009-2015</td><td>56</td><td>97</td><td>17</td><td>4015</td><td>160</td><td>50.18</td><td>7</td><td>23</td><td>1</td><td></td></tr>\n",
"\t<tr><th scope=row>11</th><td>WPUJC Vaas</td><td>1994-2009</td><td>111</td><td>162</td><td>35</td><td>3089</td><td>100*</td><td>24.32</td><td>1</td><td>13</td><td>12</td><td></td></tr>\n",
"\t<tr><th scope=row>12</th><td>RS Mahanama</td><td>1986-1998</td><td>52</td><td>89</td><td>1</td><td>2576</td><td>225</td><td>29.27</td><td>4</td><td>11</td><td>7</td><td></td></tr>\n",
"\t<tr><th scope=row>13</th><td>AP Gurusinha</td><td>1985-1996</td><td>41</td><td>70</td><td>7</td><td>2452</td><td>143</td><td>38.92</td><td>7</td><td>8</td><td>3</td><td></td></tr>\n",
"\t<tr><th scope=row>14</th><td>HAPW Jayawardene</td><td>2000-2015</td><td>58</td><td>83</td><td>11</td><td>2124</td><td>154*</td><td>29.50</td><td>4</td><td>5</td><td>9</td><td></td></tr>\n",
"\t<tr><th scope=row>15</th><td>RS Kaluwitharana</td><td>1992-2004</td><td>49</td><td>78</td><td>4</td><td>1933</td><td>132*</td><td>26.12</td><td>3</td><td>9</td><td>5</td><td></td></tr>\n",
"\t<tr><th scope=row>16</th><td>LD Chandimal</td><td>2011-2015</td><td>25</td><td>44</td><td>4</td><td>1835</td><td>162*</td><td>45.87</td><td>5</td><td>10</td><td>1</td><td></td></tr>\n",
"\t<tr><th scope=row>17</th><td>RP Arnold</td><td>1997-2004</td><td>44</td><td>69</td><td>4</td><td>1821</td><td>123</td><td>28.01</td><td>3</td><td>10</td><td>7</td><td></td></tr>\n",
"\t<tr><th scope=row>18</th><td>NT Paranavitana</td><td>2009-2012</td><td>32</td><td>60</td><td>5</td><td>1792</td><td>111</td><td>32.58</td><td>2</td><td>11</td><td>9</td><td></td></tr>\n",
"\t<tr><th scope=row>19</th><td>FDM Karunaratne</td><td>2012-2015</td><td>25</td><td>48</td><td>2</td><td>1622</td><td>186</td><td>35.26</td><td>3</td><td>7</td><td>5</td><td></td></tr>\n",
"\t<tr><th scope=row>20</th><td>JK Silva</td><td>2011-2015</td><td>24</td><td>44</td><td>0</td><td>1404</td><td>139</td><td>31.90</td><td>2</td><td>9</td><td>4</td><td></td></tr>\n",
"\t<tr><th scope=row>21</th><td>LRD Mendis</td><td>1982-1988</td><td>24</td><td>43</td><td>1</td><td>1329</td><td>124</td><td>31.64</td><td>4</td><td>8</td><td>2</td><td></td></tr>\n",
"\t<tr><th scope=row>22</th><td>RL Dias</td><td>1982-1987</td><td>20</td><td>36</td><td>1</td><td>1285</td><td>109</td><td>36.71</td><td>3</td><td>8</td><td>2</td><td></td></tr>\n",
"\t<tr><th scope=row>23</th><td>UC Hathurusingha</td><td>1991-1999</td><td>26</td><td>44</td><td>1</td><td>1274</td><td>83</td><td>29.62</td><td>0</td><td>8</td><td>2</td><td></td></tr>\n",
"\t<tr><th scope=row>24</th><td>M Muralitharan</td><td>1992-2010</td><td>132</td><td>162</td><td>56</td><td>1259</td><td>67</td><td>11.87</td><td>0</td><td>1</td><td>32</td><td></td></tr>\n",
"\t<tr><th scope=row>25</th><td>S Wettimuny</td><td>1982-1987</td><td>23</td><td>43</td><td>1</td><td>1221</td><td>190</td><td>29.07</td><td>2</td><td>6</td><td>5</td><td></td></tr>\n",
"\t<tr><th scope=row>26</th><td>MG Vandort</td><td>2001-2008</td><td>20</td><td>33</td><td>2</td><td>1144</td><td>140</td><td>36.90</td><td>4</td><td>4</td><td>3</td><td></td></tr>\n",
"\t<tr><th scope=row>27</th><td>WU Tharanga</td><td>2005-2015</td><td>21</td><td>38</td><td>1</td><td>1117</td><td>165</td><td>30.18</td><td>1</td><td>5</td><td>5</td><td></td></tr>\n",
"\t<tr><th scope=row>28</th><td>HMRKB Herath</td><td>1999-2015</td><td>67</td><td>99</td><td>22</td><td>1049</td><td>80*</td><td>13.62</td><td>0</td><td>1</td><td>18</td><td></td></tr>\n",
"\t<tr><th scope=row>29</th><td>RS Madugalle</td><td>1982-1988</td><td>21</td><td>39</td><td>4</td><td>1029</td><td>103</td><td>29.40</td><td>1</td><td>7</td><td>4</td><td></td></tr>\n",
"\t<tr><th scope=row>30</th><td>HDRL Thirimanne</td><td>2011-2015</td><td>23</td><td>45</td><td>6</td><td>969</td><td>155*</td><td>24.84</td><td>1</td><td>4</td><td>6</td><td></td></tr>\n",
"\t<tr><th scope=row>31</th><td>HDPK Dharmasena</td><td>1993-2004</td><td>31</td><td>51</td><td>7</td><td>868</td><td>62*</td><td>19.72</td><td>0</td><td>3</td><td>4</td><td></td></tr>\n",
"\t<tr><th scope=row>32</th><td>SM Warnapura</td><td>2007-2009</td><td>14</td><td>24</td><td>1</td><td>821</td><td>120</td><td>35.69</td><td>2</td><td>7</td><td>5</td><td></td></tr>\n",
"\t<tr><th scope=row>33</th><td>JR Ratnayeke</td><td>1982-1989</td><td>22</td><td>38</td><td>6</td><td>807</td><td>93</td><td>25.21</td><td>0</td><td>5</td><td>5</td><td></td></tr>\n",
"\t<tr><th scope=row>34</th><td>UDU Chandana</td><td>1999-2005</td><td>16</td><td>24</td><td>1</td><td>616</td><td>92</td><td>26.78</td><td>0</td><td>2</td><td>0</td><td></td></tr>\n",
"\t<tr><th scope=row>35</th><td>MF Maharoof</td><td>2004-2011</td><td>22</td><td>34</td><td>4</td><td>556</td><td>72</td><td>18.53</td><td>0</td><td>3</td><td>1</td><td></td></tr>\n",
"\t<tr><th scope=row>36</th><td>GP Wickramasinghe</td><td>1991-2001</td><td>40</td><td>64</td><td>5</td><td>555</td><td>51</td><td>9.40</td><td>0</td><td>1</td><td>17</td><td></td></tr>\n",
"\t<tr><th scope=row>37</th><td>LPC Silva</td><td>2006-2008</td><td>11</td><td>17</td><td>1</td><td>537</td><td>152*</td><td>33.56</td><td>1</td><td>2</td><td>3</td><td></td></tr>\n",
"\t<tr><th scope=row>38</th><td>S Ranatunga</td><td>1994-1997</td><td>9</td><td>17</td><td>1</td><td>531</td><td>118</td><td>33.18</td><td>2</td><td>2</td><td>1</td><td></td></tr>\n",
"\t<tr><th scope=row>39</th><td>KTGD Prasad</td><td>2008-2015</td><td>25</td><td>39</td><td>2</td><td>476</td><td>47</td><td>12.86</td><td>0</td><td>0</td><td>8</td><td></td></tr>\n",
"\t<tr><th scope=row>40</th><td>RJ Ratnayake</td><td>1983-1992</td><td>23</td><td>36</td><td>6</td><td>433</td><td>56</td><td>14.43</td><td>0</td><td>2</td><td>5</td><td></td></tr>\n",
"\t<tr><th scope=row>41</th><td>CK Kapugedera</td><td>2006-2009</td><td>8</td><td>15</td><td>3</td><td>418</td><td>96</td><td>34.83</td><td>0</td><td>4</td><td>1</td><td></td></tr>\n",
"\t<tr><th scope=row>42</th><td>DS de Silva</td><td>1982-1984</td><td>12</td><td>22</td><td>3</td><td>406</td><td>61</td><td>21.36</td><td>0</td><td>2</td><td>3</td><td></td></tr>\n",
"\t<tr><th scope=row>43</th><td>KMDN Kulasekara</td><td>2005-2014</td><td>21</td><td>28</td><td>1</td><td>391</td><td>64</td><td>14.48</td><td>0</td><td>1</td><td>4</td><td></td></tr>\n",
"\t<tr><th scope=row>44</th><td>J Mubarak</td><td>2002-2015</td><td>13</td><td>23</td><td>1</td><td>385</td><td>49</td><td>17.50</td><td>0</td><td>0</td><td>5</td><td></td></tr>\n",
"\t<tr><th scope=row>45</th><td>KDK Vithanage</td><td>2013-2015</td><td>10</td><td>16</td><td>2</td><td>370</td><td>103*</td><td>26.42</td><td>1</td><td>1</td><td>1</td><td></td></tr>\n",
"\t<tr><th scope=row>46</th><td>SAR Silva</td><td>1983-1988</td><td>9</td><td>16</td><td>2</td><td>353</td><td>111</td><td>25.21</td><td>2</td><td>0</td><td>1</td><td></td></tr>\n",
"\t<tr><th scope=row>47</th><td>ALF de Mel</td><td>1982-1986</td><td>17</td><td>28</td><td>5</td><td>326</td><td>34</td><td>14.17</td><td>0</td><td>0</td><td>5</td><td></td></tr>\n",
"\t<tr><th scope=row>48</th><td>DSBP Kuruppu</td><td>1987-1991</td><td>4</td><td>7</td><td>1</td><td>320</td><td>201*</td><td>53.33</td><td>1</td><td>0</td><td>0</td><td></td></tr>\n",
"\t<tr><th scope=row>49</th><td>RS Kalpage</td><td>1993-1999</td><td>11</td><td>18</td><td>2</td><td>294</td><td>63</td><td>18.37</td><td>0</td><td>2</td><td>0</td><td></td></tr>\n",
"\t<tr><th scope=row>50</th><td>DNT Zoysa</td><td>1997-2004</td><td>30</td><td>40</td><td>6</td><td>288</td><td>28*</td><td>8.47</td><td>0</td><td>0</td><td>9</td><td></td></tr>\n",
"</tbody>\n",
"</table>\n"
],
"text/latex": [
"\\begin{tabular}{r|llllllllllll}\n",
" & Player & Span & Mat & Inns & NO & Runs & HS & Ave & 100 & 50 & 0 & \\\\\n",
"\\hline\n",
"\t1 & KC Sangakkara & 2000-2015 & 134 & 233 & 17 & 12400 & 319 & 57.40 & 38 & 52 & 11 & \\\\\n",
"\t2 & DPMD Jayawardene & 1997-2014 & 149 & 252 & 15 & 11814 & 374 & 49.84 & 34 & 50 & 15 & \\\\\n",
"\t3 & ST Jayasuriya & 1991-2007 & 110 & 188 & 14 & 6973 & 340 & 40.07 & 14 & 31 & 15 & \\\\\n",
"\t4 & PA de Silva & 1984-2002 & 93 & 159 & 11 & 6361 & 267 & 42.97 & 20 & 22 & 7 & \\\\\n",
"\t5 & MS Atapattu & 1990-2007 & 90 & 156 & 15 & 5502 & 249 & 39.02 & 16 & 17 & 22 & \\\\\n",
"\t6 & TM Dilshan & 1999-2013 & 87 & 145 & 11 & 5492 & 193 & 40.98 & 16 & 23 & 14 & \\\\\n",
"\t7 & TT Samaraweera & 2001-2013 & 81 & 132 & 20 & 5462 & 231 & 48.76 & 14 & 30 & 11 & \\\\\n",
"\t8 & A Ranatunga & 1982-2000 & 93 & 155 & 12 & 5105 & 135* & 35.69 & 4 & 38 & 12 & \\\\\n",
"\t9 & HP Tillakaratne & 1989-2004 & 83 & 131 & 25 & 4545 & 204* & 42.87 & 11 & 20 & 9 & \\\\\n",
"\t10 & AD Mathews & 2009-2015 & 56 & 97 & 17 & 4015 & 160 & 50.18 & 7 & 23 & 1 & \\\\\n",
"\t11 & WPUJC Vaas & 1994-2009 & 111 & 162 & 35 & 3089 & 100* & 24.32 & 1 & 13 & 12 & \\\\\n",
"\t12 & RS Mahanama & 1986-1998 & 52 & 89 & 1 & 2576 & 225 & 29.27 & 4 & 11 & 7 & \\\\\n",
"\t13 & AP Gurusinha & 1985-1996 & 41 & 70 & 7 & 2452 & 143 & 38.92 & 7 & 8 & 3 & \\\\\n",
"\t14 & HAPW Jayawardene & 2000-2015 & 58 & 83 & 11 & 2124 & 154* & 29.50 & 4 & 5 & 9 & \\\\\n",
"\t15 & RS Kaluwitharana & 1992-2004 & 49 & 78 & 4 & 1933 & 132* & 26.12 & 3 & 9 & 5 & \\\\\n",
"\t16 & LD Chandimal & 2011-2015 & 25 & 44 & 4 & 1835 & 162* & 45.87 & 5 & 10 & 1 & \\\\\n",
"\t17 & RP Arnold & 1997-2004 & 44 & 69 & 4 & 1821 & 123 & 28.01 & 3 & 10 & 7 & \\\\\n",
"\t18 & NT Paranavitana & 2009-2012 & 32 & 60 & 5 & 1792 & 111 & 32.58 & 2 & 11 & 9 & \\\\\n",
"\t19 & FDM Karunaratne & 2012-2015 & 25 & 48 & 2 & 1622 & 186 & 35.26 & 3 & 7 & 5 & \\\\\n",
"\t20 & JK Silva & 2011-2015 & 24 & 44 & 0 & 1404 & 139 & 31.90 & 2 & 9 & 4 & \\\\\n",
"\t21 & LRD Mendis & 1982-1988 & 24 & 43 & 1 & 1329 & 124 & 31.64 & 4 & 8 & 2 & \\\\\n",
"\t22 & RL Dias & 1982-1987 & 20 & 36 & 1 & 1285 & 109 & 36.71 & 3 & 8 & 2 & \\\\\n",
"\t23 & UC Hathurusingha & 1991-1999 & 26 & 44 & 1 & 1274 & 83 & 29.62 & 0 & 8 & 2 & \\\\\n",
"\t24 & M Muralitharan & 1992-2010 & 132 & 162 & 56 & 1259 & 67 & 11.87 & 0 & 1 & 32 & \\\\\n",
"\t25 & S Wettimuny & 1982-1987 & 23 & 43 & 1 & 1221 & 190 & 29.07 & 2 & 6 & 5 & \\\\\n",
"\t26 & MG Vandort & 2001-2008 & 20 & 33 & 2 & 1144 & 140 & 36.90 & 4 & 4 & 3 & \\\\\n",
"\t27 & WU Tharanga & 2005-2015 & 21 & 38 & 1 & 1117 & 165 & 30.18 & 1 & 5 & 5 & \\\\\n",
"\t28 & HMRKB Herath & 1999-2015 & 67 & 99 & 22 & 1049 & 80* & 13.62 & 0 & 1 & 18 & \\\\\n",
"\t29 & RS Madugalle & 1982-1988 & 21 & 39 & 4 & 1029 & 103 & 29.40 & 1 & 7 & 4 & \\\\\n",
"\t30 & HDRL Thirimanne & 2011-2015 & 23 & 45 & 6 & 969 & 155* & 24.84 & 1 & 4 & 6 & \\\\\n",
"\t31 & HDPK Dharmasena & 1993-2004 & 31 & 51 & 7 & 868 & 62* & 19.72 & 0 & 3 & 4 & \\\\\n",
"\t32 & SM Warnapura & 2007-2009 & 14 & 24 & 1 & 821 & 120 & 35.69 & 2 & 7 & 5 & \\\\\n",
"\t33 & JR Ratnayeke & 1982-1989 & 22 & 38 & 6 & 807 & 93 & 25.21 & 0 & 5 & 5 & \\\\\n",
"\t34 & UDU Chandana & 1999-2005 & 16 & 24 & 1 & 616 & 92 & 26.78 & 0 & 2 & 0 & \\\\\n",
"\t35 & MF Maharoof & 2004-2011 & 22 & 34 & 4 & 556 & 72 & 18.53 & 0 & 3 & 1 & \\\\\n",
"\t36 & GP Wickramasinghe & 1991-2001 & 40 & 64 & 5 & 555 & 51 & 9.40 & 0 & 1 & 17 & \\\\\n",
"\t37 & LPC Silva & 2006-2008 & 11 & 17 & 1 & 537 & 152* & 33.56 & 1 & 2 & 3 & \\\\\n",
"\t38 & S Ranatunga & 1994-1997 & 9 & 17 & 1 & 531 & 118 & 33.18 & 2 & 2 & 1 & \\\\\n",
"\t39 & KTGD Prasad & 2008-2015 & 25 & 39 & 2 & 476 & 47 & 12.86 & 0 & 0 & 8 & \\\\\n",
"\t40 & RJ Ratnayake & 1983-1992 & 23 & 36 & 6 & 433 & 56 & 14.43 & 0 & 2 & 5 & \\\\\n",
"\t41 & CK Kapugedera & 2006-2009 & 8 & 15 & 3 & 418 & 96 & 34.83 & 0 & 4 & 1 & \\\\\n",
"\t42 & DS de Silva & 1982-1984 & 12 & 22 & 3 & 406 & 61 & 21.36 & 0 & 2 & 3 & \\\\\n",
"\t43 & KMDN Kulasekara & 2005-2014 & 21 & 28 & 1 & 391 & 64 & 14.48 & 0 & 1 & 4 & \\\\\n",
"\t44 & J Mubarak & 2002-2015 & 13 & 23 & 1 & 385 & 49 & 17.50 & 0 & 0 & 5 & \\\\\n",
"\t45 & KDK Vithanage & 2013-2015 & 10 & 16 & 2 & 370 & 103* & 26.42 & 1 & 1 & 1 & \\\\\n",
"\t46 & SAR Silva & 1983-1988 & 9 & 16 & 2 & 353 & 111 & 25.21 & 2 & 0 & 1 & \\\\\n",
"\t47 & ALF de Mel & 1982-1986 & 17 & 28 & 5 & 326 & 34 & 14.17 & 0 & 0 & 5 & \\\\\n",
"\t48 & DSBP Kuruppu & 1987-1991 & 4 & 7 & 1 & 320 & 201* & 53.33 & 1 & 0 & 0 & \\\\\n",
"\t49 & RS Kalpage & 1993-1999 & 11 & 18 & 2 & 294 & 63 & 18.37 & 0 & 2 & 0 & \\\\\n",
"\t50 & DNT Zoysa & 1997-2004 & 30 & 40 & 6 & 288 & 28* & 8.47 & 0 & 0 & 9 & \\\\\n",
"\\end{tabular}\n"
],
"text/plain": [
" Player Span Mat Inns NO Runs HS Ave 100 50 0 \n",
"1 KC Sangakkara 2000-2015 134 233 17 12400 319 57.40 38 52 11 \n",
"2 DPMD Jayawardene 1997-2014 149 252 15 11814 374 49.84 34 50 15 \n",
"3 ST Jayasuriya 1991-2007 110 188 14 6973 340 40.07 14 31 15 \n",
"4 PA de Silva 1984-2002 93 159 11 6361 267 42.97 20 22 7 \n",
"5 MS Atapattu 1990-2007 90 156 15 5502 249 39.02 16 17 22 \n",
"6 TM Dilshan 1999-2013 87 145 11 5492 193 40.98 16 23 14 \n",
"7 TT Samaraweera 2001-2013 81 132 20 5462 231 48.76 14 30 11 \n",
"8 A Ranatunga 1982-2000 93 155 12 5105 135* 35.69 4 38 12 \n",
"9 HP Tillakaratne 1989-2004 83 131 25 4545 204* 42.87 11 20 9 \n",
"10 AD Mathews 2009-2015 56 97 17 4015 160 50.18 7 23 1 \n",
"11 WPUJC Vaas 1994-2009 111 162 35 3089 100* 24.32 1 13 12 \n",
"12 RS Mahanama 1986-1998 52 89 1 2576 225 29.27 4 11 7 \n",
"13 AP Gurusinha 1985-1996 41 70 7 2452 143 38.92 7 8 3 \n",
"14 HAPW Jayawardene 2000-2015 58 83 11 2124 154* 29.50 4 5 9 \n",
"15 RS Kaluwitharana 1992-2004 49 78 4 1933 132* 26.12 3 9 5 \n",
"16 LD Chandimal 2011-2015 25 44 4 1835 162* 45.87 5 10 1 \n",
"17 RP Arnold 1997-2004 44 69 4 1821 123 28.01 3 10 7 \n",
"18 NT Paranavitana 2009-2012 32 60 5 1792 111 32.58 2 11 9 \n",
"19 FDM Karunaratne 2012-2015 25 48 2 1622 186 35.26 3 7 5 \n",
"20 JK Silva 2011-2015 24 44 0 1404 139 31.90 2 9 4 \n",
"21 LRD Mendis 1982-1988 24 43 1 1329 124 31.64 4 8 2 \n",
"22 RL Dias 1982-1987 20 36 1 1285 109 36.71 3 8 2 \n",
"23 UC Hathurusingha 1991-1999 26 44 1 1274 83 29.62 0 8 2 \n",
"24 M Muralitharan 1992-2010 132 162 56 1259 67 11.87 0 1 32 \n",
"25 S Wettimuny 1982-1987 23 43 1 1221 190 29.07 2 6 5 \n",
"26 MG Vandort 2001-2008 20 33 2 1144 140 36.90 4 4 3 \n",
"27 WU Tharanga 2005-2015 21 38 1 1117 165 30.18 1 5 5 \n",
"28 HMRKB Herath 1999-2015 67 99 22 1049 80* 13.62 0 1 18 \n",
"29 RS Madugalle 1982-1988 21 39 4 1029 103 29.40 1 7 4 \n",
"30 HDRL Thirimanne 2011-2015 23 45 6 969 155* 24.84 1 4 6 \n",
"31 HDPK Dharmasena 1993-2004 31 51 7 868 62* 19.72 0 3 4 \n",
"32 SM Warnapura 2007-2009 14 24 1 821 120 35.69 2 7 5 \n",
"33 JR Ratnayeke 1982-1989 22 38 6 807 93 25.21 0 5 5 \n",
"34 UDU Chandana 1999-2005 16 24 1 616 92 26.78 0 2 0 \n",
"35 MF Maharoof 2004-2011 22 34 4 556 72 18.53 0 3 1 \n",
"36 GP Wickramasinghe 1991-2001 40 64 5 555 51 9.40 0 1 17 \n",
"37 LPC Silva 2006-2008 11 17 1 537 152* 33.56 1 2 3 \n",
"38 S Ranatunga 1994-1997 9 17 1 531 118 33.18 2 2 1 \n",
"39 KTGD Prasad 2008-2015 25 39 2 476 47 12.86 0 0 8 \n",
"40 RJ Ratnayake 1983-1992 23 36 6 433 56 14.43 0 2 5 \n",
"41 CK Kapugedera 2006-2009 8 15 3 418 96 34.83 0 4 1 \n",
"42 DS de Silva 1982-1984 12 22 3 406 61 21.36 0 2 3 \n",
"43 KMDN Kulasekara 2005-2014 21 28 1 391 64 14.48 0 1 4 \n",
"44 J Mubarak 2002-2015 13 23 1 385 49 17.50 0 0 5 \n",
"45 KDK Vithanage 2013-2015 10 16 2 370 103* 26.42 1 1 1 \n",
"46 SAR Silva 1983-1988 9 16 2 353 111 25.21 2 0 1 \n",
"47 ALF de Mel 1982-1986 17 28 5 326 34 14.17 0 0 5 \n",
"48 DSBP Kuruppu 1987-1991 4 7 1 320 201* 53.33 1 0 0 \n",
"49 RS Kalpage 1993-1999 11 18 2 294 63 18.37 0 2 0 \n",
"50 DNT Zoysa 1997-2004 30 40 6 288 28* 8.47 0 0 9 "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Read the HTML tables of the ESPNcricinfo batting statistics page with the XML package.\n",
"library(XML)\n",
"\n",
"# The URL string could also be split up and reassembled with paste() so that\n",
"# its query parameters can be varied.\n",
"url <- \"http://stats.espncricinfo.com/ci/engine/stats/index.html?class=1;team=8;template=results;type=batting\"\n",
"\n",
"# Parse the page, then display the table stored under the name \"Overall figures\".\n",
"tables <- readHTMLTable(url)\n",
"tables[[\"Overall figures\"]]"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from bs4 import BeautifulSoup\n",
"import urllib\n"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Fetch the raw HTML of the batting statistics page. The response is closed\n",
"# explicitly so the connection is released even if read() raises.\n",
"response = urllib.urlopen(\"http://stats.espncricinfo.com/ci/engine/stats/index.html?class=1;team=8;template=results;type=batting\")\n",
"try:\n",
"    r = response.read()\n",
"finally:\n",
"    response.close()\n",
"soup = BeautifulSoup(r, \"lxml\")\n",
"\n",
"# Column names: the text of every <th> cell found in the page, UTF-8 encoded.\n",
"headings = [row.get_text().encode(\"utf-8\") for row in soup.find_all(\"th\")]\n",
"\n",
"# Walk every <tr> in the page and gather the text of its <td> cells.\n",
"# Only rows with exactly 12 cells are kept; rows of any other width are skipped.\n",
"datasets = []\n",
"for row in soup.find_all(\"tr\"):\n",
"    data = [td.get_text().encode(\"utf-8\") for td in row.find_all(\"td\")]\n",
"    if len(data) == 12:\n",
"        datasets.append(data)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Player Span Mat Inns NO Runs HS Ave 100 50 \\\n",
"0 KC Sangakkara 2000-2015 134 233 17 12400 319 57.40 38 52 \n",
"1 DPMD Jayawardene 1997-2014 149 252 15 11814 374 49.84 34 50 \n",
"2 ST Jayasuriya 1991-2007 110 188 14 6973 340 40.07 14 31 \n",
"3 PA de Silva 1984-2002 93 159 11 6361 267 42.97 20 22 \n",
"4 MS Atapattu 1990-2007 90 156 15 5502 249 39.02 16 17 \n",
"5 TM Dilshan 1999-2013 87 145 11 5492 193 40.98 16 23 \n",
"6 TT Samaraweera 2001-2013 81 132 20 5462 231 48.76 14 30 \n",
"7 A Ranatunga 1982-2000 93 155 12 5105 135* 35.69 4 38 \n",
"8 HP Tillakaratne 1989-2004 83 131 25 4545 204* 42.87 11 20 \n",
"9 AD Mathews 2009-2015 56 97 17 4015 160 50.18 7 23 \n",
"10 WPUJC Vaas 1994-2009 111 162 35 3089 100* 24.32 1 13 \n",
"11 RS Mahanama 1986-1998 52 89 1 2576 225 29.27 4 11 \n",
"12 AP Gurusinha 1985-1996 41 70 7 2452 143 38.92 7 8 \n",
"13 HAPW Jayawardene 2000-2015 58 83 11 2124 154* 29.50 4 5 \n",
"14 RS Kaluwitharana 1992-2004 49 78 4 1933 132* 26.12 3 9 \n",
"15 LD Chandimal 2011-2015 25 44 4 1835 162* 45.87 5 10 \n",
"16 RP Arnold 1997-2004 44 69 4 1821 123 28.01 3 10 \n",
"17 NT Paranavitana 2009-2012 32 60 5 1792 111 32.58 2 11 \n",
"18 FDM Karunaratne 2012-2015 25 48 2 1622 186 35.26 3 7 \n",
"19 JK Silva 2011-2015 24 44 0 1404 139 31.90 2 9 \n",
"20 LRD Mendis 1982-1988 24 43 1 1329 124 31.64 4 8 \n",
"21 RL Dias 1982-1987 20 36 1 1285 109 36.71 3 8 \n",
"22 UC Hathurusingha 1991-1999 26 44 1 1274 83 29.62 0 8 \n",
"23 M Muralitharan 1992-2010 132 162 56 1259 67 11.87 0 1 \n",
"24 S Wettimuny 1982-1987 23 43 1 1221 190 29.07 2 6 \n",
"25 MG Vandort 2001-2008 20 33 2 1144 140 36.90 4 4 \n",
"26 WU Tharanga 2005-2015 21 38 1 1117 165 30.18 1 5 \n",
"27 HMRKB Herath 1999-2015 67 99 22 1049 80* 13.62 0 1 \n",
"28 RS Madugalle 1982-1988 21 39 4 1029 103 29.40 1 7 \n",
"29 HDRL Thirimanne 2011-2015 23 45 6 969 155* 24.84 1 4 \n",
"30 HDPK Dharmasena 1993-2004 31 51 7 868 62* 19.72 0 3 \n",
"31 SM Warnapura 2007-2009 14 24 1 821 120 35.69 2 7 \n",
"32 JR Ratnayeke 1982-1989 22 38 6 807 93 25.21 0 5 \n",
"33 UDU Chandana 1999-2005 16 24 1 616 92 26.78 0 2 \n",
"34 MF Maharoof 2004-2011 22 34 4 556 72 18.53 0 3 \n",
"35 GP Wickramasinghe 1991-2001 40 64 5 555 51 9.40 0 1 \n",
"36 LPC Silva 2006-2008 11 17 1 537 152* 33.56 1 2 \n",
"37 S Ranatunga 1994-1997 9 17 1 531 118 33.18 2 2 \n",
"38 KTGD Prasad 2008-2015 25 39 2 476 47 12.86 0 0 \n",
"39 RJ Ratnayake 1983-1992 23 36 6 433 56 14.43 0 2 \n",
"40 CK Kapugedera 2006-2009 8 15 3 418 96 34.83 0 4 \n",
"41 DS de Silva 1982-1984 12 22 3 406 61 21.36 0 2 \n",
"42 KMDN Kulasekara 2005-2014 21 28 1 391 64 14.48 0 1 \n",
"43 J Mubarak 2002-2015 13 23 1 385 49 17.50 0 0 \n",
"44 KDK Vithanage 2013-2015 10 16 2 370 103* 26.42 1 1 \n",
"45 SAR Silva 1983-1988 9 16 2 353 111 25.21 2 0 \n",
"46 ALF de Mel 1982-1986 17 28 5 326 34 14.17 0 0 \n",
"47 DSBP Kuruppu 1987-1991 4 7 1 320 201* 53.33 1 0 \n",
"48 RS Kalpage 1993-1999 11 18 2 294 63 18.37 0 2 \n",
"49 DNT Zoysa 1997-2004 30 40 6 288 28* 8.47 0 0 \n",
"\n",
" 0 \n",
"0 11 \n",
"1 15 \n",
"2 15 \n",
"3 7 \n",
"4 22 \n",
"5 14 \n",
"6 11 \n",
"7 12 \n",
"8 9 \n",
"9 1 \n",
"10 12 \n",
"11 7 \n",
"12 3 \n",
"13 9 \n",
"14 5 \n",
"15 1 \n",
"16 7 \n",
"17 9 \n",
"18 5 \n",
"19 4 \n",
"20 2 \n",
"21 2 \n",
"22 2 \n",
"23 32 \n",
"24 5 \n",
"25 3 \n",
"26 5 \n",
"27 18 \n",
"28 4 \n",
"29 6 \n",
"30 4 \n",
"31 5 \n",
"32 5 \n",
"33 0 \n",
"34 1 \n",
"35 17 \n",
"36 3 \n",
"37 1 \n",
"38 8 \n",
"39 5 \n",
"40 1 \n",
"41 3 \n",
"42 4 \n",
"43 5 \n",
"44 1 \n",
"45 1 \n",
"46 5 \n",
"47 0 \n",
"48 0 \n",
"49 9 \n"
]
}
],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"# Turn the scraped rows into a 2-D array and label its columns with the\n",
"# scraped header texts, then print the resulting frame as plain text.\n",
"table = pd.DataFrame(np.array(datasets), columns=headings)\n",
"print(table)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.11"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment