-
-
Save rfsbsb/843ab693fc170720f2a39ed9ff45f71d to your computer and use it in GitHub Desktop.
Scrape ratings from iTunes store
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/* | |
* Get all reviews from iTunes store for a given podcast | |
* | |
*/ | |
// iTunes identifier of the podcast whose ratings are collected.
$podcast_id = '1204701206';

// Storefront code => display name, in the order the rows are rendered.
// Both lists consumed by the rest of the script are derived from this map so
// that a code and its name can never drift out of alignment.
$country_map = [
    'DZ' => 'Algeria',
    'AO' => 'Angola',
    'AI' => 'Anguilla',
    'AG' => 'Antigua & Barbuda',
    'AR' => 'Argentina',
    'AM' => 'Armenia',
    'AU' => 'Australia',
    'AT' => 'Austria',
    'AZ' => 'Azerbaijan',
    'BH' => 'Bahrain',
    'BB' => 'Barbados',
    'BY' => 'Belarus',
    'BE' => 'Belgium',
    'BZ' => 'Belize',
    'BM' => 'Bermuda',
    'BO' => 'Bolivia',
    'BW' => 'Botswana',
    'BR' => 'Brazil',
    'VG' => 'British Virgin Islands',
    'BN' => 'Brunei',
    'BG' => 'Bulgaria',
    'CA' => 'Canada',
    'KY' => 'Cayman Islands',
    'CL' => 'Chile',
    'CN' => 'China',
    'CO' => 'Colombia',
    'CR' => 'Costa Rica',
    'HR' => 'Croatia',
    'CY' => 'Cyprus',
    'CZ' => 'Czech Republic',
    'DK' => 'Denmark',
    'DM' => 'Dominica',
    'DO' => 'Dominican Rep.',
    'EC' => 'Ecuador',
    'EG' => 'Egypt',
    'SV' => 'El Salvador',
    'EE' => 'Estonia',
    'FI' => 'Finland',
    'FR' => 'France',
    'DE' => 'Germany',
    'GH' => 'Ghana',
    'GR' => 'Greece',
    'GD' => 'Grenada',
    'GT' => 'Guatemala',
    'GY' => 'Guyana',
    'HN' => 'Honduras',
    'HK' => 'Hong Kong',
    'HU' => 'Hungary',
    'IS' => 'Iceland',
    'IN' => 'India',
    'ID' => 'Indonesia',
    'IE' => 'Ireland',
    'IL' => 'Israel',
    'IT' => 'Italy',
    'JM' => 'Jamaica',
    'JP' => 'Japan',
    'JO' => 'Jordan',
    'KZ' => 'Kazakstan',
    'KE' => 'Kenya',
    'KR' => 'Korea, Republic Of',
    'KW' => 'Kuwait',
    'LV' => 'Latvia',
    'LB' => 'Lebanon',
    'LT' => 'Lithuania',
    'LU' => 'Luxembourg',
    'MO' => 'Macau',
    'MK' => 'Macedonia',
    'MG' => 'Madagascar',
    'MY' => 'Malaysia',
    'ML' => 'Mali',
    'MT' => 'Malta',
    'MU' => 'Mauritius',
    'MX' => 'Mexico',
    'MD' => 'Moldova, Republic Of',
    'MS' => 'Montserrat',
    'NP' => 'Nepal',
    'NL' => 'Netherlands',
    'NZ' => 'New Zealand',
    'NI' => 'Nicaragua',
    'NE' => 'Niger',
    'NG' => 'Nigeria',
    'NO' => 'Norway',
    'OM' => 'Oman',
    'PK' => 'Pakistan',
    'PA' => 'Panama',
    'PY' => 'Paraguay',
    'PE' => 'Peru',
    'PH' => 'Philippines',
    'PL' => 'Poland',
    'PT' => 'Portugal',
    'QA' => 'Qatar',
    'RO' => 'Romania',
    'RU' => 'Russia',
    'SA' => 'Saudi Arabia',
    'SN' => 'Senegal',
    'SG' => 'Singapore',
    'SK' => 'Slovakia',
    'SI' => 'Slovenia',
    'ZA' => 'South Africa',
    'ES' => 'Spain',
    'LK' => 'Sri Lanka',
    'KN' => 'St. Kitts & Nevis',
    'LC' => 'St. Lucia',
    'VC' => 'St. Vincent & The Grenadines',
    'SR' => 'Suriname',
    'SE' => 'Sweden',
    'CH' => 'Switzerland',
    'TW' => 'Taiwan',
    'TZ' => 'Tanzania',
    'TH' => 'Thailand',
    'BS' => 'The Bahamas',
    'TT' => 'Trinidad & Tobago',
    'TN' => 'Tunisia',
    'TR' => 'Turkey',
    'TC' => 'Turks & Caicos',
    'UG' => 'Uganda',
    'GB' => 'UK',
    'UA' => 'Ukraine',
    'AE' => 'United Arab Emirates',
    'UY' => 'Uruguay',
    'US' => 'USA',
    'UZ' => 'Uzbekistan',
    'VE' => 'Venezuela',
    'VN' => 'Vietnam',
    'YE' => 'Yemen',
];

// Zero-indexed list of storefront codes, one request series per entry.
$countries = array_keys($country_map);

// Display names, index-aligned with $countries. The misspelled variable name
// is kept on purpose: the HTML template below reads it under this name.
$contries_names = array_values($country_map);
/**
 * Collects the rating labels published in the iTunes customer-reviews JSON
 * feed for one podcast in one storefront (country).
 *
 * Typical use: construct, call scrapeData() once, then read the results with
 * getAllReviews() and getAverageReviews().
 */
class ItunesScraper {
    /** @var array Rating labels gathered by scrapeData(); empty until it runs. */
    private $ratings = [];

    /** @var string iTunes identifier of the podcast. */
    private $podcast_id;

    /** @var string Storefront code placed in the feed URL (e.g. 'US'). */
    private $country;

    /**
     * @param string $podcast_id iTunes identifier of the podcast.
     * @param string $country    Storefront code used in the feed URL.
     */
    public function __construct($podcast_id, $country) {
        $this->podcast_id = $podcast_id;
        $this->country = $country;
    }

    /**
     * Builds the feed URL for a page. Page 0 means "no page segment".
     *
     * @param int $page Page number, or 0 for the default feed URL.
     * @return string
     */
    private function buildFeedUrl($page = 0) {
        // Single slashes only: the segment is "page=N/" so the path never
        // contains an accidental "//" between "customerreviews" and "page=".
        $pageSegment = ($page != 0) ? 'page=' . $page . '/' : '';

        return 'https://itunes.apple.com/' . $this->country
            . '/rss/customerreviews/' . $pageSegment
            . 'id=' . $this->podcast_id . '/sortBy=mostRecent/json';
    }

    /**
     * Downloads and decodes one feed page.
     *
     * @param int $page Page number, or 0 for the default feed URL.
     * @return object|null Decoded JSON document, or null when the download
     *                     failed or the body was not a JSON object.
     */
    private function fetchFeed($page = 0) {
        $contents = file_get_contents($this->buildFeedUrl($page));
        if ($contents === false) {
            return null;
        }

        $data = json_decode($contents);

        return is_object($data) ? $data : null;
    }

    /**
     * Returns the `feed->entry` value of one page.
     *
     * @param int $page Page number, or 0 for the default feed URL.
     * @return mixed|null The entry value, or null when the page has none.
     */
    private function getDataByPage($page = 0) {
        $data = $this->fetchFeed($page);
        if ($data !== null && isset($data->feed->entry)) {
            return $data->feed->entry;
        }

        return null;
    }

    /**
     * Walks every page from 0 up to the last page advertised by the feed and
     * appends each entry's rating label to the collected ratings.
     *
     * NOTE(review): page 0 (no page segment) and page 1 may be the same
     * document, which would count the first page twice - confirm against the
     * live feed before changing the start index.
     *
     * @return void
     */
    public function scrapeData() {
        $lastPage = $this->getNumberPages();

        for ($page = 0; $page <= $lastPage; $page++) {
            $entries = $this->getDataByPage($page);
            if (!is_array($entries)) {
                continue;
            }

            foreach ($entries as $index => $entry) {
                // The first entry of every page is skipped, as before.
                // NOTE(review): presumably it describes the podcast itself
                // rather than a review - verify against the current feed.
                if ($index === 0) {
                    continue;
                }
                // Ignore entries that carry no rating instead of storing null.
                if (isset($entry->{'im:rating'}->label)) {
                    $this->ratings[] = $entry->{'im:rating'}->label;
                }
            }
        }
    }

    /**
     * @return array Every rating label collected so far (empty before
     *               scrapeData() has run or when nothing was found).
     */
    public function getAllReviews() {
        return $this->ratings;
    }

    /**
     * @return string|null Mean rating formatted with two decimals, or null
     *                     when no rating has been collected.
     */
    public function getAverageReviews() {
        if (empty($this->ratings)) {
            return null;
        }

        return number_format(array_sum($this->ratings) / count($this->ratings), 2);
    }

    /**
     * Reads the page number out of the feed's rel="last" link.
     *
     * @return int Last page number, or 0 when the feed could not be loaded,
     *             has no such link, or the link carries no page number.
     */
    private function getNumberPages() {
        $data = $this->fetchFeed();
        if ($data === null || !isset($data->feed->link) || !is_array($data->feed->link)) {
            return 0;
        }

        foreach ($data->feed->link as $link) {
            if (isset($link->attributes->rel, $link->attributes->href)
                && $link->attributes->rel == 'last') {
                return self::extractLastPage($link->attributes->href);
            }
        }

        return 0;
    }

    /**
     * Extracts the number following "page=" from a feed link.
     *
     * @param string $href Link target taken from the feed.
     * @return int The page number, or 0 when the link has none.
     */
    private static function extractLastPage($href) {
        // \d+ rather than a single \d: otherwise "page=10" is read as 1.
        if (preg_match('/page=(\d+)/', (string) $href, $matches) === 1) {
            return (int) $matches[1];
        }

        return 0;
    }
}
// Scrape every storefront before any HTML is emitted.
// $reviews maps storefront code => number of ratings collected;
// $avg_reviews maps storefront code => formatted mean rating (or null).
$reviews = [];
$avg_reviews = [];
foreach ($countries as $country) {
    $revs = new ItunesScraper($podcast_id, $country);
    $revs->scrapeData();
    $total = $revs->getAllReviews();
    $avg_reviews[$country] = $revs->getAverageReviews();
    // A storefront without ratings can hand back a non-array here, and
    // count() on a non-countable value throws a TypeError on PHP 8.
    $reviews[$country] = is_array($total) ? count($total) : 0;
}
?> | |
<!DOCTYPE html> | |
<html> | |
<head> | |
<title>No Dumb Questions iTunes ratings by Country</title>
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css"> | |
<script type="text/javascript" src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/js/bootstrap.min.js"></script> | |
</head> | |
<body> | |
<table class="table table-bordered">
<tr> | |
<th>Country</th> | |
<th>Total reviews</th> | |
<th>Average reviews</th> | |
</tr> | |
<?php /* One row per storefront; $key is the index shared by $countries and $contries_names. */ ?>
<?php foreach ($countries as $key => $country): ?>
<tr>
<?php /* Names such as "Antigua & Barbuda" contain "&", so every value is HTML-escaped. */ ?>
<td><?php print htmlspecialchars((string) $contries_names[$key], ENT_QUOTES, 'UTF-8') ?></td>
<td><?php print htmlspecialchars((string) $reviews[$country], ENT_QUOTES, 'UTF-8') ?></td>
<?php /* The average is null for a storefront without ratings; it renders as an empty cell. */ ?>
<td><?php print htmlspecialchars((string) $avg_reviews[$country], ENT_QUOTES, 'UTF-8') ?></td>
</tr>
<?php endforeach ?>
</table> | |
</body> | |
</html> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment