Skip to content

Instantly share code, notes, and snippets.

@Koopzington
Forked from miraris/app.php
Created July 26, 2018 09:45
Show Gist options
  • Save Koopzington/06a788ac95478b9d809d56aa1a85a094 to your computer and use it in GitHub Desktop.
Save Koopzington/06a788ac95478b9d809d56aa1a85a094 to your computer and use it in GitHub Desktop.
๐Ÿ‘Œ๐Ÿ‘€๐Ÿ‘Œ๐Ÿ‘€๐Ÿ‘Œ๐Ÿ‘€๐Ÿ‘Œ๐Ÿ‘€๐Ÿ‘Œ๐Ÿ‘€ good shit goเฑฆิ sHit๐Ÿ‘Œ thats โœ” some good๐Ÿ‘Œ๐Ÿ‘Œshit right๐Ÿ‘Œ๐Ÿ‘Œthere๐Ÿ‘Œ๐Ÿ‘Œ๐Ÿ‘Œ rightโœ”there โœ”โœ”if i do ฦฝaาฏ so my self ๐Ÿ’ฏ i say so ๐Ÿ’ฏ thats what im talking about right there right there (chorus: สณแถฆแตสฐแต— แต—สฐแต‰สณแต‰) mMMMMแŽทะœ๐Ÿ’ฏ ๐Ÿ‘Œ๐Ÿ‘Œ ๐Ÿ‘ŒะO0ะžเฌ OOOOOะžเฌ เฌ Ooooแต’แต’แต’แต’แต’แต’แต’แต’แต’๐Ÿ‘Œ ๐Ÿ‘Œ๐Ÿ‘Œ ๐Ÿ‘Œ ๐Ÿ’ฏ ๐Ÿ‘Œ ๐Ÿ‘€ ๐Ÿ‘€ ๐Ÿ‘€ ๐Ÿ‘Œ๐Ÿ‘ŒGood shit
<?php
require 'vendor/autoload.php';
require 'proxygrabber.php';
const XML_FILE_NAME = 'data/anime-titles.xml';
const ARCHIVE_NAME = 'data/anime-titles.gz';
/**
 * Decompresses a gzip archive into a plain file.
 *
 * The archive is streamed in 4 KiB chunks so large archives are never held
 * fully in memory. The destination file is created or truncated.
 *
 * @param $src string archive name
 * @param $dst string destination file name
 * @throws \RuntimeException when either file cannot be opened, or a read/write fails
 */
function decompress($src, $dst): void
{
    $archive = gzopen($src, 'rb');
    if ($archive === false) {
        throw new \RuntimeException("Unable to open archive for reading: $src");
    }

    $target = fopen($dst, 'w');
    if ($target === false) {
        gzclose($archive);
        throw new \RuntimeException("Unable to open destination for writing: $dst");
    }

    try {
        // Loop on EOF rather than on the truthiness of the chunk: a chunk
        // consisting of the single character "0" is falsy in PHP and would
        // otherwise end the copy early.
        while (!gzeof($archive)) {
            $chunk = gzread($archive, 4096);
            if ($chunk === false) {
                throw new \RuntimeException("Failed to read from archive: $src");
            }
            if ($chunk === '') {
                // Nothing more to read (guards against spinning on a damaged stream).
                break;
            }
            if (fwrite($target, $chunk) === false) {
                throw new \RuntimeException("Failed to write to destination: $dst");
            }
        }
    } finally {
        // Always release both handles, even when the copy fails half-way.
        gzclose($archive);
        fclose($target);
    }
}
/**
 * Checks whether we received an error response.
 *
 * The body is compared verbatim (strictly) against the two error documents
 * this script knows about; anything else is treated as a regular response.
 *
 * @param $data string XML file contents
 * @return int 1 for "Anime not found", 2 for "banned", 0 otherwise
 */
function error($data): int
{
    // Strict comparison: a loose comparison would treat e.g. boolean true
    // as equal to any non-empty string and misreport it as "not found".
    if ($data === '<error>Anime not found</error>') {
        return 1;
    }

    if ($data === '<error code="500">banned</error>') {
        return 2;
    }

    return 0;
}
/**
 * Returns the anime IDs listed in the AniDB titles dump, in random order.
 *
 * If the local XML titles file is missing or at least 24 hours old, the
 * gzipped dump is downloaded through a proxy, decompressed over the local
 * file and the archive is removed. A failed download is reported and an
 * already existing (stale) titles file is used instead.
 *
 * @return array Array of anime IDs (integers), shuffled
 * @throws \GuzzleHttp\Exception\GuzzleException
 * @throws \RuntimeException when no titles file could be loaded
 */
function getAnimuList(): array
{
    $isStale = !file_exists(XML_FILE_NAME) || (time() - filemtime(XML_FILE_NAME) >= 86400);

    if ($isStale) {
        print "Downloading the titles archive\n";

        // http_errors is off, so a 4xx/5xx answer does not throw and must be checked by hand.
        $client = new \GuzzleHttp\Client(['http_errors' => false]);
        $response = $client->request('GET', 'http://anidb.net/api/anime-titles.xml.gz', [
            'headers' => [
                'User-Agent' => 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
            ],
            'proxy' => getProxy(),
            'sink' => ARCHIVE_NAME,
        ]);

        if ($response->getStatusCode() === 200) {
            decompress(ARCHIVE_NAME, XML_FILE_NAME);
        } else {
            // The sink then holds an error body rather than a gzip archive; do not unpack it.
            print "Failed to download the titles archive (HTTP {$response->getStatusCode()})\n";
        }

        if (file_exists(ARCHIVE_NAME)) {
            unlink(ARCHIVE_NAME);
        }
    }

    $xml = file_exists(XML_FILE_NAME) ? simplexml_load_file(XML_FILE_NAME) : false;
    if ($xml === false) {
        throw new \RuntimeException('Unable to load the anime titles from ' . XML_FILE_NAME);
    }

    // Read the "aid" attribute straight from each <anime> element. This avoids
    // a JSON round-trip of the whole document and also works when there is
    // only a single <anime> child.
    $ids = [];
    foreach ($xml->anime as $anime) {
        $ids[] = (int) $anime['aid'];
    }

    // just shuffle the id array so the IDs are not requested in sequential order
    shuffle($ids);

    return $ids;
}
$proxy = getProxy();

// Instantiate the Guzzle client with a browser-like User-Agent header
$client = new \GuzzleHttp\Client([
    'debug' => true,
    'headers' => [
        'User-Agent' => 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
    ],
]);

// Try to load the anime titles
$animu_list = getAnimuList();

// Request options for our query
$request_options = [
    'proxy' => $proxy,
    'http_errors' => false,
    'connect_timeout' => 30,
];

// Iterate over the IDs
foreach ($animu_list as $animu_id) {
    // Skip this iteration if the file exists (already fetched on an earlier run)
    if (file_exists("data/$animu_id.xml")) {
        continue;
    }

    $request_options['query'] = [
        'request' => 'anime',
        'client' => 'alastorehttp',
        'clientver' => 1,
        'protover' => 1,
        'aid' => $animu_id,
    ];
    // alastorehttp&clientver=1
    // goanidbhttp&clientver=1
    // Get clients name-version strings from https://wiki.anidb.net/w/UDP_Clients
    // http://api.anidb.net:9001/httpapi?request=anime&client={client}&clientver={version}&protover=1&aid=12138

    try {
        $response = $client->request('GET', 'http://api.anidb.net:9001/httpapi', $request_options);
    } catch (Exception $e) {
        // Proxy is probably dead: switch proxy and retry this ID once
        $request_options['proxy'] = getProxy();
        print "Timed out or some kind of exception, getting a new proxy: {$request_options['proxy']}\n";

        try {
            $response = $client->request('GET', 'http://api.anidb.net:9001/httpapi', $request_options);
        } catch (Exception $e) {
            // The replacement proxy failed as well; move on with yet another one
            $request_options['proxy'] = getProxy();
            sleep(2);
            continue;
        }
    }

    // Read the body of whichever request succeeded, so the data written
    // below always belongs to the current ID (never to a previous iteration).
    $data = $response->getBody()->getContents();
    $error = error($data);

    // If we get a bad response or we're banned we get a new proxy and skip this anime
    // since they don't let the same client request the same data-set after a ban.
    // This means this script needs to be executed twice to fill the missing data
    if (($response->getStatusCode() != 200) || ($error === 2)) {
        $request_options['proxy'] = getProxy(); // get a new proxy
        print "Banned, got a new proxy: {$request_options['proxy']}\n";
        sleep(2);
        continue;
    }

    if ($error === 1) {
        print "Anime not found.\n";
        sleep(2);
        continue;
    }

    // Save the XML response
    file_put_contents("data/$animu_id.xml", $data, LOCK_EX);

    // sleep between 2 and 2.5 secs - in case they monitor request intervals from an IP
    usleep(mt_rand(2000000, 2500000));
}
<?php
/**
 * Fetches a proxy from http://gimmeproxy.com/api/getProxy
 *
 * The JSON answer's "error" field, when present, is printed. The value of
 * its "curl" field is returned (presumably a proxy URL usable as a Guzzle
 * "proxy" option - verify against the gimmeproxy API).
 *
 * @return string the proxy, or an empty string when the service returned none
 */
function getProxy(): string {
    $client = new \GuzzleHttp\Client();
    $response = $client->get('http://gimmeproxy.com/api/getProxy');

    // Cast the body stream explicitly; json_decode() expects a string.
    $data = json_decode((string) $response->getBody(), true);
    if (!is_array($data)) {
        // Not a JSON object (invalid or unexpected body): no proxy available.
        return '';
    }

    if (isset($data['error'])) { // there are no proxies left for this user-id and timeout
        print $data['error']."\n";
    }

    // Return an empty string rather than false so the declared string return
    // type is honoured without relying on implicit coercion.
    $proxy = $data['curl'] ?? '';

    return is_scalar($proxy) ? (string) $proxy : '';
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment