Koopzington/app.php

## app.php
<?php

require 'vendor/autoload.php';
require 'proxygrabber.php';

const XML_FILE_NAME = 'data/anime-titles.xml';
const ARCHIVE_NAME = 'data/anime-titles.gz';

/**
 * Decompresses a gzip archive into a plain file.
 *
 * The archive is streamed in 4096-byte chunks, so it never has to fit in memory.
 *
 * @param $src string archive name
 * @param $dst string destination file name
 * @throws \RuntimeException when either file cannot be opened, read or written
 */
function decompress($src, $dst): void
{
    $archive = gzopen($src, 'rb');
    if ($archive === false) {
        throw new \RuntimeException("Unable to open archive for reading: $src");
    }

    $target = fopen($dst, 'w');
    if ($target === false) {
        gzclose($archive);
        throw new \RuntimeException("Unable to open destination for writing: $dst");
    }

    try {
        // Loop on EOF rather than on the truthiness of the chunk: a chunk that is
        // exactly "0" is falsy in PHP and would otherwise end the copy early.
        while (!gzeof($archive)) {
            $chunk = gzread($archive, 4096);
            if ($chunk === false) {
                throw new \RuntimeException("Failed to read from archive: $src");
            }
            if ($chunk === '') {
                // Nothing more can be read; stop instead of spinning forever.
                break;
            }
            if (fwrite($target, $chunk) === false) {
                throw new \RuntimeException("Failed to write to destination: $dst");
            }
        }
    } finally {
        // Release both handles even when the copy fails half-way.
        gzclose($archive);
        fclose($target);
    }
}

/**
 * Classifies a response body as one of the two known error documents or as a normal payload.
 *
 * @param $data string XML file contents
 * @return int 1 when the body is the "Anime not found" error, 2 when it is the
 *             "banned" error, 0 for anything else
 */
function error($data): int
{
    // A non-string can never be an error document. Comparing it loosely (as a
    // switch would) makes e.g. `true` equal to any non-empty string.
    if (!is_string($data)) {
        return 0;
    }

    // Tolerate surrounding whitespace such as a trailing newline.
    $body = trim($data);

    if ($body === '<error>Anime not found</error>') {
        return 1;
    }

    if ($body === '<error code="500">banned</error>') {
        return 2;
    }

    return 0;
}

/**
 * Returns the anime IDs listed in the cached anime-titles XML file, in random order.
 *
 * When the cached file is missing or at least 24 hours old it is first re-downloaded
 * from AniDB (through a proxy obtained from getProxy()), decompressed, and the
 * temporary archive is removed.
 *
 * @return array Array of anime IDs (integers)
 * @throws \GuzzleHttp\Exception\GuzzleException
 * @throws \RuntimeException when the download fails or the XML file cannot be parsed
 */
function getAnimuList(): array
{
    $cacheIsStale = !file_exists(XML_FILE_NAME)
        || (time() - filemtime(XML_FILE_NAME) >= 86400);

    if ($cacheIsStale) {
        print "Downloading the titles archive\n";

        $client = new \GuzzleHttp\Client(['http_errors' => false]);
        $response = $client->request('GET', 'http://anidb.net/api/anime-titles.xml.gz', [
            'headers' => [
                'User-Agent' => 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
            ],
            'proxy' => getProxy(),
            'sink' => ARCHIVE_NAME,
        ]);

        // With http_errors disabled a failed download does not throw; the error body
        // is still written to the sink. Never decompress that over the cached XML.
        if ($response->getStatusCode() !== 200) {
            if (file_exists(ARCHIVE_NAME)) {
                unlink(ARCHIVE_NAME);
            }
            throw new \RuntimeException(
                'Downloading the titles archive failed with HTTP status ' . $response->getStatusCode()
            );
        }

        decompress(ARCHIVE_NAME, XML_FILE_NAME);
        unlink(ARCHIVE_NAME);
    }

    $xml = simplexml_load_file(XML_FILE_NAME, 'SimpleXMLElement', LIBXML_NOCDATA);
    if ($xml === false) {
        throw new \RuntimeException('Unable to parse ' . XML_FILE_NAME);
    }

    // Walk the <anime> elements directly. A JSON round-trip collapses a single
    // <anime> child into a map instead of a list, which breaks the iteration.
    $ids = [];
    foreach ($xml->anime as $anime) {
        $ids[] = (int) $anime['aid'];
    }

    // Randomise the order so the IDs are not requested sequentially.
    shuffle($ids);

    return $ids;
}

$proxy = getProxy();

// Shared Guzzle client; every request carries a desktop-browser User-Agent header.
$client = new \GuzzleHttp\Client([
    'debug' => true,
    'headers' => [
        'User-Agent' => 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
    ],
]);

// Try to load the anime titles
$animu_list = getAnimuList();

// Request options for our query
$request_options = [
    'proxy' => $proxy,
    'http_errors' => false,
    'connect_timeout' => 30,
];

// Iterate over the IDs
foreach ($animu_list as $animu_id) {
    // Skip IDs that already have a saved response.
    if (file_exists("data/$animu_id.xml")) {
        continue;
    }

    // Client name/version strings are listed at https://wiki.anidb.net/w/UDP_Clients
    // (e.g. alastorehttp&clientver=1, goanidbhttp&clientver=1).
    // http://api.anidb.net:9001/httpapi?request=anime&client={client}&clientver={version}&protover=1&aid=12138
    $request_options['query'] = [
        'request' => 'anime',
        'client' => 'alastorehttp',
        'clientver' => 1,
        'protover' => 1,
        'aid' => $animu_id,
    ];

    // Reset per iteration so a failed request can never store the previous
    // anime's body under this ID.
    $data = null;

    // One regular attempt plus one retry when the request itself throws.
    for ($attempt = 1; $attempt <= 2; $attempt++) {
        try {
            $response = $client->request('GET', 'http://api.anidb.net:9001/httpapi', $request_options);
        } catch (Exception $e) {
            // Proxy is probably dead
            $request_options['proxy'] = getProxy();
            print "Timed out or some kind of exception, getting a new proxy: {$request_options['proxy']}\n";
            continue;
        }

        $body = $response->getBody()->getContents();
        $errorCode = error($body);

        // If we get a bad response or we're banned we get a new proxy and skip this anime
        // since they don't let the same client request the same data-set after a ban.
        // This means the script needs to be executed again to fill in the missing data.
        if (($response->getStatusCode() !== 200) || ($errorCode === 2)) {
            $request_options['proxy'] = getProxy(); // get a new proxy
            print "Banned, got a new proxy: {$request_options['proxy']}\n";
            sleep(2);
            break;
        }

        if ($errorCode === 1) {
            print "Anime not found.\n";
            sleep(2);
            break;
        }

        $data = $body;
        break;
    }

    // Nothing usable was received for this ID; move on to the next one.
    if ($data === null) {
        continue;
    }

    // Save the XML response
    if (file_put_contents("data/$animu_id.xml", $data, LOCK_EX) === false) {
        print "Could not write data/$animu_id.xml\n";
    }

    // sleep between 2 and 2.5 secs - in case they monitor request intervals from an IP
    usleep(mt_rand(2000000, 2500000));
}

## proxygrabber.php
<?php

/**
 * Fetches a proxy from http://gimmeproxy.com/api/getProxy
 *
 * Prints the service's error message when the response contains one.
 *
 * @return string the "curl" field of the response, or an empty string when the
 *                response has no usable value
 */
function getProxy(): string {
	$client = new \GuzzleHttp\Client();

	$response = $client->get('http://gimmeproxy.com/api/getProxy');

	// json_decode() expects a string, so cast the body stream explicitly.
	$data = json_decode((string) $response->getBody(), true);

	if (isset($data['error'])) { // there are no proxies left for this user-id and timeout
		print $data['error']."\n";
	}

	// The declared return type is string: report "no proxy" as '' rather than
	// returning false and relying on implicit bool-to-string coercion.
	$proxy = $data['curl'] ?? '';

	return is_scalar($proxy) ? (string) $proxy : '';
}
	<?php

	require 'vendor/autoload.php';
	require 'proxygrabber.php';

	const XML_FILE_NAME = 'data/anime-titles.xml';
	const ARCHIVE_NAME = 'data/anime-titles.gz';

	/**
	 * Decompresses a gzip archive into a plain file.
	 *
	 * The archive is streamed in 4096-byte chunks, so it never has to fit in memory.
	 *
	 * @param $src string archive name
	 * @param $dst string destination file name
	 * @throws \RuntimeException when either file cannot be opened, read or written
	 */
	function decompress($src, $dst): void
	{
	    $archive = gzopen($src, 'rb');
	    if ($archive === false) {
	        throw new \RuntimeException("Unable to open archive for reading: $src");
	    }

	    $target = fopen($dst, 'w');
	    if ($target === false) {
	        gzclose($archive);
	        throw new \RuntimeException("Unable to open destination for writing: $dst");
	    }

	    try {
	        // Loop on EOF rather than on the truthiness of the chunk: a chunk that is
	        // exactly "0" is falsy in PHP and would otherwise end the copy early.
	        while (!gzeof($archive)) {
	            $chunk = gzread($archive, 4096);
	            if ($chunk === false) {
	                throw new \RuntimeException("Failed to read from archive: $src");
	            }
	            if ($chunk === '') {
	                // Nothing more can be read; stop instead of spinning forever.
	                break;
	            }
	            if (fwrite($target, $chunk) === false) {
	                throw new \RuntimeException("Failed to write to destination: $dst");
	            }
	        }
	    } finally {
	        // Release both handles even when the copy fails half-way.
	        gzclose($archive);
	        fclose($target);
	    }
	}

	/**
	 * Classifies a response body as one of the two known error documents or as a normal payload.
	 *
	 * @param $data string XML file contents
	 * @return int 1 when the body is the "Anime not found" error, 2 when it is the
	 *             "banned" error, 0 for anything else
	 */
	function error($data): int
	{
	    // A non-string can never be an error document. Comparing it loosely (as a
	    // switch would) makes e.g. `true` equal to any non-empty string.
	    if (!is_string($data)) {
	        return 0;
	    }

	    // Tolerate surrounding whitespace such as a trailing newline.
	    $body = trim($data);

	    if ($body === '<error>Anime not found</error>') {
	        return 1;
	    }

	    if ($body === '<error code="500">banned</error>') {
	        return 2;
	    }

	    return 0;
	}

	/**
	 * Returns the anime IDs listed in the cached anime-titles XML file, in random order.
	 *
	 * When the cached file is missing or at least 24 hours old it is first re-downloaded
	 * from AniDB (through a proxy obtained from getProxy()), decompressed, and the
	 * temporary archive is removed.
	 *
	 * @return array Array of anime IDs (integers)
	 * @throws \GuzzleHttp\Exception\GuzzleException
	 * @throws \RuntimeException when the download fails or the XML file cannot be parsed
	 */
	function getAnimuList(): array
	{
	    $cacheIsStale = !file_exists(XML_FILE_NAME)
	        || (time() - filemtime(XML_FILE_NAME) >= 86400);

	    if ($cacheIsStale) {
	        print "Downloading the titles archive\n";

	        $client = new \GuzzleHttp\Client(['http_errors' => false]);
	        $response = $client->request('GET', 'http://anidb.net/api/anime-titles.xml.gz', [
	            'headers' => [
	                'User-Agent' => 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
	            ],
	            'proxy' => getProxy(),
	            'sink' => ARCHIVE_NAME,
	        ]);

	        // With http_errors disabled a failed download does not throw; the error body
	        // is still written to the sink. Never decompress that over the cached XML.
	        if ($response->getStatusCode() !== 200) {
	            if (file_exists(ARCHIVE_NAME)) {
	                unlink(ARCHIVE_NAME);
	            }
	            throw new \RuntimeException(
	                'Downloading the titles archive failed with HTTP status ' . $response->getStatusCode()
	            );
	        }

	        decompress(ARCHIVE_NAME, XML_FILE_NAME);
	        unlink(ARCHIVE_NAME);
	    }

	    $xml = simplexml_load_file(XML_FILE_NAME, 'SimpleXMLElement', LIBXML_NOCDATA);
	    if ($xml === false) {
	        throw new \RuntimeException('Unable to parse ' . XML_FILE_NAME);
	    }

	    // Walk the <anime> elements directly. A JSON round-trip collapses a single
	    // <anime> child into a map instead of a list, which breaks the iteration.
	    $ids = [];
	    foreach ($xml->anime as $anime) {
	        $ids[] = (int) $anime['aid'];
	    }

	    // Randomise the order so the IDs are not requested sequentially.
	    shuffle($ids);

	    return $ids;
	}

	$proxy = getProxy();

	// Shared Guzzle client; every request carries a desktop-browser User-Agent header.
	$client = new \GuzzleHttp\Client([
	    'debug' => true,
	    'headers' => [
	        'User-Agent' => 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
	    ],
	]);

	// Try to load the anime titles
	$animu_list = getAnimuList();

	// Request options for our query
	$request_options = [
	    'proxy' => $proxy,
	    'http_errors' => false,
	    'connect_timeout' => 30,
	];

	// Iterate over the IDs
	foreach ($animu_list as $animu_id) {
	    // Skip IDs that already have a saved response.
	    if (file_exists("data/$animu_id.xml")) {
	        continue;
	    }

	    // Client name/version strings are listed at https://wiki.anidb.net/w/UDP_Clients
	    // (e.g. alastorehttp&clientver=1, goanidbhttp&clientver=1).
	    // http://api.anidb.net:9001/httpapi?request=anime&client={client}&clientver={version}&protover=1&aid=12138
	    $request_options['query'] = [
	        'request' => 'anime',
	        'client' => 'alastorehttp',
	        'clientver' => 1,
	        'protover' => 1,
	        'aid' => $animu_id,
	    ];

	    // Reset per iteration so a failed request can never store the previous
	    // anime's body under this ID.
	    $data = null;

	    // One regular attempt plus one retry when the request itself throws.
	    for ($attempt = 1; $attempt <= 2; $attempt++) {
	        try {
	            $response = $client->request('GET', 'http://api.anidb.net:9001/httpapi', $request_options);
	        } catch (Exception $e) {
	            // Proxy is probably dead
	            $request_options['proxy'] = getProxy();
	            print "Timed out or some kind of exception, getting a new proxy: {$request_options['proxy']}\n";
	            continue;
	        }

	        $body = $response->getBody()->getContents();
	        $errorCode = error($body);

	        // If we get a bad response or we're banned we get a new proxy and skip this anime
	        // since they don't let the same client request the same data-set after a ban.
	        // This means the script needs to be executed again to fill in the missing data.
	        if (($response->getStatusCode() !== 200) || ($errorCode === 2)) {
	            $request_options['proxy'] = getProxy(); // get a new proxy
	            print "Banned, got a new proxy: {$request_options['proxy']}\n";
	            sleep(2);
	            break;
	        }

	        if ($errorCode === 1) {
	            print "Anime not found.\n";
	            sleep(2);
	            break;
	        }

	        $data = $body;
	        break;
	    }

	    // Nothing usable was received for this ID; move on to the next one.
	    if ($data === null) {
	        continue;
	    }

	    // Save the XML response
	    if (file_put_contents("data/$animu_id.xml", $data, LOCK_EX) === false) {
	        print "Could not write data/$animu_id.xml\n";
	    }

	    // sleep between 2 and 2.5 secs - in case they monitor request intervals from an IP
	    usleep(mt_rand(2000000, 2500000));
	}
	<?php

	/**
	 * Fetches a proxy from http://gimmeproxy.com/api/getProxy
	 *
	 * Prints the service's error message when the response contains one.
	 *
	 * @return string the "curl" field of the response, or an empty string when the
	 *                response has no usable value
	 */
	function getProxy(): string {
	    $client = new \GuzzleHttp\Client();

	    $response = $client->get('http://gimmeproxy.com/api/getProxy');

	    // json_decode() expects a string, so cast the body stream explicitly.
	    $data = json_decode((string) $response->getBody(), true);

	    if (isset($data['error'])) { // there are no proxies left for this user-id and timeout
	        print $data['error']."\n";
	    }

	    // The declared return type is string: report "no proxy" as '' rather than
	    // returning false and relying on implicit bool-to-string coercion.
	    $proxy = $data['curl'] ?? '';

	    return is_scalar($proxy) ? (string) $proxy : '';
	}