Skip to content

Instantly share code, notes, and snippets.

@VerifiedJoseph
Last active August 27, 2020 11:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save VerifiedJoseph/32215035636bbbd0c38f5a77725a2d1f to your computer and use it in GitHub Desktop.
Save VerifiedJoseph/32215035636bbbd0c38f5a77725a2d1f to your computer and use it in GitHub Desktop.
PHP script for extracting video details from an archived Plays.tv profile and checking their archive status.
<?php
/*
PHP script for extracting video details from an archived Plays.tv profile and checking their archive status.
*/
/* Dependencies
PHP Simple HTML DOM Parser https://simplehtmldom.sourceforge.io/
php-curl-class (via Composer) https://github.com/php-curl-class/php-curl-class
*/
require __DIR__ . '/simple_html_dom.php';
require __DIR__ . '/vendor/autoload.php';
// Path of the CSV report this script writes (overwritten on every run).
$outputFile = 'video-list.csv';
/*
Saved copy of the user's profile videos page
(e.g. https://web.archive.org/web/20191210184635if_/https://plays.tv/u/Daricx/videos)
Load the profile videos page in a browser, scroll down to the end of the page and keep scrolling until the
infinite scrolling stops, then copy the HTML from the browser's developer tools Inspector (called Elements in
Chrome) into the HTML file.
*/
$htmlFile = 'profile-page.html';

// Read the saved profile page; fail with a clear message instead of passing `false` on to the parser.
$html = file_get_contents($htmlFile);
if ($html === false) {
    throw new RuntimeException('Unable to read HTML file: ' . $htmlFile);
}

// NOTE(review): per the Simple HTML DOM manual, str_get_html() returns false for empty input or input larger
// than its MAX_FILE_SIZE limit -- a fully scrolled profile page can be large, so check before calling find().
$html = str_get_html($html);
if (!is_object($html)) {
    throw new RuntimeException('Unable to parse HTML file (empty or too large?): ' . $htmlFile);
}

// Open the CSV report for writing and emit the header row.
$output = fopen($outputFile, 'w');
if ($output === false) {
    throw new RuntimeException('Unable to open output file for writing: ' . $outputFile);
}
$row = array('Title', 'Date', 'Game', 'Duration', 'Page URL', 'Video URL');
fputcsv($output, $row, ',');
/*
Walk every video entry of the saved profile page, request its archived video page from the Wayback Machine and
write one CSV row per video: title, date, game, duration, page URL and video URL.

Values written to the two URL columns:
  - 'Not archived' in both columns when the video page could not be fetched with HTTP 200.
  - 'Not found' in the video URL column when the page is archived but has no 480/720 <source>.
*/
$div = $html->find('div#B', 0);
if (!is_object($div)) {
    fclose($output);
    throw new RuntimeException('Video list container (div#B) not found in the profile page HTML.');
}

// Plain text of a matched node, or '' when the selector matched nothing.
$textOf = function ($node) {
    return is_object($node) ? $node->plaintext : '';
};

// One HTTP client for the whole run: the options are loop-invariant and the handle can be reused.
$curl = new Curl\Curl();
$curl->setOpt(CURLOPT_FOLLOWLOCATION, true);

foreach ($div->find('li.video-item') as $index => $item) {
    $titleLink = $item->find('a.title', 0);

    $title = $textOf($titleLink);
    $game = $textOf($item->find('a.hashtag.no-wrap', 0));
    $duration = $textOf($item->find('span.duration.no-wrap', 0));
    $date = $textOf($item->find('span.created', 0));
    //$views = $item->find('span.view-count', 0)->plaintext;
    //$likes = $item->find('span.like-count', 0)->plaintext;

    echo $index . ' - ' . $title . "\n";

    // Reset per video so a URL from the previous iteration can never leak into this row.
    $PageUrl = 'Not archived';
    $videoURL = 'Not archived';

    // Drop the query string from the link target.
    $href = is_object($titleLink) ? (string) $titleLink->href : '';
    $url = explode('?', $href);
    $path = $url[0];

    if ($path !== '') {
        // Links in the saved page are normally archive-relative (/web/...); tolerate absolute and
        // protocol-relative links instead of blindly prefixing the host.
        if (preg_match('#^https?://#i', $path)) {
            $requestUrl = $path;
        } elseif (strpos($path, '//') === 0) {
            $requestUrl = 'https:' . $path;
        } else {
            $requestUrl = 'https://web.archive.org' . $path;
        }

        $curl->get($requestUrl);
        $status = (int) $curl->getHttpStatusCode();
        echo $requestUrl . ' ('. $status .')' . "\n";

        if (!$curl->error && $status === 200) {
            $PageUrl = $requestUrl;
            $videoURL = 'Not found';

            // Parse into its own variable: $html still holds the profile page DOM being iterated.
            $page = is_string($curl->response) ? str_get_html($curl->response) : false;
            if (is_object($page)) {
                $video = $page->find('video', 0);
                if (is_object($video)) {
                    // Prefer the 720 source; fall back to 480 when no 720 source is listed.
                    foreach ($video->find('source') as $source) {
                        $res = (string) $source->res;
                        if ($res === '480') {
                            $videoURL = $source->src;
                        }
                        if ($res === '720') {
                            $videoURL = $source->src;
                            break;
                        }
                    }
                }
                // Free the page DOM explicitly; its nodes reference each other, so memory would
                // otherwise pile up across many videos.
                $page->clear();
            }
            unset($page);
        }
    }

    echo $videoURL;
    echo "\n \n";

    $row = array(
        $title,
        $date,
        $game,
        $duration,
        //$views,
        //$likes,
        $PageUrl,
        $videoURL,
    );
    fputcsv($output, $row, ',');
}

$curl->close();
fclose($output);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment