Skip to content

Instantly share code, notes, and snippets.

@langemike
Created February 3, 2017 15:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save langemike/bf136171b49e8db787b7e5780531a3cb to your computer and use it in GitHub Desktop.
Save langemike/bf136171b49e8db787b7e5780531a3cb to your computer and use it in GitHub Desktop.
Simple YouTube API Crawler class for Wordpress
<?php
use Illuminate\Support\Arr;
use Illuminate\Support\Str;
use Illuminate\Database\Capsule\Manager as Capsule;
/**
* Simple YouTube Crawler class for WordPress
*
* @author Mike van Veenhuijzen <mikevv@gmail.com>
*
**/
class YoutubeCrawler {

    /** Number of items requested per API call. */
    const MAX_RESULTS = 50;

    /** Per-video outcome codes returned by save(). */
    const VIDEO_STATUS_SUCCESS = 'success';
    const VIDEO_STATUS_FAILED = 'failure';
    const VIDEO_STATUS_SKIPPED = 'skipped';

    /**
    * YouTube API
    * @var Google_Service_YouTube $api
    */
    protected $api = null;

    /**
    * Write to log
    * @var bool $log
    **/
    protected $log = true;

    /**
    * YouTube API items criteria.
    *
    * Keyed by resource kind (the API "kind" without the "youtube#" prefix),
    * then by a dot-notated path into the resource. Every condition is an
    * array with a 'compare' operator and a 'value', optionally 'strict'
    * and 'inverse' (see test()).
    *
    * @var array $criteria
    **/
    protected $criteria = array(
        'playlistItem' => array(
            'snippet.title' => array(
                'compare' => 'contains',
                'value' => array('VR', 'Virtual reality', '360')
            ),
            'status.embeddable' => array(
                'compare' => '=',
                'value' => true
            ),
            'status.privacyStatus' => array(
                'compare' => '=',
                'value' => 'public'
            )
        ),
        'video' => array(
            'statistics.viewCount' => array(
                'compare' => '>=',
                'value' => 500
            )
        ),
        'channel' => array(
            'contentDetails.relatedPlaylists' => array(
                'compare' => 'has',
                'value' => array('uploads')
            )
        )
    );

    /**
    * Initialize class
    **/
    public function __construct()
    {
        $this->setup();
    }

    /**
    * Execute crawler functionality: pick the least recently updated
    * channel, fetch its playlist items, save them as posts and update the
    * channel bookkeeping.
    *
    * @throws Exception When no channel or no playlist can be found
    * @return bool Always true on completion
    **/
    public function run()
    {
        // Get least updated channel
        $channel = Channel::orderBy('updated_at', 'ASC')->first();

        // Fail if none found
        if ($channel === null) {
            throw new Exception('No channel found');
        }

        // Get playlist ID if it's missing
        if (empty($channel->playlist_id)) {
            $channel->playlist_id = $this->youtube_playlist_id($channel->url, 'uploads');
            // Persist without touching updated_at; it is bumped at the end of the run
            $channel->timestamps = false;
            $channel->save();
        }

        // Fail if still nothing found
        if (empty($channel->playlist_id)) {
            throw new Exception('No playlist found');
        }

        // Get playlist items
        $videos = $this->youtube_playlist_items(array(
            'playlistId' => $channel->playlist_id
        ));

        // Update video items with supplemented parts
        if (! empty($videos)) {
            $videos = $this->supplement($videos, array('statistics'));
        }

        // Save videos
        $statistics = $this->save($videos);

        // Log statistics
        // NOTE(review): $videos holds playlist items AND the supplemental
        // video resources appended by supplement(), so the total below (and
        // video_count) may count a video twice — confirm intended behaviour.
        $this->log(strtr('Latest API call resulted in %total% videos from which %success% were saved, %skipped% were skipped and %failed% failed.', array(
            '%total%' => count($videos),
            '%success%' => count(array_keys($statistics, self::VIDEO_STATUS_SUCCESS, true)),
            '%failed%' => count(array_keys($statistics, self::VIDEO_STATUS_FAILED, true)),
            '%skipped%' => count(array_keys($statistics, self::VIDEO_STATUS_SKIPPED, true)),
        )));

        // Update channel statistics
        $channel->timestamps = true;
        $channel->increment('video_count', count($videos)); // Estimation because we save API returned video count (and not Wordpress posts)
        $channel->touch();
        $channel->save();

        return true;
    }

    /**
    * Write log to file
    * @param mixed $data String message, or any value (rendered with print_r)
    * @return bool|null Null when logging is disabled, otherwise whether the write succeeded
    */
    public function log($data)
    {
        if (! $this->log) {
            return null;
        }

        $time = date('Y-m-d H:i:s');
        $message = is_string($data) ? $data : print_r($data, true);
        $line = sprintf('[%s] %s', $time, $message) . PHP_EOL;

        // LOCK_EX keeps concurrent runs from interleaving their lines
        return file_put_contents(YTC_PLUGIN_DIR . 'log.txt', $line, FILE_APPEND | LOCK_EX) !== false;
    }

    /**
    * Supplement videos with extra parts
    *
    * Requests the given parts for every video that is not stored yet and
    * appends the returned video resources to the original list.
    *
    * NOTE(review): the supplemental resources are appended, not merged into
    * the matching playlist items. save() then stores the playlist item first
    * and presumably skips the supplemental resource as "existing", so the
    * 'video' criteria and the statistics meta likely never take effect —
    * confirm and, if so, merge by video ID instead.
    *
    * @param array $videos
    * @param array $parts
    * @return array
    **/
    protected function supplement($videos, $parts)
    {
        // Remove already present videos from API call
        $videos_filtered = array_filter($videos, function($video) {
            return ! $this->video_exists($video);
        });

        // Return original array if supplementing is unneeded
        if (empty($videos_filtered)) {
            return $videos;
        }

        // Collect video IDs (entries without a resolvable ID are dropped)
        $video_ids = array_filter(array_map(array($this, 'video_id'), $videos_filtered));

        if (empty($video_ids)) {
            return $videos;
        }

        // Get videos with supplemented parts
        $videos_with_supplemented_data = $this->youtube_videos(array(
            'part' => implode(',', $parts),
            'id' => implode(',', $video_ids)
        ));

        // Both operands are plain lists, so this simply appends
        return array_merge($videos, $videos_with_supplemented_data);
    }

    /**
    * YouTube videos API call
    * @link https://developers.google.com/youtube/v3/docs/videos/list
    * @param array $parameters Must contain 'part'; no default is supplied for it
    * @return array Video collection matching search criteria
    **/
    protected function youtube_videos(array $parameters)
    {
        $parameters = array_merge($this->default_parameters(), $parameters);

        $this->log('Fire API call: ' . http_build_query($parameters));

        $response = $this->api->videos->listVideos(null, $parameters);

        return $this->parse_response($response);
    }

    /**
    * YouTube PlaylistItems API call
    * @link https://developers.google.com/youtube/v3/docs/playlistItems/list
    * @param array $parameters
    * @throws Exception When the playlistId parameter is missing
    * @return array Video collection matching search criteria
    **/
    protected function youtube_playlist_items(array $parameters)
    {
        if (! isset($parameters['playlistId'])) {
            throw new Exception('Required playlistId parameter is missing');
        }

        $parts = array(
            'id',
            'snippet',
            'status'
        );

        $defaults = array_merge(array('part' => implode(',', $parts)), $this->default_parameters());
        $parameters = array_merge($defaults, $parameters);

        $this->log('Fire API call: ' . http_build_query($parameters));

        $response = $this->api->playlistItems->listPlaylistItems(null, $parameters);

        return $this->parse_response($response);
    }

    /**
    * YouTube Search API call
    * @link https://developers.google.com/youtube/v3/docs/search/list
    * @param array $parameters Overrides for the defaults below; channelId is optional
    * @return array Video collection matching search criteria
    **/
    protected function youtube_search(array $parameters)
    {
        $parts = array(
            'id',
            'snippet',
        );

        $defaults = array_merge(
            array(
                'part' => implode(',', $parts),
                'safeSearch' => 'none',
                'order' => 'date',
                'type' => 'video',
                'videoEmbeddable' => 'true',
                'videoSyndicated' => 'true',
            ),
            $this->default_parameters()
        );
        $parameters = array_merge($defaults, $parameters);

        $this->log('Fire API call: ' . http_build_query($parameters));

        $response = $this->api->search->listSearch(null, $parameters);

        return $this->parse_response($response);
    }

    /**
    * Get a YouTube channel's playlist ID from the channel URL
    *
    * Supports ".../user/<username>" and ".../channel/<channelId>" URLs.
    *
    * @param string $url
    * @param string $playlist uploads, favorites, likes etc.
    * @return string|null Null when the URL is not recognised or nothing is found
    **/
    public function youtube_playlist_id($url, $playlist)
    {
        $parameters = array();

        // Capture only the path segment, not trailing "/videos", query or fragment
        if (preg_match('#youtube\.com/user/([^/?\#\s]+)#i', (string) $url, $match)) {
            $parameters['forUsername'] = $match[1];
        } elseif (preg_match('#youtube\.com/channel/([^/?\#\s]+)#i', (string) $url, $match)) {
            $parameters['id'] = $match[1];
        }

        if (empty($parameters)) {
            return null;
        }

        $this->log("Try to get playlist_id for {$match[1]} with playlist {$playlist}");

        $response = $this->api->channels->listChannels('contentDetails', $parameters);
        $items = $this->parse_response($response);

        return Arr::get($items, '0.contentDetails.relatedPlaylists.' . $playlist);
    }

    /**
    * Parse Youtube API response
    *
    * Keeps only the items that pass every criterion configured for their
    * resource kind. A criterion whose value is absent from the item counts
    * as passed.
    *
    * @param Google_Collection $response
    * @return array
    **/
    protected function parse_response(Google_Collection $response)
    {
        //@todo capture errors defined in https://developers.google.com/youtube/v3/docs/errors
        $items = array();

        $entries = $response->items;

        if (empty($entries)) {
            return $items;
        }

        foreach ($entries as $item) {
            $kind = str_replace('youtube#', '', $item->getKind());
            $model = (array) $item->toSimpleObject();
            $tests = isset($this->criteria[$kind]) ? $this->criteria[$kind] : array();
            $results = array();
            $passes = 0;

            // When no criteria is found for the resource, it passes successfully
            if (empty($tests)) {
                $results[] = "Item PASSED the test because {$kind} doesn't have any criteria";
            }

            // Loop through required criteria and count passes
            foreach ($tests as $key => $condition) {
                $value = Arr::get($model, $key);

                if (is_null($value)) {
                    $results[] = "{$key} PASSED the '{$condition['compare']}' test because a value was missing or incomplete";
                    $passes++;
                    continue;
                }

                // Values can be arrays or booleans, so render them explicitly
                $printable = $this->stringify($value);

                if ($this->test($condition, $value, $item)) {
                    $results[] = "{$key} PASSED the '{$condition['compare']}' test with value --> {$printable}";
                    $passes++;
                } else {
                    $results[] = "{$key} FAILED the '{$condition['compare']}' test with value --> {$printable}";
                }
            }

            // Append to array if all passed successfully
            if ($passes === count($tests)) {
                $items[] = $item;
                $this->log('A MATCH is found with the following passes ' . print_r($results, true));
            }
        }

        return $items;
    }

    /**
    * Execute criterion on $value
    *
    * Supported 'compare' operators: >=, >, <=, <, <>, !=, =, ==, between,
    * in, has, startswith, endswith, is, regex, contains. Note that 'regex'
    * is an alias of 'is' and is evaluated with Str::is(), not preg_match().
    * 'strict' switches =, !=, in to strict comparison and makes 'contains'
    * case-sensitive; 'inverse' negates the outcome.
    *
    * @param array $condition
    * @param mixed $value
    * @param Google_Model $model Currently unused
    * @throws Exception On a missing or unknown 'compare', or a missing 'value'
    * @return bool
    **/
    protected function test($condition, $value, Google_Model $model = null)
    {
        if (! isset($condition['compare'])) {
            throw new Exception('Condition compare value is required');
        }

        if (! isset($condition['value'])) {
            throw new Exception('Condition value is required for comparison');
        }

        // Detection settings
        $strict = isset($condition['strict']) ? $condition['strict'] : false;
        $inverse = isset($condition['inverse']) ? $condition['inverse'] : false;

        // Execute test
        switch (strtolower($condition['compare'])) {
            case '>=' :
                $result = $value >= $condition['value'];
                break;
            case '>' :
                $result = $value > $condition['value'];
                break;
            case '<=' :
                $result = $value <= $condition['value'];
                break;
            case '<' :
                $result = $value < $condition['value'];
                break;
            case '<>' :
            case '!=' :
                $result = $strict ? ($value !== $condition['value']) : ($value != $condition['value']);
                break;
            case '=' :
            case '==' :
                $result = $strict ? ($value === $condition['value']) : ($value == $condition['value']);
                break;
            case 'between' :
                list($minimum, $maximum) = $condition['value'];
                $result = ($value >= $minimum && $value <= $maximum);
                break;
            case 'in' :
                $result = in_array($value, $condition['value'], $strict);
                break;
            case 'has' :
                $result = Arr::has($value, $condition['value']);
                break;
            case 'startswith' :
                $result = Str::startsWith($value, $condition['value']);
                break;
            case 'endswith' :
                $result = Str::endsWith($value, $condition['value']);
                break;
            case 'is' :
            case 'regex' :
                $result = Str::is($condition['value'], $value);
                break;
            case 'contains' :
                if (! $strict) {
                    // Case-insensitive string matching
                    $value = strtolower($value);
                    $values = array_map('strtolower', (array) $condition['value']);
                    $condition['value'] = is_string($condition['value']) ? $values[0] : $values;
                }
                // Nothing to look for: treat as a match
                if (empty($condition['value'])) {
                    $result = true;
                    break;
                }
                $result = Str::contains($value, $condition['value']);
                break;
            default:
                throw new Exception('Unknown condition compare value');
        }

        return $inverse ? ! $result : $result;
    }

    /**
    * Save video as Wordpress post
    *
    * Creates a draft post and stores the video details as post meta.
    *
    * @param array|object $video A single API resource, or a list of them
    * @return string|array One VIDEO_STATUS_* value, or a list of them when given a list
    **/
    public function save($video)
    {
        // Walk through collection if it's an array
        if (is_array($video)) {
            return array_map(array($this, 'save'), $video);
        }

        // Skip already existing videos
        if ($this->video_exists($video)) {
            return self::VIDEO_STATUS_SKIPPED;
        }

        // Resolve the ID once; not every resource carries snippet.resourceId
        $video_id = $this->video_id($video);

        // Default post data
        $post = array(
            'post_title' => $video_id,
            'post_type' => 'post',
            'post_status' => 'draft',
        );

        // Default meta data
        $meta = array(
            'ytc_video_id' => $video_id,
        );

        // Extend with video snippet data
        if (isset($video->snippet)) {
            $post['post_title'] = $video->snippet->title;
            $post['post_name'] = sanitize_title($video->snippet->title);
            $post['post_content'] = $video->snippet->description;
            $post['post_content'] .= "\n\nhttps://www.youtube.com/watch?v=" . $video_id; // Append YouTube video URL
            $meta['ytc_published_at'] = $video->snippet->publishedAt;
            $meta['ytc_channel_id'] = $video->snippet->channelId;
            $meta['ytc_channel_title'] = $video->snippet->channelTitle;
        }

        // Extend with video statistics data
        if (isset($video->statistics)) {
            $meta['ytc_view_count'] = $video->statistics->viewCount;
            $meta['ytc_like_count'] = $video->statistics->likeCount;
        }

        // Save post
        $post_id = wp_insert_post($post);

        // Failed because of unknown reason
        if (empty($post_id)) {
            return self::VIDEO_STATUS_FAILED;
        }

        // Save meta data (values the API did not provide are left out)
        foreach ($meta as $key => $value) {
            if (! is_null($value)) {
                update_post_meta($post_id, $key, $value);
            }
        }

        return self::VIDEO_STATUS_SUCCESS;
    }

    /**
    * Check if video already exists in database
    *
    * Looks for a 'ytc_video_id' post meta row matching either the resource
    * ID or the video ID the resource refers to.
    *
    * @param Google_Model $video
    * @return bool
    **/
    public function video_exists(Google_Model $video)
    {
        $candidates = array();

        if (isset($video->id) && is_scalar($video->id)) {
            $candidates[] = $video->id;
        }

        $video_id = $this->video_id($video);

        if (! is_null($video_id)) {
            $candidates[] = $video_id;
        }

        $candidates = array_values(array_unique($candidates));

        if (empty($candidates)) {
            return false;
        }

        // whereIn avoids orWhere(array), whose inner AND/OR grouping is not
        // the same across Illuminate versions
        return Capsule::table('postmeta')
            ->where('meta_key', 'ytc_video_id')
            ->whereIn('meta_value', $candidates)
            ->exists();
    }

    /**
    * Setup requirements such as database and API connection
    * @global $wpdb
    * @throws Exception When YOUTUBE_API_KEY is not defined
    * @return void
    */
    public function setup()
    {
        global $wpdb;

        // Database settings
        $db = array(
            'driver' => 'mysql',
            'host' => DB_HOST,
            'database' => DB_NAME,
            'username' => DB_USER,
            'password' => DB_PASSWORD,
            'charset' => 'utf8',
            'collation' => 'utf8_general_ci',
            'prefix' => $wpdb->prefix,
        );

        // Fix for port (or socket path) within hostname
        if (strpos($db['host'], ':') !== false) {
            list($host, $suffix) = explode(':', $db['host'], 2);
            $db['host'] = $host;

            if (is_numeric($suffix)) {
                $db['port'] = $suffix;
            } else {
                $db['unix_socket'] = $suffix;
            }
        }

        // Set timezone same as Wordpress; the option can be an empty string
        $timezone = get_option('timezone_string', 'UTC');
        date_default_timezone_set(empty($timezone) ? 'UTC' : $timezone);

        // Connect to database
        $capsule = new Capsule;
        $capsule->addConnection($db);
        $capsule->setAsGlobal();
        $capsule->bootEloquent();

        // Setup YouTube API
        if (! defined('YOUTUBE_API_KEY')) {
            throw new Exception('YOUTUBE_API_KEY should be defined within wp-config.php or somewhere else');
        }

        $client = new Google_Client();
        $client->setDeveloperKey(YOUTUBE_API_KEY);

        // Circumvent SSL errors (non-secure but works)
        // NOTE(review): this disables TLS certificate verification for the
        // outgoing API calls whenever the current request is not HTTPS.
        // Prefer fixing the server's CA bundle over shipping this.
        if (empty($_SERVER['HTTPS']) || $_SERVER['HTTPS'] === 'off') {
            $httpClient = new GuzzleHttp\Client([
                'verify' => false, // otherwise HTTPS requests will fail.
            ]);
            $client->setHttpClient($httpClient);
        }

        $this->api = new Google_Service_YouTube($client);
    }

    /**
    * Plugin activation procedure
    * @return void
    **/
    public static function plugin_activation()
    {
        Channel::initialize();
    }

    /**
    * Plugin deactivation procedure
    * @return void
    **/
    public static function plugin_deactivation()
    {
        //nothing needed here.
    }

    /**
    * Parameters shared by every API call
    * @return array
    **/
    private function default_parameters()
    {
        $defaults = array(
            'maxResults' => self::MAX_RESULTS,
        );

        // REMOTE_ADDR is not set outside of a web request (cron, CLI)
        if (! empty($_SERVER['REMOTE_ADDR'])) {
            $defaults['userIp'] = $_SERVER['REMOTE_ADDR'];
        }

        return $defaults;
    }

    /**
    * Resolve the YouTube video ID of an API resource
    *
    * Checks snippet.resourceId.videoId, then id.videoId, then a scalar id.
    *
    * @param object $video
    * @return string|null
    **/
    private function video_id($video)
    {
        if (isset($video->snippet->resourceId->videoId)) {
            return $video->snippet->resourceId->videoId;
        }

        if (isset($video->id->videoId)) {
            return $video->id->videoId;
        }

        if (isset($video->id) && is_scalar($video->id)) {
            return $video->id;
        }

        return null;
    }

    /**
    * Render a value for log output
    * @param mixed $value
    * @return string
    **/
    private function stringify($value)
    {
        if (is_bool($value)) {
            return $value ? 'true' : 'false';
        }

        if (is_scalar($value)) {
            return (string) $value;
        }

        return (string) json_encode($value);
    }
}
?>
@langemike
Copy link
Author

I just pasted this here to hopefully inspire others to continue this work :)
This class has more dependencies than are pasted in this gist. If somebody is interested in it, please let me know.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment