Ways to extract video IDs from YouTube shareable strings, using RegExp, in different languages. For training.
Last active
June 10, 2023 17:17
-
-
Save yuigoto/875d23c3f9f7b1f9624b2b57a2983d97 to your computer and use it in GitHub Desktop.
[Lab: Extracting YouTube video IDs] Extracting YouTube video IDs from URLs, short URLs, iframe embed codes or object embed codes, in multiple languages. I will keep adding more as I learn and try new languages. 😉
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Text.RegularExpressions; | |
/// <summary> | |
/// Sorry, I took this one from a sandbox project I've been using to learn. :P | |
/// </summary> | |
namespace Sandbox.Helpers
{
    /// <summary>
    /// Contains helpers that extract information from strings related to
    /// social networking services, especially video related ones.
    /// </summary>
    public static class SocialHelpers
    {
        #region "Properties"
        /// <summary>
        /// Regular expression to use when matching for the video IDs.
        /// The video ID is the last (fifth) capture group.
        /// </summary>
        const string youTubePattern = @"(youtu\.be|youtube\.com)\/(watch\?(.*&)?v=|(embed|v)\/)?([^\?&""'>\r\n]+)";

        /// <summary>
        /// Pre-built matcher for <see cref="youTubePattern"/>, created once so
        /// the pattern is not re-parsed on every call.
        /// </summary>
        static readonly Regex youTubeRegex = new Regex(youTubePattern);
        #endregion

        #region "Methods"
        /// <summary>
        /// Extracts the video ID from one of YouTube's embed codes or URLs.
        /// When the input contains several YouTube links (as object embed
        /// codes do), the ID of the last one is returned.
        /// </summary>
        /// <param name="urlOrEmbed">URL/embed code to check for</param>
        /// <returns>
        /// String containing the YouTube video ID, or an empty string when
        /// the input is null, empty or contains no YouTube link.
        /// </returns>
        public static string extractYouTubeVideoId(string urlOrEmbed)
        {
            // Regex.Matches throws on null; treat it like any other invalid input.
            if (string.IsNullOrEmpty(urlOrEmbed))
            {
                return "";
            }

            MatchCollection matches = youTubeRegex.Matches(urlOrEmbed);
            if (matches.Count == 0)
            {
                return "";
            }

            // The ID lives in the last capture group of the last match.
            Match lastMatch = matches[matches.Count - 1];
            return lastMatch.Groups[lastMatch.Groups.Count - 1].Value;
        }
        #endregion
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import ( | |
"regexp" | |
) | |
// youTubeIDPattern matches a youtu.be / youtube.com link and captures the
// video ID in its last group. It is compiled once at package initialisation;
// the literal dots and the literal "?" after "watch" are escaped so that only
// real YouTube hosts and "watch?" query URLs match.
var youTubeIDPattern = regexp.MustCompile(
	`(youtu\.be|youtube\.com)/(watch\?(.*&)?v=|(embed|v)/)?([^?&"'>\r\n]+)`,
)

// extractYouTubeVideoID extracts the video ID from a YouTube video URL, short
// URL, iframe embed or object embed code, then returns it.
//
// When the input contains several YouTube links (as object embed codes do),
// the ID of the last one is returned. If the input value is invalid, an empty
// string is returned.
func extractYouTubeVideoID(urlOrEmbed string) string {
	found := youTubeIDPattern.FindAllStringSubmatch(urlOrEmbed, -1)
	if len(found) == 0 {
		return ""
	}

	// The ID is the last capture group of the last match.
	last := found[len(found)-1]
	return last[len(last)-1]
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package src; | |
class Helper
{
    /**
        Extracts the video ID from a YouTube video URL, short URL, iframe embed
        or object code.

        The first YouTube link found in the input is used.

        @param url_or_embed URL or embed code to search
        @return The video ID, or `null` when the input is `null` or contains
                no YouTube link
    **/
    public static function extractYouTubeVideoId(url_or_embed:String):String {
        if (url_or_embed == null) return null;

        // Dots and the "?" after "watch" are escaped so they match literally;
        // the video ID is capture group 5.
        var host = "(youtu\\.be|youtube\\.com)/";
        var id = "(watch\\?(.*&)?v=|(embed|v)/)?([^?&\"'>\r\n]+)";
        var regex:EReg = new EReg(host + id, "");

        return regex.match(url_or_embed) ? regex.matched(5) : null;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package br.com.yuiti; | |
import java.util.regex.Matcher; | |
import java.util.regex.Pattern; | |
/** | |
* Provides helper methods | |
*/ | |
public class YouTube {
    /** Host part of the expression: short-link or regular YouTube domain. */
    private static final String HOST_FRAGMENT = "(youtu\\.be|youtube\\.com)/";

    /** Optional path/query prefix followed by the capture group holding the ID. */
    private static final String ID_FRAGMENT = "(watch\\?(.*&)?v=|(embed|v)/)?([^?&\"'>\\r\\n]+)";

    /**
     * Pulls the video ID out of a YouTube URL, short URL, iframe embed or
     * object embed code. Only the first YouTube link in the input is used.
     *
     * @param urlOrEmbed
     *     YouTube video URL, short URL, iframe embed or object embed code
     * @return
     *     YouTube video ID, or an empty string when nothing matches
     */
    public static String extractYouTubeVideoID(String urlOrEmbed) {
        Matcher idMatcher = Pattern.compile(HOST_FRAGMENT + ID_FRAGMENT).matcher(urlOrEmbed);

        if (!idMatcher.find()) {
            return "";
        }

        // The ID is held by the highest-numbered capture group.
        int idGroup = idMatcher.groupCount();
        return idMatcher.group(idGroup);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Extracts the video ID from a YouTube video URL, short URL, iframe embed
 * or object code. The first YouTube link found in the input is used.
 *
 * @param {string} url_or_embed
 *     URL or embed code to search
 * @returns {string|boolean}
 *     The video ID, or `false` when the input is not a string or contains
 *     no YouTube link
 */
var extract_youtube_videoid = function(url_or_embed) {
  // A regex literal keeps the escapes intact: "\." and "\?" must match a
  // literal dot and question mark. The video ID is capture group 5.
  var regex = /(youtu\.be|youtube\.com)\/(watch\?(.*&)?v=|(embed|v)\/)?([^?&"'>\r\n]+)/,
      matches;

  if (typeof url_or_embed !== "string") {
    return false;
  }

  // exec() returns either null or the match array, so one check is enough.
  matches = regex.exec(url_or_embed.trim());
  return matches !== null ? matches[5] : false;
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import nre; | |
proc extractYouTubeVideoId(url_or_embed: string): string =
  ## Receives a URL, short URL, iframe embed or object embed code from
  ## a YouTube video and returns its ID, or an empty string if no YouTube
  ## link is found. The first YouTube link in the input is used.
  # Raw string literal so that `\.` and `\?` reach the regex engine unchanged;
  # the video ID is the fifth capture group (index 4 in `captures`).
  let pattern = re"""(youtu\.be|youtube\.com)/(watch\?(.*&)?v=|(embed|v)/)?([^?&"'>\r\n]+)"""
  for found in url_or_embed.findIter(pattern):
    return found.captures[4]
  return ""
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Pulls the video ID out of a YouTube URL, short URL, embed code
 * or object embed code.
 *
 * @param {String} url_or_embed
 *     String containing a YouTube URL or embed code
 * @return {String}
 *     Video ID, or an empty string when match() yields null
 */
String extract_youtube_videoid(String url_or_embed) {
  String[] groups = match(url_or_embed,"(youtu\\.be|youtube\\.com)\\/(watch\\?(.*&)?v=|(embed|v)\\/)?([^\\?&\"'>\r\n]+)");

  // No match at all: report an empty ID.
  if (groups == null) {
    return "";
  }

  // The ID is the final entry of the returned array.
  int idIndex = groups.length - 1;
  return groups[idIndex];
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Pulls the video ID out of any of YouTube's shareable forms:
 * - Conventional URL;
 * - Short URL;
 * - Iframe embed code;
 * - Object embed code;
 *
 * @param string $url_or_embed URL or embed code to search
 * @return string|false The video ID, or false when nothing matches
 */
function extract_youtube_videoid($url_or_embed)
{
    // The expression is kept in two pieces to keep the lines short.
    $host_and_prefix = "(youtu\.be|youtube\.com)\/(watch\?(.*&)?v=|(embed|v)\/)";
    $id_capture = "?([^\?&\"'>\r\n]+)";
    $captures = array();

    $found = preg_match("/" . $host_and_prefix . $id_capture . "/", $url_or_embed, $captures);

    // In the assembled expression the ID is capture group 5
    if (!$found || !isset($captures[5])) {
        return false;
    }

    return $captures[5];
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
def extract_youtube_videoid(url_or_embed):
    """
    Extracts the video ID from a YouTube URL, short URL, iframe embed code or
    object embed code.

    When the input contains several YouTube links (as object embed codes do),
    the ID of the last one is returned.

    :param url_or_embed: URL or embed code to search
    :return: the video ID as a string, or ``False`` when no YouTube link is
        found
    """
    regex = re.compile(r'(youtu\.be|youtube\.com)\/(watch\?(.*&)?v=|(embed|v)\/)?([^\?&"\'>\r\n]+)')
    matches = regex.findall(url_or_embed)
    if not matches:
        return False

    # findall() yields one tuple of groups per match; the ID is the last
    # group of the last match.
    return matches[-1][-1]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Extracts the video ID from a YouTube URL, short URL, iframe
# embed code or object embed code. The first YouTube link found
# in the input is used.
#
# @param url_or_embed [String] URL or embed code to search
# @return [String, false] the video ID, or false when nothing matches
def extract_youtube_videoid(url_or_embed)
  # A regexp literal keeps "\." and "\?" as literal-dot / literal-question-mark
  # escapes; inside a double-quoted string Ruby drops those backslashes.
  # The video ID is capture group 5.
  pattern = %r{(youtu\.be|youtube\.com)/(watch\?(.*&)?v=|(embed|v)/)?([^?&"'>\r\n]+)}
  matches = pattern.match(url_or_embed)
  return false if matches.nil?

  matches[5]
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
namespace Helper {
  /**
   * Extracts the video ID from a YouTube video URL, short URL, iframe embed
   * or object code. The first YouTube link found in the input is used.
   *
   * @param {string} url_or_embed
   *     URL or embed code to search
   * @returns {string|null}
   *     The video ID, or `null` when the input contains no YouTube link
   */
  export function extractYouTubeVideoId(url_or_embed: string): string | null {
    // A regex literal keeps the escapes intact: inside a plain string "\."
    // and "\?" lose their backslash. The video ID is capture group 5.
    const pattern = /(youtu\.be|youtube\.com)\/(watch\?(.*&)?v=|(embed|v)\/)?([^?&"'>\r\n]+)/;

    // exec() returns either null or the match array, so one check is enough.
    const matches = pattern.exec(url_or_embed.trim());
    return matches !== null ? matches[5] : null;
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This file contains the strings used for testing, | |
# put them in an array, object, json, whatever. | |
# | |
# And yes, I'm a Radiohead fan. | |
https://youtu.be/DXP1KdZX4io | |
https://www.youtube.com/watch?v=TNRCvG9YtYI | |
<iframe width="560" height="315" src="https://www.youtube.com/embed/GoLJJRIWCLU" frameborder="0" allow="autoplay; encrypted-media" allowfullscreen></iframe> | |
<object height="196" width="305" data="http://www.youtube.com/v/QBGaO89cBMI" type="application/x-shockwave-flash"><param name="wmode" value="transparent" /><param name="quality" value="hight" /><param name="src" value="http://www.youtube.com/v/QBGaO89cBMI" /></object> | |
2o3nvmrew |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment