Skip to content

Instantly share code, notes, and snippets.

@tostka
Last active March 28, 2018 02:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tostka/e08edcf251632b996e6f1d6653ca514e to your computer and use it in GitHub Desktop.
Save tostka/e08edcf251632b996e6f1d6653ca514e to your computer and use it in GitHub Desktop.
Interactive IMDb title lookup function that works by parsing the returned HTML pages. It lists the closest matches in a menu, then returns the details of the selected title.
<# TRIMMED GIST, SEE FULL Get-IMDBSearch.ps1 SCRIPT FOR DETAILS#>
#*------v Function Get-IMDBSearch v------
Function Get-IMDBSearch {
    # Interactive IMDb title lookup.
    #   1. Runs a Google "site:imdb.com/title" search for $Title and scrapes the result blocks (class "g").
    #   2. Lists the usable hits as a numbered console menu (IMDb sub-pages such as Trivia/Quotes are skipped,
    #      hits with an already-listed imdbID are dropped) and prompts for a choice; a lone hit is auto-selected.
    #   3. Downloads the chosen IMDb title page and scrapes its details.
    # Output: one ordered dictionary holding the keys listed in $hashfields. Values that cannot be scraped
    #         are "-" (Type/MpaaRating/Genres stay $null when the "subtext" line is not a 3- or 4-field line).
    #         Nothing is emitted when there are no hits, the user aborts, or the chosen hit has no imdbID.
    # NOTE(review): $Title, $showDebug and $rgxImdbID are read here but not defined in the visible part of
    #         the function -- presumably they are declared in the trimmed section below; confirm in the full script.
    <# TRIMMED #>
    $qryUrlRoot = "http://google.com/search?q=site:imdb.com/title" ;
    # Escape the title so characters such as '&', '#' or '+' cannot truncate or alter the query string.
    $url = "$($qryUrlRoot)%20$([uri]::EscapeDataString($Title.trim()))" ;
    $webPage = Invoke-WebRequest -Uri $url ;
    $pageElems = $webPage.AllElements ;
    # @() keeps .Count reliable when exactly one result block comes back.
    $matchedhits = @($pageElems | Where-Object {$_.class -eq "g"} | Select-Object innertext) ;
    if($matchedhits.Count -eq 0){
        write-host "No matches on qry:$($Title)" ;
        return ;
    } ;
    if($showDebug){write-verbose -verbose:$true "Processing $($matchedhits.count) matches:"} ;

    # Result titles that point at a sub-page of a title rather than the title page itself.
    $rgxSubPage = ".*\s-\s(Trivia|Full\sCast\s&\sCrew|Parents\sGuide|Awards|Plot\sSummary|Company\scredits|FAQ|Plot\skeywords|Photo\sGallery|Taglines|Filming\sLocations|Quotes|News|Synopsis|Soundtracks|External\sReviews|Crazy\sCredits|Connections|TV\sschedule|Release\sInfo|Technical\sSpecifications|Video\sGallery)\s-\sIMDb.*" ;
    $mnuItems = @() ;
    foreach ($hit in $matchedhits){
        if(-not $hit.innerText){ continue } ;
        # The code below reads line 0 as the title, line 1 as the text carrying the imdbID, line 3 as the blurb.
        $fields = $hit.innerText.split("`n") ;
        $hitTitle = $fields[0].tostring().trim() ;
        if($hitTitle -match $rgxSubPage){
            if($showDebug){Write-Host "Skipped: $($fields[0])..." ; } ;
            continue ;
        } ;
        # Reset per hit so a block with an unexpected line count cannot inherit the previous hit's summary.
        $Summary = "(no data)" ;
        if(($fields.Count -eq 4) -or ($fields.Count -eq 5)){
            if($fields[3]){
                $Summary = "$($fields[3].substring(0,[System.Math]::Min(50, $fields[3].Length)))..." ;
            } elseif($fields.Count -eq 4){
                $Summary = "(missing 3rd line)" ;
            } ;
        } ;
        if($fields[1] -match $rgxImdbID ){
            $imdbID = $Matches[0] ;
        } else {
            write-warning "FAILED TO MATCH IMDBid FOR $(($fields | out-string).trim())`nSelection will not be openable without a valid IMDBid!";
            $imdbID = "-" ;
        } ;
        $conflict = $mnuItems | Where-Object {$_.imdbID -eq $imdbID} ;
        if($conflict){
            if($showDebug){write-verbose -verbose:$true "$($hitTitle) ($($imdbID))`n$($Summary)`n dupes existing entry $(($conflict|out-string).trim())" ;} ;
        } else {
            $mnuItems += New-Object PSObject -Property ([ordered]@{
                'Title'   = $hitTitle ;
                'imdbID'  = $imdbID ;
                'Summary' = $Summary ;
            }) ;
        } ;
    } ;
    if($mnuItems.Count -eq 0){
        # Every hit was a sub-page; there is nothing to offer.
        write-host "No matches on qry:$($Title)" ;
        return ;
    } ;

    # output 'visible' menu to console
    write-host -ForegroundColor Yellow "Query: '$($Title)'" ;
    $mnuItem = 0 ;
    foreach ($mnu in $mnuItems){
        $mnuItem++ ;
        write-host "$($mnuItem). $($mnu.Title),$($mnu.Summary),$($mnu.imdbID)" ;
    } ;
    $choice = 1 ;
    if($mnuItems.Count -gt 1){
        $exitIndex = $mnuItems.Count + 1 ;
        write-host "$($exitIndex). [Abort & Exit]" ;
        # Re-prompt until the reply is a number that is actually on the menu.
        do {
            $reply = Read-Host 'Enter selection' ;
            $choice = 0 ;
            $valid = ([int]::TryParse($reply, [ref]$choice)) -and ($choice -ge 1) -and ($choice -le $exitIndex) ;
            if(-not $valid){ write-warning "Invalid selection '$($reply)': enter a number from 1 to $($exitIndex)." } ;
        } until ($valid) ;
        if($choice -eq $exitIndex){
            write-host -ForegroundColor green "Exiting..." ;
            # 'return', not 'exit': 'exit' would terminate the caller's whole script/session.
            return ;
        } ;
    } else {
        write-host "single-item menu, defaulting" ;
    } ;
    # Look the choice up by position; matching on Title could return several IDs for same-named titles.
    $picked = $mnuItems[$choice - 1] ;
    $selection = $picked.Title ;
    $TImdbID = $picked.imdbID ;
    if((-not $TImdbID) -or ($TImdbID -eq "-")){
        write-warning "No valid IMDBid for '$($selection)'; cannot open the title page." ;
        return ;
    } ;

    # Pre-create every output key so the result always has the same shape and order.
    $hashfields="Type","MpaaRating","Genres","UsrRatingsStmt","UsrRatingsScore","UsrRatingsCount","RuntimeMinutes","Country","Language","Color","Title","Released","Director","Writers","Stars","Description","Storyline","PlotkeywordsKey","imdbID","imdbURL" ;
    $moviedata = [ordered]@{} ;
    foreach($field in $hashfields){ $moviedata.Add($field, $null) } ;

    # now load the target $TImdbID
    $url = "http://www.imdb.com/title/$($TImdbID)" ;
    write-host -foregroundcolor green "Opening selection: '$($selection)'`nimdbID:$($TImdbID) : $($url)..." ;
    $webPage = Invoke-WebRequest -Uri $url ;
    $pageElems = $webPage.AllElements ;
    # Returns the first element of the title page whose attribute $attr equals $value, or nothing.
    $firstWhere = {
        param([string]$attr, [string]$value)
        $pageElems | Where-Object { $_.$attr -eq $value } | Select-Object -First 1 ;
    } ;

    $nameElem = & $firstWhere "itemprop" "name" ;
    if($nameElem){ $moviedata.Title = ($nameElem.innerHTML -split "&nbsp;")[0] } ;
    if(-not $moviedata.Title){ $moviedata.Title = "-" } ;

    # The "subtext" line is pipe-delimited. Handled layouts: 3 fields (no MPAA rating) or 4 fields
    # (rating first); in both the genres are second-to-last and the release text is last.
    $SummaryLine = @() ;
    $subtext = & $firstWhere "class" "subtext" ;
    if($subtext -and $subtext.innertext){
        $SummaryLine = @($subtext.innertext.split("|") | ForEach-Object { $_.trim() }) ;
    } ;
    $moviedata.Released = "-" ;
    if(($SummaryLine.Count -eq 3) -or ($SummaryLine.Count -eq 4)){
        if($SummaryLine.Count -eq 4){
            $moviedata.MpaaRating = $SummaryLine[0] ;
        } else {
            $moviedata.MpaaRating = "-" ; # always a blank rating on a 3count
        } ;
        $moviedata.Genres = $SummaryLine[-2] ;
        $releaseText = $SummaryLine[-1] ;
        $moviedata.Type = "Movie" ;
        if($releaseText -match "^TV\sSeries"){
            $moviedata.Type = "TV Series" ;
            # Series only expose the starting year.
            if($releaseText -match "^TV\sSeries\s\((\d{4})"){ $moviedata.Released = $Matches[1] } ;
        } elseif($releaseText -match "(\d{1,2}\s\w+\s\d{4})"){
            # Full "day month year" date. No leading '.*' so the day keeps both digits, and the
            # capture does not depend on the "(Country)" suffix being a single word.
            try {
                $moviedata.Released = get-date $Matches[1] -format "MM/dd/yyyy" ;
            } catch {
                $moviedata.Released = "-" ;
            } ;
        } elseif($releaseText -match "(\d{4})\s\(\w*\)"){
            # Year-only release such as "1999 (USA)".
            $moviedata.Released = $Matches[1] ;
        } ;
    } ;

    # 12:22 PM 6/4/2017 ratingValue is optional, pre-test for presence
    $moviedata.UsrRatingsStmt = "-" ;
    $moviedata.UsrRatingsScore = "-" ;
    $moviedata.UsrRatingsCount = "-" ;
    $ratingElem = & $firstWhere "class" "ratingValue" ;
    if($ratingElem){
        # The statement is the first double-quoted value inside the element's HTML.
        $ratingStmt = ($ratingElem.innerhtml -split '"')[1] ;
        if($ratingStmt){
            $ratingStmt = $ratingStmt.trim() ;
            $ratingParts = @($ratingStmt -split "\sbased\son\s") ;
            $moviedata.UsrRatingsStmt = $ratingStmt ;
            $moviedata.UsrRatingsScore = $ratingParts[0].trim() ;
            if($ratingParts.Count -gt 1){
                $moviedata.UsrRatingsCount = $ratingParts[1].replace(" user ratings","").trim() ;
            } ;
        } ;
    } ;

    # 1:47 PM 6/4/2017 films freq don't have writers|Dir|Stars
    $crTexts = @($pageElems | Where-Object {$_.class -eq "credit_summary_item"} | ForEach-Object { "$($_.innerText)" }) ;
    # output key -> label stem; the label is accepted in singular or plural form ("Writer:" / "Writers:")
    $creditLabels = [ordered]@{ Director = "Director" ; Writers = "Writer" ; Stars = "Star" } ;
    foreach($field in $creditLabels.Keys){
        $rgxLabel = "$($creditLabels[$field])s?:\s*" ;
        $moviedata[$field] = "-" ;
        $line = $crTexts | Where-Object { $_ -match $rgxLabel } | Select-Object -First 1 ;
        if($line){
            # Keep only the names: drop the label and anything after the first pipe ("| 1 more credit").
            $value = (($line -split "\|")[0] -replace $rgxLabel,"").trim() ;
            if(($field -eq "Writers") -and ($line -match "\|\s*\d{1,2}\smore\scredits?")){ $value += "..." } ;
            if($value){ $moviedata[$field] = $value } ;
        } ;
    } ;

    # 6:15 PM 9/12/2017 some pages come back with no summary_text
    $summaryElem = & $firstWhere "class" "summary_text" ;
    if($summaryElem -and $summaryElem.innertext){
        $moviedata.Description = $summaryElem.innertext.tostring().trim() ;
    } ;
    if((-not $moviedata.Description) -or ($moviedata.Description -match "^Add\sa\sPlot\s.*")){
        $moviedata.Description = "-" ;
    } ;

    $moviedata.Storyline = "-" ;
    $storyElem = & $firstWhere "class" "inline canwrap" ;
    if($storyElem -and $storyElem.innertext){
        $moviedata.Storyline = $storyElem.innertext.tostring().trim() ;
    } ;

    $moviedata.PlotkeywordsKey = "-" ;
    $keywordElem = & $firstWhere "class" "see-more inline canwrap" ;
    if($keywordElem -and ($keywordElem.outerText -match "(Plot\sKeywords:\s.*\s)\|\sSee\sAll\s\(\d*\)\s.*")){
        $moviedata.PlotkeywordsKey = $Matches[1].tostring().trim() ;
    } ;

    # Country / Language / Color all come from the same "titleDetails" element; pipes become commas.
    $titleDetails = & $firstWhere "id" "titleDetails" ;
    foreach($label in "Country","Language","Color"){
        $moviedata[$label] = "-" ;
        if($titleDetails -and ($titleDetails.innertext -match "$($label):\s*(.*)")){
            # Use the capture group only, so text preceding the label on the same line is not included.
            $value = ($Matches[1] -replace "\s*\|\s*",",").trim() ;
            if($value){ $moviedata[$label] = $value } ;
        } ;
    } ;

    $moviedata.imdbID = $TImdbID ;
    $moviedata.imdbURL = $url ;

    # Runtime is normalised to "<minutes>min"; the last duration element on the page is the one used.
    $moviedata.RuntimeMinutes = "-" ;
    $duration = @($pageElems | Where-Object {$_.itemprop -eq "duration"}) ;
    if($duration.Count -gt 0){
        $runtimeText = "$($duration[-1].innertext)" ;
        if($runtimeText -match "(\d{1,2})h\s*(\d{1,2})min"){
            # e.g. "1h 27min"
            $moviedata.RuntimeMinutes = "$((([int]$Matches[1]) * 60) + ([int]$Matches[2]))min" ;
        } elseif($runtimeText -match "(\d{1,3})\s?min"){
            # e.g. "87 min"
            $moviedata.RuntimeMinutes = "$($Matches[1])min" ;
        } elseif($runtimeText -match "(\d{1,2})h"){
            # whole hours with no minutes part, e.g. "2h"
            $moviedata.RuntimeMinutes = "$(([int]$Matches[1]) * 60)min" ;
        } ;
    } ;

    # dump hash into pipeline (formatting should be handled on receiving end, we just do source data in this func :D)
    $moviedata | write-output ;
} #*------^ END Function Get-IMDBSearch ^------;
<# TRIMMED #>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment