# Interactive IMDb title lookup function; uses XML (HTML) parsing. Lists the closest matches in a menu, then returns the details of the selected choice.
#*------v Function Get-IMDBSearch v------
Function Get-IMDBSearch {
    <#
    .SYNOPSIS
    Get-IMDBSearch - Interactive IMDb title lookup: searches for $Title, lists the
    closest matches in a console menu, then scrapes and returns the details of the
    selected title.
    .DESCRIPTION
    This function declares no Param block; it reads these variables from the
    calling scope:
      $Title     - title text to search for (required; warns and returns if empty)
      $showDebug - when true, emits extra diagnostic output
      $rgxImdbID - regex used to extract the IMDb id from the 2nd line of a search
                   hit; falls back to 'tt\d{7,}' when the caller has not set it
    Both pages are parsed through the .AllElements collection returned by
    Invoke-WebRequest.
    NOTE(review): three literals were missing/garbled in the version this was
    restored from and have been inferred - confirm each against the intended
    behaviour: the search URL root, the IMDb title URL format, and the 'id'
    attribute used to locate the "titleDetails" element.
    .OUTPUTS
    Ordered dictionary with the keys: Type, MpaaRating, Genres, UsrRatingsStmt,
    UsrRatingsScore, UsrRatingsCount, RuntimeMinutes, Country, Language, Color,
    Title, Released, Director, Writers, Stars, Description, Storyline,
    PlotkeywordsKey, imdbID, imdbURL. Scraped fields that could not be found are
    set to "-"; Type/MpaaRating/Genres stay $null when the page's subtext line does
    not split into 3 or 4 segments.
    Nothing is returned when there are no usable matches or the selection is invalid.
    Choosing the "[Abort & Exit]" menu entry calls 'exit'.
    #>

    # Private helper: derive Type and Released from the last "subtext" segment.
    function _Get-ReleaseInfo {
        param([string]$Text)
        $info = @{ Type = "Movie" ; Released = "-" } ;
        if ($Text -match "^TV\sSeries.*$") {
            $info.Type = "TV Series" ;
            # TV series only expose a start year
            if ($Text -match "^TV\sSeries\s\((\d{4}).*") { $info.Released = $matches[1] } ;
        } elseif ($Text -match ".*(\d{1,2}\s\w*\s\d{4}).*") {
            # full date: use the text ahead of a trailing "(Country)" when present,
            # otherwise try the whole segment
            $dateText = $Text ;
            if ($Text -match ".*(?=\s\(\w*\))") { $dateText = $matches[0] } ;
            try {
                $info.Released = get-date $dateText -format "MM/dd/yyyy" ;
            } catch {
                $info.Released = "-" ;
            } ;
        } elseif ($Text -match "(\d{4})\s\(\w*\)") {
            # year-only release, e.g. "2010 (USA)"
            $info.Released = $matches[1] ;
        } ;
        return $info ;
    }

    # Private helper: return the first credit line for $Label ("Director", "Writer", "Star"), or "-".
    function _Get-Credit {
        param([string[]]$Items, [string]$Label)
        foreach ($item in $Items) {
            # accept singular and plural labels ("Director:" / "Directors:")
            if ($item -match "(?s)^\s*$($Label)s?:\s*(.*)$") {
                $all = $matches[1] ;
                # drop anything after the first pipe ("| 1 more credit", "| See full cast...")
                $value = $all.split("|")[0].trim() ;
                if ($all -match "\|\s*\d{1,2}\smore\scredits?") { $value += "..." } ;
                if ($value) { return $value } ;
            } ;
        } ;
        return "-" ;
    }

    # Private helper: return the value following "<Label>:" in the details text (pipes -> commas), or "-".
    function _Get-DetailField {
        param([string]$DetailText, [string]$Label)
        if ($DetailText -match "$([regex]::Escape($Label)):\s*(.*)") {
            $value = $matches[1].trim() -replace "\s*\|\s*", "," ;
            if ($value) { return $value } ;
        } ;
        return "-" ;
    }

    # Private helper: normalise "1h 27min" / "87 min" / "2h" to "<n>min", or "-".
    function _ConvertTo-RuntimeMinutes {
        param([string]$Text)
        if ($Text -match "(\d{1,2})h\s(\d{1,2})min") {
            return "$((([int]$matches[1]) * 60) + ([int]$matches[2]))min" ;
        } elseif ($Text -match "(\d{1,3})\smin") {
            return "$($matches[1])min" ;
        } elseif ($Text -match "(\d{1,2})h") {
            return "$(([int]$matches[1]) * 60)min" ;
        } ;
        return "-" ;
    }

    # --- validate caller-scope inputs ---
    if (-not "$Title".Trim()) {
        write-warning "Get-IMDBSearch: `$Title is empty; nothing to search for." ;
        return ;
    } ;
    # IMDb ids are 'tt' followed by 7 or more digits; only used when the caller supplied no pattern
    if (-not $rgxImdbID) { $rgxImdbID = 'tt\d{7,}' } ;

    # --- search ---
    # NOTE(review): the original URL root literal was empty. A Google web search is
    # inferred from the result parsing below (class "g" hits, "... - IMDb" titles) - confirm.
    $qryUrlRoot = "https://www.google.com/search?q=" ;
    $qryText = [uri]::EscapeDataString("imdb $($Title.trim())") ;
    $url = "$($qryUrlRoot)$($qryText)" ;
    $webPage = Invoke-WebRequest -Uri $url ;
    $pageElems = $webPage.AllElements ;
    $matchedhits = @($pageElems | ?{$_.class -eq "g"} | select innertext) ;
    if ($matchedhits.Count -eq 0) {
        write-host "No matches on qry:$($Title)" ;
        return ;
    } ;
    if($showDebug){write-verbose -verbose:$true "Processing $($matchedhits.count) matches:"} ;

    # hits whose title marks an IMDb sub-page (cast, trivia, ...) rather than the title page itself
    $rgxSubPage = ".*\s-\s(Trivia|Full\sCast\s&\sCrew|Parents\sGuide|Awards|Plot\sSummary|Company\scredits|FAQ|Plot\skeywords|Photo\sGallery|Taglines|Filming\sLocations|Quotes|News|Synopsis|Soundtracks|External\sReviews|Crazy\sCredits|Connections|TV\sschedule|Release\sInfo|Technical\sSpecifications|Video\sGallery)\s-\sIMDb.*" ;

    $mnuItems = @() ;
    foreach ($hit in $matchedhits) {
        if (-not $hit.innerText) { continue } ;
        $fields = $hit.innerText.split("`n") ;
        $hitTitle = $fields[0].tostring().trim() ;

        # skip sub-pages before doing any further work (avoids spurious id warnings)
        if ($hitTitle -match $rgxSubPage) {
            if($showDebug){Write-Host "Skipped: $($fields[0])..." ; } ;
            continue ;
        } ;

        # summary: first 50 chars of the 4th line, when the hit has 4 or 5 lines
        $Summary = "(no data)" ;
        switch ($fields.Count) {
            4 {
                if ("$($fields[3])".Trim()) {
                    $Summary = "$($fields[3].substring(0,[System.Math]::Min(50, $fields[3].Length)))..." ;
                } else {
                    $Summary = "(missing 3rd line)" ;
                } ;
            }
            5 {
                if ("$($fields[3])".Trim()) {
                    $Summary = "$($fields[3].substring(0,[System.Math]::Min(50, $fields[3].Length)))..." ;
                } else {
                    $Summary = "(no data)" ;
                } ;
            }
        } ; # swtch-E

        if ($fields[1] -match $rgxImdbID) {
            $imdbID = $matches[0] ;
        } else {
            write-warning "FAILED TO MATCH IMDBid FOR $(($fields | out-string).trim())`nSelection will not be openable without a valid IMDBid!";
            $imdbID = "-" ;
        } ;

        $props = @{
            'Title'   = $hitTitle ;
            'imdbID'  = $imdbID ;
            'Summary' = $Summary ;
        } ;
        # keep only the first hit per imdbID
        $conflict = $mnuItems | ?{$_.imdbID -eq $props.imdbID} ;
        if (-not $conflict) {
            $mnuItems += New-Object PSObject -Property $props ;
        } elseif ($showDebug) {
            write-verbose -verbose:$true "$($props.Title) ($($props.imdbID))`n$($props.Summary)`n dupes existing entry $(($conflict|out-string).trim())" ;
        } ;
    } # loop-E;

    if ($mnuItems.Count -eq 0) {
        # every hit was a skipped sub-page
        write-host "No matches on qry:$($Title)" ;
        return ;
    } ;

    # --- menu ---
    write-host -ForegroundColor Yellow "Query: '$($Title)'" ;
    $mnuItem = 0 ;
    foreach ($mnu in $mnuItems) {
        $mnuItem++ ;
        # output 'visible' menu to console
        write-host "$($mnuItem). $($mnu.Title),$($mnu.Summary),$($mnu.imdbID)" ;
    } ;
    if ($mnuItems.Count -gt 1) {
        $mnuExitText = "[Abort & Exit]" ;
        $exitChoice = $mnuItems.Count + 1 ;
        write-host "$($exitChoice). $($mnuExitText)" ;
        $rawChoice = Read-Host 'Enter selection' ;
        $choice = 0 ;
        if ((-not [int]::TryParse($rawChoice, [ref]$choice)) -or ($choice -lt 1) -or ($choice -gt $exitChoice)) {
            write-warning "Invalid selection '$($rawChoice)'; expected 1-$($exitChoice)." ;
            return ;
        } ;
        if ($choice -eq $exitChoice) {
            write-host -ForegroundColor green "Exiting..." ;
            # kept as 'exit' (terminates the calling script/session), as in the prior version
            exit ;
        } ;
    } else {
        write-host "single-item menu, defaulting"
        $choice = 1 ;
    } ;

    # resolve by index, so two entries sharing a title cannot yield multiple ids
    $picked = $mnuItems[$choice - 1] ;
    $selection = $picked.Title ;
    $TImdbID = $picked.imdbID ;
    if ((-not $TImdbID) -or ($TImdbID -eq "-")) {
        write-warning "Selection '$($selection)' has no IMDBid; cannot open it." ;
        return ;
    } ;

    # --- result container: fixed key order, all fields pre-created ---
    $moviedata = [ordered]@{} ;
    $hashfields="Type","MpaaRating","Genres","UsrRatingsStmt","UsrRatingsScore","UsrRatingsCount","RuntimeMinutes","Country","Language","Color","Title","Released","Director","Writers","Stars","Description","Storyline","PlotkeywordsKey","imdbID","imdbURL" ;
    $hashfields |%{$moviedata.Add("$($_)",$($null)) ; } ;

    # --- load the selected title page ---
    # NOTE(review): URL root was missing in the original; assumes $TImdbID is a bare 'tt…' id - confirm.
    $url = "https://www.imdb.com/title/$($TImdbID)/" ;
    write-host -foregroundcolor green "Opening selection: '$($selection)'`nimdbID:$($TImdbID) : $($url)..." ;
    $webPage = Invoke-WebRequest -Uri $url ;
    $pageElems = $webPage.AllElements ;

    $nameElem = @($pageElems | ?{$_.itemprop -eq "name"})[0] ;
    if ($nameElem -and $nameElem.innerHTML) {
        $moviedata.Title = ($nameElem.innerHTML -split "&nbsp;")[0] ;
    } else {
        $moviedata.Title = "-" ;
    } ;

    # subtext line: pipe-delimited; 3 segments = no MPAA rating, 4 segments = rating first
    $SummaryLine = @() ;
    $subtextElem = @($pageElems | ?{$_.class -eq "subtext"})[0] ;
    if ($subtextElem -and $subtextElem.innertext) {
        $SummaryLine = @($subtextElem.innertext.split("|") | %{ $_.trim() }) ;
    } ;
    switch ($SummaryLine.Count) {
        3 {
            $moviedata.MpaaRating = "-" ; # always a blank rating on a 3count
            $moviedata.Genres = $SummaryLine[1] ;
            $relInfo = _Get-ReleaseInfo -Text $SummaryLine[2] ;
            $moviedata.Type = $relInfo.Type ;
            $moviedata.Released = $relInfo.Released ;
        } # swtch-3-E
        4 {
            $moviedata.MpaaRating = $SummaryLine[0] ;
            $moviedata.Genres = $SummaryLine[2] ;
            $relInfo = _Get-ReleaseInfo -Text $SummaryLine[3] ;
            $moviedata.Type = $relInfo.Type ;
            $moviedata.Released = $relInfo.Released ;
        } # swtch-4-E
    } ;
    if(!$moviedata.Released){$moviedata.Released = "-" ; } ;

    # ratingValue is optional: default to "-" and only overwrite when it parses
    $moviedata.UsrRatingsStmt = "-" ;
    $moviedata.UsrRatingsScore = "-" ;
    $moviedata.UsrRatingsCount = "-" ;
    $ratingElem = @($pageElems | ?{$_.class -eq "ratingValue"})[0] ;
    if ($ratingElem -and $ratingElem.innerhtml) {
        # the statement is the first double-quoted attribute value in the element's markup
        $quoted = $ratingElem.innerhtml.split('"') ;
        if ($quoted.Count -gt 1) {
            $moviedata.UsrRatingsStmt = $quoted[1].tostring().trim() ;
            $stmtParts = @($moviedata.UsrRatingsStmt -split "\sbased\son\s") ;
            $moviedata.UsrRatingsScore = $stmtParts[0].tostring().trim() ;
            if ($stmtParts.Count -gt 1) {
                $moviedata.UsrRatingsCount = $stmtParts[1].tostring().replace(" user ratings","").trim() ;
            } ;
        } ;
    } ;

    # films frequently lack writers|director|stars; each is resolved independently
    $crSum = @($pageElems | ?{$_.class -eq "credit_summary_item"} | %{ $_.innertext } | ?{ $_ }) ;
    $moviedata.Director = _Get-Credit -Items $crSum -Label "Director" ;
    $moviedata.Writers  = _Get-Credit -Items $crSum -Label "Writer" ;
    $moviedata.Stars    = _Get-Credit -Items $crSum -Label "Star" ;

    # some pages come back with no summary_text
    $summaryElem = @($pageElems | ?{$_.class -eq "summary_text"})[0] ;
    if ($summaryElem -and $summaryElem.innertext) {
        $moviedata.Description = $summaryElem.innertext.tostring().trim() ;
    } ;
    if(($moviedata.Description -match "^Add\sa\sPlot\s.*") -OR (!$moviedata.Description)){
        $moviedata.Description = "-" ;
    } ;

    $storyElem = @($pageElems | ?{$_.class -eq "inline canwrap"})[0] ;
    if ($storyElem -and $storyElem.innertext) {
        $moviedata.Storyline = $storyElem.innertext.tostring().trim() ;
    } else {
        $moviedata.Storyline = "-" ;
    } ;

    $moviedata.PlotkeywordsKey = "-" ;
    $keywordElem = @($pageElems | ?{$_.class -eq "see-more inline canwrap"})[0] ;
    if ($keywordElem -and ($keywordElem.outerText -match "(Plot\sKeywords:\s.*\s)\|\sSee\sAll\s\(\d*\)\s.*")) {
        $moviedata.PlotkeywordsKey = $matches[1].tostring().trim() ;
    } ;

    # NOTE(review): the property name was garbled in the original ('$ -eq "titleDetails"');
    # restored as the element's 'id' - confirm.
    $detailElem = @($pageElems | ?{$_.id -eq "titleDetails"})[0] ;
    $detailText = "" ;
    if ($detailElem -and $detailElem.innertext) { $detailText = $detailElem.innertext } ;
    $moviedata.Country  = _Get-DetailField -DetailText $detailText -Label "Country" ;
    $moviedata.Language = _Get-DetailField -DetailText $detailText -Label "Language" ;
    $moviedata.Color    = _Get-DetailField -DetailText $detailText -Label "Color" ;

    $moviedata.imdbID = $TImdbID ;
    $moviedata.imdbURL = $url ;

    # runtime: the last 'duration' element is used
    $duration = @($pageElems | ?{$_.itemprop -eq "duration"}) ;
    if ($duration.Count -gt 0) {
        $moviedata.RuntimeMinutes = _ConvertTo-RuntimeMinutes -Text $duration[-1].innertext ;
    } else {
        $moviedata.RuntimeMinutes = "-" ;
    } ;

    # dump hash into pipeline (formatting should be handled on receiving end, we just do source data in this func)
    write-output $moviedata ;
} #*------^ END Function Get-IMDBSearch ^------;
