Skip to content

Instantly share code, notes, and snippets.

@grayatrox
Created September 30, 2012 07:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save grayatrox/3806195 to your computer and use it in GitHub Desktop.
Save grayatrox/3806195 to your computer and use it in GitHub Desktop.
MovieInfo
;#include lib\movies.ahk
; Please take heed of this warning. This script renames folders, and the data it gets from IMDB isn't always accurate.
; Every rename is confirmed with the user first, and a failed rename is reported instead of being counted.
Msgbox, 16, Warning!, Warning!`nThis script not smart!`n`nIt WILL occasionally get it wrong!
; This script is designed to get the correct movie dates and titles.
; For this to work, each movie has to be in its own folder.
; Folder formatting is movietitle[release date].
; A folder named with just the movie title is also accepted; its local date is then treated as blank.
Gui, +OwnDialogs
OnMessage(0x53, "WM_HELP") ; 0x53 = WM_HELP, sent when the Help button of a message box is pressed
dest := "E:\Movies\" ; this also allows you to move a collection to a new directory
source := "E:\Movies\"
flags := ( FOF_NOCONFIRMMKDIR := 0x200 ) ; leftover from the script this was adapted from; nothing below reads it
changes := 0 ; number of folders successfully renamed/moved
changeMade := false ; set when the user accepts a new title or date for the current folder
count := 0
Loop, %source%*,1 ; count how many entries we have to process so the progress bar range is accurate
    count := A_Index
Progress, r0-%count% y0,,Loading..., Processing files`n
movies := {}
Loop, %source%*,1
{
    movie := {}
    Progress, %A_Index%,, Processing: %A_LoopFileName%
    movie.folder := A_LoopFileName
    ; Title = everything before the first "[" (the whole name when there is no "[").
    movie.name := title := RegExReplace(A_LoopFileName, "(.*?)\[(.*)", "$1")
    ; Date = the text inside the first [...] pair, or blank when the folder name has none.
    ; (A RegExReplace here would hand back the whole folder name when nothing matches.)
    date := RegExMatch(A_LoopFileName, "\[(.*?)\]", bracket) ? bracket1 : ""
    movie.date := date
    movie.path := A_LoopFileFullPath
    if (getMovie(title, date)) { ; title and date are ByRef: on success they hold what IMDB returned
        title := sanitiseTitle(title) ; convert the title found on IMDB into a "Windows safe" format
        if (!InStr(title, movie.name)) { ; compare the local name against the one found on IMDB, and offer to change it
            Msgbox,20772,Title Missmatch, % title " was downloaded from IMDB.`nUse " title " instead of " movie.name "?`n`nClick Help to open data source(opens a webpage)"
            IfMsgBox Yes
            {
                changeMade := true
                movie.name := title ; change the local title to the one found on IMDB
            }
        }
        ; Only offer a date change when IMDB actually supplied one.
        if (date != "" && movie.date != date) {
            Msgbox,20772,Date Missmatch, % "The date for " movie.name " was downloaded from IMDB.`nUse " date " instead of " movie.date "?`n`nClick Help to open data source(opens a webpage)"
            IfMsgBox Yes
            {
                changeMade := true
                movie.date := date
            }
        }
        if (changeMade) { ; this stops every folder from being pointlessly renamed
            path := movie.path
            ; Leave the brackets off entirely when there is no date, rather than producing "Title[]".
            suffix := (movie.date != "") ? "[" movie.date "]" : ""
            newPath := dest movie.name suffix
            FileMoveDir, %path%, %newPath%
            if (ErrorLevel) ; FileMoveDir sets ErrorLevel to 1 on failure (e.g. the destination already exists)
                Msgbox, 16, Rename Failed, % "Could not move:`n" path "`nto:`n" newPath
            else
                changes++
            changeMade := false
        }
    } else { ; looks like getMovie() couldn't find anything on IMDB (this is unusual)
        Msgbox % "Couldn't find any information about: " movie.name
    }
}
Progress, Off ; close the progress window before the final report
Msgbox,4160,Process Complete!, Process Complete with %changes% changes
ExitApp
; Some of these functions are not used in this script, but I was lazy when I dropped them in from my includes file.
sanitiseTitle(str) {
    ; Converts a movie title into a string that is safe to use as a Windows file/folder name.
    ;   str     - the title to clean up
    ;   returns - the title with "?" removed, each of \ / : * " < > | replaced by " - ",
    ;             and runs of spaces collapsed to a single space
    ; Both replacements are always applied, so a title containing "?" still has its
    ; other illegal characters handled.
    ret := RegExReplace(str, "\?")                     ; drop question marks so they take up no room
    ret := RegExReplace(ret, "[\\/:*""<>|]", " - ")    ; remaining characters Windows forbids in names
    return RegExReplace(ret, " {2,}", " ")             ; collapse the double spaces the " - " padding can create
}
WM_HELP(){
    ; Message handler registered for WM_HELP (0x53) by the main script.
    ; Opens the IMDB title search for the most recent query, which getMovie()
    ; stores (already URL-encoded) in the global SearchTitle.
    global SearchTitle
    helpUrl := "http://www.imdb.com/find?q=" . SearchTitle . "&s=tt"
    Run, %helpUrl%
}
getMovie(byRef title, byRef date) {
    ; Looks a movie up on IMDB's title search and hands back what it finds.
    ;   title (ByRef) - in: the title to search for; out: the title found (blank on failure)
    ;   date  (ByRef) - out: the first 4-digit number found next to the title (blank on failure or if none)
    ;   returns 1 when data about the requested movie was found, 0 otherwise
    ; Side effects: stores the URL-encoded query in the global SearchTitle (used by WM_HELP),
    ; and downloads to / deletes "find.htm" in the working directory.
    ; NOTE(review): the parsing below depends on the page layout IMDB served when this was
    ; written; confirm it still matches before relying on the results.
    global SearchTitle
    title := EncodeURL(title)
    SearchTitle := title
    URLDownloadToFile, http://www.imdb.com/find?q=%title%&s=tt, find.htm
    if (ErrorLevel) { ; download failed: make sure no stale find.htm from an earlier run gets parsed
        FileDelete, find.htm
        date := ""
        title := ""
        return 0
    }
    found := 0       ; set once a usable title has been extracted
    directHit := 0   ; set when IMDB redirected straight to the movie page
    title_index := 0
    date_index := 0
    Loop, Read, find.htm
    {
        if (directHit) {
            ; We are past the header of a movie page: pick up the title and date lines,
            ; skipping the lines in between. Stopping only after the date line is what
            ; allows the date to be read at all.
            if (A_Index = title_index) {
                title := A_LoopReadLine ; the title is kept on the line after the header
            } else if (A_Index = date_index) {
                RegExMatch(A_LoopReadLine, "[0-9]{4,4}", date) ; get the date
                break
            }
            continue
        }
        ; Let's see if IMDB redirected straight to the movie page.
        if (RegExMatch(A_LoopReadLine, "<h1 class=""header"" itemprop=""name"">")) {
            found := 1
            directHit := 1
            title_index := A_Index + 1
            date_index := A_Index + 5
            continue
        }
        ; Otherwise look for a results section, in decreasing order of confidence:
        ; popular titles, exact matches, then approximate matches.
        ; Each pattern spans the whole line, so str receives the complete line.
        if (RegExMatch(A_LoopReadLine, "(.*?)<b>Popular Titles</b>(.*)", str)
         || RegExMatch(A_LoopReadLine, "(.*?)Titles \(Exact Matches\)(.*)", str)
         || RegExMatch(A_LoopReadLine, "(.*?)Titles \(Approx Matches\)(.*)", str)) {
            while (StrLen(str) > 0) {
                ; Text following the first result link ...
                title := RegExReplace(str, "(.*?)<br><a(.*?)>(.*)", "$3")
                RegExMatch(title, "[0-9]{4,4}", date) ; ... contains the date ...
                title := RegExReplace(title, "[<].*") ; ... and the title is everything before the next tag
                if (RegExMatch(title, "&#x22;")) {
                    ; Result title contains an encoded double quote: skip it and
                    ; retry with whatever follows the next closing </a>.
                    str := SubStr(str, RegExMatch(str, "</a>") + 4)
                    continue
                }
                found := 1
                break
            }
            if (found) ; Ok, we found the title, we don't need to do any more.
                break
        }
        ; If still not found, well...
        ; There is more matching data, but it isn't as accurate, so it is left out here.
    }
    FileDelete, find.htm
    if (found) {
        title := DecodeURL(HTML_Decode(title))
        return 1
    }
    date := ""
    title := ""
    return 0
}
HTML_Decode( string ) { ; --------------------------------------------------------------------------
    ; Function by [VxE]. Transforms HTML entities (like &#123; or &#xab; or &amp;) into their actual characters.
    ;   string  - text that may contain named, decimal (&#NNN;) or hexadecimal (&#xHH;) entities
    ;   returns - the text with every recognised entity replaced; unrecognised entities and
    ;             lone ampersands are passed through unchanged
    ; ErrorLevel and the StringCaseSense setting are saved on entry and restored on exit.
    ; NOTE: unless you are using a unicode version of AHK, characters with a character code above 255
    ; are emitted as their UTF-8 byte sequence. Named entity codes were ripped from here:
    ; http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references
    ; The table is a flat list of "&name,HEXCODE" pairs, terminated by a final "&".
    Static NamedEntities := "
    ( LTRIM JOIN
    &quot,22&amp,26&apos,27&lt,3C&gt,3E&nbsp,A0&iexcl,A1&cent,A2&pound,A3&curren,A4&yen,A5
    &brvbar,A6&sect,A7&uml,A8&copy,A9&ordf,AA&laquo,AB&not,AC&shy,AD&reg,AE&macr,AF&deg,B0
    &plusmn,B1&sup2,B2&sup3,B3&acute,B4&micro,B5&para,B6&middot,B7&cedil,B8&sup1,B9&ordm,BA
    &raquo,BB&frac14,BC&frac12,BD&frac34,BE&iquest,BF&Agrave,C0&Aacute,C1&Acirc,C2&Atilde,C3
    &Auml,C4&Aring,C5&AElig,C6&Ccedil,C7&Egrave,C8&Eacute,C9&Ecirc,CA&Euml,CB&Igrave,CC
    &Iacute,CD&Icirc,CE&Iuml,CF&ETH,D0&Ntilde,D1&Ograve,D2&Oacute,D3&Ocirc,D4&Otilde,D5&Ouml,D6
    &times,D7&Oslash,D8&Ugrave,D9&Uacute,DA&Ucirc,DB&Uuml,DC&Yacute,DD&THORN,DE&szlig,DF
    &agrave,E0&aacute,E1&acirc,E2&atilde,E3&auml,E4&aring,E5&aelig,E6&ccedil,E7&egrave,E8
    &eacute,E9&ecirc,EA&euml,EB&igrave,EC&iacute,ED&icirc,EE&iuml,EF&eth,F0&ntilde,F1&ograve,F2
    &oacute,F3&ocirc,F4&otilde,F5&ouml,F6&divide,F7&oslash,F8&ugrave,F9&uacute,FA&ucirc,FB
    &uuml,FC&yacute,FD&thorn,FE&yuml,FF&OElig,152&oelig,153&Scaron,160&scaron,161&Yuml,178
    &fnof,192&circ,2C6&tilde,2DC&Alpha,391&Beta,392&Gamma,393&Delta,394&Epsilon,395&Zeta,396
    &Eta,397&Theta,398&Iota,399&Kappa,39A&Lambda,39B&Mu,39C&Nu,39D&Xi,39E&Omicron,39F&Pi,3A0
    &Rho,3A1&Sigma,3A3&Tau,3A4&Upsilon,3A5&Phi,3A6&Chi,3A7&Psi,3A8&Omega,3A9&alpha,3B1&beta,3B2
    &gamma,3B3&delta,3B4&epsilon,3B5&zeta,3B6&eta,3B7&theta,3B8&iota,3B9&kappa,3BA&lambda,3BB
    &mu,3BC&nu,3BD&xi,3BE&omicron,3BF&pi,3C0&rho,3C1&sigmaf,3C2&sigma,3C3&tau,3C4&upsilon,3C5
    &phi,3C6&chi,3C7&psi,3C8&omega,3C9&thetasym,3D1&upsih,3D2&piv,3D6&ensp,2002&emsp,2003
    &thinsp,2009&zwnj,200C&zwj,200D&lrm,200E&rlm,200F&ndash,2013&mdash,2014&lsquo,2018
    &rsquo,2019&sbquo,201A&ldquo,201C&rdquo,201D&bdquo,201E&dagger,2020&Dagger,2021&bull,2022
    &hellip,2026&permil,2030&prime,2032&Prime,2033&lsaquo,2039&rsaquo,203A&oline,203E
    &frasl,2044&euro,20AC&image,2111&weierp,2118&real,211C&trade,2122&alefsym,2135&larr,2190
    &uarr,2191&rarr,2192&darr,2193&harr,2194&crarr,21B5&lArr,21D0&uArr,21D1&rArr,21D2&dArr,21D3
    &hArr,21D4&forall,2200&part,2202&exist,2203&empty,2205&nabla,2207&isin,2208&notin,2209
    &ni,220B&prod,220F&sum,2211&minus,2212&lowast,2217&radic,221A&prop,221D&infin,221E&ang,2220
    &and,2227&or,2228&cap,2229&cup,222A&int,222B&there4,2234&sim,223C&cong,2245&asymp,2248
    &ne,2260&equiv,2261&le,2264&ge,2265&sub,2282&sup,2283&nsub,2284&sube,2286&supe,2287
    &oplus,2295&otimes,2297&perp,22A5&sdot,22C5&lceil,2308&rceil,2309&lfloor,230A&rfloor,230B
    &lang,27E8&rang,27E9&loz,25CA&spades,2660&clubs,2663&hearts,2665&diams,2666&
    )"
    oel := ErrorLevel, oscs := A_StringCaseSense
    ; Entity names are case sensitive (&Alpha vs &alpha), so the table lookup must be too.
    StringCaseSense, On
    Loop, Parse, string, & ; parse the string by ampersands, because they neatly delimit entities
        If ( A_Index = 1 )
            string := A_LoopField ; the part before the first ampersand can't contain an entity
        Else
        {
            StringGetPos, pos, A_LoopField, % ";" ; find the ending ';' of the entity
            If !( ErrorLevel )
            {
                StringLeft, entity, A_LoopField, pos
                ; See if the entity is a named entity. The comma is escaped so that it is part of
                ; the search text: looking for "&name," matches only the exact table key. Without
                ; it the lookup is a prefix match and e.g. &sup; hits "&sup2" and fails to decode.
                StringGetPos, np, NamedEntities, &%entity%`,
                If !( ErrorLevel )
                {
                    ; Use the static lookup list to convert the entity name to a number.
                    ; np becomes the 1-based position of the hex code that follows "&name,".
                    np += 3 + StrLen( entity )
                    StringGetPos, cp, NamedEntities, &,, np ; the next '&' marks the end of the code
                    StringMid, entity, NamedEntities, np, 1 + cp - np
                    entity := "0x" entity
                }
                Else StringReplace, entity, entity, #, 0 ; numeric entity: "#123" -> "0123", "#x22" -> "0x22"
                If entity IS INTEGER ; convert the numeric entity to character(s)
                {
                    If ( A_IsUnicode ) || ( entity < 0x100 ) ; normal character conversion
                        string .= Chr( entity )
                    Else If ( entity < 0x800 ) ; UTF-8 character conversion
                        string .= Chr( 0xC0 | ( entity >> 6 ) ) Chr( 0x80 | ( entity & 63 ) )
                    Else If ( entity < 0x10000 )
                        string .= Chr( 0xE0 | ( entity >> 12 ) ) Chr( 0x80 | ( ( entity >> 6 ) & 63 ) ) Chr( 0x80 | ( entity & 63 ) )
                    Else If ( entity < 0x110000 )
                        string .= Chr( 0xF0 | ( entity >> 18 ) ) Chr( 0x80 | ( ( entity >> 12 ) & 63 ) ) Chr( 0x80 | ( ( entity >> 6 ) & 63 ) ) Chr( 0x80 | ( entity & 63 ) )
                    Else string .= "&#" SubStr( entity, 2 ) ";" ; unknown character code ?
                }
                Else string .= "&" SubStr( A_LoopField, 1, pos + 1 ) ; unrecognized entity
                string .= SubStr( A_LoopField, pos + 2 ) ; append the rest of the substring
            }
            Else string .= "&" A_LoopField ; so a lone '&' was found... just move along.
        }
    StringCaseSense, % oscs
    ; Returns the decoded string; the second sub-expression restores the caller's ErrorLevel.
    Return string, ErrorLevel := oel
} ; HTML_Decode( string ) --------------------------------------------------------------------------
EncodeURL( p_data, p_reserved=true, p_encode=true )
{
    ; Percent-encodes (or decodes) the characters listed in the table below.
    ;   p_data     - the text to convert
    ;   p_reserved - when true, the characters $ & + , / : ; = ? @ are converted as well
    ;   p_encode   - true: replace each listed character with %XX; false: replace each %XX with its character
    ;   returns    - the converted text
    ; The table is a run of two-digit hex character codes. "25" (the percent sign) is first on
    ; purpose: when encoding, '%' has to be escaped before any other escape introduces new '%' signs.
    ; NOTE(review): each code is turned into a single character with Chr(), so characters
    ; above 0xFF are never encoded — confirm that is acceptable for the callers.
    unsafe =
    ( Join LTrim
        25000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F20
        22233C3E5B5C5D5E607B7C7D7F808182838485868788898A8B8C8D8E8F9091929394
        95969798999A9B9C9D9E9FA0A1A2A3A4A5A6A7A8A9AAABACADAEAFB0B1B2B3B4B5B6
        B7B8B9BABBBCBDBEBFC0C1C2C3C4C5C6C7C8C9CACBCCCDCECFD0D1D2D3D4D5D6D7D8
        D9DADBDCDDDEDF7EE0E1E2E3E4E5E6E7E8E9EAEBECEDEEEFF0F1F2F3F4F5F6F7F8F9
        FAFBFCFDFEFF
    )
    if ( p_reserved )
        unsafe = %unsafe%24262B2C2F3A3B3D3F40
    ; The tokens are read from the table as text, so no integer formatting (SetFormat) is needed.
    tokenCount := StrLen( unsafe ) // 2
    if ( p_encode )
    {
        loop, %tokenCount%
        {
            StringMid, token, unsafe, A_Index*2-1, 2
            StringReplace, p_data, p_data, % Chr( "0x" token ), `%%token%, all
        }
    }
    else
    {
        ; Decode in reverse table order so that %25 is handled LAST. Decoding it first would
        ; turn "%2520" (an encoded literal "%20") into "%20" and then wrongly into a space.
        loop, %tokenCount%
        {
            StringMid, token, unsafe, (tokenCount - A_Index)*2 + 1, 2
            StringReplace, p_data, p_data, `%%token%, % Chr( "0x" token ), all
        }
    }
    return p_data
}
DecodeURL( p_data )
{
    ; Reverses EncodeURL(): runs it in decode mode with the reserved character set included.
    decoded := EncodeURL( p_data, true, false )
    return decoded
}
ShellFileOperation( fileO=0x0, fSource="", fTarget="", flags=0x0, ghwnd=0x0 )
{
    ; Wrapper around the Windows shell function SHFileOperation.
    ;   fileO   - the operation: a number, or one of the names FO_MOVE, FO_COPY, FO_DELETE, FO_RENAME
    ;   fSource - source path(s), separated by "|"
    ;   fTarget - target path(s), separated by "|"
    ;   flags   - a number, or FOF_* names joined with "|" (e.g. "FOF_SILENT|FOF_NOCONFIRMATION")
    ;   ghwnd   - window handle passed to the shell as the owner of any dialogs
    ;   returns - the fAnyOperationsAborted member of the structure after the call
    ; ErrorLevel is set to the text produced by ShellFileOperation_InterpretReturn() for the API's return code.
    ; Requires AutoHotkey_L (uses A_PtrSize and the "Ptr" type) so the structure is laid out
    ; correctly on both 32-bit and 64-bit builds.
    ;dout_f(A_ThisFunc)
    ; The constants below are looked up by NAME through the %var% double-dereferences further
    ; down, so they must stay defined even though nothing references them directly.
    FO_MOVE := 0x1
    FO_COPY := 0x2
    FO_DELETE := 0x3
    FO_RENAME := 0x4
    FOF_MULTIDESTFILES := 0x1 ; Indicates that the to member specifies multiple destination files (one for each source file) rather than one directory where all source files are to be deposited.
    FOF_SILENT := 0x4 ; Does not display a progress dialog box.
    FOF_RENAMEONCOLLISION := 0x8 ; Gives the file being operated on a new name (such as "Copy #1 of...") in a move, copy, or rename operation if a file of the target name already exists.
    FOF_NOCONFIRMATION := 0x10 ; Responds with "yes to all" for any dialog box that is displayed.
    FOF_ALLOWUNDO := 0x40 ; Preserves undo information, if possible. With del, uses recycle bin.
    FOF_FILESONLY := 0x80 ; Performs the operation only on files if a wildcard filename (*.*) is specified.
    FOF_SIMPLEPROGRESS := 0x100 ; Displays a progress dialog box, but does not show the filenames.
    FOF_NOCONFIRMMKDIR := 0x200 ; Does not confirm the creation of a new directory if the operation requires one to be created.
    FOF_NOERRORUI := 0x400 ; don't put up error UI
    FOF_NOCOPYSECURITYATTRIBS := 0x800 ; dont copy file security attributes
    FOF_NORECURSION := 0x1000 ; Only operate in the specified directory. Don't operate recursively into subdirectories.
    FOF_NO_CONNECTED_ELEMENTS := 0x2000 ; Do not move connected files as a group (e.g. html file together with images). Only move the specified files.
    FOF_WANTNUKEWARNING := 0x4000 ; Send a warning if a file is being destroyed during a delete operation rather than recycled. This flag partially overrides FOF_NOCONFIRMATION.
    ; Accept either a constant's name or a plain number for the operation.
    fileO := %fileO% ? %fileO% : fileO
    ; OR together every named flag; if that yields nothing, fall back to a single name or a plain number.
    _flags := 0
    Loop Parse, flags, |
        _flags |= %A_LoopField%
    flags := _flags ? _flags : (%flags% ? %flags% : flags)
    ; Make sure both lists end with a separator. Every "|" is then overwritten with a null
    ; character, which together with the string's own terminator gives a double-null-terminated list.
    If ( SubStr(fSource,0) != "|" )
        fSource := fSource . "|"
    If ( SubStr(fTarget,0) != "|" )
        fTarget := fTarget . "|"
    char_size := A_IsUnicode ? 2 : 1
    char_type := A_IsUnicode ? "UShort" : "Char"
    fsPtr := &fSource
    Loop % StrLen(fSource)
        if NumGet(fSource, (A_Index-1)*char_size, char_type) = 124 ; 124 = "|"
            NumPut(0, fSource, (A_Index-1)*char_size, char_type)
    ftPtr := &fTarget
    Loop % StrLen(fTarget)
        if NumGet(fTarget, (A_Index-1)*char_size, char_type) = 124
            NumPut(0, fTarget, (A_Index-1)*char_size, char_type)
    ; SHFILEOPSTRUCT layout: hwnd, wFunc, pFrom, pTo, fFlags, fAnyOperationsAborted, ...
    ; 32-bit builds pack it byte-wise (fFlags at 16, aborted flag at 18); 64-bit builds use
    ; natural alignment (fFlags at 32, aborted flag at 36). Writing every member as a 4-byte
    ; value would truncate the pointers and misplace the members on 64-bit.
    flagsOffset := 4 * A_PtrSize
    abortedOffset := flagsOffset + (A_PtrSize = 8 ? 4 : 2)
    VarSetCapacity( SHFILEOPSTRUCT, 60, 0 ) ; large enough for both layouts (30 / 56 bytes)
    NumPut( ghwnd, SHFILEOPSTRUCT, 0, "Ptr" ) ; hWnd of calling GUI
    NumPut( fileO, SHFILEOPSTRUCT, A_PtrSize, "UInt" ) ; File operation
    NumPut( fsPtr, SHFILEOPSTRUCT, 2 * A_PtrSize, "Ptr" ) ; Source file / pattern
    NumPut( ftPtr, SHFILEOPSTRUCT, 3 * A_PtrSize, "Ptr" ) ; Target file / folder
    NumPut( flags, SHFILEOPSTRUCT, flagsOffset, "UShort" ) ; options
    code := DllCall( "Shell32\SHFileOperation" . (A_IsUnicode ? "W" : "A"), "Ptr", &SHFILEOPSTRUCT )
    ErrorLevel := ShellFileOperation_InterpretReturn(code)
    Return NumGet( SHFILEOPSTRUCT, abortedOffset, "UInt" )
}
ShellFileOperation_InterpretReturn(c)
{
    ; Translates a return code of SHFileOperation into a readable description.
    ;   c       - the numeric return code
    ;   returns - "" for 0 (success), the matching description for a known code,
    ;             or "Error code not recognized" for anything else
    ; The table is built on the first call and kept in a static variable.
    static dict
    if (!IsObject(dict))
    {
        dict := Object()
        dict[0x0] := ""
        dict[0x71] := "DE_SAMEFILE - The source and destination files are the same file."
        dict[0x72] := "DE_MANYSRC1DEST - Multiple file paths were specified in the source buffer, but only one destination file path."
        dict[0x73] := "DE_DIFFDIR - Rename operation was specified but the destination path is a different directory. Use the move operation instead."
        dict[0x74] := "DE_ROOTDIR - The source is a root directory, which cannot be moved or renamed."
        dict[0x75] := "DE_OPCANCELLED - The operation was cancelled by the user, or silently cancelled if the appropriate flags were supplied to SHFileOperation."
        dict[0x76] := "DE_DESTSUBTREE - The destination is a subtree of the source."
        dict[0x78] := "DE_ACCESSDENIEDSRC - Security settings denied access to the source."
        dict[0x79] := "DE_PATHTOODEEP - The source or destination path exceeded or would exceed MAX_PATH."
        dict[0x7A] := "DE_MANYDEST - The operation involved multiple destination paths, which can fail in the case of a move operation."
        dict[0x7C] := "DE_INVALIDFILES - The path in the source or destination or both was invalid."
        dict[0x7D] := "DE_DESTSAMETREE - The source and destination have the same parent folder."
        dict[0x7E] := "DE_FLDDESTISFILE - The destination path is an existing file."
        dict[0x80] := "DE_FILEDESTISFLD - The destination path is an existing folder."
        dict[0x81] := "DE_FILENAMETOOLONG - The name of the file exceeds MAX_PATH."
        dict[0x82] := "DE_DEST_IS_CDROM - The destination is a read-only CD-ROM, possibly unformatted."
        dict[0x83] := "DE_DEST_IS_DVD - The destination is a read-only DVD, possibly unformatted."
        dict[0x84] := "DE_DEST_IS_CDRECORD - The destination is a writable CD-ROM, possibly unformatted."
        dict[0x85] := "DE_FILE_TOO_LARGE - The file involved in the operation is too large for the destination media or file system."
        dict[0x86] := "DE_SRC_IS_CDROM - The source is a read-only CD-ROM, possibly unformatted."
        dict[0x87] := "DE_SRC_IS_DVD - The source is a read-only DVD, possibly unformatted."
        dict[0x88] := "DE_SRC_IS_CDRECORD - The source is a writable CD-ROM, possibly unformatted."
        dict[0xB7] := "DE_ERROR_MAX - MAX_PATH was exceeded during the operation."
        dict[0x402] := "An unknown error occurred. This is typically due to an invalid path in the source or destination. This error does not occur on Windows Vista and later."
        dict[0x10000] := "ERRORONDEST - An unspecified error occurred on the destination."
        dict[0x10074] := "DE_ROOTDIR | ERRORONDEST - Destination is a root directory and cannot be renamed."
    }
    ; Test for the key rather than the value: the success entry is an empty string, which a
    ; truthiness check would mistake for "not found".
    return dict.HasKey(c) ? dict[c] : "Error code not recognized"
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment