Skip to content

Instantly share code, notes, and snippets.

@Uberi
Created July 23, 2012 18:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Uberi/3165409 to your computer and use it in GitHub Desktop.
Save Uberi/3165409 to your computer and use it in GitHub Desktop.
Google Cache Downloader
;Google Cache Downloader main routine: runs a Google search, fetches every "Cached" link on each
;results page, and writes the <pre> contents of each cached page to a file beneath OutputPath
Query := "site:autohotkey.net/~Uberi filetype:ahk"
OutputPath := A_ScriptDir . "\CacheFiles"
Search := "http://www.google.com/search?q=" . URLEncode(Query)

;each pass of the outer loop handles one page of search results
Loop
{
    SearchResult := Get(Search)

    ;walk through every "Cached" link found on this results page
    FoundPos := 1
    Loop
    {
        FoundPos := RegExMatch(SearchResult,"S)<a href=""([^""]*)""[^>]*>Cached</a>",Match,FoundPos)
        If !FoundPos
            Break

        ;pause for a random 1 to 10 seconds before requesting the cached page
        Random, Delay, 1000, 10000
        Sleep, %Delay%

        ;the link is protocol-relative in the markup, so the scheme is prepended here
        CachedURL := "http:" . ConvertEntities(Match1) . "&strip=1"
        Page := Get(CachedURL)
        If RegExMatch(Page,"isS)cache of <a href=""([^""]*)"".*<pre>(.*)</pre>",Field)
        {
            SourceURL := ConvertEntities(Field1) ;address of the page that was cached
            Contents := ConvertEntities(Field2) ;text found between <pre> and </pre>

            ;mirror the source URL's directory structure (minus the scheme and host) under OutputPath
            SplitPath, SourceURL, FileName, SourceDir,,, SourceDomain
            RelativeDir := SubStr(SourceDir,StrLen(SourceDomain) + 1)
            FilePath := URLDecode(OutputPath . RelativeDir)
            FileName := URLDecode(FileName)
            StringReplace, FilePath, FilePath, /, \, All

            FileCreateDir, %FilePath%
            FileAppend, %Contents%, %FilePath%\%FileName%
        }

        ;resume the search just after the link that was processed
        FoundPos += StrLen(Match)
    }

    ;continue with the "Next" results page, or finish when there is none
    If RegExMatch(SearchResult,"S)<a href=""([^""]*)""[^>]*><span[^>]*>[^<]*</span><span[^>]*>Next</span></a>",Match)
        Search := "http://google.com" . ConvertEntities(Match1)
    Else
        Break
}
;Performs an HTTP GET request for URL through the WinHTTP COM object and returns the response body as text.
;  URL - address of the resource to download
Get(URL)
{
    Request := ComObjCreate("WinHttp.WinHttpRequest.5.1")
    Request.Open("GET",URL)

    ;headers sent with every request
    Request.SetRequestHeader("Content-Type","application/x-www-form-urlencoded")
    Request.SetRequestHeader("User-Agent","CacheRetriever/1.0")

    Request.Send()
    ResponseBody := Request.ResponseText
    Return, ResponseBody
}
;Percent-encodes Text for use in a URL.
;  Text - the string to encode
;Returns Text with every byte of its UTF-8 representation written as %XX (uppercase hexadecimal),
;except for ASCII letters, digits, and the characters - . _ ~ which are copied unchanged.
;Encoding works on UTF-8 bytes rather than on character codes so that characters above 0xFF are not
;truncated to their low byte, and the integer format set by SetFormat is left untouched.
URLEncode(Text)
{
    static HexDigits := "0123456789ABCDEF"
    static Unreserved := "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~"

    ;convert the text to UTF-8; the size returned by StrPut includes the null terminator
    VarSetCapacity(Bytes,StrPut(Text,"UTF-8"))
    ByteCount := StrPut(Text,&Bytes,"UTF-8") - 1

    Encoded := ""
    Loop, %ByteCount%
    {
        Byte := NumGet(Bytes,A_Index - 1,"UChar")
        If (Byte < 128 && InStr(Unreserved,Chr(Byte),True)) ;unreserved ASCII character, copy verbatim
            Encoded .= Chr(Byte)
        Else ;everything else, including "%" itself, becomes a two digit hexadecimal escape
            Encoded .= "%" . SubStr(HexDigits,(Byte >> 4) + 1,1) . SubStr(HexDigits,(Byte & 15) + 1,1)
    }
    Return, Encoded
}
;Decodes the percent-encoded sequences in Encoded.
;  Encoded - the string to decode
;Returns the string with every "%" followed by exactly two hexadecimal digits replaced by the character
;with that code. A "%" that is not followed by two hexadecimal digits is copied through unchanged
;instead of being mangled, and decoded text is never decoded a second time ("%2541" gives "%41").
;"+" is deliberately not converted to a space.
;NOTE: each escape is decoded on its own, so a multi-byte UTF-8 sequence such as %C3%A9 yields one
;character per byte rather than a single combined character.
URLDecode(Encoded)
{
    Decoded := "", Position := 1
    While, FoundPos := RegExMatch(Encoded,"S)%([0-9A-Fa-f]{2})",Escape,Position)
    {
        ;copy the literal text before the escape, then the character the escape stands for
        Decoded .= SubStr(Encoded,Position,FoundPos - Position) . Chr("0x" . Escape1)
        Position := FoundPos + 3 ;continue after the three characters of the escape
    }
    Return, Decoded . SubStr(Encoded,Position) ;append whatever follows the last escape
}
;Replaces the HTML character entities in HTML with the characters they represent.
;  HTML - text that may contain named (&lt;), decimal (&#60;) or hexadecimal (&#x3C;) entities
;Returns the converted text. The input is processed in a single pass, so text produced by a conversion
;is never converted again ("&amp;lt;" gives "&lt;"). Entity names are matched case sensitively, which
;keeps pairs such as &Agrave; and &agrave; distinct. Anything that is not a recognized entity - a bare
;ampersand in a URL, an unknown name, or a code that yields no character - is copied through unchanged.
ConvertEntities(HTML)
{
    static EntityList := "|quot=34|apos=39|amp=38|lt=60|gt=62|nbsp=160|iexcl=161|cent=162|pound=163|curren=164|yen=165|brvbar=166|sect=167|uml=168|copy=169|ordf=170|laquo=171|not=172|shy=173|reg=174|macr=175|deg=176|plusmn=177|sup2=178|sup3=179|acute=180|micro=181|para=182|middot=183|cedil=184|sup1=185|ordm=186|raquo=187|frac14=188|frac12=189|frac34=190|iquest=191|Agrave=192|Aacute=193|Acirc=194|Atilde=195|Auml=196|Aring=197|AElig=198|Ccedil=199|Egrave=200|Eacute=201|Ecirc=202|Euml=203|Igrave=204|Iacute=205|Icirc=206|Iuml=207|ETH=208|Ntilde=209|Ograve=210|Oacute=211|Ocirc=212|Otilde=213|Ouml=214|times=215|Oslash=216|Ugrave=217|Uacute=218|Ucirc=219|Uuml=220|Yacute=221|THORN=222|szlig=223|agrave=224|aacute=225|acirc=226|atilde=227|auml=228|aring=229|aelig=230|ccedil=231|egrave=232|eacute=233|ecirc=234|euml=235|igrave=236|iacute=237|icirc=238|iuml=239|eth=240|ntilde=241|ograve=242|oacute=243|ocirc=244|otilde=245|ouml=246|divide=247|oslash=248|ugrave=249|uacute=250|ucirc=251|uuml=252|yacute=253|thorn=254|yuml=255|OElig=338|oelig=339|Scaron=352|scaron=353|Yuml=376|circ=710|tilde=732|ensp=8194|emsp=8195|thinsp=8201|zwnj=8204|zwj=8205|lrm=8206|rlm=8207|ndash=8211|mdash=8212|lsquo=8216|rsquo=8217|sbquo=8218|ldquo=8220|rdquo=8221|bdquo=8222|dagger=8224|Dagger=8225|hellip=8230|permil=8240|lsaquo=8249|rsaquo=8250|euro=8364|trade=8482|"
    Result := "", Position := 1
    While, FoundPos := RegExMatch(HTML,"S)&(#[0-9]+|#[xX][0-9A-Fa-f]+|[A-Za-z][A-Za-z0-9]*);",Entity,Position)
    {
        Result .= SubStr(HTML,Position,FoundPos - Position) ;copy the text preceding the entity verbatim
        Position := FoundPos + StrLen(Entity) ;continue after the terminating semicolon

        EntityCode := ""
        If (SubStr(Entity1,1,1) = "#") ;numeric entity code
        {
            EntityCode := SubStr(Entity1,2)
            If (SubStr(EntityCode,1,1) = "x") ;hexadecimal form; this comparison also accepts "X"
                EntityCode := "0x" . SubStr(EntityCode,2)
        }
        Else If (ListPos := InStr(EntityList,"|" . Entity1 . "=",True)) ;known named entity
        {
            ListPos += StrLen(Entity1) + 2 ;move past the "|name=" prefix to the code
            EntityCode := SubStr(EntityList,ListPos,InStr(EntityList,"|",True,ListPos) - ListPos)
        }

        ;fall back to the original entity text when no character could be produced for it
        Char := (EntityCode = "") ? "" : Chr(EntityCode)
        Result .= StrLen(Char) ? Char : Entity
    }
    Return, Result . SubStr(HTML,Position) ;append whatever follows the last entity
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment