Created
November 30, 2022 02:28
-
-
Save westoncampbell/67b89e2206c02e77182c2d1a0b9a8989 to your computer and use it in GitHub Desktop.
Scrape all submitted carts from the TIC-80 website
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; Script Information ===========================================================
; Name: TIC-80 Cart Scraper
; Description: Scrape all submitted carts from the TIC-80 website
; AHK Version: 1.1.35.00 (Unicode 32-bit)
; OS Version: Windows 10
; Language: English (United States)
; Author: IAmTheDewd
; Filename: TIC-80 Cart Scraper.ahk
; ==============================================================================
; Auto-Execute =================================================================
; Walks cart ids 1..4000 on tic80.com, extracts each cart's metadata from the
; play page, appends one row per cart to TIC-80.csv, and downloads the cover
; image (into Imgs\) and the cartridge (into a folder named after its category).
#SingleInstance, Force ; Allow only one running instance of script
#Persistent ; Keep the script permanently running until terminated
#NoEnv ; Avoid checking empty variables for environment variables
SetWorkingDir, % A_ScriptDir ; Set the working directory of the script
SetBatchLines, -1 ; The speed at which the lines of the script are executed
Menu, Tray, Icon, shell32.dll, 3
Menu, Tray, Tip, TIC-80 Cart Scraper

; Write the header only for a fresh file so that re-running the script does not
; insert a second header row in the middle of existing data.
If (!FileExist("TIC-80.csv"))
    FileAppend, % "#,Category,Title,Description,Author,Uploaded,Added,Updated,Cartridge,Cover,URL,Filename`n", TIC-80.csv

Counter := 1
; One request object is enough; Open() re-arms it for every cart.
HttpReq := ComObjCreate("WinHttp.WinHttpRequest.5.1")
Loop, 4000 {
    HttpReq.Open("GET", "https://tic80.com/play?cart=" A_Index, false)
    HttpReq.Send()
    HttpReq.WaitForResponse()

    ; ResponseText is a read-only property, so normalise a local copy instead of
    ; assigning back to it. Non-breaking spaces (entity or raw U+00A0) become
    ; plain spaces so the \s-based patterns below behave predictably.
    Html := StrReplace(StrReplace(HttpReq.ResponseText, "&nbsp;", " "), Chr(160), " ")
    If (Html = "<p>404. Not Found!</p>") {
        Continue
    }

    ; --- Extract the individual fields (RegExMatch blanks the output on no match)
    RegExMatch(Html, "s)<a\shref=""\/play\?cat=\d+"">(.*?)<\/a>", MatchCategory)
    RegExMatch(Html, "s)<title>Play\s(.*?)\s+\/\sTIC-80<\/title>", MatchTitle)
    MatchTitle1 := CleanText(MatchTitle1)
    RegExMatch(Html, "s)<meta\sname=""description""\scontent=""(.*?)"">", MatchDescription)
    MatchDescription1 := CleanText(MatchDescription1)
    RegExMatch(Html, "s)<div>made\sby\s(.*?)<\/div>", MatchAuthor)
    RegExMatch(Html, "s)data-cfemail=""(.*?)""", MatchAuthorEmail)
    ; Swap the obfuscated e-mail anchor for a marker that is filled in below.
    MatchAuthor1 := RegExReplace(MatchAuthor1, "s)<a.*?data-cfemail="".*?"">.*?<\/a>", "Email-Placeholder")
    RegExMatch(Html, "s)uploaded\sby\s.*?<a.*?>(.*?)<\/a>", MatchUploaded)
    RegExMatch(Html, "s)added.*?(\d{13})", MatchAdded)
    RegExMatch(Html, "s)updated.*?(\d{13})", MatchUpdated)
    RegExMatch(Html, "s)(\/cart\/\S+\/\S+\.tic)", MatchDownload)
    RegExMatch(Html, "s)(\/cart\/\S+\/cover\.gif)", MatchCover)
    RegExMatch(Html, "s)<a\shref=""\/cart\/\S+\/(.*?)\.tic"">", MatchFilename)

    ; Base name shared by the CSV row, the cover image and the cartridge file.
    CartFile := MatchFilename1 " (Cart " A_Index ")"
    ToolTip, % A_Index "`n" MatchCategory1 " > " CartFile ".tic"

    ; Decode the obfuscated e-mail (if any) and put it back into the author text.
    If (MatchAuthorEmail1 != "") {
        jsObj := EmailDecode()
        MatchAuthorEmail1 := %jsObj%("cfDecodeEmail('" MatchAuthorEmail1 "');")
        MatchAuthor1 := StrReplace(MatchAuthor1, "Email-Placeholder", MatchAuthorEmail1)
    }

    ; --- Append the CSV row. Free-text fields go through CsvField() so commas,
    ; quotes and line breaks cannot break the column layout. When no "updated"
    ; stamp was found, the "added" stamp is used for both columns.
    Row := Counter
        . "," CsvField(MatchCategory1)
        . "," CsvField(MatchTitle1)
        . "," CsvField(MatchDescription1)
        . "," CsvField(MatchAuthor1)
        . "," CsvField(MatchUploaded1)
        . "," ConvertTimestamp(MatchAdded1)
        . "," ConvertTimestamp(MatchUpdated1 = "" ? MatchAdded1 : MatchUpdated1)
        . ",https://tic80.com" MatchDownload1
        . ",https://tic80.com" MatchCover1
        . ",https://tic80.com/play?cart=" A_Index
        . "," CsvField(CartFile ".tic")
        . "`n"
    FileAppend, % Row, TIC-80.csv
    Counter++

    ; --- Download the assets
    If (!FileExist("Imgs"))
        FileCreateDir, Imgs
    ; Without a category the target path would start with "\" and land in the
    ; root of the current drive, so fall back to a fixed folder name.
    CartDir := (MatchCategory1 = "") ? "Uncategorized" : MatchCategory1
    If (!FileExist(CartDir))
        FileCreateDir, % CartDir
    ; Skip a download whose path was not found; otherwise the bare site URL
    ; would be fetched and saved under the asset's name.
    If (MatchCover1 != "")
        UrlDownloadToFile, % "https://tic80.com" MatchCover1, % "Imgs\" CartFile ".gif"
    If (MatchDownload1 != "")
        UrlDownloadToFile, % "https://tic80.com" MatchDownload1, % CartDir "\" CartFile ".tic"
}
ExitApp
return ; End automatic execution

; Decodes the handful of HTML entities handled by this script. "&amp;" is
; decoded last so that text such as "&amp;lt;" is not decoded twice.
HtmlDecode(Str) {
    Str := StrReplace(Str, "&#39;", "'")
    Str := StrReplace(Str, "&quot;", """")
    Str := StrReplace(Str, "&lt;", "<")
    Str := StrReplace(Str, "&gt;", ">")
    return StrReplace(Str, "&amp;", "&")
}

; Entity-decodes Str, turns tabs into spaces, collapses whitespace runs between
; words to a single whitespace character and trims both ends.
CleanText(Str) {
    Str := StrReplace(HtmlDecode(Str), "`t", " ")
    Str := RegExReplace(Str, "\S\s\K\s+(?=\S)")
    return RegExReplace(Str, "^\s+|\s+$")
}

; Returns Str as a single CSV field: unchanged when it contains no comma, double
; quote or line break; otherwise wrapped in double quotes with embedded quotes
; doubled.
CsvField(Str) {
    If (!RegExMatch(Str, "[,""\r\n]"))
        return Str
    return """" StrReplace(Str, """", """""") """"
}
; ============================================================================== | |
; Returns a callable bound to the "eval" method of a hidden HTML document's
; script window. The document defines the JavaScript function cfDecodeEmail(r),
; which reads the first two hex digits of r as a key and XORs every following
; hex byte with it to rebuild the text.
; Usage:  fn := EmailDecode()
;         plain := %fn%("cfDecodeEmail('<hex>');")
EmailDecode() {
    static doc := "", evaluator := ""
    ; Build the document and inject the script only once; writing it again on
    ; every call would keep appending the same <script> block to the document.
    If (!IsObject(evaluator)) {
        doc := ComObjCreate("htmlfile")
        doc.write("<meta http-equiv='X-UA-Compatible' content='IE=9'><script>function cfDecodeEmail(r){var e,o,t='',f=parseInt(r.substr(0,2),16);for(e=2;r.length-e;e+=2)t+=String.fromCharCode(o=parseInt(r.substr(e,2),16)^f);return t}</script>")
        evaluator := ObjBindMethod(doc.parentWindow, "eval")
    }
    return evaluator
}
; Converts a Unix timestamp string into "YYYY-MM-DD HH:MM:SS GMT+0000".
;   Str - digits of a Unix timestamp; only the first 10 digits are used, which
;         for a 13-digit millisecond value is the whole-second part.
; Returns the formatted date/time, or an empty string when Str does not start
; with a digit (e.g. the timestamp was not found on the page).
ConvertTimestamp(Str) {
    ; Keep at most the first 10 leading digits; bail out if there are none.
    If (!RegExMatch(Str, "^\d{1,10}", Secs))
        return ""
    ; A bare year is a valid (truncated) YYYYMMDDHH24MISS stamp, so adding the
    ; seconds to 1970 yields the absolute date-time as a 14-digit stamp.
    Stamp := 1970
    Stamp += Secs, s
    RegExMatch(Stamp, "(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})", Part)
    return Part1 "-" Part2 "-" Part3 " " Part4 ":" Part5 ":" Part6 " GMT+0000"
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment