Skip to content

Instantly share code, notes, and snippets.

@westoncampbell
Created November 30, 2022 02:28
Show Gist options
  • Save westoncampbell/67b89e2206c02e77182c2d1a0b9a8989 to your computer and use it in GitHub Desktop.
Save westoncampbell/67b89e2206c02e77182c2d1a0b9a8989 to your computer and use it in GitHub Desktop.
Scrape all submitted carts from the TIC-80 website
; Script Information ===========================================================
; Name: TIC-80 Cart Scraper
; Description: Scrape all submitted carts from the TIC-80 website
; AHK Version: 1.1.35.00 (Unicode 32-bit)
; OS Version: Windows 10
; Language: English (United States)
; Author: IAmTheDewd
; Filename: TIC-80 Cart Scraper.ahk
; ==============================================================================
; Auto-Execute =================================================================
; Walks cart ids 1..4000 on tic80.com, scrapes each cart page's metadata into
; TIC-80.csv, and downloads the cover image (into Imgs\) and the cartridge file
; (into a folder named after the cart's category).
#SingleInstance, Force ; Allow only one running instance of script
#Persistent ; Keep the script permanently running until terminated
#NoEnv ; Avoid checking empty variables for environment variables
SetWorkingDir, % A_ScriptDir ; Set the working directory of the script
SetBatchLines, -1 ; The speed at which the lines of the script are executed
Menu, Tray, Icon, shell32.dll, 3
Menu, Tray, Tip, TIC-80 Cart Scraper
FileAppend, % "#,Category,Title,Description,Author,Uploaded,Added,Updated,Cartridge,Cover,URL,Filename`n", TIC-80.csv
Counter := 1
jsObj := "" ; Cached e-mail decoder, created on first use only
Loop, 4000 {
    CartId := A_Index
    HttpReq := ComObjCreate("WinHttp.WinHttpRequest.5.1")
    HttpReq.Open("GET", "https://tic80.com/play?cart=" CartId, false)
    HttpReq.Send()
    HttpReq.WaitForResponse()
    ; ResponseText is a read-only property of the request object, so the page
    ; is copied into a local variable instead of being assigned back to it.
    ; NOTE(review): the search string is presumably a non-breaking space
    ; (U+00A0) that was flattened to a plain space when the script was copied;
    ; replacing it lets the \s-based patterns below match - confirm.
    Html := StrReplace(HttpReq.ResponseText, Chr(160), " ")
    If (Html = "<p>404. Not Found!</p>") {
        Continue
    }
    RegExMatch(Html, "s)<a\shref=""\/play\?cat=\d+"">(.*?)<\/a>", MatchCategory)
    RegExMatch(Html, "s)<title>Play\s(.*?)\s+\/\sTIC-80<\/title>", MatchTitle)
    MatchTitle1 := CollapseWhitespace(HtmlDecode(MatchTitle1))
    RegExMatch(Html, "s)<meta\sname=""description""\scontent=""(.*?)"">", MatchDescription)
    MatchDescription1 := CollapseWhitespace(HtmlDecode(MatchDescription1))
    RegExMatch(Html, "s)<div>made\sby\s(.*?)<\/div>", MatchAuthor)
    ; Only an even-length hex payload is accepted: the value is later spliced
    ; into a script string passed to eval, so anything else must never get there.
    RegExMatch(Html, "s)data-cfemail=""((?:[0-9a-fA-F]{2})+)""", MatchAuthorEmail)
    MatchAuthor1 := RegExReplace(MatchAuthor1, "s)<a.*?data-cfemail="".*?"">.*?<\/a>", "Email-Placeholder")
    RegExMatch(Html, "s)uploaded\sby\s.*?<a.*?>(.*?)<\/a>", MatchUploaded)
    RegExMatch(Html, "s)added.*?(\d{13})", MatchAdded)
    RegExMatch(Html, "s)updated.*?(\d{13})", MatchUpdated)
    RegExMatch(Html, "s)(\/cart\/\S+\/\S+\.tic)", MatchDownload)
    RegExMatch(Html, "s)(\/cart\/\S+\/cover.gif)", MatchCover)
    RegExMatch(Html, "s)<a\shref=""\/cart\/\S+\/(.*?).tic"">", MatchFilename)
    CartFile := MatchFilename1 " (Cart " CartId ")" ; Base name shared by the .tic and .gif files
    ToolTip, % CartId "`n" MatchCategory1 " > " CartFile ".tic"
    If (MatchAuthorEmail1) {
        If (!IsObject(jsObj)) {
            jsObj := EmailDecode()
        }
        MatchAuthorEmail1 := %jsObj%("cfDecodeEmail('" MatchAuthorEmail1 "');")
        MatchAuthor1 := StrReplace(MatchAuthor1, "Email-Placeholder", MatchAuthorEmail1)
    }
    ; When the page has no "updated" stamp, the "added" stamp is reported instead.
    Row := Counter
        . "," CsvField(MatchCategory1)
        . "," CsvField(MatchTitle1)
        . "," CsvField(MatchDescription1)
        . "," CsvField(MatchAuthor1)
        . "," CsvField(MatchUploaded1)
        . "," ConvertTimestamp(MatchAdded1)
        . "," ConvertTimestamp(MatchUpdated1 = "" ? MatchAdded1 : MatchUpdated1)
        . "," CsvField("https://tic80.com" MatchDownload1)
        . "," CsvField("https://tic80.com" MatchCover1)
        . ",https://tic80.com/play?cart=" CartId
        . "," CsvField(CartFile ".tic")
    FileAppend, % Row "`n", TIC-80.csv
    Counter++
    ; A blank category would make FileCreateDir fail and send the download to
    ; the drive root, so a fallback folder name is used on disk only.
    CategoryDir := (MatchCategory1 = "") ? "Uncategorized" : MatchCategory1
    If (!FileExist("Imgs")) {
        FileCreateDir, Imgs
    }
    If (!FileExist(CategoryDir)) {
        FileCreateDir, % CategoryDir
    }
    ; Skip a download when its link was not found; otherwise the site root
    ; would be saved under the cart's file name.
    If (MatchCover1 != "") {
        UrlDownloadToFile, % "https://tic80.com" MatchCover1, % "Imgs\" CartFile ".gif"
    }
    If (MatchDownload1 != "") {
        UrlDownloadToFile, % "https://tic80.com" MatchDownload1, % CategoryDir "\" CartFile ".tic"
    }
}
ExitApp
return ; End automatic execution

; Decodes the handful of HTML entities handled by this script.
; "&amp;" is decoded last so that text such as "&amp;lt;" is not decoded twice.
HtmlDecode(Str) {
    Str := StrReplace(Str, "&#39;", "'")
    Str := StrReplace(Str, "&#34;", """")
    Str := StrReplace(Str, "&lt;", "<")
    Str := StrReplace(Str, "&gt;", ">")
    return StrReplace(Str, "&amp;", "&")
}

; Turns tabs into spaces, reduces every run of whitespace between two
; non-whitespace characters to its first character, and trims both ends.
CollapseWhitespace(Str) {
    Str := StrReplace(Str, "`t", " ")
    Str := RegExReplace(Str, "\S\s\K\s+(?=\S)")
    return RegExReplace(Str, "^\s+|\s+$")
}

; Returns Str as a single CSV field: a value containing a comma, a double
; quote, or a line break is wrapped in double quotes with embedded quotes
; doubled; any other value is returned unchanged.
CsvField(Str) {
    If (!RegExMatch(Str, "[,""\r\n]")) {
        return Str
    }
    return """" StrReplace(Str, """", """""") """"
}
; ==============================================================================
; Returns a callable bound to the "eval" method of a hidden HTML document in
; which the script function cfDecodeEmail(hex) is defined. cfDecodeEmail reads
; the first two hex digits of its argument as a key and XORs every following
; hex byte with that key to build the decoded string.
;
; The document and the bound method are created on the first call only; later
; calls return the same object instead of writing the script into the document
; again.
;
; SECURITY: the returned callable evaluates arbitrary script text. Callers must
; only pass strings they built from validated (hex-only) input.
EmailDecode() {
    static doc := "", evalFn := ""
    If (!IsObject(evalFn)) {
        doc := ComObjCreate("htmlfile")
        ; The loop condition is "e<r.length" rather than "r.length-e" so that an
        ; odd-length argument ends the loop instead of running forever.
        doc.write("<meta http-equiv='X-UA-Compatible' content='IE=9'><script>function cfDecodeEmail(r){var e,o,t='',f=parseInt(r.substr(0,2),16);for(e=2;e<r.length;e+=2)t+=String.fromCharCode(o=parseInt(r.substr(e,2),16)^f);return t}</script>")
        evalFn := ObjBindMethod(doc.parentWindow, "eval")
    }
    return evalFn
}
; Converts a Unix timestamp string into "YYYY-MM-DD HH:MM:SS GMT+0000".
;
; Str: timestamp digits; only the first 10 characters (whole seconds) are used,
;      so a 13-digit millisecond value is accepted as well.
; Returns the formatted date/time, or an empty string when Str does not start
; with a usable run of digits (previously this produced "-- :: GMT+0000").
ConvertTimestamp(Str) {
    Seconds := SubStr(Str, 1, 10)
    If (!RegExMatch(Seconds, "^\d+$")) {
        return ""
    }
    ; Date math: start at 1970-01-01 00:00:00 and add the seconds to it.
    Stamp := 1970
    Stamp += Seconds, s
    If (Stamp = "") { ; Date math left the variable blank: the result is not a valid date
        return ""
    }
    FormatTime, Formatted, %Stamp%, yyyy-MM-dd HH:mm:ss
    return Formatted " GMT+0000"
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment