Last active
July 16, 2019 06:04
-
-
Save Mijyuoon/ccd623fa18fd169f5ac44a5861ed591f to your computer and use it in GitHub Desktop.
Discord image archiving script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'rest-client'
require 'json'
require 'time'
require 'digest/sha1'
require 'fileutils'

# --- Configuration -----------------------------------------------------------

# ID of the Discord channel whose message history is archived.
TARGET_CHAN = "CHANNEL_ID"
# Message ID to start paging backwards from; nil starts at the channel head.
ARCHIVE_FROM_ID = nil # or "MESSAGE_ID"
# Paging stops once the last message of a batch is older than this time.
ARCHIVE_UNTIL = Time.parse "2018-01-01"
# Minimum number of seconds each batch must take; shorter batches sleep for
# the remainder before the next request is sent.
BATCH_MIN_WAIT = 5.000
# Value sent in the Authorization header. It can be supplied through the
# DISCORD_AUTHORIZATION environment variable so the token does not have to be
# written into this file; the literal below remains the default.
AUTHORIZATION = ENV.fetch('DISCORD_AUTHORIZATION', "REDACTED")

puts "Initializing archive in <##{TARGET_CHAN}> until #{ARCHIVE_UNTIL}"

# Everything for one channel lives in its own directory.
savedir = "pic-archive/#{TARGET_CHAN}"
FileUtils.mkdir_p savedir

# Metadata log, opened in append mode so repeated runs keep earlier records.
arcdata = File.open("#{savedir}/_metadata.txt", "a")
# Downloads +url+ and stores the payload in +dir+ under a content-addressed
# name: the SHA-1 hex digest of the downloaded data followed by the extension
# taken from the URL.
#
# @param url [String] address of the resource to download
# @param dir [String] existing directory the file is written into
# @return [String] the file name (without directory) that was written
# @raise whatever RestClient.get raises when the download fails
def save_image(url, dir)
  data = RestClient.get(url)
  fname = Digest::SHA1.hexdigest(data)
  # Drop the query string and any fragment before looking for an extension,
  # otherwise "pic.png?width=10" or "pic.png#frag" would leak into the name.
  surl = url.sub(/[?#].*\z/m, '')
  fname += File.extname(surl)
  File.binwrite(File.join(dir, fname), data)
  fname
end
# Downloads every attachment of +msg+ into +savedir+ and appends one
# "<message id>\tIMAGE\t<file name>" line per attachment to +arcdata+.
# Does nothing when the message has no attachments.
def archive_attachments(msg, savedir, arcdata)
  attachments = msg['attachments']
  return unless attachments&.any?

  puts " * Saving attachments..."
  attachments.each do |att|
    fname = save_image(att['url'], savedir)
    arcdata.puts "#{msg['id']}\tIMAGE\t#{fname}"
    puts " - Saved image #{fname}"
  end
end

# Records the embeds of +msg+ in +arcdata+.
# Embeds of type "image" are downloaded and logged as IMAGE. For any other
# embed the thumbnail (if present) is downloaded and logged as THUMB, the
# video URL (if present) is logged as VIDEO, and the embed URL is always
# logged as URL.
def archive_embeds(msg, savedir, arcdata)
  embeds = msg['embeds']
  return unless embeds&.any?

  puts " * Saving embeds..."
  embeds.each do |emb|
    if emb['type'] == "image"
      fname = save_image(emb['url'], savedir)
      arcdata.puts "#{msg['id']}\tIMAGE\t#{fname}"
      puts " - Saved image #{fname}"
      next
    end

    if (thumb = emb['thumbnail'])
      fname = save_image(thumb['url'], savedir)
      arcdata.puts "#{msg['id']}\tTHUMB\t#{fname}"
      puts " - Saved thumbnail #{fname}"
    end
    if (video = emb['video'])
      arcdata.puts "#{msg['id']}\tVIDEO\t#{video['url']}"
      puts " - Saved video #{video['url']}"
    end
    arcdata.puts "#{msg['id']}\tURL\t#{emb['url']}"
    puts " - Saved URL #{emb['url']}"
  end
end

# --- Main loop ---------------------------------------------------------------
# Pages backwards through the channel, 100 messages per request, using the ID
# of the last message of each batch as the "before" cursor for the next one.
# NOTE(review): the cutoff is checked once per batch, after the whole batch
# has been processed, so messages older than ARCHIVE_UNTIL that share a batch
# with newer ones are still archived.
last_msg = ARCHIVE_FROM_ID
headers = { 'Authorization' => AUTHORIZATION }

begin
  loop do
    puts "* Starting new batch from #{last_msg || "HEAD"}"
    # Monotonic clock: the elapsed time must not be affected by wall-clock
    # adjustments while a batch is running.
    batch_start = Process.clock_gettime(Process::CLOCK_MONOTONIC)

    req_param = "limit=100"
    req_param += "&before=#{last_msg}" if last_msg
    response = RestClient.get("https://discordapp.com/api/v6/channels/#{TARGET_CHAN}/messages?#{req_param}", headers)
    json = JSON.parse(response)
    break if json.empty?

    json.each do |msg|
      begin
        puts " * Processing msg #{msg['id']}"
        archive_attachments(msg, savedir, arcdata)
        archive_embeds(msg, savedir, arcdata)
      rescue => ex
        # Best effort: a failure abandons the rest of this message only and
        # the batch carries on with the next one.
        jsonerr = {message: ex.message, backtrace: ex.backtrace}
        puts " ! Internal error occurred:\n#{jsonerr.to_json}"
      end
    end

    # Persist the metadata of the finished batch before moving on.
    arcdata.flush
    last_msg = json.last['id']
    last_ts = Time.parse json.last['timestamp']
    break if ARCHIVE_UNTIL > last_ts

    # Keep at least BATCH_MIN_WAIT seconds between consecutive requests.
    delay = Process.clock_gettime(Process::CLOCK_MONOTONIC) - batch_start
    if delay < BATCH_MIN_WAIT
      puts "! Batch finshed early, waiting"
      sleep(BATCH_MIN_WAIT - delay)
    end
  end
ensure
  # Close the metadata log even when a batch request raises.
  arcdata.close
end

puts "Finished archive at msg #{last_msg}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Exists due to issues like Pixiv links saving with .php extension
#
# Maps the leading bytes of a file to the extension it should carry.
SIGLIST = {
  "\xFF\xD8\xFF".b => '.jpg',
  "\x89\x50\x4E\x47".b => '.png',
  "\x47\x49\x46\x38".b => '.gif'
}.freeze

# Renames +fname+ so its extension matches the signature found at the start
# of the file. An existing trailing ".ext" is replaced; a name without an
# extension gets one appended.
#
# @param fname [String] path of the file to inspect
# @return [String, nil] the resulting path, or nil when no known signature
#   matched (the file is then left untouched)
def fix_extension(fname)
  # read returns nil for an empty file, hence the to_s.
  header = File.open(fname, 'rb') { |io| io.read(32) }.to_s
  _sig, fext = SIGLIST.find { |sig, _ext| header.start_with?(sig) }
  return unless fext

  puts "Identified #{fname} as '#{fext}'"
  newname = fname.sub(/\.\w+\z/, '') + fext
  File.rename(fname, newname) unless newname == fname
  newname
end

ARGV.each { |fname| fix_extension(fname) }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment