Skip to content

Instantly share code, notes, and snippets.

@Mijyuoon
Last active July 16, 2019 06:04
Show Gist options
  • Save Mijyuoon/ccd623fa18fd169f5ac44a5861ed591f to your computer and use it in GitHub Desktop.
Save Mijyuoon/ccd623fa18fd169f5ac44a5861ed591f to your computer and use it in GitHub Desktop.
Discord image archiving script
require 'rest-client'
require 'json'
require 'time'
require 'digest/sha1'
require 'fileutils'
# ID of the channel whose message history is requested.
TARGET_CHAN = "CHANNEL_ID"
# Message ID to page backwards from; when nil no `before` parameter is sent.
ARCHIVE_FROM_ID = nil # or "MESSAGE_ID"
# Paging stops once the oldest message of a batch predates this moment.
ARCHIVE_UNTIL = Time.parse("2018-01-01")
# Minimum number of seconds a batch should take; faster batches sleep the rest.
BATCH_MIN_WAIT = 5.0
# Value sent in the 'Authorization' request header.
AUTHORIZATION = "REDACTED"

puts "Initializing archive in <##{TARGET_CHAN}> until #{ARCHIVE_UNTIL}"

# Each channel gets its own directory below pic-archive/.
savedir = "pic-archive/#{TARGET_CHAN}"
FileUtils.mkdir_p(savedir)

# Metadata log, opened in append mode so earlier runs are preserved.
arcdata = File.new("#{savedir}/_metadata.txt", "a")
# Downloads +url+ and stores the payload in +dir+ under a content-derived name.
#
# The file name is the SHA-1 hex digest of the downloaded data followed by the
# extension found on the URL once its query string has been removed.
#
# @param url [String] address to fetch
# @param dir [String] directory the file is written into
# @return [String] the generated file name (without the directory part)
def save_image(url, dir)
  payload = RestClient.get(url)
  digest = Digest::SHA1.hexdigest(payload)
  # Drop "?..." first so query parameters never leak into the extension.
  extension = File.extname(url.gsub(/\?.*$/, ''))
  "#{digest}#{extension}".tap do |name|
    File.binwrite("#{dir}/#{name}", payload)
  end
end
# Walk the channel history backwards in batches of up to 100 messages,
# saving attachment/embed images and logging what was stored.
last_msg = ARCHIVE_FROM_ID

# Appends one tab-separated record (message id, kind, value) to the metadata log.
log_entry = ->(msg_id, kind, value) { arcdata.puts "#{msg_id}\t#{kind}\t#{value}" }

loop do
  puts "* Starting new batch from #{last_msg || "HEAD"}"
  batch_start = Time.now

  # Only send `before` once there is a message ID to page from.
  query = ["limit=100", ("&before=#{last_msg}" if last_msg)].join
  auth_headers = { 'Authorization' => AUTHORIZATION }
  endpoint = "https://discordapp.com/api/v6/channels/#{TARGET_CHAN}/messages?#{query}"
  messages = JSON.parse(RestClient.get(endpoint, auth_headers))
  break if messages.empty?

  messages.each do |message|
    begin
      msg_id = message['id']
      puts " * Processing msg #{msg_id}"

      attachments = message['attachments']
      if attachments&.any?
        puts " * Saving attachments..."
        attachments.each do |attachment|
          stored = save_image(attachment['url'], savedir)
          log_entry.call(msg_id, "IMAGE", stored)
          puts " - Saved image #{stored}"
        end
      end

      embeds = message['embeds']
      if embeds&.any?
        puts " * Saving embeds..."
        embeds.each do |embed|
          # Image embeds are downloaded directly and need nothing else.
          if embed['type'] == "image"
            stored = save_image(embed['url'], savedir)
            log_entry.call(msg_id, "IMAGE", stored)
            puts " - Saved image #{stored}"
            next
          end

          # Any other embed: keep its thumbnail, note its video, log its URL.
          thumbnail = embed['thumbnail']
          if thumbnail
            stored = save_image(thumbnail['url'], savedir)
            log_entry.call(msg_id, "THUMB", stored)
            puts " - Saved thumbnail #{stored}"
          end

          video = embed['video']
          if video
            log_entry.call(msg_id, "VIDEO", video['url'])
            puts " - Saved video #{video['url']}"
          end

          log_entry.call(msg_id, "URL", embed['url'])
          puts " - Saved URL #{embed['url']}"
        end
      end
    rescue => error
      # Best effort: report the failure and carry on with the next message.
      details = { message: error.message, backtrace: error.backtrace }
      puts " ! Internal error occurred:\n#{details.to_json}"
    end
  end

  arcdata.flush

  # The last element of the batch becomes the cursor for the next request.
  oldest = messages.last
  last_msg = oldest['id']
  break if ARCHIVE_UNTIL > Time.parse(oldest['timestamp'])

  # Throttle: make every batch last at least BATCH_MIN_WAIT seconds.
  elapsed = Time.now - batch_start
  next unless elapsed < BATCH_MIN_WAIT

  puts "! Batch finshed early, waiting"
  sleep(BATCH_MIN_WAIT - elapsed)
end

arcdata.close
puts "Finished archive at msg #{last_msg}"
# Leading file signatures mapped to the extension a matching file should carry.
# Exists due to issues like Pixiv links saving with .php extension
SIGLIST = {
  "\xFF\xD8\xFF".b => '.jpg',
  "\x89\x50\x4E\x47".b => '.png',
  "\x47\x49\x46\x38".b => '.gif',
}.freeze

# Renames +fname+ so its extension matches the signature found in its first
# bytes. A name without any extension gets the detected one appended.
#
# @param fname [String] path of the file to inspect
# @return [String, nil] the resulting path, or nil when no signature matched
def rename_by_signature(fname)
  # IO#read(len) returns nil for an empty file; treat that as "no bytes"
  # instead of crashing on nil.start_with?.
  header = File.open(fname, "rb") { |io| io.read(32) } || "".b
  _sig, fext = SIGLIST.find { |sig, _ext| header.start_with?(sig) }
  return unless fext

  puts "Identified #{fname} as '#{fext}'"
  # Replace the trailing ".ext", or append when the name has no extension.
  newname = fname.sub(/(\.\w+)?\z/, fext)
  File.rename(fname, newname) unless newname == fname
  newname
end

ARGV.each { |fname| rename_by_signature(fname) }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment