Skip to content

Instantly share code, notes, and snippets.

@dawtmaytrikx
Last active October 26, 2023 20:25
Show Gist options
  • Save dawtmaytrikx/b69c6058dd127c04ebee05025fec28db to your computer and use it in GitHub Desktop.
Save dawtmaytrikx/b69c6058dd127c04ebee05025fec28db to your computer and use it in GitHub Desktop.
Renames media files to match their release names on srrdb.com and compares CRC hashes to identify corrupted files.
#!/usr/bin/python3
import os
import pycurl
from io import BytesIO
from time import sleep
import zlib
import sys
import json
from colorama import Fore
import argparse
import sqlite3 # >= 3.24.0
import datetime
#import signal
import urllib.parse
#import pdb; pdb.set_trace()
# Chunk size, in bytes, used when reading files for CRC calculation.
buffersize = 65536
# File extensions (lower case, leading dot included) that are processed.
extensions = ['.mkv', '.avi', '.mp4']

parser = argparse.ArgumentParser(
    description='This script renames SCENE media files and compares their hashes to those stored at srrDB.')
parser.add_argument('-v', '--verbose', action='store_true',
                    help='Enable verbose mode.')
parser.add_argument('-f', '--skip-not-found', action='store_true',
                    help='Disable processing of files that were previously marked as not found.')
parser.add_argument('-n', '--no-comparison', action='store_true',
                    help='Disables hashing of files for comparison with hashes stored at SRRDB to check for corruption. Will still hash files to identify and rename them.')
parser.add_argument('-s', '--no-ssl-verify', action='store_true',
                    help='Disable SSL verification (not secure).')
# The rest of the script reads the tag as args['tag'][0]. With nargs=1 a
# supplied value arrives as a one-element list, so the default must be a
# one-element list too; the former default '' made ''[0] raise IndexError
# whenever -t was omitted.
parser.add_argument('-t', '--tag', action='store', default=[''],
                    nargs=1, help='Tag the files in dir as being movies, shows, etc.')
# Default is an empty list (not an empty string) so the value is always a
# list of strings, whether or not -w was given.
parser.add_argument('-w', '--whitelist', action='store', default=[], nargs='+', metavar='ARG',
                    help='Only process files that include at least one of the arguments (case insensitive) passed with this option.')
parser.add_argument('-d', '--dir', nargs=1, required=True,
                    help='folder with your media files')
# skip errors

# Parsed options as a plain dict, e.g. args['verbose'], args['dir'][0].
args = vars(parser.parse_args())
if args['verbose']:
    print(sqlite3.sqlite_version)
    print(sqlite3.__file__)
    print(args)
# Make sure the database file exists in the working directory, then open it.
if not os.path.exists('srr.db'):
    with open('srr.db', 'w'):
        pass
connection = sqlite3.connect('srr.db')
cursor = connection.cursor()

# Schema: one row per release (srrdb), a log of problems (errors) and a
# record of every invocation of the script (lastrun).
schema_statements = (
    '''
CREATE TABLE IF NOT EXISTS srrdb (
relname TEXT PRIMARY KEY,
origname TEXT,
crccalc TEXT,
crcweb TEXT,
status TEXT,
tag TEXT,
date TEXT
);''',
    '''
CREATE TABLE IF NOT EXISTS errors (
key INTEGER PRIMARY KEY AUTOINCREMENT,
relname TEXT,
errnum TEXT,
description TEXT,
page TEXT,
date TEXT
);''',
    '''
CREATE TABLE IF NOT EXISTS lastrun (
key INTEGER PRIMARY KEY AUTOINCREMENT,
start TEXT,
end TEXT,
parameters TEXT
);''',
)
for sql_command in schema_statements:
    cursor.execute(sql_command)
    connection.commit()

# Log the start of this run together with the options it was started with;
# end_run() later fills in the end time of the row with this start value.
start = datetime.datetime.now()
cursor.execute(
    'INSERT INTO lastrun (start, parameters) VALUES (?, ?)', (start, str(args)))
connection.commit()
# def signal_handler(sig, frame):
# end_run()
# sys.exit(1)
def end_run():
    """Stamp the end time on this run's ``lastrun`` row and close the database.

    The row is located through the module-level ``start`` timestamp that
    was inserted when the script began.
    """
    finished = datetime.datetime.now()
    cursor.execute('UPDATE lastrun SET end=? WHERE start=?', (finished, start))
    connection.commit()
    connection.close()
def loadpage(url):
    """Request *url* through the shared curl handle and return the body as text.

    Uses the module-level ``c`` (curl handle) and ``buffer`` (the BytesIO
    the handle writes into). The buffer is emptied before the transfer so
    that only the new response is returned.
    """
    # Rewind first, then cut at the current position: this leaves the buffer
    # empty with the write position at 0.
    buffer.seek(0)
    buffer.truncate()

    c.setopt(c.URL, url)
    c.perform()
    page = buffer.getvalue().decode('utf-8')

    if args['verbose']:
        status_code = str(c.getinfo(c.RESPONSE_CODE))
        print(Fore.YELLOW + 'VERBOSE: ' + Fore.RESET +
              status_code + ' - ' + url + '\r\n' + page)
    return page
def error(errnum, description):
    """Print an error for the file being processed and log it to ``errors``.

    Parameters:
        errnum: short identifier of the error (stored and printed as text).
        description: human-readable details; converted with ``str()``.

    Reads the module-level ``filename`` and ``dirpath`` of the file being
    processed and, when available, the last downloaded ``page``.
    """
    print(Fore.RED + 'ERROR ' + str(errnum) + ': ' + Fore.RESET +
          str(description) + '\r\n\tfile: ' + filename)
    # `page` is only assigned once a download has succeeded. If the very
    # first request fails, the global does not exist yet; looking it up
    # directly would raise NameError here and hide the real error.
    last_page = globals().get('page', '')
    cursor.execute('INSERT INTO errors (relname, errnum, description, page, date) VALUES (?, ?, ?, ?, ?)',
                   (os.path.join(dirpath, filename), str(errnum), str(description), last_page, datetime.datetime.now()))
    connection.commit()
def mislabeled():
    """Rename the current file to the release name found in ``page``.

    Reads the module-level ``page`` (JSON text of an srrDB details
    response), ``relname``, ``extension`` and ``dirpath``. The file
    ``<dirpath>/<relname><extension>`` is renamed to
    ``<dirpath>/<name><extension>`` where ``name`` comes from the page, and
    the rename is recorded in the ``srrdb`` table with status ``RENAMED``.

    Returns:
        The new release name.

    Raises:
        OSError: if the file cannot be renamed. Nothing is written to the
            database in that case.
    """
    realname = json.loads(page)['name']
    # NOTE(review): realname comes from the remote API and is used as a file
    # name unchecked; consider rejecting names containing path separators.
    # Rename first and record afterwards: the row used to be committed
    # before os.rename, so a failed rename left a RENAMED record for a file
    # that was never moved.
    os.rename(os.path.join(dirpath, relname) + extension,
              os.path.join(dirpath, realname) + extension)
    print(Fore.BLUE + 'RENAMED: ' + Fore.RESET + relname + ' -> ' + realname)
    cursor.execute('INSERT OR REPLACE INTO srrdb (relname, origname, status, tag, date) VALUES (?, ?, ?, ?, ?)\n'
                   'ON CONFLICT (relname) DO UPDATE SET origname=excluded.origname, status=excluded.status, tag=excluded.tag, date=excluded.date',
                   (realname, relname, 'RENAMED', args['tag'][0], datetime.datetime.now()))
    # also crccalc, if available
    connection.commit()
    return realname
def calculatecrc(filepath):
    """Return the CRC-32 of the file at *filepath* as 8 upper-case hex digits.

    The file is read in chunks of ``buffersize`` bytes so that large media
    files are never loaded into memory in one piece.
    """
    verbose = args['verbose']
    if verbose:
        print(Fore.YELLOW + 'VERBOSE: ' + Fore.RESET +
              'Calculating CRC for ' + filename)

    checksum = 0
    with open(filepath, 'rb') as stream:
        # read() returns b'' at end of file, which stops the iteration.
        for chunk in iter(lambda: stream.read(buffersize), b''):
            checksum = zlib.crc32(chunk, checksum)

    crccalc = '{:08X}'.format(checksum)
    if verbose:
        print(Fore.YELLOW + 'VERBOSE: ' + Fore.RESET + 'CRC is ' + crccalc)
    return crccalc
def wrong_filesize():
    """Record the current release as CORRUPT and report the size mismatch.

    Reads the module-level ``relname``; the row in ``srrdb`` is inserted or,
    if it already exists, gets its status, tag and date updated.
    """
    upsert = ('INSERT OR REPLACE INTO srrdb (relname, status, tag, date) VALUES (?, ?, ?, ?)\n'
              'ON CONFLICT (relname) DO UPDATE SET status=excluded.status, tag=excluded.tag, date=excluded.date')
    row = (relname, 'CORRUPT', args['tag'][0], datetime.datetime.now())
    cursor.execute(upsert, row)
    connection.commit()
    # also crccalc, if available
    print(Fore.RED + 'WRONG FILESIZE: ' + Fore.RESET + relname)
#signal.signal(signal.SIGINT, signal_handler)
# ---------------------------------------------------------------------------
# Main loop: walk the media directory and, for every media file,
#   1. strip known junk suffixes from the file name,
#   2. look the release up at srrDB (details, then searches by release name,
#      stored file name and finally CRC) and rename the file if needed,
#   3. unless --no-comparison is given, compare the file's CRC-32 with the
#      one stored at srrDB and record OK / CORRUPT.
#
# The helper functions read these names as module globals, so they must stay
# module-level variables: dirpath, filename, relname, extension, page,
# buffer, c.
# ---------------------------------------------------------------------------

# Loop-invariant data, built once instead of once per file.
# Suffixes that are stripped from the end of a release name.
suffixes = ['-AsRequested', '-NZBgeek', '-SickBeard', '-Obfuscated', '-Scrambled', '-RP', '.1',
            ' (1)', '.(1)', '(1)', '-1', '.repost', '-BUYMORE', '-repost', '-newz', '.', '-postbot', '-[cx86]', '-BWBP', '-[TRP]', '[rarbg]', '-RakuvFIN', '-Rakuv']
# Files whose name contains one of these are never looked up.
skiptags = ['dirfix', '_S0', '_S1', '-d0rks', '-BTN', '-WiKi',
            '-2Maverick', '-NTb', '-BTW', '-McTav', 'M3lloW', 'itouch-mw']
# File-name variants tried when searching by stored file name.
sampletags = ['', '-sample', '.sample']
# Lower-cased once so that matching really is case insensitive.
whitelist = [item.lower() for item in args['whitelist']]

details_api = 'https://api.srrdb.com/v1/details/'
search_api = 'https://api.srrdb.com/v1/search/'

upsert_renamed = ('INSERT OR REPLACE INTO srrdb (relname, origname, status, tag, date) VALUES (?, ?, ?, ?, ?)\n'
                  'ON CONFLICT (relname) DO UPDATE SET origname=excluded.origname, status=excluded.status, tag=excluded.tag, date=excluded.date')
upsert_matched = ('INSERT OR REPLACE INTO srrdb (relname, tag, date) VALUES (?, ?, ?)\n'
                  'ON CONFLICT (relname) DO UPDATE SET tag=excluded.tag, date=excluded.date')
upsert_not_found = ('INSERT OR REPLACE INTO srrdb (relname, crccalc, status, tag, date) VALUES (?, ?, ?, ?, ?)\n'
                    'ON CONFLICT (relname) DO UPDATE SET crccalc=excluded.crccalc, status=excluded.status, tag=excluded.tag, date=excluded.date')
upsert_checked = ('INSERT OR REPLACE INTO srrdb (relname, crccalc, crcweb, status, tag, date) VALUES (?, ?, ?, ?, ?, ?)\n'
                  'ON CONFLICT (relname) DO UPDATE SET crccalc=excluded.crccalc, crcweb=excluded.crcweb, status=excluded.status, tag=excluded.tag, date=excluded.date')

for dirpath, dirs, files in os.walk(args['dir'][0]):
    for filename in files:
        filenotfound = False
        crccalc = False
        unprocessed = False
        # error() logs the global `page`; reset it so that an early failure
        # neither hits an undefined name nor logs the previous file's page.
        page = ''
        # Curl handle of this file; closed in the `finally` below.
        c = None

        # handle whitelist: no whitelist means "process everything",
        # otherwise at least one entry must occur in the file name.
        lowered_filename = filename.lower()
        if whitelist and not any(item in lowered_filename for item in whitelist):
            print('SKIPPING ' + Fore.RED + 'NOT WHITELISTED' +
                  Fore.RESET + ': ' + filename)
            continue

        try:
            # Keep the extension exactly as it is on disk: it is used to
            # build file paths, and a lower-cased copy would not resolve on
            # a case-sensitive file system (e.g. "Movie.MKV").
            relname, extension = os.path.splitext(filename)
            if extension.lower() not in extensions:
                continue

            # fix suffixes
            for suffix in suffixes:
                if relname.lower().endswith(suffix.lower()):
                    realname = relname[:-len(suffix)]
                    # Rename before recording, so a failed rename leaves no
                    # misleading RENAMED row behind.
                    os.rename(os.path.join(dirpath, relname) + extension,
                              os.path.join(dirpath, realname) + extension)
                    print(Fore.BLUE + 'RENAMED: ' + Fore.RESET +
                          relname + ' -> ' + realname)
                    # The conflict clause used to read
                    # "status=excluded.origname", so origname was never
                    # updated for an existing row.
                    cursor.execute(upsert_renamed,
                                   (realname, relname, 'RENAMED', args['tag'][0], datetime.datetime.now()))
                    connection.commit()
                    relname = realname

            lowered_relname = relname.lower()
            if any(skiptag.lower() in lowered_relname for skiptag in skiptags):
                print('SKIPPING ' + Fore.RED + 'BLACKLISTED' +
                      Fore.RESET + ': ' + relname)
                continue

            # skip, if already processed
            # skip manually renamed
            cursor.execute(
                'SELECT status FROM srrdb WHERE relname=?', (relname,))
            record = cursor.fetchone()
            if record is not None:
                status = record[0]
                if status == 'OK':
                    print('SKIPPING ' + Fore.GREEN + 'OK' +
                          Fore.RESET + ': ' + relname)
                    continue
                elif status == 'CORRUPT':
                    print('SKIPPING ' + Fore.RED + 'CORRUPT' +
                          Fore.RESET + ': ' + relname)
                    continue
                elif status == 'NOT FOUND' and args['skip_not_found']:
                    print('SKIPPING ' + Fore.MAGENTA +
                          'NOT FOUND' + Fore.RESET + ': ' + relname)
                    continue
                elif status is None or status == 'RENAMED':
                    # NOTE(review): the original had `unprocessed == True`
                    # here, a comparison with no effect, so the flag has
                    # never been set. It is left unset on purpose: setting
                    # it would force a release search even after a
                    # successful details lookup. Confirm the intent.
                    if args['no_comparison']:
                        print('SKIPPING ' + Fore.CYAN +
                              'UNPROCESSED' + Fore.RESET + ': ' + relname)
                        continue

            # download website
            buffer = BytesIO()
            c = pycurl.Curl()
            c.setopt(c.WRITEDATA, buffer)
            if args['no_ssl_verify']:
                # if ssl verification fails, try this instead of enabling this option:
                # sudo dpkg-reconfigure ca-certificates -> deactivate DST_Root_CA_X3.crt (expired on Oct 1 2021)
                c.setopt(c.SSL_VERIFYPEER, 0)
                c.setopt(c.SSL_VERIFYHOST, 0)
            details_url = details_api + urllib.parse.quote_plus(relname)
            # loadpage() sets the URL, performs the request, decodes the
            # body and prints the verbose trace.
            page = loadpage(details_url)

            # check response code
            # 404 only on release/details
            # 302 is redirect
            # what's with 300?
            # 400 is illegal characters (should be caught by skiptags!)
            while c.getinfo(c.RESPONSE_CODE) == 503:
                print('RATE LIMITED! Sleeping 10 s ...')
                sleep(10)
                # Retry through loadpage() so the buffer is emptied and
                # `page` holds the new response; a bare c.perform() appended
                # to the old body and left `page` stale.
                page = loadpage(details_url)
            if c.getinfo(c.RESPONSE_CODE) == 400:
                error('5', 'Illegal character in filename!')
                continue

            # rename
            if c.getinfo(c.RESPONSE_CODE) == 302:
                c.setopt(c.FOLLOWLOCATION, 1)
                c.perform()
                # see https://bitbucket.org/srrdb/srrdb-issues/issues/114/api-faulty-redirect-if-query-contains
                realurl = c.getinfo(c.EFFECTIVE_URL).replace(
                    '/release/', '/v1/')
                # loadpage() already prints the verbose trace for realurl.
                page = loadpage(realurl)
                relname = mislabeled()

            response_code = c.getinfo(c.RESPONSE_CODE)
            if response_code == 200 and page != '[]':
                if args['no_comparison']:
                    cursor.execute(upsert_matched,
                                   (relname, args['tag'][0], datetime.datetime.now()))
                    connection.commit()
                    print(Fore.GREEN + 'MATCHED: ' + Fore.RESET + relname)

            # search
            if response_code == 404 or page == '[]' or unprocessed:
                page = loadpage(
                    search_api + 'r:' + urllib.parse.quote_plus(relname))
                search = json.loads(page)
                if 'resultsCount' in search:  # else what?
                    if search['resultsCount'] == '1':
                        # rename
                        release = search['results'][0]['release']
                        page = loadpage(
                            details_api + urllib.parse.quote_plus(release))
                        try:
                            details = json.loads(page)
                            archived = details['archived-files']
                            if len(archived) == 1:
                                if os.path.getsize(os.path.join(dirpath, relname) + extension) == archived[0]['size']:
                                    relname = mislabeled()
                                else:
                                    wrong_filesize()
                                    continue
                            else:
                                error(
                                    '6', 'Multiples files in release ' + details['name'])
                        except Exception:
                            # Exception instead of a bare except, so that
                            # Ctrl-C and sys.exit() are not swallowed.
                            error(
                                '1', 'Problem while processing page https://api.srrdb.com/v1/search/r:' + relname + '\r\n' + page)
                            continue
                    # search for sample name
                    else:
                        sampletagstried = 0
                        for sampletag in sampletags:
                            samplename = relname + sampletag + extension.lower()
                            page = loadpage(
                                search_api + 'store-real-filename:' + urllib.parse.quote_plus(samplename))
                            search = json.loads(page)
                            # and do what if it fails?
                            if 'resultsCount' in search:
                                if search['resultsCount'] == '1':
                                    # rename
                                    # Remember the release name now: `page`
                                    # is replaced by the details page, which
                                    # has no 'results' key for the error
                                    # message below to read.
                                    release = search['results'][0]['release']
                                    page = loadpage(
                                        details_api + urllib.parse.quote_plus(release))
                                    try:
                                        details = json.loads(page)
                                        archived = details['archived-files']
                                        if len(archived) == 1:
                                            if os.path.getsize(os.path.join(dirpath, relname) + extension) == archived[0]['size']:
                                                relname = mislabeled()
                                                break
                                            else:
                                                error(
                                                    '10', 'Filesize does not match that of release ' + details['name'])
                                                filenotfound = True
                                        else:
                                            error(
                                                '7', 'Multiple files in release ' + details['name'])
                                    except Exception:
                                        error('2', 'Problem while processing page https://api.srrdb.com/v1/details/' +
                                              release + '\r\n' + page)
                                else:
                                    sampletagstried += 1
                                    if sampletagstried == len(sampletags):
                                        filenotfound = True

                # search for CRC of file
                if filenotfound:
                    # if crc has already been calculated, don't do it again
                    cursor.execute(
                        'SELECT crccalc FROM srrdb WHERE relname=?', (relname,))
                    record = cursor.fetchone()
                    stored = record[0] if record is not None else None
                    # An empty or missing stored value means "not calculated".
                    crccalc = stored or calculatecrc(
                        os.path.join(dirpath, relname) + extension)
                    page = loadpage(search_api + 'archive-crc:' + crccalc)
                    search = json.loads(page)
                    if search['resultsCount'] == '1':
                        # rename
                        release = search['results'][0]['release']
                        page = loadpage(
                            details_api + urllib.parse.quote_plus(release))
                        if page == '[]':
                            # url of page
                            error('10', 'Error in srrDB entry!')
                        else:
                            try:
                                details = json.loads(page)
                                archived = details['archived-files']
                                if len(archived) == 1:
                                    if os.path.getsize(os.path.join(dirpath, relname) + extension) == archived[0]['size']:
                                        relname = mislabeled()
                                        # file can be OK'd
                                    else:
                                        error(
                                            '9', 'Filesize does not match that of release ' + details['name'])
                                        continue
                                else:
                                    error(
                                        '8', 'Multiple files in release ' + details['name'])
                            except Exception:
                                error('3', 'Problem while processing page https://api.srrdb.com/v1/details/' +
                                      release + '\r\n' + page)
                    else:
                        cursor.execute(upsert_not_found,
                                       (relname, crccalc, 'NOT FOUND', args['tag'][0], datetime.datetime.now()))
                        connection.commit()
                        print(Fore.MAGENTA + 'NOT FOUND: ' +
                              Fore.RESET + relname + ' ' + crccalc)
                        continue

            # Everything below is the corruption check, which
            # --no-comparison switches off.
            if args['no_comparison']:
                continue

            # find CRC in page
            details = json.loads(page)
            archived = details.get('archived-files') if isinstance(details, dict) else None
            if not archived:
                # No (or an empty) file list: nothing to compare against.
                continue
            filepath = os.path.join(dirpath, relname) + extension
            filesize = os.path.getsize(filepath)
            if args['verbose']:
                print(Fore.YELLOW + 'VERBOSE: ' + Fore.RESET + 'Size of file on disk: ' + str(filesize) +
                      ' - size of file on srrdb: ' + str(archived[0]['size']))
            if filesize != archived[0]['size']:
                wrong_filesize()
                continue
            crcweb = archived[0]['crc']

            # calculate CRC, unless the CRC search above already produced it
            # (it used to be recomputed in that case).
            if not crccalc:
                cursor.execute(
                    'SELECT crccalc FROM srrdb WHERE relname=?', (relname,))
                record = cursor.fetchone()
                # fetchone() returns None when there is no row; the verbose
                # print used to index it unconditionally.
                stored = record[0] if record is not None else None
                if args['verbose']:
                    print(Fore.YELLOW + 'VERBOSE: ' + Fore.RESET +
                          'Searched DB for CRC and found ' + str(stored))
                if stored:
                    crccalc = stored
                    if args['verbose']:
                        print(Fore.YELLOW + 'VERBOSE: ' + Fore.RESET +
                              'Using CRC found in DB ' + crccalc)
                else:
                    crccalc = calculatecrc(filepath)

            status = 'OK' if crccalc == crcweb else 'CORRUPT'
            cursor.execute(upsert_checked,
                           (relname, crccalc, crcweb, status, args['tag'][0], datetime.datetime.now()))
            connection.commit()
            if status == 'OK':
                print(Fore.GREEN + 'OK: ' + Fore.RESET +
                      relname + ' ' + crccalc)
            else:
                print(Fore.RED + 'CORRUPT: ' + Fore.RESET +
                      relname + ' ' + crccalc + ' ' + crcweb)
        except pycurl.error as exc:
            # Log the exception that occurred, not the exception class.
            error('pyCurl', exc)
            continue
        finally:
            # Release the curl handle on every path, including the early
            # `continue`s above.
            if c is not None:
                c.close()
end_run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment