Last active
October 26, 2023 20:25
-
-
Save dawtmaytrikx/b69c6058dd127c04ebee05025fec28db to your computer and use it in GitHub Desktop.
Renames media files according to the release name over at srrdb.com and compares the hash to identify corrupted files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
import os | |
import pycurl | |
from io import BytesIO | |
from time import sleep | |
import zlib | |
import sys | |
import json | |
from colorama import Fore | |
import argparse | |
import sqlite3 # >= 3.24.0 | |
import datetime | |
#import signal | |
import urllib.parse | |
#import pdb; pdb.set_trace() | |
# Chunk size, in bytes, used when streaming a file through the CRC32 routine.
buffersize = 65536
# File extensions (lower case, leading dot) that the main loop will process.
extensions = ['.mkv', '.avi', '.mp4']

# Command-line interface. The parsed options are exposed as the plain dict
# ``args`` and read as a module global by the helper functions below.
parser = argparse.ArgumentParser(
    description='This script renames SCENE media files and compares their hashes to those stored at srrDB.')
parser.add_argument('-v', '--verbose', action='store_true',
                    help='Enable verbose mode.')
parser.add_argument('-f', '--skip-not-found', action='store_true',
                    help='Disable processing of files that were previously marked as not found.')
parser.add_argument('-n', '--no-comparison', action='store_true',
                    help='Disables hashing of files for comparison with hashes stored at SRRDB to check for corruption. Will still hash files to identify and rename them.')
parser.add_argument('-s', '--no-ssl-verify', action='store_true',
                    help='Disable SSL verification (not secure).')
# nargs=1 yields a one-element list, so the default must be a one-element
# list as well: the rest of the script reads args['tag'][0], which raised
# IndexError on the former default of ''.
parser.add_argument('-t', '--tag', action='store', default=[''],
                    nargs=1, help='Tag the files in dir as being movies, shows, etc.')
# nargs='+' yields a list; keep the default the same type.
parser.add_argument('-w', '--whitelist', action='store', default=[], nargs='+', metavar='ARG',
                    help='Only process files that include at least one of the arguments (case insensitive) passed with this option.')
parser.add_argument('-d', '--dir', nargs=1, required=True,
                    help='folder with your media files')
# TODO(review): the original carried a bare "skip errors" note here -
# presumably a planned option; nothing implements it yet.
args = vars(parser.parse_args())

if args['verbose']:
    # Show which SQLite library and sqlite3 module are in use (the script
    # relies on UPSERT, see the ">= 3.24.0" note on the import).
    print(sqlite3.sqlite_version)
    print(sqlite3.__file__)
    print(args)
# Open the bookkeeping database in the current working directory.
# sqlite3.connect() creates the file when it does not exist, so no separate
# "touch" step is required.
connection = sqlite3.connect('srr.db')
cursor = connection.cursor()

# Schema (created on first run only):
#   srrdb   - one row per release name with the original file name, the
#             locally calculated CRC, the CRC reported by the web service,
#             a status string, the user-supplied tag and a timestamp.
#   errors  - log of problems reported through error().
#   lastrun - start/end timestamps and the parameters of each invocation.
for sql_command in (
    '''
    CREATE TABLE IF NOT EXISTS srrdb (
        relname TEXT PRIMARY KEY,
        origname TEXT,
        crccalc TEXT,
        crcweb TEXT,
        status TEXT,
        tag TEXT,
        date TEXT
    );''',
    '''
    CREATE TABLE IF NOT EXISTS errors (
        key INTEGER PRIMARY KEY AUTOINCREMENT,
        relname TEXT,
        errnum TEXT,
        description TEXT,
        page TEXT,
        date TEXT
    );''',
    '''
    CREATE TABLE IF NOT EXISTS lastrun (
        key INTEGER PRIMARY KEY AUTOINCREMENT,
        start TEXT,
        end TEXT,
        parameters TEXT
    );''',
):
    cursor.execute(sql_command)
    connection.commit()

# Record the start of this run. ``start`` stays a datetime object because
# end_run() uses it to find this row again. It is bound as str(start), which
# is the same "YYYY-MM-DD HH:MM:SS.ffffff" text sqlite3's implicit datetime
# adapter produced - that adapter is deprecated since Python 3.12.
start = datetime.datetime.now()
cursor.execute(
    'INSERT INTO lastrun (start, parameters) VALUES (?, ?)', (str(start), str(args)))
connection.commit()
# def signal_handler(sig, frame): | |
# end_run() | |
# sys.exit(1) | |
def end_run():
    """Stamp the end time on this run's ``lastrun`` row and close the database.

    The row is located through the module-level ``start`` value written when
    the script began. Timestamps are bound as text (``str()`` of a datetime
    is its ISO form with a space separator), which matches what sqlite3's
    implicit datetime adapter stored while avoiding that adapter, deprecated
    since Python 3.12.
    """
    end = datetime.datetime.now()
    try:
        cursor.execute('UPDATE lastrun SET end=? WHERE start=?',
                       (str(end), str(start)))
        connection.commit()
    finally:
        # Release the database handle even if the update itself failed.
        connection.close()
def loadpage(url):
    """Fetch *url* with the shared curl handle and return the body as text.

    Uses the module-level ``c`` (pycurl handle) and ``buffer`` (the BytesIO
    it writes into), both created by the main loop for the current file.
    The buffer is emptied before every attempt so the result contains only
    this response.

    A 503 response is retried after a 10 second pause, mirroring how the
    main loop treats 503 ("RATE LIMITED") on its first request; without
    this a rate-limited follow-up request handed an error body to
    json.loads() in the caller.

    Args:
        url: Fully encoded URL to request.

    Returns:
        The response body decoded as UTF-8. The response code of the final
        attempt remains available through ``c.getinfo(c.RESPONSE_CODE)``.
    """
    while True:
        buffer.seek(0)
        buffer.truncate()
        c.setopt(c.URL, url)
        c.perform()
        if c.getinfo(c.RESPONSE_CODE) != 503:
            break
        print('RATE LIMITED! Sleeping 10 s ...')
        sleep(10)
    page = buffer.getvalue().decode('utf-8')
    if args['verbose']:
        print(Fore.YELLOW + 'VERBOSE: ' + Fore.RESET +
              str(c.getinfo(c.RESPONSE_CODE)) + ' - ' + url + '\r\n' + page)
    return page
def error(errnum, description):
    """Print an error for the current file and log it to the ``errors`` table.

    Reads the main loop's module-level ``dirpath`` and ``filename`` to
    identify the file, and ``page`` for the last response body.

    Args:
        errnum: Short error code (e.g. '5' or 'pyCurl'); stored as text.
        description: Message or exception; converted with ``str()``.
    """
    # ``page`` does not exist yet if the very first request of the run
    # failed; fall back to an empty string instead of raising NameError from
    # inside the caller's exception handler.
    last_page = globals().get('page', '')
    print(Fore.RED + 'ERROR ' + str(errnum) + ': ' + Fore.RESET +
          str(description) + '\r\n\tfile: ' + filename)
    # The timestamp is bound as text: same stored value as sqlite3's implicit
    # datetime adapter, which is deprecated since Python 3.12.
    cursor.execute('INSERT INTO errors (relname, errnum, description, page, date) VALUES (?, ?, ?, ?, ?)',
                   (os.path.join(dirpath, filename), str(errnum), str(description),
                    last_page, str(datetime.datetime.now())))
    connection.commit()
def mislabeled():
    """Rename the current file to the release name found in ``page``.

    Reads the main loop's module-level ``page`` (a JSON details document
    with a ``name`` field), ``relname``, ``dirpath`` and ``extension``.
    Records the rename in the ``srrdb`` table with status 'RENAMED', then
    renames the file on disk.

    Returns:
        The release name taken from the page; the caller assigns it back to
        ``relname``.

    Raises:
        KeyError: If the page has no ``name`` field.
        OSError: If the file cannot be renamed.
    """
    realname = json.loads(page)['name']
    print(Fore.BLUE + 'RENAMED: ' + Fore.RESET + relname + ' -> ' + realname)
    # --tag is optional; tolerate an empty value instead of raising IndexError.
    tag = args['tag'][0] if args['tag'] else ''
    # The timestamp is bound as text: same stored value as sqlite3's implicit
    # datetime adapter, which is deprecated since Python 3.12.
    cursor.execute('''INSERT OR REPLACE INTO srrdb (relname, origname, status, tag, date) VALUES (?, ?, ?, ?, ?)
        ON CONFLICT (relname) DO UPDATE SET origname=excluded.origname, status=excluded.status, tag=excluded.tag, date=excluded.date''',
                   (realname, relname, 'RENAMED', tag, str(datetime.datetime.now())))
    # TODO: also carry over crccalc, if available
    connection.commit()
    os.rename(os.path.join(dirpath, relname) + extension,
              os.path.join(dirpath, realname) + extension)
    return realname
def calculatecrc(filepath, *, chunk_size=None, verbose=None):
    """Return the CRC32 of a file as eight upper-case hex digits.

    The file is streamed in chunks, so memory use is independent of file
    size.

    Args:
        filepath: Path of the file to hash.
        chunk_size: Bytes to read per chunk. Defaults to the module-level
            ``buffersize``.
        verbose: Whether to print progress messages. Defaults to the
            ``--verbose`` command-line flag (``args['verbose']``).

    Returns:
        The checksum formatted with ``'{:08X}'``, e.g. ``'CBF43926'``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Resolve the defaults lazily so the module globals are only touched when
    # the caller did not override them.
    if chunk_size is None:
        chunk_size = buffersize
    if verbose is None:
        verbose = args['verbose']

    if verbose:
        # Name the file actually being hashed; the global ``filename`` still
        # holds the pre-rename name once the main loop has renamed a file.
        print(Fore.YELLOW + 'VERBOSE: ' + Fore.RESET +
              'Calculating CRC for ' + os.path.basename(filepath))

    crcvalue = 0
    with open(filepath, 'rb') as afile:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: afile.read(chunk_size), b''):
            crcvalue = zlib.crc32(chunk, crcvalue)

    crccalc = '{:08X}'.format(crcvalue)
    if verbose:
        print(Fore.YELLOW + 'VERBOSE: ' + Fore.RESET + 'CRC is ' + crccalc)
    return crccalc
def wrong_filesize():
    """Mark the current release as 'CORRUPT' because its size does not match.

    Reads the main loop's module-level ``relname``. Inserts or updates the
    release's row in the ``srrdb`` table and prints a notice.
    """
    # --tag is optional; tolerate an empty value instead of raising IndexError.
    tag = args['tag'][0] if args['tag'] else ''
    # The timestamp is bound as text: same stored value as sqlite3's implicit
    # datetime adapter, which is deprecated since Python 3.12.
    cursor.execute('''INSERT OR REPLACE INTO srrdb (relname, status, tag, date) VALUES (?, ?, ?, ?)
        ON CONFLICT (relname) DO UPDATE SET status=excluded.status, tag=excluded.tag, date=excluded.date''',
                   (relname, 'CORRUPT', tag, str(datetime.datetime.now())))
    connection.commit()
    # TODO: also store crccalc, if available
    print(Fore.RED + 'WRONG FILESIZE: ' + Fore.RESET + relname)
#signal.signal(signal.SIGINT, signal_handler) | |
# ---------------------------------------------------------------------------
# Main loop: walk the media directory and identify / rename / verify files.
#
# This runs at module level on purpose: error(), mislabeled(),
# wrong_filesize() and loadpage() read the loop variables (dirpath, filename,
# relname, extension, page, buffer, c) as module globals.
#
# NOTE(review): the nesting of this section was reconstructed from a copy
# whose indentation had been lost. Places where more than one reading was
# possible are marked NOTE(review) - please confirm them.
# ---------------------------------------------------------------------------

# Trailing decorations that get stripped from a file name before lookup.
suffixes = ['-AsRequested', '-NZBgeek', '-SickBeard', '-Obfuscated', '-Scrambled', '-RP', '.1',
            ' (1)', '.(1)', '(1)', '-1', '.repost', '-BUYMORE', '-repost', '-newz', '.', '-postbot', '-[cx86]', '-BWBP', '-[TRP]', '[rarbg]', '-RakuvFIN', '-Rakuv']
# Files whose name contains one of these (case-insensitive) are never processed.
skiptags = ['dirfix', '_S0', '_S1', '-d0rks', '-BTN', '-WiKi',
            '-2Maverick', '-NTb', '-BTW', '-McTav', 'M3lloW', 'itouch-mw']
# Name variants tried when searching by stored file name.
sampletags = ['', '-sample', '.sample']

# The whitelist is documented as case-insensitive, so lower-case it once.
_whitelist = [item.lower() for item in args['whitelist']]
# --tag is optional; tolerate an empty value instead of raising IndexError.
_tag = args['tag'][0] if args['tag'] else ''

_DETAILS_URL = 'https://api.srrdb.com/v1/details/'
_SEARCH_URL = 'https://api.srrdb.com/v1/search/'


def _current_path():
    """Return the on-disk path of the file being processed (follows renames)."""
    return os.path.join(dirpath, relname) + extension


def _now():
    """Return the current local time as text.

    Same stored value as sqlite3's implicit datetime adapter, which is
    deprecated since Python 3.12.
    """
    return str(datetime.datetime.now())


for dirpath, dirs, files in os.walk(args['dir'][0]):
    for filename in files:
        filenotfound = False
        crccalc = False
        # NOTE(review): never set to True - see the note in the status check.
        unprocessed = False
        page = ''   # lets error() log something before the first request
        c = None    # curl handle; closed in the ``finally`` below

        # Handle whitelist. The original for/else skipped every file when no
        # whitelist was given and compared case-sensitively.
        if _whitelist and not any(item in filename.lower() for item in _whitelist):
            print('SKIPPING ' + Fore.RED + 'NOT WHITELISTED' +
                  Fore.RESET + ': ' + filename)
            continue

        try:
            extension = os.path.splitext(filename)[1].lower()
            if extension not in extensions:
                continue
            relname = os.path.splitext(filename)[0]

            # Fix suffixes: strip known decorations and rename the file.
            for suffix in suffixes:
                if relname.lower().endswith(suffix.lower()):
                    realname = relname[:-len(suffix)]
                    print(Fore.BLUE + 'RENAMED: ' + Fore.RESET +
                          relname + ' -> ' + realname)
                    # The original SET clause assigned ``status`` twice, so
                    # origname was never updated on conflict.
                    cursor.execute('''INSERT OR REPLACE INTO srrdb (relname, origname, status, tag, date) VALUES (?, ?, ?, ?, ?)
                        ON CONFLICT (relname) DO UPDATE SET origname=excluded.origname, status=excluded.status, tag=excluded.tag, date=excluded.date''',
                                   (realname, relname, 'RENAMED', _tag, _now()))
                    connection.commit()
                    os.rename(os.path.join(dirpath, relname) + extension,
                              os.path.join(dirpath, realname) + extension)
                    relname = realname

            if any(skiptag.lower() in relname.lower() for skiptag in skiptags):
                print('SKIPPING ' + Fore.RED + 'BLACKLISTED' +
                      Fore.RESET + ': ' + relname)
                continue

            # Skip files that a previous run already dealt with.
            cursor.execute(
                'SELECT status FROM srrdb WHERE relname=?', (relname,))
            record = cursor.fetchone()
            if record is not None:
                if record[0] == 'OK':
                    print('SKIPPING ' + Fore.GREEN + 'OK' +
                          Fore.RESET + ': ' + relname)
                    continue
                elif record[0] == 'CORRUPT':
                    print('SKIPPING ' + Fore.RED + 'CORRUPT' +
                          Fore.RESET + ': ' + relname)
                    continue
                elif record[0] == 'NOT FOUND' and args['skip_not_found']:
                    print('SKIPPING ' + Fore.MAGENTA +
                          'NOT FOUND' + Fore.RESET + ': ' + relname)
                    continue
                elif record[0] is None or record[0] == 'RENAMED':
                    # NOTE(review): the original had the no-op comparison
                    # ``unprocessed == True`` here. It probably meant to
                    # assign, but switching it on would force the search
                    # path below even when the details lookup succeeds, so
                    # the flag is left False until the intent is confirmed.
                    if args['no_comparison']:
                        print('SKIPPING ' + Fore.CYAN +
                              'UNPROCESSED' + Fore.RESET + ': ' + relname)
                        continue

            # Download the details page for the (cleaned-up) release name.
            details_url = _DETAILS_URL + urllib.parse.quote_plus(relname)
            buffer = BytesIO()
            c = pycurl.Curl()
            c.setopt(c.URL, details_url)
            c.setopt(c.WRITEDATA, buffer)
            if args['no_ssl_verify']:
                # if ssl verification fails, try this instead of enabling this option:
                # sudo dpkg-reconfigure ca-certificates -> deactivate DST_Root_CA_X3.crt (expired on Oct 1 2021)
                c.setopt(c.SSL_VERIFYPEER, 0)
                c.setopt(c.SSL_VERIFYHOST, 0)
            c.perform()
            page = buffer.getvalue().decode('utf-8')
            if args['verbose']:
                print(Fore.YELLOW + 'VERBOSE: ' + Fore.RESET + str(c.getinfo(c.RESPONSE_CODE)) +
                      ' - ' + details_url + '\r\n' + page)

            # Response codes seen from the service:
            #   404 only on release/details
            #   302 is a redirect
            #   400 is illegal characters (should be caught by skiptags!)
            #   503 is rate limiting
            while c.getinfo(c.RESPONSE_CODE) == 503:
                print('RATE LIMITED! Sleeping 10 s ...')
                sleep(10)
                # Start from an empty buffer and re-read the body; the
                # original kept the stale 503 body in ``page``.
                buffer.seek(0)
                buffer.truncate()
                c.perform()
                page = buffer.getvalue().decode('utf-8')
            if c.getinfo(c.RESPONSE_CODE) == 400:
                error('5', 'Illegal character in filename!')
                continue

            # Redirect: the release is known under a different name.
            if c.getinfo(c.RESPONSE_CODE) == 302:
                c.setopt(c.FOLLOWLOCATION, 1)
                c.perform()
                # see https://bitbucket.org/srrdb/srrdb-issues/issues/114/api-faulty-redirect-if-query-contains
                realurl = c.getinfo(c.EFFECTIVE_URL).replace(
                    '/release/', '/v1/')
                # loadpage() already prints the verbose trace for this URL.
                page = loadpage(realurl)
                relname = mislabeled()

            if c.getinfo(c.RESPONSE_CODE) == 200 and page != '[]':
                if args['no_comparison']:
                    cursor.execute('''INSERT OR REPLACE INTO srrdb (relname, tag, date) VALUES (?, ?, ?)
                        ON CONFLICT (relname) DO UPDATE SET tag=excluded.tag, date=excluded.date''',
                                   (relname, _tag, _now()))
                    connection.commit()
                    # NOTE(review): placed inside the no-comparison branch;
                    # it may originally have been printed for every match.
                    print(Fore.GREEN + 'MATCHED: ' + Fore.RESET + relname)

            # Not found under this name: search for it.
            if c.getinfo(c.RESPONSE_CODE) == 404 or page == '[]' or unprocessed:
                page = loadpage(
                    _SEARCH_URL + 'r:' + urllib.parse.quote_plus(relname))
                search = json.loads(page)
                if 'resultsCount' in search:  # else what?
                    if search['resultsCount'] == '1':
                        # Exactly one candidate: adopt its name if the size matches.
                        release = search['results'][0]['release']
                        page = loadpage(
                            _DETAILS_URL + urllib.parse.quote_plus(release))
                        try:
                            details = json.loads(page)
                            if len(details['archived-files']) == 1:
                                if os.path.getsize(_current_path()) == details['archived-files'][0]['size']:
                                    relname = mislabeled()
                                else:
                                    wrong_filesize()
                                    continue
                            else:
                                error(
                                    '6', 'Multiples files in release ' + details['name'])
                        except Exception:
                            # Best effort: log and move on to the next file.
                            # NOTE(review): the ``continue`` is assumed to
                            # belong to this handler.
                            error(
                                '1', 'Problem while processing page https://api.srrdb.com/v1/search/r:' + relname + '\r\n' + page)
                            continue
                    else:
                        # Search for the stored (sample) file name instead.
                        sampletagstried = 0
                        for sampletag in sampletags:
                            samplename = relname + sampletag + extension
                            page = loadpage(
                                _SEARCH_URL + 'store-real-filename:' + urllib.parse.quote_plus(samplename))
                            search = json.loads(page)
                            # and do what if it fails?
                            if 'resultsCount' in search:
                                if search['resultsCount'] == '1':
                                    # Remember the release name before
                                    # ``page`` is replaced: the original error
                                    # handler read it from the details page,
                                    # which has no 'results' key.
                                    release = search['results'][0]['release']
                                    page = loadpage(
                                        _DETAILS_URL + urllib.parse.quote_plus(release))
                                    try:
                                        details = json.loads(page)
                                        if len(details['archived-files']) == 1:
                                            if os.path.getsize(_current_path()) == details['archived-files'][0]['size']:
                                                relname = mislabeled()
                                                break
                                            else:
                                                error(
                                                    '10', 'Filesize does not match that of release ' + details['name'])
                                                filenotfound = True
                                        else:
                                            error(
                                                '7', 'Multiple files in release ' + details['name'])
                                    except Exception:
                                        error('2', 'Problem while processing page https://api.srrdb.com/v1/details/' +
                                              release + '\r\n' + page)
                                else:
                                    sampletagstried += 1
                                    if sampletagstried == len(sampletags):
                                        filenotfound = True

                # Last resort: search by the CRC of the file itself.
                if filenotfound:
                    # If the CRC has already been calculated, don't do it again.
                    cursor.execute(
                        'SELECT crccalc FROM srrdb WHERE relname=?', (relname,))
                    record = cursor.fetchone()
                    if record is not None and record[0]:
                        crccalc = record[0]
                    else:
                        crccalc = calculatecrc(_current_path())
                    page = loadpage(_SEARCH_URL + 'archive-crc:' + crccalc)
                    search = json.loads(page)
                    if search['resultsCount'] == '1':
                        release = search['results'][0]['release']
                        page = loadpage(
                            _DETAILS_URL + urllib.parse.quote_plus(release))
                        if page == '[]':
                            # url of page
                            error('10', 'Error in srrDB entry!')
                        else:
                            try:
                                details = json.loads(page)
                                if len(details['archived-files']) == 1:
                                    if os.path.getsize(_current_path()) == details['archived-files'][0]['size']:
                                        relname = mislabeled()
                                        # file can be OK'd
                                    else:
                                        error(
                                            '9', 'Filesize does not match that of release ' + details['name'])
                                        continue
                                else:
                                    error(
                                        '8', 'Multiple files in release ' + details['name'])
                            except Exception:
                                error('3', 'Problem while processing page https://api.srrdb.com/v1/details/' +
                                      release + '\r\n' + page)
                    else:
                        cursor.execute('''INSERT OR REPLACE INTO srrdb (relname, crccalc, status, tag, date) VALUES (?, ?, ?, ?, ?)
                            ON CONFLICT (relname) DO UPDATE SET crccalc=excluded.crccalc, status=excluded.status, tag=excluded.tag, date=excluded.date''',
                                       (relname, crccalc, 'NOT FOUND', _tag, _now()))
                        connection.commit()
                        print(Fore.MAGENTA + 'NOT FOUND: ' +
                              Fore.RESET + relname + ' ' + crccalc)
                        continue

            # Everything below compares the file against the web record.
            # NOTE(review): the original's dangling ``else: continue`` could
            # belong to either of the next two checks; both skip the file
            # here, because falling through in either case would compare
            # against an undefined or stale ``crcweb``.
            if args['no_comparison']:
                continue
            details = json.loads(page)
            archived = details.get('archived-files') if isinstance(details, dict) else None
            if not archived:
                continue

            # Find the CRC in the page, but only if the sizes agree.
            disk_size = os.path.getsize(_current_path())
            if args['verbose']:
                print(Fore.YELLOW + 'VERBOSE: ' + Fore.RESET + 'Size of file on disk: ' + str(disk_size) +
                      ' - size of file on srrdb: ' + str(archived[0]['size']))
            if disk_size != archived[0]['size']:
                wrong_filesize()
                continue
            crcweb = archived[0]['crc']

            # Calculate the CRC unless it is already known. The original
            # re-hashed the file even when the CRC search above had just
            # computed it.
            if not crccalc:
                cursor.execute(
                    'SELECT crccalc FROM srrdb WHERE relname=?', (relname,))
                record = cursor.fetchone()
                # The row may be missing; the original indexed it blindly in
                # verbose mode.
                cached = record[0] if record is not None else None
                if args['verbose']:
                    print(Fore.YELLOW + 'VERBOSE: ' + Fore.RESET +
                          'Searched DB for CRC and found ' + str(cached))
                if cached:
                    crccalc = cached
                    if args['verbose']:
                        print(Fore.YELLOW + 'VERBOSE: ' + Fore.RESET +
                              'Using CRC found in DB ' + crccalc)
                else:
                    crccalc = calculatecrc(_current_path())

            status = 'OK' if crccalc == crcweb else 'CORRUPT'
            cursor.execute('''INSERT OR REPLACE INTO srrdb (relname, crccalc, crcweb, status, tag, date) VALUES (?, ?, ?, ?, ?, ?)
                ON CONFLICT (relname) DO UPDATE SET crccalc=excluded.crccalc, crcweb=excluded.crcweb, status=excluded.status, tag=excluded.tag, date=excluded.date''',
                           (relname, crccalc, crcweb, status, _tag, _now()))
            connection.commit()
            if status == 'OK':
                print(Fore.GREEN + 'OK: ' + Fore.RESET +
                      relname + ' ' + crccalc)
            else:
                print(Fore.RED + 'CORRUPT: ' + Fore.RESET +
                      relname + ' ' + crccalc + ' ' + crcweb)
        except pycurl.error as exc:
            # Log the exception instance (the original passed the class).
            error('pyCurl', exc)
            continue
        finally:
            # Release the curl handle on every path, including ``continue``.
            if c is not None:
                c.close()

end_run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment