Created
November 19, 2023 17:24
-
-
Save aramosf/451d26f17a9317f6eb188312d3503df8 to your computer and use it in GitHub Desktop.
Download all media files from a Telegram channel
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import logging | |
import time | |
from telethon import TelegramClient, sync | |
from telethon.tl.types import InputMessagesFilterDocument | |
from telethon.tl.types import InputMessagesFilterVideo | |
from telethon.tl import types | |
from telethon.errors import FloodWaitError | |
import sys | |
import os | |
import getopt | |
from pprint import pprint | |
## SETUP
# Fill in the three credentials below before running the script.
api_id = 0  # id int (placeholder: replace with your integer API id)
api_hash = ""  # api hash
phone_number = "+"  # phone number
secs = 360  # pause, in seconds, slept before every download

# Open the session and log in interactively when it is not authorized yet.
client = TelegramClient('session_telegram', api_id, api_hash)
client.connect()
if not client.is_user_authorized():
    client.send_code_request(phone_number)
    # Sign in with the same number the code was requested for (the variable,
    # not the literal text 'phone_number').
    client.sign_in(phone_number, input('Enter the code: '))

#logging.basicConfig(format="%(message)s", level=logging.WARNING)
#logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)
class bcolors:
    """ANSI escape sequences used to colour and style terminal output."""

    # Reset: appended after a coloured message to restore the default style.
    ENDC = '\033[0m'

    # Text attributes.
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # Foreground colours, in ascending escape-code order.
    FAIL = '\033[91m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    OKBLUE = '\033[94m'
    HEADER = '\033[95m'
def list_dialogs():
    """Print name, draft text and id of up to 200 dialogs, then exit.

    The client is disconnected before the process exits with status 0,
    including when fetching or printing the dialogs fails.
    """
    try:
        for dialog in client.get_dialogs(limit=200):
            print(dialog.name, dialog.draft.text, dialog.id)
    finally:
        # Must run before sys.exit(); anything placed after it is unreachable.
        client.disconnect()
    sys.exit(0)
def usage():
    """Print the command-line help to stdout and terminate with status 1."""
    help_lines = (
        'Usage: %s -i <ID>|-l\n' % sys.argv[0],
        'Options:',
        ' -i <ID> Dialog ID. IE: -i -10000000',
        ' -l List dialogs IDs',
    )
    for text in help_lines:
        print(text)
    raise SystemExit(1)
# Parse the command line: "-l" lists the dialogs and exits, "-i <ID>" selects
# the dialog to process. usage() prints the help text and exits, so nothing
# after a usage() call runs.
dialog_id = None
try:
    opts, args = getopt.getopt(sys.argv[1:], "i:l")
except getopt.GetoptError:
    usage()

for opt, arg in opts:
    # Compare with ==: ('-l') is a plain string, so "in" would be a substring
    # test rather than tuple membership.
    if opt == '-l':
        list_dialogs()
    elif opt == '-i':
        dialog_id = arg
    else:
        usage()

# Covers both "no arguments at all" and "arguments but no -i".
if dialog_id is None:
    usage()

# The id is converted with int() further down; reject anything non-numeric
# here with the help text instead of a traceback. dialog_id stays a string
# because later code concatenates it into messages and paths.
try:
    int(dialog_id)
except ValueError:
    usage()
# NOTE(review): the dialogs are fetched and discarded; presumably this lets
# the numeric id be resolved by get_entity() below. Confirm before removing.
client.get_dialogs(limit=30)
channel = client.get_entity(int(dialog_id))
#pprint(vars(channel))
print("Dialog_ID: " + dialog_id + " channel: " + channel.title)

#msgs = client.get_messages(channel, None, wait_time=15, filter=InputMessagesFilterDocument)
msgs = client.get_messages(channel, limit=30, wait_time=15, filter=InputMessagesFilterVideo)

# Number of fetched messages that actually carry media.
count = sum(1 for m in msgs if getattr(m, 'media', None) is not None)
print(channel.title, " has Total Messages with media = ", count)

# Directory / database name: the id without its leading minus sign.
# lstrip() leaves positive ids intact, whereas slicing off the first
# character would drop their first digit.
dialog_id2 = dialog_id.lstrip("-")
os.makedirs(dialog_id2, exist_ok=True)

# Create info.txt with ID and channel (mode "w" replaces any previous file).
with open(dialog_id2 + "/info.txt", "w") as myfile:
    myfile.write(dialog_id2 + "|" + channel.title + "\n")
def humanbytes(B):
    """Return the given bytes as a human friendly Bytes, KB, MB, GB, or TB string.

    B may be anything float() accepts (a number or a numeric string).
    Values below 1024 are printed as a float followed by "Byte" (exactly 1)
    or "Bytes" (everything else); larger values are scaled by powers of 1024
    and printed with two decimals.
    """
    B = float(B)
    KB = 1024.0
    MB = KB ** 2  # 1,048,576
    GB = KB ** 3  # 1,073,741,824
    TB = KB ** 4  # 1,099,511,627,776

    if B < KB:
        # Singular only for exactly one byte. The previous test,
        # "0 == B > 1", is a chained comparison that can never be true.
        return '{0} {1}'.format(B, 'Byte' if B == 1 else 'Bytes')
    if B < MB:
        return '{0:.2f} KB'.format(B / KB)
    if B < GB:
        return '{0:.2f} MB'.format(B / MB)
    if B < TB:
        return '{0:.2f} GB'.format(B / GB)
    return '{0:.2f} TB'.format(B / TB)
def _read_lines(path):
    """Return the lines of *path*, or an empty list when it cannot be read."""
    try:
        with open(path, "r") as db:
            return db.readlines()
    except IOError:
        return []


def save_result(filename, size):
    """Record a downloaded file in the per-dialog and the global database.

    filename -- path of the file in the form "<dialog dir>/<file name>".
    size     -- size to store for the file.

    The per-dialog database ("<dialog_id2>/<dialog_id2>.txt") holds
    "<path>|<size>" records; "allfiles.txt" holds "<name>|<size>|<dialog dir>"
    records. An earlier record of the same file is dropped (and reported)
    before the new one is appended. Records are matched on the complete name
    field, so a file whose name merely contains another file's name is left
    alone.

    Reads the module-level ``dialog_id2`` and, for the report lines, ``msg``.
    Exits the process with status 1 when a database cannot be written.
    """
    entity_db = dialog_id2 + "/" + dialog_id2 + ".txt"

    # --- per-dialog database -------------------------------------------
    kept = []
    for line in _read_lines(entity_db):
        if line.rsplit("|", 1)[0] == filename:
            print(bcolors.WARNING + str(msg.id), "File: " + filename + " deleted from entity database" + bcolors.ENDC)
        else:
            kept.append(line)
    try:
        with open(entity_db, "w") as db:
            db.writelines(kept)
            db.write(filename + "|" + str(size) + "\n")
    except IOError:
        print("ERROR: Can't write file")
        sys.exit(1)

    # --- global database -----------------------------------------------
    # Split on the first "/" only, so a name containing further slashes does
    # not break the unpacking.
    entity, fname = filename.split("/", 1)

    # Read allfiles.txt into its own list: when the file does not exist yet
    # the list is empty, instead of silently reusing the per-dialog lines.
    kept = []
    for line in _read_lines("allfiles.txt"):
        if line.rsplit("|", 2)[0] == fname:
            print(bcolors.WARNING + str(msg.id) + "File: " + fname + " deleted from all database" + bcolors.ENDC)
        else:
            kept.append(line)
    try:
        with open("allfiles.txt", "w") as db:
            db.writelines(kept)
            db.write(fname + "|" + str(size) + "|" + entity + "\n")
    except IOError:
        print("ERROR: Cant write file")
        sys.exit(1)
# 0 = not found, 1 = size ok, 2 = bad size
def exists(filename, size):
    """Look *filename* up in the per-dialog database and compare its size.

    The database is "<dialog_id2>/<dialog_id2>.txt" (module-level
    ``dialog_id2``) and holds one "<path>|<size>" record per line.

    Returns 0 when the file is not recorded or the database cannot be read,
    1 when the recorded size equals *size*, and 2 when it differs or the
    recorded size is not a number.
    """
    #print("Exists called with: " + filename + " " + str(size))
    try:
        with open(dialog_id2 + "/" + dialog_id2 + ".txt") as fp:
            for line in fp:
                # Split on the LAST "|" so names containing "|" still parse;
                # lines without any separator are skipped instead of raising.
                rls, separator, dwnsize = line.rstrip("\n").rpartition("|")
                if not separator or filename != rls:
                    continue
                try:
                    recorded = int(dwnsize.strip())
                except ValueError:
                    return 2
                return 1 if int(size) == recorded else 2
    except IOError:
        return 0
    return 0
# 0 = not found, 1 = size ok, 2 = bad size
def exists_all(filename, size):
    """Look a file up by name in the global database "allfiles.txt".

    filename -- path in the form "<dialog dir>/<file name>"; only the part
                after the first "/" is compared.
    size     -- expected size.

    Each record is "<name>|<size>|<dialog dir>". Returns 0 when the name is
    not recorded or the database cannot be read, 1 when the recorded size
    equals *size*, and 2 when it differs or is not a number.

    Side effect: the module-level ``rls`` and ``dwnsize`` are set to the name
    and size fields of the last well-formed record examined, so after a
    return value of 2 ``dwnsize`` holds the mismatching size.
    """
    global rls
    global dwnsize

    # Loop-invariant, so computed once; never raises, whatever the number of
    # slashes in the path.
    wanted = filename.split("/", 1)[-1]
    try:
        with open("allfiles.txt") as fp:
            for line in fp:
                # Split from the right so names containing "|" still parse.
                fields = line.rsplit("|", 2)
                if len(fields) != 3:
                    continue  # malformed record: ignore rather than crash
                rls, dwnsize, _entity = fields
                if rls != wanted:
                    continue
                try:
                    recorded = int(dwnsize.strip())
                except ValueError:
                    return 2
                return 1 if int(size) == recorded else 2
    except IOError:
        return 0
    return 0
def _fetch_and_record(message, output):
    """Download *message*'s media to *output* and record the size found on disk."""
    message.download_media(output)
    save_result(output, os.stat(output).st_size)


def download(file_name, output, file_size):
    """Download the media of the current message, retrying on flood waits.

    file_name -- name used in the progress messages.
    output    -- path the media is written to.
    file_size -- size reported for the document; shown in the progress
                 message and stored when the download fails outright.

    Operates on the module-level ``msg`` and also reads ``secs``, ``d_count``,
    ``count``, ``client`` and ``channel``.

    Sleeps ``secs`` seconds, then tries the download:
    * FloodWaitError: wait 900 s and retry; on a second FloodWaitError wait
      2500 s and retry once more.
    * FileReferenceExpiredError: fetch the message again and download from
      the fresh copy.
    * any other error on the first attempt: report it and record the file
      anyway so that it is skipped next time.

    Returns None on success and 0 on failure.
    """
    # Local import: the file's import section only provides FloodWaitError.
    from telethon.errors import FileReferenceExpiredError

    print(msg.id, "File: " + file_name + " - Sleeping " + str(secs) + " seconds " + str(d_count) + "/" + str(count))
    time.sleep(secs)
    print(msg.id, "File: " + file_name + " - Downloading " + humanbytes(str(file_size)) + " as:", output)

    # ---- first attempt ---------------------------------------------------
    try:
        _fetch_and_record(msg, output)
        return None
    except FloodWaitError:
        print(bcolors.WARNING + str(msg.id), "FloodWait error. Waiting 900 secs" + bcolors.ENDC)
        time.sleep(900)
    except FileReferenceExpiredError as e:
        # Must be listed before "except Exception", which would otherwise
        # swallow it.
        print("An exception FileReferenceExpired")
        print(bcolors.FAIL + "ERROR: " + str(e) + bcolors.ENDC)
        try:
            # Retrying with the same object would reuse the expired
            # reference, so fetch the message again first.
            fresh = client.get_messages(channel, ids=msg.id)
            _fetch_and_record(fresh, output)
        except Exception as retry_error:
            print(bcolors.FAIL + "ERROR: " + str(retry_error) + bcolors.ENDC)
            return 0
        return None
    except Exception as e:
        template = "An exception of type {0} occurred. Arguments:\n{1!r}"
        print(template.format(type(e).__name__, e.args))
        print(bcolors.FAIL + "ERROR: " + str(e) + bcolors.ENDC)
        print(bcolors.FAIL + "Error downloading!! " + file_name + ". Saving anyway in DB to skip next time" + bcolors.ENDC)
        save_result(output, file_size)
        return 0

    # ---- second attempt (after the first flood wait) -----------------------
    try:
        _fetch_and_record(msg, output)
        return None
    except FloodWaitError as e:
        print(bcolors.WARNING + str(msg.id), "FloodWait (2) error. Waiting 2500 secs" + bcolors.ENDC)
        template = "An exception of type {0} occurred. Arguments:\n{1!r}"
        print(template.format(type(e).__name__, e.args))
        time.sleep(2500)
    except Exception as e:
        print(bcolors.FAIL + "ERROR: " + str(e) + bcolors.ENDC)
        return 0

    # ---- third and last attempt --------------------------------------------
    try:
        _fetch_and_record(msg, output)
    except Exception as e:
        print(bcolors.FAIL + "ERROR: " + str(e) + bcolors.ENDC)
        return 0
    return None
# Main loop: download every fetched document that is not yet recorded with
# the right size in the per-dialog or the global database.
import mimetypes  # only needed to name documents that carry no file name

d_count = 0
for msg in msgs:
    d_count += 1

    # Skip messages without media, media without a document, and empty
    # documents.
    document = getattr(getattr(msg, 'media', None), 'document', None)
    if document is None:
        continue

    # Reset per message so a name from a previous message is never reused.
    file_name = None
    for attribute in document.attributes:
        if isinstance(attribute, types.DocumentAttributeFilename):
            file_name = attribute.file_name

    # The name comes from the remote side: keep only its last path component
    # so it cannot point outside the dialog directory.
    file_name = os.path.basename(file_name) if file_name else ""
    if not file_name:
        # No usable name: fall back to the message id plus an extension
        # guessed from the MIME type (empty when it cannot be guessed).
        extension = mimetypes.guess_extension(document.mime_type or "") or ""
        file_name = str(msg.id) + extension
        print(bcolors.WARNING + str(msg.id), "File has no name, saving as: " + file_name + bcolors.ENDC)

    file_size = document.size
    output = dialog_id2 + "/" + file_name
    file_exist = exists(output, file_size)
    file_exist_global = exists_all(output, file_size)

    if file_exist == 0 and file_exist_global == 0:
        download(file_name, output, file_size)
    elif file_exist == 2 or file_exist_global == 2:
        # exists_all() publishes the mismatching size through the global
        # dwnsize only when it is the one reporting the mismatch; otherwise
        # the recorded size is not available here.
        recorded = str(dwnsize).rstrip() if file_exist_global == 2 else "unknown"
        string = " have bad size downloaded/remote: "
        print(bcolors.FAIL + str(msg.id), "File: " + output + string + recorded + "/" + str(file_size) + bcolors.ENDC)
        download(file_name, output, file_size)
    elif file_exist == 1 or file_exist_global == 1:
        print(msg.id, "File: " + output + " (" + humanbytes(str(file_size)) + ") Already present, skipping " + str(d_count) + "/" + str(count))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment