LovingMelody/main.py

## main.py
#!/usr/bin/env python3
import json
import random
import sys
from time import sleep

import requests
from bs4 import BeautifulSoup

# Bounds, in seconds, for the random pause between two requests.
# sleep_time() feeds them to random.randrange, so the lower bound is
# inclusive and the upper bound is exclusive.
MIN_SLEEP = 0
MAX_SLEEP = 10

def sleep_time():
    """Pick a random pause length, in whole seconds, between two requests.

    Returns:
        An integer drawn by ``random.randrange`` from ``MIN_SLEEP`` up to,
        but not including, ``MAX_SLEEP``.
    """
    lower, upper = MIN_SLEEP, MAX_SLEEP
    pause = random.randrange(lower, upper)
    return pause

def parse_user(user, *, timeout=30):
    """Scrape the media grid of a Twitter user from the mobile site.

    Requests ``https://mobile.twitter.com/<user>/media/grid`` once to read
    the media count from the ``.title`` element, then requests the same page
    once per index (``idx`` query parameter) and records the first image
    under ``.media`` together with the text of ``.tweet-content``. A random
    pause from ``sleep_time()`` follows every per-index request.

    Args:
        user: Account name inserted into the URL.
        timeout: Seconds handed to ``requests.get`` so that a stalled
            connection cannot hang the scraper forever.

    Returns:
        A list of dicts with the keys ``media`` (image ``src``), ``id``
        (image ``id`` attribute) and ``content`` (tweet text). The list is
        empty when the title element is missing or its last word is not an
        integer; entries whose fetch or parse fails are skipped.

    Raises:
        requests.RequestException: If the initial request fails.
    """
    url = f"https://mobile.twitter.com/{user}/media/grid"
    r = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(r.text, features="html5lib")
    title = soup.select_one('.title')
    if title is None:
        print("Failed to get media")
        return []
    try:
        # The count is read from the last space-separated word of the title.
        ct = int(title.text.strip().split(' ')[-1])
    except ValueError:
        print("Failed to media count, aborting")
        return []
    print(f"Fetching {ct} images for {user}")
    tweets = []
    for idx in range(ct):
        try:
            # Reuse the cookies of the previous response for the next request.
            r = requests.get(
                url,
                cookies=r.cookies,
                params={'idx': idx},
                timeout=timeout,
            )
            soup = BeautifulSoup(r.text, features="html5lib")
            link = soup.select_one('.media > img:nth-child(1)')
            tweets.append({
                'media': link['src'],
                'id': link['id'],
                'content': soup.select_one('.tweet-content').text
            })
            # "\033[K" clears the rest of the line left by the "Sleeping" status.
            sys.stdout.write(f"Fetched {idx}\033[K\n")
        except Exception:
            # Best effort per item: a network error or an unexpected page
            # layout skips this index only. KeyboardInterrupt and SystemExit
            # are not Exception subclasses and therefore still propagate.
            print(f"Failed to fetch {idx}")
        # Pause outside of ``finally`` so that Ctrl-C is not delayed by a sleep.
        delay = sleep_time()
        print(f"Sleeping for {delay}s", end='\r')
        sleep(delay)
    print("Finished collecting for", user)
    return tweets

def main():
    """Scrape every user named on the command line and save the results.

    Each argument after the script name is passed to ``parse_user``. The
    results are collected in a dict keyed by user name and written as JSON
    to ``data.json`` in the current working directory. A user whose scrape
    raises is recorded with an empty list. Ctrl-C stops the loop; whatever
    was collected up to that point is still written.
    """
    data = {}
    try:
        for user in sys.argv[1:]:
            try:
                data[user] = parse_user(user)
            except Exception:
                # One failing user must not prevent the remaining ones from
                # being scraped. KeyboardInterrupt is deliberately not caught
                # here so that Ctrl-C ends the whole run.
                print(f"Failed to find {user}.")
                data[user] = []
    except KeyboardInterrupt:
        print("Interrupted, saving collected data")
    finally:
        # Always persist what has been gathered, even on unexpected errors.
        with open("data.json", "w") as f:
            json.dump(data, f)

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

## requirements.txt
beautifulsoup4>=4.7.1
html5lib>=1.0.1
requests>=2.22.0
	#!/usr/bin/env python3
	import json
	import random
	import sys
	from time import sleep

	import requests
	from bs4 import BeautifulSoup

	# Bounds, in seconds, for the random pause between two requests.
	# sleep_time() feeds them to random.randrange, so the lower bound is
	# inclusive and the upper bound is exclusive.
	MIN_SLEEP = 0
	MAX_SLEEP = 10

	def sleep_time():
	    """Pick a random pause length, in whole seconds, between two requests.

	    Returns:
	        An integer drawn by ``random.randrange`` from ``MIN_SLEEP`` up to,
	        but not including, ``MAX_SLEEP``.
	    """
	    return random.randrange(MIN_SLEEP, MAX_SLEEP)

	def parse_user(user, *, timeout=30):
	    """Scrape the media grid of a Twitter user from the mobile site.

	    Requests ``https://mobile.twitter.com/<user>/media/grid`` once to read
	    the media count from the ``.title`` element, then requests the same page
	    once per index (``idx`` query parameter) and records the first image
	    under ``.media`` together with the text of ``.tweet-content``. A random
	    pause from ``sleep_time()`` follows every per-index request.

	    Args:
	        user: Account name inserted into the URL.
	        timeout: Seconds handed to ``requests.get`` so that a stalled
	            connection cannot hang the scraper forever.

	    Returns:
	        A list of dicts with the keys ``media`` (image ``src``), ``id``
	        (image ``id`` attribute) and ``content`` (tweet text). The list is
	        empty when the title element is missing or its last word is not an
	        integer; entries whose fetch or parse fails are skipped.

	    Raises:
	        requests.RequestException: If the initial request fails.
	    """
	    url = f"https://mobile.twitter.com/{user}/media/grid"
	    r = requests.get(url, timeout=timeout)
	    soup = BeautifulSoup(r.text, features="html5lib")
	    title = soup.select_one('.title')
	    if title is None:
	        print("Failed to get media")
	        return []
	    try:
	        # The count is read from the last space-separated word of the title.
	        ct = int(title.text.strip().split(' ')[-1])
	    except ValueError:
	        print("Failed to media count, aborting")
	        return []
	    print(f"Fetching {ct} images for {user}")
	    tweets = []
	    for idx in range(ct):
	        try:
	            # Reuse the cookies of the previous response for the next request.
	            r = requests.get(
	                url,
	                cookies=r.cookies,
	                params={'idx': idx},
	                timeout=timeout,
	            )
	            soup = BeautifulSoup(r.text, features="html5lib")
	            link = soup.select_one('.media > img:nth-child(1)')
	            tweets.append({
	                'media': link['src'],
	                'id': link['id'],
	                'content': soup.select_one('.tweet-content').text
	            })
	            # "\033[K" clears the rest of the line left by the "Sleeping" status.
	            sys.stdout.write(f"Fetched {idx}\033[K\n")
	        except Exception:
	            # Best effort per item: a network error or an unexpected page
	            # layout skips this index only. KeyboardInterrupt and SystemExit
	            # are not Exception subclasses and therefore still propagate.
	            print(f"Failed to fetch {idx}")
	        # Pause outside of ``finally`` so that Ctrl-C is not delayed by a sleep.
	        delay = sleep_time()
	        print(f"Sleeping for {delay}s", end='\r')
	        sleep(delay)
	    print("Finished collecting for", user)
	    return tweets

	def main():
	    """Scrape every user named on the command line and save the results.

	    Each argument after the script name is passed to ``parse_user``. The
	    results are collected in a dict keyed by user name and written as JSON
	    to ``data.json`` in the current working directory. A user whose scrape
	    raises is recorded with an empty list. Ctrl-C stops the loop; whatever
	    was collected up to that point is still written.
	    """
	    data = {}
	    try:
	        for user in sys.argv[1:]:
	            try:
	                data[user] = parse_user(user)
	            except Exception:
	                # One failing user must not prevent the remaining ones from
	                # being scraped. KeyboardInterrupt is deliberately not caught
	                # here so that Ctrl-C ends the whole run.
	                print(f"Failed to find {user}.")
	                data[user] = []
	    except KeyboardInterrupt:
	        print("Interrupted, saving collected data")
	    finally:
	        # Always persist what has been gathered, even on unexpected errors.
	        with open("data.json", "w") as f:
	            json.dump(data, f)

	# Run the scraper only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()