# clayote/betterfollowing.py

## betterfollowing.py
from requests_html import HTMLSession, HTML
from lxml.etree import ParserError
import json
import time
import random

# Script: for every account listed in following.json, fetch the account's
# twitter.com profile page, render it, and collect the text of the
# "UserProfileHeader_Items" elements into following_detailed.json.

FOLLOWING_PATH = 'following.json'
OUTPUT_PATH = 'following_detailed.json'

# CSS selector for the profile-header items on a rendered profile page.
PROFILE_HEADER_SELECTOR = '[data-testid="UserProfileHeader_Items"]'

# headers are what chromium spits out
# I've tried passing in an active cookie as well and it doesn't seem to work
# NOTE: built once here because the values never change between requests.
REQUEST_HEADERS = {
    'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="90"',
    'sec-ch-ua-mobile': '?0',
    'Upgrade-Insecure-Requests': '1',
    # The value must be the bare UA string; the header name is the dict key.
    'User-Agent': (
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36'
    ),
}


def profile_url(userid):
    """Return the profile URL for the account id *userid*."""
    return f"https://twitter.com/i/user/{userid}"


def header_item_texts(elements):
    """Return the ``.text`` of every element in *elements* as a list.

    Element objects themselves cannot be written by ``json.dump``; their
    text can.
    """
    return [element.text for element in elements]


def fetch_header_items(session, userid):
    """Fetch and render one profile page and return its header item texts.

    *session* is the ``HTMLSession`` used for the request; *userid* is the
    account id interpolated into the profile URL.
    """
    page = session.get(profile_url(userid), headers=REQUEST_HEADERS)
    page.html.render()
    header = page.html.find(PROFILE_HEADER_SELECTOR)
    # I would do some more processing here but the find call never returns
    # anything!
    return header_item_texts(header)


def main():
    """Read the following list, scrape each profile, and write the results.

    The output is a JSON list with one entry per input account, in input
    order; each entry is the list of header item texts found for it.
    """
    with open(FOLLOWING_PATH, 'rt') as inf:
        useless = json.load(inf)

    useful = []
    # The context manager closes the session when the loop is done, even if
    # a request raises part-way through.
    with HTMLSession() as session:
        for i, you in enumerate(useless):
            userid = you['following']['accountId']
            useful.append(fetch_header_items(session, userid))

            # Random 1-9 second pause between requests.
            time.sleep(random.randrange(1, 10))
            if i % 100 == 0:
                print(i)

    with open(OUTPUT_PATH, 'w') as outf:
        json.dump(useful, outf)


if __name__ == '__main__':
    main()