Skip to content

Instantly share code, notes, and snippets.

@heffo42
Last active July 7, 2019 19:06
Show Gist options
  • Save heffo42/9a94fe5416244ac4e7837bd8c92a692d to your computer and use it in GitHub Desktop.
Save heffo42/9a94fe5416244ac4e7837bd8c92a692d to your computer and use it in GitHub Desktop.
# URL template for fetching a further batch of search results.
# Placeholders: {pos} is the paging cursor, {q} the search query and
# {lang} the language filter; fill them in with str.format().
RELOAD_URL = (
    'https://twitter.com/i/search/timeline?f=tweets&vertical='
    'default&include_available_features=1&include_entities=1&'
    'reset_error_state=false&src=typd&max_position={pos}&q={q}&l={lang}'
)
# Fetch the first page of results and pass every tweet <li> element found
# in it to writeTweets.
response = requests.get(url, headers=HEADER)
soup = BeautifulSoup(response.text, 'lxml')
tweets = soup.find_all("li", attrs={"data-item-type": "tweet"})
writeTweets(tweets)
# The stream container's data-min-position attribute is kept as the paging
# cursor for the follow-up requests.
stream_container = soup.find("div", attrs={"class": "stream-container"})
next_pointer = stream_container["data-min-position"]
for i in range(10000):
url = RELOAD_URL.format(q=query, lang=lang, pos = next_pointer)
if response.status_code != 200:
print(response.status_code)
response = requests.get(url, headers=HEADER)
try:
json_resp = json.loads(response.text)
except:
print("Unable to process json response")
return
html = json_resp['items_html']
soup = BeautifulSoup(html, 'lxml')
tweets = soup.find_all("li", {"data-item-type": "tweet"})
writeTweets(tweets)
if (not json_resp['has_more_items']):
break
next_pointer = json_resp['min_position']
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment