# Source: GitHub gist botlabsDev/3a73606af0a4cae6b6388eddcf23a17b
# (author @botlabsDev, last active June 11, 2024 08:38).
import datetime
import logging
from time import sleep
import requests as requests
# Route INFO-and-above log records to the file "git_emails.log" (relative path).
logging.basicConfig(level=logging.INFO, filename="git_emails.log")
# GitHub REST endpoint for the public events feed.
# NOTE(review): GitHub's REST documentation lists 100 as the maximum page
# size; confirm whether per_page=1000 is honored or silently clamped.
API_URL = "https://api.github.com/events?per_page=1000"
# Email domains that should be excluded from the output.
DENY_LIST = ["example.com", "github.com"]
# tzinfo of the machine's local timezone, captured once at import time.
LOCAL_TIMEZONE = datetime.datetime.now().astimezone().tzinfo
def main():
    """Print and log every newly seen commit-author email.

    Iterates the stream produced by ``call_github_api()`` and reports each
    address exactly once: to stdout via ``print`` and to the log via
    ``logging.info``. A short pause follows every reported address.
    """
    # A set keeps the duplicate check O(1); the stream is unbounded, so a
    # list scan would get slower with every address collected.
    known_emails = set()
    counter = 0
    for email in call_github_api():
        if email in known_emails:
            continue
        known_emails.add(email)
        # stdout shows the zero-based index, the log line the one-based count.
        print(counter, email)
        sleep(0.3)
        counter += 1
        logging.info(f"{counter}: {email}")
def call_github_api():
    """Poll ``API_URL`` forever and yield commit-author emails.

    Each iteration fetches the endpoint and passes the decoded JSON of a
    successful response to ``_extract_emails``. When the response headers
    report that the rate-limit quota is exhausted, the generator sleeps
    until the advertised reset time (plus a 2 second margin) before the next
    request.

    Yields:
        Every item produced by ``_extract_emails`` for each fetched page.

    Raises:
        requests.HTTPError: For an error response that is not explained by
            an exhausted rate limit (raised by ``raise_for_status``).
    """
    utc = datetime.timezone.utc
    while True:
        # Without a timeout a stalled connection would block the poller forever.
        r = requests.get(API_URL, timeout=30)
        headers = r.headers
        remaining = headers.get("X-RateLimit-Remaining")
        reset = headers.get("X-RateLimit-Reset")
        # Only treat the quota as exhausted when both headers are present;
        # otherwise fall through so the real HTTP error is reported.
        exhausted = (
            remaining is not None and reset is not None and int(remaining) < 1
        )

        if r.ok:
            # A successful response is still worth consuming even if it was
            # the last one allowed in the current rate-limit window.
            yield from _extract_emails(r.json())
        elif not exhausted:
            r.raise_for_status()

        if exhausted:
            sleep_till = datetime.datetime.fromtimestamp(float(reset), tz=utc)
            sleep_time = sleep_till - datetime.datetime.now(utc)
            print(f"sleep till: {sleep_till} (UTC)")
            # total_seconds() keeps the sign; clamp at zero so a reset time
            # that already passed does not turn into a near 24 hour sleep.
            sleep(max(sleep_time.total_seconds(), 0.0) + 2)
def _extract_emails(data, deny_list=None):
    """Yield commit-author emails found in a list of event objects.

    Args:
        data: Iterable of decoded event objects. Only entries shaped like
            ``{"payload": {"commits": [{"author": {"email": ...}}, ...]}}``
            contribute; anything else is skipped.
        deny_list: Optional iterable of domains to exclude. Defaults to the
            module-level ``DENY_LIST``.

    Yields:
        Each author email (a ``str``) whose domain is neither a denied
        domain nor a subdomain of one. Duplicates are not removed.
    """
    if deny_list is None:
        deny_list = DENY_LIST
    denied = tuple(domain.lower() for domain in deny_list)

    for event in data:
        try:
            commits = event["payload"]["commits"]
        except (KeyError, TypeError):
            # Event without a commit list (or not an event object at all).
            continue
        for commit in commits or ():
            try:
                email = commit["author"]["email"]
            except (KeyError, TypeError):
                # Skip just this commit; the rest of the event is still usable.
                continue
            if not isinstance(email, str):
                continue
            # Compare the domain part only, so "a@myexample.com" is not
            # mistaken for "example.com" while subdomains stay filtered.
            host = email.rpartition("@")[2].lower()
            if any(host == d or host.endswith("." + d) for d in denied):
                continue
            yield email
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # The poller never finishes on its own; Ctrl-C is the normal way to
        # stop it, so exit quietly instead of dumping a traceback.
        pass
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment