Skip to content

Instantly share code, notes, and snippets.

@callahantiff
Last active January 1, 2024 11:17
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save callahantiff/0ae1c00df9bec7228be3f6bda5466d73 to your computer and use it in GitHub Desktop.
Save callahantiff/0ae1c00df9bec7228be3f6bda5466d73 to your computer and use it in GitHub Desktop.
GitHub API Repository Search (Python 3.6.2)
#!/usr/bin/env python3.6.2
# -*- coding: utf-8 -*-
# run script from command line via python3 keyword_search_github_repositories.py
import click
import datetime
import time
from github import Github
from github.GithubException import RateLimitExceededException
from tqdm import tqdm
def search_github(auth: Github, keyword: str) -> list:
    """Search the GitHub API for repositories using an input keyword.

    Args:
        auth: A Github authenticate object.
        keyword: A keyword string.

    Returns:
        A nested list of GitHub repositories returned for a keyword. Each result list contains the repository name,
        url, and description. At most the results the search API is willing to serve are returned, which can be
        fewer than the total count it reports.

    Raises:
        RateLimitExceededException: If the rate limit is still exceeded after several waits for the same result.
    """
    print('Searching GitHub using keyword: {}'.format(keyword))

    # set-up query
    query = keyword + '+in:readme+in:description'
    results = auth.search_repositories(query, 'stars', 'desc')

    # print results (read the count once; it is reused for the loop bound)
    total = results.totalCount
    print(f'Found {total} repo(s)')

    max_attempts = 5
    results_list = []
    for index in tqdm(range(total)):
        repo = None
        for attempt in range(1, max_attempts + 1):
            try:
                # indexing (rather than iterating) lets a failed page fetch be retried for the same position
                repo = results[index]
                break
            except RateLimitExceededException:
                if attempt == max_attempts:
                    raise
                time.sleep(60)
            except IndexError:
                # the search API only serves the first 1,000 matches, so totalCount can exceed
                # what is retrievable; keep what was collected instead of crashing
                print('Only {} of {} repo(s) could be retrieved'.format(len(results_list), total))
                return results_list

        results_list.append([repo.name, repo.url, repo.description])
        time.sleep(2)

    return results_list
@click.command()
@click.option('--token', prompt='Please enter your GitHub Access Token')
@click.option('--keywords', prompt='Please enter the keywords separated by a comma')
@click.option('--filename', prompt='Please provide the file path')
def main(token: str, keywords: str, filename: str) -> None:
    """Search GitHub for each keyword and write the results to a tab-separated text file.

    Args:
        token: A GitHub access token used to authenticate against the API.
        keywords: One or more search keywords separated by commas.
        filename: A path prefix; 'GitHub_Search_Results_<date>.txt' is appended to it.

    Returns:
        None. One line per repository is written as: keyword, name, url, description.
    """
    # initialize and authenticate GitHub API
    auth = Github(token)

    # search a list of keywords, ignoring empty entries such as those left by a trailing comma
    search_list = [keyword.strip() for keyword in keywords.split(',') if keyword.strip()]

    # search repositories on GitHub
    github_results = dict()
    last_position = len(search_list) - 1
    for position, key in enumerate(search_list):
        github_results[key] = search_github(auth, key)
        # pause between keyword searches; there is nothing to wait for after the final one
        if position < last_position:
            time.sleep(120)

    # write out results
    timestamp = datetime.datetime.now()
    formatted_date = timestamp.strftime('%d%b%Y')
    full_filename = filename.strip() + 'GitHub_Search_Results_' + formatted_date + '.txt'
    print('Writing search results to: {}'.format(full_filename))

    # explicit utf-8 so non-ASCII descriptions do not depend on the platform default encoding
    with open(full_filename, 'w', encoding='utf-8') as f_out:
        for key in tqdm(github_results):
            for name, url, description in github_results[key]:
                # terminate each record with a newline so every repository gets its own row
                f_out.write('\t'.join([key, str(name), str(url), str(description)]) + '\n')
# Script entry point: run the click command only when executed directly, not on import.
if __name__ == '__main__':
    # main is called without arguments; its click options declare prompts for token, keywords and filename
    main()
@callahantiff
Copy link
Author

callahantiff commented May 21, 2020

Note: the sleep timings could be improved with some experimentation on API rate-limit sensitivity.

@Abdur-rahmaanJ
Copy link

python -m pip install tqdm click pygithub

@callahantiff
Copy link
Author

python -m pip install tqdm click pygithub

Thanks so much @Abdur-rahmaanJ!

@Abdur-rahmaanJ
Copy link

@callahantiff This could be a nice project of its own btw

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment