Skip to content

Instantly share code, notes, and snippets.

@davidejones
Last active August 16, 2023 15:08
Show Gist options
  • Save davidejones/3aac5643478ce06eb21c18a5c67e6162 to your computer and use it in GitHub Desktop.
Save davidejones/3aac5643478ce06eb21c18a5c67e6162 to your computer and use it in GitHub Desktop.
Downloads a GitHub repository tarball and extracts only the metadata.csv files
import os
import requests
import tarfile
def _iter_safe_metadata_members(tar, to_path):
    """
    Lazily yields the archive members that are safe to extract: regular files
    whose name ends with 'metadata.csv' and whose destination stays inside to_path
    :param tar: an open tarfile.TarFile (may be in non-seekable stream mode)
    :param to_path: directory the members are going to be extracted into
    """
    root = os.path.realpath(to_path)
    # Iterate lazily: the archive is read as a forward-only stream, so members
    # must be handed to extractall() in the order they are encountered.
    for member in tar:
        if not member.name.endswith('metadata.csv'):
            continue
        # Skip links, devices and directories; a link named metadata.csv could
        # point anywhere on disk.
        if not member.isfile():
            continue
        target = os.path.realpath(os.path.join(root, member.name))
        try:
            inside_root = os.path.commonpath([root, target]) == root
        except ValueError:
            # Raised when the paths cannot be compared (e.g. different drives).
            inside_root = False
        if inside_root:
            yield member


def download_github_repo_tar(token, org, repo, branch, to_path):
    """
    Streams the tar.gz archive of an organisation/repo/branch from the GitHub API
    and extracts only the files whose name ends with 'metadata.csv' into to_path.
    If GitHub does not answer with an OK status, a message is printed and
    nothing is extracted.
    :param token: string of github token, or a falsy value for anonymous access
    :param org: string of organization
    :param repo: string of git repository
    :param branch: string of branchname
    :param to_path: where to extract (created if it does not exist)
    """
    os.makedirs(to_path, exist_ok=True)
    url = 'https://api.github.com/repos/{0}/{1}/tarball/{2}'.format(org, repo, branch)
    headers = {'Accept': 'application/vnd.github.v3.raw'}
    if token:
        headers.update({'Authorization': 'token {}'.format(token)})
    print('Downloading {} {}..'.format(repo, branch))
    # The context manager releases the streamed connection even when extraction
    # fails; the timeout keeps a stalled server from blocking the call forever.
    with requests.get(url, headers=headers, stream=True, timeout=30) as response:
        if response.status_code != requests.codes.ok:
            # Report the failure instead of returning silently with nothing extracted.
            print('Download of {} {} failed with HTTP status {}'.format(
                repo, branch, response.status_code))
            return
        # 'r|gz' reads the gzip stream sequentially, so the archive is never
        # buffered in memory or written to disk as a whole.
        with tarfile.open(mode='r|gz', fileobj=response.raw) as tar:
            tar.extractall(path=to_path, members=_iter_safe_metadata_members(tar, to_path))
if __name__ == '__main__':
    # Script entry point: anonymously fetch the metadata.csv files of
    # DataDog/integrations-core (master branch) into the current directory.
    download_github_repo_tar(
        token=None,
        org='DataDog',
        repo='integrations-core',
        branch='master',
        to_path=os.curdir,
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment