Skip to content

Instantly share code, notes, and snippets.

@rossturk
Created December 16, 2022 22:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rossturk/71def13c19dc4e9b8fd755f486ee74ec to your computer and use it in GitHub Desktop.
Save rossturk/71def13c19dc4e9b8fd755f486ee74ec to your computer and use it in GitHub Desktop.
Snapshot downloads of a GitHub package
from datetime import datetime
from astro import sql as aql
from astro.sql.table import Table
from airflow.models import DAG
import pandas as pd
import requests
# Connection ID passed as `conn_id` to the Table objects used by the DAG below.
CONN_ID = "dwh"
@aql.dataframe
def get_download_counts(org="astronomer", project="astro-cli"):
    """
    #### Get Download Counts
    This grabs a snapshot of the download numbers for each release in the given GitHub project.

    Every page of the GitHub releases listing is fetched, and each release
    asset becomes one row stamped with the time the snapshot was taken.

    Args:
        org: GitHub organization or user that owns the repository.
        project: Repository name within ``org``.

    Returns:
        A pandas DataFrame with the columns ``reported_date``,
        ``release_name``, ``file_name``, ``file_download_count``,
        ``file_created_at`` and ``release_created_at``. The frame has zero
        rows when the project has no releases or no release assets.

    Raises:
        requests.HTTPError: If GitHub answers with an error status
            (for example when rate limited or the repository is missing).
        requests.Timeout: If GitHub does not respond in time.
    """
    columns = [
        "reported_date",
        "release_name",
        "file_name",
        "file_download_count",
        "file_created_at",
        "release_created_at",
    ]

    url = "/".join(["https://api.github.com", "repos", org, project, "releases"])
    # GitHub returns 30 releases per page by default; ask for the maximum
    # page size and follow the "next" links so older releases are included.
    params = {"per_page": 100}
    results = []
    while url:
        # A timeout keeps the task from hanging forever on a stalled connection.
        response = requests.get(url, params=params, timeout=30)
        # Fail loudly on error responses instead of normalizing an error payload.
        response.raise_for_status()
        results.extend(response.json())
        url = response.links.get("next", {}).get("url")
        # The "next" URL already carries the query string.
        params = None

    files = pd.json_normalize(
        results,
        record_path=["assets"],
        meta_prefix="release_",
        record_prefix="file_",
        meta=[
            "name",
            "created_at",
        ],
        max_level=1,
    )
    # NOTE(review): naive local timestamp kept so new rows match the format of
    # rows already appended to the reporting table.
    files["reported_date"] = datetime.now().isoformat()

    # reindex (rather than plain column selection) so a project without
    # releases or assets yields an empty frame with the expected columns
    # instead of a KeyError.
    return files.reindex(columns=columns)
with DAG(
    "astro-cli-downloads",
    schedule_interval="@daily",
    start_date=datetime(2022, 9, 15),
    catchup=False,
    default_args={
        "retries": 2,
    },
    tags=["git", "astro-cli"],
    # Passed as doc_md so the description is actually attached to the DAG;
    # a bare string literal inside the `with` body is evaluated and discarded.
    doc_md=(
        "### Astro CLI downloads by file\n"
        "This is a simple pipeline that gathers download counts by file from the GitHub API\n"
    ),
) as dag:
    # Take today's snapshot; the output Table has no name, only a connection.
    download_snapshot = get_download_counts(
        org="astronomer",
        project="astro-cli",
        output_table=Table(
            conn_id=CONN_ID,
        ),
    )

    # Append the snapshot rows to the ASTRO_CLI_DOWNLOADS reporting table.
    reporting_table = aql.append(
        target_table=Table(
            name="ASTRO_CLI_DOWNLOADS",
            conn_id=CONN_ID,
        ),
        source_table=download_snapshot,
    )

    aql.cleanup()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment