Skip to content

Instantly share code, notes, and snippets.

@colin-ho
Last active April 25, 2025 17:36
Show Gist options
  • Save colin-ho/60484081e91187b4983d421048519cff to your computer and use it in GitHub Desktop.
Save colin-ho/60484081e91187b4983d421048519cff to your computer and use it in GitHub Desktop.
# Search GitHub API for repos using a Daft UDF and write the results.
def find_github_repos(query):
    """Return name/owner records for repositories matching a search query.

    Calls ``github.search_repositories`` with the results sorted by stars
    in descending order and converts every returned repository into a
    plain dict.

    Args:
        query: Search string forwarded as the ``query`` argument.

    Returns:
        A list with one dict per repository, holding ``"name"``
        (``repo.name``) and ``"owner"`` (``repo.owner.login``).
    """
    # `github` is not created in this block; it must already exist in the
    # enclosing module scope when this function is called.
    matches = github.search_repositories(query=query, sort="stars", order="desc")
    return [{"name": match.name, "owner": match.owner.login} for match in matches]
# One search query per row; each value is handed to find_github_repos.
queries = ["language:python", "language:rust", "language:javascript", "language:go"]
repos = daft.from_pydict({"queries": queries}).with_column(
    "repos",
    # Apply the UDF on the query column
    daft.col("queries").apply(
        find_github_repos,
        # find_github_repos returns a *list* of {"name", "owner"} dicts for
        # every query, so the column type is a list of structs, not a
        # single struct.
        return_dtype=daft.DataType.list(
            daft.DataType.struct(
                {
                    "name": daft.DataType.string(),
                    "owner": daft.DataType.string(),
                }
            )
        ),
    ),
)
# Print a preview of the resulting dataframe.
repos.show()
# Write the dataframe as Parquet under the "repos" path.
repos.write_parquet("repos")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment