Skip to content

Instantly share code, notes, and snippets.

@paolodina
Last active July 2, 2022 16:02
Show Gist options
  • Save paolodina/71e51d01e3a1f5847f618adbdade7357 to your computer and use it in GitHub Desktop.
Save paolodina/71e51d01e3a1f5847f618adbdade7357 to your computer and use it in GitHub Desktop.
A script to import GitHub stars into Dendron. DRAFT
#!/usr/bin/env python
"""
Obtain the starred data with this:
https://github.com/dogsheep/github-to-sqlite#fetching-repos-that-have-been-starred-by-a-user
"""
import csv
import json
import string
import sys
from datetime import datetime as dt
from pathlib import Path
from textwrap import dedent
# nanoid is installed separately (via pip); without it the script cannot
# generate note ids, so print an install hint and stop.
try:
    from nanoid import generate
except ModuleNotFoundError:
    for hint in ("error: nanoid not found", "install with: pip install nanoid"):
        print(hint)
    raise SystemExit(1)
# --- Id generation ---
# Size argument handed to the id generator for every note.
LONG_ID_LENGTH = 23
# Id alphabet: the ten digits followed by the lowercase ASCII letters.
ALPHA_LOWER = "".join((string.digits, string.ascii_lowercase))

# --- Dendron hierarchy ---
# Prefix that starts the file name of every generated note.
HIERARCHY_PREFIX = 'starred.gh'

# --- Import cap ---
# Maximum number of items to generate; 0 means "generate all of them".
IMPORT_LIMIT = 0
# The script takes exactly two positional arguments (input csv, output
# directory). Any other argument count prints the usage text and exits
# with status 1.
if len(sys.argv) != 3:
    print(dedent("""
SYNOPSIS
gh_star_importer input_csv output_dir
ARGUMENTS
input_csv: file containing starred data in csv format
output_dir: output directory where generated files are written
"""))
    sys.exit(1)
input_csv, output_dir = sys.argv[1:]
# Template for one generated note: a front-matter header delimited by
# '---' lines, followed by a heading that links to the repository and a
# bullet that links to its homepage. Filled in with str.format().
front_matter_tpl = (
    "---\n"
    "id: {id}\n"
    "title: {title}\n"
    "desc: '{desc}'\n"
    "updated: {updated}\n"
    "created: {created}\n"
    "tags: {tags}\n"
    "---\n"
    "## [{full_name}]({github_url})\n"
    "- [homepage]({home_url})\n"
)
# Convert every row of the starred-repos CSV into one note file in
# output_dir. Exits with status 1 if a FileNotFoundError is raised while
# reading the input or writing the notes.
try:
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    # newline='' is what the csv module requires for files it reads; the
    # encoding is explicit so non-ASCII descriptions do not depend on the
    # platform default. NOTE(review): assumes the export is UTF-8 - confirm.
    with open(Path(input_csv), encoding='utf-8', newline='') as csvfile:
        # header:
        # starred_at, full_name, homepage, description, topics, language
        starred = csv.DictReader(csvfile, delimiter=',', quotechar='"')
        for idx, item in enumerate(starred):
            # IMPORT_LIMIT == 0 means "no limit". Checking before the row
            # is processed guarantees at most IMPORT_LIMIT notes are
            # written (the old post-write `idx > IMPORT_LIMIT` test let
            # IMPORT_LIMIT + 2 through).
            if IMPORT_LIMIT and idx >= IMPORT_LIMIT:
                break

            lang = item['language'].lower()
            full_name = item['full_name']
            user_org, repo = full_name.split('/')
            home_url = item['homepage'] or ''

            # The language, when present, becomes one level of the
            # note's dotted file name.
            if lang:
                md_file_name = f"{HIERARCHY_PREFIX}.{lang}.{user_org}.{repo}.md"
            else:
                md_file_name = f"{HIERARCHY_PREFIX}.{user_org}.{repo}.md"

            # An empty topics cell means "no topics"; json.loads would
            # raise on an empty string.
            raw_topics = item['topics'].strip('"').replace('""', '"')
            topics = json.loads(raw_topics) if raw_topics else []

            # The template wraps desc in single quotes, so a literal
            # single quote has to be doubled to keep the YAML valid.
            desc = (item['description'] or '').replace("'", "''")

            # starred_at carries a 'Z' (UTC) suffix. Turning it into an
            # explicit +00:00 offset yields an aware datetime, so the
            # epoch value no longer shifts with the local timezone.
            starred_at = dt.fromisoformat(
                item['starred_at'].replace('Z', '+00:00'))

            front_matter = {
                'id': generate(ALPHA_LOWER, LONG_ID_LENGTH),
                'title': repo,
                'desc': desc,
                'full_name': full_name,
                'github_url': f'https://github.com/{full_name}',
                'home_url': home_url or "''",
                # Both timestamps are whole seconds scaled to milliseconds.
                'updated': int(dt.now().timestamp()) * 1000,
                'created': int(starred_at.timestamp()) * 1000,
                'tags': [f'starred.{topic}' for topic in topics],
            }
            with open(output_dir / md_file_name, "w", encoding='utf-8') as md_out:
                md_out.write(front_matter_tpl.format(**front_matter))
except FileNotFoundError:
    print('error: csv input file not found')
    sys.exit(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment