paolodina/dendron-import-github-stars.py

## dendron-import-github-stars.py
#!/usr/bin/env python
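"""
Import GitHub starred repositories into Dendron notes.

Reads a csv file of starred repositories and writes one markdown note per
repository into the output directory.

Obtain the starred data with github-to-sqlite:
https://github.com/dogsheep/github-to-sqlite#fetching-repos-that-have-been-starred-by-a-user
"""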


import csv
import json
import string
import sys
from datetime import datetime as dt
from pathlib import Path
from textwrap import dedent

# nanoid is a third-party package: when it cannot be imported, print an
# install hint and exit with status 1.
try:
    from nanoid import generate
except ModuleNotFoundError:
    print(
        "error: nanoid not found",
        "install with: pip install nanoid",
        sep="\n",
    )
    sys.exit(1)

# Id generation: alphabet (decimal digits followed by lowercase ASCII
# letters) and length handed to nanoid's generate() for each note id
LONG_ID_LENGTH = 23
ALPHA_LOWER = "".join((string.digits, string.ascii_lowercase))

# Prefix placed at the start of every generated note file name
HIERARCHY_PREFIX = 'starred.gh'

# Upper bound on the number of items to generate (0 means no bound)
IMPORT_LIMIT = 0

# Exactly two positional arguments are required; any other count prints the
# usage text and exits with status 1.
_cli_args = sys.argv[1:]
if len(_cli_args) != 2:
    print(dedent("""
    SYNOPSIS
        gh_star_importer input_csv output_dir

    ARGUMENTS
        input_csv: file containing starred data in csv format
        output_dir: output directory where generated files are written
    """))
    sys.exit(1)
input_csv, output_dir = _cli_args

# Template for one generated note: a front matter section delimited by '---'
# lines, followed by a markdown body that links to the repository and to its
# homepage. Placeholders are filled with str.format() from the per-repository
# dict built in the main loop.
# NOTE: {desc} is wrapped in single quotes, so the value substituted there
# must not contain an unescaped single quote.
front_matter_tpl = """---
id: {id}
title: {title}
desc: '{desc}'
updated: {updated}
created: {created}
tags: {tags}
---

## [{full_name}]({github_url})

- [homepage]({home_url})
"""

# Convert every row of the starred-repositories csv into one markdown note
# written to output_dir.
output_dir = Path(output_dir)
output_dir.mkdir(parents=True, exist_ok=True)

# Only the attempt to open the input is guarded, so that a failure while
# writing a note is never misreported as a missing csv file.
try:
    # newline='' is what the csv module asks for on files it reads; the
    # encoding is pinned so the result does not depend on the locale.
    csvfile = open(Path(input_csv), newline='', encoding='utf-8')
except FileNotFoundError:
    print('error: csv input file not found')
    sys.exit(1)

with csvfile:
    # header:
    # starred_at, full_name, homepage, description, topics, language
    starred = csv.DictReader(csvfile, delimiter=',', quotechar='"')
    # Count from 1 so the counter equals the number of notes written so far.
    for count, item in enumerate(starred, start=1):
        # DictReader yields None for columns missing from a short row.
        lang = (item['language'] or '').lower()
        full_name = item['full_name']
        user_org, repo = full_name.split('/')
        home_url = item['homepage'] or ''

        if lang:
            md_file_name = f"{HIERARCHY_PREFIX}.{lang}.{user_org}.{repo}.md"
        else:
            md_file_name = f"{HIERARCHY_PREFIX}.{user_org}.{repo}.md"

        # The topics cell holds a JSON array; strip leftover csv quoting and
        # treat an empty cell as "no topics" instead of crashing json.loads.
        raw_topics = (item['topics'] or '').strip('"').replace('""', '"')
        topics = json.loads(raw_topics) if raw_topics else []

        # Keep the UTC designator (as an explicit offset that fromisoformat
        # accepts) so the timestamp is not computed as if it were local time.
        starred_at = dt.fromisoformat(
            item['starred_at'].replace('Z', '+00:00'))

        front_matter = {
            'id': generate(ALPHA_LOWER, LONG_ID_LENGTH),
            'title': repo,
            # The template wraps desc in single quotes; inside a YAML
            # single-quoted scalar a quote is escaped by doubling it.
            'desc': (item['description'] or '').replace("'", "''"),
            'full_name': full_name,
            'github_url': f'https://github.com/{full_name}',
            'home_url': home_url or "''",
            # Timestamps are whole seconds expressed in milliseconds.
            'updated': int(dt.now().timestamp()) * 1000,
            'created': int(starred_at.timestamp()) * 1000,
            'tags': [f'starred.{topic}' for topic in topics]
        }

        with open(output_dir / md_file_name, "w",
                  encoding='utf-8') as md_out:
            md_out.write(front_matter_tpl.format(**front_matter))

        # Stop once exactly IMPORT_LIMIT notes are written (0 = no limit).
        if IMPORT_LIMIT and count >= IMPORT_LIMIT:
            break
	#!/usr/bin/env python
	"""
	Import GitHub starred repositories into Dendron notes.

	Reads a csv file of starred repositories and writes one markdown note per
	repository into the output directory.

	Obtain the starred data with github-to-sqlite:
	https://github.com/dogsheep/github-to-sqlite#fetching-repos-that-have-been-starred-by-a-user
	"""


	import csv
	import json
	import string
	import sys
	from datetime import datetime as dt
	from pathlib import Path
	from textwrap import dedent

	# nanoid is a third-party package: when it cannot be imported, print an
	# install hint and exit with status 1.
	try:
	    from nanoid import generate
	except ModuleNotFoundError:
	    print("error: nanoid not found")
	    print("install with: pip install nanoid")
	    sys.exit(1)

	# Id generation: alphabet and length handed to nanoid's generate()
	LONG_ID_LENGTH = 23
	ALPHA_LOWER = string.digits + string.ascii_lowercase

	# Dendron hierarchy prefix
	HIERARCHY_PREFIX = 'starred.gh'

	# Max number of items to generate (0 means all)
	IMPORT_LIMIT = 0

	# Exactly two positional arguments are required; any other count prints
	# the usage text and exits with status 1.
	try:
	    input_csv, output_dir = sys.argv[1:]
	except ValueError:
	    print(dedent("""
	    SYNOPSIS
	        gh_star_importer input_csv output_dir

	    ARGUMENTS
	        input_csv: file containing starred data in csv format
	        output_dir: output directory where generated files are written
	    """))
	    sys.exit(1)

	# Template for one generated note: front matter delimited by '---' lines,
	# then a markdown body linking to the repository and its homepage.
	front_matter_tpl = """---
	id: {id}
	title: {title}
	desc: '{desc}'
	updated: {updated}
	created: {created}
	tags: {tags}
	---

	## [{full_name}]({github_url})

	- [homepage]({home_url})
	"""

	# Convert every row of the starred-repositories csv into one markdown
	# note written to output_dir.
	output_dir = Path(output_dir)
	output_dir.mkdir(parents=True, exist_ok=True)

	# Only the attempt to open the input is guarded, so that a failure while
	# writing a note is never misreported as a missing csv file.
	try:
	    # newline='' is what the csv module asks for on files it reads; the
	    # encoding is pinned so the result does not depend on the locale.
	    csvfile = open(Path(input_csv), newline='', encoding='utf-8')
	except FileNotFoundError:
	    print('error: csv input file not found')
	    sys.exit(1)

	with csvfile:
	    # header:
	    # starred_at, full_name, homepage, description, topics, language
	    starred = csv.DictReader(csvfile, delimiter=',', quotechar='"')
	    # Count from 1 so the counter equals the number of notes written.
	    for count, item in enumerate(starred, start=1):
	        # DictReader yields None for columns missing from a short row.
	        lang = (item['language'] or '').lower()
	        full_name = item['full_name']
	        user_org, repo = full_name.split('/')
	        home_url = item['homepage'] or ''

	        if lang:
	            md_file_name = f"{HIERARCHY_PREFIX}.{lang}.{user_org}.{repo}.md"
	        else:
	            md_file_name = f"{HIERARCHY_PREFIX}.{user_org}.{repo}.md"

	        # The topics cell holds a JSON array; strip leftover csv quoting
	        # and treat an empty cell as "no topics".
	        raw_topics = (item['topics'] or '').strip('"').replace('""', '"')
	        topics = json.loads(raw_topics) if raw_topics else []

	        # Keep the UTC designator (as an explicit offset) so the
	        # timestamp is not computed as if it were local time.
	        starred_at = dt.fromisoformat(
	            item['starred_at'].replace('Z', '+00:00'))

	        front_matter = {
	            'id': generate(ALPHA_LOWER, LONG_ID_LENGTH),
	            'title': repo,
	            # The template wraps desc in single quotes; inside a YAML
	            # single-quoted scalar a quote is escaped by doubling it.
	            'desc': (item['description'] or '').replace("'", "''"),
	            'full_name': full_name,
	            'github_url': f'https://github.com/{full_name}',
	            'home_url': home_url or "''",
	            # Timestamps are whole seconds expressed in milliseconds.
	            'updated': int(dt.now().timestamp()) * 1000,
	            'created': int(starred_at.timestamp()) * 1000,
	            'tags': [f'starred.{topic}' for topic in topics]
	        }

	        with open(output_dir / md_file_name, "w",
	                  encoding='utf-8') as md_out:
	            md_out.write(front_matter_tpl.format(**front_matter))

	        # Stop once exactly IMPORT_LIMIT notes are written (0 = no limit).
	        if IMPORT_LIMIT and count >= IMPORT_LIMIT:
	            break