Created
June 25, 2021 08:44
-
-
Save paulsharpeY/0e67002b41d5d43c4c4ef9ddd3124fa0 to your computer and use it in GitHub Desktop.
Create a bibliography from (R)markdown sorted by in-article citation frequency
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# Create a bibliography from (R)markdown sorted by in-article citation frequency
#
# usage: pandoc -s --bibliography=refs.bib --citeproc foo.Rmd bar.Rmd ... | count-and-sort.py > refs_by_citation_frequency.html
#
# Author: emilianoeheyns
# Details: https://forums.zotero.org/discussion/comment/385929#Comment_385929
import sys
import xml.etree.ElementTree as ET
from collections import Counter
def citekey(key: str) -> str:
    """Return the citation key that corresponds to a bibliography entry id.

    A single leading ``ref-`` is removed from *key*; a key without that
    prefix is returned unchanged.
    """
    prefix = 'ref-'
    if key.startswith(prefix):
        # Strip the prefix once only, so 'ref-ref-x' becomes 'ref-x'.
        return key[len(prefix):]
    return key
def _count_citations(root):
    """Count in-text citations per citation key.

    Every ``span`` element whose ``class`` attribute is exactly ``citation``
    contributes one count for each whitespace-separated key listed in its
    ``data-cites`` attribute. Keys that never occur simply have a count of 0
    when looked up in the returned ``Counter``.
    """
    cited = Counter()
    for span in root.iter('span'):
        if span.get('class') == 'citation':
            # split() without an argument ignores repeated/leading/trailing
            # whitespace, so no empty-string keys are counted; a span without
            # data-cites contributes nothing instead of raising KeyError.
            cited.update(span.get('data-cites', '').split())
    return cited


def main():
    """Read an HTML document from stdin and write its sorted bibliography.

    The document is parsed as XML, reduced to the bibliography div(s) that
    are direct children of ``<body>``, the entries of the first such div are
    ordered by descending in-text citation count, and each entry gets a
    trailing ``: <count>`` span. The result is written to stdout as bytes.

    Exits with an error message (status 1) when the input has no ``<body>``
    or no bibliography div directly inside it.
    """
    tree = ET.parse(sys.stdin)
    root = tree.getroot()

    # Drop the '{namespace}' part of every tag so plain names such as 'span'
    # and 'div' match below. root.iter() includes the root element itself, so
    # the document element is written back as <html> rather than with a
    # generated namespace prefix.
    for node in root.iter():
        if isinstance(node.tag, str):
            node.tag = node.tag.split('}')[-1]

    # Count before pruning the body: the citation spans live in the text
    # that is about to be removed.
    cited = _count_citations(root)

    body = root.find('.//body')
    if body is None:
        sys.exit('count-and-sort: no <body> element found in the input')

    # Keep only the bibliography div(s); everything else in the body goes.
    bibliographies = [
        node for node in body
        if node.tag == 'div' and node.get('role') == 'doc-bibliography'
    ]
    if not bibliographies:
        sys.exit('count-and-sort: no <div role="doc-bibliography"> found '
                 'directly inside <body>')
    body[:] = bibliographies
    bib = bibliographies[0]

    def frequency(entry):
        # Counter lookups return 0 for missing keys, so entries that were
        # never cited in the text (or that lack an id) sort last instead of
        # raising KeyError.
        return cited[citekey(entry.get('id', ''))]

    # sorted() is stable, so equally cited entries keep their original order.
    bib[:] = sorted(bib, key=frequency, reverse=True)

    # Append the citation count to every bibliography entry.
    for entry in root.iter('div'):
        if entry.get('role') == 'doc-biblioentry':
            count = ET.SubElement(entry, 'span')
            count.text = f': {frequency(entry)}'

    tree.write(sys.stdout.buffer)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment