Created
September 12, 2023 22:01
-
-
Save Xeophon/f81b620250dd4d0b833a4c0eddb82ad5 to your computer and use it in GitHub Desktop.
Bib entry fixer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# espanso match file: two triggers that replace themselves with the output
# of a helper script, which receives the current clipboard text as its
# single command-line argument.
matches:
  # ":doi" -> scripts/doi_to_bib.py <clipboard>
  - trigger: ":doi"
    replace: "{{output}}"
    vars:
      - {name: "clipboard", type: "clipboard"}
      - name: output
        type: script
        params:
          args: [python, "%CONFIG%/scripts/doi_to_bib.py", "{{clipboard}}"]

  # ":dblp" -> scripts/dblp_search.py <clipboard>
  - trigger: ":dblp"
    replace: "{{output}}"
    vars:
      - {name: "clipboard", type: "clipboard"}
      - name: output
        type: script
        params:
          args: [python, "%CONFIG%/scripts/dblp_search.py", "{{clipboard}}"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import urllib.request | |
import urllib.parse | |
import json | |
import re | |
import sys | |
def get_bib_info(bib):
    """Extract the author, title and year from a BibTeX entry.

    Args:
        bib: The BibTeX entry as a single string.

    Returns:
        A ``(author, title, year)`` tuple of strings. The author falls back
        to the ``editor`` field when there is no ``author`` field. Any field
        that cannot be found is reported as ``"Unknown"``. Values are returned
        as written in the entry (inner braces are kept).
    """
    # A braced value that may itself contain up to two levels of nested
    # braces, e.g. "{BERT:} ..." or "M{\"{u}}ller". A plain non-greedy
    # "{(.*?)}" would stop at the first closing brace and truncate these.
    braced_value = r"\{((?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})*)\}"

    def field(name):
        # The look-behind keeps "title" from matching inside "booktitle".
        found = re.search(
            r"(?<![A-Za-z])" + name + r"\s*=\s*" + braced_value, bib
        )
        return None if found is None else found.group(1)

    author = field("author")
    if author is None:
        author = field("editor")
    if author is None:
        author = "Unknown"

    title = field("title")
    if title is None:
        title = "Unknown"

    year = field("year")
    if year is None:
        # Years are sometimes written without braces: year = 2020 / "2020".
        bare_year = re.search(r'(?<![A-Za-z])year\s*=\s*"?(\d+)', bib)
        year = bare_year.group(1) if bare_year is not None else "Unknown"

    return author, title, year
def format_bib_key(author, title, year):
    """Build a cite key from the first author, the title and the year.

    The key is ``<lastname><first three title words><year>``. The last name
    is lower-cased; title words keep their case but lose every character
    that is not an ASCII letter.

    Args:
        author: BibTeX author list (names joined by ``and``). Both
            "First Last" and "Last, First" name forms are understood.
        title: Title of the entry.
        year: Year, inserted verbatim at the end of the key.

    Returns:
        The cite key as a string. When no last name can be determined,
        ``"unknown"`` is used in its place.
    """
    # Only a free-standing "and" separates authors; splitting on " and"
    # alone would also cut names that merely start with "and".
    first_author = re.split(r"\s+and\s+", author.strip(), maxsplit=1)[0]

    if "," in first_author:
        # "Last, First" form: the family name precedes the comma.
        lastname = first_author.split(",", 1)[0]
    else:
        name_parts = first_author.split()
        lastname = name_parts[-1] if name_parts else ""

    # Drop LaTeX markup (braces, backslashes, quotes) and spaces so that the
    # key stays a valid BibTeX identifier; letters and hyphens are kept.
    lastname = "".join(
        ch for ch in lastname if ch.isalpha() or ch == "-"
    ).lower()
    if not lastname:
        lastname = "unknown"

    first_three_words = "".join(
        re.sub(r"[^a-zA-Z]", "", word) for word in title.split()[:3]
    )
    return f"{lastname}{first_three_words}{year}"
def search_and_get(query):
    """Search DBLP for *query* and print the best hit's BibTeX entry.

    The first search hit is looked up, its ``.bib`` export is downloaded and
    the ``DBLP:...`` cite key is replaced by one built with
    ``format_bib_key``. The result (or ``"Not found in DBLP"``) is written
    to stdout; nothing is returned.

    Args:
        query: Free-text search string passed to the DBLP publication API.

    Raises:
        urllib.error.URLError: If a request fails or times out.
        ValueError: If the search response is not valid JSON.
    """
    timeout_seconds = 10  # avoid hanging forever on a stalled connection

    params = {"q": query, "format": "json", "h": 1}
    url = f"https://dblp.org/search/publ/api?{urllib.parse.urlencode(params)}"
    with urllib.request.urlopen(url, timeout=timeout_seconds) as response:
        data = json.loads(response.read().decode())

    hits = data.get("result", {}).get("hits", {}).get("hit")
    # Guard against an empty hit list or a hit without "info"/"url";
    # otherwise we would crash or request the bogus URL "None.bib".
    entry_url = (hits[0].get("info") or {}).get("url") if hits else None
    if not entry_url:
        print("Not found in DBLP")
        return

    bib_url = f"{entry_url}.bib"
    with urllib.request.urlopen(bib_url, timeout=timeout_seconds) as bib_response:
        bib = bib_response.read().decode()

    if "not found" in bib:
        print("Not found in DBLP")
        return

    author, title, year = get_bib_info(bib)
    key = format_bib_key(author, title, year)
    # A callable replacement inserts the key literally; a plain string would
    # have its backslashes interpreted as regex escapes. count=1 limits the
    # change to the entry's own cite key.
    print(re.sub(r"\{DBLP:.*?,", lambda _match: "{" + key + ",", bib, count=1))
# Script entry point: the search query is expected as the first command-line
# argument. Everything meant for the caller is written to stdout.
try:
    if len(sys.argv) > 1:
        search_and_get(sys.argv[1])
    else:
        print("No search query given.")
except Exception as e:
    # Report the failure instead of silently discarding it; previously the
    # message was stored in a variable and never shown.
    print(str(e))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
from urllib.request import urlopen, Request | |
from urllib.error import URLError | |
import sys | |
def extract_doi(clipboard_content):
    """Return the bare DOI contained in *clipboard_content*, or ``None``.

    The text must consist of a DOI only (surrounding whitespace is ignored),
    optionally prefixed by ``doi:`` or by a ``doi.org`` / ``dx.doi.org``
    address with or without ``http(s)://``. Matching is case-insensitive.

    Args:
        clipboard_content: Text to inspect.

    Returns:
        The DOI starting at ``10.``, without any prefix, or ``None`` when the
        text is not a DOI.
    """
    pattern = (
        r"(?:(?:https?://)?(?:dx\.)?doi\.org/|doi:\s*)?"
        r"(10\.\d{4,9}/[-._;()/:A-Za-z0-9]+)"
    )
    # Copied text frequently carries stray spaces or a trailing newline.
    match = re.fullmatch(pattern, clipboard_content.strip(), re.IGNORECASE)
    return match.group(1) if match is not None else None
def _download_bib(url, headers=None):
    """Download *url* and return the decoded response body."""
    request = Request(url, headers=headers or {})
    # The timeout keeps a stalled connection from blocking the caller forever.
    with urlopen(request, timeout=10) as response:
        return response.read().decode()


def _replace_citekey(bib):
    """Return *bib* with the first entry's cite key replaced by a generated one."""
    author = extract_author_or_editor(bib)
    title = extract_title(bib)
    year = extract_year(bib)
    key = format_key(author, title, year)
    # Anchoring on "@type{" and count=1 restrict the change to the entry's
    # own key, so "{Word," inside a field value is left alone. The callable
    # inserts the key literally (no backslash-escape processing).
    return re.sub(
        r"(@\w+\s*\{)[^,\n]*,",
        lambda match: match.group(1) + key + ",",
        bib,
        count=1,
    )


def fetch_and_clean_bib(doi):
    """Fetch the BibTeX entry for *doi* and give it a generated cite key.

    DBLP is queried first. If that fails for any reason (network error,
    unknown DOI, unexpected content), doi.org is asked for a BibTeX
    rendering via content negotiation.

    Args:
        doi: Bare DOI, e.g. ``"10.1000/xyz123"``.

    Returns:
        The BibTeX entry with its cite key replaced, or the message
        ``"DOI not found"`` / ``"An error occurred"`` when the fallback
        fails as well.
    """
    try:
        bib = _download_bib(f"https://dblp.org/doi/{doi}.bib")
        if "not found" in bib:
            raise LookupError("DOI not found in DBLP")
        return _replace_citekey(bib)
    except Exception:
        # Deliberately broad: any DBLP problem just triggers the fallback.
        pass

    try:
        bib = _download_bib(
            f"https://doi.org/{doi}", {"Accept": "application/x-bibtex"}
        )
        if "not found" in bib:
            return "DOI not found"
        return _replace_citekey(bib)
    except URLError:
        return "DOI not found"
    except Exception:
        return "An error occurred"
def extract_author_or_editor(bib):
    """Return the author list of a BibTeX entry, falling back to the editors.

    Args:
        bib: The BibTeX entry as a string.

    Returns:
        The raw content of the ``author`` field, or of the ``editor`` field
        when there is no author, or ``"Unknown"`` when neither is present.
    """
    # Allow up to two levels of nested braces inside the value so that
    # LaTeX-escaped names such as M{\"{u}}ller are not cut off at the first
    # closing brace.
    braced_value = r"\{((?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})*)\}"
    for field_name in ("author", "editor"):
        found = re.search(
            r"(?<![A-Za-z])" + field_name + r"\s*=\s*" + braced_value, bib
        )
        if found is not None:
            return found.group(1)
    return "Unknown"
def extract_title(bib):
    """Return the title of a BibTeX entry with all braces removed.

    Args:
        bib: The BibTeX entry as a string.

    Returns:
        The content of the ``title`` field, stripped of trailing commas,
        surrounding whitespace and any braces, or ``"Unknown"`` when the
        entry has no title field.
    """
    # The look-behind prevents a match inside "booktitle"; the value may
    # contain up to two levels of nested braces (e.g. "{BERT:} ...").
    match = re.search(
        r"(?<![A-Za-z])title\s*=\s*"
        r"\{((?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})*)\}",
        bib,
    )
    if match is None:
        return "Unknown"
    title = match.group(1).rstrip(",").strip()
    return title.replace("{", "").replace("}", "")
def extract_year(bib):
    """Return the digits of the ``year`` field, or ``"Unknown"`` if absent.

    The value may be wrapped in braces or double quotes, or be bare.
    """
    found = re.search(r'year\s*=\s*([{"]?)(\d+)[}"]?', bib)
    if found is None:
        return "Unknown"
    return found.group(2)
def format_key(author, title, year):
    """Build a cite key from the first author, the title and the year.

    The key is ``<lastname><first three title words><year>``. The last name
    is lower-cased; title words keep their case but lose every character
    that is not an ASCII letter.

    Args:
        author: BibTeX author list (names joined by ``and``). Both
            "First Last" and "Last, First" name forms are understood.
        title: Title of the entry.
        year: Year, inserted verbatim at the end of the key.

    Returns:
        The cite key as a string. When no last name can be determined,
        ``"unknown"`` is used in its place.
    """
    # Only a free-standing "and" separates authors; splitting on " and"
    # alone would also cut names that merely start with "and".
    first_author = re.split(r"\s+and\s+", author.strip(), maxsplit=1)[0]

    if "," in first_author:
        # "Last, First" form: the family name precedes the comma.
        lastname = first_author.split(",", 1)[0]
    else:
        name_parts = first_author.split()
        lastname = name_parts[-1] if name_parts else ""

    # Drop LaTeX markup (braces, backslashes, quotes) and spaces so that the
    # key stays a valid BibTeX identifier; letters and hyphens are kept.
    lastname = "".join(
        ch for ch in lastname if ch.isalpha() or ch == "-"
    ).lower()
    if not lastname:
        lastname = "unknown"

    shorttitle = "".join(
        re.sub(r"[^a-zA-Z]", "", word) for word in title.split()[:3]
    )
    return f"{lastname}{shorttitle}{year}"
if __name__ == "__main__":
    # Entry point invoked by espanso: the clipboard text arrives as the first
    # command-line argument and the result is written to stdout.
    # A missing argument is treated like a clipboard without a DOI instead of
    # crashing with an IndexError.
    clipboard_content = sys.argv[1] if len(sys.argv) > 1 else ""
    doi = extract_doi(clipboard_content)
    if doi is None:
        print("DOI not found in clipboard content.")
    else:
        print(fetch_and_clean_bib(doi))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment