Skip to content

Instantly share code, notes, and snippets.

@Xeophon
Created September 12, 2023 22:01
Show Gist options
  • Save Xeophon/f81b620250dd4d0b833a4c0eddb82ad5 to your computer and use it in GitHub Desktop.
Save Xeophon/f81b620250dd4d0b833a4c0eddb82ad5 to your computer and use it in GitHub Desktop.
Bib entry fixer
# espanso match file: two triggers that expand to a cleaned-up BibTeX entry.
# Both triggers pass the current clipboard content as the single argument to
# a Python script and insert whatever the script prints.
matches:
  # ":doi" -> run doi_to_bib.py on the clipboard content.
  - trigger: ":doi"
    replace: "{{output}}"
    vars:
      - name: "clipboard"
        type: "clipboard"
      - name: output
        type: script
        params:
          args:
            - python
            - "%CONFIG%/scripts/doi_to_bib.py"
            - "{{clipboard}}"
  # ":dblp" -> run dblp_search.py on the clipboard content.
  - trigger: ":dblp"
    replace: "{{output}}"
    vars:
      - name: "clipboard"
        type: "clipboard"
      - name: output
        type: script
        params:
          args:
            - python
            - "%CONFIG%/scripts/dblp_search.py"
            - "{{clipboard}}"
import urllib.request
import urllib.parse
import json
import re
import sys
def get_bib_info(bib):
    """Extract author (or editor), title, and year from a BibTeX entry.

    Field values are read with a pattern that tolerates up to two levels of
    nested braces, so LaTeX groups such as ``J{\\"{o}}rg`` or ``{DBLP}`` do
    not cut the value short at the first closing brace.

    Args:
        bib: BibTeX entry text.

    Returns:
        A ``(author, title, year)`` tuple of strings. Each element is
        ``"Unknown"`` when the corresponding field cannot be found; the
        author falls back to the ``editor`` field before giving up.
    """
    # Brace-delimited value allowing two levels of inner {...} groups.
    braced_value = r"\{((?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})*)\}"

    def _field(name):
        # \b keeps "title" from matching inside "booktitle".
        found = re.search(r"\b" + name + r"\s*=\s*" + braced_value, bib)
        return None if found is None else found.group(1)

    author = _field("author")
    if author is None:
        author = _field("editor")
    if author is None:
        author = "Unknown"

    title = _field("title")
    if title is None:
        title = "Unknown"

    year_match = re.search(r'\byear\s*=\s*[{"]?\s*(\d+)', bib)
    year = year_match.group(1) if year_match is not None else "Unknown"

    return author, title, year
def format_bib_key(author, title, year):
    """Build a cite key from the first author's last name, title, and year.

    The key is ``<lastname><FirstThreeTitleWords><year>``: the last name is
    lower-cased and stripped of everything except word characters and
    hyphens (so LaTeX markup like ``M{\\"{u}}ller`` cannot leak braces or
    backslashes into the key), and each of the first three title words keeps
    only its ASCII letters.

    Args:
        author: BibTeX author list, names separated by ``and``.
        title: Publication title.
        year: Publication year as a string.

    Returns:
        The cite key string. ``"unknown"`` stands in for the last name when
        no usable author name is present.
    """
    # Split on the BibTeX "and" separator as a whole word, across newlines.
    first_author = re.split(r"\s+and\s+", author.strip(), maxsplit=1)[0]
    name_parts = first_author.split()
    raw_lastname = name_parts[-1] if name_parts else ""
    author_lastname = re.sub(r"[^\w-]", "", raw_lastname).lower() or "unknown"

    first_three_words = "".join(
        re.sub(r"[^a-zA-Z]", "", word) for word in title.split()[:3]
    )
    return f"{author_lastname}{first_three_words}{year}"
def search_and_get(query):
    """Search DBLP for *query* and print the top hit's BibTeX entry.

    The first search hit's record URL is fetched with a ``.bib`` suffix, and
    the entry's ``DBLP:...`` cite key is replaced by a key derived from the
    author, title, and year. ``"Not found in DBLP"`` is printed when the
    search has no hits, the hit has no record URL, or the fetched text
    contains ``"not found"``.

    Args:
        query: Free-text search string sent to the DBLP publication API.

    Raises:
        Exceptions from ``urllib`` / ``json`` (network failures, timeouts,
        malformed responses) propagate to the caller.
    """
    not_found = "Not found in DBLP"
    options = {"q": query, "format": "json", "h": 1}
    url = f"https://dblp.org/search/publ/api?{urllib.parse.urlencode(options)}"
    # A timeout keeps a stalled connection from blocking the text expansion.
    with urllib.request.urlopen(url, timeout=10) as response:
        data = json.loads(response.read().decode())

    hits = data.get("result", {}).get("hits", {}).get("hit")
    record_url = (hits[0].get("info") or {}).get("url") if hits else None
    if not record_url:
        print(not_found)
        return

    with urllib.request.urlopen(f"{record_url}.bib", timeout=10) as bib_response:
        bib = bib_response.read().decode()
    if "not found" in bib:
        print(not_found)
        return

    author, title, year = get_bib_info(bib)
    key = format_bib_key(author, title, year)
    # count=1: only the entry key is rewritten, not e.g. a later
    # "crossref = {DBLP:...," field. A callable replacement keeps any
    # backslash in the key from being read as a regex escape.
    print(re.sub(r"\{DBLP:.*?,", lambda _match: "{" + key + ",", bib, count=1))
# Script entry: search DBLP for the first command-line argument.
try:
    search_and_get(sys.argv[1])
except Exception as e:
    # Emit the error text so a failure yields a visible expansion instead
    # of silently producing nothing.
    output = str(e)
    print(output)
import re
from urllib.request import urlopen, Request
from urllib.error import URLError
import sys
def extract_doi(clipboard_content):
    """Return the bare DOI contained in *clipboard_content*, or ``None``.

    The whole (whitespace-trimmed) content must be a DOI, optionally
    prefixed by ``doi.org/``, ``dx.doi.org/`` (each with or without
    ``http://`` / ``https://``) or ``doi:``. Prefix matching is
    case-insensitive.

    Args:
        clipboard_content: Text to inspect.

    Returns:
        The DOI without any prefix (e.g. ``"10.1000/xyz123"``), or ``None``
        when the content is not a single DOI.
    """
    pattern = (
        r"(?:(?:https?://)?(?:dx\.)?doi\.org/|doi:\s*)?"
        r"(10\.\d{4,9}/[-._;()/:A-Za-z0-9]+)"
    )
    # Strip first: copied text often carries leading/trailing whitespace.
    match = re.fullmatch(pattern, clipboard_content.strip(), re.IGNORECASE)
    if match is None:
        return None
    return match.group(1)
def _rekey_bib(bib):
    """Replace the cite key of the first entry in *bib* with a derived key."""
    key = format_key(
        extract_author_or_editor(bib), extract_title(bib), extract_year(bib)
    )
    # Anchor on "@type{" and rewrite once, so braces inside field values
    # (titles, crossref) are never touched. The callable replacement keeps
    # backslashes in the key from being interpreted as regex escapes.
    return re.sub(
        r"(@\w+\s*\{)[^,]*,",
        lambda match: match.group(1) + key + ",",
        bib,
        count=1,
    )


def fetch_and_clean_bib(doi):
    """Fetch the BibTeX entry for *doi* and give it a readable cite key.

    DBLP is queried first. If that request fails for any reason, or the
    response contains ``"not found"``, the entry is requested from doi.org
    with an ``application/x-bibtex`` Accept header instead.

    Args:
        doi: Bare DOI, e.g. ``"10.1000/xyz123"``.

    Returns:
        The re-keyed BibTeX text, ``"DOI not found"`` when doi.org cannot
        supply the entry, or ``"An error occurred"`` for any other failure
        in the fallback path.
    """
    try:
        # Try DBLP first.
        with urlopen(f"https://dblp.org/doi/{doi}.bib", timeout=10) as response:
            bib = response.read().decode()
        if "not found" not in bib:
            return _rekey_bib(bib)
    except Exception:
        # Deliberate best-effort: any DBLP failure falls through to doi.org.
        pass

    try:
        request = Request(
            f"https://doi.org/{doi}", headers={"Accept": "application/x-bibtex"}
        )
        with urlopen(request, timeout=10) as response:
            bib = response.read().decode()
        if "not found" in bib:
            return "DOI not found"
        return _rekey_bib(bib)
    except URLError:
        return "DOI not found"
    except Exception:
        return "An error occurred"
def extract_author_or_editor(bib):
    """Return the ``author`` field of *bib*, falling back to ``editor``.

    The value pattern tolerates up to two levels of nested braces, so LaTeX
    groups inside names (e.g. ``J{\\"{o}}rg``) do not end the value early.

    Args:
        bib: BibTeX entry text.

    Returns:
        The raw field content, or ``"Unknown"`` when neither field is found.
    """
    braced_value = r"\{((?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})*)\}"
    for field in ("author", "editor"):
        found = re.search(r"\b" + field + r"\s*=\s*" + braced_value, bib)
        if found is not None:
            return found.group(1)
    return "Unknown"
def extract_title(bib):
    """Return the ``title`` field of *bib* with all braces removed.

    Only the field named exactly ``title`` is considered (not ``booktitle``
    or ``subtitle``), and the value may contain up to two levels of nested
    braces.

    Args:
        bib: BibTeX entry text.

    Returns:
        The title with trailing commas, surrounding whitespace, and every
        ``{`` / ``}`` removed, or ``"Unknown"`` when no title is found.
    """
    braced_value = r"\{((?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})*)\}"
    # \b stops "title" from matching the tail of "booktitle".
    match = re.search(r"\btitle\s*=\s*" + braced_value, bib)
    if match is None:
        return "Unknown"
    title = match.group(1).rstrip(",").strip()
    return title.replace("{", "").replace("}", "")
def extract_year(bib):
    """Return the digits of the ``year`` field in *bib*, or ``"Unknown"``.

    The value may be wrapped in braces, double quotes, or nothing at all.
    """
    found = re.search(r'year\s*=\s*([{"]?)(\d+)[}"]?', bib)
    if found is None:
        return "Unknown"
    return found.group(2)
def format_key(author, title, year):
    """Build a cite key from the first author's last name, title, and year.

    The key is ``<lastname><FirstThreeTitleWords><year>``: the last name is
    lower-cased and reduced to word characters and hyphens (dropping LaTeX
    braces, backslashes, and quotes that would corrupt the key), and each of
    the first three title words keeps only its ASCII letters.

    Args:
        author: BibTeX author list, names separated by ``and``.
        title: Publication title.
        year: Publication year as a string.

    Returns:
        The cite key string. ``"unknown"`` stands in for the last name when
        no usable author name is present.
    """
    # Split on the BibTeX "and" separator as a whole word, across newlines.
    first_author = re.split(r"\s+and\s+", author.strip(), maxsplit=1)[0]
    name_parts = first_author.split()
    raw_lastname = name_parts[-1] if name_parts else ""
    author_lastname = re.sub(r"[^\w-]", "", raw_lastname).lower() or "unknown"

    shorttitle = "".join(
        re.sub(r"[^a-zA-Z]", "", word) for word in title.split()[:3]
    )
    return f"{author_lastname}{shorttitle}{year}"
if __name__ == "__main__":
    # Entry point invoked by espanso with the clipboard content as the
    # first argument. A missing argument is treated like empty clipboard
    # content so the user sees a message instead of a traceback.
    clipboard_content = sys.argv[1] if len(sys.argv) > 1 else ""
    doi = extract_doi(clipboard_content)
    if doi is None:
        print("DOI not found in clipboard content.")
    else:
        print(fetch_and_clean_bib(doi))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment