Xeophon/bib_fixer.yml

## bib_fixer.yml
# Espanso matches that turn the current clipboard content into a BibTeX entry.
# Both matches follow the same shape: read the clipboard, pass it as the single
# argument to a Python script, and insert the value of the `output` variable.
matches:
  # ":doi" -> hands the clipboard text to scripts/doi_to_bib.py.
  - trigger: ":doi"
    replace: "{{output}}"
    vars:
      # Clipboard text, referenced below as {{clipboard}}.
      - name: "clipboard"
        type: "clipboard"
      # Result of running the script; this is what `replace` inserts.
      - name: output
        type: script
        params:
          args:
            - python
            - "%CONFIG%/scripts/doi_to_bib.py"
            - "{{clipboard}}"
  # ":dblp" -> hands the clipboard text to scripts/dblp_search.py.
  - trigger: ":dblp"
    replace: "{{output}}"
    vars:
      # Clipboard text, referenced below as {{clipboard}}.
      - name: "clipboard"
        type: "clipboard"
      # Result of running the script; this is what `replace` inserts.
      - name: output
        type: script
        params:
          args:
            - python
            - "%CONFIG%/scripts/dblp_search.py"
            - "{{clipboard}}"

## dblp_search.py
import urllib.request
import urllib.parse
import json
import re
import sys


def _bib_field(bib, name):
    """Return the raw text between the braces of BibTeX field *name*.

    Braces are balanced by counting, so nested groups such as ``{BERT:}`` stay
    part of the value instead of ending it early. Returns ``None`` when the
    field is absent or its braces never close.
    """
    # \b keeps e.g. "title" from matching inside "booktitle".
    opening = re.search(r"\b" + re.escape(name) + r"\s*=\s*\{", bib)
    if opening is None:
        return None

    start = opening.end()
    depth = 1
    for pos in range(start, len(bib)):
        char = bib[pos]
        if char == "{":
            depth += 1
        elif char == "}":
            depth -= 1
            if depth == 0:
                return bib[start:pos]
    return None


def get_bib_info(bib):
    """Extract author, title, and year from a BibTeX entry.

    Args:
        bib: The BibTeX entry as a string.

    Returns:
        A tuple ``(author, title, year)`` of strings. The author falls back to
        the ``editor`` field. Any field that cannot be found is reported as
        ``"Unknown"`` instead of raising. Values are returned as written in
        the entry, including inner braces.
    """
    author = _bib_field(bib, "author")
    if author is None:
        author = _bib_field(bib, "editor")
    if author is None:
        author = "Unknown"

    title = _bib_field(bib, "title")
    if title is None:
        title = "Unknown"

    year = _bib_field(bib, "year")
    if year is None:
        # Years may also be written bare or quoted: year = 2020 / year = "2020".
        bare_year = re.search(r'\byear\s*=\s*"?(\d+)', bib)
        year = bare_year.group(1) if bare_year is not None else "Unknown"

    return author, title, year


def format_bib_key(author, title, year):
    """Build a citation key from surname, first three title words, and year.

    Args:
        author: BibTeX author string; names are separated by ``and``. Both
            "First Last" and "Last, First" forms are accepted.
        title: Title of the work.
        year: Publication year as a string.

    Returns:
        ``<surname lowercased><first three title words><year>``. Only ASCII
        letters are kept in the surname and title words, so LaTeX braces and
        punctuation cannot end up in the key. An empty author contributes
        nothing instead of raising.
    """
    # Only the first author matters. Requiring whitespace on both sides of
    # "and" avoids splitting inside names and copes with wrapped author lists.
    first_author = re.split(r"\s+and\s+", author.strip(), maxsplit=1)[0]
    if "," in first_author:
        # "Last, First" form: the surname is everything before the comma.
        surname = first_author.split(",", 1)[0]
    else:
        name_parts = first_author.split()
        surname = name_parts[-1] if name_parts else ""
    surname = re.sub(r"[^a-zA-Z]", "", surname).lower()

    first_three_words = "".join(
        re.sub(r"[^a-zA-Z]", "", word) for word in title.split()[:3]
    )
    return f"{surname}{first_three_words}{year}"


def search_and_get(query):
    """Search DBLP for *query* and print the top hit's BibTeX entry.

    The entry's ``DBLP:...`` citation key is replaced by one built with
    ``format_bib_key``. Prints "Not found in DBLP" when the search returns no
    usable hit or the fetched .bib text contains "not found".

    Args:
        query: Free-text search string (espanso passes the clipboard text).

    Raises:
        Whatever ``urlopen`` or ``json.loads`` raise on network or decoding
        failures; the caller is expected to handle them.
    """
    options = {"q": query, "format": "json", "h": 1}
    url = f"https://dblp.org/search/publ/api?{urllib.parse.urlencode(options)}"
    # Read the search result fully and close the connection before the next request.
    with urllib.request.urlopen(url) as response:
        data = json.loads(response.read().decode())

    # "hit" may be missing or empty, and a hit may lack a URL.
    hits = data.get("result", {}).get("hits", {}).get("hit") or []
    record_url = (hits[0].get("info") or {}).get("url") if hits else None
    if not record_url:
        print("Not found in DBLP")
        return

    with urllib.request.urlopen(f"{record_url}.bib") as bib_response:
        bib = bib_response.read().decode()
    if "not found" in bib:
        print("Not found in DBLP")
        return

    author, title, year = get_bib_info(bib)
    key = format_bib_key(author, title, year)
    # count=1 rewrites only the entry key, not any later "{DBLP:...," reference.
    # A callable replacement keeps backslashes in the key from being parsed as
    # regex escapes.
    print(re.sub(r"\{DBLP:.*?,", lambda _match: "{" + key + ",", bib, count=1))


# Script entry: argv[1] is the search query. Any failure is printed so the
# message is visible instead of the script silently producing no output.
try:
    search_and_get(sys.argv[1])
except Exception as e:
    output = str(e)
    print(output)

## doi_to_bib.py
import re
from urllib.request import urlopen, Request
from urllib.error import URLError
import sys


def extract_doi(clipboard_content):
    """Return the bare DOI contained in *clipboard_content*, or ``None``.

    Accepted forms (case-insensitive, surrounding whitespace ignored):
    ``10.xxxx/suffix``, ``doi:10.xxxx/suffix``, and ``doi.org/...`` or
    ``dx.doi.org/...`` with or without an ``http(s)://`` scheme. The whole
    text must be the DOI; text that merely contains one is rejected.

    Args:
        clipboard_content: Text to inspect.

    Returns:
        The DOI without any prefix, e.g. ``"10.1000/xyz123"``, or ``None``.
    """
    pattern = (
        r"(?:(?:https?://)?(?:dx\.)?doi\.org/|doi:\s*)?"
        r"(10\.\d{4,9}/[-._;()/:A-Za-z0-9]+)"
    )
    match = re.fullmatch(pattern, clipboard_content.strip(), re.IGNORECASE)
    # Group 1 is the DOI itself, so no second search is needed to drop the prefix.
    return match.group(1) if match is not None else None


def _rekey(bib, key_pattern):
    """Replace the first match of *key_pattern* in *bib* with a generated key."""
    author = extract_author_or_editor(bib)
    title = extract_title(bib)
    year = extract_year(bib)
    key = format_key(author, title, year)
    # count=1: the entry key comes first; later matches would be field values
    # (e.g. "author={Carberry, ..."). The callable keeps backslashes in the key
    # from being interpreted as replacement escapes.
    return re.sub(key_pattern, lambda _match: "{" + key + ",", bib, count=1)


def fetch_and_clean_bib(doi):
    """Fetch the BibTeX entry for *doi* and replace its citation key.

    DBLP is tried first. If that fails for any reason (network error, DOI
    unknown to DBLP, parsing problem), doi.org is queried with an
    ``application/x-bibtex`` Accept header.

    Args:
        doi: Bare DOI such as ``"10.1000/xyz123"``.

    Returns:
        The BibTeX entry with a rewritten key, or one of the messages
        "DOI not found" / "An error occurred" when the fallback fails too.
    """
    try:
        # Try DBLP first
        with urlopen(f"https://dblp.org/doi/{doi}.bib") as response:
            bib = response.read().decode()
        if "not found" in bib:
            raise Exception("DOI not found in DBLP")
        # The DBLP key runs to the end of the first line ("{DBLP:...,").
        return _rekey(bib, r"\{DBLP:.*")
    except Exception:
        # If DBLP fails, fallback to doi.org
        try:
            request = Request(
                f"https://doi.org/{doi}",
                headers={"Accept": "application/x-bibtex"},
            )
            with urlopen(request) as response:
                bib = response.read().decode()
            if "not found" in bib:
                return "DOI not found"
            return _rekey(bib, r"\{\w+?,")
        except URLError:
            return "DOI not found"
        except Exception:
            return "An error occurred"


def _braced_field(bib, name):
    """Return the text between the balanced braces of field *name*, or ``None``."""
    opening = re.search(r"\b" + re.escape(name) + r"\s*=\s*\{", bib)
    if opening is None:
        return None

    start = opening.end()
    depth = 1
    # Count braces so nested groups such as "{-}" do not end the value early.
    for pos in range(start, len(bib)):
        char = bib[pos]
        if char == "{":
            depth += 1
        elif char == "}":
            depth -= 1
            if depth == 0:
                return bib[start:pos]
    return None


def extract_author_or_editor(bib):
    """Return the ``author`` field of a BibTeX entry, else ``editor``.

    Args:
        bib: The BibTeX entry as a string.

    Returns:
        The field value as written (inner braces included), or ``"Unknown"``
        when neither field is present.
    """
    for field in ("author", "editor"):
        value = _braced_field(bib, field)
        if value is not None:
            return value
    return "Unknown"


def extract_title(bib):
    """Return the ``title`` field of a BibTeX entry with braces removed.

    Args:
        bib: The BibTeX entry as a string.

    Returns:
        The title text, or ``"Unknown"`` when no title field is found. One
        level of nested braces inside the value is supported.
    """
    # \b stops "booktitle = {...}" from being mistaken for the title.
    match = re.search(r"\btitle\s*=\s*\{((?:[^{}]|\{[^{}]*\})*)\}", bib)
    if match is None:
        return "Unknown"

    # Remove any trailing commas and surrounding whitespace
    title = match.group(1).rstrip(",").strip()
    return title.replace("{", "").replace("}", "")


def extract_year(bib):
    """Return the digits of the ``year`` field, or ``"Unknown"`` if absent.

    The value may be wrapped in braces, wrapped in double quotes, or bare.
    """
    found = re.search(r'year\s*=\s*([{"]?)(\d+)[}"]?', bib)
    if found is None:
        return "Unknown"
    return found.group(2)


def format_key(author, title, year):
    """Format the citekey: surname, first three title words, year.

    Args:
        author: BibTeX author string; names are separated by ``and``. Both
            "First Last" and "Last, First" forms are accepted.
        title: Title of the work.
        year: Publication year as a string.

    Returns:
        ``<surname lowercased><first three title words><year>``. Only ASCII
        letters are kept in the surname and title words, so braces and
        punctuation cannot end up in the key. An empty author contributes
        nothing instead of raising.
    """
    # Only the first author matters. Requiring whitespace on both sides of
    # "and" avoids splitting inside a name.
    first_author = re.split(r"\s+and\s+", author.strip(), maxsplit=1)[0]
    if "," in first_author:
        # "Last, First" form: the surname is everything before the comma.
        surname = first_author.split(",", 1)[0]
    else:
        name_parts = first_author.split()
        surname = name_parts[-1] if name_parts else ""
    surname = re.sub(r"[^a-zA-Z]", "", surname).lower()

    shorttitle = "".join(
        re.sub(r"[^a-zA-Z]", "", word) for word in title.split()[:3]
    )
    return f"{surname}{shorttitle}{year}"


if __name__ == "__main__":
    # Entry point invoked by espanso: argv[1] carries the clipboard text.
    doi = extract_doi(sys.argv[1])
    if doi is not None:
        print(fetch_and_clean_bib(doi))
    else:
        print("DOI not found in clipboard content.")
	# NOTE(review): flattened repeat of the ":doi" / ":dblp" espanso matches.
	# Every line sits at a single tab, so the list/mapping nesting is lost, and
	# YAML does not allow tabs for indentation -- this span is not parseable as is.
	matches:
	# ":doi" match: clipboard text -> scripts/doi_to_bib.py
	- trigger: ":doi"
	replace: "{{output}}"
	vars:
	- name: "clipboard"
	type: "clipboard"
	- name: output
	type: script
	params:
	args:
	- python
	- "%CONFIG%/scripts/doi_to_bib.py"
	- "{{clipboard}}"
	# ":dblp" match: clipboard text -> scripts/dblp_search.py
	- trigger: ":dblp"
	replace: "{{output}}"
	vars:
	- name: "clipboard"
	type: "clipboard"
	- name: output
	type: script
	params:
	args:
	- python
	- "%CONFIG%/scripts/dblp_search.py"
	- "{{clipboard}}"
	import urllib.request
	import urllib.parse
	import json
	import re
	import sys


	def _bib_field(bib, name):
	    """Return the raw text between the braces of BibTeX field *name*.

	    Braces are balanced by counting, so nested groups such as ``{BERT:}``
	    stay part of the value. Returns ``None`` when the field is absent or
	    its braces never close.
	    """
	    # \b keeps e.g. "title" from matching inside "booktitle".
	    opening = re.search(r"\b" + re.escape(name) + r"\s*=\s*\{", bib)
	    if opening is None:
	        return None

	    start = opening.end()
	    depth = 1
	    for pos in range(start, len(bib)):
	        char = bib[pos]
	        if char == "{":
	            depth += 1
	        elif char == "}":
	            depth -= 1
	            if depth == 0:
	                return bib[start:pos]
	    return None


	def get_bib_info(bib):
	    """Extract author, title, and year from a BibTeX entry.

	    Args:
	        bib: The BibTeX entry as a string.

	    Returns:
	        A tuple ``(author, title, year)`` of strings. The author falls back
	        to the ``editor`` field. Any field that cannot be found is reported
	        as ``"Unknown"`` instead of raising.
	    """
	    author = _bib_field(bib, "author")
	    if author is None:
	        author = _bib_field(bib, "editor")
	    if author is None:
	        author = "Unknown"

	    title = _bib_field(bib, "title")
	    if title is None:
	        title = "Unknown"

	    year = _bib_field(bib, "year")
	    if year is None:
	        # Years may also be written bare or quoted: year = 2020 / year = "2020".
	        bare_year = re.search(r'\byear\s*=\s*"?(\d+)', bib)
	        year = bare_year.group(1) if bare_year is not None else "Unknown"

	    return author, title, year


	def format_bib_key(author, title, year):
	    """Build a citation key from surname, first three title words, and year.

	    Accepts "First Last" and "Last, First" author forms; only the first
	    author is used. Only ASCII letters are kept in the surname and title
	    words. An empty author contributes nothing instead of raising.
	    """
	    # Requiring whitespace on both sides of "and" avoids splitting inside names.
	    first_author = re.split(r"\s+and\s+", author.strip(), maxsplit=1)[0]
	    if "," in first_author:
	        # "Last, First" form: the surname is everything before the comma.
	        surname = first_author.split(",", 1)[0]
	    else:
	        name_parts = first_author.split()
	        surname = name_parts[-1] if name_parts else ""
	    surname = re.sub(r"[^a-zA-Z]", "", surname).lower()

	    first_three_words = "".join(
	        re.sub(r"[^a-zA-Z]", "", word) for word in title.split()[:3]
	    )
	    return f"{surname}{first_three_words}{year}"


	def search_and_get(query):
	    """Search DBLP for *query* and print the top hit's BibTeX entry.

	    The entry's ``DBLP:...`` citation key is replaced by one built with
	    ``format_bib_key``. Prints "Not found in DBLP" when the search returns
	    no usable hit or the fetched .bib text contains "not found". Network
	    and decoding errors propagate to the caller.
	    """
	    options = {"q": query, "format": "json", "h": 1}
	    url = f"https://dblp.org/search/publ/api?{urllib.parse.urlencode(options)}"
	    # Read the search result fully and close the connection before the next request.
	    with urllib.request.urlopen(url) as response:
	        data = json.loads(response.read().decode())

	    # "hit" may be missing or empty, and a hit may lack a URL.
	    hits = data.get("result", {}).get("hits", {}).get("hit") or []
	    record_url = (hits[0].get("info") or {}).get("url") if hits else None
	    if not record_url:
	        print("Not found in DBLP")
	        return

	    with urllib.request.urlopen(f"{record_url}.bib") as bib_response:
	        bib = bib_response.read().decode()
	    if "not found" in bib:
	        print("Not found in DBLP")
	        return

	    author, title, year = get_bib_info(bib)
	    key = format_bib_key(author, title, year)
	    # count=1 rewrites only the entry key; the callable keeps backslashes in
	    # the key from being parsed as replacement escapes.
	    print(re.sub(r"\{DBLP:.*?,", lambda _match: "{" + key + ",", bib, count=1))


	# Script entry: argv[1] is the search query. Any failure is printed so the
	# message is visible instead of the script silently producing no output.
	try:
	    search_and_get(sys.argv[1])
	except Exception as e:
	    output = str(e)
	    print(output)
	import re
	from urllib.request import urlopen, Request
	from urllib.error import URLError
	import sys


	def extract_doi(clipboard_content):
	    """Return the bare DOI contained in *clipboard_content*, or ``None``.

	    Accepted forms (case-insensitive, surrounding whitespace ignored):
	    ``10.xxxx/suffix``, ``doi:10.xxxx/suffix``, and ``doi.org/...`` or
	    ``dx.doi.org/...`` with or without an ``http(s)://`` scheme. The whole
	    text must be the DOI.
	    """
	    # Plain "|" alternation: an escaped "\|" would only match a literal pipe.
	    pattern = (
	        r"(?:(?:https?://)?(?:dx\.)?doi\.org/|doi:\s*)?"
	        r"(10\.\d{4,9}/[-._;()/:A-Za-z0-9]+)"
	    )
	    match = re.fullmatch(pattern, clipboard_content.strip(), re.IGNORECASE)
	    return match.group(1) if match is not None else None


	def _rekey(bib, key_pattern):
	    """Replace the first match of *key_pattern* in *bib* with a generated key."""
	    author = extract_author_or_editor(bib)
	    title = extract_title(bib)
	    year = extract_year(bib)
	    key = format_key(author, title, year)
	    # count=1: the entry key comes first; later matches would be field values.
	    # The callable keeps backslashes in the key from being treated as escapes.
	    return re.sub(key_pattern, lambda _match: "{" + key + ",", bib, count=1)


	def fetch_and_clean_bib(doi):
	    """Fetch the BibTeX entry for *doi* and replace its citation key.

	    DBLP is tried first; on any failure doi.org is queried with an
	    ``application/x-bibtex`` Accept header. Returns the rewritten entry, or
	    "DOI not found" / "An error occurred" when the fallback fails too.
	    """
	    try:
	        # Try DBLP first
	        with urlopen(f"https://dblp.org/doi/{doi}.bib") as response:
	            bib = response.read().decode()
	        if "not found" in bib:
	            raise Exception("DOI not found in DBLP")
	        # The DBLP key runs to the end of the first line ("{DBLP:...,").
	        return _rekey(bib, r"\{DBLP:.*")
	    except Exception:
	        # If DBLP fails, fallback to doi.org
	        try:
	            request = Request(
	                f"https://doi.org/{doi}",
	                headers={"Accept": "application/x-bibtex"},
	            )
	            with urlopen(request) as response:
	                bib = response.read().decode()
	            if "not found" in bib:
	                return "DOI not found"
	            return _rekey(bib, r"\{\w+?,")
	        except URLError:
	            return "DOI not found"
	        except Exception:
	            return "An error occurred"


	def _braced_field(bib, name):
	    """Return the text between the balanced braces of field *name*, or ``None``."""
	    opening = re.search(r"\b" + re.escape(name) + r"\s*=\s*\{", bib)
	    if opening is None:
	        return None

	    start = opening.end()
	    depth = 1
	    # Count braces so nested groups such as "{-}" do not end the value early.
	    for pos in range(start, len(bib)):
	        char = bib[pos]
	        if char == "{":
	            depth += 1
	        elif char == "}":
	            depth -= 1
	            if depth == 0:
	                return bib[start:pos]
	    return None


	def extract_author_or_editor(bib):
	    """Return the ``author`` field of a BibTeX entry, else ``editor``.

	    Returns the value as written (inner braces included), or ``"Unknown"``
	    when neither field is present.
	    """
	    for field in ("author", "editor"):
	        value = _braced_field(bib, field)
	        if value is not None:
	            return value
	    return "Unknown"


	def extract_title(bib):
	    """Return the ``title`` field of a BibTeX entry with braces removed.

	    Returns ``"Unknown"`` when no title field is found. One level of nested
	    braces inside the value is supported.
	    """
	    # \b stops "booktitle = {...}" from being mistaken for the title.
	    match = re.search(r"\btitle\s*=\s*\{((?:[^{}]|\{[^{}]*\})*)\}", bib)
	    if match is None:
	        return "Unknown"

	    # Remove any trailing commas and surrounding whitespace
	    title = match.group(1).rstrip(",").strip()
	    return title.replace("{", "").replace("}", "")


	def extract_year(bib):
	    """Return the digits of the ``year`` field, or ``"Unknown"`` if absent.

	    The value may be wrapped in braces, wrapped in double quotes, or bare,
	    with any amount of whitespace around the ``=``.
	    """
	    found = re.search(r'year\s*=\s*([{"]?)(\d+)[}"]?', bib)
	    if found is None:
	        return "Unknown"
	    return found.group(2)


	def format_key(author, title, year):
	    """Format the citekey: surname, first three title words, year.

	    Accepts "First Last" and "Last, First" author forms; only the first
	    author is used. Only ASCII letters are kept in the surname and title
	    words. An empty author contributes nothing instead of raising.
	    """
	    # Requiring whitespace on both sides of "and" avoids splitting inside a name.
	    first_author = re.split(r"\s+and\s+", author.strip(), maxsplit=1)[0]
	    if "," in first_author:
	        # "Last, First" form: the surname is everything before the comma.
	        surname = first_author.split(",", 1)[0]
	    else:
	        name_parts = first_author.split()
	        surname = name_parts[-1] if name_parts else ""
	    surname = re.sub(r"[^a-zA-Z]", "", surname).lower()

	    shorttitle = "".join(
	        re.sub(r"[^a-zA-Z]", "", word) for word in title.split()[:3]
	    )
	    return f"{surname}{shorttitle}{year}"


	if __name__ == "__main__":
	    # Entry point invoked by espanso: argv[1] carries the clipboard text.
	    clipboard_content = sys.argv[1]
	    doi = extract_doi(clipboard_content)
	    if doi is None:
	        print("DOI not found in clipboard content.")
	    else:
	        print(fetch_and_clean_bib(doi))