Skip to content

Instantly share code, notes, and snippets.

@Adoria298
Last active September 19, 2021 10:55
Show Gist options
  • Save Adoria298/6ec680732576e0fb987343e6f7b2413a to your computer and use it in GitHub Desktop.
Save Adoria298/6ec680732576e0fb987343e6f7b2413a to your computer and use it in GitHub Desktop.
comment2bibtex - turns a URL in a Python comment into a BibTeX reference.
import requests # pip install requests
import bs4 # pip install beautifulsoup4
from pathlib import Path # part of the python 3.6+ standard library
# File to scan for comments, taken relative to the directory this script lives in.
in_file = Path(__file__).parent.joinpath(input("Relative path to file: "))

# Comment fragments (each running from a "#" to the end of its line) that
# appear to mention a URL; filled in while the input file is read.
comments = []
def is_url(word):
    """Return True if *word* looks like it contains a URL.

    This is a loose heuristic, not a validator: any text containing
    "http" or "www" is accepted.

    Args:
        word: Text to inspect (a single word or a whole comment).

    Returns:
        bool: True when "http" or "www" occurs in *word*, otherwise False.
    """
    # Most URLs have at least one of the two.  Returning the expression
    # directly guarantees a real bool rather than an implicit None.
    return "http" in word or "www" in word
# Scan the chosen file and keep the tail of each line, starting at its first
# "#", whenever that tail looks like it mentions a URL.
with open(in_file) as source:
    for text in source:
        hash_at = text.find("#")
        if hash_at == -1:
            continue  # no comment marker on this line
        tail = text[hash_at:]
        if is_url(tail):
            print(text)
            comments.append(tail)
# Pull the individual URLs out of the collected comments.
urls = []
for comment in comments:
    # Drop the leading "#" marker, split on any whitespace (which also removes
    # the trailing newline) and keep only the words that look like URLs.
    # Every word is tested on its own, so a comment such as
    # "#http://example.com some notes" yields just the URL instead of the
    # whole comment text, and a non-URL first word is never picked up.
    words = comment.lstrip("#").split()
    urls.extend(word for word in words if is_url(word))
def yn_prompt(prompt):
    """Ask a yes/no question on standard input and return the answer.

    Args:
        prompt: Question to display.  "[y/n] " is appended when the text
            does not already contain "[y/n]".

    Returns:
        bool: True only when the reply is "y" (case-insensitive, with
        surrounding whitespace ignored); every other reply is False.
    """
    if "[y/n]" not in prompt:
        prompt += "[y/n] "
    # Strip first so replies such as "y " or " Y" are not silently read as "no".
    return input(prompt).strip().lower() == "y"
# Ask up front how each reference should be built.
# gen_names: derive the citation key from the URL instead of asking for one.
gen_names = yn_prompt("Would you like to auto-generate names [y/n]? ")
# use_web: download each page and take the title from its <title> element.
use_web = yn_prompt("Would you like to use the Internet to get the page's title [y/n]? ")
# add_dates: ask for an access date per URL and emit it as "urldate".
add_dates = yn_prompt("Would you like to add urldate [y/n]? ")
# Maps each URL to its finished BibTeX entry ("bibtices" is named after
# index -> indices).  NB: a repeated URL overwrites its earlier entry.
bibtices = {}
for url in urls:
    print("Referencing", url)

    # Take the title from the page itself when the user asked for that.
    title = None
    if use_web:
        try:
            # Without a timeout an unresponsive server would hang the script.
            res = requests.get(url, timeout=10)
        except requests.RequestException as err:
            # Covers connection errors, timeouts and malformed URLs (e.g. a
            # bare "www.example.com" with no scheme); fall back to the name.
            print("Could not fetch", url, "-", err)
        else:
            soup = bs4.BeautifulSoup(res.text, features="html.parser")  # parser built into python
            # A page may have no <title>, or one whose .string is None.
            if soup.title is not None and soup.title.string:
                title = soup.title.string.strip()

    if gen_names:
        # Citation key: the first character of every non-empty piece of the
        # URL when split on ".", then on "/", then on "-".  The `if s` filter
        # drops empty pieces such as the one between the slashes in "https://".
        name = ""
        for sep in (".", "/", "-"):
            name += "".join(s[0] for s in url.split(sep) if s)
    else:
        name = input("Name the url " + url + ": ")

    # With no usable page title (web lookup disabled, failed, or the page has
    # none) reuse the citation key so the entry always has a title.
    if not title:
        title = name

    bibtex = "@online{" + name + ",\n"
    bibtex += "\ttitle = {" + title + "},\n"
    bibtex += "\turl = {" + url + "},\n"
    if add_dates:
        date = input("When was this accessed [YYYY-MM-DD]? ")
        bibtex += "\turldate = {" + date + "}\n"
    bibtex += "}\n\n"
    bibtices[url] = bibtex
# Destination file, taken relative to the directory this script lives in.
out_file = Path(__file__).parent.joinpath(input("Which file should I append these to? "))

# Append rather than overwrite, so whatever the file already holds is kept.
with out_file.open("a") as bib:
    bib.write("\n% Below references generated by c2b:\n")
    bib.writelines(bibtices[u] for u in urls)

# Debug dump of everything gathered along the way.
print("comments:", comments)
print("urls:", urls)
print("bibtices:", bibtices)

# Echo the whole output file so the result can be checked before exiting.
print(out_file.read_text())
input("Press enter to exit. ")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment