Last active
September 19, 2021 10:55
-
-
Save Adoria298/6ec680732576e0fb987343e6f7b2413a to your computer and use it in GitHub Desktop.
comment2bibtex - turns a URL in a Python comment into a BibTeX reference.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests # pip install requests | |
import bs4 # pip install beautifulsoup4 | |
from pathlib import Path # part of the python 3.6+ standard library | |
# The path typed by the user is resolved against the directory that holds
# this script, not against the current working directory.
in_file = Path(__file__).parent.joinpath(input("Relative path to file: "))

# Comment fragments gathered from in_file; starts out empty.
comments = []
def is_url(word):
    """Return True if *word* looks like it contains a URL, otherwise False.

    This is a loose substring heuristic: any text containing "http" or
    "www" is accepted, so it can also be applied to a longer string to ask
    whether a URL appears anywhere inside it.
    """
    # Most URLs have at least one of the two markers. `in` already yields a
    # bool, so a non-match is an explicit False rather than an implicit None.
    return "http" in word or "www" in word
# Scan the input file and keep every comment that appears to hold a URL.
with open(in_file) as source:
    for text in source:
        hash_pos = text.find("#")
        if hash_pos == -1:
            continue  # no comment on this line
        # The comment runs from the first "#" to the end of the line.
        fragment = text[hash_pos:]
        if is_url(fragment):
            print(text)  # echo the whole source line, not just the comment
            comments.append(fragment)
# Pull the individual URLs out of each collected comment.
urls = []
for comment in comments:
    # Each comment begins with its "#". Drop it so that a URL written flush
    # against the marker ("#https://...") is stored cleanly, then test every
    # whitespace-separated word on its own. Handling all comments the same
    # way avoids treating a whole multi-word comment as a single URL, and
    # avoids accepting a first word merely because it starts with "h" or "w".
    words = comment[1:].split()
    urls.extend(word for word in words if is_url(word))
def yn_prompt(prompt):
    """Ask a yes/no question on the console and return the answer as a bool.

    "[y/n] " is appended to *prompt* unless it already contains "[y/n]".
    Returns True for "y" or "yes" (any case, surrounding whitespace ignored)
    and False for any other reply, including an empty one.
    """
    if "[y/n]" not in prompt:
        prompt += "[y/n] "
    # Strip before comparing so a stray space after the answer is not read
    # as a "no".
    answer = input(prompt).strip().lower()
    return answer in ("y", "yes")
# Ask up front how each reference should be built.
# gen_names: derive the citation key from the URL instead of asking for it.
gen_names = yn_prompt("Would you like to auto-generate names [y/n]? ")
# use_web: download each page and use its <title> as the entry title.
use_web = yn_prompt("Would you like to use the Internet to get the page's title [y/n]? ")
# add_dates: ask for an access date and emit it as the urldate field.
add_dates = yn_prompt("Would you like to add urldate [y/n]? ")
def _fetch_title(url):
    """Return the <title> text of the page at *url*, or None if unavailable.

    None is returned when the request fails, the server answers with an
    error status, or the page has no usable <title> element.
    """
    # Bare "www..." addresses are accepted as URLs by this script, but
    # requests refuses any URL that lacks a scheme.
    target = url if "://" in url else "http://" + url
    try:
        res = requests.get(target, timeout=10)  # never hang on a dead host
        res.raise_for_status()
    except requests.RequestException as err:
        print("Could not fetch", url, "-", err)
        return None
    soup = bs4.BeautifulSoup(res.text, features="html.parser")  # parser built into python
    # soup.title is None when the page has no <title>; .string is None when
    # the tag does not contain exactly one text node.
    if soup.title is None or not soup.title.string:
        return None
    return soup.title.string.strip()


def _generate_name(url):
    """Build a citation key from the first character of every piece of *url*
    obtained by splitting it on ".", then on "/", then on "-"."""
    # `if piece` removes empty strings, like the one between / and / in
    # https://
    return "".join(
        piece[0]
        for separator in (".", "/", "-")
        for piece in url.split(separator)
        if piece
    )


bibtices = {}  # url -> BibTeX entry text; named after index/indices
for url in urls:
    if url in bibtices:
        # Already referenced: a second pass would only overwrite the entry
        # after fetching and prompting again.
        continue
    print("Referencing", url)
    title = _fetch_title(url) if use_web else None
    if gen_names:
        name = _generate_name(url)
    else:
        name = input("Name the url " + url + ": ")
    if title is None:
        # No title from the web: reuse the hand-typed name as the title, or
        # the URL itself when the name is only an auto-generated key.
        title = url if gen_names else name
    bibtex = "@online{" + name + ",\n"
    bibtex += "\ttitle = {" + title + "},\n"
    bibtex += "\turl = {" + url + "},\n"
    if add_dates:
        date = input("When was this accessed [YYYY-MM-DD]? ")
        bibtex += "\turldate = {" + date + "}\n"
    bibtex += "}\n\n"
    bibtices[url] = bibtex
# The output path is resolved against the directory that holds this script.
out_file = Path(__file__).parent / input("Which file should I append these to? ")

# Append, so references already present in the file are kept.
with open(out_file, "a") as fp:
    fp.write("\n% Below references generated by c2b:\n")
    # `urls` may list the same URL more than once while `bibtices` holds a
    # single entry per URL; write each entry once so the output does not end
    # up with duplicate citation keys.
    written = set()
    for url in urls:
        if url not in written:
            written.add(url)
            fp.write(bibtices[url])
# Dump the intermediate results for inspection.
for label, value in (
    ("comments:", comments),
    ("urls:", urls),
    ("bibtices:", bibtices),
):
    print(label, value)

# Echo the complete output file, including anything that was in it before.
print(out_file.read_text())

# Wait for a key press before the script ends.
input("Press enter to exit. ")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment