# Adoria298/comment2bibtex.py

## comment2bibtex.py
import requests # pip install requests
import bs4 # pip install beautifulsoup4
from pathlib import Path # part of the python 3.6+ standard library

# Comment tails (text from "#" to the end of the line) that look like they hold a URL.
comments = list()
# The path typed by the user is resolved against the directory containing this script.
in_file = Path(__file__).parent.joinpath(input("Relative path to file: "))
def is_url(word):
    """Return True when ``word`` looks like it contains a URL, else False.

    This is a substring heuristic, not a validator: any text containing
    "http" or "www" counts as a URL.
    """
    # Always hand back a real bool; the negative case used to fall off the
    # end of the function and return None.
    return "http" in word or "www" in word
# Scan the input file line by line. For every line that has a "#", look at the
# text from the first "#" to the end of the line; if that text seems to mention
# a URL, echo the full line and remember the "#..." part.
with open(in_file) as fp:
    for line in fp:
        hash_pos = line.find("#")
        if hash_pos == -1:
            continue  # no comment marker on this line
        tail = line[hash_pos:]
        if is_url(tail):
            print(line)
            comments.append(tail)

# Pull every URL-looking word out of the collected comments.
# Each comment starts with "#"; that marker is dropped, the rest is split on
# whitespace and every word is tested on its own. This way a comment such as
# "#http://a.example some note" yields only the URL rather than the whole
# comment, and a first word like "#hello" is never mistaken for a URL.
urls = []
for comment in comments:
    for word in comment[1:].split():
        if is_url(word):
            urls.append(word)

def yn_prompt(prompt):
    """Ask a yes/no question on stdin and return the answer as a bool.

    "[y/n] " is appended to ``prompt`` unless the text already contains
    "[y/n]". Only an answer equal to "y" (upper or lower case) counts as
    yes; any other input returns False.
    """
    question = prompt if "[y/n]" in prompt else prompt + "[y/n] "
    return input(question).lower() == "y"

# Ask how the entries should be built, then create one @online BibTeX entry
# per distinct URL and store it in `bibtices`, keyed by the URL.
gen_names = yn_prompt("Would you like to auto-generate names [y/n]? ")
use_web = yn_prompt("Would you like to use the Internet to get the page's title [y/n]? ")
add_dates = yn_prompt("Would you like to add urldate [y/n]? ")
bibtices = {} # maps each URL to the text of its BibTeX entry
for url in urls:
    if url in bibtices:
        continue # already referenced: do not fetch or prompt a second time
    print("Referencing", url)

    # Try to read the page's <title>; `title` stays None when the web is not
    # used, the request fails, or the page has no usable title.
    title = None
    if use_web:
        # is_url() also accepts bare "www." addresses, which requests rejects
        # without a scheme.
        fetch_url = url if "://" in url else "http://" + url
        try:
            res = requests.get(fetch_url, timeout=10)
        except requests.RequestException as err:
            print("Could not fetch", url, "-", err)
        else:
            soup = bs4.BeautifulSoup(res.text, features="html.parser") # parser built into python
            if soup.title is not None and soup.title.string:
                title = soup.title.string.strip()

    if gen_names:
        # Citation key: the first character of every non-empty piece of the
        # URL when split on ".", then on "/", then on "-".
        name = "".join(
            piece[0]
            for sep in (".", "/", "-")
            for piece in url.split(sep)
            if piece # skip empty strings such as the one between // in https://
        )
    else:
        name = input("Name the url " + url + ": ")

    if not title:
        title = name # no title available: fall back to the citation key

    bibtex = "@online{" + name + ",\n"
    bibtex += "\ttitle = {" + title + "},\n"
    bibtex += "\turl = {" + url + "},\n"
    if add_dates:
        date = input("When was this accessed [YYYY-MM-DD]? ")
        bibtex += "\turldate = {" + date + "}\n"
    bibtex += "}\n\n"
    bibtices[url] = bibtex

# Append the generated entries to a file chosen by the user (path relative to
# this script's directory), then dump the intermediate data and the resulting
# file for inspection.
out_file = Path(__file__).parent / input("Which file should I append these to? ")
with open(out_file, "a") as fp:
    fp.write("\n% Below references generated by c2b:\n")
    # `urls` may list the same URL several times while `bibtices` holds one
    # entry per URL; write each entry once so the .bib file does not end up
    # with repeated citation keys.
    written = set()
    for url in urls:
        if url in written:
            continue
        written.add(url)
        fp.write(bibtices[url])

print("comments:", comments)
print("urls:", urls)
print("bibtices:", bibtices)
with open(out_file) as fp:
    print(fp.read())
input("Press enter to exit. ")
	import requests # pip install requests
	import bs4 # pip install beautifulsoup4
	from pathlib import Path # part of the python 3.6+ standard library

	# Comment tails (text from "#" to the end of the line) that look like they hold a URL.
	comments = list()
	# The path typed by the user is resolved against the directory containing this script.
	in_file = Path(__file__).parent.joinpath(input("Relative path to file: "))
	def is_url(word):
		"""Return True when ``word`` looks like it contains a URL, else False.

		This is a substring heuristic, not a validator: any text containing
		"http" or "www" counts as a URL.
		"""
		# Always hand back a real bool rather than falling through to None.
		return "http" in word or "www" in word
	# Scan the input file line by line. For every line that has a "#", look at
	# the text from the first "#" to the end of the line; if that text seems to
	# mention a URL, echo the full line and remember the "#..." part.
	with open(in_file) as fp:
		for line in fp:
			hash_pos = line.find("#")
			if hash_pos == -1:
				continue  # no comment marker on this line
			tail = line[hash_pos:]
			if is_url(tail):
				print(line)
				comments.append(tail)

	# Pull every URL-looking word out of the collected comments.
	# Each comment starts with "#"; that marker is dropped, the rest is split
	# on whitespace and every word is tested on its own, so a comment such as
	# "#http://a.example some note" yields only the URL rather than the whole
	# comment, and a first word like "#hello" is never mistaken for a URL.
	urls = []
	for comment in comments:
		for word in comment[1:].split():
			if is_url(word):
				urls.append(word)

	def yn_prompt(prompt):
		"""Ask a yes/no question on stdin and return the answer as a bool.

		"[y/n] " is appended to ``prompt`` unless the text already contains
		"[y/n]". Only an answer equal to "y" (upper or lower case) counts as
		yes; any other input returns False.
		"""
		if "[y/n]" not in prompt:
			prompt += "[y/n] "
		return input(prompt).lower() == "y"

	# Ask how the entries should be built, then create one @online BibTeX entry
	# per distinct URL and store it in `bibtices`, keyed by the URL.
	gen_names = yn_prompt("Would you like to auto-generate names [y/n]? ")
	use_web = yn_prompt("Would you like to use the Internet to get the page's title [y/n]? ")
	add_dates = yn_prompt("Would you like to add urldate [y/n]? ")
	bibtices = {} # maps each URL to the text of its BibTeX entry
	for url in urls:
		if url in bibtices:
			continue # already referenced: do not fetch or prompt a second time
		print("Referencing", url)

		# Try to read the page's <title>; `title` stays None when the web is
		# not used, the request fails, or the page has no usable title.
		title = None
		if use_web:
			# is_url() also accepts bare "www." addresses, which requests
			# rejects without a scheme.
			fetch_url = url if "://" in url else "http://" + url
			try:
				res = requests.get(fetch_url, timeout=10)
			except requests.RequestException as err:
				print("Could not fetch", url, "-", err)
			else:
				soup = bs4.BeautifulSoup(res.text, features="html.parser") # parser built into python
				if soup.title is not None and soup.title.string:
					title = soup.title.string.strip()

		if gen_names:
			# Citation key: the first character of every non-empty piece of
			# the URL when split on ".", then on "/", then on "-".
			name = "".join(
				piece[0]
				for sep in (".", "/", "-")
				for piece in url.split(sep)
				if piece # skip empty strings such as the one between // in https://
			)
		else:
			name = input("Name the url " + url + ": ")

		if not title:
			title = name # no title available: fall back to the citation key

		bibtex = "@online{" + name + ",\n"
		bibtex += "\ttitle = {" + title + "},\n"
		bibtex += "\turl = {" + url + "},\n"
		if add_dates:
			date = input("When was this accessed [YYYY-MM-DD]? ")
			bibtex += "\turldate = {" + date + "}\n"
		bibtex += "}\n\n"
		bibtices[url] = bibtex

	# Append the generated entries to a file chosen by the user (path relative
	# to this script's directory), then dump the intermediate data and the
	# resulting file for inspection.
	out_file = Path(__file__).parent / input("Which file should I append these to? ")
	with open(out_file, "a") as fp:
		fp.write("\n% Below references generated by c2b:\n")
		# `urls` may list the same URL several times while `bibtices` holds
		# one entry per URL; write each entry once so the .bib file does not
		# end up with repeated citation keys.
		written = set()
		for url in urls:
			if url in written:
				continue
			written.add(url)
			fp.write(bibtices[url])

	print("comments:", comments)
	print("urls:", urls)
	print("bibtices:", bibtices)
	with open(out_file) as fp:
		print(fp.read())
	input("Press enter to exit. ")