Last active
August 25, 2017 15:37
-
-
Save pydsigner/d954d29ed8a39cd2f013a333d0ff2758 to your computer and use it in GitHub Desktop.
hip-hop creation assistance tool
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python2 | |
''' | |
Impress your friends and become the next | |
2pac using rapgod.py, a state-of-the-art | |
rhymez and raps assistance tool. | |
dsc@dev4 $ rapgod en python | |
brighten brighton frighten icahn | |
icon ikon lightning lightson | |
ptyon pythons tighten titan | |
triton whiten | |
''' | |
import argparse | |
import re | |
import string | |
# ASCII-art "rapGod" banner; passed to argparse as the description in main(),
# so it is shown at the top of the --help output.
# NOTE(review): the art's exact column alignment may have been mangled in
# transit -- verify against the rendered banner before shipping.
logo = r"""
___ _
_ __ __ _ _ __ / _ \___ __| |
| '__/ _` | '_ \ / /_\/ _ \ / _` |
| | | (_| | |_) / /_\\ (_) | (_| |
|_| \__,_| .__/\____/\___/ \__,_|
|_|
""".strip('\n')
def fetch(source, word):
    '''
    Download the rhyme page for *word*.

    source -- URL template containing a single '%s' placeholder
    word   -- the query word interpolated into the template

    Returns the raw response body (bytes).  Raises requests.HTTPError on a
    non-2xx status and requests.Timeout if the server stalls.
    '''
    import requests
    headers = {
        "User-Agent": "Mozilla/5.0 AppleWebKit/539.40 (KHTML, like Gecko) Chrome/62.0.4.80"
    }
    # BUG FIX: requests has no default timeout, so a stalled server used to
    # hang this CLI forever.  10s is generous for a single page fetch.
    r = requests.get(source % word, headers=headers, timeout=10)
    r.raise_for_status()
    return r.content
def nl_parser(content):
    '''
    Extract rhyme words from a mijnwoordenboek.nl rhyme page.

    Walks the "... woorden" section headers, takes the table that directly
    follows each one, strips the markup, and returns the words of the first
    matching table in reverse (best-first) order.  Returns [] when the page
    has no usable section.
    '''
    from bs4 import BeautifulSoup
    soup = BeautifulSoup(content, "html.parser")
    for h2 in soup.findAll("h2", attrs={"class": "bigblue"}):
        h2_title = h2.text.lower()
        # Skip the page title and the "expressions" section.
        if h2_title.startswith(("rijmwoordenboek", "uitdrukkingen")):
            continue
        if "woorden" not in h2_title:
            continue
        table = h2.next_sibling
        if table.name != "table":
            continue
        words = str(table)
        # The site varies which <br> spelling it uses between word entries.
        delimiter = ""
        for br_variant in ["br", "/br", "br/"]:
            if "<%s>" % br_variant in words:
                delimiter = "<%s>" % br_variant
                break
        # BUG FIX: str.split("") raises ValueError, so a table containing
        # none of the known <br> variants used to crash; fall back to
        # treating the whole table as a single chunk.
        chunks = words.split(delimiter) if delimiter else [words]
        # Strip remaining tags, drop empties.  (.decode("utf8") is the
        # file's Python 2 dialect: bytes -> unicode.)
        words = filter(None, [re.sub(r'<[^>]*?>', '', z).decode("utf8") for z in chunks])
        return words[::-1]
    # BUG FIX: previously fell through returning None, which made the
    # downstream word filter crash; an empty list prints "no results".
    return []
def en_parser(content):
    '''
    Extract rhyme words from a RhymeZone results page.

    Anchors whose href starts with "d=" carry the rhyme words; entries
    beginning with "-" are suffix fragments rather than whole words and
    are dropped.  Returns the words in page order.
    '''
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(content, "html.parser")
    rhyme_links = soup.findAll("a", attrs={"href": re.compile('^d=.*$')})
    return [link.text for link in rhyme_links if not link.text.startswith("-")]
def val_words(words):
    '''
    Filter a word list down to printable, sufficiently long entries.

    Each word is reduced to its ASCII-printable characters; a word (and,
    for two-part entries, each half around the first space) must be longer
    than 3 characters to survive.  Returns the surviving words in order.
    '''
    def _long_enough(word):
        # Very short "words" are noise in a rhyme list.
        return len(word) > 3

    rtn = []
    for word in words:
        # BUG FIX: the original filter(lambda ...) only works on Python 2
        # (where it returns a str); on Python 3 it yields a lazy object and
        # every later string operation misbehaves.  ''.join is correct on both.
        word = ''.join(c for c in word if c in string.printable)
        if not word:
            continue
        if " " in word:
            first, rest = word.split(" ", 1)
            if not _long_enough(first) or not _long_enough(rest):
                continue
        if not _long_enough(word):
            continue
        rtn.append(word)
    return rtn
def output(words):
    '''
    Pretty-print the rhyme words as a four-column plain-text table.

    Words are first validated/cleaned by val_words(); if nothing survives,
    "no results" is printed instead.
    '''
    from tabulate import tabulate

    words = val_words(words)
    if not words:
        print("no results")
        return
    # Chop the flat word list into rows of four for the table layout.
    rows = [words[start:start + 4] for start in range(0, len(words), 4)]
    print(tabulate(rows, tablefmt="plain", stralign="left"))
# URL templates per language code; '%s' is replaced with the query word
# by fetch().
word_sources = {
    "en": "https://www.rhymezone.com/r/rhyme.cgi?typeofrhyme=perfect&loc=thesql&Word=%s",
    "nl": "http://www.mijnwoordenboek.nl/rijmwoordenboek/%s"
}
# Language code -> parser that extracts the rhyme list from that site's HTML.
parsers = {
    "en": en_parser,
    "nl": nl_parser
}
def main():
    '''
    CLI entry point: parse the language and word arguments, fetch the
    rhyme page, parse it, and print the resulting word table.
    '''
    # Only offer languages that have both a source URL and a parser.
    langs = set(word_sources) & set(parsers)

    arg_parser = argparse.ArgumentParser(
        description=logo,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    arg_parser.add_argument('lang', choices=langs, help='language for which rhymes should be obtained')
    arg_parser.add_argument('word', help='word which rhymes will be obtained for')
    args = arg_parser.parse_args()

    try:
        page = fetch(word_sources[args.lang], args.word)
        output(parsers[args.lang](page))
    except ImportError:
        # Third-party requirements are imported lazily inside the helpers.
        print("Missing rapgod.py requirements. Try: pip install requests beautifulsoup4 tabulate")
    except Exception as e:
        # Top-level CLI boundary: report the failure instead of a traceback.
        print("Oops! rapgod.py failed because: %r" % e)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment