Skip to content

Instantly share code, notes, and snippets.

@pydsigner
Last active August 25, 2017 15:37
Show Gist options
  • Save pydsigner/d954d29ed8a39cd2f013a333d0ff2758 to your computer and use it in GitHub Desktop.
Save pydsigner/d954d29ed8a39cd2f013a333d0ff2758 to your computer and use it in GitHub Desktop.
hip-hop creation assistance tool
#!/usr/bin/python2
'''
Impress your friends and become the next
2pac using rapgod.py, a state-of-the-art
rhymez and raps assistance tool.

Example session:
dsc@dev4 $ rapgod en python
brighten brighton frighten icahn
icon ikon lightning lightson
ptyon pythons tighten titan
triton whiten
'''
import argparse
import re
import string
# ASCII-art banner; main() passes it to argparse as the help description.
# The trailing .strip('\n') removes the newlines that the triple-quoted
# literal adds before and after the art.
# NOTE(review): the art's leading whitespace looks lost in this copy --
# confirm against the original file.
logo = r"""
___ _
_ __ __ _ _ __ / _ \___ __| |
| '__/ _` | '_ \ / /_\/ _ \ / _` |
| | | (_| | |_) / /_\\ (_) | (_| |
|_| \__,_| .__/\____/\___/ \__,_|
|_|
""".strip('\n')
def fetch(source, word, timeout=10):
    """Download the rhyme page for ``word`` and return its raw body.

    Args:
        source: URL template containing a single ``%s`` placeholder.
        word: Word substituted into the template.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout, requests waits indefinitely on a stalled server.

    Returns:
        The response body (``Response.content``).

    Raises:
        ImportError: if ``requests`` is not installed.
        requests.RequestException: on connection errors, timeouts, or an
            HTTP error status (via ``raise_for_status``).
    """
    # Imported inside the function so that main() can catch a missing
    # dependency as ImportError and print an install hint.
    import requests

    # Browser-like User-Agent sent with the request.
    headers = {
        "User-Agent": "Mozilla/5.0 AppleWebKit/539.40 (KHTML, like Gecko) Chrome/62.0.4.80"
    }
    response = requests.get(source % word, headers=headers, timeout=timeout)
    response.raise_for_status()
    return response.content
def nl_parser(content):
    """Extract Dutch rhyme words from a fetched result page.

    Walks every ``<h2 class="bigblue">`` heading, ignoring headings whose
    lower-cased title starts with "rijmwoordenboek" or "uitdrukkingen" or
    does not contain "woorden". For the first remaining heading that is
    directly followed by a ``<table>``, the table markup is split on its
    ``<br>`` separator, tags are stripped, and empty pieces are dropped.

    NOTE(review): indentation was lost in the copy this was restored from;
    returning at the first matching table is assumed -- confirm.

    Args:
        content: HTML of the result page.

    Returns:
        List of words in reverse page order, or an empty list when no
        matching table exists.

    Raises:
        ImportError: if ``bs4`` is not installed.
    """
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(content, "html.parser")
    for heading in soup.findAll("h2", attrs={"class": "bigblue"}):
        title = heading.text.lower()
        if title.startswith(("rijmwoordenboek", "uitdrukkingen")):
            continue
        if "woorden" not in title:
            continue

        table = heading.next_sibling
        # next_sibling can be None (heading is the last node); getattr
        # keeps that case from raising AttributeError.
        if getattr(table, "name", None) != "table":
            continue

        markup = str(table)
        if isinstance(markup, bytes):
            # On Python 2 str() yields a byte string; the original code
            # decoded it as UTF-8, so do the same once up front.
            markup = markup.decode("utf8")

        # The page uses one of several <br> spellings between words.
        delimiter = next(
            (tag for tag in ("<br>", "</br>", "<br/>") if tag in markup),
            None,
        )
        # str.split("") raises ValueError, so a table without any <br>
        # is treated as a single entry instead.
        pieces = markup.split(delimiter) if delimiter else [markup]

        stripped = [re.sub(r'<[^>]*?>', '', piece) for piece in pieces]
        words = [piece for piece in stripped if piece]
        return words[::-1]

    # No usable table on the page.
    return []
def en_parser(content):
    """Collect rhyme words from the English ("en") result page.

    Every ``<a>`` element whose ``href`` starts with ``d=`` counts as a
    result link; links whose text begins with "-" are skipped.

    Args:
        content: HTML of the result page.

    Returns:
        List of link texts in document order.
    """
    from bs4 import BeautifulSoup

    document = BeautifulSoup(content, "html.parser")
    result_links = document.findAll("a", attrs={"href": re.compile('^d=.*$')})
    return [link.text for link in result_links if not link.text.startswith("-")]
def val_words(words):
    """Filter candidate rhymes down to clean, sufficiently long entries.

    Each word first loses every character outside ``string.printable``.
    A cleaned word is kept only if it is longer than three characters.
    For an entry containing a space, the part before the first space and
    the remainder must each be longer than three characters too.

    Args:
        words: Iterable of strings.

    Returns:
        List of the cleaned words that passed, in input order.
    """
    printable = set(string.printable)

    def long_enough(text):
        return len(text) > 3

    kept = []
    for word in words:
        # Build a real string: filter() on a str returns a lazy iterator
        # on Python 3, which breaks the truthiness, split and len checks.
        word = "".join(ch for ch in word if ch in printable)
        if not word:
            continue
        if " " in word:
            head, tail = word.split(" ", 1)
            if not (long_enough(head) and long_enough(tail)):
                continue
        if not long_enough(word):
            continue
        kept.append(word)
    return kept
def output(words):
    """Print the validated words as a plain, left-aligned table.

    The words are passed through val_words() first. Survivors are laid
    out four per row; if none survive, "no results" is printed instead.

    Args:
        words: Iterable of candidate words.
    """
    from tabulate import tabulate

    valid = val_words(words)
    if valid:
        # Chunk the flat list into rows of at most four words.
        rows = (valid[start:start + 4] for start in range(0, len(valid), 4))
        print(tabulate(rows, tablefmt="plain", stralign="left"))
    else:
        print("no results")
# URL template per language code; the single %s is replaced by the word
# being looked up (see fetch()).
word_sources = dict(
    en="https://www.rhymezone.com/r/rhyme.cgi?typeofrhyme=perfect&loc=thesql&Word=%s",
    nl="http://www.mijnwoordenboek.nl/rijmwoordenboek/%s"
)
# Page parser per language code; each takes the fetched page content and
# returns the rhyme words found in it.
parsers = dict(
    en=en_parser,
    nl=nl_parser
)
def main():
    """Command-line entry point: parse arguments, then fetch, parse and print.

    A missing third-party package is reported with an install hint; any
    other failure is reported with a one-line message.
    """
    # Offer only languages that have both a URL template and a parser.
    langs = set(word_sources) & set(parsers)

    cli = argparse.ArgumentParser(
        description=logo,
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    cli.add_argument(
        'lang',
        choices=langs,
        help='language for which rhymes should be obtained'
    )
    cli.add_argument('word', help='word which rhymes will be obtained for')
    options = cli.parse_args()

    try:
        url_template = word_sources[options.lang]
        page = fetch(url_template, options.word)
        parse = parsers[options.lang]
        output(parse(page))
    except ImportError:
        print("Missing rapgod.py requirements. Try: pip install requests beautifulsoup4 tabulate")
    except Exception as err:
        print("Oops! rapgod.py failed because: %r" % err)
# Run the command-line tool only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment