Last active
August 25, 2017 15:37
-
-
Save pydsigner/d954d29ed8a39cd2f013a333d0ff2758 to your computer and use it in GitHub Desktop.
hip-hop creation assistance tool
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python2 | |
''' | |
Impress your friends and become the next | |
2pac using rapgod.py, a state-of-the-art | |
rhymez and raps assistance tool. | |
dsc@dev4 $ rapgod en python | |
brighten brighton frighten icahn | |
icon ikon lightning lightson | |
ptyon pythons tighten titan | |
triton whiten | |
''' | |
import argparse | |
import re | |
import string | |
# ASCII-art "rapGod" banner; passed to argparse as the description in main(),
# so it is shown at the top of the --help output.
# NOTE(review): the art's exact column alignment may have been mangled in
# transit -- verify against the rendered banner before shipping.
logo = r"""
___ _
_ __ __ _ _ __ / _ \___ __| |
| '__/ _` | '_ \ / /_\/ _ \ / _` |
| | | (_| | |_) / /_\\ (_) | (_| |
|_| \__,_| .__/\____/\___/ \__,_|
|_|
""".strip('\n')
def fetch(source, word):
    '''
    Download the rhyme page for *word*.

    source -- URL template containing a single '%s' placeholder
    word   -- the query word interpolated into the template

    Returns the raw response body (bytes).  Raises requests.HTTPError on a
    non-2xx status and requests.Timeout if the server stalls.
    '''
    import requests
    headers = {
        "User-Agent": "Mozilla/5.0 AppleWebKit/539.40 (KHTML, like Gecko) Chrome/62.0.4.80"
    }
    # BUG FIX: requests has no default timeout, so a stalled server used to
    # hang this CLI forever.  10s is generous for a single page fetch.
    r = requests.get(source % word, headers=headers, timeout=10)
    r.raise_for_status()
    return r.content
def nl_parser(content):
    '''
    Extract rhyme words from a mijnwoordenboek.nl rhyme page.

    Walks the "... woorden" section headers, takes the table that directly
    follows each one, strips the markup, and returns the words of the first
    matching table in reverse (best-first) order.  Returns [] when the page
    has no usable section.
    '''
    from bs4 import BeautifulSoup
    soup = BeautifulSoup(content, "html.parser")
    for h2 in soup.findAll("h2", attrs={"class": "bigblue"}):
        h2_title = h2.text.lower()
        # Skip the page title and the "expressions" section.
        if h2_title.startswith(("rijmwoordenboek", "uitdrukkingen")):
            continue
        if "woorden" not in h2_title:
            continue
        table = h2.next_sibling
        if table.name != "table":
            continue
        words = str(table)
        # The site varies which <br> spelling it uses between word entries.
        delimiter = ""
        for br_variant in ["br", "/br", "br/"]:
            if "<%s>" % br_variant in words:
                delimiter = "<%s>" % br_variant
                break
        # BUG FIX: str.split("") raises ValueError, so a table containing
        # none of the known <br> variants used to crash; fall back to
        # treating the whole table as a single chunk.
        chunks = words.split(delimiter) if delimiter else [words]
        # Strip remaining tags, drop empties.  (.decode("utf8") is the
        # file's Python 2 dialect: bytes -> unicode.)
        words = filter(None, [re.sub(r'<[^>]*?>', '', z).decode("utf8") for z in chunks])
        return words[::-1]
    # BUG FIX: previously fell through returning None, which made the
    # downstream word filter crash; an empty list prints "no results".
    return []
def en_parser(content):
    '''
    Extract rhyme words from a RhymeZone results page.

    Anchors whose href starts with "d=" carry the rhyme words; entries
    beginning with "-" are suffix fragments rather than whole words and
    are dropped.  Returns the words in page order.
    '''
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(content, "html.parser")
    rhyme_links = soup.findAll("a", attrs={"href": re.compile('^d=.*$')})
    return [link.text for link in rhyme_links if not link.text.startswith("-")]
def val_words(words):
    '''
    Filter a word list down to printable, sufficiently long entries.

    Each word is reduced to its ASCII-printable characters; a word (and,
    for two-part entries, each half around the first space) must be longer
    than 3 characters to survive.  Returns the surviving words in order.
    '''
    def _long_enough(word):
        # Very short "words" are noise in a rhyme list.
        return len(word) > 3

    rtn = []
    for word in words:
        # BUG FIX: the original filter(lambda ...) only works on Python 2
        # (where it returns a str); on Python 3 it yields a lazy object and
        # every later string operation misbehaves.  ''.join is correct on both.
        word = ''.join(c for c in word if c in string.printable)
        if not word:
            continue
        if " " in word:
            first, rest = word.split(" ", 1)
            if not _long_enough(first) or not _long_enough(rest):
                continue
        if not _long_enough(word):
            continue
        rtn.append(word)
    return rtn
def output(words):
    '''
    Pretty-print the rhyme words as a four-column plain-text table.

    Words are first validated/cleaned by val_words(); if nothing survives,
    "no results" is printed instead.
    '''
    from tabulate import tabulate

    words = val_words(words)
    if not words:
        print("no results")
        return
    # Chop the flat word list into rows of four for the table layout.
    rows = [words[start:start + 4] for start in range(0, len(words), 4)]
    print(tabulate(rows, tablefmt="plain", stralign="left"))
# URL templates per language code; '%s' is replaced with the query word
# by fetch().
word_sources = {
    "en": "https://www.rhymezone.com/r/rhyme.cgi?typeofrhyme=perfect&loc=thesql&Word=%s",
    "nl": "http://www.mijnwoordenboek.nl/rijmwoordenboek/%s"
}
# Language code -> parser that extracts the rhyme list from that site's HTML.
parsers = {
    "en": en_parser,
    "nl": nl_parser
}
def main():
    '''
    CLI entry point: parse the language and word arguments, fetch the
    rhyme page, parse it, and print the resulting word table.
    '''
    # Only offer languages that have both a source URL and a parser.
    langs = set(word_sources) & set(parsers)

    arg_parser = argparse.ArgumentParser(
        description=logo,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    arg_parser.add_argument('lang', choices=langs, help='language for which rhymes should be obtained')
    arg_parser.add_argument('word', help='word which rhymes will be obtained for')
    args = arg_parser.parse_args()

    try:
        page = fetch(word_sources[args.lang], args.word)
        output(parsers[args.lang](page))
    except ImportError:
        # Third-party requirements are imported lazily inside the helpers.
        print("Missing rapgod.py requirements. Try: pip install requests beautifulsoup4 tabulate")
    except Exception as e:
        # Top-level CLI boundary: report the failure instead of a traceback.
        print("Oops! rapgod.py failed because: %r" % e)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment