Created
January 2, 2018 20:50
-
-
Save neuberoliveira/7f4f4b94850bbfaa1566c999d3baa580 to your computer and use it in GitHub Desktop.
Generate a random name using behindthename.com/random
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import urllib | |
import re | |
from HTMLParser import HTMLParser | |
# Random-name endpoint on behindthename.com together with its query string.
# Adjacent string literals are concatenated by the parser into a single value;
# the pieces are split only to keep each source line short.
url = (
    'https://www.behindthename.com/random/random.php'
    '?number=2&gender=both&surname=&norare=yes&nodiminutives=yes&all=no'
    '&usage_ara=1&usage_arm=1&usage_bre=1&usage_eng=1&usage_fre=1'
    '&usage_fri=1&usage_ger=1&usage_ita=1&usage_por=1&usage_sco=1'
    '&usage_grem=1&usage_romm=1&usage_grea=1&usage_roma=1'
)
class MyHTMLParser(HTMLParser):
    """Collect name parts from the HTML fed to the parser.

    A start tag whose ``class`` attribute is exactly ``heavyhuge`` opens the
    container; the next ``</span>`` closes it.  While the container is open,
    text that follows a start tag whose ``class`` attribute is exactly
    ``plain`` is treated as a candidate name part.
    """

    # A text node must begin with at least two ASCII letters to be a name.
    _NAME_START = re.compile(r'[a-zA-Z]{2,}')
    # Anything that is not an ASCII letter is stripped before storing.
    _NON_LETTERS = re.compile(r'[^a-zA-Z]+')

    def __init__(self, *args, **kwargs):
        """Create a parser with its own, initially empty, parsing state."""
        # Call the base initializer explicitly: on Python 2 HTMLParser is an
        # old-style class, for which super() is not available.
        HTMLParser.__init__(self, *args, **kwargs)
        self.found_container = False
        self.is_name_tag = False
        self.name_found = False
        # Per-instance list; a class-level list would be shared by every
        # parser and names would leak from one instance to the next.
        self.names = []

    def handle_starttag(self, tag, attrs):
        """Track whether the container is open and whether *tag* is a name tag."""
        if self._check_container(tag, attrs):
            self.found_container = True
        if self.found_container:
            # Re-evaluated for every start tag seen inside the container.
            self.is_name_tag = self._check_name_tag(tag, attrs)

    def handle_endtag(self, tag):
        """Close the container on the first ``</span>`` seen while it is open."""
        if tag == 'span' and self.found_container:
            self.found_container = False

    def handle_data(self, data):
        """Store *data* as a name part when it follows a name tag in the container."""
        if not (self.found_container and self.is_name_tag):
            return
        # match() anchors at the start, so whitespace-only text between two
        # name tags is skipped here.
        if self._NAME_START.match(data):
            self.name_found = True
            self.names.append(self._NON_LETTERS.sub('', data))

    def get_fullname(self):
        """Return the collected name parts joined by single spaces."""
        return ' '.join(self.names)

    def has_name(self):
        """Return True once at least one name part has been collected."""
        return self.name_found

    def _check_container(self, tag, attrs):
        """Return True if *attrs* mark the container (``class="heavyhuge"``)."""
        return self._search_attr(attrs, 'class', 'heavyhuge')

    def _check_name_tag(self, tag, attrs):
        """Return True if *attrs* mark a name tag (``class="plain"``)."""
        return self._search_attr(attrs, 'class', 'plain')

    def _search_attr(self, attrs, attrname, attrvalue):
        """Return True if *attrs* has a pair equal to (*attrname*, *attrvalue*).

        *attrs* is the list of ``(name, value)`` pairs passed to
        ``handle_starttag``; the comparison is an exact match on both items.
        """
        return any(
            name == attrname and value == attrvalue
            for name, value in attrs
        )
def get_url_contents(url):
    """Fetch *url* and return the response body as read from the handle.

    The handle returned by ``urllib.urlopen`` is closed even when reading
    the body raises.
    """
    response = urllib.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()
# Download the random-name page, parse it, and print the generated name.
parser = MyHTMLParser()
parser.feed(get_url_contents(url))

# Decide from the collected text itself.  Testing the bare attribute
# `parser.has_name` (a bound method, never called) is always truthy, which
# made the fallback message below unreachable.
fullname = parser.get_fullname()
if fullname:
    # Single-argument parenthesised form is valid on both Python 2 and 3.
    print(fullname)
else:
    print('Sorry, no name was found :(')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment