Skip to content

Instantly share code, notes, and snippets.

@GrenderG
Last active June 11, 2020 17:54
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save GrenderG/fd4eb4c73eadad219b0ecb5f9345070a to your computer and use it in GitHub Desktop.
Save GrenderG/fd4eb4c73eadad219b0ecb5f9345070a to your computer and use it in GitHub Desktop.
Scrapes all emojis directly from http://emoji.codes/ and formats them for use in https://github.com/mrowa44/emojify
from bs4 import BeautifulSoup
import requests
def start_requests():
    """Download every emoji.codes category page and print the emoji table.

    The listing is written to stdout between an opening ``emojis=(`` line and
    a closing ``)`` line.  Each category page is parsed with BeautifulSoup and
    passed to ``parse`` together with the category name, which prints the
    entries of that category.

    Raises:
        requests.exceptions.RequestException: if a page cannot be downloaded
            (connection problem, timeout or HTTP error status).
    """
    base_url = 'http://emoji.codes/family?c='
    categories = (
        'people',
        'nature',
        'food',
        'activity',
        'travel',
        'objects',
        'symbols',
        'flags',
        'diversity',
    )

    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print('emojis=(')
    for category in categories:
        # A timeout keeps the script from hanging forever on a stalled server,
        # and raise_for_status() stops us from parsing an HTTP error page.
        req = requests.get(base_url + category, timeout=30)
        req.raise_for_status()
        html = BeautifulSoup(req.text, 'html.parser')
        parse(html, category)
    print(')')
def parse(html, title):
    """Print one ``["shortcode"]="code"`` line per emoji of a category page.

    Args:
        html: Parsed page (as returned by ``BeautifulSoup``) expected to
            contain an element with ``id="emoji-list"`` whose ``<tr>`` rows
            carry the emoji code in their ``id`` attribute and the shortcode
            in a ``<span>`` whose class contains ``shortcode``.
        title: Category name, printed in the header comment.  For the
            ``'diversity'`` category every shortcode containing ``tone1``
            additionally produces an entry for the shortcode without
            ``_tone1``, mapped to the first code of the row only.

    Rows without an ``id`` or without a shortcode span are skipped.  A page
    without an ``emoji-list`` element prints only the header and a warning on
    stderr instead of failing with ``AttributeError``.
    """
    import sys  # local import: only needed for the warning below

    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print('\n\t# category: ' + title + '\n')

    emoji_list = html.find(id='emoji-list')
    if emoji_list is None:
        sys.stderr.write(
            'warning: no emoji list found for category %s\n' % title)
        return

    for emoji in emoji_list.find_all('tr'):
        emoji_id = emoji.get('id')
        shortcode_tag = emoji.select_one('span[class*=shortcode]')
        if not emoji_id or shortcode_tag is None:
            # Not an emoji row (e.g. a header row): nothing to print.
            continue

        shortcode = shortcode_tag.text
        # Prefix every dash-separated part of the id with a literal \U.
        code = u'\\U' + emoji_id.replace('-', '\\U')

        if title == 'diversity' and 'tone1' in shortcode:
            # Base entry: shortcode without the tone suffix -> first part only.
            first_code = u'\\U' + emoji_id.split('-')[0]
            print('\t["%s"]="%s"'
                  % (shortcode.replace('_tone1', ''), first_code))

        print('\t["%s"]="%s"' % (shortcode, code))
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    start_requests()
from bs4 import BeautifulSoup
import requests
# Index used for the next printed keys[...]/values[...] pair.  parse() declares
# it global and increments it after every entry it prints, so the numbering
# keeps growing across all category pages.
curr_index = 0
def start_requests():
    """Download every emoji.codes category page and print its entries.

    Each category page is fetched, parsed with BeautifulSoup and passed to
    ``parse`` together with the category name; ``parse`` does the printing.

    Raises:
        requests.exceptions.RequestException: if a page cannot be downloaded
            (connection problem, timeout or HTTP error status).
    """
    base_url = 'http://emoji.codes/family?c='
    categories = (
        'people',
        'nature',
        'food',
        'activity',
        'travel',
        'objects',
        'symbols',
        'flags',
        'diversity',
    )

    for category in categories:
        # A timeout keeps the script from hanging forever on a stalled server,
        # and raise_for_status() stops us from parsing an HTTP error page.
        req = requests.get(base_url + category, timeout=30)
        req.raise_for_status()
        html = BeautifulSoup(req.text, 'html.parser')
        parse(html, category)
def parse(html, title):
    """Print ``keys[i]=...; values[i]=...;`` lines for one category page.

    Args:
        html: Parsed page (as returned by ``BeautifulSoup``) expected to
            contain an element with ``id="emoji-list"`` whose ``<tr>`` rows
            carry the emoji code in their ``id`` attribute and the shortcode
            in a ``<span>`` whose class contains ``shortcode``.
        title: Category name, printed in the header comment.  For the
            ``'diversity'`` category every shortcode containing ``tone1``
            additionally produces an entry for the shortcode without
            ``_tone1``, mapped to the first code of the row only.

    Side effects:
        Increments the module-level ``curr_index`` once per printed entry.

    Rows without an ``id`` or without a shortcode span are skipped.  A page
    without an ``emoji-list`` element prints only the header, a warning on
    stderr and the trailing blank lines instead of failing with
    ``AttributeError``.
    """
    global curr_index
    import sys  # local import: only needed for the warning below

    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print('# category: ' + title + '\n')

    emoji_list = html.find(id='emoji-list')
    if emoji_list is None:
        sys.stderr.write(
            'warning: no emoji list found for category %s\n' % title)
        rows = []
    else:
        rows = emoji_list.find_all('tr')

    entry = "keys[%s]='%s'; values[%s]='%s';"
    for emoji in rows:
        emoji_id = emoji.get('id')
        shortcode_tag = emoji.select_one('span[class*=shortcode]')
        if not emoji_id or shortcode_tag is None:
            # Not an emoji row (e.g. a header row): nothing to print.
            continue

        shortcode = shortcode_tag.text
        # Prefix every dash-separated part of the id with a literal \U.
        code = u'\\U' + emoji_id.replace('-', '\\U')

        if title == 'diversity' and 'tone1' in shortcode:
            # Base entry: shortcode without the tone suffix -> first part only.
            first_code = u'\\U' + emoji_id.split('-')[0]
            print(entry % (curr_index, shortcode.replace('_tone1', ''),
                           curr_index, first_code))
            curr_index += 1

        print(entry % (curr_index, shortcode, curr_index, code))
        curr_index += 1

    # Blank lines separating this category from the next one.
    print('\n')
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    start_requests()
@GrenderG
Copy link
Author

GrenderG commented Apr 5, 2017

  • BeautifulSoup4 is required: pip install beautifulsoup4
  • requests is required: pip install requests

If you want to redirect the output to a file, simply run `python emoji_scrap.py > list.txt`.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment