Skip to content

Instantly share code, notes, and snippets.

@doggan
Created July 20, 2016 17:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save doggan/6781a1e23abde0f445c937079d1cdc73 to your computer and use it in GitHub Desktop.
Save doggan/6781a1e23abde0f445c937079d1cdc73 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
"""
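Script to parse the most frequently used emoji unicode values from a saved
HTML dump of http://emojitracker.com, then delete every *.png image asset
whose file name is not one of those values.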
"""
import argparse
from bs4 import BeautifulSoup
import glob
import os
def parse_args(argv=None):
    """Parse the command-line arguments of the script.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line.  ``None`` (the default) lets argparse
            read ``sys.argv[1:]``, so existing no-argument callers are
            unaffected.

    Returns:
        An ``argparse.Namespace`` with two string attributes: ``dump``
        (the raw HTML file) and ``images`` (the path to the images).
    """
    parser = argparse.ArgumentParser(
        description='filter only frequently used emoji assets')
    parser.add_argument(
        'dump', help='the raw html (from http://emojitracker.com)')
    parser.add_argument(
        'images', help='the path to the images')
    return parser.parse_args(argv)
def main():
    """Keep only the image assets of the most frequently used emoji.

    Reads the HTML file named by the ``dump`` argument, collects up to 500
    emoji codes from the ``data-id`` attribute of its links (in document
    order, lower-cased), then deletes every ``*.png`` file directly inside
    the ``images`` directory whose lower-cased base name is not one of
    those codes.  Progress is printed to stdout.

    If no codes can be extracted, nothing is deleted.
    """
    args = parse_args()

    with open(args.dump, 'r') as f:
        soup = BeautifulSoup(f.read(), "html.parser")

    # Max # of results to keep (rankings).
    max_codes = 500
    print("### Parsing up to %s codes..." % max_codes)

    # Extract all the codes, preserving order.
    # Links are in the form:
    #   <a href="/details/1F52B" title="PISTOL" data-id="1F52B">
    # Search only the rankings section when the page has one, so links
    # elsewhere in the document cannot contribute codes; fall back to the
    # whole document when the section is missing.
    rankings = soup.find("section", {"id": "rankings"})
    scope = rankings if rankings is not None else soup

    codes = []
    for link in scope.find_all('a'):
        data = link.get('data-id')
        if data:
            codes.append(data.lower())
            if len(codes) == max_codes:
                break
    print("### %s codes found..." % len(codes))

    if not codes:
        # An empty result means the dump was not parsed as expected;
        # continuing would delete every image in the directory.
        print("### No codes found; nothing deleted.")
        return

    # Set lookup keeps the per-file membership test O(1).
    wanted = set(codes)

    delete_cnt = 0
    for path in glob.glob(os.path.join(args.images, '*.png')):
        filename = os.path.splitext(os.path.basename(path))[0].lower()
        if filename not in wanted:
            print("### Delete: %s" % path)
            os.remove(path)
            delete_cnt += 1
    print("### Deleted %s assets." % delete_cnt)
# Run the script only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment