Created
December 21, 2016 01:46
Star
You must be signed in to star a gist
emoji-usage-analyzer.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Scans the sub-directories of the current working directory for
 * `:name:`-style emoji usage and prints plain-text reports to stdout.
 *
 * The constructor reads two files from the current directory:
 *  - emojis.json     JSON object whose keys are the known emoji names
 *  - uses_emoji.txt  one site directory per line
 */
class Analyzer
{
    /** Maximum number of directories printFoundEmojis() will scan. */
    const MAX_TO_CHECK = 1000;

    /** @var string[] Directories found in the current working directory. */
    protected $dirs;

    /** @var mixed Not used by this class; kept so existing subclasses keep working. */
    protected $emojis;

    /** @var array Decoded emojis.json; candidates are matched against its keys. */
    protected $knownEmojis = [];

    /** @var string[] Site directories listed in uses_emoji.txt. */
    protected $emojiSites = [];

    /**
     * Load the directory list and both input files.
     *
     * Missing or unreadable input files still raise PHP's native warning,
     * but leave the corresponding list empty instead of breaking later calls.
     */
    public function __construct()
    {
        // glob() returns false on error; treat that as "no directories".
        $this->dirs = array_filter(glob('*') ?: [], 'is_dir');

        // Decode to an associative array: array_key_exists() rejects objects on PHP 8.
        $rawEmojis = file_get_contents('emojis.json');
        $decoded = $rawEmojis === false ? null : json_decode($rawEmojis, true);
        $this->knownEmojis = is_array($decoded) ? $decoded : [];

        // Drop blank lines so they never turn into an empty `find` target.
        $siteLines = file('uses_emoji.txt');
        $this->emojiSites = $siteLines === false
            ? []
            : array_values(array_filter(array_map('trim', $siteLines), 'strlen'));
    }

    /**
     * Print a report on the number of documents in each site confirmed using emoji
     *
     * Expects a file called uses_emoji.txt to be in the current directory with
     * a list of sites using emoji, one per line. Counts *.html, *.md and
     * *.markdown files below each site directory.
     *
     * @return void
     */
    public function documentCountPerEmojiSite()
    {
        foreach ($this->emojiSites as $site) {
            $numPosts = [];
            exec(
                'find ' . escapeshellarg($site)
                . " -name '*.html' -o -name '*.md' -o -name '*.markdown' | wc -l",
                $numPosts
            );

            // exec() leaves the array empty when the pipeline produced no output.
            $count = isset($numPosts[0]) ? trim($numPosts[0]) : '0';
            echo $site . ', ' . $count . PHP_EOL;
        }
    }

    /**
     * Print a report on found emojis
     *
     * Greps every sub-directory (at most MAX_TO_CHECK of them) for text
     * between colons, keeps the matches that are known emoji names, and
     * prints a summary, per-emoji totals, per-directory totals and a raw dump.
     *
     * @return void
     */
    public function printFoundEmojis()
    {
        $results = [];
        $emojis = [];
        $checked = 0;

        foreach ($this->dirs as $dir) {
            // Test the limit before counting so "Scanned Sites" is exact.
            if ($checked >= self::MAX_TO_CHECK) {
                break;
            }
            $checked++;

            echo 'Checking: ' . $dir . PHP_EOL;

            // NOTE(review): `.+` is greedy, so a line holding several
            // :emoji: tokens yields one combined match — confirm intent.
            $candidates = [];
            exec("grep -oEIR ':.+:' -- " . escapeshellarg($dir), $candidates);

            foreach ($this->extractEmojiHits($candidates) as $hit) {
                list($file, $key) = $hit;
                $results[$dir][$file][] = $key;
                $emojis[$key] = isset($emojis[$key]) ? $emojis[$key] + 1 : 1;
            }
        }

        // Most-used emoji first.
        arsort($emojis);

        echo '------------------' . PHP_EOL;
        echo 'EMOJI USAGE REPORT' . PHP_EOL;
        echo '------------------' . PHP_EOL;
        echo PHP_EOL;

        echo '----- SUMMARY -----' . PHP_EOL;
        echo 'Scanned Sites: ' . $checked . PHP_EOL;
        echo 'Number Of Sites Using Emojis: ' . count($results) . PHP_EOL;
        echo PHP_EOL;

        echo '----- USAGE BY EMOJI -----' . PHP_EOL;
        foreach ($emojis as $k => $v) {
            echo $k . ', ' . $v . PHP_EOL;
        }
        echo PHP_EOL;

        echo '----- USAGE BY BLOG -----' . PHP_EOL;
        foreach ($results as $blog => $result) {
            if (empty($result)) {
                continue;
            }
            $uses = 0;
            foreach ($result as $page => $usages) {
                $uses += count($usages);
            }
            echo $blog . ', ' . $uses . PHP_EOL;
        }
        echo PHP_EOL;

        echo '----- RAW USE DUMP -----' . PHP_EOL;
        echo json_encode($results) . PHP_EOL;
        var_dump($results);
    }

    /**
     * Turn grep output lines for one directory into confirmed emoji hits.
     *
     * Each line is split at its first colon into a file part and a match
     * part. A line with an empty file part is attributed to the most recent
     * file seen in the same batch.
     *
     * @param string[] $candidates Lines produced by `grep -o` for one directory.
     *
     * @return array[] List of [file, emojiName] pairs, in input order.
     */
    private function extractEmojiHits(array $candidates)
    {
        $hits = [];
        $lastFile = '';

        foreach ($candidates as $candidate) {
            $parts = explode(':', $candidate, 2);
            if (!isset($parts[1])) {
                // No colon at all: not a "file:match" line.
                continue;
            }
            if (strlen($parts[0]) > 0) {
                $lastFile = $parts[0];
            }

            $key = str_replace(':', '', $parts[1]);
            if (!$this->shouldCheckCandidate($lastFile, $key)) {
                continue;
            }
            if (array_key_exists($key, $this->knownEmojis)) {
                $hits[] = [$lastFile, $key];
            }
        }

        return $hits;
    }

    /**
     * Decide whether a grep match is worth comparing against the emoji list.
     *
     * @param string $file Path of the file the match came from.
     * @param string $key  Matched text with all colons removed.
     *
     * @return bool False for an empty path, a listed false positive, or a
     *              path containing a blacklisted fragment; true otherwise.
     */
    protected function shouldCheckCandidate($file, $key)
    {
        if (strlen($file) < 1) {
            return false;
        }

        // Path fragments (matched case-insensitively) that are never scanned.
        $blacklist = [
            '.js',
            '.css',
            '.sass',
            '.scss',
            '.rb',
            '.thor',
            '.xml',
            'README',
            '.less',
            'node_modules',
            '.cpp',
            'vendor/bundle/ruby'
        ];

        // Exact file path => matches in that file that are not emoji usage.
        $knownFalsePositives = [
            'abrahamdu.github.io/scripts/commit' => [
                'shipit'
            ],
            'abrahamdu.github.io/scripts/publish' => [
                'shipit'
            ],
            'buildr/doc/more_stuff.textile' => [
                'shell'
            ],
            'shapeshed.github.com/_posts/2009-08-17-copyright.markdown' => [
                'copyright'
            ],
            'tpitale.github.com/_posts/2015-09-08-mailroom-now-with-sidekiq-and-que-support.md' => [
                'email'
            ]
        ];

        // isset() avoids an undefined-index warning for every unlisted file.
        if (isset($knownFalsePositives[$file]) && in_array($key, $knownFalsePositives[$file], true)) {
            return false;
        }

        foreach ($blacklist as $item) {
            if (stripos($file, $item) !== false) {
                return false;
            }
        }

        return true;
    }
}
// Command-line entry point: the first argument names the report method to run,
// e.g. `printFoundEmojis` or `documentCountPerEmojiSite`.
$analyzer = new Analyzer;

$report = isset($argv[1]) ? $argv[1] : '';

// is_callable() is false for protected methods and unknown names, so only
// the public methods can be reached from the command line.
if ($report === '' || !is_callable([$analyzer, $report])) {
    fwrite(
        STDERR,
        'Usage: php ' . $argv[0] . ' <documentCountPerEmojiSite|printFoundEmojis>' . PHP_EOL
    );
    exit(1);
}

// Braces are required: since PHP 7, `$analyzer->$argv[1]()` is parsed as
// `($analyzer->$argv)[1]()` and would not call the requested method.
$analyzer->{$report}();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment