Skip to content

Instantly share code, notes, and snippets.

@mpchadwick
Created December 21, 2016 01:46
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save mpchadwick/627eb8348af947d8bbcb2cd92f1e6caf to your computer and use it in GitHub Desktop.
emoji-usage-analyzer.php
<?php
/**
 * Command-line helper that reports on emoji shortcode usage (e.g. ":smile:")
 * in the site checkouts found in the current working directory.
 *
 * Files read from the current working directory:
 *  - emojis.json    JSON object whose keys are the known emoji names.
 *  - uses_emoji.txt One site directory per line (used only by
 *                   documentCountPerEmojiSite()).
 */
class Analyzer
{
    /** Maximum number of site directories scanned by printFoundEmojis(). */
    const MAX_TO_CHECK = 1000;

    /** @var string[] Sub-directories of the current working directory. */
    protected $dirs;

    /** @var mixed Unused by this class; kept so existing subclasses keep working. */
    protected $emojis;

    /** @var array Known emojis, keyed by emoji name. */
    protected $knownEmojis = [];

    /** @var string[] Site directories listed in uses_emoji.txt. */
    protected $emojiSites = [];

    public function __construct()
    {
        $entries = glob('*');
        // glob() returns false on error; treat that as "no directories".
        $this->dirs = array_values(array_filter($entries === false ? [] : $entries, 'is_dir'));
        $this->knownEmojis = $this->loadKnownEmojis('emojis.json');
        $this->emojiSites = $this->loadEmojiSites('uses_emoji.txt');
    }

    /**
     * Print a report on the number of documents in each site confirmed using emoji
     *
     * Expects a file called uses_emoji.txt to be in the current directory with
     * a list of sites using emoji, one per line.
     *
     * Output is one "site, count" line per site, where count is the number of
     * *.html, *.md and *.markdown files found below the site directory.
     *
     * @return void
     */
    public function documentCountPerEmojiSite()
    {
        foreach ($this->emojiSites as $site) {
            $numPosts = [];
            // The site name comes from a text file, so quote it for the shell.
            exec(
                "find " . escapeshellarg($site)
                . " -name '*.html' -o -name '*.md' -o -name '*.markdown' | wc -l",
                $numPosts
            );
            // exec() leaves the output array empty if the command produced nothing.
            $count = isset($numPosts[0]) ? trim($numPosts[0]) : '0';
            echo $site . ', ' . $count . PHP_EOL;
        }
    }

    /**
     * Print a report on found emojis
     *
     * Scans up to MAX_TO_CHECK subdirectories for ":name:" style candidates,
     * keeps those whose name is a key of the known emoji list and that pass
     * shouldCheckCandidate(), then prints a summary, per-emoji totals,
     * per-site totals and a raw dump of every match.
     *
     * @return void
     */
    public function printFoundEmojis()
    {
        $results = [];
        $emojiCounts = [];
        $checked = 0;

        foreach ($this->dirs as $dir) {
            // Test the limit before counting so "Scanned Sites" only counts
            // directories that were really scanned.
            if ($checked >= self::MAX_TO_CHECK) {
                break;
            }
            $checked++;
            echo 'Checking: ' . $dir . PHP_EOL;

            $lastFile = '';
            foreach ($this->grepCandidates($dir) as $candidate) {
                // grep prints "<file>:<match>"; the match itself starts with ':'.
                $parts = explode(':', $candidate, 2);
                if (count($parts) < 2) {
                    continue;
                }
                if ($parts[0] !== '') {
                    $lastFile = $parts[0];
                }
                $key = str_replace(':', '', $parts[1]);

                if (!$this->shouldCheckCandidate($lastFile, $key)) {
                    continue;
                }
                if (!array_key_exists($key, $this->knownEmojis)) {
                    continue;
                }

                $results[$dir][$lastFile][] = $key;
                if (!isset($emojiCounts[$key])) {
                    $emojiCounts[$key] = 0;
                }
                $emojiCounts[$key]++;
            }
        }

        // Most used emoji first.
        arsort($emojiCounts);

        echo '------------------' . PHP_EOL;
        echo 'EMOJI USAGE REPORT' . PHP_EOL;
        echo '------------------' . PHP_EOL;
        echo PHP_EOL;
        echo '----- SUMMARY -----' . PHP_EOL;
        echo 'Scanned Sites: ' . $checked . PHP_EOL;
        echo 'Number Of Sites Using Emojis: ' . count($results) . PHP_EOL;
        echo PHP_EOL;
        echo '----- USAGE BY EMOJI -----' . PHP_EOL;
        foreach ($emojiCounts as $name => $count) {
            echo $name . ', ' . $count . PHP_EOL;
        }
        echo PHP_EOL;
        echo '----- USAGE BY BLOG -----' . PHP_EOL;
        foreach ($results as $blog => $pages) {
            if (empty($pages)) {
                continue;
            }
            $uses = 0;
            foreach ($pages as $usages) {
                $uses += count($usages);
            }
            echo $blog . ', ' . $uses . PHP_EOL;
        }
        echo PHP_EOL;
        echo '----- RAW USE DUMP -----' . PHP_EOL;
        echo json_encode($results) . PHP_EOL;
        var_dump($results);
    }

    /**
     * Decide whether a candidate match should be looked up as an emoji.
     *
     * @param string $file Path of the file the candidate was found in
     * @param string $key  Candidate emoji name with the colons removed
     *
     * @return bool False when $file is empty, when the pair is a known false
     *              positive, or when the path contains a blacklisted fragment
     *              (compared case-insensitively); true otherwise.
     */
    protected function shouldCheckCandidate($file, $key)
    {
        if (strlen($file) < 1) {
            return false;
        }

        $blacklist = [
            '.js',
            '.css',
            '.sass',
            '.scss',
            '.rb',
            '.thor',
            '.xml',
            'README',
            '.less',
            'node_modules',
            '.cpp',
            'vendor/bundle/ruby'
        ];

        $knownFalsePositives = [
            'abrahamdu.github.io/scripts/commit' => [
                'shipit'
            ],
            'abrahamdu.github.io/scripts/publish' => [
                'shipit'
            ],
            'buildr/doc/more_stuff.textile' => [
                'shell'
            ],
            'shapeshed.github.com/_posts/2009-08-17-copyright.markdown' => [
                'copyright'
            ],
            'tpitale.github.com/_posts/2015-09-08-mailroom-now-with-sidekiq-and-que-support.md' => [
                'email'
            ]
        ];

        // isset() first: most files have no entry, and reading a missing key
        // would raise a warning for every candidate.
        if (isset($knownFalsePositives[$file]) && in_array($key, $knownFalsePositives[$file], true)) {
            return false;
        }

        foreach ($blacklist as $item) {
            if (stripos($file, $item) !== false) {
                return false;
            }
        }

        return true;
    }

    /**
     * Run grep over one directory and return its raw output lines.
     *
     * Flags: -o print only the matched text, -E extended regex, -I skip
     * binary files, -R recurse into the directory.
     *
     * @param string $dir Directory to search
     *
     * @return string[] Lines of the form "<file>:<match>"
     */
    protected function grepCandidates($dir)
    {
        $lines = [];
        // "--" ends option parsing so a directory name starting with '-' is
        // not read as a flag; escapeshellarg() covers spaces and quotes.
        exec("grep -oEIR '\:.+\:' -- " . escapeshellarg($dir), $lines);

        return $lines;
    }

    /**
     * Load the known emoji list as an array keyed by emoji name.
     *
     * @param string $path Path to the JSON file
     *
     * @return array Decoded list, or an empty array (with a warning) when the
     *               file is missing or does not decode to an array.
     */
    private function loadKnownEmojis($path)
    {
        if (!is_file($path) || !is_readable($path)) {
            trigger_error("Cannot read '" . $path . "'; no emojis will be recognised", E_USER_WARNING);
            return [];
        }

        // Decode to an associative array: array_key_exists() does not accept
        // objects on current PHP versions.
        $decoded = json_decode(file_get_contents($path), true);
        if (!is_array($decoded)) {
            trigger_error("'" . $path . "' does not contain a JSON object", E_USER_WARNING);
            return [];
        }

        return $decoded;
    }

    /**
     * Load the list of sites confirmed to use emoji.
     *
     * @param string $path Path to a text file with one site directory per line
     *
     * @return string[] Trimmed, non-empty lines; empty array when the file is
     *                  missing or unreadable.
     */
    private function loadEmojiSites($path)
    {
        if (!is_file($path) || !is_readable($path)) {
            return [];
        }

        $lines = file($path);
        if ($lines === false) {
            return [];
        }

        // Blank lines would make find scan an unintended location.
        return array_values(array_filter(array_map('trim', $lines), 'strlen'));
    }
}
// Entry point: run the report named by the first command-line argument, e.g.
//   php emoji-usage-analyzer.php printFoundEmojis
$analyzer = new Analyzer;
$report = isset($argv[1]) ? $argv[1] : '';

// Only public, non-magic methods may be invoked from the command line.
if ($report === '' || strpos($report, '__') === 0 || !is_callable([$analyzer, $report])) {
    $script = isset($argv[0]) ? basename($argv[0]) : 'emoji-usage-analyzer.php';
    fwrite(STDERR, 'Usage: php ' . $script . ' <documentCountPerEmojiSite|printFoundEmojis>' . PHP_EOL);
    exit(1);
}

// Braces are required: since PHP 7, `$analyzer->$argv[1]()` is read as
// `($analyzer->$argv)[1]()` and no longer calls the method named by $argv[1].
$analyzer->{$report}();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment