Skip to content

Instantly share code, notes, and snippets.

@marktheunissen
Last active September 28, 2015 21:59
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save marktheunissen/1502846 to your computer and use it in GitHub Desktop.
Save marktheunissen/1502846 to your computer and use it in GitHub Desktop.
Examine frequency of characters in Drupal code
<?php
/**
 * Implements hook_drush_command().
 *
 * Declares a single command, "text-analysis", with the short alias "ta".
 *
 * @return array
 *   Command definitions keyed by command name.
 */
function mymodule_drush_command() {
  return array(
    'text-analysis' => array(
      'description' => 'Analyse text.',
      'aliases' => array('ta'),
    ),
  );
}
/**
 * Command callback: reports how often each non-letter character occurs.
 *
 * Scans the hard-coded directory for files whose names end in "module",
 * "test", "install" or "inc", adds up the occurrences of every byte value
 * across all of them, and prints (most frequent first) each character that is
 * not an ASCII letter and occurs more than 400 times. Line feeds and spaces
 * are printed as "LF" and "SPACE" so they are visible in the output.
 */
function drush_mymodule_text_analysis() {
  $files = file_scan_directory('/path/to/drupal', '/.*module$|.*test$|.*install$|.*inc$/');

  // Tally by byte value rather than by display label. Using the character as
  // an array key turns '0'..'9' into integer keys, and a loose comparison of
  // integer 0 against letters is TRUE before PHP 8, which hid the digit zero
  // from the report.
  $totals = array();
  foreach ($files as $file) {
    $data = file_get_contents($file->uri);
    if ($data === FALSE) {
      // Unreadable file: nothing to count.
      continue;
    }
    foreach (count_chars($data, 1) as $byte => $count) {
      // Start each counter at zero instead of reading an undefined index.
      $totals[$byte] = (isset($totals[$byte]) ? $totals[$byte] : 0) + $count;
    }
  }

  // Highest count first; keys (byte values) are preserved.
  arsort($totals);

  // Printable names for characters that would otherwise be invisible.
  $labels = array(10 => 'LF', 32 => 'SPACE');
  foreach ($totals as $byte => $total) {
    // ASCII A-Z (65-90) and a-z (97-122) are excluded from the report.
    $is_letter = ($byte >= 65 && $byte <= 90) || ($byte >= 97 && $byte <= 122);
    if ($total > 400 && !$is_letter) {
      $label = isset($labels[$byte]) ? $labels[$byte] : chr($byte);
      drush_print($label . " --- " . $total);
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment