Skip to content

Instantly share code, notes, and snippets.

@jetonr
Created November 26, 2018 22:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jetonr/3230987999a7158f167853dec01169ec to your computer and use it in GitHub Desktop.
Save jetonr/3230987999a7158f167853dec01169ec to your computer and use it in GitHub Desktop.
Word Count From Word Docs
<?php
function read_docx($filename){
$striped_content = '';
$content = '';
if(!$filename || !file_exists($filename)) return false;
$zip = zip_open($filename);
if (!$zip || is_numeric($zip)) return false;
while ($zip_entry = zip_read($zip)) {
if (zip_entry_open($zip, $zip_entry) == FALSE) continue;
if (zip_entry_name($zip_entry) != "word/document.xml") continue;
$content .= zip_entry_read($zip_entry, zip_entry_filesize($zip_entry));
zip_entry_close($zip_entry);
}
zip_close($zip);
$content = str_replace('</w:r></w:p></w:tc><w:tc>', " ", $content);
$content = str_replace('</w:r></w:p>', "\r\n", $content);
$striped_content = strip_tags($content);
return $striped_content;
}
$normalizeChars = array(
'Š'=>'S', 'š'=>'s', 'Ð'=>'Dj','Ž'=>'Z', 'ž'=>'z', 'À'=>'A', 'Á'=>'A', 'Â'=>'A', 'Ã'=>'A', 'Ä'=>'A',
'Å'=>'A', 'Æ'=>'A', 'Ç'=>'C', 'È'=>'E', 'É'=>'E', 'Ê'=>'E', 'Ë'=>'E', 'Ì'=>'I', 'Í'=>'I', 'Î'=>'I',
'Ï'=>'I', 'Ñ'=>'N', 'Ń'=>'N', 'Ò'=>'O', 'Ó'=>'O', 'Ô'=>'O', 'Õ'=>'O', 'Ö'=>'O', 'Ø'=>'O', 'Ù'=>'U', 'Ú'=>'U',
'Û'=>'U', 'Ü'=>'U', 'Ý'=>'Y', 'Þ'=>'B', 'ß'=>'Ss','à'=>'a', 'á'=>'a', 'â'=>'a', 'ã'=>'a', 'ä'=>'a',
'å'=>'a', 'æ'=>'a', 'ç'=>'c', 'è'=>'e', 'é'=>'e', 'ê'=>'e', 'ë'=>'e', 'ì'=>'i', 'í'=>'i', 'î'=>'i',
'ï'=>'i', 'ð'=>'o', 'ñ'=>'n', 'ń'=>'n', 'ò'=>'o', 'ó'=>'o', 'ô'=>'o', 'õ'=>'o', 'ö'=>'o', 'ø'=>'o', 'ù'=>'u',
'ú'=>'u', 'û'=>'u', 'ü'=>'u', 'ý'=>'y', 'ý'=>'y', 'þ'=>'b', 'ÿ'=>'y', 'ƒ'=>'f',
'ă'=>'a', 'î'=>'i', 'â'=>'a', 'ș'=>'s', 'ț'=>'t', 'Ă'=>'A', 'Î'=>'I', 'Â'=>'A', 'Ș'=>'S', 'Ț'=>'T',
);
$toto = strtolower(strtr(read_docx('1.docx'), $normalizeChars));
$toto = array_count_values(str_word_count($toto, 1));
arsort($toto);
$csv = '';
foreach( $toto as $key => $value ) {
$csv .= $key . ',' . $value . "\n";
}
var_dump($csv);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment