Skip to content

Instantly share code, notes, and snippets.

@ZacharyJacobCollins
Created June 29, 2018 03:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ZacharyJacobCollins/c73e33bb71af13e87cb5f8b2c3b36855 to your computer and use it in GitHub Desktop.
Save ZacharyJacobCollins/c73e33bb71af13e87cb5f8b2c3b36855 to your computer and use it in GitHub Desktop.
Concordance table
<?php
// Sample text that this script indexes; it is the input passed to build_table().
const TEXT = 'Given an arbitrary text document written in English, write a program that will generate a concordance, i.e. an alphabetical list of all word occurrences, labeled with word frequencies. Bonus: label each word with the sentence numbers in which each occurrence appeared.';
/**
 * Build the concordance table for a text.
 *
 * The text is normalised with format_text(), split into sentences on periods,
 * and each sentence is split into tokens on single spaces. Every token is
 * counted and tagged with the 1-based number of the sentence it came from.
 *
 * Because tokens are produced by splitting on single spaces, empty tokens
 * (for example from the space that follows a period, or from the empty
 * remainder after the final period) are recorded under the '' key;
 * format_table() discards that entry before output.
 *
 * @param string $text Raw input text.
 * @return array Map of word => [0 => occurrence count, 1 => list of sentence numbers].
 */
function build_table($text)
{
    $table = [];
    $text = format_text($text);

    // Split on runs of periods. The first alternative consumes the
    // abbreviations "i.e." and "e.g." and then rejects the match with
    // (*SKIP)(*FAIL), so the periods inside them never end a sentence.
    // (A lookbehind like (?<![i\.e\.]) is a character class and would
    // wrongly refuse to split after ANY word ending in "i" or "e".)
    $sentences = preg_split('/\b(?:i\.e|e\.g)\.(*SKIP)(*FAIL)|\.+/', $text);

    foreach ($sentences as $i => $sentence) {
        foreach (explode(' ', $sentence) as $word) {
            $table = update_count($table, $word);
            $table = update_sentence($table, $word, $i);
        }
    }

    return $table;
}
/**
 * Normalise text before it is split into sentences and words.
 *
 * Lower-cases the text and removes punctuation that attaches to words
 * without ending a sentence, so that e.g. "Bonus:" and "bonus" are counted
 * as the same word. Periods are left in place because build_table() splits
 * sentences on them.
 *
 * @param string $text Raw input text.
 * @return string Lower-cased text with , ; : " ( ) removed.
 */
function format_text($text)
{
    $text = strtolower($text);

    // Originally only commas were stripped, which left keys such as "bonus:".
    return str_replace([',', ';', ':', '"', '(', ')'], '', $text);
}
/**
 * Record one more occurrence of a word.
 *
 * The occurrence counter lives in slot 0 of the word's entry.
 *
 * @param array  $table Concordance table built so far.
 * @param string $word  Word that was just encountered.
 * @return array The table with the word's counter created or incremented.
 */
function update_count($table, $word)
{
    $already_counted = isset($table[$word][0]);

    // First sighting: start the counter at 1.
    if (!$already_counted) {
        $table[$word][0] = 1;
        return $table;
    }

    // Seen before: bump the existing counter.
    $table[$word][0]++;
    return $table;
}
/**
 * Note the sentence in which a word occurrence was found.
 *
 * Sentence numbers are appended to the list kept in slot 1 of the word's
 * entry; the zero-based index is converted to a 1-based number.
 *
 * @param array  $table Concordance table built so far.
 * @param string $word  Word that was just encountered.
 * @param int    $i     Zero-based index of the sentence containing the word.
 * @return array The table with the sentence number appended for the word.
 */
function update_sentence($table, $word, $i)
{
    $sentence_number = $i + 1;
    $table[$word][1][] = $sentence_number;

    return $table;
}
/**
 * Render the concordance table as JSON for printing to the console.
 *
 * Each entry [0 => count, 1 => sentence numbers] is flattened to the string
 * "count:n1,n2,...". Entries are sorted by word, and the junk entry keyed by
 * the empty string (a by-product of splitting the text on single spaces) is
 * dropped.
 *
 * @param array $table Map of word => [0 => count, 1 => list of sentence numbers].
 * @return string|false JSON text as returned by json_encode().
 */
function format_table($table)
{
    // Build a fresh array instead of iterating by reference, which would
    // leave a dangling reference to the last element behind.
    $formatted = [];
    foreach ($table as $word => $entry) {
        $formatted[$word] = $entry[0] . ':' . implode(',', $entry[1]);
    }

    // Drop only the empty-string token. array_shift() would remove whatever
    // sorts first (a real word when no empty token exists) and would also
    // renumber integer keys, losing numeric words such as "2018".
    unset($formatted['']);

    // Sort table alphabetically by word.
    ksort($formatted);

    return json_encode($formatted);
}
// Build the concordance for the sample text, render it as JSON, and print it.
$table = format_table(build_table(TEXT));
echo $table;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment