Skip to content

Instantly share code, notes, and snippets.

@ArnaudLigny
Last active August 22, 2022 10:20
Show Gist options
  • Save ArnaudLigny/b35dafd434691a3268439b39b914d69d to your computer and use it in GitHub Desktop.
Save ArnaudLigny/b35dafd434691a3268439b39b914d69d to your computer and use it in GitHub Desktop.
<?php
/**
 * Build an inverted index: a map from each word to the files that contain it.
 *
 * A word is a run of `\w` characters, lower-cased with strtolower() before it
 * is used as a key. The pattern has no `u` modifier and strtolower() is
 * byte-oriented, so non-ASCII text is not treated as Unicode.
 *
 * @see https://en.m.wikipedia.org/wiki/Inverted_index
 *
 * @param array $filenames Paths (anything file_get_contents() accepts) of the files to index.
 *
 * @return array Map of lower-cased word => list of filenames containing it.
 *               Words appear in order of first occurrence; each filename is
 *               listed at most once per word, in the order the files were given.
 *               Purely numeric words end up as integer keys (PHP array-key casting).
 *
 * @throws RuntimeException If one of the files cannot be read.
 */
function buildInvertedIndex(array $filenames): array
{
    $invertedIndex = [];

    foreach ($filenames as $filename) {
        $data = file_get_contents($filename);
        if ($data === false) {
            // Let the caller decide how to react instead of killing the whole process.
            throw new RuntimeException('Unable to read file: ' . $filename);
        }

        // Collect every word of the file; $found[0] holds the full matches in document order.
        preg_match_all('/\w+/', $data, $found);

        // Collapse repeats so each distinct word is handled once per file
        // (array_unique keeps the first occurrence, preserving order).
        $words = array_unique(array_map('strtolower', $found[0] ?? []));

        foreach ($words as $word) {
            // The strict check still guards against the same filename being passed twice.
            if (!in_array($filename, $invertedIndex[$word] ?? [], true)) {
                $invertedIndex[$word][] = $filename;
            }
        }
    }

    return $invertedIndex;
}
/**
 * Search for a word in an inverted index.
 *
 * The key is first tried exactly as given. If it is absent, its strtolower()
 * form is tried as well, because buildInvertedIndex() stores every word
 * lower-cased; without this fallback a query such as "Cat" would never match.
 *
 * @param array  $invertedIndex Map of word => list of filenames.
 * @param string $word          Word to look up.
 *
 * @return array|false The entry stored for the word (a list of filenames), or false when not indexed.
 */
function lookupWord($invertedIndex, $word)
{
    // Exact match first, so indexes with case-sensitive keys keep working as before.
    if (array_key_exists($word, $invertedIndex)) {
        return $invertedIndex[$word];
    }

    $normalized = strtolower((string) $word);

    return array_key_exists($normalized, $invertedIndex) ? $invertedIndex[$normalized] : false;
}
<?php
// Usage example: index three sample files, then report which of them contain each query word.
$invertedIndex = buildInvertedIndex(['file1.txt', 'file2.txt', 'file3.txt']);

foreach (['cat', 'dog'] as $word) {
    $matches = lookupWord($invertedIndex, $word);

    // sprintf() inserts the word into the message; "$word" inside a single-quoted
    // string is not interpolated and would be printed literally.
    if ($matches !== false) {
        echo sprintf('Found the word "%s" in files: %s', $word, implode(', ', $matches)) . "\n";
    } else {
        echo sprintf('Unable to find the word "%s" in the index', $word) . "\n";
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment