Skip to content

Instantly share code, notes, and snippets.

@danbarua
Last active August 29, 2015 14:18
Show Gist options
  • Save danbarua/1ee97f6879e9c5f9c8e1 to your computer and use it in GitHub Desktop.
Save danbarua/1ee97f6879e9c5f9c8e1 to your computer and use it in GitHub Desktop.
public class Indexer
{
private Dictionary<string, List<string>> termToIds =
new Dictionary<string, List<string>>(StringComparer.OrdinalIgnoreCase);
private Dictionary<string, List<string>> idToTerms =
new Dictionary<string, List<string>>(StringComparer.OrdinalIgnoreCase);
public void Index(string docId, string text)
{
//could just call Delete(docId) here for brevity, but would involve making another lookup
List<string> docTerms;
if (idToTerms.TryGetValue(docId, out docTerms))
{
foreach (var existingTerm in docTerms)
{
termToIds[existingTerm].Remove(docId);
}
docTerms = new List<string>();
}
else
{
docTerms = new List<string>();
}
var words = text.Split();
foreach (var term in words)
{
List<string> termDocs;
if (termToIds.TryGetValue(term, out termDocs) == false)
{
termDocs = new List<string>();
termToIds[term] = termDocs;
}
termDocs.Add(docId);
docTerms.Add(term);
}
idToTerms[docId] = docTerms;
}
public List<string> Query(string term)
{
List<string> val;
termToIds.TryGetValue(term, out val);
return val ?? new List<string>();
}
public void Delete(string docId)
{
List<string> existingTermsForDocument;
if (idToTerms.TryGetValue(docId, out existingTermsForDocument))
{
foreach (var existingTerm in existingTermsForDocument)
{
termToIds[existingTerm].Remove(docId);
}
idToTerms.Remove(docId);
}
}
}
public class IndexTests
{
[Fact]
public void CanIndexAndQuery()
{
var index = new Indexer();
index.Index("users/1", "Oren Eini");
index.Index("users/2", "Hibernating Rhinos");
Assert.Contains("users/1", index.Query("eini"));
Assert.Contains("users/2", index.Query("rhinos"));
}
[Fact]
public void CanUpdate()
{
var index = new Indexer();
index.Index("users/1", "Oren Eini");
//updating
index.Index("users/1", "Ayende Rahien");
Assert.Contains("users/1", index.Query("Rahien"));
Assert.Empty(index.Query("eini"));
}
[Fact]
public void CanDelete()
{
var index = new Indexer();
index.Index("users/1", "Oren Eini");
index.Delete("users/1");
Assert.Empty(index.Query("Oren"));
}
[Fact]
public void UpdatingShouldNotAffectDocumentsThatShareTerms()
{
var index = new Indexer();
index.Index("docs/1", "Apples are green");
index.Index("docs/2", "Trees are green");
index.Index("docs/1", "My car is grey");
Assert.Empty(index.Query("apples"));
Assert.False(index.Query("green").Contains("docs/1"));
Assert.True(index.Query("green").Contains("docs/2"));
}
[Fact]
public void DeletingShouldNotRemoveDocumentsThatShareTerms()
{
var index = new Indexer();
index.Index("docs/1", "Apples are green");
index.Index("docs/2", "Trees are green");
index.Delete("docs/1");
Assert.Empty(index.Query("apples"));
Assert.False(index.Query("green").Contains("docs/1"));
Assert.True(index.Query("green").Contains("docs/2"));
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment