Skip to content

Instantly share code, notes, and snippets.

@zaus
Last active June 30, 2017 16:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zaus/014ac9b5a78b267aa1643d63d30c7554 to your computer and use it in GitHub Desktop.
Save zaus/014ac9b5a78b267aa1643d63d30c7554 to your computer and use it in GitHub Desktop.
Comparing Collection.Contains for various sets of data
void Main()
{
// https://stackoverflow.com/questions/150750/hashset-vs-list-performance/13089134?noredirect=1#comment76605526_13089134
test(10, 5, 10, true);
test(20);
test(50);
test(100);
test(1000);
test(10, 50, 10, true);
test(1000, 50);
}
// Define other methods and classes here
static Random r = new Random();
string getRandomString(int size) {
var sb = new StringBuilder();
for(int i = 0; i < size; i++) sb.Append( (char)('a' + r.Next(0, 25)));
return sb.ToString();
}
ICollection<string> createTestList<T>(int setSize, int minWordLength = 5, int wordLengthVariance = 10, int previewSize = 0) where T : ICollection<string>, new() {
var set = new T();
for(int i = 0; i < setSize; i++) set.Add(getRandomString(r.Next(minWordLength, minWordLength + wordLengthVariance)));
if(previewSize > 0) set.Take(previewSize).Dump(string.Format("Test size {0} sample {1}", setSize, previewSize));
return set;
}
List<T> getSearchKeys<T>(ICollection<T> set) {
return _getSearchKeys(set).ToList();
}
IEnumerable<T> _getSearchKeys<T>(ICollection<T> set) {
// first
yield return set.First();
// early
yield return set.ElementAt(set.Count / 10);
// middle
yield return set.ElementAt(set.Count / 2);
// late
yield return set.ElementAt(set.Count / 10 * 9 - 1);
// last
yield return set.Last();
}
void test(int setSize, int minWordLength = 5, int wordLengthVariance = 10, bool preview = false) {
var list = createTestList<List<string>>(setSize, minWordLength, wordLengthVariance);
var hashset = new HashSet<string>(list);
var keys = getSearchKeys(list);
if(preview) {
list.Dump("Set");
keys.Dump("Find");
}
// the thing that runs each test 10k times and prints the results, see https://gist.github.com/zaus/8055941
new Perf {
{ "list, first", n => list.Contains(keys[0]) },
{ "hashset, first", n => hashset.Contains(keys[0]) },
{ "list, early", n => list.Contains(keys[1]) },
{ "hashset, early", n => hashset.Contains(keys[1]) },
{ "list, middle", n => list.Contains(keys[2]) },
{ "hashset, middle", n => hashset.Contains(keys[2]) },
{ "list, late", n => list.Contains(keys[3]) },
{ "hashset, late", n => hashset.Contains(keys[3]) },
{ "list, last", n => list.Contains(keys[4]) },
{ "hashset, last", n => hashset.Contains(keys[4]) },
}.Vs(string.Format("Comparing set size {0} min word size {1}-{2}", setSize, minWordLength, minWordLength+wordLengthVariance));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment