Skip to content

Instantly share code, notes, and snippets.

@rmela
Last active August 28, 2020 03:22
Show Gist options
  • Save rmela/caa08d845216dc369c74f46797297c6e to your computer and use it in GitHub Desktop.
Save rmela/caa08d845216dc369c74f46797297c6e to your computer and use it in GitHub Desktop.
Interview coding challenge. Aced this one well under the time limit. I guess practical problems suit me better.
Coding challenge.
Create a DataCapture class that queries statistics about an input stream of numbers.
Assume that the input stream consists of positive integers in the range 0 to 1000.
Class must implement the following methods:
add(n) - add an input element to collection in O(1) time
build() - create summary information on the collection in O(log(n)) time
less(n) - report the number of input elements less than N in O(1) time
greater(n) - report the number of input elements less than N in O(1) time
between(n1,n2) - report the number of input between N1 and N1 in O(1) time
I actually completed this one in well under the required time because it's not that far
off from things I sometimes need to do at work.
To run the code:
python3 runme.py
or
python3 test.py
import collections
class Stats:
def __init__(self):
self.occurrences = 0
self.greater = 0
self.less = 0
def __str__(self):
return "occurrences={0} greater={1} less={2}".format( self.occurrences, self.greater, self.less )
class DataCapture:
def __init__(self):
self.stats = collections.defaultdict( Stats )
self.size = 0
self.max = 0
def add(self, n):
"""Add a number to the collection to be analyzed"""
stats = self.stats[n]
stats.occurrences += 1
self.size += 1
if n > self.max:
self.max = n
def build_stats( self ):
"""Generate cached statistics information for O(1) statistics queries"""
less = 0
greater = self.size
for n in range( self.max + 1 ):
stats = self.stats[n]
greater = greater - stats.occurrences
stats.greater = greater
stats.less = less
less = less + stats.occurrences
def __str__(self):
"""Print string representation of built DataCapture stats. Not practical for much beyond coding challenge example"""
arr = map( lambda key: "n={0} {1}".format( key, self.stats[key] ), sorted(self.stats ) )
return "\n".join(arr)
def between(self,start,finish):
"""Return the number of entries with a value between start and finish"""
start = self.stats[start]
finish = self.stats[finish]
return self.size - finish.greater - start.less
def less( self, n ):
"""Return the number of entries with a value less than n"""
return self.stats[n].less
def greater( self, n ):
"""Return the number of entries with a value greater than n"""
return self.stats[n].greater
#!/usr/bin/env python3
import sys
sys.path.append('.')
import unittest
import stats
def createDataCollector( datasource ):
dc = stats.DataCapture()
for n in map(int, datasource): dc.add( n )
dc.build_stats()
return dc
class Foo( unittest.TestCase ):
def setUp(self):
self.dc = createDataCollector( [3, 9, 3, 4, 6 ] )
def test_less(self):
self.assertEqual( self.dc.less(4), 2 )
def test_greater(self):
self.assertEqual( self.dc.greater(5), 2 )
def test_between(self):
self.assertEqual( self.dc.between( 3, 6 ), 4 )
if __name__ == '__main__':
unittest.main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment