Skip to content

Instantly share code, notes, and snippets.

@apiguy
Created April 24, 2013 17:57
Show Gist options
  • Save apiguy/5454125 to your computer and use it in GitHub Desktop.
Save apiguy/5454125 to your computer and use it in GitHub Desktop.
Grouping strings by prefixes in python
#!/usr/bin/env python
# Gribouillis at www.daniweb.com
"""
The class Grouper implements grouping strings by prefixes.
A Grouper object is created with a sequence of prefixes
G = Grouper(["pref1", "pref2", ...])
(an empty prefix is automatically added). The Grouper object
is a dictionary "prefix --> set of strings". Initially, it looks
like
{
"pref1" : set([]),
"pref2" : set([]),
}
The method 'add_strings' can be used to add a sequence of strings
to the grouper. Each string goes in one of the sets according to
the longest prefix it contains.
After
G.add_strings(["pref1aaaaa", "pref2jjj", "pref1s"])
it's content will be
{
"pref1" : set(["pref1aaaaa", "pref1s"]),
"pref2" : set(["pref2jjj"]),
}
More strings can be added.
"""
from collections import deque
from bisect import bisect_left ,bisect_right
class Grouper (dict ):
def __init__ (self ,iterable ):
prefixes =set (iterable )
prefixes .add ("")
dict .__init__ (self ,[(p ,set ())for p in prefixes ])
self .skeys =[]
def add_strings (self ,iterable ):
strings =sorted (set (iterable ))
if len (self .skeys )!=len (self ):
self .skeys =sorted (self )[1 :]
stack =deque ([("",chr (255 ))])
k =0
for p in self .skeys :
while stack [-1 ][1 ]<=p :
q ,qs =stack .pop ()
k =self ._forward (q ,k ,qs ,strings )
k =self ._forward (stack [-1 ][0 ],k ,p ,strings )
stack .append ((p ,p [:-1 ]+chr (ord (p [-1 ])+1 )))
while stack :
q ,qs =stack .pop ()
k =self ._forward (q ,k ,qs ,strings )
def _forward (self ,q ,k ,qs ,strings ):
ks =bisect_left (strings ,qs ,k )
self [q ].update (strings [i ]for i in xrange (k ,ks ))
return ks
if __name__ =="__main__":
grouper =Grouper (["/usr","/usr/lib","hello","heat"])
grouper .add_strings ([
"/usr/bin/python","/usr/lib/python2.5","/dev",
"hello world","hello","heat foo","heat","her",
"/usr/liberation",
])
for k ,s in grouper .items ():
print ("%s: %s"%(repr (k ),str (s )))
print "Adding more strings"
grouper .add_strings ([
"hello fred","/usrssss",
])
for k ,s in grouper .items ():
print ("%s: %s"%(repr (k ),str (s )))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment