Skip to content

Instantly share code, notes, and snippets.

@prehensile
Created May 12, 2015 09:49
Show Gist options
  • Save prehensile/a0ce581718ceec7a6dce to your computer and use it in GitHub Desktop.
Save prehensile/a0ce581718ceec7a6dce to your computer and use it in GitHub Desktop.
A Python function to chop up a string into chunks with a maximum length for each chunk. It chops at whitespace for clean separation, rather than blindly every n characters. Useful for, say, breaking up a long piece of text into multiple tweets.
import re
def chunk_string(str_in, max_chunk_length):
    """Split *str_in* into chunks of at most *max_chunk_length* characters.

    Chunks are cut at runs of whitespace where possible, and the whitespace
    run used as a cut point is dropped from the output. When a window
    contains no whitespace at all (a single "word" longer than the limit),
    the text is hard-cut at exactly *max_chunk_length* characters.

    Args:
        str_in: The text to split.
        max_chunk_length: Maximum number of characters allowed per chunk.

    Returns:
        A list of strings, each no longer than *max_chunk_length*. A string
        that already fits is returned unchanged as a single-element list.
        Empty chunks (which would only arise from leading or trailing
        whitespace) are not emitted when the string has to be split.

    Raises:
        ValueError: If the string needs splitting but *max_chunk_length*
            is smaller than 1 (no progress could ever be made).
    """
    total_length = len(str_in)
    if total_length <= max_chunk_length:
        return [str_in]
    if max_chunk_length < 1:
        raise ValueError(
            "max_chunk_length must be at least 1 to split a non-empty string"
        )

    # Materialise the whitespace runs once and walk them with an index, so a
    # run that lies beyond the current window is never consumed and lost; it
    # stays available as a cut point for the following chunk.
    whitespace_runs = list(re.finditer(r"\s+", str_in))
    run_count = len(whitespace_runs)
    next_run = 0

    chunks = []
    start_index = 0
    # Keep cutting only while the remainder does not fit in a single chunk.
    while total_length - start_index > max_chunk_length:
        limit = start_index + max_chunk_length

        # Pick the last whitespace run that starts inside the window. Every
        # run not yet visited starts at or after start_index, so only the
        # upper bound needs checking. A run starting exactly at `limit` is
        # fine: the chunk before it is exactly max_chunk_length long.
        end_space = None
        while (next_run < run_count
               and whitespace_runs[next_run].start() <= limit):
            end_space = whitespace_runs[next_run]
            next_run += 1

        if end_space is None:
            # No whitespace available: hard cut at the length limit.
            cut_index = limit
            resume_index = limit
        else:
            # Cut before the whitespace and resume after it.
            cut_index = end_space.start()
            resume_index = end_space.end()

        if cut_index > start_index:
            chunks.append(str_in[start_index:cut_index])
        start_index = resume_index

    # Whatever is left fits in one chunk; skip it if nothing remains.
    if start_index < total_length:
        chunks.append(str_in[start_index:])
    return chunks
# Simple demo: split a paragraph into tweet-sized (140-character) chunks and
# print each chunk followed by its length.
if __name__ == '__main__':
    t = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Cras imperdiet nec erat ac condimentum. Nulla vel rutrum ligula. Sed hendrerit interdum orci a posuere. Vivamus ut velit aliquet, mollis purus eget, iaculis nisl. Proin posuere malesuada ante. Proin auctor orci eros, ac molestie lorem dictum nec. Vestibulum sit amet erat est. Morbi luctus sed elit ac luctus. Proin blandit, enim vitae egestas posuere, neque elit ultricies dui, vel mattis nibh enim ac lorem. Maecenas molestie nisl sit amet velit dictum lobortis. Aliquam erat volutpat."
    chunks = chunk_string(t, 140)
    for chunk in chunks:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(chunk)
        print(len(chunk))
@BustACode
Copy link

Thank you.

@tandav
Copy link

tandav commented Feb 22, 2020

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment