Skip to content

Instantly share code, notes, and snippets.

@walkerh
Last active April 7, 2019 14:13
Show Gist options
  • Save walkerh/1b58bc4ac555d347d23ffd5be22e6351 to your computer and use it in GitHub Desktop.
Save walkerh/1b58bc4ac555d347d23ffd5be22e6351 to your computer and use it in GitHub Desktop.
Demonstration of grouping by repeating letters in Python
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# `itertools` to the Rescue\n",
"\n",
"https://docs.python.org/3/library/itertools.html#itertools.groupby\n",
"\n",
"Given a list of strings comprising capital letters, find (for each string) the longest run of consecutive occurrences of a single letter, and print out the letter along with the length of that run (the number of times the letter occurs in it). If there is a tie, choose the letter that comes earliest in the alphabet."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from itertools import groupby"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Sample inputs: one upper-case word per list entry.\n",
"strings = [\n",
"    \"HELLOTHEREWORLD\",\n",
"    \"NOWISTHETIMEFORALLGOOD\",\n",
"    \"MEETBEETBOOBOO\",\n",
"    \"ZZOEUCUTAAEUCUTTTCUU\",\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['HELLOTHEREWORLD',\n",
" 'NOWISTHETIMEFORALLGOOD',\n",
" 'MEETBEETBOOBOO',\n",
" 'ZZOEUCUTAAEUCUTTTCUU']"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"strings"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"H 1\n",
"E 1\n",
"L 2\n",
"O 1\n",
"T 1\n",
"H 1\n",
"E 1\n",
"R 1\n",
"E 1\n",
"W 1\n",
"O 1\n",
"R 1\n",
"L 1\n",
"D 1\n",
"\n",
"N 1\n",
"O 1\n",
"W 1\n",
"I 1\n",
"S 1\n",
"T 1\n",
"H 1\n",
"E 1\n",
"T 1\n",
"I 1\n",
"M 1\n",
"E 1\n",
"F 1\n",
"O 1\n",
"R 1\n",
"A 1\n",
"L 2\n",
"G 1\n",
"O 2\n",
"D 1\n",
"\n",
"M 1\n",
"E 2\n",
"T 1\n",
"B 1\n",
"E 2\n",
"T 1\n",
"B 1\n",
"O 2\n",
"B 1\n",
"O 2\n",
"\n",
"Z 2\n",
"O 1\n",
"E 1\n",
"U 1\n",
"C 1\n",
"U 1\n",
"T 1\n",
"A 2\n",
"E 1\n",
"U 1\n",
"C 1\n",
"U 1\n",
"T 3\n",
"C 1\n",
"U 2\n"
]
}
],
"source": [
"# groupby() yields one (letter, run) pair per stretch of identical\n",
"# consecutive letters; counting the items in each run gives its length.\n",
"for s in strings:\n",
"    print()  # blank line between the listings for successive strings\n",
"    for letter, iterator in groupby(s):\n",
"        print(letter, sum(1 for _ in iterator))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"[(1, 'D'), (1, 'E'), (1, 'E'), (1, 'E'), (1, 'H'), (1, 'H'), (1, 'L'), (1, 'O'), (1, 'O'), (1, 'R'), (1, 'R'), (1, 'T'), (1, 'W'), (2, 'L')]\n",
"\n",
"[(1, 'A'), (1, 'D'), (1, 'E'), (1, 'E'), (1, 'F'), (1, 'G'), (1, 'H'), (1, 'I'), (1, 'I'), (1, 'M'), (1, 'N'), (1, 'O'), (1, 'O'), (1, 'R'), (1, 'S'), (1, 'T'), (1, 'T'), (1, 'W'), (2, 'L'), (2, 'O')]\n",
"\n",
"[(1, 'B'), (1, 'B'), (1, 'B'), (1, 'M'), (1, 'T'), (1, 'T'), (2, 'E'), (2, 'E'), (2, 'O'), (2, 'O')]\n",
"\n",
"[(1, 'C'), (1, 'C'), (1, 'C'), (1, 'E'), (1, 'E'), (1, 'O'), (1, 'T'), (1, 'U'), (1, 'U'), (1, 'U'), (1, 'U'), (2, 'A'), (2, 'U'), (2, 'Z'), (3, 'T')]\n"
]
}
],
"source": [
"# Putting the run length first lets the default tuple ordering sort by\n",
"# length and then alphabetically by letter.\n",
"for s in strings:\n",
"    print()\n",
"    print(sorted((len(list(run)), char) for char, run in groupby(s)))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def sort_key(t):\n",
"    \"\"\"Return a sort key for a (count, letter) pair.\n",
"\n",
"    The count is negated so that larger counts sort first; pairs with equal\n",
"    counts fall back to the letter, so the alphabetically earliest wins.\n",
"    \"\"\"\n",
"    run_length, char = t\n",
"    return (-run_length, char)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"[(2, 'L'), (1, 'D'), (1, 'E'), (1, 'E'), (1, 'E'), (1, 'H'), (1, 'H'), (1, 'L'), (1, 'O'), (1, 'O'), (1, 'R'), (1, 'R'), (1, 'T'), (1, 'W')]\n",
"\n",
"[(2, 'L'), (2, 'O'), (1, 'A'), (1, 'D'), (1, 'E'), (1, 'E'), (1, 'F'), (1, 'G'), (1, 'H'), (1, 'I'), (1, 'I'), (1, 'M'), (1, 'N'), (1, 'O'), (1, 'O'), (1, 'R'), (1, 'S'), (1, 'T'), (1, 'T'), (1, 'W')]\n",
"\n",
"[(2, 'E'), (2, 'E'), (2, 'O'), (2, 'O'), (1, 'B'), (1, 'B'), (1, 'B'), (1, 'M'), (1, 'T'), (1, 'T')]\n",
"\n",
"[(3, 'T'), (2, 'A'), (2, 'U'), (2, 'Z'), (1, 'C'), (1, 'C'), (1, 'C'), (1, 'E'), (1, 'E'), (1, 'O'), (1, 'T'), (1, 'U'), (1, 'U'), (1, 'U'), (1, 'U')]\n"
]
}
],
"source": [
"# Order the (count, letter) pairs with sort_key: longest run first,\n",
"# alphabetical order among runs of equal length.\n",
"for s in strings:\n",
"    print()\n",
"    print(\n",
"        sorted(\n",
"            ((len(list(run)), char) for char, run in groupby(s)),\n",
"            key=sort_key,\n",
"        )\n",
"    )"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2 L\n",
"2 L\n",
"2 E\n",
"3 T\n"
]
}
],
"source": [
"# min() with sort_key picks the winning (count, letter) pair in a single\n",
"# pass instead of sorting every run; when several pairs share the smallest\n",
"# key it returns the first one, the same element sorted(...)[0] would give.\n",
"for s in strings:\n",
"    print(\n",
"        *min(\n",
"            ((sum(1 for _ in run), letter) for letter, run in groupby(s)),\n",
"            key=sort_key,\n",
"        )\n",
"    )"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2 L HELLOTHEREWORLD\n",
"2 L NOWISTHETIMEFORALLGOOD\n",
"2 E MEETBEETBOOBOO\n",
"3 T ZZOEUCUTAAEUCUTTTCUU\n"
]
}
],
"source": [
"# Print the winning (count, letter) pair followed by the string it came\n",
"# from. min() with sort_key selects the longest run (earliest letter on\n",
"# ties) without sorting the full list of runs.\n",
"for s in strings:\n",
"    print(\n",
"        *min(\n",
"            ((sum(1 for _ in run), letter) for letter, run in groupby(s)),\n",
"            key=sort_key,\n",
"        ),\n",
"        s\n",
"    )"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment