@aparrish
Created March 7, 2019 23:01
notes for RWET (grad) march 7th 2019
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# notes 2019-03-07"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## word counts"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"words = open(\"./genesis.txt\").read().split()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['In',\n",
" 'the',\n",
" 'beginning',\n",
" 'God',\n",
" 'created',\n",
" 'the',\n",
" 'heaven',\n",
" 'and',\n",
" 'the',\n",
" 'earth.']"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"words[:10]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
" {'In': 1,\n",
" 'the': 90,\n",
" 'beginning': 3,\n",
" 'God': 23,\n",
" ...\n",
" }"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"counts = {}\n",
"for item in words:\n",
" if item in counts:\n",
" counts[item] += 1\n",
" else:\n",
" counts[item] = 1"
]
},
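{
"cell_type": "markdown",
"metadata": {},
"source": [
"(An equivalent sketch of the same loop: `dict.get(item, 0)` returns the running count if the word has been seen, or `0` if it hasn't, so the two branches collapse into one line.)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"counts = {}\n",
"for item in words:\n",
"    # .get(item, 0) falls back to zero for words we haven't seen yet\n",
"    counts[item] = counts.get(item, 0) + 1"
]
},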
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"from collections import Counter"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"count = Counter(words)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"108"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"count['the']"
]
},
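{
"cell_type": "markdown",
"metadata": {},
"source": [
"A `Counter` is still a dictionary, so the usual dict operations work on it too; for instance (a quick sketch), the number of distinct words and the total number of words counted:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# distinct words vs. total words counted\n",
"len(count), sum(count.values())"
]
},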
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('the', 108),\n",
" ('and', 63),\n",
" ('And', 33),\n",
" ('God', 32),\n",
" ('of', 20),\n",
" ('was', 17),\n",
" ('it', 15),\n",
" ('that', 14),\n",
" ('in', 13),\n",
" ('every', 12),\n",
" ('after', 11),\n",
" ('to', 11)]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"count.most_common(12)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('the', 108)]"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"count.most_common(1)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'the'"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"count.most_common(10)[0][0]"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"letter_count = Counter(open(\"./genesis.txt\").read())"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[(' ', 797),\n",
" ('e', 450),\n",
" ('t', 339),\n",
" ('a', 267),\n",
" ('h', 255),\n",
" ('n', 233),\n",
" ('d', 232),\n",
" ('i', 195),\n",
" ('r', 186),\n",
" ('o', 167),\n",
" ('s', 128),\n",
" ('f', 87),\n",
" ('l', 86),\n",
" ('g', 82),\n",
" (',', 68),\n",
" ('m', 65),\n",
" ('w', 62),\n",
" ('v', 59),\n",
" ('u', 44),\n",
" ('y', 42),\n",
" ('.', 33),\n",
" ('A', 33),\n",
" ('G', 32),\n",
" ('c', 31),\n",
" ('\\n', 31),\n",
" ('b', 29),\n",
" ('p', 25),\n",
" (':', 17),\n",
" ('k', 16),\n",
" ('L', 8),\n",
" (';', 6),\n",
" ('I', 3),\n",
" ('S', 3),\n",
" ('B', 3),\n",
" ('D', 1),\n",
" ('N', 1),\n",
" ('H', 1),\n",
" ('E', 1),\n",
" ('x', 1)]"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"letter_count.most_common()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"test_count = Counter([\"🐈\", \"🐈\", \"🐩\", \"🐩\"])"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'🐈'"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test_count.most_common(1)[0][0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## tracery"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: tracery in /Users/allison/anaconda/lib/python3.6/site-packages\n",
"\u001b[33mYou are using pip version 9.0.3, however version 19.0.3 is available.\n",
"You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n"
]
}
],
"source": [
"!pip install tracery"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"import tracery"
]
},
{
"cell_type": "code",
"execution_count": 97,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Greetings, precinct!\n"
]
}
],
"source": [
"rules = {\n",
" \"origin\": \"#greeting#, #noun#!\",\n",
" \"noun\": [\"galaxy\", \"world\", \"solar system\", \"dimensional plane\", \"precinct\", \"neighborhood\"],\n",
" \"greeting\": [\"Howdy\", \"Hello\", \"Greetings\", \"Hey\", \"Uhhhh\", \"Ummm\", \"Okay\"]\n",
"}\n",
"grammar = tracery.Grammar(rules)\n",
"print(grammar.flatten(\"#origin#\"))"
]
},
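{
"cell_type": "markdown",
"metadata": {},
"source": [
"Each call to `flatten()` expands the rules again with fresh random choices, so calling it in a loop gives different greetings (a quick sketch):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# every expansion picks #greeting# and #noun# at random\n",
"for i in range(5):\n",
"    print(grammar.flatten(\"#origin#\"))"
]
},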
{
"cell_type": "markdown",
"metadata": {},
"source": [
"meme generator project!\n",
"\n",
" <interjection> <name>, I'm a <profession> not a <profession>"
]
},
{
"cell_type": "code",
"execution_count": 106,
"metadata": {},
"outputs": [],
"source": [
"from tracery.modifiers import base_english"
]
},
{
"cell_type": "code",
"execution_count": 149,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"You know, Robert, I've never met \"an actor\" like you.\n"
]
}
],
"source": [
"rules = {\n",
" \"origin\": [\"#interjection.capitalize#, #name#! I'm #profession.a#, not #profession.a#\",\n",
" \"You know, #name#, I've never met \\\"#profession.a#\\\" like you.\"],\n",
" \"interjection\": [\"dammit\",\n",
" \"alas\",\n",
" \"oh god\",\n",
" \"gee\",\n",
" \"good grief\",\n",
" \"hallelujah\",\n",
" \"whoa\",\n",
" \"rats\"],\n",
" \"name\": [\"Jim\", \"Robert\", \"Bernie\", \"Adam\", \"Gary\", \"Kevin\", \"Steve\"],\n",
" \"profession\": [\n",
" \"accountant\", \"actor\", \"bartender\", \"engineer\", \"farmer\", \"stripper\",\n",
" \"new media artist\", \"therapist\", \"sous chef\", \"zoologist\"\n",
" ]\n",
"}\n",
"grammar = tracery.Grammar(rules)\n",
"grammar.add_modifiers(base_english)\n",
"print(grammar.flatten(\"#origin#\"))"
]
},
{
"cell_type": "code",
"execution_count": 124,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'a': <function tracery.modifiers.a>,\n",
" 'capitalize': <function tracery.modifiers.capitalize_>,\n",
" 'capitalizeAll': <function tracery.modifiers.capitalizeAll>,\n",
" 'ed': <function tracery.modifiers.ed>,\n",
" 'firstS': <function tracery.modifiers.firstS>,\n",
" 'lowercase': <function tracery.modifiers.lowercase>,\n",
" 'replace': <function tracery.modifiers.replace>,\n",
" 's': <function tracery.modifiers.s>,\n",
" 'uppercase': <function tracery.modifiers.uppercase>}"
]
},
"execution_count": 124,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"base_english"
]
},
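{
"cell_type": "markdown",
"metadata": {},
"source": [
"A sketch of two more of these modifiers with some made-up rules: `.s` pluralizes a word and `.capitalize` uppercases its first letter."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"rules = {\n",
"    \"origin\": \"#animal.capitalize#! I have never seen so many #animal.s#.\",\n",
"    \"animal\": [\"ocelot\", \"walrus\", \"pigeon\"]\n",
"}\n",
"grammar = tracery.Grammar(rules)\n",
"grammar.add_modifiers(base_english)\n",
"print(grammar.flatten(\"#origin#\"))"
]
},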
{
"cell_type": "code",
"execution_count": 151,
"metadata": {},
"outputs": [],
"source": [
"import json"
]
},
{
"cell_type": "code",
"execution_count": 152,
"metadata": {},
"outputs": [],
"source": [
"data = json.load(open(\"./occupations.json\"))"
]
},
{
"cell_type": "code",
"execution_count": 154,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"dict"
]
},
"execution_count": 154,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(data)"
]
},
{
"cell_type": "code",
"execution_count": 155,
"metadata": {},
"outputs": [],
"source": [
"occupations = data['occupations']"
]
},
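{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick sanity check (sketch) on what we pulled out of the JSON file: how many occupations there are and what the first few look like."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"len(occupations), occupations[:5]"
]
},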
{
"cell_type": "code",
"execution_count": 156,
"metadata": {},
"outputs": [],
"source": [
"import random"
]
},
{
"cell_type": "code",
"execution_count": 168,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'floor layer'"
]
},
"execution_count": 168,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"random.choice(occupations)"
]
},
{
"cell_type": "code",
"execution_count": 177,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Gee, Bernie! I'm a curator, not a textile worker\n",
"Oh god, Jim! I'm a highway maintenance worker, not a clinical psychologist\n",
"Gee, Gary! I'm a conservation worker, not an ambulance dispatcher\n",
"You know, Robert, I've never met \"a blending machine operator\" like you.\n",
"You know, Gary, I've never met \"a cutting machine operator\" like you.\n",
"Gee, Robert! I'm a title searcher, not a physical therapist\n",
"Hallelujah, Kevin! I'm a diagnostic medical sonographer, not an operations research analyst\n",
"Oh god, Gary! I'm a stationary engineer, not a travel guide\n",
"You know, Steve, I've never met \"a respiratory therapist\" like you.\n",
"Alas, Robert! I'm a drier operator, not a prosthetist\n",
"You know, Steve, I've never met \"a religion teacher\" like you.\n",
"You know, Robert, I've never met \"a museum technician\" like you.\n"
]
}
],
"source": [
"rules = {\n",
" \"origin\": [\"#interjection.capitalize#, #name#! I'm #profession.a#, not #profession.a#\",\n",
" \"You know, #name#, I've never met \\\"#profession.a#\\\" like you.\"],\n",
" \"interjection\": [\"dammit\",\n",
" \"alas\",\n",
" \"oh god\",\n",
" \"gee\",\n",
" \"good grief\",\n",
" \"hallelujah\",\n",
" \"whoa\",\n",
" \"rats\"],\n",
" \"name\": [\"Jim\", \"Robert\", \"Bernie\", \"Adam\", \"Gary\", \"Kevin\", \"Steve\"],\n",
" \"profession\": occupations\n",
"}\n",
"grammar = tracery.Grammar(rules)\n",
"grammar.add_modifiers(base_english)\n",
"for i in range(12):\n",
" print(grammar.flatten(\"#origin#\"))"
]
},
{
"cell_type": "code",
"execution_count": 206,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Oh cheesewheel cheesewheel\n",
"cheesewheel baseball climatologists\n",
"cheesewheel banana\n"
]
}
],
"source": [
"interjection = random.choice([\"Oh\", \"Rats\", \"Gee\"])\n",
"rules = {\n",
" \"origin\": \"#five#\\n#seven#\\n#five#\",\n",
" \"five\": [\"#twosyl# #threesyl#\", \"#threesyl# #twosyl#\", interjection + \" #twosyl# #twosyl#\"],\n",
" \"seven\": [\"#twosyl# #twosyl# #fivesyl#\"],\n",
" \"twosyl\": [\"cheesewheel\", \"parrot\", \"baseball\"],\n",
" \"threesyl\": [\"abacus\", \"mastodon\", \"banana\"],\n",
" \"fivesyl\": ['climatologists',\n",
" 'augustyniak',\n",
" 'recalculation',\n",
" 'illusionism',\n",
" 'exhilarating']\n",
"}\n",
"grammar = tracery.Grammar(rules)\n",
"grammar.add_modifiers(base_english)\n",
"haiku = grammar.flatten(\"#origin#\")\n",
"print(haiku)"
]
},
{
"cell_type": "code",
"execution_count": 191,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Geecheesewheel baseball\n",
"cheesewheel cheesewheel recalculation\n",
"banana baseball\n"
]
}
],
"source": [
"print(haiku)"
]
},
{
"cell_type": "code",
"execution_count": 178,
"metadata": {},
"outputs": [],
"source": [
"import pronouncing"
]
},
{
"cell_type": "code",
"execution_count": 180,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['climatologists',\n",
" 'augustyniak',\n",
" 'recalculation',\n",
" 'illusionism',\n",
" 'exhilarating']"
]
},
"execution_count": 180,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"random.sample(pronouncing.search_stresses(\"^.....$\"), 5)"
]
}
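,
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The same trick could fill in the other word lists for the haiku grammar: `search_stresses()` matches a regular expression against each word's stress pattern, one digit per syllable, so `^..$` finds two-syllable words and `^...$` finds three-syllable words (a sketch, not run here)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# sample a handful of two- and three-syllable words for #twosyl# and #threesyl#\n",
"twosyl = random.sample(pronouncing.search_stresses(\"^..$\"), 10)\n",
"threesyl = random.sample(pronouncing.search_stresses(\"^...$\"), 10)\n",
"twosyl, threesyl"
]
}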
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}