Skip to content

Instantly share code, notes, and snippets.

@Phylliida
Last active February 16, 2021 18:06
Show Gist options
  • Save Phylliida/c159bc02d8072eb1c81f0b472e8a34c1 to your computer and use it in GitHub Desktop.
Save Phylliida/c159bc02d8072eb1c81f0b472e8a34c1 to your computer and use it in GitHub Desktop.
NGram Feedback Modeling Notebook
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 18,
"id": "sound-stewart",
"metadata": {},
"outputs": [],
"source": [
"from IPython.display import display\n",
"import graphviz\n",
"from matplotlib import pyplot as plt\n",
"from collections import defaultdict \n",
"import random\n",
"\n",
"BEGIN_TOKEN = \"___BEGIN__\"\n",
"END_TOKEN = \"___END__\"\n",
"\n",
"class NGramModel(object):\n",
" def __init__(self, inputSequences, contextSize):\n",
" '''\n",
" Creates an n-gram model of the given inputSequences\n",
" The input sequences should be an array of arrays.\n",
" Internally, each array has contextSize copies of BEGIN_TOKEN prepended to the start,\n",
" and a single END_TOKEN appended to the end,\n",
" then n-gram analysis is done.\n",
" contextSize=1 is bigram\n",
" contextSize=2 is trigram\n",
" etc.\n",
" '''\n",
" self.contextSize = contextSize\n",
" self.model = buildNGramModel(inputSequences=inputSequences, contextSize=contextSize)\n",
" \n",
" def gen(self):\n",
" '''\n",
" This function is a generator that can be used to generate samples from the trained nGram model\n",
" It will stop once it hits an end token, meaning that it could go on for a very long time\n",
" '''\n",
" context = [BEGIN_TOKEN]*self.contextSize\n",
" curToken = context[-1]\n",
" while True:\n",
" nextWordOptions = list(self.model[tuple(context)].items())\n",
" words = [nextWord for (nextWord, count) in nextWordOptions]\n",
" counts = [count for (nextWord, count) in nextWordOptions]\n",
" curToken = random.choices(words, weights=counts, k=1)[0] # for Python versions lower than 3.6 you'll need to implement a custom weighted choice here\n",
" context = context[1:] + [curToken]\n",
" if curToken == END_TOKEN: break\n",
" # we haven't reached end token, return token and continue generating\n",
" yield curToken\n",
" def __len__(self):\n",
" '''\n",
" Number of different types of context tuples\n",
" '''\n",
" return len(self.model)\n",
" \n",
"\n",
"def buildNGramModel(inputSequences, contextSize):\n",
" '''\n",
" Creates a dictionary of [contextTuple][nextWord] = numOfTimesSeen\n",
" Where contextTuple is of length contextSize, so \n",
" contextSize=1 is bigram\n",
" contextSize=2 is trigram\n",
" etc.\n",
" Every sequence is automatically given contextSize*[BEGIN_TOKEN] at the start, and an [END_TOKEN] at the end\n",
" '''\n",
" counts = defaultdict(lambda : defaultdict(int)) # default dict of default dict\n",
" for sequence in inputSequences:\n",
" context = [BEGIN_TOKEN]*contextSize\n",
" actualSequence = sequence + [END_TOKEN]\n",
" for word in actualSequence:\n",
" counts[tuple(context)][word] += 1 # because it's a default dict, it'll create entries if they don't exist\n",
" context = context[1:] + [word]\n",
" # convert default dicts to regular dict\n",
" return dict([(contextTuple, dict(nextWordDict)) for (contextTuple, nextWordDict) in counts.items()])\n",
"\n",
"\n",
"def generateTextWithMaxLen(model, maxSampleLen):\n",
" '''\n",
" Generates text that isn't allowed to be longer than the given length\n",
" If the model doesn't stop generating before that length, we throw out the sequence and make a new one\n",
" Warning: if maxSampleLen is too small, this could loop forever\n",
" '''\n",
" resultSent = list(model.gen())\n",
" while len(resultSent) > maxSampleLen:\n",
" resultSent = list(model.gen())\n",
" return resultSent\n",
"\n",
"\n",
"def generateDataset(model, maxSampleLen, minTotalGeneratedTokens):\n",
" '''\n",
" Generates samples of text from the given n-gram model until the total\n",
" number of tokens generated is at least minTotalGeneratedTokens.\n",
" (If a generated sample is longer than maxSampleLen, it is discarded and regenerated)\n",
" '''\n",
" totalChars = 0\n",
" allSamples = []\n",
" while totalChars < minTotalGeneratedTokens:\n",
" nextData = generateTextWithMaxLen(model, maxSampleLen)\n",
" allSamples.append(nextData)\n",
" totalChars += len(nextData)\n",
" return allSamples\n",
"\n",
"def runExperiment(initialDataset, numSteps, contextSize, maxSampleLen, minTotalGeneratedTokens, debug=False):\n",
" '''\n",
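" Runs an experiment where we generate at least minTotalGeneratedTokens tokens per step worth of samples\n",
" (rejecting individual samples that are longer than maxSampleLen)\n",
" Then we use that to train a new n-gram model (bigram=contextSize:1, trigram=contextSize:2, etc.)\n",
" We repeat this process numSteps times, and return an array of the numSteps models that were sampled from\n",
" (the first is trained on initialDataset, each later one on the samples generated by the previous model)\n",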
" '''\n",
" model = NGramModel(initialDataset, contextSize=contextSize)\n",
" models = []\n",
" for t in range(numSteps):\n",
" try:\n",
" if debug: print(t, numSteps, len(model))\n",
" models.append(model)\n",
" dataset = generateDataset(model, maxSampleLen=maxSampleLen, minTotalGeneratedTokens=minTotalGeneratedTokens)\n",
" model = NGramModel(dataset, contextSize=contextSize)\n",
" except:\n",
" # debugging info if you are running into errors\n",
" # display(modelToDot(models[-1]))\n",
" # display(modelToDot(model))\n",
" # print(t)\n",
" raise\n",
" return models\n",
"\n",
"def loopThroughEdgeCounts(model):\n",
" '''\n",
" The model stores counts as model.model[(tuple, of, context)][nextWord]\n",
" so we need to connect edges between (tuple, of, context) and (of, context, nextWord)\n",
" This generator loops through the model and yields, for each edge,\n",
" (tuple, of, context), (of, context, nextWord), transitionCount\n",
" '''\n",
" for keyTuple, outputDict in model.model.items():\n",
" keyTupleList = list(keyTuple)\n",
" for outputNode, transitionPr in outputDict.items():\n",
" connectToNodeList = keyTupleList[1:] + [outputNode]\n",
" yield (keyTuple, tuple(connectToNodeList), transitionPr)\n",
"\n",
"def modelToDot(model):\n",
" '''\n",
" Converts an n-gram model into a dot graph that displays the transition counts\n",
" '''\n",
" dot = graphviz.Digraph()\n",
" # it stores counts as model.model[(tuple, of, context)][nextWord]\n",
" # so we need to connect edges between (tuple, of, context) and (of, context, nextWord)\n",
" # made utility function loopThroughEdgeCounts that loops through these for us\n",
" for keyTuple, outputDict in model.model.items():\n",
" dot.node(\" \".join(list(keyTuple)))\n",
" for tupleIn, tupleOut, transitionCount in loopThroughEdgeCounts(model):\n",
" dot.edge(\" \".join(tupleIn), \" \".join(tupleOut), label=str(transitionCount))\n",
" return dot\n",
"\n",
"def modelHistoryToGraphs(modelHistory):\n",
" '''\n",
" Converts an n-gram model history into a dict with [(tupleIn, tupleOut)] = arrOfTransitionCounts\n",
" If a graph doesn't contain a given (tupleIn, tupleOut), a zero is placed in that position\n",
" '''\n",
" edges = dict()\n",
" # loop through all models and get all edges\n",
" for model in modelHistory:\n",
" for tupleIn, tupleOut, transitionCount in loopThroughEdgeCounts(model):\n",
" edge = (tupleIn, tupleOut)\n",
" if not edge in edges:\n",
" edges[edge] = []\n",
" \n",
" # for each edge, construct a history graph of transition counts\n",
" for model in modelHistory:\n",
" # append a zero to the end of every pair, this ensures if our model doesn't have it it still gets noted\n",
" for edge, arr in edges.items():\n",
" arr.append(0)\n",
" # for every pair our model has, overwrite those zeros at the end with the actual transition count\n",
" for tupleIn, tupleOut, transitionCount in loopThroughEdgeCounts(model):\n",
" edge = (tupleIn, tupleOut)\n",
" # overwrite last value to our transition count\n",
" edges[edge][-1] = transitionCount\n",
" return edges\n",
"\n",
"def printModelHistoryAsGraphs(modelHistory):\n",
" '''\n",
" Converts an n-gram model history into a dict with [(tupleIn, tupleOut)] = arrOfTransitionCounts\n",
" If a graph doesn't contain a given (tupleIn, tupleOut), a zero is placed in that position\n",
" Then this history is plotted as graphs, for each tuple\n",
" Warning, if you have a lot of tuples this could get slow\n",
" '''\n",
" graphs = modelHistoryToGraphs(modelHistory)\n",
" for edgeTuple, historyOfCounts in graphs.items():\n",
" plt.plot(historyOfCounts)\n",
" plt.title(str(edgeTuple))\n",
" plt.show()\n",
" \n",
"\n",
"def printOutputGenerations(model, maxSampleLen, minTotalGeneratedTokens):\n",
" '''\n",
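" Generates a dataset from the model and returns it as a single string\n",
" (one sample per line, tokens separated by spaces); the caller is responsible for printing it\n",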
" '''\n",
" dataset = generateDataset(model, maxSampleLen=maxSampleLen, minTotalGeneratedTokens=minTotalGeneratedTokens)\n",
" return \"\\n\".join([\" \".join(sample) for sample in dataset])\n",
" \n",
"def runAndVisualizeExperiment(initialDataset, printRange, numSteps, contextSize, maxSampleLen, minTotalGeneratedTokens):\n",
" '''\n",
" Runs an experiment where we generate at least minTotalGeneratedTokens tokens per step worth of samples\n",
" (rejecting individual samples that are longer than maxSampleLen)\n",
" Then we use that to train a new n-gram model (bigram=contextSize:1, trigram=contextSize:2, etc.)\n",
" We repeat this process numSteps times, and return an array with the models after each step\n",
" printRange is which models we actually want to visualize as dot models (for example, [0,-1] will display the first and last)\n",
" This will also plot a graph of the history of each transition count over time\n",
" '''\n",
" modelHistory = runExperiment(initialDataset=initialDataset, numSteps=numSteps, contextSize=contextSize, maxSampleLen=maxSampleLen, minTotalGeneratedTokens=minTotalGeneratedTokens)\n",
" for i in printRange:\n",
" print(i, \"model:\")\n",
" display(modelToDot(modelHistory[i]))\n",
" print(printOutputGenerations(modelHistory[i], minTotalGeneratedTokens=30, maxSampleLen=maxSampleLen))\n",
" printModelHistoryAsGraphs(modelHistory)\n",
" return modelHistory\n",
" \n"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "innovative-desktop",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 model:\n"
]
},
{
"data": {
"image/svg+xml": [
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\r\n",
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\r\n",
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\r\n",
"<!-- Generated by graphviz version 2.46.0 (20210118.1747)\r\n",
" -->\r\n",
"<!-- Pages: 1 -->\r\n",
"<svg width=\"195pt\" height=\"305pt\"\r\n",
" viewBox=\"0.00 0.00 195.39 305.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\r\n",
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 301)\">\r\n",
"<polygon fill=\"white\" stroke=\"transparent\" points=\"-4,4 -4,-301 191.39,-301 191.39,4 -4,4\"/>\r\n",
"<!-- ___BEGIN__ -->\r\n",
"<g id=\"node1\" class=\"node\">\r\n",
"<title>___BEGIN__</title>\r\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"125\" cy=\"-279\" rx=\"62.29\" ry=\"18\"/>\r\n",
"<text text-anchor=\"middle\" x=\"125\" y=\"-275.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">___BEGIN__</text>\r\n",
"</g>\r\n",
"<!-- a -->\r\n",
"<g id=\"node2\" class=\"node\">\r\n",
"<title>a</title>\r\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"54\" cy=\"-192\" rx=\"27\" ry=\"18\"/>\r\n",
"<text text-anchor=\"middle\" x=\"54\" y=\"-188.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">a</text>\r\n",
"</g>\r\n",
"<!-- ___BEGIN__&#45;&gt;a -->\r\n",
"<g id=\"edge1\" class=\"edge\">\r\n",
"<title>___BEGIN__&#45;&gt;a</title>\r\n",
"<path fill=\"none\" stroke=\"black\" d=\"M110.97,-261.21C100.16,-248.26 85.1,-230.23 73.17,-215.95\"/>\r\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"75.67,-213.48 66.57,-208.05 70.3,-217.97 75.67,-213.48\"/>\r\n",
"<text text-anchor=\"middle\" x=\"101\" y=\"-231.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">10</text>\r\n",
"</g>\r\n",
"<!-- ___END__ -->\r\n",
"<g id=\"node5\" class=\"node\">\r\n",
"<title>___END__</title>\r\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"126\" cy=\"-105\" rx=\"53.09\" ry=\"18\"/>\r\n",
"<text text-anchor=\"middle\" x=\"126\" y=\"-101.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">___END__</text>\r\n",
"</g>\r\n",
"<!-- ___BEGIN__&#45;&gt;___END__ -->\r\n",
"<g id=\"edge2\" class=\"edge\">\r\n",
"<title>___BEGIN__&#45;&gt;___END__</title>\r\n",
"<path fill=\"none\" stroke=\"black\" d=\"M125.1,-260.88C125.27,-231 125.63,-169.11 125.84,-133.27\"/>\r\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"129.34,-133.07 125.9,-123.05 122.34,-133.03 129.34,-133.07\"/>\r\n",
"<text text-anchor=\"middle\" x=\"128.5\" y=\"-188.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">1</text>\r\n",
"</g>\r\n",
"<!-- a&#45;&gt;a -->\r\n",
"<g id=\"edge4\" class=\"edge\">\r\n",
"<title>a&#45;&gt;a</title>\r\n",
"<path fill=\"none\" stroke=\"black\" d=\"M78.53,-199.75C89.51,-200.49 99,-197.91 99,-192 99,-188.03 94.72,-185.56 88.57,-184.59\"/>\r\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"88.65,-181.09 78.53,-184.25 88.41,-188.09 88.65,-181.09\"/>\r\n",
"<text text-anchor=\"middle\" x=\"106\" y=\"-188.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">10</text>\r\n",
"</g>\r\n",
"<!-- b -->\r\n",
"<g id=\"node3\" class=\"node\">\r\n",
"<title>b</title>\r\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"27\" cy=\"-105\" rx=\"27\" ry=\"18\"/>\r\n",
"<text text-anchor=\"middle\" x=\"27\" y=\"-101.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">b</text>\r\n",
"</g>\r\n",
"<!-- a&#45;&gt;b -->\r\n",
"<g id=\"edge3\" class=\"edge\">\r\n",
"<title>a&#45;&gt;b</title>\r\n",
"<path fill=\"none\" stroke=\"black\" d=\"M38.94,-176.71C33.8,-170.84 28.68,-163.64 26,-156 23.5,-148.87 22.81,-140.81 23,-133.28\"/>\r\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"26.5,-133.36 23.74,-123.13 19.52,-132.85 26.5,-133.36\"/>\r\n",
"<text text-anchor=\"middle\" x=\"33\" y=\"-144.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">20</text>\r\n",
"</g>\r\n",
"<!-- a&#45;&gt;___END__ -->\r\n",
"<g id=\"edge5\" class=\"edge\">\r\n",
"<title>a&#45;&gt;___END__</title>\r\n",
"<path fill=\"none\" stroke=\"black\" d=\"M66.87,-175.8C77.59,-163.15 93.05,-144.9 105.47,-130.24\"/>\r\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"108.22,-132.41 112.01,-122.52 102.88,-127.88 108.22,-132.41\"/>\r\n",
"<text text-anchor=\"middle\" x=\"102\" y=\"-144.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">10</text>\r\n",
"</g>\r\n",
"<!-- b&#45;&gt;a -->\r\n",
"<g id=\"edge7\" class=\"edge\">\r\n",
"<title>b&#45;&gt;a</title>\r\n",
"<path fill=\"none\" stroke=\"black\" d=\"M33.5,-122.64C35.67,-128.41 38.04,-134.96 40,-141 42.45,-148.54 44.87,-156.81 46.99,-164.41\"/>\r\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"43.67,-165.5 49.68,-174.23 50.42,-163.66 43.67,-165.5\"/>\r\n",
"<text text-anchor=\"middle\" x=\"52\" y=\"-144.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">20</text>\r\n",
"</g>\r\n",
"<!-- c -->\r\n",
"<g id=\"node4\" class=\"node\">\r\n",
"<title>c</title>\r\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"27\" cy=\"-18\" rx=\"27\" ry=\"18\"/>\r\n",
"<text text-anchor=\"middle\" x=\"27\" y=\"-14.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">c</text>\r\n",
"</g>\r\n",
"<!-- b&#45;&gt;c -->\r\n",
"<g id=\"edge6\" class=\"edge\">\r\n",
"<title>b&#45;&gt;c</title>\r\n",
"<path fill=\"none\" stroke=\"black\" d=\"M17.26,-87.83C14.32,-82.09 11.48,-75.45 10,-69 8.12,-60.83 9.75,-52.12 12.62,-44.3\"/>\r\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"15.91,-45.53 16.76,-34.97 9.51,-42.7 15.91,-45.53\"/>\r\n",
"<text text-anchor=\"middle\" x=\"17\" y=\"-57.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">40</text>\r\n",
"</g>\r\n",
"<!-- c&#45;&gt;b -->\r\n",
"<g id=\"edge8\" class=\"edge\">\r\n",
"<title>c&#45;&gt;b</title>\r\n",
"<path fill=\"none\" stroke=\"black\" d=\"M27,-36.18C27,-47.81 27,-63.42 27,-76.73\"/>\r\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"23.5,-76.8 27,-86.8 30.5,-76.8 23.5,-76.8\"/>\r\n",
"<text text-anchor=\"middle\" x=\"34\" y=\"-57.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">40</text>\r\n",
"</g>\r\n",
"</g>\r\n",
"</svg>\r\n"
],
"text/plain": [
"<graphviz.dot.Digraph at 0x1bfee7a07b8>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"a a\n",
"a b c b c b a\n",
"a b c b c b c b a b a\n",
"a\n",
"a\n",
"a\n",
"a b c b a a\n",
"a b c b a b a\n",
"-1 model:\n"
]
},
{
"data": {
"image/svg+xml": [
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\r\n",
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\r\n",
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\r\n",
"<!-- Generated by graphviz version 2.46.0 (20210118.1747)\r\n",
" -->\r\n",
"<!-- Pages: 1 -->\r\n",
"<svg width=\"133pt\" height=\"218pt\"\r\n",
" viewBox=\"0.00 0.00 132.79 218.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\r\n",
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 214)\">\r\n",
"<polygon fill=\"white\" stroke=\"transparent\" points=\"-4,4 -4,-214 128.79,-214 128.79,4 -4,4\"/>\r\n",
"<!-- ___BEGIN__ -->\r\n",
"<g id=\"node1\" class=\"node\">\r\n",
"<title>___BEGIN__</title>\r\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"62.39\" cy=\"-192\" rx=\"62.29\" ry=\"18\"/>\r\n",
"<text text-anchor=\"middle\" x=\"62.39\" y=\"-188.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">___BEGIN__</text>\r\n",
"</g>\r\n",
"<!-- a -->\r\n",
"<g id=\"node2\" class=\"node\">\r\n",
"<title>a</title>\r\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"62.39\" cy=\"-105\" rx=\"27\" ry=\"18\"/>\r\n",
"<text text-anchor=\"middle\" x=\"62.39\" y=\"-101.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">a</text>\r\n",
"</g>\r\n",
"<!-- ___BEGIN__&#45;&gt;a -->\r\n",
"<g id=\"edge1\" class=\"edge\">\r\n",
"<title>___BEGIN__&#45;&gt;a</title>\r\n",
"<path fill=\"none\" stroke=\"black\" d=\"M62.39,-173.8C62.39,-162.16 62.39,-146.55 62.39,-133.24\"/>\r\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"65.89,-133.18 62.39,-123.18 58.89,-133.18 65.89,-133.18\"/>\r\n",
"<text text-anchor=\"middle\" x=\"76.39\" y=\"-144.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">1000</text>\r\n",
"</g>\r\n",
"<!-- ___END__ -->\r\n",
"<g id=\"node3\" class=\"node\">\r\n",
"<title>___END__</title>\r\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"62.39\" cy=\"-18\" rx=\"53.09\" ry=\"18\"/>\r\n",
"<text text-anchor=\"middle\" x=\"62.39\" y=\"-14.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">___END__</text>\r\n",
"</g>\r\n",
"<!-- a&#45;&gt;___END__ -->\r\n",
"<g id=\"edge2\" class=\"edge\">\r\n",
"<title>a&#45;&gt;___END__</title>\r\n",
"<path fill=\"none\" stroke=\"black\" d=\"M62.39,-86.8C62.39,-75.16 62.39,-59.55 62.39,-46.24\"/>\r\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"65.89,-46.18 62.39,-36.18 58.89,-46.18 65.89,-46.18\"/>\r\n",
"<text text-anchor=\"middle\" x=\"76.39\" y=\"-57.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">1000</text>\r\n",
"</g>\r\n",
"</g>\r\n",
"</svg>\r\n"
],
"text/plain": [
"<graphviz.dot.Digraph at 0x1bfee7a07b8>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"a\n",
"a\n",
"a\n",
"a\n",
"a\n",
"a\n",
"a\n",
"a\n",
"a\n",
"a\n",
"a\n",
"a\n",
"a\n",
"a\n",
"a\n",
"a\n",
"a\n",
"a\n",
"a\n",
"a\n",
"a\n",
"a\n",
"a\n",
"a\n",
"a\n",
"a\n",
"a\n",
"a\n",
"a\n",
"a\n"
]
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEICAYAAACktLTqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAAZk0lEQVR4nO3dfbRddX3n8ffn3ksChECCucaYgAk1xEK1Jb1FLIpUWghUgem4LA6VQJmVOkNHrdNSKGuNdNYaH6ZVate0aqZQYhdFGBVhjbQlE0XmoYS5gQiBgAkBNDEkFyEQByRP3/nj/E5ycrPPufecc8/D3vvzWuuuu/dv73P2N/vc+8nv/vaTIgIzMyuWgV4XYGZmU8/hbmZWQA53M7MCcribmRWQw93MrIAc7mZmBeRwt74l6TOSPlEzP13SE5LmTfF2HpJ0epOvuUDSt6a4jndI+j/j2j4v6d9M5XasHBzu1pckDQNXAF+paV4BPBAR29M6t0q6cpLv96ykhXUW/znwH5ss8T8Bn615/0ldMCLpXEn3Zy2LiEeBXZI+MK62P5E0rcn6rOQc7tavrgTujYjXato+CvxdB7Z1D/Brkt40mZUl/QpwQkQ82IFabgN+rzqT/iN7Eri4A9uyAnO4W7+6EPhedUbSycApwNqslSX9nKTvSPqJpBck3SZp1mQ2FBE/A9YBF7RSW0YtV0naKGm3pC2Sfq/euhnuB86TNH1c22828R5mDnfrW28Hnho3vyUi9lUbIuLKiLg1zQr4DPBm4OeBk4Aba9ZdGBHPNtjeRuAXW6yNiFDN7E7g/cDxwFXATZKWpvXuj4hz671xRGwD9gJLWqzNDHC4W/+aBexuMH+YiNgcEasj4vWIGAO+ALy3ie3tTttopbbxtXw7Ip6Oiu8B9wHvaaOWZmozA2Co1wWY1fESMLPB/GEkzQW+SCVEZ1LpuLzUxPZmArtarG18LRcCnwJOTXUcCzzWRi3N1GYGuOdu/etRKuFYO79IUr0OyaeBAN4eEccDv0NlqGayfh74fou1HZTGyr9B5SyXuRExC7h3srVImg9M4/Bhn2ZqMwMc7ta/7qVmWCUitgKbgTPrrD8T+CnwcgrIP6r3xpKulPRszfzRwC8Dq2vaQtK5k6ltnGnAdGAM2Jd68ec3qOV+STfWNL0X+E5EvD6u7R/qvYdZFoe79auvAhdJOqam7SvAR+qs/6fAUuBl4NvANxu890nA/66Z/wBwf0T8GEDSSVTGuTOHUiLiYSr/ibwzY9lu4GPAnVSGb/4VlVMtJ1vL5cCXqzPpgq3TgG81eA+zI8gP67B+JenTwM6I+Is0Px14BDiveiFTi+97H/DxiNiY5tcCV0fEhjT/O8DpEXF9g/c4H/i3EXFpG3UsAO6MiF9N8+8AvhIR76pZ5/PA0xHx161ux8rJ4W5mVkAeljEzK6AJw13SLZJ2StpQ0/Znkp6U9Kiku2qvBJR0vaTNkp6SNNkr/szMbApNpud+K7BsXNtq4Bci4h3AD4DrASSdBlwGnJ5e89eSBqesWjMzm5QJL2KKiAfG300vIu6rmX0Q+GCavgT4WjqN6xlJ1VPX/rnRNubMmRMLFy5stIqZmY2zbt26FyJiOGvZVFyh+rvAHWl6PpWwr9qa2hpauHAho6OjU1CKmVl5SHqu3rK2DqhKugHYR+U2pc2+doWkUUmjY2Nj7ZRhZmbjtBzu6SEJ7wcuj0PnU26jclFG1YLUdoSIWBkRIxExMjyc+VeFmZm1qKVwl7QMuBa4OCJerVl0D3BZehzaImAx8FD7ZZqZWTMmHHOXdDtwLjBH0lYqd7u7nsr9M1ZLAngwIj4aEY9LuhN4gspwzTURsb9TxZuZWba+uEJ1ZGQkfEDVzKw5ktZFxEjWMl+hamZWQA53M7MCKsSTmDbt2M36H+1i3gnH8O7Fc3pdjplZzxUi3H/jpgcOTj/7WT8k3szMwzJmZgXkcDczKyCHu5lZATnczc
wKyOFuZlZADnczswJyuJuZFZDD3cysgBzuZmYF5HA3Mysgh7uZWQE53M3MCsjhbmZWQA53M7MCcribmRWQw93MrIAc7mZmBeRwNzMrIIe7mVkBFS7cz7/pe+z+2d5el2Fm1lOFC/cf7PgpDz3zYq/LMDPrqcKFO8Ar7rmbWclNGO6SbpG0U9KGmrYTJa2WtCl9n53aJekvJW2W9KikpZ0s3szMsk2m534rsGxc23XAmohYDKxJ8wAXAovT1wrgS1NTZnMGpF5s1sysb0wY7hHxADB+EPsSYFWaXgVcWtP+1ah4EJglad4U1TppcribWcm1OuY+NyK2p+nngblpej7wo5r1tqa2I0haIWlU0ujY2FiLZWQbdLibWcm1fUA1IgKIFl63MiJGImJkeHi43TIOM+BsN7OSazXcd1SHW9L3nal9G3BSzXoLUltXvfjqnm5v0sysr7Qa7vcAy9P0cuDumvYr0lkzZwEv1wzfdM0Nd22YeCUzswIbmmgFSbcD5wJzJG0FPgV8FrhT0tXAc8CH0ur3AhcBm4FXgas6ULOZmU1gwnCPiA/XWXRexroBXNNuUWZm1p5CXqFqZlZ2DnczswJyuJuZFZDD3cysgBzuZmYF5HA3Mysgh7uZWQE53M3MCsjhbmZWQA53M7MCyn24V+54cLglc2f2oBIzs/5RgHA/ss3P6jCzsst/uGe1Nf3oEDOzYsl9uB/ISPJo/sFQZmaFkvtwz+qlH3C2m1nJ5T7cM3vuHpcxs5LLfbhncbabWdnlPtyzx9zNzMot9+GePebueDezcst9uGePufegEDOzPpL7cK/N8Y+c9RZOnXuce+5mVnr5D/cDh6bPOXWYt8+f5Z67mZVe/sO9pu8uKrce8KmQZlZ2uQ/38RcsTRsa4PV9B7JXNjMridyHe20vfWAA5syYxkuv7mG/L1M1sxJrK9wl/YGkxyVtkHS7pKMlLZK0VtJmSXdImjZVxWbZn8J9cECcs3iYY6cPcSDg9X37O7lZM7O+1nK4S5oPfAwYiYhfAAaBy4DPATdFxFuBl4Crp6LQej52+yMA/OnFpzM0OMBgut/vOz+9ppObNTPra+0OywwBx0gaAo4FtgPvA76elq8CLm1zGw09uOVF4NA93Kvfd/9sXyc3a2bW11oO94jYBvw58EMqof4ysA7YFRHVZN0KzM96vaQVkkYljY6NjbVaxkHffnQ7AAN+UoeZWVvDMrOBS4BFwJuBGcCyyb4+IlZGxEhEjAwPD7daxkGPbX0ZqIy912yj7fc1M8ujdoZlfh14JiLGImIv8E3gbGBWGqYBWABsa7PGSfnDC5YAUJPtPP7jV7qxaTOzvtNOuP8QOEvSsZIEnAc8AXwX+GBaZzlwd3slTs4ZJ88CQDXDMnv3+3x3Myundsbc11I5cPow8Fh6r5XAHwOflLQZeANw8xTUOaHqWHvtmLsHZcysrIYmXqW+iPgU8KlxzVuAM9t531ZUQ732eKrH3M2srHJ/hWpV9UDqnppbDzjbzaysChPu1R77YeHeo1rMzHqtMOFetWe/e+5mZsUL95qeux/aYWZlVZhwrx5Hre25O9zNrKwKE+5Ve31A1cyseOHunruZWRHD/bAx9x4WYmbWQ4UL9yvPXnhw2hcxmVlZFSbcB9JFTG970/F865qzAY+5m1l5FSbcT5kz4+B09c6QHnM3s7Jq694y/eD4o4f4raULDrsbZPU+Mx5zN7OyynXPPSJ45Wf7jhhbl3vuZlZyuQ73/7npBQBW/fNzh7UrXdLkA6pmVla5DveXX9ub2T6Q/lXOdjMrq1yH+1GD2Q/D9pi7mZVdrsN9aCC7fJ8tY2Zll+9wr9Nz18Geu8PdzMop3+Fep+dejXxnu5mVVa7DfSC7435wzD38LCYzK6lch/uM6ZVrsC56+5sOaz94QPXAES8xMyuFXId79aHYl/7S/MPafRGTmZVdrsO9qvbWA3DoJmLOdjMrq1yHe73wrka9e+5mVla5Dveq8c
dVfRGTmZVdIcJ9vOpZND5bxszKqq1wlzRL0tclPSlpo6R3STpR0mpJm9L32VNV7Hj1wlvuuZtZybXbc/8i8I8R8TbgF4GNwHXAmohYDKxJ8x017njqoZ67x9zNrKRaDndJJwDnADcDRMSeiNgFXAKsSqutAi5tr8TmHTrP3eFuZuXUTs99ETAG/K2kRyT9jaQZwNyI2J7WeR6Ym/ViSSskjUoaHRsba6mAumfLHDzPvaW3NTPLvXbCfQhYCnwpIs4A/h/jhmCiMi6SGbERsTIiRiJiZHh4uI0yjhyW0cHbD5iZlVM74b4V2BoRa9P816mE/Q5J8wDS953tlVhfvfD2mLuZlV3L4R4RzwM/krQkNZ0HPAHcAyxPbcuBu9uqcBI07kz3Ad/y18xKbqjN1/874DZJ04AtwFVU/sO4U9LVwHPAh9rcRtN8EZOZlV1b4R4R64GRjEXntfO+TWw/s903DjOzsivGFapHHFCtfHe2m1lZ5Trc6x9Qrd4V0uluZuWU63Cv8o3DzMwOV4hwH2/AY+5mVnK5Dvf6V6i6525m5ZbrcK8a/ySmSpvH3M2svAoR7lkGJJ8tY2allfNwr5/eA/KYu5mVV87DveLIQZnKUI3H3M2srHId7o065gMeczezEst1uFdlHE9FyMMyZlZahQj3LJWee6+rMDPrjVyHe6PsHpDY73Q3s5LKdbhXjb+fe2p0z93MSivX4d4ovLPOoDEzK4tch3tV1gHVgQH5bBkzK61ChHsW4XvLmFl55TrcG/XMJREND7mamRVXrsO9KvMKVXxA1czKqxDhnqXSczczK6dch3uj8PYtf82szHId7gdln+buYRkzK61ch3vD89x9EZOZlViuw70q6wrVAZ8tY2YlVohwz+Lz3M2szNoOd0mDkh6R9N/T/CJJayVtlnSHpGntl5mtUc9cfsyemZXYVPTcPw5srJn/HHBTRLwVeAm4egq20VDm/dzVOPzNzIqsrXCXtAD4TeBv0ryA9wFfT6usAi5tZxut1+YDqmZWXu323P8CuBY4kObfAOyKiH1pfiswP+uFklZIGpU0OjY21trWG94V0jcOM7PyajncJb0f2BkR61p5fUSsjIiRiBgZHh5utYxKLRltP3zxVb61/scOeDMrpaE2Xns2cLGki4CjgeOBLwKzJA2l3vsCYFv7ZWabTGzv2X+A6UODnSrBzKwvtdxzj4jrI2JBRCwELgO+ExGXA98FPphWWw7c3XaVE1DWEdWDdXZ662Zm/acT57n/MfBJSZupjMHf3IFtTNp+n+xuZiXUzrDMQRFxP3B/mt4CnDkV7zvxdidexw/JNrMyKsQVqg1GZYgD9ZeZmRVVrsN9MhcpueduZmWU63CvatBx95i7mZVSIcK9EZ/nbmZllOtw9wFVM7NsuQ73qkYHVD0sY2ZlVIhwb8QddzMro1yH+2Ry2z13MyujXIf7IfXHZTzmbmZllOtwn8yZMD5bxszKKNfhXpV1QPXaZUsA2O8rVM2shAoR7llOmTMD8Ji7mZVTrsO9UWwPpO78AQ/LmFkJ5Trcq7IOpzrczazMChHuWQYHKuHuYRkzK6N8h3uD3B4YqPbcu1SLmVkfyXe4J1mP2UvZ7mEZMyulXId7o/u5D8rDMmZWXrkO96rMA6oDPqBqZuVViHDPUg38tVte7GkdZma9kOtwb9Qpf23vfgC+uGZTl6oxM+sfuQ73qqzbD3g0xszKLNfh3ijA9/qmMmZWYrkO9yplHFL1WTJmVmaFCPcsex3uZlZiLYe7pJMkfVfSE5Iel/Tx1H6ipNWSNqXvs6eu3MM1iu/3njoMwJmLTuzU5s3M+lY7Pfd9wL+PiNOAs4BrJJ0GXAesiYjFwJo031FZB1RPOOYo3nzC0Zx84rGd3ryZWd9pOdwjYntEPJymdwMbgfnAJcCqtNoq4NI2a2zZ4KA89m5mpTQlY+6SFgJnAGuBuRGxPS16Hpg7FdvIMtEj9AblcDezcmo73CUdB3wD+E
REvFK7LCrpm5muklZIGpU0OjY21m4ZmQYG5Adkm1kptRXuko6iEuy3RcQ3U/MOSfPS8nnAzqzXRsTKiBiJiJHh4eGWtj9RbA9IfkC2mZVSO2fLCLgZ2BgRX6hZdA+wPE0vB+5uvbzJ1pLd7mEZMyuroTZeezbwEeAxSetT258AnwXulHQ18BzwobYqbMPAgNizz1eqmln5tBzuEfG/yL7bLsB5rb5vczU0Xr5x+yts3A6v7dnPMdMGu1GSmVlfKMQVqlm3H6i167U9XarEzKw/5DzcJzee/vpeD82YWbnkPNwnZ4/vEGlmJVOIcK93tkyVe+5mVja5DvfJnsL++r79nS3EzKzP5Drcqybque/zue5mVjKFCPeJHHC4m1nJ5DrcJ4rsL12+FABnu5mVTa7Dvareee5vPH46gG8eZmalk+twnyizB9JgvIdlzKxsch3uVXVvHDZQWeCbh5lZ2RQi3Oup9tw9LGNmZZPrcF/ypuP4owuW8IYZ0zKXV3vuHpYxs7Jp55a/PffWN87krW+cWXf5wWEZ99zNrGRy3XOfSMp2nwppZqVT8HD3sIyZlVOhw706LPMHd67ntrXP9bgaM7PuKXS4V3vuEXDDXRt6XI2ZWfcUOtyHBie4o5iZWUEVOtxnHZN9iqSZWdEVOtz9UGwzK6tChzvAMUc54M2sfAof7h53N7MyKny479t/6Bz31/b4cXtmVg6FD/fX9h4K9G27Xu1hJWZm3VP4cK+19pkXe12CmVlXdCzcJS2T9JSkzZKu69R2mvHID3f1ugQzs67oSLhLGgT+CrgQOA34sKTTOrGtiVy7bAmnv/l4AF746eu9KMHMrOsUHbgdrqR3ATdGxAVp/nqAiPhM1vojIyMxOjo65XXUuuKWh3jomZ9w0uxjO7odM7Nm/PavnMS/fs8pLb1W0rqIGMla1qn7uc8HflQzvxV457iiVgArAE4++eQOlXHIlb/6Fo6b7nPezay/zDluekfet2cP64iIlcBKqPTcO729971tLu9729xOb8bMrC906oDqNuCkmvkFqc3MzLqgU+H+f4HFkhZJmgZcBtzToW2Zmdk4HRmWiYh9kn4f+CdgELglIh7vxLbMzOxIHRtzj4h7gXs79f5mZlZfqa5QNTMrC4e7mVkBOdzNzArI4W5mVkAduf1A00VIY8BzLb58DvDCFJYzVfq1Lujf2lxXc1xXc4pY11siYjhrQV+Eezskjda7t0Iv9Wtd0L+1ua7muK7mlK0uD8uYmRWQw93MrICKEO4re11AHf1aF/Rvba6rOa6rOaWqK/dj7mZmdqQi9NzNzGwch7uZWQHlOtx7+RBuSSdJ+q6kJyQ9Lunjqf1GSdskrU9fF9W85vpU61OSLuhgbc9KeixtfzS1nShptaRN6fvs1C5Jf5nqelTS0g7VtKRmn6yX9IqkT/Rif0m6RdJOSRtq2preP5KWp/U3SVreobr+TNKTadt3SZqV2hdKeq1mv3255jW/nD7/zal2daCupj+3qf59rVPXHTU1PStpfWrv5v6qlw3d/RmLiFx+UbmV8NPAKcA04PvAaV3c/jxgaZqeCfyAysPAbwT+MGP901KN04FFqfbBDtX2LDBnXNt/Bq5L09cBn0vTFwH/AAg4C1jbpc/ueeAtvdhfwDnAUmBDq/sHOBHYkr7PTtOzO1DX+cBQmv5cTV0La9cb9z4PpVqVar+wA3U19bl14vc1q65xyz8P/Ice7K962dDVn7E899zPBDZHxJaI2AN8DbikWxuPiO0R8XCa3g1spPLs2HouAb4WEa9HxDPAZir/hm65BFiVplcBl9a0fzUqHgRmSZrX4VrOA56OiEZXJXdsf0XEA8CLGdtrZv9cAKyOiBcj4iVgNbBsquuKiPsiYl+afZDKU83qSrUdHxEPRiUhvlrzb5myuhqo97lN+e9ro7pS7/tDwO2N3qND+6teNnT1ZyzP4Z71EO5G4doxkhYCZwBrU9Pvpz+vbqn+6UV36w3gPk
nrVHkQOcDciNiepp8Hqg+U7cV+vIzDf+l6vb+g+f3Ti/32u1R6eFWLJD0i6XuS3pPa5qdaulFXM59bt/fXe4AdEbGppq3r+2tcNnT1ZyzP4d4XJB0HfAP4RES8AnwJ+Dngl4DtVP407LZ3R8RS4ELgGknn1C5MPZSenAOrymMXLwb+W2rqh/11mF7un3ok3QDsA25LTduBkyPiDOCTwN9LOr6LJfXd5zbOhzm8A9H1/ZWRDQd142csz+He84dwSzqKyod3W0R8EyAidkTE/og4APxXDg0ldK3eiNiWvu8E7ko17KgOt6TvO7tdV3Ih8HBE7Eg19nx/Jc3un67VJ+lK4P3A5SkUSMMeP0nT66iMZ5+aaqgduulIXS18bt3cX0PAbwF31NTb1f2VlQ10+Wcsz+He04dwpzG9m4GNEfGFmvba8ep/AVSP5N8DXCZpuqRFwGIqB3Kmuq4ZkmZWp6kckNuQtl892r4cuLumrivSEfuzgJdr/nTshMN6VL3eXzWa3T//BJwvaXYakjg/tU0pScuAa4GLI+LVmvZhSYNp+hQq+2dLqu0VSWeln9Erav4tU1lXs59bN39ffx14MiIODrd0c3/Vywa6/TPWzlHhXn9ROcr8Ayr/C9/Q5W2/m8qfVY8C69PXRcDfAY+l9nuAeTWvuSHV+hRtHpFvUNcpVM5E+D7weHW/AG8A1gCbgP8BnJjaBfxVqusxYKSD+2wG8BPghJq2ru8vKv+5bAf2UhnHvLqV/UNlDHxz+rqqQ3VtpjLuWv0Z+3Ja91+mz3c98DDwgZr3GaEStk8D/4V0JfoU19X05zbVv69ZdaX2W4GPjlu3m/urXjZ09WfMtx8wMyugPA/LmJlZHQ53M7MCcribmRWQw93MrIAc7mZmBeRwNzMrIIe7mVkB/X+glk/+EApFrQAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"random.seed(5) # make deterministic\n",
"# shakespeare data from https://www.kaggle.com/kingburrito666/shakespeare-plays?select=alllines.txt\n",
"#with open(\"alllines.txt\", \"r\") as f:\n",
"# text = f.read()\n",
"text = ((\"a b c b c b a \"*2) + \"\\n\")*10\n",
"datas = [a.split() for a in text.split(\"\\n\")]\n",
"# USE runExperiment INSTEAD OF runAndVisualizeExperiment WHEN YOU DO shakespeare,\n",
"# OTHERWISE DOT WILL CRASH YOUR COMPUTER BECAUSE THE FIRST BIGRAM MODEL IS 80,000 NODES,\n",
"# ALSO THERE WILL BE TOO MANY EDGES AND NOTEBOOK WILL GET VERY LARGE\n",
"# models = runExperiment(initialDataset=datas, numSteps=10000, contextSize=1, maxSampleLen=100, minTotalGeneratedTokens=10000, debug=True)\n",
"# runAndVisualize is safe for toy stuff\n",
"models = runAndVisualizeExperiment(initialDataset=datas, printRange=[0, -1], numSteps=2000, contextSize=1, maxSampleLen=100, minTotalGeneratedTokens=1000)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "aboriginal-pressure",
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\r\n",
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\r\n",
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\r\n",
"<!-- Generated by graphviz version 2.46.0 (20210118.1747)\r\n",
" -->\r\n",
"<!-- Pages: 1 -->\r\n",
"<svg width=\"186pt\" height=\"218pt\"\r\n",
" viewBox=\"0.00 0.00 186.30 218.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\r\n",
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 214)\">\r\n",
"<polygon fill=\"white\" stroke=\"transparent\" points=\"-4,4 -4,-214 182.3,-214 182.3,4 -4,4\"/>\r\n",
"<!-- ___BEGIN__ -->\r\n",
"<g id=\"node1\" class=\"node\">\r\n",
"<title>___BEGIN__</title>\r\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"71\" cy=\"-192\" rx=\"62.29\" ry=\"18\"/>\r\n",
"<text text-anchor=\"middle\" x=\"71\" y=\"-188.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">___BEGIN__</text>\r\n",
"</g>\r\n",
"<!-- a -->\r\n",
"<g id=\"node2\" class=\"node\">\r\n",
"<title>a</title>\r\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"71\" cy=\"-105\" rx=\"27\" ry=\"18\"/>\r\n",
"<text text-anchor=\"middle\" x=\"71\" y=\"-101.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">a</text>\r\n",
"</g>\r\n",
"<!-- ___BEGIN__&#45;&gt;a -->\r\n",
"<g id=\"edge1\" class=\"edge\">\r\n",
"<title>___BEGIN__&#45;&gt;a</title>\r\n",
"<path fill=\"none\" stroke=\"black\" d=\"M71,-173.8C71,-162.16 71,-146.55 71,-133.24\"/>\r\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"74.5,-133.18 71,-123.18 67.5,-133.18 74.5,-133.18\"/>\r\n",
"<text text-anchor=\"middle\" x=\"81.5\" y=\"-144.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">497</text>\r\n",
"</g>\r\n",
"<!-- b -->\r\n",
"<g id=\"node3\" class=\"node\">\r\n",
"<title>b</title>\r\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"27\" cy=\"-18\" rx=\"27\" ry=\"18\"/>\r\n",
"<text text-anchor=\"middle\" x=\"27\" y=\"-14.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">b</text>\r\n",
"</g>\r\n",
"<!-- a&#45;&gt;b -->\r\n",
"<g id=\"edge2\" class=\"edge\">\r\n",
"<title>a&#45;&gt;b</title>\r\n",
"<path fill=\"none\" stroke=\"black\" d=\"M50.43,-92.74C41.86,-86.88 32.81,-78.86 28,-69 24.6,-62.03 23.45,-53.83 23.4,-46.11\"/>\r\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"26.9,-46.2 23.89,-36.04 19.9,-45.86 26.9,-46.2\"/>\r\n",
"<text text-anchor=\"middle\" x=\"38.5\" y=\"-57.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">253</text>\r\n",
"</g>\r\n",
"<!-- ___END__ -->\r\n",
"<g id=\"node4\" class=\"node\">\r\n",
"<title>___END__</title>\r\n",
"<ellipse fill=\"none\" stroke=\"black\" cx=\"125\" cy=\"-18\" rx=\"53.09\" ry=\"18\"/>\r\n",
"<text text-anchor=\"middle\" x=\"125\" y=\"-14.3\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">___END__</text>\r\n",
"</g>\r\n",
"<!-- a&#45;&gt;___END__ -->\r\n",
"<g id=\"edge3\" class=\"edge\">\r\n",
"<title>a&#45;&gt;___END__</title>\r\n",
"<path fill=\"none\" stroke=\"black\" d=\"M81.16,-88.01C88.93,-75.78 99.79,-58.68 108.76,-44.56\"/>\r\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"111.93,-46.11 114.33,-35.79 106.02,-42.35 111.93,-46.11\"/>\r\n",
"<text text-anchor=\"middle\" x=\"111.5\" y=\"-57.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">497</text>\r\n",
"</g>\r\n",
"<!-- b&#45;&gt;a -->\r\n",
"<g id=\"edge4\" class=\"edge\">\r\n",
"<title>b&#45;&gt;a</title>\r\n",
"<path fill=\"none\" stroke=\"black\" d=\"M37.58,-34.74C41.38,-40.66 45.57,-47.53 49,-54 53.06,-61.66 57.03,-70.22 60.45,-78.07\"/>\r\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"57.33,-79.67 64.46,-87.5 63.77,-76.93 57.33,-79.67\"/>\r\n",
"<text text-anchor=\"middle\" x=\"66.5\" y=\"-57.8\" font-family=\"Times New Roman,serif\" font-size=\"14.00\">253</text>\r\n",
"</g>\r\n",
"</g>\r\n",
"</svg>\r\n"
],
"text/plain": [
"<graphviz.dot.Digraph at 0x1bfee128198>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
    "# How to visualize any model in the history.\n",
    "# (Warning: don't visualize early models when using large datasets;\n",
    "# check the len of the model first and make sure it is no more than a few thousand.)\n",
"display(modelToDot(models[600]))"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "black-lesson",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'\"By being miss\\'d, I will not wish thee apart Cousin of duty,\"'"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# how to generate text from any model in the history\n",
"\" \".join(list(models[-1].gen()))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "filled-western",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment