{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "4ce0fba5",
"metadata": {},
"outputs": [],
"source": [
"from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer\n",
"tokenizer = AutoTokenizer.from_pretrained('distilgpt2')\n",
"model = AutoModelForCausalLM.from_pretrained('distilgpt2')\n",
"generator = pipeline('text-generation', model=model, tokenizer=tokenizer)"
]
},
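{
"cell_type": "markdown",
"id": "b7e2d4f1",
"metadata": {},
"source": [
"The `generator` pipeline above is loaded but never called later in this notebook. A minimal added sketch of how it could be used (the prompt string, `max_length`, and `do_sample` values are arbitrary illustrations, not part of the original assignment):"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c8f3e5a2",
"metadata": {},
"outputs": [],
"source": [
"# Hypothetical demo of the distilgpt2 pipeline loaded above.\n",
"# max_length and do_sample are standard pipeline keyword arguments.\n",
"result = generator('The library is', max_length=20, do_sample=True)\n",
"print(result[0]['generated_text'])"
]
},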
{
"cell_type": "code",
"execution_count": 3,
"id": "0572d1c2",
"metadata": {},
"outputs": [],
"source": [
"import random\n",
"import markovify\n",
"import spacy\n",
"import re\n",
"import tracery\n",
"import textwrap"
]
},
{
"cell_type": "markdown",
"id": "997f8fb1",
"metadata": {},
"source": [
"# importing the text from \"The library of the Babel\" and some body related poems"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "f948e219",
"metadata": {},
"outputs": [],
"source": [
"text_arch = open(\"thelibraryofthebabel.txt\").read()\n",
"text_body = open(\"Body.txt\").read()"
]
},
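{
"cell_type": "markdown",
"id": "d9a4f6b3",
"metadata": {},
"source": [
"A later output contains a Unicode replacement character, which suggests an encoding mismatch somewhere in the source files. An added defensive variant (assuming the files are meant to be UTF-8; `errors='replace'` is a standard `open()` option that keeps the read from raising on stray bytes):"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e0b5a7c4",
"metadata": {},
"outputs": [],
"source": [
"# Re-read with an explicit encoding; errors='replace' substitutes\n",
"# undecodable bytes instead of raising an exception.\n",
"text_arch = open('thelibraryofthebabel.txt', encoding='utf-8', errors='replace').read()\n",
"text_body = open('Body.txt', encoding='utf-8', errors='replace').read()"
]
},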
{
"cell_type": "code",
"execution_count": 5,
"id": "ed4ab878",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" The content was also deciphered: some notions of\n",
"combinative analysis, illustrated with examples of variations with unlimited\n",
"repetition\n"
]
}
],
"source": [
"sentences_arch = text_arch.split('.')\n",
"print(random.choice(sentences_arch))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "0b313455",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"much with her? and has she been much\n"
]
}
],
"source": [
"sentences_body = text_body.split(\"\\n\")\n",
"print(random.choice(sentences_body))"
]
},
{
"cell_type": "markdown",
"id": "440fbfc2",
"metadata": {},
"source": [
"# using space the categorize the words used in architecture text"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "c672ce4a",
"metadata": {},
"outputs": [],
"source": [
"nlp_arch = spacy.load('en_core_web_md')\n",
"doc_arch = nlp_arch(text_arch)\n",
"sentences_arch = list(doc_arch.sents)\n",
"\n",
"words_arch = [w for w in list(doc_arch) if w.is_alpha]\n",
"noun_chunks_arch = list(doc_arch.noun_chunks)\n",
"entities_arch = list(doc_arch.ents)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "cea45565",
"metadata": {},
"outputs": [],
"source": [
"nouns_arch = [w for w in words_arch if w.pos_ == \"NOUN\"]\n",
"verbs_arch = [w for w in words_arch if w.pos_ == \"VERB\"]\n",
"adjs_arch = [w for w in words_arch if w.pos_ == \"ADJ\"]\n",
"advs_arch = [w for w in words_arch if w.pos_ == \"ADV\"]"
]
},
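{
"cell_type": "markdown",
"id": "f1c6b8d5",
"metadata": {},
"source": [
"A quick way to inspect these categories is to count the most frequent lemmas in each. This is an added sketch using the standard library's `collections.Counter`:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a2d7c9e6",
"metadata": {},
"outputs": [],
"source": [
"from collections import Counter\n",
"\n",
"# Tally the most common noun lemmas in the architecture text.\n",
"noun_counts = Counter(w.lemma_ for w in nouns_arch)\n",
"print(noun_counts.most_common(10))"
]
},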
{
"cell_type": "code",
"execution_count": 13,
"id": "51e39f53",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"exception"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"random.choice(nouns_arch)"
]
},
{
"cell_type": "markdown",
"id": "df24f56f",
"metadata": {},
"source": [
"# using markov model to generate the mix of two texts style"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "99398d5c",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"\"body or another person's body,\""
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"random.choice(sentences_body)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "0b0be2c6",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"generator_arch = markovify.Text(text_arch)\n",
"generator_body = markovify.Text(text_body)\n",
"length_arch = 500\n",
"length_body = 80\n",
"length_combo = 50\n",
"weight_arch = 0.3\n",
"weight_body = 0.7\n",
"combo = markovify.combine([generator_arch, generator_body], [weight_arch, weight_body])"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "c2a41255",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"For a century they have exhausted the hexagons ...\n"
]
}
],
"source": [
"print(combo.make_short_sentence(length_combo))"
]
},
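{
"cell_type": "markdown",
"id": "b3e8d0f7",
"metadata": {},
"source": [
"Note that markovify's `make_short_sentence` returns `None` when it cannot build a sentence under the length cap, which would crash the spaCy calls and string joins later in this notebook. A small added retry wrapper (the helper name and retry count are arbitrary choices, not part of the original code):"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c4f9e1a8",
"metadata": {},
"outputs": [],
"source": [
"def short_sentence_or_retry(gen, max_chars, attempts=10):\n",
"    # Ask the generator repeatedly until it returns a sentence.\n",
"    for _ in range(attempts):\n",
"        s = gen.make_short_sentence(max_chars)\n",
"        if s is not None:\n",
"            return s\n",
"    return ''  # give up gracefully instead of propagating None\n",
"\n",
"print(short_sentence_or_retry(combo, length_combo))"
]
},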
{
"cell_type": "markdown",
"id": "dfefe740",
"metadata": {},
"source": [
"# get some markov sentences from the body poems"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "fccf270c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The man's body at auction!\n"
]
}
],
"source": [
"print(generator_body.make_short_sentence(length_body))"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "d8d66ebe",
"metadata": {},
"outputs": [],
"source": [
"mk_body = []\n",
"for line in range(4):\n",
" mk_body.append(generator_body.make_short_sentence(length_body))"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "f9ad6f74",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['Have you ever loved the body of man is calming and ex- cellent to the mothers.', 'Whatever the bids of the parts of you!', \"A man's body is sacred�it is no matter who, Is it one of the parts of you!\", 'Is it one of the eye, eye-brows, and the outlet again.']\n"
]
}
],
"source": [
"print(mk_body)"
]
},
{
"cell_type": "markdown",
"id": "5ea783d2",
"metadata": {},
"source": [
"# get some markov sentences from the architecture text and store the nouns"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "09df5dd4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"To the left and right of the same series may have examined and read it.\n"
]
}
],
"source": [
"mk_arch = generator_arch.make_short_sentence(length_arch)\n",
"print(mk_arch)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "c15c9f07",
"metadata": {},
"outputs": [],
"source": [
"nlp_mk_arch = spacy.load('en_core_web_md')\n",
"doc_mk_arch = nlp_mk_arch(mk_arch)\n",
"sentences_mk_arch = list(doc_arch.sents)\n",
"\n",
"words_mk_arch = [w for w in list(doc_mk_arch) if w.is_alpha]\n",
"noun_chunks_mk_arch = list(doc_mk_arch.noun_chunks)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "0bc89440",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"the same series"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"random.choice(noun_chunks_mk_arch)"
]
},
{
"cell_type": "markdown",
"id": "8bed82fa",
"metadata": {},
"source": [
"# replace the nouns in markov body sentences with nouns from architecture"
]
},
{
"cell_type": "code",
"execution_count": 162,
"id": "6ab275d6",
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[Have, you, ever, loved, the, body, of, man, is, calming, and, cellent, to, the, mothers]\n"
]
}
],
"source": [
"nlp_line = spacy.load('en_core_web_md')\n",
"doc_line = nlp_line(mk_body[0])\n",
"\n",
"words_line = [w for w in list(doc_line) if w.is_alpha]\n",
"noun_chunks_line = list(doc_line.noun_chunks)\n",
"\n",
"print(words_line)"
]
},
{
"cell_type": "code",
"execution_count": 163,
"id": "37aa91f3",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[you, the body, man, the mothers]\n"
]
}
],
"source": [
"print(noun_chunks_line)\n"
]
},
{
"cell_type": "code",
"execution_count": 164,
"id": "1a3892fc",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"['Have',\n",
" 'you',\n",
" 'ever',\n",
" 'loved',\n",
" 'the',\n",
" 'it',\n",
" 'of',\n",
" 'it',\n",
" 'is',\n",
" 'calming',\n",
" 'and',\n",
" 'cellent',\n",
" 'to',\n",
" 'the',\n",
" 'the left']"
]
},
"execution_count": 164,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"for i in range(len(words_line)):\n",
" if words_line[i].pos_ == \"NOUN\":\n",
" words_line[i] = str(random.choice(noun_chunks_mk_arch))\n",
" words_line[i] = str(words_line[i])\n",
"words_line"
]
},
{
"cell_type": "code",
"execution_count": 165,
"id": "6e5db93f",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Have you ever loved the it of it is calming and cellent to the the\n",
"left\n"
]
}
],
"source": [
"print(textwrap.fill(\" \".join(words_line)))"
]
},
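{
"cell_type": "markdown",
"id": "d5a0f2b9",
"metadata": {},
"source": [
"The replacement logic above could be wrapped in a small helper so the stanza cells below would not have to repeat it. This is an added sketch, not part of the original notebook; it reuses the `nlp_line` model, `random`, and `textwrap` already loaded above:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e6b1a3c0",
"metadata": {},
"outputs": [],
"source": [
"def replace_nouns(line, chunks, nlp=nlp_line):\n",
"    # Swap every NOUN token in `line` for a random noun chunk from `chunks`.\n",
"    words = [w for w in nlp(line) if w.is_alpha]\n",
"    out = [str(random.choice(chunks)) if w.pos_ == 'NOUN' else w.text for w in words]\n",
"    return ' '.join(out)\n",
"\n",
"print(textwrap.fill(replace_nouns(mk_body[0], noun_chunks_mk_arch)))"
]
},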
{
"cell_type": "markdown",
"id": "68f73d92",
"metadata": {},
"source": [
"# Generate a stanza repeating the process above"
]
},
{
"cell_type": "code",
"execution_count": 173,
"id": "1a91b26a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"How do you know who shall be stript that you and he might touch each\n",
"other\n",
"And if the the next-to-last page only but of the the senseless\n",
"perdition of These phrases\n",
"How do you know so much that you may see them\n",
"And if the one time of a well made hope appears not only one the wind\n",
"this is the The original manuscript\n"
]
}
],
"source": [
"mk_body = []\n",
"for line in range(4):\n",
" mk_body.append(generator_body.make_short_sentence(length_body))\n",
" \n",
"mk_arch = generator_arch.make_short_sentence(length_arch)\n",
"nlp_mk_arch = spacy.load('en_core_web_md')\n",
"doc_mk_arch = nlp_mk_arch(mk_arch)\n",
"sentences_mk_arch = list(doc_arch.sents)\n",
"\n",
"words_mk_arch = [w for w in list(doc_mk_arch) if w.is_alpha]\n",
"noun_chunks_mk_arch = list(doc_mk_arch.noun_chunks)\n",
"\n",
"for line in range(4):\n",
" nlp_line = spacy.load('en_core_web_md')\n",
" doc_line = nlp_line(mk_body[line])\n",
"\n",
" words_line = [w for w in list(doc_line) if w.is_alpha]\n",
" noun_chunks_line = list(doc_line.noun_chunks)\n",
"\n",
" for i in range(len(words_line)):\n",
" if words_line[i].pos_ == \"NOUN\":\n",
" words_line[i] = str(random.choice(noun_chunks_arch))\n",
" words_line[i] = str(words_line[i])\n",
" \n",
" print(textwrap.fill(\" \".join(words_line)))"
]
},
{
"cell_type": "code",
"execution_count": 187,
"id": "0764841c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The a few miles is a a delirious divinity\n",
"Within there runs the one the same old beautiful total\n",
"The prophecy these canonical books at they\n",
"In them and of them that pleases the inalterable MCV well\n"
]
}
],
"source": [
"mk_body = []\n",
"for line in range(4):\n",
" mk_body.append(generator_body.make_short_sentence(length_body))\n",
" \n",
"mk_arch = combo.make_short_sentence(100)\n",
"nlp_mk_arch = spacy.load('en_core_web_md')\n",
"doc_mk_arch = nlp_mk_arch(mk_arch)\n",
"sentences_mk_arch = list(doc_arch.sents)\n",
"\n",
"words_mk_arch = [w for w in list(doc_mk_arch) if w.is_alpha]\n",
"noun_chunks_mk_arch = list(doc_mk_arch.noun_chunks)\n",
"\n",
"for line in range(4):\n",
" nlp_line = spacy.load('en_core_web_md')\n",
" doc_line = nlp_line(mk_body[line])\n",
"\n",
" words_line = [w for w in list(doc_line) if w.is_alpha]\n",
" noun_chunks_line = list(doc_line.noun_chunks)\n",
"\n",
" for i in range(len(words_line)):\n",
" if words_line[i].pos_ == \"NOUN\":\n",
" words_line[i] = str(random.choice(noun_chunks_arch))\n",
" words_line[i] = str(words_line[i])\n",
" \n",
" print(textwrap.fill(\" \".join(words_line)))"
]
}
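,
{
"cell_type": "markdown",
"id": "f7c2b4d1",
"metadata": {},
"source": [
"A possible wrap-up, assuming the hypothetical `short_sentence_or_retry` and `replace_nouns` helpers sketched earlier: generate several stanzas in a row, drawing each stanza's noun chunks from a fresh architecture sentence."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a8d3c5e2",
"metadata": {},
"outputs": [],
"source": [
"# Sketch: a three-stanza poem, four lines per stanza.\n",
"for stanza in range(3):\n",
"    arch_line = short_sentence_or_retry(generator_arch, length_arch)\n",
"    chunks = list(nlp_mk_arch(arch_line).noun_chunks)\n",
"    if not chunks:\n",
"        chunks = noun_chunks_arch  # fall back to the full text's chunks\n",
"    for _ in range(4):\n",
"        body_line = short_sentence_or_retry(generator_body, length_body)\n",
"        print(textwrap.fill(replace_nouns(body_line, chunks)))\n",
"    print()"
]
}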
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}