Created
May 3, 2023 20:54
-
-
Save wupeixian/117f91652e7ba49d1dcc3bec475f2740 to your computer and use it in GitHub Desktop.
Final Assignment
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "4ce0fba5", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer\n", | |
"tokenizer = AutoTokenizer.from_pretrained('distilgpt2')\n", | |
"model = AutoModelForCausalLM.from_pretrained('distilgpt2')\n", | |
"generator = pipeline('text-generation', model=model, tokenizer=tokenizer)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "0572d1c2", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import random\n", | |
"import markovify\n", | |
"import spacy\n", | |
"import re\n", | |
"import tracery\n", | |
"import textwrap" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "997f8fb1", | |
"metadata": {}, | |
"source": [ | |
"# Importing the text of \"The Library of Babel\" and some body-related poems" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "f948e219", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"text_arch = open(\"thelibraryofthebabel.txt\").read()\n", | |
"text_body = open(\"Body.txt\").read()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "ed4ab878", | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" The content was also deciphered: some notions of\n", | |
"combinative analysis, illustrated with examples of variations with unlimited\n", | |
"repetition\n" | |
] | |
} | |
], | |
"source": [ | |
"sentences_arch = text_arch.split('.')\n", | |
"print(random.choice(sentences_arch))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "0b313455", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"much with her? and has she been much\n" | |
] | |
} | |
], | |
"source": [ | |
"sentences_body = text_body.split(\"\\n\")\n", | |
"print(random.choice(sentences_body))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "440fbfc2", | |
"metadata": {}, | |
"source": [ | |
"# Using spaCy to categorize the words used in the architecture text" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"id": "c672ce4a", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"nlp_arch = spacy.load('en_core_web_md')\n", | |
"doc_arch = nlp_arch(text_arch)\n", | |
"sentences_arch = list(doc_arch.sents)\n", | |
"\n", | |
"words_arch = [w for w in list(doc_arch) if w.is_alpha]\n", | |
"noun_chunks_arch = list(doc_arch.noun_chunks)\n", | |
"entities_arch = list(doc_arch.ents)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"id": "cea45565", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"nouns_arch = [w for w in words_arch if w.pos_ == \"NOUN\"]\n", | |
"verbs_arch = [w for w in words_arch if w.pos_ == \"VERB\"]\n", | |
"adjs_arch = [w for w in words_arch if w.pos_ == \"ADJ\"]\n", | |
"advs_arch = [w for w in words_arch if w.pos_ == \"ADV\"]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"id": "51e39f53", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"exception" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"random.choice(nouns_arch)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "95f49204", | |
"metadata": {}, | |
"source": [ | |
"# Using spaCy to categorize the words used in architectural texts" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"id": "fe1fa986", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"nlp_arch = spacy.load('en_core_web_md')\n", | |
"doc_arch = nlp_arch(text_arch)\n", | |
"sentences_arch = list(doc_arch.sents)\n", | |
"\n", | |
"words_arch = [w for w in list(doc_arch) if w.is_alpha]\n", | |
"noun_chunks_arch = list(doc_arch.noun_chunks)\n", | |
"entities_arch = list(doc_arch.ents)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"id": "fc8bae46", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"nouns_arch = [w for w in words_arch if w.pos_ == \"NOUN\"]\n", | |
"verbs_arch = [w for w in words_arch if w.pos_ == \"VERB\"]\n", | |
"adjs_arch = [w for w in words_arch if w.pos_ == \"ADJ\"]\n", | |
"advs_arch = [w for w in words_arch if w.pos_ == \"ADV\"]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"id": "00f23bcc", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"task" | |
] | |
}, | |
"execution_count": 16, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"random.choice(nouns_arch)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "df24f56f", | |
"metadata": {}, | |
"source": [ | |
"# Using a Markov model to generate a mix of the two texts' styles" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"id": "99398d5c", | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"\"body or another person's body,\"" | |
] | |
}, | |
"execution_count": 17, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"random.choice(sentences_body)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"id": "0b0be2c6", | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": [ | |
"generator_arch = markovify.Text(text_arch)\n", | |
"generator_body = markovify.Text(text_body)\n", | |
"length_arch = 500\n", | |
"length_body = 80\n", | |
"length_combo = 50\n", | |
"weight_arch = 0.3\n", | |
"weight_body = 0.7\n", | |
"combo = markovify.combine([generator_arch, generator_body], [weight_arch, weight_body])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"id": "c2a41255", | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"For a century they have exhausted the hexagons ...\n" | |
] | |
} | |
], | |
"source": [ | |
"print(combo.make_short_sentence(length_combo))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "dfefe740", | |
"metadata": {}, | |
"source": [ | |
"# get some markov sentences from the body poems" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"id": "fccf270c", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"The man's body at auction!\n" | |
] | |
} | |
], | |
"source": [ | |
"print(generator_body.make_short_sentence(length_body))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"id": "d8d66ebe", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"mk_body = []\n", | |
"for line in range(4):\n", | |
" mk_body.append(generator_body.make_short_sentence(length_body))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"id": "f9ad6f74", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"['Have you ever loved the body of man is calming and ex- cellent to the mothers.', 'Whatever the bids of the parts of you!', \"A man's body is sacred�it is no matter who, Is it one of the parts of you!\", 'Is it one of the eye, eye-brows, and the outlet again.']\n" | |
] | |
} | |
], | |
"source": [ | |
"print(mk_body)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "5ea783d2", | |
"metadata": {}, | |
"source": [ | |
"# get some markov sentences from the architecture text and store the nouns" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"id": "09df5dd4", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"To the left and right of the same series may have examined and read it.\n" | |
] | |
} | |
], | |
"source": [ | |
"mk_arch = generator_arch.make_short_sentence(length_arch)\n", | |
"print(mk_arch)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"id": "c15c9f07", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"nlp_mk_arch = spacy.load('en_core_web_md')\n", | |
"doc_mk_arch = nlp_mk_arch(mk_arch)\n", | |
"sentences_mk_arch = list(doc_arch.sents)\n", | |
"\n", | |
"words_mk_arch = [w for w in list(doc_mk_arch) if w.is_alpha]\n", | |
"noun_chunks_mk_arch = list(doc_mk_arch.noun_chunks)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 39, | |
"id": "0bc89440", | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"the same series" | |
] | |
}, | |
"execution_count": 39, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"random.choice(noun_chunks_mk_arch)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "8bed82fa", | |
"metadata": {}, | |
"source": [ | |
"# replace the nouns in markov body sentences with nouns from architecture" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 162, | |
"id": "6ab275d6", | |
"metadata": { | |
"scrolled": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[Have, you, ever, loved, the, body, of, man, is, calming, and, cellent, to, the, mothers]\n" | |
] | |
} | |
], | |
"source": [ | |
"nlp_line = spacy.load('en_core_web_md')\n", | |
"doc_line = nlp_line(mk_body[0])\n", | |
"\n", | |
"words_line = [w for w in list(doc_line) if w.is_alpha]\n", | |
"noun_chunks_line = list(doc_line.noun_chunks)\n", | |
"\n", | |
"print(words_line)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 163, | |
"id": "37aa91f3", | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[you, the body, man, the mothers]\n" | |
] | |
} | |
], | |
"source": [ | |
"print(noun_chunks_line)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 164, | |
"id": "1a3892fc", | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['Have',\n", | |
" 'you',\n", | |
" 'ever',\n", | |
" 'loved',\n", | |
" 'the',\n", | |
" 'it',\n", | |
" 'of',\n", | |
" 'it',\n", | |
" 'is',\n", | |
" 'calming',\n", | |
" 'and',\n", | |
" 'cellent',\n", | |
" 'to',\n", | |
" 'the',\n", | |
" 'the left']" | |
] | |
}, | |
"execution_count": 164, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"for i in range(len(words_line)):\n", | |
" if words_line[i].pos_ == \"NOUN\":\n", | |
" words_line[i] = str(random.choice(noun_chunks_mk_arch))\n", | |
" words_line[i] = str(words_line[i])\n", | |
"words_line" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 165, | |
"id": "6e5db93f", | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Have you ever loved the it of it is calming and cellent to the the\n", | |
"left\n" | |
] | |
} | |
], | |
"source": [ | |
"print(textwrap.fill(\" \".join(words_line)))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "68f73d92", | |
"metadata": {}, | |
"source": [ | |
"# Generate a stanza repeating the process above" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 173, | |
"id": "1a91b26a", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"How do you know who shall be stript that you and he might touch each\n", | |
"other\n", | |
"And if the the next-to-last page only but of the the senseless\n", | |
"perdition of These phrases\n", | |
"How do you know so much that you may see them\n", | |
"And if the one time of a well made hope appears not only one the wind\n", | |
"this is the The original manuscript\n" | |
] | |
} | |
], | |
"source": [ | |
"mk_body = []\n", | |
"for line in range(4):\n", | |
" mk_body.append(generator_body.make_short_sentence(length_body))\n", | |
" \n", | |
"mk_arch = generator_arch.make_short_sentence(length_arch)\n", | |
"nlp_mk_arch = spacy.load('en_core_web_md')\n", | |
"doc_mk_arch = nlp_mk_arch(mk_arch)\n", | |
"sentences_mk_arch = list(doc_arch.sents)\n", | |
"\n", | |
"words_mk_arch = [w for w in list(doc_mk_arch) if w.is_alpha]\n", | |
"noun_chunks_mk_arch = list(doc_mk_arch.noun_chunks)\n", | |
"\n", | |
"for line in range(4):\n", | |
" nlp_line = spacy.load('en_core_web_md')\n", | |
" doc_line = nlp_line(mk_body[line])\n", | |
"\n", | |
" words_line = [w for w in list(doc_line) if w.is_alpha]\n", | |
" noun_chunks_line = list(doc_line.noun_chunks)\n", | |
"\n", | |
" for i in range(len(words_line)):\n", | |
" if words_line[i].pos_ == \"NOUN\":\n", | |
" words_line[i] = str(random.choice(noun_chunks_arch))\n", | |
" words_line[i] = str(words_line[i])\n", | |
" \n", | |
" print(textwrap.fill(\" \".join(words_line)))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 187, | |
"id": "0764841c", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"The a few miles is a a delirious divinity\n", | |
"Within there runs the one the same old beautiful total\n", | |
"The prophecy these canonical books at they\n", | |
"In them and of them that pleases the inalterable MCV well\n" | |
] | |
} | |
], | |
"source": [ | |
"mk_body = []\n", | |
"for line in range(4):\n", | |
" mk_body.append(generator_body.make_short_sentence(length_body))\n", | |
" \n", | |
"mk_arch = combo.make_short_sentence(100)\n", | |
"nlp_mk_arch = spacy.load('en_core_web_md')\n", | |
"doc_mk_arch = nlp_mk_arch(mk_arch)\n", | |
"sentences_mk_arch = list(doc_arch.sents)\n", | |
"\n", | |
"words_mk_arch = [w for w in list(doc_mk_arch) if w.is_alpha]\n", | |
"noun_chunks_mk_arch = list(doc_mk_arch.noun_chunks)\n", | |
"\n", | |
"for line in range(4):\n", | |
" nlp_line = spacy.load('en_core_web_md')\n", | |
" doc_line = nlp_line(mk_body[line])\n", | |
"\n", | |
" words_line = [w for w in list(doc_line) if w.is_alpha]\n", | |
" noun_chunks_line = list(doc_line.noun_chunks)\n", | |
"\n", | |
" for i in range(len(words_line)):\n", | |
" if words_line[i].pos_ == \"NOUN\":\n", | |
" words_line[i] = str(random.choice(noun_chunks_arch))\n", | |
" words_line[i] = str(words_line[i])\n", | |
" \n", | |
" print(textwrap.fill(\" \".join(words_line)))" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.9.13" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment