Skip to content

Instantly share code, notes, and snippets.

@kiransair
Last active March 28, 2024 10:01
Show Gist options
  • Save kiransair/20b0ceea207f93f2a5a4d85fa90f1e59 to your computer and use it in GitHub Desktop.
Save kiransair/20b0ceea207f93f2a5a4d85fa90f1e59 to your computer and use it in GitHub Desktop.
TF_Forum_23337.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"authorship_tag": "ABX9TyPoXGRogeQOJq3P6haVMPw1",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/kiransair/20b0ceea207f93f2a5a4d85fa90f1e59/tf_forum_23337.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"source": [
"import numpy as np\n",
"import string\n",
"import re\n",
"import sys\n",
"\n",
"import os\n",
"os.environ[\"KERAS_BACKEND\"] = \"tensorflow\"\n",
"\n",
"import keras\n",
"from keras import layers\n",
"\n",
"import tensorflow as tf\n",
"import tensorflow_text"
],
"metadata": {
"id": "W0YmCAmcbxFH"
},
"execution_count": 22,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import keras_nlp\n",
"\n",
"from keras_nlp.tokenizers import SentencePieceTokenizer\n",
"\n",
"import datasets # HuggingFace\n",
"from datasets import Dataset\n",
"\n",
"SPtokenizer = None"
],
"metadata": {
"id": "eLydGBEnb13H"
},
"execution_count": 23,
"outputs": []
},
{
"cell_type": "code",
"source": [
"print(f'''\\ttensorflow={tf.__version__}\n",
" tensorflow_text={tensorflow_text.__version__}\n",
" keras={keras.__version__}\n",
" keras_nlp={keras_nlp.__version__}\n",
" ''')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "sbCZvTT1b57B",
"outputId": "8d9080ac-de4b-4d34-ff30-8f788a9d0726"
},
"execution_count": 24,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\ttensorflow=2.16.1\n",
" tensorflow_text=2.16.1\n",
" keras=3.1.1\n",
" keras_nlp=0.8.2\n",
" \n"
]
}
]
},
{
"cell_type": "code",
"source": [
" batch_size = 16\n",
" max_seq_len = 100\n",
" bos_idx = 0\n",
" eos_idx = 99\n",
"\n",
" embed_size = 100"
],
"metadata": {
"id": "CUF22GpHb54F"
},
"execution_count": 25,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import datasets"
],
"metadata": {
"id": "rYGSQwz9b51B"
},
"execution_count": 26,
"outputs": []
},
{
"cell_type": "code",
"source": [
"LH_dataset_HF = datasets.load_dataset(\"nguha/legalbench\", 'learned_hands_torts')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "kOnUAG_db5yD",
"outputId": "9944c673-ced8-4ddb-cbed-5e9d44126921"
},
"execution_count": 27,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.10/dist-packages/datasets/load.py:1461: FutureWarning: The repository for nguha/legalbench contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/nguha/legalbench\n",
"You can avoid this message in future by passing the argument `trust_remote_code=True`.\n",
"Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.\n",
" warnings.warn(\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"LH_dataset_HF"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "2Q6GcCu_b5vA",
"outputId": "349ff0c6-788c-461d-e711-42ae5739b0c8"
},
"execution_count": 28,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"DatasetDict({\n",
" train: Dataset({\n",
" features: ['answer', 'index', 'text'],\n",
" num_rows: 6\n",
" })\n",
" test: Dataset({\n",
" features: ['answer', 'index', 'text'],\n",
" num_rows: 432\n",
" })\n",
"})"
]
},
"metadata": {},
"execution_count": 28
}
]
},
{
"cell_type": "code",
"source": [
"LH_dataset_HF['train']['answer']"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Lyvrrxq8b5rn",
"outputId": "8b14cf0a-86ba-4b21-8401-75af4480ff63"
},
"execution_count": 29,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"['Yes', 'Yes', 'Yes', 'No', 'No', 'No']"
]
},
"metadata": {},
"execution_count": 29
}
]
},
{
"cell_type": "code",
"source": [
"LH_dataset_HF['train']['text']"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "pu0dROykcM12",
"outputId": "3264d87e-fffa-429f-ba3a-d2a43b43b002"
},
"execution_count": 30,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[\"My roommate and I were feeling unwell in our basement apartment for a long time. We discovered a drier was exhausting directly into our unit. We asked the landlord to fix it, but he did, and ever since then it has gotten way worse. There's a chemical smell in the air and staying in the apt more than ~15 mins causes extreme fatigue, loss of focus, sinuses closing up, and chest tightness (Whatever it is isn't triggering the CO2 or natural gas alarm) Landlord agreed to terminate our lease and let us keep our stuff in there unpaid for the first few weeks but now he wants us to set a move out date and pack up. But I feel really bad when I stay in here. The last time I went in to get my stuff I fell asleep suddenly and woke up 2 hours later with a nosebleed and difficulty breathing. Landlord refuses to hire an indoor air quality inspector and says he plans on sealing off the whole basement and no longer renting it. I called the housing inspector but he said he only inspects whole houses, not just the basement... so I would need to go through my landlord... what do I do?\",\n",
" 'Had a bad interaction with an employee. Attempted to 1 star review the company saying that some staff are not very welcoming to new community members. It was replied to by the owner who explained I have been trying to contact most of his employees outside of work hours for things not related to work(turns out this part was true, but he left out the part that I had no idea they worked there, and they it was through either dating apps or fetlife). I replied to defend myself from the accusation and the review was deleted. I bough a bunch of dislikes because im petty and it was cheap, how much if any trouble can I get in',\n",
" \"Hi everybody, I'm in need of some advice/guidance. I'm about to release an app, which is basically based on seeing people in your area (through an interactive map) and then being able to message them and ultimately meet up. Unfortunately, that can obviously lead to somebody getting hurt or robbed. My question is, would I be considered liable or get into any kind of trouble if that did happen (seeing as how they used my app)? And, if so, is there any way I can protect myself from that? Sorry if that sounds unethical or something I'm in Houston, TX, btw\",\n",
" 'I\\'ve been honorably discharged from the military. Under the military clause in my lease it says that with proof of orders (in this case, it\\'s \"separation orders\") and rent for 30 days I am able to break my lease. Because I have separation orders rather than transfer orders am I still obligated to pay the lease breaking penalty?',\n",
" \"My mom rented out the 1st-floor apartment to my uncle. He moved his girlfriend in and now they have a rodent infestation. They have complained to my mom about it. She calls the exterminators and the girlfriend won't let them in. The girlfriend takes their traps from them at the door and sends them away. The rodent problem is still there. My uncle is upset but won't stand up to the girlfriend. My Aunt's family lives on the second floor and is completely frustrated. More recently we had an offer for free upgrades to the home and this same girlfriend won't allow anyone in to take measurements. What legal steps can we take to save this house from falling apart?\",\n",
" 'Hey guys. The title pretty much sums it up. Here\\'s the situation: Smoking of any sort is not allowed in my apartment, so I smoke in my car. I don\\'t smoke much, so I\\'ll have a single puff in my car for about 5-7 minutes, then go back inside. We have street parking on public roads. Anyway, I guess someone complained, because then my landlord shows up from out of nowhere, and approached me in my car. Now, where I live, weed has basically been decriminalized. At least, when you get caught with it in small amounts (all I ever carry is maybe 1g) you get something like a $100 ticket. Anyway, my landlord approaches me, and tells me to get out of the car like he\\'s some sort of cop. I do, and greet him. He says, \"I can have you evicted for this.\" I say, \"for smoking weed?\" and he says, \"yes, the lease prohibits it.\" I say, \"the lease prohibits smoking inside of the apartment.\" He says, \"we\\'ll see about that,\" and drives off. He then sends me an email saying, \"tomorrow I will be serving you with a notice to vacate for violating the lease.\" My lease does not end until October. How is this going to play out for me?']"
]
},
"metadata": {},
"execution_count": 30
}
]
},
{
"cell_type": "code",
"source": [
"trainDS = LH_dataset_HF['train'].to_tf_dataset(\n",
" columns=\"text\",\n",
" label_cols=\"answer\",\n",
" batch_size=batch_size,\n",
" shuffle=False,\n",
" )\n",
""
],
"metadata": {
"id": "mvi752XzcnD6"
},
"execution_count": 31,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import requests"
],
"metadata": {
"id": "fcukb9AscMy_"
},
"execution_count": 32,
"outputs": []
},
{
"cell_type": "code",
"source": [
"url = \"https://github.com/tensorflow/text/blob/master/tensorflow_text/python/ops/test_data/test_oss_model.model?raw=true\"\n",
"sp_model = requests.get(url).content"
],
"metadata": {
"id": "uxC5kte4cMwS"
},
"execution_count": 33,
"outputs": []
},
{
"cell_type": "code",
"source": [
"trainTF = LH_dataset_HF['train'].with_format(\"tf\")"
],
"metadata": {
"id": "2WWPNipocMtX"
},
"execution_count": 34,
"outputs": []
},
{
"cell_type": "code",
"source": [
"global SPtokenizer\n",
"SPtokenizer = SentencePieceTokenizer(proto=sp_model,sequence_length=32)\n",
"SPvocabDict = {SPtokenizer.id_to_token(i): i for i in range(SPtokenizer.vocabulary_size())}\n",
"SPvocabList = SPtokenizer.get_vocabulary()"
],
"metadata": {
"id": "ZSDpzMq0eUAY"
},
"execution_count": 35,
"outputs": []
},
{
"cell_type": "code",
"source": [
" print(f\"trainTF shape={trainTF.shape} answer shape={trainTF['answer'].shape}\")\n",
" print(f\"all answers={trainTF['answer']}\")\n",
"\n",
" LH_Torts_Train0 = LH_dataset_HF['train']['text'][0]\n",
" # '''My roommate and I were feeling unwell in our basement apartment for a long time. We discovered a drier was exhausting directly into our unit. We asked the landlord to fix it, but he did, and ever since then it has gotten way worse. There's a chemical smell in the air and staying in the apt more than ~15 mins causes extreme fatigue, loss of focus, sinuses closing up, and chest tightness (Whatever it is isn't triggering the CO2 or natural gas alarm) Landlord agreed to terminate our lease and let us keep our stuff in there unpaid for the first few weeks but now he wants us to set a move out date and pack up. But I feel really bad when I stay in here. The last time I went in to get my stuff I fell asleep suddenly and woke up 2 hours later with a nosebleed and difficulty breathing. Landlord refuses to hire an indoor air quality inspector and says he plans on sealing off the whole basement and no longer renting it. I called the housing inspector but he said he only inspects whole houses, not just the basement... so I would need to go through my landlord... what do I do?'''\n",
" input_text = LH_Torts_Train0\n",
" tokens = SPtokenizer.tokenize(input_text)\n",
" print('tokens:',tokens)\n",
" input_ids = SPtokenizer.detokenize(tokens)\n",
" echo1 = tf.Variable(input_ids).numpy()\n",
" print('echo1: ',echo1)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "yDMYX6mYcMqc",
"outputId": "332063db-63f5-4398-a657-09ed430b0141"
},
"execution_count": 36,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"trainTF shape=(6, 3) answer shape=(6,)\n",
"all answers=[b'Yes' b'Yes' b'Yes' b'No' b'No' b'No']\n",
"tokens: tf.Tensor(\n",
"[564 139 26 186 13 9 116 650 18 210 63 16 88 20 288 80 19 91\n",
" 240 11 29 48 15 240 42 11 308 171 6 416 244 28], shape=(32,), dtype=int32)\n",
"echo1: b'My roommate and I were feeling unwell in our basement apartment for a long time. We disc'\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"for i in trainDS.take(1):\n",
" print(i)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "g_wEApYOcs2E",
"outputId": "e40f44b4-e215-42a2-9ce1-a258be8e5695"
},
"execution_count": 37,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"(<tf.Tensor: shape=(6,), dtype=string, numpy=\n",
"array([b\"My roommate and I were feeling unwell in our basement apartment for a long time. We discovered a drier was exhausting directly into our unit. We asked the landlord to fix it, but he did, and ever since then it has gotten way worse. There's a chemical smell in the air and staying in the apt more than ~15 mins causes extreme fatigue, loss of focus, sinuses closing up, and chest tightness (Whatever it is isn't triggering the CO2 or natural gas alarm) Landlord agreed to terminate our lease and let us keep our stuff in there unpaid for the first few weeks but now he wants us to set a move out date and pack up. But I feel really bad when I stay in here. The last time I went in to get my stuff I fell asleep suddenly and woke up 2 hours later with a nosebleed and difficulty breathing. Landlord refuses to hire an indoor air quality inspector and says he plans on sealing off the whole basement and no longer renting it. I called the housing inspector but he said he only inspects whole houses, not just the basement... so I would need to go through my landlord... what do I do?\",\n",
" b'Had a bad interaction with an employee. Attempted to 1 star review the company saying that some staff are not very welcoming to new community members. It was replied to by the owner who explained I have been trying to contact most of his employees outside of work hours for things not related to work(turns out this part was true, but he left out the part that I had no idea they worked there, and they it was through either dating apps or fetlife). I replied to defend myself from the accusation and the review was deleted. I bough a bunch of dislikes because im petty and it was cheap, how much if any trouble can I get in',\n",
" b\"Hi everybody, I'm in need of some advice/guidance. I'm about to release an app, which is basically based on seeing people in your area (through an interactive map) and then being able to message them and ultimately meet up. Unfortunately, that can obviously lead to somebody getting hurt or robbed. My question is, would I be considered liable or get into any kind of trouble if that did happen (seeing as how they used my app)? And, if so, is there any way I can protect myself from that? Sorry if that sounds unethical or something I'm in Houston, TX, btw\",\n",
" b'I\\'ve been honorably discharged from the military. Under the military clause in my lease it says that with proof of orders (in this case, it\\'s \"separation orders\") and rent for 30 days I am able to break my lease. Because I have separation orders rather than transfer orders am I still obligated to pay the lease breaking penalty?',\n",
" b\"My mom rented out the 1st-floor apartment to my uncle. He moved his girlfriend in and now they have a rodent infestation. They have complained to my mom about it. She calls the exterminators and the girlfriend won't let them in. The girlfriend takes their traps from them at the door and sends them away. The rodent problem is still there. My uncle is upset but won't stand up to the girlfriend. My Aunt's family lives on the second floor and is completely frustrated. More recently we had an offer for free upgrades to the home and this same girlfriend won't allow anyone in to take measurements. What legal steps can we take to save this house from falling apart?\",\n",
" b'Hey guys. The title pretty much sums it up. Here\\'s the situation: Smoking of any sort is not allowed in my apartment, so I smoke in my car. I don\\'t smoke much, so I\\'ll have a single puff in my car for about 5-7 minutes, then go back inside. We have street parking on public roads. Anyway, I guess someone complained, because then my landlord shows up from out of nowhere, and approached me in my car. Now, where I live, weed has basically been decriminalized. At least, when you get caught with it in small amounts (all I ever carry is maybe 1g) you get something like a $100 ticket. Anyway, my landlord approaches me, and tells me to get out of the car like he\\'s some sort of cop. I do, and greet him. He says, \"I can have you evicted for this.\" I say, \"for smoking weed?\" and he says, \"yes, the lease prohibits it.\" I say, \"the lease prohibits smoking inside of the apartment.\" He says, \"we\\'ll see about that,\" and drives off. He then sends me an email saying, \"tomorrow I will be serving you with a notice to vacate for violating the lease.\" My lease does not end until October. How is this going to play out for me?'],\n",
" dtype=object)>, <tf.Tensor: shape=(6,), dtype=string, numpy=array([b'Yes', b'Yes', b'Yes', b'No', b'No', b'No'], dtype=object)>)\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"\n",
"\n",
"def preprocess_text(text, label):\n",
" tokens = SPtokenizer.tokenize(text)\n",
" input_ids = tokens\n",
"\n",
" answer = tf.where(label == \"Yes\", 1, 0)\n",
"\n",
" return input_ids, answer\n",
"\n",
"trainDS = trainDS.map(preprocess_text)\n",
"\n"
],
"metadata": {
"id": "QMC_JZijc-Ks"
},
"execution_count": 38,
"outputs": []
},
{
"cell_type": "code",
"source": [
"for i in trainDS.take(1):\n",
" print(i)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "fk6Beegoc-H8",
"outputId": "a449fa83-ce90-4ce6-f98f-ad6f308aae32"
},
"execution_count": 39,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"(<tf.Tensor: shape=(6, 32), dtype=int32, numpy=\n",
"array([[564, 139, 26, 186, 13, 9, 116, 650, 18, 210, 63, 16, 88,\n",
" 20, 288, 80, 19, 91, 240, 11, 29, 48, 15, 240, 42, 11,\n",
" 308, 171, 6, 416, 244, 28],\n",
" [ 4, 165, 218, 11, 412, 20, 190, 19, 409, 44, 163, 250, 26,\n",
" 29, 31, 21, 34, 16, 16, 6, 104, 15, 142, 26, 29, 228,\n",
" 10, 4, 357, 637, 70, 87],\n",
" [ 4, 165, 25, 588, 419, 5, 9, 32, 26, 20, 649, 14, 146,\n",
" 11, 17, 87, 107, 16, 0, 37, 53, 140, 254, 6, 9, 32,\n",
" 26, 106, 10, 70, 82, 19],\n",
" [ 9, 32, 125, 153, 788, 726, 244, 92, 48, 37, 12, 101, 7,\n",
" 402, 31, 114, 48, 34, 6, 4, 352, 24, 17, 40, 7, 402,\n",
" 31, 114, 48, 34, 78, 180],\n",
" [564, 264, 26, 70, 24, 228, 99, 7, 4, 357, 85, 35, 57,\n",
" 31, 21, 54, 11, 29, 48, 15, 240, 10, 45, 210, 28, 82,\n",
" 6, 151, 264, 87, 12, 72],\n",
" [151, 34, 782, 8, 6, 69, 552, 15, 82, 848, 268, 51, 156,\n",
" 8, 27, 119, 6, 151, 56, 32, 8, 7, 608, 53, 149, 224,\n",
" 168, 26, 21, 46, 18, 14]], dtype=int32)>, <tf.Tensor: shape=(6,), dtype=int32, numpy=array([1, 1, 1, 0, 0, 0], dtype=int32)>)\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"trainDS = trainDS.map(lambda text, label: (text, tf.expand_dims(label, -1)))"
],
"metadata": {
"id": "9Re0xkM6c-FH"
},
"execution_count": 40,
"outputs": []
},
{
"cell_type": "code",
"source": [
"model = tf.keras.Sequential([\n",
" layers.Embedding(SPtokenizer.vocabulary_size(), embed_size),\n",
" layers.Dropout(0.2),\n",
" layers.GlobalAveragePooling1D(),\n",
" layers.Dropout(0.2),\n",
" layers.Dense(1, activation='sigmoid')])"
],
"metadata": {
"id": "6iQtKeQ1c-CK"
},
"execution_count": 41,
"outputs": []
},
{
"cell_type": "code",
"source": [
"model.compile(loss='binary_crossentropy',\n",
" optimizer='adam',\n",
" metrics=[tf.metrics.BinaryAccuracy(threshold=0.5)])"
],
"metadata": {
"id": "v65UfMdYc9_J"
},
"execution_count": 42,
"outputs": []
},
{
"cell_type": "code",
"source": [
"model.fit(trainDS, epochs=3)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "yD_Adlasc975",
"outputId": "815e1af8-4a32-4b68-8221-a1663e7111d1"
},
"execution_count": 43,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Epoch 1/3\n",
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m13s\u001b[0m 13s/step - binary_accuracy: 0.5000 - loss: 0.6927\n",
"Epoch 2/3\n",
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 77ms/step - binary_accuracy: 0.8333 - loss: 0.6875\n",
"Epoch 3/3\n",
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 128ms/step - binary_accuracy: 1.0000 - loss: 0.6840\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<keras.src.callbacks.history.History at 0x78dcc3003bb0>"
]
},
"metadata": {},
"execution_count": 43
}
]
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "pjflfmXlevrv"
},
"execution_count": null,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment