Skip to content

Instantly share code, notes, and snippets.

@allanbatista
Created May 19, 2020 13:26
Show Gist options
  • Save allanbatista/66fe0f2ab3f4c34909c5e3443f15f278 to your computer and use it in GitHub Desktop.
Save allanbatista/66fe0f2ab3f4c34909c5e3443f15f278 to your computer and use it in GitHub Desktop.
Word Pertubation Tensorflow.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Word Pertubation Tensorflow.ipynb",
"provenance": [],
"collapsed_sections": [],
"authorship_tag": "ABX9TyOj2P/ZfSeY7xYxVIcLOUEd",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/allanbatista/66fe0f2ab3f4c34909c5e3443f15f278/word-pertubation-tensorflow.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"metadata": {
"id": "tLLLrt96_yK4",
"colab_type": "code",
"colab": {}
},
"source": [
"import tensorflow as tf\n",
"\n",
"\n",
"def uniform_random_drop_sequence_fn(sequence_size, ration=0.1):\n",
"    \"\"\"\n",
"    Build a function that randomly drops tokens from one right-padded sequence.\n",
"\n",
"    The returned function removes a random subset of the non-padding rows of a\n",
"    2-D (steps x features) tensor and appends all-zero rows at the end, so the\n",
"    result has sequence_size rows again.\n",
"\n",
"    Note: this only works with right padding (all-zero rows at the end).\n",
"\n",
"    Example: o rato roeu o roupa do rei de roma -> o rato roeu o roupa do de roma\n",
"\n",
"    Args:\n",
"        sequence_size: number of rows of every padded sequence.\n",
"        ration: fraction of the real tokens to drop (rounded down). At least one\n",
"            token is dropped whenever the sequence has two or more tokens; the\n",
"            last remaining token is never dropped.\n",
"\n",
"    Returns:\n",
"        A function mapping one padded sequence to its perturbed, re-padded version.\n",
"    \"\"\"\n",
"\n",
"    ration = tf.constant(ration, dtype=tf.float32)\n",
"\n",
"    def _uniform_random_drop_sequence(sequence):\n",
"        # A row is padding only when every feature is zero. Testing the row sum\n",
"        # instead would also discard real tokens whose features cancel out.\n",
"        is_token = tf.reduce_any(tf.not_equal(sequence, 0), axis=1)\n",
"        tokens = tf.boolean_mask(sequence, is_token)\n",
"        total_tokens = tf.shape(tokens)[0]\n",
"\n",
"        drop_quantity = tf.math.floor(tf.cast(total_tokens, dtype=tf.float32) * ration)\n",
"        drop_quantity = tf.cast(drop_quantity, dtype=tf.int32)\n",
"        # Drop at least one token, but never the only remaining one.\n",
"        drop_quantity = tf.maximum(drop_quantity, 1)\n",
"        drop_quantity = tf.minimum(drop_quantity, tf.maximum(total_tokens - 1, 0))\n",
"\n",
"        # argsort of uniform noise is a random permutation of 0..n-1, so exactly\n",
"        # drop_quantity positions receive a rank below the threshold.\n",
"        keep_mask = tf.argsort(tf.random.uniform([total_tokens])) >= drop_quantity\n",
"        kept = tf.boolean_mask(tokens, keep_mask)\n",
"\n",
"        # Re-pad on the right using the input dtype so the concat also works for\n",
"        # non-float32 sequences.\n",
"        pad_rows = sequence_size - tf.shape(kept)[0]\n",
"        pad_shape = tf.stack([pad_rows, tf.shape(sequence)[1]])\n",
"        right_pad = tf.zeros(pad_shape, dtype=sequence.dtype)\n",
"\n",
"        return tf.concat([kept, right_pad], axis=0)\n",
"\n",
"    return _uniform_random_drop_sequence\n",
"\n",
"\n",
"def uniform_random_drop_sequences(sequences, sequence_size, ration=0.1):\n",
"    \"\"\"Apply the random token drop to every sequence of a batch via tf.map_fn.\"\"\"\n",
"    drop_one = uniform_random_drop_sequence_fn(sequence_size=sequence_size, ration=ration)\n",
"    return tf.map_fn(drop_one, sequences, dtype=sequences.dtype)\n",
"\n",
"\n",
"def uniform_random_swap_sequence(sequence_size, ration=0.1):\n",
"    \"\"\"\n",
"    Build a function that randomly swaps adjacent tokens of one right-padded sequence.\n",
"\n",
"    Only real (non-padding) tokens take part in a swap, and the swapped pairs\n",
"    never overlap, so the result is always a permutation of the input rows with\n",
"    the padding left at the end.\n",
"\n",
"    Example: o rato roeu o roupa do rei de roma -> o [roeu rato] o roupa do rei [roma de]\n",
"\n",
"    Args:\n",
"        sequence_size: number of rows of every padded sequence.\n",
"        ration: fraction of the real tokens used as the number of swaps (rounded\n",
"            down, at least one when the sequence has two or more tokens).\n",
"\n",
"    Returns:\n",
"        A function mapping one padded sequence to its perturbed version.\n",
"    \"\"\"\n",
"    ration = tf.constant(ration, dtype=tf.float32)\n",
"\n",
"    def _uniform_random_swap_sequence(sequence):\n",
"        # A row is padding only when every feature is zero.\n",
"        is_token = tf.reduce_any(tf.not_equal(sequence, 0), axis=1)\n",
"        total_tokens = tf.reduce_sum(tf.cast(is_token, dtype=tf.int32))\n",
"\n",
"        swap_quantity = tf.math.floor(tf.cast(total_tokens, dtype=tf.float32) * ration)\n",
"        swap_quantity = tf.maximum(tf.cast(swap_quantity, dtype=tf.int32), 1)\n",
"\n",
"        # Candidate left positions are taken two apart (all even or all odd), so\n",
"        # two chosen pairs can never share a token. The random start keeps every\n",
"        # adjacent pair reachable; it stays 0 when only one pair exists.\n",
"        last_left = tf.maximum(total_tokens - 1, 0)\n",
"        first_left = tf.random.uniform([], maxval=tf.minimum(2, tf.maximum(last_left, 1)), dtype=tf.int32)\n",
"        candidates = tf.range(first_left, last_left, 2)\n",
"        left = tf.random.shuffle(candidates)[:swap_quantity]\n",
"        right = left + 1\n",
"\n",
"        # Exchange the two entries of every chosen pair in the gather index.\n",
"        indexies = tf.range(sequence_size)\n",
"        indexies = tf.tensor_scatter_nd_update(indexies, tf.reshape(left, (-1, 1)), right)\n",
"        indexies = tf.tensor_scatter_nd_update(indexies, tf.reshape(right, (-1, 1)), left)\n",
"\n",
"        return tf.gather(sequence, indexies)\n",
"\n",
"    return _uniform_random_swap_sequence\n",
"\n",
"\n",
"def uniform_random_swap_sequences(sequences, sequence_size, ration=0.1):\n",
"    \"\"\"Apply the random adjacent-token swap to every sequence of a batch via tf.map_fn.\"\"\"\n",
"    swap_one = uniform_random_swap_sequence(sequence_size=sequence_size, ration=ration)\n",
"    return tf.map_fn(swap_one, sequences, dtype=sequences.dtype)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "_tDHJZWpCu32",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 458
},
"outputId": "f204cd3b-349a-45bd-b6df-7695e24307de"
},
"source": [
"# Four right-padded examples of 5 steps x 2 features holding 2, 3, 4 and 5 real rows.\n",
"sequences = tf.stack(\n",
"    [\n",
"        tf.concat(\n",
"            [tf.round(tf.random.uniform((length, 2)) * 10), tf.zeros((5 - length, 2))],\n",
"            axis=0,\n",
"        )\n",
"        for length in range(2, 6)\n",
"    ]\n",
")\n",
"sequences"
],
"execution_count": 74,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<tf.Tensor: shape=(4, 5, 2), dtype=float32, numpy=\n",
"array([[[ 0., 7.],\n",
" [ 8., 8.],\n",
" [ 0., 0.],\n",
" [ 0., 0.],\n",
" [ 0., 0.]],\n",
"\n",
" [[ 8., 4.],\n",
" [ 9., 8.],\n",
" [ 0., 1.],\n",
" [ 0., 0.],\n",
" [ 0., 0.]],\n",
"\n",
" [[ 6., 9.],\n",
" [ 0., 3.],\n",
" [ 1., 5.],\n",
" [ 1., 6.],\n",
" [ 0., 0.]],\n",
"\n",
" [[ 2., 5.],\n",
" [ 8., 1.],\n",
" [ 0., 1.],\n",
" [ 7., 10.],\n",
" [ 4., 2.]]], dtype=float32)>"
]
},
"metadata": {
"tags": []
},
"execution_count": 74
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "O-bBVD3vBV1f",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 458
},
"outputId": "8aee0a12-aa6a-43ed-9368-79f90ec3f29d"
},
"source": [
"# Perturb the demo batch by dropping tokens with ration=0.2.\n",
"uniform_random_drop_sequence = uniform_random_drop_sequences(\n",
"    sequences,\n",
"    sequence_size=sequences.get_shape()[1],\n",
"    ration=0.2,\n",
")"
],
"execution_count": 62,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<tf.Tensor: shape=(4, 5, 2), dtype=float32, numpy=\n",
"array([[[0.37371004, 0.32919097],\n",
" [0. , 0. ],\n",
" [0. , 0. ],\n",
" [0. , 0. ],\n",
" [0. , 0. ]],\n",
"\n",
" [[0.054371 , 0.04614568],\n",
" [0.65778756, 0.3740257 ],\n",
" [0. , 0. ],\n",
" [0. , 0. ],\n",
" [0. , 0. ]],\n",
"\n",
" [[0.34164703, 0.3842957 ],\n",
" [0.24133039, 0.05708456],\n",
" [0.8817282 , 0.74781 ],\n",
" [0. , 0. ],\n",
" [0. , 0. ]],\n",
"\n",
" [[0.4453119 , 0.26886618],\n",
" [0.38716066, 0.72081804],\n",
" [0.46648932, 0.9998658 ],\n",
" [0.65702033, 0.6006948 ],\n",
" [0. , 0. ]]], dtype=float32)>"
]
},
"metadata": {
"tags": []
},
"execution_count": 62
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "0H4gFolpE9Ba",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 458
},
"outputId": "2d9e944a-955c-4a4f-9a2c-9c862614e67f"
},
"source": [
"# Perturb the demo batch by swapping adjacent tokens with ration=0.2.\n",
"uniform_random_swap_sequences(\n",
"    sequences,\n",
"    sequence_size=sequences.get_shape()[1],\n",
"    ration=0.2,\n",
")"
],
"execution_count": 63,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"<tf.Tensor: shape=(4, 5, 2), dtype=float32, numpy=\n",
"array([[[0.37371004, 0.32919097],\n",
" [0.85325074, 0.54138684],\n",
" [0. , 0. ],\n",
" [0. , 0. ],\n",
" [0. , 0. ]],\n",
"\n",
" [[0.054371 , 0.04614568],\n",
" [0.65778756, 0.3740257 ],\n",
" [0.7399832 , 0.44001162],\n",
" [0. , 0. ],\n",
" [0. , 0. ]],\n",
"\n",
" [[0.144961 , 0.7626959 ],\n",
" [0.24133039, 0.05708456],\n",
" [0.34164703, 0.3842957 ],\n",
" [0.8817282 , 0.74781 ],\n",
" [0. , 0. ]],\n",
"\n",
" [[0.4453119 , 0.26886618],\n",
" [0.38716066, 0.72081804],\n",
" [0.46648932, 0.9998658 ],\n",
" [0.75947475, 0.3422799 ],\n",
" [0.65702033, 0.6006948 ]]], dtype=float32)>"
]
},
"metadata": {
"tags": []
},
"execution_count": 63
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "ibfv86ubGK3i",
"colab_type": "code",
"colab": {}
},
"source": [
""
],
"execution_count": 0,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment