Skip to content

Instantly share code, notes, and snippets.

@sujnesh
Created February 25, 2021 17:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sujnesh/79a95e6274056831a5ea239618aff156 to your computer and use it in GitHub Desktop.
Save sujnesh/79a95e6274056831a5ea239618aff156 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"DLNLP_1_training.ipynb","provenance":[{"file_id":"1EITQsi_KfxiDa-0n2H3B60ya9eDZxmlk","timestamp":1614273981603},{"file_id":"1OQWcwvlFXpVa4jY3xwgWsv1YNP82zPsh","timestamp":1614187490944},{"file_id":"1IkS10nv6TZlyEye7XP5mOH5FN9ZOSMzp","timestamp":1613390197346}],"collapsed_sections":[],"toc_visible":true},"kernelspec":{"name":"python3","display_name":"Python 3"},"widgets":{"application/vnd.jupyter.widget-state+json":{"5f2953a72f2042d49184ead9117cbf73":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","state":{"_view_name":"HBoxView","_dom_classes":[],"_model_name":"HBoxModel","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.5.0","box_style":"","layout":"IPY_MODEL_461f11d60378466e8bb4aa35661c5bc7","_model_module":"@jupyter-widgets/controls","children":["IPY_MODEL_c1d26b5ee62c4f6d88f6f23017a466b0","IPY_MODEL_75027d1d70f6453196ff4c5515026b1b"]}},"461f11d60378466e8bb4aa35661c5bc7":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"l
eft":null}},"c1d26b5ee62c4f6d88f6f23017a466b0":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","state":{"_view_name":"ProgressView","style":"IPY_MODEL_6aba6c7f2f0a4793b75728e00429313e","_dom_classes":[],"description":"test.csv: 100%","_model_name":"FloatProgressModel","bar_style":"success","max":929146,"_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":929146,"_view_count":null,"_view_module_version":"1.5.0","orientation":"horizontal","min":0,"description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_1455993f735749dbb12e6b37feb3a0be"}},"75027d1d70f6453196ff4c5515026b1b":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","state":{"_view_name":"HTMLView","style":"IPY_MODEL_629fcbd9e79e4e3db097aaab74d8ce49","_dom_classes":[],"description":"","_model_name":"HTMLModel","placeholder":"​","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":" 929k/929k [00:02&lt;00:00, 
332kB/s]","_view_count":null,"_view_module_version":"1.5.0","description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_3c97da95822749ffaaf4fc6f366e95de"}},"6aba6c7f2f0a4793b75728e00429313e":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","state":{"_view_name":"StyleView","_model_name":"ProgressStyleModel","description_width":"initial","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","bar_color":null,"_model_module":"@jupyter-widgets/controls"}},"1455993f735749dbb12e6b37feb3a0be":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"629fcbd9e79e4e3db097aaab74d8ce49":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","state":{"_view_name":"StyleView","_model_name":"DescriptionStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","_model_module":"@jupyter-widgets/controls"}},"3c97da95822749ffaaf4fc6f366e95d
e":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"6c5e759b25764fa28fb67cf9890bb082":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","state":{"_view_name":"HBoxView","_dom_classes":[],"_model_name":"HBoxModel","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.5.0","box_style":"","layout":"IPY_MODEL_25b640cb177f43549f579a61ca41932f","_model_module":"@jupyter-widgets/controls","children":["IPY_MODEL_6d6fceaec873483d8e5056be146f0039","IPY_MODEL_3f98b0437e6d4ab4a79cc3c7a56d6e0e"]}},"25b640cb177f43549f579a61ca41932f":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":n
ull,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"6d6fceaec873483d8e5056be146f0039":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","state":{"_view_name":"ProgressView","style":"IPY_MODEL_3c17cab91bca448a8f4eed615b45ccc1","_dom_classes":[],"description":"train.csv: 100%","_model_name":"FloatProgressModel","bar_style":"success","max":5140653,"_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":5140653,"_view_count":null,"_view_module_version":"1.5.0","orientation":"horizontal","min":0,"description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_dfb5b47e326e498883f2dec217711489"}},"3f98b0437e6d4ab4a79cc3c7a56d6e0e":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","state":{"_view_name":"HTMLView","style":"IPY_MODEL_37d2e5f6b813417fa67fb79754516a70","_dom_classes":[],"description":"","_model_name":"HTMLModel","placeholder":"​","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":" 5.14M/5.14M [00:01&lt;00:00, 
4.26MB/s]","_view_count":null,"_view_module_version":"1.5.0","description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_e78e88f074204ed7b3e96e74e23fb48d"}},"3c17cab91bca448a8f4eed615b45ccc1":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","state":{"_view_name":"StyleView","_model_name":"ProgressStyleModel","description_width":"initial","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","bar_color":null,"_model_module":"@jupyter-widgets/controls"}},"dfb5b47e326e498883f2dec217711489":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"37d2e5f6b813417fa67fb79754516a70":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","state":{"_view_name":"StyleView","_model_name":"DescriptionStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","_model_module":"@jupyter-widgets/controls"}},"e78e88f074204ed7b3e96e74e23fb4
8d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}}}}},"cells":[{"cell_type":"markdown","metadata":{"id":"Yl2Ccct-22CN"},"source":["<div style=\"text-align: center\">\n"," <a href=\"https://www.aicrowd.com/challenges/dlnlp-note\"><img alt=\"AIcrowd\" src=\"https://gitlab.aicrowd.com/S.Rathi/iit-b-notebook-misc/-/raw/S.Rathi-master-patch-59012/creative_updated%20on%208.2.21_1%20_desktopbanner.jpg\"></a>\n","</div>"]},{"cell_type":"markdown","metadata":{"id":"OqD6TR0UiuMy"},"source":["# How to use this notebook? 📝\n","1. **Copy the notebook**. This is a shared template and any edits you make here will not be saved. _You should copy it into your own drive folder._ For this, click the \"File\" menu (top-left), then \"Save a Copy in Drive\". You can edit your copy however you like.\n","2. **Link it to your AIcrowd account**. In order to submit your code to AICrowd, you need to provide your account's API key [here](https://colab.research.google.com/drive/1EITQsi_KfxiDa-0n2H3B60ya9eDZxmlk#scrollTo=H7iqy5XcWeHN&line=4&uniqifier=1).\n","3. 
**Modify** the predefined functions for preprocessing, softmax, training etc.\n","4. **Save** your trained model.\n","\n"]},{"cell_type":"markdown","metadata":{"id":"7xo2UdutLUpg"},"source":["# Dataset Specifications 💾\n","\n","* **train.csv**: has 3 columns with the latter two being 'reviews' & corresponding 'ratings'.\n","* **test.csv**: has 2 columns with the latter being 'reviews'. You will have to predict the corresponding 'ratings'.\n","* 'ratings' in predictions should be integers in range \\[1,5\\] i.e. {1,2,3,4,5}"]},{"cell_type":"markdown","metadata":{"id":"iIw4tX5SdMVn"},"source":["# Install AIcrowd Utilities 🧰\n","\n","We will install `aicrowd-cli` that can help us manage some trivial tasks and make our lives easier."]},{"cell_type":"code","metadata":{"id":"qjHTtoBG5aAh","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614266927478,"user_tz":-330,"elapsed":10604,"user":{"displayName":"Dipam Chakraborty","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhjYbzjRrx303amHrTPU3lUOY9OMpxKTrTzhtYd=s64","userId":"04496869744334527762"}},"outputId":"ad89348f-2328-458b-8c23-96d413f7f473"},"source":["!pip install -U git+https://gitlab.aicrowd.com/aicrowd/aicrowd-cli.git > /dev/null"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" Running command git clone -q https://gitlab.aicrowd.com/aicrowd/aicrowd-cli.git /tmp/pip-req-build-0awb7ht1\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"QDpkSmY6jf9c"},"source":["Load AIcrowd magic commands"]},{"cell_type":"code","metadata":{"id":"5mMLkbvTjfd-"},"source":["%load_ext aicrowd.magic"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"z2VrYX9KowAR"},"source":["# Import necessary modules and packages 📚"]},{"cell_type":"code","metadata":{"id":"K7J7E7RWWXju"},"source":["import os\n","import pandas as pd\n","import numpy as np\n","\n","#Add your necessary modules & packages 
here"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"0rUE152lf4wv"},"source":["# AIcrowd Runtime Configuration ⚙️\n","\n","The contents of the `ASSETS_DIR` are copied to your GDrive."]},{"cell_type":"code","metadata":{"id":"H7iqy5XcWeHN"},"source":["class AIcrowdConfig:\n"," DATASET_DIR = \"data\"\n"," TEST_DATA_PATH = os.path.join(DATASET_DIR, \"test.csv\")\n"," TRAIN_DATA_PATH = os.path.join(DATASET_DIR, \"train.csv\")\n"," ASSETS_DIR = \"assets\"\n"," API_KEY = \"\" # Get your key from https://www.aicrowd.com/participants/me (ctrl + click the link)"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"l8iIbBlCf8sf"},"source":["# Download training data 📲\n","AIcrowd magic functions will download the dataset after authenticating your API key."]},{"cell_type":"code","metadata":{"id":"Mfd1YA-6W3-F","colab":{"base_uri":"https://localhost:8080/","height":149,"referenced_widgets":["5f2953a72f2042d49184ead9117cbf73","461f11d60378466e8bb4aa35661c5bc7","c1d26b5ee62c4f6d88f6f23017a466b0","75027d1d70f6453196ff4c5515026b1b","6aba6c7f2f0a4793b75728e00429313e","1455993f735749dbb12e6b37feb3a0be","629fcbd9e79e4e3db097aaab74d8ce49","3c97da95822749ffaaf4fc6f366e95de","6c5e759b25764fa28fb67cf9890bb082","25b640cb177f43549f579a61ca41932f","6d6fceaec873483d8e5056be146f0039","3f98b0437e6d4ab4a79cc3c7a56d6e0e","3c17cab91bca448a8f4eed615b45ccc1","dfb5b47e326e498883f2dec217711489","37d2e5f6b813417fa67fb79754516a70","e78e88f074204ed7b3e96e74e23fb48d"]},"executionInfo":{"status":"ok","timestamp":1614266970915,"user_tz":-330,"elapsed":7000,"user":{"displayName":"Dipam Chakraborty","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhjYbzjRrx303amHrTPU3lUOY9OMpxKTrTzhtYd=s64","userId":"04496869744334527762"}},"outputId":"b15248db-84cf-42b4-d4d9-98d4d487db44"},"source":["%aicrowd login --api-key \"$AIcrowdConfig.API_KEY\"\n","%aicrowd dataset download -c 
dlnlp-note"],"execution_count":null,"outputs":[{"output_type":"stream","text":["\u001b[32mAPI Key valid\u001b[0m\n","\u001b[32mSaved API Key successfully!\u001b[0m\n"],"name":"stdout"},{"output_type":"display_data","data":{"application/vnd.jupyter.widget-view+json":{"model_id":"5f2953a72f2042d49184ead9117cbf73","version_minor":0,"version_major":2},"text/plain":["HBox(children=(FloatProgress(value=0.0, description='test.csv', max=929146.0, style=ProgressStyle(description_…"]},"metadata":{"tags":[]}},{"output_type":"stream","text":["\n"],"name":"stdout"},{"output_type":"display_data","data":{"application/vnd.jupyter.widget-view+json":{"model_id":"6c5e759b25764fa28fb67cf9890bb082","version_minor":0,"version_major":2},"text/plain":["HBox(children=(FloatProgress(value=0.0, description='train.csv', max=5140653.0, style=ProgressStyle(descriptio…"]},"metadata":{"tags":[]}},{"output_type":"stream","text":["\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"LDFjY8lrk6-5"},"source":["Extract the downloaded dataset to `data` directory"]},{"cell_type":"code","metadata":{"id":"g7qa_wvIaFkT"},"source":["!mkdir $AIcrowdConfig.DATASET_DIR\n","!mv train.csv $AIcrowdConfig.DATASET_DIR\n","!mv test.csv $AIcrowdConfig.DATASET_DIR"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"ukXy2a0Qlbq2"},"source":["# Load training data 💻"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":204},"id":"MSn0e6bQZ-QV","executionInfo":{"status":"ok","timestamp":1614266979225,"user_tz":-330,"elapsed":1265,"user":{"displayName":"Dipam Chakraborty","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhjYbzjRrx303amHrTPU3lUOY9OMpxKTrTzhtYd=s64","userId":"04496869744334527762"}},"outputId":"16cec0e4-da2f-4bff-9de3-a5f81abe276b"},"source":["train_data = pd.read_csv(AIcrowdConfig.TRAIN_DATA_PATH)\n","train_data.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style 
scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>Unnamed: 0</th>\n"," <th>reviews</th>\n"," <th>ratings</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>0</td>\n"," <td>This book was very informative, covering all a...</td>\n"," <td>4</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>1</td>\n"," <td>I am already a baseball fan and knew a bit abo...</td>\n"," <td>5</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>2</td>\n"," <td>I didn't like this product it smudged all unde...</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>3</td>\n"," <td>I simply love the product. I appreciate print ...</td>\n"," <td>5</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>4</td>\n"," <td>It goes on very easily and makes my eyes look ...</td>\n"," <td>5</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>"],"text/plain":[" Unnamed: 0 reviews ratings\n","0 0 This book was very informative, covering all a... 4\n","1 1 I am already a baseball fan and knew a bit abo... 5\n","2 2 I didn't like this product it smudged all unde... 1\n","3 3 I simply love the product. I appreciate print ... 5\n","4 4 It goes on very easily and makes my eyes look ... 
5"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"markdown","metadata":{"id":"RIRuYtFklgp8"},"source":["## Preprocess the training data 🧹"]},{"cell_type":"code","metadata":{"id":"wAXwyPnhamHc"},"source":["'''\n","About the task:\n","\n","You are provided with a codeflow- which consists of functions to be implemented(MANDATORY).\n","\n","You need to implement each of the functions mentioned below, you may add your own function parameters if needed.\n","'''\n","\n","\n","def encode_data(text):\n"," # This function will be used to encode the reviews using a dictionary(created using corpus vocabulary) \n"," \n"," # Example of encoding :\"The food was fabulous but pricey\" has a vocabulary of 6 words, each one has to be mapped to an integer like: \n"," # {'The':1,'food':2,'was':3,'fabulous':4,'but':5,'pricey':6} this vocabulary has to be created for the entire corpus and then be used to \n"," # encode the words into integers \n","\n"," # return encoded examples\n"," pass\n","\n","\n","\n","def convert_to_lower(text):\n"," # return the reviews after converting them to lowercase\n"," pass\n","\n","\n","def remove_punctuation(text):\n"," # return the reviews after removing punctuations\n"," pass\n","\n","\n","def remove_stopwords(text):\n"," # return the reviews after removing the stopwords\n"," pass\n","\n","def perform_tokenization(text):\n"," # return the reviews after performing tokenization\n"," pass\n","\n","\n","def perform_padding(data):\n"," # return the reviews after padding the reviews to maximum length\n"," pass\n","\n","def preprocess_data(data):\n"," # make all the following function calls on your data\n","\n"," review = data[\"reviews\"]\n"," review = convert_to_lower(review)\n"," review = remove_punctuation(review)\n"," review = remove_stopwords(review)\n"," review = perform_tokenization(review)\n"," review = encode_data(review)\n"," processed_data = perform_padding(review)\n","\n"," # return processed_data # Uncomment this\n"," # Remove this 
dummy code at the bottom\n"," return np.zeros( (len(data[\"reviews\"]), 100) ) \n"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"H2MtfZ6cfF6i"},"source":["# Define your Softmax function\n","\n","You have to write your own implementation from scratch and return softmax values (using a predefined softmax is prohibited)"]},{"cell_type":"code","metadata":{"id":"XDLPaP6DfKjy"},"source":["def softmax_activation(x):\n"," # write your implementation here\n"," pass"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"US2eZwsMgbnu"},"source":["# Training Time ⏰"]},{"cell_type":"markdown","metadata":{"id":"ES_qPFSklr9v"},"source":["## Define your model\n","You should define your model-related methods here using the given template"]},{"cell_type":"code","metadata":{"id":"9GlHfaCe0c5W"},"source":["# Example with tensorflow, but you can replace with pytorch\n","# For better code add all imports to the top cell marked for imports\n","import tensorflow\n","from tensorflow import keras"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"XaZpngdTVNuG"},"source":["class NeuralNet:\n","\n"," def __init__(self, reviews, ratings):\n","\n"," self.reviews = reviews\n"," self.ratings = ratings\n","\n","\n"," def build_nn(self):\n"," #add the input and output layer here; you can use either tensorflow or pytorch\n"," model = keras.models.Sequential()\n"," model.add(keras.layers.Input((100,)))\n"," model.add(keras.layers.Dense(np.max(self.ratings)+1, activation='softmax') )\n","\n"," ####### Use the softmax activation that you wrote code for above #####\n"," \n"," model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')\n","\n"," self.model = model\n","\n"," def train_nn(self,batch_size,epochs):\n"," # write the training loop here; you can use either tensorflow or pytorch\n"," # print validation accuracy\n"," self.model.fit(x=self.reviews, y=self.ratings, epochs=3)\n","\n"," def predict(self, 
reviews):\n"," # return a list containing all the ratings predicted by the trained model\n","\n"," self.model.predict(reviews)"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"gYZrv0q-mJlr"},"source":["\n","## Initialize & Train your model"]},{"cell_type":"code","metadata":{"id":"IhJXjbj_Z4v1","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614268774053,"user_tz":-330,"elapsed":5577,"user":{"displayName":"Dipam Chakraborty","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhjYbzjRrx303amHrTPU3lUOY9OMpxKTrTzhtYd=s64","userId":"04496869744334527762"}},"outputId":"1a33ad74-7a28-45af-b10f-d6d2d635ed52"},"source":["batch_size, epochs= 1000, 3\n"," \n","train_reviews=preprocess_data(train_data)\n","train_ratings=train_data['ratings'].values - 1\n","\n","model=NeuralNet(train_reviews,train_ratings)\n","model.build_nn()\n","model.train_nn(batch_size,epochs)"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Epoch 1/3\n","1563/1563 [==============================] - 2s 898us/step - loss: 1.3788\n","Epoch 2/3\n","1563/1563 [==============================] - 1s 886us/step - loss: 1.0885\n","Epoch 3/3\n","1563/1563 [==============================] - 1s 897us/step - loss: 1.0759\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"4L5sKWOMmciy"},"source":["## Save your trained model\n","Save your model for later. Your model will be loaded from here for predictions. 
Replace \\[example\\] below accordingly"]},{"cell_type":"code","metadata":{"id":"CrEnW27Vzup0"},"source":["if not os.path.isdir(AIcrowdConfig.ASSETS_DIR):\n"," os.mkdir(AIcrowdConfig.ASSETS_DIR)\n","# This is the example for a keras model, save your model according to your framework\n","model.model.save(os.path.join(AIcrowdConfig.ASSETS_DIR, \"dummy_model.h5\"))"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"B1YQfCZoCrDS"},"source":["# Copy trained model to GDrive\n","\n","These assets can be loaded again in another notebook by running `%aicrowd submission load-assets`"]},{"cell_type":"code","metadata":{"id":"9POzV8uBN_Lq","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614269054044,"user_tz":-330,"elapsed":20948,"user":{"displayName":"Dipam Chakraborty","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhjYbzjRrx303amHrTPU3lUOY9OMpxKTrTzhtYd=s64","userId":"04496869744334527762"}},"outputId":"7dd5a9f4-a1fe-4a5c-82da-1c832f014f16"},"source":["%aicrowd submission save-assets -c dlnlp-note"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Mounting Google Drive 💾\n","Google drive is needed to store your assets\n","Mounted at /content/drive\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"Tc_VkaIuDhJ5"},"source":["# Submitting to AIcrowd\n","\n","You need to use the submission notebook that will load the trained model and predict the outputs for given inputs.\n","\n","You can use this as a starting point for your submission notebook.\n","\n","Inference notebook [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1qesKuphCpa6dKsx8_AXsw2z7X22NWum5?usp=sharing)\n"]}]}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment