Created
February 25, 2021 13:54
-
-
Save sujnesh/0c4a5d73d6ff65c0c119a6fcbd2ec337 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Starter Notebook.ipynb","provenance":[{"file_id":"1qesKuphCpa6dKsx8_AXsw2z7X22NWum5","timestamp":1614067747166},{"file_id":"1IkS10nv6TZlyEye7XP5mOH5FN9ZOSMzp","timestamp":1613420987089}],"collapsed_sections":[],"toc_visible":true},"kernelspec":{"name":"python3","display_name":"Python 3"},"widgets":{"application/vnd.jupyter.widget-state+json":{"9cc566d795fa45cfbe34fada53c020e9":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","state":{"_view_name":"HBoxView","_dom_classes":[],"_model_name":"HBoxModel","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.5.0","box_style":"","layout":"IPY_MODEL_c9aa934302a64d1a9c5045f84a559f2d","_model_module":"@jupyter-widgets/controls","children":["IPY_MODEL_77a4a982dc1e4d1eb1b52d2e56e96756","IPY_MODEL_8e96d45bb1134e5ebcf2d593ec7633fe"]}},"c9aa934302a64d1a9c5045f84a559f2d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"77a4a982dc1e4d1eb1b52d2e56e96756":{"model_module":"@jupyter-w
idgets/controls","model_name":"FloatProgressModel","state":{"_view_name":"ProgressView","style":"IPY_MODEL_5aeb6a1ccaf2489aa8eb89322b13e118","_dom_classes":[],"description":"test.csv: 100%","_model_name":"FloatProgressModel","bar_style":"success","max":929146,"_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":929146,"_view_count":null,"_view_module_version":"1.5.0","orientation":"horizontal","min":0,"description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_8f4d7be07b9a41fcb59cc7e29aa55df1"}},"8e96d45bb1134e5ebcf2d593ec7633fe":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","state":{"_view_name":"HTMLView","style":"IPY_MODEL_c3e519e0f9814bbf9380c888c0a156ef","_dom_classes":[],"description":"","_model_name":"HTMLModel","placeholder":"","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":" 929k/929k [03:01<00:00, 5.11kB/s]","_view_count":null,"_view_module_version":"1.5.0","description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_8bde543e9d104eff880acd3c642403b6"}},"5aeb6a1ccaf2489aa8eb89322b13e118":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","state":{"_view_name":"StyleView","_model_name":"ProgressStyleModel","description_width":"initial","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","bar_color":null,"_model_module":"@jupyter-widgets/controls"}},"8f4d7be07b9a41fcb59cc7e29aa55df1":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflo
w_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"c3e519e0f9814bbf9380c888c0a156ef":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","state":{"_view_name":"StyleView","_model_name":"DescriptionStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","_model_module":"@jupyter-widgets/controls"}},"8bde543e9d104eff880acd3c642403b6":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"606fcbee87724f99a2fdff54aa3902ef":{"model_module":"@jupyter-widgets/output","model_n
ame":"OutputModel","state":{"_view_name":"OutputView","msg_id":"","_dom_classes":[],"_model_name":"OutputModel","outputs":[{"output_type":"display_data","metadata":{"tags":[]},"text/html":"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #000080; text-decoration-color: #000080; font-weight: bold\">submission.zip</span> <span style=\"color: #729c1f; text-decoration-color: #729c1f\">━━━━━━━━━━━━━━━━━━━━</span> <span style=\"color: #800080; text-decoration-color: #800080\">100.0%</span> • <span style=\"color: #008000; text-decoration-color: #008000\">68.8/67.2 KB</span> • <span style=\"color: #800000; text-decoration-color: #800000\">305.2 kB/s</span> • <span style=\"color: #008080; text-decoration-color: #008080\">0:00:00</span>\n</pre>\n","text/plain":"<rich.jupyter.JupyterRenderable at 0x7fc319e51bd0>"}],"_view_module":"@jupyter-widgets/output","_model_module_version":"1.0.0","_view_count":null,"_view_module_version":"1.0.0","layout":"IPY_MODEL_ebe5085b9bf04ab58b429939d3e22649","_model_module":"@jupyter-widgets/output"}},"ebe5085b9bf04ab58b429939d3e22649":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_v
ersion":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}}}}},"cells":[{"cell_type":"markdown","metadata":{"id":"EIGlTKlzGHgQ"},"source":["<div style=\"text-align: center\">\n"," <img alt=\"AIcrowd\" src=\"https://gitlab.aicrowd.com/S.Rathi/iit-b-notebook-misc/-/raw/S.Rathi-master-patch-59012/creative_updated%20on%208.2.21_1%20_desktopbanner.jpg\">\n","</div>"]},{"cell_type":"markdown","metadata":{"id":"Rk3nRu08nIsn"},"source":["# How to use this notebook? 📝\n","1. **Copy the notebook**. This is a shared template and any edits you make here will not be saved. _You should copy it into your own drive folder._ For this, click the \"File\" menu (top-left), then \"Save a Copy in Drive\". You can edit your copy however you like.\n","2. **Link it to your AIcrowd account**. In order to submit your code to AIcrowd, you need to provide your account's API key.\n","3. **Modify** the predefined functions to train & predict.\n","4. **Make a submission**. Run all the code in the notebook to get a feel for how the notebook and the submission process work. 
Finally use this notebook to make your submission.\n"]},{"cell_type":"markdown","metadata":{"id":"iIw4tX5SdMVn"},"source":["# Install AIcrowd Utilities\n","\n","We will install `aicrowd-cli` that can help us manage some trivial tasks and make our lives easier."]},{"cell_type":"code","metadata":{"id":"qjHTtoBG5aAh","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614202853072,"user_tz":-330,"elapsed":9077,"user":{"displayName":"Sudarsh Rathi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gj3qELh8V4mCIujE5HqHQHHGpz-1_qYSdp4VG-i=s64","userId":"04293397634187217051"}},"outputId":"e40e86db-0a63-434b-ca74-3154ce55aef2"},"source":["!pip install -U git+https://gitlab.aicrowd.com/aicrowd/aicrowd-cli.git > /dev/null"],"execution_count":1,"outputs":[{"output_type":"stream","text":[" Running command git clone -q https://gitlab.aicrowd.com/aicrowd/aicrowd-cli.git /tmp/pip-req-build-j_xg9bap\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"UZ5AVIxWna9B"},"source":["Load AIcrowd magic commands"]},{"cell_type":"code","metadata":{"id":"5pGvuw2dnack","executionInfo":{"status":"ok","timestamp":1614202855773,"user_tz":-330,"elapsed":1312,"user":{"displayName":"Sudarsh Rathi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gj3qELh8V4mCIujE5HqHQHHGpz-1_qYSdp4VG-i=s64","userId":"04293397634187217051"}}},"source":["%load_ext aicrowd.magic"],"execution_count":2,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"wmC1uk_ZniGi"},"source":["# Import necessary modules and packages"]},{"cell_type":"code","metadata":{"id":"eYgfWwhJnioq","executionInfo":{"status":"ok","timestamp":1614202862314,"user_tz":-330,"elapsed":2508,"user":{"displayName":"Sudarsh Rathi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gj3qELh8V4mCIujE5HqHQHHGpz-1_qYSdp4VG-i=s64","userId":"04293397634187217051"}}},"source":["#Add your necessary modules & packages 
here"],"execution_count":3,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"0rUE152lf4wv"},"source":["# AIcrowd Runtime Configuration ⚙️\n","\n","Define configuration parameters. Please include any files needed for the notebook to run under `ASSETS_DIR`. We will copy the contents of this directory to your final submission file 🙂"]},{"cell_type":"code","metadata":{"id":"H7iqy5XcWeHN","executionInfo":{"status":"ok","timestamp":1614202865848,"user_tz":-330,"elapsed":931,"user":{"displayName":"Sudarsh Rathi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gj3qELh8V4mCIujE5HqHQHHGpz-1_qYSdp4VG-i=s64","userId":"04293397634187217051"}}},"source":["class AIcrowdConfig:\n"," DATASET_DIR = \"data\"\n"," TEST_DATA_PATH = os.path.join(DATASET_DIR, \"test.csv\")\n"," TRAIN_DATA_PATH = os.path.join(DATASET_DIR, \"train.csv\")\n"," ASSETS_DIR = \"assets\"\n"," API_KEY = \"\" # Get your key from https://www.aicrowd.com/participants/me (ctrl + click the link)"],"execution_count":4,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"l8iIbBlCf8sf"},"source":["\n","# Download the data 📲"]},{"cell_type":"code","metadata":{"id":"Mfd1YA-6W3-F","colab":{"base_uri":"https://localhost:8080/","height":100,"referenced_widgets":["9cc566d795fa45cfbe34fada53c020e9","c9aa934302a64d1a9c5045f84a559f2d","77a4a982dc1e4d1eb1b52d2e56e96756","8e96d45bb1134e5ebcf2d593ec7633fe","5aeb6a1ccaf2489aa8eb89322b13e118","8f4d7be07b9a41fcb59cc7e29aa55df1","c3e519e0f9814bbf9380c888c0a156ef","8bde543e9d104eff880acd3c642403b6"]},"executionInfo":{"status":"ok","timestamp":1614202874570,"user_tz":-330,"elapsed":3874,"user":{"displayName":"Sudarsh Rathi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gj3qELh8V4mCIujE5HqHQHHGpz-1_qYSdp4VG-i=s64","userId":"04293397634187217051"}},"outputId":"ab65b771-1482-4aed-ed5f-138ea351e171"},"source":["%aicrowd login --api-key \"$AIcrowdConfig.API_KEY\"\n","%aicrowd dataset download -c 
dlnlp-note"],"execution_count":6,"outputs":[{"output_type":"stream","text":["\u001b[32mAPI Key valid\u001b[0m\n","\u001b[32mSaved API Key successfully!\u001b[0m\n"],"name":"stdout"},{"output_type":"display_data","data":{"application/vnd.jupyter.widget-view+json":{"model_id":"9cc566d795fa45cfbe34fada53c020e9","version_minor":0,"version_major":2},"text/plain":["HBox(children=(FloatProgress(value=0.0, description='test.csv', max=929146.0, style=ProgressStyle(description_…"]},"metadata":{"tags":[]}},{"output_type":"stream","text":["\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"NMwHAN22ENj0"},"source":["Extract the downloaded dataset to `data` directory\n","---\n","\n"]},{"cell_type":"code","metadata":{"id":"5oBrrQu13ZIJ"},"source":["!mkdir data\n","!mv train.csv $AIcrowdConfig.DATASET_DIR\n","!mv test.csv $AIcrowdConfig.DATASET_DIR"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"CEy92x00PaaJ"},"source":["# Tinker within Pre-defined functions 💻"]},{"cell_type":"code","metadata":{"id":"BKddC8LBParb","executionInfo":{"status":"ok","timestamp":1614203051052,"user_tz":-330,"elapsed":925,"user":{"displayName":"Sudarsh Rathi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gj3qELh8V4mCIujE5HqHQHHGpz-1_qYSdp4VG-i=s64","userId":"04293397634187217051"}}},"source":["# ADD THE LIBRARIES YOU'LL NEED\n","\n","'''\n","About the task:\n","\n","You are provided with a codeflow- which consists of functions to be implemented(MANDATORY).\n","\n","You need to implement each of the functions mentioned below, you may add your own function parameters if needed(not to main).\n","'''\n","\n","\n","def encode_data(text):\n"," # This function will be used to encode the reviews using a dictionary(created using corpus vocabulary) \n"," \n"," # Example of encoding :\"The food was fabulous but pricey\" has a vocabulary of 6 words, each one has to be mapped to an integer like: \n"," # {'The':1,'food':2,'was':3,'fabulous':4,'but':5,'pricey':6} 
this vocabulary has to be created for the entire corpus and then be used to \n"," # encode the words into integers \n","\n"," # return encoded examples\n","\n","\n","\n","def convert_to_lower(text):\n"," # return the reviews after converting them to lowercase\n","\n","\n","def remove_punctuation(text):\n"," # return the reviews after removing punctuations\n","\n","\n","def remove_stopwords(text):\n"," # return the reviews after removing the stopwords\n","\n","def perform_tokenization(text):\n"," # return the reviews after performing tokenization\n","\n","\n","def perform_padding(data):\n"," # return the reviews after padding the reviews to maximum length\n","\n","def preprocess_data(data):\n"," # make all the following function calls on your data\n"," # EXAMPLE:->\n"," '''\n"," review = data[\"reviews\"]\n"," review = convert_to_lower(review)\n"," review = remove_punctuation(review)\n"," review = remove_stopwords(review)\n"," review = perform_tokenization(review)\n"," review = encode_data(review)\n"," review = perform_padding(review)\n"," '''\n"," # return processed data\n","\n","\n","\n","def softmax_activation(x):\n"," # write your own implementation from scratch and return softmax values(using predefined softmax is prohibited)\n","\n","\n","\n","class NeuralNet:\n","\n"," def __init__(self, reviews, ratings):\n","\n"," self.reviews = reviews\n"," self.ratings = ratings\n","\n","\n","\n"," def build_nn(self):\n"," #add the input and output layer here; you can use either tensorflow or pytorch\n","\n"," def train_nn(self,batch_size,epochs):\n"," # write the training loop here; you can use either tensorflow or pytorch\n"," # print validation accuracy\n","\n"," def predict(self, reviews):\n"," # return a list containing all the ratings predicted by the trained model\n","\n","\n","\n","# DO NOT MODIFY MAIN FUNCTION'S PARAMETERS\n","def main(train_file, test_file):\n"," \n"," batch_size,epochs=\n"," \n"," train_reviews=preprocess_data(train_data)\n"," 
test_reviews=preprocess_data(test_data)\n","\n"," model=NeuralNet(train_reviews,train_ratings)\n"," model.build_nn()\n"," model.train_nn(batch_size,epochs)\n","\n"," return model.predict(test_reviews)"],"execution_count":8,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"4Bo9Fmr1p-hj"},"source":["## Prediction time ⏰"]},{"cell_type":"code","metadata":{"id":"XniRSW0fYHGq"},"source":[" #Generate Predictions here using above functions"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"ndJ64fKkymTj"},"source":["# Submit to AIcrowd 🚀"]},{"cell_type":"code","metadata":{"id":"AtxH12yzZBtd","colab":{"base_uri":"https://localhost:8080/","height":305,"referenced_widgets":["606fcbee87724f99a2fdff54aa3902ef","ebe5085b9bf04ab58b429939d3e22649"]},"executionInfo":{"status":"ok","timestamp":1614203168411,"user_tz":-330,"elapsed":3804,"user":{"displayName":"Sudarsh Rathi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gj3qELh8V4mCIujE5HqHQHHGpz-1_qYSdp4VG-i=s64","userId":"04293397634187217051"}},"outputId":"d56a7cdd-c427-4c5a-ad8e-cea62e794ed0"},"source":["%aicrowd submission create --jupyter -c dlnlp-note"],"execution_count":12,"outputs":[{"output_type":"display_data","data":{"application/vnd.jupyter.widget-view+json":{"model_id":"606fcbee87724f99a2fdff54aa3902ef","version_minor":0,"version_major":2},"text/plain":["Output()"]},"metadata":{"tags":[]}},{"output_type":"stream","text":[" ╭─────────────────────────╮ \n"," │ Successfully submitted! 
│ \n"," ╰─────────────────────────╯ \n"," Important links \n","┌──────────────────┬───────────────────────────────────────────────────────────────────────────────┐\n","│ This submission │ https://www.aicrowd.com/challenges/dlnlp-note/submissions/123163 │\n","│ │ │\n","│ All submissions │ https://www.aicrowd.com/challenges/dlnlp-note/submissions?my_submissions=true │\n","│ │ │\n","│ Leaderboard │ https://www.aicrowd.com/challenges/dlnlp-note/leaderboards │\n","│ │ │\n","│ Discussion forum │ https://discourse.aicrowd.com/c/dlnlp-note │\n","│ │ │\n","│ Challenge page │ https://www.aicrowd.com/challenges/dlnlp-note │\n","└──────────────────┴───────────────────────────────────────────────────────────────────────────────┘\n","{'submission_id': 123163, 'created_at': '2021-02-24T21:46:07.935Z'}\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"CrEnW27Vzup0"},"source":[""],"execution_count":null,"outputs":[]}]} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment