Skip to content

Instantly share code, notes, and snippets.

@sujnesh
Created February 25, 2021 13:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sujnesh/0c4a5d73d6ff65c0c119a6fcbd2ec337 to your computer and use it in GitHub Desktop.
Save sujnesh/0c4a5d73d6ff65c0c119a6fcbd2ec337 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Starter Notebook.ipynb","provenance":[{"file_id":"1qesKuphCpa6dKsx8_AXsw2z7X22NWum5","timestamp":1614067747166},{"file_id":"1IkS10nv6TZlyEye7XP5mOH5FN9ZOSMzp","timestamp":1613420987089}],"collapsed_sections":[],"toc_visible":true},"kernelspec":{"name":"python3","display_name":"Python 3"},"widgets":{"application/vnd.jupyter.widget-state+json":{"9cc566d795fa45cfbe34fada53c020e9":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","state":{"_view_name":"HBoxView","_dom_classes":[],"_model_name":"HBoxModel","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.5.0","box_style":"","layout":"IPY_MODEL_c9aa934302a64d1a9c5045f84a559f2d","_model_module":"@jupyter-widgets/controls","children":["IPY_MODEL_77a4a982dc1e4d1eb1b52d2e56e96756","IPY_MODEL_8e96d45bb1134e5ebcf2d593ec7633fe"]}},"c9aa934302a64d1a9c5045f84a559f2d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"77a4a982dc1e4d1eb1b52d2e56e96756":{"model_module":"@jupyter-w
idgets/controls","model_name":"FloatProgressModel","state":{"_view_name":"ProgressView","style":"IPY_MODEL_5aeb6a1ccaf2489aa8eb89322b13e118","_dom_classes":[],"description":"test.csv: 100%","_model_name":"FloatProgressModel","bar_style":"success","max":929146,"_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":929146,"_view_count":null,"_view_module_version":"1.5.0","orientation":"horizontal","min":0,"description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_8f4d7be07b9a41fcb59cc7e29aa55df1"}},"8e96d45bb1134e5ebcf2d593ec7633fe":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","state":{"_view_name":"HTMLView","style":"IPY_MODEL_c3e519e0f9814bbf9380c888c0a156ef","_dom_classes":[],"description":"","_model_name":"HTMLModel","placeholder":"​","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":" 929k/929k [03:01&lt;00:00, 5.11kB/s]","_view_count":null,"_view_module_version":"1.5.0","description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_8bde543e9d104eff880acd3c642403b6"}},"5aeb6a1ccaf2489aa8eb89322b13e118":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","state":{"_view_name":"StyleView","_model_name":"ProgressStyleModel","description_width":"initial","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","bar_color":null,"_model_module":"@jupyter-widgets/controls"}},"8f4d7be07b9a41fcb59cc7e29aa55df1":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"ove
rflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"c3e519e0f9814bbf9380c888c0a156ef":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","state":{"_view_name":"StyleView","_model_name":"DescriptionStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","_model_module":"@jupyter-widgets/controls"}},"8bde543e9d104eff880acd3c642403b6":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"606fcbee87724f99a2fdff54aa3902ef":{"model_module":"@jupyter-widgets/output","mod
el_name":"OutputModel","state":{"_view_name":"OutputView","msg_id":"","_dom_classes":[],"_model_name":"OutputModel","outputs":[{"output_type":"display_data","metadata":{"tags":[]},"text/html":"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"><span style=\"color: #000080; text-decoration-color: #000080; font-weight: bold\">submission.zip</span> <span style=\"color: #729c1f; text-decoration-color: #729c1f\">━━━━━━━━━━━━━━━━━━━━</span> <span style=\"color: #800080; text-decoration-color: #800080\">100.0%</span> • <span style=\"color: #008000; text-decoration-color: #008000\">68.8/67.2 KB</span> • <span style=\"color: #800000; text-decoration-color: #800000\">305.2 kB/s</span> • <span style=\"color: #008080; text-decoration-color: #008080\">0:00:00</span>\n</pre>\n","text/plain":"<rich.jupyter.JupyterRenderable at 0x7fc319e51bd0>"}],"_view_module":"@jupyter-widgets/output","_model_module_version":"1.0.0","_view_count":null,"_view_module_version":"1.0.0","layout":"IPY_MODEL_ebe5085b9bf04ab58b429939d3e22649","_model_module":"@jupyter-widgets/output"}},"ebe5085b9bf04ab58b429939d3e22649":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_modu
le_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}}}}},"cells":[{"cell_type":"markdown","metadata":{"id":"EIGlTKlzGHgQ"},"source":["<div style=\"text-align: center\">\n"," <img alt=\"AIcrowd\" src=\"https://gitlab.aicrowd.com/S.Rathi/iit-b-notebook-misc/-/raw/S.Rathi-master-patch-59012/creative_updated%20on%208.2.21_1%20_desktopbanner.jpg\">\n","</div>"]},{"cell_type":"markdown","metadata":{"id":"Rk3nRu08nIsn"},"source":["# How to use this notebook? 📝\n","1. **Copy the notebook**. This is a shared template and any edits you make here will not be saved. _You should copy it into your own drive folder._ For this, click the \"File\" menu (top-left), then \"Save a Copy in Drive\". You can edit your copy however you like.\n","2. **Link it to your AICrowd account**. In order to submit your code to AICrowd, you need to provide your account's API key.\n","3. **Modify** the predefined functions to train & predict.\n","4. **Make a submission**. Run all the code in the notebook to get a feel of how the notebook and the submission process works. 
Finally use this notebook to make your submission.\n"]},{"cell_type":"markdown","metadata":{"id":"iIw4tX5SdMVn"},"source":["# Install AIcrowd Utilities\n","\n","We will install `aicrowd-cli` that can help us manage some trivial tasks and make our lives easier."]},{"cell_type":"code","metadata":{"id":"qjHTtoBG5aAh","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614202853072,"user_tz":-330,"elapsed":9077,"user":{"displayName":"Sudarsh Rathi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gj3qELh8V4mCIujE5HqHQHHGpz-1_qYSdp4VG-i=s64","userId":"04293397634187217051"}},"outputId":"e40e86db-0a63-434b-ca74-3154ce55aef2"},"source":["!pip install -U git+https://gitlab.aicrowd.com/aicrowd/aicrowd-cli.git > /dev/null"],"execution_count":1,"outputs":[{"output_type":"stream","text":[" Running command git clone -q https://gitlab.aicrowd.com/aicrowd/aicrowd-cli.git /tmp/pip-req-build-j_xg9bap\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"UZ5AVIxWna9B"},"source":["Load AIcrowd magic commands"]},{"cell_type":"code","metadata":{"id":"5pGvuw2dnack","executionInfo":{"status":"ok","timestamp":1614202855773,"user_tz":-330,"elapsed":1312,"user":{"displayName":"Sudarsh Rathi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gj3qELh8V4mCIujE5HqHQHHGpz-1_qYSdp4VG-i=s64","userId":"04293397634187217051"}}},"source":["%load_ext aicrowd.magic"],"execution_count":2,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"wmC1uk_ZniGi"},"source":["# Import necessary modules and packages"]},{"cell_type":"code","metadata":{"id":"eYgfWwhJnioq","executionInfo":{"status":"ok","timestamp":1614202862314,"user_tz":-330,"elapsed":2508,"user":{"displayName":"Sudarsh Rathi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gj3qELh8V4mCIujE5HqHQHHGpz-1_qYSdp4VG-i=s64","userId":"04293397634187217051"}}},"source":["#Add your necessary modules & packages 
here"],"execution_count":3,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"0rUE152lf4wv"},"source":["# AIcrowd Runtime Configuration ⚙️\n","\n","Define configuration parameters. Please include any files needed for the notebook to run under `ASSETS_DIR`. We will copy the contents of this directory to your final submission file 🙂"]},{"cell_type":"code","metadata":{"id":"H7iqy5XcWeHN","executionInfo":{"status":"ok","timestamp":1614202865848,"user_tz":-330,"elapsed":931,"user":{"displayName":"Sudarsh Rathi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gj3qELh8V4mCIujE5HqHQHHGpz-1_qYSdp4VG-i=s64","userId":"04293397634187217051"}}},"source":["class AIcrowdConfig:\n"," DATASET_DIR = \"data\"\n"," TEST_DATA_PATH = os.path.join(DATASET_DIR, \"test.csv\")\n"," TRAIN_DATA_PATH = os.path.join(DATASET_DIR, \"train.csv\")\n"," ASSETS_DIR = \"assets\"\n"," API_KEY = \"\" # Get your key from https://www.aicrowd.com/participants/me (ctrl + click the link)"],"execution_count":4,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"l8iIbBlCf8sf"},"source":["\n","# Download the data 📲"]},{"cell_type":"code","metadata":{"id":"Mfd1YA-6W3-F","colab":{"base_uri":"https://localhost:8080/","height":100,"referenced_widgets":["9cc566d795fa45cfbe34fada53c020e9","c9aa934302a64d1a9c5045f84a559f2d","77a4a982dc1e4d1eb1b52d2e56e96756","8e96d45bb1134e5ebcf2d593ec7633fe","5aeb6a1ccaf2489aa8eb89322b13e118","8f4d7be07b9a41fcb59cc7e29aa55df1","c3e519e0f9814bbf9380c888c0a156ef","8bde543e9d104eff880acd3c642403b6"]},"executionInfo":{"status":"ok","timestamp":1614202874570,"user_tz":-330,"elapsed":3874,"user":{"displayName":"Sudarsh Rathi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gj3qELh8V4mCIujE5HqHQHHGpz-1_qYSdp4VG-i=s64","userId":"04293397634187217051"}},"outputId":"ab65b771-1482-4aed-ed5f-138ea351e171"},"source":["%aicrowd login --api-key \"$AIcrowdConfig.API_KEY\"\n","%aicrowd dataset download -c 
dlnlp-note"],"execution_count":6,"outputs":[{"output_type":"stream","text":["\u001b[32mAPI Key valid\u001b[0m\n","\u001b[32mSaved API Key successfully!\u001b[0m\n"],"name":"stdout"},{"output_type":"display_data","data":{"application/vnd.jupyter.widget-view+json":{"model_id":"9cc566d795fa45cfbe34fada53c020e9","version_minor":0,"version_major":2},"text/plain":["HBox(children=(FloatProgress(value=0.0, description='test.csv', max=929146.0, style=ProgressStyle(description_…"]},"metadata":{"tags":[]}},{"output_type":"stream","text":["\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"NMwHAN22ENj0"},"source":["Move the downloaded dataset to the `data` directory\n","---\n","\n"]},{"cell_type":"code","metadata":{"id":"5oBrrQu13ZIJ"},"source":["!mkdir data\n","!mv train.csv $AIcrowdConfig.DATASET_DIR\n","!mv test.csv $AIcrowdConfig.DATASET_DIR"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"CEy92x00PaaJ"},"source":["# Tinker within Pre-defined functions 💻"]},{"cell_type":"code","metadata":{"id":"BKddC8LBParb","executionInfo":{"status":"ok","timestamp":1614203051052,"user_tz":-330,"elapsed":925,"user":{"displayName":"Sudarsh Rathi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gj3qELh8V4mCIujE5HqHQHHGpz-1_qYSdp4VG-i=s64","userId":"04293397634187217051"}}},"source":["# ADD THE LIBRARIES YOU'LL NEED\n","\n","'''\n","About the task:\n","\n","You are provided with a code flow that consists of functions to be implemented (MANDATORY).\n","\n","You need to implement each of the functions mentioned below; you may add your own function parameters if needed (but not to main).\n","'''\n","\n","\n","def encode_data(text):\n"," # This function will be used to encode the reviews using a dictionary (created using corpus vocabulary) \n"," \n"," # Example of encoding: \"The food was fabulous but pricey\" has a vocabulary of 6 words; each one has to be mapped to an integer like: \n"," # {'The':1,'food':2,'was':3,'fabulous':4,'but':5,'pricey':6} 
this vocabulary has to be created for the entire corpus and then be used to \n"," # encode the words into integers \n","\n"," # return encoded examples\n","\n","\n","\n","def convert_to_lower(text):\n"," # return the reviews after converting them to lowercase\n","\n","\n","def remove_punctuation(text):\n"," # return the reviews after removing punctuation\n","\n","\n","def remove_stopwords(text):\n"," # return the reviews after removing the stopwords\n","\n","def perform_tokenization(text):\n"," # return the reviews after performing tokenization\n","\n","\n","def perform_padding(data):\n"," # return the reviews after padding the reviews to maximum length\n","\n","def preprocess_data(data):\n"," # make all the following function calls on your data\n"," # EXAMPLE:->\n"," '''\n"," review = data[\"reviews\"]\n"," review = convert_to_lower(review)\n"," review = remove_punctuation(review)\n"," review = remove_stopwords(review)\n"," review = perform_tokenization(review)\n"," review = encode_data(review)\n"," review = perform_padding(review)\n"," '''\n"," # return processed data\n","\n","\n","\n","def softmax_activation(x):\n"," # write your own implementation from scratch and return softmax values(using predefined softmax is prohibited)\n","\n","\n","\n","class NeuralNet:\n","\n"," def __init__(self, reviews, ratings):\n","\n"," self.reviews = reviews\n"," self.ratings = ratings\n","\n","\n","\n"," def build_nn(self):\n"," #add the input and output layer here; you can use either tensorflow or pytorch\n","\n"," def train_nn(self,batch_size,epochs):\n"," # write the training loop here; you can use either tensorflow or pytorch\n"," # print validation accuracy\n","\n"," def predict(self, reviews):\n"," # return a list containing all the ratings predicted by the trained model\n","\n","\n","\n","# DO NOT MODIFY MAIN FUNCTION'S PARAMETERS\n","def main(train_file, test_file):\n"," \n"," batch_size,epochs=\n"," \n"," train_reviews=preprocess_data(train_data)\n"," 
test_reviews=preprocess_data(test_data)\n","\n"," model=NeuralNet(train_reviews,train_ratings)\n"," model.build_nn()\n"," model.train_nn(batch_size,epochs)\n","\n"," return model.predict(test_reviews)"],"execution_count":8,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"4Bo9Fmr1p-hj"},"source":["## Prediction time ⏰"]},{"cell_type":"code","metadata":{"id":"XniRSW0fYHGq"},"source":[" #Generate Predictions here using above functions"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"ndJ64fKkymTj"},"source":["# Submit to AIcrowd 🚀"]},{"cell_type":"code","metadata":{"id":"AtxH12yzZBtd","colab":{"base_uri":"https://localhost:8080/","height":305,"referenced_widgets":["606fcbee87724f99a2fdff54aa3902ef","ebe5085b9bf04ab58b429939d3e22649"]},"executionInfo":{"status":"ok","timestamp":1614203168411,"user_tz":-330,"elapsed":3804,"user":{"displayName":"Sudarsh Rathi","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gj3qELh8V4mCIujE5HqHQHHGpz-1_qYSdp4VG-i=s64","userId":"04293397634187217051"}},"outputId":"d56a7cdd-c427-4c5a-ad8e-cea62e794ed0"},"source":["%aicrowd submission create --jupyter -c dlnlp-note"],"execution_count":12,"outputs":[{"output_type":"display_data","data":{"application/vnd.jupyter.widget-view+json":{"model_id":"606fcbee87724f99a2fdff54aa3902ef","version_minor":0,"version_major":2},"text/plain":["Output()"]},"metadata":{"tags":[]}},{"output_type":"stream","text":[" ╭─────────────────────────╮ \n"," │ Successfully submitted! 
│ \n"," ╰─────────────────────────╯ \n"," Important links \n","┌──────────────────┬───────────────────────────────────────────────────────────────────────────────┐\n","│ This submission │ https://www.aicrowd.com/challenges/dlnlp-note/submissions/123163 │\n","│ │ │\n","│ All submissions │ https://www.aicrowd.com/challenges/dlnlp-note/submissions?my_submissions=true │\n","│ │ │\n","│ Leaderboard │ https://www.aicrowd.com/challenges/dlnlp-note/leaderboards │\n","│ │ │\n","│ Discussion forum │ https://discourse.aicrowd.com/c/dlnlp-note │\n","│ │ │\n","│ Challenge page │ https://www.aicrowd.com/challenges/dlnlp-note │\n","└──────────────────┴───────────────────────────────────────────────────────────────────────────────┘\n","{'submission_id': 123163, 'created_at': '2021-02-24T21:46:07.935Z'}\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"CrEnW27Vzup0"},"source":[""],"execution_count":null,"outputs":[]}]}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment