Skip to content

Instantly share code, notes, and snippets.

@sujnesh
Created February 25, 2021 17:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sujnesh/69237f2bd59b64abd6ae9c0524347239 to your computer and use it in GitHub Desktop.
Save sujnesh/69237f2bd59b64abd6ae9c0524347239 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"DLNLP_1_inference.ipynb","provenance":[{"file_id":"1mNjZLd5eKe9JJ4NVLPSNgGR5UiMsZIDz","timestamp":1614274189832},{"file_id":"1qesKuphCpa6dKsx8_AXsw2z7X22NWum5","timestamp":1614233365227},{"file_id":"1IkS10nv6TZlyEye7XP5mOH5FN9ZOSMzp","timestamp":1613420987089}],"collapsed_sections":[],"toc_visible":true},"kernelspec":{"name":"python3","display_name":"Python 3"},"widgets":{"application/vnd.jupyter.widget-state+json":{"f43ddcf72d51486a807353f51d62c2cb":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","state":{"_view_name":"HBoxView","_dom_classes":[],"_model_name":"HBoxModel","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.5.0","box_style":"","layout":"IPY_MODEL_cc35995f78a3489689b3c6c396f49941","_model_module":"@jupyter-widgets/controls","children":["IPY_MODEL_494b4a1bd2614b909819b953874c4927","IPY_MODEL_338c077dbd2440f8a440f8eb69880cdf"]}},"cc35995f78a3489689b3c6c396f49941":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"
left":null}},"494b4a1bd2614b909819b953874c4927":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","state":{"_view_name":"ProgressView","style":"IPY_MODEL_75b7dd331cfc45c3810d9faaae5ee460","_dom_classes":[],"description":"test.csv: 100%","_model_name":"FloatProgressModel","bar_style":"success","max":929146,"_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":929146,"_view_count":null,"_view_module_version":"1.5.0","orientation":"horizontal","min":0,"description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_eb1778f417bb46538f8a05001206d4fe"}},"338c077dbd2440f8a440f8eb69880cdf":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","state":{"_view_name":"HTMLView","style":"IPY_MODEL_b198a9ece4e049a3a03d1804eaedd55a","_dom_classes":[],"description":"","_model_name":"HTMLModel","placeholder":"โ€‹","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":" 929k/929k [00:33&lt;00:00, 
27.9kB/s]","_view_count":null,"_view_module_version":"1.5.0","description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_c71704350c2242f7b1ed87112d92d493"}},"75b7dd331cfc45c3810d9faaae5ee460":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","state":{"_view_name":"StyleView","_model_name":"ProgressStyleModel","description_width":"initial","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","bar_color":null,"_model_module":"@jupyter-widgets/controls"}},"eb1778f417bb46538f8a05001206d4fe":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"b198a9ece4e049a3a03d1804eaedd55a":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","state":{"_view_name":"StyleView","_model_name":"DescriptionStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","_model_module":"@jupyter-widgets/controls"}},"c71704350c2242f7b1ed87112d92d4
93":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}}}}},"cells":[{"cell_type":"markdown","metadata":{"id":"3-HLhpfJ2tWf"},"source":["<div style=\"text-align: center\">\n"," <a href=\"https://www.aicrowd.com/challenges/dlnlp-note\"><img alt=\"AIcrowd\" src=\"https://gitlab.aicrowd.com/S.Rathi/iit-b-notebook-misc/-/raw/S.Rathi-master-patch-59012/creative_updated%20on%208.2.21_1%20_desktopbanner.jpg\"></a>\n","</div>"]},{"cell_type":"markdown","metadata":{"id":"Rk3nRu08nIsn"},"source":["\n","# How to use this notebook? ๐Ÿ“\n","1. **Copy the notebook**. This is a shared template and any edits you make here will not be saved. _You should copy it into your own drive folder._ For this, click the \"File\" menu (top-left), then \"Save a Copy in Drive\". You can edit your copy however you like.\n","2. **Link it to your AICrowd account**. In order to submit your code to AICrowd, you need to provide your account's API key [here](https://colab.research.google.com/drive/1mNjZLd5eKe9JJ4NVLPSNgGR5UiMsZIDz#scrollTo=H7iqy5XcWeHN&line=4&uniqifier=1).\n","3. 
**Load** your trained model.\n","4. **Modify** the predefined functions for preprocessing, prediction etc.\n","\n","5. **Make a submission**. You have to run all the code in the notebook & use it to make your submission.\n","\n"]},{"cell_type":"markdown","metadata":{"id":"TiSy9UAsJgHa"},"source":["# Dataset Specifications 💾\n","\n","* **train.csv**: has 3 columns with the latter two being 'reviews' & corresponding 'ratings'.\n","* **test.csv**: has 2 columns with the latter being 'reviews'. You will have to predict the corresponding 'ratings'.\n","* 'ratings' in predictions should be integers in range \\[1,5\\] i.e. {1,2,3,4,5}"]},{"cell_type":"markdown","metadata":{"id":"iIw4tX5SdMVn"},"source":["# Install AIcrowd Utilities 🧰\n","\n","We will install `aicrowd-cli` that can help us manage some trivial tasks and make our lives easier."]},{"cell_type":"code","metadata":{"id":"qjHTtoBG5aAh","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614269135145,"user_tz":-330,"elapsed":10459,"user":{"displayName":"Dipam Chakraborty","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhjYbzjRrx303amHrTPU3lUOY9OMpxKTrTzhtYd=s64","userId":"04496869744334527762"}},"outputId":"0970b5a6-e53c-4948-e899-538325e0bc29"},"source":["!pip install -U git+https://gitlab.aicrowd.com/aicrowd/aicrowd-cli.git > /dev/null"],"execution_count":null,"outputs":[{"output_type":"stream","text":[" Running command git clone -q https://gitlab.aicrowd.com/aicrowd/aicrowd-cli.git /tmp/pip-req-build-_2ogoy30\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"UZ5AVIxWna9B"},"source":["Load AIcrowd magic commands"]},{"cell_type":"code","metadata":{"id":"5pGvuw2dnack"},"source":["%load_ext aicrowd.magic"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"wmC1uk_ZniGi"},"source":["# Import necessary modules and packages 📚"]},{"cell_type":"code","metadata":{"id":"eYgfWwhJnioq"},"source":["import os\n","import pandas as 
pd\n","import numpy as np\n","\n","#Add your necessary modules & packages here"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"0rUE152lf4wv"},"source":["# AIcrowd Runtime Configuration ⚙️\n","\n","Define configuration parameters. Please include any files needed for the notebook to run under `ASSETS_DIR`. We will copy the contents of this directory to your final submission file 🙂"]},{"cell_type":"code","metadata":{"id":"H7iqy5XcWeHN"},"source":["class AIcrowdConfig:\n"," DATASET_PATH = \"test.csv\"\n"," PREDICTIONS_PATH = \"predictions.csv\"\n"," ASSETS_DIR = \"assets\"\n"," API_KEY = \"\" # Get your key from https://www.aicrowd.com/participants/me"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"l8iIbBlCf8sf"},"source":["# Download test data 📲"]},{"cell_type":"code","metadata":{"id":"Mfd1YA-6W3-F","colab":{"base_uri":"https://localhost:8080/","height":100,"referenced_widgets":["f43ddcf72d51486a807353f51d62c2cb","cc35995f78a3489689b3c6c396f49941","494b4a1bd2614b909819b953874c4927","338c077dbd2440f8a440f8eb69880cdf","75b7dd331cfc45c3810d9faaae5ee460","eb1778f417bb46538f8a05001206d4fe","b198a9ece4e049a3a03d1804eaedd55a","c71704350c2242f7b1ed87112d92d493"]},"executionInfo":{"status":"ok","timestamp":1614269203693,"user_tz":-330,"elapsed":4235,"user":{"displayName":"Dipam Chakraborty","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhjYbzjRrx303amHrTPU3lUOY9OMpxKTrTzhtYd=s64","userId":"04496869744334527762"}},"outputId":"1ee3c1c7-ec9f-4cca-f4ff-c91499e06984"},"source":["%aicrowd login --api-key \"$AIcrowdConfig.API_KEY\"\n","%aicrowd dataset download -c dlnlp-note test*"],"execution_count":null,"outputs":[{"output_type":"stream","text":["\u001b[32mAPI Key valid\u001b[0m\n","\u001b[32mSaved API Key 
successfully!\u001b[0m\n"],"name":"stdout"},{"output_type":"display_data","data":{"application/vnd.jupyter.widget-view+json":{"model_id":"f43ddcf72d51486a807353f51d62c2cb","version_minor":0,"version_major":2},"text/plain":["HBox(children=(FloatProgress(value=0.0, description='test.csv', max=929146.0, style=ProgressStyle(description_…"]},"metadata":{"tags":[]}},{"output_type":"stream","text":["\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"CEy92x00PaaJ"},"source":["# Load the saved assets from GDrive"]},{"cell_type":"code","metadata":{"id":"BKddC8LBParb","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614269226485,"user_tz":-330,"elapsed":17729,"user":{"displayName":"Dipam Chakraborty","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhjYbzjRrx303amHrTPU3lUOY9OMpxKTrTzhtYd=s64","userId":"04496869744334527762"}},"outputId":"8eaa3bcb-42bc-479c-e9b6-8665ed7fc722"},"source":["%aicrowd submission load-assets -c dlnlp-note"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Mounting Google Drive 💾\n","Google drive is needed to store your assets\n","Mounted at /content/drive\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"j35PzDltlrvM"},"source":["# Load test data 💻"]},{"cell_type":"code","metadata":{"id":"t-3g5JeBluL8","colab":{"base_uri":"https://localhost:8080/","height":204},"executionInfo":{"status":"ok","timestamp":1614269228434,"user_tz":-330,"elapsed":1202,"user":{"displayName":"Dipam Chakraborty","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhjYbzjRrx303amHrTPU3lUOY9OMpxKTrTzhtYd=s64","userId":"04496869744334527762"}},"outputId":"f4fffb10-b593-4205-bb7c-887a5a2d0b8f"},"source":["test_data = pd.read_csv(AIcrowdConfig.DATASET_PATH)\n","test_data.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," 
}\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>Unnamed: 0</th>\n"," <th>reviews</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>0</td>\n"," <td>Doesn't work at ALL. Don't waste your money or...</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>1</td>\n"," <td>What crap. Would need a lot more power to do ...</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>2</td>\n"," <td>Has no suction and didn't work. Not worth trying.</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>3</td>\n"," <td>That is definitely a trash. Unable to clean an...</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>4</td>\n"," <td>Didn't even worked on cleaning the ears at all...</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>"],"text/plain":[" Unnamed: 0 reviews\n","0 0 Doesn't work at ALL. Don't waste your money or...\n","1 1 What crap. Would need a lot more power to do ...\n","2 2 Has no suction and didn't work. Not worth trying.\n","3 3 That is definitely a trash. 
Unable to clean an...\n","4 4 Didn't even worked on cleaning the ears at all..."]},"metadata":{"tags":[]},"execution_count":14}]},{"cell_type":"markdown","metadata":{"id":"_ps7jlE3jmDp"},"source":["## Preprocess the test data 🧹"]},{"cell_type":"code","metadata":{"id":"xTdMs_MBjr-a"},"source":["##### Important note - Make sure to add the preprocessing code here correctly\n","\n","\n","'''\n","About the task:\n","\n","You are provided with a codeflow- which consists of functions to be implemented(MANDATORY).\n","\n","You need to implement each of the functions mentioned below, you may add your own function parameters if needed.\n","'''\n","\n","\n","def encode_data(text):\n"," # This function will be used to encode the reviews using a dictionary (created using corpus vocabulary) \n"," \n"," # Example of encoding: \"The food was fabulous but pricey\" has a vocabulary of 6 words, each one has to be mapped to an integer like: \n"," # {'The':1, 'food':2, 'was':3, 'fabulous':4, 'but':5, 'pricey':6} this vocabulary has to be created for the entire corpus and then be used to \n"," # encode the words into integers \n","\n"," # return encoded examples\n"," pass\n","\n","\n","\n","def convert_to_lower(text):\n"," # return the reviews after converting them to lowercase\n"," pass\n","\n","\n","def remove_punctuation(text):\n"," # return the reviews after removing punctuation\n"," pass\n","\n","\n","def remove_stopwords(text):\n"," # return the reviews after removing the stopwords\n"," pass\n","\n","def perform_tokenization(text):\n"," # return the reviews after performing tokenization\n"," pass\n","\n","\n","def perform_padding(data):\n"," # return the reviews after padding the reviews to maximum length\n"," pass\n","\n","def preprocess_data(data):\n"," # make all the following function calls on your data\n","\n"," review = data[\"reviews\"]\n"," review = convert_to_lower(review)\n"," review = remove_punctuation(review)\n"," review = remove_stopwords(review)\n"," review = 
perform_tokenization(review)\n"," review = encode_data(review)\n"," processed_data = perform_padding(review)\n","\n"," # return processed_data # Uncomment this\n"," # Remove this dummy code at the bottom\n"," return np.zeros( (len(data[\"reviews\"]), 100) ) \n"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"4Bo9Fmr1p-hj"},"source":["## Prediction time ⏰"]},{"cell_type":"markdown","metadata":{"id":"75RLUyIhlW6T"},"source":["#### Read and preprocess the data"]},{"cell_type":"code","metadata":{"id":"VEsgymgJYBVR"},"source":["test_reviews=preprocess_data(test_data)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"1ZKMU3tp_Qo3"},"source":["from tensorflow import keras\n","model = keras.models.load_model(os.path.join(AIcrowdConfig.ASSETS_DIR, \"dummy_model.h5\"))"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"XniRSW0fYHGq"},"source":["# Make your predictions here based on your model.\n","# NOTE: np.argmax returns 0-based class indices, while submitted 'ratings' must be integers in 1..5;\n","# add 1 to the predictions if your model's class 0 corresponds to rating 1 (confirm against your training labels).\n","raw_predictions = model.predict(test_reviews)\n","predictions = np.argmax(raw_predictions, axis=-1)\n","\n","pd.DataFrame(predictions, columns=[\"ratings\"]).to_csv(AIcrowdConfig.PREDICTIONS_PATH)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"8Rj6IceQAiPo","executionInfo":{"status":"ok","timestamp":1614270333246,"user_tz":-330,"elapsed":1211,"user":{"displayName":"Dipam Chakraborty","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhjYbzjRrx303amHrTPU3lUOY9OMpxKTrTzhtYd=s64","userId":"04496869744334527762"}},"outputId":"11c7d7d2-39f1-4cf1-ad04-cc5bb9f252d3"},"source":["!ls $AIcrowdConfig.PREDICTIONS_PATH"],"execution_count":null,"outputs":[{"output_type":"stream","text":["predictions.csv\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"ndJ64fKkymTj"},"source":["# Submit to AIcrowd 
🚀"]},{"cell_type":"code","metadata":{"id":"AtxH12yzZBtd","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1614270302405,"user_tz":-330,"elapsed":1798,"user":{"displayName":"Dipam Chakraborty","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GhjYbzjRrx303amHrTPU3lUOY9OMpxKTrTzhtYd=s64","userId":"04496869744334527762"}},"outputId":"f10d63c8-6933-4f36-c6b1-4371ca634d40"},"source":["%aicrowd submission create --jupyter -c dlnlp-note"],"execution_count":null,"outputs":[{"output_type":"stream","text":["An error occured: [Errno 2] No such file or directory: '/content/drive/My Drive/Colab Notebooks/dlnlp-note-inference.ipynb'\n"],"name":"stdout"}]}]}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment