Skip to content

Instantly share code, notes, and snippets.

@mathigatti
Created June 25, 2020 14:04
Show Gist options
  • Save mathigatti/d4481edcb2b90f5d42fa0e12f8317dca to your computer and use it in GitHub Desktop.
Save mathigatti/d4481edcb2b90f5d42fa0e12f8317dca to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# Text label for each integer sentiment code.\n",
"code2sentiment = {\n",
"    0: \"negative\",\n",
"    1: \"somewhat negative\",\n",
"    2: \"neutral\",\n",
"    3: \"somewhat positive\",\n",
"    4: \"positive\",\n",
"}\n",
"\n",
"# Read the tab-separated file, keep the three columns used below, retain only\n",
"# the first row seen for each SentenceId, and replace every numeric code with\n",
"# its label (a code missing from code2sentiment raises KeyError).\n",
"data = (\n",
"    pd.read_csv(\"data/sentiment.tsv\", sep=\"\\t\")[[\"SentenceId\", \"Phrase\", \"Sentiment\"]]\n",
"    .drop_duplicates(subset=[\"SentenceId\"], keep=\"first\")\n",
"    .assign(Sentiment=lambda frame: frame[\"Sentiment\"].apply(lambda code: code2sentiment[code]))\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"# Build one \"<sentiment> | <review>\" string per row of `data`.\n",
"answers = []\n",
"for review, sentiment in zip(data[\"Phrase\"], data[\"Sentiment\"]):\n",
"    # Remove the space that precedes each of these tokens,\n",
"    # e.g. \"did n't\" -> \"didn't\" and \"good .\" -> \"good.\".\n",
"    for symbol in [\"?\", \"!\", \",\", \".\", \"'\", \"n't\"]:\n",
"        review = review.replace(\" \" + symbol, symbol)\n",
"    answers.append(f\"{sentiment} | {review}\")"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"# Write one entry per line. The encoding is set explicitly so the output does\n",
"# not depend on the platform's default and non-ASCII text cannot fail to encode.\n",
"with open(\"reviews_sentiment.txt\", \"w\", encoding=\"utf-8\") as f:\n",
"    f.write(\"\\n\".join(answers))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment