Created
December 11, 2019 11:17
-
-
Save denis19973/70d8d5ae02937964e448e8738071c202 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"import matplotlib.pyplot as plt\n", | |
"%matplotlib inline" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Read the recognition results; the preview a couple of cells below shows\n", | |
"# the columns call_id, photo_id, recognition_alias and count.\n", | |
"df = pd.read_csv('photo_recognition_results.csv')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Aliases that do not count against a photo: a photo whose recognized\n", | |
"# aliases all come from this list is left out of the no-reference count.\n", | |
"EXCLUDE_ALIASES = [\n", | |
" 'COURVOISIER VS',\n", | |
" 'COURVOISIER VSOP',\n", | |
" 'COURVOISIER XO +',\n", | |
" 'A.D FUSSIGNY',\n", | |
" 'CAMUS',\n", | |
" 'MARTELL',\n", | |
" 'REMY MARTIN',\n", | |
" 'HENNESSY',\n", | |
"]\n", | |
"\n", | |
"# Reference aliases: a photo showing at least one of these is treated as\n", | |
"# having a reference and is left out of the no-reference count.\n", | |
"REFERENCE_ALIASES = [\n", | |
" 'AUCHENTOSHAN 18YO',\n", | |
" \"McCLELLAND'S HIGHLAND\",\n", | |
" \"McCLELLAND'S LOWLAND\",\n", | |
" 'AUCHENTOSHAN 12YO',\n", | |
" \"McCLELLAND'S SPEYSIDE\",\n", | |
" 'JIM BEAM RED STAG BLACK CHERRY',\n", | |
" 'JIM BEAM APPLE',\n", | |
" 'AUCHENTOSHAN AMERICAN OAK',\n", | |
" 'AUCHENTOSHAN 3 WOOD',\n", | |
" 'BOWMORE 12YO',\n", | |
" \"McCLELLAND'S ISLAY\",\n", | |
" 'KILBEGGAN',\n", | |
" 'JIM BEAM HONEY',\n", | |
" 'MACALLAN 12YO DOUBLE CASK',\n", | |
" 'MACALLAN 12YO TRIPLE CASK',\n", | |
" 'MACALLAN 15YO TRIPLE CASK',\n", | |
" 'MACALLAN 18YO TRIPLE CASK',\n", | |
" 'MACALLAN RARE CASK',\n", | |
" 'MACALLAN SHERRY OAK 12YO',\n", | |
" 'MACALLAN SHERRY OAK 18YO',\n", | |
" 'HIGHLAND PARK 10YO',\n", | |
"]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>call_id</th>\n", | |
" <th>photo_id</th>\n", | |
" <th>recognition_alias</th>\n", | |
" <th>count</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>452592</td>\n", | |
" <td>1317053</td>\n", | |
" <td>JOHNNIE WALKER</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>452592</td>\n", | |
" <td>1317053</td>\n", | |
" <td>other</td>\n", | |
" <td>16</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>452592</td>\n", | |
" <td>1317053</td>\n", | |
" <td>AUCHENTOSHAN AMERICAN OAK</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>452592</td>\n", | |
" <td>1317053</td>\n", | |
" <td>GLENFIDDICH</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>452592</td>\n", | |
" <td>1317053</td>\n", | |
" <td>MARTELL</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" call_id photo_id recognition_alias count\n", | |
"0 452592 1317053 JOHNNIE WALKER 1\n", | |
"1 452592 1317053 other 16\n", | |
"2 452592 1317053 AUCHENTOSHAN AMERICAN OAK 1\n", | |
"3 452592 1317053 GLENFIDDICH 2\n", | |
"4 452592 1317053 MARTELL 1" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"photos = dict()\n", | |
"for _, row in df.iterrows():\n", | |
" if row['recognition_alias'] == 'other':\n", | |
" continue\n", | |
" if row['photo_id'] not in photos:\n", | |
" photos[row['photo_id']] = [row['recognition_alias']]\n", | |
" else:\n", | |
" photos[row['photo_id']].append(row['recognition_alias'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"1073" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"without_ref_count = 0\n", | |
"reference_set = set(REFERENCE_ALIASES)\n", | |
"exclude_set = set(EXCLUDE_ALIASES)\n", | |
"for k, v in photos.items():\n", | |
" aliases = set(v)\n", | |
" if not reference_set.intersection(aliases) and not aliases.issubset(exclude_set):\n", | |
" without_ref_count += 1\n", | |
"\n", | |
"without_ref_count" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0.3757002801120448" | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"without_ref_count / len(photos.keys())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment