Skip to content

Instantly share code, notes, and snippets.

@wtberry
Created June 23, 2019 21:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wtberry/9dac6eed1ebc6440aafeb094969462b9 to your computer and use it in GitHub Desktop.
Save wtberry/9dac6eed1ebc6440aafeb094969462b9 to your computer and use it in GitHub Desktop.
medium/NameClassifier/dataload
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>code</th>\n",
" <th>name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>24944</th>\n",
" <td>Spanish (Spain)</td>\n",
" <td>Luís Escobar Abascal</td>\n",
" </tr>\n",
" <tr>\n",
" <th>83738</th>\n",
" <td>Japanese</td>\n",
" <td>中島 英樹</td>\n",
" </tr>\n",
" <tr>\n",
" <th>87719</th>\n",
" <td>Japanese</td>\n",
" <td>吉本 知実</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2567</th>\n",
" <td>Arabic (Egypt)</td>\n",
" <td>Maria Green</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23727</th>\n",
" <td>Spanish (Spain)</td>\n",
" <td>Emilia Valbuena Fuente</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24189</th>\n",
" <td>Spanish (Spain)</td>\n",
" <td>Carlos Estrada Teruel</td>\n",
" </tr>\n",
" <tr>\n",
" <th>83525</th>\n",
" <td>Japanese</td>\n",
" <td>田中 晃</td>\n",
" </tr>\n",
" <tr>\n",
" <th>74719</th>\n",
" <td>Russian</td>\n",
" <td>Сорокина Евпраксия Афанасьевна</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7575</th>\n",
" <td>Arabic (Egypt)</td>\n",
" <td>Darren Gonzalez</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69719</th>\n",
" <td>Portuguese (Brazil)</td>\n",
" <td>Isaac Almeida</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" code name\n",
"24944 Spanish (Spain) Luís Escobar Abascal\n",
"83738 Japanese 中島 英樹\n",
"87719 Japanese 吉本 知実\n",
"2567 Arabic (Egypt) Maria Green\n",
"23727 Spanish (Spain) Emilia Valbuena Fuente\n",
"24189 Spanish (Spain) Carlos Estrada Teruel\n",
"83525 Japanese 田中 晃\n",
"74719 Russian Сорокина Евпраксия Афанасьевна\n",
"7575 Arabic (Egypt) Darren Gonzalez\n",
"69719 Portuguese (Brazil) Isaac Almeida"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import os\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.feature_extraction.text import CountVectorizer\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"# The names CSV is expected in a `data` folder under the current working directory\n",
"# (os.path.abspath('') resolves to the directory the kernel was started in).\n",
"names = 'multi_class_names.csv'\n",
"PATH = os.path.join(os.path.abspath(''), 'data')\n",
"\n",
"# Load the CSV into a pandas DataFrame, then show 10 randomly drawn rows as a preview.\n",
"df = pd.read_csv(os.path.join(PATH, names))\n",
"df.sample(10)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment