Created
August 15, 2020 00:07
-
-
Save thorwhalen/127092ff13fbe67c9b88757b23654ed7 to your computer and use it in GitHub Desktop.
Scraping and preparing color name and RGB code (hex and dec) mapping.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"toc": true | |
}, | |
"source": [ | |
"<h1>Table of Contents<span class=\"tocSkip\"></span></h1>\n", | |
"<div class=\"toc\"><ul class=\"toc-item\"><li><span><a href=\"#Get-color-tables\" data-toc-modified-id=\"Get-color-tables-1\"><span class=\"toc-item-num\">1 </span>Get color tables</a></span></li><li><span><a href=\"#Some-processing-of-these-tables\" data-toc-modified-id=\"Some-processing-of-these-tables-2\"><span class=\"toc-item-num\">2 </span>Some processing of these tables</a></span><ul class=\"toc-item\"><li><span><a href=\"#Just-keep-what-we-need-(and-replace-by-easier-names)\" data-toc-modified-id=\"Just-keep-what-we-need-(and-replace-by-easier-names)-2.1\"><span class=\"toc-item-num\">2.1 </span>Just keep what we need (and replace by easier names)</a></span></li><li><span><a href=\"#Make-decimal-codes-be-numerical\" data-toc-modified-id=\"Make-decimal-codes-be-numerical-2.2\"><span class=\"toc-item-num\">2.2 </span>Make decimal codes be numerical</a></span></li><li><span><a href=\"#Handle-those-multiple-name-colors\" data-toc-modified-id=\"Handle-those-multiple-name-colors-2.3\"><span class=\"toc-item-num\">2.3 </span>Handle those multiple name colors</a></span></li></ul></li><li><span><a href=\"#So-my-color-table-is...\" data-toc-modified-id=\"So-my-color-table-is...-3\"><span class=\"toc-item-num\">3 </span>So my color table is...</a></span><ul class=\"toc-item\"><li><span><a href=\"#Replace-spaces-by-underscores\" data-toc-modified-id=\"Replace-spaces-by-underscores-3.1\"><span class=\"toc-item-num\">3.1 </span>Replace spaces by underscores</a></span></li><li><span><a href=\"#Save-to-json\" data-toc-modified-id=\"Save-to-json-3.2\"><span class=\"toc-item-num\">3.2 </span>Save to json</a></span></li></ul></li></ul></div>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-08-11T19:53:33.382635Z", | |
"start_time": "2020-08-11T19:53:33.366200Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"from collections import Counter\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Get color tables" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-08-11T19:35:22.135107Z", | |
"start_time": "2020-08-11T19:35:21.251141Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"5" | |
] | |
}, | |
"execution_count": 1, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"from requests import request\n", | |
"import pandas as pd\n", | |
"\n", | |
"tables = pd.read_html(request('get', 'https://www.rapidtables.com/web/color/RGB_Color.html').content)\n", | |
"len(tables)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-08-11T19:37:07.276632Z", | |
"start_time": "2020-08-11T19:37:07.251653Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Color</th>\n", | |
" <th>Color Name</th>\n", | |
" <th>Hex Code#RRGGBB</th>\n", | |
" <th>Decimal CodeR,G,B</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>NaN</td>\n", | |
" <td>maroon</td>\n", | |
" <td>#800000</td>\n", | |
" <td>(128,0,0)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>NaN</td>\n", | |
" <td>dark red</td>\n", | |
" <td>#8B0000</td>\n", | |
" <td>(139,0,0)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>NaN</td>\n", | |
" <td>brown</td>\n", | |
" <td>#A52A2A</td>\n", | |
" <td>(165,42,42)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>NaN</td>\n", | |
" <td>firebrick</td>\n", | |
" <td>#B22222</td>\n", | |
" <td>(178,34,34)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>NaN</td>\n", | |
" <td>crimson</td>\n", | |
" <td>#DC143C</td>\n", | |
" <td>(220,20,60)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>134</th>\n", | |
" <td>NaN</td>\n", | |
" <td>silver</td>\n", | |
" <td>#C0C0C0</td>\n", | |
" <td>(192,192,192)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>135</th>\n", | |
" <td>NaN</td>\n", | |
" <td>light gray / light grey</td>\n", | |
" <td>#D3D3D3</td>\n", | |
" <td>(211,211,211)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>136</th>\n", | |
" <td>NaN</td>\n", | |
" <td>gainsboro</td>\n", | |
" <td>#DCDCDC</td>\n", | |
" <td>(220,220,220)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>137</th>\n", | |
" <td>NaN</td>\n", | |
" <td>white smoke</td>\n", | |
" <td>#F5F5F5</td>\n", | |
" <td>(245,245,245)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>138</th>\n", | |
" <td>NaN</td>\n", | |
" <td>white</td>\n", | |
" <td>#FFFFFF</td>\n", | |
" <td>(255,255,255)</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>139 rows × 4 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Color Color Name Hex Code#RRGGBB Decimal CodeR,G,B\n", | |
"0 NaN maroon #800000 (128,0,0)\n", | |
"1 NaN dark red #8B0000 (139,0,0)\n", | |
"2 NaN brown #A52A2A (165,42,42)\n", | |
"3 NaN firebrick #B22222 (178,34,34)\n", | |
"4 NaN crimson #DC143C (220,20,60)\n", | |
".. ... ... ... ...\n", | |
"134 NaN silver #C0C0C0 (192,192,192)\n", | |
"135 NaN light gray / light grey #D3D3D3 (211,211,211)\n", | |
"136 NaN gainsboro #DCDCDC (220,220,220)\n", | |
"137 NaN white smoke #F5F5F5 (245,245,245)\n", | |
"138 NaN white #FFFFFF (255,255,255)\n", | |
"\n", | |
"[139 rows x 4 columns]" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df = tables[4]\n", | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Some processing of these tables" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Just keep what we need (and replace by easier names)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-08-11T19:37:13.718296Z", | |
"start_time": "2020-08-11T19:37:13.699394Z" | |
} | |
}, | |
"source": [ | |
"The `Color` column displayed some actual color in the web page, but we don't need that here (well, we don't even have it, so...). So we'll just keep the other columns, and also rename them" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"We're just going to keep color name and hex code. We can get the decimal code from it." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-08-11T19:41:22.150767Z", | |
"start_time": "2020-08-11T19:41:22.134366Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"True" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"'Color' in df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-08-11T19:41:54.739368Z", | |
"start_time": "2020-08-11T19:41:54.714329Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>color</th>\n", | |
" <th>hex</th>\n", | |
" <th>dec</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>maroon</td>\n", | |
" <td>#800000</td>\n", | |
" <td>(128,0,0)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>dark red</td>\n", | |
" <td>#8B0000</td>\n", | |
" <td>(139,0,0)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>brown</td>\n", | |
" <td>#A52A2A</td>\n", | |
" <td>(165,42,42)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>firebrick</td>\n", | |
" <td>#B22222</td>\n", | |
" <td>(178,34,34)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>crimson</td>\n", | |
" <td>#DC143C</td>\n", | |
" <td>(220,20,60)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>134</th>\n", | |
" <td>silver</td>\n", | |
" <td>#C0C0C0</td>\n", | |
" <td>(192,192,192)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>135</th>\n", | |
" <td>light gray / light grey</td>\n", | |
" <td>#D3D3D3</td>\n", | |
" <td>(211,211,211)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>136</th>\n", | |
" <td>gainsboro</td>\n", | |
" <td>#DCDCDC</td>\n", | |
" <td>(220,220,220)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>137</th>\n", | |
" <td>white smoke</td>\n", | |
" <td>#F5F5F5</td>\n", | |
" <td>(245,245,245)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>138</th>\n", | |
" <td>white</td>\n", | |
" <td>#FFFFFF</td>\n", | |
" <td>(255,255,255)</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>139 rows × 3 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" color hex dec\n", | |
"0 maroon #800000 (128,0,0)\n", | |
"1 dark red #8B0000 (139,0,0)\n", | |
"2 brown #A52A2A (165,42,42)\n", | |
"3 firebrick #B22222 (178,34,34)\n", | |
"4 crimson #DC143C (220,20,60)\n", | |
".. ... ... ...\n", | |
"134 silver #C0C0C0 (192,192,192)\n", | |
"135 light gray / light grey #D3D3D3 (211,211,211)\n", | |
"136 gainsboro #DCDCDC (220,220,220)\n", | |
"137 white smoke #F5F5F5 (245,245,245)\n", | |
"138 white #FFFFFF (255,255,255)\n", | |
"\n", | |
"[139 rows x 3 columns]" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"if 'Color' in df:\n", | |
" del df['Color']\n", | |
"df = df.rename(columns={\n", | |
" 'Color Name': 'color', \n", | |
" 'Hex Code#RRGGBB': 'hex', \n", | |
" 'Decimal CodeR,G,B': 'dec'})\n", | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Make decimal codes be numerical" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Let's see if the dec values are actually tuples of numbers..." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-08-11T19:45:04.534659Z", | |
"start_time": "2020-08-11T19:45:04.517371Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'(240,128,128)'" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.dec.iloc[9]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"That's what I thought... So let's make these actual numerical triples." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-08-11T19:45:31.611451Z", | |
"start_time": "2020-08-11T19:45:31.593509Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(240, 128, 128)" | |
] | |
}, | |
"execution_count": 16, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import re\n", | |
"p = re.compile('\\((\\d+),(\\d+),(\\d+)\\)')\n", | |
"tuple(int(x) for x in p.match('(240,128,128)').groups())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-08-11T19:46:51.866855Z", | |
"start_time": "2020-08-11T19:46:51.842305Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>color</th>\n", | |
" <th>hex</th>\n", | |
" <th>dec</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>maroon</td>\n", | |
" <td>#800000</td>\n", | |
" <td>(128, 0, 0)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>dark red</td>\n", | |
" <td>#8B0000</td>\n", | |
" <td>(139, 0, 0)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>brown</td>\n", | |
" <td>#A52A2A</td>\n", | |
" <td>(165, 42, 42)</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" color hex dec\n", | |
"0 maroon #800000 (128, 0, 0)\n", | |
"1 dark red #8B0000 (139, 0, 0)\n", | |
"2 brown #A52A2A (165, 42, 42)" | |
] | |
}, | |
"execution_count": 19, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"if isinstance(df.dec.iloc[0], str):\n", | |
" df.dec = df.dec.apply(lambda v: tuple(int(x) for x in p.match(v).groups()))\n", | |
"df.head(3)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-08-11T19:47:10.930297Z", | |
"start_time": "2020-08-11T19:47:10.913284Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(128, 0, 0)" | |
] | |
}, | |
"execution_count": 21, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.dec.iloc[0] # see that we're good now" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Handle those multiple name colors" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Some of the color names have a slash in them." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 91, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-08-11T18:40:38.689101Z", | |
"start_time": "2020-08-11T18:40:38.671329Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['magenta / fuchsia',\n", | |
" 'dim gray / dim grey',\n", | |
" 'gray / grey',\n", | |
" 'dark gray / dark grey',\n", | |
" 'light gray / light grey']" | |
] | |
}, | |
"execution_count": 91, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"[x for x in df.color if '/' in x]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"It obviously is there to mark alternative names. We'll choose to keep them all, which means that for every `us_name / brit_name` row we'll produce two rows: One with the `us_name` and one with the `brit_name`..." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-08-11T19:50:14.325089Z", | |
"start_time": "2020-08-11T19:50:14.269125Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>color</th>\n", | |
" <th>hex</th>\n", | |
" <th>dec</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>maroon</td>\n", | |
" <td>#800000</td>\n", | |
" <td>(128, 0, 0)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>dark red</td>\n", | |
" <td>#8B0000</td>\n", | |
" <td>(139, 0, 0)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>brown</td>\n", | |
" <td>#A52A2A</td>\n", | |
" <td>(165, 42, 42)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>firebrick</td>\n", | |
" <td>#B22222</td>\n", | |
" <td>(178, 34, 34)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>crimson</td>\n", | |
" <td>#DC143C</td>\n", | |
" <td>(220, 20, 60)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>139</th>\n", | |
" <td>light gray</td>\n", | |
" <td>#D3D3D3</td>\n", | |
" <td>(211, 211, 211)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>140</th>\n", | |
" <td>light grey</td>\n", | |
" <td>#D3D3D3</td>\n", | |
" <td>(211, 211, 211)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>141</th>\n", | |
" <td>gainsboro</td>\n", | |
" <td>#DCDCDC</td>\n", | |
" <td>(220, 220, 220)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>142</th>\n", | |
" <td>white smoke</td>\n", | |
" <td>#F5F5F5</td>\n", | |
" <td>(245, 245, 245)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>143</th>\n", | |
" <td>white</td>\n", | |
" <td>#FFFFFF</td>\n", | |
" <td>(255, 255, 255)</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>144 rows × 3 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" color hex dec\n", | |
"0 maroon #800000 (128, 0, 0)\n", | |
"1 dark red #8B0000 (139, 0, 0)\n", | |
"2 brown #A52A2A (165, 42, 42)\n", | |
"3 firebrick #B22222 (178, 34, 34)\n", | |
"4 crimson #DC143C (220, 20, 60)\n", | |
".. ... ... ...\n", | |
"139 light gray #D3D3D3 (211, 211, 211)\n", | |
"140 light grey #D3D3D3 (211, 211, 211)\n", | |
"141 gainsboro #DCDCDC (220, 220, 220)\n", | |
"142 white smoke #F5F5F5 (245, 245, 245)\n", | |
"143 white #FFFFFF (255, 255, 255)\n", | |
"\n", | |
"[144 rows x 3 columns]" | |
] | |
}, | |
"execution_count": 22, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"from i2.deco import postprocess\n", | |
"\n", | |
"@postprocess(pd.DataFrame.from_dict)\n", | |
"def with_multiple_spellings(df):\n", | |
" for _, r in df.iterrows():\n", | |
" if '/' in r.color:\n", | |
" for color in r.color.split('/'):\n", | |
" rr = r.copy()\n", | |
" rr.color = color.strip()\n", | |
" yield rr.to_dict()\n", | |
" else:\n", | |
" yield r.to_dict()\n", | |
" \n", | |
"df = with_multiple_spellings(df)\n", | |
"df\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 32, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-08-11T19:56:33.311764Z", | |
"start_time": "2020-08-11T19:56:33.291657Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'b': 'blue',\n", | |
" 'g': 'green',\n", | |
" 'r': 'red',\n", | |
" 'c': 'cyan',\n", | |
" 'm': 'magenta',\n", | |
" 'y': 'yellow',\n", | |
" 'k': 'black',\n", | |
" 'w': 'white'}" | |
] | |
}, | |
"execution_count": 32, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Found a list of matplotlib single letter color refs here: \n", | |
"# https://matplotlib.org/3.3.0/tutorials/colors/colors.html\n", | |
"\n", | |
"matplotlib_shorthands = dict(zip(\n", | |
" ['b', 'g', 'r', 'c', 'm', 'y', 'k', 'w'],\n", | |
" ['blue', 'green', 'red', 'cyan', 'magenta', 'yellow', 'black', 'white']))\n", | |
"matplotlib_shorthands" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Make sure we have all those names in `df`" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-08-11T19:57:05.914675Z", | |
"start_time": "2020-08-11T19:57:05.897460Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"assert all(x in set(df.color) for x in matplotlib_shorthands.values())" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Let's see what letters of the `df.color` (first letter) are not covered." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 40, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-08-11T20:10:44.623071Z", | |
"start_time": "2020-08-11T20:10:44.606282Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'a', 'd', 'f', 'h', 'i', 'l', 'n', 'o', 'p', 's', 't', 'v'}" | |
] | |
}, | |
"execution_count": 40, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"letters_not_covered_by_matplotlib = Counter(x[0] for x in df.color).keys() - matplotlib_shorthands.keys()\n", | |
"letters_not_covered_by_matplotlib" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Make a list of \"base colors\" (i.e. not just a \"dark\" or \"light\" version of another color)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 56, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-08-11T20:18:57.961058Z", | |
"start_time": "2020-08-11T20:18:57.941698Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"103" | |
] | |
}, | |
"execution_count": 56, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"forbidden_prefixes = ['dark', 'light', 'medium']\n", | |
"base_colors = [x for x in df.color if not any(x.startswith(w) for w in forbidden_prefixes)]\n", | |
"len(base_colors)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"And now let's make a list of colors for each not covered letter." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 62, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-08-11T20:25:31.911785Z", | |
"start_time": "2020-08-11T20:25:31.892166Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"* f\n", | |
"\t'firebrick', 'forest green', 'fuchsia', 'floral white'\n", | |
"* t\n", | |
"\t'tomato', 'teal', 'turquoise', 'thistle', 'tan'\n", | |
"* i\n", | |
"\t'indian red', 'indigo', 'ivory'\n", | |
"* s\n", | |
"\t'salmon', 'spring green', 'sea green', 'steel blue', 'sky blue', 'slate blue', 'saddle brown', 'sienna', 'sandy brown', 'sea shell', 'slate gray', 'snow', 'silver'\n", | |
"* o\n", | |
"\t'orange red', 'orange', 'olive', 'olive drab', 'orchid', 'old lace'\n", | |
"* p\n", | |
"\t'pale golden rod', 'pale green', 'pale turquoise', 'powder blue', 'purple', 'plum', 'pale violet red', 'pink', 'peru', 'peach puff', 'papaya whip'\n", | |
"* l\n", | |
"\t'lawn green', 'lime', 'lime green', 'lemon chiffon', 'lavender blush', 'linen', 'lavender'\n", | |
"* a\n", | |
"\t'aqua', 'aqua marine', 'antique white', 'alice blue', 'azure'\n", | |
"* d\n", | |
"\t'deep sky blue', 'dodger blue', 'deep pink', 'dim gray', 'dim grey'\n", | |
"* n\n", | |
"\t'navy', 'navajo white'\n", | |
"* v\n", | |
"\t'violet'\n", | |
"* h\n", | |
"\t'hot pink', 'honeydew'\n" | |
] | |
} | |
], | |
"source": [ | |
"from i2.genu import groupby\n", | |
"g = groupby(base_colors, lambda x: x[0])\n", | |
"for letter, color_names in g.items():\n", | |
" if letter in letters_not_covered_by_matplotlib:\n", | |
" print('* ' + letter + '\\n\\t' + ', '.join(f\"'{c}'\" for c in color_names))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 64, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-08-11T20:26:54.413619Z", | |
"start_time": "2020-08-11T20:26:54.394768Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"# We'll extend it with our \n", | |
"extended_matplotlib_shorthands = {\n", | |
" # matlab shorthands\n", | |
" 'b': 'blue', \n", | |
" 'g': 'green',\n", | |
" 'r': 'red',\n", | |
" 'c': 'cyan',\n", | |
" 'm': 'magenta',\n", | |
" 'y': 'yellow',\n", | |
" 'k': 'black',\n", | |
" 'w': 'white', \n", | |
" # and more...\n", | |
" 'f': 'firebrick',\n", | |
" 't': 'teal',\n", | |
" 'i': 'indigo',\n", | |
" 's': 'salmon',\n", | |
" 'o': 'orange',\n", | |
" 'p': 'purple',\n", | |
" 'l': 'lime',\n", | |
" 'a': 'aqua',\n", | |
" 'd': 'deep pink',\n", | |
" 'n': 'navy',\n", | |
" 'v': 'violet',\n", | |
" 'h': 'honeydew'\n", | |
"}\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# So my color table is..." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Extend the existing table with the `extended_matplotlib_shorthands`, jsonize this, and be done." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 71, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-08-11T20:32:49.024193Z", | |
"start_time": "2020-08-11T20:32:48.977612Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>color</th>\n", | |
" <th>hex</th>\n", | |
" <th>dec</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>b</td>\n", | |
" <td>#0000FF</td>\n", | |
" <td>(0, 0, 255)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>g</td>\n", | |
" <td>#008000</td>\n", | |
" <td>(0, 128, 0)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>r</td>\n", | |
" <td>#FF0000</td>\n", | |
" <td>(255, 0, 0)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>c</td>\n", | |
" <td>#00FFFF</td>\n", | |
" <td>(0, 255, 255)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>m</td>\n", | |
" <td>#FF00FF</td>\n", | |
" <td>(255, 0, 255)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>y</td>\n", | |
" <td>#FFFF00</td>\n", | |
" <td>(255, 255, 0)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>k</td>\n", | |
" <td>#000000</td>\n", | |
" <td>(0, 0, 0)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>w</td>\n", | |
" <td>#FFFFFF</td>\n", | |
" <td>(255, 255, 255)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>f</td>\n", | |
" <td>#B22222</td>\n", | |
" <td>(178, 34, 34)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>t</td>\n", | |
" <td>#008080</td>\n", | |
" <td>(0, 128, 128)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>i</td>\n", | |
" <td>#4B0082</td>\n", | |
" <td>(75, 0, 130)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>s</td>\n", | |
" <td>#FA8072</td>\n", | |
" <td>(250, 128, 114)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>o</td>\n", | |
" <td>#FFA500</td>\n", | |
" <td>(255, 165, 0)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>p</td>\n", | |
" <td>#800080</td>\n", | |
" <td>(128, 0, 128)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>l</td>\n", | |
" <td>#00FF00</td>\n", | |
" <td>(0, 255, 0)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>a</td>\n", | |
" <td>#00FFFF</td>\n", | |
" <td>(0, 255, 255)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>d</td>\n", | |
" <td>#FF1493</td>\n", | |
" <td>(255, 20, 147)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>17</th>\n", | |
" <td>n</td>\n", | |
" <td>#000080</td>\n", | |
" <td>(0, 0, 128)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18</th>\n", | |
" <td>v</td>\n", | |
" <td>#EE82EE</td>\n", | |
" <td>(238, 130, 238)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>19</th>\n", | |
" <td>h</td>\n", | |
" <td>#F0FFF0</td>\n", | |
" <td>(240, 255, 240)</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" color hex dec\n", | |
"0 b #0000FF (0, 0, 255)\n", | |
"1 g #008000 (0, 128, 0)\n", | |
"2 r #FF0000 (255, 0, 0)\n", | |
"3 c #00FFFF (0, 255, 255)\n", | |
"4 m #FF00FF (255, 0, 255)\n", | |
"5 y #FFFF00 (255, 255, 0)\n", | |
"6 k #000000 (0, 0, 0)\n", | |
"7 w #FFFFFF (255, 255, 255)\n", | |
"8 f #B22222 (178, 34, 34)\n", | |
"9 t #008080 (0, 128, 128)\n", | |
"10 i #4B0082 (75, 0, 130)\n", | |
"11 s #FA8072 (250, 128, 114)\n", | |
"12 o #FFA500 (255, 165, 0)\n", | |
"13 p #800080 (128, 0, 128)\n", | |
"14 l #00FF00 (0, 255, 0)\n", | |
"15 a #00FFFF (0, 255, 255)\n", | |
"16 d #FF1493 (255, 20, 147)\n", | |
"17 n #000080 (0, 0, 128)\n", | |
"18 v #EE82EE (238, 130, 238)\n", | |
"19 h #F0FFF0 (240, 255, 240)" | |
] | |
}, | |
"execution_count": 71, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"from i2.deco import postprocess\n", | |
"\n", | |
"@postprocess(pd.DataFrame.from_dict)\n", | |
"def mk_extended_matplotlib_shorthands_df(extended_matplotlib_shorthands):\n", | |
" for letter, color_name in extended_matplotlib_shorthands.items():\n", | |
" match = df[df.color == color_name]\n", | |
" assert len(match) == 1, f\"Oops, you were supposed to have one and one only match for {color_name}\"\n", | |
" r = match.iloc[0]\n", | |
" rr = r.copy()\n", | |
" rr.color = letter\n", | |
" yield rr.to_dict()\n", | |
" \n", | |
"extra_df = mk_extended_matplotlib_shorthands_df(extended_matplotlib_shorthands)\n", | |
"extra_df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 126, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-08-11T21:26:07.645358Z", | |
"start_time": "2020-08-11T21:26:07.616551Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>color</th>\n", | |
" <th>hex</th>\n", | |
" <th>dec</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>b</td>\n", | |
" <td>#0000FF</td>\n", | |
" <td>(0, 0, 255)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>g</td>\n", | |
" <td>#008000</td>\n", | |
" <td>(0, 128, 0)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>r</td>\n", | |
" <td>#FF0000</td>\n", | |
" <td>(255, 0, 0)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>c</td>\n", | |
" <td>#00FFFF</td>\n", | |
" <td>(0, 255, 255)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>m</td>\n", | |
" <td>#FF00FF</td>\n", | |
" <td>(255, 0, 255)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>159</th>\n", | |
" <td>light gray</td>\n", | |
" <td>#D3D3D3</td>\n", | |
" <td>(211, 211, 211)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>160</th>\n", | |
" <td>light grey</td>\n", | |
" <td>#D3D3D3</td>\n", | |
" <td>(211, 211, 211)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>161</th>\n", | |
" <td>gainsboro</td>\n", | |
" <td>#DCDCDC</td>\n", | |
" <td>(220, 220, 220)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>162</th>\n", | |
" <td>white smoke</td>\n", | |
" <td>#F5F5F5</td>\n", | |
" <td>(245, 245, 245)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>163</th>\n", | |
" <td>white</td>\n", | |
" <td>#FFFFFF</td>\n", | |
" <td>(255, 255, 255)</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>164 rows × 3 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" color hex dec\n", | |
"0 b #0000FF (0, 0, 255)\n", | |
"1 g #008000 (0, 128, 0)\n", | |
"2 r #FF0000 (255, 0, 0)\n", | |
"3 c #00FFFF (0, 255, 255)\n", | |
"4 m #FF00FF (255, 0, 255)\n", | |
".. ... ... ...\n", | |
"159 light gray #D3D3D3 (211, 211, 211)\n", | |
"160 light grey #D3D3D3 (211, 211, 211)\n", | |
"161 gainsboro #DCDCDC (220, 220, 220)\n", | |
"162 white smoke #F5F5F5 (245, 245, 245)\n", | |
"163 white #FFFFFF (255, 255, 255)\n", | |
"\n", | |
"[164 rows x 3 columns]" | |
] | |
}, | |
"execution_count": 126, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"final_df = pd.concat([extra_df, df]).reset_index(drop=True)\n", | |
"final_df" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Replace spaces by underscores" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 145, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-08-11T22:47:30.768934Z", | |
"start_time": "2020-08-11T22:47:30.740351Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>color</th>\n", | |
" <th>hex</th>\n", | |
" <th>dec</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>b</td>\n", | |
" <td>#0000FF</td>\n", | |
" <td>(0, 0, 255)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>g</td>\n", | |
" <td>#008000</td>\n", | |
" <td>(0, 128, 0)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>r</td>\n", | |
" <td>#FF0000</td>\n", | |
" <td>(255, 0, 0)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>c</td>\n", | |
" <td>#00FFFF</td>\n", | |
" <td>(0, 255, 255)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>m</td>\n", | |
" <td>#FF00FF</td>\n", | |
" <td>(255, 0, 255)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>159</th>\n", | |
" <td>light_gray</td>\n", | |
" <td>#D3D3D3</td>\n", | |
" <td>(211, 211, 211)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>160</th>\n", | |
" <td>light_grey</td>\n", | |
" <td>#D3D3D3</td>\n", | |
" <td>(211, 211, 211)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>161</th>\n", | |
" <td>gainsboro</td>\n", | |
" <td>#DCDCDC</td>\n", | |
" <td>(220, 220, 220)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>162</th>\n", | |
" <td>white_smoke</td>\n", | |
" <td>#F5F5F5</td>\n", | |
" <td>(245, 245, 245)</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>163</th>\n", | |
" <td>white</td>\n", | |
" <td>#FFFFFF</td>\n", | |
" <td>(255, 255, 255)</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>164 rows × 3 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" color hex dec\n", | |
"0 b #0000FF (0, 0, 255)\n", | |
"1 g #008000 (0, 128, 0)\n", | |
"2 r #FF0000 (255, 0, 0)\n", | |
"3 c #00FFFF (0, 255, 255)\n", | |
"4 m #FF00FF (255, 0, 255)\n", | |
".. ... ... ...\n", | |
"159 light_gray #D3D3D3 (211, 211, 211)\n", | |
"160 light_grey #D3D3D3 (211, 211, 211)\n", | |
"161 gainsboro #DCDCDC (220, 220, 220)\n", | |
"162 white_smoke #F5F5F5 (245, 245, 245)\n", | |
"163 white #FFFFFF (255, 255, 255)\n", | |
"\n", | |
"[164 rows x 3 columns]" | |
] | |
}, | |
"execution_count": 145, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"final_df.color = final_df.color.apply(lambda c: c.replace(' ', '_'))\n", | |
"final_df" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Save to json" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 146, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-08-11T22:47:36.280775Z", | |
"start_time": "2020-08-11T22:47:36.258829Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"json.dump(final_df.to_dict(orient='records'), open('color_names_and_codes.json', 'w'))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 123, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-08-11T21:19:31.306156Z", | |
"start_time": "2020-08-11T21:19:31.287685Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"pts = rgb[:, :2]\n", | |
"# pts = rgb\n", | |
"pts = pts.tolist()\n", | |
"\n", | |
"pts = [{'fv': x, 'tag': str(i)} for i, x in enumerate(pts)]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 124, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2020-08-11T21:19:31.883603Z", | |
"start_time": "2020-08-11T21:19:31.865257Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['pts',\n", | |
" 'nodeSize',\n", | |
" 'height',\n", | |
" 'width',\n", | |
" 'untaggedColor',\n", | |
" 'maxIterations',\n", | |
" 'fps',\n", | |
" 'fillColors',\n", | |
" 'dim',\n", | |
" 'epsilon',\n", | |
" 'perplexity',\n", | |
" 'spread']" | |
] | |
}, | |
"execution_count": 124, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"from i2.signatures import Sig\n", | |
"[x.name for x in Sig(splatter_raw).values()]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.8.2" | |
}, | |
"latex_envs": { | |
"LaTeX_envs_menu_present": true, | |
"autoclose": false, | |
"autocomplete": true, | |
"bibliofile": "biblio.bib", | |
"cite_by": "apalike", | |
"current_citInitial": 1, | |
"eqLabelWithNumbers": true, | |
"eqNumInitial": 1, | |
"hotkeys": { | |
"equation": "Ctrl-E", | |
"itemize": "Ctrl-I" | |
}, | |
"labels_anchors": false, | |
"latex_user_defs": false, | |
"report_style_numbering": false, | |
"user_envs_cfg": false | |
}, | |
"toc": { | |
"base_numbering": 1, | |
"nav_menu": {}, | |
"number_sections": true, | |
"sideBar": false, | |
"skip_h1_title": false, | |
"title_cell": "Table of Contents", | |
"title_sidebar": "Contents", | |
"toc_cell": true, | |
"toc_position": { | |
"height": "141px", | |
"left": "34px", | |
"top": "110px", | |
"width": "872px" | |
}, | |
"toc_section_display": true, | |
"toc_window_display": true | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Author
thorwhalen
commented
Aug 15, 2020
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment