Skip to content

Instantly share code, notes, and snippets.

@cthoyt
Created February 6, 2023 17:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cthoyt/9a195127d3b0871599841b7a690b07ad to your computer and use it in GitHub Desktop.
Save cthoyt/9a195127d3b0871599841b7a690b07ad to your computer and use it in GitHub Desktop.
Quick and dirty look at CellMarker (http://xteam.xbio.top/CellMarker/)
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "8073ed2b",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "8a921f4e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>UberonOntologyID</th>\n",
" <th>cancerType</th>\n",
" <th>cellType</th>\n",
" <th>CellOntologyID</th>\n",
" <th>cellMarker</th>\n",
" <th>geneSymbol</th>\n",
" <th>geneID</th>\n",
" <th>proteinName</th>\n",
" <th>proteinID</th>\n",
" <th>markerResource</th>\n",
" <th>PMID</th>\n",
" <th>Company</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>UBERON:0002107</td>\n",
" <td>Normal</td>\n",
" <td>Normal cell</td>\n",
" <td>CL:0000632</td>\n",
" <td>Synaptophysin</td>\n",
" <td>SYP</td>\n",
" <td>6855</td>\n",
" <td>SYPH</td>\n",
" <td>P08247</td>\n",
" <td>Experiment</td>\n",
" <td>10595912</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>UBERON:0001295</td>\n",
" <td>Normal</td>\n",
" <td>Normal cell</td>\n",
" <td>CL:0000351</td>\n",
" <td>CEACAM1</td>\n",
" <td>CEACAM1</td>\n",
" <td>634</td>\n",
" <td>CEAM1</td>\n",
" <td>P13688</td>\n",
" <td>Experiment</td>\n",
" <td>10751340</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>UBERON:0000923</td>\n",
" <td>Normal</td>\n",
" <td>Normal cell</td>\n",
" <td>CL:0000670</td>\n",
" <td>VASA</td>\n",
" <td>DDX4</td>\n",
" <td>54514</td>\n",
" <td>DDX4</td>\n",
" <td>Q9NQI0</td>\n",
" <td>Experiment</td>\n",
" <td>10920202</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>UBERON:0001772</td>\n",
" <td>Normal</td>\n",
" <td>Normal cell</td>\n",
" <td>CL:0000066</td>\n",
" <td>KLF6</td>\n",
" <td>KLF6</td>\n",
" <td>1316</td>\n",
" <td>KLF6</td>\n",
" <td>Q99612</td>\n",
" <td>Experiment</td>\n",
" <td>12407152</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>UBERON:0001987</td>\n",
" <td>Normal</td>\n",
" <td>Normal cell</td>\n",
" <td>CL:0000351</td>\n",
" <td>FGF10</td>\n",
" <td>FGF10</td>\n",
" <td>2255</td>\n",
" <td>FGF10</td>\n",
" <td>O15520</td>\n",
" <td>Experiment</td>\n",
" <td>15950061</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2862</th>\n",
" <td>UBERON:0000922</td>\n",
" <td>Normal</td>\n",
" <td>Normal cell</td>\n",
" <td>CL:0000023</td>\n",
" <td>ABCA5, ABCC4, ABI3BP, AKD1, AMPD3, ANKRA2, ARV...</td>\n",
" <td>ABCA5, ABCC4, ABI3BP, NA, AMPD3, ANKRA2, ARV1,...</td>\n",
" <td>23461, 10257, 25890, NA, 272, 57763, 64801, NA...</td>\n",
" <td>ABCA5, MRP4, TARSH, NA, AMPD3, ANRA2, ARV1, NA...</td>\n",
" <td>Q8WWZ7, O15439, Q7Z7G0, NA, Q01432, Q9H9E1, Q9...</td>\n",
" <td>Single-cell sequencing</td>\n",
" <td>23892778</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2863</th>\n",
" <td>UBERON:0000922</td>\n",
" <td>Normal</td>\n",
" <td>Normal cell</td>\n",
" <td>CL:0000353</td>\n",
" <td>ACCSL, ACVR1B, ARHGEF16, ASF1B, BCL2L10, BLCAP...</td>\n",
" <td>ACCSL, ACVR1B, ARHGEF16, ASF1B, BCL2L10, BLCAP...</td>\n",
" <td>390110, 91, 27237, 55723, 10017, 10904, 662, 7...</td>\n",
" <td>1A1L2, ACV1B, ARHGG, ASF1B, B2L10, BLCAP, SEC2...</td>\n",
" <td>Q4AC99, P36896, Q5VV41, Q9NVP2, Q9HD36, P62952...</td>\n",
" <td>Single-cell sequencing</td>\n",
" <td>23892778</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2864</th>\n",
" <td>UBERON:0000922</td>\n",
" <td>Normal</td>\n",
" <td>Normal cell</td>\n",
" <td>CL:0000353</td>\n",
" <td>ADPGK, AIM1, AIMP2, ARG2, ARHGAP17, ARIH1, CDC...</td>\n",
" <td>ADPGK, CRYBG1, AIMP2, ARG2, ARHGAP17, ARIH1, C...</td>\n",
" <td>83440, 202, 7965, 384, 55114, 25820, 55536, 24...</td>\n",
" <td>ADPGK, CRBG1, AIMP2, ARGI2, RHG17, ARI1, CDA7L...</td>\n",
" <td>Q9BRR6, Q9Y4K1, Q13155, P78540, Q68EM7, Q9Y4X5...</td>\n",
" <td>Single-cell sequencing</td>\n",
" <td>23892778</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2865</th>\n",
" <td>UBERON:0000922</td>\n",
" <td>Normal</td>\n",
" <td>Normal cell</td>\n",
" <td>CL:0000353</td>\n",
" <td>C11orf48, C19orf53, DHX9, DIABLO, EIF1AD, EIF4...</td>\n",
" <td>LBHD1, C19orf53, DHX9, DIABLO, EIF1AD, EIF4G1,...</td>\n",
" <td>79081, 28974, 1660, 56616, 84285, 1981, 26017,...</td>\n",
" <td>LBHD1, L10K, DHX9, DBLOH, EIF1A, IF4G1, FA32A,...</td>\n",
" <td>Q9BQE6, Q9UNZ5, Q08211, Q9NR28, Q8N9N8, Q04637...</td>\n",
" <td>Single-cell sequencing</td>\n",
" <td>23892778</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2866</th>\n",
" <td>UBERON:0000922</td>\n",
" <td>Normal</td>\n",
" <td>Normal cell</td>\n",
" <td>CL:0000360</td>\n",
" <td>ADCK1, AGL, AIMP1, AKAP12, ARPC3, ATP1B3, ATP5...</td>\n",
" <td>ADCK1, AGL, AIMP1, AKAP12, ARPC3, ATP1B3, NA, ...</td>\n",
" <td>57143, 178, 9255, 9590, 10094, 483, NA, 586, 9...</td>\n",
" <td>ADCK1, GDE, AIMP1, AKA12, ARPC3, AT1B3, AT5F1,...</td>\n",
" <td>Q86TW2, P35573, Q12904, Q02952, O15145, P54709...</td>\n",
" <td>Single-cell sequencing</td>\n",
" <td>23892778</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1798 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" UberonOntologyID cancerType cellType CellOntologyID \\\n",
"1 UBERON:0002107 Normal Normal cell CL:0000632 \n",
"2 UBERON:0001295 Normal Normal cell CL:0000351 \n",
"3 UBERON:0000923 Normal Normal cell CL:0000670 \n",
"4 UBERON:0001772 Normal Normal cell CL:0000066 \n",
"5 UBERON:0001987 Normal Normal cell CL:0000351 \n",
"... ... ... ... ... \n",
"2862 UBERON:0000922 Normal Normal cell CL:0000023 \n",
"2863 UBERON:0000922 Normal Normal cell CL:0000353 \n",
"2864 UBERON:0000922 Normal Normal cell CL:0000353 \n",
"2865 UBERON:0000922 Normal Normal cell CL:0000353 \n",
"2866 UBERON:0000922 Normal Normal cell CL:0000360 \n",
"\n",
" cellMarker \\\n",
"1 Synaptophysin \n",
"2 CEACAM1 \n",
"3 VASA \n",
"4 KLF6 \n",
"5 FGF10 \n",
"... ... \n",
"2862 ABCA5, ABCC4, ABI3BP, AKD1, AMPD3, ANKRA2, ARV... \n",
"2863 ACCSL, ACVR1B, ARHGEF16, ASF1B, BCL2L10, BLCAP... \n",
"2864 ADPGK, AIM1, AIMP2, ARG2, ARHGAP17, ARIH1, CDC... \n",
"2865 C11orf48, C19orf53, DHX9, DIABLO, EIF1AD, EIF4... \n",
"2866 ADCK1, AGL, AIMP1, AKAP12, ARPC3, ATP1B3, ATP5... \n",
"\n",
" geneSymbol \\\n",
"1 SYP \n",
"2 CEACAM1 \n",
"3 DDX4 \n",
"4 KLF6 \n",
"5 FGF10 \n",
"... ... \n",
"2862 ABCA5, ABCC4, ABI3BP, NA, AMPD3, ANKRA2, ARV1,... \n",
"2863 ACCSL, ACVR1B, ARHGEF16, ASF1B, BCL2L10, BLCAP... \n",
"2864 ADPGK, CRYBG1, AIMP2, ARG2, ARHGAP17, ARIH1, C... \n",
"2865 LBHD1, C19orf53, DHX9, DIABLO, EIF1AD, EIF4G1,... \n",
"2866 ADCK1, AGL, AIMP1, AKAP12, ARPC3, ATP1B3, NA, ... \n",
"\n",
" geneID \\\n",
"1 6855 \n",
"2 634 \n",
"3 54514 \n",
"4 1316 \n",
"5 2255 \n",
"... ... \n",
"2862 23461, 10257, 25890, NA, 272, 57763, 64801, NA... \n",
"2863 390110, 91, 27237, 55723, 10017, 10904, 662, 7... \n",
"2864 83440, 202, 7965, 384, 55114, 25820, 55536, 24... \n",
"2865 79081, 28974, 1660, 56616, 84285, 1981, 26017,... \n",
"2866 57143, 178, 9255, 9590, 10094, 483, NA, 586, 9... \n",
"\n",
" proteinName \\\n",
"1 SYPH \n",
"2 CEAM1 \n",
"3 DDX4 \n",
"4 KLF6 \n",
"5 FGF10 \n",
"... ... \n",
"2862 ABCA5, MRP4, TARSH, NA, AMPD3, ANRA2, ARV1, NA... \n",
"2863 1A1L2, ACV1B, ARHGG, ASF1B, B2L10, BLCAP, SEC2... \n",
"2864 ADPGK, CRBG1, AIMP2, ARGI2, RHG17, ARI1, CDA7L... \n",
"2865 LBHD1, L10K, DHX9, DBLOH, EIF1A, IF4G1, FA32A,... \n",
"2866 ADCK1, GDE, AIMP1, AKA12, ARPC3, AT1B3, AT5F1,... \n",
"\n",
" proteinID \\\n",
"1 P08247 \n",
"2 P13688 \n",
"3 Q9NQI0 \n",
"4 Q99612 \n",
"5 O15520 \n",
"... ... \n",
"2862 Q8WWZ7, O15439, Q7Z7G0, NA, Q01432, Q9H9E1, Q9... \n",
"2863 Q4AC99, P36896, Q5VV41, Q9NVP2, Q9HD36, P62952... \n",
"2864 Q9BRR6, Q9Y4K1, Q13155, P78540, Q68EM7, Q9Y4X5... \n",
"2865 Q9BQE6, Q9UNZ5, Q08211, Q9NR28, Q8N9N8, Q04637... \n",
"2866 Q86TW2, P35573, Q12904, Q02952, O15145, P54709... \n",
"\n",
" markerResource PMID Company \n",
"1 Experiment 10595912 NaN \n",
"2 Experiment 10751340 NaN \n",
"3 Experiment 10920202 NaN \n",
"4 Experiment 12407152 NaN \n",
"5 Experiment 15950061 NaN \n",
"... ... ... ... \n",
"2862 Single-cell sequencing 23892778 NaN \n",
"2863 Single-cell sequencing 23892778 NaN \n",
"2864 Single-cell sequencing 23892778 NaN \n",
"2865 Single-cell sequencing 23892778 NaN \n",
"2866 Single-cell sequencing 23892778 NaN \n",
"\n",
"[1798 rows x 12 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"url = \"http://xteam.xbio.top/CellMarker/download/Human_cell_markers.txt\"\n",
"df = pd.read_csv(url, sep='\\t', dtype=str)\n",
"\n",
"# Remove redundant species type annotation since we're looking at human markers file\n",
"del df['speciesType']\n",
"\n",
"# Assert existince of and clean CL identifier\n",
"df = df[df[\"CellOntologyID\"].notna()]\n",
"df[\"CellOntologyID\"] = df[\"CellOntologyID\"].str.replace(\"_\", \":\")\n",
"del df['cellName']\n",
"\n",
"# Assert existince of and clean UBERON identifier\n",
"df = df[df[\"UberonOntologyID\"].notna()]\n",
"df[\"UberonOntologyID\"] = df[\"UberonOntologyID\"].str.replace(\"_\", \":\")\n",
"del df['tissueType']\n",
"\n",
"df"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment