Skip to content

Instantly share code, notes, and snippets.

@wasade
Created April 13, 2016 20:09
Show Gist options
  • Save wasade/9704c2714ec9daa9fc78f43f283d00a6 to your computer and use it in GitHub Desktop.
Save wasade/9704c2714ec9daa9fc78f43f283d00a6 to your computer and use it in GitHub Desktop.
AG samples not in "clean" metadata
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import biom"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" % Total % Received % Xferd Average Speed Time Time Time Current\n",
" Dload Upload Total Spent Left Speed\n",
"100 38.6M 100 38.6M 0 0 656k 0 0:01:00 0:01:00 --:--:-- 962k\n",
" % Total % Received % Xferd Average Speed Time Time Time Current\n",
" Dload Upload Total Spent Left Speed\n",
"100 35.8M 100 35.8M 0 0 946k 0 0:00:38 0:00:38 --:--:-- 910k\n"
]
}
],
"source": [
"# Download the raw (01-raw) and cleaned (04-meta) American Gut metadata\n",
"# tables into the working directory, keeping the remote file names.\n",
"!curl --remote-name ftp://ftp.microbio.me/AmericanGut/rounds-1-25/01-raw/metadata.txt\n",
"!curl --remote-name ftp://ftp.microbio.me/AmericanGut/rounds-1-25/04-meta/ag-cleaned.txt"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/daniel/miniconda3/envs/qiime191/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2902: DtypeWarning: Columns (70) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" interactivity=interactivity, compiler=compiler, result=result)\n"
]
}
],
"source": [
"# Load both metadata tables, reading '#SampleID' as str and using it as the index.\n",
"# low_memory=False makes pandas parse each file in one pass so every column gets\n",
"# a single consistent dtype; with the default chunked parser the recorded run\n",
"# emitted a DtypeWarning about mixed types in column 70.\n",
"raw = pd.read_csv('metadata.txt', sep='\\t', dtype={'#SampleID': str},\n",
"                  low_memory=False).set_index('#SampleID')\n",
"cln = pd.read_csv('ag-cleaned.txt', sep='\\t', dtype={'#SampleID': str},\n",
"                  low_memory=False).set_index('#SampleID')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# from https://github.com/biocore/American-Gut/blob/master/americangut/util.py#L432\n",
"# The same site -> simplified category mapping, written grouped by category\n",
"# and then inverted into the flat lookup dict.\n",
"_sites_by_category = {\n",
"    'FECAL': ['feces', 'stool'],\n",
"    'ORAL': ['tongue', 'mouth', 'gingiva', 'gingival epithelium', 'throat',\n",
"             'mucosa of tongue', 'palatine tonsil', 'hard palate', 'saliva',\n",
"             'buccal mucosa'],\n",
"    'SKIN': ['sebum', 'skin', 'nares', 'skin of hand', 'hand', 'skin of head',\n",
"             'hand skin', 'auricular region zone of skin', 'mucosa of vagina',\n",
"             'vagina', 'fossa', 'vaginal fornix', 'hair follicle', 'nostril'],\n",
"}\n",
"simple_matter_map = {site: category\n",
"                     for category, sites in _sites_by_category.items()\n",
"                     for site in sites}"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"9218\n",
"8361\n",
"8361\n"
]
}
],
"source": [
"# Row counts of the raw and cleaned tables, followed by the number of\n",
"# distinct sample IDs that appear in both.\n",
"print(raw.shape[0])\n",
"print(cln.shape[0])\n",
"print(len(set(cln.index).intersection(raw.index)))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Rows of the raw table whose sample ID does not occur in the cleaned table.\n",
"# A boolean mask replaces the former set indexer: pandas deprecated sets as\n",
"# .loc indexers and newer releases reject them, and a set has no defined\n",
"# order, whereas the mask keeps the rows in raw's original order.\n",
"missing = raw.loc[~raw.index.isin(cln.index)]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"857"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of rows in `missing`.\n",
"missing.shape[0]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Counter({'Unknown': 833, 'UBERON:vaginal introitus': 19, 'UBERON:hair': 5})\n",
"Counter({'ENVO:sterile water': 833, 'ENVO:mucus': 19, 'ENVO:sebum': 5})\n"
]
}
],
"source": [
"from collections import Counter\n",
"\n",
"# Tally the BODY_SITE and then the ENV_MATTER values of the rows in `missing`.\n",
"for column in ('BODY_SITE', 'ENV_MATTER'):\n",
"    print(Counter(missing[column]))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.11"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment