Skip to content

Instantly share code, notes, and snippets.

@AKuederle
Last active June 16, 2016 07:18
Show Gist options
  • Save AKuederle/684a620bed1aac4444c30f238a3d975e to your computer and use it in GitHub Desktop.
Save AKuederle/684a620bed1aac4444c30f238a3d975e to your computer and use it in GitHub Desktop.
A short example of how to deal with strange datatypes
Sequence_ID, Sequence_Length, Hit_Count, Start, End, Strand
NM_172887.2,1-10753,1,10453,10459,+
XM_006504928.1,1-10641,1,10364,10370,+
XM_006504927.1,1-10650,1,10373,10379,+
XM_006504926.1,1-10659,1,10382,10388,+
NM_147219.2,1-8339,1,7632,7638,+
XM_006533065.2,1-8166,1,7529,7535,+
NM_009592.1,1-5759,3,2822,2828,+
3867,3873,+
3971,3977,+
NM_009784.2,1-7415,4,520,526,+
6391,6397,+
6542,6548,+
7302,7308,+
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"pd.options.mode.chained_assignment = None # default='warn'"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Sequence_ID</th>\n",
" <th>Sequence_Length</th>\n",
" <th>Hit_Count</th>\n",
" <th>Start</th>\n",
" <th>End</th>\n",
" <th>Strand</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>NM_172887.2</td>\n",
" <td>1-10753</td>\n",
" <td>1</td>\n",
" <td>10453.0</td>\n",
" <td>10459.0</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>XM_006504928.1</td>\n",
" <td>1-10641</td>\n",
" <td>1</td>\n",
" <td>10364.0</td>\n",
" <td>10370.0</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>XM_006504927.1</td>\n",
" <td>1-10650</td>\n",
" <td>1</td>\n",
" <td>10373.0</td>\n",
" <td>10379.0</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>XM_006504926.1</td>\n",
" <td>1-10659</td>\n",
" <td>1</td>\n",
" <td>10382.0</td>\n",
" <td>10388.0</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>NM_147219.2</td>\n",
" <td>1-8339</td>\n",
" <td>1</td>\n",
" <td>7632.0</td>\n",
" <td>7638.0</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>XM_006533065.2</td>\n",
" <td>1-8166</td>\n",
" <td>1</td>\n",
" <td>7529.0</td>\n",
" <td>7535.0</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>NM_009592.1</td>\n",
" <td>1-5759</td>\n",
" <td>3</td>\n",
" <td>2822.0</td>\n",
" <td>2828.0</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>3867</td>\n",
" <td>3873</td>\n",
" <td>+</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>3971</td>\n",
" <td>3977</td>\n",
" <td>+</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>NM_009784.2</td>\n",
" <td>1-7415</td>\n",
" <td>4</td>\n",
" <td>520.0</td>\n",
" <td>526.0</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>6391</td>\n",
" <td>6397</td>\n",
" <td>+</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>6542</td>\n",
" <td>6548</td>\n",
" <td>+</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>7302</td>\n",
" <td>7308</td>\n",
" <td>+</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Sequence_ID Sequence_Length Hit_Count Start End \\\n",
"0 NM_172887.2 1-10753 1 10453.0 10459.0 \n",
"1 XM_006504928.1 1-10641 1 10364.0 10370.0 \n",
"2 XM_006504927.1 1-10650 1 10373.0 10379.0 \n",
"3 XM_006504926.1 1-10659 1 10382.0 10388.0 \n",
"4 NM_147219.2 1-8339 1 7632.0 7638.0 \n",
"5 XM_006533065.2 1-8166 1 7529.0 7535.0 \n",
"6 NM_009592.1 1-5759 3 2822.0 2828.0 \n",
"7 3867 3873 + NaN NaN \n",
"8 3971 3977 + NaN NaN \n",
"9 NM_009784.2 1-7415 4 520.0 526.0 \n",
"10 6391 6397 + NaN NaN \n",
"11 6542 6548 + NaN NaN \n",
"12 7302 7308 + NaN NaN \n",
"\n",
" Strand \n",
"0 + \n",
"1 + \n",
"2 + \n",
"3 + \n",
"4 + \n",
"5 + \n",
"6 + \n",
"7 NaN \n",
"8 NaN \n",
"9 + \n",
"10 NaN \n",
"11 NaN \n",
"12 NaN "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Parse the raw hit table; rows with fewer fields than the header are padded with NaN on the right.\n",
"df = pd.read_csv(filepath_or_buffer='./file.txt')\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"['Sequence_ID', ' Sequence_Length', ' Hit_Count', ' Start', ' End', ' Strand']"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Column labels exactly as parsed, so stray whitespace in the names is visible.\n",
"header = df.columns.tolist()\n",
"header"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"['Sequence_ID', 'Sequence_Length', 'Hit_Count', 'Start', 'End', 'Strand']"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Trim surrounding whitespace from every column label.\n",
"new_header = list(map(str.strip, header))\n",
"new_header"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Sequence_ID</th>\n",
" <th>Sequence_Length</th>\n",
" <th>Hit_Count</th>\n",
" <th>Start</th>\n",
" <th>End</th>\n",
" <th>Strand</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>NM_172887.2</td>\n",
" <td>1-10753</td>\n",
" <td>1</td>\n",
" <td>10453.0</td>\n",
" <td>10459.0</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>XM_006504928.1</td>\n",
" <td>1-10641</td>\n",
" <td>1</td>\n",
" <td>10364.0</td>\n",
" <td>10370.0</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>XM_006504927.1</td>\n",
" <td>1-10650</td>\n",
" <td>1</td>\n",
" <td>10373.0</td>\n",
" <td>10379.0</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>XM_006504926.1</td>\n",
" <td>1-10659</td>\n",
" <td>1</td>\n",
" <td>10382.0</td>\n",
" <td>10388.0</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>NM_147219.2</td>\n",
" <td>1-8339</td>\n",
" <td>1</td>\n",
" <td>7632.0</td>\n",
" <td>7638.0</td>\n",
" <td>+</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Sequence_ID Sequence_Length Hit_Count Start End Strand\n",
"0 NM_172887.2 1-10753 1 10453.0 10459.0 +\n",
"1 XM_006504928.1 1-10641 1 10364.0 10370.0 +\n",
"2 XM_006504927.1 1-10650 1 10373.0 10379.0 +\n",
"3 XM_006504926.1 1-10659 1 10382.0 10388.0 +\n",
"4 NM_147219.2 1-8339 1 7632.0 7638.0 +"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Swap in the cleaned labels so columns can be addressed by their plain names.\n",
"df.columns = new_header\n",
"df.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Sequence_ID</th>\n",
" <th>Sequence_Length</th>\n",
" <th>Hit_Count</th>\n",
" <th>Start</th>\n",
" <th>End</th>\n",
" <th>Strand</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>NM_172887.2</td>\n",
" <td>1-10753</td>\n",
" <td>1</td>\n",
" <td>10453</td>\n",
" <td>10459</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>XM_006504928.1</td>\n",
" <td>1-10641</td>\n",
" <td>1</td>\n",
" <td>10364</td>\n",
" <td>10370</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>XM_006504927.1</td>\n",
" <td>1-10650</td>\n",
" <td>1</td>\n",
" <td>10373</td>\n",
" <td>10379</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>XM_006504926.1</td>\n",
" <td>1-10659</td>\n",
" <td>1</td>\n",
" <td>10382</td>\n",
" <td>10388</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>NM_147219.2</td>\n",
" <td>1-8339</td>\n",
" <td>1</td>\n",
" <td>7632</td>\n",
" <td>7638</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>XM_006533065.2</td>\n",
" <td>1-8166</td>\n",
" <td>1</td>\n",
" <td>7529</td>\n",
" <td>7535</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>NM_009592.1</td>\n",
" <td>1-5759</td>\n",
" <td>3</td>\n",
" <td>2822</td>\n",
" <td>2828</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3867</td>\n",
" <td>3873</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3971</td>\n",
" <td>3977</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>NM_009784.2</td>\n",
" <td>1-7415</td>\n",
" <td>4</td>\n",
" <td>520</td>\n",
" <td>526</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>6391</td>\n",
" <td>6397</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>6542</td>\n",
" <td>6548</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>7302</td>\n",
" <td>7308</td>\n",
" <td>+</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Sequence_ID Sequence_Length Hit_Count Start \\\n",
"0 NM_172887.2 1-10753 1 10453 \n",
"1 XM_006504928.1 1-10641 1 10364 \n",
"2 XM_006504927.1 1-10650 1 10373 \n",
"3 XM_006504926.1 1-10659 1 10382 \n",
"4 NM_147219.2 1-8339 1 7632 \n",
"5 XM_006533065.2 1-8166 1 7529 \n",
"6 NM_009592.1 1-5759 3 2822 \n",
"7 NaN NaN NaN 3867 \n",
"8 NaN NaN NaN 3971 \n",
"9 NM_009784.2 1-7415 4 520 \n",
"10 NaN NaN NaN 6391 \n",
"11 NaN NaN NaN 6542 \n",
"12 NaN NaN NaN 7302 \n",
"\n",
" End Strand \n",
"0 10459 + \n",
"1 10370 + \n",
"2 10379 + \n",
"3 10388 + \n",
"4 7638 + \n",
"5 7535 + \n",
"6 2828 + \n",
"7 3873 + \n",
"8 3977 + \n",
"9 526 + \n",
"10 6397 + \n",
"11 6548 + \n",
"12 7308 + "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Continuation rows (extra hits of the sequence above) carry only Start, End and Strand,\n",
"# which read_csv placed in the first three columns; they are the rows where Strand is missing.\n",
"nan_mask = df['Strand'].isnull()\n",
"# Use .loc for every write: chained assignment such as df.Start[nan_mask] = ... may modify a\n",
"# temporary copy and is a silent no-op under pandas Copy-on-Write.\n",
"# Convert the moved coordinates to numbers so Start/End do not end up mixing floats and strings.\n",
"df.loc[nan_mask, 'Start'] = pd.to_numeric(df.loc[nan_mask, 'Sequence_ID'])\n",
"df.loc[nan_mask, 'End'] = pd.to_numeric(df.loc[nan_mask, 'Sequence_Length'])\n",
"df.loc[nan_mask, 'Strand'] = df.loc[nan_mask, 'Hit_Count']\n",
"# Blank the source columns only after their values have been moved.\n",
"df.loc[nan_mask, ['Sequence_ID', 'Sequence_Length', 'Hit_Count']] = np.nan\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Sequence_ID</th>\n",
" <th>Sequence_Length</th>\n",
" <th>Hit_Count</th>\n",
" <th>Start</th>\n",
" <th>End</th>\n",
" <th>Strand</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>NM_172887.2</td>\n",
" <td>1-10753</td>\n",
" <td>1</td>\n",
" <td>10453</td>\n",
" <td>10459</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>XM_006504928.1</td>\n",
" <td>1-10641</td>\n",
" <td>1</td>\n",
" <td>10364</td>\n",
" <td>10370</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>XM_006504927.1</td>\n",
" <td>1-10650</td>\n",
" <td>1</td>\n",
" <td>10373</td>\n",
" <td>10379</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>XM_006504926.1</td>\n",
" <td>1-10659</td>\n",
" <td>1</td>\n",
" <td>10382</td>\n",
" <td>10388</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>NM_147219.2</td>\n",
" <td>1-8339</td>\n",
" <td>1</td>\n",
" <td>7632</td>\n",
" <td>7638</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>XM_006533065.2</td>\n",
" <td>1-8166</td>\n",
" <td>1</td>\n",
" <td>7529</td>\n",
" <td>7535</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>NM_009592.1</td>\n",
" <td>1-5759</td>\n",
" <td>3</td>\n",
" <td>2822</td>\n",
" <td>2828</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>3867</td>\n",
" <td>3873</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>3971</td>\n",
" <td>3977</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>NM_009784.2</td>\n",
" <td>1-7415</td>\n",
" <td>4</td>\n",
" <td>520</td>\n",
" <td>526</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>6391</td>\n",
" <td>6397</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>6542</td>\n",
" <td>6548</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>7302</td>\n",
" <td>7308</td>\n",
" <td>+</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Sequence_ID Sequence_Length Hit_Count Start \\\n",
"0 NM_172887.2 1-10753 1 10453 \n",
"1 XM_006504928.1 1-10641 1 10364 \n",
"2 XM_006504927.1 1-10650 1 10373 \n",
"3 XM_006504926.1 1-10659 1 10382 \n",
"4 NM_147219.2 1-8339 1 7632 \n",
"5 XM_006533065.2 1-8166 1 7529 \n",
"6 NM_009592.1 1-5759 3 2822 \n",
"7 3867 \n",
"8 3971 \n",
"9 NM_009784.2 1-7415 4 520 \n",
"10 6391 \n",
"11 6542 \n",
"12 7302 \n",
"\n",
" End Strand \n",
"0 10459 + \n",
"1 10370 + \n",
"2 10379 + \n",
"3 10388 + \n",
"4 7638 + \n",
"5 7535 + \n",
"6 2828 + \n",
"7 3873 + \n",
"8 3977 + \n",
"9 526 + \n",
"10 6397 + \n",
"11 6548 + \n",
"12 7308 + "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Display-only view with blanks instead of NaN; the result is not assigned, so df keeps its NaNs.\n",
"df.fillna(value='')"
]
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Sequence_ID</th>\n",
" <th>Sequence_Length</th>\n",
" <th>Hit_Count</th>\n",
" <th>Start</th>\n",
" <th>End</th>\n",
" <th>Strand</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>NM_172887.2</td>\n",
" <td>1-10753</td>\n",
" <td>1</td>\n",
" <td>10453</td>\n",
" <td>10459</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>XM_006504928.1</td>\n",
" <td>1-10641</td>\n",
" <td>1</td>\n",
" <td>10364</td>\n",
" <td>10370</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>XM_006504927.1</td>\n",
" <td>1-10650</td>\n",
" <td>1</td>\n",
" <td>10373</td>\n",
" <td>10379</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>XM_006504926.1</td>\n",
" <td>1-10659</td>\n",
" <td>1</td>\n",
" <td>10382</td>\n",
" <td>10388</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>NM_147219.2</td>\n",
" <td>1-8339</td>\n",
" <td>1</td>\n",
" <td>7632</td>\n",
" <td>7638</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>XM_006533065.2</td>\n",
" <td>1-8166</td>\n",
" <td>1</td>\n",
" <td>7529</td>\n",
" <td>7535</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>NM_009592.1</td>\n",
" <td>1-5759</td>\n",
" <td>3</td>\n",
" <td>2822</td>\n",
" <td>2828</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>NM_009592.1</td>\n",
" <td>1-5759</td>\n",
" <td>3</td>\n",
" <td>3867</td>\n",
" <td>3873</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>NM_009592.1</td>\n",
" <td>1-5759</td>\n",
" <td>3</td>\n",
" <td>3971</td>\n",
" <td>3977</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>NM_009784.2</td>\n",
" <td>1-7415</td>\n",
" <td>4</td>\n",
" <td>520</td>\n",
" <td>526</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>NM_009784.2</td>\n",
" <td>1-7415</td>\n",
" <td>4</td>\n",
" <td>6391</td>\n",
" <td>6397</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>NM_009784.2</td>\n",
" <td>1-7415</td>\n",
" <td>4</td>\n",
" <td>6542</td>\n",
" <td>6548</td>\n",
" <td>+</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>NM_009784.2</td>\n",
" <td>1-7415</td>\n",
" <td>4</td>\n",
" <td>7302</td>\n",
" <td>7308</td>\n",
" <td>+</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Sequence_ID Sequence_Length Hit_Count Start \\\n",
"0 NM_172887.2 1-10753 1 10453 \n",
"1 XM_006504928.1 1-10641 1 10364 \n",
"2 XM_006504927.1 1-10650 1 10373 \n",
"3 XM_006504926.1 1-10659 1 10382 \n",
"4 NM_147219.2 1-8339 1 7632 \n",
"5 XM_006533065.2 1-8166 1 7529 \n",
"6 NM_009592.1 1-5759 3 2822 \n",
"7 NM_009592.1 1-5759 3 3867 \n",
"8 NM_009592.1 1-5759 3 3971 \n",
"9 NM_009784.2 1-7415 4 520 \n",
"10 NM_009784.2 1-7415 4 6391 \n",
"11 NM_009784.2 1-7415 4 6542 \n",
"12 NM_009784.2 1-7415 4 7302 \n",
"\n",
" End Strand \n",
"0 10459 + \n",
"1 10370 + \n",
"2 10379 + \n",
"3 10388 + \n",
"4 7638 + \n",
"5 7535 + \n",
"6 2828 + \n",
"7 3873 + \n",
"8 3977 + \n",
"9 526 + \n",
"10 6397 + \n",
"11 6548 + \n",
"12 7308 + "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Forward-fill so each continuation row inherits Sequence_ID, Sequence_Length and Hit_Count\n",
"# from the nearest complete row above it. DataFrame.ffill() replaces fillna(method='ffill'),\n",
"# whose `method` argument is deprecated in recent pandas releases.\n",
"df.ffill()"
]
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.1"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment