Skip to content

Instantly share code, notes, and snippets.

@mburke05
Created October 27, 2015 20:16
Show Gist options
  • Save mburke05/9e78a5c7ce2efacb233f to your computer and use it in GitHub Desktop.
Save mburke05/9e78a5c7ce2efacb233f to your computer and use it in GitHub Desktop.
{
"cells": [
{
"cell_type": "code",
"execution_count": 227,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# we use pandas and regular expression libraries\n",
"import pandas as pd\n",
"import re"
]
},
{
"cell_type": "code",
"execution_count": 221,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Load the raw export and keep only the 'input' column, dropping rows where it is NaN.\n",
"# dropna(subset=...) replaces the redundant `notnull() == True` boolean mask, and the\n",
"# single expression avoids rebinding `df` three times within one cell.\n",
"df = pd.read_csv('Parsing.csv')[['input']].dropna(subset=['input'])"
]
},
{
"cell_type": "code",
"execution_count": 222,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# parameter names to look for inside each input string; new_params carries the\n",
"# same names with a trailing '=' appended\n",
"parameters = [\n",
"    'src', 'adid', 'kw', 'kwid', 'mt',\n",
"    'dist', 'qs', 'adpos', 'device', 'placement',\n",
"]\n",
"new_params = ['%s=' % params for params in parameters]"
]
},
{
"cell_type": "code",
"execution_count": 223,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Column store for the parsed output: one empty list per column, later handed to pandas.\n",
"# The keys are derived from new_params instead of being typed out a second time, so the\n",
"# columns cannot drift out of sync with the parameter list. 'comb_str=' and 'catchall'\n",
"# are the two extra columns that are not parameters themselves.\n",
"mock_df = {column: [] for column in ['comb_str='] + new_params + ['catchall']}"
]
},
{
"cell_type": "code",
"execution_count": 224,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# For every input string, pull out the value of each known 'key=' parameter and append it\n",
"# to that key's column. Strings containing none of the keys go whole into 'catchall'.\n",
"\n",
"# Empty the column lists first so that re-running this cell does not append duplicate rows.\n",
"for column_values in mock_df.values():\n",
"    del column_values[:]\n",
"\n",
"# A value runs lazily from just after 'key=' up to the next '_otherkey=' or the end of the\n",
"# string. Each pattern is compiled once per key instead of being rebuilt for every row.\n",
"value_patterns = dict(\n",
"    (params, re.compile(re.escape(params) + r'(.*?)(?=_[^_=\\n]+=|$)'))\n",
"    for params in new_params\n",
")\n",
"\n",
"for entry in df['input']:\n",
"    mock_df['comb_str='].append(entry)\n",
"    has_known_key = False\n",
"    for params in new_params:\n",
"        match = None\n",
"        # the plain substring test decides whether the key counts as present\n",
"        if params in entry:\n",
"            has_known_key = True\n",
"            match = value_patterns[params].search(entry)\n",
"        # the pattern can still fail when the key is present (e.g. a line break inside the\n",
"        # value), so fall back to an empty cell rather than calling .group() on None\n",
"        mock_df[params].append(match.group(1) if match else '')\n",
"    # exactly one of the parsed columns / the catchall column is populated per row\n",
"    mock_df['catchall'].append('' if has_known_key else entry)"
]
},
{
"cell_type": "code",
"execution_count": 225,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>adid=</th>\n",
" <th>adpos=</th>\n",
" <th>catchall</th>\n",
" <th>comb_str=</th>\n",
" <th>device=</th>\n",
" <th>dist=</th>\n",
" <th>kw=</th>\n",
" <th>kwid=</th>\n",
" <th>mt=</th>\n",
" <th>placement=</th>\n",
" <th>qs=</th>\n",
" <th>src=</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>8312488564</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>src=bing_adid=8312488564_kw=swiftcapital com_k...</td>\n",
" <td>c</td>\n",
" <td></td>\n",
" <td>swiftcapital com</td>\n",
" <td>35030235383</td>\n",
" <td>e</td>\n",
" <td></td>\n",
" <td>swiftcapital.com</td>\n",
" <td>bing</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>8312488564</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>src=bing_adid=8312488564_kw=swiftcapital com_k...</td>\n",
" <td>c</td>\n",
" <td></td>\n",
" <td>swiftcapital com</td>\n",
" <td>35030235383</td>\n",
" <td>e</td>\n",
" <td></td>\n",
" <td>swiftcapital.com</td>\n",
" <td>bing</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>8312488564</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>src=bing_adid=8312488564_kw=swiftcapital com_k...</td>\n",
" <td>c</td>\n",
" <td></td>\n",
" <td>swiftcapital com</td>\n",
" <td>35030235383</td>\n",
" <td>e</td>\n",
" <td></td>\n",
" <td>swiftcapital.com</td>\n",
" <td>bing</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>8312488564</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>src=bing_adid=8312488564_kw=swiftcapital com_k...</td>\n",
" <td>c</td>\n",
" <td></td>\n",
" <td>swiftcapital com</td>\n",
" <td>35030235383</td>\n",
" <td>e</td>\n",
" <td></td>\n",
" <td>swiftcapital.com</td>\n",
" <td>bing</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>8312488564</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>src=bing_adid=8312488564_kw=swiftcapital com_k...</td>\n",
" <td>c</td>\n",
" <td></td>\n",
" <td>swiftcapital com</td>\n",
" <td>35030235383</td>\n",
" <td>e</td>\n",
" <td></td>\n",
" <td>swiftcapital.com</td>\n",
" <td>bing</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>8312488564</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>src=bing_adid=8312488564_kw=swiftcapital com_k...</td>\n",
" <td>c</td>\n",
" <td></td>\n",
" <td>swiftcapital com</td>\n",
" <td>35030235383</td>\n",
" <td>e</td>\n",
" <td></td>\n",
" <td>swiftcapital.com</td>\n",
" <td>bing</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>8312488564</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>src=bing_adid=8312488564_kw=swiftcapital com_k...</td>\n",
" <td>c</td>\n",
" <td></td>\n",
" <td>swiftcapital com</td>\n",
" <td>35030235383</td>\n",
" <td>e</td>\n",
" <td></td>\n",
" <td>swiftcapital.com</td>\n",
" <td>bing</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>8312488564</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>src=bing_adid=8312488564_kw=swiftcapital com_k...</td>\n",
" <td>c</td>\n",
" <td></td>\n",
" <td>swiftcapital com</td>\n",
" <td>35030235383</td>\n",
" <td>e</td>\n",
" <td></td>\n",
" <td>swiftcapital.com</td>\n",
" <td>bing</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>8312488564</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>src=bing_adid=8312488564_kw=swiftcapital com_k...</td>\n",
" <td>c</td>\n",
" <td></td>\n",
" <td>swiftcapital com</td>\n",
" <td>35030235383</td>\n",
" <td>e</td>\n",
" <td></td>\n",
" <td>swiftcapital.com</td>\n",
" <td>bing</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>8312488564</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>src=bing_adid=8312488564_kw=www swiftcapital c...</td>\n",
" <td>c</td>\n",
" <td></td>\n",
" <td>www swiftcapital com</td>\n",
" <td>35030235386</td>\n",
" <td>e</td>\n",
" <td></td>\n",
" <td>www.swiftcapital.com</td>\n",
" <td>bing</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>{creative}</td>\n",
" <td>{adposition}</td>\n",
" <td></td>\n",
" <td>src=google_adid={creative}_kw={keyword}_mt={ma...</td>\n",
" <td>{device}</td>\n",
" <td>{network}</td>\n",
" <td>{keyword}</td>\n",
" <td></td>\n",
" <td>{matchtype}</td>\n",
" <td>{placement}</td>\n",
" <td></td>\n",
" <td>google</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>47791559991</td>\n",
" <td>1s1</td>\n",
" <td></td>\n",
" <td>src=google_adid=47791559991_kw=business financ...</td>\n",
" <td>c</td>\n",
" <td>g</td>\n",
" <td>business financing companies</td>\n",
" <td></td>\n",
" <td>e</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>google</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>47791559991</td>\n",
" <td>1s3</td>\n",
" <td></td>\n",
" <td>src=google_adid=47791559991_kw=business financ...</td>\n",
" <td>c</td>\n",
" <td>g</td>\n",
" <td>business financing</td>\n",
" <td></td>\n",
" <td>e</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>google</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>47791559991</td>\n",
" <td>1s3</td>\n",
" <td></td>\n",
" <td>src=google_adid=47791559991_kw=business financ...</td>\n",
" <td>c</td>\n",
" <td>g</td>\n",
" <td>business financing</td>\n",
" <td></td>\n",
" <td>e</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>google</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>47791559991</td>\n",
" <td>1s3</td>\n",
" <td></td>\n",
" <td>src=google_adid=47791559991_kw=business financ...</td>\n",
" <td>c</td>\n",
" <td>g</td>\n",
" <td>business financing</td>\n",
" <td></td>\n",
" <td>e</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>google</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>47791559991</td>\n",
" <td>1s3</td>\n",
" <td></td>\n",
" <td>src=google_adid=47791559991_kw=business financ...</td>\n",
" <td>c</td>\n",
" <td>g</td>\n",
" <td>business financing</td>\n",
" <td></td>\n",
" <td>e</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>google</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>47791559991</td>\n",
" <td>1s3</td>\n",
" <td></td>\n",
" <td>src=google_adid=47791559991_kw=business financ...</td>\n",
" <td>c</td>\n",
" <td>g</td>\n",
" <td>business financing</td>\n",
" <td></td>\n",
" <td>e</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>google</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>47791559991</td>\n",
" <td>1s1</td>\n",
" <td></td>\n",
" <td>src=google_adid=47791559991_kw=business financ...</td>\n",
" <td>m</td>\n",
" <td>s</td>\n",
" <td>business financing</td>\n",
" <td></td>\n",
" <td>e</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>google</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>47791559991</td>\n",
" <td>1s1</td>\n",
" <td></td>\n",
" <td>src=google_adid=47791559991_kw=business financ...</td>\n",
" <td>m</td>\n",
" <td>s</td>\n",
" <td>business financing</td>\n",
" <td></td>\n",
" <td>e</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>google</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>47791559991</td>\n",
" <td>1s1</td>\n",
" <td></td>\n",
" <td>src=google_adid=47791559991_kw=business financ...</td>\n",
" <td>m</td>\n",
" <td>s</td>\n",
" <td>business financing</td>\n",
" <td></td>\n",
" <td>e</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>google</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>47791573791</td>\n",
" <td>none</td>\n",
" <td></td>\n",
" <td>src=google_adid=47791573791_kw=_mt=_dist=d_adp...</td>\n",
" <td>c</td>\n",
" <td>d</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>www.instructables.com</td>\n",
" <td></td>\n",
" <td>google</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>47791573791</td>\n",
" <td>none</td>\n",
" <td></td>\n",
" <td>src=google_adid=47791573791_kw=_mt=_dist=d_adp...</td>\n",
" <td>c</td>\n",
" <td>d</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>www.investopedia.com</td>\n",
" <td></td>\n",
" <td>google</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>47791573791</td>\n",
" <td>none</td>\n",
" <td></td>\n",
" <td>src=google_adid=47791573791_kw=_mt=_dist=d_adp...</td>\n",
" <td>c</td>\n",
" <td>d</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>www.investopedia.com</td>\n",
" <td></td>\n",
" <td>google</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>47791575951</td>\n",
" <td>none</td>\n",
" <td></td>\n",
" <td>src=google_adid=47791575951_kw=_mt=_dist=d_adp...</td>\n",
" <td>c</td>\n",
" <td>d</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>news.chosun.com</td>\n",
" <td></td>\n",
" <td>google</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>47791575951</td>\n",
" <td>none</td>\n",
" <td></td>\n",
" <td>src=google_adid=47791575951_kw=_mt=_dist=d_adp...</td>\n",
" <td>c</td>\n",
" <td>d</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>www.remodeling.hw.net</td>\n",
" <td></td>\n",
" <td>google</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>47791626231</td>\n",
" <td>none</td>\n",
" <td></td>\n",
" <td>src=google_adid=47791626231_kw=_mt=_dist=d_adp...</td>\n",
" <td>c</td>\n",
" <td>d</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>banking.about.com</td>\n",
" <td></td>\n",
" <td>google</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>47791626231</td>\n",
" <td>none</td>\n",
" <td></td>\n",
" <td>src=google_adid=47791626231_kw=_mt=_dist=d_adp...</td>\n",
" <td>c</td>\n",
" <td>d</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>recipe-of-today.blogspot.com</td>\n",
" <td></td>\n",
" <td>google</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>47791626231</td>\n",
" <td>none</td>\n",
" <td></td>\n",
" <td>src=google_adid=47791626231_kw=_mt=_dist=d_adp...</td>\n",
" <td>c</td>\n",
" <td>d</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>recipe-of-today.blogspot.com</td>\n",
" <td></td>\n",
" <td>google</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>47791626231</td>\n",
" <td>none</td>\n",
" <td></td>\n",
" <td>src=google_adid=47791626231_kw=_mt=_dist=d_adp...</td>\n",
" <td>c</td>\n",
" <td>d</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>recipe-of-today.blogspot.com</td>\n",
" <td></td>\n",
" <td>google</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>47791626231</td>\n",
" <td>none</td>\n",
" <td></td>\n",
" <td>src=google_adid=47791626231_kw=_mt=_dist=d_adp...</td>\n",
" <td>c</td>\n",
" <td>d</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>recipe-of-today.blogspot.com</td>\n",
" <td></td>\n",
" <td>google</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30</th>\n",
" <td></td>\n",
" <td></td>\n",
" <td>100% commercial loans</td>\n",
" <td>100% commercial loans</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
" <td></td>\n",
" <td></td>\n",
" <td>50k business loans</td>\n",
" <td>50k business loans</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>32</th>\n",
" <td></td>\n",
" <td></td>\n",
" <td>50k business loans</td>\n",
" <td>50k business loans</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td></td>\n",
" <td></td>\n",
" <td>50k business loans</td>\n",
" <td>50k business loans</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>34</th>\n",
" <td></td>\n",
" <td></td>\n",
" <td>50k business loans</td>\n",
" <td>50k business loans</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>35</th>\n",
" <td></td>\n",
" <td></td>\n",
" <td>50k business loans</td>\n",
" <td>50k business loans</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>36</th>\n",
" <td></td>\n",
" <td></td>\n",
" <td>alliance loan company scam</td>\n",
" <td>alliance loan company scam</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>37</th>\n",
" <td></td>\n",
" <td></td>\n",
" <td>apply online for business loan</td>\n",
" <td>apply online for business loan</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>38</th>\n",
" <td></td>\n",
" <td></td>\n",
" <td>bad credit business start up loans uk</td>\n",
" <td>bad credit business start up loans uk</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>39</th>\n",
" <td></td>\n",
" <td></td>\n",
" <td>bad credit installment loans</td>\n",
" <td>bad credit installment loans</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" <td></td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" adid= adpos= catchall \\\n",
"0 8312488564 \n",
"1 8312488564 \n",
"2 8312488564 \n",
"3 8312488564 \n",
"4 8312488564 \n",
"5 8312488564 \n",
"6 8312488564 \n",
"7 8312488564 \n",
"8 8312488564 \n",
"9 8312488564 \n",
"10 {creative} {adposition} \n",
"11 47791559991 1s1 \n",
"12 47791559991 1s3 \n",
"13 47791559991 1s3 \n",
"14 47791559991 1s3 \n",
"15 47791559991 1s3 \n",
"16 47791559991 1s3 \n",
"17 47791559991 1s1 \n",
"18 47791559991 1s1 \n",
"19 47791559991 1s1 \n",
"20 47791573791 none \n",
"21 47791573791 none \n",
"22 47791573791 none \n",
"23 47791575951 none \n",
"24 47791575951 none \n",
"25 47791626231 none \n",
"26 47791626231 none \n",
"27 47791626231 none \n",
"28 47791626231 none \n",
"29 47791626231 none \n",
"30 100% commercial loans \n",
"31 50k business loans \n",
"32 50k business loans \n",
"33 50k business loans \n",
"34 50k business loans \n",
"35 50k business loans \n",
"36 alliance loan company scam \n",
"37 apply online for business loan \n",
"38 bad credit business start up loans uk \n",
"39 bad credit installment loans \n",
"\n",
" comb_str= device= dist= \\\n",
"0 src=bing_adid=8312488564_kw=swiftcapital com_k... c \n",
"1 src=bing_adid=8312488564_kw=swiftcapital com_k... c \n",
"2 src=bing_adid=8312488564_kw=swiftcapital com_k... c \n",
"3 src=bing_adid=8312488564_kw=swiftcapital com_k... c \n",
"4 src=bing_adid=8312488564_kw=swiftcapital com_k... c \n",
"5 src=bing_adid=8312488564_kw=swiftcapital com_k... c \n",
"6 src=bing_adid=8312488564_kw=swiftcapital com_k... c \n",
"7 src=bing_adid=8312488564_kw=swiftcapital com_k... c \n",
"8 src=bing_adid=8312488564_kw=swiftcapital com_k... c \n",
"9 src=bing_adid=8312488564_kw=www swiftcapital c... c \n",
"10 src=google_adid={creative}_kw={keyword}_mt={ma... {device} {network} \n",
"11 src=google_adid=47791559991_kw=business financ... c g \n",
"12 src=google_adid=47791559991_kw=business financ... c g \n",
"13 src=google_adid=47791559991_kw=business financ... c g \n",
"14 src=google_adid=47791559991_kw=business financ... c g \n",
"15 src=google_adid=47791559991_kw=business financ... c g \n",
"16 src=google_adid=47791559991_kw=business financ... c g \n",
"17 src=google_adid=47791559991_kw=business financ... m s \n",
"18 src=google_adid=47791559991_kw=business financ... m s \n",
"19 src=google_adid=47791559991_kw=business financ... m s \n",
"20 src=google_adid=47791573791_kw=_mt=_dist=d_adp... c d \n",
"21 src=google_adid=47791573791_kw=_mt=_dist=d_adp... c d \n",
"22 src=google_adid=47791573791_kw=_mt=_dist=d_adp... c d \n",
"23 src=google_adid=47791575951_kw=_mt=_dist=d_adp... c d \n",
"24 src=google_adid=47791575951_kw=_mt=_dist=d_adp... c d \n",
"25 src=google_adid=47791626231_kw=_mt=_dist=d_adp... c d \n",
"26 src=google_adid=47791626231_kw=_mt=_dist=d_adp... c d \n",
"27 src=google_adid=47791626231_kw=_mt=_dist=d_adp... c d \n",
"28 src=google_adid=47791626231_kw=_mt=_dist=d_adp... c d \n",
"29 src=google_adid=47791626231_kw=_mt=_dist=d_adp... c d \n",
"30 100% commercial loans \n",
"31 50k business loans \n",
"32 50k business loans \n",
"33 50k business loans \n",
"34 50k business loans \n",
"35 50k business loans \n",
"36 alliance loan company scam \n",
"37 apply online for business loan \n",
"38 bad credit business start up loans uk \n",
"39 bad credit installment loans \n",
"\n",
" kw= kwid= mt= \\\n",
"0 swiftcapital com 35030235383 e \n",
"1 swiftcapital com 35030235383 e \n",
"2 swiftcapital com 35030235383 e \n",
"3 swiftcapital com 35030235383 e \n",
"4 swiftcapital com 35030235383 e \n",
"5 swiftcapital com 35030235383 e \n",
"6 swiftcapital com 35030235383 e \n",
"7 swiftcapital com 35030235383 e \n",
"8 swiftcapital com 35030235383 e \n",
"9 www swiftcapital com 35030235386 e \n",
"10 {keyword} {matchtype} \n",
"11 business financing companies e \n",
"12 business financing e \n",
"13 business financing e \n",
"14 business financing e \n",
"15 business financing e \n",
"16 business financing e \n",
"17 business financing e \n",
"18 business financing e \n",
"19 business financing e \n",
"20 \n",
"21 \n",
"22 \n",
"23 \n",
"24 \n",
"25 \n",
"26 \n",
"27 \n",
"28 \n",
"29 \n",
"30 \n",
"31 \n",
"32 \n",
"33 \n",
"34 \n",
"35 \n",
"36 \n",
"37 \n",
"38 \n",
"39 \n",
"\n",
" placement= qs= src= \n",
"0 swiftcapital.com bing \n",
"1 swiftcapital.com bing \n",
"2 swiftcapital.com bing \n",
"3 swiftcapital.com bing \n",
"4 swiftcapital.com bing \n",
"5 swiftcapital.com bing \n",
"6 swiftcapital.com bing \n",
"7 swiftcapital.com bing \n",
"8 swiftcapital.com bing \n",
"9 www.swiftcapital.com bing \n",
"10 {placement} google \n",
"11 google \n",
"12 google \n",
"13 google \n",
"14 google \n",
"15 google \n",
"16 google \n",
"17 google \n",
"18 google \n",
"19 google \n",
"20 www.instructables.com google \n",
"21 www.investopedia.com google \n",
"22 www.investopedia.com google \n",
"23 news.chosun.com google \n",
"24 www.remodeling.hw.net google \n",
"25 banking.about.com google \n",
"26 recipe-of-today.blogspot.com google \n",
"27 recipe-of-today.blogspot.com google \n",
"28 recipe-of-today.blogspot.com google \n",
"29 recipe-of-today.blogspot.com google \n",
"30 \n",
"31 \n",
"32 \n",
"33 \n",
"34 \n",
"35 \n",
"36 \n",
"37 \n",
"38 \n",
"39 "
]
},
"execution_count": 225,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# build the result frame from the column lists and display it for a visual check\n",
"df_upd = pd.DataFrame(mock_df)\n",
"df_upd"
]
},
{
"cell_type": "code",
"execution_count": 226,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# write the parsed frame out as CSV so it can be opened in Excel\n",
"df_upd.to_csv('parsed.csv')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment