Created
June 27, 2017 10:09
-
-
Save KimMyungSam/3110df1e0bfc57ec5fe2b1c94fd372cd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"From: stephen.marquard@uct.ac.za\n", | |
"From: louis@media.berkeley.edu\n", | |
"From: zqian@umich.edu\n", | |
"From: rjlowe@iupui.edu\n", | |
"From: zqian@umich.edu\n", | |
"From: rjlowe@iupui.edu\n", | |
"From: cwen@iupui.edu\n", | |
"From: cwen@iupui.edu\n", | |
"From: gsilver@umich.edu\n", | |
"From: gsilver@umich.edu\n", | |
"From: zqian@umich.edu\n", | |
"From: gsilver@umich.edu\n", | |
"From: wagnermr@iupui.edu\n", | |
"From: zqian@umich.edu\n", | |
"From: antranig@caret.cam.ac.uk\n", | |
"From: gopal.ramasammycook@gmail.com\n", | |
"From: david.horwitz@uct.ac.za\n", | |
"From: david.horwitz@uct.ac.za\n", | |
"From: david.horwitz@uct.ac.za\n", | |
"From: david.horwitz@uct.ac.za\n", | |
"From: stephen.marquard@uct.ac.za\n", | |
"From: louis@media.berkeley.edu\n", | |
"From: louis@media.berkeley.edu\n", | |
"From: ray@media.berkeley.edu\n", | |
"From: cwen@iupui.edu\n", | |
"From: cwen@iupui.edu\n", | |
"From: cwen@iupui.edu\n" | |
] | |
} | |
], | |
"source": [ | |
"# Print every line of the mailbox file that contains the text 'From:'.\n", | |
"# The with-block closes the file handle even if the loop raises.\n", | |
"with open('mbox-short.txt') as hand:\n", | |
"    for line in hand:\n", | |
"        line = line.rstrip()\n", | |
"        # Substring test: matches 'From:' anywhere in the line, not only at\n", | |
"        # the start (line.startswith('From:') would be the anchored variant).\n", | |
"        if 'From:' in line:\n", | |
"            print(line)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"From: stephen.marquard@uct.ac.za\n", | |
"From: louis@media.berkeley.edu\n", | |
"From: zqian@umich.edu\n", | |
"From: rjlowe@iupui.edu\n", | |
"From: zqian@umich.edu\n", | |
"From: rjlowe@iupui.edu\n", | |
"From: cwen@iupui.edu\n", | |
"From: cwen@iupui.edu\n", | |
"From: gsilver@umich.edu\n", | |
"From: gsilver@umich.edu\n", | |
"From: zqian@umich.edu\n", | |
"From: gsilver@umich.edu\n", | |
"From: wagnermr@iupui.edu\n", | |
"From: zqian@umich.edu\n", | |
"From: antranig@caret.cam.ac.uk\n", | |
"From: gopal.ramasammycook@gmail.com\n", | |
"From: david.horwitz@uct.ac.za\n", | |
"From: david.horwitz@uct.ac.za\n", | |
"From: david.horwitz@uct.ac.za\n", | |
"From: david.horwitz@uct.ac.za\n", | |
"From: stephen.marquard@uct.ac.za\n", | |
"From: louis@media.berkeley.edu\n", | |
"From: louis@media.berkeley.edu\n", | |
"From: ray@media.berkeley.edu\n", | |
"From: cwen@iupui.edu\n", | |
"From: cwen@iupui.edu\n", | |
"From: cwen@iupui.edu\n" | |
] | |
} | |
], | |
"source": [ | |
"import re\n", | |
"\n", | |
"# Regular-expression version of the substring filter:\n", | |
"# print every line of the mailbox file that contains 'From:'.\n", | |
"with open('mbox-short.txt') as hand:  # closed automatically on exit\n", | |
"    for line in hand:\n", | |
"        # strip() removes leading as well as trailing whitespace\n", | |
"        line = line.strip()\n", | |
"        if re.search('From:', line):\n", | |
"            print(line)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"From stephen.marquard@uct.ac.za Sat Jan 5 09:14:16 2008\n", | |
"From: stephen.marquard@uct.ac.za\n", | |
"From louis@media.berkeley.edu Fri Jan 4 18:10:48 2008\n", | |
"From: louis@media.berkeley.edu\n", | |
"From zqian@umich.edu Fri Jan 4 16:10:39 2008\n", | |
"From: zqian@umich.edu\n", | |
"From rjlowe@iupui.edu Fri Jan 4 15:46:24 2008\n", | |
"From: rjlowe@iupui.edu\n", | |
"From zqian@umich.edu Fri Jan 4 15:03:18 2008\n", | |
"From: zqian@umich.edu\n", | |
"From rjlowe@iupui.edu Fri Jan 4 14:50:18 2008\n", | |
"From: rjlowe@iupui.edu\n", | |
"From cwen@iupui.edu Fri Jan 4 11:37:30 2008\n", | |
"From: cwen@iupui.edu\n", | |
"From cwen@iupui.edu Fri Jan 4 11:35:08 2008\n", | |
"From: cwen@iupui.edu\n", | |
"From gsilver@umich.edu Fri Jan 4 11:12:37 2008\n", | |
"From: gsilver@umich.edu\n", | |
"From gsilver@umich.edu Fri Jan 4 11:11:52 2008\n", | |
"From: gsilver@umich.edu\n", | |
"From zqian@umich.edu Fri Jan 4 11:11:03 2008\n", | |
"From: zqian@umich.edu\n", | |
"From gsilver@umich.edu Fri Jan 4 11:10:22 2008\n", | |
"From: gsilver@umich.edu\n", | |
"From wagnermr@iupui.edu Fri Jan 4 10:38:42 2008\n", | |
"From: wagnermr@iupui.edu\n", | |
"From zqian@umich.edu Fri Jan 4 10:17:43 2008\n", | |
"From: zqian@umich.edu\n", | |
"From antranig@caret.cam.ac.uk Fri Jan 4 10:04:14 2008\n", | |
"From: antranig@caret.cam.ac.uk\n", | |
"From gopal.ramasammycook@gmail.com Fri Jan 4 09:05:31 2008\n", | |
"From: gopal.ramasammycook@gmail.com\n", | |
"From david.horwitz@uct.ac.za Fri Jan 4 07:02:32 2008\n", | |
"From: david.horwitz@uct.ac.za\n", | |
"From david.horwitz@uct.ac.za Fri Jan 4 06:08:27 2008\n", | |
"From: david.horwitz@uct.ac.za\n", | |
"From david.horwitz@uct.ac.za Fri Jan 4 04:49:08 2008\n", | |
"From: david.horwitz@uct.ac.za\n", | |
"From david.horwitz@uct.ac.za Fri Jan 4 04:33:44 2008\n", | |
"From: david.horwitz@uct.ac.za\n", | |
"From stephen.marquard@uct.ac.za Fri Jan 4 04:07:34 2008\n", | |
"From: stephen.marquard@uct.ac.za\n", | |
"From louis@media.berkeley.edu Thu Jan 3 19:51:21 2008\n", | |
"From: louis@media.berkeley.edu\n", | |
"From louis@media.berkeley.edu Thu Jan 3 17:18:23 2008\n", | |
"From: louis@media.berkeley.edu\n", | |
"From ray@media.berkeley.edu Thu Jan 3 17:07:00 2008\n", | |
"From: ray@media.berkeley.edu\n", | |
"From cwen@iupui.edu Thu Jan 3 16:34:40 2008\n", | |
"From: cwen@iupui.edu\n", | |
"From cwen@iupui.edu Thu Jan 3 16:29:07 2008\n", | |
"From: cwen@iupui.edu\n", | |
"From cwen@iupui.edu Thu Jan 3 16:23:48 2008\n", | |
"From: cwen@iupui.edu\n" | |
] | |
} | |
], | |
"source": [ | |
"import re\n", | |
"\n", | |
"# Print lines that start with 'F' and contain a colon somewhere after it.\n", | |
"# '^F.*:' is broader than '^From:': the 'From <address> <date>' lines also\n", | |
"# match because their timestamp contains colons.\n", | |
"with open('mbox-short.txt') as hand:  # closed automatically on exit\n", | |
"    for line in hand:\n", | |
"        line = line.rstrip()\n", | |
"        if re.search('^F.*:', line):\n", | |
"            print(line)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"From stephen.marquard@uct.ac.za Sat Jan 5 09:14:16 2008\n", | |
"From: stephen.marquard@uct.ac.za\n", | |
"From louis@media.berkeley.edu Fri Jan 4 18:10:48 2008\n", | |
"From: louis@media.berkeley.edu\n", | |
"From zqian@umich.edu Fri Jan 4 16:10:39 2008\n", | |
"From: zqian@umich.edu\n", | |
"From rjlowe@iupui.edu Fri Jan 4 15:46:24 2008\n", | |
"From: rjlowe@iupui.edu\n", | |
"From zqian@umich.edu Fri Jan 4 15:03:18 2008\n", | |
"From: zqian@umich.edu\n", | |
"From rjlowe@iupui.edu Fri Jan 4 14:50:18 2008\n", | |
"From: rjlowe@iupui.edu\n", | |
"From cwen@iupui.edu Fri Jan 4 11:37:30 2008\n", | |
"From: cwen@iupui.edu\n", | |
"From cwen@iupui.edu Fri Jan 4 11:35:08 2008\n", | |
"From: cwen@iupui.edu\n", | |
"From gsilver@umich.edu Fri Jan 4 11:12:37 2008\n", | |
"From: gsilver@umich.edu\n", | |
"From gsilver@umich.edu Fri Jan 4 11:11:52 2008\n", | |
"From: gsilver@umich.edu\n", | |
"From zqian@umich.edu Fri Jan 4 11:11:03 2008\n", | |
"From: zqian@umich.edu\n", | |
"From gsilver@umich.edu Fri Jan 4 11:10:22 2008\n", | |
"From: gsilver@umich.edu\n", | |
"From wagnermr@iupui.edu Fri Jan 4 10:38:42 2008\n", | |
"From: wagnermr@iupui.edu\n", | |
"From zqian@umich.edu Fri Jan 4 10:17:43 2008\n", | |
"From: zqian@umich.edu\n", | |
"From antranig@caret.cam.ac.uk Fri Jan 4 10:04:14 2008\n", | |
"From: antranig@caret.cam.ac.uk\n", | |
"From gopal.ramasammycook@gmail.com Fri Jan 4 09:05:31 2008\n", | |
"From: gopal.ramasammycook@gmail.com\n", | |
"From david.horwitz@uct.ac.za Fri Jan 4 07:02:32 2008\n", | |
"From: david.horwitz@uct.ac.za\n", | |
"From david.horwitz@uct.ac.za Fri Jan 4 06:08:27 2008\n", | |
"From: david.horwitz@uct.ac.za\n", | |
"From david.horwitz@uct.ac.za Fri Jan 4 04:49:08 2008\n", | |
"From: david.horwitz@uct.ac.za\n", | |
"From david.horwitz@uct.ac.za Fri Jan 4 04:33:44 2008\n", | |
"From: david.horwitz@uct.ac.za\n", | |
"From stephen.marquard@uct.ac.za Fri Jan 4 04:07:34 2008\n", | |
"From: stephen.marquard@uct.ac.za\n", | |
"From louis@media.berkeley.edu Thu Jan 3 19:51:21 2008\n", | |
"From: louis@media.berkeley.edu\n", | |
"From louis@media.berkeley.edu Thu Jan 3 17:18:23 2008\n", | |
"From: louis@media.berkeley.edu\n", | |
"From ray@media.berkeley.edu Thu Jan 3 17:07:00 2008\n", | |
"From: ray@media.berkeley.edu\n", | |
"From cwen@iupui.edu Thu Jan 3 16:34:40 2008\n", | |
"From: cwen@iupui.edu\n", | |
"From cwen@iupui.edu Thu Jan 3 16:29:07 2008\n", | |
"From: cwen@iupui.edu\n", | |
"From cwen@iupui.edu Thu Jan 3 16:23:48 2008\n", | |
"From: cwen@iupui.edu\n" | |
] | |
} | |
], | |
"source": [ | |
"import re\n", | |
"\n", | |
"# Same filter with a non-greedy quantifier: '.+?' stops at the first colon.\n", | |
"# Only the matched span differs from the greedy form; the set of lines that\n", | |
"# match is the same, so re.search is enough for this yes/no test.\n", | |
"with open('mbox-short.txt') as hand:  # closed automatically on exit\n", | |
"    for line in hand:\n", | |
"        line = line.rstrip()\n", | |
"        if re.search('^F.+?:', line):\n", | |
"            print(line)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"['2', '19', '42']\n" | |
] | |
} | |
], | |
"source": [ | |
"import re\n", | |
"\n", | |
"# Extract every run of digits from a sample string.\n", | |
"# The input is a literal, so no file is opened here.\n", | |
"line = '2 19 42'\n", | |
"y = re.findall('[0-9]+', line)\n", | |
"print(y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"['stephen.marquard@uct.ac.za']\n", | |
"['stephen.marquard@uct.ac.za']\n", | |
"['From stephen.marquard@uct.ac.za']\n" | |
] | |
} | |
], | |
"source": [ | |
"import re\n", | |
"\n", | |
"x = 'From stephen.marquard@uct.ac.za Sat Jan 5 09:14:16 2008'\n", | |
"\n", | |
"# Raw strings (r'...') hand the backslash in \\S to the regex engine unchanged\n", | |
"# instead of relying on Python tolerating an unknown string escape.\n", | |
"\n", | |
"# Any non-space run containing '@'.\n", | |
"y = re.findall(r'\\S+@\\S+', x)\n", | |
"print(y)\n", | |
"\n", | |
"# Anchored to 'From '; the parentheses make findall return only the address.\n", | |
"y = re.findall(r'^From (\\S+@\\S+)', x)\n", | |
"print(y)\n", | |
"\n", | |
"# Same anchor without a group: findall returns the whole match.\n", | |
"y = re.findall(r'^From \\S+@\\S+', x)\n", | |
"print(y)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"uct.ac.za\n", | |
"uct.ac.za\n", | |
"['uct.ac.za']\n" | |
] | |
} | |
], | |
"source": [ | |
"import re\n", | |
"\n", | |
"data = 'From stephen.marquard@uct.ac.za Sat Jan 5 09:14:16 2008'\n", | |
"\n", | |
"# 1) Index arithmetic: slice from just after '@' up to the next space.\n", | |
"at_index = data.find('@')\n", | |
"space_index = data.find(' ', at_index)\n", | |
"host = data[at_index + 1:space_index]\n", | |
"print(host)\n", | |
"\n", | |
"# 2) Tokenise: the second whitespace-separated word is the address;\n", | |
"#    splitting it on '@' leaves the host as the second piece.\n", | |
"email = data.split()[1]\n", | |
"pieces = email.split('@')\n", | |
"print(pieces[1])\n", | |
"\n", | |
"# 3) Regex: capture the non-space characters that follow '@'.\n", | |
"y = re.findall('@([^ ]*)', data)\n", | |
"print(y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Maximum: 0.9907\n" | |
] | |
} | |
], | |
"source": [ | |
"import re\n", | |
"\n", | |
"# Collect the number on every 'X-DSPAM-Confidence:' line and report the largest.\n", | |
"numlist = list()\n", | |
"with open('mbox-short.txt') as hand:  # closed automatically on exit\n", | |
"    for line in hand:\n", | |
"        line = line.rstrip()\n", | |
"        stuff = re.findall('^X-DSPAM-Confidence: ([0-9.]+)', line)\n", | |
"        # The ^ anchor allows at most one match per line; skip other lines.\n", | |
"        if len(stuff) != 1:\n", | |
"            continue\n", | |
"        num = float(stuff[0])\n", | |
"        numlist.append(num)\n", | |
"\n", | |
"# max() raises ValueError on an empty list, so guard the no-match case.\n", | |
"if numlist:\n", | |
"    print('Maximum:', max(numlist))\n", | |
"else:\n", | |
"    print('No X-DSPAM-Confidence values found')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 37, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"['$10.00']\n" | |
] | |
} | |
], | |
"source": [ | |
"import re\n", | |
"\n", | |
"x = 'We just received $10.00 for cookies'\n", | |
"# \\$ matches a literal dollar sign (a bare $ would mean end-of-line).\n", | |
"# The raw string passes the backslash to the regex engine unchanged.\n", | |
"y = re.findall(r'\\$[0-9.]+', x)\n", | |
"print(y)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment