Skip to content

Instantly share code, notes, and snippets.

@KimMyungSam
Created June 27, 2017 10:09
Show Gist options
  • Save KimMyungSam/3110df1e0bfc57ec5fe2b1c94fd372cd to your computer and use it in GitHub Desktop.
Save KimMyungSam/3110df1e0bfc57ec5fe2b1c94fd372cd to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"From: stephen.marquard@uct.ac.za\n",
"From: louis@media.berkeley.edu\n",
"From: zqian@umich.edu\n",
"From: rjlowe@iupui.edu\n",
"From: zqian@umich.edu\n",
"From: rjlowe@iupui.edu\n",
"From: cwen@iupui.edu\n",
"From: cwen@iupui.edu\n",
"From: gsilver@umich.edu\n",
"From: gsilver@umich.edu\n",
"From: zqian@umich.edu\n",
"From: gsilver@umich.edu\n",
"From: wagnermr@iupui.edu\n",
"From: zqian@umich.edu\n",
"From: antranig@caret.cam.ac.uk\n",
"From: gopal.ramasammycook@gmail.com\n",
"From: david.horwitz@uct.ac.za\n",
"From: david.horwitz@uct.ac.za\n",
"From: david.horwitz@uct.ac.za\n",
"From: david.horwitz@uct.ac.za\n",
"From: stephen.marquard@uct.ac.za\n",
"From: louis@media.berkeley.edu\n",
"From: louis@media.berkeley.edu\n",
"From: ray@media.berkeley.edu\n",
"From: cwen@iupui.edu\n",
"From: cwen@iupui.edu\n",
"From: cwen@iupui.edu\n"
]
}
],
"source": [
"hand = open('mbox-short.txt')\n",
"for line in hand:\n",
" line = line.rstrip()\n",
" if line.find('From:') >= 0:\n",
" # if line.startswith('From:'):\n",
" print (line)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"From: stephen.marquard@uct.ac.za\n",
"From: louis@media.berkeley.edu\n",
"From: zqian@umich.edu\n",
"From: rjlowe@iupui.edu\n",
"From: zqian@umich.edu\n",
"From: rjlowe@iupui.edu\n",
"From: cwen@iupui.edu\n",
"From: cwen@iupui.edu\n",
"From: gsilver@umich.edu\n",
"From: gsilver@umich.edu\n",
"From: zqian@umich.edu\n",
"From: gsilver@umich.edu\n",
"From: wagnermr@iupui.edu\n",
"From: zqian@umich.edu\n",
"From: antranig@caret.cam.ac.uk\n",
"From: gopal.ramasammycook@gmail.com\n",
"From: david.horwitz@uct.ac.za\n",
"From: david.horwitz@uct.ac.za\n",
"From: david.horwitz@uct.ac.za\n",
"From: david.horwitz@uct.ac.za\n",
"From: stephen.marquard@uct.ac.za\n",
"From: louis@media.berkeley.edu\n",
"From: louis@media.berkeley.edu\n",
"From: ray@media.berkeley.edu\n",
"From: cwen@iupui.edu\n",
"From: cwen@iupui.edu\n",
"From: cwen@iupui.edu\n"
]
}
],
"source": [
"import re\n",
"\n",
"hand = open('mbox-short.txt')\n",
"for line in hand:\n",
" line = line.strip()\n",
" if re.search('From:', line):\n",
" print (line)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"From stephen.marquard@uct.ac.za Sat Jan 5 09:14:16 2008\n",
"From: stephen.marquard@uct.ac.za\n",
"From louis@media.berkeley.edu Fri Jan 4 18:10:48 2008\n",
"From: louis@media.berkeley.edu\n",
"From zqian@umich.edu Fri Jan 4 16:10:39 2008\n",
"From: zqian@umich.edu\n",
"From rjlowe@iupui.edu Fri Jan 4 15:46:24 2008\n",
"From: rjlowe@iupui.edu\n",
"From zqian@umich.edu Fri Jan 4 15:03:18 2008\n",
"From: zqian@umich.edu\n",
"From rjlowe@iupui.edu Fri Jan 4 14:50:18 2008\n",
"From: rjlowe@iupui.edu\n",
"From cwen@iupui.edu Fri Jan 4 11:37:30 2008\n",
"From: cwen@iupui.edu\n",
"From cwen@iupui.edu Fri Jan 4 11:35:08 2008\n",
"From: cwen@iupui.edu\n",
"From gsilver@umich.edu Fri Jan 4 11:12:37 2008\n",
"From: gsilver@umich.edu\n",
"From gsilver@umich.edu Fri Jan 4 11:11:52 2008\n",
"From: gsilver@umich.edu\n",
"From zqian@umich.edu Fri Jan 4 11:11:03 2008\n",
"From: zqian@umich.edu\n",
"From gsilver@umich.edu Fri Jan 4 11:10:22 2008\n",
"From: gsilver@umich.edu\n",
"From wagnermr@iupui.edu Fri Jan 4 10:38:42 2008\n",
"From: wagnermr@iupui.edu\n",
"From zqian@umich.edu Fri Jan 4 10:17:43 2008\n",
"From: zqian@umich.edu\n",
"From antranig@caret.cam.ac.uk Fri Jan 4 10:04:14 2008\n",
"From: antranig@caret.cam.ac.uk\n",
"From gopal.ramasammycook@gmail.com Fri Jan 4 09:05:31 2008\n",
"From: gopal.ramasammycook@gmail.com\n",
"From david.horwitz@uct.ac.za Fri Jan 4 07:02:32 2008\n",
"From: david.horwitz@uct.ac.za\n",
"From david.horwitz@uct.ac.za Fri Jan 4 06:08:27 2008\n",
"From: david.horwitz@uct.ac.za\n",
"From david.horwitz@uct.ac.za Fri Jan 4 04:49:08 2008\n",
"From: david.horwitz@uct.ac.za\n",
"From david.horwitz@uct.ac.za Fri Jan 4 04:33:44 2008\n",
"From: david.horwitz@uct.ac.za\n",
"From stephen.marquard@uct.ac.za Fri Jan 4 04:07:34 2008\n",
"From: stephen.marquard@uct.ac.za\n",
"From louis@media.berkeley.edu Thu Jan 3 19:51:21 2008\n",
"From: louis@media.berkeley.edu\n",
"From louis@media.berkeley.edu Thu Jan 3 17:18:23 2008\n",
"From: louis@media.berkeley.edu\n",
"From ray@media.berkeley.edu Thu Jan 3 17:07:00 2008\n",
"From: ray@media.berkeley.edu\n",
"From cwen@iupui.edu Thu Jan 3 16:34:40 2008\n",
"From: cwen@iupui.edu\n",
"From cwen@iupui.edu Thu Jan 3 16:29:07 2008\n",
"From: cwen@iupui.edu\n",
"From cwen@iupui.edu Thu Jan 3 16:23:48 2008\n",
"From: cwen@iupui.edu\n"
]
}
],
"source": [
"import re\n",
"\n",
"hand = open('mbox-short.txt')\n",
"for line in hand:\n",
" line = line.rstrip()\n",
" # if re.search('^From:', line):\n",
" if re.search('^F.*:', line):\n",
" print (line)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"From stephen.marquard@uct.ac.za Sat Jan 5 09:14:16 2008\n",
"From: stephen.marquard@uct.ac.za\n",
"From louis@media.berkeley.edu Fri Jan 4 18:10:48 2008\n",
"From: louis@media.berkeley.edu\n",
"From zqian@umich.edu Fri Jan 4 16:10:39 2008\n",
"From: zqian@umich.edu\n",
"From rjlowe@iupui.edu Fri Jan 4 15:46:24 2008\n",
"From: rjlowe@iupui.edu\n",
"From zqian@umich.edu Fri Jan 4 15:03:18 2008\n",
"From: zqian@umich.edu\n",
"From rjlowe@iupui.edu Fri Jan 4 14:50:18 2008\n",
"From: rjlowe@iupui.edu\n",
"From cwen@iupui.edu Fri Jan 4 11:37:30 2008\n",
"From: cwen@iupui.edu\n",
"From cwen@iupui.edu Fri Jan 4 11:35:08 2008\n",
"From: cwen@iupui.edu\n",
"From gsilver@umich.edu Fri Jan 4 11:12:37 2008\n",
"From: gsilver@umich.edu\n",
"From gsilver@umich.edu Fri Jan 4 11:11:52 2008\n",
"From: gsilver@umich.edu\n",
"From zqian@umich.edu Fri Jan 4 11:11:03 2008\n",
"From: zqian@umich.edu\n",
"From gsilver@umich.edu Fri Jan 4 11:10:22 2008\n",
"From: gsilver@umich.edu\n",
"From wagnermr@iupui.edu Fri Jan 4 10:38:42 2008\n",
"From: wagnermr@iupui.edu\n",
"From zqian@umich.edu Fri Jan 4 10:17:43 2008\n",
"From: zqian@umich.edu\n",
"From antranig@caret.cam.ac.uk Fri Jan 4 10:04:14 2008\n",
"From: antranig@caret.cam.ac.uk\n",
"From gopal.ramasammycook@gmail.com Fri Jan 4 09:05:31 2008\n",
"From: gopal.ramasammycook@gmail.com\n",
"From david.horwitz@uct.ac.za Fri Jan 4 07:02:32 2008\n",
"From: david.horwitz@uct.ac.za\n",
"From david.horwitz@uct.ac.za Fri Jan 4 06:08:27 2008\n",
"From: david.horwitz@uct.ac.za\n",
"From david.horwitz@uct.ac.za Fri Jan 4 04:49:08 2008\n",
"From: david.horwitz@uct.ac.za\n",
"From david.horwitz@uct.ac.za Fri Jan 4 04:33:44 2008\n",
"From: david.horwitz@uct.ac.za\n",
"From stephen.marquard@uct.ac.za Fri Jan 4 04:07:34 2008\n",
"From: stephen.marquard@uct.ac.za\n",
"From louis@media.berkeley.edu Thu Jan 3 19:51:21 2008\n",
"From: louis@media.berkeley.edu\n",
"From louis@media.berkeley.edu Thu Jan 3 17:18:23 2008\n",
"From: louis@media.berkeley.edu\n",
"From ray@media.berkeley.edu Thu Jan 3 17:07:00 2008\n",
"From: ray@media.berkeley.edu\n",
"From cwen@iupui.edu Thu Jan 3 16:34:40 2008\n",
"From: cwen@iupui.edu\n",
"From cwen@iupui.edu Thu Jan 3 16:29:07 2008\n",
"From: cwen@iupui.edu\n",
"From cwen@iupui.edu Thu Jan 3 16:23:48 2008\n",
"From: cwen@iupui.edu\n"
]
}
],
"source": [
"import re\n",
"\n",
"hand = open('mbox-short.txt')\n",
"for line in hand:\n",
" line = line.rstrip()\n",
" if re.findall('^F.+?:', line):\n",
" print (line)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['2', '19', '42']\n"
]
}
],
"source": [
"import re\n",
"\n",
"hand = open('mbox-short.txt')\n",
"# for line in hand:\n",
"line = '2 19 42'\n",
"y = re.findall('[0-9]+', line)\n",
"print (y)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['stephen.marquard@uct.ac.za']\n",
"['stephen.marquard@uct.ac.za']\n",
"['From stephen.marquard@uct.ac.za']\n"
]
}
],
"source": [
"x ='From stephen.marquard@uct.ac.za Sat Jan 5 09:14:16 2008'\n",
"y = re.findall('\\S+@\\S+',x)\n",
"print (y)\n",
"\n",
"y = re.findall('^From (\\S+@\\S+)',x)\n",
"print (y)\n",
"\n",
"y = re.findall('^From \\S+@\\S+',x)\n",
"print (y)\n"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"uct.ac.za\n",
"uct.ac.za\n",
"['uct.ac.za']\n"
]
}
],
"source": [
"import re\n",
"data ='From stephen.marquard@uct.ac.za Sat Jan 5 09:14:16 2008'\n",
"\n",
"atpos = data.find('@')\n",
"sppos = data.find(' ', atpos)\n",
"host = data[atpos+1 : sppos]\n",
"print (host)\n",
"\n",
"words = data.split()\n",
"email = words[1]\n",
"pieces = email.split('@')\n",
"print (pieces[1])\n",
"\n",
"y = re.findall ('@([^ ]*)',data)\n",
"print (y)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Maximum: 0.9907\n"
]
}
],
"source": [
"import re\n",
"hand = open('mbox-short.txt')\n",
"numlist = list()\n",
"for line in hand:\n",
" line = line.rstrip()\n",
" stuff = re.findall('^X-DSPAM-Confidence: ([0-9.]+)',line)\n",
" if len(stuff) != 1 : continue\n",
" num = float(stuff[0])\n",
" numlist.append(num)\n",
" \n",
"print ('Maximum:', max(numlist))"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['$10.00']\n"
]
}
],
"source": [
"import re\n",
"x = 'We just received $10.00 for cookies'\n",
"y = re.findall('\\$[0-9.]+',x)\n",
"print (y)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment