Created
February 27, 2016 18:40
-
-
Save notconfusing/d950a67ab2cd958a90d7 to your computer and use it in GitHub Desktop.
Calculate the labour hours of a user (estimated time spent editing) using the PAWS wmflabs SQL service.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 40, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"import pymysql\n", | |
"import datetime\n", | |
"import pywikibot\n", | |
"from ipywidgets import interact" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 41, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"conn = pymysql.connect(host='paws-db', user='cscw', password='cscw')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 31, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"def calculate_edit_hours(user, cursor):\n", | |
" starttime = datetime.datetime.now()\n", | |
" qstring = u'''SELECT rev_timestamp FROM enwiki_p.revision_userindex WHERE rev_user_text like \"'''+ user + u'''\";'''\n", | |
" cursor.execute(qstring)\n", | |
" results = cursor.fetchall()\n", | |
" clean_results = map(lambda t: t[0], results)\n", | |
" str_clean = [res.decode(\"utf-8\") for res in clean_results]\n", | |
" timestamps = [pywikibot.Timestamp.fromtimestampformat(x) for x in str_clean]\n", | |
" #print len(timestamps) \n", | |
" edit_sessions = []\n", | |
" curr_edit_session = []\n", | |
"\n", | |
" prev_timestamp = datetime.datetime(year=2001, month=1, day=1)\n", | |
"\n", | |
"\n", | |
" for contrib in timestamps:\n", | |
" curr_timestamp = contrib\n", | |
"\n", | |
" #if curr_timestamp > snapshot_timestamp: \n", | |
" # break \n", | |
" \n", | |
" if curr_timestamp-prev_timestamp < datetime.timedelta(hours=1):\n", | |
" curr_edit_session.append(curr_timestamp)\n", | |
" prev_timestamp = curr_timestamp\n", | |
"\n", | |
" else:\n", | |
" if curr_edit_session:\n", | |
" edit_sessions.append(curr_edit_session)\n", | |
" curr_edit_session = [curr_timestamp]\n", | |
" prev_timestamp = curr_timestamp\n", | |
"\n", | |
" #finally have to add the curr_edit_session to list \n", | |
" if curr_edit_session:\n", | |
" print('hea')\n", | |
" edit_sessions.append(curr_edit_session)\n", | |
"\n", | |
"\n", | |
" #print len(edit_sessions) \n", | |
" def session_length(edit_session):\n", | |
" avg_time = datetime.timedelta(minutes=4, seconds=30)\n", | |
" last = edit_session[-1]\n", | |
" first = edit_session[0]\n", | |
" span = last - first\n", | |
" total = span + avg_time\n", | |
" return total\n", | |
"\n", | |
" session_lengths = map(session_length, edit_sessions)\n", | |
" second_lens = map(lambda td: td.total_seconds(), session_lengths)\n", | |
" total_time = sum(second_lens)\n", | |
"\n", | |
" took = datetime.datetime.now() - starttime\n", | |
" tooksecs = took.total_seconds()\n", | |
" print('timestamps per second: ', len(timestamps)/float(tooksecs))\n", | |
" #returning total hours \n", | |
" return total_time / float(3600)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 45, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"hea\n", | |
"timestamps per second: 21443.30095127309\n" | |
] | |
} | |
], | |
"source": [ | |
"@interact\n", | |
"def calc(username=''):\n", | |
" conn.ping(True)\n", | |
" calculate_edit_hours(username, conn.cursor())" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.4.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment