Skip to content

Instantly share code, notes, and snippets.

@notconfusing
Created February 27, 2016 18:40
Show Gist options
  • Save notconfusing/d950a67ab2cd958a90d7 to your computer and use it in GitHub Desktop.
Save notconfusing/d950a67ab2cd958a90d7 to your computer and use it in GitHub Desktop.
Calculate the Labour Hours of a user using PAWS wmflab SQL services.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import pymysql\n",
"import datetime\n",
"import pywikibot\n",
"from ipywidgets import interact"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Shared database connection reused by the cells below.\n",
"conn = pymysql.connect(\n",
"    host='paws-db',\n",
"    user='cscw',\n",
"    password='cscw',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def calculate_edit_hours(user, cursor,\n",
"                         session_gap=datetime.timedelta(hours=1),\n",
"                         avg_edit_time=datetime.timedelta(minutes=4, seconds=30)):\n",
"    \"\"\"Estimate the hours a user has spent editing, from revision timestamps.\n",
"\n",
"    Revisions are grouped into edit sessions: a revision made less than\n",
"    ``session_gap`` after the previous one continues the current session,\n",
"    otherwise it starts a new session. Each session counts as the span\n",
"    between its first and last revision plus ``avg_edit_time``.\n",
"\n",
"    Args:\n",
"        user: Value matched against ``rev_user_text`` with SQL ``LIKE``, so\n",
"            ``%`` and ``_`` in it act as wildcards.\n",
"        cursor: Database cursor that accepts ``%s`` query parameters and can\n",
"            read ``enwiki_p.revision_userindex``.\n",
"        session_gap: Gap between consecutive revisions at or above which a\n",
"            new session starts.\n",
"        avg_edit_time: Time added to the span of every session.\n",
"\n",
"    Returns:\n",
"        The estimated total editing time in hours as a float; 0.0 when the\n",
"        query returns no rows.\n",
"    \"\"\"\n",
"    starttime = datetime.datetime.now()\n",
"\n",
"    # Bind the user name as a query parameter instead of splicing it into\n",
"    # the SQL text, so quotes in the name cannot break or alter the query.\n",
"    query = ('SELECT rev_timestamp FROM enwiki_p.revision_userindex '\n",
"             'WHERE rev_user_text LIKE %s;')\n",
"    cursor.execute(query, (user,))\n",
"\n",
"    timestamps = []\n",
"    for row in cursor.fetchall():\n",
"        raw = row[0]\n",
"        # Decode only when the driver hands back bytes; text passes through.\n",
"        if isinstance(raw, bytes):\n",
"            raw = raw.decode('utf-8')\n",
"        timestamps.append(pywikibot.Timestamp.fromtimestampformat(raw))\n",
"    # The query has no ORDER BY, and the grouping below only makes sense\n",
"    # on chronologically ordered revisions.\n",
"    timestamps.sort()\n",
"\n",
"    total = datetime.timedelta()\n",
"    session_start = None\n",
"    previous = None\n",
"    for current in timestamps:\n",
"        if previous is None or current - previous >= session_gap:\n",
"            # Close the session that just ended before opening a new one.\n",
"            if previous is not None:\n",
"                total += (previous - session_start) + avg_edit_time\n",
"            session_start = current\n",
"        previous = current\n",
"    # The last session is still open when the loop finishes.\n",
"    if previous is not None:\n",
"        total += (previous - session_start) + avg_edit_time\n",
"\n",
"    tooksecs = (datetime.datetime.now() - starttime).total_seconds()\n",
"    # Skip the throughput report when no measurable time has elapsed.\n",
"    if tooksecs > 0:\n",
"        print('timestamps per second: ', len(timestamps) / tooksecs)\n",
"    # Convert the accumulated duration to hours.\n",
"    return total.total_seconds() / 3600.0\n"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"hea\n",
"timestamps per second: 21443.30095127309\n"
]
}
],
"source": [
"@interact\n",
"def calc(username=''):\n",
"    \"\"\"Compute the estimated edit hours for the user name typed in the widget.\n",
"\n",
"    Returns the value from calculate_edit_hours so the widget can show it\n",
"    (interact is expected to display non-None return values), or None when\n",
"    no user name has been entered yet.\n",
"    \"\"\"\n",
"    # The function is invoked with the empty default as well; there is\n",
"    # nothing to look up in that case, so skip the database round trip.\n",
"    if not username:\n",
"        return None\n",
"    # Re-establish the shared connection if it has dropped.\n",
"    conn.ping(True)\n",
"    cursor = conn.cursor()\n",
"    try:\n",
"        return calculate_edit_hours(username, cursor)\n",
"    finally:\n",
"        # Release the cursor even when the query or parsing fails.\n",
"        cursor.close()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.4.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment