SELECT
u.user_name AS username
,u.user_id AS uid, p.page_id AS pid
,p.page_namespace AS nid
FROM
revision rev
,user u
,page p
WHERE
rev.rev_user = u.user_id
AND rev.rev_page = p.page_id
AND p.page_namespace NOT IN(3000,3001,3010,3011,3020,2021)
+-------------------------------------------------+--------+-------+------+
| username | uid | pid | nid |
+-------------------------------------------------+--------+-------+------+
| Shepazu | 2 | 1 | 0 |
| Shepazu | 2 | 1 | 0 |
...
SELECT DISTINCT
u.user_id AS uid
FROM
revision rev
,user u
,page p
WHERE
rev.rev_user = u.user_id
AND rev.rev_page = p.page_id
AND p.page_namespace NOT IN(3000,3001,3010,3011,3020,2021)
ORDER BY uid;
+--------+
| uid |
+--------+
| 2 |
| 3 |
| 4 |
| 5 |
| 6 |
| 8 |
| 9 |
| 10 |
| 11 |
| 14 |
...
SELECT DISTINCT
page_namespace
FROM
page
WHERE
page_namespace NOT IN(3000,3001,3010,3011,3020,2021);
+----------------+
| page_namespace |
+----------------+
| 0 |
| 1 |
| 2 |
| 3 |
| 4 |
| 6 |
| 8 |
| 9 |
| 10 |
| 12 |
| 14 |
| 102 |
| 103 |
| 106 |
| 108 |
| 200 |
| 202 |
| 1198 |
+----------------+
18 rows in set (0.00 sec)
List most prolific first
SELECT
articles.revision_uid AS user_id,
COUNT(articles.revision_uid) AS edit_count
FROM (
SELECT
r.rev_user AS revision_uid
FROM revision r
INNER JOIN
page p ON p.page_id = r.rev_page
WHERE
p.page_namespace NOT IN(3000,3001,3010,3011,3020,2021)
UNION ALL
SELECT user.user_id AS user_id FROM user
) AS articles
GROUP BY articles.revision_uid
HAVING edit_count > 5
ORDER BY edit_count DESC;
+---------+------------+
| user_id | edit_count |
+---------+------------+
| 50 | 9734 |
| 9289 | 3102 |
| 0 | 2563 |
| 6 | 1719 |
| 11 | 1704 |
| 9 | 1498 |
| 10080 | 815 |
| 115 | 807 |
| 33730 | 783 |
| 291 | 770 |
| 64 | 570 |
| 26452 | 506 |
| 5 | 486 |
| 42430 | 463 |
| 2 | 445 |
| 15716 | 442 |
| 10462 | 438 |
| 15623 | 408 |
| 6982 | 404 |
| 37466 | 327 |
| 9180 | 308 |
| 15582 | 292 |
| 69 | 286 |
| 36 | 279 |
| 91336 | 239 |
| 36119 | 238 |
| 8 | 219 |
| 14278 | 213 |
| 9756 | 213 |
| 6938 | 182 |
| 5184 | 180 |
| 32584 | 165 |
| 14342 | 160 |
| 3 | 145 |
| 71 | 144 |
| 33 | 142 |
| 119 | 134 |
| 127 | 127 |
| 12419 | 119 |
| 2824 | 114 |
| 14173 | 112 |
| 77 | 111 |
| 15146 | 109 |
| 124 | 107 |
| 11242 | 105 |
| 5375 | 96 |
...
- Each entry represent a page edited by a user
- Main content namespace only (0)
- Didn’t include 3000 in query because its where the staff make notes (i.e. not docs pages)
- Date range is between 2015-03-01 AND 2015-04-09; the date of the snapshot the database snapshot we made queries against
SELECT
revision.rev_timestamp AS rev_timestamp
,page.page_title AS page_title
FROM
revision, page
WHERE
page.page_id = revision.rev_page
AND revision.rev_timestamp > '20150301000000'
AND page.page_namespace IN(0)
ORDER BY page_title DESC;
- Uses cached count
- Data feels legit since we see "Darthnish", his account is relatively new
- Doesn’t consider when they made edits
- In order to edit, user MUST have a valid email
- Users can be considered as "autoconfirmed" since they made more than 20 edits
SELECT
user_id
,user_name
,user_editcount
FROM user
WHERE
user_editcount > 20
ORDER BY user_editcount DESC;