Last active
September 15, 2016 13:45
-
-
Save milimetric/8b5f447e3ef09b6fe4384e0f75cc0b34 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- ran this on the hadoop cluster (not available publicly) | |
select std(edits_per_page), | |
avg(edits_per_page), | |
std(days_from_first_to_last_edit), | |
avg(days_from_first_to_last_edit) | |
from (select rev_page pid, | |
(unix_timestamp(max(rev_timestamp),'yyyyMMddHHmmss') - unix_timestamp(min(rev_timestamp),'yyyyMMddHHmmss')) / 86400 days_from_first_to_last_edit, | |
count(*) edits_per_page | |
from mediawiki_revision | |
inner join | |
mediawiki_page on page_id = rev_page | |
where page_namespace = 0 | |
and page_is_redirect = 0 | |
and mediawiki_page.wiki_db = 'enwiki' | |
and mediawiki_revision.wiki_db = 'enwiki' | |
group by rev_page | |
) t | |
; | |
-- total counts | |
select count(*) total_namespace_0_revisions | |
from mediawiki_revision | |
inner join | |
mediawiki_page on page_id = rev_page | |
where page_namespace = 0 | |
and page_is_redirect = 0 | |
and mediawiki_page.wiki_db = 'enwiki' | |
and mediawiki_revision.wiki_db = 'enwiki' | |
; | |
select count(*) total_namespace_0_pages | |
from mediawiki_page | |
where page_namespace = 0 | |
and page_is_redirect = 0 | |
and wiki_db = 'enwiki' | |
; | |
-- tried to run this on labs / analytics-store research dbs, didn't finish | |
select std(edits_per_page), | |
avg(edits_per_page) | |
from (select rev_page pid, | |
count(*) edits_per_page | |
from revision | |
inner join | |
page on page_id = rev_page | |
where page_namespace = 0 | |
and page_is_redirect = 0 | |
group by rev_page | |
) t | |
; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment