Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save milimetric/8b5f447e3ef09b6fe4384e0f75cc0b34 to your computer and use it in GitHub Desktop.
Save milimetric/8b5f447e3ef09b6fe4384e0f75cc0b34 to your computer and use it in GitHub Desktop.
-- ran this on the hadoop cluster (not available publicly)
select std(edits_per_page),
avg(edits_per_page),
std(days_from_first_to_last_edit),
avg(days_from_first_to_last_edit)
from (select rev_page pid,
(unix_timestamp(max(rev_timestamp),'yyyyMMddHHmmss') - unix_timestamp(min(rev_timestamp),'yyyyMMddHHmmss')) / 86400 days_from_first_to_last_edit,
count(*) edits_per_page
from mediawiki_revision
inner join
mediawiki_page on page_id = rev_page
where page_namespace = 0
and page_is_redirect = 0
and mediawiki_page.wiki_db = 'enwiki'
and mediawiki_revision.wiki_db = 'enwiki'
group by rev_page
) t
;
-- total counts
select count(*) total_namespace_0_revisions
from mediawiki_revision
inner join
mediawiki_page on page_id = rev_page
where page_namespace = 0
and page_is_redirect = 0
and mediawiki_page.wiki_db = 'enwiki'
and mediawiki_revision.wiki_db = 'enwiki'
;
select count(*) total_namespace_0_pages
from mediawiki_page
where page_namespace = 0
and page_is_redirect = 0
and wiki_db = 'enwiki'
;
-- tried to run this on labs / analytics-store research dbs, didn't finish
select std(edits_per_page),
avg(edits_per_page)
from (select rev_page pid,
count(*) edits_per_page
from revision
inner join
page on page_id = rev_page
where page_namespace = 0
and page_is_redirect = 0
group by rev_page
) t
;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment