theianchan/firefox.sql

## firefox.sql
-- How many rows do we have in each table?

select
	count(u.id) as users,
	count(distinct u.id) as distinct_users,
	count(s.user_id) as survey_responses,
	count(distinct s.user_id) as distinct_survey_responses,
	count(e.*) as events
from users u, survey s, events e;

--    27,267 unique users
-- 2,400,491 events
--     4,081 unique survey responses

-- Why does the combined query take over 30 minutes to return when we can get the results of `select count(*)` on individual tables in like 2 seconds?
-- Even without the events table included

--------------------------------------------------
--------------------------------------------------

-- Can we use the survey data to make reliable assumptions about our population? How representative are the users in the survey?

--------------------
-- fx_version     --
--------------------

with
	population as (
		with total as
			(select count(*) as "total" from users)
		select
			fx_version,
			count(*) as "count_population",
			round((count(*) / avg(total.total) * 100), 2) as "percent_population"
		from users, total
		group by 1
		order by 1
	),
	sample as (
		with total as
			(select count(*) as "total" from survey)
		select
			fx_version,
			count(*) as "count_sample",
			round((count(*) / avg(total.total) * 100), 2) as "percent_sample"
		from users, total
		where users.id in
			(select user_id from survey)
		group by 1
		order by 1
	)
select
	*
from population
	left join sample
	using (fx_version)
order by 1;

-- 27 FX versions starting with 3.5.11 and ending with 4.0b8pre
-- Sample/population distribution is pretty comparable w/89% of users on 4.0b6 in both cases

--------------------
-- os             --
--------------------

with
	population as (
		with total as
			(select count(*) as "total" from users)
		select
			os,
			count(*) as "count_population",
			round((count(*) / avg(total.total) * 100), 2) as "percent_population"
		from users, total
		group by 1
		order by 1
	),
	sample as (
		with total as
			(select count(*) as "total" from survey)
		select
			os,
			count(*) as "count_sample",
			round((count(*) / avg(total.total) * 100), 2) as "percent_sample"
		from users, total
		where users.id in
			(select user_id from survey)
		group by 1
		order by 1
	)
select
	*
from population
	left join sample
	using (os)
order by 1;
-- 18 OS types (3 Mac, 4 Linux, 10 Windows, 1 Sun)
with
	population as (
		with total as
			(select count(*) as "total" from users)
		select
			os,
			count(*) as "count_population",
			round((count(*) / avg(total.total) * 100), 2) as "percent_population"
		from users, total
		group by 1
		order by 1
	),
	sample as (
		with total as
			(select count(*) as "total" from survey)
		select
			os,
			count(*) as "count_sample",
			round((count(*) / avg(total.total) * 100), 2) as "percent_sample"
		from users, total
		where users.id in
			(select user_id from survey)
		group by 1
		order by 1
	)
select
	case
		when os ilike '%mac%'
			then 'Mac'
		when os ilike '%linux%'
			then 'Linux'
		when os ilike '%windows%'
			then 'Windows'
		when os ilike '%sunos%'
			then 'Sun'
		else 'Other'
	end as "os_group",
	sum(count_population) as count_population,
	sum(percent_population) as percent_population,
	sum(count_sample) as count_sample,
	sum(percent_sample) as percent_sample
from (
	select * from population left join sample using (os)
) as discrete
group by 1;
-- Grouping them we can see that Mac OS users are slightly overrepresented in the sample (10% vs 5% in the population)
-- Not a big problem but we can just keep that in mind

--------------------
-- version        --
--------------------

with
	population as (
		with total as
			(select count(*) as "total" from users)
		select
			version,
			count(*) as "count_population",
			round((count(*) / avg(total.total) * 100), 2) as "percent_population"
		from users, total
		group by 1
		order by 1
	),
	sample as (
		with total as
			(select count(*) as "total" from survey)
		select
			version,
			count(*) as "count_sample",
			round((count(*) / avg(total.total) * 100), 2) as "percent_sample"
		from users, total
		where users.id in
			(select user_id from survey)
		group by 1
		order by 1
	)
select
	*
from population
	left join sample
	using (version)
order by 1;
-- 4 different test pilot extension versions
-- 93-94% of users (sample and pop) are on the most recent version, 1.0.3

--------------------
-- extensions     --
--------------------

with
	population as (
		with total as
			(select count(*) as "total" from users)
		select
			number_extensions,
			count(*) as "count_population",
			round((count(*) / avg(total.total) * 100), 2) as "percent_population"
		from users, total
		group by 1
		order by 1
	),
	sample as (
		with total as
			(select count(*) as "total" from survey)
		select
			number_extensions,
			count(*) as "count_sample",
			round((count(*) / avg(total.total) * 100), 2) as "percent_sample"
		from users, total
		where users.id in
			(select user_id from survey)
		group by 1
		order by 1
	)
select
	*
from population
	left join sample
	using (number_extensions)
order by 1;
-- The user with the most extensions has 185 extensions
with
	population as (
		with total as
			(select count(*) as "total" from users)
		select
			number_extensions,
			count(*) as "count_population",
			round((count(*) / avg(total.total) * 100), 2) as "percent_population"
		from users, total
		group by 1
		order by 1
	),
	sample as (
		with total as
			(select count(*) as "total" from survey)
		select
			number_extensions,
			count(*) as "count_sample",
			round((count(*) / avg(total.total) * 100), 2) as "percent_sample"
		from users, total
		where users.id in
			(select user_id from survey)
		group by 1
		order by 1
	)
select
	case
		when number_extensions = 1
			then '1. 1'
		when number_extensions <= 5
			then '2. 2-5'
		when number_extensions <= 15
			then '3. 6-15'
		when number_extensions <= 25
			then '4. 16-25'
		else '5. 25+'
	end as "number_extensions",
	sum(count_population) as count_population,
	sum(percent_population) as percent_population,
	sum(count_sample) as count_sample,
	sum(percent_sample) as percent_sample
from (
	select * from population left join sample using (number_extensions)
) as discrete
group by 1
order by 1;
-- This is where we have the biggest delta between sample/pop - users with more extensions were more likely to be represented in the survey
-- Average number of extensions installed per user in sample vs population?
select
	avg(p.number_extensions) as population_avg_ext,
	avg(s.number_extensions) as sample_avg_ext
from users p,
	(
		select number_extensions from users
		where users.id in (select user_id from survey)
	) as s;
-- Population: 6
-- Sample: 9

--------------------------------------------------
--------------------------------------------------

-- Summary statistics based on survey data

-- "How long have you used Firefox?"
select q1, count(*), round(count(*)::numeric/(select count(*) from survey) * 100, 2) from survey group by 1 order by 1;
-- 9% < 1 year, 91% > 1 year, 69% > 3 years (!)

-- "Do you use more than one browser in your daily life?"
-- 65% use more than one browser

-- "If you use other browsers besides Firefox, what are they?"
select
	round((select count(*) from survey where q3 ilike '%0%')::numeric / sample.total * 100, 2) as "chrome",
	round((select count(*) from survey where q3 ilike '%1%')::numeric / sample.total * 100, 2) as "safari",
	round((select count(*) from survey where q3 ilike '%2%')::numeric / sample.total * 100, 2) as "opera",
	round((select count(*) from survey where q3 ilike '%3%')::numeric / sample.total * 100, 2) as "ie",
	round((select count(*) from survey where q3 ilike '%<%')::numeric / sample.total * 100, 2) as "other"
from (select count(*) as "total" from survey) as sample;
-- 46% IE, 42% Chrome, 17% Safari, 16% Opera, 5% Other (non-exclusive)

-- "If you use multiple browsers, what do you consider to be your primary browser?"
-- 17% only use Firefox, 65% Firefox, 6% Chrome, 1% Safari, 1% Opera, 3% IE

-- "What is your gender?"
-- 91% male lol

-- "How old are you?"
-- 63% between 18-35

-- "How much time do you spend on the Web each day?"
-- 89% > 2 hours, 20% > 10 hours

--------------------------------------------------
--------------------------------------------------

-- Let's create our user segments

-- New/casual users
select count(*) from survey
where (
		(q1 = '0' or q1 = '1')
		and
		(q7 = '0' or q7 = '1' or q7 = '2')
	)

-- Loyal/casual users
select count(*) from survey
where (
		(q1 <> '0' and q1 <> '1')
		and
		(q7 = '0' or q7 = '1' or q7 = '2')
	)

-- New/super users
select count(*) from survey
where (
		(q1 = '0' or q1 = '1')
		and
		(q7 <> '0' and q7 <> '1' and q7 <> '2')
	)

-- Loyal/super users
select count(*) from survey
where (
		(q1 <> '0' and q1 <> '1')
		and
		(q7 <> '0' and q7 <> '1' and q7 <> '2')
	)

-- Let's save the IDs of our loyal/super users
drop table if exists super_users;
select user_id as id
into temp table super_users
from survey
where (
		(q1 <> '0' and q1 <> '1')
		and
		(q7 <> '0' and q7 <> '1' and q7 <> '2')
	)
-- 2,429

-- And let's create a list of IDs for everyone else
drop table if exists other_users;
select id as id
into temp table other_users
from users
where id not in (select id from super_users);
-- 24,838

--------------------------------------------------
--------------------------------------------------

-- Now let's examine our events data each in segment

select * from events limit 10;

--------------------
-- event code ref --
--------------------

-- bookmark_status: 8
-- bookmark_create: 9
-- bookmark_choose: 10
-- bookmark_modify: 11
-- num_tabs: 26

-- Put them in their own table

drop table if exists event_codes;
select distinct event_code as code
into temp table event_codes
from events
where (event_code = 8 or event_code = 9 or event_code = 10 or event_code = 11 or event_code = 26);
select * from event_codes;

-- number of bookmarks created: bookmark_status.data1
-- number of bookmarks chosen: bookmark_choose
-- number of tabs: num_tabs.data2

-- Compare event/user counts across the two groups

with super as (
	with su as (select count(*) as "total" from super_users)
	select
		event_code,
		count(*) as "su_total",
		round(count(*) / avg(su.total), 2) as "su_capita"
	from events, su
	where
		event_code in (select code from event_codes)
		and
		user_id in (select id from super_users)
	group by 1
), other as (
	with ot as (select count(*) as "total" from other_users)
	select
		event_code,
		count(*) as "ot_total",
		round(count(*) / avg(ot.total), 2) as "ot_capita"
	from events, ot
	where
		event_code in (select code from event_codes)
		and
		user_id in (select id from other_users)
	group by 1
)
select
	*
from super left join other using (event_code)
order by 1;

--------------------
-- sanity checks  --
--------------------

-- Super users: 2,429
-- Other users: 24,838

-- How many unique users in the events table?
select count(distinct user_id) from events;
-- 14,718

-- How many users have at least 1 event recorded?
select count(distinct user_id) from events where user_id in
(select id from super_users);
-- Super: 1,430 (59%)
-- Other: 13,288 (53%)

-- At least 1 bookmark_status event?
select count(distinct user_id)
from events
where
	user_id in (select id from super_users)
	and event_code = 8;
-- Super: 874 (36%)
-- Other: 9,721 (39%)

-- At least 1 num_tabs event?
select count(distinct user_id)
from events
where
	user_id in (select id from super_users)
	and event_code = 26;
-- 1,412 (58%)
-- 13,186 (53%)

--------------------
-- bookmarks      --
--------------------

-- Number of bookmark_status events?
select count(*) from events where event_code = 8;
-- 23,758

-- Show distribution of bookmarks across both segments
select
	user_id,
	round(avg(split_part(data1, ' ', 1)::int))
from events
where
	event_code = 8
	and user_id in (select id from super_users)
group by 1
order by 2 desc;

-- Summary stats on num_bookmarks for each segment?
with super as	(
	select
		user_id,
		round(avg(split_part(data1, ' ', 1)::int)) as "bookmarks"
	from events
	where
		event_code = 8
		and user_id in (select id from super_users)
	group by 1
)
select
	count(bookmarks) as "users",
	round(avg(bookmarks)) as "mean_bk",
	round(median(bookmarks)) as "median_bk",
	max(bookmarks) as "max_bk",
	min(bookmarks) as "min_bk",
	round(stddev(bookmarks)) as "stdev_bk",
	round(variance(bookmarks)) as "var_bk"
from super;

--------------------
-- tabs           --
--------------------

select count(*) from events where event_code = 26;
-- 452,715 events

-- Show distribution
select
	user_id,
	round(avg(split_part(data2, ' ', 1)::int))
from events
where
	event_code = 26
	and user_id in (select id from super_users)
group by 1
order by 2 desc;

-- Summary stats (using max)
with super as	(
	select
		user_id,
		max(split_part(data2, ' ', 1)::int) as "tabs"
	from events
	where
		event_code = 26
		and user_id in (select id from other_users)
	group by 1
)
select
	count(user_id) as "users",
	(select count(user_id) from super where tabs >= 10) as "max_tabs_10+",
	round((select count(user_id) from super where tabs >= 10)::numeric / count(user_id) * 100, 2) as "%max_tabs_10+",
	(select count(user_id) from super where tabs >= 8) as "max_tabs_8+",
	round((select count(user_id) from super where tabs >= 8)::numeric / count(user_id) * 100, 2) as "%max_tabs_8+",
	(select count(user_id) from super where tabs >= 6) as "max_tabs_6+",
	round((select count(user_id) from super where tabs >= 6)::numeric / count(user_id) * 100, 2) as "%max_tabs_6+",
	(select count(user_id) from super where tabs >= 4) as "max_tabs_4+",
	round((select count(user_id) from super where tabs >= 4)::numeric / count(user_id) * 100, 2) as "%max_tabs_4+",
 	round(avg(tabs)) as "mean_tab",
	round(median(tabs)) as "median_tab",
	max(tabs) as "max_tab",
	min(tabs) as "min_tab",
	round(stddev(tabs)) as "stdev_tab",
	round(variance(tabs)) as "var_tab"
from super;
	-- How many rows do we have in each table?

	select
	count(u.id) as users,
	count(distinct u.id) as distinct_users,
	count(s.user_id) as survey_responses,
	count(distinct s.user_id) as distinct_survey_responses,
	count(e.*) as events
	from users u, survey s, events e;

	-- 27,267 unique users
	-- 2,400,491 events
	-- 4,081 unique survey responses

	-- Why does the combined query take over 30 minutes to return when we can get the results of `select count(*)` on individual tables in like 2 seconds?
	-- Even without the events table included

	--------------------------------------------------
	--------------------------------------------------

	-- Can we use the survey data to make reliable assumptions about our population? How representative are the users in the survey?

	--------------------
	-- fx_version --
	--------------------

	with
	population as (
	with total as
	(select count(*) as "total" from users)
	select
	fx_version,
	count(*) as "count_population",
	round((count() / avg(total.total) 100), 2) as "percent_population"
	from users, total
	group by 1
	order by 1
	),
	sample as (
	with total as
	(select count(*) as "total" from survey)
	select
	fx_version,
	count(*) as "count_sample",
	round((count() / avg(total.total) 100), 2) as "percent_sample"
	from users, total
	where users.id in
	(select user_id from survey)
	group by 1
	order by 1
	)
	select
	*
	from population
	left join sample
	using (fx_version)
	order by 1;

	-- 27 FX versions starting with 3.5.11 and ending with 4.0b8pre
	-- Sample/population distribution is pretty comparable w/89% of users on 4.0b6 in both cases

	--------------------
	-- os --
	--------------------

	with
	population as (
	with total as
	(select count(*) as "total" from users)
	select
	os,
	count(*) as "count_population",
	round((count() / avg(total.total) 100), 2) as "percent_population"
	from users, total
	group by 1
	order by 1
	),
	sample as (
	with total as
	(select count(*) as "total" from survey)
	select
	os,
	count(*) as "count_sample",
	round((count() / avg(total.total) 100), 2) as "percent_sample"
	from users, total
	where users.id in
	(select user_id from survey)
	group by 1
	order by 1
	)
	select
	*
	from population
	left join sample
	using (os)
	order by 1;
	-- 18 OS types (3 Mac, 4 Linux, 10 Windows, 1 Sun)
	with
	population as (
	with total as
	(select count(*) as "total" from users)
	select
	os,
	count(*) as "count_population",
	round((count() / avg(total.total) 100), 2) as "percent_population"
	from users, total
	group by 1
	order by 1
	),
	sample as (
	with total as
	(select count(*) as "total" from survey)
	select
	os,
	count(*) as "count_sample",
	round((count() / avg(total.total) 100), 2) as "percent_sample"
	from users, total
	where users.id in
	(select user_id from survey)
	group by 1
	order by 1
	)
	select
	case
	when os ilike '%mac%'
	then 'Mac'
	when os ilike '%linux%'
	then 'Linux'
	when os ilike '%windows%'
	then 'Windows'
	when os ilike '%sunos%'
	then 'Sun'
	else 'Other'
	end as "os_group",
	sum(count_population) as count_population,
	sum(percent_population) as percent_population,
	sum(count_sample) as count_sample,
	sum(percent_sample) as percent_sample
	from (
	select * from population left join sample using (os)
	) as discrete
	group by 1;
	-- Grouping them we can see that Mac OS users are slightly overrepresented in the sample (10% vs 5% in the population)
	-- Not a big problem but we can just keep that in mind

	--------------------
	-- version --
	--------------------

	with
	population as (
	with total as
	(select count(*) as "total" from users)
	select
	version,
	count(*) as "count_population",
	round((count() / avg(total.total) 100), 2) as "percent_population"
	from users, total
	group by 1
	order by 1
	),
	sample as (
	with total as
	(select count(*) as "total" from survey)
	select
	version,
	count(*) as "count_sample",
	round((count() / avg(total.total) 100), 2) as "percent_sample"
	from users, total
	where users.id in
	(select user_id from survey)
	group by 1
	order by 1
	)
	select
	*
	from population
	left join sample
	using (version)
	order by 1;
	-- 4 different test pilot extension versions
	-- 93-94% of users (sample and pop) are on the most recent version, 1.0.3

	--------------------
	-- extensions --
	--------------------

	with
	population as (
	with total as
	(select count(*) as "total" from users)
	select
	number_extensions,
	count(*) as "count_population",
	round((count() / avg(total.total) 100), 2) as "percent_population"
	from users, total
	group by 1
	order by 1
	),
	sample as (
	with total as
	(select count(*) as "total" from survey)
	select
	number_extensions,
	count(*) as "count_sample",
	round((count() / avg(total.total) 100), 2) as "percent_sample"
	from users, total
	where users.id in
	(select user_id from survey)
	group by 1
	order by 1
	)
	select
	*
	from population
	left join sample
	using (number_extensions)
	order by 1;
	-- The user with the most extensions has 185 extensions
	with
	population as (
	with total as
	(select count(*) as "total" from users)
	select
	number_extensions,
	count(*) as "count_population",
	round((count() / avg(total.total) 100), 2) as "percent_population"
	from users, total
	group by 1
	order by 1
	),
	sample as (
	with total as
	(select count(*) as "total" from survey)
	select
	number_extensions,
	count(*) as "count_sample",
	round((count() / avg(total.total) 100), 2) as "percent_sample"
	from users, total
	where users.id in
	(select user_id from survey)
	group by 1
	order by 1
	)
	select
	case
	when number_extensions = 1
	then '1. 1'
	when number_extensions <= 5
	then '2. 2-5'
	when number_extensions <= 15
	then '3. 6-15'
	when number_extensions <= 25
	then '4. 16-25'
	else '5. 25+'
	end as "number_extensions",
	sum(count_population) as count_population,
	sum(percent_population) as percent_population,
	sum(count_sample) as count_sample,
	sum(percent_sample) as percent_sample
	from (
	select * from population left join sample using (number_extensions)
	) as discrete
	group by 1
	order by 1;
	-- This is where we have the biggest delta between sample/pop - users with more extensions were more likely to be represented in the survey
	-- Average number of extensions installed per user in sample vs population?
	select
	avg(p.number_extensions) as population_avg_ext,
	avg(s.number_extensions) as sample_avg_ext
	from users p,
	(
	select number_extensions from users
	where users.id in (select user_id from survey)
	) as s;
	-- Population: 6
	-- Sample: 9

	--------------------------------------------------
	--------------------------------------------------

	-- Summary statistics based on survey data

	-- "How long have you used Firefox?"
	select q1, count(), round(count()::numeric/(select count() from survey) 100, 2) from survey group by 1 order by 1;
	-- 9% < 1 year, 91% > 1 year, 69% > 3 years (!)

	-- "Do you use more than one browser in your daily life?"
	-- 65% use more than one browser

	-- "If you use other browsers besides Firefox, what are they?"
	select
	round((select count() from survey where q3 ilike '%0%')::numeric / sample.total 100, 2) as "chrome",
	round((select count() from survey where q3 ilike '%1%')::numeric / sample.total 100, 2) as "safari",
	round((select count() from survey where q3 ilike '%2%')::numeric / sample.total 100, 2) as "opera",
	round((select count() from survey where q3 ilike '%3%')::numeric / sample.total 100, 2) as "ie",
	round((select count() from survey where q3 ilike '%<%')::numeric / sample.total 100, 2) as "other"
	from (select count(*) as "total" from survey) as sample;
	-- 46% IE, 42% Chrome, 17% Safari, 16% Opera, 5% Other (non-exclusive)

	-- "If you use multiple browsers, what do you consider to be your primary browser?"
	-- 17% only use Firefox, 65% Firefox, 6% Chrome, 1% Safari, 1% Opera, 3% IE

	-- "What is your gender?"
	-- 91% male lol

	-- "How old are you?"
	-- 63% between 18-35

	-- "How much time do you spend on the Web each day?"
	-- 89% > 2 hours, 20% > 10 hours

	--------------------------------------------------
	--------------------------------------------------

	-- Let's create our user segments

	-- New/casual users
	select count(*) from survey
	where (
	(q1 = '0' or q1 = '1')
	and
	(q7 = '0' or q7 = '1' or q7 = '2')
	)

	-- Loyal/casual users
	select count(*) from survey
	where (
	(q1 <> '0' and q1 <> '1')
	and
	(q7 = '0' or q7 = '1' or q7 = '2')
	)

	-- New/super users
	select count(*) from survey
	where (
	(q1 = '0' or q1 = '1')
	and
	(q7 <> '0' and q7 <> '1' and q7 <> '2')
	)

	-- Loyal/super users
	select count(*) from survey
	where (
	(q1 <> '0' and q1 <> '1')
	and
	(q7 <> '0' and q7 <> '1' and q7 <> '2')
	)

	-- Let's save the IDs of our loyal/super users
	drop table if exists super_users;
	select user_id as id
	into temp table super_users
	from survey
	where (
	(q1 <> '0' and q1 <> '1')
	and
	(q7 <> '0' and q7 <> '1' and q7 <> '2')
	)
	-- 2,429

	-- And let's create a list of IDs for everyone else
	drop table if exists other_users;
	select id as id
	into temp table other_users
	from users
	where id not in (select id from super_users);
	-- 24,838

	--------------------------------------------------
	--------------------------------------------------

	-- Now let's examine our events data each in segment

	select * from events limit 10;

	--------------------
	-- event code ref --
	--------------------

	-- bookmark_status: 8
	-- bookmark_create: 9
	-- bookmark_choose: 10
	-- bookmark_modify: 11
	-- num_tabs: 26

	-- Put them in their own table

	drop table if exists event_codes;
	select distinct event_code as code
	into temp table event_codes
	from events
	where (event_code = 8 or event_code = 9 or event_code = 10 or event_code = 11 or event_code = 26);
	select * from event_codes;

	-- number of bookmarks created: bookmark_status.data1
	-- number of bookmarks chosen: bookmark_choose
	-- number of tabs: num_tabs.data2

	-- Compare event/user counts across the two groups

	with super as (
	with su as (select count(*) as "total" from super_users)
	select
	event_code,
	count(*) as "su_total",
	round(count(*) / avg(su.total), 2) as "su_capita"
	from events, su
	where
	event_code in (select code from event_codes)
	and
	user_id in (select id from super_users)
	group by 1
	), other as (
	with ot as (select count(*) as "total" from other_users)
	select
	event_code,
	count(*) as "ot_total",
	round(count(*) / avg(ot.total), 2) as "ot_capita"
	from events, ot
	where
	event_code in (select code from event_codes)
	and
	user_id in (select id from other_users)
	group by 1
	)
	select
	*
	from super left join other using (event_code)
	order by 1;

	--------------------
	-- sanity checks --
	--------------------

	-- Super users: 2,429
	-- Other users: 24,838

	-- How many unique users in the events table?
	select count(distinct user_id) from events;
	-- 14,718

	-- How many users have at least 1 event recorded?
	select count(distinct user_id) from events where user_id in
	(select id from super_users);
	-- Super: 1,430 (59%)
	-- Other: 13,288 (53%)

	-- At least 1 bookmark_status event?
	select count(distinct user_id)
	from events
	where
	user_id in (select id from super_users)
	and event_code = 8;
	-- Super: 874 (36%)
	-- Other: 9,721 (39%)

	-- At least 1 num_tabs event?
	select count(distinct user_id)
	from events
	where
	user_id in (select id from super_users)
	and event_code = 26;
	-- 1,412 (58%)
	-- 13,186 (53%)

	--------------------
	-- bookmarks --
	--------------------

	-- Number of bookmark_status events?
	select count(*) from events where event_code = 8;
	-- 23,758

	-- Show distribution of bookmarks across both segments
	select
	user_id,
	round(avg(split_part(data1, ' ', 1)::int))
	from events
	where
	event_code = 8
	and user_id in (select id from super_users)
	group by 1
	order by 2 desc;

	-- Summary stats on num_bookmarks for each segment?
	with super as (
	select
	user_id,
	round(avg(split_part(data1, ' ', 1)::int)) as "bookmarks"
	from events
	where
	event_code = 8
	and user_id in (select id from super_users)
	group by 1
	)
	select
	count(bookmarks) as "users",
	round(avg(bookmarks)) as "mean_bk",
	round(median(bookmarks)) as "median_bk",
	max(bookmarks) as "max_bk",
	min(bookmarks) as "min_bk",
	round(stddev(bookmarks)) as "stdev_bk",
	round(variance(bookmarks)) as "var_bk"
	from super;

	--------------------
	-- tabs --
	--------------------

	select count(*) from events where event_code = 26;
	-- 452,715 events

	-- Show distribution
	select
	user_id,
	round(avg(split_part(data2, ' ', 1)::int))
	from events
	where
	event_code = 26
	and user_id in (select id from super_users)
	group by 1
	order by 2 desc;

	-- Summary stats (using max)
	with super as (
	select
	user_id,
	max(split_part(data2, ' ', 1)::int) as "tabs"
	from events
	where
	event_code = 26
	and user_id in (select id from other_users)
	group by 1
	)
	select
	count(user_id) as "users",
	(select count(user_id) from super where tabs >= 10) as "max_tabs_10+",
	round((select count(user_id) from super where tabs >= 10)::numeric / count(user_id) * 100, 2) as "%max_tabs_10+",
	(select count(user_id) from super where tabs >= 8) as "max_tabs_8+",
	round((select count(user_id) from super where tabs >= 8)::numeric / count(user_id) * 100, 2) as "%max_tabs_8+",
	(select count(user_id) from super where tabs >= 6) as "max_tabs_6+",
	round((select count(user_id) from super where tabs >= 6)::numeric / count(user_id) * 100, 2) as "%max_tabs_6+",
	(select count(user_id) from super where tabs >= 4) as "max_tabs_4+",
	round((select count(user_id) from super where tabs >= 4)::numeric / count(user_id) * 100, 2) as "%max_tabs_4+",
	round(avg(tabs)) as "mean_tab",
	round(median(tabs)) as "median_tab",
	max(tabs) as "max_tab",
	min(tabs) as "min_tab",
	round(stddev(tabs)) as "stdev_tab",
	round(variance(tabs)) as "var_tab"
	from super;