Skip to content

Instantly share code, notes, and snippets.

@diegov
Last active January 23, 2023 18:17
Show Gist options
  • Save diegov/e3705deac2b3229e64b29e11bb92f075 to your computer and use it in GitHub Desktop.
Save diegov/e3705deac2b3229e64b29e11bb92f075 to your computer and use it in GitHub Desktop.
pleroma_post_frequency_lists
create or replace view diego.clustered_posters as
with params as (
select 1 as config_id,
now() as dt,
interval '7 days' as period,
'https://ap.diegoveralli.net/users/diego' as account,
10::integer as cluster_count
union all
select 2,
now(), interval '3 days', 'https://ap.diegoveralli.net/users/diego',
10::integer
union all
select 3,
now(), interval '1 days', 'https://ap.diegoveralli.net/users/diego',
10::integer
),
poster_stats as (
select params.config_id,
actor,
follower_address,
extract(epoch from (params.period / count(1))) as avg_gap,
coalesce(stddev(gap), 0.0) as stdev_gap,
count(1) as post_count
from (
select params.config_id,
actor,
user_actor.follower_address as follower_address,
coalesce(lead(act.inserted_at, 1) over (
partition by actor, user_actor.follower_address, params.config_id
order by act.id nulls first), cast(params.dt as timestamp(0) without time zone)) as next_ts,
act.inserted_at,
data->>'type' as ev_type,
extract(epoch
from (coalesce(lead(act.inserted_at, 1) over (partition by actor, user_actor.follower_address, params.config_id
order by act.id nulls first), cast(params.dt as timestamp(0) without time zone)) - act.inserted_at)) as gap
from params
join activities act on act.inserted_at >= params.dt - params.period
and act.inserted_at < params.dt
and actor <> params.account
and data->>'type' in ('Create',
'Announce',
'Add')
join users as user_recipient on user_recipient.ap_id = params.account
join users user_actor on actor = user_actor.ap_id
join following_relationships fr on user_recipient.id = fr.follower_id
and user_actor.id = fr.following_id
and fr.state = 2
) base_stats
join params on params.config_id = base_stats.config_id
group by actor,
follower_address,
params.period,
params.config_id
order by extract(epoch from (min(params.period) / count(1))), stddev(gap)
)
select config_id,
id,
follower_address,
min_count,
cluster_id,
dense_rank() OVER (partition by config_id order by min_count) as list_id
from (
select config_id,
id,
follower_address,
cluster_id,
stdev_gap / avg_gap,
max(post_count) over (partition by config_id, cluster_id) as max_count,
min(post_count) over (partition by config_id, cluster_id) as min_count
from (
select params.config_id,
actor as id,
follower_address,
stdev_gap,
avg_gap,
post_count,
ST_ClusterKMeans(ST_MakePoint(avg_gap, -- daily rate
250000 * sqrt(post_count::float / (extract(epoch
from params.dt - (params.dt - period)) / 86400.0)))::geometry, params.cluster_count) OVER (partition by params.config_id) as cluster_id
from params
join poster_stats on params.config_id = poster_stats.config_id
) as clustered_activities
) as clusters_by_min_count
create procedure diego.update_tempo_lists(config_id integer)
language sql
begin atomic
update lists
set following = tempo_list.following
from
(select list_id,
array_agg(follower_address
order by follower_address) as following
from diego.clustered_posters
where diego.clustered_posters.config_id = update_tempo_lists.config_id
group by list_id) as tempo_list
where (lists.title = 'Largo'
and list_id = 1)
or (lists.title = 'Adagio'
and list_id = 2)
or (lists.title = 'Andante'
and list_id = 3)
or (lists.title = 'Allegro'
and list_id = 4)
or (lists.title = 'Presto'
and list_id = 5);
end;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment